[FFmpeg-devel] [PATCH] avfilter/vf_zscale: add slice threading
Paul B Mahol
onemda at gmail.com
Wed May 29 22:22:05 EEST 2019
Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
libavfilter/vf_zscale.c | 335 +++++++++++++++++++++++++---------------
1 file changed, 211 insertions(+), 124 deletions(-)
diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
index f0309272fa..082150adf0 100644
--- a/libavfilter/vf_zscale.c
+++ b/libavfilter/vf_zscale.c
@@ -74,6 +74,16 @@ enum var_name {
VARS_NB
};
+/**
+ * zimg state owned by a single slice-threading job.
+ * ZScaleContext.ztd holds an array of these, indexed by job number.
+ */
+typedef struct ZScaleThreadContext {
+    void  *tmp;      ///< scratch buffer handed to zimg_filter_graph_process()
+    size_t tmp_size; ///< passed to graph_build() together with tmp
+
+    /* Formats describing the colour planes for this job. */
+    zimg_image_format src_format;
+    zimg_image_format dst_format;
+
+    /* Formats describing the alpha plane for this job. */
+    zimg_image_format alpha_src_format;
+    zimg_image_format alpha_dst_format;
+
+    /* Graph builder parameters for the alpha graph and the colour graph. */
+    zimg_graph_builder_params alpha_params;
+    zimg_graph_builder_params params;
+
+    /* Built graphs; released with zimg_filter_graph_free(). */
+    zimg_filter_graph *alpha_graph;
+    zimg_filter_graph *graph;
+} ZScaleThreadContext;
+
typedef struct ZScaleContext {
const AVClass *class;
@@ -100,6 +110,8 @@ typedef struct ZScaleContext {
double nominal_peak_luminance;
int approximate_gamma;
+ int nb_threads;
+
char *w_expr; ///< width expression string
char *h_expr; ///< height expression string
@@ -110,13 +122,7 @@ typedef struct ZScaleContext {
int force_original_aspect_ratio;
- void *tmp;
- size_t tmp_size;
-
- zimg_image_format src_format, dst_format;
- zimg_image_format alpha_src_format, alpha_dst_format;
- zimg_graph_builder_params alpha_params, params;
- zimg_filter_graph *alpha_graph, *graph;
+ ZScaleThreadContext *ztd;
enum AVColorSpace in_colorspace, out_colorspace;
enum AVColorTransferCharacteristic in_trc, out_trc;
@@ -204,6 +210,12 @@ static int config_props(AVFilterLink *outlink)
int ret;
int factor_w, factor_h;
+ s->nb_threads = ff_filter_get_nb_threads(ctx);
+ av_freep(&s->ztd);
+ s->ztd = av_calloc(s->nb_threads, sizeof(*s->ztd));
+ if (!s->ztd)
+ return AVERROR(ENOMEM);
+
var_values[VAR_IN_W] = var_values[VAR_IW] = inlink->w;
var_values[VAR_IN_H] = var_values[VAR_IH] = inlink->h;
var_values[VAR_OUT_W] = var_values[VAR_OW] = NAN;
@@ -458,10 +470,12 @@ static int convert_range(enum AVColorRange color_range)
}
static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
- int colorspace, int primaries, int transfer, int range, int location)
+ int colorspace, int primaries, int transfer, int range, int location,
+ int width, int height,
+ int slice_start, int slice_end)
{
- format->width = frame->width;
- format->height = frame->height;
+ format->width = width;
+ format->height = height;
format->subsample_w = desc->log2_chroma_w;
format->subsample_h = desc->log2_chroma_h;
format->depth = desc->comp[0].depth;
@@ -472,6 +486,10 @@ static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFm
format->transfer_characteristics = transfer == - 1 ? convert_trc(frame->color_trc) : transfer;
format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
+ format->active_region.left = 0;
+ format->active_region.top = slice_start;
+ format->active_region.width = width;
+ format->active_region.height = slice_end - slice_start;
}
static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *params,
@@ -502,16 +520,163 @@ static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *par
return 0;
}
+/**
+ * Argument block shared by all jobs of one execute() call:
+ * the frame pair being processed and their pixel format descriptors.
+ */
+typedef struct ThreadData {
+    AVFrame *in;                     ///< input frame
+    AVFrame *out;                    ///< output frame
+    const AVPixFmtDescriptor *desc;  ///< descriptor used for the input frame
+    const AVPixFmtDescriptor *odesc; ///< descriptor used for the output frame
+} ThreadData;
+
+/**
+ * Slice-threading job: (re)build the zimg filter graph(s) used by job jobnr.
+ *
+ * The job's band of input rows is selected through the source active_region,
+ * while the destination formats are sized to the job's band of output rows.
+ *
+ * @param ctx     filter context; ctx->priv is the ZScaleContext
+ * @param arg     ThreadData with the input/output frames and their descriptors
+ * @param jobnr   index of this job, also used as the index into s->ztd
+ * @param nb_jobs total number of jobs the frame is divided into
+ * @return 0 on success, otherwise the value returned by graph_build() or
+ *         print_zimg_error()
+ */
+static int prepare_graph(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ZScaleContext *s = ctx->priv;
+    ZScaleThreadContext *z = &s->ztd[jobnr];
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const AVPixFmtDescriptor *desc = td->desc;
+    const AVPixFmtDescriptor *odesc = td->odesc;
+    /* NOTE(review): band boundaries are not aligned to the chroma
+     * subsampling factor; confirm zimg accepts the resulting heights for
+     * vertically subsampled formats. */
+    const int in_slice_start = (in->height * jobnr) / nb_jobs;
+    const int in_slice_end = (in->height * (jobnr+1)) / nb_jobs;
+    const int out_slice_start = (out->height * jobnr) / nb_jobs;
+    const int out_slice_end = (out->height * (jobnr+1)) / nb_jobs;
+    const int out_slice_height = out_slice_end - out_slice_start;
+    int ret;
+
+    zimg_image_format_default(&z->src_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&z->dst_format, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&z->params, ZIMG_API_VERSION);
+
+    z->params.dither_type = s->dither;
+    z->params.cpu_type = ZIMG_CPU_AUTO;
+    z->params.resample_filter = s->filter;
+    z->params.resample_filter_uv = s->filter;
+    z->params.nominal_peak_luminance = s->nominal_peak_luminance;
+    z->params.allow_approximate_gamma = s->approximate_gamma;
+
+    /* Source: full frame dimensions, active region limited to this band. */
+    format_init(&z->src_format, in, desc, s->colorspace_in,
+                s->primaries_in, s->trc_in, s->range_in, s->chromal_in,
+                in->width, in->height,
+                in_slice_start, in_slice_end);
+    /* Destination: an image that is exactly this job's band of rows. */
+    format_init(&z->dst_format, out, odesc, s->colorspace,
+                s->primaries, s->trc, s->range, s->chromal,
+                out->width, out_slice_height,
+                0, out_slice_height);
+
+    ret = graph_build(&z->graph, &z->params, &z->src_format, &z->dst_format,
+                      &z->tmp, &z->tmp_size);
+    if (ret)
+        return ret;
+
+    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        zimg_image_format_default(&z->alpha_src_format, ZIMG_API_VERSION);
+        zimg_image_format_default(&z->alpha_dst_format, ZIMG_API_VERSION);
+        zimg_graph_builder_params_default(&z->alpha_params, ZIMG_API_VERSION);
+
+        z->alpha_params.dither_type = s->dither;
+        z->alpha_params.cpu_type = ZIMG_CPU_AUTO;
+        z->alpha_params.resample_filter = s->filter;
+
+        z->alpha_src_format.width = in->width;
+        z->alpha_src_format.height = in->height;
+        z->alpha_src_format.depth = desc->comp[0].depth;
+        z->alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
+        z->alpha_src_format.color_family = ZIMG_COLOR_GREY;
+        z->alpha_src_format.active_region.left = 0;
+        z->alpha_src_format.active_region.top = in_slice_start;
+        z->alpha_src_format.active_region.width = in->width;
+        z->alpha_src_format.active_region.height = in_slice_end - in_slice_start;
+
+        /* The alpha destination describes only this job's band, exactly
+         * like dst_format above: the destination pointer used when the
+         * graph is run starts at the band's first row, so a full-frame
+         * height here would not match that buffer. */
+        z->alpha_dst_format.width = out->width;
+        z->alpha_dst_format.height = out_slice_height;
+        z->alpha_dst_format.depth = odesc->comp[0].depth;
+        z->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
+        z->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
+        z->alpha_dst_format.active_region.left = 0;
+        z->alpha_dst_format.active_region.top = 0;
+        z->alpha_dst_format.active_region.width = out->width;
+        z->alpha_dst_format.active_region.height = out_slice_height;
+
+        /* Drop the graph built for a previous frame before building anew. */
+        zimg_filter_graph_free(z->alpha_graph);
+        z->alpha_graph = zimg_filter_graph_build(&z->alpha_src_format, &z->alpha_dst_format, &z->alpha_params);
+        if (!z->alpha_graph) {
+            return print_zimg_error(ctx);
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Slice-threading job: produce this job's band of output rows.
+ *
+ * Runs the job's zimg graph over the three colour planes. Then, when both
+ * formats carry alpha, runs the job's alpha graph on plane 3; when only the
+ * output format carries alpha, fills this job's rows of the alpha plane with
+ * 1.0f (float formats) or 0xff bytes.
+ *
+ * @param ctx     filter context; ctx->priv is the ZScaleContext
+ * @param arg     ThreadData with the input/output frames and their descriptors
+ * @param jobnr   index of this job, also used as the index into s->ztd
+ * @param nb_jobs total number of jobs the frame is divided into
+ * @return 0 on success, otherwise the value returned by print_zimg_error()
+ */
+static int zscale_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ZScaleContext *s = ctx->priv;
+    ZScaleThreadContext *z = &s->ztd[jobnr];
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const AVPixFmtDescriptor *desc = td->desc;
+    const AVPixFmtDescriptor *odesc = td->odesc;
+    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
+    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
+    int ret;
+
+    for (int plane = 0; plane < 3; plane++) {
+        /* NOTE(review): the chroma band start is derived from the chroma
+         * plane height on its own; confirm it always agrees with the band
+         * the graph was built for when heights are not evenly divisible. */
+        const int height = plane > 0 ? AV_CEIL_RSHIFT(out->height, odesc->log2_chroma_h) : out->height;
+        const int out_slice_start = (height * jobnr) / nb_jobs;
+        int p = desc->comp[plane].plane;
+
+        /* The source is passed whole; the destination starts at the first
+         * row of this job's band. */
+        src_buf.plane[plane].data = in->data[p];
+        src_buf.plane[plane].stride = in->linesize[p];
+        src_buf.plane[plane].mask = -1;
+
+        p = odesc->comp[plane].plane;
+        dst_buf.plane[plane].data = out->data[p] + out_slice_start * out->linesize[p];
+        dst_buf.plane[plane].stride = out->linesize[p];
+        dst_buf.plane[plane].mask = -1;
+    }
+
+    ret = zimg_filter_graph_process(z->graph, &src_buf, &dst_buf, z->tmp, 0, 0, 0, 0);
+    if (ret)
+        return print_zimg_error(ctx);
+
+    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        const int out_slice_start = (out->height * jobnr) / nb_jobs;
+
+        src_buf.plane[0].data = in->data[3];
+        src_buf.plane[0].stride = in->linesize[3];
+        src_buf.plane[0].mask = -1;
+
+        dst_buf.plane[0].data = out->data[3] + out_slice_start * out->linesize[3];
+        dst_buf.plane[0].stride = out->linesize[3];
+        dst_buf.plane[0].mask = -1;
+
+        ret = zimg_filter_graph_process(z->alpha_graph, &src_buf, &dst_buf, z->tmp, 0, 0, 0, 0);
+        if (ret)
+            return print_zimg_error(ctx);
+    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        /* Fill only the rows that belong to this job. The jobs' bands
+         * partition [0, out->height), so the whole plane is still covered
+         * without several jobs writing the same rows concurrently. */
+        const int out_slice_start = (out->height * jobnr) / nb_jobs;
+        const int out_slice_end = (out->height * (jobnr+1)) / nb_jobs;
+
+        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
+            for (int y = out_slice_start; y < out_slice_end; y++) {
+                for (int x = 0; x < out->width; x++) {
+                    AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
+                            av_float2int(1.0f));
+                }
+            }
+        } else {
+            for (int y = out_slice_start; y < out_slice_end; y++)
+                memset(out->data[3] + y * out->linesize[3], 0xff, out->width);
+        }
+    }
+
+    return 0;
+}
+
static int filter_frame(AVFilterLink *link, AVFrame *in)
{
- ZScaleContext *s = link->dst->priv;
- AVFilterLink *outlink = link->dst->outputs[0];
+ AVFilterContext *ctx = link->dst;
+ ZScaleContext *s = ctx->priv;
+ AVFilterLink *outlink = ctx->outputs[0];
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
- zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
- zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
char buf[32];
- int ret = 0, plane;
+ int ret = 0;
+ ThreadData td;
AVFrame *out;
out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
@@ -552,41 +717,28 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
return ret;
}
- zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
- zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
- zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
-
- s->params.dither_type = s->dither;
- s->params.cpu_type = ZIMG_CPU_AUTO;
- s->params.resample_filter = s->filter;
- s->params.resample_filter_uv = s->filter;
- s->params.nominal_peak_luminance = s->nominal_peak_luminance;
- s->params.allow_approximate_gamma = s->approximate_gamma;
-
- format_init(&s->src_format, in, desc, s->colorspace_in,
- s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
- format_init(&s->dst_format, out, odesc, s->colorspace,
- s->primaries, s->trc, s->range, s->chromal);
+ td.out = out;
+ td.in = in;
+ td.desc = desc;
+ td.odesc = odesc;
+ ret = ctx->internal->execute(ctx, prepare_graph, &td, NULL, FFMIN3(in->height, out->height, s->nb_threads));
+ if (ret)
+ goto fail;
if (s->colorspace != -1)
- out->colorspace = (int)s->dst_format.matrix_coefficients;
+ out->colorspace = (int)s->ztd[0].dst_format.matrix_coefficients;
if (s->primaries != -1)
- out->color_primaries = (int)s->dst_format.color_primaries;
+ out->color_primaries = (int)s->ztd[0].dst_format.color_primaries;
if (s->range != -1)
- out->color_range = (int)s->dst_format.pixel_range + 1;
+ out->color_range = (int)s->ztd[0].dst_format.pixel_range + 1;
if (s->trc != -1)
- out->color_trc = (int)s->dst_format.transfer_characteristics;
+ out->color_trc = (int)s->ztd[0].dst_format.transfer_characteristics;
if (s->chromal != -1)
- out->chroma_location = (int)s->dst_format.chroma_location - 1;
-
- ret = graph_build(&s->graph, &s->params, &s->src_format, &s->dst_format,
- &s->tmp, &s->tmp_size);
- if (ret < 0)
- goto fail;
+ out->chroma_location = (int)s->ztd[0].dst_format.chroma_location - 1;
s->in_colorspace = in->colorspace;
s->in_trc = in->color_trc;
@@ -596,101 +748,33 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
s->out_trc = out->color_trc;
s->out_primaries = out->color_primaries;
s->out_range = out->color_range;
-
- if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
- zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
- zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
- zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
-
- s->alpha_params.dither_type = s->dither;
- s->alpha_params.cpu_type = ZIMG_CPU_AUTO;
- s->alpha_params.resample_filter = s->filter;
-
- s->alpha_src_format.width = in->width;
- s->alpha_src_format.height = in->height;
- s->alpha_src_format.depth = desc->comp[0].depth;
- s->alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
- s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
-
- s->alpha_dst_format.width = out->width;
- s->alpha_dst_format.height = out->height;
- s->alpha_dst_format.depth = odesc->comp[0].depth;
- s->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
- s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
-
- zimg_filter_graph_free(s->alpha_graph);
- s->alpha_graph = zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format, &s->alpha_params);
- if (!s->alpha_graph) {
- ret = print_zimg_error(link->dst);
- goto fail;
- }
- }
}
if (s->colorspace != -1)
- out->colorspace = (int)s->dst_format.matrix_coefficients;
+ out->colorspace = (int)s->ztd[0].dst_format.matrix_coefficients;
if (s->primaries != -1)
- out->color_primaries = (int)s->dst_format.color_primaries;
+ out->color_primaries = (int)s->ztd[0].dst_format.color_primaries;
if (s->range != -1)
- out->color_range = (int)s->dst_format.pixel_range;
+ out->color_range = (int)s->ztd[0].dst_format.pixel_range;
if (s->trc != -1)
- out->color_trc = (int)s->dst_format.transfer_characteristics;
+ out->color_trc = (int)s->ztd[0].dst_format.transfer_characteristics;
+
+ if (s->chromal != -1)
+ out->chroma_location = (int)s->ztd[0].dst_format.chroma_location - 1;
av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
(int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
(int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
INT_MAX);
- for (plane = 0; plane < 3; plane++) {
- int p = desc->comp[plane].plane;
- src_buf.plane[plane].data = in->data[p];
- src_buf.plane[plane].stride = in->linesize[p];
- src_buf.plane[plane].mask = -1;
-
- p = odesc->comp[plane].plane;
- dst_buf.plane[plane].data = out->data[p];
- dst_buf.plane[plane].stride = out->linesize[p];
- dst_buf.plane[plane].mask = -1;
- }
-
- ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
- if (ret) {
- ret = print_zimg_error(link->dst);
- goto fail;
- }
-
- if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
- src_buf.plane[0].data = in->data[3];
- src_buf.plane[0].stride = in->linesize[3];
- src_buf.plane[0].mask = -1;
-
- dst_buf.plane[0].data = out->data[3];
- dst_buf.plane[0].stride = out->linesize[3];
- dst_buf.plane[0].mask = -1;
-
- ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
- if (ret) {
- ret = print_zimg_error(link->dst);
- goto fail;
- }
- } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
- int x, y;
-
- if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
- for (y = 0; y < out->height; y++) {
- for (x = 0; x < out->width; x++) {
- AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
- av_float2int(1.0f));
- }
- }
- } else {
- for (y = 0; y < outlink->h; y++)
- memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
- }
- }
+ td.out = out;
+ td.in = in;
+ td.desc = desc;
+ td.odesc = odesc;
+ ret = ctx->internal->execute(ctx, zscale_slice, &td, NULL, FFMIN3(in->height, out->height, s->nb_threads));
fail:
av_frame_free(&in);
@@ -706,10 +790,12 @@ static void uninit(AVFilterContext *ctx)
{
ZScaleContext *s = ctx->priv;
- zimg_filter_graph_free(s->graph);
- zimg_filter_graph_free(s->alpha_graph);
- av_freep(&s->tmp);
- s->tmp_size = 0;
+ /* s->ztd is NULL if the filter was never configured or if its
+ * allocation failed, in which case there is no per-job state to release. */
+ if (s->ztd) {
+ for (int i = 0; i < s->nb_threads; i++) {
+ zimg_filter_graph_free(s->ztd[i].graph);
+ zimg_filter_graph_free(s->ztd[i].alpha_graph);
+ av_freep(&s->ztd[i].tmp);
+ s->ztd[i].tmp_size = 0;
+ }
+ }
+ /* Release the per-job array itself and mark the context as empty. */
+ av_freep(&s->ztd);
+ s->nb_threads = 0;
}
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
@@ -890,4 +976,5 @@ AVFilter ff_vf_zscale = {
.inputs = avfilter_vf_zscale_inputs,
.outputs = avfilter_vf_zscale_outputs,
.process_command = process_command,
+ .flags = AVFILTER_FLAG_SLICE_THREADS,
};
--
2.17.1
More information about the ffmpeg-devel
mailing list