[FFmpeg-devel] [PATCH 24/24] lavfi/vf_scale: implement slice threading

Anton Khirnov anton at khirnov.net
Mon May 31 10:55:15 EEST 2021


---
 libavfilter/vf_scale.c | 182 +++++++++++++++++++++++++++++++----------
 1 file changed, 141 insertions(+), 41 deletions(-)

diff --git a/libavfilter/vf_scale.c b/libavfilter/vf_scale.c
index cdd7c4da0d..87317393bd 100644
--- a/libavfilter/vf_scale.c
+++ b/libavfilter/vf_scale.c
@@ -106,8 +106,16 @@ enum EvalMode {
 
 typedef struct ScaleContext {
     const AVClass *class;
-    struct SwsContext *sws;     ///< software scaler context
-    struct SwsContext *isws[2]; ///< software scaler context for interlaced material
+
+    /**
+     * Scaler contexts.
+     * [0]   - progressive
+     * [1/2] - top/bottom fields
+     */
+    struct SwsContext *(*scalers)[3];
+    unsigned int      nb_scalers;
+    int                  *scaler_res;
+
     AVDictionary *opts;
 
     /**
@@ -122,6 +130,7 @@ typedef struct ScaleContext {
     double param[2];            // sws params
 
     int hsub, vsub;             ///< chroma subsampling
+    int ohsub, ovsub;           ///< output chroma subsampling
     int slice_y;                ///< top of current output slice
     int input_is_pal;           ///< set to 1 if the input format is paletted
     int output_is_pal;          ///< set to 1 if the output format is paletted
@@ -153,6 +162,7 @@ typedef struct ScaleContext {
 
     int eval_mode;              ///< expression evaluation mode
 
+    int passthrough;
 } ScaleContext;
 
 const AVFilter ff_vf_scale2ref;
@@ -330,13 +340,11 @@ static av_cold int init_dict(AVFilterContext *ctx, AVDictionary **opts)
 
 static void scaler_free(ScaleContext *s)
 {
-    sws_freeContext(s->sws);
-    sws_freeContext(s->isws[0]);
-    sws_freeContext(s->isws[1]);
+    for (; s->nb_scalers > 0; s->nb_scalers--) /* free every scaler set; ends with nb_scalers == 0 so the freed array is never indexed again */
+        for (int j = 0; j < 3; j++)            /* [0] progressive, [1]/[2] top/bottom field */
+            sws_freeContext(s->scalers[s->nb_scalers - 1][j]);
 
-    s->sws     = NULL;
-    s->isws[0] = NULL;
-    s->isws[1] = NULL;
+    av_freep(&s->scalers);
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
@@ -346,6 +354,7 @@ static av_cold void uninit(AVFilterContext *ctx)
     av_expr_free(scale->h_pexpr);
     scale->w_pexpr = scale->h_pexpr = NULL;
     scaler_free(scale);
+    av_freep(&scale->scaler_res);
     av_dict_free(&scale->opts);
 }
 
@@ -522,19 +531,28 @@ static int config_props(AVFilterLink *outlink)
 
     scaler_free(scale);
 
-    if (inlink0->w == outlink->w &&
-        inlink0->h == outlink->h &&
-        !scale->out_color_matrix &&
-        scale->in_range == scale->out_range &&
-        inlink0->format == outlink->format)
-        ;
-    else {
-        struct SwsContext **swscs[3] = {&scale->sws, &scale->isws[0], &scale->isws[1]};
-        int i;
-
-        for (i = 0; i < 3; i++) {
+    scale->passthrough = inlink0->w == outlink->w &&
+                         inlink0->h == outlink->h &&
+                         !scale->out_color_matrix &&
+                         scale->in_range == scale->out_range &&
+                         inlink0->format == outlink->format;
+
+    if (!scale->passthrough) {
+        int nb_scalers = ff_filter_get_nb_threads(ctx);
+
+        scale->scalers = av_mallocz_array(nb_scalers, 3 * sizeof(struct SwsContext*));
+        if (!scale->scalers)
+            return AVERROR(ENOMEM);
+
+        ret = av_reallocp_array(&scale->scaler_res, nb_scalers, sizeof(*scale->scaler_res));
+        if (ret < 0)
+            return ret;
+
+        for (int i = 0; i < 3; i++) {
+        for (int t = 0; t < nb_scalers; t++) {
             int in_v_chr_pos = scale->in_v_chr_pos, out_v_chr_pos = scale->out_v_chr_pos;
-            struct SwsContext **s = swscs[i];
+            struct SwsContext **s = &scale->scalers[t][i];
+
             *s = sws_alloc_context();
             if (!*s)
                 return AVERROR(ENOMEM);
@@ -580,9 +598,29 @@ static int config_props(AVFilterLink *outlink)
 
             if ((ret = sws_init_context(*s, NULL, NULL)) < 0)
                 return ret;
+
+            /* do not multithread error-diffusion dithering */
+            if (i == 0 && t == 0) {
+                const AVOption *opt;
+                int64_t dither;
+
+                av_opt_get_int(*s, "sws_dither", 0, &dither);
+                opt = av_opt_find2(*s, "ed", "sws_dither", 0, 0, NULL);
+                if (!opt)
+                    return AVERROR_BUG;
+
+                if (dither == opt->default_val.i64) {
+                    av_log(ctx, AV_LOG_WARNING, "Error-diffusion dithering is "
+                           "used, conversion will be single-threaded.\n");
+                    nb_scalers = 1;
+                }
+            }
+            }
+
             if (!scale->interlaced)
                 break;
         }
+        scale->nb_scalers = nb_scalers;
     }
 
     if (inlink0->sample_aspect_ratio.num){
@@ -625,7 +663,8 @@ static int request_frame_ref(AVFilterLink *outlink)
     return ff_request_frame(outlink->src->inputs[1]);
 }
 
-static int scale_slice(ScaleContext *scale, AVFrame *out_buf, AVFrame *cur_pic, struct SwsContext *sws, int y, int h, int mul, int field)
+static int scale_slice(ScaleContext *scale, AVFrame *out_buf, AVFrame *cur_pic, struct SwsContext *sws,
+                       int y, int h, int mul, int field, int dst)
 {
     const uint8_t *in[4];
     uint8_t *out[4];
@@ -633,9 +672,10 @@ static int scale_slice(ScaleContext *scale, AVFrame *out_buf, AVFrame *cur_pic,
     int i;
 
     for (i=0; i<4; i++) {
-        int vsub= ((i+1)&2) ? scale->vsub : 0;
-        ptrdiff_t  in_offset = ((y>>vsub)+field) * cur_pic->linesize[i];
-        ptrdiff_t out_offset =            field  * out_buf->linesize[i];
+        int vsub  = ((i+1)&2) ? scale->vsub  : 0;
+        int ovsub = ((i+1)&2) ? scale->ovsub : 0;
+        ptrdiff_t  in_offset = (((y * !dst) >> vsub)  + field) * cur_pic->linesize[i];
+        ptrdiff_t out_offset = (((y *  dst) >> ovsub) + field) * out_buf->linesize[i];
          in_stride[i] = cur_pic->linesize[i] * mul;
         out_stride[i] = out_buf->linesize[i] * mul;
          in[i] = FF_PTR_ADD(cur_pic->data[i],  in_offset);
@@ -646,17 +686,57 @@ static int scale_slice(ScaleContext *scale, AVFrame *out_buf, AVFrame *cur_pic,
     if (scale->output_is_pal)
         out[1] = out_buf->data[1];
 
+    if (dst)
+        return sws_scale_dst_slice(sws, in, in_stride,
+                                   out, out_stride, y / mul, h);
+
     return sws_scale(sws, in, in_stride, y/mul, h,
                          out,out_stride);
 }
 
+typedef struct ScaleThreadData { /* argument passed to scale_job() through execute() */
+    AVFrame *frame_in;           /* source frame, handed to scale_slice() as cur_pic */
+    AVFrame *frame_out;          /* destination frame, handed to scale_slice() as out_buf */
+    int      scaler_idx;         /* second index into scalers[][]: 0 progressive, 1/2 top/bottom field */
+} ScaleThreadData;
+
+static int scaler_res(ScaleContext *scale) /* first negative per-job result, or 0 if there is none */
+{
+    unsigned int idx = 0;
+    while (idx < scale->nb_scalers && scale->scaler_res[idx] >= 0)
+        idx++;
+    return idx < scale->nb_scalers ? scale->scaler_res[idx] : 0;
+}
+
+/* Slice-threading job: scale output slice jobnr of nb_jobs; <0 on scaler error. */
+static int scale_job(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ScaleContext *scale = ctx->priv;
+    ScaleThreadData *td = arg;
+    int     stride_mul  = 1 << (td->scaler_idx > 0);  /* field scalers use doubled line strides */
+    int     first_field = td->scaler_idx == 1;
+    int  picture_height = (td->frame_out->height + first_field) / stride_mul;
+    int    slice_height = FFALIGN(FFMAX((picture_height + nb_jobs - 1) / nb_jobs, 1),
+                                  1 << scale->ovsub);
+    int     slice_start = jobnr * slice_height;
+    int     slice_end   = FFMIN((jobnr + 1) * slice_height, picture_height);
+    int             ret = 0; /* a job left without any lines succeeds trivially */
+
+    if (slice_start < slice_end)
+        ret = scale_slice(scale, td->frame_out, td->frame_in,
+                          scale->scalers[jobnr][td->scaler_idx], slice_start,
+                          slice_end - slice_start, stride_mul, td->scaler_idx == 2, 1);
+    return FFMIN(ret, 0); /* still 0 on success, but failures now reach scaler_res() */
+}
+
 static int scale_frame(AVFilterLink *link, AVFrame *in, AVFrame **frame_out)
 {
     AVFilterContext *ctx = link->dst;
     ScaleContext *scale = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *out;
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
+    const AVPixFmtDescriptor *desc  = av_pix_fmt_desc_get(link->format);
+    const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
     char buf[32];
     int ret;
     int in_range;
@@ -723,13 +803,15 @@ static int scale_frame(AVFilterLink *link, AVFrame *in, AVFrame **frame_out)
     }
 
 scale:
-    if (!scale->sws) {
+    if (!scale->nb_scalers) {
         *frame_out = in;
         return 0;
     }
 
     scale->hsub = desc->log2_chroma_w;
     scale->vsub = desc->log2_chroma_h;
+    scale->ohsub = odesc->log2_chroma_w;
+    scale->ovsub = odesc->log2_chroma_h;
 
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
     if (!out) {
@@ -755,7 +837,7 @@ scale:
         int in_full, out_full, brightness, contrast, saturation;
         const int *inv_table, *table;
 
-        sws_getColorspaceDetails(scale->sws, (int **)&inv_table, &in_full,
+        sws_getColorspaceDetails(scale->scalers[0][0], (int **)&inv_table, &in_full,
                                  (int **)&table, &out_full,
                                  &brightness, &contrast, &saturation);
 
@@ -773,17 +855,14 @@ scale:
         if (scale->out_range != AVCOL_RANGE_UNSPECIFIED)
             out_full = (scale->out_range == AVCOL_RANGE_JPEG);
 
-        sws_setColorspaceDetails(scale->sws, inv_table, in_full,
+        for (int i = 0; i < 3; i++)
+            for (int j = 0; j < scale->nb_scalers; j++) {
+                if (!scale->scalers[j][i])
+                    continue;
+        sws_setColorspaceDetails(scale->scalers[j][i], inv_table, in_full,
                                  table, out_full,
                                  brightness, contrast, saturation);
-        if (scale->isws[0])
-            sws_setColorspaceDetails(scale->isws[0], inv_table, in_full,
-                                     table, out_full,
-                                     brightness, contrast, saturation);
-        if (scale->isws[1])
-            sws_setColorspaceDetails(scale->isws[1], inv_table, in_full,
-                                     table, out_full,
-                                     brightness, contrast, saturation);
+            }
 
         out->color_range = out_full ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
     }
@@ -793,10 +872,22 @@ scale:
               (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
               INT_MAX);
 
+    memset(scale->scaler_res, 0, scale->nb_scalers * sizeof(*scale->scaler_res));
+
     if (scale->interlaced>0 || (scale->interlaced<0 && in->interlaced_frame)) {
-        ret = scale_slice(scale, out, in, scale->isws[0], 0, (link->h+1)/2, 2, 0);
-        if (ret >= 0)
-            ret = scale_slice(scale, out, in, scale->isws[1], 0,  link->h   /2, 2, 1);
+        ScaleThreadData td = {
+            .scaler_idx = 1,
+            .frame_in   = in,
+            .frame_out  = out,
+        };
+
+        ctx->internal->execute(ctx, scale_job, &td, scale->scaler_res, scale->nb_scalers);
+
+        if (scaler_res(scale) >= 0) {
+            td.scaler_idx = 2;
+            memset(scale->scaler_res, 0, scale->nb_scalers * sizeof(*scale->scaler_res));
+            ctx->internal->execute(ctx, scale_job, &td, scale->scaler_res, scale->nb_scalers);
+        }
     } else if (scale->nb_slices) {
         int i, slice_h, slice_start, slice_end = 0;
         const int nb_slices = FFMIN(scale->nb_slices, link->h);
@@ -804,14 +895,22 @@ scale:
             slice_start = slice_end;
             slice_end   = (link->h * (i+1)) / nb_slices;
             slice_h     = slice_end - slice_start;
-            ret = scale_slice(scale, out, in, scale->sws, slice_start, slice_h, 1, 0);
+            ret = scale_slice(scale, out, in, scale->scalers[0][0], slice_start, slice_h, 1, 0, 0);
             if (ret < 0)
                 break;
         }
     } else {
-        ret = scale_slice(scale, out, in, scale->sws, 0, link->h, 1, 0);
+        ScaleThreadData td = {
+            .scaler_idx = 0,
+            .frame_in   = in,
+            .frame_out  = out,
+        };
+
+        ctx->internal->execute(ctx, scale_job, &td, scale->scaler_res, scale->nb_scalers);
     }
 
+    ret = scaler_res(scale);
+
     av_frame_free(&in);
     if (ret < 0)
         av_frame_free(frame_out);
@@ -984,6 +1083,7 @@ const AVFilter ff_vf_scale = {
     .inputs          = avfilter_vf_scale_inputs,
     .outputs         = avfilter_vf_scale_outputs,
     .process_command = process_command,
+    .flags           = AVFILTER_FLAG_SLICE_THREADS,
 };
 
 static const AVClass scale2ref_class = {
-- 
2.30.2



More information about the ffmpeg-devel mailing list