[FFmpeg-cvslog] avfilter/vf_corr: add slice threading support

Paul B Mahol git at videolan.org
Sun Dec 3 04:08:28 EET 2023


ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Sun Dec  3 02:49:50 2023 +0100| [aad3223978526403034ce028bc02c380c7f1e79e] | committer: Paul B Mahol

avfilter/vf_corr: add slice threading support

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=aad3223978526403034ce028bc02c380c7f1e79e
---

 libavfilter/vf_corr.c | 158 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 127 insertions(+), 31 deletions(-)

diff --git a/libavfilter/vf_corr.c b/libavfilter/vf_corr.c
index fb2770539e..e2e794851e 100644
--- a/libavfilter/vf_corr.c
+++ b/libavfilter/vf_corr.c
@@ -29,22 +29,40 @@
 #include "framesync.h"
 #include "internal.h"
 
+typedef struct Sums {
+    uint64_t s[2];
+} Sums;
+
+typedef struct QSums {
+    float s[3];
+} QSums;
+
 typedef struct CorrContext {
     const AVClass *class;
     FFFrameSync fs;
     double score, min_score, max_score, score_comp[4];
     uint64_t nb_frames;
+    int nb_threads;
     int is_rgb;
     uint8_t rgba_map[4];
     int max[4];
     char comps[4];
+    float mean[4][2];
+    Sums *sums;
+    QSums *qsums;
     int nb_components;
     int planewidth[4];
     int planeheight[4];
-    int (*filter_slice)(AVFilterContext *ctx, void *arg,
-                        int jobnr, int nb_jobs);
+    int (*sum_slice)(AVFilterContext *ctx, void *arg,
+                     int jobnr, int nb_jobs);
+    int (*corr_slice)(AVFilterContext *ctx, void *arg,
+                      int jobnr, int nb_jobs);
 } CorrContext;
 
+typedef struct ThreadData {
+    AVFrame *master, *ref;
+} ThreadData;
+
 #define OFFSET(x) offsetof(CorrContext, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
 
@@ -66,27 +84,31 @@ static void set_meta(AVFilterContext *ctx,
     }
 }
 
-#define CORR(type, name)                                     \
-static void f##name(AVFilterContext *ctx, AVFrame *master,   \
-                   AVFrame *ref, double *comp_score)         \
+#define SUM(type, name)                                      \
+static int sum_##name(AVFilterContext *ctx, void *arg,       \
+                      int jobnr, int nb_jobs)                \
 {                                                            \
     CorrContext *s = ctx->priv;                              \
+    ThreadData *td = arg;                                    \
+    AVFrame *master = td->master;                            \
+    AVFrame *ref = td->ref;                                  \
                                                              \
     for (int c = 0; c < s->nb_components; c++) {             \
         const ptrdiff_t linesize1 = master->linesize[c] /    \
                                     sizeof(type);            \
         const ptrdiff_t linesize2 = ref->linesize[c] /       \
                                     sizeof(type);            \
-        const type *src1 = (const type *)master->data[c];    \
-        const type *src2 = (const type *)ref->data[c];       \
         const int h = s->planeheight[c];                     \
         const int w = s->planewidth[c];                      \
-        const float scale = 1.f / s->max[c];                 \
+        const int slice_start = (h * jobnr) / nb_jobs;       \
+        const int slice_end = (h * (jobnr+1)) / nb_jobs;     \
+        const type *src1 = (const type *)master->data[c] +   \
+                            linesize1 * slice_start;         \
+        const type *src2 = (const type *)ref->data[c] +      \
+                            linesize2 * slice_start;         \
         uint64_t sum1 = 0, sum2 = 0;                         \
-        float sum12, sum1q, sum2q;                           \
-        float sumq, mean1, mean2;                            \
                                                              \
-        for (int y = 0; y < h; y++) {                        \
+        for (int y = slice_start; y < slice_end; y++) {      \
             for (int x = 0; x < w; x++) {                    \
                 sum1 += src1[x];                             \
                 sum2 += src2[x];                             \
@@ -96,17 +118,47 @@ static void f##name(AVFilterContext *ctx, AVFrame *master,   \
             src2 += linesize2;                               \
         }                                                    \
                                                              \
-        mean1 = scale * (sum1 /(double)(w * h));             \
-        mean2 = scale * (sum2 /(double)(w * h));             \
+        s->sums[jobnr * s->nb_components + c].s[0] = sum1;   \
+        s->sums[jobnr * s->nb_components + c].s[1] = sum2;   \
+    }                                                        \
                                                              \
-        src1 = (const type *)master->data[c];                \
-        src2 = (const type *)ref->data[c];                   \
+    return 0;                                                \
+}
+
+SUM(uint8_t, slice8)
+SUM(uint16_t, slice16)
+
+#define CORR(type, name)                                     \
+static int corr_##name(AVFilterContext *ctx, void *arg,      \
+                       int jobnr, int nb_jobs)               \
+{                                                            \
+    CorrContext *s = ctx->priv;                              \
+    ThreadData *td = arg;                                    \
+    AVFrame *master = td->master;                            \
+    AVFrame *ref = td->ref;                                  \
+                                                             \
+    for (int c = 0; c < s->nb_components; c++) {             \
+        const ptrdiff_t linesize1 = master->linesize[c] /    \
+                                    sizeof(type);            \
+        const ptrdiff_t linesize2 = ref->linesize[c] /       \
+                                    sizeof(type);            \
+        const type *src1 = (const type *)master->data[c];    \
+        const type *src2 = (const type *)ref->data[c];       \
+        const int h = s->planeheight[c];                     \
+        const int w = s->planewidth[c];                      \
+        const int slice_start = (h * jobnr) / nb_jobs;       \
+        const int slice_end = (h * (jobnr+1)) / nb_jobs;     \
+        const float scale = 1.f / s->max[c];                 \
+        const float mean1 = s->mean[c][0];                   \
+        const float mean2 = s->mean[c][1];                   \
+        float sum12 = 0.f, sum1q = 0.f, sum2q = 0.f;         \
                                                              \
-        sum12 = 0.f;                                         \
-        sum1q = 0.f;                                         \
-        sum2q = 0.f;                                         \
+        src1 = (const type *)master->data[c] +               \
+                     slice_start * linesize1;                \
+        src2 = (const type *)ref->data[c] +                  \
+                     slice_start * linesize2;                \
                                                              \
-        for (int y = 0; y < h; y++) {                        \
+        for (int y = slice_start; y < slice_end; y++) {      \
             for (int x = 0; x < w; x++) {                    \
                 const float f1 = scale * src1[x] - mean1;    \
                 const float f2 = scale * src2[x] - mean2;    \
@@ -120,17 +172,16 @@ static void f##name(AVFilterContext *ctx, AVFrame *master,   \
             src2 += linesize2;                               \
         }                                                    \
                                                              \
-        sumq = sqrtf(sum1q * sum2q);                         \
-        if (sumq > 0.f) {                                    \
-            comp_score[c] = av_clipf(sum12 / sumq,-1.f,1.f); \
-        } else {                                             \
-            comp_score[c] = sum1q == sum2q ? 1.f : 0.f;      \
-        }                                                    \
+        s->qsums[jobnr * s->nb_components + c].s[0] = sum12; \
+        s->qsums[jobnr * s->nb_components + c].s[1] = sum1q; \
+        s->qsums[jobnr * s->nb_components + c].s[2] = sum2q; \
     }                                                        \
+                                                             \
+    return 0;                                                \
 }
 
-CORR(uint8_t, corr8)
-CORR(uint16_t, corr16)
+CORR(uint8_t, slice8)
+CORR(uint16_t, slice16)
 
 static int do_corr(FFFrameSync *fs)
 {
@@ -139,6 +190,7 @@ static int do_corr(FFFrameSync *fs)
     AVFrame *master, *ref;
     double comp_score[4], score = 0.;
     AVDictionary **metadata;
+    ThreadData td;
     int ret;
 
     ret = ff_framesync_dualinput_get(fs, &master, &ref);
@@ -148,10 +200,42 @@ static int do_corr(FFFrameSync *fs)
         return ff_filter_frame(ctx->outputs[0], master);
     metadata = &master->metadata;
 
-    if (s->max[0] > 255) {
-        fcorr16(ctx, master, ref, comp_score);
-    } else {
-        fcorr8(ctx, master, ref, comp_score);
+    td.master = master;
+    td.ref = ref;
+    ff_filter_execute(ctx, s->sum_slice, &td, NULL,
+                      FFMIN(s->planeheight[1], s->nb_threads));
+
+    for (int c = 0; c < s->nb_components; c++) {
+        const double scale = 1.f / s->max[c];
+        uint64_t sum1 = 0, sum2 = 0;
+
+        for (int n = 0; n < s->nb_threads; n++) {
+            sum1 += s->sums[n * s->nb_components + c].s[0];
+            sum2 += s->sums[n * s->nb_components + c].s[1];
+        }
+
+        s->mean[c][0] = scale * (sum1 /(double)(s->planewidth[c] * s->planeheight[c]));
+        s->mean[c][1] = scale * (sum2 /(double)(s->planewidth[c] * s->planeheight[c]));
+    }
+
+    ff_filter_execute(ctx, s->corr_slice, &td, NULL,
+                      FFMIN(s->planeheight[1], s->nb_threads));
+
+    for (int c = 0; c < s->nb_components; c++) {
+        double sumq, sum12 = 0.0, sum1q = 0.0, sum2q = 0.0;
+
+        for (int n = 0; n < s->nb_threads; n++) {
+            sum12 += s->qsums[n * s->nb_components + c].s[0];
+            sum1q += s->qsums[n * s->nb_components + c].s[1];
+            sum2q += s->qsums[n * s->nb_components + c].s[2];
+        }
+
+        sumq = sqrt(sum1q * sum2q);
+        if (sumq > 0.0) {
+            comp_score[c] = av_clipd(sum12 / sumq,-1.0,1.0);
+        } else {
+            comp_score[c] = sum1q == sum2q ? 1.f : 0.f;
+        }
     }
 
     for (int c = 0; c < s->nb_components; c++)
@@ -205,6 +289,7 @@ static int config_input_ref(AVFilterLink *inlink)
     AVFilterContext *ctx  = inlink->dst;
     CorrContext *s = ctx->priv;
 
+    s->nb_threads = ff_filter_get_nb_threads(ctx);
     s->nb_components = desc->nb_components;
     if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
         ctx->inputs[0]->h != ctx->inputs[1]->h) {
@@ -223,6 +308,11 @@ static int config_input_ref(AVFilterLink *inlink)
     s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
     s->planewidth[0]  = s->planewidth[3]  = inlink->w;
 
+    s->sums = av_calloc(s->nb_threads * s->nb_components, sizeof(*s->sums));
+    s->qsums = av_calloc(s->nb_threads * s->nb_components, sizeof(*s->qsums));
+    if (!s->qsums || !s->sums)
+        return AVERROR(ENOMEM);
+
     s->min_score = +INFINITY;
     s->max_score = -INFINITY;
 
@@ -231,6 +321,9 @@ static int config_input_ref(AVFilterLink *inlink)
     s->max[2] = (1 << desc->comp[2].depth) - 1;
     s->max[3] = (1 << desc->comp[3].depth) - 1;
 
+    s->sum_slice = desc->comp[0].depth > 8 ? sum_slice16 : sum_slice8;
+    s->corr_slice = desc->comp[0].depth > 8 ? corr_slice16 : corr_slice8;
+
     return 0;
 }
 
@@ -291,6 +384,8 @@ static av_cold void uninit(AVFilterContext *ctx)
     }
 
     ff_framesync_uninit(&s->fs);
+    av_freep(&s->qsums);
+    av_freep(&s->sums);
 }
 
 static const AVFilterPad corr_inputs[] = {
@@ -332,5 +427,6 @@ const AVFilter ff_vf_corr = {
     FILTER_OUTPUTS(corr_outputs),
     FILTER_PIXFMTS_ARRAY(pix_fmts),
     .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
+                     AVFILTER_FLAG_SLICE_THREADS             |
                      AVFILTER_FLAG_METADATA_ONLY,
 };



More information about the ffmpeg-cvslog mailing list