[FFmpeg-cvslog] avfilter/vf_morpho: add slice threading support

Paul B Mahol git at videolan.org
Mon May 8 18:52:45 EEST 2023


ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Mon May  8 17:17:19 2023 +0200| [1eed7f65624cd590a4acd944f9bbeb9f03634a3f] | committer: Paul B Mahol

avfilter/vf_morpho: add slice threading support

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1eed7f65624cd590a4acd944f9bbeb9f03634a3f
---

 libavfilter/vf_morpho.c | 257 +++++++++++++++++++++++++++++-------------------
 1 file changed, 158 insertions(+), 99 deletions(-)

diff --git a/libavfilter/vf_morpho.c b/libavfilter/vf_morpho.c
index 55886d2b8a..a218499d61 100644
--- a/libavfilter/vf_morpho.c
+++ b/libavfilter/vf_morpho.c
@@ -95,6 +95,8 @@ typedef struct chord_set {
     unsigned nb_elements;
 } chord_set;
 
+#define MAX_THREADS 64
+
 typedef struct MorphoContext {
     const AVClass *class;
     FFFrameSync fs;
@@ -102,7 +104,7 @@ typedef struct MorphoContext {
     chord_set SE[4];
     IPlane SEimg[4];
     IPlane g[4], f[4], h[4];
-    LUT Ty[2][4];
+    LUT Ty[MAX_THREADS][2][4];
 
     int mode;
     int planes;
@@ -460,14 +462,14 @@ static void line_erode(IPlane *g, LUT *Ty, chord_set *SE, int y, int tid)
     }
 }
 
-static int dilate(IPlane *g, IPlane *f, chord_set *SE, LUT *Ty)
+static int dilate(IPlane *g, IPlane *f, chord_set *SE, LUT *Ty, int y0, int y1)
 {
-    int ret = compute_max_lut(Ty, f, SE, 0, 1);
+    int ret = compute_max_lut(Ty, f, SE, y0, 1);
     if (ret < 0)
         return ret;
 
-    line_dilate(g, Ty, SE, 0, 0);
-    for (int y = 1; y < f->h; y++) {
+    line_dilate(g, Ty, SE, y0, 0);
+    for (int y = y0 + 1; y < y1; y++) {
         update_max_lut(f, Ty, SE, y, 0, 1);
         line_dilate(g, Ty, SE, y, 0);
     }
@@ -475,14 +477,14 @@ static int dilate(IPlane *g, IPlane *f, chord_set *SE, LUT *Ty)
     return 0;
 }
 
-static int erode(IPlane *g, IPlane *f, chord_set *SE, LUT *Ty)
+static int erode(IPlane *g, IPlane *f, chord_set *SE, LUT *Ty, int y0, int y1)
 {
-    int ret = compute_min_lut(Ty, f, SE, 0, 1);
+    int ret = compute_min_lut(Ty, f, SE, y0, 1);
     if (ret < 0)
         return ret;
 
-    line_erode(g, Ty, SE, 0, 0);
-    for (int y = 1; y < f->h; y++) {
+    line_erode(g, Ty, SE, y0, 0);
+    for (int y = y0 + 1; y < y1; y++) {
         update_min_lut(f, Ty, SE, y, 0, 1);
         line_erode(g, Ty, SE, y, 0);
     }
@@ -490,15 +492,15 @@ static int erode(IPlane *g, IPlane *f, chord_set *SE, LUT *Ty)
     return 0;
 }
 
-static void difference(IPlane *g, IPlane *f)
+static void difference(IPlane *g, IPlane *f, int y0, int y1)
 {
-    for (int y = 0; y < f->h; y++)
+    for (int y = y0; y < y1; y++)
         f->diff_in_place(g->img[y], f->img[y], f->w);
 }
 
-static void difference2(IPlane *g, IPlane *f)
+static void difference2(IPlane *g, IPlane *f, int y0, int y1)
 {
-    for (int y = 0; y < f->h; y++)
+    for (int y = y0; y < y1; y++)
         f->diff_rin_place(g->img[y], f->img[y], f->w);
 }
 
@@ -785,12 +787,133 @@ static int activate(AVFilterContext *ctx)
     return ff_framesync_activate(&s->fs);
 }
 
+typedef struct ThreadData {
+    AVFrame *in, *out;
+} ThreadData;
+
+static int morpho_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    MorphoContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *out = td->out;
+    AVFrame *in = td->in;
+    int ret;
+
+    for (int p = 0; p < s->nb_planes; p++) {
+        const int width = s->planewidth[p];
+        const int height = s->planeheight[p];
+        const int y0 = (height *  jobnr   ) / nb_jobs;
+        const int y1 = (height * (jobnr+1)) / nb_jobs;
+        const int depth = s->depth;
+
+        if (ctx->is_disabled || !(s->planes & (1 << p))) {
+copy:
+            av_image_copy_plane(out->data[p] + y0 * out->linesize[p],
+                out->linesize[p],
+                in->data[p] + y0 * in->linesize[p],
+                in->linesize[p],
+                width * ((depth + 7) / 8),
+                y1 - y0);
+            continue;
+        }
+
+        if (s->SE[p].minX == INT16_MAX ||
+            s->SE[p].minY == INT16_MAX ||
+            s->SE[p].maxX == INT16_MIN ||
+            s->SE[p].maxY == INT16_MIN)
+            goto copy;
+
+        switch (s->mode) {
+        case ERODE:
+            ret = erode(&s->g[p], &s->f[p], &s->SE[p], &s->Ty[jobnr][0][p], y0, y1);
+            break;
+        case DILATE:
+        case GRADIENT:
+            ret = dilate(&s->g[p], &s->f[p], &s->SE[p], &s->Ty[jobnr][0][p], y0, y1);
+            break;
+        case OPEN:
+        case TOPHAT:
+            ret = erode(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[jobnr][0][p], y0, y1);
+            break;
+        case CLOSE:
+        case BLACKHAT:
+            ret = dilate(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[jobnr][0][p], y0, y1);
+            break;
+        default:
+            av_assert0(0);
+        }
+
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+static int morpho_sliceX(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    MorphoContext *s = ctx->priv;
+    int ret;
+
+    for (int p = 0; p < s->nb_planes; p++) {
+        const int height = s->planeheight[p];
+        const int y0 = (height *  jobnr   ) / nb_jobs;
+        const int y1 = (height * (jobnr+1)) / nb_jobs;
+
+        if (ctx->is_disabled || !(s->planes & (1 << p))) {
+copy:
+            continue;
+        }
+
+        if (s->SE[p].minX == INT16_MAX ||
+            s->SE[p].minY == INT16_MAX ||
+            s->SE[p].maxX == INT16_MIN ||
+            s->SE[p].maxY == INT16_MIN)
+            goto copy;
+
+        switch (s->mode) {
+        case OPEN:
+            ret = dilate(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[jobnr][1][p], y0, y1);
+            break;
+        case CLOSE:
+            ret = erode(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[jobnr][1][p], y0, y1);
+            break;
+        case GRADIENT:
+            ret = erode(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[jobnr][1][p], y0, y1);
+            if (ret < 0)
+                break;
+            difference(&s->g[p], &s->h[p], y0, y1);
+            break;
+        case TOPHAT:
+            ret = dilate(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[jobnr][1][p], y0, y1);
+            if (ret < 0)
+                break;
+            difference2(&s->g[p], &s->f[p], y0, y1);
+            break;
+        case BLACKHAT:
+            ret = erode(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[jobnr][1][p], y0, y1);
+            if (ret < 0)
+                break;
+            difference(&s->g[p], &s->f[p], y0, y1);
+            break;
+        default:
+            av_assert0(0);
+        }
+
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
 static int do_morpho(FFFrameSync *fs)
 {
     AVFilterContext *ctx = fs->parent;
     AVFilterLink *outlink = ctx->outputs[0];
     MorphoContext *s = ctx->priv;
     AVFrame *in = NULL, *structurepic = NULL;
+    ThreadData td;
     AVFrame *out;
     int ret;
 
@@ -812,6 +935,12 @@ static int do_morpho(FFFrameSync *fs)
         const int ssrc_linesize = structurepic->linesize[p];
         const int swidth = s->splanewidth[p];
         const int sheight = s->splaneheight[p];
+        const uint8_t *src = in->data[p];
+        int src_linesize = in->linesize[p];
+        uint8_t *dst = out->data[p];
+        int dst_linesize = out->linesize[p];
+        const int width = s->planewidth[p];
+        const int height = s->planeheight[p];
         const int depth = s->depth;
         int type_size = s->type_size;
 
@@ -826,34 +955,6 @@ static int do_morpho(FFFrameSync *fs)
                 goto fail;
             s->got_structure[p] = 1;
         }
-    }
-
-    for (int p = 0; p < s->nb_planes; p++) {
-        const uint8_t *src = in->data[p];
-        int src_linesize = in->linesize[p];
-        uint8_t *dst = out->data[p];
-        int dst_linesize = out->linesize[p];
-        const int width = s->planewidth[p];
-        const int height = s->planeheight[p];
-        const int depth = s->depth;
-        int type_size = s->type_size;
-
-        if (ctx->is_disabled || !(s->planes & (1 << p))) {
-copy:
-            av_image_copy_plane(out->data[p] + 0 * out->linesize[p],
-                out->linesize[p],
-                in->data[p] + 0 * in->linesize[p],
-                in->linesize[p],
-                width * ((depth + 7) / 8),
-                height);
-            continue;
-        }
-
-        if (s->SE[p].minX == INT16_MAX ||
-            s->SE[p].minY == INT16_MAX ||
-            s->SE[p].maxX == INT16_MIN ||
-            s->SE[p].maxY == INT16_MIN)
-            goto copy;
 
         ret = read_iplane(&s->f[p], src, src_linesize, width, height, 1, type_size, depth);
         if (ret < 0)
@@ -864,74 +965,29 @@ copy:
             goto fail;
 
         switch (s->mode) {
-        case ERODE:
-            ret = erode(&s->g[p], &s->f[p], &s->SE[p], &s->Ty[0][p]);
-            break;
-        case DILATE:
-            ret = dilate(&s->g[p], &s->f[p], &s->SE[p], &s->Ty[0][p]);
-            break;
         case OPEN:
-            ret = read_iplane(&s->h[p], s->temp->data[p], s->temp->linesize[p], width, height, 1, type_size, depth);
-            if (ret < 0)
-                break;
-            ret = erode(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[0][p]);
-            if (ret < 0)
-                break;
-            ret = dilate(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[1][p]);
-            break;
         case CLOSE:
-            ret = read_iplane(&s->h[p], s->temp->data[p], s->temp->linesize[p], width, height, 1, type_size, depth);
-            if (ret < 0)
-                break;
-            ret = dilate(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[0][p]);
-            if (ret < 0)
-                break;
-            ret = erode(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[1][p]);
-            break;
         case GRADIENT:
-            ret = read_iplane(&s->h[p], s->temp->data[p], s->temp->linesize[p], width, height, 1, type_size, depth);
-            if (ret < 0)
-                break;
-            ret = dilate(&s->g[p], &s->f[p], &s->SE[p], &s->Ty[0][p]);
-            if (ret < 0)
-                break;
-            ret = erode(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[1][p]);
-            if (ret < 0)
-                break;
-            difference(&s->g[p], &s->h[p]);
-            break;
         case TOPHAT:
-            ret = read_iplane(&s->h[p], s->temp->data[p], s->temp->linesize[p], width, height, 1, type_size, depth);
-            if (ret < 0)
-                break;
-            ret = erode(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[0][p]);
-            if (ret < 0)
-                break;
-            ret = dilate(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[1][p]);
-            if (ret < 0)
-                break;
-            difference2(&s->g[p], &s->f[p]);
-            break;
         case BLACKHAT:
             ret = read_iplane(&s->h[p], s->temp->data[p], s->temp->linesize[p], width, height, 1, type_size, depth);
-            if (ret < 0)
-                break;
-            ret = dilate(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[0][p]);
-            if (ret < 0)
-                break;
-            ret = erode(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[1][p]);
-            if (ret < 0)
-                break;
-            difference(&s->g[p], &s->f[p]);
             break;
-        default:
-            av_assert0(0);
         }
 
         if (ret < 0)
             goto fail;
     }
 
+    td.in = in; td.out = out;
+    ret = ff_filter_execute(ctx, morpho_slice, &td, NULL,
+                            FFMIN3(s->planeheight[1], s->planeheight[2],
+                                   FFMIN(MAX_THREADS, ff_filter_get_nb_threads(ctx))));
+    if (ret == 0 && (s->mode != ERODE && s->mode != DILATE)) {
+        ff_filter_execute(ctx, morpho_sliceX, NULL, NULL,
+                          FFMIN3(s->planeheight[1], s->planeheight[2],
+                                 FFMIN(MAX_THREADS, ff_filter_get_nb_threads(ctx))));
+    }
+
     av_frame_free(&in);
     out->pts = av_rescale_q(s->fs.pts, s->fs.time_base, outlink->time_base);
     return ff_filter_frame(outlink, out);
@@ -984,8 +1040,10 @@ static av_cold void uninit(AVFilterContext *ctx)
         free_iplane(&s->g[p]);
         free_iplane(&s->h[p]);
         free_chord_set(&s->SE[p]);
-        free_lut(&s->Ty[0][p]);
-        free_lut(&s->Ty[1][p]);
+        for (int n = 0; n < MAX_THREADS; n++) {
+            free_lut(&s->Ty[n][0][p]);
+            free_lut(&s->Ty[n][1][p]);
+        }
     }
 
     ff_framesync_uninit(&s->fs);
@@ -1027,6 +1085,7 @@ const AVFilter ff_vf_morpho = {
     FILTER_INPUTS(morpho_inputs),
     FILTER_OUTPUTS(morpho_outputs),
     FILTER_PIXFMTS_ARRAY(pix_fmts),
-    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
+                       AVFILTER_FLAG_SLICE_THREADS,
     .process_command = ff_filter_process_command,
 };



More information about the ffmpeg-cvslog mailing list