[FFmpeg-devel] [PATCH v5 2/3] vf_find_rect.c: use the optimized sad function to improve the find performance

Wed Jun 12 13:57:30 EEST 2019

From: Limin Wang <lance.lmwang at gmail.com>

Benchmark on x86_64: 6.4 fps -> 16 fps with the command below:
./ffmpeg  -i 1920x1080.mp4 -vf find_rect=./find.tif,cover_rect=./cover.jpg:mode=cover -f null -
6.4 fps -> 16 fps

Signed-off-by: Limin Wang <lance.lmwang at gmail.com>
---
 libavfilter/vf_find_rect.c | 53 +++++++++++++++-----------------------
 1 file changed, 21 insertions(+), 32 deletions(-)

diff --git a/libavfilter/vf_find_rect.c b/libavfilter/vf_find_rect.c
index ee6c3f4b45..ed15885bc2 100644
--- a/libavfilter/vf_find_rect.c
+++ b/libavfilter/vf_find_rect.c
@@ -26,6 +26,7 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
 #include "internal.h"
+#include "scene_sad.h"
 
 #include "lavfutils.h"
 #include "lswsutils.h"
@@ -36,6 +37,8 @@ typedef struct FOCContext {
     AVClass *class;
     float threshold;
     int mipmaps;
+    ff_scene_sad_fn sad;
+    int bitdepth;
     int xmin, ymin, xmax, ymax;
     char *obj_filename;
     int last_x, last_y;
@@ -103,54 +106,40 @@ static AVFrame *downscale(AVFrame *in)
     return frame;
 }
 
-static float compare(const AVFrame *haystack, const AVFrame *obj, int offx, int offy)
+static float compare_sad(FOCContext *foc, AVFrame *haystack, AVFrame *obj, int offx, int offy)
 {
-    int x,y;
-    int o_sum_v = 0;
-    int h_sum_v = 0;
-    int64_t oo_sum_v = 0;
-    int64_t hh_sum_v = 0;
-    int64_t oh_sum_v = 0;
-    float c;
+    uint64_t sad = 0;
     int n = obj->height * obj->width;
-    const uint8_t *odat = obj     ->data[0];
+    double mafd;
+    const uint8_t *odat = obj->data[0];
     const uint8_t *hdat = haystack->data[0] + offx + offy * haystack->linesize[0];
-    int64_t o_sigma, h_sigma;
-
-    for(y = 0; y < obj->height; y++) {
-        for(x = 0; x < obj->width; x++) {
-            int o_v = odat[x];
-            int h_v = hdat[x];
-            o_sum_v += o_v;
-            h_sum_v += h_v;
-            oo_sum_v += o_v * o_v;
-            hh_sum_v += h_v * h_v;
-            oh_sum_v += o_v * h_v;
-        }
-        odat += obj->linesize[0];
-        hdat += haystack->linesize[0];
-    }
-    o_sigma = n*oo_sum_v - o_sum_v*(int64_t)o_sum_v;
-    h_sigma = n*hh_sum_v - h_sum_v*(int64_t)h_sum_v;
 
-    if (o_sigma == 0 || h_sigma == 0)
-        return 1.0;
+    foc->sad(hdat, haystack->linesize[0], odat, obj->linesize[0],
+            obj->width, obj->height, &sad);
+    emms_c();
+    mafd = (double)sad / n / (1ULL << foc->bitdepth);
 
-    c = (n*oh_sum_v - o_sum_v*(int64_t)h_sum_v) / (sqrt(o_sigma)*sqrt(h_sigma));
-
-    return 1 - fabs(c);
+    return mafd;
 }
 
 static int config_input(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->dst;
     FOCContext *foc = ctx->priv;
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
 
     if (foc->xmax <= 0)
         foc->xmax = inlink->w - foc->obj_frame->width;
     if (foc->ymax <= 0)
         foc->ymax = inlink->h - foc->obj_frame->height;
 
+    foc->bitdepth = pix_desc->comp[0].depth;
+
+    foc->sad = ff_scene_sad_get_fn(foc->bitdepth == 8 ? 8 : 16);
+    if (!foc->sad)
+        return AVERROR(EINVAL);
+
+
     return 0;
 }
 
@@ -169,7 +158,7 @@ static float search(FOCContext *foc, int pass, int maxpass, int xmin, int xmax,
 
     for (y = ymin; y <= ymax; y++) {
         for (x = xmin; x <= xmax; x++) {
-            float score = compare(foc->haystack_frame[pass], foc->needle_frame[pass], x, y);
+            float score = compare_sad(foc, foc->haystack_frame[pass], foc->needle_frame[pass], x, y);
             av_assert0(score != 0);
             if (score < best_score) {
                 best_score = score;
-- 
2.21.0