[FFmpeg-devel] [PATCH] avfilter: add motion filter

Thu Jul 13 10:44:50 EEST 2017

Hi, this is the motion filter, one of the component filters of VMAF.
It takes two videos as input.
Run it using: ffmpeg -i main -i ref -lavfi motion -f null -
Currently it outputs the average motion score over all frames.

---
 Changelog                 |   1 +
 doc/filters.texi          |  19 +++
 libavfilter/Makefile      |   2 +
 libavfilter/allfilters.c  |   1 +
 libavfilter/convolution.c | 144 +++++++++++++++++++++
 libavfilter/convolution.h |  29 +++++
 libavfilter/motion.h      |  29 +++++
 libavfilter/vf_motion.c   | 316 ++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 541 insertions(+)
 create mode 100644 libavfilter/convolution.c
 create mode 100644 libavfilter/convolution.h
 create mode 100644 libavfilter/motion.h
 create mode 100644 libavfilter/vf_motion.c

diff --git a/Changelog b/Changelog
index 1778980..69657f4 100644
--- a/Changelog
+++ b/Changelog
@@ -10,6 +10,7 @@ version <next>:
 - config.log and other configuration files moved into ffbuild/ directory
 - update cuvid/nvenc headers to Video Codec SDK 8.0.14
 - afir audio filter
+- motion video filter
 
 version 3.3:
 - CrystalHD decoder moved to new decode API
diff --git a/doc/filters.texi b/doc/filters.texi
index 5985db6..011086b 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -9900,6 +9900,25 @@ Default method is @samp{fdiff}.
 Scene change detection threshold. Default is @code{5.0}.
 @end table
 
+@section motion
+
+Obtain the average motion score between two input videos.
+
+This filter takes two input videos.
+
+Both input videos must have the same resolution and pixel format for
+this filter to work correctly. Also it assumes that both inputs
+have the same number of frames, which are compared one by one.
+
+The obtained average motion score is printed through the logging system.
+
+In the example below, the input file @file{main.mpg} is compared with the
+reference file @file{ref.mpg}.
+
+@example
+ffmpeg -i main.mpg -i ref.mpg -lavfi motion -f null -
+@end example
+
 @section mpdecimate
 
 Drop frames that do not differ greatly from the previous frame in
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index f7dfe8a..446e367 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -13,6 +13,7 @@ OBJS = allfilters.o                                                     \
        avfiltergraph.o                                                  \
        buffersink.o                                                     \
        buffersrc.o                                                      \
+       convolution.o                                                    \
        drawutils.o                                                      \
        fifo.o                                                           \
        formats.o                                                        \
@@ -227,6 +228,7 @@ OBJS-$(CONFIG_MESTIMATE_FILTER)              += vf_mestimate.o motion_estimation
 OBJS-$(CONFIG_METADATA_FILTER)               += f_metadata.o
 OBJS-$(CONFIG_MIDEQUALIZER_FILTER)           += vf_midequalizer.o framesync.o
 OBJS-$(CONFIG_MINTERPOLATE_FILTER)           += vf_minterpolate.o motion_estimation.o
+OBJS-$(CONFIG_MOTION_FILTER)                 += vf_motion.o dualinput.o framesync.o
 OBJS-$(CONFIG_MPDECIMATE_FILTER)             += vf_mpdecimate.o
 OBJS-$(CONFIG_NEGATE_FILTER)                 += vf_lut.o
 OBJS-$(CONFIG_NLMEANS_FILTER)                += vf_nlmeans.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index cd35ae4..7381799 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -239,6 +239,7 @@ static void register_all(void)
     REGISTER_FILTER(METADATA,       metadata,       vf);
     REGISTER_FILTER(MIDEQUALIZER,   midequalizer,   vf);
     REGISTER_FILTER(MINTERPOLATE,   minterpolate,   vf);
+    REGISTER_FILTER(MOTION,         motion,         vf);
     REGISTER_FILTER(MPDECIMATE,     mpdecimate,     vf);
     REGISTER_FILTER(NEGATE,         negate,         vf);
     REGISTER_FILTER(NLMEANS,        nlmeans,        vf);
diff --git a/libavfilter/convolution.c b/libavfilter/convolution.c
new file mode 100644
index 0000000..5c4520d
--- /dev/null
+++ b/libavfilter/convolution.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje at gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712 at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdbool.h>
+#include "convolution.h"
+
+#define FORCE_INLINE __attribute__((always_inline))
+#define RESTRICT __restrict
+
+/**
+ * Round n down to the nearest multiple of m.
+ *
+ * The remainder is normalised to [0, m) first, so a negative n rounds toward
+ * negative infinity rather than toward zero. m must be positive.
+ */
+static inline int floorn(int n, int m)
+{
+    int rem = n % m;
+
+    if (rem < 0)
+        rem += m;
+    return n - rem;
+}
+
+/**
+ * Round n up to the nearest multiple of m.
+ *
+ * The remainder is normalised to [0, m) first, so a negative n is rounded
+ * toward positive infinity to the next multiple (ceiln(-1, 2) is 0).
+ * m must be positive.
+ */
+static inline int ceiln(int n, int m)
+{
+    int rem = n % m;
+
+    if (rem < 0)
+        rem += m;
+    return rem ? n + (m - rem) : n;
+}
+
+/**
+ * Evaluate one output sample of a 1-D convolution near a plane border.
+ *
+ * Taps that land outside the plane are mirrored back inside. If the plane is
+ * smaller than the filter radius a single mirror can still miss it, so the
+ * mirrored index is finally clamped to the valid range.
+ *
+ * The function is static: a plain C99 "inline" definition provides no
+ * external definition and would also export a very generic symbol name.
+ *
+ * @param horizontal  true to filter along row i, false to filter along column j
+ * @param filter      filt_width filter coefficients
+ * @param filt_width  number of filter taps
+ * @param src         source plane
+ * @param w           plane width in samples
+ * @param h           plane height in samples
+ * @param stride      distance between consecutive rows of src, in floats
+ * @param i           row of the sample to compute
+ * @param j           column of the sample to compute
+ * @return the filtered sample
+ */
+static FORCE_INLINE inline float convolution_edge(bool horizontal,
+                                                  const float *filter,
+                                                  int filt_width,
+                                                  const float *src,
+                                                  int w, int h, int stride,
+                                                  int i, int j)
+{
+    int radius = filt_width / 2;
+    int limit  = horizontal ? w : h;
+    int center = horizontal ? j : i;
+    float accum = 0;
+    int k;
+
+    for (k = 0; k < filt_width; ++k) {
+        int tap = center - radius + k;
+
+        /* mirror taps that fall off either end of the row/column */
+        if (tap < 0)
+            tap = -tap;
+        else if (tap >= limit)
+            tap = limit - (tap - limit + 1);
+
+        /* only triggers for planes smaller than the filter radius */
+        if (tap < 0)
+            tap = 0;
+        else if (tap >= limit)
+            tap = limit - 1;
+
+        accum += filter[k] * (horizontal ? src[i * stride + tap]
+                                         : src[tap * stride + j]);
+    }
+    return accum;
+}
+
+/**
+ * Filter every row of a plane horizontally, producing one output column for
+ * every step input columns.
+ *
+ * Columns whose filter support is fully inside the row use the plain inner
+ * loop; columns near the left and right borders go through
+ * convolution_edge().
+ *
+ * @param filter      filt_width filter coefficients
+ * @param filt_width  number of filter taps
+ * @param src         source plane
+ * @param dst         destination plane, output column j / step per input j
+ * @param w           source width in samples
+ * @param h           source height in samples
+ * @param src_stride  distance between rows of src, in floats
+ * @param dst_stride  distance between rows of dst, in floats
+ * @param step        horizontal subsampling step (must be positive)
+ */
+static void convolution_x_c(const float *filter, int filt_width,
+                            const float *src, float *dst, int w, int h,
+                            int src_stride, int dst_stride, int step)
+{
+    int radius = filt_width / 2;
+    int borders_left = ceiln(radius, step);
+    int borders_right = floorn(w - (filt_width - radius), step);
+    int i, j, k;
+
+    /*
+     * For rows narrower than the filter the raw bounds leave [0, w]:
+     * borders_right may even be negative, which would make the trailing loop
+     * store in front of the row. Clamp so the three loops tile [0, w).
+     */
+    if (borders_left > w)
+        borders_left = w;
+    if (borders_right < borders_left)
+        borders_right = borders_left;
+
+    for (i = 0; i < h; i++) {
+        /* left border: part of the filter support lies before column 0 */
+        for (j = 0; j < borders_left; j += step) {
+            dst[i * dst_stride + j / step] = convolution_edge(true, filter,
+                                                              filt_width, src,
+                                                              w, h, src_stride,
+                                                              i, j);
+        }
+
+        /* interior: every tap is inside the row */
+        for (j = borders_left; j < borders_right; j += step) {
+            float accum = 0;
+            for (k = 0; k < filt_width; k++) {
+                accum += filter[k] * src[i * src_stride + j - radius + k];
+            }
+            dst[i * dst_stride + j / step] = accum;
+        }
+
+        /* right border: part of the filter support lies past column w - 1 */
+        for (j = borders_right; j < w; j += step) {
+            dst[i * dst_stride + j / step] = convolution_edge(true, filter,
+                                                              filt_width, src,
+                                                              w, h, src_stride,
+                                                              i, j);
+        }
+    }
+}
+
+/**
+ * Filter every column of a plane vertically, producing one output row for
+ * every step input rows.
+ *
+ * Rows whose filter support is fully inside the plane use the plain inner
+ * loop; rows near the top and bottom borders go through convolution_edge().
+ *
+ * @param filter      filt_width filter coefficients
+ * @param filt_width  number of filter taps
+ * @param src         source plane
+ * @param dst         destination plane, output row i / step per input row i
+ * @param w           source width in samples
+ * @param h           source height in samples
+ * @param src_stride  distance between rows of src, in floats
+ * @param dst_stride  distance between rows of dst, in floats
+ * @param step        vertical subsampling step (must be positive)
+ */
+static void convolution_y_c(const float *filter, int filt_width,
+                            const float *src, float *dst, int w, int h,
+                            int src_stride, int dst_stride, int step)
+{
+    int radius = filt_width / 2;
+    int borders_top = ceiln(radius, step);
+    int borders_bottom = floorn(h - (filt_width - radius), step);
+    int i, j, k;
+
+    /*
+     * For planes shorter than the filter the raw bounds leave [0, h]:
+     * borders_bottom may even be negative, which would make the trailing
+     * loop store in front of the plane. Clamp so the three loops tile [0, h).
+     */
+    if (borders_top > h)
+        borders_top = h;
+    if (borders_bottom < borders_top)
+        borders_bottom = borders_top;
+
+    /* top border: part of the filter support lies above row 0 */
+    for (i = 0; i < borders_top; i += step) {
+        for (j = 0; j < w; j++) {
+            dst[(i / step) * dst_stride + j] = convolution_edge(false, filter,
+                                                                filt_width, src,
+                                                                w, h, src_stride,
+                                                                i, j);
+        }
+    }
+    /* interior: every tap is inside the plane */
+    for (i = borders_top; i < borders_bottom; i += step) {
+        for (j = 0; j < w; j++) {
+            float accum = 0;
+            for (k = 0; k < filt_width; k++) {
+                accum += filter[k] * src[(i - radius + k) * src_stride + j];
+            }
+            dst[(i / step) * dst_stride + j] = accum;
+        }
+    }
+    /* bottom border: part of the filter support lies below row h - 1 */
+    for (i = borders_bottom; i < h; i += step) {
+        for (j = 0; j < w; j++) {
+            dst[(i / step) * dst_stride + j] = convolution_edge(false, filter,
+                                                                filt_width, src,
+                                                                w, h, src_stride,
+                                                                i, j);
+        }
+    }
+}
+
+/**
+ * Separable 2-D convolution of a float plane: the 1-D filter is applied
+ * vertically into tmp, then horizontally from tmp into dst.
+ *
+ * @param filter      filt_width filter coefficients, used for both passes
+ * @param filt_width  number of filter taps
+ * @param src         source plane
+ * @param dst         destination plane
+ * @param tmp         scratch plane, at least h rows of dst_stride floats
+ * @param w           plane width in samples
+ * @param h           plane height in samples
+ * @param src_stride  distance between rows of src, in floats
+ * @param dst_stride  distance between rows of dst and of tmp, in floats
+ */
+void convolution_f32_c(const float *filter, int filt_width, const float *src,
+                       float *dst, float *tmp, int w, int h, int src_stride,
+                       int dst_stride)
+{
+    convolution_y_c(filter, filt_width, src, tmp, w, h, src_stride,
+                    dst_stride, 1);
+    /*
+     * tmp was just written with dst_stride, so it has to be read back with
+     * dst_stride as well; using src_stride here is only correct when both
+     * strides happen to be equal.
+     */
+    convolution_x_c(filter, filt_width, tmp, dst, w, h, dst_stride,
+                    dst_stride, 1);
+}
+
diff --git a/libavfilter/convolution.h b/libavfilter/convolution.h
new file mode 100644
index 0000000..34474f4
--- /dev/null
+++ b/libavfilter/convolution.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje at gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712 at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef CONVOLUTION_H_
+#define CONVOLUTION_H_
+
+void convolution_f32_c(const float *filter, int filter_width, const float *src, /* filter: filter_width taps, applied along y and then along x */
+                       float *dst, float *tmp, int width, int height,          /* tmp: scratch plane that receives the vertical-pass result */
+                       int src_stride, int dst_stride);                        /* strides are used as float-element offsets, not byte counts */
+
+#endif // CONVOLUTION_H_
diff --git a/libavfilter/motion.h b/libavfilter/motion.h
new file mode 100644
index 0000000..0da41db
--- /dev/null
+++ b/libavfilter/motion.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje at gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712 at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef MOTION_TOOLS_H_
+#define MOTION_TOOLS_H_
+
+static int compute_motion(const float *ref, const float *dis, int w, int h, /* NOTE(review): a static prototype in a header gives every includer its own undefined copy; the definition lives in vf_motion.c */
+                          int ref_stride, int dis_stride, double *score,   /* strides are in bytes (the definition divides them by sizeof(float)); *score receives the mean absolute difference */
+                          void *ctx);                                      /* ctx is not used by the definition in vf_motion.c, which always returns 0 */
+
+#endif /* MOTION_TOOLS_H_ */
diff --git a/libavfilter/vf_motion.c b/libavfilter/vf_motion.c
new file mode 100644
index 0000000..38568a9
--- /dev/null
+++ b/libavfilter/vf_motion.c
@@ -0,0 +1,316 @@
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje at gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712 at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Calculate motion score between two input videos.
+ */
+
+#include <inttypes.h>
+#include "libavutil/avstring.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "dualinput.h"
+#include "drawutils.h"
+#include "formats.h"
+#include "internal.h"
+#include "motion.h"
+#include "video.h"
+#include "convolution.h"
+
+typedef struct MOTIONContext { /* private state of the motion filter */
+    const AVClass *class;
+    FFDualInputContext dinput;  /* dual-input helper; its process callback is set to do_motion() in init() */
+    int width;                  /* frame width, copied from input 0 in config_input_ref() */
+    int height;                 /* frame height, copied from input 0 in config_input_ref() */
+    uint8_t type;               /* 8 or 10, derived from the depth of component 0 */
+    float *ref_data;            /* plane 0 of the reference frame as floats, shifted by OPT_RANGE_PIXEL_OFFSET */
+    float *prev_blur_data;      /* blurred plane of the previous frame */
+    float *blur_data;           /* blurred plane of the current frame */
+    float *temp_data;           /* scratch plane for convolution_f32_c(), allocated at twice the plane size */
+    double motion_sum;          /* sum of all per-frame scores */
+    uint64_t nb_frames;         /* number of frames processed so far */
+} MOTIONContext;
+
+#define OFFSET(x) offsetof(MOTIONContext, x) /* byte offset of a context field; unused in the visible code because motion_options is empty */
+#define MAX_ALIGN 32 /* alignment, in bytes, that ALIGN_CEIL rounds up to */
+#define ALIGN_CEIL(x) ((x) + ((x) % MAX_ALIGN ? MAX_ALIGN - (x) % MAX_ALIGN : 0)) /* round x up to the next multiple of MAX_ALIGN */
+#define OPT_RANGE_PIXEL_OFFSET (-128) /* added to every sample when offset() converts the reference plane to float */
+
+static const AVOption motion_options[] = {
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(motion);
+
+static const float FILTER_5[5] = { /* symmetric 5-tap blur kernel passed to convolution_f32_c(); the taps sum to 1 */
+    0.054488685,
+    0.244201342,
+    0.402619947, /* centre tap */
+    0.244201342,
+    0.054488685
+};
+
+/**
+ * Average motion score over all processed frames.
+ *
+ * @param motion_sum  sum of the per-frame motion scores
+ * @param nb_frames   number of frames that contributed to motion_sum
+ * @return motion_sum / nb_frames, or 0 when no frame was processed (instead
+ *         of the NaN/infinity a division by zero would produce)
+ */
+static inline double get_motion_avg(double motion_sum, uint64_t nb_frames)
+{
+    if (!nb_frames)
+        return 0.0;
+    return motion_sum / nb_frames;
+}
+
+/**
+ * Convert plane 0 of the reference frame to floats in s->ref_data, shifting
+ * every sample by OPT_RANGE_PIXEL_OFFSET.
+ *
+ * query_formats() also advertises 10-bit little-endian formats, whose
+ * samples occupy two bytes each. Those are assembled byte by byte (so the
+ * result does not depend on host endianness) and divided by 4 to bring them
+ * onto the 8-bit scale before the offset is applied.
+ *
+ * @param s       filter context; width, height, type and ref_data are used
+ * @param ref     reference frame, only plane 0 is read
+ * @param stride  distance between rows of s->ref_data, in bytes
+ */
+static void offset(MOTIONContext *s, const AVFrame *ref, int stride)
+{
+    const uint8_t *src_row = ref->data[0];
+    float *dst_row = s->ref_data;
+    int w = s->width;
+    int h = s->height;
+    int i, j;
+
+    for (i = 0; i < h; i++) {
+        if (s->type == 8) {
+            for (j = 0; j < w; j++)
+                dst_row[j] = (float) src_row[j] + OPT_RANGE_PIXEL_OFFSET;
+        } else {
+            for (j = 0; j < w; j++) {
+                unsigned v = src_row[2 * j] | (src_row[2 * j + 1] << 8);
+
+                dst_row[j] = (float) v / 4.0f + OPT_RANGE_PIXEL_OFFSET;
+            }
+        }
+        src_row += ref->linesize[0];
+        dst_row += stride / sizeof(float);
+    }
+}
+
+/**
+ * Mean absolute difference between two float planes.
+ *
+ * The sum is accumulated in single precision and the result is rounded to
+ * float before being widened to the double return type.
+ *
+ * @param img1         first plane
+ * @param img2         second plane
+ * @param w            width in samples
+ * @param h            height in samples
+ * @param img1_stride  distance between rows of img1, in floats
+ * @param img2_stride  distance between rows of img2, in floats
+ * @return sum of |img1 - img2| over the w x h area, divided by w * h
+ */
+static double image_sad_c(const float *img1, const float *img2, int w,
+                          int h, int img1_stride, int img2_stride)
+{
+    float total = 0.0f;
+    int row, col;
+
+    for (row = 0; row < h; row++) {
+        const float *line1 = img1 + row * img1_stride;
+        const float *line2 = img2 + row * img2_stride;
+
+        for (col = 0; col < w; col++)
+            total += fabs(line1[col] - line2[col]);
+    }
+
+    return (float) (total / (w * h));
+}
+
+/**
+ * Store the mean absolute difference between two float planes in *score.
+ *
+ * @param ref         first plane
+ * @param dis         second plane
+ * @param w           width in samples
+ * @param h           height in samples
+ * @param ref_stride  distance between rows of ref, in bytes
+ * @param dis_stride  distance between rows of dis, in bytes
+ * @param score       receives the result of image_sad_c()
+ * @param ctx         unused
+ * @return always 0
+ */
+static int compute_motion(const float *ref, const float *dis, int w, int h,
+                          int ref_stride, int dis_stride, double *score,
+                          void *ctx)
+{
+    /* image_sad_c() indexes in floats, the strides arrive in bytes */
+    int ref_elems = ref_stride / sizeof(float);
+    int dis_elems = dis_stride / sizeof(float);
+
+    *score = image_sad_c(ref, dis, w, h, ref_elems, dis_elems);
+
+    return 0;
+}
+
+/**
+ * Store a value, formatted with two decimals, under key in a dictionary.
+ *
+ * @param metadata  dictionary to update
+ * @param key       entry name
+ * @param d         value to store
+ */
+static void set_meta(AVDictionary **metadata, const char *key, float d)
+{
+    char text[128];
+
+    snprintf(text, sizeof(text), "%0.2f", d);
+    av_dict_set(metadata, key, text, 0);
+}
+
+/**
+ * Per-frame callback installed on the dual-input helper.
+ *
+ * Blurs plane 0 of the reference frame, scores it against the blurred
+ * previous frame (the first frame scores 0), attaches the score to the main
+ * frame as "lavfi.motion.score" and accumulates it for the final average.
+ *
+ * @param ctx   filter context
+ * @param main  frame that receives the metadata and is passed through
+ * @param ref   frame whose plane 0 is measured
+ * @return main
+ */
+static AVFrame *do_motion(AVFilterContext *ctx, AVFrame *main, const AVFrame *ref)
+{
+    MOTIONContext *s = ctx->priv;
+    int stride = ALIGN_CEIL(s->width * sizeof(float));
+    double score = 0.0;
+    float *swap;
+
+    offset(s, ref, stride);
+
+    convolution_f32_c(FILTER_5, 5, s->ref_data, s->blur_data, s->temp_data,
+                      s->width, s->height, stride / sizeof(float), stride /
+                      sizeof(float));
+
+    if (s->nb_frames)
+        compute_motion(s->prev_blur_data, s->blur_data, s->width, s->height,
+                       stride, stride, &score, s);
+
+    /*
+     * The current blur becomes the previous one. Swapping the pointers avoids
+     * copying a whole plane per frame; the buffer that ends up in blur_data
+     * is completely rewritten by the next convolution.
+     */
+    swap              = s->prev_blur_data;
+    s->prev_blur_data = s->blur_data;
+    s->blur_data      = swap;
+
+    set_meta(&main->metadata, "lavfi.motion.score", score);
+
+    s->nb_frames++;
+
+    s->motion_sum += score;
+
+    return main;
+}
+
+/**
+ * Filter init callback: install do_motion() as the dual-input process hook.
+ *
+ * @return always 0
+ */
+static av_cold int init(AVFilterContext *ctx)
+{
+    FFDualInputContext *dinput = &((MOTIONContext *) ctx->priv)->dinput;
+
+    dinput->process = do_motion;
+    return 0;
+}
+
+/**
+ * Advertise the pixel formats accepted on every link of the filter.
+ *
+ * @return the result of ff_set_common_formats(), or AVERROR(ENOMEM) if the
+ *         format list could not be built
+ */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV444P10LE,
+        AV_PIX_FMT_YUV422P10LE,
+        AV_PIX_FMT_YUV420P10LE,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+
+    return fmts_list ? ff_set_common_formats(ctx, fmts_list)
+                     : AVERROR(ENOMEM);
+}
+
+/**
+ * Configure the reference input: check that both inputs agree in size and
+ * pixel format, record the frame geometry and allocate the work buffers.
+ *
+ * Buffers allocated before a failure are not released here; they stay in the
+ * context and are freed by uninit().
+ *
+ * @return 0 on success, AVERROR(EINVAL) if the inputs do not match,
+ *         AVERROR(ENOMEM) if a buffer cannot be allocated
+ */
+static int config_input_ref(AVFilterLink *inlink)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    AVFilterContext *ctx  = inlink->dst;
+    MOTIONContext *s = ctx->priv;
+    int stride;
+    size_t data_sz;
+
+    if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
+        ctx->inputs[0]->h != ctx->inputs[1]->h) {
+        av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
+        return AVERROR(EINVAL);
+    }
+    if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
+        av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
+        return AVERROR(EINVAL);
+    }
+
+    s->width = ctx->inputs[0]->w;
+    s->height = ctx->inputs[0]->h;
+
+    /* rows of the float planes are padded to a multiple of MAX_ALIGN bytes */
+    stride = ALIGN_CEIL(s->width * sizeof(float));
+    data_sz = (size_t)stride * s->height;
+
+    /* an allocation failure is an out-of-memory condition, not bad input */
+    if (!(s->ref_data = av_malloc(data_sz))) {
+        av_log(ctx, AV_LOG_ERROR, "ref_buf allocation failed.\n");
+        return AVERROR(ENOMEM);
+    }
+    if (!(s->prev_blur_data = av_mallocz(data_sz))) {
+        av_log(ctx, AV_LOG_ERROR, "prev_blur_buf allocation failed.\n");
+        return AVERROR(ENOMEM);
+    }
+    if (!(s->blur_data = av_mallocz(data_sz))) {
+        av_log(ctx, AV_LOG_ERROR, "blur_buf allocation failed.\n");
+        return AVERROR(ENOMEM);
+    }
+    if (!(s->temp_data = av_mallocz(data_sz * 2))) {
+        av_log(ctx, AV_LOG_ERROR, "temp_buf allocation failed.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    s->type = desc->comp[0].depth > 8 ? 10 : 8;
+
+    return 0;
+}
+
+/**
+ * Configure the output link as a copy of the main input link and start the
+ * dual-input helper.
+ *
+ * @return 0 on success, or the negative error from ff_dualinput_init()
+ */
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    MOTIONContext *s = ctx->priv;
+    const AVFilterLink *mainlink = ctx->inputs[0];
+    int ret;
+
+    outlink->w                   = mainlink->w;
+    outlink->h                   = mainlink->h;
+    outlink->time_base           = mainlink->time_base;
+    outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
+    outlink->frame_rate          = mainlink->frame_rate;
+
+    ret = ff_dualinput_init(ctx, &s->dinput);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+/**
+ * Input pad callback: hand the incoming frame to the dual-input helper.
+ *
+ * @return the result of ff_dualinput_filter_frame()
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
+{
+    AVFilterContext *ctx = inlink->dst;
+    MOTIONContext *s = ctx->priv;
+
+    return ff_dualinput_filter_frame(&s->dinput, inlink, inpicref);
+}
+
+/**
+ * Output pad callback: forward the frame request to the dual-input helper.
+ *
+ * @return the result of ff_dualinput_request_frame()
+ */
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    MOTIONContext *s = ctx->priv;
+
+    return ff_dualinput_request_frame(&s->dinput, outlink);
+}
+
+/**
+ * Filter uninit callback: release the dual-input helper and the work
+ * buffers, then report the average motion score.
+ *
+ * The buffers are released with av_freep() so the context never keeps
+ * dangling pointers. The average is only logged when at least one frame was
+ * processed, because it is undefined otherwise.
+ */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    MOTIONContext *s = ctx->priv;
+
+    ff_dualinput_uninit(&s->dinput);
+
+    av_freep(&s->ref_data);
+    av_freep(&s->prev_blur_data);
+    av_freep(&s->blur_data);
+    av_freep(&s->temp_data);
+
+    if (s->nb_frames > 0)
+        av_log(ctx, AV_LOG_INFO, "MOTION AVG: %.3f\n",
+               get_motion_avg(s->motion_sum, s->nb_frames));
+}
+
+static const AVFilterPad motion_inputs[] = { /* the two video inputs, both routed through filter_frame() */
+    {
+        .name         = "main", /* NOTE(review): presumably delivered to do_motion() as `main` - confirm against dualinput */
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },{
+        .name         = "reference",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input_ref, /* validates both inputs and allocates the work buffers */
+    },
+    { NULL }
+};
+
+static const AVFilterPad motion_outputs[] = { /* the single video output */
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output, /* copies the main input's link properties and starts the dual-input helper */
+        .request_frame = request_frame,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_motion = { /* filter definition; registered as "motion" through REGISTER_FILTER in allfilters.c */
+    .name          = "motion",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate the MOTION between two video streams."),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(MOTIONContext),
+    .priv_class    = &motion_class, /* class generated by AVFILTER_DEFINE_CLASS(motion) */
+    .inputs        = motion_inputs,
+    .outputs       = motion_outputs,
+};
-- 
2.7.4