[FFmpeg-devel] [PATCH] GSoC: Add guided filter To-Do-List: 1. Fast guided filter 2. Improve the derain/dehaze/denoise performance of guided filter

Mon May 3 18:05:53 EEST 2021

From: Xuewei Meng <xwmeng96 at gmail.com>

Signed-off-by: Xuewei Meng <xwmeng96 at gmail.com>
---
 doc/filters.texi         |  21 +++
 libavfilter/Makefile     |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/vf_guided.c  | 381 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 404 insertions(+)
 create mode 100644 libavfilter/vf_guided.c

diff --git a/doc/filters.texi b/doc/filters.texi
index 36e35a1..d027ce9 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -12918,6 +12918,27 @@ greyedge=difford=1:minknorm=0:sigma=2
 
 @end itemize
 
+@section guided
+Apply guided filter for dehazing, deraining and denoising. It takes two inputs of the same size and pixel format: the main video and a reference video.
+
+The filter accepts the following options:
+@table @option
+@item radius
+Set the radius in pixels.
+Allowed range is 1 to 20. Default is 3.
+
+@item eps
+Set regularization parameter.
+Allowed range is 0 to 1. Default is 0.2.
+
+@item planes
+Set planes to filter. Default is first only.
+@end table
+
+@subsection Commands
+
+This filter supports all the above options as @ref{commands}.
+
 @anchor{haldclut}
 @section haldclut
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 5a28736..7091508 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -284,6 +284,7 @@ OBJS-$(CONFIG_GBLUR_FILTER)                  += vf_gblur.o
 OBJS-$(CONFIG_GEQ_FILTER)                    += vf_geq.o
 OBJS-$(CONFIG_GRADFUN_FILTER)                += vf_gradfun.o
 OBJS-$(CONFIG_GRAPHMONITOR_FILTER)           += f_graphmonitor.o
+OBJS-$(CONFIG_GUIDED_FILTER)                 += vf_guided.o
 OBJS-$(CONFIG_GREYEDGE_FILTER)               += vf_colorconstancy.o
 OBJS-$(CONFIG_HALDCLUT_FILTER)               += vf_lut3d.o framesync.o
 OBJS-$(CONFIG_HFLIP_FILTER)                  += vf_hflip.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 931d7db..962f656 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -270,6 +270,7 @@ extern const AVFilter ff_vf_geq;
 extern const AVFilter ff_vf_gradfun;
 extern const AVFilter ff_vf_graphmonitor;
 extern const AVFilter ff_vf_greyedge;
+extern const AVFilter ff_vf_guided;
 extern const AVFilter ff_vf_haldclut;
 extern const AVFilter ff_vf_hflip;
 extern const AVFilter ff_vf_histeq;
diff --git a/libavfilter/vf_guided.c b/libavfilter/vf_guided.c
new file mode 100644
index 0000000..715550f
--- /dev/null
+++ b/libavfilter/vf_guided.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 2021 Xuewei Meng
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "framesync.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct GuidedContext {
+    const AVClass *class;  /* AVClass pointer; guided_class is installed as the filter's priv_class */
+    FFFrameSync fs;        /* frame sync pairing the "main" and "reference" inputs */
+
+    int radius;            /* "radius" option: the box window spans 2 * radius + 1 samples */
+    float eps;             /* "eps" option: added to the variance before the division */
+
+    int planes;            /* "planes" option: bit N set means plane N is filtered, else copied */
+
+    int width;             /* width of input 0, stored by config_input() */
+    int height;            /* height of input 0, stored by config_input() */
+
+    int nb_planes;         /* number of planes of the input pixel format */
+    int depth;             /* bit depth of component 0 of the input pixel format */
+    int planewidth[4];     /* per-plane width; planes 1 and 2 use the chroma-subsampled width */
+    int planeheight[4];    /* per-plane height; planes 1 and 2 use the chroma-subsampled height */
+} GuidedContext;
+
+#define OFFSET(field) offsetof(GuidedContext, field)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM)
+/* Option table: radius, eps and planes, all flagged as video, filtering and runtime parameters. */
+static const AVOption guided_options[] = {
+    { "radius", "set radius", OFFSET(radius), AV_OPT_TYPE_INT, { .i64 = 3 }, 1, 20, FLAGS },
+    { "eps", "set eps", OFFSET(eps), AV_OPT_TYPE_FLOAT, { .dbl = 0.2 }, 0.0, 1, FLAGS },
+    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 0xF, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(guided);
+
+/* Advertise the planar YUV(A), GBR(A) and gray pixel formats this filter handles. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat supported[] = {
+        /* YUV, YUVA and YUVJ formats without a bit-depth suffix */
+        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
+        AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
+        /* YUV, then YUVA, formats carrying a 9..16 bit-depth suffix */
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12,
+        AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
+        AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+        AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
+        /* planar GBR(A) and gray formats */
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_NONE
+    };
+    return ff_set_common_formats(ctx, ff_make_format_list(supported));
+}
+
+/* Check that both inputs agree in size and pixel format, then cache depth and per-plane geometry. */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    GuidedContext *s = ctx->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    const AVFilterLink *mainlink = ctx->inputs[0], *reflink = ctx->inputs[1];
+
+    if (mainlink->w != reflink->w || mainlink->h != reflink->h) {
+        av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
+        return AVERROR(EINVAL);
+    }
+    if (mainlink->format != reflink->format) {
+        av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
+        return AVERROR(EINVAL);
+    }
+
+    s->depth     = desc->comp[0].depth;
+    s->width     = mainlink->w;
+    s->height    = mainlink->h;
+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
+    /* planes 1 and 2 take the chroma-subsampled size, planes 0 and 3 the full frame size */
+    for (int p = 0; p < 4; p++) {
+        const int subsampled = p == 1 || p == 2;
+        s->planewidth[p]  = subsampled ? AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w) : inlink->w;
+        s->planeheight[p] = subsampled ? AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h) : inlink->h;
+    }
+    return 0;
+}
+
+/* Box mean over a (2 * radius + 1)^2 window; taps outside the image are clamped to the nearest edge sample. */
+#define BOX(type, name)                                                            \
+static inline void box_##name(type *dst, int dst_stride, const type *src,          \
+                              int src_stride, int radius, int width, int height)   \
+{                                                                                  \
+    const int area = (2 * radius + 1) * (2 * radius + 1);                          \
+    for (int y = 0; y < height; y++) {                                             \
+        for (int x = 0; x < width; x++) {                                          \
+            float sum = 0.0;                                                       \
+            for (int dy = -radius; dy <= radius; dy++) {                           \
+                int sy = y + dy;                                                   \
+                const type *line;                                                  \
+                sy = sy < 0 ? 0 : (sy >= height ? height - 1 : sy);                \
+                line = src + sy * src_stride;                                      \
+                for (int dx = -radius; dx <= radius; dx++) {                       \
+                    int sx = x + dx;                                               \
+                    sx = sx < 0 ? 0 : (sx >= width ? width - 1 : sx);              \
+                    sum += line[sx];                                               \
+                }                                                                  \
+            }                                                                      \
+            dst[y * dst_stride + x] = sum / area;                                  \
+        }                                                                          \
+    }                                                                              \
+}
+
+BOX(float, byte)
+
+#undef BOX
+
+/*
+ * Guided filter for one plane. With I = src and P = srcRef, box-window means give
+ * A = cov(I, P) / (var(I) + eps) and B = mean(P) - A * mean(I) per pixel; the output
+ * is mean(A) * I + mean(B), clamped to [0, 2^depth - 1]. Strides are counted in
+ * elements of `type`. Returns 0, or AVERROR(ENOMEM) if the work buffer is unavailable.
+ */
+#define GUIDED(type, name)                                                              \
+static int guided_##name(GuidedContext *s, const uint8_t *ssrc, const uint8_t *ssrcRef, \
+                         uint8_t *ddst, int radius, float eps, int width, int height,   \
+                         int src_stride, int src_ref_stride, int dst_stride)            \
+{                                                                                       \
+    type *dst = (type *)ddst;                                                           \
+    const type *src = (const type *)ssrc;                                               \
+    const type *srcRef = (const type *)ssrcRef;                                         \
+    const size_t nb = (size_t)width * height;                                           \
+    const float maxval = (1 << s->depth) - 1;                                           \
+    /* one zeroed allocation, carved into twelve width * height work planes */          \
+    float *buf = av_calloc(nb, 12 * sizeof(float));                                     \
+    float *I, *II, *P, *IP, *meanI, *meanII, *meanP, *meanIP, *A, *B, *meanA, *meanB;   \
+                                                                                        \
+    if (!buf)                                                                           \
+        return AVERROR(ENOMEM);                                                         \
+    I      = buf +  0 * nb;  II     = buf +  1 * nb;                                    \
+    P      = buf +  2 * nb;  IP     = buf +  3 * nb;                                    \
+    meanI  = buf +  4 * nb;  meanII = buf +  5 * nb;                                    \
+    meanP  = buf +  6 * nb;  meanIP = buf +  7 * nb;                                    \
+    A      = buf +  8 * nb;  B      = buf +  9 * nb;                                    \
+    meanA  = buf + 10 * nb;  meanB  = buf + 11 * nb;                                    \
+                                                                                        \
+    /* multiply as float: the int product of two uint16_t samples can overflow */       \
+    for (int i = 0; i < height; i++) {                                                  \
+        for (int j = 0; j < width; j++) {                                               \
+            int x = i * width + j;                                                      \
+            I[x]  = src[i * src_stride + j];                                            \
+            P[x]  = srcRef[i * src_ref_stride + j];                                     \
+            II[x] = I[x] * I[x];                                                        \
+            IP[x] = I[x] * P[x];                                                        \
+        }                                                                               \
+    }                                                                                   \
+                                                                                        \
+    box_byte(meanI, width, I, width, radius, width, height);                            \
+    box_byte(meanII, width, II, width, radius, width, height);                          \
+    box_byte(meanP, width, P, width, radius, width, height);                            \
+    box_byte(meanIP, width, IP, width, radius, width, height);                          \
+                                                                                        \
+    for (int i = 0; i < height; i++) {                                                  \
+        for (int j = 0; j < width; j++) {                                               \
+            int x = i * width + j;                                                      \
+            float varI = meanII[x] - (meanI[x] * meanI[x]);                             \
+            float covIP = meanIP[x] - (meanI[x] * meanP[x]);                            \
+            A[x] = covIP / (varI + eps);                                                \
+            B[x] = meanP[x] - A[x] * meanI[x];                                          \
+        }                                                                               \
+    }                                                                                   \
+                                                                                        \
+    box_byte(meanA, width, A, width, radius, width, height);                            \
+    box_byte(meanB, width, B, width, radius, width, height);                            \
+                                                                                        \
+    for (int i = 0; i < height; i++) {                                                  \
+        for (int j = 0; j < width; j++) {                                               \
+            int x = i * width + j;                                                      \
+            float v = meanA[x] * src[i * src_stride + j] + meanB[x];                    \
+            /* clamp before converting; the v > 0 test also sends NaN to 0 */           \
+            dst[i * dst_stride + j] = v > 0.f ? (v < maxval ? v : maxval) : 0.f;        \
+        }                                                                               \
+    }                                                                                   \
+    av_freep(&buf);                                                                     \
+    return 0;                                                                           \
+}
+
+GUIDED(uint8_t, byte)
+GUIDED(uint16_t, word)
+
+/* Framesync event callback: filter one main/reference frame pair and send the
+ * result to the output. Returns the ff_filter_frame() result or a negative
+ * AVERROR code. */
+static int process_frame(FFFrameSync *fs)
+{
+    AVFilterContext *ctx = fs->parent;
+    GuidedContext *s = fs->opaque;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out_frame = NULL, *main_frame = NULL, *ref_frame = NULL;
+    int ret;
+
+    /* main_frame is handed to us to own; ref_frame remains owned by the framesync */
+    ret = ff_framesync_dualinput_get(fs, &main_frame, &ref_frame);
+    if (ret < 0)
+        return ret;
+
+    /* no reference frame (e.g. timeline-disabled): pass the main frame through */
+    if (ctx->is_disabled || !ref_frame)
+        return ff_filter_frame(outlink, main_frame);
+
+    out_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out_frame) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    ret = av_frame_copy_props(out_frame, main_frame);
+    if (ret < 0)
+        goto fail;
+
+    for (int plane = 0; plane < s->nb_planes; plane++) {
+        if (!(s->planes & (1 << plane))) {
+            av_image_copy_plane(out_frame->data[plane], out_frame->linesize[plane],
+                                main_frame->data[plane], main_frame->linesize[plane],
+                                s->planewidth[plane] * ((s->depth + 7) / 8), s->planeheight[plane]);
+            continue;
+        }
+        if (s->depth <= 8)
+            ret = guided_byte(s, main_frame->data[plane], ref_frame->data[plane], out_frame->data[plane],
+                              s->radius, s->eps, s->planewidth[plane], s->planeheight[plane],
+                              main_frame->linesize[plane], ref_frame->linesize[plane], out_frame->linesize[plane]);
+        else
+            ret = guided_word(s, main_frame->data[plane], ref_frame->data[plane], out_frame->data[plane],
+                              s->radius, s->eps, s->planewidth[plane], s->planeheight[plane],
+                              main_frame->linesize[plane] / 2, ref_frame->linesize[plane] / 2, out_frame->linesize[plane] / 2);
+        if (ret < 0)
+            goto fail;
+    }
+
+    av_frame_free(&main_frame);
+    return ff_filter_frame(outlink, out_frame);
+
+fail:
+    av_frame_free(&main_frame);
+    av_frame_free(&out_frame);
+    return ret;
+}
+
+
+/* Output link setup: mirror the main input's properties and configure the two-input frame sync. */
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    GuidedContext *s = ctx->priv;
+    AVFilterLink *mainlink = ctx->inputs[0];
+    FFFrameSyncIn *in;
+    int ret;
+
+    outlink->w = mainlink->w;
+    outlink->h = mainlink->h;
+    outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
+    outlink->frame_rate = mainlink->frame_rate;
+    if ((ret = ff_framesync_init(&s->fs, ctx, 2)) < 0)
+        return ret;
+
+    in = s->fs.in;
+    in[0].time_base = mainlink->time_base;
+    in[1].time_base = ctx->inputs[1]->time_base;
+    in[0].sync   = 2;
+    in[0].before = EXT_INFINITY;
+    in[0].after  = EXT_INFINITY;
+    in[1].sync   = 1;
+    in[1].before = EXT_INFINITY;
+    in[1].after  = EXT_INFINITY;
+    s->fs.opaque   = s;
+    s->fs.on_event = process_frame;
+
+    /* fs.time_base is only computed by ff_framesync_configure(), so read it afterwards */
+    if ((ret = ff_framesync_configure(&s->fs)) < 0)
+        return ret;
+    outlink->time_base = s->fs.time_base;
+    return 0;
+}
+
+/* Delegate activation to the frame sync; process_frame() is registered as its on_event callback. */
+static int activate(AVFilterContext *ctx)
+{
+    return ff_framesync_activate(&((GuidedContext *)ctx->priv)->fs);
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    return 0; /* nothing to set up here; the context is filled in by config_input() and config_output() */
+}
+
+/* Tear down the frame sync embedded in this filter's private context. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    GuidedContext *guided = ctx->priv;
+    ff_framesync_uninit(&guided->fs);
+}
+
+
+/*
+ * Runtime command handler.
+ * The request is passed unchanged to ff_filter_process_command();
+ * a negative result is returned as is and any other result is
+ * reported as 0.
+ */
+static int process_command(AVFilterContext *ctx,
+                           const char *cmd, const char *arg,
+                           char *res, int res_len, int flags)
+{
+    const int status = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags);
+
+    return status < 0 ? status : 0;
+}
+
+/* Input pads: index 0 carries the frames to filter, index 1 the reference frames. */
+static const AVFilterPad guided_inputs[] = {
+    { .name = "main", .type = AVMEDIA_TYPE_VIDEO },
+    {
+        .name         = "reference",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        /* config_input() compares both inputs against each other */
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad guided_outputs[] = { /* single video output, set up by config_output() */
+    {
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "default",
+        .config_props = config_output,
+    },
+    { NULL }
+};
+
+const AVFilter ff_vf_guided = { /* const: must match the "extern const AVFilter" declaration in allfilters.c */
+    .name            = "guided",
+    .description     = NULL_IF_CONFIG_SMALL("Apply Guided filter."),
+    .init            = init,
+    .uninit          = uninit,
+    .query_formats   = query_formats,
+    .priv_size       = sizeof(GuidedContext),
+    .priv_class      = &guided_class,
+    .activate        = activate,
+    .inputs          = guided_inputs,
+    .outputs         = guided_outputs,
+    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+    .process_command = process_command,
+};
-- 
1.9.1