[FFmpeg-devel] [PATCH] GSoC: Support fast guided filter.

Tue May 11 08:08:46 EEST 2021

On Mon, May 10, 2021 at 9:42 PM Xuewei Meng <928826483 at qq.com> wrote:
>
> From: Xuewei Meng <xwmeng96 at gmail.com>
>
> Two modes are supported in guided filter, basic mode and fast mode.
> Basic mode is the initial pushed guided filter without optimization.
> Fast mode is implemented based on the basic one by sub-sampling method.
> The sub-sampling ratio which can be defined by users controls the
> algorithm complexity. The larger the sub-sampling ratio, the lower
> the algorithm complexity.
>
> Signed-off-by: Xuewei Meng <xwmeng96 at gmail.com>
> ---
>  doc/filters.texi        |  20 +++++++---
>  libavfilter/vf_guided.c | 104 ++++++++++++++++++++++++++++++++----------------
>  2 files changed, 85 insertions(+), 39 deletions(-)
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 03ca9ae..eb747cb 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -12963,12 +12963,22 @@ Apply guided filter for edge-preserving smoothing, dehazing and so on.
>  The filter accepts the following options:
>  @table @option
>  @item radius
> -Set the radius in pixels.
> +Set the box radius in pixels.
>  Allowed range is 1 to 20. Default is 3.
>
>  @item eps
> -Set regularization parameter.
> -Allowed range is 0 to 1. Default is 0.1.
> +Set regularization parameter (with square).
> +Allowed range is 0 to 1. Default is 0.01.
> +
> +@item mode
> +Set filter mode. Can be @code{basic} or @code{fast}.
> +Default is @code{basic}.
> +
> +@item sub
> +Set subsampling ratio.
> +Allowed range is 1 to 64.
> +Default is always 1 for @code{basic} value of @var{mode} option,
> +and 4 for @code{fast} value of @var{mode} option.
>
>  @item planes
>  Set planes to filter. Default is first only.
> @@ -12987,8 +12997,8 @@ ffmpeg -i in.png -i in.png -filter_complex guided out.png
>
>  @item
>  Dehazing, structure-transferring filtering, detail enhancement with guided filter.
> -For the generation of guidance image,
> -see @url{http://kaiminghe.com/publications/pami12guidedfilter.pdf}.
> +For the generation of guidance image, refer to paper "Guided Image Filtering".
> +See: @url{http://kaiminghe.com/publications/pami12guidedfilter.pdf}.
>  @example
>  ffmpeg -i in.png -i guidance.png -filter_complex guided out.png
>  @end example
> diff --git a/libavfilter/vf_guided.c b/libavfilter/vf_guided.c
> index 86c0db5..230fb7b 100644
> --- a/libavfilter/vf_guided.c
> +++ b/libavfilter/vf_guided.c
> @@ -27,12 +27,20 @@
>  #include "internal.h"
>  #include "video.h"
>
> +enum FilterModes {
> +    BASIC,
> +    FAST,
> +    NB_MODES,
> +};
> +
>  typedef struct GuidedContext {
>      const AVClass *class;
>      FFFrameSync fs;
>
>      int radius;
>      float eps;
> +    int mode;
> +    int sub;
>
>      int planes;
>
> @@ -51,9 +59,13 @@ typedef struct GuidedContext {
>  #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
>
>  static const AVOption guided_options[] = {
> -    { "radius", "set the box radius",           OFFSET(radius), AV_OPT_TYPE_INT,   {.i64=3    },   1,  20, FLAGS },
> -    { "eps",    "set the regularization parameter (with square)",              OFFSET(eps),    AV_OPT_TYPE_FLOAT, {.dbl=0.01  }, 0.0,   1, FLAGS },
> -    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,   {.i64=1    },   0, 0xF, FLAGS },
> +    { "radius", "set the box radius",                               OFFSET(radius), AV_OPT_TYPE_INT,   {.i64 = 3    },   1,           20, FLAGS },
> +    { "eps",    "set the regularization parameter (with square)",   OFFSET(eps),    AV_OPT_TYPE_FLOAT, {.dbl = 0.01 }, 0.0,            1, FLAGS },
> +    { "mode",   "set filtering mode (0: basic mode; 1: fast mode)", OFFSET(mode),   AV_OPT_TYPE_INT,   {.i64 = BASIC},   0, NB_MODES - 1, FLAGS, "mode" },
> +    { "basic",  "basic guided filter",                              0,              AV_OPT_TYPE_CONST, {.i64 = BASIC},   0,            0, FLAGS, "mode" },
> +    { "fast",   "fast guided filter",                               0,              AV_OPT_TYPE_CONST, {.i64 = FAST },   0,            0, FLAGS, "mode" },
> +    { "sub",    "subsampling ratio",                                OFFSET(sub),    AV_OPT_TYPE_INT,   {.i64 = 1    },   1,           64, FLAGS },
> +    { "planes", "set planes to filter",                             OFFSET(planes), AV_OPT_TYPE_INT,   {.i64=1      },   0,          0xF, FLAGS },
>      { NULL }
>  };
>
> @@ -147,6 +159,26 @@ static int config_input(AVFilterLink *inlink)
>          return AVERROR(EINVAL);
>      }
>
> +    if (s->mode == BASIC) {
> +        if (s->sub != 1) {
> +            av_log(ctx, AV_LOG_WARNING, "Subsampling ratio is 1 in basic mode.\n");
> +            s->sub = 1;
> +        }
> +    }
> +    else if (s->mode == FAST) {
> +        if (s->sub == 1) {
> +            av_log(ctx, AV_LOG_WARNING, "Subsampling ratio is larger than 1 in fast mode.\n");
> +            s->sub = 4;
> +        }
> +        if (s->radius >= s->sub)
> +            s->radius = s->radius / s->sub;
> +        else {
> +            s->radius = 1;
> +        }
> +    }
> +    else {
> +        return AVERROR_BUG;
> +    }
>
>      s->depth = desc->comp[0].depth;
>      s->width = ctx->inputs[0]->w;
> @@ -174,6 +206,10 @@ static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
>      const type *src = (const type *)ssrc;                                               \
>      const type *srcRef = (const type *)ssrcRef;                                         \
>                                                                                          \
> +    int sub = s->sub;                                                                   \
> +    int h = (height % sub) == 0 ? height / sub : height / sub + 1;                      \
> +    int w = (width % sub) == 0 ? width / sub : width / sub + 1;                         \
> +                                                                                        \
>      ThreadData t;                                                                       \
>      const int nb_threads = ff_filter_get_nb_threads(ctx);                               \
>      float *I;                                                                           \
> @@ -189,55 +225,55 @@ static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
>      float *meanA;                                                                       \
>      float *meanB;                                                                       \
>                                                                                          \
> -    I      = av_calloc(width * height, sizeof(float));                                  \
> -    II     = av_calloc(width * height, sizeof(float));                                  \
> -    P      = av_calloc(width * height, sizeof(float));                                  \
> -    IP     = av_calloc(width * height, sizeof(float));                                  \
> -    meanI  = av_calloc(width * height, sizeof(float));                                  \
> -    meanII = av_calloc(width * height, sizeof(float));                                  \
> -    meanP  = av_calloc(width * height, sizeof(float));                                  \
> -    meanIP = av_calloc(width * height, sizeof(float));                                  \
> +    I      = av_calloc(w * h, sizeof(float));                                           \
> +    II     = av_calloc(w * h, sizeof(float));                                           \
> +    P      = av_calloc(w * h, sizeof(float));                                           \
> +    IP     = av_calloc(w * h, sizeof(float));                                           \
> +    meanI  = av_calloc(w * h, sizeof(float));                                           \
> +    meanII = av_calloc(w * h, sizeof(float));                                           \
> +    meanP  = av_calloc(w * h, sizeof(float));                                           \
> +    meanIP = av_calloc(w * h, sizeof(float));                                           \
>                                                                                          \
> -    A      = av_calloc(width * height, sizeof(float));                                  \
> -    B      = av_calloc(width * height, sizeof(float));                                  \
> -    meanA  = av_calloc(width * height, sizeof(float));                                  \
> -    meanB  = av_calloc(width * height, sizeof(float));                                  \
> +    A      = av_calloc(w * h, sizeof(float));                                           \
> +    B      = av_calloc(w * h, sizeof(float));                                           \
> +    meanA  = av_calloc(w * h, sizeof(float));                                           \
> +    meanB  = av_calloc(w * h, sizeof(float));                                           \
>                                                                                          \
>      if (!I || !II || !P || !IP || !meanI || !meanII || !meanP ||                        \
>          !meanIP || !A || !B || !meanA || !meanB){                                       \
>          ret = AVERROR(ENOMEM);                                                          \
>          goto end;                                                                       \
>      }                                                                                   \
> -    for (int i = 0;i < height;i++) {                                                    \
> -      for (int j = 0;j < width;j++) {                                                   \
> -        int x = i * width + j;                                                          \
> -        I[x]  = src[i * src_stride + j] / maxval;                                       \
> +    for (int i = 0;i < h;i++) {                                                         \
> +      for (int j = 0;j < w;j++) {                                                       \
> +        int x = i * w + j;                                                              \
> +        I[x]  = src[(i * src_stride + j) * sub] / maxval;                               \
>          II[x] = I[x] * I[x];                                                            \
> -        P[x]  = srcRef[i * src_ref_stride + j] / maxval;                                \
> +        P[x]  = srcRef[(i * src_ref_stride + j) * sub] / maxval;                        \
>          IP[x] = I[x] * P[x];                                                            \
>        }                                                                                 \
>      }                                                                                   \
>                                                                                          \
> -    t.width  = width;                                                                   \
> -    t.height = height;                                                                  \
> -    t.srcStride = width;                                                                \
> -    t.dstStride = width;                                                                \
> +    t.width  = w;                                                                       \
> +    t.height = h;                                                                       \
> +    t.srcStride = w;                                                                    \
> +    t.dstStride = w;                                                                    \
>      t.src = I;                                                                          \
>      t.dst = meanI;                                                                      \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>      t.src = II;                                                                         \
>      t.dst = meanII;                                                                     \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>      t.src = P;                                                                          \
>      t.dst = meanP;                                                                      \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>      t.src = IP;                                                                         \
>      t.dst = meanIP;                                                                     \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>                                                                                          \
> -    for (int i = 0;i < height;i++) {                                                    \
> -      for (int j = 0;j < width;j++) {                                                   \
> -        int x = i * width + j;                                                          \
> +    for (int i = 0;i < h;i++) {                                                         \
> +      for (int j = 0;j < w;j++) {                                                       \
> +        int x = i * w + j;                                                              \
>          float varI = meanII[x] - (meanI[x] * meanI[x]);                                 \
>          float covIP = meanIP[x] - (meanI[x] * meanP[x]);                                \
>          A[x] = covIP / (varI + eps);                                                    \
> @@ -247,14 +283,14 @@ static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
>                                                                                          \
>      t.src = A;                                                                          \
>      t.dst = meanA;                                                                      \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>      t.src = B;                                                                          \
>      t.dst = meanB;                                                                      \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>                                                                                          \
>      for (int i = 0;i < height;i++) {                                                    \
>        for (int j = 0;j < width;j++) {                                                   \
> -        int x = i * width + j;                                                          \
> +        int x = i / sub * w + j / sub;                                                  \
>          dst[i * dst_stride + j] = meanA[x] * src[i * src_stride + j] +                  \
>                                    meanB[x] * maxval;                                    \
>        }                                                                                 \
> --
> 1.9.1
I think you submitted only the diff between version 3 and version 4
(the fast mode part), which is strange. You need to submit a full
version 4 patch.