[FFmpeg-devel] [PATCH] boxblur: Templatize blur{8,16}

Sun Nov 1 20:10:20 CET 2015

On Sun, Nov 1, 2015 at 1:40 PM, Timothy Gu <timothygu99 at gmail.com> wrote:
> ---
>  libavfilter/vf_boxblur.c | 110 +++++++++++++++++++----------------------------
>  1 file changed, 44 insertions(+), 66 deletions(-)
>
> diff --git a/libavfilter/vf_boxblur.c b/libavfilter/vf_boxblur.c
> index ef01cf9..6934076 100644
> --- a/libavfilter/vf_boxblur.c
> +++ b/libavfilter/vf_boxblur.c
> @@ -204,75 +204,53 @@ static int config_input(AVFilterLink *inlink)
>      return 0;
>  }
>
> -static inline void blur8(uint8_t *dst, int dst_step, const uint8_t *src, int src_step,
> -                        int len, int radius)
> -{
> -    /* Naive boxblur would sum source pixels from x-radius .. x+radius
> -     * for destination pixel x. That would be O(radius*width).
> -     * If you now look at what source pixels represent 2 consecutive
> -     * output pixels, then you see they are almost identical and only
> -     * differ by 2 pixels, like:
> -     * src0       111111111
> -     * dst0           1
> -     * src1        111111111
> -     * dst1            1
> -     * src0-src1  1       -1
> -     * so when you know one output pixel you can find the next by just adding
> -     * and subtracting 1 input pixel.
> -     * The following code adopts this faster variant.
> -     */
> -    const int length = radius*2 + 1;
> -    const int inv = ((1<<16) + length/2)/length;
> -    int x, sum = src[radius*src_step];
> -
> -    for (x = 0; x < radius; x++)
> -        sum += src[x*src_step]<<1;
> -
> -    sum = sum*inv + (1<<15);
> -
> -    for (x = 0; x <= radius; x++) {
> -        sum += (src[(radius+x)*src_step] - src[(radius-x)*src_step])*inv;
> -        dst[x*dst_step] = sum>>16;
> -    }
> -
> -    for (; x < len-radius; x++) {
> -        sum += (src[(radius+x)*src_step] - src[(x-radius-1)*src_step])*inv;
> -        dst[x*dst_step] = sum >>16;
> -    }
> -
> -    for (; x < len; x++) {
> -        sum += (src[(2*len-radius-x-1)*src_step] - src[(x-radius-1)*src_step])*inv;
> -        dst[x*dst_step] = sum>>16;
> -    }
> +/* Naive boxblur would sum source pixels from x-radius .. x+radius
> + * for destination pixel x. That would be O(radius*width).
> + * If you now look at what source pixels represent 2 consecutive
> + * output pixels, then you see they are almost identical and only
> + * differ by 2 pixels, like:
> + * src0       111111111
> + * dst0           1
> + * src1        111111111
> + * dst1            1
> + * src0-src1  1       -1
> + * so when you know one output pixel you can find the next by just adding
> + * and subtracting 1 input pixel.
> + * The following code adopts this faster variant.
> + */
> +#define BLUR(type, depth)                                                   \
> +static inline void blur ## depth(type *dst, int dst_step, const type *src,  \
> +                                 int src_step, int len, int radius)         \
> +{                                                                           \
> +    const int length = radius*2 + 1;                                        \
> +    const int inv = ((1<<16) + length/2)/length;                            \
> +    int x, sum = src[radius*src_step];                                      \
> +                                                                            \
> +    for (x = 0; x < radius; x++)                                            \
> +        sum += src[x*src_step]<<1;                                          \
> +                                                                            \
> +    sum = sum*inv + (1<<15);                                                \
> +                                                                            \
> +    for (x = 0; x <= radius; x++) {                                         \
> +        sum += (src[(radius+x)*src_step] - src[(radius-x)*src_step])*inv;   \
> +        dst[x*dst_step] = sum>>16;                                          \
> +    }                                                                       \
> +                                                                            \
> +    for (; x < len-radius; x++) {                                           \
> +        sum += (src[(radius+x)*src_step] - src[(x-radius-1)*src_step])*inv; \
> +        dst[x*dst_step] = sum >>16;                                         \
> +    }                                                                       \
> +                                                                            \
> +    for (; x < len; x++) {                                                  \
> +        sum += (src[(2*len-radius-x-1)*src_step] - src[(x-radius-1)*src_step])*inv; \
> +        dst[x*dst_step] = sum>>16;                                          \
> +    }                                                                       \
>  }
>
> -static inline void blur16(uint16_t *dst, int dst_step, const uint16_t *src, int src_step,
> -                          int len, int radius)
> -{
> -    const int length = radius*2 + 1;
> -    const int inv = ((1<<16) + length/2)/length;
> -    int x, sum = src[radius*src_step];
> -
> -    for (x = 0; x < radius; x++)
> -        sum += src[x*src_step]<<1;
> -
> -    sum = sum*inv + (1<<15);
> -
> -    for (x = 0; x <= radius; x++) {
> -        sum += (src[(radius+x)*src_step] - src[(radius-x)*src_step])*inv;
> -        dst[x*dst_step] = sum>>16;
> -    }
> -
> -    for (; x < len-radius; x++) {
> -        sum += (src[(radius+x)*src_step] - src[(x-radius-1)*src_step])*inv;
> -        dst[x*dst_step] = sum >>16;
> -    }
> +BLUR(uint8_t,   8)
> +BLUR(uint16_t, 16)
>
> -    for (; x < len; x++) {
> -        sum += (src[(2*len-radius-x-1)*src_step] - src[(x-radius-1)*src_step])*inv;
> -        dst[x*dst_step] = sum>>16;
> -    }
> -}
> +#undef BLUR

Have not tested, but just a general comment. Personally, I follow the
twice repition is ok, thrice means it is good to factor out. I would
have been happier if the diff-stat was better than 44+/66- in terms of
deletions. This is by no means a nack, but a neutral vote in the
absence of testing.

Unless you are planning to extend in some way with 32, etc, in which case ok.

>
>  static inline void blur(uint8_t *dst, int dst_step, const uint8_t *src, int src_step,
>                          int len, int radius, int pixsize)
> --
> 2.1.4
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel