[FFmpeg-devel] [PATCH] Port gradfun to libavfilter (GCI)

Mon Nov 29 15:00:14 CET 2010

On date Monday 2010-11-29 07:18:14 -0500, Nolan L encoded:
> As part of a GCI task, I've ported the gradfun debanding filter from mplayer
> to libavfilter.

Just out of curiosity, which project is this GCI task related to?

> The patch includes changes to the build system to account for CPU
> optimizations that weren't present previously.
> 
> There is a SSE2 method that remains unported due to lack of SSE2 detection
> in the configure script that I wasn't quite sure how to add.

> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 210510f..f50c100 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -28,6 +28,7 @@ OBJS-$(CONFIG_FORMAT_FILTER)                 += vf_format.o
>  OBJS-$(CONFIG_FREI0R_FILTER)                 += vf_frei0r.o
>  OBJS-$(CONFIG_HFLIP_FILTER)                  += vf_hflip.o
>  OBJS-$(CONFIG_NOFORMAT_FILTER)               += vf_format.o
> +OBJS-$(CONFIG_GRADFUN_FILTER)                += vf_gradfun.o
>  OBJS-$(CONFIG_NULL_FILTER)                   += vf_null.o
>  OBJS-$(CONFIG_OCV_SMOOTH_FILTER)             += vf_libopencv.o
>  OBJS-$(CONFIG_OVERLAY_FILTER)                += vf_overlay.o
> @@ -52,6 +53,9 @@ OBJS-$(CONFIG_NULLSINK_FILTER)               += vsink_nullsink.o
>  
>  -include $(SUBDIR)$(ARCH)/Makefile
>  
> +OBJS-$(HAVE_MMX2)  += $(MMX2-OBJS-yes)
> +OBJS-$(HAVE_SSSE3) += $(SSSE3-OBJS-yes)
> +
>  DIRS = x86
>  
>  include $(SUBDIR)../subdir.mak
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index c3067b8..6ac517c 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -48,6 +48,7 @@ void avfilter_register_all(void)
>      REGISTER_FILTER (FORMAT,      format,      vf);
>      REGISTER_FILTER (FREI0R,      frei0r,      vf);
>      REGISTER_FILTER (HFLIP,       hflip,       vf);
> +    REGISTER_FILTER (GRADFUN,     gradfun,     vf);
>      REGISTER_FILTER (NOFORMAT,    noformat,    vf);
>      REGISTER_FILTER (NULL,        null,        vf);
>      REGISTER_FILTER (OCV_SMOOTH,  ocv_smooth,  vf);
> diff --git a/libavfilter/gradfun.h b/libavfilter/gradfun.h
> new file mode 100644
> index 0000000..f67292b
> --- /dev/null
> +++ b/libavfilter/gradfun.h
> @@ -0,0 +1,62 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +

> +#ifndef AVFILTER_YADIF_H
> +#define AVFILTER_YADIF_H

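Ehm... this is gradfun.h, but the include guard is still named after yadif;
it should be AVFILTER_GRADFUN_H.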

> +
> +#include "avfilter.h"
> +
> +#define CHROMA_WIDTH(link)     -((-link->w) >> av_pix_fmt_descriptors[link->format].log2_chroma_w)
> +#define CHROMA_HEIGHT(link)    -((-link->h) >> av_pix_fmt_descriptors[link->format].log2_chroma_h)
> +#define RADIUS_CHROMA(r, link) (((r >> av_pix_fmt_descriptors[link->format].log2_chroma_w) + \
> +                                (r >> av_pix_fmt_descriptors[link->format].log2_chroma_w)) / 2)

These macros can be moved into the implementation file or, even better,
avoided altogether.

> +
> +typedef struct {
> +    int thresh;
> +    int radius;
> +    uint16_t *buf;
> +    void (*filter_line) (uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers);
> +    void (*blur_line) (uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int sstride, int width);

Nit:
sstride -> src_stride (or src_linesize)

> +} GradFunContext;
> +
> +static const uint16_t __attribute__((aligned(16))) pw_7f[8] = {127,127,127,127,127,127,127,127};
> +static const uint16_t __attribute__((aligned(16))) pw_ff[8] = {255,255,255,255,255,255,255,255};
> +static const uint16_t __attribute__((aligned(16))) dither[8][8] = {
> +    {  0, 96, 24,120,  6,102, 30,126 },
> +    { 64, 32, 88, 56, 70, 38, 94, 62 },
> +    { 16,112,  8,104, 22,118, 14,110 },
> +    { 80, 48, 72, 40, 86, 54, 78, 46 },
> +    {  4,100, 28,124,  2, 98, 26,122 },
> +    { 68, 36, 92, 60, 66, 34, 90, 58 },
> +    { 20,116, 12,108, 18,114, 10,106 },
> +    { 84, 52, 76, 44, 82, 50, 74, 42 },
> +};
> +
> +void filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers);
> +void blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int sstride, int width);
> +
> +void filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc,
> +                             int width, int thresh, const uint16_t *dithers);
> +
> +void filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc,
> +                              int width, int thresh, const uint16_t *dithers);
> +
> +void blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
> +                           uint8_t *src, int sstride, int width);
> +
> +#endif /* AVFILTER_YADIF_H */
> diff --git a/libavfilter/vf_gradfun.c b/libavfilter/vf_gradfun.c
> new file mode 100644
> index 0000000..369c893
> --- /dev/null
> +++ b/libavfilter/vf_gradfun.c
> @@ -0,0 +1,257 @@
> +/*
> + * copyright (c) 2010 Nolan Lum 
> + *               2009 Loren Merritt <lorenm at u.washignton.edu>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +

> +/**
> + * @file
> + * scale video filter

It's not a scale video filter.

> + */
> +
> +/*
> + * Debanding algorithm (from gradfun2db by prunedtree):
> + * Boxblur.
> + * Foreach pixel, if it's within threshold of the blurred value, make it closer.
> + * So now we have a smoothed and higher bitdepth version of all the shallow
> + * gradients, while leaving detailed areas untouched.
> + * Dither it back to 8bit.
> + */
> +
> +#include "avfilter.h"
> +#include "libavutil/cpu.h"
> +#include "libavutil/pixdesc.h"
> +#include "gradfun.h"

Nit++:
#include "libavutil/cpu.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "gradfun.h"

> +
> +void filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers)
> +{
> +    int x;
> +    for (x=0; x<width; x++, dc += x & 1) {
> +        int pix = src[x] << 7;
> +        int delta = dc[0] - pix;
> +        int m = abs(delta) * thresh >> 16;
> +        m = FFMAX(0, 127 - m);
> +        m = m * m * delta >> 14;
> +        pix += m + dithers[x & 7];
> +        dst[x] = av_clip_uint8(pix >> 7);
> +    }
> +}
> +
> +void blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int sstride, int width)
> +{
> +    int x, v, old;
> +    for (x=0; x < width; x++) {
> +        v = buf1[x] + src[2 * x] + src[2 * x + 1] + src[2 * x + sstride] + src[2 * x + 1 + sstride];
> +        old = buf[x];
> +        buf[x] = v;
> +        dc[x] = v - old;
> +    }
> +}
> +
> +static void filter(GradFunContext *ctx, uint8_t *dst, uint8_t *src, int width, int height, int dstride, int sstride, int r)
> +{
> +    int bstride = ((width + 15) & ~15) / 2;
> +    int y;
> +    uint32_t dc_factor = (1 << 21) / (r * r);
> +    uint16_t *dc = ctx->buf + 16;
> +    uint16_t *buf = ctx->buf + bstride + 32;
> +    int thresh = ctx->thresh;
> +
> +    memset(dc, 0, (bstride + 16) * sizeof(*buf));
> +    for (y = 0; y < r; y++)
> +        ctx->blur_line(dc, buf + y * bstride, buf + (y - 1) * bstride, src + 2 * y * sstride, sstride, width / 2);
> +    for (;;) {
> +        if (y < height - r) {
> +            int mod = ((y + r) / 2) % r;
> +            uint16_t *buf0 = buf + mod * bstride;
> +            uint16_t *buf1 = buf + (mod ? mod - 1 : r - 1) * bstride;
> +            int x, v;
> +            ctx->blur_line(dc, buf0, buf1, src + (y + r) * sstride, sstride, width / 2);
> +            for (x = v = 0; x < r; x++)
> +                v += dc[x];
> +            for (; x < width / 2; x++) {
> +                v += dc[x] - dc[x-r];
> +                dc[x-r] = v * dc_factor >> 16;
> +            }
> +            for (; x < (width + r + 1) / 2; x++)
> +                dc[x-r] = v * dc_factor >> 16;
> +            for (x = -r / 2; x < 0; x++)
> +                dc[x] = dc[0];
> +        }
> +        if (y == r) {
> +            for (y = 0; y < r; y++)
> +                ctx->filter_line(dst + y * dstride, src + y * sstride, dc - r / 2, width, thresh, dither[y & 7]);
> +        }
> +        ctx->filter_line(dst + y * dstride, src + y * sstride, dc - r / 2, width, thresh, dither[y & 7]);
> +        if (++y >= height) break;
> +        ctx->filter_line(dst + y * dstride, src + y * sstride, dc - r / 2, width, thresh, dither[y & 7]);
> +        if (++y >= height) break;
> +    }
> +}
> +

> +static inline void memcpy_pic(void* dst, const void* src, int bytesPerLine, int height, int dstStride, int srcStride)
> +{
> +    int i;
> +
> +    if(dstStride == srcStride) {
> +        if (srcStride < 0) {
> +            src = (const uint8_t*) src + (height - 1) * srcStride;
> +            dst = (uint8_t*) dst + (height - 1) * dstStride;
> +            srcStride = -srcStride;
> +        }
> +        memcpy(dst, src, srcStride * height);
> +    } else {
> +        for(i = 0; i < height; i++) {
> +            memcpy(dst, src, bytesPerLine);
> +            src = (const uint8_t*)src + srcStride;
> +            dst = (uint8_t*)dst + dstStride;
> +        }
> +    }
> +}

Maybe av_image_copy_plane() can be used instead.

> +
> +static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
> +{
> +    GradFunContext *gf = ctx->priv;
> +    float thresh = 1.2;
> +    int radius = 16;
> +    av_unused int cpu_flags = av_get_cpu_flags();
> +
> +    if (args) sscanf(args, "%f:%d", &thresh, &radius);
> +    
> +    thresh = av_clipf(thresh, 0.51, 255);
> +    gf->thresh = (1 << 15) / thresh;
> +    gf->radius = av_clip((radius + 1) & ~1, 4, 32);
> +    
> +    av_log(ctx, AV_LOG_INFO, "threshold:%.2f radius:%d\n", thresh, gf->radius);
> +
> +    gf->blur_line = blur_line_c;
> +    gf->filter_line = filter_line_c;
> +
> +#if HAVE_MMX2
> +    if (cpu_flags & AV_CPU_FLAG_MMX2)
> +        gf->filter_line = filter_line_mmx2;

if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2)

check also vf_yadif.c

> +#endif
> +#if HAVE_SSSE3
> +    if (cpu_flags & AV_CPU_FLAG_SSSE3)
> +        gf->filter_line = filter_line_ssse3;
> +#endif /*
> +#if HAVE_SSE2
> +    if (cpu_flags & AV_CPU_FLAG_SSE2)
> +        gf->blur_line = blur_line_sse2;
> +    //no support for sse2 in ./configure.
> +#endif */
> +
> +    return 0;
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    GradFunContext *gf = ctx->priv;
> +    if(gf->buf) av_free(gf->buf);
> +}
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    static const enum PixelFormat pix_fmts[] = {
> +        PIX_FMT_YUV410P,            PIX_FMT_YUV420P,
> +        PIX_FMT_GRAY8,              PIX_FMT_NV12,
> +        PIX_FMT_NV21,               PIX_FMT_YUV444P,
> +        PIX_FMT_YUV422P,            PIX_FMT_YUV411P,
> +        PIX_FMT_NONE
> +    };
> +
> +    avfilter_set_common_formats(ctx, avfilter_make_format_list(pix_fmts));
> +
> +    return 0;
> +}
> +
> +static int config_input(AVFilterLink *inlink)
> +{
> +    GradFunContext *gf = inlink->dst->priv;
> +    av_free(gf->buf);
> +    gf->buf = av_mallocz((((inlink->w + 15) & ~15) * (gf->radius + 1) / 2 + 32) * sizeof(uint16_t));
> +    
> +    return !gf->buf;

Return AVERROR(ENOMEM) on allocation failure instead of !gf->buf.

> +}
> +
> +static int config_output(AVFilterLink *outlink)
> +{
> +    AVFilterLink *inlink = outlink->src->inputs[0];
> +    
> +    outlink->w = inlink->w;
> +    outlink->h = inlink->h;
> +    
> +    return 0;
> +}

useless

> +
> +static void end_frame(AVFilterLink *link)

Nit: rename the parameter link to inlink.

> +{
> +    GradFunContext *gf = link->dst->priv;
> +    AVFilterBufferRef *inpic = link->cur_buf;
> +    AVFilterBufferRef *outpic = link->dst->outputs[0]->out_buf;
> +    int p;
> +    
> +    for (p = 0; p < 4 && inpic->data[p]; p++) {
> +        int w = link->w;
> +        int h = link->h;
> +        int r = gf->radius;
> +        if (p) {
> +            w = CHROMA_WIDTH(link);
> +            h = CHROMA_HEIGHT(link);
> +            r = av_clip((RADIUS_CHROMA(r, link) + 1) & ~1, 4, 32);
> +        }
> +        
> +        if (FFMIN(w, h) > 2 * r)
> +            filter(gf, outpic->data[p], inpic->data[p], w, h, outpic->linesize[p], inpic->linesize[p], r);

> +        else if (outpic->data[p] != inpic->data[p])

Can this actually happen?

> +            memcpy_pic(outpic->data[p], inpic->data[p], w, h, outpic->linesize[p], inpic->linesize[p]);     
> +    }
> +
> +    avfilter_unref_buffer(inpic);
> +    avfilter_draw_slice(link->dst->outputs[0], 0, link->h, 1);
> +    avfilter_end_frame(link->dst->outputs[0]);
> +    avfilter_unref_buffer(outpic);
> +}
> +
> +static void draw_slice(AVFilterLink *link, int y, int h, int slice_dir)
> +{
> +}
> +
> +AVFilter avfilter_vf_gradfun = {
> +    .name      = "gradfun",

> +    .description = NULL_IF_CONFIG_SMALL("Fast, simple debander using gradients."),

Nit: change this to a sentence (verbal form, rather than a nominal form).

[...]

Thanks for the patch, regards.
-- 
FFmpeg = Fast and Frenzy Most Plastic Everlasting Gymnast