[FFmpeg-devel] [PATCH] lavfi: Add OpenCL avgblur filter

Mark Thompson sw at jkqxz.net
Sun Mar 18 21:16:05 EET 2018


On 18/03/18 12:48, dylanf123 at gmail.com wrote:
> From: drfer3 <drfer3 at student.monash.edu>
> 
> Behaves like the existing avgblur filter, except working on OpenCL
> hardware frames. Takes exactly the same options.
> ---
>  configure                       |   1 +
>  libavfilter/Makefile            |   2 +
>  libavfilter/allfilters.c        |   1 +
>  libavfilter/opencl/avgblur.cl   |  60 ++++++++
>  libavfilter/opencl_source.h     |   1 +
>  libavfilter/vf_avgblur_opencl.c | 318 ++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 383 insertions(+)
>  create mode 100644 libavfilter/opencl/avgblur.cl
>  create mode 100644 libavfilter/vf_avgblur_opencl.c
> 
> diff --git a/configure b/configure
> index 0c5ed07a07..481d338caf 100755
> --- a/configure
> +++ b/configure
> @@ -3202,6 +3202,7 @@ aresample_filter_deps="swresample"
>  ass_filter_deps="libass"
>  atempo_filter_deps="avcodec"
>  atempo_filter_select="rdft"
> +avgblur_opencl_filter_deps="opencl"
>  azmq_filter_deps="libzmq"
>  blackframe_filter_deps="gpl"
>  boxblur_filter_deps="gpl"
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index fc16512e2c..1043b41d80 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -139,6 +139,8 @@ OBJS-$(CONFIG_ALPHAMERGE_FILTER)             += vf_alphamerge.o
>  OBJS-$(CONFIG_ASS_FILTER)                    += vf_subtitles.o
>  OBJS-$(CONFIG_ATADENOISE_FILTER)             += vf_atadenoise.o
>  OBJS-$(CONFIG_AVGBLUR_FILTER)                += vf_avgblur.o
> +OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER)         += vf_avgblur_opencl.o opencl.o \
> +                                                opencl/avgblur.o
>  OBJS-$(CONFIG_BBOX_FILTER)                   += bbox.o vf_bbox.o
>  OBJS-$(CONFIG_BENCH_FILTER)                  += f_bench.o
>  OBJS-$(CONFIG_BITPLANENOISE_FILTER)          += vf_bitplanenoise.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index cc423af738..3f67e321bf 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -149,6 +149,7 @@ static void register_all(void)
>      REGISTER_FILTER(ASS,            ass,            vf);
>      REGISTER_FILTER(ATADENOISE,     atadenoise,     vf);
>      REGISTER_FILTER(AVGBLUR,        avgblur,        vf);
> +    REGISTER_FILTER(AVGBLUR_OPENCL, avgblur_opencl, vf);
>      REGISTER_FILTER(BBOX,           bbox,           vf);
>      REGISTER_FILTER(BENCH,          bench,          vf);
>      REGISTER_FILTER(BITPLANENOISE,  bitplanenoise,  vf);
> diff --git a/libavfilter/opencl/avgblur.cl b/libavfilter/opencl/avgblur.cl
> new file mode 100644
> index 0000000000..28e0c90d15
> --- /dev/null
> +++ b/libavfilter/opencl/avgblur.cl
> @@ -0,0 +1,60 @@
> +/*
> + * Copyright (c) 2018 Dylan Fernando
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +
> +__kernel void avgblur_horiz(__write_only image2d_t dst,
> +                            __read_only  image2d_t src,
> +                            int rad)
> +{
> +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> +                               CLK_FILTER_NEAREST);
> +    int2 loc = (int2)(get_global_id(0), get_global_id(1));
> +    int2 size = (int2)(get_global_size(0), get_global_size(1));
> +
> +    int count = 0;
> +    float4 acc = (float4)(0,0,0,0);
> +
> +    for (int xx = max(0,loc.x-rad); xx < min(loc.x+rad+1,size.x); xx++) {

Keep to the same style as the rest of the code - spaces around operators.

> +        count++;
> +        acc += read_imagef(src, sampler, (int2)(xx, loc.y));

Setting CLK_ADDRESS_CLAMP_TO_EDGE in the sampler would let you read over the edges and possibly avoid some of the min/max branching?  (I don't know whether that would actually help at all, would need to be measured - feel free to ignore this suggestion.)

> +    }
> +
> +    write_imagef(dst, loc, acc / count);
> +}
> +
> +__kernel void avgblur_vert(__write_only image2d_t dst,
> +                           __read_only  image2d_t src,
> +                           int radv)
> +{
> +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> +                               CLK_FILTER_NEAREST);
> +    int2 loc = (int2)(get_global_id(0), get_global_id(1));
> +    int2 size = (int2)(get_global_size(0), get_global_size(1));
> +
> +    int count = 0;
> +    float4 acc = (float4)(0,0,0,0);
> +
> +    for (int yy = max(0,loc.y-radv); yy < min(loc.y+radv+1,size.y); yy++) {
> +        count++;
> +        acc += read_imagef(src, sampler, (int2)(loc.x, yy));
> +    }
> +
> +    write_imagef(dst, loc, acc / count);
> +}
> diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
> index 23cdfc6ac9..02bc1723b0 100644
> --- a/libavfilter/opencl_source.h
> +++ b/libavfilter/opencl_source.h
> @@ -19,6 +19,7 @@
>  #ifndef AVFILTER_OPENCL_SOURCE_H
>  #define AVFILTER_OPENCL_SOURCE_H
>  
> +extern const char *ff_opencl_source_avgblur;
>  extern const char *ff_opencl_source_overlay;
>  extern const char *ff_opencl_source_unsharp;
>  
> diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_opencl.c
> new file mode 100644
> index 0000000000..7e866b462a
> --- /dev/null
> +++ b/libavfilter/vf_avgblur_opencl.c
> @@ -0,0 +1,318 @@
> +/*
> + * Copyright (c) 2018 Dylan Fernando
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/common.h"
> +#include "libavutil/imgutils.h"
> +#include "libavutil/mem.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
> +
> +#include "avfilter.h"
> +#include "internal.h"
> +#include "opencl.h"
> +#include "opencl_source.h"
> +#include "video.h"
> +
> +
> +typedef struct AverageBlurOpenCLContext {
> +    OpenCLFilterContext ocf;
> +
> +    int              initialised;
> +    cl_kernel        kernel_horiz;
> +    cl_kernel        kernel_vert;
> +    cl_command_queue command_queue;
> +
> +    int radius;
> +    int radiusV;
> +    int planes;
> +
> +} AverageBlurOpenCLContext;
> +
> +
> +static int avgblur_opencl_init(AVFilterContext *avctx)
> +{
> +    AverageBlurOpenCLContext *ctx = avctx->priv;
> +    cl_int cle;
> +    int err;
> +
> +    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_avgblur, 1);
> +    if (err < 0)
> +        goto fail;
> +
> +    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
> +                                              ctx->ocf.hwctx->device_id,
> +                                              0, &cle);
> +    if (!ctx->command_queue) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
> +               "command queue: %d.\n", cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }
> +
> +    ctx->kernel_horiz = clCreateKernel(ctx->ocf.program,"avgblur_horiz", &cle);
> +    if (!ctx->kernel_horiz) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }
> +
> +    ctx->kernel_vert = clCreateKernel(ctx->ocf.program,"avgblur_vert", &cle);
> +    if (!ctx->kernel_vert) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }
> +
> +    if (ctx->radiusV <= 0) {
> +        ctx->radiusV = ctx->radius;
> +    }
> +
> +    ctx->initialised = 1;
> +    return 0;
> +
> +fail:
> +    if (ctx->command_queue)
> +        clReleaseCommandQueue(ctx->command_queue);
> +    if (ctx->kernel_horiz)
> +        clReleaseKernel(ctx->kernel_horiz);
> +    if (ctx->kernel_vert)
> +        clReleaseKernel(ctx->kernel_vert);
> +    return err;
> +}
> +
> +static int avgblur_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
> +{
> +    AVFilterContext    *avctx = inlink->dst;
> +    AVFilterLink     *outlink = avctx->outputs[0];
> +    AverageBlurOpenCLContext *ctx = avctx->priv;
> +    AVFrame *output = NULL;
> +    cl_int cle;
> +    size_t global_work[2];
> +    cl_mem src, dst;
> +    int err, p;
> +
> +    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
> +           av_get_pix_fmt_name(input->format),
> +           input->width, input->height, input->pts);
> +
> +    if (!input->hw_frames_ctx)
> +        return AVERROR(EINVAL);
> +
> +    if (!ctx->initialised) {
> +        err = avgblur_opencl_init(avctx);
> +        if (err < 0)
> +            goto fail;
> +
> +    }
> +
> +    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> +    if (!output) {
> +        err = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
> +    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
> +        src = (cl_mem) input->data[p];
> +        dst = (cl_mem)output->data[p];
> +
> +        if (!dst)
> +            break;
> +
> +        int radius_x = ctx->radius;
> +        int radius_y = ctx->radiusV;
> +
> +        if (!(ctx->planes & (1 << p))) {
> +            radius_x = 0;
> +            radius_y = 0;
> +        }
> +
> +        cle = clSetKernelArg(ctx->kernel_horiz, 0, sizeof(cl_mem), &dst);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "destination image argument: %d.\n", cle);
> +            goto fail;
> +        }
> +        cle = clSetKernelArg(ctx->kernel_horiz, 1, sizeof(cl_mem), &src);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "source image argument: %d.\n", cle);
> +            goto fail;
> +        }
> +        cle = clSetKernelArg(ctx->kernel_horiz, 2, sizeof(cl_int), &radius_x);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "sizeX argument: %d.\n", cle);
> +            goto fail;
> +        }
> +
> +        global_work[0] = output->width;
> +        global_work[1] = output->height;
> +
> +        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
> +               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
> +               p, global_work[0], global_work[1]);
> +
> +        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_horiz, 2, NULL,
> +                                     global_work, NULL,
> +                                     0, NULL, NULL);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
> +                   cle);
> +            err = AVERROR(EIO);
> +            goto fail;
> +        }
> +
> +        cle = clSetKernelArg(ctx->kernel_vert, 0, sizeof(cl_mem), &dst);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "destination image argument: %d.\n", cle);
> +            goto fail;
> +        }
> +        cle = clSetKernelArg(ctx->kernel_vert, 1, sizeof(cl_mem), &dst);

You can't specify the same image multiple times in the kernel to both read from and write to it like this.

The problem here is that some workitems can end up writing to locations of the image before they have been read by other workitems looking at neighbouring pixels.  That means that the output is not consistent (try it, the output changes between runs), and pixels can be blurred downwards across a much larger distance than the given radius.

To fix that, I think you need to make a temporary image for each plane to be an intermediate between the horizontal and vertical filter steps?  If you don't want to have a temporary image then I don't think the two steps can be done independently.

> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "source image argument: %d.\n", cle);
> +            goto fail;
> +        }
> +        cle = clSetKernelArg(ctx->kernel_vert, 2, sizeof(cl_int), &radius_y);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "sizeY argument: %d.\n", cle);
> +            goto fail;
> +        }
> +
> +        global_work[0] = output->width;
> +        global_work[1] = output->height;
> +
> +        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
> +               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
> +               p, global_work[0], global_work[1]);
> +
> +        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_vert, 2, NULL,
> +                                     global_work, NULL,
> +                                     0, NULL, NULL);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
> +                   cle);
> +            err = AVERROR(EIO);
> +            goto fail;
> +        }
> +
> +    }
> +
> +    cle = clFinish(ctx->command_queue);
> +    if (cle != CL_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n",
> +               cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }
> +
> +    err = av_frame_copy_props(output, input);
> +    if (err < 0)
> +        goto fail;
> +
> +    av_frame_free(&input);
> +
> +    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
> +           av_get_pix_fmt_name(output->format),
> +           output->width, output->height, output->pts);
> +
> +    return ff_filter_frame(outlink, output);
> +
> +fail:
> +    clFinish(ctx->command_queue);
> +    av_frame_free(&input);
> +    av_frame_free(&output);
> +    return err;
> +}
> +
> +static av_cold void avgblur_opencl_uninit(AVFilterContext *avctx)
> +{
> +    AverageBlurOpenCLContext *ctx = avctx->priv;
> +    cl_int cle;
> +
> +
> +    if (ctx->kernel_horiz) {
> +        cle = clReleaseKernel(ctx->kernel_horiz);
> +        if (cle != CL_SUCCESS)
> +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> +                   "kernel: %d.\n", cle);
> +    }
> +
> +    if (ctx->kernel_vert) {
> +        cle = clReleaseKernel(ctx->kernel_vert);
> +        if (cle != CL_SUCCESS)
> +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> +                   "kernel: %d.\n", cle);
> +    }
> +
> +    if (ctx->command_queue) {
> +        cle = clReleaseCommandQueue(ctx->command_queue);
> +        if (cle != CL_SUCCESS)
> +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> +                   "command queue: %d.\n", cle);
> +    }
> +
> +    ff_opencl_filter_uninit(avctx);
> +}
> +
> +#define OFFSET(x) offsetof(AverageBlurOpenCLContext, x)
> +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
> +static const AVOption avgblur_opencl_options[] = {
> +    { "sizeX",  "set horizontal size",  OFFSET(radius),  AV_OPT_TYPE_INT, {.i64=1},   1, 1024, FLAGS },
> +    { "planes", "set planes to filter", OFFSET(planes),  AV_OPT_TYPE_INT, {.i64=0xF}, 0,  0xF, FLAGS },
> +    { "sizeY",  "set vertical size",    OFFSET(radiusV), AV_OPT_TYPE_INT, {.i64=0},   0, 1024, FLAGS },
> +    { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(avgblur_opencl);
> +
> +static const AVFilterPad avgblur_opencl_inputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .filter_frame = &avgblur_opencl_filter_frame,
> +        .config_props = &ff_opencl_filter_config_input,
> +    },
> +    { NULL }
> +};
> +
> +static const AVFilterPad avgblur_opencl_outputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .config_props = &ff_opencl_filter_config_output,
> +    },
> +    { NULL }
> +};
> +
> +AVFilter ff_vf_avgblur_opencl = {
> +    .name           = "avgblur_opencl",
> +    .description    = NULL_IF_CONFIG_SMALL("Apply average blur filter"),
> +    .priv_size      = sizeof(AverageBlurOpenCLContext),
> +    .priv_class     = &avgblur_opencl_class,
> +    .init           = &ff_opencl_filter_init,
> +    .uninit         = &avgblur_opencl_uninit,
> +    .query_formats  = &ff_opencl_filter_query_formats,
> +    .inputs         = avgblur_opencl_inputs,
> +    .outputs        = avgblur_opencl_outputs,
> +    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> +};
> 

Rest looks good to me; also tested and works well (ignoring than the inconsistent results from the vertical filtering step).

Thanks,

- Mark


More information about the ffmpeg-devel mailing list