[FFmpeg-devel] [PATCH]libavfilter: deshake opencl based on comments on 20130402 2nd
Stefano Sabatini
stefasab at gmail.com
Tue Apr 2 14:14:26 CEST 2013
On date Tuesday 2013-04-02 19:30:44 +0800, Wei Gao encoded:
>
> From 2f095498919ee8984097cde50df11d7770c6a315 Mon Sep 17 00:00:00 2001
> From: highgod0401 <highgod0401 at gmail.com>
> Date: Tue, 2 Apr 2013 19:28:09 +0800
> Subject: [PATCH] deshake opencl based on comments on 20130402 2nd
>
> ---
> doc/filters.texi | 6 +-
> libavfilter/Makefile | 2 +
> libavfilter/allfilters.c | 2 +
> libavfilter/deshake.h | 104 +++++++++++++++++++
> libavfilter/deshake_kernel.h | 215 ++++++++++++++++++++++++++++++++++++++++
> libavfilter/deshake_opencl.c | 180 +++++++++++++++++++++++++++++++++
> libavfilter/deshake_opencl.h | 39 ++++++++
> libavfilter/opencl_allkernels.c | 39 ++++++++
> libavfilter/opencl_allkernels.h | 29 ++++++
> libavfilter/vf_deshake.c | 117 +++++++++++-----------
> 10 files changed, 675 insertions(+), 58 deletions(-)
> create mode 100644 libavfilter/deshake.h
> create mode 100644 libavfilter/deshake_kernel.h
> create mode 100644 libavfilter/deshake_opencl.c
> create mode 100644 libavfilter/deshake_opencl.h
> create mode 100644 libavfilter/opencl_allkernels.c
> create mode 100644 libavfilter/opencl_allkernels.h
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 2c82ac3..401125b 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -2504,7 +2504,7 @@ tripod, moving on a vehicle, etc.
> The filter accepts parameters as a list of @var{key}=@var{value}
> pairs, separated by ":". If the key of the first options is omitted,
> the arguments are interpreted according to the syntax
> - at var{x}:@var{y}:@var{w}:@var{h}:@var{rx}:@var{ry}:@var{edge}:@var{blocksize}:@var{contrast}:@var{search}:@var{filename}.
> + at var{x}:@var{y}:@var{w}:@var{h}:@var{rx}:@var{ry}:@var{edge}:@var{blocksize}:@var{contrast}:@var{search}:@var{filename}:@var{opencl}.
>
> A description of the accepted parameters follows.
>
> @@ -2570,6 +2570,10 @@ Default value is @samp{exhaustive}.
> If set then a detailed log of the motion search is written to the
> specified file.
>
> + at item opencl
> +If set to 1, specify using OpenCL capabilities, only available if
> +FFmpeg was configured with @code{--enable-opencl}. Default value is 0.
> +
> @end table
>
> @section drawbox
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 690b1cb..e865aef 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -40,6 +40,7 @@ OBJS = allfilters.o \
> formats.o \
> graphdump.o \
> graphparser.o \
> + opencl_allkernels.o \
> transform.o \
> video.o \
>
> @@ -139,6 +140,7 @@ OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o
> OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o
> OBJS-$(CONFIG_NULL_FILTER) += vf_null.o
> OBJS-$(CONFIG_OCV_FILTER) += vf_libopencv.o
> +OBJS-$(CONFIG_OPENCL) += deshake_opencl.o
> OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o
> OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o
> OBJS-$(CONFIG_PERMS_FILTER) += f_perms.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 45a67e5..4ca180a 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -21,6 +21,7 @@
>
> #include "avfilter.h"
> #include "config.h"
> +#include "opencl_allkernels.h"
>
>
> #define REGISTER_FILTER(X, x, y) \
> @@ -199,4 +200,5 @@ void avfilter_register_all(void)
> REGISTER_FILTER_UNCONDITIONAL(vsink_buffer);
> REGISTER_FILTER_UNCONDITIONAL(af_afifo);
> REGISTER_FILTER_UNCONDITIONAL(vf_fifo);
> + ff_opencl_register_filter_kernel_code_all();
> }
> diff --git a/libavfilter/deshake.h b/libavfilter/deshake.h
> new file mode 100644
> index 0000000..c24090e
> --- /dev/null
> +++ b/libavfilter/deshake.h
> @@ -0,0 +1,104 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_DESHAKE_H
> +#define AVFILTER_DESHAKE_H
> +
> +#include "config.h"
> +#include "avfilter.h"
> +#include "libavcodec/dsputil.h"
> +#include "transform.h"
> +#if CONFIG_OPENCL
> +#include "libavutil/opencl.h"
> +#endif
> +
> +
> +enum SearchMethod {
> + EXHAUSTIVE, ///< Search all possible positions
> + SMART_EXHAUSTIVE, ///< Search most possible positions (faster)
> + SEARCH_COUNT
> +};
> +
> +typedef struct {
> + int x; ///< Horizontal shift
> + int y; ///< Vertical shift
> +} IntMotionVector;
> +
> +typedef struct {
> + double x; ///< Horizontal shift
> + double y; ///< Vertical shift
> +} MotionVector;
> +
> +typedef struct {
> + MotionVector vector; ///< Motion vector
> + double angle; ///< Angle of rotation
> + double zoom; ///< Zoom percentage
> +} Transform;
> +
> +#if CONFIG_OPENCL
> +
> +typedef struct {
> + size_t matrix_size;
> + float matrix_y[9];
> + float matrix_uv[9];
> + cl_mem cl_matrix_y;
> + cl_mem cl_matrix_uv;
> + int in_plane_size[8];
> + int out_plane_size[8];
> + int plane_num;
> + cl_mem cl_inbuf;
> + size_t cl_inbuf_size;
> + cl_mem cl_outbuf;
> + size_t cl_outbuf_size;
> + AVOpenCLKernelEnv kernel_env;
> +} DeshakeOpenclContext;
> +
> +#endif
> +
> +typedef struct {
> + const AVClass *class;
> + AVFrame *ref; ///< Previous frame
> + int rx; ///< Maximum horizontal shift
> + int ry; ///< Maximum vertical shift
> + int edge; ///< Edge fill method
> + int blocksize; ///< Size of blocks to compare
> + int contrast; ///< Contrast threshold
> + int search; ///< Motion search method
> + AVCodecContext *avctx;
> + DSPContext c; ///< Context providing optimized SAD methods
> + Transform last; ///< Transform from last frame
> + int refcount; ///< Number of reference frames (defines averaging window)
> + FILE *fp;
> + Transform avg;
> + int cw; ///< Crop motion search to this box
> + int ch;
> + int cx;
> + int cy;
> + char *filename; ///< Motion search detailed log filename
> + int opencl;
> +#if CONFIG_OPENCL
> + DeshakeOpenclContext opencl_ctx;
> +#endif
> + int (* transform)(AVFilterContext *ctx, int width, int height, int cw, int ch,
> + const float *matrix_y, const float *matrix_uv, enum InterpolateMethod interpolate,
> + enum FillMethod fill, AVFrame *in, AVFrame *out);
> +} DeshakeContext;
> +
> +#endif /* AVFILTER_DESHAKE_H */
> diff --git a/libavfilter/deshake_kernel.h b/libavfilter/deshake_kernel.h
> new file mode 100644
> index 0000000..0adddb6
> --- /dev/null
> +++ b/libavfilter/deshake_kernel.h
> @@ -0,0 +1,215 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_DESHAKE_KERNEL_H
> +#define AVFILTER_DESHAKE_KERNEL_H
> +
> +#include "libavutil/opencl.h"
> +
> +const char *ff_kernel_deshake_opencl = AV_OPENCL_KERNEL(
> +
> +inline unsigned char pixel(global const unsigned char *src, float x, float y,int w, int h,int stride, unsigned char def)
> +{
> + return (x < 0 || y < 0 || x >= w || y >= h) ? def : src[(int)x + (int)y * stride];
> +}
> +unsigned char interpolate_nearest(float x, float y, global const unsigned char *src,
> + int width, int height, int stride, unsigned char def)
> +{
> + return pixel(src, (int)(x + 0.5), (int)(y + 0.5), width, height, stride, def);
> +}
> +
> +unsigned char interpolate_bilinear(float x, float y, global const unsigned char *src,
> + int width, int height, int stride, unsigned char def)
Nit, here and below:
unsigned char interpolate_bilinear(float x, float y, global const unsigned char *src,
int width, int height, int stride, unsigned char def)
The second line should be aligned to the first argument in the first
line.
> +{
> + int x_c, x_f, y_c, y_f;
> + int v1, v2, v3, v4;
> +
> + if (x < -1 || x > width || y < -1 || y > height) {
> + return def;
> + } else {
> + x_f = (int)x;
> + x_c = x_f + 1;
> +
> + y_f = (int)y;
> + y_c = y_f + 1;
> +
> + v1 = pixel(src, x_c, y_c, width, height, stride, def);
> + v2 = pixel(src, x_c, y_f, width, height, stride, def);
> + v3 = pixel(src, x_f, y_c, width, height, stride, def);
> + v4 = pixel(src, x_f, y_f, width, height, stride, def);
> +
> + return (v1*(x - x_f)*(y - y_f) + v2*((x - x_f)*(y_c - y)) +
> + v3*(x_c - x)*(y - y_f) + v4*((x_c - x)*(y_c - y)));
> + }
> +}
> +
> +unsigned char interpolate_biquadratic(float x, float y, global const unsigned char *src,
> + int width, int height, int stride, unsigned char def)
> +{
> + int x_c, x_f, y_c, y_f;
> + unsigned char v1, v2, v3, v4;
> + float f1, f2, f3, f4;
> +
> + if (x < - 1 || x > width || y < -1 || y > height)
> + return def;
> + else {
> + x_f = (int)x;
> + x_c = x_f + 1;
> + y_f = (int)y;
> + y_c = y_f + 1;
> +
> + v1 = pixel(src, x_c, y_c, width, height, stride, def);
> + v2 = pixel(src, x_c, y_f, width, height, stride, def);
> + v3 = pixel(src, x_f, y_c, width, height, stride, def);
> + v4 = pixel(src, x_f, y_f, width, height, stride, def);
> +
> + f1 = 1 - sqrt((x_c - x) * (y_c - y));
> + f2 = 1 - sqrt((x_c - x) * (y - y_f));
> + f3 = 1 - sqrt((x - x_f) * (y_c - y));
> + f4 = 1 - sqrt((x - x_f) * (y - y_f));
> + return (v1 * f1 + v2 * f2 + v3 * f3 + v4 * f4) / (f1 + f2 + f3 + f4);
> + }
> +}
> +
> +inline const float clipf(float a, float amin, float amax)
> +{
> + if (a < amin) return amin;
> + else if (a > amax) return amax;
> + else return a;
> +}
> +
> +inline int mirror(int v, int m)
> +{
> + while ((unsigned)v > (unsigned)m) {
> + v = -v;
> + if (v < 0)
> + v += 2 * m;
> + }
> + return v;
> +}
> +
> +kernel void avfilter_transform(global unsigned char *src,
> + global unsigned char *dst,
> + global float *matrix,
> + global float *matrix2,
> + int interpolate,
> + int fillmethod,
> + int src_stride_lu,
> + int dst_stride_lu,
> + int src_stride_ch,
> + int dst_stride_ch,
> + int height,
> + int width,
> + int ch,
> + int cw)
> +{
> + int global_id = get_global_id(0);
> +
> + global unsigned char *dst_y = dst;
> + global unsigned char *dst_u = dst_y + height * dst_stride_lu;
> + global unsigned char *dst_v = dst_u + ch * dst_stride_ch;
> +
> + global unsigned char *src_y = src;
> + global unsigned char *src_u = src_y + height * src_stride_lu;
> + global unsigned char *src_v = src_u + ch * src_stride_ch;
> +
> + global unsigned char *tempdst;
> + global unsigned char *tempsrc;
> +
> + int x;
> + int y;
> + float x_s;
> + float y_s;
> + int tempsrc_stride;
> + int tempdst_stride;
> + int temp_height;
> + int temp_width;
> + int curpos;
> + unsigned char def;
needs to be set to 0, like in transform.c...
> + if (global_id < width*height) {
> + y = global_id/width;
> + x = global_id%width;
> + x_s = x * matrix[0] + y * matrix[1] + matrix[2];
> + y_s = x * matrix[3] + y * matrix[4] + matrix[5];
> + tempdst = dst_y;
> + tempsrc = src_y;
> + tempsrc_stride = src_stride_lu;
> + tempdst_stride = dst_stride_lu;
> + temp_height = height;
> + temp_width = width;
> + } else if ((global_id >= width*height)&&(global_id < width*height + ch*cw)) {
> + y = (global_id - width*height)/cw;
> + x = (global_id - width*height)%cw;
> + x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2];
> + y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5];
> + tempdst = dst_u;
> + tempsrc = src_u;
> + tempsrc_stride = src_stride_ch;
> + tempdst_stride = dst_stride_ch;
> + temp_height = height;
> + temp_width = width;
> + temp_height = ch;
> + temp_width = cw;
> + } else {
> + y = (global_id - width*height - ch*cw)/cw;
> + x = (global_id - width*height - ch*cw)%cw;
> + x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2];
> + y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5];
> + tempdst = dst_v;
> + tempsrc = src_v;
> + tempsrc_stride = src_stride_ch;
> + tempdst_stride = dst_stride_ch;
> + temp_height = ch;
> + temp_width = cw;
> + }
> + curpos = y * tempdst_stride + x;
> + switch (fillmethod) {
... or you add a specific case:
case 0; //FILL_BLANK
def = 0;
break;
> + case 1: //FILL_ORIGINAL
> + def = tempsrc[y*tempsrc_stride+x];
> + break;
> + case 2: //FILL_CLAMP
> + y_s = clipf(y_s, 0, temp_height - 1);
> + x_s = clipf(x_s, 0, temp_width - 1);
> + def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s];
> + break;
> + case 3: //FILL_MIRROR
> + y_s = mirror(y_s,temp_height - 1);
> + x_s = mirror(x_s,temp_width - 1);
> + def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s];
> + break;
> + }
weird indent (it should be aligned to the switch()
> + switch (interpolate) {
> + case 0: //INTERPOLATE_NEAREST
> + tempdst[curpos] = interpolate_nearest(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
> + break;
> + case 1: //INTERPOLATE_BILINEAR
> + tempdst[curpos] = interpolate_bilinear(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
> + break;
> + case 2: //INTERPOLATE_BIQUADRATIC
> + tempdst[curpos] = interpolate_biquadratic(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
> + break;
> + default:
> + return;
> + }
> +}
> +);
> +
> +#endif /* AVFILTER_DESHAKE_KERNEL_H */
> diff --git a/libavfilter/deshake_opencl.c b/libavfilter/deshake_opencl.c
> new file mode 100644
> index 0000000..6ed8a60
> --- /dev/null
> +++ b/libavfilter/deshake_opencl.c
> @@ -0,0 +1,180 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * transform input video
> + */
> +
> +#include "libavutil/common.h"
> +#include "libavutil/dict.h"
> +#include "libavutil/pixdesc.h"
> +#include "deshake_opencl.h"
> +
> +#define MATRIX_SIZE 6
> +#define PLANE_NUM 3
> +
> +#define TRANSFORM_OPENCL_CHECK(method, ...) \
> + status = method(__VA_ARGS__); \
> + if (status != CL_SUCCESS) { \
> + av_log(ctx, AV_LOG_ERROR, "error %s %d\n", # method, status); \
> + return AVERROR_EXTERNAL; \
> + }
> +
> +#define TRANSFORM_OPENCL_SET_KERNEL_ARG(arg_ptr) \
> + status = clSetKernelArg((kernel),(arg_no++),(sizeof(arg_ptr)),(void*)(&(arg_ptr))); \
> + if (status != CL_SUCCESS) { \
> + av_log(ctx, AV_LOG_ERROR, "cannot set kernel argument: %d\n", status ); \
> + return AVERROR_EXTERNAL; \
> + }
> +
> +int ff_opencl_transform(AVFilterContext *ctx,
> + int width, int height, int cw, int ch,
> + const float *matrix_y, const float *matrix_uv,
> + enum InterpolateMethod interpolate,
> + enum FillMethod fill, AVFrame *in, AVFrame *out)
> +{
> + int arg_no, ret = 0;
> + const size_t global_work_size = width * height + 2 * ch * cw;
> + cl_kernel kernel;
> + cl_int status;
> + DeshakeContext *deshake = ctx->priv;
> + ret = av_opencl_buffer_write(deshake->opencl_ctx.cl_matrix_y, (uint8_t *)matrix_y, deshake->opencl_ctx.matrix_size * sizeof(cl_float));
> + if (ret < 0)
> + return ret;
> + ret = av_opencl_buffer_write(deshake->opencl_ctx.cl_matrix_uv, (uint8_t *)matrix_uv, deshake->opencl_ctx.matrix_size * sizeof(cl_float));
> + if (ret < 0)
> + return ret;
> + kernel = deshake->opencl_ctx.kernel_env.kernel;
> + arg_no = 0;
> +
> + if((unsigned int)interpolate > INTERPOLATE_BIQUADRATIC) {
nit++: if_((
> + av_log(ctx, AV_LOG_ERROR, "Selected interpolate method is invalid\n");
> + return AVERROR(EINVAL);
> + }
> + TRANSFORM_OPENCL_SET_KERNEL_ARG(deshake->opencl_ctx.cl_inbuf);
> + TRANSFORM_OPENCL_SET_KERNEL_ARG(deshake->opencl_ctx.cl_outbuf);
> + TRANSFORM_OPENCL_SET_KERNEL_ARG(deshake->opencl_ctx.cl_matrix_y);
> + TRANSFORM_OPENCL_SET_KERNEL_ARG(deshake->opencl_ctx.cl_matrix_uv);
> + TRANSFORM_OPENCL_SET_KERNEL_ARG(interpolate);
> + TRANSFORM_OPENCL_SET_KERNEL_ARG(fill);
> + TRANSFORM_OPENCL_SET_KERNEL_ARG(in->linesize[0]);
> + TRANSFORM_OPENCL_SET_KERNEL_ARG(out->linesize[0]);
> + TRANSFORM_OPENCL_SET_KERNEL_ARG(in->linesize[1]);
> + TRANSFORM_OPENCL_SET_KERNEL_ARG(out->linesize[1]);
> + TRANSFORM_OPENCL_SET_KERNEL_ARG(height);
> + TRANSFORM_OPENCL_SET_KERNEL_ARG(width);
> + TRANSFORM_OPENCL_SET_KERNEL_ARG(ch);
> + TRANSFORM_OPENCL_SET_KERNEL_ARG(cw);
> + TRANSFORM_OPENCL_CHECK(clEnqueueNDRangeKernel, deshake->opencl_ctx.kernel_env.command_queue, deshake->opencl_ctx.kernel_env.kernel, 1, NULL,
> + &global_work_size, NULL, 0, NULL, NULL);
> + clFinish(deshake->opencl_ctx.kernel_env.command_queue);
> + ret = av_opencl_buffer_read_image(out->data, deshake->opencl_ctx.out_plane_size,
> + deshake->opencl_ctx.plane_num, deshake->opencl_ctx.cl_outbuf,
> + deshake->opencl_ctx.cl_outbuf_size);
> + if (ret < 0)
> + return ret;
> + return ret;
> +}
> +
> +int ff_opencl_deshake_init(AVFilterContext *ctx)
> +{
> + int ret = 0;
> + DeshakeContext *deshake = ctx->priv;
> + AVDictionary *options = NULL;
> + av_dict_set(&options, "build_options", "-I.", 0);
> + ret = av_opencl_init(options, NULL);
> + av_dict_free(&options);
> + if (ret < 0)
> + return ret;
> + deshake->opencl_ctx.matrix_size = MATRIX_SIZE;
> + deshake->opencl_ctx.plane_num = PLANE_NUM;
> + ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_matrix_y,
> + deshake->opencl_ctx.matrix_size*sizeof(cl_float), CL_MEM_READ_ONLY, NULL);
> + if (ret < 0)
> + return ret;
> + ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_matrix_uv,
> + deshake->opencl_ctx.matrix_size*sizeof(cl_float), CL_MEM_READ_ONLY, NULL);
> + if (ret < 0)
> + return ret;
> + if (!deshake->opencl_ctx.kernel_env.kernel) {
> + ret = av_opencl_create_kernel(&deshake->opencl_ctx.kernel_env, "avfilter_transform");
> + if (ret < 0) {
> + av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel for name 'avfilter_transform'\n");
> + return ret;
> + }
> + }
> + return ret;
> +}
> +
> +void ff_opencl_deshake_uninit(AVFilterContext *ctx)
> +{
> + DeshakeContext *deshake = ctx->priv;
> + av_opencl_buffer_release(&deshake->opencl_ctx.cl_inbuf);
> + av_opencl_buffer_release(&deshake->opencl_ctx.cl_outbuf);
> + av_opencl_buffer_release(&deshake->opencl_ctx.cl_matrix_y);
> + av_opencl_buffer_release(&deshake->opencl_ctx.cl_matrix_uv);
> + av_opencl_release_kernel(&deshake->opencl_ctx.kernel_env);
> + av_opencl_uninit();
> +}
> +
> +
> +int ff_opencl_deshake_process_inout_buf(AVFilterLink *link, AVFrame *in, AVFrame *out)
> +{
I think it is better if you pass the filter context like you was doing
in your previous patch, and get the link as: link = ctx->inputs[0];
Alternatively you can store the chroma subsampling in the context
during configuration.
> + int ret = 0;
> + DeshakeContext *deshake = link->dst->priv;
> + int chroma_height = -((-link->h) >> av_pix_fmt_desc_get(link->format)->log2_chroma_h);
> +
> + if ((!deshake->opencl_ctx.cl_inbuf) || (!deshake->opencl_ctx.cl_outbuf)) {
> + deshake->opencl_ctx.in_plane_size[0] = (in->linesize[0] * in->height);
> + deshake->opencl_ctx.in_plane_size[1] = (in->linesize[1] * chroma_height);
> + deshake->opencl_ctx.in_plane_size[2] = (in->linesize[2] * chroma_height);
> + deshake->opencl_ctx.out_plane_size[0] = (out->linesize[0] * out->height);
> + deshake->opencl_ctx.out_plane_size[1] = (out->linesize[1] * chroma_height);
> + deshake->opencl_ctx.out_plane_size[2] = (out->linesize[2] * chroma_height);
> + deshake->opencl_ctx.cl_inbuf_size = deshake->opencl_ctx.in_plane_size[0] +
> + deshake->opencl_ctx.in_plane_size[1] +
> + deshake->opencl_ctx.in_plane_size[2];
> + deshake->opencl_ctx.cl_outbuf_size = deshake->opencl_ctx.out_plane_size[0] +
> + deshake->opencl_ctx.out_plane_size[1] +
> + deshake->opencl_ctx.out_plane_size[2];
> + if (!deshake->opencl_ctx.cl_inbuf) {
> + ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_inbuf,
> + deshake->opencl_ctx.cl_inbuf_size,
> + CL_MEM_READ_ONLY, NULL);
> + if (ret < 0)
> + return ret;
> + }
> + if (!deshake->opencl_ctx.cl_outbuf) {
> + ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_outbuf,
> + deshake->opencl_ctx.cl_outbuf_size,
> + CL_MEM_READ_WRITE, NULL);
> + if (ret < 0)
> + return ret;
> + }
> + }
> + ret = av_opencl_buffer_write_image(deshake->opencl_ctx.cl_inbuf,
> + deshake->opencl_ctx.cl_inbuf_size,
> + 0, in->data,deshake->opencl_ctx.in_plane_size,
> + deshake->opencl_ctx.plane_num);
> + if(ret < 0)
> + return ret;
> + return ret;
> +}
> diff --git a/libavfilter/deshake_opencl.h b/libavfilter/deshake_opencl.h
> new file mode 100644
> index 0000000..69f6e4e
> --- /dev/null
> +++ b/libavfilter/deshake_opencl.h
> @@ -0,0 +1,39 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_DESHAKE_OPENCL_H
> +#define AVFILTER_DESHAKE_OPENCL_H
> +
> +#include "deshake.h"
> +
> +int ff_opencl_deshake_init(AVFilterContext *ctx);
> +
> +void ff_opencl_deshake_uninit(AVFilterContext *ctx);
> +
> +int ff_opencl_deshake_process_inout_buf(AVFilterLink *link, AVFrame *in, AVFrame *out);
> +
> +int ff_opencl_transform(AVFilterContext *ctx,
> + int width, int height, int cw, int ch,
> + const float *matrix_y, const float *matrix_uv,
> + enum InterpolateMethod interpolate,
> + enum FillMethod fill, AVFrame *in, AVFrame *out);
> +
> +#endif /* AVFILTER_DESHAKE_OPENCL_H */
> +
> diff --git a/libavfilter/opencl_allkernels.c b/libavfilter/opencl_allkernels.c
> new file mode 100644
> index 0000000..021eec2
> --- /dev/null
> +++ b/libavfilter/opencl_allkernels.c
> @@ -0,0 +1,39 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "opencl_allkernels.h"
> +#if CONFIG_OPENCL
> +#include "libavutil/opencl.h"
> +#include "deshake_kernel.h"
> +#endif
> +
> +#define OPENCL_REGISTER_KERNEL_CODE(X, x) \
> + { \
> + if (CONFIG_##X##_FILTER) { \
> + av_opencl_register_kernel_code(ff_kernel_##x##_opencl); \
> + } \
> + }
> +
> +void ff_opencl_register_filter_kernel_code_all(void)
> +{
> + #if CONFIG_OPENCL
> + OPENCL_REGISTER_KERNEL_CODE(DESHAKE, deshake);
> + #endif
> +}
> diff --git a/libavfilter/opencl_allkernels.h b/libavfilter/opencl_allkernels.h
> new file mode 100644
> index 0000000..aca02e0
> --- /dev/null
> +++ b/libavfilter/opencl_allkernels.h
> @@ -0,0 +1,29 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_OPENCL_ALLKERNEL_H
> +#define AVFILTER_OPENCL_ALLKERNEL_H
> +
> +#include "avfilter.h"
> +#include "config.h"
> +
> +void ff_opencl_register_filter_kernel_code_all(void);
> +
> +#endif /* AVFILTER_OPENCL_ALLKERNEL_H */
> diff --git a/libavfilter/vf_deshake.c b/libavfilter/vf_deshake.c
> index 2740bba..56b06a7 100644
> --- a/libavfilter/vf_deshake.c
> +++ b/libavfilter/vf_deshake.c
> @@ -59,55 +59,12 @@
> #include "libavutil/pixdesc.h"
> #include "libavcodec/dsputil.h"
>
> -#include "transform.h"
> +#include "deshake.h"
> +#include "deshake_opencl.h"
>
> #define CHROMA_WIDTH(link) -((-link->w) >> av_pix_fmt_desc_get(link->format)->log2_chroma_w)
> #define CHROMA_HEIGHT(link) -((-link->h) >> av_pix_fmt_desc_get(link->format)->log2_chroma_h)
>
> -enum SearchMethod {
> - EXHAUSTIVE, ///< Search all possible positions
> - SMART_EXHAUSTIVE, ///< Search most possible positions (faster)
> - SEARCH_COUNT
> -};
> -
> -typedef struct {
> - int x; ///< Horizontal shift
> - int y; ///< Vertical shift
> -} IntMotionVector;
> -
> -typedef struct {
> - double x; ///< Horizontal shift
> - double y; ///< Vertical shift
> -} MotionVector;
> -
> -typedef struct {
> - MotionVector vector; ///< Motion vector
> - double angle; ///< Angle of rotation
> - double zoom; ///< Zoom percentage
> -} Transform;
> -
> -typedef struct {
> - const AVClass *class;
> - AVFrame *ref; ///< Previous frame
> - int rx; ///< Maximum horizontal shift
> - int ry; ///< Maximum vertical shift
> - int edge; ///< Edge fill method
> - int blocksize; ///< Size of blocks to compare
> - int contrast; ///< Contrast threshold
> - int search; ///< Motion search method
> - AVCodecContext *avctx;
> - DSPContext c; ///< Context providing optimized SAD methods
> - Transform last; ///< Transform from last frame
> - int refcount; ///< Number of reference frames (defines averaging window)
> - FILE *fp;
> - Transform avg;
> - int cw; ///< Crop motion search to this box
> - int ch;
> - int cx;
> - int cy;
> - char *filename; ///< Motion search detailed log filename
> -} DeshakeContext;
> -
> #define OFFSET(x) offsetof(DeshakeContext, x)
> #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
>
> @@ -129,6 +86,7 @@ static const AVOption deshake_options[] = {
> { "exhaustive", "exhaustive search", 0, AV_OPT_TYPE_CONST, {.i64=EXHAUSTIVE}, INT_MIN, INT_MAX, FLAGS, "smode" },
> { "less", "less exhaustive search", 0, AV_OPT_TYPE_CONST, {.i64=SMART_EXHAUSTIVE}, INT_MIN, INT_MAX, FLAGS, "smode" },
> { "filename", "set motion search detailed log file name", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
> + { "opencl", "use OpenCL filtering capabilities", OFFSET(opencl), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, .flags = FLAGS },
> { NULL }
> };
>
> @@ -360,8 +318,35 @@ static void find_motion(DeshakeContext *deshake, uint8_t *src1, uint8_t *src2,
> av_free(angles);
> }
>
> +static int deshake_transform_c(AVFilterContext *ctx,
> + int width, int height, int cw, int ch,
> + const float *matrix_y, const float *matrix_uv,
> + enum InterpolateMethod interpolate,
> + enum FillMethod fill, AVFrame *in, AVFrame *out)
> +{
> + int i = 0, ret = 0;
> + const float *matrixs[3];
> + int plane_w[3], plane_h[3];
> + matrixs[0] = matrix_y;
> + matrixs[1] = matrixs[2] = matrix_uv;
> + plane_w[0] = width;
> + plane_w[1] = plane_w[2] = cw;
> + plane_h[0] = height;
> + plane_h[1] = plane_h[2] = ch;
> +
> + for (i = 0; i < 3; i++) {
> + // Transform the luma and chroma planes
> + ret = avfilter_transform(in->data[i], out->data[i], in->linesize[i], out->linesize[i],
> + plane_w[i], plane_h[i], matrixs[i], interpolate, fill);
> + if (ret < 0)
> + return ret;
> + }
> + return ret;
> +}
> +
> static av_cold int init(AVFilterContext *ctx, const char *args)
> {
> + int ret;
> DeshakeContext *deshake = ctx->priv;
>
> deshake->refcount = 20; // XXX: add to options?
> @@ -379,7 +364,18 @@ static av_cold int init(AVFilterContext *ctx, const char *args)
> deshake->cw += deshake->cx - (deshake->cx & ~15);
> deshake->cx &= ~15;
> }
> + deshake->transform = deshake_transform_c;
> + if (!CONFIG_OPENCL && deshake->opencl) {
> + av_log(ctx, AV_LOG_ERROR, "OpenCL support was not enabled in this build, cannot be selected\n");
> + return AVERROR(EINVAL);
> + }
>
> + if (deshake->opencl && CONFIG_OPENCL) {
> + deshake->transform = ff_opencl_transform;
> + ret = ff_opencl_deshake_init(ctx);
> + if (ret < 0)
> + return ret;
> + }
> av_log(ctx, AV_LOG_VERBOSE, "cx: %d, cy: %d, cw: %d, ch: %d, rx: %d, ry: %d, edge: %d blocksize: %d contrast: %d search: %d\n",
> deshake->cx, deshake->cy, deshake->cw, deshake->ch,
> deshake->rx, deshake->ry, deshake->edge, deshake->blocksize * 2, deshake->contrast, deshake->search);
> @@ -419,7 +415,9 @@ static int config_props(AVFilterLink *link)
> static av_cold void uninit(AVFilterContext *ctx)
> {
> DeshakeContext *deshake = ctx->priv;
> -
> + if (deshake->opencl && CONFIG_OPENCL) {
> + ff_opencl_deshake_uninit(ctx);
> + }
> av_frame_free(&deshake->ref);
> if (deshake->fp)
> fclose(deshake->fp);
> @@ -434,9 +432,10 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
> AVFilterLink *outlink = link->dst->outputs[0];
> AVFrame *out;
> Transform t = {{0},0}, orig = {{0},0};
> - float matrix[9];
> + float matrix_y[9], matrix_uv[9];
> float alpha = 2.0 / deshake->refcount;
> char tmp[256];
> + int ret = 0;
>
> out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> if (!out) {
> @@ -445,6 +444,12 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
> }
> av_frame_copy_props(out, in);
>
> + if (deshake->opencl && CONFIG_OPENCL) {
> + ret = ff_opencl_deshake_process_inout_buf(link,in, out);
> + if (ret < 0)
> + return ret;
> + }
> +
> if (deshake->cx < 0 || deshake->cy < 0 || deshake->cw < 0 || deshake->ch < 0) {
> // Find the most likely global motion for the current frame
> find_motion(deshake, (deshake->ref == NULL) ? in->data[0] : deshake->ref->data[0], in->data[0], link->w, link->h, in->linesize[0], &t);
> @@ -517,21 +522,19 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
> deshake->last.zoom = t.zoom;
>
> // Generate a luma transformation matrix
> - avfilter_get_matrix(t.vector.x, t.vector.y, t.angle, 1.0 + t.zoom / 100.0, matrix);
> -
> - // Transform the luma plane
> - avfilter_transform(in->data[0], out->data[0], in->linesize[0], out->linesize[0], link->w, link->h, matrix, INTERPOLATE_BILINEAR, deshake->edge);
> -
> + avfilter_get_matrix(t.vector.x, t.vector.y, t.angle, 1.0 + t.zoom / 100.0, matrix_y);
> // Generate a chroma transformation matrix
> - avfilter_get_matrix(t.vector.x / (link->w / CHROMA_WIDTH(link)), t.vector.y / (link->h / CHROMA_HEIGHT(link)), t.angle, 1.0 + t.zoom / 100.0, matrix);
> -
> - // Transform the chroma planes
> - avfilter_transform(in->data[1], out->data[1], in->linesize[1], out->linesize[1], CHROMA_WIDTH(link), CHROMA_HEIGHT(link), matrix, INTERPOLATE_BILINEAR, deshake->edge);
> - avfilter_transform(in->data[2], out->data[2], in->linesize[2], out->linesize[2], CHROMA_WIDTH(link), CHROMA_HEIGHT(link), matrix, INTERPOLATE_BILINEAR, deshake->edge);
> + avfilter_get_matrix(t.vector.x / (link->w / CHROMA_WIDTH(link)), t.vector.y / (link->h / CHROMA_HEIGHT(link)), t.angle, 1.0 + t.zoom / 100.0, matrix_uv);
> + // Transform the luma and chroma planes
> + ret = deshake->transform(link->dst, link->w, link->h, CHROMA_WIDTH(link), CHROMA_HEIGHT(link),
> + matrix_y, matrix_uv, INTERPOLATE_BILINEAR, deshake->edge, in, out);
>
> // Cleanup the old reference frame
> av_frame_free(&deshake->ref);
>
> + if (ret < 0)
> + return ret;
> +
> // Store the current frame as the reference frame for calculating the
> // motion of the next frame
> deshake->ref = in;
Looks good to me otherwise, thank you.
BTW we should add some tests to FATE for deshake, with and without
OpenCL support.
--
FFmpeg = Fostering and Funny Mind-dumbing Purposeless Extreme Goblin
More information about the ffmpeg-devel
mailing list