[FFmpeg-cvslog] deshake opencl based on comments on 20130402 3rd

highgod0401 git at videolan.org
Tue Apr 2 16:10:04 CEST 2013


ffmpeg | branch: master | highgod0401 <highgod0401 at gmail.com> | Tue Apr  2 20:54:22 2013 +0800| [90793591417f29302e97dbd9823481423eabdacc] | committer: Michael Niedermayer

deshake opencl based on comments on 20130402 3rd

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=90793591417f29302e97dbd9823481423eabdacc
---

 doc/filters.texi                |    6 +-
 libavfilter/Makefile            |    2 +
 libavfilter/allfilters.c        |    2 +
 libavfilter/deshake.h           |  104 +++++++++++++++++++
 libavfilter/deshake_kernel.h    |  219 +++++++++++++++++++++++++++++++++++++++
 libavfilter/deshake_opencl.c    |  181 ++++++++++++++++++++++++++++++++
 libavfilter/deshake_opencl.h    |   38 +++++++
 libavfilter/opencl_allkernels.c |   39 +++++++
 libavfilter/opencl_allkernels.h |   29 ++++++
 libavfilter/vf_deshake.c        |  117 +++++++++++----------
 10 files changed, 679 insertions(+), 58 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index 2c82ac3..401125b 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -2504,7 +2504,7 @@ tripod, moving on a vehicle, etc.
 The filter accepts parameters as a list of @var{key}=@var{value}
 pairs, separated by ":". If the key of the first options is omitted,
 the arguments are interpreted according to the syntax
- at var{x}:@var{y}:@var{w}:@var{h}:@var{rx}:@var{ry}:@var{edge}:@var{blocksize}:@var{contrast}:@var{search}:@var{filename}.
+ at var{x}:@var{y}:@var{w}:@var{h}:@var{rx}:@var{ry}:@var{edge}:@var{blocksize}:@var{contrast}:@var{search}:@var{filename}:@var{opencl}.
 
 A description of the accepted parameters follows.
 
@@ -2570,6 +2570,10 @@ Default value is @samp{exhaustive}.
 If set then a detailed log of the motion search is written to the
 specified file.
 
+ at item opencl
+If set to 1, specify using OpenCL capabilities, only available if
+FFmpeg was configured with @code{--enable-opencl}. Default value is 0.
+
 @end table
 
 @section drawbox
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 690b1cb..e865aef 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -40,6 +40,7 @@ OBJS = allfilters.o                                                     \
        formats.o                                                        \
        graphdump.o                                                      \
        graphparser.o                                                    \
+       opencl_allkernels.o                                              \
        transform.o                                                      \
        video.o                                                          \
 
@@ -139,6 +140,7 @@ OBJS-$(CONFIG_NOFORMAT_FILTER)               += vf_format.o
 OBJS-$(CONFIG_NOISE_FILTER)                  += vf_noise.o
 OBJS-$(CONFIG_NULL_FILTER)                   += vf_null.o
 OBJS-$(CONFIG_OCV_FILTER)                    += vf_libopencv.o
+OBJS-$(CONFIG_OPENCL)                        += deshake_opencl.o
 OBJS-$(CONFIG_OVERLAY_FILTER)                += vf_overlay.o
 OBJS-$(CONFIG_PAD_FILTER)                    += vf_pad.o
 OBJS-$(CONFIG_PERMS_FILTER)                  += f_perms.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 45a67e5..4ca180a 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -21,6 +21,7 @@
 
 #include "avfilter.h"
 #include "config.h"
+#include "opencl_allkernels.h"
 
 
 #define REGISTER_FILTER(X, x, y)                                        \
@@ -199,4 +200,5 @@ void avfilter_register_all(void)
     REGISTER_FILTER_UNCONDITIONAL(vsink_buffer);
     REGISTER_FILTER_UNCONDITIONAL(af_afifo);
     REGISTER_FILTER_UNCONDITIONAL(vf_fifo);
+    ff_opencl_register_filter_kernel_code_all();
 }
diff --git a/libavfilter/deshake.h b/libavfilter/deshake.h
new file mode 100644
index 0000000..c24090e
--- /dev/null
+++ b/libavfilter/deshake.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_DESHAKE_H
+#define AVFILTER_DESHAKE_H
+
+#include "config.h"
+#include "avfilter.h"
+#include "libavcodec/dsputil.h"
+#include "transform.h"
+#if CONFIG_OPENCL
+#include "libavutil/opencl.h"
+#endif
+
+
+enum SearchMethod {
+    EXHAUSTIVE,        ///< Search all possible positions
+    SMART_EXHAUSTIVE,  ///< Search most possible positions (faster)
+    SEARCH_COUNT
+};
+
+typedef struct {
+    int x;             ///< Horizontal shift
+    int y;             ///< Vertical shift
+} IntMotionVector;
+
+typedef struct {
+    double x;             ///< Horizontal shift
+    double y;             ///< Vertical shift
+} MotionVector;
+
+typedef struct {
+    MotionVector vector;  ///< Motion vector
+    double angle;         ///< Angle of rotation
+    double zoom;          ///< Zoom percentage
+} Transform;
+
+#if CONFIG_OPENCL
+
+typedef struct {
+    size_t matrix_size;
+    float matrix_y[9];
+    float matrix_uv[9];
+    cl_mem cl_matrix_y;
+    cl_mem cl_matrix_uv;
+    int in_plane_size[8];
+    int out_plane_size[8];
+    int plane_num;
+    cl_mem cl_inbuf;
+    size_t cl_inbuf_size;
+    cl_mem cl_outbuf;
+    size_t cl_outbuf_size;
+    AVOpenCLKernelEnv kernel_env;
+} DeshakeOpenclContext;
+
+#endif
+
+typedef struct {
+    const AVClass *class;
+    AVFrame *ref;              ///< Previous frame
+    int rx;                    ///< Maximum horizontal shift
+    int ry;                    ///< Maximum vertical shift
+    int edge;                  ///< Edge fill method
+    int blocksize;             ///< Size of blocks to compare
+    int contrast;              ///< Contrast threshold
+    int search;                ///< Motion search method
+    AVCodecContext *avctx;
+    DSPContext c;              ///< Context providing optimized SAD methods
+    Transform last;            ///< Transform from last frame
+    int refcount;              ///< Number of reference frames (defines averaging window)
+    FILE *fp;
+    Transform avg;
+    int cw;                    ///< Crop motion search to this box
+    int ch;
+    int cx;
+    int cy;
+    char *filename;            ///< Motion search detailed log filename
+    int opencl;
+#if CONFIG_OPENCL
+    DeshakeOpenclContext opencl_ctx;
+#endif
+    int (* transform)(AVFilterContext *ctx, int width, int height, int cw, int ch,
+                      const float *matrix_y, const float *matrix_uv, enum InterpolateMethod interpolate,
+                      enum FillMethod fill, AVFrame *in, AVFrame *out);
+} DeshakeContext;
+
+#endif /* AVFILTER_DESHAKE_H */
diff --git a/libavfilter/deshake_kernel.h b/libavfilter/deshake_kernel.h
new file mode 100644
index 0000000..335a77e
--- /dev/null
+++ b/libavfilter/deshake_kernel.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
+ *
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_DESHAKE_KERNEL_H
+#define AVFILTER_DESHAKE_KERNEL_H
+
+#include "libavutil/opencl.h"
+
+const char *ff_kernel_deshake_opencl = AV_OPENCL_KERNEL(
+
+inline unsigned char pixel(global const unsigned char *src, float x, float y,
+                           int w, int h,int stride, unsigned char def)
+{
+    return (x < 0 || y < 0 || x >= w || y >= h) ? def : src[(int)x + (int)y * stride];
+}
+unsigned char interpolate_nearest(float x, float y, global const unsigned char *src,
+                                  int width, int height, int stride, unsigned char def)
+{
+    return pixel(src, (int)(x + 0.5), (int)(y + 0.5), width, height, stride, def);
+}
+
+unsigned char interpolate_bilinear(float x, float y, global const unsigned char *src,
+                                   int width, int height, int stride, unsigned char def)
+{
+    int x_c, x_f, y_c, y_f;
+    int v1, v2, v3, v4;
+
+    if (x < -1 || x > width || y < -1 || y > height) {
+        return def;
+    } else {
+        x_f = (int)x;
+        x_c = x_f + 1;
+
+        y_f = (int)y;
+        y_c = y_f + 1;
+
+        v1 = pixel(src, x_c, y_c, width, height, stride, def);
+        v2 = pixel(src, x_c, y_f, width, height, stride, def);
+        v3 = pixel(src, x_f, y_c, width, height, stride, def);
+        v4 = pixel(src, x_f, y_f, width, height, stride, def);
+
+        return (v1*(x - x_f)*(y - y_f) + v2*((x - x_f)*(y_c - y)) +
+                v3*(x_c - x)*(y - y_f) + v4*((x_c - x)*(y_c - y)));
+    }
+}
+
+unsigned char interpolate_biquadratic(float x, float y, global const unsigned char *src,
+                                      int width, int height, int stride, unsigned char def)
+{
+    int     x_c, x_f, y_c, y_f;
+    unsigned char v1,  v2,  v3,  v4;
+    float   f1,  f2,  f3,  f4;
+
+    if (x < - 1 || x > width || y < -1 || y > height)
+        return def;
+    else {
+        x_f = (int)x;
+        x_c = x_f + 1;
+        y_f = (int)y;
+        y_c = y_f + 1;
+
+        v1 = pixel(src, x_c, y_c, width, height, stride, def);
+        v2 = pixel(src, x_c, y_f, width, height, stride, def);
+        v3 = pixel(src, x_f, y_c, width, height, stride, def);
+        v4 = pixel(src, x_f, y_f, width, height, stride, def);
+
+        f1 = 1 - sqrt((x_c - x) * (y_c - y));
+        f2 = 1 - sqrt((x_c - x) * (y - y_f));
+        f3 = 1 - sqrt((x - x_f) * (y_c - y));
+        f4 = 1 - sqrt((x - x_f) * (y - y_f));
+        return (v1 * f1 + v2 * f2 + v3 * f3 + v4 * f4) / (f1 + f2 + f3 + f4);
+    }
+}
+
+inline const float clipf(float a, float amin, float amax)
+{
+    if      (a < amin) return amin;
+    else if (a > amax) return amax;
+    else               return a;
+}
+
+inline int mirror(int v, int m)
+{
+    while ((unsigned)v > (unsigned)m) {
+        v = -v;
+        if (v < 0)
+            v += 2 * m;
+    }
+    return v;
+}
+
+kernel void avfilter_transform(global  unsigned char *src,
+                               global  unsigned char *dst,
+                               global          float *matrix,
+                               global          float *matrix2,
+                                                 int interpolate,
+                                                 int fillmethod,
+                                                 int src_stride_lu,
+                                                 int dst_stride_lu,
+                                                 int src_stride_ch,
+                                                 int dst_stride_ch,
+                                                 int height,
+                                                 int width,
+                                                 int ch,
+                                                 int cw)
+{
+     int global_id = get_global_id(0);
+
+     global unsigned char *dst_y = dst;
+     global unsigned char *dst_u = dst_y + height * dst_stride_lu;
+     global unsigned char *dst_v = dst_u + ch * dst_stride_ch;
+
+     global unsigned char *src_y = src;
+     global unsigned char *src_u = src_y + height * src_stride_lu;
+     global unsigned char *src_v = src_u + ch * src_stride_ch;
+
+     global unsigned char *tempdst;
+     global unsigned char *tempsrc;
+
+     int x;
+     int y;
+     float x_s;
+     float y_s;
+     int tempsrc_stride;
+     int tempdst_stride;
+     int temp_height;
+     int temp_width;
+     int curpos;
+     unsigned char def = 0;
+     if (global_id < width*height) {
+        y = global_id/width;
+        x = global_id%width;
+        x_s = x * matrix[0] + y * matrix[1] + matrix[2];
+        y_s = x * matrix[3] + y * matrix[4] + matrix[5];
+        tempdst = dst_y;
+        tempsrc = src_y;
+        tempsrc_stride = src_stride_lu;
+        tempdst_stride = dst_stride_lu;
+        temp_height = height;
+        temp_width = width;
+     } else if ((global_id >= width*height)&&(global_id < width*height + ch*cw)) {
+        y = (global_id - width*height)/cw;
+        x = (global_id - width*height)%cw;
+        x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2];
+        y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5];
+        tempdst = dst_u;
+        tempsrc = src_u;
+        tempsrc_stride = src_stride_ch;
+        tempdst_stride = dst_stride_ch;
+        temp_height = height;
+        temp_width = width;
+        temp_height = ch;
+        temp_width = cw;
+     } else {
+        y = (global_id - width*height - ch*cw)/cw;
+        x = (global_id - width*height - ch*cw)%cw;
+        x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2];
+        y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5];
+        tempdst = dst_v;
+        tempsrc = src_v;
+        tempsrc_stride = src_stride_ch;
+        tempdst_stride = dst_stride_ch;
+        temp_height = ch;
+        temp_width = cw;
+     }
+     curpos = y * tempdst_stride + x;
+     switch (fillmethod) {
+        case 0: //FILL_BLANK
+            def = 0;
+            break;
+        case 1: //FILL_ORIGINAL
+            def = tempsrc[y*tempsrc_stride+x];
+            break;
+        case 2: //FILL_CLAMP
+            y_s = clipf(y_s, 0, temp_height - 1);
+            x_s = clipf(x_s, 0, temp_width - 1);
+            def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s];
+            break;
+        case 3: //FILL_MIRROR
+            y_s = mirror(y_s,temp_height - 1);
+            x_s = mirror(x_s,temp_width - 1);
+            def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s];
+            break;
+    }
+    switch (interpolate) {
+        case 0: //INTERPOLATE_NEAREST
+            tempdst[curpos] = interpolate_nearest(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
+            break;
+        case 1: //INTERPOLATE_BILINEAR
+            tempdst[curpos] = interpolate_bilinear(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
+            break;
+        case 2: //INTERPOLATE_BIQUADRATIC
+            tempdst[curpos] = interpolate_biquadratic(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
+            break;
+        default:
+            return;
+    }
+}
+);
+
+#endif /* AVFILTER_DESHAKE_KERNEL_H */
diff --git a/libavfilter/deshake_opencl.c b/libavfilter/deshake_opencl.c
new file mode 100644
index 0000000..63d144a
--- /dev/null
+++ b/libavfilter/deshake_opencl.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * transform input video
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/dict.h"
+#include "libavutil/pixdesc.h"
+#include "deshake_opencl.h"
+
+#define MATRIX_SIZE 6
+#define PLANE_NUM 3
+
+#define TRANSFORM_OPENCL_CHECK(method, ...)                                                                  \
+    status = method(__VA_ARGS__);                                                                            \
+    if (status != CL_SUCCESS) {                                                                              \
+        av_log(ctx, AV_LOG_ERROR, "error %s %d\n", # method, status);                                        \
+        return AVERROR_EXTERNAL;                                                                             \
+    }
+
+#define TRANSFORM_OPENCL_SET_KERNEL_ARG(arg_ptr)                                                             \
+    status = clSetKernelArg((kernel),(arg_no++),(sizeof(arg_ptr)),(void*)(&(arg_ptr)));                      \
+    if (status != CL_SUCCESS) {                                                                              \
+        av_log(ctx, AV_LOG_ERROR, "cannot set kernel argument: %d\n", status );                              \
+        return AVERROR_EXTERNAL;                                                                             \
+    }
+
+int ff_opencl_transform(AVFilterContext *ctx,
+                        int width, int height, int cw, int ch,
+                        const float *matrix_y, const float *matrix_uv,
+                        enum InterpolateMethod interpolate,
+                        enum FillMethod fill, AVFrame *in, AVFrame *out)
+{
+    int arg_no, ret = 0;
+    const size_t global_work_size = width * height + 2 * ch * cw;
+    cl_kernel kernel;
+    cl_int status;
+    DeshakeContext *deshake = ctx->priv;
+    ret = av_opencl_buffer_write(deshake->opencl_ctx.cl_matrix_y, (uint8_t *)matrix_y, deshake->opencl_ctx.matrix_size * sizeof(cl_float));
+    if (ret < 0)
+        return ret;
+    ret = av_opencl_buffer_write(deshake->opencl_ctx.cl_matrix_uv, (uint8_t *)matrix_uv, deshake->opencl_ctx.matrix_size * sizeof(cl_float));
+    if (ret < 0)
+        return ret;
+    kernel = deshake->opencl_ctx.kernel_env.kernel;
+    arg_no = 0;
+
+    if ((unsigned int)interpolate > INTERPOLATE_BIQUADRATIC) {
+        av_log(ctx, AV_LOG_ERROR, "Selected interpolate method is invalid\n");
+        return AVERROR(EINVAL);
+    }
+    TRANSFORM_OPENCL_SET_KERNEL_ARG(deshake->opencl_ctx.cl_inbuf);
+    TRANSFORM_OPENCL_SET_KERNEL_ARG(deshake->opencl_ctx.cl_outbuf);
+    TRANSFORM_OPENCL_SET_KERNEL_ARG(deshake->opencl_ctx.cl_matrix_y);
+    TRANSFORM_OPENCL_SET_KERNEL_ARG(deshake->opencl_ctx.cl_matrix_uv);
+    TRANSFORM_OPENCL_SET_KERNEL_ARG(interpolate);
+    TRANSFORM_OPENCL_SET_KERNEL_ARG(fill);
+    TRANSFORM_OPENCL_SET_KERNEL_ARG(in->linesize[0]);
+    TRANSFORM_OPENCL_SET_KERNEL_ARG(out->linesize[0]);
+    TRANSFORM_OPENCL_SET_KERNEL_ARG(in->linesize[1]);
+    TRANSFORM_OPENCL_SET_KERNEL_ARG(out->linesize[1]);
+    TRANSFORM_OPENCL_SET_KERNEL_ARG(height);
+    TRANSFORM_OPENCL_SET_KERNEL_ARG(width);
+    TRANSFORM_OPENCL_SET_KERNEL_ARG(ch);
+    TRANSFORM_OPENCL_SET_KERNEL_ARG(cw);
+    TRANSFORM_OPENCL_CHECK(clEnqueueNDRangeKernel, deshake->opencl_ctx.kernel_env.command_queue, deshake->opencl_ctx.kernel_env.kernel, 1, NULL,
+                           &global_work_size, NULL, 0, NULL, NULL);
+    clFinish(deshake->opencl_ctx.kernel_env.command_queue);
+    ret = av_opencl_buffer_read_image(out->data, deshake->opencl_ctx.out_plane_size,
+                                      deshake->opencl_ctx.plane_num, deshake->opencl_ctx.cl_outbuf,
+                                      deshake->opencl_ctx.cl_outbuf_size);
+    if (ret < 0)
+        return ret;
+    return ret;
+}
+
+int ff_opencl_deshake_init(AVFilterContext *ctx)
+{
+    int ret = 0;
+    DeshakeContext *deshake = ctx->priv;
+    AVDictionary *options = NULL;
+    av_dict_set(&options, "build_options", "-I.", 0);
+    ret = av_opencl_init(options, NULL);
+    av_dict_free(&options);
+    if (ret < 0)
+        return ret;
+    deshake->opencl_ctx.matrix_size = MATRIX_SIZE;
+    deshake->opencl_ctx.plane_num   = PLANE_NUM;
+    ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_matrix_y,
+        deshake->opencl_ctx.matrix_size*sizeof(cl_float), CL_MEM_READ_ONLY, NULL);
+    if (ret < 0)
+        return ret;
+    ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_matrix_uv,
+        deshake->opencl_ctx.matrix_size*sizeof(cl_float), CL_MEM_READ_ONLY, NULL);
+    if (ret < 0)
+        return ret;
+    if (!deshake->opencl_ctx.kernel_env.kernel) {
+        ret =  av_opencl_create_kernel(&deshake->opencl_ctx.kernel_env, "avfilter_transform");
+        if (ret < 0) {
+            av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel for name 'avfilter_transform'\n");
+            return ret;
+        }
+    }
+    return ret;
+}
+
+void ff_opencl_deshake_uninit(AVFilterContext *ctx)
+{
+    DeshakeContext *deshake = ctx->priv;
+    av_opencl_buffer_release(&deshake->opencl_ctx.cl_inbuf);
+    av_opencl_buffer_release(&deshake->opencl_ctx.cl_outbuf);
+    av_opencl_buffer_release(&deshake->opencl_ctx.cl_matrix_y);
+    av_opencl_buffer_release(&deshake->opencl_ctx.cl_matrix_uv);
+    av_opencl_release_kernel(&deshake->opencl_ctx.kernel_env);
+    av_opencl_uninit();
+}
+
+
+int ff_opencl_deshake_process_inout_buf(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
+{
+    int ret = 0;
+    AVFilterLink *link = ctx->inputs[0];
+    DeshakeContext *deshake = ctx->priv;
+    int chroma_height = -((-link->h) >> av_pix_fmt_desc_get(link->format)->log2_chroma_h);
+
+    if ((!deshake->opencl_ctx.cl_inbuf) || (!deshake->opencl_ctx.cl_outbuf)) {
+        deshake->opencl_ctx.in_plane_size[0]  = (in->linesize[0] * in->height);
+        deshake->opencl_ctx.in_plane_size[1]  = (in->linesize[1] * chroma_height);
+        deshake->opencl_ctx.in_plane_size[2]  = (in->linesize[2] * chroma_height);
+        deshake->opencl_ctx.out_plane_size[0] = (out->linesize[0] * out->height);
+        deshake->opencl_ctx.out_plane_size[1] = (out->linesize[1] * chroma_height);
+        deshake->opencl_ctx.out_plane_size[2] = (out->linesize[2] * chroma_height);
+        deshake->opencl_ctx.cl_inbuf_size  = deshake->opencl_ctx.in_plane_size[0] +
+                                             deshake->opencl_ctx.in_plane_size[1] +
+                                             deshake->opencl_ctx.in_plane_size[2];
+        deshake->opencl_ctx.cl_outbuf_size = deshake->opencl_ctx.out_plane_size[0] +
+                                             deshake->opencl_ctx.out_plane_size[1] +
+                                             deshake->opencl_ctx.out_plane_size[2];
+        if (!deshake->opencl_ctx.cl_inbuf) {
+            ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_inbuf,
+                                            deshake->opencl_ctx.cl_inbuf_size,
+                                            CL_MEM_READ_ONLY, NULL);
+            if (ret < 0)
+                return ret;
+        }
+        if (!deshake->opencl_ctx.cl_outbuf) {
+            ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_outbuf,
+                                            deshake->opencl_ctx.cl_outbuf_size,
+                                            CL_MEM_READ_WRITE, NULL);
+            if (ret < 0)
+                return ret;
+        }
+    }
+    ret = av_opencl_buffer_write_image(deshake->opencl_ctx.cl_inbuf,
+                                 deshake->opencl_ctx.cl_inbuf_size,
+                                 0, in->data,deshake->opencl_ctx.in_plane_size,
+                                 deshake->opencl_ctx.plane_num);
+    if(ret < 0)
+        return ret;
+    return ret;
+}
diff --git a/libavfilter/deshake_opencl.h b/libavfilter/deshake_opencl.h
new file mode 100644
index 0000000..30d17d4
--- /dev/null
+++ b/libavfilter/deshake_opencl.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_DESHAKE_OPENCL_H
+#define AVFILTER_DESHAKE_OPENCL_H
+
+#include "deshake.h"
+
+int ff_opencl_deshake_init(AVFilterContext *ctx);
+
+void ff_opencl_deshake_uninit(AVFilterContext *ctx);
+
+int ff_opencl_deshake_process_inout_buf(AVFilterContext *ctx, AVFrame *in, AVFrame *out);
+
+int ff_opencl_transform(AVFilterContext *ctx,
+                        int width, int height, int cw, int ch,
+                        const float *matrix_y, const float *matrix_uv,
+                        enum InterpolateMethod interpolate,
+                        enum FillMethod fill, AVFrame *in, AVFrame *out);
+
+#endif /* AVFILTER_DESHAKE_OPENCL_H */
diff --git a/libavfilter/opencl_allkernels.c b/libavfilter/opencl_allkernels.c
new file mode 100644
index 0000000..021eec2
--- /dev/null
+++ b/libavfilter/opencl_allkernels.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "opencl_allkernels.h"
+#if CONFIG_OPENCL
+#include "libavutil/opencl.h"
+#include "deshake_kernel.h"
+#endif
+
+#define OPENCL_REGISTER_KERNEL_CODE(X, x)                                              \
+    {                                                                                  \
+        if (CONFIG_##X##_FILTER) {                                                     \
+            av_opencl_register_kernel_code(ff_kernel_##x##_opencl);                    \
+        }                                                                              \
+    }
+
+void ff_opencl_register_filter_kernel_code_all(void)
+{
+ #if CONFIG_OPENCL
+   OPENCL_REGISTER_KERNEL_CODE(DESHAKE,     deshake);
+ #endif
+}
diff --git a/libavfilter/opencl_allkernels.h b/libavfilter/opencl_allkernels.h
new file mode 100644
index 0000000..aca02e0
--- /dev/null
+++ b/libavfilter/opencl_allkernels.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_OPENCL_ALLKERNEL_H
+#define AVFILTER_OPENCL_ALLKERNEL_H
+
+#include "avfilter.h"
+#include "config.h"
+
+void ff_opencl_register_filter_kernel_code_all(void);
+
+#endif /* AVFILTER_OPENCL_ALLKERNEL_H */
diff --git a/libavfilter/vf_deshake.c b/libavfilter/vf_deshake.c
index 2740bba..ee6e474 100644
--- a/libavfilter/vf_deshake.c
+++ b/libavfilter/vf_deshake.c
@@ -59,55 +59,12 @@
 #include "libavutil/pixdesc.h"
 #include "libavcodec/dsputil.h"
 
-#include "transform.h"
+#include "deshake.h"
+#include "deshake_opencl.h"
 
 #define CHROMA_WIDTH(link)  -((-link->w) >> av_pix_fmt_desc_get(link->format)->log2_chroma_w)
 #define CHROMA_HEIGHT(link) -((-link->h) >> av_pix_fmt_desc_get(link->format)->log2_chroma_h)
 
-enum SearchMethod {
-    EXHAUSTIVE,        ///< Search all possible positions
-    SMART_EXHAUSTIVE,  ///< Search most possible positions (faster)
-    SEARCH_COUNT
-};
-
-typedef struct {
-    int x;             ///< Horizontal shift
-    int y;             ///< Vertical shift
-} IntMotionVector;
-
-typedef struct {
-    double x;             ///< Horizontal shift
-    double y;             ///< Vertical shift
-} MotionVector;
-
-typedef struct {
-    MotionVector vector;  ///< Motion vector
-    double angle;         ///< Angle of rotation
-    double zoom;          ///< Zoom percentage
-} Transform;
-
-typedef struct {
-    const AVClass *class;
-    AVFrame *ref;              ///< Previous frame
-    int rx;                    ///< Maximum horizontal shift
-    int ry;                    ///< Maximum vertical shift
-    int edge;                  ///< Edge fill method
-    int blocksize;             ///< Size of blocks to compare
-    int contrast;              ///< Contrast threshold
-    int search;                ///< Motion search method
-    AVCodecContext *avctx;
-    DSPContext c;              ///< Context providing optimized SAD methods
-    Transform last;            ///< Transform from last frame
-    int refcount;              ///< Number of reference frames (defines averaging window)
-    FILE *fp;
-    Transform avg;
-    int cw;                    ///< Crop motion search to this box
-    int ch;
-    int cx;
-    int cy;
-    char *filename;            ///< Motion search detailed log filename
-} DeshakeContext;
-
 #define OFFSET(x) offsetof(DeshakeContext, x)
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 
@@ -129,6 +86,7 @@ static const AVOption deshake_options[] = {
         { "exhaustive", "exhaustive search",      0, AV_OPT_TYPE_CONST, {.i64=EXHAUSTIVE},       INT_MIN, INT_MAX, FLAGS, "smode" },
         { "less",       "less exhaustive search", 0, AV_OPT_TYPE_CONST, {.i64=SMART_EXHAUSTIVE}, INT_MIN, INT_MAX, FLAGS, "smode" },
     { "filename", "set motion search detailed log file name", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
+    { "opencl", "use OpenCL filtering capabilities", OFFSET(opencl), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, .flags = FLAGS },
     { NULL }
 };
 
@@ -360,8 +318,35 @@ static void find_motion(DeshakeContext *deshake, uint8_t *src1, uint8_t *src2,
     av_free(angles);
 }
 
+static int deshake_transform_c(AVFilterContext *ctx,
+                                    int width, int height, int cw, int ch,
+                                    const float *matrix_y, const float *matrix_uv,
+                                    enum InterpolateMethod interpolate,
+                                    enum FillMethod fill, AVFrame *in, AVFrame *out)
+{
+    int i = 0, ret = 0;
+    const float *matrixs[3];
+    int plane_w[3], plane_h[3];
+    matrixs[0] = matrix_y;
+    matrixs[1] =  matrixs[2] = matrix_uv;
+    plane_w[0] = width;
+    plane_w[1] = plane_w[2] = cw;
+    plane_h[0] = height;
+    plane_h[1] = plane_h[2] = ch;
+
+    for (i = 0; i < 3; i++) {
+        // Transform the luma and chroma planes
+        ret = avfilter_transform(in->data[i], out->data[i], in->linesize[i], out->linesize[i],
+                                 plane_w[i], plane_h[i], matrixs[i], interpolate, fill);
+        if (ret < 0)
+            return ret;
+    }
+    return ret;
+}
+
 static av_cold int init(AVFilterContext *ctx, const char *args)
 {
+    int ret;
     DeshakeContext *deshake = ctx->priv;
 
     deshake->refcount = 20; // XXX: add to options?
@@ -379,7 +364,18 @@ static av_cold int init(AVFilterContext *ctx, const char *args)
         deshake->cw += deshake->cx - (deshake->cx & ~15);
         deshake->cx &= ~15;
     }
+    deshake->transform = deshake_transform_c;
+    if (!CONFIG_OPENCL && deshake->opencl) {
+        av_log(ctx, AV_LOG_ERROR, "OpenCL support was not enabled in this build, cannot be selected\n");
+        return AVERROR(EINVAL);
+    }
 
+    if (deshake->opencl && CONFIG_OPENCL) {
+        deshake->transform = ff_opencl_transform;
+        ret = ff_opencl_deshake_init(ctx);
+        if (ret < 0)
+            return ret;
+    }
     av_log(ctx, AV_LOG_VERBOSE, "cx: %d, cy: %d, cw: %d, ch: %d, rx: %d, ry: %d, edge: %d blocksize: %d contrast: %d search: %d\n",
            deshake->cx, deshake->cy, deshake->cw, deshake->ch,
            deshake->rx, deshake->ry, deshake->edge, deshake->blocksize * 2, deshake->contrast, deshake->search);
@@ -419,7 +415,9 @@ static int config_props(AVFilterLink *link)
 static av_cold void uninit(AVFilterContext *ctx)
 {
     DeshakeContext *deshake = ctx->priv;
-
+    if (deshake->opencl && CONFIG_OPENCL) {
+        ff_opencl_deshake_uninit(ctx);
+    }
     av_frame_free(&deshake->ref);
     if (deshake->fp)
         fclose(deshake->fp);
@@ -434,9 +432,10 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
     AVFilterLink *outlink = link->dst->outputs[0];
     AVFrame *out;
     Transform t = {{0},0}, orig = {{0},0};
-    float matrix[9];
+    float matrix_y[9], matrix_uv[9];
     float alpha = 2.0 / deshake->refcount;
     char tmp[256];
+    int ret = 0;
 
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
     if (!out) {
@@ -445,6 +444,12 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
     }
     av_frame_copy_props(out, in);
 
+    if (deshake->opencl && CONFIG_OPENCL) {
+        ret = ff_opencl_deshake_process_inout_buf(link->dst,in, out);
+        if (ret < 0)
+            return ret;
+    }
+
     if (deshake->cx < 0 || deshake->cy < 0 || deshake->cw < 0 || deshake->ch < 0) {
         // Find the most likely global motion for the current frame
         find_motion(deshake, (deshake->ref == NULL) ? in->data[0] : deshake->ref->data[0], in->data[0], link->w, link->h, in->linesize[0], &t);
@@ -517,21 +522,19 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
     deshake->last.zoom = t.zoom;
 
     // Generate a luma transformation matrix
-    avfilter_get_matrix(t.vector.x, t.vector.y, t.angle, 1.0 + t.zoom / 100.0, matrix);
-
-    // Transform the luma plane
-    avfilter_transform(in->data[0], out->data[0], in->linesize[0], out->linesize[0], link->w, link->h, matrix, INTERPOLATE_BILINEAR, deshake->edge);
-
+    avfilter_get_matrix(t.vector.x, t.vector.y, t.angle, 1.0 + t.zoom / 100.0, matrix_y);
     // Generate a chroma transformation matrix
-    avfilter_get_matrix(t.vector.x / (link->w / CHROMA_WIDTH(link)), t.vector.y / (link->h / CHROMA_HEIGHT(link)), t.angle, 1.0 + t.zoom / 100.0, matrix);
-
-    // Transform the chroma planes
-    avfilter_transform(in->data[1], out->data[1], in->linesize[1], out->linesize[1], CHROMA_WIDTH(link), CHROMA_HEIGHT(link), matrix, INTERPOLATE_BILINEAR, deshake->edge);
-    avfilter_transform(in->data[2], out->data[2], in->linesize[2], out->linesize[2], CHROMA_WIDTH(link), CHROMA_HEIGHT(link), matrix, INTERPOLATE_BILINEAR, deshake->edge);
+    avfilter_get_matrix(t.vector.x / (link->w / CHROMA_WIDTH(link)), t.vector.y / (link->h / CHROMA_HEIGHT(link)), t.angle, 1.0 + t.zoom / 100.0, matrix_uv);
+    // Transform the luma and chroma planes
+    ret = deshake->transform(link->dst, link->w, link->h, CHROMA_WIDTH(link), CHROMA_HEIGHT(link),
+                             matrix_y, matrix_uv, INTERPOLATE_BILINEAR, deshake->edge, in, out);
 
     // Cleanup the old reference frame
     av_frame_free(&deshake->ref);
 
+    if (ret < 0)
+        return ret;
+
     // Store the current frame as the reference frame for calculating the
     // motion of the next frame
     deshake->ref = in;



More information about the ffmpeg-cvslog mailing list