[FFmpeg-devel] [PATCH] libavutil/libavfilter:add opencl warpper and opencl deshake filter

Stefano Sabatini stefasab at gmail.com
Sun Mar 17 17:59:48 CET 2013


On date Sunday 2013-03-10 22:20:33 +0800, Wei Gao encoded:
> Hi,
> 
> Stefano, the attachment is the patch modified according to your comments.
[...]

> From dd254c133be45ebe07cfd3ae0ebf14d9c6fa151f Mon Sep 17 00:00:00 2001
> From: highgod0401 <highgod0401 at gmail.com>
> Date: Sun, 10 Mar 2013 22:16:12 +0800
> Subject: [PATCH] add opencl warpper and opencl deshake filter

wrapper

> 
> ---
>  configure                      |   5 +
>  libavfilter/Makefile           |   2 +
>  libavfilter/allfilters.c       |  21 +
>  libavfilter/deshake_kernel.h   | 201 ++++++++++
>  libavfilter/transform_opencl.c | 155 +++++++
>  libavfilter/transform_opencl.h |  40 ++
>  libavfilter/vf_deshake.c       | 222 ++++++++++
>  libavutil/Makefile             |   4 +
>  libavutil/opencl.c             | 892 +++++++++++++++++++++++++++++++++++++++++
>  libavutil/opencl.h             | 246 ++++++++++++

Please split the patch in two distinct lavu and lavfi patches.

>  10 files changed, 1788 insertions(+)
>  create mode 100644 libavfilter/deshake_kernel.h
>  create mode 100644 libavfilter/transform_opencl.c
>  create mode 100644 libavfilter/transform_opencl.h
>  create mode 100644 libavutil/opencl.c
>  create mode 100644 libavutil/opencl.h
> 
> diff --git a/configure b/configure
> index b61359c..e3a7f3a 100755
> --- a/configure
> +++ b/configure
> @@ -140,6 +140,7 @@ Component options:
>    --disable-rdft           disable RDFT code
>    --disable-fft            disable FFT code
>    --enable-dxva2           enable DXVA2 code
> +  --enable-opencl          enable OpenCL code
>    --enable-vaapi           enable VAAPI code [autodetect]
>    --enable-vda             enable VDA code   [autodetect]
>    --enable-vdpau           enable VDPAU code [autodetect]
> @@ -1196,6 +1197,7 @@ CONFIG_LIST="
>      network
>      nonfree
>      openal
> +    opencl
>      openssl
>      pic
>      rdft
> @@ -1990,6 +1992,7 @@ cropdetect_filter_deps="gpl"
>  decimate_filter_deps="gpl avcodec"
>  delogo_filter_deps="gpl"
>  deshake_filter_deps="avcodec"
> +deshake_opencl_filter_deps="opencl deshake_filter"
>  drawtext_filter_deps="libfreetype"
>  ebur128_filter_deps="gpl"
>  flite_filter_deps="libflite"
> @@ -3885,6 +3888,7 @@ enabled openal     && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32
>                          die "ERROR: openal not found"; } &&
>                        { check_cpp_condition "AL/al.h" "defined(AL_VERSION_1_1)" ||
>                          die "ERROR: openal must be installed and version must be 1.1 or compatible"; }
> +enabled opencl    && require2 opencl CL/cl.h clEnqueueNDRangeKernel -lOpenCL
>  enabled openssl    && { check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto ||
>                          check_lib openssl/ssl.h SSL_library_init -lssl32 -leay32 ||
>                          check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto -lws2_32 -lgdi32 ||
> @@ -4295,6 +4299,7 @@ echo "libx264 enabled           ${libx264-no}"
>  echo "libxavs enabled           ${libxavs-no}"
>  echo "libxvid enabled           ${libxvid-no}"
>  echo "openal enabled            ${openal-no}"
> +echo "opencl enabled            ${opencl-no}"
>  echo "openssl enabled           ${openssl-no}"
>  echo "zlib enabled              ${zlib-no}"
>  echo "bzlib enabled             ${bzlib-no}"
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 938b183..69b8816 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -9,6 +9,7 @@ FFLIBS-$(CONFIG_ASYNCTS_FILTER)              += avresample
>  FFLIBS-$(CONFIG_ATEMPO_FILTER)               += avcodec
>  FFLIBS-$(CONFIG_DECIMATE_FILTER)             += avcodec
>  FFLIBS-$(CONFIG_DESHAKE_FILTER)              += avcodec
> +FFLIBS-$(CONFIG_DESHAKE_OPENCL_FILTER)       += avcodec
>  FFLIBS-$(CONFIG_MOVIE_FILTER)                += avformat avcodec
>  FFLIBS-$(CONFIG_MP_FILTER)                   += avcodec
>  FFLIBS-$(CONFIG_PAN_FILTER)                  += swresample
> @@ -108,6 +109,7 @@ OBJS-$(CONFIG_CROPDETECT_FILTER)             += vf_cropdetect.o
>  OBJS-$(CONFIG_DECIMATE_FILTER)               += vf_decimate.o
>  OBJS-$(CONFIG_DELOGO_FILTER)                 += vf_delogo.o
>  OBJS-$(CONFIG_DESHAKE_FILTER)                += vf_deshake.o
> +OBJS-$(CONFIG_DESHAKE_OPENCL_FILTER)         += vf_deshake.o transform_opencl.o
>  OBJS-$(CONFIG_DRAWBOX_FILTER)                += vf_drawbox.o
>  OBJS-$(CONFIG_DRAWTEXT_FILTER)               += vf_drawtext.o
>  OBJS-$(CONFIG_EDGEDETECT_FILTER)             += vf_edgedetect.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 47158f9..ea88f62 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -22,6 +22,10 @@
>  #include "avfilter.h"
>  #include "config.h"
>  
> +#if CONFIG_OPENCL
> +#include "libavutil/opencl.h"
> +#include "deshake_kernel.h"
> +#endif
>  
>  #define REGISTER_FILTER(X, x, y)                                        \
>      {                                                                   \
> @@ -35,7 +39,21 @@
>          extern AVFilter avfilter_##x;                                   \
>          avfilter_register(&avfilter_##x);                               \
>      }
> +#if CONFIG_OPENCL
> +#define OPENCL_REGISTER_FILTER(X, x, y)                                        \
> +    {                                                                          \
> +        extern AVFilter avfilter_##y##_##x;                                    \
> +        if (CONFIG_##X##_FILTER) {                                             \
> +            avfilter_register(&avfilter_##y##_##x);                            \
> +            av_opencl_regist_kernel((avfilter_##y##_##x).name,ff_kernel_##x);  \
> +        }                                                                      \
> +    }
>  
> +static void opencl_filters_register_all(void)
> +{
> +    OPENCL_REGISTER_FILTER(DESHAKE_OPENCL,     deshake_opencl,       vf);
> +}
> +#endif
>  void avfilter_register_all(void)
>  {
>      static int initialized;
> @@ -192,4 +210,7 @@ void avfilter_register_all(void)
>      REGISTER_FILTER_UNCONDITIONAL(vsink_buffer);
>      REGISTER_FILTER_UNCONDITIONAL(af_afifo);
>      REGISTER_FILTER_UNCONDITIONAL(vf_fifo);
> +#if CONFIG_OPENCL
> +    opencl_filters_register_all();
> +#endif
>  }
> diff --git a/libavfilter/deshake_kernel.h b/libavfilter/deshake_kernel.h
> new file mode 100644
> index 0000000..2b2faab
> --- /dev/null
> +++ b/libavfilter/deshake_kernel.h
> @@ -0,0 +1,201 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/opencl.h"
> +
> +const char *ff_kernel_deshake_opencl = FF_OPENCL_KERNEL(
> +
> +inline unsigned char pixel(global const unsigned char *src,float x, float y,int w, int h,int stride, unsigned char def)

nit: *src, float x, ...

In general add a space after a ",", just like in English prose.
This applies to the remainder of the patch as well.

> +{
> +    return (x < 0 || y < 0 || x >= w || y >= h) ? def : src[(int)x + (int)y * stride];
> +}
> +unsigned char interpolate_nearest(float x, float y, global const unsigned char *src,
> +                        int width, int height, int stride, unsigned char def)
> +{
> +    return pixel(src, (int)(x + 0.5), (int)(y + 0.5), width, height, stride, def);
> +}
> +
> +unsigned char interpolate_bilinear(float x, float y, global const unsigned char *src,
> +                        int width, int height, int stride, unsigned char def)
> +{
> +    int x_c, x_f, y_c, y_f;
> +    int v1, v2, v3, v4;
> +
> +    if (x < -1 || x > width || y < -1 || y > height) {
> +        return def;
> +    } else {
> +        x_f = (int)x;
> +        x_c = x_f + 1;
> +
> +        y_f = (int)y;
> +        y_c = y_f + 1;
> +
> +        v1 = pixel(src, x_c, y_c, width, height, stride, def);
> +        v2 = pixel(src, x_c, y_f, width, height, stride, def);
> +        v3 = pixel(src, x_f, y_c, width, height, stride, def);
> +        v4 = pixel(src, x_f, y_f, width, height, stride, def);
> +
> +        return (v1*(x - x_f)*(y - y_f) + v2*((x - x_f)*(y_c - y)) +
> +                v3*(x_c - x)*(y - y_f) + v4*((x_c - x)*(y_c - y)));
> +    }
> +}
> +
> +unsigned char interpolate_biquadratic(float x, float y, global const unsigned char *src,
> +                        int width, int height, int stride, unsigned char def)
> +{
> +    int     x_c, x_f, y_c, y_f;
> +    unsigned char v1,  v2,  v3,  v4;
> +    float   f1,  f2,  f3,  f4;
> +
> +    if (x < - 1 || x > width || y < -1 || y > height)
> +        return def;
> +    else {
> +        x_f = (int)x;
> +        x_c = x_f + 1;
> +        y_f = (int)y;
> +        y_c = y_f + 1;
> +
> +        v1 = pixel(src, x_c, y_c, width, height, stride, def);
> +        v2 = pixel(src, x_c, y_f, width, height, stride, def);
> +        v3 = pixel(src, x_f, y_c, width, height, stride, def);
> +        v4 = pixel(src, x_f, y_f, width, height, stride, def);
> +
> +        f1 = 1 - sqrt((x_c - x) * (y_c - y));
> +        f2 = 1 - sqrt((x_c - x) * (y - y_f));
> +        f3 = 1 - sqrt((x - x_f) * (y_c - y));
> +        f4 = 1 - sqrt((x - x_f) * (y - y_f));
> +        return (v1 * f1 + v2 * f2 + v3 * f3 + v4 * f4) / (f1 + f2 + f3 + f4);
> +    }
> +}
> +
> +inline const float clipf(float a, float amin, float amax)
> +{
> +    if      (a < amin) return amin;
> +    else if (a > amax) return amax;
> +    else               return a;
> +}
> +
> +kernel void avfilter_transform(global  unsigned char *src,
> +                               global  unsigned char *dst,
> +                               global          float *matrix,
> +                               global          float *matrix2,
> +                                                 int interpolate,
> +                                                 int fillmethod,
> +                                                 int src_stride_lu,
> +                                                 int dst_stride_lu,
> +                                                 int src_stride_ch,
> +                                                 int dst_stride_ch,
> +                                                 int height,
> +                                                 int width,
> +                                                 int ch,
> +                                                 int cw)
> +{
> +     int global_id = get_global_id(0);
> +
> +     global unsigned char *dst_y = dst;
> +     global unsigned char *dst_u = dst_y + height * dst_stride_lu;
> +     global unsigned char *dst_v = dst_u + ch * dst_stride_ch;
> +
> +     global unsigned char *src_y = src;
> +     global unsigned char *src_u = src_y + height * src_stride_lu;
> +     global unsigned char *src_v = src_u + ch * src_stride_ch;
> +
> +     global unsigned char *tempdst;
> +     global unsigned char *tempsrc;

sorry for the ignorance, what is "global" used for?

> +
> +     int x;
> +     int y;
> +     float x_s;
> +     float y_s;
> +     int tempsrc_stride;
> +     int tempdst_stride;
> +     int temp_height;
> +     int temp_width;
> +     int curpos;
> +     unsigned char def;
> +     if (global_id < width*height) {
> +        y = global_id/width;
> +        x = global_id%width;
> +        x_s = x * matrix[0] + y * matrix[1] + matrix[2];
> +        y_s = x * matrix[3] + y * matrix[4] + matrix[5];
> +        tempdst = dst_y;
> +        tempsrc = src_y;
> +        tempsrc_stride = src_stride_lu;
> +        tempdst_stride = dst_stride_lu;
> +        temp_height = height;
> +        temp_width = width;
> +     }
> +     else if ((global_id >= width*height)&&(global_id < width*height + ch*cw)) {
> +        y = (global_id - width*height)/cw;
> +        x = (global_id - width*height)%cw;
> +        x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2];
> +        y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5];
> +        tempdst = dst_u;
> +        tempsrc = src_u;
> +        tempsrc_stride = src_stride_ch;
> +        tempdst_stride = dst_stride_ch;
> +        temp_height = height;
> +        temp_width = width;
> +        temp_height = ch;
> +        temp_width = cw;
> +     }
> +     else {
> +        y = (global_id - width*height - ch*cw)/cw;
> +        x = (global_id - width*height - ch*cw)%cw;
> +        x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2];
> +        y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5];
> +        tempdst = dst_v;
> +        tempsrc = src_v;
> +        tempsrc_stride = src_stride_ch;
> +        tempdst_stride = dst_stride_ch;
> +        temp_height = ch;
> +        temp_width = cw;
> +     }
> +     curpos = y * tempdst_stride + x;
> +     switch (fillmethod) {
> +        case 1:
> +            def = tempsrc[y*tempsrc_stride+x];
> +            break;
> +        case 2:
> +            y_s = clipf(y_s, 0, temp_height - 1);
> +            x_s = clipf(x_s, 0, temp_width - 1);
> +            def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s];
> +            break;
> +        case 3:
> +            y_s = (y_s < 0) ? -y_s : (y_s >= temp_height) ? (temp_height + temp_height - y_s) : y_s;
> +            x_s = (x_s < 0) ? -x_s : (x_s >= temp_width) ? (temp_width + temp_width - x_s) : x_s;
> +            def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s];
> +            break;
> +         }
> +    switch (interpolate) {
> +        case 0:
> +            tempdst[curpos] = interpolate_nearest(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
> +            break;
> +        case 1:
> +            tempdst[curpos] = interpolate_bilinear(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
> +            break;
> +        case 2:
> +            tempdst[curpos] = interpolate_biquadratic(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
> +            break;
> +        }
> +}
> +
> +);
> diff --git a/libavfilter/transform_opencl.c b/libavfilter/transform_opencl.c
> new file mode 100644
> index 0000000..9fd75b9
> --- /dev/null
> +++ b/libavfilter/transform_opencl.c
> @@ -0,0 +1,155 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * transform input video
> + */
> +
> +#include "libavutil/common.h"
> +#include "libavutil/avassert.h"
> +#include "libavutil/avstring.h"
> +#include "libavutil/opencl.h"
> +#include "transform_opencl.h"
> +
> +
> +
> +static int ff_filter_transform_func(void **userdata, AVOpenCLKernelEnv *kenv)
> +{
> +    cl_mem src = (cl_mem)userdata[0];
> +    cl_mem dst = (cl_mem)userdata[1];
> +    int src_stride_lu = (int)userdata[2];
> +    int dst_stride_lu = (int)userdata[3];
> +    int src_stride_ch = (int)userdata[4];
> +    int dst_stride_ch = (int)userdata[5];
> +    int width      = (int)userdata[6];
> +    int height     = (int)userdata[7];
> +    int cw          = (int)userdata[8];
> +    int ch     = (int)userdata[9];
> +    float *matrix  = (float *)userdata[10];
> +    float *matrix2  = (float *)userdata[11];
> +    int interpolate = (int)userdata[12];
> +    int fillmethod  = (int)userdata[13];
> +    cl_mem matrix_buf = (cl_mem)userdata[14];
> +    cl_mem matrix_buf2  = (cl_mem)userdata[15];
> +    AVOpenCLKernelEnv *env  = (AVOpenCLKernelEnv *)userdata[16];

nit: weird vertical align

> +    cl_uint status;
> +    void *mapped;
> +    const size_t global_work_size = width * height + 2 * ch * cw;

> +    int m_size = 6;

stands for "magic size"? Please document the value or give it a more
meaningful name.

> +    cl_kernel kernel;
> +    int arg_no;
> +
> +
> +    mapped = clEnqueueMapBuffer(kenv->command_queue, matrix_buf, CL_TRUE, CL_MAP_WRITE, 0, m_size*sizeof(cl_float),0,NULL, NULL, NULL);
> +    memcpy(mapped,matrix,m_size*sizeof(cl_float));
> +    clEnqueueUnmapMemObject(kenv->command_queue, matrix_buf, mapped, 0, NULL, NULL);
> +
> +    mapped = clEnqueueMapBuffer(kenv->command_queue, matrix_buf2, CL_TRUE, CL_MAP_WRITE, 0, m_size*sizeof(cl_float),0,NULL, NULL, NULL);
> +    memcpy(mapped,matrix2,m_size*sizeof(cl_float));
> +    clEnqueueUnmapMemObject(kenv->command_queue, matrix_buf2, mapped, 0, NULL, NULL);
> +
> +    if (!env->kernel) {
> +        status =  av_opencl_create_kernel("avfilter_transform", kenv);

> +        if (status) {
> +            av_log(NULL,AV_LOG_ERROR,"clCreateKernel Error %s\n","avfilter_transform");
> +            return 0;

avoid NULL context, and return meaningful error code.

> +        }
> +        env->command_queue = kenv->command_queue;
> +        env->context = kenv->context;
> +        env->kernel = kenv->kernel;
> +        av_strlcpy(env->kernel_name,kenv->kernel_name,150);
> +        env->program = kenv->program;
> +    }
> +    kernel = env->kernel;
> +    arg_no = 0;
> +    AV_OPENCL_SET_KERNEL_ARG(src);
> +    AV_OPENCL_SET_KERNEL_ARG(dst);
> +    AV_OPENCL_SET_KERNEL_ARG(matrix_buf);
> +    AV_OPENCL_SET_KERNEL_ARG(matrix_buf2);
> +    AV_OPENCL_SET_KERNEL_ARG(interpolate);
> +    AV_OPENCL_SET_KERNEL_ARG(fillmethod);
> +    AV_OPENCL_SET_KERNEL_ARG(src_stride_lu);
> +    AV_OPENCL_SET_KERNEL_ARG(dst_stride_lu);
> +    AV_OPENCL_SET_KERNEL_ARG(src_stride_ch);
> +    AV_OPENCL_SET_KERNEL_ARG(dst_stride_ch);
> +    AV_OPENCL_SET_KERNEL_ARG(height);
> +    AV_OPENCL_SET_KERNEL_ARG(width);
> +    AV_OPENCL_SET_KERNEL_ARG(ch);
> +    AV_OPENCL_SET_KERNEL_ARG(cw);
> +
> +    AV_OPENCL_CHECK( clEnqueueNDRangeKernel, env->command_queue, env->kernel, 1, NULL,
> +              &global_work_size, NULL, 0, NULL, NULL);
> +    clFinish(kenv->command_queue);//add for time test
> +    return 1;
> +}
> +
> +
> +void ff_opencl_transform( void *src,  void *dst,
> +                        int src_stride_lu, int dst_stride_lu,
> +                        int src_stride_ch, int dst_stride_ch,
> +                        int width, int height, int cw, int ch,
> +                        const float *matrix, const float *matrix2,
> +                        const void *matrix_cl, const void *matrix2_cl,
> +                        enum InterpolateMethod interpolate,
> +                        enum FillMethod fill,AVOpenCLKernelEnv *env)
> +
> +{
> +        int interpolate_t = interpolate;
> +        int fillmethod    = fill;

You can remove the intermediary variables.

> +        void *userdata[17];
> +
> +        userdata[0] = (void *)src;
> +        userdata[1] = (void *)dst;
> +        userdata[2] = (void *)src_stride_lu;
> +        userdata[3] = (void *)dst_stride_lu;
> +        userdata[4] = (void *)src_stride_ch;
> +        userdata[5] = (void *)dst_stride_ch;
> +        userdata[6] = (void *)width;
> +        userdata[7] = (void *)height;
> +        userdata[8] = (void *)cw;
> +        userdata[9] = (void *)ch;
> +        userdata[10] = (void *)matrix;
> +        userdata[11] = (void *)matrix2;
> +        userdata[12] = (void *)interpolate_t;
> +        userdata[13] = (void *)fillmethod;
> +        userdata[14] = (void *)matrix_cl;
> +        userdata[15] = (void *)matrix2_cl;
> +        userdata[16] = (void *)env;
> +
> +
> +
> +        if(!av_opencl_run_kernel("deshake_opencl", userdata)) {

if_(

> +            av_log( NULL,AV_LOG_ERROR,"run kernel[%s] faild\n", "deshake_opencl" );

weird spacing

av_log(NULL, ...) should be avoided. You should pass a context, and
return an error message so that the caller has a chance to know that
an error occurred. Also I suggest this error message:

"OpenCL failed running kernel for function 'deshake_opencl'\n"


> +            return;
> +        }
> +}
> +
> +int ff_opencl_transform_init(void)
> +{
> +    int st = av_opencl_register_kernel_wrapper( "deshake_opencl", ff_filter_transform_func);
> +    if (!st) {
> +        av_log(NULL,AV_LOG_ERROR, "register kernel[%s] faild\n", "avfilter_transform" );
> +        return AVERROR(EIO);
> +    }
> +    return 0;
> +}
> +
> +
> diff --git a/libavfilter/transform_opencl.h b/libavfilter/transform_opencl.h
> new file mode 100644
> index 0000000..727ab83
> --- /dev/null
> +++ b/libavfilter/transform_opencl.h
> @@ -0,0 +1,40 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_TRANSFORM_OPENCL_H
> +#define AVFILTER_TRANSFORM_OPENCL_H
> +
> +#include <stdint.h>
> +#include "transform.h"
> +
> +
> +void ff_opencl_transform( void *src,  void *dst,
> +                        int src_stride_lu, int dst_stride_lu,
> +                        int src_stride_ch, int dst_stride_ch,
> +                        int width, int height, int cw, int ch,
> +                        const float *matrix, const float *matrix2,
> +                        const void *matrix_cl, const void *matrix2_cl,
> +                        enum InterpolateMethod interpolate,
> +                        enum FillMethod fill, AVOpenCLKernelEnv *env);
> +int ff_opencl_transform_init(void);
> +
> +
> +
> +#endif /* AVFILTER_TRANSFORM_H */
> diff --git a/libavfilter/vf_deshake.c b/libavfilter/vf_deshake.c
> index c03919c..715bd2d 100644
> --- a/libavfilter/vf_deshake.c
> +++ b/libavfilter/vf_deshake.c
> @@ -1,6 +1,8 @@
>  /*
>   * Copyright (C) 2010 Georg Martius <georg.martius at web.de>
>   * Copyright (C) 2010 Daniel G. Taylor <dan at programmer-art.org>

> + * Modified by 2013 Wei Gao <weigao at multicorewareinc.com>

remove this (we have git for tracking authorship)

> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
>   *
>   * This file is part of FFmpeg.
>   *
> @@ -59,6 +61,10 @@
>  #include "libavcodec/dsputil.h"
>  
>  #include "transform.h"
> +#if CONFIG_DESHAKE_OPENCL_FILTER
> +#include "libavutil/opencl.h"
> +#include "transform_opencl.h"
> +#endif
>  
>  #define CHROMA_WIDTH(link)  -((-link->w) >> av_pix_fmt_desc_get(link->format)->log2_chroma_w)
>  #define CHROMA_HEIGHT(link) -((-link->h) >> av_pix_fmt_desc_get(link->format)->log2_chroma_h)
> @@ -85,6 +91,17 @@ typedef struct {
>      double zoom;          ///< Zoom percentage
>  } Transform;
>  
> +#if CONFIG_DESHAKE_OPENCL_FILTER
> +typedef struct {
> +    int pre_filter_type;
> +    int next_filter_type;
> +    void *cl_inbuf;
> +    void *cl_outbuf;
> +    void *matrix_buf;
> +    void *matrix_buf2;

> +    AVOpenCLKernelEnv kernelev;
> +}DeshakeOpenclEv;

Env?


> +#endif
>  typedef struct {
>      AVClass av_class;
>      AVFilterBufferRef *ref;    ///< Previous frame
> @@ -104,6 +121,9 @@ typedef struct {
>      int ch;
>      int cx;
>      int cy;
> +#if CONFIG_DESHAKE_OPENCL_FILTER
> +    DeshakeOpenclEv opencl_ev;
> +#endif
>  } DeshakeContext;
>  
>  static int cmp(const double *a, const double *b)
> @@ -536,6 +556,172 @@ static int filter_frame(AVFilterLink *link, AVFilterBufferRef *in)
>  
>      return ff_filter_frame(outlink, out);
>  }
> +#if CONFIG_DESHAKE_OPENCL_FILTER

> +static av_cold int init_opencl(AVFilterContext *ctx, const char *args)
> +{
> +    DeshakeContext *deshake = ctx->priv;
> +    int ret = init(ctx,args);
> +    if (ret)
> +        return ret;
> +    if (av_opencl_init_run_env("-I.",NULL)) {
> +        av_log(ctx,AV_LOG_ERROR,"Init OpenCL Failed\n");

> +        return AVERROR(EIO);

possibly return a meaningful error code (which ideally is the one
returned by av_opencl_init_run_env).

> +    }
> +    memset(&(deshake->opencl_ev),0,sizeof(DeshakeOpenclEv));
> +    deshake->opencl_ev.cl_inbuf= NULL;
> +    deshake->opencl_ev.cl_outbuf = NULL;
> +    av_opencl_create_buffer(&(deshake->opencl_ev.matrix_buf),
> +        CL_MEM_READ_ONLY,6*sizeof(cl_float),NULL);
> +    av_opencl_create_buffer(&(deshake->opencl_ev.matrix_buf2),
> +        CL_MEM_READ_ONLY,6*sizeof(cl_float),NULL);
> +    return ff_opencl_transform_init();
> +}
> +
> +static av_cold void uninit_opencl(AVFilterContext *ctx)
> +{
> +    DeshakeContext *deshake = ctx->priv;
> +    if (deshake->opencl_ev.cl_inbuf) {
> +        av_opencl_release_buffer(deshake->opencl_ev.cl_inbuf);
> +    }
> +    if (deshake->opencl_ev.cl_outbuf) {
> +        av_opencl_release_buffer(deshake->opencl_ev.cl_outbuf);
> +    }
> +    if (deshake->opencl_ev.matrix_buf) {
> +        av_opencl_release_buffer(deshake->opencl_ev.matrix_buf);
> +    }
> +    if (deshake->opencl_ev.matrix_buf2) {
> +        av_opencl_release_buffer(deshake->opencl_ev.matrix_buf2);
> +    }

you may move the non-NULL check on av_opencl_release functions.

> +    av_opencl_release_kernel(&(deshake->opencl_ev.kernelev));
> +    av_opencl_release_opencl_run_env();
> +    uninit(ctx);
> +}
> +
> +static int filter_frame_opencl(AVFilterLink *link, AVFilterBufferRef *in)
> +{
> +    DeshakeContext *deshake = link->dst->priv;
> +    AVFilterLink *outlink = link->dst->outputs[0];
> +    AVFilterBufferRef *out;

Uhm this needs to be updated after the recent merge (you should
directly make use of AVFrame).

> +    Transform t = {{0},0}, orig = {{0},0};
> +    float alpha = 2.0 / deshake->refcount;
> +    char tmp[256];

> +    float matrixY[9];
> +    float matrixUV[9];

matrix_y, matrix_uv

> +
> +    out = ff_get_video_buffer(outlink, AV_PERM_WRITE, outlink->w, outlink->h);
> +    if (!out) {
> +        avfilter_unref_bufferp(&in);
> +        return AVERROR(ENOMEM);
> +    }
> +    avfilter_copy_buffer_ref_props(out, in);
> +
> +     if (!deshake->opencl_ev.cl_inbuf) {
> +        av_opencl_create_buffer(&(deshake->opencl_ev.cl_inbuf), CL_MEM_READ_ONLY,
> +                                (in->linesize[0] * in->video->h) + (in->linesize[1] * (in->video->h>>1)) +
> +                                (in->linesize[2] * (in->video->h>>1)), NULL);
> +    }
> +    if (!deshake->opencl_ev.cl_outbuf) {
> +        av_opencl_create_buffer(&(deshake->opencl_ev.cl_outbuf), CL_MEM_READ_WRITE,
> +                                (out->linesize[0] * out->video->h) + (out->linesize[1] * (out->video->h>>1)) +
> +                                (out->linesize[2] * (out->video->h>>1)), NULL);
> +    }
> +    av_opencl_write_cl_buffer(deshake->opencl_ev.cl_inbuf, in->data[0], in->data[1],
> +                              in->data[2], in->linesize[0], in->linesize[1],
> +                              in->linesize[2], link->h, 0);

> +    if (deshake->cx < 0 || deshake->cy < 0 || deshake->cw < 0 || deshake->ch < 0) {
> +        // Find the most likely global motion for the current frame
> +        find_motion(deshake, (deshake->ref == NULL) ? in->data[0] : deshake->ref->data[0], in->data[0], link->w, link->h, in->linesize[0], &t);
> +    } else {
> +        uint8_t *src1 = (deshake->ref == NULL) ? in->data[0] : deshake->ref->data[0];
> +        uint8_t *src2 = in->data[0];
> +
> +        deshake->cx = FFMIN(deshake->cx, link->w);
> +        deshake->cy = FFMIN(deshake->cy, link->h);
> +
> +        if ((unsigned)deshake->cx + (unsigned)deshake->cw > link->w) deshake->cw = link->w - deshake->cx;
> +        if ((unsigned)deshake->cy + (unsigned)deshake->ch > link->h) deshake->ch = link->h - deshake->cy;
> +
> +        // Quadword align right margin
> +        deshake->cw &= ~15;
> +
> +        src1 += deshake->cy * in->linesize[0] + deshake->cx;
> +        src2 += deshake->cy * in->linesize[0] + deshake->cx;
> +
> +        find_motion(deshake, src1, src2, deshake->cw, deshake->ch, in->linesize[0], &t);
> +    }
> +
> +
> +    // Copy transform so we can output it later to compare to the smoothed value
> +    orig.vector.x = t.vector.x;
> +    orig.vector.y = t.vector.y;
> +    orig.angle = t.angle;
> +    orig.zoom = t.zoom;
> +
> +    // Generate a one-sided moving exponential average
> +    deshake->avg.vector.x = alpha * t.vector.x + (1.0 - alpha) * deshake->avg.vector.x;
> +    deshake->avg.vector.y = alpha * t.vector.y + (1.0 - alpha) * deshake->avg.vector.y;
> +    deshake->avg.angle = alpha * t.angle + (1.0 - alpha) * deshake->avg.angle;
> +    deshake->avg.zoom = alpha * t.zoom + (1.0 - alpha) * deshake->avg.zoom;
> +
> +    // Remove the average from the current motion to detect the motion that
> +    // is not on purpose, just as jitter from bumping the camera
> +    t.vector.x -= deshake->avg.vector.x;
> +    t.vector.y -= deshake->avg.vector.y;
> +    t.angle -= deshake->avg.angle;
> +    t.zoom -= deshake->avg.zoom;
> +
> +    // Invert the motion to undo it
> +    t.vector.x *= -1;
> +    t.vector.y *= -1;
> +    t.angle *= -1;
> +
> +    // Write statistics to file
> +    if (deshake->fp) {
> +        snprintf(tmp, 256, "%f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f\n", orig.vector.x, deshake->avg.vector.x, t.vector.x, orig.vector.y, deshake->avg.vector.y, t.vector.y, orig.angle, deshake->avg.angle, t.angle, orig.zoom, deshake->avg.zoom, t.zoom);
> +        fwrite(tmp, sizeof(char), strlen(tmp), deshake->fp);
> +    }
> +
> +    // Turn relative current frame motion into absolute by adding it to the
> +    // last absolute motion
> +    t.vector.x += deshake->last.vector.x;
> +    t.vector.y += deshake->last.vector.y;
> +    t.angle += deshake->last.angle;
> +    t.zoom += deshake->last.zoom;
> +
> +    // Shrink motion by 10% to keep things centered in the camera frame
> +    t.vector.x *= 0.9;
> +    t.vector.y *= 0.9;
> +    t.angle *= 0.9;
> +
> +    // Store the last absolute motion information
> +    deshake->last.vector.x = t.vector.x;
> +    deshake->last.vector.y = t.vector.y;
> +    deshake->last.angle = t.angle;
> +    deshake->last.zoom = t.zoom;
> +    avfilter_get_matrix(t.vector.x, t.vector.y, t.angle, 1.0 + t.zoom / 100.0, matrixY);
> +    avfilter_get_matrix(t.vector.x / (link->w / CHROMA_WIDTH(link)), t.vector.y / (link->h / CHROMA_HEIGHT(link)), t.angle, 1.0 + t.zoom / 100.0, matrixUV);
> +    ff_opencl_transform(deshake->opencl_ev.cl_inbuf, deshake->opencl_ev.cl_outbuf,
> +                    in->linesize[0], out->linesize[0],
> +                    in->linesize[1], out->linesize[1],
> +                    link->w, link->h, CHROMA_WIDTH(link), CHROMA_HEIGHT(link),
> +                    matrixY, matrixUV,
> +                    deshake->opencl_ev.matrix_buf, deshake->opencl_ev.matrix_buf2,
> +                    INTERPOLATE_BILINEAR, deshake->edge, &(deshake->opencl_ev.kernelev));
> +    av_opencl_read_to_frame_buffer(deshake->opencl_ev.cl_outbuf,
> +                                   out->data[0], out->data[1], out->data[2], out->linesize[0],
> +                                   out->linesize[1], out->linesize[2], link->h);

Most of this is duplicated code. You should put the function calls to
opencl specific functions in the Deshake context, and call them
depenending on the filter variant.

E.g.:
deshake->transform(...);
if (deshake->is_opencl) {
    av_opencl_read_to_frame_buffer(...);
    // more specific OpenCL code
}

> +
> +    // Cleanup the old reference frame
> +    avfilter_unref_buffer(deshake->ref);
> +
> +    // Store the current frame as the reference frame for calculating the
> +    // motion of the next frame
> +    deshake->ref = in;
> +
> +    return ff_filter_frame(outlink, out);
> +}
> +#endif
> +
>  
>  static const AVFilterPad deshake_inputs[] = {
>      {
> @@ -566,3 +752,39 @@ AVFilter avfilter_vf_deshake = {
>      .inputs        = deshake_inputs,
>      .outputs       = deshake_outputs,
>  };
[...]
> diff --git a/libavutil/opencl.c b/libavutil/opencl.c
> new file mode 100644
> index 0000000..1e870e5
> --- /dev/null
> +++ b/libavutil/opencl.c
> @@ -0,0 +1,892 @@
> +/*
> + * Copyright (C) 2012 Peng Gao <peng at multicorewareinc.com>
> + * Copyright (C) 2012 Li   Cao <li at multicorewareinc.com>
> + * Copyright (C) 2012 Wei  Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "opencl.h"
> +#include "avstring.h"
> +#include "log.h"
> +
> +

> +#define MAX_KERNEL_STRING_LEN   64
> +#define MAX_CLFILE_NUM 50
> +#define MAX_CLKERNEL_NUM 200
> +#define MAX_CLFILE_PATH 255
> +#define MAX_KERNEL_NUM  50
> +#define MAX_KERNEL_NAME_LEN 64
> +#define MAX_FILTER_NAME_LEN 64
> +#define MAX_FILTER_NUM 200
> +
> +typedef struct OpenCLEnv {
> +    cl_platform_id platform;
> +    cl_context   context;
> +    cl_device_id devices;
> +    cl_command_queue command_queue;
> +}OpenCLEnv;

Nit:
} OpenCLEnv;

(a space after "}"), here and below

> +
> +typedef struct GPUEnv {
> +    //share vb in all modules in hb library
> +    cl_platform_id platform;
> +    cl_device_type device_type;
> +    cl_context context;
> +    cl_device_id *devices_id;
> +    cl_device_id  dev;
> +    cl_command_queue command_queue;
> +    cl_kernel kernels[MAX_CLFILE_NUM];
> +    cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file
> +    char  kernel_srcfile[MAX_CLFILE_NUM][256];   //the max len of kernel file name is 256
> +    int file_count; // only one kernel file
> +
> +    char kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN+1];
> +    av_opencl_kernel_function kernel_functions[MAX_CLKERNEL_NUM];
> +    const char *kernel_code[MAX_CLKERNEL_NUM];
> +    int kernel_count;
> +    int reg_kernel_count;
> +    int is_user_created; // 1: created , 0:no create and needed to create by opencl wrapper
> +    uint8_t *temp_buffer;
> +    int temp_buffer_size;
> +}GPUEnv;
> +
> +typedef struct FilterBufferNode {
> +    char filter_name[MAX_FILTER_NAME_LEN+1];
> +    void *cl_inbuf;
> +    int buf_size;
> +}FilterBufferNode;
> +
> +typedef struct OpenclUtils {
> +    const AVClass *class;
> +    int   log_offset;
> +    void *log_ctx;
> +} OpenclUtils;
> +

> +typedef struct OpenclErrorMsg {
> +    int err_code;
> +    const char *err_str;
> +}OpenclErrorMsg;
> +
> +static OpenclErrorMsg opencl_err_msg[] = {
> +        {CL_DEVICE_NOT_FOUND,                               "DEVICE NOT FOUND"},
> +        {CL_DEVICE_NOT_AVAILABLE,                           "DEVICE NOT AVAILABLE"},
> +        {CL_COMPILER_NOT_AVAILABLE,                         "COMPILER NOT AVAILABLE"},
> +        {CL_MEM_OBJECT_ALLOCATION_FAILURE,                  "MEM OBJECT ALLOCATION FAILURE"},
> +        {CL_OUT_OF_RESOURCES,                               "OUT OF RESOURCES"},
> +        {CL_OUT_OF_HOST_MEMORY,                             "OUT OF HOST MEMORY"},
> +        {CL_PROFILING_INFO_NOT_AVAILABLE,                   "PROFILING INFO NOT AVAILABLE"},
> +        {CL_MEM_COPY_OVERLAP,                               "MEM COPY OVERLAP"},
> +        {CL_IMAGE_FORMAT_MISMATCH,                          "IMAGE FORMAT MISMATCH"},
> +        {CL_IMAGE_FORMAT_NOT_SUPPORTED,                     "IMAGE FORMAT NOT_SUPPORTED"},
> +        {CL_BUILD_PROGRAM_FAILURE,                          "BUILD PROGRAM FAILURE"},
> +        {CL_MAP_FAILURE,                                    "MAP FAILURE"},
> +        {CL_MISALIGNED_SUB_BUFFER_OFFSET,                   "MISALIGNED SUB BUFFER OFFSET"},
> +        {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST,      "EXEC STATUS ERROR FOR EVENTS IN WAIT LIST"},
> +        {CL_COMPILE_PROGRAM_FAILURE,                        "COMPILE PROGRAM FAILURE"},
> +        {CL_LINKER_NOT_AVAILABLE,                           "LINKER NOT AVAILABLE"},
> +        {CL_LINK_PROGRAM_FAILURE,                           "LINK PROGRAM FAILURE"},
> +        {CL_DEVICE_PARTITION_FAILED,                        "DEVICE PARTITION FAILED"},
> +        {CL_KERNEL_ARG_INFO_NOT_AVAILABLE,                  "KERNEL ARG INFO NOT AVAILABLE"},
> +        {CL_INVALID_VALUE,                                  "INVALID VALUE"},
> +        {CL_INVALID_DEVICE_TYPE,                            "INVALID DEVICE TYPE"},
> +        {CL_INVALID_PLATFORM,                               "INVALID PLATFORM"},
> +        {CL_INVALID_DEVICE,                                 "INVALID DEVICE"},
> +        {CL_INVALID_CONTEXT,                                "INVALID CONTEXT"},
> +        {CL_INVALID_QUEUE_PROPERTIES,                       "INVALID QUEUE PROPERTIES"},
> +        {CL_INVALID_COMMAND_QUEUE,                          "INVALID COMMAND QUEUE"},
> +        {CL_INVALID_HOST_PTR,                               "INVALID HOST PTR"},
> +        {CL_INVALID_MEM_OBJECT,                             "INVALID MEM OBJECT"},
> +        {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,                "INVALID IMAGE FORMAT DESCRIPTOR"},
> +        {CL_INVALID_IMAGE_SIZE,                             "INVALID IMAGE SIZE"},
> +        {CL_INVALID_SAMPLER,                                "INVALID SAMPLER"},
> +        {CL_INVALID_BINARY,                                 "INVALID BINARY"},
> +        {CL_INVALID_BUILD_OPTIONS,                          "INVALID BUILD OPTIONS"},
> +        {CL_INVALID_PROGRAM,                                "INVALID PROGRAM"},
> +        {CL_INVALID_PROGRAM_EXECUTABLE,                     "INVALID PROGRAM EXECUTABLE"},
> +        {CL_INVALID_KERNEL_NAME,                            "INVALID KERNEL NAME"},
> +        {CL_INVALID_KERNEL_DEFINITION,                      "INVALID KERNEL DEFINITION"},
> +        {CL_INVALID_KERNEL,                                 "INVALID KERNEL"},
> +        {CL_INVALID_ARG_INDEX,                              "INVALID ARG INDEX"},
> +        {CL_INVALID_ARG_VALUE,                              "INVALID ARG VALUE"},
> +        {CL_INVALID_ARG_SIZE,                               "INVALID ARG_SIZE"},
> +        {CL_INVALID_KERNEL_ARGS,                            "INVALID KERNEL ARGS"},
> +        {CL_INVALID_WORK_DIMENSION,                         "INVALID WORK DIMENSION"},
> +        {CL_INVALID_WORK_GROUP_SIZE,                        "INVALID WORK GROUP SIZE"},
> +        {CL_INVALID_WORK_ITEM_SIZE,                         "INVALID WORK ITEM SIZE"},
> +        {CL_INVALID_GLOBAL_OFFSET,                          "INVALID GLOBAL OFFSET"},
> +        {CL_INVALID_EVENT_WAIT_LIST,                        "INVALID EVENT WAIT LIST"},
> +        {CL_INVALID_EVENT,                                  "INVALID EVENT"},
> +        {CL_INVALID_OPERATION,                              "INVALID OPERATION"},
> +        {CL_INVALID_GL_OBJECT,                              "INVALID GL OBJECT"},
> +        {CL_INVALID_BUFFER_SIZE,                            "INVALID BUFFER SIZE"},
> +        {CL_INVALID_MIP_LEVEL,                              "INVALID MIP LEVEL"},
> +        {CL_INVALID_GLOBAL_WORK_SIZE,                       "INVALID GLOBAL WORK SIZE"},
> +        {CL_INVALID_PROPERTY,                               "INVALID PROPERTY"},
> +        {CL_INVALID_IMAGE_DESCRIPTOR,                       "INVALID IMAGE DESCRIPTOR"},
> +        {CL_INVALID_COMPILER_OPTIONS,                       "INVALID COMPILER OPTIONS"},
> +        {CL_INVALID_LINKER_OPTIONS,                         "INVALID LINKER OPTIONS"},
> +        {CL_INVALID_DEVICE_PARTITION_COUNT,                 "INVALID DEVICE PARTITION COUNT"},

This is going to be a pain to maintain. Rather return a generic error
message (OpenCL error with code %d occurred) in the code.

Ideally the library should provide error string utilities (e.g. like
the ones in libavutil/error.h) so that it is not required to keep them
in sync in each and every single one OpenCL application.

> +};
> +
> +static const AVClass openclutils_class = { "OPENCLUTILS", av_default_item_name,
> +                                                   NULL, LIBAVUTIL_VERSION_INT,
> +                                                   offsetof(OpenclUtils, log_offset),
> +                                                   offsetof(OpenclUtils, log_ctx) };
> +static OpenclUtils openclutils = {&openclutils_class,0,NULL};
> +static GPUEnv gpu_env = {0};
> +static FilterBufferNode filter_buffer[MAX_FILTER_NUM] = {{"", NULL,0}};
> +static int isinited = 0;
> +
> +void av_opencl_regist_kernel(const char *kernel_name,const char *kernel_code)
> +{
> +    gpu_env.kernel_code[gpu_env.kernel_count] = kernel_code;
> +    av_strlcpy(gpu_env.kernel_names[gpu_env.kernel_count],kernel_name,MAX_KERNEL_STRING_LEN+1);
> +    gpu_env.kernel_count++;
> +}
> +static const char* opencl_errstr(int status)
> +{
> +    for (int i = 0;i < sizeof(opencl_err_msg);i++) {
> +        if(opencl_err_msg[i].err_code == status)
> +            return opencl_err_msg[i].err_str;
> +    }
> +    return "unknown error";
> +}
> +

> +static int binary_generated(cl_context context, const char * cl_file_name, FILE ** fhandle)

The usual convenction for a function name is to use a verb indicating
what the function does. This could be "generate_binary".

> +{
> +    int i = 0;
> +    cl_int status;
> +    size_t numdevices;
> +    cl_device_id *devices;
> +    FILE * fd = NULL;
> +    status = clGetContextInfo(context,
> +                              CL_CONTEXT_NUM_DEVICES,
> +                              sizeof(numdevices),
> +                              &numdevices,
> +                              NULL);
> +    if (status != CL_SUCCESS){
> +        av_log(&openclutils,AV_LOG_ERROR,"binary_generated error,clGetContextInfo:%s\n",opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +
> +    devices = av_malloc(sizeof(cl_device_id) * numdevices);
> +    if (!devices)
> +        return AVERROR(ENOMEM);
> +
> +    /* grab the handles to all of the devices in the context. */
> +    status = clGetContextInfo(context,
> +                              CL_CONTEXT_DEVICES,
> +                              sizeof(cl_device_id) * numdevices,
> +                              devices,
> +                              NULL);
> +
> +    status = 0;
> +    /* dump out each binary into its own separate file. */
> +    for (i = 0; i < numdevices; i++) {
> +        char filename[256] = {0};
> +        char cl_name[128] = {0};
> +        if (devices[i] != 0) {
> +            char devicename[1024];

> +            status = clGetDeviceInfo(devices[i],
> +                                     CL_DEVICE_NAME,
> +                                     sizeof(devicename),
> +                                     devicename,
> +                                     NULL);

the return value is never used

> +            memcpy(cl_name,cl_file_name,strlen(cl_file_name));

possible buffer overflow

> +            cl_name[strlen(cl_file_name) + 1] = '\0';
> +            snprintf(filename, sizeof(filename),"./%s-%s.bin", cl_name, devicename);
> +            fd = fopen(filename,"rb");

> +            status = (fd != NULL) ? 1 : 0;


> +        }
> +    }
> +
> +    if (devices)
> +        av_free(devices);
> +
> +    if (fd)
> +        *fhandle = fd;
> +    return status;

This is relative to the last operation performed in the loop, which
doesn't seem very useful as error code.

> +}
> +
> +static int generat_bin_from_kernel_source(cl_program program, const char * cl_file_name)

generate_ ... ?

> +{
> +    int i = 0;
> +    cl_int status;
> +    size_t *binarysizes, numdevices;
> +    cl_device_id *devices;
> +    char **binaries;
> +    status = clGetProgramInfo(program,
> +                              CL_PROGRAM_NUM_DEVICES,
> +                              sizeof(numdevices),
> +                              &numdevices,
> +                              NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"generat_bin_from_kernel_source error,clGetProgramInfo:%s\n",opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    devices = av_malloc(sizeof(cl_device_id) * numdevices);
> +    if(!devices)
> +        return AVERROR(ENOMEM);
> +    /* grab the handles to all of the devices in the program. */
> +    status = clGetProgramInfo(program,
> +                              CL_PROGRAM_DEVICES,
> +                              sizeof(cl_device_id) * numdevices,
> +                              devices,
> +                              NULL);
> +

> +    /* figure out the sizes of each of the binaries. */
> +    binarysizes = av_malloc(sizeof(size_t) * numdevices);

missing malloc check

> +
> +    status = clGetProgramInfo(program,
> +                              CL_PROGRAM_BINARY_SIZES,
> +                              sizeof(size_t) * numdevices,
> +                              binarysizes, NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"generat_bin_from_kernel_source error,clGetProgramInfo:%s\n",opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }

> +    /* copy over all of the generated binaries. */

> +    binaries = av_malloc(sizeof(char *) * numdevices);
> +    if(!binaries)
> +        return AVERROR(ENOMEM);
> +    memset(binaries,0,sizeof(char *) * numdevices);

av_mallocz

> +    for (i = 0; i < numdevices; i++) {
> +        if (binarysizes[i] != 0) {
> +            binaries[i] = av_malloc(sizeof(char) * binarysizes[i]);
> +            if(!binaries[i])
> +                return AVERROR(ENOMEM);

leaking memory, you're not freeing data previously allocated in the
function

> +        }
> +    }
> +

> +    status = clGetProgramInfo(program,
> +                              CL_PROGRAM_BINARIES,
> +                              sizeof(char *) * numdevices,
> +                              binaries,
> +                              NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"generat_bin_from_kernel_source error,clGetProgramInfo:%s\n",opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }


> +    /* dump out each binary into its own separate file. */
> +    for (i = 0; i < numdevices; i++) {
> +        char filename[256] = {0};
> +        char cl_name[128] = {0};
> +        FILE *output = NULL;
> +        if (binarysizes[i] != 0) {
> +            char devicename[1024];
> +            status = clGetDeviceInfo(devices[i],
> +                                     CL_DEVICE_NAME,
> +                                     sizeof(devicename),
> +                                     devicename,
> +                                     NULL);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils,AV_LOG_ERROR,"generat_bin_from_kernel_source error,clGetDeviceInfo:%s\n",opencl_errstr(status));
> +                return AVERROR_EXTERNAL;
> +            }
> +            memcpy(cl_name, cl_file_name, strlen(cl_file_name));
> +            cl_name[strlen(cl_file_name) + 1] = '\0';
> +            snprintf(filename,sizeof(filename), "./%s-%s.bin", cl_name, devicename);
> +            output = fopen(filename, "wb");
> +            if(!output)
> +                return AVERROR_EXTERNAL;
> +            fwrite(binaries[i], sizeof(char), binarysizes[i], output);
> +            fclose(output);
> +        }
> +    }

duplicated code with binary_generated()?

> +

> +    // Release all resouces and memory
> +    for (i = 0;i < numdevices;i++ ) {
> +        if (binaries[i])
> +            av_free(binaries[i]);
> +    }
> +    if (binaries)
> +        av_free(binaries);
> +
> +    if (binarysizes)
> +        av_free(binarysizes);
> +
> +    if (devices)
> +        av_free(devices);

you can skip all the NULL checks (implemented in av_free)

> +    return 0;
> +}
> +
> +int av_opencl_create_kernel(const char * kernelname, AVOpenCLKernelEnv * env)
> +{
> +    int status;
> +    env->kernel = clCreateKernel(gpu_env.programs[0], kernelname, &status);
> +    env->context = gpu_env.context;
> +    env->command_queue = gpu_env.command_queue;
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"av_opencl_create_kernel error,clCreateKernel:%s\n",opencl_errstr(status));
> +    }

> +    return status != CL_SUCCESS ? 1 : 0;

return meaningful error code (could be AVERROR_EXTERNAL in this case).


> +}
> +
> +int av_opencl_release_kernel(AVOpenCLKernelEnv * env)
> +{
> +    int status = clReleaseKernel(env->kernel);

> +    return status != CL_SUCCESS ? 1 : 0;

same here

> +}
> +
> +static int init_opencl_env(GPUEnv *gpu_info,void *ext_opencl_info)
> +{
> +    size_t length;
> +    cl_int status;
> +    cl_uint numplatforms, numdevices;
> +    cl_platform_id *platforms;
> +    cl_context_properties cps[3];
> +    char platform_name[100];
> +    unsigned int i;
> +    AVOpenCLExternalInfo *opencl_info = ext_opencl_info;
> +    if (opencl_info) {
> +        if(gpu_info->is_user_created)
> +            return 1;
> +        gpu_info->platform = opencl_info->platform;
> +        gpu_info->is_user_created = 1;
> +        gpu_info->command_queue = opencl_info->command_queue;
> +        gpu_info->context = opencl_info->context;
> +        gpu_info->devices_id = opencl_info->devices_id;
> +        gpu_info->dev = opencl_info->dev;
> +        gpu_info->device_type = opencl_info->device_type;
> +    } else {
> +        if (!gpu_info->is_user_created) {
> +            status = clGetPlatformIDs(0,NULL,&numplatforms);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils,AV_LOG_ERROR,"init_opencl_env error,clGetPlatformIDs:%s\n",opencl_errstr(status));
> +                return 1;
> +            }
> +            gpu_info->platform = NULL;
> +            if (0 < numplatforms) {
> +                platforms = av_malloc(
> +                    numplatforms * sizeof(cl_platform_id));

missing NULL check

> +                if (platforms == (cl_platform_id*)NULL) {

if (!platform) {

> +                    return 1;
> +                }
> +                status = clGetPlatformIDs(numplatforms, platforms, NULL);
> +                if (status != CL_SUCCESS) {
> +                    av_log(&openclutils,AV_LOG_ERROR,"init_opencl_env error,clGetPlatformIDs:%s\n",opencl_errstr(status));
> +                    return 1;
> +                }
> +                for (i = 0; i < numplatforms; i++) {
> +                    status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
> +                                               sizeof(platform_name), platform_name,
> +                                               NULL);
> +
> +                    if ( status != CL_SUCCESS ) {
> +                        av_log(&openclutils,AV_LOG_ERROR,"init_opencl_env error,clGetPlatformInfo:%s\n",opencl_errstr(status));
> +                        return 1;

leak on platforms

The way we handle middle-function failures is usually to put an "end:"
label at the end of the function, where you do cleanup and return the
error code.

> +                    }
> +                    gpu_info->platform = platforms[i];
> +                    av_free(gpu_info->devices_id);
> +                    gpu_info->devices_id = NULL;
> +                    status = clGetDeviceIDs(gpu_info->platform /* platform */,
> +                                            CL_DEVICE_TYPE_GPU /* device_type */,
> +                                            0 /* num_entries */,
> +                                            NULL /* devices */,
> +                                            &numdevices);
> +

> +                    if (0 == numdevices) {

weird style (with regards to FFmpeg codebase style). Either
if (!numdevices)
or
if (numdevices == 0)

is preferred

> +                        //find CPU device
> +                        status = clGetDeviceIDs( gpu_info->platform /* platform */,
> +                                             CL_DEVICE_TYPE_CPU /* device_type */,
> +                                             0 /* num_entries */,
> +                                             NULL /* devices */,
> +                                             &numdevices );
> +                    }
> +                    if (status != CL_SUCCESS) {
> +                        av_log(&openclutils,AV_LOG_ERROR,"init_opencl_env error,clGetDeviceIDs:%s\n",opencl_errstr(status));
> +                        return 1;
> +                    }
> +                    if(numdevices)
> +                       break;
> +
> +                }
> +                av_free(platforms);
> +            }
> +            if (!gpu_info->platform) {
> +                return 1;
> +            }
> +
> +       /*
> +             * Use available platform.
> +             */

> +            av_log(&openclutils,AV_LOG_INFO,"Platform Name: %s\n",platform_name);

AV_LOG_VERBOSE

> +            cps[0] = CL_CONTEXT_PLATFORM;
> +            cps[1] = (cl_context_properties)gpu_info->platform;
> +            cps[2] = 0;
> +            /* Check for GPU. */
> +            gpu_info->device_type = CL_DEVICE_TYPE_GPU;
> +            gpu_info->context = clCreateContextFromType(
> +                cps, gpu_info->device_type, NULL, NULL, &status );

> +            if ((gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS)) {

if (!gpu_info->context ...

same below

> +                gpu_info->device_type = CL_DEVICE_TYPE_CPU;
> +                gpu_info->context = clCreateContextFromType(
> +                    cps, gpu_info->device_type, NULL, NULL, &status );
> +            }
> +            if ((gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS)) {
> +                gpu_info->device_type = CL_DEVICE_TYPE_DEFAULT;
> +                gpu_info->context = clCreateContextFromType(
> +                    cps, gpu_info->device_type, NULL, NULL, &status );
> +            }
> +            if ((gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS)) {
> +                av_log(&openclutils,AV_LOG_ERROR,"init_opencl_env error,clCreateContextFromType:%s\n",opencl_errstr(status));
> +                return 1;
> +            }
> +            /* Detect OpenCL devices. */
> +            /* First, get the size of device list data */
> +            status = clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES,
> +                                      0, NULL, &length);
> +            if ((status != CL_SUCCESS) || (length == 0)) {
> +                av_log(&openclutils,AV_LOG_ERROR,"init_opencl_env error,clGetContextInfo:%s\n",opencl_errstr(status));
> +                return 1;
> +            }
> +            /* Now allocate memory for device list based on the size we got earlier */
> +            gpu_info->devices_id = av_malloc( length );
> +            if (gpu_info->devices_id == (cl_device_id*)NULL) {
> +                return 1;
> +            }
> +            /* Now, get the device list data */
> +            status = clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES, length,
> +                                      gpu_info->devices_id, NULL);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils,AV_LOG_ERROR,"init_opencl_env error,clGetContextInfo:%s\n",opencl_errstr(status));
> +                return 1;
> +            }
> +            /* Create OpenCL command queue. */
> +            gpu_info->command_queue = clCreateCommandQueue(gpu_info->context,
> +                                                           gpu_info->devices_id[0],
> +                                                           0, &status);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils,AV_LOG_ERROR,"init_opencl_env error,clCreateCommandQueue:%s\n",opencl_errstr(status));
> +                return 1;
> +            }
> +        }
> +    }
> +    return 0;
> +}
> +
> +static int release_opencl_env( GPUEnv *gpu_info )
> +{
> +    int i, status;
> +    if (!isinited)
> +        return 1;
> +    gpu_info->reg_kernel_count--;
> +    if (!gpu_info->reg_kernel_count) {
> +        for (i = 0; i<gpu_env.file_count; i++) {
> +            if (gpu_env.programs[i]) {
> +                status = clReleaseProgram(gpu_env.programs[i]);
> +                if (status != CL_SUCCESS) {
> +                    av_log(&openclutils,AV_LOG_ERROR,"release_opencl_env error,clReleaseProgram:%s\n",opencl_errstr(status));
> +                }
> +                gpu_env.programs[i] = NULL;
> +            }
> +        }
> +        if (gpu_env.command_queue) {
> +            clReleaseCommandQueue(gpu_env.command_queue);
> +            gpu_env.command_queue = NULL;
> +        }
> +        if (gpu_env.context) {
> +            clReleaseContext(gpu_env.context);
> +            gpu_env.context = NULL;
> +        }
> +        isinited = 0;
> +        gpu_info->is_user_created = 0;
> +    }
> +    return 1;
> +}
> +
> +int av_opencl_register_kernel_wrapper(const char *kernel_name, av_opencl_kernel_function function)
> +{
> +    for (int i = 0; i < gpu_env.kernel_count; i++) {
> +        if (av_strcasecmp(kernel_name, gpu_env.kernel_names[i])==0) {
> +            gpu_env.kernel_functions[i] = function;
> +            gpu_env.reg_kernel_count++;
> +            return 1;
> +        }
> +    }
> +    return 0;
> +}
> +

> +static int cached_of_kerner_prg(const GPUEnv *gpu_env, const char * cl_file_name)

obfuscated function name

> +{
> +    for (int i = 0; i < gpu_env->file_count; i++) {
> +        if (av_strcasecmp(gpu_env->kernel_srcfile[i], cl_file_name)==0) {
> +            if(gpu_env->programs[i])
> +                return 1;
> +        }
> +    }
> +    return 0;
> +}
> +
> +static int compile_kernel_file(const char *filename, GPUEnv *gpu_info,
> +                            int indx, const char *build_option)
> +{
> +    cl_int status;
> +    size_t length;
> +    char *source_str;
> +    const char *source;
> +    size_t source_size[1];
> +    char *buildlog = NULL;
> +    int b_error, binary_status, binary_existed;
> +    char * binary;
> +    char *temp;
> +    size_t numdevices;
> +    cl_device_id *devices;
> +    FILE * fd;
> +    FILE * fd1;
> +    int idx;
> +    int kernel_src_size = 0;
> +    if (cached_of_kerner_prg(gpu_info, filename) == 1)
> +        return 1;
> +
> +    idx = gpu_info->file_count;
> +    for (int i = 0;i < gpu_env.kernel_count;i++) {
> +        kernel_src_size += strlen(gpu_env.kernel_code[i]);
> +    }

> +    source_str = av_malloc(kernel_src_size + 2);
> +    temp = source_str;
> +    memset(source_str,0x00,kernel_src_size + 2);

av_mallocz, and missing NULL check

> +    for (int i = 0;i < gpu_env.kernel_count;i++) {
> +        memcpy(temp,gpu_env.kernel_code[i],strlen(gpu_env.kernel_code[i]));
> +        temp += strlen(gpu_env.kernel_code[i]);
> +    }
> +    source = source_str;
> +    source_size[0] = strlen(source);
> +
> +    binary_existed = 0;
> +    if ((binary_existed = binary_generated(gpu_info->context, filename, &fd)) == 1) {
> +        status = clGetContextInfo(gpu_info->context,
> +                                  CL_CONTEXT_NUM_DEVICES,
> +                                  sizeof(numdevices),
> +                                  &numdevices,
> +                                  NULL);
> +        if(status != CL_SUCCESS) {
> +            av_log(&openclutils,AV_LOG_ERROR,"compile_kernel_file error,clGetContextInfo:%s\n",
> +                   opencl_errstr(status));
> +            return 0;

leak, more leaks below

[...]
> +static int get_kernel_env_and_func(const char *kernel_name,
> +                                AVOpenCLKernelEnv *env,
> +                                av_opencl_kernel_function *function)
> +{
> +    for (int i = 0; i < gpu_env.kernel_count; i++) {
> +        if (av_strcasecmp(kernel_name, gpu_env.kernel_names[i])==0) {

> +            //program_idx = 0;
> +            //GetProgramIndex(i, &gpu_env, &program_idx);

remove commented code

> +            env->context = gpu_env.context;
> +            env->command_queue = gpu_env.command_queue;
> +            env->program = gpu_env.programs[0];
> +            env->kernel = gpu_env.kernels[i];
> +            *function = gpu_env.kernel_functions[i];
> +            return 1;
> +        }
> +    }
> +    return 0;
> +}
> +
> +int av_opencl_run_kernel(const char *kernel_name, void **userdata)
> +{
> +    AVOpenCLKernelEnv env;
> +    av_opencl_kernel_function function;
> +    int status;
> +    memset(&env, 0, sizeof(AVOpenCLKernelEnv));
> +    status = get_kernel_env_and_func(kernel_name, &env, &function);

> +    av_strlcpy(env.kernel_name,kernel_name,150);

magic number, overflow in case env.kernel_name has size less than 150

> +    if (status == 1) {
> +        return(function(userdata, &env));
> +    }
> +    return 0;
> +}
> +
> +int av_opencl_init_run_env(const char *build_option,void *ext_opencl_info)
> +{
> +    int status;
> +    if (!isinited) {
> +        /*initialize devices, context, comand_queue*/
> +        status = init_opencl_env(&gpu_env,ext_opencl_info);
> +        if (status) {
> +            av_log(&openclutils,AV_LOG_ERROR,"init_opencl_env Failed\n");
> +            return AVERROR(EIO);
> +        }
> +        /*initialize program, kernel_name, kernel_count*/

> +        //file_name = argv[i];

remove this

> +        status = compile_kernel_file("ffmpeg-kernels", &gpu_env, 0, build_option);
> +
> +        if (status == 0 || gpu_env.kernel_count == 0) {
> +            av_log(&openclutils,AV_LOG_ERROR,"compile_kernel_file Failed status = %d,kernel_count = %d\n",status,gpu_env.kernel_count);
> +            return AVERROR(EIO);
> +        }
> +        isinited = 1;
> +    }
> +    return 0;
> +}
> +
> +int av_opencl_release_opencl_run_env(void)
> +{
> +    return release_opencl_env(&gpu_env);
> +}
> +
> +int av_opencl_stats(void)
> +{
> +    return isinited;
> +}
> +
> +void av_opencl_get_kernel_env(AVOpenCLKernelEnv *env)
> +{
> +    env->context = gpu_env.context;
> +    env->command_queue = gpu_env.command_queue;
> +    env->program = gpu_env.programs[0];
> +}
> +
> +int av_opencl_create_buffer(void **cl_buf,int flags,int size,void *host_ptr)
> +{
> +    int status;
> +    *cl_buf = clCreateBuffer(gpu_env.context, (flags), (size), host_ptr, &status);
> +
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"av_opencl_create_buffer error,clCreateBuffer:%s\n",opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    return 0;
> +}
> +
> +void av_opencl_release_buffer(void *cl_buf)
> +{
> +    clReleaseMemObject(cl_buf);
> +}
> +
> +int av_opencl_read_cl_buffer(void *cl_inbuf,uint8_t *outbuf,int size)
> +{
> +    int status;
> +    void *mapped = clEnqueueMapBuffer(gpu_env.command_queue, cl_inbuf,
> +                                      CL_TRUE,CL_MAP_READ, 0, sizeof(uint8_t) * size,
> +                                      0, NULL, NULL, &status);
> +
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"av_opencl_read_cl_buffer error,clEnqueueMapBuffer:%s\n",opencl_errstr(status));
> +        return 0;
> +    }
> +    memcpy(outbuf,mapped,sizeof(uint8_t) * size);
> +
> +    status = clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inbuf, mapped, 0, NULL, NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"av_opencl_read_cl_buffer error,clEnqueueUnmapMemObject:%s\n",opencl_errstr(status));
> +        return 0;
> +    }
> +    return 1;
> +}
> +
> +int av_opencl_write_cl_buffer(void *cl_inbuf, uint8_t *ybuf, uint8_t *ubuf,
> +                                   uint8_t *vbuf, int linesize0, int linesize1,
> +                                   int linesize2, int height, int offset)
> +{
> +    int chr_h = -(-height >> 1);
> +    int buffersize = (linesize0 * height + linesize1 * chr_h * 2);
> +    uint8_t *temp;
> +    void *mapped = clEnqueueMapBuffer(gpu_env.command_queue, cl_inbuf,
> +                                      CL_TRUE,CL_MAP_WRITE, 0, buffersize + offset,
> +                                      0, NULL, NULL, NULL);
> +    if(!mapped)
> +        return 0;
> +    temp = mapped;
> +    temp += offset;
> +    memcpy(temp,ybuf,linesize0 * height);
> +    memcpy(temp + linesize0 * height, ubuf, linesize1 * chr_h);
> +    memcpy(temp + (linesize0 * height + linesize1 * chr_h), vbuf, linesize2 * chr_h);
> +    clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inbuf, mapped, 0, NULL, NULL);
> +    return 1;
> +}
> +
> +cl_device_id av_opencl_get_device_id(void)
> +{
> +    return *(gpu_env.devices_id);
> +}
> +
> +cl_context av_opencl_get_context(void)
> +{
> +    return gpu_env.context;
> +}
> +
> +cl_command_queue av_opencl_get_command_queue(void)
> +{
> +    return gpu_env.command_queue;
> +}
> +
> +int av_opencl_read_to_frame_buffer(void *cl_inbuf, uint8_t *ybuf, uint8_t *ubuf,
> +                                           uint8_t *vbuf, int linesize0, int linesize1,
> +                                           int linesize2, int height)
> +{
> +
> +    int chr_h = -(-height >> 1);
> +    int size = (linesize0 * height + linesize1 * chr_h * 2);
> +    if (!(gpu_env.temp_buffer)) {
> +        gpu_env.temp_buffer = av_malloc(size);
> +        gpu_env.temp_buffer_size = size;
> +    }
> +
> +    if(size > gpu_env.temp_buffer_size) {
> +        av_free(gpu_env.temp_buffer);
> +        gpu_env.temp_buffer = av_malloc(size);
> +        gpu_env.temp_buffer_size = size;
> +    }
> +
> +    if (av_opencl_read_cl_buffer(cl_inbuf,gpu_env.temp_buffer,
> +                                 (linesize0 + linesize1)*height)) {
> +        memcpy(ybuf,gpu_env.temp_buffer,linesize0 * height);
> +        memcpy(ubuf,gpu_env.temp_buffer + linesize0 * height,linesize1 *chr_h);
> +        memcpy(vbuf,gpu_env.temp_buffer + linesize0 * height + linesize1 * chr_h, linesize2 * chr_h);
> +    } else {
> +        av_log(&openclutils,AV_LOG_ERROR,"av_opencl_read_to_frame_buffer error\n");
> +        return 0;
> +    }
> +    return 1;
> +}
> +
> +int av_opencl_save_buffer(const char *filtername,void *cl_inbuf,int buf_size)
> +{
> +    int i = 0;
> +    while (strlen(filter_buffer[i].filter_name)) {
> +        i++;
> +    }
> +    if (i > (MAX_FILTER_NUM - 1)) {
> +        av_log(&openclutils,AV_LOG_ERROR,"filter num is too large\n");
> +        return AVERROR(EIO);
> +    }
> +    if(strlen(filtername) > MAX_FILTER_NAME_LEN) {
> +        av_log(&openclutils,AV_LOG_ERROR,"filter name is too long\n");
> +        return AVERROR(EIO);
> +    }
> +    av_strlcpy(filter_buffer[i].filter_name,filtername,MAX_FILTER_NAME_LEN+1);
> +    filter_buffer[i].cl_inbuf = cl_inbuf;
> +    filter_buffer[i].buf_size = buf_size;
> +    return 0;
> +}
> +
> +void *av_opencl_get_buffer(const char *filtername,int buf_size)
> +{
> +    for (int i = 0;i < MAX_FILTER_NUM;i++) {
> +        if (!strcmp(filtername,filter_buffer[i].filter_name)) {
> +            if (buf_size <= filter_buffer[i].buf_size) {
> +                return filter_buffer[i].cl_inbuf;
> +            } else {
> +                av_log(&openclutils,AV_LOG_ERROR,"buffer size is too large\n");
> +                return NULL;
> +            }
> +        }
> +    }
> +    return NULL;
> +}
> diff --git a/libavutil/opencl.h b/libavutil/opencl.h
> new file mode 100644
> index 0000000..dd1b59f
> --- /dev/null
> +++ b/libavutil/opencl.h
> @@ -0,0 +1,246 @@
> +/*
> + * Copyright (C) 2012 Peng Gao <peng at multicorewareinc.com>
> + * Copyright (C) 2012 Li   Cao <li at multicorewareinc.com>
> + * Copyright (C) 2012 Wei  Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "config.h"
> +
> +#ifndef LIBAVUTIL_OPENCLWRAPPER_H
> +#define LIBAVUTIL_OPENCLWRAPPER_H
> +
> +#include <CL/cl.h>
> +

> +#define AV_OPENCL_CHECK(method, ...)\
> +    status = method( __VA_ARGS__ ); if( status != CL_SUCCESS ) {\
> +        av_log(NULL,AV_LOG_ERROR, " error %s %d\n", # method, status );  return status; }
> +
> +#define AV_OPENCL_SET_KERNEL_ARG(arg_ptr)\
> +    status = clSetKernelArg((kernel),(arg_no++),(sizeof(arg_ptr)),(void*)(&(arg_ptr)));if( status != CL_SUCCESS ) {\
> +        av_log(NULL,AV_LOG_ERROR, " error %s %d\n", "clSetKernelArg", status );  return status; }

macros assuming a certain variable in the context are not acceptable
in a public header. Please move them to the place where they are
defined, they can be changed later if the need arises.

> +
> +
> +#define FF_OPENCL_KERNEL( ... )# __VA_ARGS__

FF_ if reserved for internal API, use AV_

> +
> +typedef struct AVOpenCLKernelEnv {
> +    cl_context context;
> +    cl_command_queue command_queue;
> +    cl_program program;
> +    cl_kernel kernel;
> +    char kernel_name[150];
> +}AVOpenCLKernelEnv;
> +
> +typedef struct AVOpenCLExternalInfo {
> +    cl_platform_id platform;
> +    cl_device_type device_type;
> +    cl_context context;

> +    cl_device_id *devices_id;
> +    cl_device_id  dev;

device_ids
device_id

may be better names

> +    cl_command_queue command_queue;
> +    char *platform_name;
> +}AVOpenCLExternalInfo;
> +
> +/**
> + * user defined, this is function wrapper which is used to set the input parameters.
> + * launch kernel and copy data from GPU to CPU or CPU to GPU.
> + */

User defined function, used to set the input parameter in the kernel
environment. This function launches kernel and copies data from GPU to
CPU, or from CPU to GPU.

...

I'm not yet sure what "to launch kernel" means.

> +
> +typedef int (*av_opencl_kernel_function)(void **userdata, AVOpenCLKernelEnv *kenv);
> +
> +/**
> + * register a wrapper for running the kernel specified by the kernel name.

Register ...

@param kernel_name name of the kernel
@param function user defined function
@return ...

> + *
> + */
> +
> +int av_opencl_register_kernel_wrapper(const char *kernel_name, av_opencl_kernel_function function);
> +
> +/**
> + *Launch OpenCL kernel.
> + *
> + *@param kernel_name   this kernel name is used to find the kernel in OpenCL runtime environment.
> + *@param userdata         this userdata is the all parameters for running the kernel specified by kernel name

> + *@return 1 on success, 0 on failure

about the error code, the convenction is to return >= 0 on success,
and an error code on failure. The negative error code should be one
defined in libavutil/error.h.

> + */
> +
> +int av_opencl_run_kernel(const char *kernel_name, void **userdata);
> +

> +/**
> + * Init the run time  OpenCL environment.
> + *
> + *This function must be called befor calling any function related to OpenCL.
> + *
> + *
> + *@param build_option         option of build the kernel in OpenCL runtime environment.

A link or a mention to spec would be useful.

> + *@param ext_opencl_info    this is the extern OpenCL environment witch the application program has created
> + *@return zero on success, a negative value on error
> + */
> +
> +int av_opencl_init_run_env(const char *build_option,void *ext_opencl_info);
> +
> +/**
> + * Relase all resource about the OpenCL , this function must be called after calling any functions related to OpenCL.

Release OpenCL resources. ...

> + */
> +
> +int av_opencl_release_opencl_run_env(void);

av_opencl_release_run_env() seems better/less redundant

what's the return code used for?


> +/**
> + * Get the OpenCL status, this function is used the check whether or not the OpenCL run time has been created.
> + *
> + *@return 0 not init, 1, inited;
> + *
> + */
> +int av_opencl_stats(void);

what about:
av_opencl_is_inited(void)?

Also how the function is supposed to be useful?

> +
> +/**
> + * Create kernel object  by a kernel name on the specified OpenCL run time indicated by env parameter.
> + *
> + *@param kernelname          kernel name.
> + *@param env                     The kernel environment witch has been created at the init OpenCL stage
> + *@return zero on success, a negative value on error
> + *
> + */
> +
> +int av_opencl_create_kernel(const char *kernelname, AVOpenCLKernelEnv *env);
> +

> +/**
> + *  Release kernel object.
> + *
> + *@param env  The kernel environment witch has been created at the init OpenCL stage.
> + *@return zero on success, a negative value on error
> + */
> +int av_opencl_release_kernel(AVOpenCLKernelEnv * env);

> +
> +/**
> + *  Get the kernel environment.
> + *
> + *@param env  The kernel environment witch has been created at the init OpenCL stage.

s/witch/which/ here and below

pointer to kernel environment which is filled with the kernel created
in the init OpenCL stage, must not be NULL.

Also "init OpenCL stage" is not really clear, you should mention the
function that created it.

> + *
> + */
> +
> +void av_opencl_get_kernel_env(AVOpenCLKernelEnv *env);


> +
> +/**
> + *  Create OpenCL buffer.
> + *
> + *@param cl_buf         The pointer of OpenCL buffer.
> + *@param flags           The flags witch used to control buffer attribute
> + *@param size            The size of OpenCL buffer
> + *@param host_ptr      The host pointer of OpenCL buffer
> + *@return zero on success, a negative value on error

mention what a buffer is useful for

> + */
> +
> +int av_opencl_create_buffer(void **cl_buf, int flags, int size,void *host_ptr);
> +
> +/**
> + *  Read OpenCL buffer data to memory.

from memory

> + *
> + *@param cl_buf         The pointer of OpenCL buffer.
> + *@param outbuf         CPU memory
> + *@param size            The size of OpenCL buffer
> + *@return zero on success, a negative value on error
> + */
> +int av_opencl_read_cl_buffer(void *cl_inbuf, uint8_t *outbuf, int size);

size_t is favored for size

av_opencl_read_buffer()

> +
> +/**
> + *  Write data from memroy to OpenCL buffer.

typo

> + *
> + *@param cl_buf                The pointer of OpenCL buffer.

> + *@param ybuf                  Y plane buffer
> + *@param ubuf                  U plane buffer
> + *@param vbuf                  V plane buffer

> + *@param linesize0            Y plane linesize
> + *@param linesize1            U plane linesize
> + *@param linesize2            V plane linesize

why not a data[4] and linesize[4] array?

Also does OpenCL support alpha planes (that implies that you need 4
planes to represent a buffer)?

> + *@param height               The height of video
> + *@param offset                The offset of OpenCL buffer start position
> + *@return 1 on success, 0 on error
> + */
> +
> +int av_opencl_write_cl_buffer(void *cl_inbuf, uint8_t *ybuf, uint8_t *ubuf, uint8_t *vbuf, int linesize0, int linesize1, int linesize2, int height, int offset);

av_opencl_write_buffer

> +
> +/**
> + *  Get OpenCL device id.
> + *
> + */
> +
> +cl_device_id av_opencl_get_device_id(void);
> +
> +/**
> + *  Get OpenCL context.
> + *
> + */
> +
> +cl_context av_opencl_get_context(void);
> +
> +/**
> + *  Get OpenCL command queue.
> + *
> + */
> +
> +cl_command_queue av_opencl_get_command_queue(void);
> +

> +/**
> + *  Release OpenCL buffer.
> + *
> + */
> +
> +void av_opencl_release_buffer(void *cl_buf);
> +

> +/**
> + *  Read frame data form OpenCL buffer to frame buffer.

typo, also not very clear

what's the source buffer, what's the destination buffer?

> + *
> + *@param cl_buf                The pointer of OpenCL buffer.
> + *@param ybuf                  Y plane buffer
> + *@param ubuf                  U plane buffer
> + *@param vbuf                  V plane buffer
> + *@param linesize0            Y plane linesize
> + *@param linesize1            U plane linesize
> + *@param linesize2            V plane linesize
> + *@param height               The height of video

usual consideration about data[4], linesize[4]

> + *@return 1 on success, 0 on error
> + */
> +
> +int av_opencl_read_to_frame_buffer(void *cl_inbuf, uint8_t *ybuf, uint8_t *ubuf, uint8_t *vbuf, int linesize0, int linesize1, int linesize2, int height);

> +
> +/**
> + *  Save OpenCL buffer as share buffer.

what is a share buffer?

> + *
> + *@param filtername                  filter name
> + *@param cl_inbuf                     OpenCL buffer
> + */
> +
> +int av_opencl_save_buffer(const char *filtername,void *cl_inbuf,int buf_size);
> +
> +/**
> + *  Get the OpenCL share buffer.
> + *
> + *@param filtername                  filter name
> + */
> +
> +void *av_opencl_get_buffer(const char *filtername,int buf_size);
> +
> +/**
> + *  Regist kernels.

Register kernels? kernel?

> + *
> + *@param kernel_name                  Regist kernel name
> + *@param kernel_code                   Kernel code
> + */
> +
> +void av_opencl_regist_kernel(const char *kernel_name,const char *kernel_code);

av_opencl_register_kernel, characters are cheap this days.

Also this will plainly crash unless you perform some operations
before, which are not documented.
-- 
FFmpeg = Faithless and Friendly Mere Proud Energized God


More information about the ffmpeg-devel mailing list