[FFmpeg-devel] [PATCH 1/2] libavutil/libavfilter: opencl wrapper based on comments on 20130401

Stefano Sabatini stefasab at gmail.com
Mon Apr 1 12:23:51 CEST 2013


On date Monday 2013-04-01 17:52:13 +0800, Wei Gao encoded:
> 

> From 8d7554e834fbb36bdfaaf384905320bce5bc29f0 Mon Sep 17 00:00:00 2001
> From: highgod0401 <highgod0401 at gmail.com>
> Date: Mon, 1 Apr 2013 17:46:11 +0800
> Subject: [PATCH 1/2] opencl wrapper based on comments on 20130401
> 
> ---
>  configure          |   4 +
>  libavutil/Makefile |   3 +
>  libavutil/opencl.c | 719 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  libavutil/opencl.h | 201 +++++++++++++++
>  4 files changed, 927 insertions(+)
>  create mode 100644 libavutil/opencl.c
>  create mode 100644 libavutil/opencl.h

Missing APIchanges entry and libavutil minor bump, but these can be
added by the committer.

> 
> diff --git a/configure b/configure
> index 8443db4..9c42a85 100755
> --- a/configure
> +++ b/configure
> @@ -233,6 +233,7 @@ External library support:
>    --enable-libxvid         enable Xvid encoding via xvidcore,
>                             native MPEG-4/Xvid encoder exists [no]
>    --enable-openal          enable OpenAL 1.1 capture support [no]
> +  --enable-opencl          enable OpenCL code
>    --enable-openssl         enable openssl [no]
>    --enable-x11grab         enable X11 grabbing [no]
>    --enable-zlib            enable zlib [autodetect]
> @@ -1178,6 +1179,7 @@ EXTERNAL_LIBRARY_LIST="
>      libxavs
>      libxvid
>      openal
> +    opencl
>      openssl
>      x11grab
>      zlib
> @@ -3982,6 +3984,7 @@ enabled openal     && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32
>                          die "ERROR: openal not found"; } &&
>                        { check_cpp_condition "AL/al.h" "defined(AL_VERSION_1_1)" ||
>                          die "ERROR: openal must be installed and version must be 1.1 or compatible"; }
> +enabled opencl     && require2 opencl CL/cl.h clEnqueueNDRangeKernel -lOpenCL
>  enabled openssl    && { check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto ||
>                          check_lib openssl/ssl.h SSL_library_init -lssl32 -leay32 ||
>                          check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto -lws2_32 -lgdi32 ||
> @@ -4350,6 +4353,7 @@ echo "network support           ${network-no}"
>  echo "threading support         ${thread_type-no}"
>  echo "safe bitstream reader     ${safe_bitstream_reader-no}"
>  echo "SDL support               ${sdl-no}"
> +echo "opencl enabled            ${opencl-no}"
>  echo "texi2html enabled         ${texi2html-no}"
>  echo "perl enabled              ${perl-no}"
>  echo "pod2man enabled           ${pod2man-no}"
> diff --git a/libavutil/Makefile b/libavutil/Makefile
> index 103ce5e..b520473 100644
> --- a/libavutil/Makefile
> +++ b/libavutil/Makefile
> @@ -52,6 +52,8 @@ HEADERS = adler32.h                                                     \
>  
>  HEADERS-$(CONFIG_LZO)                   += lzo.h
>  
> +HEADERS-$(CONFIG_OPENCL)                += opencl.h
> +
>  ARCH_HEADERS = bswap.h                                                  \
>                 intmath.h                                                \
>                 intreadwrite.h                                           \
> @@ -106,6 +108,7 @@ OBJS = adler32.o                                                        \
>         xtea.o                                                           \
>  
>  OBJS-$(CONFIG_LZO)                      += lzo.o
> +OBJS-$(CONFIG_OPENCL)                   += opencl.o
>  
>  OBJS += $(COMPAT_OBJS:%=../compat/%)
>  
> diff --git a/libavutil/opencl.c b/libavutil/opencl.c
> new file mode 100644
> index 0000000..1e8f9f1
> --- /dev/null
> +++ b/libavutil/opencl.c
> @@ -0,0 +1,719 @@
> +/*
> + * Copyright (C) 2012 Peng Gao <peng at multicorewareinc.com>
> + * Copyright (C) 2012 Li   Cao <li at multicorewareinc.com>
> + * Copyright (C) 2012 Wei  Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "opencl.h"
> +#include "avstring.h"
> +#include "log.h"
> +#include "avassert.h"
> +
> +#if HAVE_PTHREADS
> +
> +#include <pthread.h>
> +static pthread_mutex_t atomic_opencl_lock = PTHREAD_MUTEX_INITIALIZER;
> +
> +#define LOCK_OPENCL pthread_mutex_lock(&atomic_opencl_lock);
> +#define UNLOCK_OPENCL pthread_mutex_unlock(&atomic_opencl_lock);
> +
> +#elif !HAVE_THREADS
> +#define LOCK_OPENCL
> +#define UNLOCK_OPENCL
> +#endif
> +
> +
> +#define MAX_KERNEL_NUM 500
> +#define MAX_KERNEL_CODE_NUM 200
> +
> +typedef struct {
> +    int dev_idx;

nit: device_idx seems more explicit

> +    int platform_idx;
> +} UserSpecDevInfo;

Same for the name: UserSpecDeviceInfo

> +
> +typedef struct {
> +    int is_compiled;
> +    const char *kernel_string;
> +} KernelCode;
> +
> +typedef struct {
> +    int init_count;
> +    UserSpecDevInfo usr_spec_dev_info;
> +    cl_platform_id platform_id;
> +    cl_device_type device_type;
> +    cl_context context;
> +    cl_device_id *device_ids;
> +    cl_device_id device_id;
> +    cl_command_queue command_queue;
> +    int program_count;
> +    cl_program programs[MAX_KERNEL_CODE_NUM];
> +    int kernel_code_count;
> +    KernelCode kernel_code[MAX_KERNEL_CODE_NUM];
> +    int kernel_count;
> +    /**
> +         * if set to 1, the OpenCL environment was created by the user and
> +         * passed as AVOpenCLExternalEnv when initing ,0:created by opencl wrapper.
> +         */
> +    int is_user_created;
> +} GPUEnv;
> +
> +typedef struct {
> +    const AVClass *class;
> +    int log_offset;
> +    void *log_ctx;
> +} OpenclUtils;
> +
> +static const AVClass openclutils_class = {"OPENCLUTILS", av_default_item_name,
> +                                                   NULL, LIBAVUTIL_VERSION_INT,
> +                                                   offsetof(OpenclUtils, log_offset),
> +                                                   offsetof(OpenclUtils, log_ctx)};
> +static OpenclUtils openclutils = {&openclutils_class};
> +static GPUEnv gpu_env;
> +
> +typedef struct {
> +    int err_code;
> +    const char *err_str;
> +} OpenclErrorMsg;
> +
> +static const OpenclErrorMsg opencl_err_msg[] = {
> +        {CL_DEVICE_NOT_FOUND,                               "DEVICE NOT FOUND"},
> +        {CL_DEVICE_NOT_AVAILABLE,                           "DEVICE NOT AVAILABLE"},
> +        {CL_COMPILER_NOT_AVAILABLE,                         "COMPILER NOT AVAILABLE"},
> +        {CL_MEM_OBJECT_ALLOCATION_FAILURE,                  "MEM OBJECT ALLOCATION FAILURE"},
> +        {CL_OUT_OF_RESOURCES,                               "OUT OF RESOURCES"},
> +        {CL_OUT_OF_HOST_MEMORY,                             "OUT OF HOST MEMORY"},
> +        {CL_PROFILING_INFO_NOT_AVAILABLE,                   "PROFILING INFO NOT AVAILABLE"},
> +        {CL_MEM_COPY_OVERLAP,                               "MEM COPY OVERLAP"},
> +        {CL_IMAGE_FORMAT_MISMATCH,                          "IMAGE FORMAT MISMATCH"},
> +        {CL_IMAGE_FORMAT_NOT_SUPPORTED,                     "IMAGE FORMAT NOT_SUPPORTED"},
> +        {CL_BUILD_PROGRAM_FAILURE,                          "BUILD PROGRAM FAILURE"},
> +        {CL_MAP_FAILURE,                                    "MAP FAILURE"},
> +        {CL_MISALIGNED_SUB_BUFFER_OFFSET,                   "MISALIGNED SUB BUFFER OFFSET"},
> +        {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST,      "EXEC STATUS ERROR FOR EVENTS IN WAIT LIST"},
> +        {CL_COMPILE_PROGRAM_FAILURE,                        "COMPILE PROGRAM FAILURE"},
> +        {CL_LINKER_NOT_AVAILABLE,                           "LINKER NOT AVAILABLE"},
> +        {CL_LINK_PROGRAM_FAILURE,                           "LINK PROGRAM FAILURE"},
> +        {CL_DEVICE_PARTITION_FAILED,                        "DEVICE PARTITION FAILED"},
> +        {CL_KERNEL_ARG_INFO_NOT_AVAILABLE,                  "KERNEL ARG INFO NOT AVAILABLE"},
> +        {CL_INVALID_VALUE,                                  "INVALID VALUE"},
> +        {CL_INVALID_DEVICE_TYPE,                            "INVALID DEVICE TYPE"},
> +        {CL_INVALID_PLATFORM,                               "INVALID PLATFORM"},
> +        {CL_INVALID_DEVICE,                                 "INVALID DEVICE"},
> +        {CL_INVALID_CONTEXT,                                "INVALID CONTEXT"},
> +        {CL_INVALID_QUEUE_PROPERTIES,                       "INVALID QUEUE PROPERTIES"},
> +        {CL_INVALID_COMMAND_QUEUE,                          "INVALID COMMAND QUEUE"},
> +        {CL_INVALID_HOST_PTR,                               "INVALID HOST PTR"},
> +        {CL_INVALID_MEM_OBJECT,                             "INVALID MEM OBJECT"},
> +        {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,                "INVALID IMAGE FORMAT DESCRIPTOR"},
> +        {CL_INVALID_IMAGE_SIZE,                             "INVALID IMAGE SIZE"},
> +        {CL_INVALID_SAMPLER,                                "INVALID SAMPLER"},
> +        {CL_INVALID_BINARY,                                 "INVALID BINARY"},
> +        {CL_INVALID_BUILD_OPTIONS,                          "INVALID BUILD OPTIONS"},
> +        {CL_INVALID_PROGRAM,                                "INVALID PROGRAM"},
> +        {CL_INVALID_PROGRAM_EXECUTABLE,                     "INVALID PROGRAM EXECUTABLE"},
> +        {CL_INVALID_KERNEL_NAME,                            "INVALID KERNEL NAME"},
> +        {CL_INVALID_KERNEL_DEFINITION,                      "INVALID KERNEL DEFINITION"},
> +        {CL_INVALID_KERNEL,                                 "INVALID KERNEL"},
> +        {CL_INVALID_ARG_INDEX,                              "INVALID ARG INDEX"},
> +        {CL_INVALID_ARG_VALUE,                              "INVALID ARG VALUE"},
> +        {CL_INVALID_ARG_SIZE,                               "INVALID ARG_SIZE"},
> +        {CL_INVALID_KERNEL_ARGS,                            "INVALID KERNEL ARGS"},
> +        {CL_INVALID_WORK_DIMENSION,                         "INVALID WORK DIMENSION"},
> +        {CL_INVALID_WORK_GROUP_SIZE,                        "INVALID WORK GROUP SIZE"},
> +        {CL_INVALID_WORK_ITEM_SIZE,                         "INVALID WORK ITEM SIZE"},
> +        {CL_INVALID_GLOBAL_OFFSET,                          "INVALID GLOBAL OFFSET"},
> +        {CL_INVALID_EVENT_WAIT_LIST,                        "INVALID EVENT WAIT LIST"},
> +        {CL_INVALID_EVENT,                                  "INVALID EVENT"},
> +        {CL_INVALID_OPERATION,                              "INVALID OPERATION"},
> +        {CL_INVALID_GL_OBJECT,                              "INVALID GL OBJECT"},
> +        {CL_INVALID_BUFFER_SIZE,                            "INVALID BUFFER SIZE"},
> +        {CL_INVALID_MIP_LEVEL,                              "INVALID MIP LEVEL"},
> +        {CL_INVALID_GLOBAL_WORK_SIZE,                       "INVALID GLOBAL WORK SIZE"},
> +        {CL_INVALID_PROPERTY,                               "INVALID PROPERTY"},
> +        {CL_INVALID_IMAGE_DESCRIPTOR,                       "INVALID IMAGE DESCRIPTOR"},
> +        {CL_INVALID_COMPILER_OPTIONS,                       "INVALID COMPILER OPTIONS"},
> +        {CL_INVALID_LINKER_OPTIONS,                         "INVALID LINKER OPTIONS"},
> +        {CL_INVALID_DEVICE_PARTITION_COUNT,                 "INVALID DEVICE PARTITION COUNT"},
> +};
> +
> +static const char *opencl_errstr(cl_int status)
> +{
> +    int i;
> +    for (i = 0; i < sizeof(opencl_err_msg); i++) {
> +        if (opencl_err_msg[i].err_code == status)
> +            return opencl_err_msg[i].err_str;
> +    }
> +    return "unknown error";
> +}
> +
> +AVOpenCLExternalEnv *av_opencl_alloc_external_env(void)
> +{
> +    AVOpenCLExternalEnv *ext = av_mallocz(sizeof(AVOpenCLExternalEnv));
> +    if (!ext) {
> +        av_log(&openclutils, AV_LOG_ERROR,
> +         "Could not malloc external opencl environment data space\n");
> +    }
> +    return ext;
> +}
> +
> +void av_opencl_free_external_env(AVOpenCLExternalEnv **ext_opencl_env)
> +{
> +    av_freep(ext_opencl_env);
> +}
> +
> +int av_opencl_register_kernel_code(const char *kernel_code)
> +{
> +    int i, ret = 0;
> +    LOCK_OPENCL;
> +    if (gpu_env.kernel_code_count >= MAX_KERNEL_CODE_NUM) {
> +        av_log(&openclutils, AV_LOG_ERROR,
> +         "Could not register kernel code, maximum number of registered kernel code %d already reached\n",
> +         MAX_KERNEL_CODE_NUM);
> +        ret = AVERROR(EINVAL);
> +        goto end;
> +    }
> +    for (i = 0; i < gpu_env.kernel_code_count; i++) {
> +        if (gpu_env.kernel_code[i].kernel_string == kernel_code) {
> +            av_log(&openclutils, AV_LOG_WARNING, "Same kernel code has been registered\n");
> +            goto end;
> +        }
> +    }
> +    gpu_env.kernel_code[gpu_env.kernel_code_count].kernel_string = kernel_code;
> +    gpu_env.kernel_code[gpu_env.kernel_code_count].is_compiled = 0;
> +    gpu_env.kernel_code_count++;
> +end:
> +    UNLOCK_OPENCL;
> +    return ret;
> +}
> +
> +int av_opencl_create_kernel(AVOpenCLKernelEnv *env, const char *kernel_name)
> +{
> +    cl_int status;
> +    int i, ret = 0;
> +    LOCK_OPENCL;
> +    if (strlen(kernel_name) + 1 > AV_OPENCL_MAX_KERNEL_NAME_SIZE) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Created kernel name %s is too long\n", kernel_name);
> +        ret = AVERROR(EINVAL);
> +        goto end;
> +    }
> +    if (!env->kernel) {
> +        if (gpu_env.kernel_count >= MAX_KERNEL_NUM) {
> +            av_log(&openclutils, AV_LOG_ERROR,
> +            "Could not create kernel with name '%s', maximum number of kernels %d already reached\n",
> +                kernel_name, MAX_KERNEL_NUM);
> +            ret = AVERROR(EINVAL);
> +            goto end;
> +        }
> +        for (i = 0; i < gpu_env.program_count; i++) {
> +            env->kernel = clCreateKernel(gpu_env.programs[i], kernel_name, &status);
> +            if (status == CL_SUCCESS)
> +                break;
> +        }
> +        if (status != CL_SUCCESS) {
> +            av_log(&openclutils, AV_LOG_ERROR, "Could not create OpenCL kernel: %s\n", opencl_errstr(status));
> +            ret = AVERROR_EXTERNAL;
> +            goto end;
> +        }
> +        gpu_env.kernel_count++;
> +        env->command_queue = gpu_env.command_queue;
> +        av_strlcpy(env->kernel_name, kernel_name, sizeof(env->kernel_name));
> +    }
> +end:
> +    UNLOCK_OPENCL;
> +    return ret;
> +}
> +
> +void av_opencl_release_kernel(AVOpenCLKernelEnv *env)
> +{
> +    cl_int status;
> +    LOCK_OPENCL
> +    if (!env->kernel)
> +        goto end;
> +    status = clReleaseKernel(env->kernel);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not release kernel: %s\n",
> +              opencl_errstr(status));
> +    }
> +    env->kernel = NULL;
> +    env->command_queue = NULL;
> +    env->kernel_name[0] = 0;
> +    gpu_env.kernel_count--;
> +end:
> +    UNLOCK_OPENCL
> +}
> +
> +static int init_opencl_env(GPUEnv *gpu_env, AVOpenCLExternalEnv *ext_opencl_env)
> +{
> +    size_t device_length;
> +    cl_int status;
> +    cl_uint num_platforms, num_devices;
> +    cl_platform_id *platform_ids = NULL;
> +    cl_context_properties cps[3];
> +    char platform_name[100];
> +    int i, ret = 0;
> +    cl_device_type device_type[] = {CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_DEFAULT};
> +    if (ext_opencl_env) {
> +        if (gpu_env->is_user_created)
> +            return 0;
> +        gpu_env->platform_id     = ext_opencl_env->platform_id;
> +        gpu_env->is_user_created = 1;
> +        gpu_env->command_queue   = ext_opencl_env->command_queue;
> +        gpu_env->context         = ext_opencl_env->context;
> +        gpu_env->device_ids      = ext_opencl_env->device_ids;
> +        gpu_env->device_id       = ext_opencl_env->device_id;
> +        gpu_env->device_type     = ext_opencl_env->device_type;
> +    } else {
> +        if (!gpu_env->is_user_created) {
> +            status = clGetPlatformIDs(0, NULL, &num_platforms);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platform ids: %s\n", opencl_errstr(status));
> +                return AVERROR_EXTERNAL;
> +            }
> +            if (gpu_env->usr_spec_dev_info.platform_idx >= 0) {
> +                if (num_platforms < gpu_env->usr_spec_dev_info.platform_idx + 1) {
> +                    av_log(&openclutils, AV_LOG_ERROR, "User set platform index not exist\n");
> +                    return AVERROR(EINVAL);
> +                }
> +            }
> +            if (num_platforms > 0) {
> +                platform_ids = av_mallocz(num_platforms * sizeof(cl_platform_id));
> +                if (!platform_ids) {
> +                    ret = AVERROR(ENOMEM);
> +                    goto end;
> +                }
> +                status = clGetPlatformIDs(num_platforms, platform_ids, NULL);
> +                if (status != CL_SUCCESS) {
> +                    av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platform ids: %s\n", opencl_errstr(status));
> +                    ret = AVERROR_EXTERNAL;
> +                    goto end;
> +                }
> +                i = 0;
> +                if (gpu_env->usr_spec_dev_info.platform_idx >= 0) {
> +                    i = gpu_env->usr_spec_dev_info.platform_idx;
> +                }
> +                while (i < num_platforms) {
> +                    status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR,
> +                                               sizeof(platform_name), platform_name,
> +                                               NULL);
> +
> +                    if (status != CL_SUCCESS) {
> +                        av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platform info: %s\n", opencl_errstr(status));
> +                        ret = AVERROR_EXTERNAL;
> +                        goto end;
> +                    }
> +                    gpu_env->platform_id = platform_ids[i];
> +                    status = clGetDeviceIDs(gpu_env->platform_id, CL_DEVICE_TYPE_GPU,
> +                                            0, NULL, &num_devices);
> +                    if (status != CL_SUCCESS) {
> +                        av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device number:%s\n", opencl_errstr(status));
> +                        ret = AVERROR_EXTERNAL;
> +                        goto end;
> +                    }
> +                    if (num_devices == 0) {
> +                        //find CPU device
> +                        status = clGetDeviceIDs(gpu_env->platform_id, CL_DEVICE_TYPE_CPU,
> +                                             0, NULL, &num_devices);
> +                    }
> +                    if (status != CL_SUCCESS) {
> +                        av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device ids: %s\n", opencl_errstr(status));
> +                        ret = AVERROR(EINVAL);;
> +                        goto end;
> +                    }
> +                    if (num_devices)
> +                       break;
> +                    if (gpu_env->usr_spec_dev_info.platform_idx >= 0) {
> +                        av_log(&openclutils, AV_LOG_ERROR, "Device number of user set platform is 0\n");
> +                        ret = AVERROR_EXTERNAL;
> +                        goto end;
> +                    }
> +                    i++;
> +
> +                }
> +            }
> +            if (!gpu_env->platform_id) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platforms\n");
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +            if (gpu_env->usr_spec_dev_info.dev_idx >= 0) {
> +                if (num_devices < gpu_env->usr_spec_dev_info.dev_idx + 1) {
> +                    av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device idx in the user set platform\n");
> +                    ret = AVERROR(EINVAL);;
> +                    goto end;
> +                }
> +            }
> +
> +           /*
> +                    * Use available platform.
> +                    */
> +
> +            av_log(&openclutils, AV_LOG_VERBOSE, "Platform Name: %s\n", platform_name);
> +            cps[0] = CL_CONTEXT_PLATFORM;
> +            cps[1] = (cl_context_properties)gpu_env->platform_id;
> +            cps[2] = 0;
> +            /* Check for GPU. */
> +
> +            for (i = 0; i < sizeof(device_type); i++) {
> +                gpu_env->device_type = device_type[i];
> +                gpu_env->context     = clCreateContextFromType(cps, gpu_env->device_type,
> +                                                               NULL, NULL, &status);
> +                if (status != CL_SUCCESS) {
> +                    av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL context from device type: %s\n", opencl_errstr(status));
> +                    ret = AVERROR_EXTERNAL;
> +                    goto end;
> +                }
> +                if (gpu_env->context)
> +                    break;
> +            }
> +            if (!gpu_env->context) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL context from device type\n");
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +            /* Detect OpenCL devices. */
> +            /* First, get the size of device list data */
> +            status = clGetContextInfo(gpu_env->context, CL_CONTEXT_DEVICES,
> +                                      0, NULL, &device_length);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device length: %s\n", opencl_errstr(status));
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +            if (device_length == 0) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device length\n");
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +            /* Now allocate memory for device list based on the size we got earlier */
> +            gpu_env->device_ids = av_mallocz(device_length);
> +            if (!gpu_env->device_ids) {
> +                ret = AVERROR(ENOMEM);
> +                goto end;
> +            }
> +            /* Now, get the device list data */
> +            status = clGetContextInfo(gpu_env->context, CL_CONTEXT_DEVICES, device_length,
> +                                      gpu_env->device_ids, NULL);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL context info: %s\n", opencl_errstr(status));
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +            /* Create OpenCL command queue. */
> +            i = 0;
> +            if (gpu_env->usr_spec_dev_info.dev_idx >= 0) {
> +                i = gpu_env->usr_spec_dev_info.dev_idx;
> +            }
> +            gpu_env->command_queue = clCreateCommandQueue(gpu_env->context, gpu_env->device_ids[i],
> +                                                          0, &status);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not create OpenCL command queue: %s\n", opencl_errstr(status));
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +        }
> +    }
> +end:
> +    av_free(platform_ids);
> +    return ret;
> +}
> +
> +static int compile_kernel_file(GPUEnv *gpu_env, const char *build_options)
> +{
> +    cl_int status;
> +    char *temp, *source_str = NULL;
> +    size_t source_str_len = 0;
> +    int i, ret = 0;
> +
> +    for (i = 0; i < gpu_env->kernel_code_count; i++) {
> +        if (!gpu_env->kernel_code[i].is_compiled)
> +            source_str_len += strlen(gpu_env->kernel_code[i].kernel_string);
> +    }
> +    if (!source_str_len) {
> +        return 0;
> +    }
> +    source_str = av_mallocz(source_str_len + 1);
> +    if (!source_str) {
> +        return AVERROR(ENOMEM);
> +    }
> +    temp = source_str;
> +    for (i = 0; i < gpu_env->kernel_code_count; i++) {
> +        if (!gpu_env->kernel_code[i].is_compiled) {
> +            memcpy(temp, gpu_env->kernel_code[i].kernel_string,
> +                        strlen(gpu_env->kernel_code[i].kernel_string));
> +            gpu_env->kernel_code[i].is_compiled = 1;
> +            temp += strlen(gpu_env->kernel_code[i].kernel_string);
> +        }
> +    }
> +    /* create a CL program using the kernel source */
> +    gpu_env->programs[gpu_env->program_count] = clCreateProgramWithSource(gpu_env->context,
> +                                                           1, (const char **)(&source_str),
> +                                                                   &source_str_len, &status);
> +    if(status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not create OpenCL program with source code: %s\n",
> +               opencl_errstr(status));
> +        ret = AVERROR_EXTERNAL;
> +        goto end;
> +    }
> +    if (!gpu_env->programs[gpu_env->program_count]) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Created program is NULL\n");
> +        ret = AVERROR_EXTERNAL;
> +        goto end;
> +    }
> +    i = 0;
> +    if (gpu_env->usr_spec_dev_info.dev_idx >= 0)
> +        i = gpu_env->usr_spec_dev_info.dev_idx;
> +    /* create a cl program executable for all the devices specified */
> +    if (!gpu_env->is_user_created)
> +        status = clBuildProgram(gpu_env->programs[gpu_env->program_count], 1, &gpu_env->device_ids[i],
> +                                build_options, NULL, NULL);
> +    else
> +        status = clBuildProgram(gpu_env->programs[gpu_env->program_count], 1, &(gpu_env->device_id),
> +                                 build_options, NULL, NULL);
> +
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not compile OpenCL kernel: %s\n", opencl_errstr(status));
> +        ret = AVERROR_EXTERNAL;
> +        goto end;
> +    }
> +    gpu_env->program_count++;
> +end:
> +    av_free(source_str);
> +    return ret;
> +}
> +
> +int av_opencl_init(AVDictionary *options, AVOpenCLExternalEnv *ext_opencl_env)
> +{
> +    int ret = 0;
> +    AVDictionaryEntry *opt_build_entry;
> +    AVDictionaryEntry *opt_platform_entry;
> +    AVDictionaryEntry *opt_device_entry;
> +    LOCK_OPENCL
> +    if (!gpu_env.init_count) {
> +        opt_platform_entry = av_dict_get(options, "platform_idx", NULL, 0);
> +        opt_device_entry   = av_dict_get(options, "device_idx", NULL, 0);
> +        /*initialize devices, context, command_queue*/
> +        gpu_env.usr_spec_dev_info.platform_idx = -1;
> +        gpu_env.usr_spec_dev_info.dev_idx = -1;

> +        if (opt_platform_entry) {
> +            gpu_env.usr_spec_dev_info.platform_idx = strtol(opt_platform_entry->value, NULL, 10);
> +        }
> +        if (opt_device_entry) {
> +            gpu_env.usr_spec_dev_info.dev_idx = strtol(opt_device_entry->value, NULL, 10);
> +        }

Missing checks in case the string is not valid, for example if it is
"foo" it is interpreted as 0 (check for example
libavfilter/vf_framestep.c).

On the other hand I see there are many other parts of the code where
such checks are missing, so this is probably non-blocking (and we may
need a parse_number utility at some point).

> +        ret = init_opencl_env(&gpu_env, ext_opencl_env);
> +        if (ret < 0)
> +            goto end;
> +    }
> +    /*initialize program, kernel_name, kernel_count*/
> +    opt_build_entry = av_dict_get(options, "build_options", NULL, 0);
> +    if (opt_build_entry)
> +        ret = compile_kernel_file(&gpu_env, opt_build_entry->value);
> +    else
> +        ret = compile_kernel_file(&gpu_env, NULL);
> +    if (ret < 0)
> +        goto end;
> +    av_assert1(gpu_env.kernel_code_count > 0);
> +    gpu_env.init_count++;
> +
> +end:
> +    UNLOCK_OPENCL
> +    return ret;
> +}
> +
> +void av_opencl_uninit(void)
> +{
> +    cl_int status;
> +    int i;
> +    LOCK_OPENCL
> +    gpu_env.init_count--;
> +    if (gpu_env.is_user_created)
> +        goto end;
> +    if ((gpu_env.init_count > 0) || (gpu_env.kernel_count > 0))
> +        goto end;
> +    for (i = 0; i < gpu_env.program_count; i++) {
> +        if (gpu_env.programs[i]) {
> +            status = clReleaseProgram(gpu_env.programs[i]);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not release OpenCL program: %s\n", opencl_errstr(status));
> +            }
> +            gpu_env.programs[i] = NULL;
> +        }
> +    }
> +    if (gpu_env.command_queue) {
> +        status = clReleaseCommandQueue(gpu_env.command_queue);
> +        if (status != CL_SUCCESS) {
> +            av_log(&openclutils, AV_LOG_ERROR, "Could not release OpenCL command queue: %s\n", opencl_errstr(status));
> +        }
> +        gpu_env.command_queue = NULL;
> +    }
> +    if (gpu_env.context) {
> +        status = clReleaseContext(gpu_env.context);
> +        if (status != CL_SUCCESS) {
> +            av_log(&openclutils, AV_LOG_ERROR, "Could not release OpenCL context: %s\n", opencl_errstr(status));
> +        }
> +        gpu_env.context = NULL;
> +    }
> +    av_freep(&(gpu_env.device_ids));
> +end:
> +    UNLOCK_OPENCL
> +}
> +
> +int av_opencl_buffer_create(cl_mem *cl_buf, size_t cl_buf_size, int flags, void *host_ptr)
> +{
> +    cl_int status;
> +    *cl_buf = clCreateBuffer(gpu_env.context, flags, cl_buf_size, host_ptr, &status);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not create OpenCL buffer: %s\n", opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    return 0;
> +}
> +
> +void av_opencl_buffer_release(cl_mem *cl_buf)
> +{
> +    cl_int status = 0;
> +    if (!cl_buf)
> +        return;
> +    status = clReleaseMemObject(*cl_buf);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not release OpenCL buffer: %s\n", opencl_errstr(status));
> +    }
> +    memset(cl_buf, 0, sizeof(*cl_buf));
> +}
> +
> +int av_opencl_buffer_write(cl_mem dst_cl_buf, uint8_t *src_buf, size_t buf_size)
> +{
> +    cl_int status;
> +    void *mapped = clEnqueueMapBuffer(gpu_env.command_queue, dst_cl_buf,
> +                                      CL_TRUE,CL_MAP_WRITE, 0, sizeof(uint8_t) * buf_size,
> +                                      0, NULL, NULL, &status);
> +
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not map OpenCL buffer: %s\n", opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    memcpy(mapped, src_buf, buf_size);
> +
> +    status = clEnqueueUnmapMemObject(gpu_env.command_queue, dst_cl_buf, mapped, 0, NULL, NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not unmap OpenCL buffer: %s\n", opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    return 0;
> +}
> +
> +int av_opencl_buffer_read(uint8_t *dst_buf, cl_mem src_cl_buf, size_t buf_size)
> +{
> +    cl_int status;
> +    void *mapped = clEnqueueMapBuffer(gpu_env.command_queue, src_cl_buf,
> +                                      CL_TRUE,CL_MAP_READ, 0, buf_size,
> +                                      0, NULL, NULL, &status);
> +
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not map OpenCL buffer: %s\n", opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    memcpy(dst_buf, mapped, buf_size);
> +
> +    status = clEnqueueUnmapMemObject(gpu_env.command_queue, src_cl_buf, mapped, 0, NULL, NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not unmap OpenCL buffer: %s\n", opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    return 0;
> +}
> +
> +int av_opencl_buffer_write_image(cl_mem dst_cl_buf, size_t cl_buffer_size, int dst_cl_offset,
> +                                                    uint8_t **src_data, int *plane_size, int plane_num)
> +{
> +    int i, buffer_size = 0;
> +    uint8_t *temp;
> +    cl_int status;
> +    void *mapped;
> +    if ((unsigned int)plane_num > 8) {
> +        return AVERROR(EINVAL);
> +    }
> +    for (i = 0;i < plane_num;i++) {
> +        buffer_size += plane_size[i];
> +    }
> +    if (buffer_size > cl_buffer_size) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Cannot write image to OpenCL buffer: buffer too small\n");
> +        return AVERROR(EINVAL);
> +    }
> +    mapped = clEnqueueMapBuffer(gpu_env.command_queue, dst_cl_buf,
> +                                      CL_TRUE,CL_MAP_WRITE, 0, buffer_size + dst_cl_offset,
> +                                      0, NULL, NULL, &status);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not map OpenCL buffer: %s\n", opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    temp = mapped;
> +    temp += dst_cl_offset;
> +    for (i = 0; i < plane_num; i++) {
> +        memcpy(temp, src_data[i], plane_size[i]);
> +        temp += plane_size[i];
> +    }
> +    status = clEnqueueUnmapMemObject(gpu_env.command_queue, dst_cl_buf, mapped, 0, NULL, NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not unmap OpenCL buffer: %s\n", opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    return 0;
> +}
> +
> +int av_opencl_buffer_read_image(uint8_t **dst_data, int *plane_size, int plane_num,
> +                                       cl_mem src_cl_buf, size_t cl_buffer_size)
> +{
> +    int i,buffer_size = 0,ret = 0;
> +    uint8_t *temp;
> +    void *mapped;
> +    cl_int status;
> +    if ((unsigned int)plane_num > 8) {
> +        return AVERROR(EINVAL);
> +    }
> +    for (i = 0;i < plane_num;i++) {
> +        buffer_size += plane_size[i];
> +    }
> +    if (buffer_size > cl_buffer_size) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Cannot write image to CPU buffer: OpenCL buffer too small\n");
> +        return AVERROR(EINVAL);
> +    }
> +    mapped = clEnqueueMapBuffer(gpu_env.command_queue, src_cl_buf,
> +                                      CL_TRUE,CL_MAP_READ, 0, buffer_size,
> +                                      0, NULL, NULL, &status);
> +
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not map OpenCL buffer: %s\n", opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    temp = mapped;
> +    if (ret >= 0) {
> +        for (i = 0;i < plane_num;i++) {
> +            memcpy(dst_data[i], temp, plane_size[i]);
> +            temp += plane_size[i];
> +        }
> +    }
> +    status = clEnqueueUnmapMemObject(gpu_env.command_queue, src_cl_buf, mapped, 0, NULL, NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not unmap OpenCL buffer: %s\n", opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    return 0;
> +}
> +
> diff --git a/libavutil/opencl.h b/libavutil/opencl.h
> new file mode 100644
> index 0000000..941018a
> --- /dev/null
> +++ b/libavutil/opencl.h
> @@ -0,0 +1,201 @@
> +/*
> + * Copyright (C) 2012 Peng Gao <peng at multicorewareinc.com>
> + * Copyright (C) 2012 Li   Cao <li at multicorewareinc.com>
> + * Copyright (C) 2012 Wei  Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * OpenCL wrapper
> + *
> + * This interface is considered still experimental and its API and ABI may
> + * change without prior notice.
> + */
> +
> +#ifndef LIBAVUTIL_OPENCLWRAPPER_H
> +#define LIBAVUTIL_OPENCLWRAPPER_H
> +
> +#include <CL/cl.h>
> +#include "config.h"
> +#include "dict.h"
> +
> +#define AV_OPENCL_KERNEL( ... )# __VA_ARGS__
> +
> +#define AV_OPENCL_MAX_KERNEL_NAME_SIZE 150
> +
> +typedef struct {
> +    cl_command_queue command_queue;
> +    cl_kernel kernel;
> +    char kernel_name[AV_OPENCL_MAX_KERNEL_NAME_SIZE];
> +} AVOpenCLKernelEnv;
> +
> +typedef struct {
> +    cl_platform_id platform_id;
> +    cl_device_type device_type;
> +    cl_context context;
> +    cl_device_id *device_ids;
> +    cl_device_id  device_id;
> +    cl_command_queue command_queue;
> +    char *platform_name;
> +} AVOpenCLExternalEnv;
> +
> +/**
> + * Allocate OpenCL external environment.
> + *
> + * It must be freed with av_opencl_free_external_env().
> + *
> + * @return pointer to allocated OpenCL external environment
> + */
> +AVOpenCLExternalEnv *av_opencl_alloc_external_env(void);
> +
> +/**
> + * Free OpenCL external environment.
> + *
> + * @param ext_opencl_env pointer to OpenCL external environment created by av_opencl_alloc_external_env()
> + */
> +void av_opencl_free_external_env(AVOpenCLExternalEnv **ext_opencl_env);
> +
> +/**
> + * Register kernel code.
> + *
> + *  The registered kernel code is stored in a global context, and compiled
> + *  in the runtime environment when av_opencl_init() is called.
> + *
> + * @param kernel_code    kernel code to be compiled in the OpenCL runtime environment
> + * @return  >=0 on success, a negative error code in case of failure
> + */
> +int av_opencl_register_kernel_code(const char *kernel_code);
> +
> +/**
> + * Initialize the run time OpenCL environment and compile the kernel code registered with
> + * av_opencl_register_kernel_code().
> + *
> + * Currently, the only accepted option is "build_options", used to set
> + * options to compile registered kernels code. See reference "OpenCL
> + * Specification Version: 1.2 chapter 5.6.4".
> + *
> + * @param options                  dictionary of key/value options
> + * @param ext_opencl_env      external OpenCL environment, created by an
> + *                                          application program, ignored if set to NULL
> + * @return  >=0 on success, a negative error code in case of failure
> + */
> + int av_opencl_init(AVDictionary *options, AVOpenCLExternalEnv *ext_opencl_env);
> +
> +/**
> + * Create kernel object in the specified kernel environment.
> + *
> + * @param env                 pointer to kernel environment which is filled with the environment,
> + *                                   used to run the kernel
> + * @param kernel_name      kernel function name
> + * @return >=0 on success, a negative error code in case of failure
> + */
> +int av_opencl_create_kernel(AVOpenCLKernelEnv *env, const char *kernel_name);
> +
> +/**
> + * Create OpenCL buffer.
> + *
> + * The buffer is used to save the data used or created by an OpenCL
> + * kernel.
> + * The created buffer must be released with av_opencl_buffer_release().
> + *
> + * See clCreateBuffer() function reference for more information about
> + * the parameters.
> + *
> + * @param cl_buf       pointer to OpenCL buffer
> + * @param cl_buf_size  size in bytes of the OpenCL buffer to create
> + * @param flags        flags used to control buffer attributes
> + * @param host_ptr     host pointer of the OpenCL buffer
> + * @return >=0 on success, a negative error code in case of failure
> + */
> +int av_opencl_buffer_create(cl_mem *cl_buf, size_t cl_buf_size, int flags, void *host_ptr);
> +
> +/**
> + * Write OpenCL buffer with data from src_buf.
> + *
> + * @param dst_cl_buf        pointer to OpenCL destination buffer
> + * @param src_buf           pointer to source buffer
> + * @param buf_size          size in bytes of the source and destination buffers
> + * @return >=0 on success, a negative error code in case of failure
> + */
> +int av_opencl_buffer_write(cl_mem dst_cl_buf, uint8_t *src_buf, size_t buf_size);
> +
> +/**
> + * Read data from OpenCL buffer to memory buffer.
> + *
> + * @param dst_buf           pointer to destination buffer (CPU memory)
> + * @param src_cl_buf        pointer to source OpenCL buffer
> + * @param buf_size          size in bytes of the source and destination buffers
> + * @return >=0 on success, a negative error code in case of failure
> + */
> +int av_opencl_buffer_read(uint8_t *dst_buf, cl_mem src_cl_buf, size_t buf_size);
> +
> +/**
> + * Write image data from memory to OpenCL buffer.
> + *
> + * The source must be an array of pointers to image plane buffers.
> + *
> + * @param dst_cl_buf         pointer to destination OpenCL buffer
> + * @param dst_cl_buf_size    size in bytes of OpenCL buffer
> + * @param dst_cl_buf_offset  the offset of the OpenCL buffer start position
> + * @param src_data           array of pointers to source plane buffers
> + * @param src_plane_sizes    array of sizes in bytes of the source plane buffers
> + * @param src_plane_num      number of source image planes
> + * @return >=0 on success, a negative error code in case of failure
> + */
> +int av_opencl_buffer_write_image(cl_mem dst_cl_buf, size_t cl_buffer_size, int dst_cl_offset,
> +                                        uint8_t **src_data, int *plane_size, int plane_num);
> +/**
> + * Read image data from OpenCL buffer.
> + *

> + * src buffer is OpenCL buffer, dst buffer is frame buffer(data[0],data[1]....).

This comment could be probably skipped.

> + *
> + * @param dst_data           array of pointers to destination plane buffers
> + * @param dst_plane_sizes    array of pointers to destination plane buffers
> + * @param dst_plane_num      number of destination image planes
> + * @param src_cl_buf         pointer to source OpenCL buffer
> + * @param src_cl_buf_size    size in bytes of OpenCL buffer
> + * @return >=0 on success, a negative error code in case of failure
> + */
> +
> +int av_opencl_buffer_read_image(uint8_t **dst_data, int *plane_size, int plane_num,
> +                                       cl_mem src_cl_buf, size_t cl_buffer_size);
> +/**
> + * Release OpenCL buffer.
> + *
> + * @param cl_buf  pointer to OpenCL buffer to release, which was previously filled with av_opencl_buffer_create()
> + */
> +void av_opencl_buffer_release(cl_mem *cl_buf);
> +
> +/**
> + *  Release kernel object.
> + *
> + * @param env  kernel environment where the kernel object was created with av_opencl_create_kernel
> + */
> +void av_opencl_release_kernel(AVOpenCLKernelEnv *env);
> +
> +/**
> + *  Release OpenCL environment.
> + *
> + * The OpenCL environment is effectively released only if all the created
> + * kernels had been released with av_opencl_release_kernel().
> + */
> +void av_opencl_uninit(void);
> +
> +#endif/*LIBAVUTIL_OPENCL_H*/

No more comments from me, we can fix it later in case of need as the
API is marked as experimental, and other minor formatting nits can be
fixed when it's already in.

Thanks for the work so far.
-- 
FFmpeg = Fancy and Faithful Mega Pitiful Enigmatic Gospel


More information about the ffmpeg-devel mailing list