[FFmpeg-devel] [PATCH 1/2] libavutil/libavfilter: opencl wrapper based on comments on 20130329
Michael Niedermayer
michaelni at gmx.at
Fri Mar 29 16:13:14 CET 2013
On Fri, Mar 29, 2013 at 09:32:35PM +0800, Wei Gao wrote:
>
[...]
> +int av_opencl_create_kernel(AVOpenCLKernelEnv *env, const char *kernel_name)
> +{
> + cl_int status;
> + int i, ret = 0;
> + LOCK_OPENCL;
> + if (strlen(kernel_name) > sizeof(env->kernel_name)) {
that check is off by 1
a string of 1 char needs a 2 char buffer, the second for the 0
> + av_log(&openclutils, AV_LOG_ERROR, "Created kernel name %s is too long\n", kernel_name);
> + ret = AVERROR(EINVAL);
> + goto end;
> + }
> + if (!env->kernel) {
> + for (i = 0;i < gpu_env.kernel_count;i++) {
> + if (av_strcasecmp(kernel_name, gpu_env.kernel_node[i].kernel_name) == 0) {
> + env->kernel = gpu_env.kernel_node[i].kernel;
> + gpu_env.kernel_node[i].kernel_ref++;
> + break;
> + }
> + }
> + if (!env->kernel) {
> + env->kernel = clCreateKernel(gpu_env.program, kernel_name, &status);
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not create OpenCL kernel: %s\n", opencl_errstr(status));
> + ret = AVERROR_EXTERNAL;
> + goto end;
> + }
> + av_strlcpy(gpu_env.kernel_node[gpu_env.kernel_count].kernel_name, kernel_name,
> + sizeof(gpu_env.kernel_node[gpu_env.kernel_count].kernel_name));
> + gpu_env.kernel_node[gpu_env.kernel_count].kernel= env->kernel;
> + gpu_env.kernel_node[gpu_env.kernel_count].kernel_ref++;
> + gpu_env.kernel_count++;
this is missing a check against the size of kernel_node
or a comment why such check would not be needed
[...]
> +static int init_opencl_env(GPUEnv *gpu_env, AVOpenCLExternalInfo *ext_opencl_info)
> +{
> + size_t device_length;
> + cl_int status;
> + cl_uint num_platforms, num_devices;
> + cl_platform_id *platform_ids = NULL;
> + cl_context_properties cps[3];
> + char platform_name[100];
> + int i, ret = 0;
> + cl_device_type device_type[] = {CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_DEFAULT};
> + if (ext_opencl_info) {
> + if (gpu_env->is_user_created)
> + return 0;
> + gpu_env->platform = ext_opencl_info->platform;
> + gpu_env->is_user_created = 1;
> + gpu_env->command_queue = ext_opencl_info->command_queue;
> + gpu_env->context = ext_opencl_info->context;
> + gpu_env->device_ids = ext_opencl_info->device_ids;
> + gpu_env->device_id = ext_opencl_info->device_id;
> + gpu_env->device_type = ext_opencl_info->device_type;
> + } else {
> + if (!gpu_env->is_user_created) {
> + status = clGetPlatformIDs(0, NULL, &num_platforms);
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platform ids: %s\n", opencl_errstr(status));
> + return AVERROR_EXTERNAL;
> + }
> + if (num_platforms > 0) {
> + platform_ids = av_mallocz(num_platforms * sizeof(cl_platform_id));
> + if (!platform_ids) {
> + ret = AVERROR(ENOMEM);
> + goto end;
> + }
> + status = clGetPlatformIDs(num_platforms, platform_ids, NULL);
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platform ids: %s\n", opencl_errstr(status));
> + ret = AVERROR_EXTERNAL;
> + goto end;
> + }
> + for (i = 0; i < num_platforms; i++) {
> + status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR,
> + sizeof(platform_name), platform_name,
> + NULL);
> +
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platform info: %s\n", opencl_errstr(status));
> + ret = AVERROR_EXTERNAL;
> + goto end;
> + }
> + gpu_env->platform = platform_ids[i];
> + status = clGetDeviceIDs(gpu_env->platform, CL_DEVICE_TYPE_GPU,
> + 0, NULL, &num_devices);
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device number:%s\n", opencl_errstr(status));
> + ret = AVERROR_EXTERNAL;
> + goto end;
> + }
> + if (num_devices == 0) {
> + //find CPU device
> + status = clGetDeviceIDs(gpu_env->platform, CL_DEVICE_TYPE_CPU,
> + 0, NULL, &num_devices);
> + }
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device ids: %s\n", opencl_errstr(status));
> + ret = AVERROR_EXTERNAL;
> + goto end;
> + }
> + if (num_devices)
> + break;
> +
> + }
> + }
> + if (!gpu_env->platform) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platforms\n");
> + ret = AVERROR_EXTERNAL;
> + goto end;
> + }
> +
> + /*
> + * Use available platform.
> + */
> + av_log(&openclutils, AV_LOG_VERBOSE, "Platform Name: %s\n", platform_name);
> + cps[0] = CL_CONTEXT_PLATFORM;
> + cps[1] = (cl_context_properties)gpu_env->platform;
> + cps[2] = 0;
> + /* Check for GPU. */
> +
> + for (i = 0; i < sizeof(device_type); i++) {
> + gpu_env->device_type = device_type[i];
> + gpu_env->context = clCreateContextFromType(cps, gpu_env->device_type,
> + NULL, NULL, &status);
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL context from device type: %s\n", opencl_errstr(status));
> + ret = AVERROR_EXTERNAL;
> + goto end;
> + }
> + if (gpu_env->context)
> + break;
> + }
> + if (!gpu_env->context) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL context from device type\n");
> + ret = AVERROR_EXTERNAL;
> + goto end;
> + }
> + /* Detect OpenCL devices. */
> + /* First, get the size of device list data */
> + status = clGetContextInfo(gpu_env->context, CL_CONTEXT_DEVICES,
> + 0, NULL, &device_length);
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device length: %s\n", opencl_errstr(status));
> + ret = AVERROR_EXTERNAL;
> + goto end;
> + }
> + if (device_length == 0) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device length\n");
> + ret = AVERROR_EXTERNAL;
> + goto end;
> + }
> + /* Now allocate memory for device list based on the size we got earlier */
> + gpu_env->device_ids = av_mallocz(device_length);
> + if (!gpu_env->device_ids) {
> + ret = AVERROR(ENOMEM);
> + goto end;
> + }
> + /* Now, get the device list data */
> + status = clGetContextInfo(gpu_env->context, CL_CONTEXT_DEVICES, device_length,
> + gpu_env->device_ids, NULL);
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL context info: %s\n", opencl_errstr(status));
> + ret = AVERROR_EXTERNAL;
> + goto end;
> + }
> + /* Create OpenCL command queue. */
> + gpu_env->command_queue = clCreateCommandQueue(gpu_env->context, gpu_env->device_ids[0],
> + 0, &status);
This looks like only the first device would ever be used if there are
multiple.
Seems not optimal for a system the contains multiple devices
[...]
> +int av_opencl_buffer_write(cl_mem dst_cl_buf, uint8_t *src_buf, size_t buf_size)
> +{
> + cl_int status;
> + void *mapped = clEnqueueMapBuffer(gpu_env.command_queue, dst_cl_buf,
> + CL_TRUE,CL_MAP_WRITE, 0, sizeof(uint8_t) * buf_size,
> + 0, NULL, NULL, &status);
> +
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not map OpenCL buffer: %s\n", opencl_errstr(status));
> + return AVERROR_EXTERNAL;
> + }
> + memcpy(mapped, src_buf, buf_size);
> +
> + status = clEnqueueUnmapMemObject(gpu_env.command_queue, dst_cl_buf, mapped, 0, NULL, NULL);
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not unmap OpenCL buffer: %s\n", opencl_errstr(status));
> + return AVERROR_EXTERNAL;
> + }
> + return 0;
> +}
> +
> +int av_opencl_buffer_read(uint8_t *dst_buf, cl_mem src_cl_buf, size_t buf_size)
> +{
> + cl_int status;
> + void *mapped = clEnqueueMapBuffer(gpu_env.command_queue, src_cl_buf,
> + CL_TRUE,CL_MAP_READ, 0, buf_size,
> + 0, NULL, NULL, &status);
> +
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not map OpenCL buffer: %s\n", opencl_errstr(status));
> + return AVERROR_EXTERNAL;
> + }
> + memcpy(dst_buf, mapped, buf_size);
Why is the buffer copied instead of the data directly used ?
this also applies to av_opencl_buffer_read_image, av_opencl_buffer_write and av_opencl_buffer_write_image
[...]
> +typedef struct AVOpenCLKernelEnv {
> + cl_context context;
> + cl_command_queue command_queue;
> + cl_program program;
> + cl_kernel kernel;
> + char kernel_name[AV_OPENCL_MAX_KERNEL_NAME_SIZE];
> +} AVOpenCLKernelEnv;
> +
> +typedef struct AVOpenCLExternalInfo {
> + cl_platform_id platform;
> + cl_device_type device_type;
> + cl_context context;
> + cl_device_id *device_ids;
> + cl_device_id device_id;
> + cl_command_queue command_queue;
> + char *platform_name;
> +} AVOpenCLExternalInfo;
If a user (application) has to create this struct then it cannot be
extended later (would break ABI). I dont know if this could become a
problem, a solution would be to provide a dunction to allocate such
structure
Also the whole should be tested with valgrind so it does no out of
array accesses and has no memleaks
[...]
Thanks
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
You can kill me, but you cannot change the truth.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130329/c55ffb28/attachment.asc>
More information about the ffmpeg-devel
mailing list