[FFmpeg-devel] [PATCH 1/2] libavutil/libavfilter: opencl wrapper based on comments on 20130331

Sun Mar 31 15:14:45 CEST 2013

Hi,

Thanks for your reviewing, some explanations as follows

Thanks
Best regards

2013/3/31 Michael Niedermayer <michaelni at gmx.at>

> On Sun, Mar 31, 2013 at 03:57:21PM +0800, Wei Gao wrote:
> [...]
> > +void av_opencl_free_external_environment(AVOpenCLExternalEnv
> *ext_opencl_env)
> > +{
> > +    av_freep(&ext_opencl_env);
> > +}
>
> this doesnt work, av_freep sets the _local_ pointer to zero not the
> one from the user, you need
>
> void av_opencl_free_external_environment(AVOpenCLExternalEnv
> **ext_opencl_env)
>
> for the zeroing to work
>
>
> [...]
> > +int av_opencl_create_kernel(AVOpenCLKernelEnv *env, const char
> *kernel_name)
> > +{
> > +    cl_int status;
> > +    int i, ret = 0;
> > +    LOCK_OPENCL;
> > +    if (strlen(kernel_name) + 1 > AV_OPENCL_MAX_KERNEL_NAME_LEN) {
> > +        av_log(&openclutils, AV_LOG_ERROR, "Created kernel name %s is
> too long\n", kernel_name);
> > +        ret = AVERROR(EINVAL);
> > +        goto end;
> > +    }
> > +    if (!env->kernel) {
> > +        for (i = 0;i < gpu_env.kernel_count;i++) {
> > +            if (av_strcasecmp(kernel_name,
> gpu_env.kernel_node[i].kernel_name) == 0) {
> > +                env->kernel = gpu_env.kernel_node[i].kernel;
> > +                gpu_env.kernel_node[i].kernel_ref++;
> > +                break;
> > +            }
> > +        }
> > +        if (!env->kernel) {
> > +            if (gpu_env.kernel_count >= MAX_KERNEL_NUM) {
> > +                av_log(&openclutils, AV_LOG_ERROR,
> > +                "Could not create kernel with name '%s', maximum number
> of kernels %d already reached\n",
> > +                    kernel_name, MAX_KERNEL_NUM);
> > +                ret = AVERROR(EINVAL);
> > +                goto end;
> > +            }
> > +            for (i = 0; i < gpu_env.program_count; i++) {
> > +                env->kernel = clCreateKernel(gpu_env.programs[i],
> kernel_name, &status);
> > +                if (status == CL_SUCCESS)
> > +                    break;
> > +            }
> > +            if (status != CL_SUCCESS) {
> > +                av_log(&openclutils, AV_LOG_ERROR, "Could not create
> OpenCL kernel: %s\n", opencl_errstr(status));
> > +                ret = AVERROR_EXTERNAL;
> > +                goto end;
> > +            }
> > +
>  av_strlcpy(gpu_env.kernel_node[gpu_env.kernel_count].kernel_name,
> kernel_name,
> > +
> sizeof(gpu_env.kernel_node[gpu_env.kernel_count].kernel_name));
> > +            gpu_env.kernel_node[gpu_env.kernel_count].kernel=
> env->kernel;
> > +            gpu_env.kernel_node[gpu_env.kernel_count].kernel_ref++;
> > +            gpu_env.kernel_count++;
> > +        }
> > +        env->command_queue = gpu_env.command_queue;
> > +        av_strlcpy(env->kernel_name, kernel_name,
> sizeof(env->kernel_name));
> > +    }
> > +end:
> > +    UNLOCK_OPENCL;
> > +    return ret;
> > +}
> > +
> > +void av_opencl_release_kernel(AVOpenCLKernelEnv *env)
> > +{
> > +    cl_int status;
> > +    int i;
> > +    LOCK_OPENCL
> > +    if (!env->kernel)
> > +        goto end;
> > +    for (i = 0;i < gpu_env.kernel_count;i++) {
> > +        if (gpu_env.kernel_node[i].kernel == env->kernel) {
> > +            if (gpu_env.kernel_node[i].kernel_ref == 1) {
> > +                status = clReleaseKernel(env->kernel);
> > +                if (status != CL_SUCCESS) {
> > +                    av_log(&openclutils, AV_LOG_ERROR, "Could not
> release kernel: %s\n",
> > +                          opencl_errstr(status));
> > +                }
> > +                gpu_env.kernel_node[i].kernel = NULL;
> > +                env->kernel = NULL;
> > +                memset(gpu_env.kernel_node[i].kernel_name, 0,
> sizeof(gpu_env.kernel_node[i].kernel_name));
> > +                gpu_env.kernel_count--;
> > +            }
> > +            gpu_env.kernel_node[i].kernel_ref--;
>
> This doesnt look like creating kernels A,B,C,D and then releasing B
> would work
> as the kernel_count would then point to D and overwrite it on the
> next create or loose it not being able to release
>
> also, i wonder if caching kernels like this is worth it, is
> clCreateKernel() slow ?
>
I think I make it complex, the reuse of kernel is just for save the
resource of GPU, but GPU resource seems enough. I think make it sample that
when create kernel,then kernel_count + 1, release kernel then kernel_count
- 1, and at the unit_opencl function, check the kernel_cout and init count,


>
>
> [...]
> > +void av_opencl_uninit(void)
> > +{
> > +    cl_int status;
> > +    int i;
> > +    LOCK_OPENCL
> > +    if (!gpu_env.opencl_is_inited)
> > +        goto end;
> > +    if (gpu_env.is_user_created)
> > +        goto end;
> > +    for (i = 0;i < gpu_env.kernel_count;i++) {
> > +        if (gpu_env.kernel_node[i].kernel)
> > +            goto end;
> > +    }
> > +    for (i = 0; i < gpu_env.program_count; i++) {
> > +        if (gpu_env.programs[i]) {
> > +            status = clReleaseProgram(gpu_env.programs[i]);
> > +            if (status != CL_SUCCESS) {
> > +                av_log(&openclutils, AV_LOG_ERROR, "Could not release
> OpenCL program: %s\n", opencl_errstr(status));
> > +            }
> > +            gpu_env.programs[i] = NULL;
> > +        }
> > +    }
> > +    if (gpu_env.command_queue) {
> > +        status = clReleaseCommandQueue(gpu_env.command_queue);
> > +        if (status != CL_SUCCESS) {
> > +            av_log(&openclutils, AV_LOG_ERROR, "Could not release
> OpenCL command queue: %s\n", opencl_errstr(status));
> > +        }
> > +        gpu_env.command_queue = NULL;
> > +    }
> > +    if (gpu_env.context) {
> > +        status = clReleaseContext(gpu_env.context);
> > +        if (status != CL_SUCCESS) {
> > +            av_log(&openclutils, AV_LOG_ERROR, "Could not release
> OpenCL context: %s\n", opencl_errstr(status));
> > +        }
> > +        gpu_env.context = NULL;
> > +    }
> > +    av_freep(&(gpu_env.device_ids));
> > +    gpu_env.opencl_is_inited = 0;
>
> this can be hit by a race condition
>
> consider
> Thread1 av_opencl_init, av_opencl_create_kernel, av_opencl_release_kernel,
> av_opencl_uninit
> Thread2 av_opencl_init,
>                    av_opencl_create_kernel, av_opencl_release_kernel,
> av_opencl_uninit
>
> Thread1 uninit frees the various things that thread2 will still access
>
Yes, I should check the init count, my mistake

>
> [...]
>
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Many things microsoft did are stupid, but not doing something just because
> microsoft did it is even more stupid. If everything ms did were stupid they
> would be bankrupt already.
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>