[FFmpeg-devel] [PATCH] avfilter/vf_program_opencl: allow setting kernel per plane

Mark Thompson sw at jkqxz.net
Wed Feb 26 00:54:55 EET 2020


On 24/02/2020 10:01, Paul B Mahol wrote:
> Fixes #7190
> 
> Signed-off-by: Paul B Mahol <onemda at gmail.com>
> ---
>  doc/filters.texi                | 22 ++++++++++++
>  libavfilter/vf_program_opencl.c | 64 ++++++++++++++++++++++-----------
>  2 files changed, 65 insertions(+), 21 deletions(-)
> 
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 70fd7a4cc7..6b10f649b9 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -21302,6 +21302,17 @@ Number of inputs to the filter.  Defaults to 1.
>  @item size, s
>  Size of output frames.  Defaults to the same as the first input.
>  
> + at item kernel2
> +Kernel name in program for 2nd plane, if not set kernel from option
> + at var{kernel} is used.
> +
> + at item kernel3
> +Kernel name in program for 3nd plane, if not set kernel from option
> + at var{kernel} is used.

Why this default?  The kernel for the second plane feels a more obvious choice to me for cases like yuv420p.

> +
> + at item kernel4
> +Kernel name in program for 4nd plane, if not set kernel from option
> + at var{kernel} is used.
>  @end table
>  
>  The program source file must contain a kernel function with the given name,

An example using it would be nice to show the intended setup.

> @@ -22488,6 +22499,17 @@ Pixel format to use for the generated frames.  This must be set.
>  @item rate, r
>  Number of frames generated every second.  Default value is '25'.
>  
> + at item kernel2
> +Kernel name in program for 2nd plane, if not set kernel from option
> + at var{kernel} is used.
> +
> + at item kernel3
> +Kernel name in program for 3nd plane, if not set kernel from option
> + at var{kernel} is used.
> +
> + at item kernel4
> +Kernel name in program for 4nd plane, if not set kernel from option
> + at var{kernel} is used.
>  @end table
>  
>  For details of how the program loading works, see the @ref{program_opencl}
> diff --git a/libavfilter/vf_program_opencl.c b/libavfilter/vf_program_opencl.c
> index ec25e931f5..f748b15037 100644
> --- a/libavfilter/vf_program_opencl.c
> +++ b/libavfilter/vf_program_opencl.c
> @@ -33,14 +33,14 @@ typedef struct ProgramOpenCLContext {
>  
>      int                 loaded;
>      cl_uint             index;
> -    cl_kernel           kernel;
> +    cl_kernel           kernel[4];
>      cl_command_queue    command_queue;
>  
>      FFFrameSync         fs;
>      AVFrame           **frames;
>  
>      const char         *source_file;
> -    const char         *kernel_name;
> +    const char         *kernel_name[4];
>      int                 nb_inputs;
>      int                 width, height;
>      enum AVPixelFormat  source_format;
> @@ -66,15 +66,17 @@ static int program_opencl_load(AVFilterContext *avctx)
>          return AVERROR(EIO);
>      }
>  
> -    ctx->kernel = clCreateKernel(ctx->ocf.program, ctx->kernel_name, &cle);
> -    if (!ctx->kernel) {
> -        if (cle == CL_INVALID_KERNEL_NAME) {
> -            av_log(avctx, AV_LOG_ERROR, "Kernel function '%s' not found in "
> -                   "program.\n", ctx->kernel_name);
> -        } else {
> -            av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
> +    for (int i = 0; i < 4; i++) {

I don't think it's a good idea to make kernel objects for absent planes, and it should be an error to provide more kernels than planes.

> +        ctx->kernel[i] = clCreateKernel(ctx->ocf.program, ctx->kernel_name[i] ? ctx->kernel_name[i] : ctx->kernel_name[0], &cle);

Since the kernel you end up with is exactly the same, perhaps you would be better making only the named kernels and then choosing later which one to use rather than having many copies of the same object.

(Also, please avoid overlong lines.)

> +        if (!ctx->kernel[i]) {
> +            if (cle == CL_INVALID_KERNEL_NAME) {
> +                av_log(avctx, AV_LOG_ERROR, "Kernel function '%s' not found in "
> +                       "program.\n", ctx->kernel_name[i] ? ctx->kernel_name[i] : ctx->kernel_name[0]);
> +            } else {
> +                av_log(avctx, AV_LOG_ERROR, "Failed to create kernel%d: %d.\n", i, cle);
> +            }
> +            return AVERROR(EIO);
>          }
> -        return AVERROR(EIO);
>      }
>  
>      ctx->loaded = 1;
> @@ -108,14 +110,14 @@ static int program_opencl_run(AVFilterContext *avctx)
>          if (!dst)
>              break;
>  
> -        cle = clSetKernelArg(ctx->kernel, 0, sizeof(cl_mem), &dst);
> +        cle = clSetKernelArg(ctx->kernel[plane], 0, sizeof(cl_mem), &dst);
>          if (cle != CL_SUCCESS) {
>              av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
>                     "destination image argument: %d.\n", cle);
>              err = AVERROR_UNKNOWN;
>              goto fail;
>          }
> -        cle = clSetKernelArg(ctx->kernel, 1, sizeof(cl_uint), &ctx->index);
> +        cle = clSetKernelArg(ctx->kernel[plane], 1, sizeof(cl_uint), &ctx->index);
>          if (cle != CL_SUCCESS) {
>              av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
>                     "index argument: %d.\n", cle);
> @@ -129,7 +131,7 @@ static int program_opencl_run(AVFilterContext *avctx)
>              src = (cl_mem)ctx->frames[input]->data[plane];
>              av_assert0(src);
>  
> -            cle = clSetKernelArg(ctx->kernel, 2 + input, sizeof(cl_mem), &src);
> +            cle = clSetKernelArg(ctx->kernel[plane], 2 + input, sizeof(cl_mem), &src);
>              if (cle != CL_SUCCESS) {
>                  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
>                         "source image argument %d: %d.\n", input, cle);
> @@ -147,7 +149,7 @@ static int program_opencl_run(AVFilterContext *avctx)
>                 "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
>                 plane, global_work[0], global_work[1]);
>  
> -        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
> +        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel[plane], 2, NULL,
>                                       global_work, NULL, 0, NULL, NULL);
>          CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle);
>      }
> @@ -312,11 +314,13 @@ static av_cold void program_opencl_uninit(AVFilterContext *avctx)
>              av_freep(&avctx->input_pads[i].name);
>      }
>  
> -    if (ctx->kernel) {
> -        cle = clReleaseKernel(ctx->kernel);
> -        if (cle != CL_SUCCESS)
> -            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> -                   "kernel: %d.\n", cle);
> +    for (i = 0; i < 4; i++) {
> +        if (ctx->kernel[i]) {
> +            cle = clReleaseKernel(ctx->kernel[i]);
> +            if (cle != CL_SUCCESS)
> +                av_log(avctx, AV_LOG_ERROR, "Failed to release "
> +                       "kernel%d: %d.\n", i, cle);
> +        }
>      }
>  
>      if (ctx->command_queue) {
> @@ -337,7 +341,7 @@ static av_cold void program_opencl_uninit(AVFilterContext *avctx)
>  static const AVOption program_opencl_options[] = {
>      { "source", "OpenCL program source file", OFFSET(source_file),
>        AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
> -    { "kernel", "Kernel name in program",     OFFSET(kernel_name),
> +    { "kernel", "Kernel name in program",     OFFSET(kernel_name[0]),
>        AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
>  
>      { "inputs", "Number of inputs", OFFSET(nb_inputs),
> @@ -348,6 +352,15 @@ static const AVOption program_opencl_options[] = {
>      { "s",      "Video size",       OFFSET(width),
>        AV_OPT_TYPE_IMAGE_SIZE,       { .str = NULL }, 0, 0, FLAGS },
>  
> +    { "kernel2", "Kernel name in program for 2nd plane", OFFSET(kernel_name[1]),
> +      AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
> +
> +    { "kernel3", "Kernel name in program for 3rd plane", OFFSET(kernel_name[2]),
> +      AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
> +
> +    { "kernel4", "Kernel name in program for 4th plane", OFFSET(kernel_name[3]),
> +      AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
> +
>      { NULL },
>  };
>  
> @@ -384,7 +397,7 @@ AVFilter ff_vf_program_opencl = {
>  static const AVOption openclsrc_options[] = {
>      { "source", "OpenCL program source file", OFFSET(source_file),
>        AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
> -    { "kernel", "Kernel name in program",     OFFSET(kernel_name),
> +    { "kernel", "Kernel name in program",     OFFSET(kernel_name[0]),
>        AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
>  
>      { "size",   "Video size",       OFFSET(width),
> @@ -400,6 +413,15 @@ static const AVOption openclsrc_options[] = {
>      { "r",      "Video frame rate", OFFSET(source_rate),
>        AV_OPT_TYPE_VIDEO_RATE,       { .str = "25" }, 0, INT_MAX, FLAGS },
>  
> +    { "kernel2", "Kernel name in program for 2nd plane", OFFSET(kernel_name[1]),
> +      AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
> +
> +    { "kernel3", "Kernel name in program for 3rd plane", OFFSET(kernel_name[2]),
> +      AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
> +
> +    { "kernel4", "Kernel name in program for 4th plane", OFFSET(kernel_name[3]),
> +      AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
> +
>      { NULL },
>  };

The extra similar arguments are rather ugly to use ("-vf source=foo.cl:kernel=foo_y:kernel2=foo_y:kernel3=foo_v:kernel4=foo_a").  Perhaps a single string separated by '+' ("-vf source=foo.cl:kernel=foo_y+foo_u+foo+v+foo_a") would be cleaner for the user?

- Mark


More information about the ffmpeg-devel mailing list