[FFmpeg-devel] [PATCH] avfilter/vf_program_opencl: allow setting kernel per plane
Paul B Mahol
onemda at gmail.com
Wed Feb 26 11:28:46 EET 2020
On 2/25/20, Mark Thompson <sw at jkqxz.net> wrote:
> On 24/02/2020 10:01, Paul B Mahol wrote:
>> Fixes #7190
>>
>> Signed-off-by: Paul B Mahol <onemda at gmail.com>
>> ---
>> doc/filters.texi | 22 ++++++++++++
>> libavfilter/vf_program_opencl.c | 64 ++++++++++++++++++++++-----------
>> 2 files changed, 65 insertions(+), 21 deletions(-)
>>
>> diff --git a/doc/filters.texi b/doc/filters.texi
>> index 70fd7a4cc7..6b10f649b9 100644
>> --- a/doc/filters.texi
>> +++ b/doc/filters.texi
>> @@ -21302,6 +21302,17 @@ Number of inputs to the filter. Defaults to 1.
>> @item size, s
>> Size of output frames. Defaults to the same as the first input.
>>
>> + at item kernel2
>> +Kernel name in program for 2nd plane, if not set kernel from option
>> + at var{kernel} is used.
>> +
>> + at item kernel3
>> +Kernel name in program for 3nd plane, if not set kernel from option
>> + at var{kernel} is used.
>
> Why this default? The kernel for the second plane feels a more obvious
> choice to me for cases like yuv420p.
It is easier.
>
>> +
>> + at item kernel4
>> +Kernel name in program for 4nd plane, if not set kernel from option
>> + at var{kernel} is used.
>> @end table
>>
>> The program source file must contain a kernel function with the given
>> name,
>
> An example using it would be nice to show the intended setup.
Example is omitted because its trivial.
>
>> @@ -22488,6 +22499,17 @@ Pixel format to use for the generated frames.
>> This must be set.
>> @item rate, r
>> Number of frames generated every second. Default value is '25'.
>>
>> + at item kernel2
>> +Kernel name in program for 2nd plane, if not set kernel from option
>> + at var{kernel} is used.
>> +
>> + at item kernel3
>> +Kernel name in program for 3nd plane, if not set kernel from option
>> + at var{kernel} is used.
>> +
>> + at item kernel4
>> +Kernel name in program for 4nd plane, if not set kernel from option
>> + at var{kernel} is used.
>> @end table
>>
>> For details of how the program loading works, see the
>> @ref{program_opencl}
>> diff --git a/libavfilter/vf_program_opencl.c
>> b/libavfilter/vf_program_opencl.c
>> index ec25e931f5..f748b15037 100644
>> --- a/libavfilter/vf_program_opencl.c
>> +++ b/libavfilter/vf_program_opencl.c
>> @@ -33,14 +33,14 @@ typedef struct ProgramOpenCLContext {
>>
>> int loaded;
>> cl_uint index;
>> - cl_kernel kernel;
>> + cl_kernel kernel[4];
>> cl_command_queue command_queue;
>>
>> FFFrameSync fs;
>> AVFrame **frames;
>>
>> const char *source_file;
>> - const char *kernel_name;
>> + const char *kernel_name[4];
>> int nb_inputs;
>> int width, height;
>> enum AVPixelFormat source_format;
>> @@ -66,15 +66,17 @@ static int program_opencl_load(AVFilterContext *avctx)
>> return AVERROR(EIO);
>> }
>>
>> - ctx->kernel = clCreateKernel(ctx->ocf.program, ctx->kernel_name,
>> &cle);
>> - if (!ctx->kernel) {
>> - if (cle == CL_INVALID_KERNEL_NAME) {
>> - av_log(avctx, AV_LOG_ERROR, "Kernel function '%s' not found
>> in "
>> - "program.\n", ctx->kernel_name);
>> - } else {
>> - av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n",
>> cle);
>> + for (int i = 0; i < 4; i++) {
>
> I don't think it's a good idea to make kernel objects for absent planes, and
> it should be an error to provide more kernels than planes.
I disagree.
>
>> + ctx->kernel[i] = clCreateKernel(ctx->ocf.program,
>> ctx->kernel_name[i] ? ctx->kernel_name[i] : ctx->kernel_name[0], &cle);
>
> Since the kernel you end up with is exactly the same, perhaps you would be
> better making only the named kernels and then choosing later which one to
> use rather than having many copies of the same object.
>
My way is much simpler.
> (Also, please avoid overlong lines.)
>
>> + if (!ctx->kernel[i]) {
>> + if (cle == CL_INVALID_KERNEL_NAME) {
>> + av_log(avctx, AV_LOG_ERROR, "Kernel function '%s' not
>> found in "
>> + "program.\n", ctx->kernel_name[i] ?
>> ctx->kernel_name[i] : ctx->kernel_name[0]);
>> + } else {
>> + av_log(avctx, AV_LOG_ERROR, "Failed to create kernel%d:
>> %d.\n", i, cle);
>> + }
>> + return AVERROR(EIO);
>> }
>> - return AVERROR(EIO);
>> }
>>
>> ctx->loaded = 1;
>> @@ -108,14 +110,14 @@ static int program_opencl_run(AVFilterContext
>> *avctx)
>> if (!dst)
>> break;
>>
>> - cle = clSetKernelArg(ctx->kernel, 0, sizeof(cl_mem), &dst);
>> + cle = clSetKernelArg(ctx->kernel[plane], 0, sizeof(cl_mem),
>> &dst);
>> if (cle != CL_SUCCESS) {
>> av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
>> "destination image argument: %d.\n", cle);
>> err = AVERROR_UNKNOWN;
>> goto fail;
>> }
>> - cle = clSetKernelArg(ctx->kernel, 1, sizeof(cl_uint),
>> &ctx->index);
>> + cle = clSetKernelArg(ctx->kernel[plane], 1, sizeof(cl_uint),
>> &ctx->index);
>> if (cle != CL_SUCCESS) {
>> av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
>> "index argument: %d.\n", cle);
>> @@ -129,7 +131,7 @@ static int program_opencl_run(AVFilterContext *avctx)
>> src = (cl_mem)ctx->frames[input]->data[plane];
>> av_assert0(src);
>>
>> - cle = clSetKernelArg(ctx->kernel, 2 + input, sizeof(cl_mem),
>> &src);
>> + cle = clSetKernelArg(ctx->kernel[plane], 2 + input,
>> sizeof(cl_mem), &src);
>> if (cle != CL_SUCCESS) {
>> av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
>> "source image argument %d: %d.\n", input, cle);
>> @@ -147,7 +149,7 @@ static int program_opencl_run(AVFilterContext *avctx)
>> "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
>> plane, global_work[0], global_work[1]);
>>
>> - cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2,
>> NULL,
>> + cle = clEnqueueNDRangeKernel(ctx->command_queue,
>> ctx->kernel[plane], 2, NULL,
>> global_work, NULL, 0, NULL, NULL);
>> CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n",
>> cle);
>> }
>> @@ -312,11 +314,13 @@ static av_cold void
>> program_opencl_uninit(AVFilterContext *avctx)
>> av_freep(&avctx->input_pads[i].name);
>> }
>>
>> - if (ctx->kernel) {
>> - cle = clReleaseKernel(ctx->kernel);
>> - if (cle != CL_SUCCESS)
>> - av_log(avctx, AV_LOG_ERROR, "Failed to release "
>> - "kernel: %d.\n", cle);
>> + for (i = 0; i < 4; i++) {
>> + if (ctx->kernel[i]) {
>> + cle = clReleaseKernel(ctx->kernel[i]);
>> + if (cle != CL_SUCCESS)
>> + av_log(avctx, AV_LOG_ERROR, "Failed to release "
>> + "kernel%d: %d.\n", i, cle);
>> + }
>> }
>>
>> if (ctx->command_queue) {
>> @@ -337,7 +341,7 @@ static av_cold void
>> program_opencl_uninit(AVFilterContext *avctx)
>> static const AVOption program_opencl_options[] = {
>> { "source", "OpenCL program source file", OFFSET(source_file),
>> AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
>> - { "kernel", "Kernel name in program", OFFSET(kernel_name),
>> + { "kernel", "Kernel name in program", OFFSET(kernel_name[0]),
>> AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
>>
>> { "inputs", "Number of inputs", OFFSET(nb_inputs),
>> @@ -348,6 +352,15 @@ static const AVOption program_opencl_options[] = {
>> { "s", "Video size", OFFSET(width),
>> AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, FLAGS },
>>
>> + { "kernel2", "Kernel name in program for 2nd plane",
>> OFFSET(kernel_name[1]),
>> + AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
>> +
>> + { "kernel3", "Kernel name in program for 3rd plane",
>> OFFSET(kernel_name[2]),
>> + AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
>> +
>> + { "kernel4", "Kernel name in program for 4th plane",
>> OFFSET(kernel_name[3]),
>> + AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
>> +
>> { NULL },
>> };
>>
>> @@ -384,7 +397,7 @@ AVFilter ff_vf_program_opencl = {
>> static const AVOption openclsrc_options[] = {
>> { "source", "OpenCL program source file", OFFSET(source_file),
>> AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
>> - { "kernel", "Kernel name in program", OFFSET(kernel_name),
>> + { "kernel", "Kernel name in program", OFFSET(kernel_name[0]),
>> AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
>>
>> { "size", "Video size", OFFSET(width),
>> @@ -400,6 +413,15 @@ static const AVOption openclsrc_options[] = {
>> { "r", "Video frame rate", OFFSET(source_rate),
>> AV_OPT_TYPE_VIDEO_RATE, { .str = "25" }, 0, INT_MAX, FLAGS },
>>
>> + { "kernel2", "Kernel name in program for 2nd plane",
>> OFFSET(kernel_name[1]),
>> + AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
>> +
>> + { "kernel3", "Kernel name in program for 3rd plane",
>> OFFSET(kernel_name[2]),
>> + AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
>> +
>> + { "kernel4", "Kernel name in program for 4th plane",
>> OFFSET(kernel_name[3]),
>> + AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
>> +
>> { NULL },
>> };
>
> The extra similar arguments are rather ugly to use ("-vf
> source=foo.cl:kernel=foo_y:kernel2=foo_y:kernel3=foo_v:kernel4=foo_a").
> Perhaps a single string separated by '+' ("-vf
> source=foo.cl:kernel=foo_y+foo_u+foo+v+foo_a") would be cleaner for the
> user?
How are they ugly?
The parsing of way you are proposing is very evil and ugly. And what
to do if kernel name contains + character?
Why you changed your mind again about using other options for kernels?
If you insist on this I will happily forget I ever sent this patch to
mailing list.
>
> - Mark
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
More information about the ffmpeg-devel
mailing list