[FFmpeg-devel] [PATCH] lavfi: add vflip_opencl, hflip_opencl
Mark Thompson
sw at jkqxz.net
Thu Jul 12 01:36:50 EEST 2018
On 09/07/18 03:26, Danil Iashchenko wrote:
> lavfi: add vflip_opencl, hflip_opencl.
> Behaves like existing vflip, hflip filters.
> ---
> configure | 2 +
> libavfilter/Makefile | 4 +
> libavfilter/allfilters.c | 2 +
> libavfilter/opencl/vflip.cl | 60 ++++++++++
> libavfilter/opencl_source.h | 1 +
> libavfilter/vf_vflip_opencl.c | 270 ++++++++++++++++++++++++++++++++++++++++++
> 6 files changed, 339 insertions(+)
> create mode 100644 libavfilter/opencl/vflip.cl
> create mode 100644 libavfilter/vf_vflip_opencl.c
These two filters feel a bit too trivial to make new files for? Currently they can be implemented with program_opencl and a handful of lines of code:
hflip.cl:
"""
/*
 * Mirror src horizontally into dst.  Each work-item writes exactly one
 * destination pixel, taken from the source column that lies at the same
 * distance from the opposite edge.  `index` is not used by this kernel.
 */
__kernel void hflip(__write_only image2d_t dst,
                    unsigned int index,
                    __read_only  image2d_t src)
{
    /* read_imagef() with integer coordinates needs nearest filtering and
     * a non-repeating addressing mode, so spell out every sampler field
     * instead of leaving two of them unspecified. */
    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
                               CLK_ADDRESS_CLAMP_TO_EDGE   |
                               CLK_FILTER_NEAREST);

    int2 dst_loc = (int2)(get_global_id(0), get_global_id(1));
    /* Column x maps to column (width - 1 - x); the row is unchanged. */
    int2 src_loc = (int2)(get_image_dim(dst).x - 1 - dst_loc.x, dst_loc.y);

    write_imagef(dst, dst_loc, read_imagef(src, sampler, src_loc));
}
"""
+
-vf ...,program_opencl=source=hflip.cl:kernel=hflip,...
and equivalently for vflip.
> ...
> diff --git a/libavfilter/opencl/vflip.cl b/libavfilter/opencl/vflip.cl
> new file mode 100644
> index 0000000..4ed2f43
> --- /dev/null
> +++ b/libavfilter/opencl/vflip.cl
> @@ -0,0 +1,60 @@
> ...
> + /* swap_pix: read src at loc and at loc1, then write each value to the other location in dst. */
> +void swap_pix(__write_only image2d_t dst,
> + __read_only image2d_t src,
> + int2 loc,
> + int2 loc1) {
> + /* Sampler: unnormalised coordinates, clamp-to-edge addressing, nearest filtering. */
> + const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> + CLK_ADDRESS_CLAMP_TO_EDGE |
> + CLK_FILTER_NEAREST);
> + /* Both pixels are fetched before either write is issued. */
> + float4 px = read_imagef(src, sampler, loc );
> + float4 px1 = read_imagef(src, sampler, loc1);
> + /* Crosswise write-back: loc receives the pixel read at loc1, and loc1 the pixel read at loc. */
> + write_imagef(dst, loc, px1);
> + write_imagef(dst, loc1, px );
> +}
Is this swap approach better than just writing the one pixel as above? Intuitively it feels slightly worse to me - every workitem ends up touching two completely different places in the input and output, which feels bad for optimisation/caching.
> +__kernel void vflip_global(__write_only image2d_t dst,
> + __read_only image2d_t src)
> +{
> + /* Vertical flip: pair this work-item's pixel with the one in the mirrored row and pass both to swap_pix. */
> + int2 imgSize = get_image_dim(src);
> + int2 loc = (int2)(get_global_id(0), get_global_id(1));
> + int2 loc1 = (int2)(loc.x, imgSize.y - loc.y - 1);
> + /* loc1 keeps the column of loc; its row is counted from the opposite edge of src. */
> + swap_pix(dst, src, loc, loc1);
> +}
> +
> + /* hflip_global: horizontal flip of src into dst, two pixels per work-item via swap_pix. */
> +__kernel void hflip_global(__write_only image2d_t dst,
> + __read_only image2d_t src)
> +{
> + /* loc is this work-item's position, taken from global ids 0 (x) and 1 (y). */
> + int2 imgSize = get_image_dim(src);
> + int2 loc = (int2)(get_global_id(0), get_global_id(1));
> + int2 loc1 = (int2)(imgSize.x - loc.x - 1, loc.y);
> + /* loc1 keeps the row of loc; its column is counted from the opposite edge of src. */
> + swap_pix(dst, src, loc, loc1);
> +}
> ...
- Mark
More information about the ffmpeg-devel mailing list