[FFmpeg-devel] [PATCH] avfilter/vf_lut3d_opencl Initial support for OpenCL implementation of vf_lut3d.
Jan Studený
jendas1 at yahoo.com
Mon Apr 28 13:37:27 EEST 2025
---
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/opencl/lut3d.cl | 177 ++++++++++++++
libavfilter/opencl_source.h | 2 +
libavfilter/vf_lut3d_opencl.c | 444 ++++++++++++++++++++++++++++++++++
5 files changed, 625 insertions(+)
create mode 100644 libavfilter/opencl/lut3d.cl
create mode 100644 libavfilter/vf_lut3d_opencl.c
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 7c0d879ec9..6524d0f91a 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -378,6 +378,7 @@ OBJS-$(CONFIG_LUT1D_FILTER) += vf_lut3d.o
OBJS-$(CONFIG_LUT_FILTER) += vf_lut.o
OBJS-$(CONFIG_LUT2_FILTER) += vf_lut2.o framesync.o
OBJS-$(CONFIG_LUT3D_FILTER) += vf_lut3d.o framesync.o
+OBJS-$(CONFIG_LUT3D_OPENCL_FILTER) += vf_lut3d_opencl.o opencl.o opencl/lut3d.o
OBJS-$(CONFIG_LUTRGB_FILTER) += vf_lut.o
OBJS-$(CONFIG_LUTYUV_FILTER) += vf_lut.o
OBJS-$(CONFIG_MASKEDCLAMP_FILTER) += vf_maskedclamp.o framesync.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 740d9ab265..72c2f48ac4 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -353,6 +353,7 @@ extern const FFFilter ff_vf_lut;
extern const FFFilter ff_vf_lut1d;
extern const FFFilter ff_vf_lut2;
extern const FFFilter ff_vf_lut3d;
+extern const FFFilter ff_vf_lut3d_opencl;
extern const FFFilter ff_vf_lutrgb;
extern const FFFilter ff_vf_lutyuv;
extern const FFFilter ff_vf_maskedclamp;
diff --git a/libavfilter/opencl/lut3d.cl b/libavfilter/opencl/lut3d.cl
new file mode 100644
index 0000000000..16dfecdc4e
--- /dev/null
+++ b/libavfilter/opencl/lut3d.cl
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2025 Jan Studeny
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+typedef struct rgbvec {
+ float r, g, b, a;
+} rgbvec;
+
+#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+
+#define NEAR(x) ((int)((x) + .5))
+#define PREV(x) ((int)(x))
+#define NEXT(x) (MIN((int)(x) + 1, lut_edge_size - 1))
+
+/**
+ * Get the nearest defined point
+ */
+static rgbvec interp_nearest(float4 px, __global const rgbvec *lut, int lut_edge_size)
+{
+ int r = NEAR(px[0]);
+ int g = NEAR(px[1]);
+ int b = NEAR(px[2]);
+ int index = r * lut_edge_size * lut_edge_size + g * lut_edge_size + b;
+ return lut[index];
+}
+
+static float lerpf(float v0, float v1, float f)
+{
+ return v0 + (v1 - v0) * f;
+}
+
+static rgbvec lerp(const rgbvec *v0, const rgbvec *v1, float f)
+{
+ rgbvec v = {
+ lerpf(v0->r, v1->r, f), lerpf(v0->g, v1->g, f), lerpf(v0->b, v1->b, f)
+ };
+ return v;
+}
+/**
+ * Interpolate using the 8 vertices of a cube
+ * @see https://en.wikipedia.org/wiki/Trilinear_interpolation
+ */
+static rgbvec interp_trilinear(float4 px, __global const rgbvec *lut, int lut_edge_size)
+{
+ const int lutsize2 = lut_edge_size * lut_edge_size;
+ const int lutsize = lut_edge_size;
+
+ const int prev[] = { PREV(px[0]), PREV(px[1]), PREV(px[2]) };
+ const int next[] = { NEXT(px[0]), NEXT(px[1]), NEXT(px[2]) };
+
+ const rgbvec d = {
+ px[0] - prev[0],
+ px[1] - prev[1],
+ px[2] - prev[2]
+ };
+
+ const rgbvec c000 = lut[prev[0] * lutsize2 + prev[1] * lutsize + prev[2]];
+ const rgbvec c001 = lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]];
+ const rgbvec c010 = lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]];
+ const rgbvec c011 = lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]];
+ const rgbvec c100 = lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]];
+ const rgbvec c101 = lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]];
+ const rgbvec c110 = lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]];
+ const rgbvec c111 = lut[next[0] * lutsize2 + next[1] * lutsize + next[2]];
+
+ const rgbvec c00 = lerp(&c000, &c100, d.r);
+ const rgbvec c10 = lerp(&c010, &c110, d.r);
+ const rgbvec c01 = lerp(&c001, &c101, d.r);
+ const rgbvec c11 = lerp(&c011, &c111, d.r);
+
+ const rgbvec c0 = lerp(&c00, &c10, d.g);
+ const rgbvec c1 = lerp(&c01, &c11, d.g);
+
+ return lerp(&c0, &c1, d.b);
+}
+
+/**
+ * Tetrahedral interpolation. Based on code found in Truelight Software Library paper.
+ * @see http://www.filmlight.ltd.uk/pdf/whitepapers/FL-TL-TN-0057-SoftwareLib.pdf
+ */
+
+static rgbvec interp_tetrahedral(float4 px, __global const rgbvec *lut, int lut_edge_size)
+{
+ const int lutsize2 = lut_edge_size*lut_edge_size;
+ const int lutsize = lut_edge_size;
+ const int prev[] = {PREV(px[0]), PREV(px[1]), PREV(px[2])};
+ const int next[] = {NEXT(px[0]), NEXT(px[1]), NEXT(px[2])};
+ const rgbvec d = {px[0] - prev[0], px[1] - prev[1], px[2] - prev[2]};
+ const rgbvec c000 = lut[prev[0] * lutsize2 + prev[1] * lutsize + prev[2]];
+ const rgbvec c111 = lut[next[0] * lutsize2 + next[1] * lutsize + next[2]];
+ rgbvec c;
+ if (d.r > d.g) {
+ if (d.g > d.b) {
+ const rgbvec c100 = lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]];
+ const rgbvec c110 = lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]];
+ c.r = (1-d.r) * c000.r + (d.r-d.g) * c100.r + (d.g-d.b) * c110.r + (d.b) * c111.r;
+ c.g = (1-d.r) * c000.g + (d.r-d.g) * c100.g + (d.g-d.b) * c110.g + (d.b) * c111.g;
+ c.b = (1-d.r) * c000.b + (d.r-d.g) * c100.b + (d.g-d.b) * c110.b + (d.b) * c111.b;
+ } else if (d.r > d.b) {
+ const rgbvec c100 = lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]];
+ const rgbvec c101 = lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]];
+ c.r = (1-d.r) * c000.r + (d.r-d.b) * c100.r + (d.b-d.g) * c101.r + (d.g) * c111.r;
+ c.g = (1-d.r) * c000.g + (d.r-d.b) * c100.g + (d.b-d.g) * c101.g + (d.g) * c111.g;
+ c.b = (1-d.r) * c000.b + (d.r-d.b) * c100.b + (d.b-d.g) * c101.b + (d.g) * c111.b;
+ } else {
+ const rgbvec c001 = lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]];
+ const rgbvec c101 = lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]];
+ c.r = (1-d.b) * c000.r + (d.b-d.r) * c001.r + (d.r-d.g) * c101.r + (d.g) * c111.r;
+ c.g = (1-d.b) * c000.g + (d.b-d.r) * c001.g + (d.r-d.g) * c101.g + (d.g) * c111.g;
+ c.b = (1-d.b) * c000.b + (d.b-d.r) * c001.b + (d.r-d.g) * c101.b + (d.g) * c111.b;
+ }
+ } else {
+ if (d.b > d.g) {
+ const rgbvec c001 = lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]];
+ const rgbvec c011 = lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]];
+ c.r = (1-d.b) * c000.r + (d.b-d.g) * c001.r + (d.g-d.r) * c011.r + (d.r) * c111.r;
+ c.g = (1-d.b) * c000.g + (d.b-d.g) * c001.g + (d.g-d.r) * c011.g + (d.r) * c111.g;
+ c.b = (1-d.b) * c000.b + (d.b-d.g) * c001.b + (d.g-d.r) * c011.b + (d.r) * c111.b;
+ } else if (d.b > d.r) {
+ const rgbvec c010 = lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]];
+ const rgbvec c011 = lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]];
+ c.r = (1-d.g) * c000.r + (d.g-d.b) * c010.r + (d.b-d.r) * c011.r + (d.r) * c111.r;
+ c.g = (1-d.g) * c000.g + (d.g-d.b) * c010.g + (d.b-d.r) * c011.g + (d.r) * c111.g;
+ c.b = (1-d.g) * c000.b + (d.g-d.b) * c010.b + (d.b-d.r) * c011.b + (d.r) * c111.b;
+ } else {
+ const rgbvec c010 = lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]];
+ const rgbvec c110 = lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]];
+ c.r = (1-d.g) * c000.r + (d.g-d.r) * c010.r + (d.r-d.b) * c110.r + (d.b) * c111.r;
+ c.g = (1-d.g) * c000.g + (d.g-d.r) * c010.g + (d.r-d.b) * c110.g + (d.b) * c111.g;
+ c.b = (1-d.g) * c000.b + (d.g-d.r) * c010.b + (d.r-d.b) * c110.b + (d.b) * c111.b;
+ }
+ }
+ return c;
+}
+
+#define LUT3D_KERNEL(INTERP_FUNC) \
+__kernel void lut3d_##INTERP_FUNC( \
+ __read_only image2d_t src, \
+ __write_only image2d_t dst, \
+ __global const rgbvec* lut, \
+ int lut_edge_size) \
+{ \
+ const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | \
+ CLK_ADDRESS_CLAMP_TO_EDGE | \
+ CLK_FILTER_NEAREST); \
+ \
+ int2 loc = (int2)(get_global_id(0), get_global_id(1)); \
+ float4 px = read_imagef(src, sampler, loc); \
+ \
+ for (int i = 0; i < 3; i++) { \
+ px[i] *= (lut_edge_size - 1); \
+ } \
+ \
+ rgbvec lutpx = INTERP_FUNC(px, lut, lut_edge_size); \
+ \
+ write_imagef(dst, loc, (float4)(lutpx.r, lutpx.g, lutpx.b, 0.0f)); \
+}
+
+LUT3D_KERNEL(interp_nearest)
+LUT3D_KERNEL(interp_trilinear)
+LUT3D_KERNEL(interp_tetrahedral)
diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
index b6930fb686..d143286d21 100644
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -26,6 +26,7 @@ extern const char *ff_source_convolution_cl;
extern const char *ff_source_deshake_cl;
extern const char *ff_source_neighbor_cl;
extern const char *ff_source_nlmeans_cl;
+extern const char *ff_source_lut3d_cl;
extern const char *ff_source_overlay_cl;
extern const char *ff_source_pad_cl;
extern const char *ff_source_remap_cl;
@@ -34,4 +35,5 @@ extern const char *ff_source_transpose_cl;
extern const char *ff_source_unsharp_cl;
extern const char *ff_source_xfade_cl;
+
#endif /* AVFILTER_OPENCL_SOURCE_H */
diff --git a/libavfilter/vf_lut3d_opencl.c b/libavfilter/vf_lut3d_opencl.c
new file mode 100644
index 0000000000..127a81edce
--- /dev/null
+++ b/libavfilter/vf_lut3d_opencl.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright (c) 2025 Jan Studeny
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config_components.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/avstring.h"
+
+#include "libavutil/file_open.h"
+
+#include "avfilter.h"
+#include "filters.h"
+#include "opencl.h"
+#include "drawutils.h"
+#include "opencl_source.h"
+#include "video.h"
+
+
+#define MAX_LINE_SIZE 512
+
+enum interp_mode {
+ INTERPOLATE_NEAREST,
+ INTERPOLATE_TRILINEAR,
+ INTERPOLATE_TETRAHEDRAL,
+ INTERPOLATE_PYRAMID,
+ INTERPOLATE_PRISM,
+ NB_INTERP_MODE
+};
+
+typedef struct rgbvec {
+ cl_float r, g, b, a;
+} rgbvec;
+
+#define MAX_LEVEL 256
+
+
+typedef struct LUT3DOpenCLContext {
+ OpenCLFilterContext ocf;
+
+ int initialised;
+ cl_kernel kernel;
+ cl_command_queue command_queue;
+ cl_mem lut3d_buf;
+
+ struct rgbvec *lut;
+ int lutsize;
+ int lutsize2;
+ struct rgbvec scale;
+ int interpolation; ///<interp_mode
+ char *file;
+} LUT3DOpenCLContext;
+
+static int allocate_3dlut(AVFilterContext *ctx, int lutsize)
+{
+ LUT3DOpenCLContext *lut3d = ctx->priv;
+ if (lutsize < 2 || lutsize > MAX_LEVEL) {
+ av_log(ctx, AV_LOG_ERROR, "Too large or invalid 3D LUT size\n");
+ return AVERROR(EINVAL);
+ }
+
+ av_freep(&lut3d->lut);
+ lut3d->lut = av_malloc_array(lutsize * lutsize * lutsize, sizeof(*lut3d->lut));
+ if (!lut3d->lut)
+ return AVERROR(ENOMEM);
+
+ lut3d->lutsize = lutsize;
+ lut3d->lutsize2 = lutsize * lutsize;
+ return 0;
+}
+
+static int set_identity_matrix(AVFilterContext *ctx, int size)
+{
+ LUT3DOpenCLContext *lut3d = ctx->priv;
+ int ret, i, j, k;
+ const int size2 = size * size;
+ const float c = 1. / (size - 1);
+
+ ret = allocate_3dlut(ctx, size);
+ if (ret < 0)
+ return ret;
+
+ for (k = 0; k < size; k++) {
+ for (j = 0; j < size; j++) {
+ for (i = 0; i < size; i++) {
+ struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
+ vec->r = k * c;
+ vec->g = j * c;
+ vec->b = i * c;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int skip_line(const char *p)
+{
+ while (*p && av_isspace(*p))
+ p++;
+ return !*p || *p == '#';
+}
+
+#define NEXT_LINE(loop_cond) do { \
+ if (!fgets(line, sizeof(line), f)) { \
+ av_log(ctx, AV_LOG_ERROR, "Unexpected EOF\n"); \
+ return AVERROR_INVALIDDATA; \
+ } \
+} while (loop_cond)
+
+static int parse_cube(AVFilterContext *ctx, FILE *f)
+{
+ LUT3DOpenCLContext *lut3d = ctx->priv;
+ char line[MAX_LINE_SIZE];
+
+ while (fgets(line, sizeof(line), f)) {
+ if (!strncmp(line, "LUT_3D_SIZE", 11)) {
+ int ret, i, j, k;
+ const int size = strtol(line + 12, NULL, 0);
+ const int size2 = size * size;
+
+ ret = allocate_3dlut(ctx, size);
+ if (ret < 0)
+ return ret;
+
+ for (k = 0; k < size; k++) {
+ for (j = 0; j < size; j++) {
+ for (i = 0; i < size; i++) {
+ struct rgbvec *vec = &lut3d->lut[i * size2 + j * size + k];
+
+ do {
+try_again:
+ NEXT_LINE(0);
+ if (!strncmp(line, "DOMAIN_", 7)) {
+ av_log(ctx, AV_LOG_ERROR, "Min/max not supported in this format\n");
+ return AVERROR_INVALIDDATA;
+ } else if (!strncmp(line, "TITLE", 5)) {
+ goto try_again;
+ }
+ } while (skip_line(line));
+ if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3)
+ return AVERROR_INVALIDDATA;
+ }
+ }
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int lut3d_opencl_init_device(AVFilterContext *avctx)
+{
+ int err;
+ LUT3DOpenCLContext *ctx = avctx->priv;
+ cl_int cle;
+
+
+ size_t n = ctx->lutsize;
+ size_t total = n * n * n;
+
+
+ cl_mem lut3d_buf = clCreateBuffer(ctx->ocf.hwctx->context,
+ CL_MEM_READ_ONLY |
+ CL_MEM_COPY_HOST_PTR |
+ CL_MEM_HOST_NO_ACCESS,
+ sizeof(rgbvec) * total,
+ ctx->lut, &cle);
+
+ if (!lut3d_buf) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: "
+ "%d.\n", cle);
+ return AVERROR(EIO);
+ }
+ ctx->lut3d_buf = lut3d_buf;
+
+ av_log(avctx, AV_LOG_DEBUG, "LUT3D data loaded onto host\n");
+
+
+
+
+ err = ff_opencl_filter_load_program(avctx, &ff_source_lut3d_cl, 1);
+ if (err < 0)
+ return err;
+
+ ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+ ctx->ocf.hwctx->device_id,
+ 0, &cle);
+ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
+ "command queue %d.\n", cle);
+
+ const char *kernel_name;
+ switch (ctx->interpolation) {
+ case INTERPOLATE_NEAREST: kernel_name = "lut3d_interp_nearest"; break;
+ case INTERPOLATE_TRILINEAR: kernel_name = "lut3d_interp_trilinear"; break;
+ case INTERPOLATE_TETRAHEDRAL: kernel_name = "lut3d_interp_tetrahedral"; break;
+ default:
+ av_assert0(0);
+ }
+ ctx->kernel = clCreateKernel(ctx->ocf.program, kernel_name, &cle);
+ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
+ "kernel %d.\n", cle);
+
+ ctx->initialised = 1;
+ return 0;
+ fail:
+ if (ctx->command_queue)
+ clReleaseCommandQueue(ctx->command_queue);
+ if (ctx->kernel)
+ clReleaseKernel(ctx->kernel);
+ return err;
+}
+
+static int lut3d_opencl_init(AVFilterContext *avctx)
+{
+
+ av_log(avctx, AV_LOG_DEBUG, "Starting intialization of LUT3D OpenCL\n");
+ LUT3DOpenCLContext *ctx = avctx->priv;
+ int err = 0;
+
+ ff_opencl_filter_init(avctx);
+
+ av_log(avctx, AV_LOG_DEBUG, "LUT3D OpenCL filter initialized\n");
+
+
+ FILE *f;
+ const char *ext;
+
+ if (!ctx->file) {
+ return set_identity_matrix(avctx, 32);
+ }
+ else {
+ ext = strrchr(ctx->file, '.');
+ if (!ext) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to guess the format from the extension\n");
+ err = AVERROR_INVALIDDATA;
+ return err;
+ }
+ ext++;
+ if (!av_strcasecmp(ext, "cube")) {
+ f = avpriv_fopen_utf8(ctx->file, "r");
+ if (!f) {
+ err = AVERROR(errno);
+ av_log(avctx, AV_LOG_ERROR, "%s: %s\n", ctx->file, av_err2str(err));
+ return err;
+ }
+ err = parse_cube(avctx, f);
+ fclose(f);
+ } else {
+ av_log(avctx, AV_LOG_ERROR, "Unrecognized '.%s' file type\n", ext);
+ err = AVERROR(EINVAL);
+ return err;
+ }
+ if (!err && !ctx->lutsize) {
+ av_log(avctx, AV_LOG_ERROR, "3D LUT is empty\n");
+ err = AVERROR_INVALIDDATA;
+ return err;
+ }
+
+ }
+ av_log(avctx, AV_LOG_DEBUG, "LUT3D OpenCL data loaded\n");
+ return err;
+}
+
+static int lut3d_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+ AVFilterContext *avctx = inlink->dst;
+ AVFilterLink *outlink = avctx->outputs[0];
+ LUT3DOpenCLContext *ctx = avctx->priv;
+ AVFrame *output = NULL;
+ cl_int cle;
+ size_t global_work[2];
+ cl_mem src, dst;
+ int err, p;
+
+ av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+ av_get_pix_fmt_name(input->format),
+ input->width, input->height, input->pts);
+
+ if (!input->hw_frames_ctx)
+ return AVERROR(EINVAL);
+
+ if (!ctx->initialised) {
+ err = lut3d_opencl_init_device(avctx);
+ if (err < 0)
+ goto fail;
+ }
+
+ output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!output) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+
+ for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
+ src = (cl_mem) input->data[p];
+ dst = (cl_mem)output->data[p];
+
+ if (!dst)
+ break;
+
+ CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &src);
+ CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &dst);
+ CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_mem, &ctx->lut3d_buf);
+ CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_int, &ctx->lutsize);
+
+ err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
+ if (err < 0)
+ goto fail;
+
+ av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
+ "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
+ p, global_work[0], global_work[1]);
+
+ cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
+ global_work, NULL,
+ 0, NULL, NULL);
+ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue "
+ "kernel: %d.\n", cle);
+ }
+
+ cle = clFinish(ctx->command_queue);
+ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
+
+ err = av_frame_copy_props(output, input);
+ if (err < 0)
+ goto fail;
+
+ av_frame_free(&input);
+
+ av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+ av_get_pix_fmt_name(output->format),
+ output->width, output->height, output->pts);
+
+ return ff_filter_frame(outlink, output);
+
+fail:
+ clFinish(ctx->command_queue);
+ av_frame_free(&input);
+ av_frame_free(&output);
+ return err;
+}
+
+static av_cold void lut3d_opencl_uninit(AVFilterContext *avctx)
+{
+ LUT3DOpenCLContext *ctx = avctx->priv;
+ cl_int cle;
+
+ clReleaseMemObject(ctx->lut3d_buf);
+
+ if (ctx->kernel) {
+ cle = clReleaseKernel(ctx->kernel);
+ if (cle != CL_SUCCESS)
+ av_log(avctx, AV_LOG_ERROR, "Failed to release "
+ "kernel: %d.\n", cle);
+ }
+
+ if (ctx->command_queue) {
+ cle = clReleaseCommandQueue(ctx->command_queue);
+ if (cle != CL_SUCCESS)
+ av_log(avctx, AV_LOG_ERROR, "Failed to release "
+ "command queue: %d.\n", cle);
+ }
+
+ av_freep(&ctx->lut);
+
+ ff_opencl_filter_uninit(avctx);
+}
+
+static const AVFilterPad lut3d_opencl_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = &lut3d_opencl_filter_frame,
+ .config_props = &ff_opencl_filter_config_input,
+ },
+};
+
+static const AVFilterPad lut3d_opencl_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &ff_opencl_filter_config_output,
+ },
+};
+
+#define OFFSET(x) offsetof(LUT3DOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+#define TFLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
+
+
+
+#if CONFIG_LUT3D_OPENCL_FILTER
+
+
+static const AVOption lut3d_opencl_options[] = {
+ { "file", "set 3D LUT file name", OFFSET(file), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
+ { "interp", "select interpolation mode", OFFSET(interpolation), AV_OPT_TYPE_INT, {.i64=INTERPOLATE_TETRAHEDRAL}, 0, NB_INTERP_MODE-1, TFLAGS, .unit = "interp_mode" },
+ { "nearest", "use values from the nearest defined points", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_NEAREST}, 0, 0, TFLAGS, .unit = "interp_mode" },
+ { "trilinear", "interpolate values using the 8 points defining a cube", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_TRILINEAR}, 0, 0, TFLAGS, .unit = "interp_mode" },
+ { "tetrahedral", "interpolate values using a tetrahedron", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_TETRAHEDRAL}, 0, 0, TFLAGS, .unit = "interp_mode" }, \
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(lut3d_opencl);
+
+const FFFilter ff_vf_lut3d_opencl = {
+ .p.name = "lut3d_opencl",
+ .p.description = NULL_IF_CONFIG_SMALL("Adjust colors using a 3D LUT."),
+ .p.priv_class = &lut3d_opencl_class,
+ .p.flags = AVFILTER_FLAG_HWDEVICE,
+ .priv_size = sizeof(LUT3DOpenCLContext),
+ .init = &lut3d_opencl_init,
+ .uninit = &lut3d_opencl_uninit,
+ FILTER_INPUTS(lut3d_opencl_inputs),
+ FILTER_OUTPUTS(lut3d_opencl_outputs),
+ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_OPENCL),
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_LUT3D_OPENCL_FILTER */
--
2.39.5 (Apple Git-154)
More information about the ffmpeg-devel
mailing list