[FFmpeg-devel] [PATCH v3 1/1] avfilter/vf_vpp_qsv: apply 3D LUT from file.

Sat Jan 27 19:27:07 EET 2024

Usage: "vpp_qsv=lut3d_file=<path to file>"

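Requires oneVPL; the 3D LUT is passed as a system-memory surface. The LUT file
parsers move from vf_lut3d.c into the new lut3d.c so both filters share them.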

Signed-off-by: Chen Yufei <cyfdecyf at gmail.com>
---
 libavfilter/Makefile     |   8 +-
 libavfilter/lut3d.c      | 669 +++++++++++++++++++++++++++++++++++++++
 libavfilter/lut3d.h      |  13 +
 libavfilter/vf_lut3d.c   | 590 +---------------------------------
 libavfilter/vf_vpp_qsv.c | 119 ++++++-
 5 files changed, 807 insertions(+), 592 deletions(-)
 create mode 100644 libavfilter/lut3d.c

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index bba0219876..f682ea53c2 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -332,7 +332,7 @@ OBJS-$(CONFIG_GRAPHMONITOR_FILTER)           += f_graphmonitor.o
 OBJS-$(CONFIG_GRAYWORLD_FILTER)              += vf_grayworld.o
 OBJS-$(CONFIG_GREYEDGE_FILTER)               += vf_colorconstancy.o
 OBJS-$(CONFIG_GUIDED_FILTER)                 += vf_guided.o
-OBJS-$(CONFIG_HALDCLUT_FILTER)               += vf_lut3d.o framesync.o
+OBJS-$(CONFIG_HALDCLUT_FILTER)               += vf_lut3d.o lut3d.o framesync.o
 OBJS-$(CONFIG_HFLIP_FILTER)                  += vf_hflip.o
 OBJS-$(CONFIG_HFLIP_VULKAN_FILTER)           += vf_flip_vulkan.o vulkan.o
 OBJS-$(CONFIG_HISTEQ_FILTER)                 += vf_histeq.o
@@ -370,10 +370,10 @@ OBJS-$(CONFIG_LIMITDIFF_FILTER)              += vf_limitdiff.o framesync.o
 OBJS-$(CONFIG_LIMITER_FILTER)                += vf_limiter.o
 OBJS-$(CONFIG_LOOP_FILTER)                   += f_loop.o
 OBJS-$(CONFIG_LUMAKEY_FILTER)                += vf_lumakey.o
-OBJS-$(CONFIG_LUT1D_FILTER)                  += vf_lut3d.o
+OBJS-$(CONFIG_LUT1D_FILTER)                  += vf_lut3d.o lut3d.o
 OBJS-$(CONFIG_LUT_FILTER)                    += vf_lut.o
 OBJS-$(CONFIG_LUT2_FILTER)                   += vf_lut2.o framesync.o
-OBJS-$(CONFIG_LUT3D_FILTER)                  += vf_lut3d.o framesync.o
+OBJS-$(CONFIG_LUT3D_FILTER)                  += vf_lut3d.o lut3d.o framesync.o
 OBJS-$(CONFIG_LUTRGB_FILTER)                 += vf_lut.o
 OBJS-$(CONFIG_LUTYUV_FILTER)                 += vf_lut.o
 OBJS-$(CONFIG_MASKEDCLAMP_FILTER)            += vf_maskedclamp.o framesync.o
@@ -553,7 +553,7 @@ OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o vf_vidstabtransfo
 OBJS-$(CONFIG_VIF_FILTER)                    += vf_vif.o framesync.o
 OBJS-$(CONFIG_VIGNETTE_FILTER)               += vf_vignette.o
 OBJS-$(CONFIG_VMAFMOTION_FILTER)             += vf_vmafmotion.o framesync.o
-OBJS-$(CONFIG_VPP_QSV_FILTER)                += vf_vpp_qsv.o
+OBJS-$(CONFIG_VPP_QSV_FILTER)                += vf_vpp_qsv.o lut3d.o
 OBJS-$(CONFIG_VSTACK_FILTER)                 += vf_stack.o framesync.o
 OBJS-$(CONFIG_W3FDIF_FILTER)                 += vf_w3fdif.o
 OBJS-$(CONFIG_WAVEFORM_FILTER)               += vf_waveform.o
diff --git a/libavfilter/lut3d.c b/libavfilter/lut3d.c
new file mode 100644
index 0000000000..173979adcc
--- /dev/null
+++ b/libavfilter/lut3d.c
@@ -0,0 +1,669 @@
+/*
+ * Copyright (c) 2013 Clément Bœsch
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "lut3d.h"
+
+#include <float.h>
+
+#include "libavutil/avstring.h"
+#include "libavutil/file_open.h"
+
+#define EXPONENT_MASK 0x7F800000
+#define MANTISSA_MASK 0x007FFFFF
+#define SIGN_MASK     0x80000000
+
+static inline float sanitizef(float f)
+{
+    union av_intfloat32 t;
+    t.f = f;
+
+    if ((t.i & EXPONENT_MASK) == EXPONENT_MASK) {
+        if ((t.i & MANTISSA_MASK) != 0) {
+            // NAN
+            return 0.0f;
+        } else if (t.i & SIGN_MASK) {
+            // -INF
+            return -FLT_MAX;
+        } else {
+            // +INF
+            return FLT_MAX;
+        }
+    }
+    return f;
+}
+
+static inline float lerpf(float v0, float v1, float f)
+{
+    return v0 + (v1 - v0) * f;
+}
+
+static inline struct rgbvec lerp(const struct rgbvec *v0, const struct rgbvec *v1, float f)
+{
+    struct rgbvec v = {
+        lerpf(v0->r, v1->r, f), lerpf(v0->g, v1->g, f), lerpf(v0->b, v1->b, f)
+    };
+    return v;
+}
+
+int ff_allocate_3dlut(AVFilterContext *ctx, LUT3DContext *lut3d, int lutsize, int prelut)
+{
+    int i;
+    if (lutsize < 2 || lutsize > MAX_LEVEL) {
+        av_log(ctx, AV_LOG_ERROR, "Too large or invalid 3D LUT size\n");
+        return AVERROR(EINVAL);
+    }
+
+    av_freep(&lut3d->lut);
+    lut3d->lut = av_malloc_array(lutsize * lutsize * lutsize, sizeof(*lut3d->lut));
+    if (!lut3d->lut)
+        return AVERROR(ENOMEM);
+
+    if (prelut) {
+        lut3d->prelut.size = PRELUT_SIZE;
+        for (i = 0; i < 3; i++) {
+            av_freep(&lut3d->prelut.lut[i]);
+            lut3d->prelut.lut[i] = av_malloc_array(PRELUT_SIZE, sizeof(*lut3d->prelut.lut[0]));
+            if (!lut3d->prelut.lut[i])
+                return AVERROR(ENOMEM);
+        }
+    } else {
+        lut3d->prelut.size = 0;
+        for (i = 0; i < 3; i++) {
+            av_freep(&lut3d->prelut.lut[i]);
+        }
+    }
+    lut3d->lutsize = lutsize;
+    lut3d->lutsize2 = lutsize * lutsize;
+    return 0;
+}
+
+static int set_identity_matrix(AVFilterContext *ctx, LUT3DContext *lut3d, int size)
+{
+    int ret, i, j, k;
+    const int size2 = size * size;
+    const float c = 1. / (size - 1);
+
+    ret = ff_allocate_3dlut(ctx, lut3d, size, 0);
+    if (ret < 0)
+        return ret;
+
+    for (k = 0; k < size; k++) {
+        for (j = 0; j < size; j++) {
+            for (i = 0; i < size; i++) {
+                struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
+                vec->r = k * c;
+                vec->g = j * c;
+                vec->b = i * c;
+            }
+        }
+    }
+
+    return 0;
+}
+
+#define MAX_LINE_SIZE 512
+
+static int skip_line(const char *p)
+{
+    while (*p && av_isspace(*p))
+        p++;
+    return !*p || *p == '#';
+}
+
+/* Read the next whitespace-delimited word from f into dst; NULL if none. */
+static char* fget_next_word(char* dst, int max, FILE* f)
+{
+    int c;
+    char *p = dst;
+
+    /* reserve one byte of dst for the terminating NUL */
+    max--;
+    /* skip leading whitespace and store the first non-whitespace char */
+    while ((c = fgetc(f)) != EOF) {
+        if (av_isspace(c))
+            continue;
+
+        *p++ = c;
+        max--;
+        break;
+    }
+
+    /* copy until the budget is used up, or whitespace/EOF ends the word */
+    for (; max > 0; max--) {
+        if ((c = fgetc(f)) == EOF)
+            break;
+
+        if (av_isspace(c))
+            break;
+
+        *p++ = c;
+    }
+
+    *p = 0;
+    if (p == dst)
+        return NULL;
+    return p;
+}
+
+#define NEXT_LINE(loop_cond) do {                           \
+    if (!fgets(line, sizeof(line), f)) {                    \
+        av_log(ctx, AV_LOG_ERROR, "Unexpected EOF\n");      \
+        return AVERROR_INVALIDDATA;                         \
+    }                                                       \
+} while (loop_cond)
+
+#define NEXT_LINE_OR_GOTO(loop_cond, label) do {            \
+    if (!fgets(line, sizeof(line), f)) {                    \
+        av_log(ctx, AV_LOG_ERROR, "Unexpected EOF\n");      \
+        ret = AVERROR_INVALIDDATA;                          \
+        goto label;                                         \
+    }                                                       \
+} while (loop_cond)
+
+/* Basically r g and b float values on each line, with an optional 3DLUTSIZE
+ * directive (default size 33); seems to be generated by Davinci */
+static int parse_dat(AVFilterContext *ctx, LUT3DContext *lut3d, FILE *f)
+{
+    char line[MAX_LINE_SIZE];
+    int ret, i, j, k, size, size2;
+
+    lut3d->lutsize = size = 33;
+
+    NEXT_LINE(skip_line(line));
+    if (!strncmp(line, "3DLUTSIZE ", 10)) {
+        size = strtol(line + 10, NULL, 0);
+
+        NEXT_LINE(skip_line(line));
+    }
+
+    ret = ff_allocate_3dlut(ctx, lut3d, size, 0);
+    if (ret < 0)
+        return ret;
+    size2 = size * size; /* stride must match the final, validated size */
+
+    for (k = 0; k < size; k++) {
+        for (j = 0; j < size; j++) {
+            for (i = 0; i < size; i++) {
+                struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
+                if (k != 0 || j != 0 || i != 0)
+                    NEXT_LINE(skip_line(line));
+                if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3)
+                    return AVERROR_INVALIDDATA;
+            }
+        }
+    }
+    return 0;
+}
+
+/* Iridas format: a LUT_3D_SIZE line, then one "r g b" line per LUT entry */
+static int parse_cube(AVFilterContext *ctx, LUT3DContext *lut3d, FILE *f)
+{
+    char line[MAX_LINE_SIZE];
+    float min[3] = {0.0, 0.0, 0.0};
+    float max[3] = {1.0, 1.0, 1.0};
+
+    while (fgets(line, sizeof(line), f)) {
+        if (!strncmp(line, "LUT_3D_SIZE", 11)) {
+            int ret, i, j, k, size2;
+            const int size = strtol(line + 11, NULL, 0); /* skips separator */
+
+            ret = ff_allocate_3dlut(ctx, lut3d, size, 0);
+            if (ret < 0)
+                return ret;
+            size2 = size * size; /* safe: size was range-checked above */
+
+            for (k = 0; k < size; k++) {
+                for (j = 0; j < size; j++) {
+                    for (i = 0; i < size; i++) {
+                        struct rgbvec *vec = &lut3d->lut[i * size2 + j * size + k];
+
+                        do {
+try_again:
+                            NEXT_LINE(0);
+                            if (!strncmp(line, "DOMAIN_", 7)) {
+                                float *vals = NULL;
+                                if      (!strncmp(line + 7, "MIN ", 4)) vals = min;
+                                else if (!strncmp(line + 7, "MAX ", 4)) vals = max;
+                                if (!vals)
+                                    return AVERROR_INVALIDDATA;
+                                av_sscanf(line + 11, "%f %f %f", vals, vals + 1, vals + 2);
+                                av_log(ctx, AV_LOG_DEBUG, "min: %f %f %f | max: %f %f %f\n",
+                                       min[0], min[1], min[2], max[0], max[1], max[2]);
+                                goto try_again;
+                            } else if (!strncmp(line, "TITLE", 5)) {
+                                goto try_again;
+                            }
+                        } while (skip_line(line));
+                        if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3)
+                            return AVERROR_INVALIDDATA;
+                    }
+                }
+            }
+            break;
+        }
+    }
+
+    lut3d->scale.r = av_clipf(1. / (max[0] - min[0]), 0.f, 1.f);
+    lut3d->scale.g = av_clipf(1. / (max[1] - min[1]), 0.f, 1.f);
+    lut3d->scale.b = av_clipf(1. / (max[2] - min[2]), 0.f, 1.f);
+
+    return 0;
+}
+
+/* Assume 17x17x17 LUT with a 16-bit depth
+ * FIXME: it seems there are various 3dl formats */
+static int parse_3dl(AVFilterContext *ctx, LUT3DContext *lut3d, FILE *f)
+{
+    char line[MAX_LINE_SIZE];
+    int ret, i, j, k;
+    const int size = 17;
+    const int size2 = 17 * 17;
+    const float scale = 16*16*16;
+
+    lut3d->lutsize = size;
+
+    ret = ff_allocate_3dlut(ctx, lut3d, size, 0);
+    if (ret < 0)
+        return ret;
+
+    NEXT_LINE(skip_line(line));
+    for (k = 0; k < size; k++) {
+        for (j = 0; j < size; j++) {
+            for (i = 0; i < size; i++) {
+                int r, g, b;
+                struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
+
+                NEXT_LINE(skip_line(line));
+                if (av_sscanf(line, "%d %d %d", &r, &g, &b) != 3)
+                    return AVERROR_INVALIDDATA;
+                vec->r = r / scale;
+                vec->g = g / scale;
+                vec->b = b / scale;
+            }
+        }
+    }
+    return 0;
+}
+
+/* Pandora format */
+static int parse_m3d(AVFilterContext *ctx, LUT3DContext *lut3d, FILE *f)
+{
+    float scale;
+    int ret, i, j, k, size, size2, in = -1, out = -1;
+    char line[MAX_LINE_SIZE];
+    uint8_t rgb_map[3] = {0, 1, 2};
+
+    while (fgets(line, sizeof(line), f)) {
+        if      (!strncmp(line, "in",  2)) in  = strtol(line + 2, NULL, 0);
+        else if (!strncmp(line, "out", 3)) out = strtol(line + 3, NULL, 0);
+        else if (!strncmp(line, "values", 6)) {
+            const char *p = line + 6;
+#define SET_COLOR(id) do {                  \
+    while (av_isspace(*p))                  \
+        p++;                                \
+    switch (*p) {                           \
+    case 'r': rgb_map[id] = 0; break;       \
+    case 'g': rgb_map[id] = 1; break;       \
+    case 'b': rgb_map[id] = 2; break;       \
+    }                                       \
+    while (*p && !av_isspace(*p))           \
+        p++;                                \
+} while (0)
+            SET_COLOR(0);
+            SET_COLOR(1);
+            SET_COLOR(2);
+            break;
+        }
+    }
+
+    if (in == -1 || out == -1) {
+        av_log(ctx, AV_LOG_ERROR, "in and out must be defined\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (in < 2 || out < 2 ||
+        in  > MAX_LEVEL*MAX_LEVEL*MAX_LEVEL ||
+        out > MAX_LEVEL*MAX_LEVEL*MAX_LEVEL) {
+        av_log(ctx, AV_LOG_ERROR, "invalid in (%d) or out (%d)\n", in, out);
+        return AVERROR_INVALIDDATA;
+    }
+    for (size = 1; size*size*size < in; size++);
+    lut3d->lutsize = size;
+    size2 = size * size;
+
+    ret = ff_allocate_3dlut(ctx, lut3d, size, 0);
+    if (ret < 0)
+        return ret;
+
+    scale = 1. / (out - 1);
+
+    for (k = 0; k < size; k++) {
+        for (j = 0; j < size; j++) {
+            for (i = 0; i < size; i++) {
+                struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
+                float val[3];
+
+                NEXT_LINE(0);
+                if (av_sscanf(line, "%f %f %f", val, val + 1, val + 2) != 3)
+                    return AVERROR_INVALIDDATA;
+                vec->r = val[rgb_map[0]] * scale;
+                vec->g = val[rgb_map[1]] * scale;
+                vec->b = val[rgb_map[2]] * scale;
+            }
+        }
+    }
+    return 0;
+}
+
+static int nearest_sample_index(float *data, float x, int low, int hi)
+{
+    int mid;
+    if (x < data[low])
+        return low;
+
+    if (x > data[hi])
+        return hi;
+
+    for (;;) {
+        av_assert0(x >= data[low]);
+        av_assert0(x <= data[hi]);
+        av_assert0((hi-low) > 0);
+
+        if (hi - low == 1)
+            return low;
+
+        mid = (low + hi) / 2;
+
+        if (x < data[mid])
+            hi = mid;
+        else
+            low = mid;
+    }
+
+    return 0;
+}
+
+#define NEXT_FLOAT_OR_GOTO(value, label)                    \
+    if (!fget_next_word(line, sizeof(line) ,f)) {           \
+        ret = AVERROR_INVALIDDATA;                          \
+        goto label;                                         \
+    }                                                       \
+    if (av_sscanf(line, "%f", &value) != 1) {               \
+        ret = AVERROR_INVALIDDATA;                          \
+        goto label;                                         \
+    }
+
+static int parse_cinespace(AVFilterContext *ctx, LUT3DContext *lut3d, FILE *f)
+{
+    char line[MAX_LINE_SIZE];
+    float in_min[3]  = {0.0, 0.0, 0.0};
+    float in_max[3]  = {1.0, 1.0, 1.0};
+    float out_min[3] = {0.0, 0.0, 0.0};
+    float out_max[3] = {1.0, 1.0, 1.0};
+    int inside_metadata = 0, size, size2;
+    int prelut = 0;
+    int ret = 0;
+
+    int prelut_sizes[3] = {0, 0, 0};
+    float *in_prelut[3]  = {NULL, NULL, NULL};
+    float *out_prelut[3] = {NULL, NULL, NULL};
+
+    NEXT_LINE_OR_GOTO(skip_line(line), end);
+    if (strncmp(line, "CSPLUTV100", 10)) {
+        av_log(ctx, AV_LOG_ERROR, "Not cineSpace LUT format\n");
+        ret = AVERROR(EINVAL);
+        goto end;
+    }
+
+    NEXT_LINE_OR_GOTO(skip_line(line), end);
+    if (strncmp(line, "3D", 2)) {
+        av_log(ctx, AV_LOG_ERROR, "Not 3D LUT format\n");
+        ret = AVERROR(EINVAL);
+        goto end;
+    }
+
+    while (1) {
+        NEXT_LINE_OR_GOTO(skip_line(line), end);
+
+        if (!strncmp(line, "BEGIN METADATA", 14)) {
+            inside_metadata = 1;
+            continue;
+        }
+        if (!strncmp(line, "END METADATA", 12)) {
+            inside_metadata = 0;
+            continue;
+        }
+        if (inside_metadata == 0) {
+            int size_r, size_g, size_b;
+
+            for (int i = 0; i < 3; i++) {
+                int npoints = strtol(line, NULL, 0);
+
+                if (npoints > 2) {
+                    float v,last;
+
+                    if (npoints > PRELUT_SIZE) {
+                        av_log(ctx, AV_LOG_ERROR, "Prelut size too large.\n");
+                        ret = AVERROR_INVALIDDATA;
+                        goto end;
+                    }
+
+                    if (in_prelut[i] || out_prelut[i]) {
+                        av_log(ctx, AV_LOG_ERROR, "Invalid file has multiple preluts.\n");
+                        ret = AVERROR_INVALIDDATA;
+                        goto end;
+                    }
+
+                    in_prelut[i]  = (float*)av_malloc(npoints * sizeof(float));
+                    out_prelut[i] = (float*)av_malloc(npoints * sizeof(float));
+                    if (!in_prelut[i] || !out_prelut[i]) {
+                        ret = AVERROR(ENOMEM);
+                        goto end;
+                    }
+
+                    prelut_sizes[i] = npoints;
+                    in_min[i] = FLT_MAX;
+                    in_max[i] = -FLT_MAX;
+                    out_min[i] = FLT_MAX;
+                    out_max[i] = -FLT_MAX;
+
+                    for (int j = 0; j < npoints; j++) {
+                        NEXT_FLOAT_OR_GOTO(v, end)
+                        in_min[i] = FFMIN(in_min[i], v);
+                        in_max[i] = FFMAX(in_max[i], v);
+                        in_prelut[i][j] = v;
+                        if (j > 0 && v < last) {
+                            av_log(ctx, AV_LOG_ERROR, "Invalid file, non increasing prelut.\n");
+                            ret = AVERROR_INVALIDDATA; /* malformed input, not OOM */
+                            goto end;
+                        }
+                        last = v;
+                    }
+
+                    for (int j = 0; j < npoints; j++) {
+                        NEXT_FLOAT_OR_GOTO(v, end)
+                        out_min[i] = FFMIN(out_min[i], v);
+                        out_max[i] = FFMAX(out_max[i], v);
+                        out_prelut[i][j] = v;
+                    }
+
+                } else if (npoints == 2)  {
+                    NEXT_LINE_OR_GOTO(skip_line(line), end);
+                    if (av_sscanf(line, "%f %f", &in_min[i], &in_max[i]) != 2) {
+                        ret = AVERROR_INVALIDDATA;
+                        goto end;
+                    }
+                    NEXT_LINE_OR_GOTO(skip_line(line), end);
+                    if (av_sscanf(line, "%f %f", &out_min[i], &out_max[i]) != 2) {
+                        ret = AVERROR_INVALIDDATA;
+                        goto end;
+                    }
+
+                } else {
+                    av_log(ctx, AV_LOG_ERROR, "Unsupported number of pre-lut points.\n");
+                    ret = AVERROR_PATCHWELCOME;
+                    goto end;
+                }
+
+                NEXT_LINE_OR_GOTO(skip_line(line), end);
+            }
+
+            if (av_sscanf(line, "%d %d %d", &size_r, &size_g, &size_b) != 3) {
+                ret = AVERROR(EINVAL);
+                goto end;
+            }
+            if (size_r != size_g || size_r != size_b) {
+                av_log(ctx, AV_LOG_ERROR, "Unsupported size combination: %dx%dx%d.\n", size_r, size_g, size_b);
+                ret = AVERROR_PATCHWELCOME;
+                goto end;
+            }
+
+            size = size_r;
+
+            if (prelut_sizes[0] && prelut_sizes[1] && prelut_sizes[2])
+                prelut = 1;
+
+            ret = ff_allocate_3dlut(ctx, lut3d, size, prelut);
+            if (ret < 0)
+                goto end; /* in_prelut/out_prelut still have to be freed */
+            size2 = size * size; /* safe: size was range-checked above */
+
+            for (int k = 0; k < size; k++) {
+                for (int j = 0; j < size; j++) {
+                    for (int i = 0; i < size; i++) {
+                        struct rgbvec *vec = &lut3d->lut[i * size2 + j * size + k];
+
+                        NEXT_LINE_OR_GOTO(skip_line(line), end);
+                        if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3) {
+                            ret = AVERROR_INVALIDDATA;
+                            goto end;
+                        }
+
+                        vec->r *= out_max[0] - out_min[0];
+                        vec->g *= out_max[1] - out_min[1];
+                        vec->b *= out_max[2] - out_min[2];
+                    }
+                }
+            }
+
+            break;
+        }
+    }
+
+    if (prelut) {
+        for (int c = 0; c < 3; c++) {
+
+            lut3d->prelut.min[c] = in_min[c];
+            lut3d->prelut.max[c] = in_max[c];
+            lut3d->prelut.scale[c] =  (1.0f / (float)(in_max[c] - in_min[c])) * (lut3d->prelut.size - 1);
+
+            for (int i = 0; i < lut3d->prelut.size; ++i) {
+                float mix = (float) i / (float)(lut3d->prelut.size - 1);
+                float x = lerpf(in_min[c], in_max[c], mix), a, b;
+
+                int idx = nearest_sample_index(in_prelut[c], x, 0, prelut_sizes[c]-1);
+                av_assert0(idx + 1 < prelut_sizes[c]);
+
+                a   = out_prelut[c][idx + 0];
+                b   = out_prelut[c][idx + 1];
+                mix = x - in_prelut[c][idx];
+
+                lut3d->prelut.lut[c][i] = sanitizef(lerpf(a, b, mix));
+            }
+        }
+        lut3d->scale.r = 1.00f;
+        lut3d->scale.g = 1.00f;
+        lut3d->scale.b = 1.00f;
+
+    } else {
+        lut3d->scale.r = av_clipf(1. / (in_max[0] - in_min[0]), 0.f, 1.f);
+        lut3d->scale.g = av_clipf(1. / (in_max[1] - in_min[1]), 0.f, 1.f);
+        lut3d->scale.b = av_clipf(1. / (in_max[2] - in_min[2]), 0.f, 1.f);
+    }
+
+end:
+    for (int c = 0; c < 3; c++) {
+        av_freep(&in_prelut[c]);
+        av_freep(&out_prelut[c]);
+    }
+    return ret;
+}
+
+av_cold int ff_lut3d_init(AVFilterContext *ctx, LUT3DContext *lut3d)
+{
+    int ret;
+    FILE *f;
+    const char *ext;
+
+    lut3d->scale.r = lut3d->scale.g = lut3d->scale.b = 1.f;
+
+    if (!lut3d->file) {
+        return set_identity_matrix(ctx, lut3d, 32);
+    }
+
+    f = avpriv_fopen_utf8(lut3d->file, "r");
+    if (!f) {
+        ret = AVERROR(errno);
+        av_log(ctx, AV_LOG_ERROR, "%s: %s\n", lut3d->file, av_err2str(ret));
+        return ret;
+    }
+
+    ext = strrchr(lut3d->file, '.');
+    if (!ext) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to guess the format from the extension\n");
+        ret = AVERROR_INVALIDDATA;
+        goto end;
+    }
+    ext++;
+
+    if (!av_strcasecmp(ext, "dat")) {
+        ret = parse_dat(ctx, lut3d, f);
+    } else if (!av_strcasecmp(ext, "3dl")) {
+        ret = parse_3dl(ctx, lut3d, f);
+    } else if (!av_strcasecmp(ext, "cube")) {
+        ret = parse_cube(ctx, lut3d, f);
+    } else if (!av_strcasecmp(ext, "m3d")) {
+        ret = parse_m3d(ctx, lut3d, f);
+    } else if (!av_strcasecmp(ext, "csp")) {
+        ret = parse_cinespace(ctx, lut3d, f);
+    } else {
+        av_log(ctx, AV_LOG_ERROR, "Unrecognized '.%s' file type\n", ext);
+        ret = AVERROR(EINVAL);
+    }
+
+    if (!ret && !lut3d->lutsize) {
+        av_log(ctx, AV_LOG_ERROR, "3D LUT is empty\n");
+        ret = AVERROR_INVALIDDATA;
+    }
+
+end:
+    fclose(f);
+    return ret;
+}
+
+av_cold void ff_lut3d_uninit(LUT3DContext *lut3d)
+{
+    int i;
+    av_freep(&lut3d->lut);
+
+    for (i = 0; i < 3; i++) {
+        av_freep(&lut3d->prelut.lut[i]);
+    }
+}
diff --git a/libavfilter/lut3d.h b/libavfilter/lut3d.h
index 14e3c7fea6..b6aaed85f1 100644
--- a/libavfilter/lut3d.h
+++ b/libavfilter/lut3d.h
@@ -84,4 +84,17 @@ typedef struct ThreadData {
 
 void ff_lut3d_init_x86(LUT3DContext *s, const AVPixFmtDescriptor *desc);
 
+int ff_allocate_3dlut(AVFilterContext *ctx, LUT3DContext *lut3d, int lutsize, int prelut);
+
+/**
+ * Load the 3D LUT named by `lut3d->file` into `lut3d`; if `lut3d->file` is
+ * NULL, a 32x32x32 identity LUT is set up instead.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_lut3d_init(AVFilterContext *ctx, LUT3DContext *lut3d);
+
+/** Release the memory held by the 3D LUT and its preluts. */
+void ff_lut3d_uninit(LUT3DContext *lut3d);
+
 #endif /* AVFILTER_LUT3D_H */
diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c
index 4edcc2c7a7..1da798e210 100644
--- a/libavfilter/vf_lut3d.c
+++ b/libavfilter/vf_lut3d.c
@@ -552,39 +552,6 @@ static int skip_line(const char *p)
     return !*p || *p == '#';
 }
 
-static char* fget_next_word(char* dst, int max, FILE* f)
-{
-    int c;
-    char *p = dst;
-
-    /* for null */
-    max--;
-    /* skip until next non whitespace char */
-    while ((c = fgetc(f)) != EOF) {
-        if (av_isspace(c))
-            continue;
-
-        *p++ = c;
-        max--;
-        break;
-    }
-
-    /* get max bytes or up until next whitespace char */
-    for (; max > 0; max--) {
-        if ((c = fgetc(f)) == EOF)
-            break;
-
-        if (av_isspace(c))
-            break;
-
-        *p++ = c;
-    }
-
-    *p = 0;
-    if (p == dst)
-        return NULL;
-    return p;
-}
 
 #define NEXT_LINE(loop_cond) do {                           \
     if (!fgets(line, sizeof(line), f)) {                    \
@@ -593,505 +560,6 @@ static char* fget_next_word(char* dst, int max, FILE* f)
     }                                                       \
 } while (loop_cond)
 
-#define NEXT_LINE_OR_GOTO(loop_cond, label) do {            \
-    if (!fgets(line, sizeof(line), f)) {                    \
-        av_log(ctx, AV_LOG_ERROR, "Unexpected EOF\n");      \
-        ret = AVERROR_INVALIDDATA;                          \
-        goto label;                                         \
-    }                                                       \
-} while (loop_cond)
-
-static int allocate_3dlut(AVFilterContext *ctx, int lutsize, int prelut)
-{
-    LUT3DContext *lut3d = ctx->priv;
-    int i;
-    if (lutsize < 2 || lutsize > MAX_LEVEL) {
-        av_log(ctx, AV_LOG_ERROR, "Too large or invalid 3D LUT size\n");
-        return AVERROR(EINVAL);
-    }
-
-    av_freep(&lut3d->lut);
-    lut3d->lut = av_malloc_array(lutsize * lutsize * lutsize, sizeof(*lut3d->lut));
-    if (!lut3d->lut)
-        return AVERROR(ENOMEM);
-
-    if (prelut) {
-        lut3d->prelut.size = PRELUT_SIZE;
-        for (i = 0; i < 3; i++) {
-            av_freep(&lut3d->prelut.lut[i]);
-            lut3d->prelut.lut[i] = av_malloc_array(PRELUT_SIZE, sizeof(*lut3d->prelut.lut[0]));
-            if (!lut3d->prelut.lut[i])
-                return AVERROR(ENOMEM);
-        }
-    } else {
-        lut3d->prelut.size = 0;
-        for (i = 0; i < 3; i++) {
-            av_freep(&lut3d->prelut.lut[i]);
-        }
-    }
-    lut3d->lutsize = lutsize;
-    lut3d->lutsize2 = lutsize * lutsize;
-    return 0;
-}
-
-/* Basically r g and b float values on each line, with a facultative 3DLUTSIZE
- * directive; seems to be generated by Davinci */
-static int parse_dat(AVFilterContext *ctx, FILE *f)
-{
-    LUT3DContext *lut3d = ctx->priv;
-    char line[MAX_LINE_SIZE];
-    int ret, i, j, k, size, size2;
-
-    lut3d->lutsize = size = 33;
-    size2 = size * size;
-
-    NEXT_LINE(skip_line(line));
-    if (!strncmp(line, "3DLUTSIZE ", 10)) {
-        size = strtol(line + 10, NULL, 0);
-
-        NEXT_LINE(skip_line(line));
-    }
-
-    ret = allocate_3dlut(ctx, size, 0);
-    if (ret < 0)
-        return ret;
-
-    for (k = 0; k < size; k++) {
-        for (j = 0; j < size; j++) {
-            for (i = 0; i < size; i++) {
-                struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
-                if (k != 0 || j != 0 || i != 0)
-                    NEXT_LINE(skip_line(line));
-                if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3)
-                    return AVERROR_INVALIDDATA;
-            }
-        }
-    }
-    return 0;
-}
-
-/* Iridas format */
-static int parse_cube(AVFilterContext *ctx, FILE *f)
-{
-    LUT3DContext *lut3d = ctx->priv;
-    char line[MAX_LINE_SIZE];
-    float min[3] = {0.0, 0.0, 0.0};
-    float max[3] = {1.0, 1.0, 1.0};
-
-    while (fgets(line, sizeof(line), f)) {
-        if (!strncmp(line, "LUT_3D_SIZE", 11)) {
-            int ret, i, j, k;
-            const int size = strtol(line + 12, NULL, 0);
-            const int size2 = size * size;
-
-            ret = allocate_3dlut(ctx, size, 0);
-            if (ret < 0)
-                return ret;
-
-            for (k = 0; k < size; k++) {
-                for (j = 0; j < size; j++) {
-                    for (i = 0; i < size; i++) {
-                        struct rgbvec *vec = &lut3d->lut[i * size2 + j * size + k];
-
-                        do {
-try_again:
-                            NEXT_LINE(0);
-                            if (!strncmp(line, "DOMAIN_", 7)) {
-                                float *vals = NULL;
-                                if      (!strncmp(line + 7, "MIN ", 4)) vals = min;
-                                else if (!strncmp(line + 7, "MAX ", 4)) vals = max;
-                                if (!vals)
-                                    return AVERROR_INVALIDDATA;
-                                av_sscanf(line + 11, "%f %f %f", vals, vals + 1, vals + 2);
-                                av_log(ctx, AV_LOG_DEBUG, "min: %f %f %f | max: %f %f %f\n",
-                                       min[0], min[1], min[2], max[0], max[1], max[2]);
-                                goto try_again;
-                            } else if (!strncmp(line, "TITLE", 5)) {
-                                goto try_again;
-                            }
-                        } while (skip_line(line));
-                        if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3)
-                            return AVERROR_INVALIDDATA;
-                    }
-                }
-            }
-            break;
-        }
-    }
-
-    lut3d->scale.r = av_clipf(1. / (max[0] - min[0]), 0.f, 1.f);
-    lut3d->scale.g = av_clipf(1. / (max[1] - min[1]), 0.f, 1.f);
-    lut3d->scale.b = av_clipf(1. / (max[2] - min[2]), 0.f, 1.f);
-
-    return 0;
-}
-
-/* Assume 17x17x17 LUT with a 16-bit depth
- * FIXME: it seems there are various 3dl formats */
-static int parse_3dl(AVFilterContext *ctx, FILE *f)
-{
-    char line[MAX_LINE_SIZE];
-    LUT3DContext *lut3d = ctx->priv;
-    int ret, i, j, k;
-    const int size = 17;
-    const int size2 = 17 * 17;
-    const float scale = 16*16*16;
-
-    lut3d->lutsize = size;
-
-    ret = allocate_3dlut(ctx, size, 0);
-    if (ret < 0)
-        return ret;
-
-    NEXT_LINE(skip_line(line));
-    for (k = 0; k < size; k++) {
-        for (j = 0; j < size; j++) {
-            for (i = 0; i < size; i++) {
-                int r, g, b;
-                struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
-
-                NEXT_LINE(skip_line(line));
-                if (av_sscanf(line, "%d %d %d", &r, &g, &b) != 3)
-                    return AVERROR_INVALIDDATA;
-                vec->r = r / scale;
-                vec->g = g / scale;
-                vec->b = b / scale;
-            }
-        }
-    }
-    return 0;
-}
-
-/* Pandora format */
-static int parse_m3d(AVFilterContext *ctx, FILE *f)
-{
-    LUT3DContext *lut3d = ctx->priv;
-    float scale;
-    int ret, i, j, k, size, size2, in = -1, out = -1;
-    char line[MAX_LINE_SIZE];
-    uint8_t rgb_map[3] = {0, 1, 2};
-
-    while (fgets(line, sizeof(line), f)) {
-        if      (!strncmp(line, "in",  2)) in  = strtol(line + 2, NULL, 0);
-        else if (!strncmp(line, "out", 3)) out = strtol(line + 3, NULL, 0);
-        else if (!strncmp(line, "values", 6)) {
-            const char *p = line + 6;
-#define SET_COLOR(id) do {                  \
-    while (av_isspace(*p))                  \
-        p++;                                \
-    switch (*p) {                           \
-    case 'r': rgb_map[id] = 0; break;       \
-    case 'g': rgb_map[id] = 1; break;       \
-    case 'b': rgb_map[id] = 2; break;       \
-    }                                       \
-    while (*p && !av_isspace(*p))           \
-        p++;                                \
-} while (0)
-            SET_COLOR(0);
-            SET_COLOR(1);
-            SET_COLOR(2);
-            break;
-        }
-    }
-
-    if (in == -1 || out == -1) {
-        av_log(ctx, AV_LOG_ERROR, "in and out must be defined\n");
-        return AVERROR_INVALIDDATA;
-    }
-    if (in < 2 || out < 2 ||
-        in  > MAX_LEVEL*MAX_LEVEL*MAX_LEVEL ||
-        out > MAX_LEVEL*MAX_LEVEL*MAX_LEVEL) {
-        av_log(ctx, AV_LOG_ERROR, "invalid in (%d) or out (%d)\n", in, out);
-        return AVERROR_INVALIDDATA;
-    }
-    for (size = 1; size*size*size < in; size++);
-    lut3d->lutsize = size;
-    size2 = size * size;
-
-    ret = allocate_3dlut(ctx, size, 0);
-    if (ret < 0)
-        return ret;
-
-    scale = 1. / (out - 1);
-
-    for (k = 0; k < size; k++) {
-        for (j = 0; j < size; j++) {
-            for (i = 0; i < size; i++) {
-                struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
-                float val[3];
-
-                NEXT_LINE(0);
-                if (av_sscanf(line, "%f %f %f", val, val + 1, val + 2) != 3)
-                    return AVERROR_INVALIDDATA;
-                vec->r = val[rgb_map[0]] * scale;
-                vec->g = val[rgb_map[1]] * scale;
-                vec->b = val[rgb_map[2]] * scale;
-            }
-        }
-    }
-    return 0;
-}
-
-static int nearest_sample_index(float *data, float x, int low, int hi)
-{
-    int mid;
-    if (x < data[low])
-        return low;
-
-    if (x > data[hi])
-        return hi;
-
-    for (;;) {
-        av_assert0(x >= data[low]);
-        av_assert0(x <= data[hi]);
-        av_assert0((hi-low) > 0);
-
-        if (hi - low == 1)
-            return low;
-
-        mid = (low + hi) / 2;
-
-        if (x < data[mid])
-            hi = mid;
-        else
-            low = mid;
-    }
-
-    return 0;
-}
-
-#define NEXT_FLOAT_OR_GOTO(value, label)                    \
-    if (!fget_next_word(line, sizeof(line) ,f)) {           \
-        ret = AVERROR_INVALIDDATA;                          \
-        goto label;                                         \
-    }                                                       \
-    if (av_sscanf(line, "%f", &value) != 1) {               \
-        ret = AVERROR_INVALIDDATA;                          \
-        goto label;                                         \
-    }
-
-static int parse_cinespace(AVFilterContext *ctx, FILE *f)
-{
-    LUT3DContext *lut3d = ctx->priv;
-    char line[MAX_LINE_SIZE];
-    float in_min[3]  = {0.0, 0.0, 0.0};
-    float in_max[3]  = {1.0, 1.0, 1.0};
-    float out_min[3] = {0.0, 0.0, 0.0};
-    float out_max[3] = {1.0, 1.0, 1.0};
-    int inside_metadata = 0, size, size2;
-    int prelut = 0;
-    int ret = 0;
-
-    int prelut_sizes[3] = {0, 0, 0};
-    float *in_prelut[3]  = {NULL, NULL, NULL};
-    float *out_prelut[3] = {NULL, NULL, NULL};
-
-    NEXT_LINE_OR_GOTO(skip_line(line), end);
-    if (strncmp(line, "CSPLUTV100", 10)) {
-        av_log(ctx, AV_LOG_ERROR, "Not cineSpace LUT format\n");
-        ret = AVERROR(EINVAL);
-        goto end;
-    }
-
-    NEXT_LINE_OR_GOTO(skip_line(line), end);
-    if (strncmp(line, "3D", 2)) {
-        av_log(ctx, AV_LOG_ERROR, "Not 3D LUT format\n");
-        ret = AVERROR(EINVAL);
-        goto end;
-    }
-
-    while (1) {
-        NEXT_LINE_OR_GOTO(skip_line(line), end);
-
-        if (!strncmp(line, "BEGIN METADATA", 14)) {
-            inside_metadata = 1;
-            continue;
-        }
-        if (!strncmp(line, "END METADATA", 12)) {
-            inside_metadata = 0;
-            continue;
-        }
-        if (inside_metadata == 0) {
-            int size_r, size_g, size_b;
-
-            for (int i = 0; i < 3; i++) {
-                int npoints = strtol(line, NULL, 0);
-
-                if (npoints > 2) {
-                    float v,last;
-
-                    if (npoints > PRELUT_SIZE) {
-                        av_log(ctx, AV_LOG_ERROR, "Prelut size too large.\n");
-                        ret = AVERROR_INVALIDDATA;
-                        goto end;
-                    }
-
-                    if (in_prelut[i] || out_prelut[i]) {
-                        av_log(ctx, AV_LOG_ERROR, "Invalid file has multiple preluts.\n");
-                        ret = AVERROR_INVALIDDATA;
-                        goto end;
-                    }
-
-                    in_prelut[i]  = (float*)av_malloc(npoints * sizeof(float));
-                    out_prelut[i] = (float*)av_malloc(npoints * sizeof(float));
-                    if (!in_prelut[i] || !out_prelut[i]) {
-                        ret = AVERROR(ENOMEM);
-                        goto end;
-                    }
-
-                    prelut_sizes[i] = npoints;
-                    in_min[i] = FLT_MAX;
-                    in_max[i] = -FLT_MAX;
-                    out_min[i] = FLT_MAX;
-                    out_max[i] = -FLT_MAX;
-
-                    for (int j = 0; j < npoints; j++) {
-                        NEXT_FLOAT_OR_GOTO(v, end)
-                        in_min[i] = FFMIN(in_min[i], v);
-                        in_max[i] = FFMAX(in_max[i], v);
-                        in_prelut[i][j] = v;
-                        if (j > 0 && v < last) {
-                            av_log(ctx, AV_LOG_ERROR, "Invalid file, non increasing prelut.\n");
-                            ret = AVERROR(ENOMEM);
-                            goto end;
-                        }
-                        last = v;
-                    }
-
-                    for (int j = 0; j < npoints; j++) {
-                        NEXT_FLOAT_OR_GOTO(v, end)
-                        out_min[i] = FFMIN(out_min[i], v);
-                        out_max[i] = FFMAX(out_max[i], v);
-                        out_prelut[i][j] = v;
-                    }
-
-                } else if (npoints == 2)  {
-                    NEXT_LINE_OR_GOTO(skip_line(line), end);
-                    if (av_sscanf(line, "%f %f", &in_min[i], &in_max[i]) != 2) {
-                        ret = AVERROR_INVALIDDATA;
-                        goto end;
-                    }
-                    NEXT_LINE_OR_GOTO(skip_line(line), end);
-                    if (av_sscanf(line, "%f %f", &out_min[i], &out_max[i]) != 2) {
-                        ret = AVERROR_INVALIDDATA;
-                        goto end;
-                    }
-
-                } else {
-                    av_log(ctx, AV_LOG_ERROR, "Unsupported number of pre-lut points.\n");
-                    ret = AVERROR_PATCHWELCOME;
-                    goto end;
-                }
-
-                NEXT_LINE_OR_GOTO(skip_line(line), end);
-            }
-
-            if (av_sscanf(line, "%d %d %d", &size_r, &size_g, &size_b) != 3) {
-                ret = AVERROR(EINVAL);
-                goto end;
-            }
-            if (size_r != size_g || size_r != size_b) {
-                av_log(ctx, AV_LOG_ERROR, "Unsupported size combination: %dx%dx%d.\n", size_r, size_g, size_b);
-                ret = AVERROR_PATCHWELCOME;
-                goto end;
-            }
-
-            size = size_r;
-            size2 = size * size;
-
-            if (prelut_sizes[0] && prelut_sizes[1] && prelut_sizes[2])
-                prelut = 1;
-
-            ret = allocate_3dlut(ctx, size, prelut);
-            if (ret < 0)
-                return ret;
-
-            for (int k = 0; k < size; k++) {
-                for (int j = 0; j < size; j++) {
-                    for (int i = 0; i < size; i++) {
-                        struct rgbvec *vec = &lut3d->lut[i * size2 + j * size + k];
-
-                        NEXT_LINE_OR_GOTO(skip_line(line), end);
-                        if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3) {
-                            ret = AVERROR_INVALIDDATA;
-                            goto end;
-                        }
-
-                        vec->r *= out_max[0] - out_min[0];
-                        vec->g *= out_max[1] - out_min[1];
-                        vec->b *= out_max[2] - out_min[2];
-                    }
-                }
-            }
-
-            break;
-        }
-    }
-
-    if (prelut) {
-        for (int c = 0; c < 3; c++) {
-
-            lut3d->prelut.min[c] = in_min[c];
-            lut3d->prelut.max[c] = in_max[c];
-            lut3d->prelut.scale[c] =  (1.0f / (float)(in_max[c] - in_min[c])) * (lut3d->prelut.size - 1);
-
-            for (int i = 0; i < lut3d->prelut.size; ++i) {
-                float mix = (float) i / (float)(lut3d->prelut.size - 1);
-                float x = lerpf(in_min[c], in_max[c], mix), a, b;
-
-                int idx = nearest_sample_index(in_prelut[c], x, 0, prelut_sizes[c]-1);
-                av_assert0(idx + 1 < prelut_sizes[c]);
-
-                a   = out_prelut[c][idx + 0];
-                b   = out_prelut[c][idx + 1];
-                mix = x - in_prelut[c][idx];
-
-                lut3d->prelut.lut[c][i] = sanitizef(lerpf(a, b, mix));
-            }
-        }
-        lut3d->scale.r = 1.00f;
-        lut3d->scale.g = 1.00f;
-        lut3d->scale.b = 1.00f;
-
-    } else {
-        lut3d->scale.r = av_clipf(1. / (in_max[0] - in_min[0]), 0.f, 1.f);
-        lut3d->scale.g = av_clipf(1. / (in_max[1] - in_min[1]), 0.f, 1.f);
-        lut3d->scale.b = av_clipf(1. / (in_max[2] - in_min[2]), 0.f, 1.f);
-    }
-
-end:
-    for (int c = 0; c < 3; c++) {
-        av_freep(&in_prelut[c]);
-        av_freep(&out_prelut[c]);
-    }
-    return ret;
-}
-
-static int set_identity_matrix(AVFilterContext *ctx, int size)
-{
-    LUT3DContext *lut3d = ctx->priv;
-    int ret, i, j, k;
-    const int size2 = size * size;
-    const float c = 1. / (size - 1);
-
-    ret = allocate_3dlut(ctx, size, 0);
-    if (ret < 0)
-        return ret;
-
-    for (k = 0; k < size; k++) {
-        for (j = 0; j < size; j++) {
-            for (i = 0; i < size; i++) {
-                struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
-                vec->r = k * c;
-                vec->g = j * c;
-                vec->b = i * c;
-            }
-        }
-    }
-
-    return 0;
-}
-
 static const enum AVPixelFormat pix_fmts[] = {
     AV_PIX_FMT_RGB24,  AV_PIX_FMT_BGR24,
     AV_PIX_FMT_RGBA,   AV_PIX_FMT_BGRA,
@@ -1230,66 +698,14 @@ AVFILTER_DEFINE_CLASS_EXT(lut3d, "lut3d", lut3d_haldclut_options);
 
 static av_cold int lut3d_init(AVFilterContext *ctx)
 {
-    int ret;
-    FILE *f;
-    const char *ext;
     LUT3DContext *lut3d = ctx->priv;
-
-    lut3d->scale.r = lut3d->scale.g = lut3d->scale.b = 1.f;
-
-    if (!lut3d->file) {
-        return set_identity_matrix(ctx, 32);
-    }
-
-    f = avpriv_fopen_utf8(lut3d->file, "r");
-    if (!f) {
-        ret = AVERROR(errno);
-        av_log(ctx, AV_LOG_ERROR, "%s: %s\n", lut3d->file, av_err2str(ret));
-        return ret;
-    }
-
-    ext = strrchr(lut3d->file, '.');
-    if (!ext) {
-        av_log(ctx, AV_LOG_ERROR, "Unable to guess the format from the extension\n");
-        ret = AVERROR_INVALIDDATA;
-        goto end;
-    }
-    ext++;
-
-    if (!av_strcasecmp(ext, "dat")) {
-        ret = parse_dat(ctx, f);
-    } else if (!av_strcasecmp(ext, "3dl")) {
-        ret = parse_3dl(ctx, f);
-    } else if (!av_strcasecmp(ext, "cube")) {
-        ret = parse_cube(ctx, f);
-    } else if (!av_strcasecmp(ext, "m3d")) {
-        ret = parse_m3d(ctx, f);
-    } else if (!av_strcasecmp(ext, "csp")) {
-        ret = parse_cinespace(ctx, f);
-    } else {
-        av_log(ctx, AV_LOG_ERROR, "Unrecognized '.%s' file type\n", ext);
-        ret = AVERROR(EINVAL);
-    }
-
-    if (!ret && !lut3d->lutsize) {
-        av_log(ctx, AV_LOG_ERROR, "3D LUT is empty\n");
-        ret = AVERROR_INVALIDDATA;
-    }
-
-end:
-    fclose(f);
-    return ret;
+    return ff_lut3d_init(ctx, lut3d);
 }
 
 static av_cold void lut3d_uninit(AVFilterContext *ctx)
 {
     LUT3DContext *lut3d = ctx->priv;
-    int i;
-    av_freep(&lut3d->lut);
-
-    for (i = 0; i < 3; i++) {
-        av_freep(&lut3d->prelut.lut[i]);
-    }
+    ff_lut3d_uninit(lut3d);
 }
 
 static const AVFilterPad lut3d_inputs[] = {
@@ -1499,7 +915,7 @@ static int config_clut(AVFilterLink *inlink)
         return AVERROR(EINVAL);
     }
 
-    return allocate_3dlut(ctx, level, 0);
+    return ff_allocate_3dlut(ctx, lut3d, level, 0);
 }
 
 static int update_apply_clut(FFFrameSync *fs)
diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
index 5356103e00..0a03848d7d 100644
--- a/libavfilter/vf_vpp_qsv.c
+++ b/libavfilter/vf_vpp_qsv.c
@@ -37,6 +37,7 @@
 #include "internal.h"
 #include "avfilter.h"
 #include "filters.h"
+#include "lut3d.h"
 
 #include "qsvvpp.h"
 #include "transpose.h"
@@ -67,6 +68,13 @@ typedef struct VPPContext{
     /** HDR parameters attached on the input frame */
     mfxExtMasteringDisplayColourVolume mdcv_conf;
     mfxExtContentLightLevelInfo clli_conf;
+
+    /** 3D LUT loaded from lut3d_file and the system-memory channel buffers built from it */
+    mfxExtVPP3DLut lut3d_conf;
+    LUT3DContext lut3d;
+    mfxU16* lut3d_r;
+    mfxU16* lut3d_g;
+    mfxU16* lut3d_b;
 #endif
 
     /**
@@ -388,6 +396,75 @@ static mfxStatus get_mfx_version(const AVFilterContext *ctx, mfxVersion *mfx_ver
     return MFXQueryVersion(device_hwctx->session, mfx_version);
 }
 
+#if QSV_ONEVPL
+// Create 3D LUT surface using system memory.
+// If an allocation fails, the error is logged and lut3d_conf is left untouched.
+// Reference https://intel.github.io/libvpl/latest/programming_guide/VPL_prg_vpp.html#video-processing-3dlut
+static void init_3dlut_surface(AVFilterContext *ctx)
+{
+    VPPContext *vpp = ctx->priv;
+    LUT3DContext *lut3d = &vpp->lut3d;
+    mfxExtVPP3DLut *lut3d_conf = &vpp->lut3d_conf;
+    int r, g, b, idx;
+    int lut_size = lut3d->lutsize;
+    int lut_size2 = lut_size * lut_size;
+    int lut_size3 = lut_size * lut_size2;
+
+    av_log(ctx, AV_LOG_DEBUG, "create 3D LUT surface with system memory, LUT size: %d.\n", lut_size);
+
+    vpp->lut3d_r = av_calloc(lut_size3, sizeof(mfxU16));
+    vpp->lut3d_g = av_calloc(lut_size3, sizeof(mfxU16));
+    vpp->lut3d_b = av_calloc(lut_size3, sizeof(mfxU16));
+    if (!vpp->lut3d_r || !vpp->lut3d_g || !vpp->lut3d_b) {
+        // NOTE(review): void return, so the caller cannot see this failure.
+        av_log(ctx, AV_LOG_ERROR, "Failed to allocate 3D LUT surface.\n");
+        av_freep(&vpp->lut3d_r);
+        av_freep(&vpp->lut3d_g);
+        av_freep(&vpp->lut3d_b);
+        return;
+    }
+
+    // Copy 3D LUT to system memory surface. Entries are clipped to [0, 1]
+    // first: converting an out-of-range float to mfxU16 is undefined.
+    for (r = 0, idx = 0; r < lut_size; ++r) {
+        for (g = 0; g < lut_size; ++g) {
+            for (b = 0; b < lut_size; ++b, ++idx) {
+                const struct rgbvec *v = &lut3d->lut[r * lut_size2 + g * lut_size + b];
+
+                vpp->lut3d_r[idx] = (mfxU16)(av_clipf(v->r, 0.f, 1.f) * UINT16_MAX);
+                vpp->lut3d_g[idx] = (mfxU16)(av_clipf(v->g, 0.f, 1.f) * UINT16_MAX);
+                vpp->lut3d_b[idx] = (mfxU16)(av_clipf(v->b, 0.f, 1.f) * UINT16_MAX);
+            }
+        }
+    }
+
+    memset(lut3d_conf, 0, sizeof(*lut3d_conf));
+    lut3d_conf->Header.BufferId = MFX_EXTBUFF_VPP_3DLUT;
+    lut3d_conf->Header.BufferSz = sizeof(*lut3d_conf);
+    lut3d_conf->ChannelMapping = MFX_3DLUT_CHANNEL_MAPPING_RGB_RGB;
+    lut3d_conf->BufferType = MFX_RESOURCE_SYSTEM_SURFACE;
+
+    for (int ch = 0; ch < 3; ch++) {
+        lut3d_conf->SystemBuffer.Channel[ch].DataType = MFX_DATA_TYPE_U16;
+        lut3d_conf->SystemBuffer.Channel[ch].Size = lut_size;
+    }
+    lut3d_conf->SystemBuffer.Channel[0].Data16 = vpp->lut3d_r;
+    lut3d_conf->SystemBuffer.Channel[1].Data16 = vpp->lut3d_g;
+    lut3d_conf->SystemBuffer.Channel[2].Data16 = vpp->lut3d_b;
+}
+
+// Free the channel buffers (a no-op when none were allocated) and clear lut3d_conf.
+static void uninit_3dlut_surface(AVFilterContext *ctx)
+{
+    VPPContext *vpp = ctx->priv;
+
+    av_freep(&vpp->lut3d_r);
+    av_freep(&vpp->lut3d_g);
+    av_freep(&vpp->lut3d_b);
+    memset(&vpp->lut3d_conf, 0, sizeof(vpp->lut3d_conf));
+}
+#endif // QSV_ONEVPL
+
 static int vpp_set_frame_ext_params(AVFilterContext *ctx, const AVFrame *in, AVFrame *out,  QSVVPPFrameParam *fp)
 {
 #if QSV_ONEVPL
@@ -499,6 +576,10 @@ static int vpp_set_frame_ext_params(AVFilterContext *ctx, const AVFrame *in, AVF
     outvsi_conf.MatrixCoefficients       = (out->colorspace == AVCOL_SPC_UNSPECIFIED) ? AVCOL_SPC_BT709 : out->colorspace;
     outvsi_conf.ColourDescriptionPresent = 1;
 
+    // 3D LUT does not depend on in/out frame, so initialize just once.
+    if ((vpp->lut3d_conf.Header.BufferId == 0) && vpp->lut3d.file)
+        init_3dlut_surface(ctx);
+
     if (memcmp(&vpp->invsi_conf, &invsi_conf, sizeof(mfxExtVideoSignalInfo)) ||
         memcmp(&vpp->mdcv_conf, &mdcv_conf, sizeof(mfxExtMasteringDisplayColourVolume)) ||
         memcmp(&vpp->clli_conf, &clli_conf, sizeof(mfxExtContentLightLevelInfo)) ||
@@ -516,6 +597,9 @@ static int vpp_set_frame_ext_params(AVFilterContext *ctx, const AVFrame *in, AVF
         vpp->clli_conf                     = clli_conf;
         if (clli_conf.Header.BufferId)
             fp->ext_buf[fp->num_ext_buf++] = (mfxExtBuffer*)&vpp->clli_conf;
+
+        if (vpp->lut3d_conf.Header.BufferId)
+            fp->ext_buf[fp->num_ext_buf++] = (mfxExtBuffer *)&vpp->lut3d_conf;
     }
 #endif
 
@@ -703,6 +787,26 @@ static int config_output(AVFilterLink *outlink)
 #undef INIT_MFX_EXTBUF
 #undef SET_MFX_PARAM_FIELD
 
+#if QSV_ONEVPL
+    if (vpp->lut3d.file) {
+        if (QSV_RUNTIME_VERSION_ATLEAST(mfx_version, 2, 11)) {
+            // For oneVPL-intel-gpu, lowest version that works for applying 3D LUT
+            // is 24.1.1 which has API version 2.10.
+            // By requiring runtime version 2.11, we ensure using a working version.
+            int ret;
+            av_log(ctx, AV_LOG_INFO, "Load 3D LUT from file: %s\n", vpp->lut3d.file);
+            ret = ff_lut3d_init(ctx, &vpp->lut3d);
+            if (ret != 0)
+                return ret;
+        } else {
+            av_free(vpp->lut3d.file);
+            vpp->lut3d.file = NULL;
+            av_log(ctx, AV_LOG_WARNING, "The QSV VPP 3D LUT processing option "
+                   "lut3d_file is not supported with this MSDK version.\n");
+        }
+    }
+#endif
+
     if (vpp->use_frc || vpp->use_crop || vpp->deinterlace || vpp->denoise ||
         vpp->detail || vpp->procamp || vpp->rotate || vpp->hflip ||
         inlink->w != outlink->w || inlink->h != outlink->h || in_format != vpp->out_format ||
@@ -711,6 +815,9 @@ static int config_output(AVFilterLink *outlink)
         vpp->color_transfer != AVCOL_TRC_UNSPECIFIED ||
         vpp->color_matrix != AVCOL_SPC_UNSPECIFIED ||
         vpp->tonemap ||
+#if QSV_ONEVPL
+        vpp->lut3d.file ||
+#endif
         !vpp->has_passthrough)
         return ff_qsvvpp_init(ctx, &param);
     else {
@@ -801,6 +908,14 @@ eof:
 
 static av_cold void vpp_uninit(AVFilterContext *ctx)
 {
+#if QSV_ONEVPL
+    VPPContext *vpp = ctx->priv;
+
+    /* Tear down the 3D LUT surface before the LUT it was built from. */
+    uninit_3dlut_surface(ctx);
+    if (vpp->lut3d.file)
+        ff_lut3d_uninit(&vpp->lut3d);
+#endif
     ff_qsvvpp_close(ctx);
 }
 
@@ -924,7 +1039,9 @@ static const AVOption vpp_options[] = {
       OFFSET(color_transfer_str),  AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
 
     {"tonemap", "Perform tonemapping (0=disable tonemapping, 1=perform tonemapping if the input has HDR metadata)", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, .flags = FLAGS},
-
+#if QSV_ONEVPL
+    { "lut3d_file", "Load and apply 3D LUT file", OFFSET(lut3d) + offsetof(LUT3DContext, file), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
+#endif
     { NULL }
 };
 
-- 
2.43.0