[FFmpeg-devel] [PATCH 12/15] avfilter/palettegen: base split decision on a perceptual model

Clément Bœsch u at pkh.me
Sat Nov 5 17:26:14 EET 2022


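Similar to the change in paletteuse, we now rely on a perceptual model
(OkLab) to decide how and where to split the box: the color distance,
the per-axis ranges and the sort order are computed on the L/a/b
components instead of the 8-bit RGB ones. The FATE references are
updated accordingly.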
---
 libavfilter/Makefile               |  2 +-
 libavfilter/vf_palettegen.c        | 79 ++++++++++++++++--------------
 tests/ref/fate/filter-palettegen-1 |  2 +-
 tests/ref/fate/filter-palettegen-2 |  2 +-
 4 files changed, 44 insertions(+), 41 deletions(-)

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index e6b6d59d2d..0a31b76c6a 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -401,7 +401,7 @@ OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER)         += vf_overlay_vulkan.o vulkan.o vul
 OBJS-$(CONFIG_OWDENOISE_FILTER)              += vf_owdenoise.o
 OBJS-$(CONFIG_PAD_FILTER)                    += vf_pad.o
 OBJS-$(CONFIG_PAD_OPENCL_FILTER)             += vf_pad_opencl.o opencl.o opencl/pad.o
-OBJS-$(CONFIG_PALETTEGEN_FILTER)             += vf_palettegen.o
+OBJS-$(CONFIG_PALETTEGEN_FILTER)             += vf_palettegen.o palette.o
 OBJS-$(CONFIG_PALETTEUSE_FILTER)             += vf_paletteuse.o framesync.o palette.o
 OBJS-$(CONFIG_PERMS_FILTER)                  += f_perms.o
 OBJS-$(CONFIG_PERSPECTIVE_FILTER)            += vf_perspective.o
diff --git a/libavfilter/vf_palettegen.c b/libavfilter/vf_palettegen.c
index b8e4463539..4c2bcba7f7 100644
--- a/libavfilter/vf_palettegen.c
+++ b/libavfilter/vf_palettegen.c
@@ -23,6 +23,8 @@
  * Generate one palette for a whole video stream.
  */
 
+#include <float.h>
+
 #include "libavutil/avassert.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
@@ -35,13 +37,14 @@
 /* Reference a color and how much it's used */
 struct color_ref {
     uint32_t color;
+    struct Lab lab;
     uint64_t count;
 };
 
 /* Store a range of colors */
 struct range_box {
     uint32_t color;     // average color
-    int64_t variance;   // overall variance of the box (how much the colors are spread)
+    double variance;    // overall variance of the box (how much the colors are spread)
     int start;          // index in PaletteGenContext->refs
     int len;            // number of referenced colors
     int sorted_by;      // whether range of colors is sorted by red (0), green (1) or blue (2)
@@ -109,20 +112,19 @@ static int query_formats(AVFilterContext *ctx)
 
 typedef int (*cmp_func)(const void *, const void *);
 
-#define DECLARE_CMP_FUNC(name, pos)                     \
+#define DECLARE_CMP_FUNC(name)                          \
 static int cmp_##name(const void *pa, const void *pb)   \
 {                                                       \
     const struct color_ref * const *a = pa;             \
     const struct color_ref * const *b = pb;             \
-    return   (int)((*a)->color >> (8 * (2 - (pos))) & 0xff)  \
-           - (int)((*b)->color >> (8 * (2 - (pos))) & 0xff); \
+    return FFDIFFSIGN((*a)->lab.name, (*b)->lab.name);  \
 }
 
-DECLARE_CMP_FUNC(r, 0)
-DECLARE_CMP_FUNC(g, 1)
-DECLARE_CMP_FUNC(b, 2)
+DECLARE_CMP_FUNC(L)
+DECLARE_CMP_FUNC(a)
+DECLARE_CMP_FUNC(b)
 
-static const cmp_func cmp_funcs[] = {cmp_r, cmp_g, cmp_b};
+static const cmp_func cmp_funcs[] = {cmp_L, cmp_a, cmp_b};
 
 /**
  * Simple color comparison for sorting the final palette
@@ -134,19 +136,19 @@ static int cmp_color(const void *a, const void *b)
     return FFDIFFSIGN(box1->color , box2->color);
 }
 
-static av_always_inline int diff(const uint32_t a, const uint32_t b)
+static av_always_inline float diff(const uint32_t a, const uint32_t b)
 {
-    const uint8_t c1[] = {a >> 16 & 0xff, a >> 8 & 0xff, a & 0xff};
-    const uint8_t c2[] = {b >> 16 & 0xff, b >> 8 & 0xff, b & 0xff};
-    const int dr = c1[0] - c2[0];
-    const int dg = c1[1] - c2[1];
-    const int db = c1[2] - c2[2];
-    return dr*dr + dg*dg + db*db;
+    const struct Lab lab0 = ff_srgb_u8_to_oklab(a);
+    const struct Lab lab1 = ff_srgb_u8_to_oklab(b);
+    const float dL = lab0.L - lab1.L;
+    const float da = lab0.a - lab1.a;
+    const float db = lab0.b - lab1.b;
+    return dL*dL + da*da + db*db;
 }
 
 static void compute_box_variance(PaletteGenContext *s, struct range_box *box)
 {
-    int64_t variance = 0;
+    double variance = 0.0;
 
     for (int i = 0; i < box->len; i++) {
         const struct color_ref *ref = s->refs[box->start + i];
@@ -179,7 +181,7 @@ static void compute_box_variance(PaletteGenContext *s, struct range_box *box)
 static int get_next_box_id_to_split(PaletteGenContext *s)
 {
     int box_id, best_box_id = -1;
-    int64_t max_variance = -1;
+    double max_variance = -1.0;
 
     if (s->nb_boxes == s->max_colors - s->reserve_transparent)
         return -1;
@@ -188,14 +190,14 @@ static int get_next_box_id_to_split(PaletteGenContext *s)
         struct range_box *box = &s->boxes[box_id];
 
         if (s->boxes[box_id].len >= 2) {
-            if (box->variance == -1)
+            if (box->variance == -1.0)
                 compute_box_variance(s, box);
             if (box->variance > max_variance) {
                 best_box_id = box_id;
                 max_variance = box->variance;
             }
         } else {
-            box->variance = -1;
+            box->variance = -1.0;
         }
     }
     return best_box_id;
@@ -245,8 +247,8 @@ static void split_box(PaletteGenContext *s, struct range_box *box, int n)
 
     box->color     = get_avg_color(s->refs, box);
     new_box->color = get_avg_color(s->refs, new_box);
-    box->variance     = -1;
-    new_box->variance = -1;
+    box->variance     = -1.0;
+    new_box->variance = -1.0;
 }
 
 /**
@@ -343,39 +345,39 @@ static AVFrame *get_palette_frame(AVFilterContext *ctx)
     box->len = s->nb_refs;
     box->sorted_by = -1;
     box->color = get_avg_color(s->refs, box);
-    box->variance = -1;
+    box->variance = -1.0;
     s->nb_boxes = 1;
 
     while (box && box->len > 1) {
-        int i, rr, gr, br, longest;
+        int i, longest;
+        double Lr, ar, br;
         uint64_t median, box_weight = 0;
 
         /* compute the box weight (sum all the weights of the colors in the
          * range) and its boundings */
-        uint8_t min[3] = {0xff, 0xff, 0xff};
-        uint8_t max[3] = {0x00, 0x00, 0x00};
+        float min[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
+        float max[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
         for (i = box->start; i < box->start + box->len; i++) {
             const struct color_ref *ref = s->refs[i];
-            const uint32_t rgb = ref->color;
-            const uint8_t r = rgb >> 16 & 0xff, g = rgb >> 8 & 0xff, b = rgb & 0xff;
-            min[0] = FFMIN(r, min[0]), max[0] = FFMAX(r, max[0]);
-            min[1] = FFMIN(g, min[1]), max[1] = FFMAX(g, max[1]);
-            min[2] = FFMIN(b, min[2]), max[2] = FFMAX(b, max[2]);
+            const struct Lab lab = ref->lab;
+            min[0] = FFMIN(lab.L, min[0]), max[0] = FFMAX(lab.L, max[0]);
+            min[1] = FFMIN(lab.a, min[1]), max[1] = FFMAX(lab.a, max[1]);
+            min[2] = FFMIN(lab.b, min[2]), max[2] = FFMAX(lab.b, max[2]);
             box_weight += ref->count;
         }
 
         /* define the axis to sort by according to the widest range of colors */
-        rr = max[0] - min[0];
-        gr = max[1] - min[1];
+        Lr = max[0] - min[0];
+        ar = max[1] - min[1];
         br = max[2] - min[2];
-        longest = 1; // pick green by default (the color the eye is the most sensitive to)
-        if (br >= rr && br >= gr) longest = 2;
-        if (rr >= gr && rr >= br) longest = 0;
-        if (gr >= rr && gr >= br) longest = 1; // prefer green again
+        longest = 0;
+        if (br >= Lr && br >= ar) longest = 2;
+        if (ar >= Lr && ar >= br) longest = 1;
+        if (Lr >= ar && Lr >= br) longest = 0;
 
-        ff_dlog(ctx, "box #%02X [%6d..%-6d] (%6d) w:%-6"PRIu64" ranges:[%2x %2x %2x] sort by %c (already sorted:%c) ",
+        ff_dlog(ctx, "box #%02X [%6d..%-6d] (%6d) w:%-6"PRIu64" ranges:[%.3f %.3f %.3f] sort by %c (already sorted:%c) ",
                 box_id, box->start, box->start + box->len - 1, box->len, box_weight,
-                rr, gr, br, "rgb"[longest], box->sorted_by == longest ? 'y':'n');
+                Lr, ar, br, "Lab"[longest], box->sorted_by == longest ? 'y':'n');
 
         /* sort the range by its longest axis if it's not already sorted */
         if (box->sorted_by != longest) {
@@ -449,6 +451,7 @@ static int color_inc(struct hist_node *hist, uint32_t color)
     if (!e)
         return AVERROR(ENOMEM);
     e->color = color;
+    e->lab = ff_srgb_u8_to_oklab(color);
     e->count = 1;
     return 1;
 }
diff --git a/tests/ref/fate/filter-palettegen-1 b/tests/ref/fate/filter-palettegen-1
index df3b714ebb..7b7ce98b76 100644
--- a/tests/ref/fate/filter-palettegen-1
+++ b/tests/ref/fate/filter-palettegen-1
@@ -3,4 +3,4 @@
 #codec_id 0: rawvideo
 #dimensions 0: 16x16
 #sar 0: 1/1
-0,          0,          0,        1,     1024, 0x69ec37aa
+0,          0,          0,        1,     1024, 0xf1fb64c1
diff --git a/tests/ref/fate/filter-palettegen-2 b/tests/ref/fate/filter-palettegen-2
index 08320a8359..b856a79273 100644
--- a/tests/ref/fate/filter-palettegen-2
+++ b/tests/ref/fate/filter-palettegen-2
@@ -3,4 +3,4 @@
 #codec_id 0: rawvideo
 #dimensions 0: 16x16
 #sar 0: 1/1
-0,          0,          0,        1,     1024, 0x76078b2e
+0,          0,          0,        1,     1024, 0xe84a671a
-- 
2.38.1



More information about the ffmpeg-devel mailing list