[FFmpeg-devel] [PATCH 12/15] avfilter/palettegen: base split decision on a perceptual model
Andreas Rheinhardt
andreas.rheinhardt at outlook.com
Sat Nov 5 21:07:42 EET 2022
Clément Bœsch:
> Similar to the change in paletteuse, we rely on a perceptual model to
> decide how and where to split the box.
> ---
> libavfilter/Makefile | 2 +-
> libavfilter/vf_palettegen.c | 79 ++++++++++++++++--------------
> tests/ref/fate/filter-palettegen-1 | 2 +-
> tests/ref/fate/filter-palettegen-2 | 2 +-
> 4 files changed, 44 insertions(+), 41 deletions(-)
>
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index e6b6d59d2d..0a31b76c6a 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -401,7 +401,7 @@ OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER) += vf_overlay_vulkan.o vulkan.o vul
> OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o
> OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o
> OBJS-$(CONFIG_PAD_OPENCL_FILTER) += vf_pad_opencl.o opencl.o opencl/pad.o
> -OBJS-$(CONFIG_PALETTEGEN_FILTER) += vf_palettegen.o
> +OBJS-$(CONFIG_PALETTEGEN_FILTER) += vf_palettegen.o palette.o
> OBJS-$(CONFIG_PALETTEUSE_FILTER) += vf_paletteuse.o framesync.o palette.o
> OBJS-$(CONFIG_PERMS_FILTER) += f_perms.o
> OBJS-$(CONFIG_PERSPECTIVE_FILTER) += vf_perspective.o
> diff --git a/libavfilter/vf_palettegen.c b/libavfilter/vf_palettegen.c
> index b8e4463539..4c2bcba7f7 100644
> --- a/libavfilter/vf_palettegen.c
> +++ b/libavfilter/vf_palettegen.c
> @@ -23,6 +23,8 @@
> * Generate one palette for a whole video stream.
> */
>
> +#include <float.h>
> +
> #include "libavutil/avassert.h"
> #include "libavutil/internal.h"
> #include "libavutil/opt.h"
> @@ -35,13 +37,14 @@
> /* Reference a color and how much it's used */
> struct color_ref {
> uint32_t color;
> + struct Lab lab;
> uint64_t count;
> };
>
> /* Store a range of colors */
> struct range_box {
> uint32_t color; // average color
> - int64_t variance; // overall variance of the box (how much the colors are spread)
> + double variance; // overall variance of the box (how much the colors are spread)
> int start; // index in PaletteGenContext->refs
> int len; // number of referenced colors
> int sorted_by; // whether range of colors is sorted by red (0), green (1) or blue (2)
> @@ -109,20 +112,19 @@ static int query_formats(AVFilterContext *ctx)
>
> typedef int (*cmp_func)(const void *, const void *);
>
> -#define DECLARE_CMP_FUNC(name, pos) \
> +#define DECLARE_CMP_FUNC(name) \
> static int cmp_##name(const void *pa, const void *pb) \
> { \
> const struct color_ref * const *a = pa; \
> const struct color_ref * const *b = pb; \
> - return (int)((*a)->color >> (8 * (2 - (pos))) & 0xff) \
> - - (int)((*b)->color >> (8 * (2 - (pos))) & 0xff); \
> + return FFDIFFSIGN((*a)->lab.name, (*b)->lab.name); \
> }
>
> -DECLARE_CMP_FUNC(r, 0)
> -DECLARE_CMP_FUNC(g, 1)
> -DECLARE_CMP_FUNC(b, 2)
> +DECLARE_CMP_FUNC(L)
> +DECLARE_CMP_FUNC(a)
> +DECLARE_CMP_FUNC(b)
>
> -static const cmp_func cmp_funcs[] = {cmp_r, cmp_g, cmp_b};
> +static const cmp_func cmp_funcs[] = {cmp_L, cmp_a, cmp_b};
>
> /**
> * Simple color comparison for sorting the final palette
> @@ -134,19 +136,19 @@ static int cmp_color(const void *a, const void *b)
> return FFDIFFSIGN(box1->color , box2->color);
> }
>
> -static av_always_inline int diff(const uint32_t a, const uint32_t b)
> +static av_always_inline float diff(const uint32_t a, const uint32_t b)
> {
> - const uint8_t c1[] = {a >> 16 & 0xff, a >> 8 & 0xff, a & 0xff};
> - const uint8_t c2[] = {b >> 16 & 0xff, b >> 8 & 0xff, b & 0xff};
> - const int dr = c1[0] - c2[0];
> - const int dg = c1[1] - c2[1];
> - const int db = c1[2] - c2[2];
> - return dr*dr + dg*dg + db*db;
> + const struct Lab lab0 = ff_srgb_u8_to_oklab(a);
> + const struct Lab lab1 = ff_srgb_u8_to_oklab(b);
> + const float dL = lab0.L - lab1.L;
> + const float da = lab0.a - lab1.a;
> + const float db = lab0.b - lab1.b;
> + return dL*dL + da*da + db*db;
> }
>
> static void compute_box_variance(PaletteGenContext *s, struct range_box *box)
> {
> - int64_t variance = 0;
> + double variance = 0.0;
>
> for (int i = 0; i < box->len; i++) {
> const struct color_ref *ref = s->refs[box->start + i];
> @@ -179,7 +181,7 @@ static void compute_box_variance(PaletteGenContext *s, struct range_box *box)
> static int get_next_box_id_to_split(PaletteGenContext *s)
> {
> int box_id, best_box_id = -1;
> - int64_t max_variance = -1;
> + double max_variance = -1.0;
>
> if (s->nb_boxes == s->max_colors - s->reserve_transparent)
> return -1;
> @@ -188,14 +190,14 @@ static int get_next_box_id_to_split(PaletteGenContext *s)
> struct range_box *box = &s->boxes[box_id];
>
> if (s->boxes[box_id].len >= 2) {
> - if (box->variance == -1)
> + if (box->variance == -1.0)
> compute_box_variance(s, box);
> if (box->variance > max_variance) {
> best_box_id = box_id;
> max_variance = box->variance;
> }
> } else {
> - box->variance = -1;
> + box->variance = -1.0;
> }
> }
> return best_box_id;
> @@ -245,8 +247,8 @@ static void split_box(PaletteGenContext *s, struct range_box *box, int n)
>
> box->color = get_avg_color(s->refs, box);
> new_box->color = get_avg_color(s->refs, new_box);
> - box->variance = -1;
> - new_box->variance = -1;
> + box->variance = -1.0;
> + new_box->variance = -1.0;
> }
>
> /**
> @@ -343,39 +345,39 @@ static AVFrame *get_palette_frame(AVFilterContext *ctx)
> box->len = s->nb_refs;
> box->sorted_by = -1;
> box->color = get_avg_color(s->refs, box);
> - box->variance = -1;
> + box->variance = -1.0;
> s->nb_boxes = 1;
>
> while (box && box->len > 1) {
> - int i, rr, gr, br, longest;
> + int i, longest;
> + double Lr, ar, br;
> uint64_t median, box_weight = 0;
>
> /* compute the box weight (sum all the weights of the colors in the
> * range) and its boundings */
> - uint8_t min[3] = {0xff, 0xff, 0xff};
> - uint8_t max[3] = {0x00, 0x00, 0x00};
> + float min[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
> + float max[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
> for (i = box->start; i < box->start + box->len; i++) {
> const struct color_ref *ref = s->refs[i];
> - const uint32_t rgb = ref->color;
> - const uint8_t r = rgb >> 16 & 0xff, g = rgb >> 8 & 0xff, b = rgb & 0xff;
> - min[0] = FFMIN(r, min[0]), max[0] = FFMAX(r, max[0]);
> - min[1] = FFMIN(g, min[1]), max[1] = FFMAX(g, max[1]);
> - min[2] = FFMIN(b, min[2]), max[2] = FFMAX(b, max[2]);
> + const struct Lab lab = ref->lab;
> + min[0] = FFMIN(lab.L, min[0]), max[0] = FFMAX(lab.L, max[0]);
> + min[1] = FFMIN(lab.a, min[1]), max[1] = FFMAX(lab.a, max[1]);
> + min[2] = FFMIN(lab.b, min[2]), max[2] = FFMAX(lab.b, max[2]);
> box_weight += ref->count;
> }
>
> /* define the axis to sort by according to the widest range of colors */
> - rr = max[0] - min[0];
> - gr = max[1] - min[1];
> + Lr = max[0] - min[0];
> + ar = max[1] - min[1];
> br = max[2] - min[2];
> - longest = 1; // pick green by default (the color the eye is the most sensitive to)
> - if (br >= rr && br >= gr) longest = 2;
> - if (rr >= gr && rr >= br) longest = 0;
> - if (gr >= rr && gr >= br) longest = 1; // prefer green again
> + longest = 0;
> + if (br >= Lr && br >= ar) longest = 2;
> + if (ar >= Lr && ar >= br) longest = 1;
> + if (Lr >= ar && Lr >= br) longest = 0;
>
> - ff_dlog(ctx, "box #%02X [%6d..%-6d] (%6d) w:%-6"PRIu64" ranges:[%2x %2x %2x] sort by %c (already sorted:%c) ",
> + ff_dlog(ctx, "box #%02X [%6d..%-6d] (%6d) w:%-6"PRIu64" ranges:[%.3f %.3f %.3f] sort by %c (already sorted:%c) ",
> box_id, box->start, box->start + box->len - 1, box->len, box_weight,
> - rr, gr, br, "rgb"[longest], box->sorted_by == longest ? 'y':'n');
> + Lr, ar, br, "Lab"[longest], box->sorted_by == longest ? 'y':'n');
>
> /* sort the range by its longest axis if it's not already sorted */
> if (box->sorted_by != longest) {
> @@ -449,6 +451,7 @@ static int color_inc(struct hist_node *hist, uint32_t color)
> if (!e)
> return AVERROR(ENOMEM);
> e->color = color;
> + e->lab = ff_srgb_u8_to_oklab(color);
> e->count = 1;
> return 1;
> }
> diff --git a/tests/ref/fate/filter-palettegen-1 b/tests/ref/fate/filter-palettegen-1
> index df3b714ebb..7b7ce98b76 100644
> --- a/tests/ref/fate/filter-palettegen-1
> +++ b/tests/ref/fate/filter-palettegen-1
> @@ -3,4 +3,4 @@
> #codec_id 0: rawvideo
> #dimensions 0: 16x16
> #sar 0: 1/1
> -0, 0, 0, 1, 1024, 0x69ec37aa
> +0, 0, 0, 1, 1024, 0xf1fb64c1
> diff --git a/tests/ref/fate/filter-palettegen-2 b/tests/ref/fate/filter-palettegen-2
> index 08320a8359..b856a79273 100644
> --- a/tests/ref/fate/filter-palettegen-2
> +++ b/tests/ref/fate/filter-palettegen-2
> @@ -3,4 +3,4 @@
> #codec_id 0: rawvideo
> #dimensions 0: 16x16
> #sar 0: 1/1
> -0, 0, 0, 1, 1024, 0x76078b2e
> +0, 0, 0, 1, 1024, 0xe84a671a
You are adding floating-point arithmetic to places where there was none
before (some other patches in this patchset do the same). Is the output
still bit-exact across all supported architectures?
The patchwork page for this patch,
https://patchwork.ffmpeg.org/project/ffmpeg/patch/20221105152617.1809282-13-u@pkh.me/
makes me believe that the answer is no.
- Andreas
More information about the ffmpeg-devel
mailing list