[FFmpeg-devel] [PATCH] swscale: use 16-bit intermediate precision for RGB/XYZ conversion

Sat Dec 21 11:50:52 EET 2024

On Mon, 16 Dec 2024 14:56:07 +0100 Niklas Haas <ffmpeg at haasn.xyz> wrote:
> From: Niklas Haas <git at haasn.dev>
>
> The current logic uses 12-bit linear light math, which is woefully insufficient
> and leads to nasty posterization artifacts. This patch simply switches the
> internal logic to 16-bit precision.
>
> This raises the memory requirement of these tables from 32 kB to 272 kB.

Will merge this one (with the FATE changes) in a few days if there are no
further objections.

Should we try to lazily allocate these tables to save the ~272 kB of static
memory, or is that considered an insignificant amount?

>
> Fixes: ticket 4829
> Signed-off-by: Niklas Haas <git at haasn.dev>
> Sponsored-by: Sovereign Tech Fund
> ---
>  libswscale/swscale.c          | 16 ++++++++--------
>  libswscale/swscale_internal.h |  8 ++++----
>  libswscale/utils.c            | 19 ++++++++++++-------
>  3 files changed, 24 insertions(+), 19 deletions(-)
>
> diff --git a/libswscale/swscale.c b/libswscale/swscale.c
> index 96634acfd6..da3a082905 100644
> --- a/libswscale/swscale.c
> +++ b/libswscale/swscale.c
> @@ -773,10 +773,10 @@ void ff_xyz12Torgb48(const SwsInternal *c, uint8_t *dst, int dst_stride,
>                  c->xyz2rgb_matrix[2][1] * y +
>                  c->xyz2rgb_matrix[2][2] * z >> 12;
>
> -            // limit values to 12-bit depth
> -            r = av_clip_uintp2(r, 12);
> -            g = av_clip_uintp2(g, 12);
> -            b = av_clip_uintp2(b, 12);
> +            // limit values to 16-bit depth
> +            r = av_clip_uint16(r);
> +            g = av_clip_uint16(g);
> +            b = av_clip_uint16(b);
>
>              // convert from sRGBlinear to RGB and scale from 12bit to 16bit
>              if (desc->flags & AV_PIX_FMT_FLAG_BE) {
> @@ -832,10 +832,10 @@ void ff_rgb48Toxyz12(const SwsInternal *c, uint8_t *dst, int dst_stride,
>                  c->rgb2xyz_matrix[2][1] * g +
>                  c->rgb2xyz_matrix[2][2] * b >> 12;
>
> -            // limit values to 12-bit depth
> -            x = av_clip_uintp2(x, 12);
> -            y = av_clip_uintp2(y, 12);
> -            z = av_clip_uintp2(z, 12);
> +            // limit values to 16-bit depth
> +            x = av_clip_uint16(x);
> +            y = av_clip_uint16(y);
> +            z = av_clip_uint16(z);
>
>              // convert from XYZlinear to X'Y'Z' and scale from 12bit to 16bit
>              if (desc->flags & AV_PIX_FMT_FLAG_BE) {
> diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
> index 768e394560..5acd277b50 100644
> --- a/libswscale/swscale_internal.h
> +++ b/libswscale/swscale_internal.h
> @@ -547,10 +547,10 @@ struct SwsInternal {
>  /* pre defined color-spaces gamma */
>  #define XYZ_GAMMA (2.6f)
>  #define RGB_GAMMA (2.2f)
> -    int16_t *xyzgamma;
> -    int16_t *rgbgamma;
> -    int16_t *xyzgammainv;
> -    int16_t *rgbgammainv;
> +    uint16_t *xyzgamma;
> +    uint16_t *rgbgamma;
> +    uint16_t *xyzgammainv;
> +    uint16_t *rgbgammainv;
>      int16_t xyz2rgb_matrix[3][4];
>      int16_t rgb2xyz_matrix[3][4];
>
> diff --git a/libswscale/utils.c b/libswscale/utils.c
> index 4dedbfc394..937e19f651 100644
> --- a/libswscale/utils.c
> +++ b/libswscale/utils.c
> @@ -948,7 +948,8 @@ static void fill_xyztables(SwsInternal *c)
>          {1689, 1464,  739},
>          { 871, 2929,  296},
>          {  79,  488, 3891} };
> -    static int16_t xyzgamma_tab[4096], rgbgamma_tab[4096], xyzgammainv_tab[4096], rgbgammainv_tab[4096];
> +    static uint16_t xyzgamma_tab[4096],  rgbgammainv_tab[4096];
> +    static uint16_t rgbgamma_tab[65536], xyzgammainv_tab[65536];
>
>      memcpy(c->xyz2rgb_matrix, xyz2rgb_matrix, sizeof(c->xyz2rgb_matrix));
>      memcpy(c->rgb2xyz_matrix, rgb2xyz_matrix, sizeof(c->rgb2xyz_matrix));
> @@ -957,15 +958,19 @@ static void fill_xyztables(SwsInternal *c)
>      c->xyzgammainv = xyzgammainv_tab;
>      c->rgbgammainv = rgbgammainv_tab;
>
> -    if (rgbgamma_tab[4095])
> +    if (xyzgamma_tab[4095])
>          return;
>
> -    /* set gamma vectors */
> +    /* set input gamma vectors */
>      for (i = 0; i < 4096; i++) {
> -        xyzgamma_tab[i] = lrint(pow(i / 4095.0, xyzgamma) * 4095.0);
> -        rgbgamma_tab[i] = lrint(pow(i / 4095.0, rgbgamma) * 4095.0);
> -        xyzgammainv_tab[i] = lrint(pow(i / 4095.0, xyzgammainv) * 4095.0);
> -        rgbgammainv_tab[i] = lrint(pow(i / 4095.0, rgbgammainv) * 4095.0);
> +        xyzgamma_tab[i] = lrint(pow(i / 4095.0, xyzgamma) * 65535.0);
> +        rgbgammainv_tab[i] = lrint(pow(i / 4095.0, rgbgammainv) * 65535.0);
> +    }
> +
> +    /* set output gamma vectors */
> +    for (i = 0; i < 65536; i++) {
> +        rgbgamma_tab[i] = lrint(pow(i / 65535.0, rgbgamma) * 4095.0);
> +        xyzgammainv_tab[i] = lrint(pow(i / 65535.0, xyzgammainv) * 4095.0);
>      }
>  }
>
> --
> 2.47.0
>