[FFmpeg-devel] [PATCH] lavu/pixdesc: favour formats where bit depth exactly matches

Fri Sep 9 09:31:09 EEST 2022

On Thu, 2022-09-08 at 20:46 -0700, Philip Langdale wrote:
> Since introducing the various packed formats used by VAAPI (and p012),
> we've noticed that there's actually a gap in how
> av_find_best_pix_fmt_of_2 works. It doesn't actually assign any value
> to having the same bit depth as the source format, when comparing
> against formats with a higher bit depth. This usually doesn't matter,
> because av_get_padded_bits_per_pixel() will account for it.
> 
> However, as many of these formats use padding internally, we find that
> av_get_padded_bits_per_pixel() actually returns the same value for the
> 10 bit, 12, bit, 16 bit flavours, etc. In these tied situations, we end

12 bit,

> up just picking the first of the two provided formats, even if the
> second one should be preferred because it matches the actual bit depth.
> 
> This bug already existed if you tried to compare yuv420p10 against p016
> and p010, for example, but it simply hadn't come up before so we never
> noticed.
> 
> But now, we actually got a situation in the VAAPI VP9 decoder where it
> offers both p010 and p012 because Profile 3 could be either depth and
> ends up picking p012 for 10 bit content due to the ordering of the
> testing.
> 
> In addition, in the process of testing the fix, I realised we have the
> same gap when it comes to chroma subsampling - we do not favour a
> format that has exactly the same subsampling vs one with less
> subsampling when all else is equal.
> 
> To fix this, I'm introducing a small score penalty if the bit depth or
> subsampling doesn't exactly match the source format. This will break
> the tie in favour of the format with the exact match, but not offset
> any of the other scoring penalties we already have.
> 
> I have added a set of tests around these formats which will fail
> without this fix.
> 
> Signed-off-by: Philip Langdale <philipl at overt.org>
> ---
>  libavutil/pixdesc.c           | 16 +++++-
>  libavutil/tests/pixfmt_best.c | 93 ++++++++++++++++++++++++++++-------
>  tests/ref/fate/pixfmt_best    |  2 +-
>  3 files changed, 89 insertions(+), 22 deletions(-)
> 
> diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
> index d7c6ebfdc4..412e257a30 100644
> --- a/libavutil/pixdesc.c
> +++ b/libavutil/pixdesc.c
> @@ -3004,6 +3004,11 @@ static int get_pix_fmt_score(enum AVPixelFormat
> dst_pix_fmt,
>      if ((ret = get_pix_fmt_depth(&dst_min_depth, &dst_max_depth,
> dst_pix_fmt)) < 0)
>          return -3;
>  
> +    // Favour formats where bit depth exactly matches. If all other scoring
> is
> +    // equal, we'd rather use a lower bit depth that matches the source.
> +    if (src_min_depth != dst_min_depth || src_max_depth != dst_max_depth)
> +        score -= 64;
> +
>      src_color = get_color_type(src_desc);
>      dst_color = get_color_type(dst_desc);
>      if (dst_pix_fmt == AV_PIX_FMT_PAL8)
> @@ -3020,14 +3025,21 @@ static int get_pix_fmt_score(enum AVPixelFormat
> dst_pix_fmt,
>      }
>  
>      if (consider & FF_LOSS_RESOLUTION) {
> +        // Apply a large penalty if there's a loss of resolution, but also
> apply
> +        // a small penalty of the dst resolution is higher so that we favour
> +        // exactly matching formats.
>          if (dst_desc->log2_chroma_w > src_desc->log2_chroma_w) {
>              loss |= FF_LOSS_RESOLUTION;
>              score -= 256 << dst_desc->log2_chroma_w;
> -        }
> +        } else if (dst_desc->log2_chroma_w < src_desc->log2_chroma_w)
> +            score -= 32;
> +
>          if (dst_desc->log2_chroma_h > src_desc->log2_chroma_h) {
>              loss |= FF_LOSS_RESOLUTION;
>              score -= 256 << dst_desc->log2_chroma_h;
> -        }
> +        } else if (dst_desc->log2_chroma_h < src_desc->log2_chroma_h)
> +            score -= 32;
> +
>          // don't favor 422 over 420 if downsampling is needed, because 420
> has much better support on the decoder side
>          if (dst_desc->log2_chroma_w == 1 && src_desc->log2_chroma_w == 0 &&
>              dst_desc->log2_chroma_h == 1 && src_desc->log2_chroma_h == 0 ) {
> diff --git a/libavutil/tests/pixfmt_best.c b/libavutil/tests/pixfmt_best.c
> index 0542af494f..b5d4d04976 100644
> --- a/libavutil/tests/pixfmt_best.c
> +++ b/libavutil/tests/pixfmt_best.c
> @@ -39,32 +39,59 @@ static const enum AVPixelFormat pixfmt_list[] = {
>      AV_PIX_FMT_VAAPI,
>  };
>  
> -static enum AVPixelFormat find_best(enum AVPixelFormat pixfmt)
> +static const enum AVPixelFormat semiplanar_list[] = {
> +    AV_PIX_FMT_P016,
> +    AV_PIX_FMT_P012,
> +    AV_PIX_FMT_P010,
> +    AV_PIX_FMT_NV12,
> +};
> +
> +static const enum AVPixelFormat packed_list[] = {
> +    AV_PIX_FMT_XV36,
> +    AV_PIX_FMT_XV30,
> +    AV_PIX_FMT_VUYX,
> +    AV_PIX_FMT_Y212,
> +    AV_PIX_FMT_Y210,
> +    AV_PIX_FMT_YUYV422,
> +};
> +
> +typedef enum AVPixelFormat (*find_best_t)(enum AVPixelFormat pixfmt);
> +
> +#define find_best_wrapper(name, list) \
> +static enum AVPixelFormat find_best_ ## name (enum AVPixelFormat pixfmt)    \
> +{                                                                           \
> +    enum AVPixelFormat best = AV_PIX_FMT_NONE;                              \
> +    int i;                                                                  \
> +    for (i = 0; i < FF_ARRAY_ELEMS(list); i++)                              \
> +        best = av_find_best_pix_fmt_of_2(best, list[i],                     \
> +                                         pixfmt, 0, NULL);                  \
> +    return best;                                                            \
> +}
> +
> +find_best_wrapper(base, pixfmt_list)
> +find_best_wrapper(seminplanar, semiplanar_list)
> +find_best_wrapper(packed, packed_list)
> +
> +static void test(enum AVPixelFormat input, enum AVPixelFormat expected,
> +                 int *pass, int *fail, find_best_t find_best_fn)
>  {
> -    enum AVPixelFormat best = AV_PIX_FMT_NONE;
> -    int i;
> -    for (i = 0; i < FF_ARRAY_ELEMS(pixfmt_list); i++)
> -        best = av_find_best_pix_fmt_of_2(best, pixfmt_list[i],
> -                                         pixfmt, 0, NULL);
> -    return best;
> +        enum AVPixelFormat output = find_best_fn(input);
> +        if (output != expected) {
> +            printf("Matching %s: got %s, expected %s\n",
> +                   av_get_pix_fmt_name(input),
> +                   av_get_pix_fmt_name(output),
> +                   av_get_pix_fmt_name(expected));
> +            ++(*fail);
> +        } else
> +            ++(*pass);
>  }
>  
>  int main(void)
>  {
> -    enum AVPixelFormat output;
>      int i, pass = 0, fail = 0;
>  
> -#define TEST(input, expected) do {                              \
> -        output = find_best(input);                              \
> -        if (output != expected) {                               \
> -            printf("Matching %s: got %s, expected %s\n",        \
> -                   av_get_pix_fmt_name(input),                  \
> -                   av_get_pix_fmt_name(output),                 \
> -                   av_get_pix_fmt_name(expected));              \
> -            ++fail;                                             \
> -        } else                                                  \
> -            ++pass;                                             \
> -    } while (0)
> +#define TEST(input, expected) \
> +    test(input, expected, &pass, &fail, find_best_base);
>  
>      // Same formats.
>      for (i = 0; i < FF_ARRAY_ELEMS(pixfmt_list); i++)
> @@ -137,6 +164,34 @@ int main(void)
>      // Opaque formats are least unlike each other.
>      TEST(AV_PIX_FMT_DXVA2_VLD, AV_PIX_FMT_VDPAU);
>  
> +#define TEST_SEMIPLANAR(input, expected) \
> +    test(input, expected, &pass, &fail, find_best_seminplanar);
> +
> +    // Same formats.
> +    for (i = 0; i < FF_ARRAY_ELEMS(semiplanar_list); i++)
> +        TEST_SEMIPLANAR(semiplanar_list[i], semiplanar_list[i]);
> +
> +    // Formats containing the same data in different layouts.
> +    TEST_SEMIPLANAR(AV_PIX_FMT_YUV420P, AV_PIX_FMT_NV12);
> +    TEST_SEMIPLANAR(AV_PIX_FMT_YUV420P10, AV_PIX_FMT_P010);
> +    TEST_SEMIPLANAR(AV_PIX_FMT_YUV420P12, AV_PIX_FMT_P012);
> +    TEST_SEMIPLANAR(AV_PIX_FMT_YUV420P16, AV_PIX_FMT_P016);
> +
> +#define TEST_PACKED(input, expected) \
> +    test(input, expected, &pass, &fail, find_best_packed);
> +
> +    // Same formats.
> +    for (i = 0; i < FF_ARRAY_ELEMS(packed_list); i++)
> +        TEST_PACKED(packed_list[i], packed_list[i]);
> +
> +    // Formats containing the same data in different layouts.
> +    TEST_PACKED(AV_PIX_FMT_YUV444P, AV_PIX_FMT_VUYX);
> +    TEST_PACKED(AV_PIX_FMT_YUV444P10, AV_PIX_FMT_XV30);
> +    TEST_PACKED(AV_PIX_FMT_YUV444P12, AV_PIX_FMT_XV36);
> +    TEST_PACKED(AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUYV422);
> +    TEST_PACKED(AV_PIX_FMT_YUV422P10, AV_PIX_FMT_Y210);
> +    TEST_PACKED(AV_PIX_FMT_YUV422P12, AV_PIX_FMT_Y212);
> +
>      printf("%d tests passed, %d tests failed.\n", pass, fail);
>      return !!fail;
>  }
> diff --git a/tests/ref/fate/pixfmt_best b/tests/ref/fate/pixfmt_best
> index 783c5fe640..63911e7198 100644
> --- a/tests/ref/fate/pixfmt_best
> +++ b/tests/ref/fate/pixfmt_best
> @@ -1 +1 @@
> -75 tests passed, 0 tests failed.
> +95 tests passed, 0 tests failed.

Patchset LGTM and works well for me.

BRs
Haihao