[FFmpeg-devel] [PATCH 3/4] checkasm/sw_rgb: add tests for yuv2packed{1, 2, X}

Tue Dec 17 11:32:49 EET 2024

On Mon, 16 Dec 2024 12:48:19 +0100 Niklas Haas <ffmpeg at haasn.xyz> wrote:
> From: Niklas Haas <git at haasn.dev>
>
> ---
>  tests/checkasm/sw_rgb.c | 316 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 316 insertions(+)
>
> diff --git a/tests/checkasm/sw_rgb.c b/tests/checkasm/sw_rgb.c
> index 05370c1e41..3b2dffc423 100644
> --- a/tests/checkasm/sw_rgb.c
> +++ b/tests/checkasm/sw_rgb.c
> @@ -453,6 +453,306 @@ static void check_rgb_to_uv(SwsContext *sws)
>      }
>  }
>
> +static const int packed_rgb_fmts[] = { /* destination formats assigned to sws->dst_format, in this order, by the check_yuv2packed{1,2,X} tests */
> +    AV_PIX_FMT_RGB24,
> +    AV_PIX_FMT_BGR24,
> +    AV_PIX_FMT_ARGB,
> +    AV_PIX_FMT_RGBA,
> +    AV_PIX_FMT_ABGR,
> +    AV_PIX_FMT_BGRA,
> +    AV_PIX_FMT_RGB48BE,
> +    AV_PIX_FMT_RGB48LE,
> +    AV_PIX_FMT_RGB565BE,
> +    AV_PIX_FMT_RGB565LE,
> +    AV_PIX_FMT_RGB555BE,
> +    AV_PIX_FMT_RGB555LE,
> +    AV_PIX_FMT_BGR565BE,
> +    AV_PIX_FMT_BGR565LE,
> +    AV_PIX_FMT_BGR555BE,
> +    AV_PIX_FMT_BGR555LE,
> +    AV_PIX_FMT_RGB444LE,
> +    AV_PIX_FMT_RGB444BE,
> +    AV_PIX_FMT_BGR444LE,
> +    AV_PIX_FMT_BGR444BE,
> +    AV_PIX_FMT_BGR48BE,
> +    AV_PIX_FMT_BGR48LE,
> +    AV_PIX_FMT_RGBA64BE,
> +    AV_PIX_FMT_RGBA64LE,
> +    AV_PIX_FMT_BGRA64BE,
> +    AV_PIX_FMT_BGRA64LE,
> +    AV_PIX_FMT_RGB8,
> +    AV_PIX_FMT_BGR8,
> +    AV_PIX_FMT_RGB4, /* NOTE(review): RGB4/BGR4 are presumably the formats that take the AV_PIX_FMT_FLAG_BITSTREAM line-size path in the tests - confirm */
> +    AV_PIX_FMT_BGR4,
> +    AV_PIX_FMT_RGB4_BYTE,
> +    AV_PIX_FMT_BGR4_BYTE,
> +};
> +
> +#define INPUT_SIZE 512
> +
> +/*
> + * Check every available c->yuv2packed1 implementation against the reference
> + * for each format in packed_rgb_fmts and each value in alpha_values.
> + *
> + * The inputs are one luma line, two chroma lines per plane and one alpha
> + * line of random samples limited to 14 bits; a single output line of
> + * INPUT_SIZE pixels is produced and compared byte for byte.
> + *
> + * sws: scaler context; its dst_format and dither fields are overwritten for
> + *      every tested format.
> + */
> +static void check_yuv2packed1(SwsContext *sws)
> +{
> +    SwsInternal *c = sws_internal(sws);
> +    /* Passed to the function under test as its uvalpha argument. */
> +    static const int alpha_values[] = {0, 2048, 4096};
> +
> +    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
> +                      void, SwsInternal *c, const int16_t *lumSrc,
> +                      const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
> +                      const int16_t *alpSrc, uint8_t *dest,
> +                      int dstW, int uvalpha, int y);
> +
> +    const int16_t *luma;
> +    const int16_t *chru[2];
> +    const int16_t *chrv[2];
> +    const int16_t *alpha;
> +
> +    LOCAL_ALIGNED_8(int32_t, src_y, [INPUT_SIZE]);
> +    /* Two chroma lines per plane are handed to the function (chru[0..1] and
> +     * chrv[0..1]), so these buffers must hold 2 * INPUT_SIZE entries; a
> +     * single-line buffer would leave chru[1]/chrv[1] pointing past the end. */
> +    LOCAL_ALIGNED_8(int32_t, src_u, [2 * INPUT_SIZE]);
> +    LOCAL_ALIGNED_8(int32_t, src_v, [2 * INPUT_SIZE]);
> +    LOCAL_ALIGNED_8(int32_t, src_a, [INPUT_SIZE]);
> +
> +    /* Output buffers provide 16 bytes per pixel. */
> +    LOCAL_ALIGNED_8(uint8_t, dst0, [INPUT_SIZE * sizeof(int32_t[4])]);
> +    LOCAL_ALIGNED_8(uint8_t, dst1, [INPUT_SIZE * sizeof(int32_t[4])]);
> +
> +    randomize_buffers((uint8_t*)src_y, INPUT_SIZE * sizeof(int32_t));
> +    randomize_buffers((uint8_t*)src_u, 2 * INPUT_SIZE * sizeof(int32_t));
> +    randomize_buffers((uint8_t*)src_v, 2 * INPUT_SIZE * sizeof(int32_t));
> +    randomize_buffers((uint8_t*)src_a, INPUT_SIZE * sizeof(int32_t));
> +
> +    /* Limit to 14 bit input range (the mask clears the top two bits of each
> +     * 16-bit half of every 32-bit entry) */
> +    for (int i = 0; i < INPUT_SIZE; i++) {
> +        src_y[i] &= 0x3FFF3FFF;
> +        src_a[i] &= 0x3FFF3FFF;
> +    }
> +    for (int i = 0; i < 2 * INPUT_SIZE; i++) {
> +        src_u[i] &= 0x3FFF3FFF;
> +        src_v[i] &= 0x3FFF3FFF;
> +    }
> +
> +    luma =  (int16_t *)src_y;
> +    alpha = (int16_t *)src_a;
> +    for (int i = 0; i < 2; i++) {
> +        chru[i] =  (int16_t *)(src_u + i*INPUT_SIZE);
> +        chrv[i] =  (int16_t *)(src_v + i*INPUT_SIZE);
> +    }
> +
> +    for (int fmi = 0; fmi < FF_ARRAY_ELEMS(packed_rgb_fmts); fmi++) {
> +        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(packed_rgb_fmts[fmi]);
> +        /* Number of output bytes to compare; for bitstream formats the
> +         * product is rounded up with a right shift by 3 (presumably because
> +         * step is then expressed in bits - confirm against pixdesc.h). */
> +        int line_size = INPUT_SIZE * desc->comp[0].step;
> +        if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM)
> +            line_size = AV_CEIL_RSHIFT(line_size, 3);
> +
> +        sws->dst_format = packed_rgb_fmts[fmi];
> +        sws->dither = SWS_DITHER_NONE;
> +
> +        /* This sets the yuv2rgb tables */
> +        sws_setColorspaceDetails(sws, c->srcColorspaceTable, sws->src_range,
> +                                 c->dstColorspaceTable, sws->dst_range,
> +                                 c->brightness, c->contrast, c->saturation);
> +
> +        ff_sws_init_scale(c);
> +
> +        for (int ai = 0; ai < FF_ARRAY_ELEMS(alpha_values); ai++) {
> +            const int chr_alpha  = alpha_values[ai];
> +            if (check_func(c->yuv2packed1, "yuv2%s_1_%d_%d", desc->name, chr_alpha, INPUT_SIZE)) {
> +                /* Give both outputs the same initial contents before the
> +                 * reference and the tested function write into them. */
> +                memset(dst0, 0xFF, INPUT_SIZE * sizeof(int32_t[4]));
> +                memset(dst1, 0xFF, INPUT_SIZE * sizeof(int32_t[4]));
> +
> +                call_ref(c, luma, chru, chrv, alpha, dst0, INPUT_SIZE, chr_alpha, 0);
> +                call_new(c, luma, chru, chrv, alpha, dst1, INPUT_SIZE, chr_alpha, 0);
> +
> +                if (memcmp(dst0, dst1, line_size))
> +                    fail();
> +
> +                bench_new(c, luma, chru, chrv, alpha, dst1, INPUT_SIZE, chr_alpha, 0);
> +            }
> +        }
> +    }
> +}
> +
> +/*
> + * Exercise c->yuv2packed2 for every format in packed_rgb_fmts, comparing each
> + * implementation under test with the reference on the same random input.
> + *
> + * Two lines each of luma, chroma U, chroma V and alpha are supplied, and the
> + * same entry of the weight table is passed as both yalpha and uvalpha.
> + *
> + * sws: scaler context; its dst_format and dither fields are overwritten for
> + *      every tested format.
> + */
> +static void check_yuv2packed2(SwsContext *sws)
> +{
> +    static const int blend_weights[] = {0, 2048, 4096};
> +    SwsInternal *c = sws_internal(sws);
> +
> +    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
> +                      void, SwsInternal *c, const int16_t *lumSrc[2],
> +                      const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
> +                      const int16_t *alpSrc[2], uint8_t *dest,
> +                      int dstW, int yalpha, int uvalpha, int y);
> +
> +    const int16_t *lum_lines[2];
> +    const int16_t *u_lines[2];
> +    const int16_t *v_lines[2];
> +    const int16_t *a_lines[2];
> +
> +    /* Each source buffer stores two consecutive lines of INPUT_SIZE entries. */
> +    LOCAL_ALIGNED_8(int32_t, src_y, [2 * INPUT_SIZE]);
> +    LOCAL_ALIGNED_8(int32_t, src_u, [2 * INPUT_SIZE]);
> +    LOCAL_ALIGNED_8(int32_t, src_v, [2 * INPUT_SIZE]);
> +    LOCAL_ALIGNED_8(int32_t, src_a, [2 * INPUT_SIZE]);
> +
> +    LOCAL_ALIGNED_8(uint8_t, out_ref, [INPUT_SIZE * sizeof(int32_t[4])]);
> +    LOCAL_ALIGNED_8(uint8_t, out_new, [INPUT_SIZE * sizeof(int32_t[4])]);
> +
> +    randomize_buffers((uint8_t*)src_y, 2 * INPUT_SIZE * sizeof(int32_t));
> +    randomize_buffers((uint8_t*)src_u, 2 * INPUT_SIZE * sizeof(int32_t));
> +    randomize_buffers((uint8_t*)src_v, 2 * INPUT_SIZE * sizeof(int32_t));
> +    randomize_buffers((uint8_t*)src_a, 2 * INPUT_SIZE * sizeof(int32_t));
> +
> +    /* Clear the top two bits of every 16-bit half: 14 bit input range */
> +    for (int n = 0; n < 2 * INPUT_SIZE; n++) {
> +        src_y[n] &= 0x3FFF3FFF;
> +        src_u[n] &= 0x3FFF3FFF;
> +        src_v[n] &= 0x3FFF3FFF;
> +        src_a[n] &= 0x3FFF3FFF;
> +    }
> +
> +    /* Line 0 starts at the buffer base, line 1 INPUT_SIZE entries later. */
> +    for (int line = 0; line < 2; line++) {
> +        const int offset = line * INPUT_SIZE;
> +
> +        lum_lines[line] = (int16_t *)&src_y[offset];
> +        u_lines[line]   = (int16_t *)&src_u[offset];
> +        v_lines[line]   = (int16_t *)&src_v[offset];
> +        a_lines[line]   = (int16_t *)&src_a[offset];
> +    }
> +
> +    for (int fmt_idx = 0; fmt_idx < FF_ARRAY_ELEMS(packed_rgb_fmts); fmt_idx++) {
> +        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(packed_rgb_fmts[fmt_idx]);
> +        const int step_total = INPUT_SIZE * desc->comp[0].step;
> +        /* Bytes of output to compare; bitstream formats get the product
> +         * rounded up with a right shift by 3. */
> +        const int cmp_bytes = (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM)
> +                            ? AV_CEIL_RSHIFT(step_total, 3)
> +                            : step_total;
> +
> +        sws->dst_format = packed_rgb_fmts[fmt_idx];
> +        sws->dither = SWS_DITHER_NONE;
> +
> +        /* Needed so that the yuv2rgb tables get set up */
> +        sws_setColorspaceDetails(sws, c->srcColorspaceTable, sws->src_range,
> +                                 c->dstColorspaceTable, sws->dst_range,
> +                                 c->brightness, c->contrast, c->saturation);
> +
> +        ff_sws_init_scale(c);
> +
> +        for (int weight_idx = 0; weight_idx < FF_ARRAY_ELEMS(blend_weights); weight_idx++) {
> +            const int weight = blend_weights[weight_idx];
> +
> +            if (!check_func(c->yuv2packed2, "yuv2%s_2_%d_%d", desc->name, weight, INPUT_SIZE))
> +                continue;
> +
> +            /* Start both outputs from identical contents. */
> +            memset(out_ref, 0xFF, INPUT_SIZE * sizeof(int32_t[4]));
> +            memset(out_new, 0xFF, INPUT_SIZE * sizeof(int32_t[4]));
> +
> +            call_ref(c, lum_lines, u_lines, v_lines, a_lines, out_ref, INPUT_SIZE, weight, weight, 0);
> +            call_new(c, lum_lines, u_lines, v_lines, a_lines, out_new, INPUT_SIZE, weight, weight, 0);
> +
> +            if (memcmp(out_ref, out_new, cmp_bytes) != 0)
> +                fail();
> +
> +            bench_new(c, lum_lines, u_lines, v_lines, a_lines, out_new, INPUT_SIZE, weight, weight, 0);
> +        }
> +    }
> +}
> +
> +/*
> + * Exercise c->yuv2packedX for every format in packed_rgb_fmts and every
> + * filter length in the table below, comparing each implementation under
> + * test with the reference on the same random input.
> + *
> + * The same filter length is used for luma and chroma. Each filter is filled
> + * with one small negative coefficient, then a randomly chosen tap is
> + * overwritten with (1 << 13) - 1.
> + *
> + * sws: scaler context; its dst_format and dither fields are overwritten for
> + *      every tested format.
> + */
> +static void check_yuv2packedX(SwsContext *sws)
> +{
> +#define LARGEST_FILTER 16
> +    static const int tap_counts[] = {2, 16};
> +    SwsInternal *c = sws_internal(sws);
> +
> +    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
> +                      void, SwsInternal *c, const int16_t *lumFilter,
> +                      const int16_t **lumSrcx, int lumFilterSize,
> +                      const int16_t *chrFilter, const int16_t **chrUSrcx,
> +                      const int16_t **chrVSrcx, int chrFilterSize,
> +                      const int16_t **alpSrcx, uint8_t *dest,
> +                      int dstW, int y);
> +
> +    const int16_t *lum_lines[LARGEST_FILTER];
> +    const int16_t *u_lines[LARGEST_FILTER];
> +    const int16_t *v_lines[LARGEST_FILTER];
> +    const int16_t *a_lines[LARGEST_FILTER];
> +
> +    LOCAL_ALIGNED_8(int16_t, lum_coeffs, [LARGEST_FILTER]);
> +    LOCAL_ALIGNED_8(int16_t, chr_coeffs, [LARGEST_FILTER]);
> +
> +    /* One line of INPUT_SIZE entries per tap of the largest filter. */
> +    LOCAL_ALIGNED_8(int32_t, src_y, [LARGEST_FILTER * INPUT_SIZE]);
> +    LOCAL_ALIGNED_8(int32_t, src_u, [LARGEST_FILTER * INPUT_SIZE]);
> +    LOCAL_ALIGNED_8(int32_t, src_v, [LARGEST_FILTER * INPUT_SIZE]);
> +    LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_FILTER * INPUT_SIZE]);
> +
> +    LOCAL_ALIGNED_8(uint8_t, out_ref, [INPUT_SIZE * sizeof(int32_t[4])]);
> +    LOCAL_ALIGNED_8(uint8_t, out_new, [INPUT_SIZE * sizeof(int32_t[4])]);
> +
> +    randomize_buffers((uint8_t*)src_y, LARGEST_FILTER * INPUT_SIZE * sizeof(int32_t));
> +    randomize_buffers((uint8_t*)src_u, LARGEST_FILTER * INPUT_SIZE * sizeof(int32_t));
> +    randomize_buffers((uint8_t*)src_v, LARGEST_FILTER * INPUT_SIZE * sizeof(int32_t));
> +    randomize_buffers((uint8_t*)src_a, LARGEST_FILTER * INPUT_SIZE * sizeof(int32_t));
> +
> +    /* Clear the top two bits of every 16-bit half: 14 bit input range */
> +    for (int n = 0; n < LARGEST_FILTER * INPUT_SIZE; n++) {
> +        src_y[n] &= 0x3FFF3FFF;
> +        src_u[n] &= 0x3FFF3FFF;
> +        src_v[n] &= 0x3FFF3FFF;
> +        src_a[n] &= 0x3FFF3FFF;
> +    }
> +
> +    /* Line k of each plane starts k * INPUT_SIZE entries into its buffer. */
> +    for (int line = 0; line < LARGEST_FILTER; line++) {
> +        const int offset = line * INPUT_SIZE;
> +
> +        lum_lines[line] = (int16_t *)&src_y[offset];
> +        u_lines[line]   = (int16_t *)&src_u[offset];
> +        v_lines[line]   = (int16_t *)&src_v[offset];
> +        a_lines[line]   = (int16_t *)&src_a[offset];
> +    }
> +
> +    for (int fmt_idx = 0; fmt_idx < FF_ARRAY_ELEMS(packed_rgb_fmts); fmt_idx++) {
> +        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(packed_rgb_fmts[fmt_idx]);
> +        const int step_total = INPUT_SIZE * desc->comp[0].step;
> +        /* Bytes of output to compare; bitstream formats get the product
> +         * rounded up with a right shift by 3. */
> +        const int cmp_bytes = (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM)
> +                            ? AV_CEIL_RSHIFT(step_total, 3)
> +                            : step_total;
> +
> +        sws->dst_format = packed_rgb_fmts[fmt_idx];
> +        sws->dither = SWS_DITHER_NONE;
> +
> +        /* Needed so that the yuv2rgb tables get set up */
> +        sws_setColorspaceDetails(sws, c->srcColorspaceTable, sws->src_range,
> +                                 c->dstColorspaceTable, sws->dst_range,
> +                                 c->brightness, c->contrast, c->saturation);
> +
> +        ff_sws_init_scale(c);
> +
> +        for (int size_idx = 0; size_idx < FF_ARRAY_ELEMS(tap_counts); size_idx++) {
> +            const int lum_taps = tap_counts[size_idx];
> +            const int chr_taps = tap_counts[size_idx];
> +            const int lum_fill = -((1 << 12) / (lum_taps - 1));
> +            const int chr_fill = -((1 << 12) / (chr_taps - 1));
> +
> +            /* The luma filter is built (and draws from rnd()) before the
> +             * chroma filter. */
> +            for (int k = 0; k < lum_taps; k++)
> +                lum_coeffs[k] = lum_fill;
> +            lum_coeffs[rnd() % lum_taps] = (1 << 13) - 1;
> +
> +            for (int k = 0; k < chr_taps; k++)
> +                chr_coeffs[k] = chr_fill;
> +            chr_coeffs[rnd() % chr_taps] = (1 << 13) - 1;
> +
> +            if (!check_func(c->yuv2packedX, "yuv2%s_X_%d_%d", desc->name, lum_taps, INPUT_SIZE))
> +                continue;
> +
> +            /* Start both outputs from identical contents. */
> +            memset(out_ref, 0xFF, INPUT_SIZE * sizeof(int32_t[4]));
> +            memset(out_new, 0xFF, INPUT_SIZE * sizeof(int32_t[4]));
> +
> +            call_ref(c, lum_coeffs, lum_lines, lum_taps,
> +                     chr_coeffs, u_lines, v_lines, chr_taps,
> +                     a_lines, out_ref, INPUT_SIZE, 0);
> +
> +            call_new(c, lum_coeffs, lum_lines, lum_taps,
> +                     chr_coeffs, u_lines, v_lines, chr_taps,
> +                     a_lines, out_new, INPUT_SIZE, 0);
> +
> +            if (memcmp(out_ref, out_new, cmp_bytes) != 0)
> +                fail();
> +
> +            bench_new(c, lum_coeffs, lum_lines, lum_taps,
> +                      chr_coeffs, u_lines, v_lines, chr_taps,
> +                      a_lines, out_new, INPUT_SIZE, 0);
> +        }
> +    }
> +}
> +
> +#undef INPUT_SIZE
> +#undef LARGEST_FILTER
> +
>  void checkasm_check_sw_rgb(void)
>  {
>      SwsContext *sws;
> @@ -498,5 +798,21 @@ void checkasm_check_sw_rgb(void)
>      check_rgb24toyv12(sws);
>      report("rgb24toyv12");
>
> +    sws_freeContext(sws);
> +    sws = sws_getContext(MAX_LINE_SIZE, MAX_LINE_SIZE, AV_PIX_FMT_YUV420P,
> +                         MAX_LINE_SIZE, MAX_LINE_SIZE, AV_PIX_FMT_RGB24,
> +                         SWS_ACCURATE_RND | SWS_BITEXACT, NULL, NULL, NULL);
> +    if (!sws)
> +        fail();
> +
> +    check_yuv2packed1(sws);
> +    report("yuv2packedX");
> +
> +    check_yuv2packed2(sws);
> +    report("yuv2packedX");
> +
> +    check_yuv2packedX(sws);
> +    report("yuv2packedX");

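Wrong names here: all three report() calls say "yuv2packedX", but the first two follow check_yuv2packed1() and check_yuv2packed2(). Fixed.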

> +
>      sws_freeContext(sws);
>  }
> --
> 2.47.0
>