[FFmpeg-devel] [PATCH 14/17] tests/checkasm: add checkasm tests for swscale ops

Sat Apr 26 20:41:18 EEST 2025

From: Niklas Haas <git at haasn.dev>

Because of the lack of an external ABI on low-level kernels, we cannot
directly test internal functions. Instead, we construct a minimal op chain
consisting of a read, the op to be tested, and a write.

The bigger complication arises from the fact that the backend may generate
arbitrary internal state that needs to be passed back to the implementation,
which means we cannot directly call `func_ref` on the generated chain. To get
around this, always compile the op chain twice - once using the backend to be
tested, and once using the reference C backend.
---
 tests/checkasm/Makefile   |   8 +-
 tests/checkasm/checkasm.c |   1 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/sw_ops.c   | 748 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 757 insertions(+), 1 deletion(-)
 create mode 100644 tests/checkasm/sw_ops.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index d5c50e5599..be4c6b265f 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -65,7 +65,13 @@ AVFILTEROBJS-$(CONFIG_SOBEL_FILTER)      += vf_convolution.o
 CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes)
 
 # swscale tests
-SWSCALEOBJS                             += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o sw_yuv2rgb.o sw_yuv2yuv.o
+SWSCALEOBJS                             += sw_gbrp.o            \
+                                           sw_ops.o             \
+                                           sw_range_convert.o   \
+                                           sw_rgb.o             \
+                                           sw_scale.o           \
+                                           sw_yuv2rgb.o         \
+                                           sw_yuv2yuv.o
 
 CHECKASMOBJS-$(CONFIG_SWSCALE)  += $(SWSCALEOBJS)
 
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 6e99d33d70..5f3a900bfd 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -294,6 +294,7 @@ static const struct {
     { "sw_scale", checkasm_check_sw_scale },
     { "sw_yuv2rgb", checkasm_check_sw_yuv2rgb },
     { "sw_yuv2yuv", checkasm_check_sw_yuv2yuv },
+    { "sw_ops", checkasm_check_sw_ops },
 #endif
 #if CONFIG_AVUTIL
         { "aes",       checkasm_check_aes },
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 0f8dea82e9..959e66d9f8 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -131,6 +131,7 @@ void checkasm_check_sw_rgb(void);
 void checkasm_check_sw_scale(void);
 void checkasm_check_sw_yuv2rgb(void);
 void checkasm_check_sw_yuv2yuv(void);
+void checkasm_check_sw_ops(void);
 void checkasm_check_takdsp(void);
 void checkasm_check_utvideodsp(void);
 void checkasm_check_v210dec(void);
diff --git a/tests/checkasm/sw_ops.c b/tests/checkasm/sw_ops.c
new file mode 100644
index 0000000000..7b38bd6902
--- /dev/null
+++ b/tests/checkasm/sw_ops.c
@@ -0,0 +1,748 @@
+/**
+ * Copyright (C) 2025 Niklas Haas
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/refstruct.h"
+
+#include "libswscale/ops.h"
+#include "libswscale/ops_internal.h"
+
+#include "checkasm.h"
+
+enum {
+    PIXELS = 64,
+};
+
+enum {
+    U8  = SWS_PIXEL_U8,
+    U16 = SWS_PIXEL_U16,
+    U32 = SWS_PIXEL_U32,
+    F32 = SWS_PIXEL_F32,
+};
+
+#define FMT(fmt, ...) tprintf((char[256]) {0}, 256, fmt, __VA_ARGS__)
+static const char *tprintf(char buf[], size_t size, const char *fmt, ...)
+{
+    va_list ap;
+    va_start(ap, fmt);
+    vsnprintf(buf, size, fmt, ap);
+    va_end(ap);
+    return buf;
+}
+
+static int rw_pixel_bits(const SwsOp *op)
+{
+    const int elems = op->rw.packed ? op->rw.elems : 1;
+    const int size  = ff_sws_pixel_type_size(op->type);
+    const int bits  = 8 >> op->rw.frac;
+    av_assert1(bits >= 1);
+    return elems * size * bits;
+}
+
+static float rndf(void)
+{
+    union { uint32_t u; float f; } x;
+    do {
+        x.u = rnd();
+    } while (!isnormal(x.f));
+    return x.f;
+}
+
+static void fill32f(float *line, int num, unsigned range)
+{
+    const float scale = (float) range / UINT32_MAX;
+    for (int i = 0; i < num; i++)
+        line[i] = range ? scale * rnd() : rndf();
+}
+
+static void fill32(uint32_t *line, int num, unsigned range)
+{
+    for (int i = 0; i < num; i++)
+        line[i] = range ? rnd() % (range + 1) : rnd();
+}
+
+static void fill16(uint16_t *line, int num, unsigned range)
+{
+    if (!range) {
+        fill32((uint32_t *) line, AV_CEIL_RSHIFT(num, 1), 0);
+    } else {
+        for (int i = 0; i < num; i++)
+            line[i] = rnd() % (range + 1);
+    }
+}
+
+static void fill8(uint8_t *line, int num, unsigned range)
+{
+    if (!range) {
+        fill32((uint32_t *) line, AV_CEIL_RSHIFT(num, 2), 0);
+    } else {
+        for (int i = 0; i < num; i++)
+            line[i] = rnd() % (range + 1);
+    }
+}
+
+/* Compile `ops` with the "c" backend and with the first backend that supports
+ * it, then compare their output; `range` (if nonzero) caps the input values. */
+static void check_ops(const char *report, unsigned range, const SwsOp *ops)
+{
+    SwsContext *ctx = sws_alloc_context();
+    SwsCompiledOp comp_ref = {0}, comp_new = {0};
+    SwsOpList oplist = { .ops = (SwsOp *) ops };
+    const SwsOp *read_op, *write_op;
+
+    declare_func(void, const SwsOpExec *exec, const void *priv, int pixels);
+
+    DECLARE_ALIGNED_64(char, src0)[4][PIXELS * sizeof(uint32_t[4])];
+    DECLARE_ALIGNED_64(char, src1)[4][PIXELS * sizeof(uint32_t[4])];
+    DECLARE_ALIGNED_64(char, dst0)[4][PIXELS * sizeof(uint32_t[4])];
+    DECLARE_ALIGNED_64(char, dst1)[4][PIXELS * sizeof(uint32_t[4])];
+
+    if (!ctx)
+        return;
+    ctx->flags = SWS_BITEXACT;
+
+    read_op = write_op = &ops[0];
+    for (oplist.num_ops = 0; ops[oplist.num_ops].op; oplist.num_ops++)
+        write_op = &ops[oplist.num_ops];
+
+    for (int p = 0; p < 4; p++) {
+        void *plane = src0[p];
+        switch (read_op->type) {
+        case U8:    fill8(plane, sizeof(src0[p]) /  sizeof(uint8_t), range); break;
+        case U16:  fill16(plane, sizeof(src0[p]) / sizeof(uint16_t), range); break;
+        case U32:  fill32(plane, sizeof(src0[p]) / sizeof(uint32_t), range); break;
+        case F32: fill32f(plane, sizeof(src0[p]) / sizeof(uint32_t), range); break;
+        }
+    }
+
+    memcpy(src1, src0, sizeof(src0));
+    memset(dst0, 0, sizeof(dst0));
+    memset(dst1, 0, sizeof(dst1));
+
+    /* Compile `ops` using both the asm and c backends */
+    for (int n = 0; ff_sws_op_backends[n]; n++) {
+        const SwsOpBackend *backend = ff_sws_op_backends[n];
+        const bool is_ref = !strcmp(backend->name, "c");
+        if (is_ref || !comp_new.func) {
+            SwsCompiledOp comp = {0}; /* stays zeroed on failure */
+            int ret = ff_sws_ops_compile_backend(ctx, backend, &oplist, &comp);
+            if (ret == AVERROR(ENOTSUP))
+                continue;
+            if (ret < 0 || PIXELS % comp.block_size != 0)
+                fail();
+
+            if (is_ref)
+                comp_ref = comp;
+            if (!comp_new.func)
+                comp_new = comp;
+        }
+    }
+
+    av_assert0(comp_ref.func && comp_new.func);
+
+    SwsOpExec exec = {0};
+    exec.pixel_bits_in  = rw_pixel_bits(read_op);
+    exec.pixel_bits_out = rw_pixel_bits(write_op);
+    exec.width = PIXELS;
+    exec.height = exec.slice_h = 1;
+    for (int i = 0; i < 4; i++) {
+        exec.in_stride[i]  = sizeof(src0[i]);
+        exec.out_stride[i] = sizeof(dst0[i]);
+    }
+
+    if (check_func(comp_new.func, "%s", report)) {
+        func_ref = comp_ref.func; /* ignore any other asm versions */
+
+        for (int i = 0; i < 4; i++) {
+            exec.in[i]  = (void *) src0[i];
+            exec.out[i] = (void *) dst0[i];
+        }
+        call_ref(&exec, comp_ref.priv, PIXELS / comp_ref.block_size);
+
+        for (int i = 0; i < 4; i++) {
+            exec.in[i]  = (void *) src1[i];
+            exec.out[i] = (void *) dst1[i];
+        }
+        call_new(&exec, comp_new.priv, PIXELS / comp_new.block_size);
+
+        for (int i = 0; i < 4; i++) {
+            const char *name = FMT("%s[%d]", report, i);
+            const int size   = PIXELS * exec.pixel_bits_out >> 3;
+            const int stride = sizeof(dst0[i]);
+
+            switch (write_op->type) {
+            case U8:
+                checkasm_check(uint8_t, (void *) dst0[i], stride,
+                                        (void *) dst1[i], stride,
+                                        size, 1, name);
+                break;
+            case U16:
+                checkasm_check(uint16_t, (void *) dst0[i], stride,
+                                         (void *) dst1[i], stride,
+                                         size >> 1, 1, name);
+                break;
+            case U32:
+                checkasm_check(uint32_t, (void *) dst0[i], stride,
+                                         (void *) dst1[i], stride,
+                                         size >> 2, 1, name);
+                break;
+            case F32:
+                checkasm_check(float, (void *) dst0[i], stride,
+                                      (void *) dst1[i], stride,
+                                      size >> 2, 1, name);
+                break;
+            }
+
+            /* Check for over-write past the `over_write` bytes allowed */
+            for (int x = size + comp_new.over_write; x < sizeof(dst1[i]); x++) {
+                if (dst1[i][x] != 0) {
+                    fprintf(stderr, "Overwrite detected in %s: [%d] = 0x%02x\n",
+                            name, x, (uint8_t) dst1[i][x]);
+                    fail();
+                }
+            }
+
+            if (write_op->rw.packed)
+                break;
+        }
+
+        bench_new(&exec, comp_new.priv, PIXELS / comp_new.block_size);
+    }
+
+    if (comp_new.func != comp_ref.func && comp_new.free)
+        comp_new.free(comp_new.priv);
+    if (comp_ref.free)
+        comp_ref.free(comp_ref.priv);
+    sws_free_context(&ctx);
+}
+
+#define CHECK_RANGE(NAME, RANGE, N_IN, N_OUT, IN, OUT, ...)                     \
+  do {                                                                          \
+    check_ops(NAME, RANGE, (SwsOp[]) {                                          \
+        {                                                                       \
+            .op = SWS_OP_READ,                                                  \
+            .type = IN,                                                         \
+            .rw.elems = N_IN,                                                   \
+        },                                                                      \
+        __VA_ARGS__,                                                            \
+        {                                                                       \
+            .op = SWS_OP_WRITE,                                                 \
+            .type = OUT,                                                        \
+            .rw.elems = N_OUT,                                                  \
+        }, {0}                                                                  \
+    });                                                                         \
+  } while (0)
+
+#define CHECK_COMMON_RANGE(NAME, RANGE, IN, OUT, ...)                           \
+    CHECK_RANGE(FMT("%s_p1000", NAME), RANGE, 1, 1, IN, OUT, __VA_ARGS__);      \
+    CHECK_RANGE(FMT("%s_p1110", NAME), RANGE, 3, 3, IN, OUT, __VA_ARGS__);      \
+    CHECK_RANGE(FMT("%s_p1111", NAME), RANGE, 4, 4, IN, OUT, __VA_ARGS__);      \
+    CHECK_RANGE(FMT("%s_p1001", NAME), RANGE, 4, 2, IN, OUT, __VA_ARGS__, {     \
+        .op = SWS_OP_SWIZZLE,                                                   \
+        .type = OUT,                                                            \
+        .swizzle = SWS_SWIZZLE(0, 3, 1, 2),                                     \
+    })
+
+#define CHECK(NAME, N_IN, N_OUT, IN, OUT, ...) \
+    CHECK_RANGE(NAME, 0, N_IN, N_OUT, IN, OUT, __VA_ARGS__)
+
+#define CHECK_COMMON(NAME, IN, OUT, ...) \
+    CHECK_COMMON_RANGE(NAME, 0, IN, OUT, __VA_ARGS__)
+
+static void check_read_write(void)
+{
+    for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
+        const char *type = ff_sws_pixel_type_name(t);
+        for (int i = 1; i <= 4; i++) {
+            /* Test planar read/write: `i` elements read, `o <= i` written */
+            for (int o = 1; o <= i; o++) {
+                check_ops(FMT("rw_%d_%d_%s", i, o, type), 0, (SwsOp[]) {
+                    {
+                        .op = SWS_OP_READ,
+                        .type = t,
+                        .rw.elems = i,
+                    }, {
+                        .op = SWS_OP_WRITE,
+                        .type = t,
+                        .rw.elems = o,
+                    }, {0}
+                });
+            }
+
+            /* Test packed read/write (only with two or more elements) */
+            if (i == 1)
+                continue;
+
+            check_ops(FMT("read_packed%d_%s", i, type), 0, (SwsOp[]) {
+                {
+                    .op = SWS_OP_READ,
+                    .type = t,
+                    .rw.elems = i,
+                    .rw.packed = true,
+                }, {
+                    .op = SWS_OP_WRITE,
+                    .type = t,
+                    .rw.elems = i,
+                }, {0}
+            });
+
+            check_ops(FMT("write_packed%d_%s", i, type), 0, (SwsOp[]) {
+                {
+                    .op = SWS_OP_READ,
+                    .type = t,
+                    .rw.elems = i,
+                }, {
+                    .op = SWS_OP_WRITE,
+                    .type = t,
+                    .rw.elems = i,
+                    .rw.packed = true,
+                }, {0}
+            });
+        }
+    }
+
+    /* Test fractional reads/writes, i.e. `8 >> frac` bits per element */
+    for (int frac = 1; frac <= 3; frac++) {
+        const int bits = 8 >> frac;
+        const int range = (1 << bits) - 1;
+        if (bits == 2)
+            continue; /* no 2 bit packed formats currently exist */
+
+        check_ops(FMT("read_frac%d", frac), 0, (SwsOp[]) {
+            {
+                .op = SWS_OP_READ,
+                .type = U8,
+                .rw.elems = 1,
+                .rw.frac  = frac,
+            }, {
+                .op = SWS_OP_WRITE,
+                .type = U8,
+                .rw.elems = 1,
+            }, {0}
+        });
+
+        check_ops(FMT("write_frac%d", frac), range, (SwsOp[]) {
+            {
+                .op = SWS_OP_READ,
+                .type = U8,
+                .rw.elems = 1,
+            }, {
+                .op = SWS_OP_WRITE,
+                .type = U8,
+                .rw.elems = 1,
+                .rw.frac  = frac,
+            }, {0}
+        });
+    }
+}
+
+static void check_swap_bytes(void)
+{
+    CHECK_COMMON("swap_bytes_16", U16, U16, {
+        .op   = SWS_OP_SWAP_BYTES,
+        .type = U16,
+    });
+
+    CHECK_COMMON("swap_bytes_32", U32, U32, {
+        .op   = SWS_OP_SWAP_BYTES,
+        .type = U32,
+    });
+}
+
+static void check_pack_unpack(void)
+{
+    const struct {
+        SwsPixelType type;
+        SwsPackOp op;
+    } patterns[] = {
+        { U8, {{ 3,  3,  2 }}},
+        { U8, {{ 2,  3,  3 }}},
+        { U8, {{ 1,  2,  1 }}},
+        {U16, {{ 5,  6,  5 }}},
+        {U16, {{ 5,  5,  5 }}},
+        {U16, {{ 4,  4,  4 }}},
+        {U32, {{ 2, 10, 10, 10 }}},
+        {U32, {{10, 10, 10,  2 }}},
+    };
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
+        const SwsPixelType type = patterns[i].type;
+        const SwsPackOp pack = patterns[i].op;
+        const int num = pack.pattern[3] ? 4 : 3;
+        const char *pat = FMT("%d%d%d%d", pack.pattern[0], pack.pattern[1],
+                                          pack.pattern[2], pack.pattern[3]);
+
+        CHECK(FMT("pack_%s", pat), num, 1, type, type, {
+            .op   = SWS_OP_PACK,
+            .type = type,
+            .pack = pack,
+        });
+
+        CHECK(FMT("unpack_%s", pat), 1, num, type, type, {
+            .op   = SWS_OP_UNPACK,
+            .type = type,
+            .pack = pack,
+        });
+    }
+}
+
+static AVRational rndq(SwsPixelType t)
+{
+    const unsigned num = rnd();
+    if (ff_sws_pixel_type_is_int(t)) {
+        const unsigned mask = (1 << (ff_sws_pixel_type_size(t) * 8)) - 1;
+        return (AVRational) { num & mask, 1 };
+    } else {
+        const unsigned den = rnd();
+        return (AVRational) { num, den ? den : 1 };
+    }
+}
+
+static void check_clear(void)
+{
+    for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
+        const char *type = ff_sws_pixel_type_name(t);
+        const int bits = ff_sws_pixel_type_size(t) * 8;
+
+        /* TODO: AVRational can't fit 32 bit constants */
+        if (bits < 32) {
+            const AVRational chroma = (AVRational) { 1 << (bits - 1), 1};
+            const AVRational alpha  = (AVRational) { (1 << bits) - 1, 1};
+            const AVRational zero   = (AVRational) { 0, 1};
+            const AVRational none = {0};
+
+            const SwsConst patterns[] = {
+                /* Zero only */
+                {.q4 = {   none,   none,   none,   zero }},
+                {.q4 = {   zero,   none,   none,   none }},
+                /* Alpha only */
+                {.q4 = {   none,   none,   none,  alpha }},
+                {.q4 = {  alpha,   none,   none,   none }},
+                /* Chroma only */
+                {.q4 = { chroma, chroma,   none,   none }},
+                {.q4 = {   none, chroma, chroma,   none }},
+                {.q4 = {   none,   none, chroma, chroma }},
+                {.q4 = { chroma,   none, chroma,   none }},
+                {.q4 = {   none, chroma,   none, chroma }},
+                /* Alpha+chroma */
+                {.q4 = { chroma, chroma,   none,  alpha }},
+                {.q4 = {   none, chroma, chroma,  alpha }},
+                {.q4 = {  alpha,   none, chroma, chroma }},
+                {.q4 = { chroma,   none, chroma,  alpha }},
+                {.q4 = {  alpha, chroma,   none, chroma }},
+                /* Random values */
+                {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
+                {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
+                {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
+                {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
+            };
+
+            for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
+                CHECK(FMT("clear_pattern_%s[%d]", type, i), 4, 4, t, t, {
+                    .op   = SWS_OP_CLEAR,
+                    .type = t,
+                    .c    = patterns[i],
+                });
+            }
+        } else if (!ff_sws_pixel_type_is_int(t)) {
+            /* Floating point YUV doesn't exist, only alpha needs to be cleared */
+            CHECK(FMT("clear_alpha_%s", type), 4, 4, t, t, {
+                .op      = SWS_OP_CLEAR,
+                .type    = t,
+                .c.q4[3] = { 0, 1 },
+            });
+        }
+    }
+}
+
+static void check_shift(void)
+{
+    for (SwsPixelType t = U16; t < SWS_PIXEL_TYPE_NB; t++) {
+        const char *type = ff_sws_pixel_type_name(t);
+        if (!ff_sws_pixel_type_is_int(t))
+            continue;
+
+        for (int shift = 1; shift <= 8; shift++) {
+            CHECK_COMMON(FMT("lshift%d_%s", shift, type), t, t, {
+                .op   = SWS_OP_LSHIFT,
+                .type = t,
+                .c.u  = shift,
+            });
+
+            CHECK_COMMON(FMT("rshift%d_%s", shift, type), t, t, {
+                .op   = SWS_OP_RSHIFT,
+                .type = t,
+                .c.u  = shift,
+            });
+        }
+    }
+}
+
+static void check_swizzle(void)
+{
+    for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
+        const char *type = ff_sws_pixel_type_name(t);
+        static const int patterns[][4] = {
+            /* Pure swizzle */
+            {3, 0, 1, 2},
+            {3, 0, 2, 1},
+            {2, 1, 0, 3},
+            {3, 2, 1, 0},
+            {3, 1, 0, 2},
+            {3, 2, 0, 1},
+            {1, 2, 0, 3},
+            {1, 0, 2, 3},
+            {2, 0, 1, 3},
+            {2, 3, 1, 0},
+            {2, 1, 3, 0},
+            {1, 2, 3, 0},
+            {1, 3, 2, 0},
+            {0, 2, 1, 3},
+            {0, 2, 3, 1},
+            {0, 3, 1, 2},
+            {3, 1, 2, 0},
+            {0, 3, 2, 1},
+            /* Luma expansion */
+            {0, 0, 0, 3},
+            {3, 0, 0, 0},
+            {0, 0, 0, 1},
+            {1, 0, 0, 0},
+        };
+
+        for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
+            const int x = patterns[i][0], y = patterns[i][1],
+                      z = patterns[i][2], w = patterns[i][3];
+            CHECK(FMT("swizzle_%d%d%d%d_%s", x, y, z, w, type), 4, 4, t, t, {
+                .op = SWS_OP_SWIZZLE,
+                .type = t,
+                .swizzle = SWS_SWIZZLE(x, y, z, w),
+            });
+        }
+    }
+}
+
+/* Test all type conversions; narrowing ones get inputs capped to the output */
+static void check_convert(void)
+{
+    for (SwsPixelType i = U8; i < SWS_PIXEL_TYPE_NB; i++) {
+        const char *itype = ff_sws_pixel_type_name(i);
+        const int isize = ff_sws_pixel_type_size(i);
+        for (SwsPixelType o = U8; o < SWS_PIXEL_TYPE_NB; o++) {
+            const char *otype = ff_sws_pixel_type_name(o);
+            const int osize = ff_sws_pixel_type_size(o);
+            const char *name = FMT("convert_%s_%s", itype, otype);
+            if (i == o)
+                continue;
+            if (isize < osize || !ff_sws_pixel_type_is_int(o)) {
+                CHECK_COMMON(name, i, o, {
+                    .op = SWS_OP_CONVERT,
+                    .type = i,
+                    .convert.to = o,
+                });
+            } else if (isize > osize || !ff_sws_pixel_type_is_int(i)) {
+                /* all-ones shifted down; `1 << 32` is undefined for U32 */
+                uint32_t range = UINT32_MAX >> (32 - osize * 8);
+                CHECK_COMMON_RANGE(name, range, i, o, {
+                    .op = SWS_OP_CONVERT,
+                    .type = i,
+                    .convert.to = o,
+                });
+            }
+        }
+    }
+
+    /* Check expanding conversions */
+    CHECK_COMMON("expand16", U8, U16, {
+        .op = SWS_OP_CONVERT,
+        .type = U8,
+        .convert.to = U16,
+        .convert.expand = true,
+    });
+    CHECK_COMMON("expand32", U8, U32, {
+        .op = SWS_OP_CONVERT,
+        .type = U8,
+        .convert.to = U32,
+        .convert.expand = true,
+    });
+}
+
+static void check_dither(void)
+{
+    for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
+        const char *type = ff_sws_pixel_type_name(t);
+        if (ff_sws_pixel_type_is_int(t))
+            continue;
+
+        /* Test all sizes up to 16x16 */
+        for (int size_log2 = 0; size_log2 <= 4; size_log2++) {
+            const int size = 1 << size_log2;
+            AVRational *matrix = av_refstruct_allocz(size * size * sizeof(*matrix));
+            if (!matrix) {
+                fail();
+                return;
+            }
+
+            if (size == 1) {
+                matrix[0] = (AVRational) { 1, 2 };
+            } else {
+                for (int i = 0; i < size * size; i++)
+                    matrix[i] = rndq(t);
+            }
+
+            CHECK_COMMON(FMT("dither_%dx%d_%s", size, size, type), t, t, {
+                .op = SWS_OP_DITHER,
+                .type = t,
+                .dither.size_log2 = size_log2,
+                .dither.matrix = matrix,
+            });
+
+            av_refstruct_unref(&matrix);
+        }
+    }
+}
+
+static void check_min_max(void)
+{
+    for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
+        const char *type = ff_sws_pixel_type_name(t);
+        CHECK_COMMON(FMT("min_%s", type), t, t, {
+            .op = SWS_OP_MIN,
+            .type = t,
+            .c.q4 = { rndq(t), rndq(t), rndq(t), rndq(t) },
+        });
+
+        CHECK_COMMON(FMT("max_%s", type), t, t, {
+            .op = SWS_OP_MAX,
+            .type = t,
+            .c.q4 = { rndq(t), rndq(t), rndq(t), rndq(t) },
+        });
+    }
+}
+
+static void check_linear(void)
+{
+    static const struct {
+        const char *name;
+        uint32_t mask;
+    } patterns[] = {
+        { "noop",               0 },
+        { "luma",               SWS_MASK_LUMA },
+        { "alpha",              SWS_MASK_ALPHA },
+        { "luma+alpha",         SWS_MASK_LUMA | SWS_MASK_ALPHA },
+        { "dot3",               0b111 },
+        { "dot4",               0b1111 },
+        { "row0",               SWS_MASK_ROW(0) },
+        { "row0+alpha",         SWS_MASK_ROW(0) | SWS_MASK_ALPHA },
+        { "off3",               SWS_MASK_OFF3 },
+        { "off3+alpha",         SWS_MASK_OFF3 | SWS_MASK_ALPHA },
+        { "diag3",              SWS_MASK_DIAG3 },
+        { "diag4",              SWS_MASK_DIAG4 },
+        { "diag3+alpha",        SWS_MASK_DIAG3 | SWS_MASK_ALPHA },
+        { "diag3+off3",         SWS_MASK_DIAG3 | SWS_MASK_OFF3 },
+        { "diag3+off3+alpha",   SWS_MASK_DIAG3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
+        { "diag4+off4",         SWS_MASK_DIAG4 | SWS_MASK_OFF4 },
+        { "matrix3",            SWS_MASK_MAT3 },
+        { "matrix3+off3",       SWS_MASK_MAT3 | SWS_MASK_OFF3 },
+        { "matrix3+off3+alpha", SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
+        { "matrix4",            SWS_MASK_MAT4 },
+        { "matrix4+off4",       SWS_MASK_MAT4 | SWS_MASK_OFF4 },
+    };
+
+    for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
+        const char *type = ff_sws_pixel_type_name(t);
+        if (ff_sws_pixel_type_is_int(t))
+            continue;
+
+        for (int p = 0; p < FF_ARRAY_ELEMS(patterns); p++) {
+            const uint32_t mask = patterns[p].mask;
+            SwsLinearOp lin = { .mask = mask };
+
+            for (int i = 0; i < 4; i++) {
+                for (int j = 0; j < 5; j++) {
+                    if (mask & SWS_MASK(i, j)) {
+                        lin.m[i][j] = rndq(t);
+                    } else {
+                        lin.m[i][j] = (AVRational) { i == j, 1 };
+                    }
+                }
+            }
+
+            CHECK(FMT("linear_%s_%s", patterns[p].name, type), 4, 4, t, t, {
+                .op = SWS_OP_LINEAR,
+                .type = t,
+                .lin = lin,
+            });
+        }
+    }
+}
+
+static void check_scale(void)
+{
+    for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
+        const char *type = ff_sws_pixel_type_name(t);
+        const int bits = ff_sws_pixel_type_size(t) * 8;
+        if (ff_sws_pixel_type_is_int(t)) {
+            /* Ensure the result won't exceed the value range */
+            const unsigned max = UINT32_MAX >> (32 - bits); /* no 1 << 32 UB */
+            const unsigned scale = rnd() & max;
+            const unsigned range = max / (scale ? scale : 1);
+            CHECK_COMMON_RANGE(FMT("scale_%s", type), range, t, t, {
+                .op   = SWS_OP_SCALE,
+                .type = t,
+                .c.q  = { scale, 1 },
+            });
+        } else {
+            CHECK_COMMON(FMT("scale_%s", type), t, t, {
+                .op   = SWS_OP_SCALE,
+                .type = t,
+                .c.q  = rndq(t),
+            });
+        }
+    }
+}
+
+void checkasm_check_sw_ops(void)
+{
+    check_read_write();
+    report("read_write");
+    check_swap_bytes();
+    report("swap_bytes");
+    check_pack_unpack();
+    report("pack_unpack");
+    check_clear();
+    report("clear");
+    check_shift();
+    report("shift");
+    check_swizzle();
+    report("swizzle");
+    check_convert();
+    report("convert");
+    check_dither();
+    report("dither");
+    check_min_max();
+    report("min_max");
+    check_linear();
+    report("linear");
+    check_scale();
+    report("scale");
+}
-- 
2.49.0