[FFmpeg-devel] [PATCH 3/3] swscale/rgb2rgb: remove R-V V shuffle_bytes_3210

Rémi Denis-Courmont remi at remlab.net
Fri Sep 29 20:16:41 EEST 2023


This is slower than the Zbb version on real hardware due to register
strides. Proper support for vector byte-swap requires the Zvbb
extension, but it's much too early for me to worry about it.
---
 libswscale/riscv/rgb2rgb.c     |  2 --
 libswscale/riscv/rgb2rgb_rvv.S | 25 -------------------------
 2 files changed, 27 deletions(-)

diff --git a/libswscale/riscv/rgb2rgb.c b/libswscale/riscv/rgb2rgb.c
index 21baaa21c5..162a4082b0 100644
--- a/libswscale/riscv/rgb2rgb.c
+++ b/libswscale/riscv/rgb2rgb.c
@@ -30,7 +30,6 @@ void ff_shuffle_bytes_2103_rvv(const uint8_t *src, uint8_t *dst, int src_len);
 void ff_shuffle_bytes_1230_rvv(const uint8_t *src, uint8_t *dst, int src_len);
 void ff_shuffle_bytes_3012_rvv(const uint8_t *src, uint8_t *dst, int src_len);
 void ff_shuffle_bytes_3210_rvb(const uint8_t *src, uint8_t *dst, int src_len);
-void ff_shuffle_bytes_3210_rvv(const uint8_t *src, uint8_t *dst, int src_len);
 void ff_interleave_bytes_rvv(const uint8_t *src1, const uint8_t *src2,
                              uint8_t *dst, int width, int height, int s1stride,
                              int s2stride, int dstride);
@@ -55,7 +54,6 @@ av_cold void rgb2rgb_init_riscv(void)
         shuffle_bytes_2103 = ff_shuffle_bytes_2103_rvv;
         shuffle_bytes_1230 = ff_shuffle_bytes_1230_rvv;
         shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv;
-        shuffle_bytes_3210 = ff_shuffle_bytes_3210_rvv;
         interleaveBytes = ff_interleave_bytes_rvv;
 #if (__riscv_xlen == 64)
         uyvytoyuv422 = ff_uyvytoyuv422_rvv;
diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S
index 0ac3afff7c..008f098bfe 100644
--- a/libswscale/riscv/rgb2rgb_rvv.S
+++ b/libswscale/riscv/rgb2rgb_rvv.S
@@ -74,31 +74,6 @@ func ff_shuffle_bytes_3012_rvv, zve32x
         ret
 endfunc
 
-func ff_shuffle_bytes_3210_rvv, zve32x
-        addi    t1, a0, 2
-        addi    t2, a0, 1
-        addi    t3, a0, 0
-        addi    a0, a0, 3
-        srai    a2, a2, 2
-        li      t4, 4
-1:
-        vsetvli    t0, a2, e8, m1, ta, ma
-        sub        a2, a2, t0
-        vlse8.v    v8, (a0), t4
-        sh2add     a0, t0, a0
-        vlse8.v    v9, (t1), t4
-        sh2add     t1, t0, t1
-        vlse8.v    v10, (t2), t4
-        sh2add     t2, t0, t2
-        vlse8.v    v11, (t3), t4
-        sh2add     t3, t0, t3
-        vsseg4e8.v v8, (a1)
-        sh2add     a1, t0, a1
-        bnez       a2, 1b
-
-        ret
-endfunc
-
 func ff_interleave_bytes_rvv, zve32x
 1:
         mv      t0, a0
-- 
2.40.1



More information about the ffmpeg-devel mailing list