[FFmpeg-devel] [PATCH 2/2] lavc/utvideodsp: R-V V restore_rgb_planes10
Rémi Denis-Courmont
remi at remlab.net
Sat Oct 28 15:36:32 EEST 2023
restore_rgb_planes10_c: 185852.2
restore_rgb_planes10_rvv_i32: 90130.5
---
libavcodec/riscv/utvideodsp_init.c | 9 +++++++-
libavcodec/riscv/utvideodsp_rvv.S | 35 ++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/libavcodec/riscv/utvideodsp_init.c b/libavcodec/riscv/utvideodsp_init.c
index dfaa16692a..f5038c4736 100644
--- a/libavcodec/riscv/utvideodsp_init.c
+++ b/libavcodec/riscv/utvideodsp_init.c
@@ -26,13 +26,20 @@
void ff_restore_rgb_planes_rvv(uint8_t *r, uint8_t *g, uint8_t *b,
ptrdiff_t linesize_r, ptrdiff_t linesize_g,
ptrdiff_t linesize_b, int width, int height);
+void ff_restore_rgb_planes10_rvv(uint16_t *r, uint16_t *g, uint16_t *b,
+ ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+ ptrdiff_t linesize_b, int width, int height);
av_cold void ff_utvideodsp_init_riscv(UTVideoDSPContext *c)
{
#if HAVE_RVV
int flags = av_get_cpu_flags();
- if (flags & AV_CPU_FLAG_RVV_I32)
+ if (flags & AV_CPU_FLAG_RVV_I32) {
c->restore_rgb_planes = ff_restore_rgb_planes_rvv;
+
+ if (flags & AV_CPU_FLAG_RVB_ADDR)
+ c->restore_rgb_planes10 = ff_restore_rgb_planes10_rvv;
+ }
#endif
}
diff --git a/libavcodec/riscv/utvideodsp_rvv.S b/libavcodec/riscv/utvideodsp_rvv.S
index 673e3442ce..fa70d0eb34 100644
--- a/libavcodec/riscv/utvideodsp_rvv.S
+++ b/libavcodec/riscv/utvideodsp_rvv.S
@@ -51,3 +51,38 @@ func ff_restore_rgb_planes_rvv, zve32x
ret
endfunc
+
+func ff_restore_rgb_planes10_rvv, zve32x
+ li t1, -0x200
+ li t2, 0x3FF
+ sub a3, a3, a6
+ sub a4, a4, a6
+ sub a5, a5, a6
+1:
+ mv t6, a6
+ addi a7, a7, -1
+2:
+ vsetvli t0, t6, e16, m8, ta, ma
+ vle16.v v16, (a1)
+ sub t6, t6, t0
+ vle16.v v8, (a0)
+ vadd.vx v16, v16, t1
+ sh1add a1, t0, a1
+ vle16.v v24, (a2)
+ vadd.vv v8, v8, v16
+ vadd.vv v24, v24, v16
+ vand.vx v8, v8, t2
+ vand.vx v24, v24, t2
+ vse16.v v8, (a0)
+ sh1add a0, t0, a0
+ vse16.v v24, (a2)
+ sh1add a2, t0, a2
+ bnez t6, 2b
+
+ sh1add a0, a3, a0
+ sh1add a1, a4, a1
+ sh1add a2, a5, a2
+ bnez a7, 1b
+
+ ret
+endfunc
--
2.42.0
More information about the ffmpeg-devel mailing list