[FFmpeg-devel] [PATCH 2/2] lavc/aacpsdsp: unroll RISC-V V add_squares

Rémi Denis-Courmont remi at remlab.net
Sat Jul 15 23:57:05 EEST 2023


This does not make much difference on the device under test, but since we
can, raise the vector register group multiplier (LMUL) from m1 to m4:

Before:
ps_add_squares_rvv_f32: 11973.7

After:
ps_add_squares_rvv_f32: 11958.2
---
 libavcodec/riscv/aacpsdsp_rvv.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index b7ea314fc3..bd8f905334 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -23,15 +23,15 @@
 func ff_ps_add_squares_rvv, zve32f
         li          t1, 32
 1:
-        vsetvli     t0, a2, e32, m1, ta, ma
+        vsetvli     t0, a2, e32, m4, ta, ma
         vle64.v     v8, (a1)
         vnsrl.wx    v24, v8, zero
         vle32.v     v16, (a0)
         sub         a2, a2, t0
-        vnsrl.wx    v25, v8, t1
+        vnsrl.wx    v28, v8, t1
         vfmacc.vv   v16, v24, v24
         sh3add      a1, t0, a1
-        vfmacc.vv   v16, v25, v25
+        vfmacc.vv   v16, v28, v28
         vse32.v     v16, (a0)
         sh2add      a0, t0, a0
         bnez        a2, 1b
-- 
2.40.1



More information about the ffmpeg-devel mailing list