[FFmpeg-devel] [PATCH 28/29] lavc/aacpsdsp: RISC-V V hybrid_analysis_ileave

remi at remlab.net remi at remlab.net
Thu Sep 22 21:37:25 EEST 2022


From: Rémi Denis-Courmont <remi at remlab.net>

---
 libavcodec/riscv/aacpsdsp_init.c | 14 ++++++++----
 libavcodec/riscv/aacpsdsp_rvv.S  | 37 ++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/libavcodec/riscv/aacpsdsp_init.c b/libavcodec/riscv/aacpsdsp_init.c
index 6222d6f787..76f55502ee 100644
--- a/libavcodec/riscv/aacpsdsp_init.c
+++ b/libavcodec/riscv/aacpsdsp_init.c
@@ -29,16 +29,22 @@ void ff_ps_mul_pair_single_rvv(float (*dst)[2], float (*src0)[2], float *src1,
                                int n);
 void ff_ps_hybrid_analysis_rvv(float (*out)[2], float (*in)[2],
                                const float (*filter)[8][2], ptrdiff_t, int n);
+void ff_ps_hybrid_analysis_ileave_rvv(float (*out)[32][2], float L[2][38][64],
+                                      int i, int len);
 
 av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
 {
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if (flags & AV_CPU_FLAG_RV_ZVE32F) {
-        c->add_squares = ff_ps_add_squares_rvv;
-        c->mul_pair_single = ff_ps_mul_pair_single_rvv;
-        c->hybrid_analysis = ff_ps_hybrid_analysis_rvv;
+    if (flags & AV_CPU_FLAG_RV_ZVE32X) { /* integer vectors suffice for the interleave */
+        c->hybrid_analysis_ileave = ff_ps_hybrid_analysis_ileave_rvv;
+
+        if (flags & AV_CPU_FLAG_RV_ZVE32F) { /* these three keep their Zve32f gate */
+            c->add_squares = ff_ps_add_squares_rvv;
+            c->mul_pair_single = ff_ps_mul_pair_single_rvv;
+            c->hybrid_analysis = ff_ps_hybrid_analysis_rvv;
+        }
     }
 #endif
 }
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index 993462de29..9c7bda1098 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -153,3 +153,40 @@ func ff_ps_hybrid_analysis_rvv, zve32f
         .purgem input
         .purgem filter
 endfunc
+
+func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no need for zve32f here: data is only moved, no FP arithmetic. Arguments follow the C prototype in the standard RISC-V argument registers: a0 = out, a1 = L, a2 = i, a3 = len */
+        slli        t0, a2, 5 + 1 + 2 // t0 = i * 32 * 2 * 4 bytes, shift = ctz(32 * 2 * 4)
+        slli        t1, a2, 2 // t1 = i * 4, byte offset of column i inside a row of L
+        add         a0, a0, t0 // a0 = &out[i]
+        add         a1, a1, t1 // a1 = &L[0][0][i]
+        addi        a2, a2, -64 // a2 = i - 64, counted up towards zero; NOTE(review): loop ends only on exactly zero, so this relies on i <= 64 - confirm with callers
+        li          t1, 38 * 64 * 4 // byte size of one L[x] plane
+        li          t6, 64 * 4 // row stride in bytes: (uint8_t *)&L[x][j+1][i] - (uint8_t *)&L[x][j][i]
+        add         a4, a1, t1 // a4 = &L[1][0][i]
+        beqz        a2, 3f // i == 64: nothing to do
+1: // outer loop: one pass per column i
+        mv          t0, a0 // t0 = write cursor, starts at out[i][0]
+        mv          t1, a1 // t1 = read cursor in L[0], starts at L[0][0][i]
+        mv          t3, a3 // t3 = elements left in this column (len)
+        mv          t4, a4 // t4 = read cursor in L[1], starts at L[1][0][i]
+        addi        a2, a2, 1 // step the outer counter towards zero
+2: // inner loop: vl values of j per pass
+        vsetvli     t5, t3, e32, m1, ta, ma // t5 = vl, the number of 32-bit elements handled this pass
+        vlse32.v    v16, (t1), t6 // v16[k] = L[0][j + k][i], one element per row
+        sub         t3, t3, t5 // remaining -= vl
+        vlse32.v    v17, (t4), t6 // v17[k] = L[1][j + k][i]
+        mul         t2, t5, t6 // t2 = vl rows expressed in bytes
+        vsseg2e32.v v16, (t0) // interleaved store: out[i][j + k][0] = v16[k], out[i][j + k][1] = v17[k]
+        add         t1, t1, t2 // advance the L[0] cursor by vl rows
+        add         t4, t4, t2 // advance the L[1] cursor by vl rows
+        slli        t2, t5, 1 + 2 // t2 = vl * 2 * 4 bytes just written
+        add         t0, t0, t2 // advance the write cursor
+        bnez        t3, 2b // repeat until the column is done
+
+        add         a0, a0, 32 * 2 * 4 // a0 = &out[i + 1]
+        add         a1, a1, 4 // next column of L[0]
+        add         a4, a4, 4 // next column of L[1]
+        bnez        a2, 1b // repeat until the counter reaches zero (i == 64)
+3:
+        ret
+endfunc
-- 
2.37.2



More information about the ffmpeg-devel mailing list