[FFmpeg-devel] [PATCH] lavc/opusdsp: R-V V deemphasis function

Rémi Denis-Courmont remi at remlab.net
Sat Nov 11 20:18:21 EET 2023


Considering the marginality of the measured performance gains (3-4%),
I suppose that we should not merge this. Furthermore, those measurements
are not expected to improve with larger vector sizes, since the code
uses only 32 bits per vector no matter what.

deemphasis_c: 7703.2
deemphasis_rvv_f32: 7452.0
---
 libavcodec/riscv/opusdsp_init.c | 10 +++++---
 libavcodec/riscv/opusdsp_rvv.S  | 43 +++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/libavcodec/riscv/opusdsp_init.c b/libavcodec/riscv/opusdsp_init.c
index 88d8e77f0e..8d363aaf37 100644
--- a/libavcodec/riscv/opusdsp_init.c
+++ b/libavcodec/riscv/opusdsp_init.c
@@ -26,14 +26,18 @@
 #include "libavcodec/opusdsp.h"
 
 void ff_opus_postfilter_rvv(float *data, int period, float *g, int len);
+float ff_opus_deemphasis_rvv(float *y, float *x, float coeff, int len);
 
 av_cold void ff_opus_dsp_init_riscv(OpusDSP *d)
 {
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if ((flags & AV_CPU_FLAG_RVV_F32) && (flags & AV_CPU_FLAG_RVB_ADDR) &&
-        (flags & AV_CPU_FLAG_RVB_BASIC))
-        d->postfilter = ff_opus_postfilter_rvv;
+    if (flags & AV_CPU_FLAG_RVV_F32) { /* gate shared by both routines */
+        if ((flags & AV_CPU_FLAG_RVB_ADDR) && (flags & AV_CPU_FLAG_RVB_BASIC))
+            d->postfilter = ff_opus_postfilter_rvv;
+        if (ff_get_rv_vlenb() >= 8) /* presumably VLEN in bytes, i.e. 64 bits */
+            d->deemphasis = ff_opus_deemphasis_rvv;
+    }
 #endif
 }
diff --git a/libavcodec/riscv/opusdsp_rvv.S b/libavcodec/riscv/opusdsp_rvv.S
index 79ae86c30e..839edfa4b0 100644
--- a/libavcodec/riscv/opusdsp_rvv.S
+++ b/libavcodec/riscv/opusdsp_rvv.S
@@ -64,3 +64,46 @@ func ff_opus_postfilter_rvv, zve32f
 
         ret
 endfunc
+
+// FIXME: Zvl64b
+// float ff_opus_deemphasis_rvv(float *y, float *x, float coeff, int len)
+// y[i] = x[i] + 0.85f * y[i-1], seeded with coeff; returns the last output.
+func ff_opus_deemphasis_rvv, zve32f
+        li       t0, 0x3f599a00 // 0.85f
+        li       t1, 8          // samples per unrolled vector iteration
+NOHWF   fmv.w.x  fa0, a2        // NOHWF: coeff arrives in an integer register
+NOHWF   mv       a2, a3         // NOHWF: len follows in a3; keep it in a2
+        vsetivli zero, 1, e32, mf2, ta, ma // one 32-bit element per register
+        vmv.s.x  v8, t0         // v8[0] = 0.85f (vector operand for 1st step)
+        fmv.w.x  ft0, t0        // ft0   = 0.85f (scalar operand elsewhere)
+        blt      a2, t1, 2f     // fewer than 8 samples: scalar loop only
+1:
+        vlseg8e32.v v0, (a1)    // v0..v7 = x[0..7]
+        addi    a2, a2, -8
+        vfmacc.vf v0, fa0, v8   // y[0] = x[0] + previous output * 0.85f
+        addi    a1, a1, 8 * 4
+        vfmacc.vf v1, ft0, v0   // y[n] = x[n] + 0.85f * y[n-1], serially
+        vfmacc.vf v2, ft0, v1
+        vfmacc.vf v3, ft0, v2
+        vfmacc.vf v4, ft0, v3
+        vfmacc.vf v5, ft0, v4
+        vfmacc.vf v6, ft0, v5
+        vfmacc.vf v7, ft0, v6
+        vfmv.f.s fa0, v7        // carry y[7] into the next iteration
+        vsseg8e32.v v0, (a0)    // store y[0..7]
+        addi    a0, a0, 8 * 4
+        bge     a2, t1, 1b
+2:
+        beqz    a2, 4f          // no leftover samples
+3:
+        flw     fa1, (a1)
+        addi    a2, a2, -1
+        fmadd.s fa0, ft0, fa0, fa1 // fa0 = 0.85f * fa0 + x[i]
+        addi    a1, a1, 4
+        fsw     fa0, (a0)
+        addi    a0, a0, 4
+        bnez    a2, 3b
+4:
+NOHWF   fmv.x.w   a0, fa0       // NOHWF: hand the result back through a0
+        ret
+endfunc
-- 
2.42.0



More information about the ffmpeg-devel mailing list