[FFmpeg-devel] [PATCH 31/31] lavc/aacpsdsp: RISC-V V stereo_interpolate[0]
remi at remlab.net
remi at remlab.net
Sun Sep 25 17:26:19 EEST 2022
From: Rémi Denis-Courmont <remi at remlab.net>
---
libavcodec/riscv/aacpsdsp_init.c | 4 ++
libavcodec/riscv/aacpsdsp_rvv.S | 65 ++++++++++++++++++++++++++++++++
2 files changed, 69 insertions(+)
diff --git a/libavcodec/riscv/aacpsdsp_init.c b/libavcodec/riscv/aacpsdsp_init.c
index 20b1a12741..58a4c61121 100644
--- a/libavcodec/riscv/aacpsdsp_init.c
+++ b/libavcodec/riscv/aacpsdsp_init.c
@@ -34,6 +34,9 @@ void ff_ps_hybrid_analysis_ileave_rvv(float (*out)[32][2], float L[2][38][64],
void ff_ps_hybrid_synthesis_deint_rvv(float out[2][38][64], float (*in)[32][2],
int i, int len);
+void ff_ps_stereo_interpolate_rvv(float (*l)[2], float (*r)[2],
+ float h[2][4], float h_step[2][4], int len);
+
av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
{
#if HAVE_RVV
@@ -47,6 +50,7 @@ av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
c->add_squares = ff_ps_add_squares_rvv;
c->mul_pair_single = ff_ps_mul_pair_single_rvv;
c->hybrid_analysis = ff_ps_hybrid_analysis_rvv;
+ c->stereo_interpolate[0] = ff_ps_stereo_interpolate_rvv;
}
}
#endif
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index 0cbe4c1d3c..a236dfe43c 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -219,3 +219,68 @@ func ff_ps_hybrid_synthesis_deint_rvv, zve32x
3:
ret
endfunc
+
+func ff_ps_stereo_interpolate_rvv, zve32f
+ vsetvli t0, zero, e32, m1, ta, ma
+ vid.v v24
+ flw ft0, (a2)
+ vadd.vi v24, v24, 1 // v24[i] = i + 1
+ flw ft1, 4(a2)
+ vfcvt.f.xu.v v24, v24
+ flw ft2, 8(a2)
+ vfmv.v.f v16, ft0
+ flw ft3, 12(a2)
+ vfmv.v.f v17, ft1
+ flw ft0, (a3)
+ vfmv.v.f v18, ft2
+ flw ft1, 4(a3)
+ vfmv.v.f v19, ft3
+ flw ft2, 8(a3)
+ vfmv.v.f v20, ft0
+ flw ft3, 12(a3)
+ vfmv.v.f v21, ft1
+ fcvt.s.wu ft4, t0 // (float)(vlenb / sizeof (float))
+ vfmv.v.f v22, ft2
+ li t1, 8
+ vfmv.v.f v23, ft3
+ addi a6, a0, 4 // l[*][1]
+ vfmacc.vv v16, v24, v20 // h0 += (i + 1) * h0_step
+ addi a7, a1, 4 // r[*][1]
+ vfmacc.vv v17, v24, v21
+ fmul.s ft0, ft0, ft4
+ vfmacc.vv v18, v24, v22
+ fmul.s ft1, ft1, ft4
+ vfmacc.vv v19, v24, v23
+ fmul.s ft2, ft2, ft4
+ fmul.s ft3, ft3, ft4
+1:
+ vsetvli t0, a4, e32, m1, ta, ma
+ vlse32.v v8, (a0), t1 // l_re
+ sub a4, a4, t0
+ vlse32.v v9, (a6), t1 // l_im
+ vlse32.v v10, (a1), t1 // r_re
+ vlse32.v v11, (a7), t1 // r_im
+ vfmul.vv v12, v8, v16
+ vfmul.vv v13, v9, v16
+ vfmul.vv v14, v8, v17
+ vfmul.vv v15, v9, v17
+ vfmacc.vv v12, v10, v18
+ vfmacc.vv v13, v11, v18
+ vfmacc.vv v14, v10, v19
+ vfmacc.vv v15, v11, v19
+ vsse32.v v12, (a0), t1
+ sh3add a0, t0, a0
+ vsse32.v v13, (a6), t1
+ sh3add a6, t0, a6
+ vsse32.v v14, (a1), t1
+ sh3add a1, t0, a1
+ vsse32.v v15, (a7), t1
+ sh3add a7, t0, a7
+ vfadd.vf v16, v16, ft0 // h0 += (vlenb / sizeof (float)) * h0_step
+ vfadd.vf v17, v17, ft1
+ vfadd.vf v18, v18, ft2
+ vfadd.vf v19, v19, ft3
+ bnez a4, 1b
+
+ ret
+endfunc
--
2.37.2
More information about the ffmpeg-devel
mailing list