[FFmpeg-devel] [PATCH 5/6] lavc/ac3dsp: RISC-V V ac3_compute_mantissa_size
Peiting Shen
shenpeiting at eswincomputing.com
Thu Jun 15 13:36:44 EEST 2023
From: Shen Peiting <shenpeiting at eswincomputing.com>
Use the RVV strided segment load vlsseg<nf>e<eew>.v to operate on matrix columns.
Benchmarks on Spike (cycles):
ac3_compute_mantissa_size_c: 2338
ac3_compute_mantissa_size_rvv: 55
Co-authored-by: Yang Xiaojun <yangxiaojun at eswincomputing.com>
Co-authored-by: Huang Xing <huangxing1 at eswincomputing.com>
Co-authored-by: Zeng Fanchen <zengfanchen at eswincomputing.com>
Signed-off-by: Shen Peiting <shenpeiting at eswincomputing.com>
---
libavcodec/riscv/ac3dsp_init.c | 3 ++
libavcodec/riscv/ac3dsp_rvv.S | 53 ++++++++++++++++++++++++++++++++++
2 files changed, 56 insertions(+)
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
index d3aa20623a..4769213ebc 100644
--- a/libavcodec/riscv/ac3dsp_init.c
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -35,6 +35,8 @@ void ff_ac3_sum_square_butterfly_float_rvv(float sum[4],
const float *coef1,
int len);
+int ff_ac3_compute_mantissa_size_rvv(uint16_t mant_cnt[6][16]);
+
av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
{
int flags = av_get_cpu_flags();
@@ -42,6 +44,7 @@ av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
if (flags & AV_CPU_FLAG_RVV_I32) {
c->ac3_exponent_min = ff_ac3_exponent_min_rvv;
c->float_to_fixed24 = ff_float_to_fixed24_rvv;
+ c->compute_mantissa_size = ff_ac3_compute_mantissa_size_rvv;
}
if (flags & AV_CPU_FLAG_RVV_F32)
c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_rvv;
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
index 05a4d44938..cedd3d7d05 100644
--- a/libavcodec/riscv/ac3dsp_rvv.S
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -170,3 +170,56 @@ func ff_ac3_sum_square_butterfly_float_rvv, zve32f
addi a0, a0, 4
ret
endfunc
+
+
+func ff_ac3_compute_mantissa_size_rvv, zve32x
+        li      t1, 32          // byte stride between rows: 16 x uint16_t
+        li      t2, 3
+        vsetivli zero, 6, e16, m1, ta, ma // 6 rows; NOTE(review): needs VLMAX >= 6, i.e. VLEN >= 128 - confirm
+        vlsseg5e16.v v0, (a0), t1 // v0..v4 = columns 0..4, one element per row
+        // v22 (32-bit accumulator) = (column[1] / 3) * 5
+        vdivu.vx v1, v1, t2
+        li      t3, 5
+        vwmulu.vx v22, v1, t3   // unsigned widening: the counts are uint16_t
+        // v22 += column[3] * 3 + (column[2] / 3 + (column[4] >> 1)) * 7
+        vdivu.vx v2, v2, t2
+        vwmaccu.vx v22, t2, v3  // t2 is still 3 here
+        vsrl.vi v4, v4, 1       // logical shift: the counts are unsigned
+        vadd.vv v4, v4, v2      // at most 32767 + 21845, fits in 16 bits
+        li      t2, 7
+        vwmaccu.vx v22, t2, v4
+        // columns 5..12 (5 * 2 bytes further on), weighted 4..11
+        addi    a0, a0, 10
+        vlsseg8e16.v v5, (a0), t1
+        li      t3, 4
+        vwmaccu.vx v22, t3, v5
+        li      t3, 5
+        vwmaccu.vx v22, t3, v6
+        li      t3, 6
+        vwmaccu.vx v22, t3, v7
+        li      t3, 7
+        vwmaccu.vx v22, t3, v8
+        li      t3, 8
+        vwmaccu.vx v22, t3, v9
+        li      t3, 9
+        vwmaccu.vx v22, t3, v10
+        li      t3, 10
+        vwmaccu.vx v22, t3, v11
+        li      t3, 11
+        vwmaccu.vx v22, t3, v12
+        // columns 13..15 (8 * 2 bytes further on), weighted 12, 14, 16
+        addi    a0, a0, 16
+        vlsseg3e16.v v5, (a0), t1
+        li      t3, 12
+        vwmaccu.vx v22, t3, v5
+        li      t3, 14
+        vwmaccu.vx v22, t3, v6
+        li      t3, 16
+        vwmaccu.vx v22, t3, v7
+        // add the six 32-bit per-row totals together and return the sum in a0
+        vsetivli zero, 6, e32, m2, ta, ma
+        vmv.s.x v30, zero
+        vredsum.vs v30, v22, v30
+        vmv.x.s a0, v30
+        ret
+endfunc
--
2.17.1
More information about the ffmpeg-devel
mailing list