[FFmpeg-devel] [PATCH 5/5] lavc/vp8dsp: factor R-V V EPEL functions for all lengths
Rémi Denis-Courmont
remi at remlab.net
Sat May 25 18:38:40 EEST 2024
---
libavcodec/riscv/vp8dsp_rvv.S | 56 ++++++++++++++++++++---------------
1 file changed, 32 insertions(+), 24 deletions(-)
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index a4fcd158a5..002e7f3174 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -32,16 +32,6 @@
.endif
.endm
-.macro vsetvlstatic16 len
-.if \len <= 4
- vsetivli zero, \len, e16, mf2, ta, ma
-.elseif \len <= 8
- vsetivli zero, \len, e16, m1, ta, ma
-.elseif \len <= 16
- vsetivli zero, \len, e16, m2, ta, ma
-.endif
-.endm
-
.macro vp8_idct_dc_add
vlse32.v v0, (a0), a2
lh a5, 0(a1)
@@ -181,13 +171,8 @@ const subpel_filters
.byte 0, -1, 12, 123, -6, 0
endconst
-.macro epel len size type
-func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
-.ifc \type,v
- addi t0, a6, -1
-.else
- addi t0, a5, -1
-.endif
+.macro epel_common size, type
+func ff_put_vp8_epel_\type\()\size\().rvv, zve32x
lla t2, subpel_filters
sh1add t0, t0, t0
sh1add t0, t0, t2
@@ -198,7 +183,6 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
lb t5, 5(t0)
lb t0, (t0)
.endif
- vsetvlstatic8 \len
1:
addi a4, a4, -1
.ifc \type,v
@@ -236,11 +220,11 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
vwmaccsu.vx v16, t1, v22
vwmaccsu.vx v16, t4, v28
vwadd.wx v16, v16, t6
- vsetvlstatic16 \len
+ vsetvl zero, zero, a6 # e16
vwadd.vv v24, v16, v20
vnsra.wi v24, v24, 7
vmax.vx v24, v24, zero
- vsetvlstatic8 \len
+ vsetvl zero, zero, a5 # e8
vnclipu.wi v30, v24, 0
add a2, a2, a3
vse8.v v30, (a0)
@@ -251,9 +235,33 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
endfunc
.endm
+.macro epel len, size, type
+func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
+.ifc \type,v
+ addi t0, a6, -1
+.else
+ addi t0, a5, -1
+.endif
+.if \len <= 4
+ li a5, 0306 # e8, mf4, ta, ma
+ li a6, 0317 # e16, mf2, ta, ma
+.elseif \len <= 8
+ li a5, 0307 # e8, mf2, ta, ma
+ li a6, 0310 # e16, m1, ta, ma
+.else # if len <= 16
+ li a5, 0300 # e8, m1, ta, ma
+ li a6, 0311 # e16, m2, ta, ma
+.endif
+ vsetvlstatic8 \len
+ j ff_put_vp8_epel_\type\()\size\().rvv
+endfunc
+.endm
+
+.irp type,h,v
+.irp size,4,6
+epel_common \size, \type
.irp len,16,8,4
-epel \len 6 h
-epel \len 4 h
-epel \len 6 v
-epel \len 4 v
+epel \len, \size, \type
+.endr
+.endr
.endr
--
2.45.1
More information about the ffmpeg-devel mailing list