[FFmpeg-devel] [PATCH 5/5] lavc/vp8dsp: factor R-V V EPEL functions for all lengths

Rémi Denis-Courmont remi at remlab.net
Sat May 25 18:38:40 EEST 2024


---
 libavcodec/riscv/vp8dsp_rvv.S | 56 ++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 24 deletions(-)

diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index a4fcd158a5..002e7f3174 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -32,16 +32,6 @@
 .endif
 .endm
 
-.macro vsetvlstatic16 len
-.if \len <= 4
-        vsetivli        zero, \len, e16, mf2, ta, ma
-.elseif \len <= 8
-        vsetivli        zero, \len, e16, m1, ta, ma
-.elseif \len <= 16
-        vsetivli        zero, \len, e16, m2, ta, ma
-.endif
-.endm
-
 .macro vp8_idct_dc_add
         vlse32.v      v0, (a0), a2
         lh            a5, 0(a1)
@@ -181,13 +171,8 @@ const subpel_filters
         .byte 0,  -1,  12, 123,  -6, 0
 endconst
 
-.macro epel len size type
-func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
-.ifc \type,v
-        addi            t0, a6, -1
-.else
-        addi            t0, a5, -1
-.endif
+.macro epel_common size, type
+func ff_put_vp8_epel_\type\()\size\().rvv, zve32x
         lla             t2, subpel_filters
         sh1add          t0, t0, t0
         sh1add          t0, t0, t2
@@ -198,7 +183,6 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
         lb              t5, 5(t0)
         lb              t0, (t0)
 .endif
-        vsetvlstatic8   \len
 1:
         addi            a4, a4, -1
 .ifc \type,v
@@ -236,11 +220,11 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
         vwmaccsu.vx     v16, t1, v22
         vwmaccsu.vx     v16, t4, v28
         vwadd.wx        v16, v16, t6
-        vsetvlstatic16  \len
+        vsetvl          zero, zero, a6 # e16
         vwadd.vv        v24, v16, v20
         vnsra.wi        v24, v24, 7
         vmax.vx         v24, v24, zero
-        vsetvlstatic8   \len
+        vsetvl          zero, zero, a5 # e8
         vnclipu.wi      v30, v24, 0
         add             a2, a2, a3
         vse8.v          v30, (a0)
@@ -251,9 +235,33 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
 endfunc
 .endm
 
+.macro epel len, size, type
+func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
+.ifc \type,v
+        addi    t0, a6, -1 # t0 = filter row index for the shared body (vertical: taken from a6)
+.else
+        addi    t0, a5, -1 # t0 = filter row index for the shared body (horizontal: taken from a5)
+.endif
+.if \len <= 4
+        li      a5, 0306 # e8, mf4, ta, ma (raw vtype; a5 is free now that t0 holds the index)
+        li      a6, 0317 # e16, mf2, ta, ma (raw vtype for the 16-bit steps of the shared body)
+.elseif \len <= 8
+        li      a5, 0307 # e8, mf2, ta, ma
+        li      a6, 0310 # e16, m1, ta, ma
+.else # if len <= 16
+        li      a5, 0300 # e8, m1, ta, ma
+        li      a6, 0311 # e16, m2, ta, ma
+.endif
+        vsetvlstatic8 \len # presumably sets vl to len at e8; macro is defined outside this hunk, confirm
+        j       ff_put_vp8_epel_\type\()\size\().rvv # jump (no link) into the length-independent body
+endfunc
+.endm
+
+.irp type,h,v
+.irp size,4,6
+epel_common \size, \type
 .irp len,16,8,4
-epel \len 6 h
-epel \len 4 h
-epel \len 6 v
-epel \len 4 v
+epel \len, \size, \type
+.endr
+.endr
 .endr
-- 
2.45.1



More information about the ffmpeg-devel mailing list