[FFmpeg-devel] [PATCH 1/3] lavc/vp8dsp: R-V V put_bilin_h

Rémi Denis-Courmont remi at remlab.net
Fri Feb 23 19:17:57 EET 2024


Hi,

+
+.macro bilin_h_load dst len            // dst = filtered row read from (a2); uses t1, t4, t5 set by put_vp8_bilin_h
+.ifc \len,4
+        vsetivli        zero, 5, e8, mf2, ta, ma        // len + 1 byte elements for the load and slide

Don't use fractional multipliers if you don't mix element widths.

+.elseif \len == 8
+        vsetivli        zero, 9, e8, m1, ta, ma
+.else
+        vsetivli        zero, 17, e8, m2, ta, ma
+.endif
+
+        vle8.v          \dst, (a2)                      // load len + 1 bytes starting at a2
+        vslide1down.vx  v2, \dst, t5                    // v2[i] = dst[i + 1]; last element taken from t5
+

+.ifc \len,4
+        vsetivli        zero, 4, e8, mf4, ta, ma        // len byte elements for the arithmetic below

Same as above.

+.elseif \len == 8
+        vsetivli        zero, 8, e8, mf2, ta, ma

Also.

+.else
+        vsetivli        zero, 16, e8, m1, ta, ma
+.endif

+        vwmulu.vx       v28, \dst, t1                   // v28 = dst * t1, widened to 16 bits
+        vwmaccu.vx      v28, a5, v2                     // v28 += a5 * v2
+        vwaddu.wx       v24, v28, t4                    // v24 = v28 + t4
+        vnsra.wi        \dst, v24, 3                    // dst = v24 >> 3, narrowed back to 8 bits
+.endm
+
+.macro put_vp8_bilin_h len             // row loop: filter one len-byte row per iteration, a4 iterations, then return
+        li              t1, 8
+        li              t4, 4           // t4 = 4, added before the >> 3 in bilin_h_load
+        li              t5, 1           // t5 = scalar shifted in by vslide1down.vx in bilin_h_load
+        sub             t1, t1, a5      // t1 = 8 - a5, multiplier applied to the loaded bytes
+1:
+        addi            a4, a4, -1      // a4 = iterations left (presumably the block height; confirm against caller)
+        bilin_h_load    v0, \len        // v0 = filtered row read from (a2)
+        vse8.v          v0, (a0)        // store the filtered row at (a0)
+        add             a2, a2, a3      // advance the load pointer by a3 bytes
+        add             a0, a0, a1      // advance the store pointer by a1 bytes
+        bnez            a4, 1b
+
+        ret
+.endm
+
+func ff_put_vp8_bilin16_h_rvv, zve32x  // instantiates put_vp8_bilin_h for 16-byte rows
+        put_vp8_bilin_h 16
+endfunc
+
+func ff_put_vp8_bilin8_h_rvv, zve32x   // instantiates put_vp8_bilin_h for 8-byte rows
+        put_vp8_bilin_h 8
+endfunc
+
+func ff_put_vp8_bilin4_h_rvv, zve32x   // instantiates put_vp8_bilin_h for 4-byte rows
+        put_vp8_bilin_h 4
+endfunc

-- 
レミ・デニ-クールモン
http://www.remlab.net/





More information about the ffmpeg-devel mailing list