[FFmpeg-devel] [PATCH 05/10] lavc/vp9dsp: R-V V mc bilin h

uk7b at foxmail.com uk7b at foxmail.com
Sat May 4 18:03:08 EEST 2024


From: sunyuechi <sunyuechi at iscas.ac.cn>

C908:
vp9_avg_bilin_4h_8bpp_c: 5.5
vp9_avg_bilin_4h_8bpp_rvv_i64: 2.5
vp9_avg_bilin_8h_8bpp_c: 19.7
vp9_avg_bilin_8h_8bpp_rvv_i64: 5.0
vp9_avg_bilin_16h_8bpp_c: 78.2
vp9_avg_bilin_16h_8bpp_rvv_i64: 10.0
vp9_avg_bilin_32h_8bpp_c: 325.2
vp9_avg_bilin_32h_8bpp_rvv_i64: 28.5
vp9_avg_bilin_64h_8bpp_c: 1266.2
vp9_avg_bilin_64h_8bpp_rvv_i64: 115.0
vp9_put_bilin_4h_8bpp_c: 4.5
vp9_put_bilin_4h_8bpp_rvv_i64: 2.2
vp9_put_bilin_8h_8bpp_c: 16.7
vp9_put_bilin_8h_8bpp_rvv_i64: 4.2
vp9_put_bilin_16h_8bpp_c: 65.2
vp9_put_bilin_16h_8bpp_rvv_i64: 8.7
vp9_put_bilin_32h_8bpp_c: 273.5
vp9_put_bilin_32h_8bpp_rvv_i64: 26.7
vp9_put_bilin_64h_8bpp_c: 1041.0
vp9_put_bilin_64h_8bpp_rvv_i64: 87.2
---
 libavcodec/riscv/vp9_mc_rvv.S  | 73 ++++++++++++++++++++++++++++++++++
 libavcodec/riscv/vp9dsp_init.c | 17 ++++++++
 2 files changed, 90 insertions(+)

diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index ba9ec3431f..a97807633e 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -51,6 +51,72 @@
         ret
 .endm
 
+// Horizontally filter one row of \len 8-bit pixels read from (a2):
+//   row[x] = src[x] + ((a5 * src[x + 1] + t1 * src[x] + t4) >> 4)
+// \type put leaves row in \dst; avg leaves the vaaddu average of row and the
+// \len bytes at (a0).  Clobbers the v0/v8/v16 groups and, for \len >= 32, t0.
+.macro bilin_h_load dst len type
+.if \len == 4
+        vsetivli        zero, 5, e8, mf2, ta, ma        // vl = \len + 1
+.elseif \len == 8
+        vsetivli        zero, 9, e8, m1, ta, ma
+.elseif \len == 16
+        vsetivli        zero, 17, e8, m2, ta, ma
+.elseif \len == 32
+        li              t0, 33
+        vsetvli         zero, t0, e8, m4, ta, ma
+.elseif \len == 64
+        li              t0, 65
+        vsetvli         zero, t0, e8, m8, ta, ma
+.endif
+        vle8.v          v8, (a2)                // v8[x] = src[x]
+        vslide1down.vx  v0, v8, t5              // v0[x] = src[x + 1], t5 fills the tail
+
+.if \len == 4
+        vsetivli        zero, 4, e8, mf4, ta, ma        // vl = \len
+.elseif \len == 8
+        vsetivli        zero, 8, e8, mf2, ta, ma
+.elseif \len == 16
+        vsetivli        zero, 16, e8, m1, ta, ma
+.elseif \len == 32
+        li              t0, 32
+        vsetvli         zero, t0, e8, m2, ta, ma
+.elseif \len == 64
+        li              t0, 64
+        vsetvli         zero, t0, e8, m4, ta, ma
+.endif
+
+        vwmulu.vx       v16, v0, a5             // 16-bit: a5 * src[x + 1]
+        vwmaccsu.vx     v16, t1, v8             //       + t1 * src[x]
+        vwadd.wx        v16, v16, t4            //       + t4
+        vnsra.wi        v16, v16, 4             // >> 4, narrowed back to 8 bits
+.ifc \type,avg
+        vadd.vv         v16, v16, v8            // v16 = filtered row
+        vle8.v          \dst, (a0)              // current destination pixels
+        vaaddu.vv       \dst, \dst, v16         // averaged, rounding per vxrm
+.else
+        vadd.vv         \dst, v16, v8           // \dst = filtered row
+.endif
+.endm
+
+// Emit the row loop: filter a4 rows of \len pixels from (a2) into (a0).
+.macro bilin_h len type
+        neg             t1, a5                  // t1 = -a5, second filter weight
+        li              t5, 1                   // scalar shifted in by vslide1down
+        li              t4, 8                   // rounding term added before >> 4
+.ifc \type,avg
+        csrwi           vxrm, 0                 // rounding mode used by vaaddu
+.endif
+1:
+        bilin_h_load    v0, \len, \type         // v0 = output row for this line
+        vse8.v          v0, (a0)
+        addi            a4, a4, -1              // one row done
+        add             a0, a0, a1              // dst += a1 (dst stride)
+        add             a2, a2, a3              // src += a3 (src stride)
+        bnez            a4, 1b
+        ret
+.endm
+
 .irp len 64, 32, 16
 func ff_copy\len\()_rvv, zve32x
         copy_avg \len copy
@@ -61,4 +127,11 @@ endfunc
 func ff_avg\len\()_rvv, zve32x
         copy_avg \len avg
 endfunc
+
+func ff_put_bilin_\len\()h_rvv, zve32x          // bilin_h row loop, put variant
+        bilin_h \len put
+endfunc
+func ff_avg_bilin_\len\()h_rvv, zve32x          // bilin_h row loop, avg variant
+        bilin_h \len avg
+endfunc
 .endr
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index da33e15e97..248501f5d2 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -106,6 +106,23 @@ static av_cold void vp9dsp_mc_init_rvv(VP9DSPContext *dsp, int bpp)
 #undef init_copy_avg
 #undef init_avg
 #undef init_fpel
+
+/* Store ff_<type>_bilin_<sz><dir>_rvv into one FILTER_BILINEAR slot of dsp->mc. */
+#define init_subpel1(size_i, op_i, h_i, v_i, sz, dir, type)  \
+    dsp->mc[size_i][FILTER_BILINEAR    ][op_i][h_i][v_i] =   \
+        ff_##type##_bilin_##sz##dir##_rvv;
+/* Sizes 64..4 fill mc[0..4]; NOTE(review): confirm the 8 and 4 asm exist. */
+#define init_subpel2(op_i, h_i, v_i, dir, type)      \
+    init_subpel1(0, op_i, h_i, v_i, 64, dir, type);  \
+    init_subpel1(1, op_i, h_i, v_i, 32, dir, type);  \
+    init_subpel1(2, op_i, h_i, v_i, 16, dir, type);  \
+    init_subpel1(3, op_i, h_i, v_i,  8, dir, type);  \
+    init_subpel1(4, op_i, h_i, v_i,  4, dir, type)
+/* Only the h direction is wired up here: slot [1][0] for every size. */
+#define init_subpel3(op_i, type)        \
+    init_subpel2(op_i, 1, 0, h, type)
+    init_subpel3(0, put);
+    init_subpel3(1, avg);
     }
 #endif
 #endif
-- 
2.45.0



More information about the ffmpeg-devel mailing list