[FFmpeg-devel] [PATCH 05/10] lavc/vp9dsp: R-V V mc bilin h
uk7b at foxmail.com
uk7b at foxmail.com
Sat May 4 18:03:08 EEST 2024
From: sunyuechi <sunyuechi at iscas.ac.cn>
C908:
vp9_avg_bilin_4h_8bpp_c: 5.5
vp9_avg_bilin_4h_8bpp_rvv_i64: 2.5
vp9_avg_bilin_8h_8bpp_c: 19.7
vp9_avg_bilin_8h_8bpp_rvv_i64: 5.0
vp9_avg_bilin_16h_8bpp_c: 78.2
vp9_avg_bilin_16h_8bpp_rvv_i64: 10.0
vp9_avg_bilin_32h_8bpp_c: 325.2
vp9_avg_bilin_32h_8bpp_rvv_i64: 28.5
vp9_avg_bilin_64h_8bpp_c: 1266.2
vp9_avg_bilin_64h_8bpp_rvv_i64: 115.0
vp9_put_bilin_4h_8bpp_c: 4.5
vp9_put_bilin_4h_8bpp_rvv_i64: 2.2
vp9_put_bilin_8h_8bpp_c: 16.7
vp9_put_bilin_8h_8bpp_rvv_i64: 4.2
vp9_put_bilin_16h_8bpp_c: 65.2
vp9_put_bilin_16h_8bpp_rvv_i64: 8.7
vp9_put_bilin_32h_8bpp_c: 273.5
vp9_put_bilin_32h_8bpp_rvv_i64: 26.7
vp9_put_bilin_64h_8bpp_c: 1041.0
vp9_put_bilin_64h_8bpp_rvv_i64: 87.2
---
libavcodec/riscv/vp9_mc_rvv.S | 73 ++++++++++++++++++++++++++++++++++
libavcodec/riscv/vp9dsp_init.c | 17 ++++++++
2 files changed, 90 insertions(+)
diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index ba9ec3431f..a97807633e 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -51,6 +51,72 @@
ret
.endm
+.macro bilin_h_load dst len type /* filter one row of len pixels horizontally (bilinear); put: dst = filtered row, avg: dst = average of that row with the bytes already at (a0) */
+.ifc \len,4
+ vsetivli zero, 5, e8, mf2, ta, ma /* first vsetvli group: vl = len + 1 bytes, the filter reads one pixel past the row */
+.elseif \len == 8
+ vsetivli zero, 9, e8, m1, ta, ma
+.elseif \len == 16
+ vsetivli zero, 17, e8, m2, ta, ma
+.elseif \len == 32
+ li t0, 33 /* 33 and 65 do not fit the 5-bit vsetivli immediate, so go through t0 */
+ vsetvli zero, t0, e8, m4, ta, ma
+.elseif \len == 64
+ li t0, 65
+ vsetvli zero, t0, e8, m8, ta, ma
+.endif
+
+ vle8.v v8, (a2) /* v8 = len + 1 source bytes starting at (a2) */
+ vslide1down.vx v0, v8, t5 /* v0[i] = v8[i + 1]; the scalar t5 only fills the last element, which the shorter vl below drops */
+
+.ifc \len,4
+ vsetivli zero, 4, e8, mf4, ta, ma /* second vsetvli group: vl = len, LMUL halved relative to the load (the widening ops below write 2 * LMUL) */
+.elseif \len == 8
+ vsetivli zero, 8, e8, mf2, ta, ma
+.elseif \len == 16
+ vsetivli zero, 16, e8, m1, ta, ma
+.elseif \len == 32
+ li t0, 32
+ vsetvli zero, t0, e8, m2, ta, ma
+.elseif \len == 64
+ li t0, 64
+ vsetvli zero, t0, e8, m4, ta, ma
+.endif
+
+ vwmulu.vx v16, v0, a5 /* v16 (16-bit) = v0 * a5, unsigned widening multiply */
+ vwmaccsu.vx v16, t1, v8 /* v16 += t1 * v8 with t1 signed and v8 unsigned; bilin_h sets t1 = -a5, giving a5 * (v0 - v8) */
+ vwadd.wx v16, v16, t4 /* v16 += t4; bilin_h loads t4 = 8, the rounding term for the shift by 4 */
+ vnsra.wi v16, v16, 4 /* narrow to 8 bits with an arithmetic right shift by 4 */
+ vadd.vv \dst, v16, v8 /* dst = v16 + v8. NOTE(review): both branches below overwrite dst (when dst is v0, as in bilin_h), so this instruction looks redundant - confirm and drop */
+
+.ifc \type,put
+ vadd.vv \dst, v16, v8 /* put: dst = v8 + ((a5 * (v0 - v8) + 8) >> 4) */
+.elseif \type == avg
+ vadd.vv v16, v16, v8 /* avg: v16 = same filtered row. NOTE(review): the guard above is an expression compare on a symbol name, not a string compare like .ifc - confirm it assembles as intended */
+ vle8.v \dst, (a0) /* dst = bytes currently at the destination (a0) */
+ vaaddu.vv \dst, \dst, v16 /* dst = unsigned averaging add of dst and v16, rounded according to vxrm */
+.endif
+
+.endm
+
+.macro bilin_h len type /* function body for the put/avg horizontal bilinear kernels: filters and stores one row per iteration until a4 reaches zero, then returns */
+.ifc \type,avg
+ csrwi vxrm, 0 /* fixed-point rounding mode 0 (round-to-nearest-up), used by vaaddu.vv in bilin_h_load */
+.endif
+ li t4, 8 /* t4 = 8: rounding term added before the shift right by 4 in bilin_h_load */
+ li t5, 1 /* t5 = scalar operand of vslide1down.vx in bilin_h_load */
+ neg t1, a5 /* t1 = -a5; a5 is the filter weight (presumably the mx argument - confirm against the C prototype) */
+1:
+ addi a4, a4, -1 /* a4 counts the rows left (presumably the h argument - confirm) */
+ bilin_h_load v0, \len, \type /* v0 = finished output row */
+ vse8.v v0, (a0) /* store the row to the destination at (a0) */
+ add a2, a2, a3 /* advance the source pointer a2 by a3 (presumably the source stride) */
+ add a0, a0, a1 /* advance the destination pointer a0 by a1 (presumably the destination stride) */
+ bnez a4, 1b /* loop while rows remain */
+
+ ret
+.endm
+
.irp len 64, 32, 16
func ff_copy\len\()_rvv, zve32x
copy_avg \len copy
@@ -61,4 +127,11 @@ endfunc
func ff_avg\len\()_rvv, zve32x
copy_avg \len avg
endfunc
+
+func ff_put_bilin_\len\()h_rvv, zve32x /* put variant, one function per len of the enclosing .irp. NOTE(review): the .irp list governing this hunk is not visible here; vp9dsp_init.c references len = 64, 32, 16, 8 and 4 - confirm all are generated */
+ bilin_h \len put
+endfunc
+func ff_avg_bilin_\len\()h_rvv, zve32x /* avg variant: the filtered row is averaged with the bytes already at the destination. NOTE(review): the governing .irp list is not visible in this hunk - confirm it covers len = 64, 32, 16, 8 and 4 */
+ bilin_h \len avg
+endfunc
.endr
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index da33e15e97..248501f5d2 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -106,6 +106,23 @@ static av_cold void vp9dsp_mc_init_rvv(VP9DSPContext *dsp, int bpp)
#undef init_copy_avg
#undef init_avg
#undef init_fpel
+
+#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type) /* store one RVV bilinear function pointer in dsp->mc */ \
+ dsp->mc[idx1][FILTER_BILINEAR ][idx2][idxh][idxv] = /* symbol name is pasted from type, sz and dir, e.g. ff_put_bilin_64h_rvv */ \
+ ff_##type##_bilin_##sz##dir##_rvv; /* NOTE(review): the macro already ends in ';', so the ';' at each use site adds an empty statement */
+
+#define init_subpel2(idx, idxh, idxv, dir, type) /* expand init_subpel1 for every block size: idx1 0..4 pairs with 64, 32, 16, 8, 4 */ \
+ init_subpel1(0, idx, idxh, idxv, 64, dir, type); \
+ init_subpel1(1, idx, idxh, idxv, 32, dir, type); \
+ init_subpel1(2, idx, idxh, idxv, 16, dir, type); \
+ init_subpel1(3, idx, idxh, idxv, 8, dir, type); \
+ init_subpel1(4, idx, idxh, idxv, 4, dir, type)
+
+#define init_subpel3(idx, type) /* only the horizontal case is wired up here: idxh = 1, idxv = 0, dir = h */ \
+ init_subpel2(idx, 1, 0, h, type)
+
+ init_subpel3(0, put); /* idx2 = 0 takes the put functions */
+ init_subpel3(1, avg); /* idx2 = 1 takes the avg functions. NOTE(review): unlike init_copy_avg/init_avg/init_fpel above, the init_subpel macros are never #undef-ed - confirm this is intended */
}
#endif
#endif
--
2.45.0
More information about the ffmpeg-devel
mailing list