[FFmpeg-devel] [PATCH v4 4/4] lavc/vp9dsp: R-V V mc tap hv
flow gg
hlefthleft at gmail.com
Tue Jul 23 12:03:16 EEST 2024
This patch was updated because patch 3/4 was updated.
<uk7b at foxmail.com> 于2024年7月23日周二 16:59写道:
> From: sunyuechi <sunyuechi at iscas.ac.cn>
>
> C908 X60
> vp9_avg_8tap_smooth_4hv_8bpp_c : 32.0 28.0
> vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32 : 15.0 13.2
> vp9_avg_8tap_smooth_8hv_8bpp_c : 98.0 86.2
> vp9_avg_8tap_smooth_8hv_8bpp_rvv_i32 : 23.7 21.2
> vp9_avg_8tap_smooth_16hv_8bpp_c : 355.7 297.0
> vp9_avg_8tap_smooth_16hv_8bpp_rvv_i32 : 47.0 41.5
> vp9_avg_8tap_smooth_32hv_8bpp_c : 1272.7 1099.7
> vp9_avg_8tap_smooth_32hv_8bpp_rvv_i32 : 134.7 119.7
> vp9_avg_8tap_smooth_64hv_8bpp_c : 4937.0 4224.2
> vp9_avg_8tap_smooth_64hv_8bpp_rvv_i32 : 528.5 228.5
> vp9_put_8tap_smooth_4hv_8bpp_c : 30.2 26.7
> vp9_put_8tap_smooth_4hv_8bpp_rvv_i32 : 30.5 12.5
> vp9_put_8tap_smooth_8hv_8bpp_c : 91.5 81.2
> vp9_put_8tap_smooth_8hv_8bpp_rvv_i32 : 22.7 20.2
> vp9_put_8tap_smooth_16hv_8bpp_c : 313.2 277.5
> vp9_put_8tap_smooth_16hv_8bpp_rvv_i32 : 45.2 40.2
> vp9_put_8tap_smooth_32hv_8bpp_c : 1166.7 1022.2
> vp9_put_8tap_smooth_32hv_8bpp_rvv_i32 : 131.7 117.2
> vp9_put_8tap_smooth_64hv_8bpp_c : 4560.5 3961.7
> vp9_put_8tap_smooth_64hv_8bpp_rvv_i32 : 517.0 223.2
> ---
> libavcodec/riscv/vp9_mc_rvv.S | 75 ++++++++++++++++++++++++++++++++++
> libavcodec/riscv/vp9dsp_init.c | 8 ++++
> 2 files changed, 83 insertions(+)
>
> diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
> index 6a4be7b9bd..26754ac6f8 100644
> --- a/libavcodec/riscv/vp9_mc_rvv.S
> +++ b/libavcodec/riscv/vp9_mc_rvv.S
> @@ -366,6 +366,77 @@ func
> ff_\op\()_vp9_8tap_\name\()_\len\()\type\()_rvv\vlen\(), zve32x
> endfunc
> .endm
>
> +#if __riscv_xlen == 64
> +.macro epel_hv_once len, name, op
> + sub a2, a2, a3
> + sub a2, a2, a3
> + sub a2, a2, a3
> + .irp n,0,2,4,6,8,10,12,14
> + epel_load_inc v\n, \len, put, \name, h, 1, t
> + .endr
> + addi a4, a4, -1
> +1:
> + addi a4, a4, -1
> + epel_load v30, \len, \op, \name, v, 0, s
> + vse8.v v30, (a0)
> + vmv.v.v v0, v2
> + vmv.v.v v2, v4
> + vmv.v.v v4, v6
> + vmv.v.v v6, v8
> + vmv.v.v v8, v10
> + vmv.v.v v10, v12
> + vmv.v.v v12, v14
> + epel_load v14, \len, put, \name, h, 1, t
> + add a2, a2, a3
> + add a0, a0, a1
> + bnez a4, 1b
> + epel_load v30, \len, \op, \name, v, 0, s
> + vse8.v v30, (a0)
> +.endm
> +
> +.macro epel_hv op, name, len, vlen
> +func ff_\op\()_vp9_8tap_\name\()_\len\()hv_rvv\vlen\(), zve32x
> + addi sp, sp, -64
> + .irp n,0,1,2,3,4,5,6,7
> + sd s\n, \n\()<<3(sp)
> + .endr
> +.if \len == 64 && \vlen < 256
> + addi sp, sp, -48
> + .irp n,0,1,2,3,4,5
> + sd a\n, \n\()<<3(sp)
> + .endr
> +.endif
> +.ifc \op,avg
> + csrwi vxrm, 0
> +.endif
> + epel_filter \name, h, t, a7
> + epel_filter \name, v, s, s7
> +.if \vlen < 256
> + vsetvlstatic8 \len, a6, 32, m2
> +.else
> + vsetvlstatic8 \len, a6, 64, m2
> +.endif
> + epel_hv_once \len, \name, \op
> +.if \len == 64 && \vlen < 256
> + .irp n,0,1,2,3,4,5
> + ld a\n, \n\()<<3(sp)
> + .endr
> + addi sp, sp, 48
> + addi a0, a0, 32
> + addi a2, a2, 32
> + epel_filter \name, h, t, a7
> + epel_hv_once \len, \name, \op
> +.endif
> + .irp n,0,1,2,3,4,5,6,7
> + ld s\n, \n\()<<3(sp)
> + .endr
> + addi sp, sp, 64
> +
> + ret
> +endfunc
> +.endm
> +#endif
> +
> .irp len, 64, 32, 16, 8, 4
> copy_avg \len
> .irp op, put, avg
> @@ -374,6 +445,10 @@ endfunc
> epel \len, \op, \name, \type, 128
> epel \len, \op, \name, \type, 256
> .endr
> + #if __riscv_xlen == 64
> + epel_hv \op, \name, \len, 128
> + epel_hv \op, \name, \len, 256
> + #endif
> .endr
> .endr
> .endr
> diff --git a/libavcodec/riscv/vp9dsp_init.c
> b/libavcodec/riscv/vp9dsp_init.c
> index 3669070fca..7b090c9889 100644
> --- a/libavcodec/riscv/vp9dsp_init.c
> +++ b/libavcodec/riscv/vp9dsp_init.c
> @@ -119,6 +119,10 @@ static av_cold void
> vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
> if (flags & AV_CPU_FLAG_RVB_ADDR) {
> init_subpel2(0, 0, 1, v, put, 128);
> init_subpel2(1, 0, 1, v, avg, 128);
> +# if __riscv_xlen == 64
> + init_subpel2(0, 1, 1, hv, put, 128);
> + init_subpel2(1, 1, 1, hv, avg, 128);
> +# endif
> }
>
> }
> @@ -129,6 +133,10 @@ static av_cold void
> vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
> if (flags & AV_CPU_FLAG_RVB_ADDR) {
> init_subpel2(0, 0, 1, v, put, 256);
> init_subpel2(1, 0, 1, v, avg, 256);
> +# if __riscv_xlen == 64
> + init_subpel2(0, 1, 1, hv, put, 256);
> + init_subpel2(1, 1, 1, hv, avg, 256);
> +# endif
> }
> }
> }
> --
> 2.45.2
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>
More information about the ffmpeg-devel
mailing list