[FFmpeg-devel] [PATCH v4 4/4] lavc/vp9dsp: R-V V mc tap hv

flow gg hlefthleft at gmail.com
Tue Jul 23 12:03:16 EEST 2024


Patch 3/4 was updated, so this patch has been updated to match.

<uk7b at foxmail.com> 于2024年7月23日周二 16:59写道:

> From: sunyuechi <sunyuechi at iscas.ac.cn>
>
>                                                      C908   X60
> vp9_avg_8tap_smooth_4hv_8bpp_c                     :   32.0   28.0
> vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32               :   15.0   13.2
> vp9_avg_8tap_smooth_8hv_8bpp_c                     :   98.0   86.2
> vp9_avg_8tap_smooth_8hv_8bpp_rvv_i32               :   23.7   21.2
> vp9_avg_8tap_smooth_16hv_8bpp_c                    :  355.7  297.0
> vp9_avg_8tap_smooth_16hv_8bpp_rvv_i32              :   47.0   41.5
> vp9_avg_8tap_smooth_32hv_8bpp_c                    : 1272.7 1099.7
> vp9_avg_8tap_smooth_32hv_8bpp_rvv_i32              :  134.7  119.7
> vp9_avg_8tap_smooth_64hv_8bpp_c                    : 4937.0 4224.2
> vp9_avg_8tap_smooth_64hv_8bpp_rvv_i32              :  528.5  228.5
> vp9_put_8tap_smooth_4hv_8bpp_c                     :   30.2   26.7
> vp9_put_8tap_smooth_4hv_8bpp_rvv_i32               :   30.5   12.5
> vp9_put_8tap_smooth_8hv_8bpp_c                     :   91.5   81.2
> vp9_put_8tap_smooth_8hv_8bpp_rvv_i32               :   22.7   20.2
> vp9_put_8tap_smooth_16hv_8bpp_c                    :  313.2  277.5
> vp9_put_8tap_smooth_16hv_8bpp_rvv_i32              :   45.2   40.2
> vp9_put_8tap_smooth_32hv_8bpp_c                    : 1166.7 1022.2
> vp9_put_8tap_smooth_32hv_8bpp_rvv_i32              :  131.7  117.2
> vp9_put_8tap_smooth_64hv_8bpp_c                    : 4560.5 3961.7
> vp9_put_8tap_smooth_64hv_8bpp_rvv_i32              :  517.0  223.2
> ---
>  libavcodec/riscv/vp9_mc_rvv.S  | 75 ++++++++++++++++++++++++++++++++++
>  libavcodec/riscv/vp9dsp_init.c |  8 ++++
>  2 files changed, 83 insertions(+)
>
> diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
> index 6a4be7b9bd..26754ac6f8 100644
> --- a/libavcodec/riscv/vp9_mc_rvv.S
> +++ b/libavcodec/riscv/vp9_mc_rvv.S
> @@ -366,6 +366,77 @@ func ff_\op\()_vp9_8tap_\name\()_\len\()\type\()_rvv\vlen\(), zve32x
>  endfunc
>  .endm
>
> +#if __riscv_xlen == 64
> +.macro epel_hv_once len, name, op
> +        sub             a2, a2, a3
> +        sub             a2, a2, a3
> +        sub             a2, a2, a3
> +        .irp n,0,2,4,6,8,10,12,14
> +        epel_load_inc   v\n, \len, put, \name, h, 1, t
> +        .endr
> +        addi            a4, a4, -1
> +1:
> +        addi            a4, a4, -1
> +        epel_load       v30, \len, \op, \name, v, 0, s
> +        vse8.v          v30, (a0)
> +        vmv.v.v         v0, v2
> +        vmv.v.v         v2, v4
> +        vmv.v.v         v4, v6
> +        vmv.v.v         v6, v8
> +        vmv.v.v         v8, v10
> +        vmv.v.v         v10, v12
> +        vmv.v.v         v12, v14
> +        epel_load       v14, \len, put, \name, h, 1, t
> +        add             a2, a2, a3
> +        add             a0, a0, a1
> +        bnez            a4, 1b
> +        epel_load       v30, \len, \op, \name, v, 0, s
> +        vse8.v          v30, (a0)
> +.endm
> +
> +.macro epel_hv op, name, len, vlen
> +func ff_\op\()_vp9_8tap_\name\()_\len\()hv_rvv\vlen\(), zve32x
> +        addi            sp, sp, -64
> +        .irp n,0,1,2,3,4,5,6,7
> +        sd              s\n, \n\()<<3(sp)
> +        .endr
> +.if \len == 64 && \vlen < 256
> +        addi            sp, sp, -48
> +        .irp n,0,1,2,3,4,5
> +        sd              a\n, \n\()<<3(sp)
> +        .endr
> +.endif
> +.ifc \op,avg
> +        csrwi           vxrm, 0
> +.endif
> +        epel_filter     \name, h, t, a7
> +        epel_filter     \name, v, s, s7
> +.if \vlen < 256
> +        vsetvlstatic8   \len, a6, 32, m2
> +.else
> +        vsetvlstatic8   \len, a6, 64, m2
> +.endif
> +        epel_hv_once    \len, \name, \op
> +.if \len == 64 && \vlen < 256
> +        .irp n,0,1,2,3,4,5
> +        ld              a\n, \n\()<<3(sp)
> +        .endr
> +        addi            sp, sp, 48
> +        addi            a0, a0, 32
> +        addi            a2, a2, 32
> +        epel_filter     \name, h, t, a7
> +        epel_hv_once    \len, \name, \op
> +.endif
> +        .irp n,0,1,2,3,4,5,6,7
> +        ld              s\n, \n\()<<3(sp)
> +        .endr
> +        addi            sp, sp, 64
> +
> +        ret
> +endfunc
> +.endm
> +#endif
> +
>  .irp len, 64, 32, 16, 8, 4
>          copy_avg \len
>          .irp op, put, avg
> @@ -374,6 +445,10 @@ endfunc
>                                  epel \len, \op, \name, \type, 128
>                                  epel \len, \op, \name, \type, 256
>                          .endr
> +                        #if __riscv_xlen == 64
> +                        epel_hv \op, \name, \len, 128
> +                        epel_hv \op, \name, \len, 256
> +                        #endif
>                  .endr
>          .endr
>  .endr
> diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
> index 3669070fca..7b090c9889 100644
> --- a/libavcodec/riscv/vp9dsp_init.c
> +++ b/libavcodec/riscv/vp9dsp_init.c
> @@ -119,6 +119,10 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
>      if (flags & AV_CPU_FLAG_RVB_ADDR) {
>          init_subpel2(0, 0, 1, v, put, 128);
>          init_subpel2(1, 0, 1, v, avg, 128);
> +# if __riscv_xlen == 64
> +        init_subpel2(0, 1, 1, hv, put, 128);
> +        init_subpel2(1, 1, 1, hv, avg, 128);
> +# endif
>      }
>
>      }
> @@ -129,6 +133,10 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
>          if (flags & AV_CPU_FLAG_RVB_ADDR) {
>              init_subpel2(0, 0, 1, v, put, 256);
>              init_subpel2(1, 0, 1, v, avg, 256);
> +# if __riscv_xlen == 64
> +            init_subpel2(0, 1, 1, hv, put, 256);
> +            init_subpel2(1, 1, 1, hv, avg, 256);
> +# endif
>          }
>      }
>      }
> --
> 2.45.2
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>


More information about the ffmpeg-devel mailing list