[FFmpeg-devel] [PATCH 1/2] lavc/flacdsp: R-V V flac_wasted32

Sun May 12 21:37:28 EEST 2024

On 5/12/2024 2:07 PM, Rémi Denis-Courmont wrote:
> T-Head C908:
> flac_wasted_32_c:       949.0
> flac_wasted_32_rvv_i32: 278.7
> ---
>   libavcodec/riscv/flacdsp_init.c |  7 ++++++-
>   libavcodec/riscv/flacdsp_rvv.S  | 15 +++++++++++++++
>   2 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/libavcodec/riscv/flacdsp_init.c b/libavcodec/riscv/flacdsp_init.c
> index 6cfb50ead8..4043715a3b 100644
> --- a/libavcodec/riscv/flacdsp_init.c
> +++ b/libavcodec/riscv/flacdsp_init.c
> @@ -31,6 +31,7 @@ void ff_flac_lpc32_rvv(int32_t *decoded, const int coeffs[32],
>                          int pred_order, int qlevel, int len);
>   void ff_flac_lpc32_rvv_simple(int32_t *decoded, const int coeffs[32],
>                                 int pred_order, int qlevel, int len);
> +void ff_flac_wasted32_rvv(int32_t *, int shift, int len);
>   void ff_flac_decorrelate_indep2_16_rvv(uint8_t **out, int32_t **in,
>                                          int channels, int len, int shift);
>   void ff_flac_decorrelate_indep4_16_rvv(uint8_t **out, int32_t **in,
> @@ -76,8 +77,12 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
>                   c->lpc32 = ff_flac_lpc32_rvv_simple;
>               else
>                   c->lpc32 = ff_flac_lpc32_rvv;
> +# endif
>           }
>   
> +        c->wasted32 = ff_flac_wasted32_rvv;
> +
> +# if (__riscv_xlen >= 64)
>           switch (fmt) {
>           case AV_SAMPLE_FMT_S16:
>               switch (channels) {
> @@ -117,8 +122,8 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
>               c->decorrelate[2] = ff_flac_decorrelate_rs_32_rvv;
>               c->decorrelate[3] = ff_flac_decorrelate_ms_32_rvv;
>               break;
> -# endif
>           }
> +# endif
>       }
>   #endif
>   }
> diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S
> index 2a0b50f7a9..d576a0cc21 100644
> --- a/libavcodec/riscv/flacdsp_rvv.S
> +++ b/libavcodec/riscv/flacdsp_rvv.S
> @@ -100,7 +100,22 @@ func ff_flac_lpc32_rvv_simple, zve32x
>   
>           ret
>   endfunc
> +#endif
> +
> +func ff_flac_wasted32_rvv, zve32x
> +1:
> +        vsetvli t0, a2, e32, m8, ta, ma
> +        vle32.v v8, (a0)
> +        sub     a2, a2, t0
> +        vsll.vx v8, v8, a1
> +        vse32.v v8, (a0)
> +        sh2add  a0, t0, a0
> +        bnez    a2, 1b

Not sure if you're taking it into account, but the minimum blocksize is
16 and the buffer is always allocated for max_blocksize plus padding, so
you should be able to process more samples per loop iteration than this.
The same applies to wasted33.

>   
> +        ret
> +endfunc
> +
> +#if (__riscv_xlen == 64)
>   func ff_flac_decorrelate_indep2_16_rvv, zve32x
>           ld      a0,  (a0)
>           ld      a2, 8(a1)