[FFmpeg-devel] [PATCH 1/2] lavc/flacdsp: R-V V flac_wasted32
James Almer
jamrial at gmail.com
Sun May 12 21:37:28 EEST 2024
On 5/12/2024 2:07 PM, Rémi Denis-Courmont wrote:
> T-Head C908:
> flac_wasted_32_c: 949.0
> flac_wasted_32_rvv_i32: 278.7
> ---
> libavcodec/riscv/flacdsp_init.c | 7 ++++++-
> libavcodec/riscv/flacdsp_rvv.S | 15 +++++++++++++++
> 2 files changed, 21 insertions(+), 1 deletion(-)
>
> diff --git a/libavcodec/riscv/flacdsp_init.c b/libavcodec/riscv/flacdsp_init.c
> index 6cfb50ead8..4043715a3b 100644
> --- a/libavcodec/riscv/flacdsp_init.c
> +++ b/libavcodec/riscv/flacdsp_init.c
> @@ -31,6 +31,7 @@ void ff_flac_lpc32_rvv(int32_t *decoded, const int coeffs[32],
> int pred_order, int qlevel, int len);
> void ff_flac_lpc32_rvv_simple(int32_t *decoded, const int coeffs[32],
> int pred_order, int qlevel, int len);
> +void ff_flac_wasted32_rvv(int32_t *, int shift, int len);
> void ff_flac_decorrelate_indep2_16_rvv(uint8_t **out, int32_t **in,
> int channels, int len, int shift);
> void ff_flac_decorrelate_indep4_16_rvv(uint8_t **out, int32_t **in,
> @@ -76,8 +77,12 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
> c->lpc32 = ff_flac_lpc32_rvv_simple;
> else
> c->lpc32 = ff_flac_lpc32_rvv;
> +# endif
> }
>
> + c->wasted32 = ff_flac_wasted32_rvv;
> +
> +# if (__riscv_xlen >= 64)
> switch (fmt) {
> case AV_SAMPLE_FMT_S16:
> switch (channels) {
> @@ -117,8 +122,8 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
> c->decorrelate[2] = ff_flac_decorrelate_rs_32_rvv;
> c->decorrelate[3] = ff_flac_decorrelate_ms_32_rvv;
> break;
> -# endif
> }
> +# endif
> }
> #endif
> }
> diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S
> index 2a0b50f7a9..d576a0cc21 100644
> --- a/libavcodec/riscv/flacdsp_rvv.S
> +++ b/libavcodec/riscv/flacdsp_rvv.S
> @@ -100,7 +100,22 @@ func ff_flac_lpc32_rvv_simple, zve32x
>
> ret
> endfunc
> +#endif
> +
> +func ff_flac_wasted32_rvv, zve32x
> +1:
> + vsetvli t0, a2, e32, m8, ta, ma
> + vle32.v v8, (a0)
> + sub a2, a2, t0
> + vsll.vx v8, v8, a1
> + vse32.v v8, (a0)
> + sh2add a0, t0, a0
> + bnez a2, 1b
I'm not sure whether you're taking this into account, but the minimum
block size is 16 and the buffer is always allocated for max_blocksize plus
padding, so you should be able to process more samples per loop iteration
than this. The same applies to wasted33.
>
> + ret
> +endfunc
> +
> +#if (__riscv_xlen == 64)
> func ff_flac_decorrelate_indep2_16_rvv, zve32x
> ld a0, (a0)
> ld a2, 8(a1)
More information about the ffmpeg-devel mailing list