[FFmpeg-devel] [PATCH 1/6] lavc/ac3dsp: RISC-V V ac3_exponent_min

Rémi Denis-Courmont remi at remlab.net
Thu Jun 15 21:02:52 EEST 2023


Nihao

Le torstaina 15. kesäkuuta 2023, 13.36.40 EEST Peiting Shen a écrit :
> From: Shen Peiting <shenpeiting at eswincomputing.com>
> 
> Find scalar minimum optimized by using RVV instructions
> 
> Benchmarks on Spike(cycles):
> *exp=1280*4;num_reuse_blocks=5;nb_coefs=16
> ac3_exponent_min_c: 1993
> ac3_exponent_min_rvv: 258
> *exp=1280*4;num_reuse_blocks=19;nb_coefs=255
> ac3_exponent_min_c: 99010
> ac3_exponent_min_rvv: 3843
> 
> The optimization performance is more obvious with the increase of number of
> reuse blocks and number of coefs.
> 
> Co-Authored by: Yang Xiaojun <yangxiaojun at eswincomputing.com>
> Co-Authored by: Huang Xing <huangxing1 at eswincomputing.com>
> Co-Authored by: Zeng Fanchen <zengfanchen at eswincomputing.com>
> Signed-off-by: Shen Peiting <shenpeiting at eswincomputing.com>
> ---
>  libavcodec/ac3dsp.c            |  2 ++
>  libavcodec/ac3dsp.h            |  1 +
>  libavcodec/riscv/Makefile      |  2 ++
>  libavcodec/riscv/ac3dsp_init.c | 37 +++++++++++++++++++++++++++
>  libavcodec/riscv/ac3dsp_rvv.S  | 46 ++++++++++++++++++++++++++++++++++
>  5 files changed, 88 insertions(+)
>  create mode 100644 libavcodec/riscv/ac3dsp_init.c
>  create mode 100644 libavcodec/riscv/ac3dsp_rvv.S
> 
> diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
> index 22cb5f242e..302b786b15 100644
> --- a/libavcodec/ac3dsp.c
> +++ b/libavcodec/ac3dsp.c
> @@ -395,5 +395,7 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c)
>      ff_ac3dsp_init_x86(c);
>  #elif ARCH_MIPS
>      ff_ac3dsp_init_mips(c);
> +#elif ARCH_RISCV
> +    ff_ac3dsp_init_riscv(c);
>  #endif
>  }
> diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
> index 33e51e202e..a01bff3d11 100644
> --- a/libavcodec/ac3dsp.h
> +++ b/libavcodec/ac3dsp.h
> @@ -109,6 +109,7 @@ void ff_ac3dsp_init    (AC3DSPContext *c);
>  void ff_ac3dsp_init_arm(AC3DSPContext *c);
>  void ff_ac3dsp_init_x86(AC3DSPContext *c);
>  void ff_ac3dsp_init_mips(AC3DSPContext *c);
> +void ff_ac3dsp_init_riscv(AC3DSPContext *c);
> 
>  void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix,
>                         int out_ch, int in_ch, int len);
> diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
> index ee17a521fd..a627924cac 100644
> --- a/libavcodec/riscv/Makefile
> +++ b/libavcodec/riscv/Makefile
> @@ -1,5 +1,7 @@
>  OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o
>  RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o
> +OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o
> +RVV-OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_rvv.o
>  OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
>  RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
>  OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
> diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
> new file mode 100644
> index 0000000000..bb67d86998
> --- /dev/null
> +++ b/libavcodec/riscv/ac3dsp_init.c
> @@ -0,0 +1,37 @@
> +/*
> + * Copyright 2023 Beijing ESWIN Computing Technology Co., Ltd.
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +#include <stdint.h>
> +
> +#include "libavutil/attributes.h"
> +#include "libavcodec/ac3dsp.h"
> +#include "libavutil/cpu.h"
> +#include "config.h"
> +
> +void ff_ac3_exponent_min_rvv(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
> +
> +av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
> +{
> +    int flags = av_get_cpu_flags();
> +#if HAVE_RVV
> +    if (flags & AV_CPU_FLAG_RVV_I32)
> +        c->ac3_exponent_min = ff_ac3_exponent_min_rvv;
> +#endif
> +}
> +
> diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
> new file mode 100644
> index 0000000000..879123f4a7
> --- /dev/null
> +++ b/libavcodec/riscv/ac3dsp_rvv.S
> @@ -0,0 +1,46 @@
> +/*
> + * Copyright 2023 Beijing ESWIN Computing Technology Co., Ltd.
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/riscv/asm.S"
> +
> +func ff_ac3_exponent_min_rvv, zve32x
> +    beq             a1, x0, 3f

Conventionally, we use ABI names for GP and FP registers, like almost everybody 
else and their moms in the RISC-V world. So that would be `zero`.

But in this case, you should use the `beqz` alias anyway.

> +    li              t0, 256
> +    addi            a1, a1, 1
> +1:
> +    mv              t2, a0

AFAICT, t2 is always the same as a0, and thus this is unnecessary.

> +    mv              t3, a1
> +    lb              t4, (t2)
> +2:
> +    vsetvli         t1, t3, e8, m8
> +    vlse8.v         v0, (t2), t0
> +    vmv.s.x         v8, t4
> +    sub             t3, t3, t1
> +    vredminu.vs     v8, v0, v8
> +    vmv.x.s         t4, v8
> +    bnez            t3, 2b
> +    vsetivli        t1, 1, e8

You're not using the output (`t1`), so use `zero` as the destination register.

But you don't even need to reset the vector configuration here. Just use 
masking to store the one element (you could also transfer to scalar and store, 
but that's probably slower than masking).

> +    vse8.v          v8, (a0)
> +    addi            a0, a0, 1
> +    addi            a2, a2, -1

This will stall on an in-order CPU. Please avoid immediately consecutive 
interdependent instructions.

> +    bnez            a2, 1b
> +3:
> +    ret
> +endfunc


-- 
Rémi Denis-Courmont
http://www.remlab.net/





More information about the ffmpeg-devel mailing list