[FFmpeg-devel] [PATCH 2/8] avcodec/flac: add AVX2 version of the 16-bit LPC encoder
Rostislav Pehlivanov
atomnuker at gmail.com
Mon Nov 27 01:20:14 EET 2017
On 26 November 2017 at 22:51, James Darnley <james.darnley at gmail.com> wrote:
> When compared to the SSE4 version, runtime is reduced by 0.5 to 20%.
> After a bug fix long ago in e609cfd697, the 16-bit LPC encoder is
> used so little that the runtime reduction is no longer correct. The
> function itself is around 2 times faster. (As one might expect for
> doing twice as many samples every iteration.)
> ---
> libavcodec/flacenc.c | 2 +-
> libavcodec/x86/flac_dsp_gpl.asm | 32 +++++++++++++++++++++++++++-----
> libavcodec/x86/flacdsp_init.c | 5 +++++
> 3 files changed, 33 insertions(+), 6 deletions(-)
>
> diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
> index 170c3caf48..cf25982c91 100644
> --- a/libavcodec/flacenc.c
> +++ b/libavcodec/flacenc.c
> @@ -88,7 +88,7 @@ typedef struct FlacSubframe {
> uint64_t rc_sums[32][MAX_PARTITIONS];
>
> int32_t samples[FLAC_MAX_BLOCKSIZE];
> - int32_t residual[FLAC_MAX_BLOCKSIZE+11];
> + int32_t residual[FLAC_MAX_BLOCKSIZE+23];
> } FlacSubframe;
>
> typedef struct FlacFrame {
> diff --git a/libavcodec/x86/flac_dsp_gpl.asm
> b/libavcodec/x86/flac_dsp_gpl.asm
> index e285158185..c461c666be 100644
> --- a/libavcodec/x86/flac_dsp_gpl.asm
> +++ b/libavcodec/x86/flac_dsp_gpl.asm
> @@ -24,7 +24,8 @@
>
> SECTION .text
>
> -INIT_XMM sse4
> +%macro FUNCTION_BODY_16 0
> +
> %if ARCH_X86_64
> cglobal flac_enc_lpc_16, 5, 7, 8, 0, res, smp, len, order, coefs
> DECLARE_REG_TMP 5, 6
> @@ -51,7 +52,7 @@ lea resq, [resq+orderq*4]
> lea smpq, [smpq+orderq*4]
> lea coefsq, [coefsq+orderq*4]
> sub length, orderd
> -movd m3, r5m
> +movd xm3, r5m
> neg orderq
>
> %define posj t0q
> @@ -65,8 +66,20 @@ neg orderq
> xor negj, negj
>
> .looporder:
> +%if cpuflag(avx)
> + vbroadcastss m2, [coefsq+posj*4]
> +%else
> movd m2, [coefsq+posj*4] ; c = coefs[j]
> SPLATD m2
> +%endif
> +%if cpuflag(avx)
> + vpmulld m1, m2, [smpq+negj*4-4]
> + vpmulld m5, m2, [smpq+negj*4-4+mmsize]
> + vpmulld m7, m2, [smpq+negj*4-4+mmsize*2]
> + vpaddd m0, m1
> + vpaddd m4, m5
> + vpaddd m6, m7
>
Same as for the 32-bit LPC AVX2 patch.
> +%else
> movu m1, [smpq+negj*4-4] ; s = smp[i-j-1]
> movu m5, [smpq+negj*4-4+mmsize]
> movu m7, [smpq+negj*4-4+mmsize*2]
> @@ -76,14 +89,15 @@ neg orderq
> paddd m0, m1 ; p += c * s
> paddd m4, m5
> paddd m6, m7
> +%endif
>
> dec negj
> inc posj
> jnz .looporder
>
> - psrad m0, m3 ; p >>= shift
> - psrad m4, m3
> - psrad m6, m3
> + psrad m0, xm3 ; p >>= shift
> + psrad m4, xm3
> + psrad m6, xm3
> movu m1, [smpq]
> movu m5, [smpq+mmsize]
> movu m7, [smpq+mmsize*2]
> @@ -99,3 +113,11 @@ neg orderq
> sub length, (3*mmsize)/4
> jg .looplen
> RET
> +
> +%endmacro
> +
> +INIT_XMM sse4
> +FUNCTION_BODY_16
> +
> +INIT_YMM avx2
> +FUNCTION_BODY_16
> diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
> index 1971f81b8d..0a5c01859f 100644
> --- a/libavcodec/x86/flacdsp_init.c
> +++ b/libavcodec/x86/flacdsp_init.c
> @@ -28,6 +28,7 @@ void ff_flac_lpc_32_xop(int32_t *samples, const int
> coeffs[32], int order,
> int qlevel, int len);
>
> void ff_flac_enc_lpc_16_sse4(int32_t *, const int32_t *, int, int, const
> int32_t *,int);
> +void ff_flac_enc_lpc_16_avx2(int32_t *, const int32_t *, int, int, const
> int32_t *,int);
>
> #define DECORRELATE_FUNCS(fmt, opt)
> \
> void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in,
> int channels, \
> @@ -110,6 +111,10 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c,
> enum AVSampleFormat fmt, int
> if (CONFIG_GPL)
> c->lpc16_encode = ff_flac_enc_lpc_16_sse4;
> }
> + if (EXTERNAL_AVX2(cpu_flags)) {
> + if (CONFIG_GPL)
>
Yeah, just combine them; if someone wants to add non-GPL asm, this is the
least of their problems.
> + c->lpc16_encode = ff_flac_enc_lpc_16_avx2;
> + }
> #endif
> #endif /* HAVE_X86ASM */
> }
> --
> 2.15.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
More information about the ffmpeg-devel mailing list