[FFmpeg-devel] [PATCH] x86/hevc_res_add: add ff_hevc_transform_add{8, 16, 32}_8_avx
Mickaël Raulet
mraulet at gmail.com
Wed Aug 20 09:25:22 CEST 2014
Patch ok
Mickael
Le mercredi 20 août 2014, James Almer <jamrial at gmail.com> a écrit :
> ~15% faster than sse2
>
> Signed-off-by: James Almer <jamrial at gmail.com>
> ---
> libavcodec/x86/hevc_res_add.asm | 15 +++++++++++----
> libavcodec/x86/hevcdsp.h | 4 ++++
> libavcodec/x86/hevcdsp_init.c | 4 ++++
> 3 files changed, 19 insertions(+), 4 deletions(-)
>
> diff --git a/libavcodec/x86/hevc_res_add.asm
> b/libavcodec/x86/hevc_res_add.asm
> index 47022d3..feea50c 100644
> --- a/libavcodec/x86/hevc_res_add.asm
> +++ b/libavcodec/x86/hevc_res_add.asm
> @@ -156,8 +156,8 @@ cglobal hevc_transform_add4_8, 3, 4, 6
> %endmacro
>
>
> -INIT_XMM sse2
> -; void ff_hevc_transform_add8_8_sse2(uint8_t *dst, int16_t *coeffs,
> ptrdiff_t stride)
> +%macro TRANSFORM_ADD_8 0
> +; void ff_hevc_transform_add8_8_<opt>(uint8_t *dst, int16_t *coeffs,
> ptrdiff_t stride)
> cglobal hevc_transform_add8_8, 3, 4, 8
> lea r3, [r2*3]
> TR_ADD_SSE_8_8
> @@ -167,7 +167,7 @@ cglobal hevc_transform_add8_8, 3, 4, 8
> RET
>
> %if ARCH_X86_64
> -; void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs,
> ptrdiff_t stride)
> +; void ff_hevc_transform_add16_8_<opt>(uint8_t *dst, int16_t *coeffs,
> ptrdiff_t stride)
> cglobal hevc_transform_add16_8, 3, 4, 12
> lea r3, [r2*3]
> TR_ADD_SSE_16_8
> @@ -178,7 +178,7 @@ cglobal hevc_transform_add16_8, 3, 4, 12
> %endrep
> RET
>
> -; void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs,
> ptrdiff_t stride)
> +; void ff_hevc_transform_add32_8_<opt>(uint8_t *dst, int16_t *coeffs,
> ptrdiff_t stride)
> cglobal hevc_transform_add32_8, 3, 4, 12
>
> TR_ADD_SSE_32_8
> @@ -190,6 +190,13 @@ cglobal hevc_transform_add32_8, 3, 4, 12
> RET
>
> %endif ;ARCH_X86_64
> +%endmacro
> +
> +INIT_XMM sse2
> +TRANSFORM_ADD_8
> +INIT_XMM avx
> +TRANSFORM_ADD_8
> +
>
> ;-----------------------------------------------------------------------------
> ; void ff_hevc_transform_add_10(pixel *dst, int16_t *block, int stride)
>
> ;-----------------------------------------------------------------------------
> diff --git a/libavcodec/x86/hevcdsp.h b/libavcodec/x86/hevcdsp.h
> index 7ced22c..74b5173 100644
> --- a/libavcodec/x86/hevcdsp.h
> +++ b/libavcodec/x86/hevcdsp.h
> @@ -139,6 +139,10 @@ void ff_hevc_transform_add8_8_sse2(uint8_t *dst,
> int16_t *coeffs, ptrdiff_t stri
> void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs,
> ptrdiff_t stride);
> void ff_hevc_transform_add32_8_sse2(uint8_t *dst, int16_t *coeffs,
> ptrdiff_t stride);
>
> +void ff_hevc_transform_add8_8_avx(uint8_t *dst, int16_t *coeffs,
> ptrdiff_t stride);
> +void ff_hevc_transform_add16_8_avx(uint8_t *dst, int16_t *coeffs,
> ptrdiff_t stride);
> +void ff_hevc_transform_add32_8_avx(uint8_t *dst, int16_t *coeffs,
> ptrdiff_t stride);
> +
> void ff_hevc_transform_add4_10_mmxext(uint8_t *dst, int16_t *coeffs,
> ptrdiff_t stride);
> void ff_hevc_transform_add8_10_sse2(uint8_t *dst, int16_t *coeffs,
> ptrdiff_t stride);
> void ff_hevc_transform_add16_10_sse2(uint8_t *dst, int16_t *coeffs,
> ptrdiff_t stride);
> diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
> index 0f9fe7d..f6f0a4b 100644
> --- a/libavcodec/x86/hevcdsp_init.c
> +++ b/libavcodec/x86/hevcdsp_init.c
> @@ -509,7 +509,11 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const
> int bit_depth)
> if (ARCH_X86_64) {
> c->hevc_v_loop_filter_luma =
> ff_hevc_v_loop_filter_luma_8_avx;
> c->hevc_h_loop_filter_luma =
> ff_hevc_h_loop_filter_luma_8_avx;
> +
> + c->transform_add[2] = ff_hevc_transform_add16_8_avx;
> + c->transform_add[3] = ff_hevc_transform_add32_8_avx;
> }
> + c->transform_add[1] = ff_hevc_transform_add8_8_avx;
> }
> if (EXTERNAL_AVX2(cpu_flags)) {
> c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;
> --
> 1.8.5.5
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
More information about the ffmpeg-devel mailing list