[FFmpeg-devel] [PATCH 6/7] lavu/aes: add x86 AESNI optimizations
James Almer
jamrial at gmail.com
Mon Oct 12 05:51:06 CEST 2015
On 10/12/2015 12:20 AM, Rodger Combs wrote:
> ---
> libavutil/aes.c | 2 ++
> libavutil/aes_internal.h | 2 ++
> libavutil/x86/Makefile | 4 ++-
> libavutil/x86/aes.asm | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
> libavutil/x86/aes_init.c | 42 ++++++++++++++++++++++++
> 5 files changed, 134 insertions(+), 1 deletion(-)
> create mode 100644 libavutil/x86/aes.asm
> create mode 100644 libavutil/x86/aes_init.c
>
> diff --git a/libavutil/aes.c b/libavutil/aes.c
> index c917706..61ab8f7 100644
> --- a/libavutil/aes.c
> +++ b/libavutil/aes.c
> @@ -200,6 +200,8 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
> uint8_t alog8[512];
>
> a->crypt = decrypt ? aes_decrypt : aes_encrypt;
> + if (ARCH_X86)
> + ff_init_aes_x86(a, decrypt);
>
> if (!enc_multbl[FF_ARRAY_ELEMS(enc_multbl)-1][FF_ARRAY_ELEMS(enc_multbl[0])-1]) {
> j = 1;
> diff --git a/libavutil/aes_internal.h b/libavutil/aes_internal.h
> index 37b9568..2150085 100644
> --- a/libavutil/aes_internal.h
> +++ b/libavutil/aes_internal.h
> @@ -39,4 +39,6 @@ typedef struct AVAES {
> void (*crypt)(struct AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv);
> } AVAES;
>
> +void ff_init_aes_x86(AVAES *a, int decrypt);
> +
> #endif /* AVUTIL_AES_INTERNAL_H */
> diff --git a/libavutil/x86/Makefile b/libavutil/x86/Makefile
> index eb70a62..4ac6219 100644
> --- a/libavutil/x86/Makefile
> +++ b/libavutil/x86/Makefile
> @@ -1,4 +1,5 @@
> -OBJS += x86/cpu.o \
> +OBJS += x86/aes_init.o \
> + x86/cpu.o \
> x86/float_dsp_init.o \
> x86/lls_init.o \
>
> @@ -10,5 +11,6 @@ YASM-OBJS += x86/cpuid.o \
> $(EMMS_OBJS__yes_) \
> x86/float_dsp.o \
> x86/lls.o \
> + x86/aes.o \
>
> YASM-OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils.o \
> diff --git a/libavutil/x86/aes.asm b/libavutil/x86/aes.asm
> new file mode 100644
> index 0000000..7fb9130
> --- /dev/null
> +++ b/libavutil/x86/aes.asm
> @@ -0,0 +1,85 @@
> +;*****************************************************************************
> +;* Copyright (c) 2015 Rodger Combs <rodger.combs at gmail.com>
> +;*
> +;* This file is part of FFmpeg.
> +;*
> +;* FFmpeg is free software; you can redistribute it and/or
> +;* modify it under the terms of the GNU Lesser General Public
> +;* License as published by the Free Software Foundation; either
> +;* version 2.1 of the License, or (at your option) any later version.
> +;*
> +;* FFmpeg is distributed in the hope that it will be useful,
> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> +;* Lesser General Public License for more details.
> +;*
> +;* You should have received a copy of the GNU Lesser General Public
> +;* License along with FFmpeg; if not, write to the Free Software
> +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> +;******************************************************************************
> +
> +%include "x86util.asm"
> +
> +SECTION .text
> +
> +;-----------------------------------------------------------------------------
> +; void ff_aes_decrypt(AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv)
> +;-----------------------------------------------------------------------------
> +%macro AES_CRYPT 1
> +%if %1 == 1
> +%define CRYPT aesdec
> +%define LAST aesdeclast
> +cglobal aes_decrypt, 5,6,2
> +%else
> +%define CRYPT aesenc
> +%define LAST aesenclast
> +cglobal aes_encrypt, 5,6,2
> +%endif
> + mov r3d, r3d
Why is this mov needed? Just use r3d in the dec and test instructions below instead.
> + pxor xm1, xm1
> + test r4, r4
> + je .block
> + movdqu xm1, [r4] ; iv
> +.block:
> + mov r5d, [r0 + 17 * 16]
> + imul r5, 16
> + movdqu xm0, [r2] ; state
> +%if %1 == 0
> + pxor xm0, xm1
> +%endif
> + pxor xm0, [r0 + r5]
> +.round:
> + sub r5, 16
> + CRYPT xm0, [r0 + r5]
> + cmp r5, 16
> + jg .round
> + LAST xm0, [r0]
> +%if %1 == 1
> + pxor xm0, xm1
> + movdqu xm1, [r2]
> +%endif
> + movdqu [r1], xm0
> + dec r3
> + add r2, 16
> + add r1, 16
> + test r3, r3
> + jne .block
> +%if %1 == 0
> + test r4, r4
> + je .ret
> + movdqu [r4], xm0
> +.ret:
> +%endif
> + REP_RET
> +%endmacro
> +
> +%if HAVE_AESNI_EXTERNAL
> +INIT_XMM aesni
> +AES_CRYPT 0
> +AES_CRYPT 1
> +%if HAVE_AVX_EXTERNAL
> +INIT_XMM avx
> +AES_CRYPT 0
> +AES_CRYPT 1
The AVX versions are not really needed. You're not gaining anything by using
the VEX encoding here.
The aesni version is IMO enough.
> +%endif
> +%endif
> diff --git a/libavutil/x86/aes_init.c b/libavutil/x86/aes_init.c
> new file mode 100644
> index 0000000..250dbc8
> --- /dev/null
> +++ b/libavutil/x86/aes_init.c
> @@ -0,0 +1,42 @@
> +/*
> + * Copyright (c) 2015 Rodger Combs <rodger.combs at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <stddef.h>
> +#include "libavutil/aes_internal.h"
> +#include "libavutil/x86/cpu.h"
> +
> +void ff_aes_decrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv);
> +void ff_aes_decrypt_avx(AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv);
> +
> +void ff_aes_encrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv);
> +void ff_aes_encrypt_avx(AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv);
> +
> +av_cold void ff_init_aes_x86(AVAES *a, int decrypt)
av_aes_init() is not marked av_cold, so the attribute is probably unneeded here.
> +{
> +#if HAVE_YASM
> + int cpu_flags = av_get_cpu_flags();
> + if (EXTERNAL_AESNI(cpu_flags)) {
> + if (EXTERNAL_AVX(cpu_flags))
> + a->crypt = decrypt ? ff_aes_decrypt_avx : ff_aes_encrypt_avx;
> + else
> + a->crypt = decrypt ? ff_aes_decrypt_aesni : ff_aes_encrypt_aesni;
> + }
> +#endif /* HAVE_YASM */
> +}
>
More information about the ffmpeg-devel mailing list