[FFmpeg-devel] [PATCH 6/7] lavu/aes: add x86 AESNI optimizations

James Almer jamrial at gmail.com
Mon Oct 12 05:51:06 CEST 2015


On 10/12/2015 12:20 AM, Rodger Combs wrote:
> ---
>  libavutil/aes.c          |  2 ++
>  libavutil/aes_internal.h |  2 ++
>  libavutil/x86/Makefile   |  4 ++-
>  libavutil/x86/aes.asm    | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
>  libavutil/x86/aes_init.c | 42 ++++++++++++++++++++++++
>  5 files changed, 134 insertions(+), 1 deletion(-)
>  create mode 100644 libavutil/x86/aes.asm
>  create mode 100644 libavutil/x86/aes_init.c
> 
> diff --git a/libavutil/aes.c b/libavutil/aes.c
> index c917706..61ab8f7 100644
> --- a/libavutil/aes.c
> +++ b/libavutil/aes.c
> @@ -200,6 +200,8 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
>      uint8_t alog8[512];
>  
>      a->crypt = decrypt ? aes_decrypt : aes_encrypt;
> +    if (ARCH_X86)
> +        ff_init_aes_x86(a, decrypt);
>  
>      if (!enc_multbl[FF_ARRAY_ELEMS(enc_multbl)-1][FF_ARRAY_ELEMS(enc_multbl[0])-1]) {
>          j = 1;
> diff --git a/libavutil/aes_internal.h b/libavutil/aes_internal.h
> index 37b9568..2150085 100644
> --- a/libavutil/aes_internal.h
> +++ b/libavutil/aes_internal.h
> @@ -39,4 +39,6 @@ typedef struct AVAES {
>      void (*crypt)(struct AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv);
>  } AVAES;
>  
> +void ff_init_aes_x86(AVAES *a, int decrypt);
> +
>  #endif /* AVUTIL_AES_INTERNAL_H */
> diff --git a/libavutil/x86/Makefile b/libavutil/x86/Makefile
> index eb70a62..4ac6219 100644
> --- a/libavutil/x86/Makefile
> +++ b/libavutil/x86/Makefile
> @@ -1,4 +1,5 @@
> -OBJS += x86/cpu.o                                                       \
> +OBJS += x86/aes_init.o                                                  \
> +        x86/cpu.o                                                       \
>          x86/float_dsp_init.o                                            \
>          x86/lls_init.o                                                  \
>  
> @@ -10,5 +11,6 @@ YASM-OBJS += x86/cpuid.o                                                \
>               $(EMMS_OBJS__yes_)                                      \
>               x86/float_dsp.o                                            \
>               x86/lls.o                                                  \
> +             x86/aes.o                                                  \
>  
>  YASM-OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils.o                      \
> diff --git a/libavutil/x86/aes.asm b/libavutil/x86/aes.asm
> new file mode 100644
> index 0000000..7fb9130
> --- /dev/null
> +++ b/libavutil/x86/aes.asm
> @@ -0,0 +1,85 @@
> +;*****************************************************************************
> +;* Copyright (c) 2015 Rodger Combs <rodger.combs at gmail.com>
> +;*
> +;* This file is part of FFmpeg.
> +;*
> +;* FFmpeg is free software; you can redistribute it and/or
> +;* modify it under the terms of the GNU Lesser General Public
> +;* License as published by the Free Software Foundation; either
> +;* version 2.1 of the License, or (at your option) any later version.
> +;*
> +;* FFmpeg is distributed in the hope that it will be useful,
> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +;* Lesser General Public License for more details.
> +;*
> +;* You should have received a copy of the GNU Lesser General Public
> +;* License along with FFmpeg; if not, write to the Free Software
> +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> +;******************************************************************************
> +
> +%include "x86util.asm"
> +
> +SECTION .text
> +
> +;-----------------------------------------------------------------------------
> +; void ff_aes_decrypt(AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv)
> +;-----------------------------------------------------------------------------
> +%macro AES_CRYPT 1
> +%if %1 == 1
> +%define CRYPT aesdec
> +%define LAST  aesdeclast
> +cglobal aes_decrypt, 5,6,2
> +%else
> +%define CRYPT aesenc
> +%define LAST  aesenclast
> +cglobal aes_encrypt, 5,6,2
> +%endif
> +    mov r3d, r3d

Why? Just use r3d for the dec and test instructions below instead.

> +    pxor xm1, xm1
> +    test r4, r4
> +    je .block
> +    movdqu xm1, [r4] ; iv
> +.block:
> +    mov r5d, [r0 + 17 * 16]
> +    imul r5, 16
> +    movdqu xm0, [r2] ; state
> +%if %1 == 0
> +    pxor xm0, xm1
> +%endif
> +    pxor xm0, [r0 + r5]
> +.round:
> +    sub r5, 16
> +    CRYPT xm0, [r0 + r5]
> +    cmp r5, 16
> +    jg .round
> +    LAST xm0, [r0]
> +%if %1 == 1
> +    pxor xm0, xm1
> +    movdqu xm1, [r2]
> +%endif
> +    movdqu [r1], xm0
> +    dec r3
> +    add r2, 16
> +    add r1, 16
> +    test r3, r3
> +    jne .block
> +%if %1 == 0
> +    test r4, r4
> +    je .ret
> +    movdqu [r4], xm0
> +.ret:
> +%endif
> +    REP_RET
> +%endmacro
> +
> +%if HAVE_AESNI_EXTERNAL
> +INIT_XMM aesni
> +AES_CRYPT 0
> +AES_CRYPT 1
> +%if HAVE_AVX_EXTERNAL
> +INIT_XMM avx
> +AES_CRYPT 0
> +AES_CRYPT 1

This is not really needed. You're not gaining anything by using the VEX
coding scheme here. The aesni version is enough, IMO.

> +%endif
> +%endif
> diff --git a/libavutil/x86/aes_init.c b/libavutil/x86/aes_init.c
> new file mode 100644
> index 0000000..250dbc8
> --- /dev/null
> +++ b/libavutil/x86/aes_init.c
> @@ -0,0 +1,42 @@
> +/*
> + * Copyright (c) 2015 Rodger Combs <rodger.combs at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <stddef.h>
> +#include "libavutil/aes_internal.h"
> +#include "libavutil/x86/cpu.h"
> +
> +void ff_aes_decrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv);
> +void ff_aes_decrypt_avx(AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv);
> +
> +void ff_aes_encrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv);
> +void ff_aes_encrypt_avx(AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv);
> +
> +av_cold void ff_init_aes_x86(AVAES *a, int decrypt)

av_aes_init() is not av_cold, so av_cold is probably unneeded here.

> +{
> +#if HAVE_YASM
> +    int cpu_flags = av_get_cpu_flags();
> +    if (EXTERNAL_AESNI(cpu_flags)) {
> +        if (EXTERNAL_AVX(cpu_flags))
> +            a->crypt = decrypt ? ff_aes_decrypt_avx   : ff_aes_encrypt_avx;
> +        else
> +            a->crypt = decrypt ? ff_aes_decrypt_aesni : ff_aes_encrypt_aesni;
> +    }
> +#endif /* HAVE_YASM */
> +}
> 



More information about the ffmpeg-devel mailing list