[FFmpeg-devel] [PATCH 4/5] x86inc: Correctly set mmreg variables
Henrik Gramner
henrik at gramner.com
Fri Jan 19 01:06:14 EET 2018
---
libavutil/x86/x86inc.asm | 87 ++++++++++++++++++++----------------------------
1 file changed, 36 insertions(+), 51 deletions(-)
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index de048f863d..438863042f 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1,7 +1,7 @@
;*****************************************************************************
;* x86inc.asm: x264asm abstraction layer
;*****************************************************************************
-;* Copyright (C) 2005-2017 x264 project
+;* Copyright (C) 2005-2018 x264 project
;*
;* Authors: Loren Merritt <lorenm at u.washington.edu>
;* Henrik Gramner <henrik at gramner.com>
@@ -892,6 +892,36 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%undef %1%2
%endmacro
+%macro DEFINE_MMREGS 1 ; mmtype
+ %assign %%prev_mmregs 0
+ %ifdef num_mmregs
+ %assign %%prev_mmregs num_mmregs
+ %endif
+
+ %assign num_mmregs 8
+ %if ARCH_X86_64 && mmsize >= 16
+ %assign num_mmregs 16
+ %if cpuflag(avx512) || mmsize == 64
+ %assign num_mmregs 32
+ %endif
+ %endif
+
+ %assign %%i 0
+ %rep num_mmregs
+ CAT_XDEFINE m, %%i, %1 %+ %%i
+ CAT_XDEFINE nn%1, %%i, %%i
+ %assign %%i %%i+1
+ %endrep
+ %if %%prev_mmregs > num_mmregs
+ %rep %%prev_mmregs - num_mmregs
+ CAT_UNDEF m, %%i
+ CAT_UNDEF nn %+ mmtype, %%i
+ %assign %%i %%i+1
+ %endrep
+ %endif
+ %xdefine mmtype %1
+%endmacro
+
; Prefer registers 16-31 over 0-15 to avoid having to use vzeroupper
%macro AVX512_MM_PERMUTATION 0-1 0 ; start_reg
%if ARCH_X86_64 && cpuflag(avx512)
@@ -908,23 +938,12 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%assign avx_enabled 0
%define RESET_MM_PERMUTATION INIT_MMX %1
%define mmsize 8
- %define num_mmregs 8
%define mova movq
%define movu movq
%define movh movd
%define movnta movntq
- %assign %%i 0
- %rep 8
- CAT_XDEFINE m, %%i, mm %+ %%i
- CAT_XDEFINE nnmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- %rep 24
- CAT_UNDEF m, %%i
- CAT_UNDEF nnmm, %%i
- %assign %%i %%i+1
- %endrep
INIT_CPUFLAGS %1
+ DEFINE_MMREGS mm
%endmacro
%macro INIT_XMM 0-1+
@@ -936,22 +955,9 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%define movh movq
%define movnta movntdq
INIT_CPUFLAGS %1
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 16
- %if cpuflag(avx512)
- %define num_mmregs 32
- %endif
- %endif
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, xmm %+ %%i
- CAT_XDEFINE nnxmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
+ DEFINE_MMREGS xmm
%if WIN64
- ; Swap callee-saved registers with volatile registers
- AVX512_MM_PERMUTATION 6
+ AVX512_MM_PERMUTATION 6 ; Swap callee-saved registers with volatile registers
%endif
%endmacro
@@ -964,19 +970,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%undef movh
%define movnta movntdq
INIT_CPUFLAGS %1
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 16
- %if cpuflag(avx512)
- %define num_mmregs 32
- %endif
- %endif
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, ymm %+ %%i
- CAT_XDEFINE nnymm, %%i, %%i
- %assign %%i %%i+1
- %endrep
+ DEFINE_MMREGS ymm
AVX512_MM_PERMUTATION
%endmacro
@@ -984,21 +978,12 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%assign avx_enabled 1
%define RESET_MM_PERMUTATION INIT_ZMM %1
%define mmsize 64
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 32
- %endif
%define mova movdqa
%define movu movdqu
%undef movh
%define movnta movntdq
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, zmm %+ %%i
- CAT_XDEFINE nnzmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
INIT_CPUFLAGS %1
+ DEFINE_MMREGS zmm
AVX512_MM_PERMUTATION
%endmacro
--
2.11.0
More information about the ffmpeg-devel
mailing list