[FFmpeg-cvslog] x86: add support for fmaddps fma4 instruction with abstraction to avx/sse
Justin Ruggles
git at videolan.org
Sat Jul 28 00:10:47 CEST 2012
ffmpeg | branch: master | Justin Ruggles <justin.ruggles at gmail.com> | Mon Jun 18 23:39:14 2012 -0400| [79687079a97a039c325ab79d7a95920d800b791f] | committer: Justin Ruggles
x86: add support for fmaddps fma4 instruction with abstraction to avx/sse
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=79687079a97a039c325ab79d7a95920d800b791f
---
configure | 5 +++++
libavutil/x86/x86inc.asm | 16 +++++++++++-----
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/configure b/configure
index fd90369..715e49b 100755
--- a/configure
+++ b/configure
@@ -242,6 +242,7 @@ Optimization options (experts only):
--disable-sse disable SSE optimizations
--disable-ssse3 disable SSSE3 optimizations
--disable-avx disable AVX optimizations
+ --disable-fma4 disable FMA4 optimizations
--disable-armv5te disable armv5te optimizations
--disable-armv6 disable armv6 optimizations
--disable-armv6t2 disable armv6t2 optimizations
@@ -1047,6 +1048,7 @@ ARCH_EXT_LIST='
armv6t2
armvfp
avx
+ fma4
mmi
mmx
mmx2
@@ -1295,6 +1297,7 @@ mmx2_deps="mmx"
sse_deps="mmx"
ssse3_deps="sse"
avx_deps="ssse3"
+fma4_deps="avx"
aligned_stack_if_any="ppc x86"
fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64"
@@ -2865,6 +2868,7 @@ EOF
check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
die "yasm not found, use --disable-yasm for a crippled build"
check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
+ check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4
fi
case "$cpu" in
@@ -3292,6 +3296,7 @@ if enabled x86; then
echo "SSE enabled ${sse-no}"
echo "SSSE3 enabled ${ssse3-no}"
echo "AVX enabled ${avx-no}"
+ echo "FMA4 enabled ${fma4-no}"
echo "CMOV enabled ${cmov-no}"
echo "CMOV is fast ${fast_cmov-no}"
echo "EBX available ${ebx_available-no}"
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index b76a10c..4b4a19b 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1093,16 +1093,22 @@ AVX_INSTR pfmul, 1, 0, 1
%undef j
%macro FMA_INSTR 3
- %macro %1 4-7 %1, %2, %3
- %if cpuflag(xop)
- v%5 %1, %2, %3, %4
+ %macro %1 5-8 %1, %2, %3
+ %if cpuflag(xop) || cpuflag(fma4)
+ v%6 %1, %2, %3, %4
%else
- %6 %1, %2, %3
- %7 %1, %4
+ %ifidn %1, %4
+ %7 %5, %2, %3
+ %8 %1, %4, %5
+ %else
+ %7 %1, %2, %3
+ %8 %1, %4
+ %endif
%endif
%endmacro
%endmacro
+FMA_INSTR fmaddps, mulps, addps
FMA_INSTR pmacsdd, pmulld, paddd
FMA_INSTR pmacsww, pmullw, paddw
FMA_INSTR pmadcswd, pmaddwd, paddd
More information about the ffmpeg-cvslog mailing list