[FFmpeg-cvslog] x86: dct32: port to cpuflags
Diego Biurrun
git at videolan.org
Sat Aug 4 23:54:13 CEST 2012
ffmpeg | branch: master | Diego Biurrun <diego at biurrun.de> | Wed Aug 1 19:28:08 2012 +0200| [0c3ff1982c5da0abfb27a7d2328d742a37257698] | committer: Diego Biurrun
x86: dct32: port to cpuflags
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0c3ff1982c5da0abfb27a7d2328d742a37257698
---
libavcodec/x86/dct32_sse.asm | 74 ++++++++++++++----------------------------
1 file changed, 25 insertions(+), 49 deletions(-)
diff --git a/libavcodec/x86/dct32_sse.asm b/libavcodec/x86/dct32_sse.asm
index e3c8a45..9d6169c 100644
--- a/libavcodec/x86/dct32_sse.asm
+++ b/libavcodec/x86/dct32_sse.asm
@@ -42,39 +42,24 @@ ps_cos_vec: dd 0.500603, 0.505471, 0.515447, 0.531043
align 32
ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
-%macro BUTTERFLY_SSE 4
- movaps %4, %1
- subps %1, %2
- addps %2, %4
- mulps %1, %3
-%endmacro
-
-%macro BUTTERFLY_AVX 4
- vsubps %4, %1, %2
- vaddps %2, %2, %1
- vmulps %1, %4, %3
-%endmacro
-
-%macro BUTTERFLY0_SSE 5
- movaps %4, %1
- shufps %1, %1, %5
- xorps %4, %2
- addps %1, %4
- mulps %1, %3
+%macro BUTTERFLY 4
+ subps %4, %1, %2
+ addps %2, %2, %1
+ mulps %1, %4, %3
%endmacro
-%macro BUTTERFLY0_SSE2 5
+%macro BUTTERFLY0 5
+%if cpuflag(sse2) && notcpuflag(avx)
pshufd %4, %1, %5
xorps %1, %2
addps %1, %4
mulps %1, %3
-%endmacro
-
-%macro BUTTERFLY0_AVX 5
- vshufps %4, %1, %1, %5
- vxorps %1, %1, %2
- vaddps %4, %4, %1
- vmulps %1, %4, %3
+%else
+ shufps %4, %1, %1, %5
+ xorps %1, %1, %2
+ addps %4, %4, %1
+ mulps %1, %4, %3
+%endif
%endmacro
%macro BUTTERFLY2 4
@@ -206,14 +191,11 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
movss [outq+116], m6
%endmacro
-%define BUTTERFLY BUTTERFLY_AVX
-%define BUTTERFLY0 BUTTERFLY0_AVX
-
-INIT_YMM
+INIT_YMM avx
SECTION_TEXT
%if HAVE_AVX
; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
-cglobal dct32_float_avx, 2,3,8, out, in, tmp
+cglobal dct32_float, 2,3,8, out, in, tmp
; pass 1
vmovaps m4, [inq+0]
vinsertf128 m5, m5, [inq+96], 1
@@ -286,9 +268,6 @@ INIT_XMM
RET
%endif
-%define BUTTERFLY BUTTERFLY_SSE
-%define BUTTERFLY0 BUTTERFLY0_SSE
-
%if ARCH_X86_64
%define SPILL SWAP
%define UNSPILL SWAP
@@ -411,10 +390,9 @@ INIT_XMM
%endif
-INIT_XMM
-%macro DCT32_FUNC 1
; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
-cglobal dct32_float_%1, 2,3,16, out, in, tmp
+%macro DCT32_FUNC 0
+cglobal dct32_float, 2, 3, 16, out, in, tmp
; pass 1
movaps m0, [inq+0]
@@ -498,18 +476,16 @@ cglobal dct32_float_%1, 2,3,16, out, in, tmp
RET
%endmacro
-%macro LOAD_INV_SSE 2
+%macro LOAD_INV 2
+%if cpuflag(sse2)
+ pshufd %1, %2, 0x1b
+%elif cpuflag(sse)
movaps %1, %2
shufps %1, %1, 0x1b
+%endif
%endmacro
-%define LOAD_INV LOAD_INV_SSE
-DCT32_FUNC sse
-
-%macro LOAD_INV_SSE2 2
- pshufd %1, %2, 0x1b
-%endmacro
-
-%define LOAD_INV LOAD_INV_SSE2
-%define BUTTERFLY0 BUTTERFLY0_SSE2
-DCT32_FUNC sse2
+INIT_XMM sse
+DCT32_FUNC
+INIT_XMM sse2
+DCT32_FUNC
More information about the ffmpeg-cvslog mailing list