[FFmpeg-devel] [PATCH 5/5] x86: sbrdsp/fft: reuse ps_neg constant
Christophe Gisquet
christophe.gisquet at gmail.com
Wed Aug 6 09:43:43 CEST 2014
---
libavcodec/x86/constants.c | 2 ++
libavcodec/x86/constants.h | 2 ++
libavcodec/x86/fft.asm | 7 ++++---
libavcodec/x86/sbrdsp.asm | 2 +-
4 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index bfb0ff3..5d2c237 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -56,3 +56,5 @@ DECLARE_ALIGNED(32, const ymm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x030
0x0303030303030303ULL, 0x0303030303030303ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL;
+
+DECLARE_ALIGNED(16, const xmm_reg, ff_ps_neg) = { 0x8000000080000000ULL, 0x8000000080000000ULL };
diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
index f73a9f2..e75fff9 100644
--- a/libavcodec/x86/constants.h
+++ b/libavcodec/x86/constants.h
@@ -55,4 +55,6 @@ extern const xmm_reg ff_pb_80;
extern const xmm_reg ff_pb_F8;
extern const uint64_t ff_pb_FC;
+extern const xmm_reg ff_ps_neg;
+
#endif /* AVCODEC_X86_CONSTANTS_H */
diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index cae404c..877997e 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm
@@ -68,9 +68,10 @@ perm1: dd 0x00, 0x02, 0x03, 0x01, 0x03, 0x00, 0x02, 0x01
perm2: dd 0x00, 0x01, 0x02, 0x03, 0x01, 0x00, 0x02, 0x03
ps_p1p1m1p1root2: dd 1.0, 1.0, -1.0, 1.0, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2
ps_m1m1p1m1p1m1m1m1: dd 1<<31, 1<<31, 0, 1<<31, 0, 1<<31, 1<<31, 1<<31
-ps_m1m1m1m1: times 4 dd 1<<31
ps_m1p1: dd 1<<31, 0
+cextern ps_neg
+
%assign i 16
%rep 13
cextern cos_ %+ i
@@ -685,7 +686,7 @@ cglobal imdct_calc, 3,5,3
mov r2, r3
sub r3, mmsize
neg r2
- mova m2, [ps_m1m1m1m1]
+ mova m2, [ps_neg]
.loop:
%if mmsize == 8
PSWAPD m0, [r1 + r3]
@@ -998,7 +999,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
sub r4, r3
%endif
%if notcpuflag(3dnowext) && mmsize == 8
- movd m7, [ps_m1m1m1m1]
+ movd m7, [ps_neg]
%endif
.pre:
%if ARCH_X86_64 == 0
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index d556f27..6f2e4f4 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -25,13 +25,13 @@ SECTION_RODATA
; mask equivalent for multiply by -1.0 1.0
ps_mask times 2 dd 1<<31, 0
ps_mask2 times 2 dd 0, 1<<31
-ps_neg times 4 dd 1<<31
ps_noise0 times 2 dd 1.0, 0.0,
ps_noise2 times 2 dd -1.0, 0.0
ps_noise13 dd 0.0, 1.0, 0.0, -1.0
dd 0.0, -1.0, 0.0, 1.0
dd 0.0, 1.0, 0.0, -1.0
cextern sbr_noise_table
+cextern ps_neg
SECTION_TEXT
--
1.9.2.msysgit.0
More information about the ffmpeg-devel mailing list