[FFmpeg-devel] [PATCH]: Align branch target for fft_sse.c, fft_3dn.c and fft_3dn2.c
Zuxy Meng
zuxy.meng
Sat May 26 16:11:24 CEST 2007
Hi,
This patch aligns the loop entry points to 16-byte boundaries, as
recommended by Intel's and AMD's optimization manuals. However, I don't see
any noticeable improvement on my Dothan. Could anyone test it on other CPUs?
--
Zuxy
Beauty is truth,
While truth is beauty.
PGP KeyID: E8555ED6
-------------- next part --------------
Index: libavcodec/i386/fft_3dn2.c
===================================================================
--- libavcodec/i386/fft_3dn2.c (revision 9124)
+++ libavcodec/i386/fft_3dn2.c (working copy)
@@ -43,6 +43,7 @@
i = 8 << ln;
asm volatile(
+ ASMALIGN(4)
"1: \n\t"
"sub $32, %0 \n\t"
"movq (%0,%1), %%mm0 \n\t"
@@ -82,6 +83,7 @@
do {
i = nloops*8;
asm volatile(
+ ASMALIGN(4)
"1: \n\t"
"sub $16, %0 \n\t"
"movq (%1,%0), %%mm0 \n\t"
@@ -182,6 +184,7 @@
k = n-8;
asm volatile("movd %0, %%mm7" ::"r"(1<<31));
asm volatile(
+ ASMALIGN(4)
"1: \n\t"
"movq (%4,%0), %%mm0 \n\t" // z[n8+k]
"neg %0 \n\t"
Index: libavcodec/i386/fft_3dn.c
===================================================================
--- libavcodec/i386/fft_3dn.c (revision 9124)
+++ libavcodec/i386/fft_3dn.c (working copy)
@@ -43,6 +43,7 @@
i = 8 << ln;
asm volatile(
+ ASMALIGN(4)
"1: \n\t"
"sub $32, %0 \n\t"
"movq (%0,%1), %%mm0 \n\t"
@@ -83,6 +84,7 @@
do {
i = nloops*8;
asm volatile(
+ ASMALIGN(4)
"1: \n\t"
"sub $16, %0 \n\t"
"movq (%1,%0), %%mm0 \n\t"
Index: libavcodec/i386/fft_sse.c
===================================================================
--- libavcodec/i386/fft_sse.c (revision 9124)
+++ libavcodec/i386/fft_sse.c (working copy)
@@ -61,6 +61,7 @@
i = 8 << ln;
asm volatile(
+ ASMALIGN(4)
"1: \n\t"
"sub $32, %0 \n\t"
/* do the pass 0 butterfly */
@@ -99,6 +100,7 @@
do {
i = nloops*8;
asm volatile(
+ ASMALIGN(4)
"1: \n\t"
"sub $32, %0 \n\t"
"movaps (%2,%0), %%xmm1 \n\t"
@@ -271,6 +273,7 @@
k = 16-n;
asm volatile("movaps %0, %%xmm7 \n\t"::"m"(*m1m1m1m1));
asm volatile(
+ ASMALIGN(4)
"1: \n\t"
"movaps -16(%4,%0), %%xmm1 \n\t" // xmm1 = 4 5 6 7 = z[-2-k]
"neg %0 \n\t"
More information about the ffmpeg-devel
mailing list