[FFmpeg-devel] [PATCH]: Align branch target for fft_sse.c, fft_3dn.c and fft_3dn2.c

Sat May 26 16:11:24 CEST 2007

Hi,

This patch aligns each loop entry (branch target) on a 16-byte boundary, as
recommended by Intel's and AMD's manuals. However, I don't see any
noticeable improvement on my Dothan. Could anyone test it on other CPUs?

-- 
Zuxy
Beauty is truth,
While truth is beauty.
PGP KeyID: E8555ED6
-------------- next part --------------
Index: libavcodec/i386/fft_3dn2.c
===================================================================

--- libavcodec/i386/fft_3dn2.c	(revision 9124)
+++ libavcodec/i386/fft_3dn2.c	(working copy)
@@ -43,6 +43,7 @@
 
     i = 8 << ln;
     asm volatile(
+        ASMALIGN(4)
         "1: \n\t"
         "sub $32, %0 \n\t"
         "movq    (%0,%1), %%mm0 \n\t"
@@ -82,6 +83,7 @@
         do {
             i = nloops*8;
             asm volatile(
+                ASMALIGN(4)
                 "1: \n\t"
                 "sub $16, %0 \n\t"
                 "movq    (%1,%0), %%mm0 \n\t"
@@ -182,6 +184,7 @@
     k = n-8;
     asm volatile("movd %0, %%mm7" ::"r"(1<<31));
     asm volatile(
+        ASMALIGN(4)
         "1: \n\t"
         "movq    (%4,%0), %%mm0 \n\t" // z[n8+k]
         "neg %0 \n\t"
Index: libavcodec/i386/fft_3dn.c
===================================================================
--- libavcodec/i386/fft_3dn.c	(revision 9124)
+++ libavcodec/i386/fft_3dn.c	(working copy)
@@ -43,6 +43,7 @@
 
     i = 8 << ln;
     asm volatile(
+        ASMALIGN(4)
         "1: \n\t"
         "sub $32, %0 \n\t"
         "movq    (%0,%1), %%mm0 \n\t"
@@ -83,6 +84,7 @@
         do {
             i = nloops*8;
             asm volatile(
+                ASMALIGN(4)
                 "1: \n\t"
                 "sub $16, %0 \n\t"
                 "movq    (%1,%0), %%mm0 \n\t"
Index: libavcodec/i386/fft_sse.c
===================================================================
--- libavcodec/i386/fft_sse.c	(revision 9124)
+++ libavcodec/i386/fft_sse.c	(working copy)
@@ -61,6 +61,7 @@
 
     i = 8 << ln;
     asm volatile(
+        ASMALIGN(4)
         "1: \n\t"
         "sub $32, %0 \n\t"
         /* do the pass 0 butterfly */
@@ -99,6 +100,7 @@
         do {
             i = nloops*8;
             asm volatile(
+                ASMALIGN(4)
                 "1: \n\t"
                 "sub $32, %0 \n\t"
                 "movaps    (%2,%0), %%xmm1 \n\t"
@@ -271,6 +273,7 @@
     k = 16-n;
     asm volatile("movaps %0, %%xmm7 \n\t"::"m"(*m1m1m1m1));
     asm volatile(
+        ASMALIGN(4)
         "1: \n\t"
         "movaps  -16(%4,%0), %%xmm1 \n\t"   // xmm1 = 4 5 6 7 = z[-2-k]
         "neg %0 \n\t"