[Mplayer-cvslog] CVS: main/liba52 imdct.c,1.12,1.13

Michael Niedermayer michael at mplayer.dev.hu
Tue Dec 18 16:59:06 CET 2001


Update of /cvsroot/mplayer/main/liba52
In directory mplayer:/var/tmp.root/cvs-serv1671

Modified Files:
	imdct.c 
Log Message:
sse opt


Index: imdct.c
===================================================================
RCS file: /cvsroot/mplayer/main/liba52/imdct.c,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- imdct.c	18 Dec 2001 04:00:29 -0000	1.12
+++ imdct.c	18 Dec 2001 15:59:01 -0000	1.13
@@ -77,9 +77,12 @@
 
 #ifdef ARCH_X86
 // NOTE: SSE needs 16byte alignment or it will segfault 
+// 
 static complex_t __attribute__((aligned(16))) buf[128];
 static float __attribute__((aligned(16))) sseSinCos1a[256];
 static float __attribute__((aligned(16))) sseSinCos1b[256];
+static float __attribute__((aligned(16))) sseSinCos1c[256];
+static float __attribute__((aligned(16))) sseSinCos1d[256];
 static float __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1};
 //static float __attribute__((aligned(16))) sseW0[4];
 static float __attribute__((aligned(16))) sseW1[8];
@@ -555,15 +558,23 @@
 	);
     }
 
-    
-    /* Post IFFT complex multiply  plus IFFT complex conjugate*/
-    for( i=0; i < 128; i++) {
-	/* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
-	tmp_a_r =        buf[i].real;
-	tmp_a_i = -1.0 * buf[i].imag;
-	buf[i].real =(tmp_a_r * xcos1[i])  -  (tmp_a_i  * xsin1[i]);
-	buf[i].imag =(tmp_a_r * xsin1[i])  +  (tmp_a_i  * xcos1[i]);
-    }
+	asm volatile( // SSE post-IFFT complex multiply + conjugate, two complex values per pass
+		"movl $-1024, %%esi				\n\t" // esi = byte offset, counts -1024..0 relative to %0 (buf+128)
+		".balign 16				\n\t"
+		"1:					\n\t"
+		"movaps (%0, %%esi), %%xmm0		\n\t" // xmm0 = r0 i0 r1 i1 (assumes complex_t is {float real, imag} - confirm)
+		"movaps (%0, %%esi), %%xmm1		\n\t" // xmm1 = same 16 bytes
+		"shufps $0xB1, %%xmm0, %%xmm0		\n\t" // swap each adjacent pair: i0 r0 i1 r1
+		"mulps 1024+sseSinCos1c(%%esi), %%xmm1	\n\t" // * (cos, -cos, ...) -> r*cos, -i*cos
+		"mulps 1024+sseSinCos1d(%%esi), %%xmm0	\n\t" // * (sin,  sin, ...) -> i*sin,  r*sin
+		"addps %%xmm1, %%xmm0			\n\t" // r*cos + i*sin, r*sin - i*cos (same result as the C loop it replaces)
+		"movaps %%xmm0, (%0, %%esi)		\n\t" // store back in place
+		"addl $16, %%esi			\n\t"
+		" jnz 1b				\n\t" // loop until the offset reaches 0
+		:: "r" (buf+128)
+		: "%esi", "memory" // "memory": buf is read and written via %0, not via an output operand
+	);   
+
 	
     data_ptr = data;
     delay_ptr = delay;
@@ -824,6 +835,11 @@
 	    sseSinCos1a[2*i+1]= -xcos1[i];
 	    sseSinCos1b[2*i+0]= xcos1[i];
 	    sseSinCos1b[2*i+1]= -xsin1[i];
+
+	    sseSinCos1c[2*i+0]= xcos1[i];
+	    sseSinCos1c[2*i+1]= -xcos1[i];
+	    sseSinCos1d[2*i+0]= xsin1[i];
+	    sseSinCos1d[2*i+1]= xsin1[i];	
 	}
 #endif
 




More information about the MPlayer-cvslog mailing list