[Mplayer-cvslog] CVS: main/liba52 imdct.c,1.3,1.4

Michael Niedermayer michael at mplayer.dev.hu
Sun Dec 16 16:00:06 CET 2001


Update of /cvsroot/mplayer/main/liba52
In directory mplayer:/var/tmp.root/cvs-serv32398

Modified Files:
	imdct.c 
Log Message:
sse opt


Index: imdct.c
===================================================================
RCS file: /cvsroot/mplayer/main/liba52/imdct.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- imdct.c	16 Dec 2001 03:06:41 -0000	1.3
+++ imdct.c	16 Dec 2001 15:00:02 -0000	1.4
@@ -75,6 +75,8 @@
 #ifdef HAVE_SSE
 // NOTE: SSE needs 16byte alignment or it will segfault 
 static complex_t __attribute__((aligned(16))) buf[128];
+static float __attribute__((aligned(16))) sseSinCos1a[256];
+static float __attribute__((aligned(16))) sseSinCos1b[256];
 static float __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1};
 #else
 static complex_t buf[128];
@@ -174,11 +176,32 @@
     /* 512 IMDCT with source and dest data in 'data' */
 	
     /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
+#ifdef HAVE_SSE
+	asm volatile(
+		"xorl %%esi, %%esi			\n\t"
+		"movl $1008, %%edi			\n\t"
+		"1:					\n\t"
+		"movaps (%0, %%esi), %%xmm0		\n\t"
+		"movaps (%0, %%edi), %%xmm1		\n\t"
+		"shufps $0xA0, %%xmm0, %%xmm0		\n\t"
+		"shufps $0x5F, %%xmm1, %%xmm1		\n\t"
+		"mulps sseSinCos1a(%%esi), %%xmm0	\n\t"
+		"mulps sseSinCos1b(%%esi), %%xmm1	\n\t"
+		"addps %%xmm1, %%xmm0			\n\t"
+		"movaps %%xmm0, (%1, %%esi)		\n\t"
+		"addl $16, %%esi			\n\t"
+		"subl $16, %%edi			\n\t"
+		" jnc 1b				\n\t"
+		:: "r" (data), "r" (buf)
+		: "%esi", "%edi"
+	);
+#else
     for( i=0; i < 128; i++) {
 	/* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ 
 	buf[i].real =         (data[256-2*i-1] * xcos1[i])  -  (data[2*i]       * xsin1[i]);
 	buf[i].imag = -1.0 * ((data[2*i]       * xcos1[i])  +  (data[256-2*i-1] * xsin1[i]));
     }
+#endif
 
     /* Bit reversed shuffling */
     for(i=0; i<128; i++) {
@@ -514,6 +537,14 @@
 	    xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1));
 	    xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1));
 	}
+#ifdef HAVE_SSE
+	for (i = 0; i < 128; i++) {
+	    sseSinCos1a[2*i+0]= -xsin1[i];
+	    sseSinCos1a[2*i+1]= -xcos1[i];
+	    sseSinCos1b[2*i+0]= xcos1[i];
+	    sseSinCos1b[2*i+1]= -xsin1[i];
+	}
+#endif
 
 	/* More twiddle factors to turn IFFT into IMDCT */
 	for (i = 0; i < 64; i++) {




More information about the MPlayer-cvslog mailing list