[Mplayer-cvslog] CVS: main/liba52 imdct.c,1.13,1.14

Michael Niedermayer michael at mplayer.dev.hu
Tue Dec 18 18:29:30 CET 2001


Update of /cvsroot/mplayer/main/liba52
In directory mplayer:/var/tmp.root/cvs-serv2066

Modified Files:
	imdct.c 
Log Message:
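Remove the unnecessary SSE sin/cos lookup tables (sseSinCos1a and sseSinCos1b); the SSE loop now uses sseSinCos1c and sseSinCos1d instead.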


Index: imdct.c
===================================================================
RCS file: /cvsroot/mplayer/main/liba52/imdct.c,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- imdct.c	18 Dec 2001 15:59:01 -0000	1.13
+++ imdct.c	18 Dec 2001 17:29:27 -0000	1.14
@@ -79,8 +79,6 @@
 // NOTE: SSE needs 16byte alignment or it will segfault 
 // 
 static complex_t __attribute__((aligned(16))) buf[128];
-static float __attribute__((aligned(16))) sseSinCos1a[256];
-static float __attribute__((aligned(16))) sseSinCos1b[256];
 static float __attribute__((aligned(16))) sseSinCos1c[256];
 static float __attribute__((aligned(16))) sseSinCos1d[256];
 static float __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1};
@@ -388,17 +386,20 @@
 		"pushl %%ebp				\n\t" //use ebp without telling gcc
 		".balign 16				\n\t"
 		"1:					\n\t"
-		"movaps (%0, %%esi), %%xmm0		\n\t"
-		"movaps (%0, %%edi), %%xmm1		\n\t"
-		"shufps $0xA0, %%xmm0, %%xmm0		\n\t"
-		"shufps $0x5F, %%xmm1, %%xmm1		\n\t"
-		"mulps sseSinCos1a(%%esi), %%xmm0	\n\t"
-		"mulps sseSinCos1b(%%esi), %%xmm1	\n\t"
-		"addps %%xmm1, %%xmm0			\n\t"
+		"movlps (%0, %%esi), %%xmm0		\n\t" // XXXI
+		"movhps 8(%0, %%edi), %%xmm0		\n\t" // RXXI
+		"movlps 8(%0, %%esi), %%xmm1		\n\t" // XXXi
+		"movhps (%0, %%edi), %%xmm1		\n\t" // rXXi
+		"shufps $0x33, %%xmm1, %%xmm0		\n\t" // irIR
+		"movaps sseSinCos1c(%%esi), %%xmm2	\n\t"
+		"mulps %%xmm0, %%xmm2			\n\t"
+		"shufps $0xB1, %%xmm0, %%xmm0		\n\t" // riRI
+		"mulps sseSinCos1d(%%esi), %%xmm0	\n\t"
+		"subps %%xmm0, %%xmm2			\n\t"
 		"movzbl (%%eax), %%edx			\n\t"
 		"movzbl 1(%%eax), %%ebp			\n\t"
-		"movlps %%xmm0, (%1, %%edx,8)		\n\t"
-		"movhps %%xmm0, (%1, %%ebp,8)		\n\t"
+		"movlps %%xmm2, (%1, %%edx,8)		\n\t"
+		"movhps %%xmm2, (%1, %%ebp,8)		\n\t"
 		"addl $16, %%esi			\n\t"
 		"addl $2, %%eax				\n\t" // avoid complex addressing for P4 crap
 		"subl $16, %%edi			\n\t"
@@ -831,11 +832,6 @@
 	}
 #ifdef ARCH_X86
 	for (i = 0; i < 128; i++) {
-	    sseSinCos1a[2*i+0]= -xsin1[i];
-	    sseSinCos1a[2*i+1]= -xcos1[i];
-	    sseSinCos1b[2*i+0]= xcos1[i];
-	    sseSinCos1b[2*i+1]= -xsin1[i];
-
 	    sseSinCos1c[2*i+0]= xcos1[i];
 	    sseSinCos1c[2*i+1]= -xcos1[i];
 	    sseSinCos1d[2*i+0]= xsin1[i];




More information about the MPlayer-cvslog mailing list