[Mplayer-cvslog] CVS: main/liba52 imdct.c,1.12,1.13
Michael Niedermayer
michael at mplayer.dev.hu
Tue Dec 18 16:59:06 CET 2001
Update of /cvsroot/mplayer/main/liba52
In directory mplayer:/var/tmp.root/cvs-serv1671
Modified Files:
imdct.c
Log Message:
SSE optimization: the post-IFFT complex multiply loop is now done with SSE inline assembly
Index: imdct.c
===================================================================
RCS file: /cvsroot/mplayer/main/liba52/imdct.c,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- imdct.c 18 Dec 2001 04:00:29 -0000 1.12
+++ imdct.c 18 Dec 2001 15:59:01 -0000 1.13
@@ -77,9 +77,12 @@
#ifdef ARCH_X86
// NOTE: SSE needs 16byte alignment or it will segfault
+//
static complex_t __attribute__((aligned(16))) buf[128];
static float __attribute__((aligned(16))) sseSinCos1a[256];
static float __attribute__((aligned(16))) sseSinCos1b[256];
+static float __attribute__((aligned(16))) sseSinCos1c[256];
+static float __attribute__((aligned(16))) sseSinCos1d[256];
static float __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1};
//static float __attribute__((aligned(16))) sseW0[4];
static float __attribute__((aligned(16))) sseW1[8];
@@ -555,15 +558,23 @@
);
}
-
- /* Post IFFT complex multiply plus IFFT complex conjugate*/
- for( i=0; i < 128; i++) {
- /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
- tmp_a_r = buf[i].real;
- tmp_a_i = -1.0 * buf[i].imag;
- buf[i].real =(tmp_a_r * xcos1[i]) - (tmp_a_i * xsin1[i]);
- buf[i].imag =(tmp_a_r * xsin1[i]) + (tmp_a_i * xcos1[i]);
- }
+ asm volatile(
+ "movl $-1024, %%esi \n\t"
+ ".balign 16 \n\t"
+ "1: \n\t"
+ "movaps (%0, %%esi), %%xmm0 \n\t"
+ "movaps (%0, %%esi), %%xmm1 \n\t"
+ "shufps $0xB1, %%xmm0, %%xmm0 \n\t"
+ "mulps 1024+sseSinCos1c(%%esi), %%xmm1 \n\t"
+ "mulps 1024+sseSinCos1d(%%esi), %%xmm0 \n\t"
+ "addps %%xmm1, %%xmm0 \n\t"
+ "movaps %%xmm0, (%0, %%esi) \n\t"
+ "addl $16, %%esi \n\t"
+ " jnz 1b \n\t"
+ :: "r" (buf+128)
+ : "%esi"
+ );
+
data_ptr = data;
delay_ptr = delay;
@@ -824,6 +835,11 @@
sseSinCos1a[2*i+1]= -xcos1[i];
sseSinCos1b[2*i+0]= xcos1[i];
sseSinCos1b[2*i+1]= -xsin1[i];
+
+ sseSinCos1c[2*i+0]= xcos1[i];
+ sseSinCos1c[2*i+1]= -xcos1[i];
+ sseSinCos1d[2*i+0]= xsin1[i];
+ sseSinCos1d[2*i+1]= xsin1[i];
}
#endif
More information about the MPlayer-cvslog mailing list