[Mplayer-cvslog] CVS: main/liba52 imdct.c,1.3,1.4
Michael Niedermayer
michael at mplayer.dev.hu
Sun Dec 16 16:00:06 CET 2001
Update of /cvsroot/mplayer/main/liba52
In directory mplayer:/var/tmp.root/cvs-serv32398
Modified Files:
imdct.c
Log Message:
sse opt (SSE-optimized pre-IFFT complex multiply for the 512-point IMDCT)
Index: imdct.c
===================================================================
RCS file: /cvsroot/mplayer/main/liba52/imdct.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- imdct.c 16 Dec 2001 03:06:41 -0000 1.3
+++ imdct.c 16 Dec 2001 15:00:02 -0000 1.4
@@ -75,6 +75,8 @@
#ifdef HAVE_SSE
// NOTE: SSE needs 16byte alignment or it will segfault
static complex_t __attribute__((aligned(16))) buf[128];
+static float __attribute__((aligned(16))) sseSinCos1a[256];
+static float __attribute__((aligned(16))) sseSinCos1b[256];
static float __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1};
#else
static complex_t buf[128];
@@ -174,11 +176,32 @@
/* 512 IMDCT with source and dest data in 'data' */
/* Pre IFFT complex multiply plus IFFT cmplx conjugate */
+#ifdef HAVE_SSE
+ asm volatile(
+ "xorl %%esi, %%esi \n\t"
+ "movl $1008, %%edi \n\t"
+ "1: \n\t"
+ "movaps (%0, %%esi), %%xmm0 \n\t"
+ "movaps (%0, %%edi), %%xmm1 \n\t"
+ "shufps $0xA0, %%xmm0, %%xmm0 \n\t"
+ "shufps $0x5F, %%xmm1, %%xmm1 \n\t"
+ "mulps sseSinCos1a(%%esi), %%xmm0 \n\t"
+ "mulps sseSinCos1b(%%esi), %%xmm1 \n\t"
+ "addps %%xmm1, %%xmm0 \n\t"
+ "movaps %%xmm0, (%1, %%esi) \n\t"
+ "addl $16, %%esi \n\t"
+ "subl $16, %%edi \n\t"
+ " jnc 1b \n\t"
+ :: "r" (data), "r" (buf)
+ : "%esi", "%edi"
+ );
+#else
for( i=0; i < 128; i++) {
/* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
buf[i].real = (data[256-2*i-1] * xcos1[i]) - (data[2*i] * xsin1[i]);
buf[i].imag = -1.0 * ((data[2*i] * xcos1[i]) + (data[256-2*i-1] * xsin1[i]));
}
+#endif
/* Bit reversed shuffling */
for(i=0; i<128; i++) {
@@ -514,6 +537,14 @@
xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1));
xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1));
}
+#ifdef HAVE_SSE
+ for (i = 0; i < 128; i++) {
+ sseSinCos1a[2*i+0]= -xsin1[i];
+ sseSinCos1a[2*i+1]= -xcos1[i];
+ sseSinCos1b[2*i+0]= xcos1[i];
+ sseSinCos1b[2*i+1]= -xsin1[i];
+ }
+#endif
/* More twiddle factors to turn IFFT into IMDCT */
for (i = 0; i < 64; i++) {
More information about the MPlayer-cvslog mailing list