[Mplayer-cvslog] CVS: main/libac3/mmx srfft_3dnow.c,1.7,1.8
Nick Kurshev
nickols_k at users.sourceforge.net
Sun May 27 18:54:04 CEST 2001
Update of /cvsroot/mplayer/main/libac3/mmx
In directory usw-pr-cvs1:/tmp/cvs-serv27495/main/libac3/mmx
Modified Files:
srfft_3dnow.c
Log Message:
Penultimate step: fft_8 is now 3DNow!-optimized. Once TRANS_3DNOW is implemented, libac3 will be fully 3DNow!-optimized.
Index: srfft_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/srfft_3dnow.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -r1.7 -r1.8
*** srfft_3dnow.c 2001/05/27 14:13:05 1.7
--- srfft_3dnow.c 2001/05/27 16:54:02 1.8
***************
*** 38,51 ****
*/
__asm__ __volatile__("femms":::"memory");
__asm__ __volatile__(
- "movl $-1, %%eax\n\t"
- "movd %%eax, %%mm6\n\t"
- "negl %%eax\n\t"
- "movd %%eax, %%mm7\n\t"
- "punpckldq %%mm7, %%mm6\n\t"
- "punpckldq %%mm6, %%mm7\n\t"
- "pi2fd %%mm7, %%mm7\n\t" /* 1. | 1. */
- "pi2fd %%mm6, %%mm6\n\t" /* -1. | 1. */
-
"movq 24(%0), %%mm3\n\t"
"movq 8(%0), %%mm1\n\t"
--- 38,58 ----
*/
__asm__ __volatile__("femms":::"memory");
+ __asm__ __volatile__ (
+ "movl $1, %%eax\n\t"
+ "movd %%eax, %%mm7\n\t"
+ "negl %%eax\n\t"
+ "movd %%eax, %%mm6\n\t"
+ #ifndef HAVE_3DNOWEX
+ "punpckldq %%mm6, %%mm7\n\t" /* 1.0 | -1.0 */
+ "punpckldq %%mm7, %%mm6\n\t" /* -1.0 | 1.0 */
+ "pi2fd %%mm7, %%mm7\n\t"
+ "pi2fd %%mm6, %%mm6\n\t"
+ #else
+ "punpckldq %%mm6, %%mm7\n\t" /* 1.0 | -1.0 */
+ "pi2fd %%mm7, %%mm7\n\t"
+ "pswapd %%mm7, %%mm6\n\t" /* -1.0 | 1.0 */
+ #endif
+ :::"eax","memory");
__asm__ __volatile__(
"movq 24(%0), %%mm3\n\t"
"movq 8(%0), %%mm1\n\t"
***************
*** 63,90 ****
"movq (%0), %%mm5\n\t" /* yb.re = x[0].re - x[2].re; */
- "pfsub 16(%0), %%mm5\n\t" /* yb.im = x[0].im - x[2].im; mm5 = yb */
-
"movq (%0), %%mm6\n\t" /* yt.re = x[0].re + x[2].re; */
- "pfadd 16(%0), %%mm6\n\t" /* yt.im = x[0].im + x[2].im; mm6 = yt */
-
"movq 24(%0), %%mm7\n\t" /* u.re = x[3].re + x[1].re; */
"pfadd 8(%0), %%mm7\n\t" /* u.im = x[3].im + x[1].im; mm7 = u */
"movq %%mm6, %%mm0\n\t" /* x[0].re = yt.re + u.re; */
"pfadd %%mm7, %%mm0\n\t" /*x[0].im = yt.im + u.im; */
"movq %%mm0, (%0)\n\t"
!
! "movq %%mm5, %%mm0\n\t" /* x[1].re = yb.re + vi.re; */
! "pfadd %%mm4, %%mm0\n\t" /* x[1].im = yb.im + vi.im; */
! "movq %%mm0, 8(%0)\n\t"
"pfsub %%mm7, %%mm6\n\t" /* x[2].re = yt.re - u.re; */
- "movq %%mm6, 16(%0)\n\t" /* x[2].im = yt.im - u.im; */
-
"pfsub %%mm4, %%mm5\n\t" /* x[3].re = yb.re - vi.re; */
"movq %%mm5, 24(%0)" /* x[3].im = yb.im - vi.im; */
:"=r"(x)
:"0"(x)
! :"eax","memory");
__asm__ __volatile__("femms":::"memory");
--- 70,93 ----
"movq (%0), %%mm5\n\t" /* yb.re = x[0].re - x[2].re; */
"movq (%0), %%mm6\n\t" /* yt.re = x[0].re + x[2].re; */
"movq 24(%0), %%mm7\n\t" /* u.re = x[3].re + x[1].re; */
+ "pfsub 16(%0), %%mm5\n\t" /* yb.im = x[0].im - x[2].im; mm5 = yb */
+ "pfadd 16(%0), %%mm6\n\t" /* yt.im = x[0].im + x[2].im; mm6 = yt */
"pfadd 8(%0), %%mm7\n\t" /* u.im = x[3].im + x[1].im; mm7 = u */
"movq %%mm6, %%mm0\n\t" /* x[0].re = yt.re + u.re; */
+ "movq %%mm5, %%mm1\n\t" /* x[1].re = yb.re + vi.re; */
"pfadd %%mm7, %%mm0\n\t" /*x[0].im = yt.im + u.im; */
+ "pfadd %%mm4, %%mm1\n\t" /* x[1].im = yb.im + vi.im; */
"movq %%mm0, (%0)\n\t"
! "movq %%mm1, 8(%0)\n\t"
"pfsub %%mm7, %%mm6\n\t" /* x[2].re = yt.re - u.re; */
"pfsub %%mm4, %%mm5\n\t" /* x[3].re = yb.re - vi.re; */
+ "movq %%mm6, 16(%0)\n\t" /* x[2].im = yt.im - u.im; */
"movq %%mm5, 24(%0)" /* x[3].im = yb.im - vi.im; */
:"=r"(x)
:"0"(x)
! :"memory");
__asm__ __volatile__("femms":::"memory");
***************
*** 144,148 ****
/* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8}
*/
! complex_t wT1, wB1, wT2, wB2;
__asm__ __volatile__(
--- 147,151 ----
/* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8}
*/
! complex_t wT1, wB1, wB2;
__asm__ __volatile__(
***************
*** 209,266 ****
:"memory");
- __asm__ __volatile__("femms":::"memory");
-
/* x[1] x[5] */
! wT2.re = wT1.re;
! wT2.im = wT1.im;
! wT2.re -= x[5].re;
! wT2.im -= x[5].im;
!
! wT2.re += wB1.im;
! wT2.im -= wB1.re;
! wT2.re -= x[7].im;
! wT2.im += x[7].re;
!
! wB2.re = wT2.re;
! wB2.re += wT2.im;
! wT2.im -= wT2.re;
! wB2.re *= HSQRT2;
! wT2.im *= HSQRT2;
! wT2.re = wB2.re;
! wB2.re += x[1].re;
! wT2.re = x[1].re - wT2.re;
!
! wB2.im = x[5].re;
! x[1].re = wB2.re;
! x[5].re = wT2.re;
!
! wT2.re = wT2.im;
! wT2.re += x[1].im;
! wT2.im = x[1].im - wT2.im;
! wB2.re = x[5].im;
! x[1].im = wT2.re;
! x[5].im = wT2.im;
/* x[3] x[7] */
! wT1.re -= wB1.im;
! wT1.im += wB1.re;
! wB1.re = wB2.im - x[7].im;
! wB1.im = wB2.re + x[7].re;
! wT1.re -= wB1.re;
! wT1.im -= wB1.im;
! wB1.re = wT1.re + wT1.im;
! wB1.re *= HSQRT2;
! wT1.im -= wT1.re;
! wT1.im *= HSQRT2;
! wB2.re = x[3].re;
! wB2.im = wB2.re + wT1.im;
! wB2.re -= wT1.im;
! x[3].re = wB2.im;
! x[7].re = wB2.re;
! wB2.im = x[3].im;
! wB2.re = wB2.im + wB1.re;
! wB2.im -= wB1.re;
! x[3].im = wB2.im;
! x[7].im = wB2.re;
}
#if 0
--- 212,322 ----
:"memory");
/* x[1] x[5] */
! __asm__ __volatile__ (
! "movl $1, %%eax\n\t"
! "movd %%eax, %%mm7\n\t"
! "negl %%eax\n\t"
! "movd %%eax, %%mm6\n\t"
! #ifndef HAVE_3DNOWEX
! "punpckldq %%mm6, %%mm7\n\t" /* 1.0 | -1.0 */
! "punpckldq %%mm7, %%mm6\n\t" /* -1.0 | 1.0 */
! "pi2fd %%mm7, %%mm7\n\t"
! "pi2fd %%mm6, %%mm6\n\t"
! #else
! "punpckldq %%mm6, %%mm7\n\t" /* 1.0 | -1.0 */
! "pi2fd %%mm7, %%mm7\n\t"
! "pswapd %%mm7, %%mm6\n\t" /* -1.0 | 1.0 */
! #endif
! :::"eax","memory");
! __asm__ __volatile__ (
! "movq %1, %%mm0\n\t"
! "movq %2, %%mm1\n\t"
! "movq 56(%3), %%mm3\n\t"
! "pfsub 40(%3), %%mm0\n\t"
! #ifdef HAVE_3DNOWEX
! "pswapd %%mm1, %%mm1\n\t"
! #else
! "movq %%mm1, %%mm2\n\t"
! "psrlq $32, %%mm1\n\t"
! "punpckldq %%mm2,%%mm1\n\t"
! #endif
! "pfmul %%mm7, %%mm1\n\t"
! "pfadd %%mm1, %%mm0\n\t"
! #ifdef HAVE_3DNOWEX
! "pswapd %%mm3, %%mm3\n\t"
! #else
! "movq %%mm3, %%mm2\n\t"
! "psrlq $32, %%mm3\n\t"
! "punpckldq %%mm2,%%mm3\n\t"
! #endif
! "pfmul %%mm6, %%mm3\n\t"
! "pfadd %%mm3, %%mm0\n\t"
! "movq %%mm0, %%mm1\n\t"
! "pfmul %%mm6, %%mm1\n\t"
! "pfacc %%mm1, %%mm0\n\t"
! "pfmul %4, %%mm0\n\t"
!
! "movq 40(%3), %%mm5\n\t"
! #ifdef HAVE_3DNOWEX
! "pswapd %%mm5, %%mm5\n\t"
! #else
! "movq %%mm5, %%mm1\n\t"
! "psrlq $32, %%mm5\n\t"
! "punpckldq %%mm1,%%mm5\n\t"
! #endif
! "movq %%mm5, %0\n\t"
!
! "movq 8(%3), %%mm1\n\t"
! "movq %%mm1, %%mm2\n\t"
! "pfsub %%mm0, %%mm1\n\t"
! "pfadd %%mm0, %%mm2\n\t"
! "movq %%mm1, 40(%3)\n\t"
! "movq %%mm2, 8(%3)\n\t"
! :"=m"(wB2)
! :"m"(wT1), "m"(wB1), "r"(x), "m"(HSQRT2_3DNOW)
! :"memory");
+
/* x[3] x[7] */
! __asm__ __volatile__(
! "movq %1, %%mm0\n\t"
! #ifdef HAVE_3DNOWEX
! "pswapd %3, %%mm1\n\t"
! #else
! "movq %3, %%mm1\n\t"
! "psrlq $32, %%mm1\n\t"
! "punpckldq %3, %%mm1\n\t"
! #endif
! "pfmul %%mm6, %%mm1\n\t"
! "pfadd %%mm1, %%mm0\n\t"
! "movq %2, %%mm2\n\t"
! "movq 56(%4), %%mm3\n\t"
! "pfmul %%mm7, %%mm3\n\t"
! "pfadd %%mm3, %%mm2\n\t"
! #ifdef HAVE_3DNOWEX
! "pswapd %%mm2, %%mm2\n\t"
! #else
! "movq %%mm2, %%mm5\n\t"
! "psrlq $32, %%mm2\n\t"
! "punpckldq %%mm5,%%mm2\n\t"
! #endif
! "pfsub %%mm2, %%mm0\n\t"
! "movq %%mm0, %%mm1\n\t"
! "pfmul %%mm6, %%mm0\n\t"
! "pfacc %%mm1, %%mm0\n\t"
! "pfmul %5, %%mm0\n\t"
! "movq %%mm0, %%mm1\n\t"
! "movq 24(%4), %%mm3\n\t"
! "movq %%mm3, %%mm4\n\t"
! "pfmul %%mm6, %%mm1\n\t"
! "pfadd %%mm1, %%mm3\n\t"
! "pfmul %%mm7, %%mm0\n\t"
! "pfadd %%mm0, %%mm4\n\t"
! "movq %%mm4, 24(%0)\n\t"
! "movq %%mm3, 56(%0)\n\t"
! :"=r"(x)
! :"m"(wT1), "m"(wB2), "m"(wB1), "0"(x), "m"(HSQRT2_3DNOW)
! :"memory");
! __asm__ __volatile__("femms":::"memory");
}
#if 0
_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog
More information about the MPlayer-cvslog
mailing list