[Mplayer-cvslog] CVS: main/libac3/mmx srfft_3dnow.c,1.7,1.8

Nick Kurshev nickols_k at users.sourceforge.net
Sun May 27 18:54:04 CEST 2001


Update of /cvsroot/mplayer/main/libac3/mmx
In directory usw-pr-cvs1:/tmp/cvs-serv27495/main/libac3/mmx

Modified Files:
	srfft_3dnow.c 
Log Message:
Penultimate step: fft_8 is now 3DNow!-optimized. Once TRANS_3DNOW is implemented, libac3 will be fully 3DNow!-optimized.

Index: srfft_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/srfft_3dnow.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -r1.7 -r1.8
*** srfft_3dnow.c	2001/05/27 14:13:05	1.7
--- srfft_3dnow.c	2001/05/27 16:54:02	1.8
***************
*** 38,51 ****
     */
    __asm__ __volatile__("femms":::"memory");
    __asm__ __volatile__(
- 	"movl	$-1, %%eax\n\t"
- 	"movd	%%eax, %%mm6\n\t"
- 	"negl   %%eax\n\t"
- 	"movd	%%eax, %%mm7\n\t"
- 	"punpckldq %%mm7, %%mm6\n\t"
- 	"punpckldq %%mm6, %%mm7\n\t"
- 	"pi2fd	%%mm7, %%mm7\n\t" /* 1. | 1. */
- 	"pi2fd  %%mm6, %%mm6\n\t" /* -1. | 1. */
- 
  	"movq	24(%0), %%mm3\n\t"
  	"movq	8(%0), %%mm1\n\t"
--- 38,58 ----
     */
    __asm__ __volatile__("femms":::"memory");
+   __asm__ __volatile__ (
+ 	"movl $1, %%eax\n\t"
+ 	"movd %%eax, %%mm7\n\t"
+ 	"negl %%eax\n\t"
+ 	"movd %%eax, %%mm6\n\t"
+ #ifndef HAVE_3DNOWEX
+ 	"punpckldq %%mm6, %%mm7\n\t" /* 1.0 | -1.0 */
+ 	"punpckldq %%mm7, %%mm6\n\t" /* -1.0 | 1.0 */
+ 	"pi2fd %%mm7, %%mm7\n\t"
+ 	"pi2fd %%mm6, %%mm6\n\t"
+ #else
+ 	"punpckldq %%mm6, %%mm7\n\t" /* 1.0 | -1.0 */
+ 	"pi2fd %%mm7, %%mm7\n\t"
+ 	"pswapd %%mm7, %%mm6\n\t" /* -1.0 | 1.0 */
+ #endif
+ 	:::"eax","memory");
    __asm__ __volatile__(
  	"movq	24(%0), %%mm3\n\t"
  	"movq	8(%0), %%mm1\n\t"
***************
*** 63,90 ****
  
  	"movq	(%0), %%mm5\n\t" /* yb.re = x[0].re - x[2].re; */
- 	"pfsub	16(%0), %%mm5\n\t" /* yb.im = x[0].im - x[2].im; mm5 = yb */
- 
  	"movq	(%0), %%mm6\n\t" /* yt.re = x[0].re + x[2].re; */
- 	"pfadd	16(%0), %%mm6\n\t" /* yt.im = x[0].im + x[2].im; mm6 = yt */
- 
  	"movq	24(%0), %%mm7\n\t" /* u.re  = x[3].re + x[1].re; */
  	"pfadd	8(%0), %%mm7\n\t" /* u.im  = x[3].im + x[1].im; mm7 = u */
  
  	"movq	%%mm6, %%mm0\n\t" /* x[0].re = yt.re + u.re; */
  	"pfadd	%%mm7, %%mm0\n\t" /*x[0].im = yt.im + u.im; */
  	"movq	%%mm0, (%0)\n\t"
! 
! 	"movq	%%mm5, %%mm0\n\t" /* x[1].re = yb.re + vi.re; */
! 	"pfadd	%%mm4, %%mm0\n\t" /* x[1].im = yb.im + vi.im; */
! 	"movq	%%mm0, 8(%0)\n\t"
  
  	"pfsub	%%mm7, %%mm6\n\t" /* x[2].re = yt.re - u.re; */
- 	"movq	%%mm6, 16(%0)\n\t" /* x[2].im = yt.im - u.im; */
- 
  	"pfsub	%%mm4, %%mm5\n\t" /* x[3].re = yb.re - vi.re; */
  	"movq	%%mm5, 24(%0)" /* x[3].im = yb.im - vi.im; */
  	:"=r"(x)
  	:"0"(x)
! 	:"eax","memory");
  
    __asm__ __volatile__("femms":::"memory");
--- 70,93 ----
  
  	"movq	(%0), %%mm5\n\t" /* yb.re = x[0].re - x[2].re; */
  	"movq	(%0), %%mm6\n\t" /* yt.re = x[0].re + x[2].re; */
  	"movq	24(%0), %%mm7\n\t" /* u.re  = x[3].re + x[1].re; */
+ 	"pfsub	16(%0), %%mm5\n\t" /* yb.im = x[0].im - x[2].im; mm5 = yb */
+ 	"pfadd	16(%0), %%mm6\n\t" /* yt.im = x[0].im + x[2].im; mm6 = yt */
  	"pfadd	8(%0), %%mm7\n\t" /* u.im  = x[3].im + x[1].im; mm7 = u */
  
  	"movq	%%mm6, %%mm0\n\t" /* x[0].re = yt.re + u.re; */
+ 	"movq	%%mm5, %%mm1\n\t" /* x[1].re = yb.re + vi.re; */
  	"pfadd	%%mm7, %%mm0\n\t" /*x[0].im = yt.im + u.im; */
+ 	"pfadd	%%mm4, %%mm1\n\t" /* x[1].im = yb.im + vi.im; */
  	"movq	%%mm0, (%0)\n\t"
! 	"movq	%%mm1, 8(%0)\n\t"
  
  	"pfsub	%%mm7, %%mm6\n\t" /* x[2].re = yt.re - u.re; */
  	"pfsub	%%mm4, %%mm5\n\t" /* x[3].re = yb.re - vi.re; */
+ 	"movq	%%mm6, 16(%0)\n\t" /* x[2].im = yt.im - u.im; */
  	"movq	%%mm5, 24(%0)" /* x[3].im = yb.im - vi.im; */
  	:"=r"(x)
  	:"0"(x)
! 	:"memory");
  
    __asm__ __volatile__("femms":::"memory");
***************
*** 144,148 ****
    /* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8} 
     */
!   complex_t wT1, wB1, wT2, wB2;
    
    __asm__ __volatile__(
--- 147,151 ----
    /* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8} 
     */
!   complex_t wT1, wB1, wB2;
    
    __asm__ __volatile__(
***************
*** 209,266 ****
        :"memory");
    
-   __asm__ __volatile__("femms":::"memory");
- 
    /* x[1] x[5] */
!   wT2.re = wT1.re;
!   wT2.im = wT1.im;
!   wT2.re -= x[5].re;
!   wT2.im -= x[5].im;
!   
!   wT2.re += wB1.im;
!   wT2.im -= wB1.re;
!   wT2.re -= x[7].im;
!   wT2.im += x[7].re;
! 
!   wB2.re = wT2.re;
!   wB2.re += wT2.im;
!   wT2.im -= wT2.re;
!   wB2.re *= HSQRT2;
!   wT2.im *= HSQRT2;
!   wT2.re = wB2.re;
!   wB2.re += x[1].re;
!   wT2.re =  x[1].re - wT2.re;
! 
!   wB2.im = x[5].re;
!   x[1].re = wB2.re;
!   x[5].re = wT2.re;
! 
!   wT2.re = wT2.im;
!   wT2.re += x[1].im;
!   wT2.im = x[1].im - wT2.im;
!   wB2.re = x[5].im;
!   x[1].im = wT2.re;
!   x[5].im = wT2.im;
  
    /* x[3] x[7] */
!   wT1.re -= wB1.im;
!   wT1.im += wB1.re;
!   wB1.re = wB2.im - x[7].im;
!   wB1.im = wB2.re + x[7].re;
!   wT1.re -= wB1.re;
!   wT1.im -= wB1.im;
!   wB1.re = wT1.re + wT1.im;
!   wB1.re *= HSQRT2;
!   wT1.im -= wT1.re;
!   wT1.im *= HSQRT2;
!   wB2.re = x[3].re;
!   wB2.im = wB2.re + wT1.im;
!   wB2.re -= wT1.im;
!   x[3].re = wB2.im;
!   x[7].re = wB2.re;
!   wB2.im = x[3].im;
!   wB2.re = wB2.im + wB1.re;
!   wB2.im -= wB1.re;
!   x[3].im = wB2.im;
!   x[7].im = wB2.re;
  }
  #if 0
--- 212,322 ----
        :"memory");
    
    /* x[1] x[5] */
!   __asm__ __volatile__ (
! 	"movl $1, %%eax\n\t"
! 	"movd %%eax, %%mm7\n\t"
! 	"negl %%eax\n\t"
! 	"movd %%eax, %%mm6\n\t"
! #ifndef HAVE_3DNOWEX
! 	"punpckldq %%mm6, %%mm7\n\t" /* 1.0 | -1.0 */
! 	"punpckldq %%mm7, %%mm6\n\t" /* -1.0 | 1.0 */
! 	"pi2fd %%mm7, %%mm7\n\t"
! 	"pi2fd %%mm6, %%mm6\n\t"
! #else
! 	"punpckldq %%mm6, %%mm7\n\t" /* 1.0 | -1.0 */
! 	"pi2fd %%mm7, %%mm7\n\t"
! 	"pswapd %%mm7, %%mm6\n\t" /* -1.0 | 1.0 */
! #endif
! 	:::"eax","memory");
!   __asm__ __volatile__ (
! 	"movq	%1,	%%mm0\n\t"
! 	"movq	%2,	%%mm1\n\t"
! 	"movq	56(%3),	%%mm3\n\t"
! 	"pfsub	40(%3),	%%mm0\n\t"
! #ifdef HAVE_3DNOWEX
! 	"pswapd	%%mm1,	%%mm1\n\t"
! #else
! 	"movq	%%mm1,	%%mm2\n\t"
! 	"psrlq	$32,	%%mm1\n\t"
! 	"punpckldq %%mm2,%%mm1\n\t"
! #endif
! 	"pfmul	%%mm7,	%%mm1\n\t"
! 	"pfadd	%%mm1,	%%mm0\n\t"
! #ifdef HAVE_3DNOWEX
! 	"pswapd	%%mm3,	%%mm3\n\t"
! #else
! 	"movq	%%mm3,	%%mm2\n\t"
! 	"psrlq	$32,	%%mm3\n\t"
! 	"punpckldq %%mm2,%%mm3\n\t"
! #endif
! 	"pfmul	%%mm6,	%%mm3\n\t"
! 	"pfadd	%%mm3,	%%mm0\n\t"
! 	"movq	%%mm0,	%%mm1\n\t"
! 	"pfmul	%%mm6,	%%mm1\n\t"
! 	"pfacc	%%mm1,	%%mm0\n\t"
! 	"pfmul	%4,	%%mm0\n\t"
! 	
! 	"movq	40(%3),	%%mm5\n\t"
! #ifdef HAVE_3DNOWEX
! 	"pswapd	%%mm5,	%%mm5\n\t"
! #else
! 	"movq	%%mm5,	%%mm1\n\t"
! 	"psrlq	$32,	%%mm5\n\t"
! 	"punpckldq %%mm1,%%mm5\n\t"
! #endif
! 	"movq	%%mm5,	%0\n\t"
! 	
! 	"movq	8(%3),	%%mm1\n\t"
! 	"movq	%%mm1,	%%mm2\n\t"
! 	"pfsub	%%mm0,	%%mm1\n\t"
! 	"pfadd	%%mm0,	%%mm2\n\t"
! 	"movq	%%mm1,	40(%3)\n\t"
! 	"movq	%%mm2,	8(%3)\n\t"
! 	:"=m"(wB2)
! 	:"m"(wT1), "m"(wB1), "r"(x), "m"(HSQRT2_3DNOW)
! 	:"memory");
  
+ 
    /* x[3] x[7] */
!   __asm__ __volatile__(
! 	"movq	%1,	%%mm0\n\t"
! #ifdef HAVE_3DNOWEX
! 	"pswapd	%3,	%%mm1\n\t"
! #else
! 	"movq	%3,	%%mm1\n\t"
! 	"psrlq	$32,	%%mm1\n\t"
! 	"punpckldq %3,	%%mm1\n\t"
! #endif
! 	"pfmul	%%mm6,	%%mm1\n\t"	
! 	"pfadd	%%mm1,	%%mm0\n\t"
! 	"movq	%2,	%%mm2\n\t"
! 	"movq	56(%4),	%%mm3\n\t"
! 	"pfmul	%%mm7,	%%mm3\n\t"
! 	"pfadd	%%mm3,	%%mm2\n\t"
! #ifdef HAVE_3DNOWEX
! 	"pswapd	%%mm2,	%%mm2\n\t"
! #else
! 	"movq	%%mm2,	%%mm5\n\t"
! 	"psrlq	$32,	%%mm2\n\t"
! 	"punpckldq %%mm5,%%mm2\n\t"
! #endif
! 	"pfsub	%%mm2,	%%mm0\n\t"
! 	"movq	%%mm0,	%%mm1\n\t"
! 	"pfmul  %%mm6,	%%mm0\n\t"
! 	"pfacc	%%mm1,	%%mm0\n\t"
! 	"pfmul	%5,	%%mm0\n\t"
! 	"movq	%%mm0,	%%mm1\n\t"
! 	"movq	24(%4),	%%mm3\n\t"
! 	"movq	%%mm3,	%%mm4\n\t"
! 	"pfmul	%%mm6,	%%mm1\n\t"
! 	"pfadd	%%mm1,	%%mm3\n\t"
! 	"pfmul	%%mm7,	%%mm0\n\t"
! 	"pfadd	%%mm0,	%%mm4\n\t"
! 	"movq	%%mm4,	24(%0)\n\t"
! 	"movq	%%mm3,	56(%0)\n\t"
! 	:"=r"(x)
! 	:"m"(wT1), "m"(wB2), "m"(wB1), "0"(x), "m"(HSQRT2_3DNOW)
! 	:"memory");
!   __asm__ __volatile__("femms":::"memory");
  }
  #if 0


_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog



More information about the MPlayer-cvslog mailing list