[Mplayer-cvslog] CVS: main/libac3/mmx srfft_3dnow.c,1.1,1.2

Nick Kurshev nickols_k at users.sourceforge.net
Wed May 23 11:45:35 CEST 2001


Update of /cvsroot/mplayer/main/libac3/mmx
In directory usw-pr-cvs1:/tmp/cvs-serv29840/main/libac3/mmx

Modified Files:
	srfft_3dnow.c 
Log Message:
improvements

Index: srfft_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/srfft_3dnow.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** srfft_3dnow.c	2001/05/23 08:20:16	1.1
--- srfft_3dnow.c	2001/05/23 09:45:33	1.2
***************
*** 28,35 ****
   *
   */
- /*
-  * Modified for using AMD's 3DNow! - 3DNowEx(DSP)! SIMD operations support
-  * by Nick Kurshev <nickols_k at mail.ru>
-  */
  
  void fft_4(complex_t *x)
--- 28,31 ----
***************
*** 38,81 ****
    /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4} 
     */
  
!   register complex_t yt, yb, u, vi;
!   
!   yt.re = x[0].re;
!   yb.re = yt.re - x[2].re;
!   yt.re += x[2].re;
! 
!   u.re = x[1].re;
!   vi.im = x[3].re - u.re;
!   u.re += x[3].re;
!   
!   u.im = x[1].im;
!   vi.re = u.im - x[3].im;
!   u.im += x[3].im;
! 
!   yt.im = yt.re;
!   yt.im += u.re;
!   x[0].re = yt.im;
!   yt.re -= u.re;
!   x[2].re = yt.re;
!   yt.im = yb.re;
!   yt.im += vi.re;
!   x[1].re = yt.im;
!   yb.re -= vi.re;
!   x[3].re = yb.re;
! 
!   yt.im = x[0].im;
!   yb.im = yt.im - x[2].im;
!   yt.im += x[2].im;
! 
!   yt.re = yt.im;
!   yt.re += u.im;
!   x[0].im = yt.re;
!   yt.im -= u.im;
!   x[2].im = yt.im;
!   yt.re = yb.im;
!   yt.re += vi.im;
!   x[1].im = yt.re;
!   yb.im -= vi.im;
!   x[3].im = yb.im;
  }
  #if 0
--- 34,89 ----
    /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4} 
     */
+   asm volatile("femms":::"memory");
+   asm volatile(
+ 	"movl	$-1, %%eax\n\t"
+ 	"movd	%%eax, %%mm6\n\t"
+ 	"negl   %%eax\n\t"
+ 	"movd	%%eax, %%mm7\n\t"
+ 	"punpckldq %%mm7, %%mm6\n\t"
+ 	"punpckldq %%mm6, %%mm7\n\t"
+ 	"pi2fd	%%mm7, %%mm7\n\t" /* 1. | -1. */
+ 	"pi2fd  %%mm6, %%mm6\n\t" /* -1. | 1. */
+ 
+ 	"movq	24(%0), %%mm3\n\t"
+ 	"movq	8(%0), %%mm1\n\t"
+ 	"pfmul	%%mm7, %%mm3\n\t" /* mm3.re | -mm3.im */
+ 	"pfmul  %%mm6, %%mm1\n\t" /* -mm1.re | mm1.im */
+ 	"pfadd	%%mm1, %%mm3\n\t" /* vi.im = x[3].re - x[1].re; */
+ 	"movq	%%mm3, %%mm4\n\t" /* vi.re =-x[3].im + x[1].im; mm4 = vi */
+ #ifdef HAVE_3DNOWEX
+ 	"pswapd %%mm4, %%mm4\n\t"
+ #else
+ 	"movq   %%mm4, %%mm5\n\t"
+ 	"psrlq	$32, %%mm4\n\t"
+ 	"punpckldq %%mm5, %%mm4\n\t"
+ #endif
  
! 	"movq	(%0), %%mm5\n\t" /* yb.re = x[0].re - x[2].re; */
! 	"pfsub	16(%0), %%mm5\n\t" /* yb.im = x[0].im - x[2].im; mm5 = yb */
! 
! 	"movq	(%0), %%mm6\n\t" /* yt.re = x[0].re + x[2].re; */
! 	"pfadd	16(%0), %%mm6\n\t" /* yt.im = x[0].im + x[2].im; mm6 = yt */
! 
! 	"movq	24(%0), %%mm7\n\t" /* u.re  = x[3].re + x[1].re; */
! 	"pfadd	8(%0), %%mm7\n\t" /* u.im  = x[3].im + x[1].im; mm7 = u */
! 
! 	"movq	%%mm6, %%mm0\n\t" /* x[0].re = yt.re + u.re; */
! 	"pfadd	%%mm7, %%mm0\n\t" /*x[0].im = yt.im + u.im; */
! 	"movq	%%mm0, (%0)\n\t"
! 
! 	"movq	%%mm5, %%mm0\n\t" /* x[1].re = yb.re + vi.re; */
! 	"pfadd	%%mm4, %%mm0\n\t" /* x[1].im = yb.im + vi.im; */
! 	"movq	%%mm0, 8(%0)\n\t"
! 
! 	"pfsub	%%mm7, %%mm6\n\t" /* x[2].re = yt.re - u.re; */
! 	"movq	%%mm6, 16(%0)\n\t" /* x[2].im = yt.im - u.im; */
! 
! 	"pfsub	%%mm4, %%mm5\n\t" /* x[3].re = yb.re - vi.re; */
! 	"movq	%%mm5, 24(%0)" /* x[3].im = yb.im - vi.im; */
! 	:"=r"(x)
! 	:"0"(x)
! 	:"eax","memory");
! 
!   asm volatile("femms":::"memory");
  }
  #if 0
***************
*** 135,155 ****
    complex_t wT1, wB1, wT2, wB2;
    
!   asm(
! 	"movq	8%0, %%mm0\n\t"
! 	"movq	24%0, %%mm1\n\t"
  	"movq	%%mm0, %1\n\t"  /* wT1 = x[1]; */
  	"movq	%%mm1, %2\n\t" /* wB1 = x[3]; */
! 	:"=m"(*x),"=m"(wT1), "=m"(wB1)
! 	:"0"(*x));
! 
!   asm(
! 	"movq	16%0, %%mm0\n\t"
! 	"movq	32%0, %%mm1\n\t"
! 	"movq	48%0, %%mm2\n\t"
! 	"movq	%%mm0, 8%0\n\t"  /* x[1] = x[2]; */
! 	"movq	%%mm1, 16%0\n\t" /* x[2] = x[4]; */
! 	"movq	%%mm2, 24%0\n\t" /* x[3] = x[6]; */
! 	:"=m"(*x)
! 	:"0"(*x));
    asm volatile("femms":::"memory");
    fft_4(&x[0]);
--- 143,165 ----
    complex_t wT1, wB1, wT2, wB2;
    
!   asm volatile(
! 	"movq	8(%0), %%mm0\n\t"
! 	"movq	24(%0), %%mm1\n\t"
  	"movq	%%mm0, %1\n\t"  /* wT1 = x[1]; */
  	"movq	%%mm1, %2\n\t" /* wB1 = x[3]; */
! 	:"=r"(x),"=m"(wT1), "=m"(wB1)
! 	:"0"(x)
! 	:"memory");
! 
!   asm volatile(
! 	"movq	16(%0), %%mm3\n\t"
! 	"movq	32(%0), %%mm4\n\t"
! 	"movq	48(%0), %%mm5\n\t"
! 	"movq	%%mm3, 8(%0)\n\t"  /* x[1] = x[2]; */
! 	"movq	%%mm4, 16(%0)\n\t" /* x[2] = x[4]; */
! 	"movq	%%mm5, 24(%0)\n\t" /* x[3] = x[6]; */
! 	:"=r"(x)
! 	:"0"(x)
! 	:"memory");
    asm volatile("femms":::"memory");
    fft_4(&x[0]);


_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog



More information about the MPlayer-cvslog mailing list