[Mplayer-cvslog] CVS: main/libac3/mmx imdct_3dnow.c,1.4,1.5 srfft_3dnow.c,1.8,1.9 srfftp_3dnow.h,1.3,1.4

Nick Kurshev nickols_k at users.sourceforge.net
Thu May 31 19:58:59 CEST 2001


Update of /cvsroot/mplayer/main/libac3/mmx
In directory usw-pr-cvs1:/tmp/cvs-serv11371/main/libac3/mmx

Modified Files:
	imdct_3dnow.c srfft_3dnow.c srfftp_3dnow.h 
Log Message:
libac3 is now fully 3DNow! optimized

Index: imdct_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/imdct_3dnow.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -r1.4 -r1.5
*** imdct_3dnow.c	2001/05/27 01:53:17	1.4
--- imdct_3dnow.c	2001/05/31 17:58:56	1.5
***************
*** 80,86 ****
  	}
  
-   __asm__ __volatile__ ("femms":::"memory");
  	fft_128p (&buf[0]);
-   __asm__ __volatile__ ("femms":::"memory");
  
  // Post IFFT complex multiply  plus IFFT complex conjugate
--- 80,84 ----
***************
*** 227,231 ****
--- 225,231 ----
          }
  
+   __asm__ __volatile__ ("femms":::"memory");
         fft_128p(&buf[0]);
+   __asm__ __volatile__ ("femms":::"memory");
  
          /* Post IFFT complex multiply  plus IFFT complex conjugate*/
***************
*** 309,314 ****
--- 309,316 ----
  	}
  
+   __asm__ __volatile__ ("femms":::"memory");
  	fft_64p(&buf1[0]);
  	fft_64p(&buf2[0]);
+   __asm__ __volatile__ ("femms":::"memory");
  
  #ifdef DEBUG
***************
*** 404,409 ****
--- 406,413 ----
  
  
+   __asm__ __volatile__ ("femms":::"memory");
         fft_64p(&buf1[0]);
         fft_64p(&buf2[0]);
+   __asm__ __volatile__ ("femms":::"memory");
  
  #ifdef DEBUG

Index: srfft_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/srfft_3dnow.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -r1.8 -r1.9
*** srfft_3dnow.c	2001/05/27 16:54:02	1.8
--- srfft_3dnow.c	2001/05/31 17:58:56	1.9
***************
*** 37,41 ****
    /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4} 
     */
-   __asm__ __volatile__("femms":::"memory");
    __asm__ __volatile__ (
  	"movl $1, %%eax\n\t"
--- 37,40 ----
***************
*** 90,95 ****
  	:"0"(x)
  	:"memory");
- 
-   __asm__ __volatile__("femms":::"memory");
  }
  #if 0
--- 89,92 ----
***************
*** 318,322 ****
  	:"m"(wT1), "m"(wB2), "m"(wB1), "0"(x), "m"(HSQRT2_3DNOW)
  	:"memory");
-   __asm__ __volatile__("femms":::"memory");
  }
  #if 0
--- 315,318 ----
***************
*** 445,449 ****
--- 441,447 ----
    register complex_t  *x2k, *x3k, *x4k, *wB;
    register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i;
+   complex_t a, a1, u, v;
  
+   TRANS_FILL_MM6_MM7_3DNOW();
    x2k = x + 2 * k;
    x3k = x2k + 2 * k;
***************
*** 452,461 ****
    
    TRANSZERO_3DNOW(x[0],x2k[0],x3k[0],x4k[0]);
!   TRANS(x[1],x2k[1],x3k[1],x4k[1],wTB[1],wB[1],d[1],d_3[1]);
    
    --k;
    for(;;) {
!      TRANS(x[2],x2k[2],x3k[2],x4k[2],wTB[2],wB[2],d[2],d_3[2]);
!      TRANS(x[3],x2k[3],x3k[3],x4k[3],wTB[3],wB[3],d[3],d_3[3]);
       if (!--k) break;
       x += 2;
--- 450,459 ----
    
    TRANSZERO_3DNOW(x[0],x2k[0],x3k[0],x4k[0]);
!   TRANS_3DNOW(x[1],x2k[1],x3k[1],x4k[1],wTB[1],wB[1],d[1],d_3[1]);
    
    --k;
    for(;;) {
!      TRANS_3DNOW(x[2],x2k[2],x3k[2],x4k[2],wTB[2],wB[2],d[2],d_3[2]);
!      TRANS_3DNOW(x[3],x2k[3],x3k[3],x4k[3],wTB[3],wB[3],d[3],d_3[3]);
       if (!--k) break;
       x += 2;
***************
*** 473,484 ****
  void fft_asmb16(complex_t *x, complex_t *wTB)
  {
!   register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i;
    int k = 2;
  
    /* transform x[0], x[8], x[4], x[12] */
    TRANSZERO_3DNOW(x[0],x[4],x[8],x[12]);
  
    /* transform x[1], x[9], x[5], x[13] */
!   TRANS(x[1],x[5],x[9],x[13],wTB[1],wTB[5],delta16[1],delta16_3[1]);
  
    /* transform x[2], x[10], x[6], x[14] */
--- 471,483 ----
  void fft_asmb16(complex_t *x, complex_t *wTB)
  {
!   complex_t a, a1, u, v;
    int k = 2;
  
+   TRANS_FILL_MM6_MM7_3DNOW();
    /* transform x[0], x[8], x[4], x[12] */
    TRANSZERO_3DNOW(x[0],x[4],x[8],x[12]);
  
    /* transform x[1], x[9], x[5], x[13] */
!   TRANS_3DNOW(x[1],x[5],x[9],x[13],wTB[1],wTB[5],delta16[1],delta16_3[1]);
  
    /* transform x[2], x[10], x[6], x[14] */
***************
*** 486,490 ****
  
    /* transform x[3], x[11], x[7], x[15] */
!   TRANS(x[3],x[7],x[11],x[15],wTB[3],wTB[7],delta16[3],delta16_3[3]);
  
  } 
--- 485,489 ----
  
    /* transform x[3], x[11], x[7], x[15] */
!   TRANS_3DNOW(x[3],x[7],x[11],x[15],wTB[3],wTB[7],delta16[3],delta16_3[3]);
  
  } 

Index: srfftp_3dnow.h
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/srfftp_3dnow.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -r1.3 -r1.4
*** srfftp_3dnow.h	2001/05/27 14:13:05	1.3
--- srfftp_3dnow.h	2001/05/31 17:58:56	1.4
***************
*** 69,77 ****
  	"punpckldq "##mm_hlp","##mm_base"\n\t"
  #endif
  
  #define TRANSZERO_3DNOW(A0,A4,A8,A12) \
  { \
-     __asm__ __volatile__("femms":::"memory");\
-     TRANS_FILL_MM6_MM7_3DNOW()\
      __asm__ __volatile__(\
  	"movq	%4, %%mm0\n\t" /* mm0 = wTB[0]*/\
--- 69,84 ----
  	"punpckldq "##mm_hlp","##mm_base"\n\t"
  #endif
+ #ifdef HAVE_3DNOWEX
+ #define PFNACC_MM(mm_base,mm_hlp)	"pfnacc	"##mm_base","##mm_base"\n\t"
+ #else
+ #define PFNACC_MM(mm_base,mm_hlp)\
+ 	"movq	"##mm_base","##mm_hlp"\n\t"\
+ 	"psrlq	$32,"##mm_hlp"\n\t"\
+ 	"punpckldq "##mm_hlp","##mm_hlp"\n\t"\
+ 	"pfsub	"##mm_hlp","##mm_base"\n\t"
+ #endif
  
  #define TRANSZERO_3DNOW(A0,A4,A8,A12) \
  { \
      __asm__ __volatile__(\
  	"movq	%4, %%mm0\n\t" /* mm0 = wTB[0]*/\
***************
*** 99,109 ****
  	:"m"(wTB[0]), "m"(wTB[k*2]), "0"(A0), "2"(A4)\
  	:"memory");\
-     __asm__ __volatile__("femms":::"memory");\
  }
  
  #define TRANSHALF_16_3DNOW(A2,A6,A10,A14)\
  {\
-     __asm__ __volatile__("femms":::"memory");\
-     TRANS_FILL_MM6_MM7_3DNOW()\
      __asm__ __volatile__(\
  	"movq	%4, %%mm0\n\t"/*u.re = wTB[2].im + wTB[2].re;*/\
--- 106,113 ----
***************
*** 143,147 ****
  	:"m"(wTB[2]), "m"(wTB[6]), "0"(A2), "2"(A6), "m"(HSQRT2_3DNOW)\
  	:"memory");\
!     __asm__ __volatile__("femms":::"memory");\
  }
  
--- 147,200 ----
  	:"m"(wTB[2]), "m"(wTB[6]), "0"(A2), "2"(A6), "m"(HSQRT2_3DNOW)\
  	:"memory");\
! }
! 
! #define TRANS_3DNOW(A1,A5,A9,A13,WT,WB,D,D3)\
! { \
!     __asm__ __volatile__(\
! 	"movq	%1,	%%mm4\n\t"\
! 	"movq	%%mm4,	%%mm5\n\t"\
! 	"punpckldq %%mm4, %%mm4\n\t"/*mm4 = D.re | D.re */\
! 	"punpckhdq %%mm5, %%mm5\n\t"/*mm5 = D.im | D.im */\
! 	"movq	%0,	%%mm0\n\t"\
! 	"pfmul	%%mm0,	%%mm4\n\t"/* mm4 =u.re | u.im */\
! 	"pfmul	%%mm0,	%%mm5\n\t"/* mm5 = a.re | a.im */\
! 	PSWAP_MM("%%mm5","%%mm3")\
! 	"pfmul	%%mm7,	%%mm5\n\t"\
! 	"pfadd	%%mm5,	%%mm4\n\t"/* mm4 = u*/\
! 	"movq	%3,	%%mm1\n\t"\
! 	"movq	%2,	%%mm0\n\t"\
! 	PSWAP_MM("%%mm1","%%mm3")\
! 	"movq	%%mm0,	%%mm2\n\t"\
! 	"pfmul	%%mm1,	%%mm0\n\t"/* mm0 = a*/\
! 	"pfmul	%3,	%%mm2\n\t"/* mm2 = v*/\
! 	PFNACC_MM("%%mm2","%%mm3")\
! 	"pfacc	%%mm0,	%%mm0\n\t"\
! 	"punpckldq %%mm0,%%mm2\n\t"/*mm2 = v.re | a.re*/\
! 	"movq	%%mm2,	%%mm3\n\t"\
! 	"pfmul	%%mm7,	%%mm3\n\t"\
! 	"movq	%%mm4,	%%mm5\n\t"\
! 	"pfmul	%%mm6,	%%mm5\n\t"\
! 	"pfadd	%%mm3,	%%mm5\n\t"\
! 	PSWAP_MM("%%mm5","%%mm3")/* mm5 = v*/\
! 	"pfadd	%%mm2,	%%mm4\n\t"\
! 	:\
! 	:"m"(WT), "m"(D), "m"(WB), "m"(D3)\
! 	:"memory");\
!     __asm__ __volatile__(\
! 	"movq	%4, %%mm0\n\t"/* a1 = A1*/\
! 	"movq	%%mm0, %%mm1\n\t"\
! 	"pfadd	%%mm4, %%mm0\n\t"/*A1 = a1 + u*/\
! 	"pfsub	%%mm4, %%mm1\n\t"/*A9 = a1 - u*/\
! 	"movq	%%mm0, %0\n\t"\
! 	"movq	%%mm1, %1\n\t"\
! 	"movq	%5, %%mm2\n\t"/* a1 = A5*/\
! 	"movq	%%mm2, %%mm3\n\t"\
! 	"pfsub	%%mm5, %%mm2\n\t"/*A5 = a1 - v*/\
! 	"pfadd	%%mm5, %%mm3\n\t"/*A9 = a1 + v*/\
! 	"movq	%%mm2, %2\n\t"\
! 	"movq	%%mm3, %3"\
! 	:"=m"(A1), "=m"(A9), "=m"(A5), "=m"(A13)\
! 	:"0"(A1), "2"(A5), "m"(u), "m"(v)\
! 	:"memory");\
  }
  


_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog



More information about the MPlayer-cvslog mailing list