[Mplayer-cvslog] CVS: main/libvo fastmemcpy.h,1.4,1.5

Felix Buenemann atmosfear at users.sourceforge.net
Sat Apr 14 19:56:46 CEST 2001


Update of /cvsroot/mplayer/main/libvo
In directory usw-pr-cvs1:/tmp/cvs-serv9489

Modified Files:
	fastmemcpy.h 
Log Message:
- applied SSE patch by Nick Kurshev


Index: fastmemcpy.h
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/fastmemcpy.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -r1.4 -r1.5
*** fastmemcpy.h	2001/04/12 14:40:10	1.4
--- fastmemcpy.h	2001/04/14 17:56:44	1.5
***************
*** 28,85 ****
  	  
  	__asm__ __volatile__ (
! 		"1: prefetchnta (%0)\n"		/* This set is 28 bytes */
! 		"   prefetchnta 64(%0)\n"
! 		"   prefetchnta 128(%0)\n"
! 		"   prefetchnta 192(%0)\n"
! 		"   prefetchnta 256(%0)\n"
! #if 0		
! 		"2:  \n"
! 		".section .fixup, \"ax\"\n"
! 		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
! 		"   jmp 2b\n"
! 		".previous\n"
! 		".section __ex_table,\"a\"\n"
! 		"	.align 4\n"
! 		"	.long 1b, 3b\n"
! 		".previous"
! #endif		
  		: : "r" (from) );
! 		
! 	
  	for(; i>0; i--)
  	{
  		__asm__ __volatile__ (
! 		"1:  prefetchnta 320(%0)\n"
! 		"2:  movq (%0), %%mm0\n"
! 		"  movq 8(%0), %%mm1\n"
! 		"  movq 16(%0), %%mm2\n"
! 		"  movq 24(%0), %%mm3\n"
! 		"  movntq %%mm0, (%1)\n"
! 		"  movntq %%mm1, 8(%1)\n"
! 		"  movntq %%mm2, 16(%1)\n"
! 		"  movntq %%mm3, 24(%1)\n"
! 		"  movq 32(%0), %%mm0\n"
! 		"  movq 40(%0), %%mm1\n"
! 		"  movq 48(%0), %%mm2\n"
! 		"  movq 56(%0), %%mm3\n"
! 		"  movntq %%mm0, 32(%1)\n"
! 		"  movntq %%mm1, 40(%1)\n"
! 		"  movntq %%mm2, 48(%1)\n"
! 		"  movntq %%mm3, 56(%1)\n"
! #if 0		
! 		".section .fixup, \"ax\"\n"
! 		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
! 		"   jmp 2b\n"
! 		".previous\n"
! 		".section __ex_table,\"a\"\n"
! 		"	.align 4\n"
! 		"	.long 1b, 3b\n"
! 		".previous"
! #endif		
! 		: : "r" (from), "r" (to) : "memory");
  		from+=64;
  		to+=64;
  	}
! 	        __asm__ __volatile__ ("emms":::"memory");
  	}
  	/*
--- 28,82 ----
  	  
  	__asm__ __volatile__ (
! 		"prefetchnta (%0)\n"
! 		"prefetchnta 64(%0)\n"
! 		"prefetchnta 128(%0)\n"
! 		"prefetchnta 192(%0)\n"
! 		"prefetchnta 256(%0)\n"
  		: : "r" (from) );
!         /*
!            This algorithm is most effective when the code sequentially
!            reads and writes blocks that are the size of a cache line.
!            The size of a cache line is processor-dependent.
!            It will, however, be a minimum of 32 bytes on any processor.
!            It would be better for the number of instructions that
!            perform reading and writing to be a multiple of the number of
!            the processor's decoders, but that is not always possible.
!         */
  	for(; i>0; i--)
  	{
  		__asm__ __volatile__ (
! 		"prefetchnta 320(%0)\n"
! #ifdef HAVE_SSE /* Only P3 (may be Cyrix3) */
! 		"movups (%0), %%xmm0\n"
! 		"movups 16(%0), %%xmm1\n"
! 		"movntps %%xmm0, (%1)\n"
! 		"movntps %%xmm1, 16(%1)\n"
! 		"movups 32(%0), %%xmm0\n"
! 		"movups 48(%0), %%xmm1\n"
! 		"movntps %%xmm0, 32(%1)\n"
! 		"movntps %%xmm1, 48(%1)\n"
! #else /* Only K7 (may be other) */
! 		"movq (%0), %%mm0\n"
! 		"movq 8(%0), %%mm1\n"
! 		"movq 16(%0), %%mm2\n"
! 		"movq 24(%0), %%mm3\n"
! 		"movntq %%mm0, (%1)\n"
! 		"movntq %%mm1, 8(%1)\n"
! 		"movntq %%mm2, 16(%1)\n"
! 		"movntq %%mm3, 24(%1)\n"
! 		"movq 32(%0), %%mm0\n"
! 		"movq 40(%0), %%mm1\n"
! 		"movq 48(%0), %%mm2\n"
! 		"movq 56(%0), %%mm3\n"
! 		"movntq %%mm0, 32(%1)\n"
! 		"movntq %%mm1, 40(%1)\n"
! 		"movntq %%mm2, 48(%1)\n"
! 		"movntq %%mm3, 56(%1)\n"
! #endif
! 		:: "r" (from), "r" (to) : "memory");
  		from+=64;
  		to+=64;
  	}
! 		__asm__ __volatile__ ("emms":::"memory");
  	}
  	/*


_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog



More information about the MPlayer-cvslog mailing list