[Mplayer-cvslog] CVS: main/libvo fastmemcpy.h,1.4,1.5
Felix Buenemann
atmosfear at users.sourceforge.net
Sat Apr 14 19:56:46 CEST 2001
Update of /cvsroot/mplayer/main/libvo
In directory usw-pr-cvs1:/tmp/cvs-serv9489
Modified Files:
fastmemcpy.h
Log Message:
- applied SSE patch by Nick Kurshev
Index: fastmemcpy.h
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/fastmemcpy.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -r1.4 -r1.5
*** fastmemcpy.h 2001/04/12 14:40:10 1.4
--- fastmemcpy.h 2001/04/14 17:56:44 1.5
***************
*** 28,85 ****
__asm__ __volatile__ (
! "1: prefetchnta (%0)\n" /* This set is 28 bytes */
! " prefetchnta 64(%0)\n"
! " prefetchnta 128(%0)\n"
! " prefetchnta 192(%0)\n"
! " prefetchnta 256(%0)\n"
! #if 0
! "2: \n"
! ".section .fixup, \"ax\"\n"
! "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
! " jmp 2b\n"
! ".previous\n"
! ".section __ex_table,\"a\"\n"
! " .align 4\n"
! " .long 1b, 3b\n"
! ".previous"
! #endif
: : "r" (from) );
!
!
for(; i>0; i--)
{
__asm__ __volatile__ (
! "1: prefetchnta 320(%0)\n"
! "2: movq (%0), %%mm0\n"
! " movq 8(%0), %%mm1\n"
! " movq 16(%0), %%mm2\n"
! " movq 24(%0), %%mm3\n"
! " movntq %%mm0, (%1)\n"
! " movntq %%mm1, 8(%1)\n"
! " movntq %%mm2, 16(%1)\n"
! " movntq %%mm3, 24(%1)\n"
! " movq 32(%0), %%mm0\n"
! " movq 40(%0), %%mm1\n"
! " movq 48(%0), %%mm2\n"
! " movq 56(%0), %%mm3\n"
! " movntq %%mm0, 32(%1)\n"
! " movntq %%mm1, 40(%1)\n"
! " movntq %%mm2, 48(%1)\n"
! " movntq %%mm3, 56(%1)\n"
! #if 0
! ".section .fixup, \"ax\"\n"
! "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
! " jmp 2b\n"
! ".previous\n"
! ".section __ex_table,\"a\"\n"
! " .align 4\n"
! " .long 1b, 3b\n"
! ".previous"
! #endif
! : : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
}
! __asm__ __volatile__ ("emms":::"memory");
}
/*
--- 28,82 ----
__asm__ __volatile__ (
! "prefetchnta (%0)\n"
! "prefetchnta 64(%0)\n"
! "prefetchnta 128(%0)\n"
! "prefetchnta 192(%0)\n"
! "prefetchnta 256(%0)\n"
: : "r" (from) );
! /*
! This algorithm is most effective when the code consecutively
! reads and writes blocks that are the size of a cache line.
! The size of a cache line is processor-dependent.
! It will, however, be a minimum of 32 bytes on any processor.
! It would be better for the number of instructions that
! perform reading and writing to be a multiple of the number of
! the processor's decoders, but that is not always possible.
! */
for(; i>0; i--)
{
__asm__ __volatile__ (
! "prefetchnta 320(%0)\n"
! #ifdef HAVE_SSE /* Only P3 (may be Cyrix3) */
! "movups (%0), %%xmm0\n"
! "movups 16(%0), %%xmm1\n"
! "movntps %%xmm0, (%1)\n"
! "movntps %%xmm1, 16(%1)\n"
! "movups 32(%0), %%xmm0\n"
! "movups 48(%0), %%xmm1\n"
! "movntps %%xmm0, 32(%1)\n"
! "movntps %%xmm1, 48(%1)\n"
! #else /* Only K7 (may be other) */
! "movq (%0), %%mm0\n"
! "movq 8(%0), %%mm1\n"
! "movq 16(%0), %%mm2\n"
! "movq 24(%0), %%mm3\n"
! "movntq %%mm0, (%1)\n"
! "movntq %%mm1, 8(%1)\n"
! "movntq %%mm2, 16(%1)\n"
! "movntq %%mm3, 24(%1)\n"
! "movq 32(%0), %%mm0\n"
! "movq 40(%0), %%mm1\n"
! "movq 48(%0), %%mm2\n"
! "movq 56(%0), %%mm3\n"
! "movntq %%mm0, 32(%1)\n"
! "movntq %%mm1, 40(%1)\n"
! "movntq %%mm2, 48(%1)\n"
! "movntq %%mm3, 56(%1)\n"
! #endif
! :: "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
}
! __asm__ __volatile__ ("emms":::"memory");
}
/*
_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog
More information about the MPlayer-cvslog
mailing list