[Mplayer-users] [mplayer-20010411 PATCH] Fix MMX2 support
Nick Kurshev
nick at radiotelcom.ru
Thu Apr 12 14:00:59 CEST 2001
Hello!
I'm sorry! In the previous patch I sent you a version that works only on K7 CPUs.
(The PREFETCH instruction exists only on K6-2 and K7 CPUs. Its full analog is PREFETCHNTA,
which exists on both K7 and P3 processors.) Also, I've slightly rewritten the small_memcpy version
to speed up mplayer. (The previous version of small_memcpy may be faster only for 4-byte
aligned data, but for misaligned data it's slower. Using MOVSB for small blocks is faster
for such data.)
Below is the patch for it:
diff -u -r -N main/libvo/fastmemcpy.h main.new/libvo/fastmemcpy.h
--- main/libvo/fastmemcpy.h Thu Apr 12 04:09:57 2001
+++ main.new/libvo/fastmemcpy.h Thu Apr 12 11:44:07 2001
@@ -2,31 +2,19 @@
This part of code was taken by from Linux-2.4.3 and slightly modified
for MMX2 instruction set. I have done it since linux uses page aligned
blocks but mplayer uses weakly ordered data and original sources can not
-speedup their. Only using prefetch and movntq together have effect!
+speedup their. Only using prefetchnta and movntq together have effect!
If you have questions please contact with me: Nick Kurshev: nickols_k at mail.ru.
*/
-
-#ifndef HAVE_MMX2
-//static inline void * __memcpy(void * to, const void * from, unsigned n)
-inline static void * fast_memcpy(void * to, const void * from, unsigned n)
-{
-int d0, d1, d2;
-__asm__ __volatile__(
- "rep ; movsl\n\t"
- "testb $2,%b4\n\t"
- "je 1f\n\t"
- "movsw\n"
- "1:\ttestb $1,%b4\n\t"
- "je 2f\n\t"
- "movsb\n"
- "2:"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
- : "memory");
-return (to);
+#ifdef HAVE_MMX2
+/* for small memory blocks (<256 bytes) this version is faster */
+#define small_memcpy(to,from,n)\
+{\
+__asm__ __volatile__(\
+ "rep ; movsb\n"\
+ ::"D" (to), "S" (from),"c" (n)\
+ : "memory");\
}
-#else
-//inline static void *__memcpy_mmx2(void *to, const void *from, unsigned len)
+
inline static void * fast_memcpy(void * to, const void * from, unsigned len)
{
void *p;
@@ -37,11 +25,12 @@
p = to;
i = len >> 6; /* len/64 */
__asm__ __volatile__ (
- "1: prefetch (%0)\n" /* This set is 28 bytes */
- " prefetch 64(%0)\n"
- " prefetch 128(%0)\n"
- " prefetch 192(%0)\n"
- " prefetch 256(%0)\n"
+ "1: prefetchnta (%0)\n" /* This set is 28 bytes */
+ " prefetchnta 64(%0)\n"
+ " prefetchnta 128(%0)\n"
+ " prefetchnta 192(%0)\n"
+ " prefetchnta 256(%0)\n"
+#if 0
"2: \n"
".section .fixup, \"ax\"\n"
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
@@ -51,13 +40,14 @@
" .align 4\n"
" .long 1b, 3b\n"
".previous"
+#endif
: : "r" (from) );
for(; i>0; i--)
{
__asm__ __volatile__ (
- "1: prefetch 320(%0)\n"
+ "1: prefetchnta 320(%0)\n"
"2: movq (%0), %%mm0\n"
" movq 8(%0), %%mm1\n"
" movq 16(%0), %%mm2\n"
@@ -74,6 +64,7 @@
" movntq %%mm1, 40(%1)\n"
" movntq %%mm2, 48(%1)\n"
" movntq %%mm3, 56(%1)\n"
+#if 0
".section .fixup, \"ax\"\n"
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
@@ -82,6 +73,7 @@
" .align 4\n"
" .long 1b, 3b\n"
".previous"
+#endif
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
@@ -91,10 +83,10 @@
/*
* Now do the tail of the block
*/
- memcpy(to, from, len&63);
+ small_memcpy(to, from, len&63);
return p;
}
+#define memcpy(a,b,c) fast_memcpy(a,b,c)
#endif
-#define memcpy(a,b,c) fast_memcpy(a,b,c)
Also, I want to suggest that you don't put your own CFLAGS in config.mak. I have CFLAGS exported
from /etc/profile, and I think many people do too:
diff -u -r -N main/configure main.new/configure
--- main/configure Thu Apr 12 00:08:27 2001
+++ main.new/configure Thu Apr 12 11:00:55 2001
@@ -659,6 +659,11 @@
_lirclibs=''
fi
+# checking for CFLAGS
+if test "$CFLAGS" = ""; then
+ CFLAGS="-O2 -fomit-frame-pointer -pipe -ffats-math"
+fi
+
echo
echo "Creating $MCONF"
@@ -669,7 +674,7 @@
AR=ar
CC=$_cc
# OPTFLAGS=-O4 -march=$proc -mcpu=$proc -pipe -fomit-frame-pointer -ffast-math
-OPTFLAGS=-O4 -march=$proc -mcpu=$proc -pipe -ffast-math
+OPTFLAGS=$CFLAGS
# LIBS=-L/usr/lib -L/usr/local/lib $_x11libdir $_gllib $_sdllib $_dgalib $_x11lib $_xvlib
X_LIBS=$_x11libdir $_gllib $_sdllib $_dgalib $_x11lib $_xvlib $_vmlib $_svgalib
TERMCAP_LIB=$_libtermcap
Best regards! Nick
_______________________________________________
Mplayer-users mailing list
Mplayer-users at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-users
More information about the MPlayer-users
mailing list