[Mplayer-cvslog] CVS: main/libvo aclib.c, 1.12, 1.13 aclib_template.c, 1.9, 1.10 osd.c, 1.22, 1.23 osd_template.c, 1.21, 1.22
Aurelien Jacobs CVS
syncmail at mplayerhq.hu
Thu Oct 21 13:55:22 CEST 2004
- Previous message: [Mplayer-cvslog] CVS: main bswap.h, 1.4, 1.5 configure, 1.922, 1.923 cpudetect.c, 1.36, 1.37 cpudetect.h, 1.10, 1.11
- Next message: [Mplayer-cvslog] CVS: main/libmpcodecs pullup.c, 1.19, 1.20 vf_decimate.c, 1.1, 1.2 vf_divtc.c, 1.1, 1.2 vf_eq.c, 1.7, 1.8 vf_eq2.c, 1.8, 1.9 vf_filmdint.c, 1.3, 1.4 vf_halfpack.c, 1.5, 1.6 vf_ilpack.c, 1.4, 1.5 vf_ivtc.c, 1.3, 1.4 vf_noise.c, 1.13, 1.14 vf_spp.c, 1.23, 1.24 vf_tfields.c, 1.7, 1.8
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
CVS change done by Aurelien Jacobs CVS
Update of /cvsroot/mplayer/main/libvo
In directory mail:/var2/tmp/cvs-serv9471/libvo
Modified Files:
aclib.c aclib_template.c osd.c osd_template.c
Log Message:
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
Index: aclib.c
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/aclib.c,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- aclib.c 25 Jun 2004 16:58:45 -0000 1.12
+++ aclib.c 21 Oct 2004 11:55:20 -0000 1.13
@@ -17,7 +17,7 @@
//Feel free to fine-tune the above 2, it might be possible to get some speedup with them :)
//#define STATISTICS
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
#define CAN_COMPILE_X86_ASM
#endif
@@ -50,7 +50,6 @@
#undef HAVE_3DNOW
#undef HAVE_SSE
#undef HAVE_SSE2
-#undef ARCH_X86
/*
#ifdef COMPILE_C
#undef HAVE_MMX
@@ -69,7 +68,6 @@
#undef HAVE_3DNOW
#undef HAVE_SSE
#undef HAVE_SSE2
-#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "aclib_template.c"
#endif
@@ -82,7 +80,6 @@
#undef HAVE_3DNOW
#undef HAVE_SSE
#undef HAVE_SSE2
-#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "aclib_template.c"
#endif
@@ -95,7 +92,6 @@
#define HAVE_3DNOW
#undef HAVE_SSE
#undef HAVE_SSE2
-#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "aclib_template.c"
#endif
@@ -108,7 +104,6 @@
#undef HAVE_3DNOW
#define HAVE_SSE
#define HAVE_SSE2
-#define ARCH_X86
#define RENAME(a) a ## _SSE
#include "aclib_template.c"
#endif
Index: aclib_template.c
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/aclib_template.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- aclib_template.c 22 Aug 2002 23:28:33 -0000 1.9
+++ aclib_template.c 21 Oct 2004 11:55:20 -0000 1.10
@@ -257,62 +257,62 @@
// Pure Assembly cuz gcc is a bit unpredictable ;)
if(i>=BLOCK_SIZE/64)
asm volatile(
- "xorl %%eax, %%eax \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
- "movl (%0, %%eax), %%ebx \n\t"
- "movl 32(%0, %%eax), %%ebx \n\t"
- "movl 64(%0, %%eax), %%ebx \n\t"
- "movl 96(%0, %%eax), %%ebx \n\t"
- "addl $128, %%eax \n\t"
- "cmpl %3, %%eax \n\t"
+ "movl (%0, %%"REG_a"), %%ebx \n\t"
+ "movl 32(%0, %%"REG_a"), %%ebx \n\t"
+ "movl 64(%0, %%"REG_a"), %%ebx \n\t"
+ "movl 96(%0, %%"REG_a"), %%ebx \n\t"
+ "add $128, %%"REG_a" \n\t"
+ "cmp %3, %%"REG_a" \n\t"
" jb 1b \n\t"
- "xorl %%eax, %%eax \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t"
".balign 16 \n\t"
"2: \n\t"
- "movq (%0, %%eax), %%mm0\n"
- "movq 8(%0, %%eax), %%mm1\n"
- "movq 16(%0, %%eax), %%mm2\n"
- "movq 24(%0, %%eax), %%mm3\n"
- "movq 32(%0, %%eax), %%mm4\n"
- "movq 40(%0, %%eax), %%mm5\n"
- "movq 48(%0, %%eax), %%mm6\n"
- "movq 56(%0, %%eax), %%mm7\n"
- MOVNTQ" %%mm0, (%1, %%eax)\n"
- MOVNTQ" %%mm1, 8(%1, %%eax)\n"
- MOVNTQ" %%mm2, 16(%1, %%eax)\n"
- MOVNTQ" %%mm3, 24(%1, %%eax)\n"
- MOVNTQ" %%mm4, 32(%1, %%eax)\n"
- MOVNTQ" %%mm5, 40(%1, %%eax)\n"
- MOVNTQ" %%mm6, 48(%1, %%eax)\n"
- MOVNTQ" %%mm7, 56(%1, %%eax)\n"
- "addl $64, %%eax \n\t"
- "cmpl %3, %%eax \n\t"
+ "movq (%0, %%"REG_a"), %%mm0\n"
+ "movq 8(%0, %%"REG_a"), %%mm1\n"
+ "movq 16(%0, %%"REG_a"), %%mm2\n"
+ "movq 24(%0, %%"REG_a"), %%mm3\n"
+ "movq 32(%0, %%"REG_a"), %%mm4\n"
+ "movq 40(%0, %%"REG_a"), %%mm5\n"
+ "movq 48(%0, %%"REG_a"), %%mm6\n"
+ "movq 56(%0, %%"REG_a"), %%mm7\n"
+ MOVNTQ" %%mm0, (%1, %%"REG_a")\n"
+ MOVNTQ" %%mm1, 8(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm2, 16(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm3, 24(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm4, 32(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm5, 40(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm6, 48(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm7, 56(%1, %%"REG_a")\n"
+ "add $64, %%"REG_a" \n\t"
+ "cmp %3, %%"REG_a" \n\t"
"jb 2b \n\t"
#if CONFUSION_FACTOR > 0
// a few percent speedup on out of order executing CPUs
- "movl %5, %%eax \n\t"
+ "mov %5, %%"REG_a" \n\t"
"2: \n\t"
"movl (%0), %%ebx \n\t"
"movl (%0), %%ebx \n\t"
"movl (%0), %%ebx \n\t"
"movl (%0), %%ebx \n\t"
- "decl %%eax \n\t"
+ "dec %%"REG_a" \n\t"
" jnz 2b \n\t"
#endif
- "xorl %%eax, %%eax \n\t"
- "addl %3, %0 \n\t"
- "addl %3, %1 \n\t"
- "subl %4, %2 \n\t"
- "cmpl %4, %2 \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t"
+ "add %3, %0 \n\t"
+ "add %3, %1 \n\t"
+ "sub %4, %2 \n\t"
+ "cmp %4, %2 \n\t"
" jae 1b \n\t"
: "+r" (from), "+r" (to), "+r" (i)
- : "r" (BLOCK_SIZE), "i" (BLOCK_SIZE/64), "i" (CONFUSION_FACTOR)
- : "%eax", "%ebx"
+ : "r" ((long)BLOCK_SIZE), "i" (BLOCK_SIZE/64), "i" ((long)CONFUSION_FACTOR)
+ : "%"REG_a, "%ebx"
);
for(; i>0; i--)
Index: osd.c
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/osd.c,v
retrieving revision 1.22
retrieving revision 1.23
diff -u -r1.22 -r1.23
--- osd.c 31 May 2004 15:09:44 -0000 1.22
+++ osd.c 21 Oct 2004 11:55:20 -0000 1.23
@@ -14,7 +14,7 @@
extern int verbose; // defined in mplayer.c
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
#define CAN_COMPILE_X86_ASM
#endif
@@ -48,18 +48,18 @@
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
-#undef ARCH_X86
+
+#ifndef CAN_COMPILE_X86_ASM
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
-#undef ARCH_X86
#define RENAME(a) a ## _C
#include "osd_template.c"
#endif
-#ifdef CAN_COMPILE_X86_ASM
+#else
//X86 noMMX versions
#ifdef COMPILE_C
@@ -67,7 +67,6 @@
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
-#define ARCH_X86
#define RENAME(a) a ## _X86
#include "osd_template.c"
#endif
@@ -78,7 +77,6 @@
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
-#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "osd_template.c"
#endif
@@ -89,7 +87,6 @@
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
-#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "osd_template.c"
#endif
@@ -100,7 +97,6 @@
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
-#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "osd_template.c"
#endif
@@ -129,7 +125,7 @@
vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -159,7 +155,7 @@
vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -189,7 +185,7 @@
vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -219,7 +215,7 @@
vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -249,7 +245,7 @@
vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -294,7 +290,7 @@
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
#elif defined (HAVE_MMX)
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
#else
mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
Index: osd_template.c
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/osd_template.c,v
retrieving revision 1.21
retrieving revision 1.22
diff -u -r1.21 -r1.22
--- osd_template.c 31 May 2004 15:13:35 -0000 1.21
+++ osd_template.c 21 Oct 2004 11:55:20 -0000 1.22
@@ -189,7 +189,7 @@
for(y=0;y<h;y++){
register unsigned char *dst = dstbase;
register int x;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
#ifdef HAVE_MMX
asm volatile(
PREFETCHW" %0\n\t"
@@ -253,7 +253,7 @@
"addl %2, %%eax\n\t"
"movb %%ah, 2(%0)\n\t"
:
- :"r" (dst),
+ :"D" (dst),
"r" ((unsigned)srca[x]),
"r" (((unsigned)src[x])<<8)
:"%eax", "%ecx"
@@ -293,7 +293,7 @@
#endif
for(y=0;y<h;y++){
register int x;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
#ifdef HAVE_MMX
#ifdef HAVE_3DNOW
asm volatile(
- Previous message: [Mplayer-cvslog] CVS: main bswap.h, 1.4, 1.5 configure, 1.922, 1.923 cpudetect.c, 1.36, 1.37 cpudetect.h, 1.10, 1.11
- Next message: [Mplayer-cvslog] CVS: main/libmpcodecs pullup.c, 1.19, 1.20 vf_decimate.c, 1.1, 1.2 vf_divtc.c, 1.1, 1.2 vf_eq.c, 1.7, 1.8 vf_eq2.c, 1.8, 1.9 vf_filmdint.c, 1.3, 1.4 vf_halfpack.c, 1.5, 1.6 vf_ilpack.c, 1.4, 1.5 vf_ivtc.c, 1.3, 1.4 vf_noise.c, 1.13, 1.14 vf_spp.c, 1.23, 1.24 vf_tfields.c, 1.7, 1.8
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the MPlayer-cvslog mailing list