[Mplayer-cvslog] CVS: main/libvo aclib.c, 1.12, 1.13 aclib_template.c, 1.9, 1.10 osd.c, 1.22, 1.23 osd_template.c, 1.21, 1.22

Aurelien Jacobs CVS syncmail at mplayerhq.hu
Thu Oct 21 13:55:22 CEST 2004


CVS change done by Aurelien Jacobs CVS

Update of /cvsroot/mplayer/main/libvo
In directory mail:/var2/tmp/cvs-serv9471/libvo

Modified Files:
	aclib.c aclib_template.c osd.c osd_template.c 
Log Message:
Adapt the existing MMX/MMX2/SSE/3DNow optimizations so that they also work on x86_64

Index: aclib.c
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/aclib.c,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- aclib.c	25 Jun 2004 16:58:45 -0000	1.12
+++ aclib.c	21 Oct 2004 11:55:20 -0000	1.13
@@ -17,7 +17,7 @@
 //Feel free to fine-tune the above 2, it might be possible to get some speedup with them :)
 
 //#define STATISTICS
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 #define CAN_COMPILE_X86_ASM
 #endif
 
@@ -50,7 +50,6 @@
 #undef HAVE_3DNOW
 #undef HAVE_SSE
 #undef HAVE_SSE2
-#undef ARCH_X86
 /*
 #ifdef COMPILE_C
 #undef HAVE_MMX
@@ -69,7 +68,6 @@
 #undef HAVE_3DNOW
 #undef HAVE_SSE
 #undef HAVE_SSE2
-#define ARCH_X86
 #define RENAME(a) a ## _MMX
 #include "aclib_template.c"
 #endif
@@ -82,7 +80,6 @@
 #undef HAVE_3DNOW
 #undef HAVE_SSE
 #undef HAVE_SSE2
-#define ARCH_X86
 #define RENAME(a) a ## _MMX2
 #include "aclib_template.c"
 #endif
@@ -95,7 +92,6 @@
 #define HAVE_3DNOW
 #undef HAVE_SSE
 #undef HAVE_SSE2
-#define ARCH_X86
 #define RENAME(a) a ## _3DNow
 #include "aclib_template.c"
 #endif
@@ -108,7 +104,6 @@
 #undef HAVE_3DNOW
 #define HAVE_SSE
 #define HAVE_SSE2
-#define ARCH_X86
 #define RENAME(a) a ## _SSE
 #include "aclib_template.c"
 #endif

Index: aclib_template.c
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/aclib_template.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- aclib_template.c	22 Aug 2002 23:28:33 -0000	1.9
+++ aclib_template.c	21 Oct 2004 11:55:20 -0000	1.10
@@ -257,62 +257,62 @@
 	// Pure Assembly cuz gcc is a bit unpredictable ;)
 	if(i>=BLOCK_SIZE/64)
 		asm volatile(
-			"xorl %%eax, %%eax	\n\t"
+			"xor %%"REG_a", %%"REG_a"	\n\t"
 			".balign 16		\n\t"
 			"1:			\n\t"
-				"movl (%0, %%eax), %%ebx 	\n\t"
-				"movl 32(%0, %%eax), %%ebx 	\n\t"
-				"movl 64(%0, %%eax), %%ebx 	\n\t"
-				"movl 96(%0, %%eax), %%ebx 	\n\t"
-				"addl $128, %%eax		\n\t"
-				"cmpl %3, %%eax			\n\t"
+				"movl (%0, %%"REG_a"), %%ebx 	\n\t"
+				"movl 32(%0, %%"REG_a"), %%ebx 	\n\t"
+				"movl 64(%0, %%"REG_a"), %%ebx 	\n\t"
+				"movl 96(%0, %%"REG_a"), %%ebx 	\n\t"
+				"add $128, %%"REG_a"		\n\t"
+				"cmp %3, %%"REG_a"		\n\t"
 				" jb 1b				\n\t"
 
-			"xorl %%eax, %%eax	\n\t"
+			"xor %%"REG_a", %%"REG_a"	\n\t"
 
 				".balign 16		\n\t"
 				"2:			\n\t"
-				"movq (%0, %%eax), %%mm0\n"
-				"movq 8(%0, %%eax), %%mm1\n"
-				"movq 16(%0, %%eax), %%mm2\n"
-				"movq 24(%0, %%eax), %%mm3\n"
-				"movq 32(%0, %%eax), %%mm4\n"
-				"movq 40(%0, %%eax), %%mm5\n"
-				"movq 48(%0, %%eax), %%mm6\n"
-				"movq 56(%0, %%eax), %%mm7\n"
-				MOVNTQ" %%mm0, (%1, %%eax)\n"
-				MOVNTQ" %%mm1, 8(%1, %%eax)\n"
-				MOVNTQ" %%mm2, 16(%1, %%eax)\n"
-				MOVNTQ" %%mm3, 24(%1, %%eax)\n"
-				MOVNTQ" %%mm4, 32(%1, %%eax)\n"
-				MOVNTQ" %%mm5, 40(%1, %%eax)\n"
-				MOVNTQ" %%mm6, 48(%1, %%eax)\n"
-				MOVNTQ" %%mm7, 56(%1, %%eax)\n"
-				"addl $64, %%eax		\n\t"
-				"cmpl %3, %%eax		\n\t"
+				"movq (%0, %%"REG_a"), %%mm0\n"
+				"movq 8(%0, %%"REG_a"), %%mm1\n"
+				"movq 16(%0, %%"REG_a"), %%mm2\n"
+				"movq 24(%0, %%"REG_a"), %%mm3\n"
+				"movq 32(%0, %%"REG_a"), %%mm4\n"
+				"movq 40(%0, %%"REG_a"), %%mm5\n"
+				"movq 48(%0, %%"REG_a"), %%mm6\n"
+				"movq 56(%0, %%"REG_a"), %%mm7\n"
+				MOVNTQ" %%mm0, (%1, %%"REG_a")\n"
+				MOVNTQ" %%mm1, 8(%1, %%"REG_a")\n"
+				MOVNTQ" %%mm2, 16(%1, %%"REG_a")\n"
+				MOVNTQ" %%mm3, 24(%1, %%"REG_a")\n"
+				MOVNTQ" %%mm4, 32(%1, %%"REG_a")\n"
+				MOVNTQ" %%mm5, 40(%1, %%"REG_a")\n"
+				MOVNTQ" %%mm6, 48(%1, %%"REG_a")\n"
+				MOVNTQ" %%mm7, 56(%1, %%"REG_a")\n"
+				"add $64, %%"REG_a"		\n\t"
+				"cmp %3, %%"REG_a"		\n\t"
 				"jb 2b				\n\t"
 
 #if CONFUSION_FACTOR > 0
 	// a few percent speedup on out of order executing CPUs
-			"movl %5, %%eax		\n\t"
+			"mov %5, %%"REG_a"		\n\t"
 				"2:			\n\t"
 				"movl (%0), %%ebx	\n\t"
 				"movl (%0), %%ebx	\n\t"
 				"movl (%0), %%ebx	\n\t"
 				"movl (%0), %%ebx	\n\t"
-				"decl %%eax		\n\t"
+				"dec %%"REG_a"		\n\t"
 				" jnz 2b		\n\t"
 #endif
 
-			"xorl %%eax, %%eax	\n\t"
-			"addl %3, %0		\n\t"
-			"addl %3, %1		\n\t"
-			"subl %4, %2		\n\t"
-			"cmpl %4, %2		\n\t"
+			"xor %%"REG_a", %%"REG_a"	\n\t"
+			"add %3, %0		\n\t"
+			"add %3, %1		\n\t"
+			"sub %4, %2		\n\t"
+			"cmp %4, %2		\n\t"
 			" jae 1b		\n\t"
 				: "+r" (from), "+r" (to), "+r" (i)
-				: "r" (BLOCK_SIZE), "i" (BLOCK_SIZE/64), "i" (CONFUSION_FACTOR)
-				: "%eax", "%ebx"
+				: "r" ((long)BLOCK_SIZE), "i" (BLOCK_SIZE/64), "i" ((long)CONFUSION_FACTOR)
+				: "%"REG_a, "%ebx"
 		);
 
 	for(; i>0; i--)

Index: osd.c
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/osd.c,v
retrieving revision 1.22
retrieving revision 1.23
diff -u -r1.22 -r1.23
--- osd.c	31 May 2004 15:09:44 -0000	1.22
+++ osd.c	21 Oct 2004 11:55:20 -0000	1.23
@@ -14,7 +14,7 @@
 
 extern int verbose; // defined in mplayer.c
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 #define CAN_COMPILE_X86_ASM
 #endif
 
@@ -48,18 +48,18 @@
 #undef HAVE_MMX
 #undef HAVE_MMX2
 #undef HAVE_3DNOW
-#undef ARCH_X86
+
+#ifndef CAN_COMPILE_X86_ASM
 
 #ifdef COMPILE_C
 #undef HAVE_MMX
 #undef HAVE_MMX2
 #undef HAVE_3DNOW
-#undef ARCH_X86
 #define RENAME(a) a ## _C
 #include "osd_template.c"
 #endif
 
-#ifdef CAN_COMPILE_X86_ASM
+#else
 
 //X86 noMMX versions
 #ifdef COMPILE_C
@@ -67,7 +67,6 @@
 #undef HAVE_MMX
 #undef HAVE_MMX2
 #undef HAVE_3DNOW
-#define ARCH_X86
 #define RENAME(a) a ## _X86
 #include "osd_template.c"
 #endif
@@ -78,7 +77,6 @@
 #define HAVE_MMX
 #undef HAVE_MMX2
 #undef HAVE_3DNOW
-#define ARCH_X86
 #define RENAME(a) a ## _MMX
 #include "osd_template.c"
 #endif
@@ -89,7 +87,6 @@
 #define HAVE_MMX
 #define HAVE_MMX2
 #undef HAVE_3DNOW
-#define ARCH_X86
 #define RENAME(a) a ## _MMX2
 #include "osd_template.c"
 #endif
@@ -100,7 +97,6 @@
 #define HAVE_MMX
 #undef HAVE_MMX2
 #define HAVE_3DNOW
-#define ARCH_X86
 #define RENAME(a) a ## _3DNow
 #include "osd_template.c"
 #endif
@@ -129,7 +125,7 @@
 		vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_MMX)
 		vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
 		vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
 #else
 		vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -159,7 +155,7 @@
 		vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_MMX)
 		vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
 		vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
 #else
 		vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -189,7 +185,7 @@
 		vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_MMX)
 		vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
 		vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
 #else
 		vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -219,7 +215,7 @@
 		vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_MMX)
 		vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
 		vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
 #else
 		vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -249,7 +245,7 @@
 		vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_MMX)
 		vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
 		vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
 #else
 		vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -294,7 +290,7 @@
 			mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
 #elif defined (HAVE_MMX)
 			mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
 			mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
 #else
 			mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");

Index: osd_template.c
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/osd_template.c,v
retrieving revision 1.21
retrieving revision 1.22
diff -u -r1.21 -r1.22
--- osd_template.c	31 May 2004 15:13:35 -0000	1.21
+++ osd_template.c	21 Oct 2004 11:55:20 -0000	1.22
@@ -189,7 +189,7 @@
     for(y=0;y<h;y++){
         register unsigned char *dst = dstbase;
         register int x;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 #ifdef HAVE_MMX
     asm volatile(
 	PREFETCHW" %0\n\t"
@@ -253,7 +253,7 @@
 		"addl %2, %%eax\n\t"
 		"movb %%ah, 2(%0)\n\t"
 		:
-		:"r" (dst),
+		:"D" (dst),
 		 "r" ((unsigned)srca[x]),
 		 "r" (((unsigned)src[x])<<8)
 		:"%eax", "%ecx"
@@ -293,7 +293,7 @@
 #endif
     for(y=0;y<h;y++){
         register int x;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 #ifdef HAVE_MMX
 #ifdef HAVE_3DNOW
     asm volatile(




More information about the MPlayer-cvslog mailing list