[Mplayer-cvslog] CVS: main/libvo aclib.c,1.4,1.5 aclib_template.c,1.4,1.5 fastmemcpy.h,1.15,1.16
Michael Niedermayer
michael at mplayer.dev.hu
Wed Feb 13 00:17:25 CET 2002
- Previous message: [Mplayer-cvslog] CVS: main/DOCS/Hungarian documentation.html,1.110,1.111 encoding.html,1.16,1.17 example.conf,1.12,1.13 faq.html,1.44,1.45 mplayer.1,1.40,1.41
- Next message: [Mplayer-cvslog] CVS: main/libvo vo_dga.c,1.41,1.42
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/mplayer/main/libvo
In directory mplayer:/var/tmp.root/cvs-serv32643
Modified Files:
aclib.c aclib_template.c fastmemcpy.h
Log Message:
mem2agpcpy()
Index: aclib.c
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/aclib.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- aclib.c 9 Dec 2001 15:25:11 -0000 1.4
+++ aclib.c 12 Feb 2002 23:17:14 -0000 1.5
@@ -118,4 +118,34 @@
#endif //!RUNTIME_CPUDETECT
}
-#endif /* use fastmemcpy */
\ No newline at end of file
+/**
+ * Copy len bytes from `from` to `to`, picking a mem2agpcpy_* variant.
+ *
+ * With RUNTIME_CPUDETECT the variant is chosen at run time from gCpuCaps
+ * (only when CAN_COMPILE_X86_ASM is defined); otherwise it is chosen at
+ * compile time from HAVE_MMX2 / HAVE_3DNOW / HAVE_MMX.  When none of the
+ * MMX-class variants applies, plain memcpy() is used.
+ *
+ * @param to   destination buffer
+ * @param from source buffer
+ * @param len  number of bytes to copy
+ * @return to
+ */
+inline void * mem2agpcpy(void * to, const void * from, size_t len)
+{
+#ifdef RUNTIME_CPUDETECT
+#ifdef CAN_COMPILE_X86_ASM
+    // ordered by speed, fastest first
+    if(gCpuCaps.hasMMX2)
+        mem2agpcpy_MMX2(to, from, len);
+    else if(gCpuCaps.has3DNow)
+        mem2agpcpy_3DNow(to, from, len);
+    else if(gCpuCaps.hasMMX)
+        mem2agpcpy_MMX(to, from, len);
+    else
+#endif //CAN_COMPILE_X86_ASM
+        memcpy(to, from, len); // prior to MMX we use the standard memcpy
+#else
+#ifdef HAVE_MMX2
+    mem2agpcpy_MMX2(to, from, len);
+#elif defined (HAVE_3DNOW)
+    mem2agpcpy_3DNow(to, from, len);
+#elif defined (HAVE_MMX)
+    mem2agpcpy_MMX(to, from, len);
+#else
+    memcpy(to, from, len); // prior to MMX we use the standard memcpy
+#endif
+
+#endif //!RUNTIME_CPUDETECT
+    /* The function is declared to return void *; falling off the end
+     * without a value is undefined if the caller uses the result. */
+    return to;
+}
+
+
+#endif /* use fastmemcpy */
+
Index: aclib_template.c
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/aclib_template.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- aclib_template.c 9 Dec 2001 15:25:11 -0000 1.4
+++ aclib_template.c 12 Feb 2002 23:17:14 -0000 1.5
@@ -353,3 +353,88 @@
if(len) small_memcpy(to, from, len);
return retval;
}
+
+/**
+ * Special copy routine for mem -> agp/pci copies (based upon fast_memcpy).
+ *
+ * Blocks of at least MIN_LEN bytes are copied 64 bytes per iteration: eight
+ * movq loads followed by eight MOVNTQ stores, after the destination has
+ * been brought to an 8-byte boundary.  The remaining tail (and any block
+ * shorter than MIN_LEN) is handed to small_memcpy().
+ *
+ * NOTE(review): small_memcpy, MIN_LEN, PREFETCH, MOVNTQ and EMMS are defined
+ * outside this hunk.  The code below relies on small_memcpy advancing `to`
+ * and `from` past the bytes it copied -- confirm against its definition.
+ *
+ * @param to   destination buffer
+ * @param from source buffer
+ * @param len  number of bytes to copy
+ * @return the original value of to
+ */
+static inline void * RENAME(mem2agpcpy)(void * to, const void * from, size_t len)
+{
+    void *retval;
+    size_t i;
+    retval = to;
+#ifdef STATISTICS
+    {
+        /* Histogram of requested lengths, bucketed by power of two. */
+        static int freq[33];
+        static int t=0;
+        int i;
+        /* Bound the shift count and shift a size_t: shifting a signed int
+         * by 31 or more is undefined.  freq[] has room for i == 32. */
+        for(i=0; i<32 && len>((size_t)1<<i); i++);
+        freq[i]++;
+        t++;
+        if(1024*1024*1024 % t == 0)
+            for(i=0; i<32; i++)
+                printf("mem2agp freq < %8d %4d\n", 1<<i, freq[i]);
+    }
+#endif
+    if(len >= MIN_LEN)
+    {
+        register unsigned long int delta;
+        /* Align destination to an 8-byte boundary (the width of one movq) */
+        delta = ((unsigned long int)to)&7;
+        if(delta)
+        {
+            delta=8-delta;
+            len -= delta;
+            small_memcpy(to, from, delta);
+        }
+        i = len >> 6; /* len/64 */
+        len &= 63;
+        /*
+           This algorithm is most effective when the code consecutively
+           reads and writes blocks which have the size of a cache line.
+           The size of a cache line is processor-dependent.
+           It will, however, be a minimum of 32 bytes on any processor.
+           It would be better to have the number of instructions which
+           perform reading and writing be a multiple of the number of
+           the processor's decoders, but that is not always possible.
+        */
+        for(; i>0; i--)
+        {
+            __asm__ __volatile__ (
+            PREFETCH" 320(%0)\n"
+            "movq (%0), %%mm0\n"
+            "movq 8(%0), %%mm1\n"
+            "movq 16(%0), %%mm2\n"
+            "movq 24(%0), %%mm3\n"
+            "movq 32(%0), %%mm4\n"
+            "movq 40(%0), %%mm5\n"
+            "movq 48(%0), %%mm6\n"
+            "movq 56(%0), %%mm7\n"
+            MOVNTQ" %%mm0, (%1)\n"
+            MOVNTQ" %%mm1, 8(%1)\n"
+            MOVNTQ" %%mm2, 16(%1)\n"
+            MOVNTQ" %%mm3, 24(%1)\n"
+            MOVNTQ" %%mm4, 32(%1)\n"
+            MOVNTQ" %%mm5, 40(%1)\n"
+            MOVNTQ" %%mm6, 48(%1)\n"
+            MOVNTQ" %%mm7, 56(%1)\n"
+            :: "r" (from), "r" (to) : "memory");
+            /* Advance both pointers by one 64-byte block.  A cast is not an
+             * lvalue in ISO C, so assign instead of using "(cast)p += 64". */
+            from = (const unsigned char *)from + 64;
+            to = (unsigned char *)to + 64;
+        }
+#ifdef HAVE_MMX2
+        /* since movntq is weakly-ordered, a "sfence"
+         * is needed to become ordered again. */
+        __asm__ __volatile__ ("sfence":::"memory");
+#endif
+#ifndef HAVE_SSE
+        /* enables to use FPU */
+        __asm__ __volatile__ (EMMS:::"memory");
+#endif
+    }
+    /*
+     * Now do the tail of the block
+     */
+    if(len) small_memcpy(to, from, len);
+    return retval;
+}
+
+
Index: fastmemcpy.h
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/fastmemcpy.h,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -r1.15 -r1.16
--- fastmemcpy.h 15 Jun 2001 12:42:00 -0000 1.15
+++ fastmemcpy.h 12 Feb 2002 23:17:14 -0000 1.16
@@ -9,8 +9,14 @@
#include <stddef.h>
extern void * fast_memcpy(void * to, const void * from, size_t len);
+extern void * mem2agpcpy(void * to, const void * from, size_t len);
#define memcpy(a,b,c) fast_memcpy(a,b,c)
-#endif /* HAVE_MMX/MMX2/3DNOW/SSE/SSE2 */
-#endif /* USE_FASTMEMCPY */
+#else /* HAVE_MMX/MMX2/3DNOW/SSE/SSE2 */
+#define mem2agpcpy(a,b,c) memcpy(a,b,c)
+#endif
+
+#else /* USE_FASTMEMCPY */
+#define mem2agpcpy(a,b,c) memcpy(a,b,c)
+#endif
#endif
- Previous message: [Mplayer-cvslog] CVS: main/DOCS/Hungarian documentation.html,1.110,1.111 encoding.html,1.16,1.17 example.conf,1.12,1.13 faq.html,1.44,1.45 mplayer.1,1.40,1.41
- Next message: [Mplayer-cvslog] CVS: main/libvo vo_dga.c,1.41,1.42
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the MPlayer-cvslog
mailing list