[Mplayer-cvslog] CVS: main/libavcodec dsputil.c,1.3,1.4
Jürgen Keil
jkeil at mplayer.dev.hu
Thu Jul 12 20:11:06 CEST 2001
Update of /cvsroot/mplayer/main/libavcodec
In directory mplayer:/var/tmp.root/cvs-serv28069
Modified Files:
dsputil.c
Log Message:
Another small optimization for ffmpeg: Copy DCT blocks using 32-bit moves
on x86 cpus. Reduces video decoding time by another 10%.
Index: dsputil.c
===================================================================
RCS file: /cvsroot/mplayer/main/libavcodec/dsputil.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- dsputil.c 10 Jul 2001 21:34:14 -0000 1.3
+++ dsputil.c 12 Jul 2001 18:11:03 -0000 1.4
@@ -22,6 +22,29 @@
#include "dsputil.h"
#include "mpegvideo.h"
+#ifdef ARCH_X86
+/*
+ * Copy h rows (two UINT32 words each) from pixels to block, stepping both by
+ * line_size bytes per row. x86 CPUs can access unaligned 32-bit data, so DCT
+ * blocks are copied with 32-bit moves instead of 8-bit moves (fewer instructions).
+ */
+static void put_pixels_long(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT32 *p;
+ const UINT32 *pix;
+
+ p = (UINT32*)block; /* view the destination row as 32-bit words */
+ pix = (UINT32*)pixels; /* view the source row as 32-bit words */
+ do { /* body runs before h is tested: caller must pass h >= 1 */
+ p[0] = pix[0]; /* first word of the row */
+ p[1] = pix[1]; /* second word of the row */
+ pix = (UINT32*) ((char*)pix + line_size); /* line_size is a byte stride, hence the char* cast */
+ p = (UINT32*) ((char*)p + line_size); /* destination advances by the same stride */
+ } while (--h); /* stop once h has counted down to 0 */
+}
+#endif
+
+
#ifndef HAVE_MMX
static UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
#endif
@@ -40,6 +63,11 @@
for(i=0;i<512;i++) {
squareTbl[i] = (i - 256) * (i - 256);
}
+
+#ifdef ARCH_X86
+ put_pixels_tab[0] = put_pixels_long;
+ put_no_rnd_pixels_tab[0] = put_pixels_long;
+#endif
}
void get_pixels(DCTELEM *block, const UINT8 *pixels, int line_size)
More information about the MPlayer-cvslog
mailing list