[Mplayer-cvslog] CVS: main/libavcodec/i386 dsputil_mmx.c,1.2,1.3

Nick Kurshev nick at mplayer.dev.hu
Tue Jul 17 11:08:04 CEST 2001


Update of /cvsroot/mplayer/main/libavcodec/i386
In directory mplayer:/var/tmp.root/cvs-serv23114/main/libavcodec/i386

Modified Files:
	dsputil_mmx.c 
Log Message:
Minor improvements and 3dNow! - MMX2 support

Index: dsputil_mmx.c
===================================================================
RCS file: /cvsroot/mplayer/main/libavcodec/i386/dsputil_mmx.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- dsputil_mmx.c	16 Jul 2001 09:16:20 -0000	1.2
+++ dsputil_mmx.c	17 Jul 2001 09:07:57 -0000	1.3
@@ -20,6 +20,8 @@
  */
 
 /* pixel operations */
+static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 };
+static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 };
 
 /*
  cropTable[0..255] = idx;
@@ -31,7 +33,7 @@
     const DCTELEM *p;
     UINT8 *pix;
     int i;
-    
+
     /* read the pixels */
     p = block;
     pix = pixels;
@@ -68,7 +70,7 @@
     const DCTELEM *p;
     UINT8 *pix;
     int i;
-    
+
     /* read the pixels */
     p = block;
     pix = pixels;
@@ -104,21 +106,724 @@
     emms();
 }
 
-static void put_pixels_long(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+static void put_pixels(UINT8 *block, const UINT8 *pixels, int line_size, int h)
 {
-    UINT32 *p;
-    const UINT32 *pix;
-    p   = (UINT32*)block;
-    pix = (UINT32*)pixels;
-    do {
+    int dh, hh;
+    UINT8 *p;
+    const UINT8 *pix;
+    p   = block;
+    pix = pixels;
+    hh=h>>2;
+    dh=h%4;
+    while(hh--) {
     __asm __volatile(
+	"movq	%4, %%mm0\n\t"
+	"movq	%5, %%mm1\n\t"
+	"movq	%6, %%mm2\n\t"
+	"movq	%7, %%mm3\n\t"
+	"movq	%%mm0, %0\n\t"
+	"movq	%%mm1, %1\n\t"
+	"movq	%%mm2, %2\n\t"
+	"movq	%%mm3, %3\n\t"
+	:"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3))
+	:"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3))
+	:"memory");
+        pix = pix + line_size*4;
+        p =   p   + line_size*4;
+    }
+    while(dh--) {
+     __asm __volatile(
 	"movq	%1, %%mm0\n\t"
 	"movq	%%mm0, %0\n\t"
-	:"=m"(p[0])
-	:"m"(pix[0])
+	:"=m"(*p)
+	:"m"(*pix)
 	:"memory");
-        pix = (UINT32*) ((char*)pix + line_size);
-        p =   (UINT32*) ((char*)p   + line_size);
-    } while (--h);
+        pix = pix + line_size;
+        p =   p   + line_size;
+    }
     emms();
 }
+
+
+#ifdef HAVE_3DNOW
+/* PAVGUSB is preferred on Athlons */
+#define PAVGB "pavgusb"
+#else
+/* Introduced only in MMX2 set */
+#define PAVGB "pavgb"
+#endif
+
+#if defined ( HAVE_3DNOW ) || defined ( HAVE_MMX2 )
+static void put_pixels_x2(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+  int dh, hh;
+  UINT8 *p;
+  const UINT8 *pix;
+  p = block;
+  pix = pixels;
+  hh=h>>2;
+  dh=h%4;
+  while(hh--) {
+    __asm __volatile(
+	"movq	%4, %%mm0\n\t"
+	"movq	1%4, %%mm1\n\t"
+	"movq	%5, %%mm2\n\t"
+	"movq	1%5, %%mm3\n\t"
+	"movq	%6, %%mm4\n\t"
+	"movq	1%6, %%mm5\n\t"
+	"movq	%7, %%mm6\n\t"
+	"movq	1%7, %%mm7\n\t"
+	PAVGB"  %%mm1, %%mm0\n\t"
+	PAVGB"  %%mm3, %%mm2\n\t"
+	PAVGB"  %%mm5, %%mm4\n\t"
+	PAVGB"  %%mm7, %%mm6\n\t"
+	"movq	%%mm0, %0\n\t"
+	"movq	%%mm2, %1\n\t"
+	"movq	%%mm4, %2\n\t"
+	"movq	%%mm6, %3\n\t"
+	:"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3))
+	:"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3))
+	:"memory");
+     pix += line_size*4; p += line_size*4;
+  }
+  while(dh--) {
+    __asm __volatile(
+	"movq	%1, %%mm0\n\t"
+	"movq	1%1, %%mm1\n\t"
+	PAVGB"  %%mm1, %%mm0\n\t"
+	"movq	%%mm0, %0\n\t"
+	:"=m"(*p)
+	:"m"(*pix)
+	:"memory");
+     pix += line_size; p += line_size;
+  }
+  emms();
+}
+#else
+static void put_pixels_x2(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+  UINT8 *p;
+  const UINT8 *pix;
+  p = block;
+  pix = pixels;
+  __asm __volatile(
+	"pxor	%%mm7, %%mm7\n\t"
+	"movq	%0, %%mm4\n\t"
+	::"m"(mm_wone[0]):"memory");
+  do {
+    __asm __volatile(
+	"movq	%1, %%mm0\n\t"
+	"movq	1%1, %%mm1\n\t"
+	"movq	%%mm0, %%mm2\n\t"
+	"movq	%%mm1, %%mm3\n\t"
+	"punpcklbw %%mm7, %%mm0\n\t"
+	"punpcklbw %%mm7, %%mm1\n\t"
+	"punpckhbw %%mm7, %%mm2\n\t"
+	"punpckhbw %%mm7, %%mm3\n\t"
+	"paddusw %%mm1, %%mm0\n\t"
+	"paddusw %%mm3, %%mm2\n\t"
+	"paddusw %%mm4, %%mm0\n\t"
+	"paddusw %%mm4, %%mm2\n\t"
+	"psrlw	$1, %%mm0\n\t"
+	"psrlw	$1, %%mm2\n\t"
+	"packuswb  %%mm2, %%mm0\n\t"
+	"movq	%%mm0, %0\n\t"
+	:"=m"(*p)
+	:"m"(*pix)
+	:"memory");
+   pix += line_size; p += line_size;
+  } while (--h);
+  emms();
+}
+#endif
+
+#if defined ( HAVE_3DNOW ) || defined ( HAVE_MMX2 )
+static void put_pixels_y2(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+  int dh, hh;
+  UINT8 *p;
+  const UINT8 *pix;
+  const UINT8 *pix1;
+  p = block;
+  pix = pixels;
+  pix1 = pixels + line_size;
+  hh=h>>1;
+  dh=h%2;
+  while(hh--) {
+    __asm __volatile(
+	"movq	%2, %%mm0\n\t"
+	"movq	%4, %%mm1\n\t"
+	"movq	%3, %%mm2\n\t"
+	"movq	%5, %%mm3\n\t"
+	PAVGB"  %%mm1, %%mm0\n\t"
+	PAVGB"  %%mm3, %%mm2\n\t"
+	"movq	%%mm0, %0\n\t"
+	"movq	%%mm2, %1\n\t"
+	:"=m"(*p), "=m"(*(p+line_size))
+	:"m"(*pix), "m"(*(pix+line_size)),
+	 "m"(*pix1), "m"(*(pix1+line_size))
+	:"memory");
+     pix += line_size*2; p += line_size*2; pix1 += line_size*2;
+  }
+  while(dh--) {
+    __asm __volatile(
+	"movq	%1, %%mm0\n\t"
+	"movq	%2, %%mm1\n\t"
+	PAVGB"  %%mm1, %%mm0\n\t"
+	"movq	%%mm0, %0\n\t"
+	:"=m"(*p)
+	:"m"(*pix),
+	 "m"(*pix1)
+	:"memory");
+     pix += line_size; p += line_size; pix1 += line_size;
+  }
+  emms();
+}
+#else
+static void put_pixels_y2(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+  UINT8 *p;
+  const UINT8 *pix;
+  const UINT8 *pix1;
+  p = block;
+  pix = pixels;
+  pix1 = pixels + line_size;
+  __asm __volatile(
+	"pxor	%%mm7, %%mm7\n\t"
+	"movq	%0, %%mm4\n\t"
+	::"m"(mm_wone[0]):"memory");
+  do {
+    __asm __volatile(
+	"movq	%1, %%mm0\n\t"
+	"movq	%2, %%mm1\n\t"
+	"movq	%%mm0, %%mm2\n\t"
+	"movq	%%mm1, %%mm3\n\t"
+	"punpcklbw %%mm7, %%mm0\n\t"
+	"punpcklbw %%mm7, %%mm1\n\t"
+	"punpckhbw %%mm7, %%mm2\n\t"
+	"punpckhbw %%mm7, %%mm3\n\t"
+	"paddusw %%mm1, %%mm0\n\t"
+	"paddusw %%mm3, %%mm2\n\t"
+	"paddusw %%mm4, %%mm0\n\t"
+	"paddusw %%mm4, %%mm2\n\t"
+	"psrlw	$1, %%mm0\n\t"
+	"psrlw	$1, %%mm2\n\t"
+	"packuswb  %%mm2, %%mm0\n\t"
+	"movq	%%mm0, %0\n\t"
+	:"=m"(*p)
+	:"m"(*pix),
+	 "m"(*pix1)
+	:"memory");
+   pix += line_size; p += line_size; pix1 += line_size;
+  } while (--h);
+  emms();
+}
+#endif
+
+#if defined ( HAVE_3DNOW ) || defined ( HAVE_MMX2 )
+static void put_pixels_xy2(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+  int dh, hh;
+  UINT8 *p;
+  const UINT8 *pix;
+  const UINT8 *pix1;
+  p = block;
+  pix = pixels;
+  pix1 = pixels + line_size;
+  hh=h>>1;
+  dh=h%2;
+  while(hh--) {
+    __asm __volatile(
+	"movq	%2, %%mm0\n\t"
+	"movq	%4, %%mm1\n\t"
+	"movq	1%2, %%mm2\n\t"
+	"movq	1%4, %%mm3\n\t"
+	"movq	%3, %%mm4\n\t"
+	"movq	%5, %%mm5\n\t"
+	"movq	1%3, %%mm6\n\t"
+	"movq	1%5, %%mm7\n\t"
+	PAVGB"	%%mm1, %%mm0\n\t"
+	PAVGB"	%%mm3, %%mm2\n\t"
+	PAVGB"	%%mm5, %%mm4\n\t"
+	PAVGB"	%%mm7, %%mm6\n\t"
+	PAVGB"	%%mm2, %%mm0\n\t"
+	PAVGB"	%%mm6, %%mm4\n\t"
+	"movq	%%mm0, %0\n\t"
+	"movq	%%mm4, %1\n\t"
+	:"=m"(*p), "=m"(*(p+line_size))
+	:"m"(*pix), "m"(*(pix+line_size)),
+	 "m"(*pix1), "m"(*(pix1+line_size))
+	:"memory");
+   pix += line_size*2;
+   pix1 += line_size*2;
+   p += line_size*2;
+  }
+  while(dh--) {
+    __asm __volatile(
+	"movq	%1, %%mm0\n\t"
+	"movq	%2, %%mm1\n\t"
+	"movq	1%1, %%mm2\n\t"
+	"movq	1%2, %%mm3\n\t"
+	PAVGB"	%%mm1, %%mm0\n\t"
+	PAVGB"	%%mm3, %%mm2\n\t"
+	PAVGB"	%%mm2, %%mm0\n\t"
+	"movq	%%mm0, %0\n\t"
+	:"=m"(*p)
+	:"m"(*pix),
+	 "m"(*pix1)
+	:"memory");
+   pix += line_size;
+   pix1 += line_size;
+   p += line_size;
+  }
+  emms();
+}
+#else
+static void put_pixels_xy2(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+  UINT8 *p;
+  const UINT8 *pix;
+  const UINT8 *pix1;
+  p = block;
+  pix = pixels;
+  pix1 = pixels + line_size;
+  __asm __volatile(
+	"pxor	%%mm7, %%mm7\n\t"
+	"movq	%0, %%mm6\n\t"
+	::"m"(mm_wtwo[0]):"memory");
+  do {
+    __asm __volatile(
+	"movq	%1, %%mm0\n\t"
+	"movq	%2, %%mm1\n\t"
+	"movq	1%1, %%mm4\n\t"
+	"movq	1%2, %%mm5\n\t"
+	"movq	%%mm0, %%mm2\n\t"
+	"movq	%%mm1, %%mm3\n\t"
+	"punpcklbw %%mm7, %%mm0\n\t"
+	"punpcklbw %%mm7, %%mm1\n\t"
+	"punpckhbw %%mm7, %%mm2\n\t"
+	"punpckhbw %%mm7, %%mm3\n\t"
+	"paddusw %%mm1, %%mm0\n\t"
+	"paddusw %%mm3, %%mm2\n\t"
+	"movq	%%mm4, %%mm1\n\t"
+	"movq	%%mm5, %%mm3\n\t"
+	"punpcklbw %%mm7, %%mm4\n\t"
+	"punpcklbw %%mm7, %%mm5\n\t"
+	"punpckhbw %%mm7, %%mm1\n\t"
+	"punpckhbw %%mm7, %%mm3\n\t"
+	"paddusw %%mm5, %%mm4\n\t"
+	"paddusw %%mm3, %%mm1\n\t"
+	"paddusw %%mm6, %%mm4\n\t"
+	"paddusw %%mm6, %%mm1\n\t"
+	"paddusw %%mm4, %%mm0\n\t"
+	"paddusw %%mm1, %%mm2\n\t"
+	"psrlw	$2, %%mm0\n\t"
+	"psrlw	$2, %%mm2\n\t"
+	"packuswb  %%mm2, %%mm0\n\t"
+	"movq	%%mm0, %0\n\t"
+	:"=m"(*p)
+	:"m"(*pix),
+	 "m"(*pix1)
+	:"memory");
+   pix += line_size;
+   pix1 += line_size;
+   p += line_size;
+  } while(--h);
+  emms();
+}
+#endif
+void (* put_pixels_tab[4])(UINT8 *block, const UINT8 *pixels, int line_size, int h) =
+{ put_pixels,   put_pixels_x2,   put_pixels_y2,   put_pixels_xy2, };
+
+/* Horizontal half-pel copy without rounding: each of the 8 output bytes per row is
+   (pixels[i] + pixels[i+1]) >> 1; h rows are processed, line_size bytes apart. */
+static void   put_no_rnd_pixels_x2( UINT8  *block, const UINT8 *pixels, int line_size, int h)
+{
+  UINT8  *p = block;
+  const UINT8 *pix = pixels;
+  __asm __volatile("pxor %%mm7, %%mm7\n\t":::"memory"); /* mm7 = 0, used to unpack bytes to words */
+  do {
+    __asm __volatile(
+	"movq	%1, %%mm0\n\t"
+	"movq	1%1, %%mm1\n\t" /* pix+1; NOTE(review): assumes %1 prints without a displacement */
+	"movq	%%mm0, %%mm2\n\t"
+	"movq	%%mm1, %%mm3\n\t"
+	"punpcklbw %%mm7, %%mm0\n\t"
+	"punpcklbw %%mm7, %%mm1\n\t"
+	"punpckhbw %%mm7, %%mm2\n\t"
+	"punpckhbw %%mm7, %%mm3\n\t"
+	"paddusw %%mm1, %%mm0\n\t"
+	"paddusw %%mm3, %%mm2\n\t"
+	"psrlw	$1, %%mm0\n\t"
+	"psrlw	$1, %%mm2\n\t"
+	"packuswb  %%mm2, %%mm0\n\t"
+	"movq	%%mm0, %0\n\t"
+	:"=m"(*p)
+	:"m"(*pix)
+	:"memory");
+   pix += line_size; p += line_size;
+  } while (--h);
+  emms(); /* reset MMX state before returning, as put_pixels_x2 does, so later FPU code works */
+}
+
+/* Vertical half-pel copy without rounding: each of the 8 output bytes per row is
+   (pixels[i] + pixels[i + line_size]) >> 1; h rows are processed. */
+static void put_no_rnd_pixels_y2( UINT8  *block, const UINT8 *pixels, int line_size, int h)
+{
+  UINT8  *p = block;
+  const UINT8 *pix = pixels;
+  const UINT8 *pix1 = pixels + line_size; /* the row directly below pix */
+  __asm __volatile("pxor %%mm7, %%mm7\n\t":::"memory"); /* mm7 = 0, used to unpack bytes to words */
+  do {
+    __asm __volatile(
+	"movq	%1, %%mm0\n\t"
+	"movq	%2, %%mm1\n\t"
+	"movq	%%mm0, %%mm2\n\t"
+	"movq	%%mm1, %%mm3\n\t"
+	"punpcklbw %%mm7, %%mm0\n\t"
+	"punpcklbw %%mm7, %%mm1\n\t"
+	"punpckhbw %%mm7, %%mm2\n\t"
+	"punpckhbw %%mm7, %%mm3\n\t"
+	"paddusw %%mm1, %%mm0\n\t"
+	"paddusw %%mm3, %%mm2\n\t"
+	"psrlw	$1, %%mm0\n\t"
+	"psrlw	$1, %%mm2\n\t"
+	"packuswb  %%mm2, %%mm0\n\t"
+	"movq	%%mm0, %0\n\t"
+	:"=m"(*p)
+	:"m"(*pix),
+	 "m"(*pix1)
+	:"memory");
+   pix += line_size;
+   pix1 += line_size;
+   p +=   line_size;
+  } while(--h);
+  emms(); /* reset MMX state before returning, as put_pixels_y2 does, so later FPU code works */
+}
+
+/* Diagonal half-pel copy without rounding: each of the 8 output bytes per row is
+   (pix[i] + pix[i+1] + pix1[i] + pix1[i+1] + 1) >> 2, where pix1 is the row below pix. */
+static void   put_no_rnd_pixels_xy2( UINT8  *block, const UINT8 *pixels, int line_size, int h)
+{
+  UINT8  *p = block;
+  const UINT8 *pix = pixels;
+  const UINT8 *pix1 = pixels + line_size;
+  __asm __volatile(
+	"pxor	%%mm7, %%mm7\n\t" /* mm7 = 0, used to unpack bytes to words */
+	"movq	%0, %%mm6\n\t"    /* mm6 = four words of 1: the bias added before the >>2 */
+	::"m"(mm_wone[0]):"memory");
+  do {
+    __asm __volatile(
+	"movq	%1, %%mm0\n\t"
+	"movq	%2, %%mm1\n\t"
+	"movq	1%1, %%mm4\n\t" /* pix+1; NOTE(review): assumes %1 prints without a displacement */
+	"movq	1%2, %%mm5\n\t" /* pix1+1; same assumption for %2 */
+	"movq	%%mm0, %%mm2\n\t"
+	"movq	%%mm1, %%mm3\n\t"
+	"punpcklbw %%mm7, %%mm0\n\t"
+	"punpcklbw %%mm7, %%mm1\n\t"
+	"punpckhbw %%mm7, %%mm2\n\t"
+	"punpckhbw %%mm7, %%mm3\n\t"
+	"paddusw %%mm1, %%mm0\n\t"
+	"paddusw %%mm3, %%mm2\n\t"
+	"movq	%%mm4, %%mm1\n\t"
+	"movq	%%mm5, %%mm3\n\t"
+	"punpcklbw %%mm7, %%mm4\n\t"
+	"punpcklbw %%mm7, %%mm5\n\t"
+	"punpckhbw %%mm7, %%mm1\n\t"
+	"punpckhbw %%mm7, %%mm3\n\t"
+	"paddusw %%mm5, %%mm4\n\t"
+	"paddusw %%mm3, %%mm1\n\t"
+	"paddusw %%mm6, %%mm4\n\t"
+	"paddusw %%mm6, %%mm1\n\t"
+	"paddusw %%mm4, %%mm0\n\t"
+	"paddusw %%mm1, %%mm2\n\t"
+	"psrlw	$2, %%mm0\n\t"
+	"psrlw	$2, %%mm2\n\t"
+	"packuswb  %%mm2, %%mm0\n\t"
+	"movq	%%mm0, %0\n\t"
+	:"=m"(*p)
+	:"m"(*pix),
+	 "m"(*pix1)
+	:"memory");
+   pix += line_size;
+   pix1 += line_size;
+   p +=   line_size;
+  } while(--h);
+  emms(); /* reset MMX state before returning, as put_pixels_xy2 does, so later FPU code works */
+}
+
+void (* put_no_rnd_pixels_tab[4])(UINT8 *block, const UINT8 *pixels, int line_size, int h) =
+{  put_pixels,   put_no_rnd_pixels_x2,   put_no_rnd_pixels_y2,   put_no_rnd_pixels_xy2, };
+
+/*
+    NK:	The code below is not tested and not optimized!
+	I simply do not have the necessary samples for now.
+*/
+
+static void avg_pixels(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+  UINT8  *p;
+  const UINT8 *pix;
+  p = block;
+  pix = pixels;
+  do {
+   p[0]  = ((  p[0]  +    pix[0]  +1)>>1)  ;
+   p[1]  = ((  p[1]  +    pix[1]  +1)>>1)  ;
+   p[2]  = ((  p[2]  +    pix[2]  +1)>>1)  ;
+   p[3]  = ((  p[3]  +    pix[3]  +1)>>1)  ;
+   p[4]  = ((  p[4]  +    pix[4]  +1)>>1)  ;
+   p[5]  = ((  p[5]  +    pix[5]  +1)>>1)  ;
+   p[6]  = ((  p[6]  +    pix[6]  +1)>>1)  ;
+   p[7]  = ((  p[7]  +    pix[7]  +1)>>1)  ;
+   pix += line_size;
+   p +=   line_size;
+  }
+  while (--h);
+}
+
+static void   avg_pixels_x2( UINT8  *block, const UINT8 *pixels, int line_size, int h)
+{
+  UINT8  *p;
+  const UINT8 *pix;
+  p = block;
+  pix = pixels;
+  do {
+   p[0]  = ((  p[0]  +    (( pix[0] +  pix[1] +1)>>1)   +1)>>1)  ;
+   p[1]  = ((  p[1]  +    (( pix[1] +  pix[2] +1)>>1)   +1)>>1)  ;
+   p[2]  = ((  p[2]  +    (( pix[2] +  pix[3] +1)>>1)   +1)>>1)  ;
+   p[3]  = ((  p[3]  +    (( pix[3] +  pix[4] +1)>>1)   +1)>>1)  ;
+   p[4]  = ((  p[4]  +    (( pix[4] +  pix[5] +1)>>1)   +1)>>1)  ;
+   p[5]  = ((  p[5]  +    (( pix[5] +  pix[6] +1)>>1)   +1)>>1)  ;
+   p[6]  = ((  p[6]  +    (( pix[6] +  pix[7] +1)>>1)   +1)>>1)  ;
+   p[7]  = ((  p[7]  +    (( pix[7] +  pix[8] +1)>>1)   +1)>>1)  ;
+   pix += line_size;
+   p +=   line_size;
+  } while (--h);
+}
+
+static void   avg_pixels_y2( UINT8  *block, const UINT8 *pixels, int line_size, int h)
+{
+  UINT8  *p;
+  const UINT8 *pix;
+  const UINT8 *pix1;
+  p = block;
+  pix = pixels;
+  pix1 = pixels + line_size;
+  do {
+   p[0]  = ((  p[0]  +    (( pix[0] +  pix1[0] +1)>>1)   +1)>>1)  ;
+   p[1]  = ((  p[1]  +    (( pix[1] +  pix1[1] +1)>>1)   +1)>>1)  ;
+   p[2]  = ((  p[2]  +    (( pix[2] +  pix1[2] +1)>>1)   +1)>>1)  ;
+   p[3]  = ((  p[3]  +    (( pix[3] +  pix1[3] +1)>>1)   +1)>>1)  ;
+   p[4]  = ((  p[4]  +    (( pix[4] +  pix1[4] +1)>>1)   +1)>>1)  ;
+   p[5]  = ((  p[5]  +    (( pix[5] +  pix1[5] +1)>>1)   +1)>>1)  ;
+   p[6]  = ((  p[6]  +    (( pix[6] +  pix1[6] +1)>>1)   +1)>>1)  ;
+   p[7]  = ((  p[7]  +    (( pix[7] +  pix1[7] +1)>>1)   +1)>>1)  ;
+   pix += line_size;
+   pix1 += line_size;
+   p +=   line_size ;
+  } while(--h);
+}
+
+static void   avg_pixels_xy2( UINT8  *block, const UINT8 *pixels, int line_size, int h)
+{
+  UINT8  *p;
+  const UINT8 *pix;
+  const UINT8 *pix1;
+  p = block;
+  pix = pixels;
+  pix1 = pixels + line_size;
+  do {
+   p[0]  = (( p[0] + (( pix[0] + pix[1] + pix1[0] + pix1[1] +2)>>2)+1)>>1)  ;
+   p[1]  = (( p[1] + (( pix[1] + pix[2] + pix1[1] + pix1[2] +2)>>2)+1)>>1)  ;
+   p[2]  = (( p[2] + (( pix[2] + pix[3] + pix1[2] + pix1[3] +2)>>2)+1)>>1)  ;
+   p[3]  = (( p[3] + (( pix[3] + pix[4] + pix1[3] + pix1[4] +2)>>2)+1)>>1)  ;
+   p[4]  = (( p[4] + (( pix[4] + pix[5] + pix1[4] + pix1[5] +2)>>2)+1)>>1)  ;
+   p[5]  = (( p[5] + (( pix[5] + pix[6] + pix1[5] + pix1[6] +2)>>2)+1)>>1)  ;
+   p[6]  = (( p[6] + (( pix[6] + pix[7] + pix1[6] + pix1[7] +2)>>2)+1)>>1)  ;
+   p[7]  = (( p[7] + (( pix[7] + pix[8] + pix1[7] + pix1[8] +2)>>2)+1)>>1)  ;
+   pix += line_size;
+   pix1 += line_size;
+   p +=   line_size ;
+  } while(--h);
+}
+
+void (*avg_pixels_tab[4])(UINT8 *block, const UINT8 *pixels, int line_size, int h) =
+{   avg_pixels,   avg_pixels_x2,   avg_pixels_y2,   avg_pixels_xy2, };
+
+static void avg_no_rnd_pixels( UINT8  *block, const UINT8 *pixels, int line_size, int h)
+{
+  UINT8  *p;
+  const UINT8 *pix;
+  p = block;
+  pix = pixels;
+  do {
+   p[0]  = ((  p[0]  +    pix[0]  )>>1)  ;
+   p[1]  = ((  p[1]  +    pix[1]  )>>1)  ;
+   p[2]  = ((  p[2]  +    pix[2]  )>>1)  ;
+   p[3]  = ((  p[3]  +    pix[3]  )>>1)  ;
+   p[4]  = ((  p[4]  +    pix[4]  )>>1)  ;
+   p[5]  = ((  p[5]  +    pix[5]  )>>1)  ;
+   p[6]  = ((  p[6]  +    pix[6]  )>>1)  ;
+   p[7]  = ((  p[7]  +    pix[7]  )>>1)  ;
+   pix += line_size;
+   p +=   line_size ;
+  } while (--h);
+}
+
+static void   avg_no_rnd_pixels_x2( UINT8  *block, const UINT8 *pixels, int line_size, int h)
+{
+  UINT8  *p;
+  const UINT8 *pix;
+  p = block;
+  pix = pixels;
+  do {
+   p[0]  = ((  p[0]  +    (( pix[0] +  pix[1] )>>1)   )>>1)  ;
+   p[1]  = ((  p[1]  +    (( pix[1] +  pix[2] )>>1)   )>>1)  ;
+   p[2]  = ((  p[2]  +    (( pix[2] +  pix[3] )>>1)   )>>1)  ;
+   p[3]  = ((  p[3]  +    (( pix[3] +  pix[4] )>>1)   )>>1)  ;
+   p[4]  = ((  p[4]  +    (( pix[4] +  pix[5] )>>1)   )>>1)  ;
+   p[5]  = ((  p[5]  +    (( pix[5] +  pix[6] )>>1)   )>>1)  ;
+   p[6]  = ((  p[6]  +    (( pix[6] +  pix[7] )>>1)   )>>1)  ;
+   p[7]  = ((  p[7]  +    (( pix[7] +  pix[8] )>>1)   )>>1)  ;
+   pix += line_size;
+   p +=   line_size;
+ } while (--h);
+}
+
+static void   avg_no_rnd_pixels_y2( UINT8  *block, const UINT8 *pixels, int line_size, int h)
+{
+  UINT8  *p;
+  const UINT8 *pix;
+  const UINT8 *pix1;
+  p = block;
+  pix = pixels;
+  pix1 = pixels + line_size;
+  do {
+   p[0]  = ((  p[0]  +    (( pix[0] +  pix1[0] )>>1)   )>>1)  ;
+   p[1]  = ((  p[1]  +    (( pix[1] +  pix1[1] )>>1)   )>>1)  ;
+   p[2]  = ((  p[2]  +    (( pix[2] +  pix1[2] )>>1)   )>>1)  ;
+   p[3]  = ((  p[3]  +    (( pix[3] +  pix1[3] )>>1)   )>>1)  ;
+   p[4]  = ((  p[4]  +    (( pix[4] +  pix1[4] )>>1)   )>>1)  ;
+   p[5]  = ((  p[5]  +    (( pix[5] +  pix1[5] )>>1)   )>>1)  ;
+   p[6]  = ((  p[6]  +    (( pix[6] +  pix1[6] )>>1)   )>>1)  ;
+   p[7]  = ((  p[7]  +    (( pix[7] +  pix1[7] )>>1)   )>>1)  ;
+   pix += line_size;
+   pix1 += line_size;
+   p +=   line_size ;
+  } while(--h);
+}
+
+static void   avg_no_rnd_pixels_xy2( UINT8  *block, const UINT8 *pixels, int line_size, int h)
+{
+  UINT8  *p;
+  const UINT8 *pix;
+  const UINT8 *pix1;
+  p = block;
+  pix = pixels;
+  pix1 = pixels + line_size;
+  do {
+   p[0]  = ((  p[0]  +    (( pix[0] +  pix[1] +  pix1[0] +  pix1[1] +1)>>2)   )>>1)  ;
+   p[1]  = ((  p[1]  +    (( pix[1] +  pix[2] +  pix1[1] +  pix1[2] +1)>>2)   )>>1)  ;
+   p[2]  = ((  p[2]  +    (( pix[2] +  pix[3] +  pix1[2] +  pix1[3] +1)>>2)   )>>1)  ;
+   p[3]  = ((  p[3]  +    (( pix[3] +  pix[4] +  pix1[3] +  pix1[4] +1)>>2)   )>>1)  ;
+   p[4]  = ((  p[4]  +    (( pix[4] +  pix[5] +  pix1[4] +  pix1[5] +1)>>2)   )>>1)  ;
+   p[5]  = ((  p[5]  +    (( pix[5] +  pix[6] +  pix1[5] +  pix1[6] +1)>>2)   )>>1)  ;
+   p[6]  = ((  p[6]  +    (( pix[6] +  pix[7] +  pix1[6] +  pix1[7] +1)>>2)   )>>1)  ;
+   p[7]  = ((  p[7]  +    (( pix[7] +  pix[8] +  pix1[7] +  pix1[8] +1)>>2)   )>>1)  ;
+   pix += line_size;
+   pix1 += line_size;
+   p += line_size;
+  } while(--h);
+}
+
+void (* avg_no_rnd_pixels_tab[4])(UINT8 *block, const UINT8 *pixels, int line_size, int h) =
+{  avg_no_rnd_pixels,   avg_no_rnd_pixels_x2,   avg_no_rnd_pixels_y2,   avg_no_rnd_pixels_xy2, };
+
+static void sub_pixels( DCTELEM  *block, const UINT8 *pixels, int line_size, int h)
+{
+  DCTELEM  *p;
+  const UINT8 *pix;
+  p = block;
+  pix = pixels;
+  do {
+   p[0]  -=   pix[0]  ;
+   p[1]  -=   pix[1]  ;
+   p[2]  -=   pix[2]  ;
+   p[3]  -=   pix[3]  ;
+   p[4]  -=   pix[4]  ;
+   p[5]  -=   pix[5]  ;
+   p[6]  -=   pix[6]  ;
+   p[7]  -=   pix[7]  ;
+   pix += line_size;
+   p +=   8 ;
+  } while (--h);
+}
+
+static void sub_pixels_x2( DCTELEM  *block, const UINT8 *pixels, int line_size, int h)
+{
+  DCTELEM  *p;
+  const UINT8 *pix;
+  p = block;
+  pix = pixels;
+  do {
+   p[0]  -=   (( pix[0] +  pix[1] +1)>>1)   ;
+   p[1]  -=   (( pix[1] +  pix[2] +1)>>1)   ;
+   p[2]  -=   (( pix[2] +  pix[3] +1)>>1)   ;
+   p[3]  -=   (( pix[3] +  pix[4] +1)>>1)   ;
+   p[4]  -=   (( pix[4] +  pix[5] +1)>>1)   ;
+   p[5]  -=   (( pix[5] +  pix[6] +1)>>1)   ;
+   p[6]  -=   (( pix[6] +  pix[7] +1)>>1)   ;
+   p[7]  -=   (( pix[7] +  pix[8] +1)>>1)   ;
+   pix += line_size;
+   p +=   8;
+ } while (--h);
+}
+
+static void sub_pixels_y2( DCTELEM  *block, const UINT8 *pixels, int line_size, int h)
+{
+  DCTELEM  *p;
+  const UINT8 *pix;
+  const UINT8 *pix1;
+  p = block;
+  pix = pixels;
+  pix1 = pixels + line_size;
+  do {
+   p[0]  -=   (( pix[0] +  pix1[0] +1)>>1)   ;
+   p[1]  -=   (( pix[1] +  pix1[1] +1)>>1)   ;
+   p[2]  -=   (( pix[2] +  pix1[2] +1)>>1)   ;
+   p[3]  -=   (( pix[3] +  pix1[3] +1)>>1)   ;
+   p[4]  -=   (( pix[4] +  pix1[4] +1)>>1)   ;
+   p[5]  -=   (( pix[5] +  pix1[5] +1)>>1)   ;
+   p[6]  -=   (( pix[6] +  pix1[6] +1)>>1)   ;
+   p[7]  -=   (( pix[7] +  pix1[7] +1)>>1)   ;
+   pix += line_size;
+   pix1 += line_size;
+   p +=   8 ;
+  } while(--h);
+}
+
+static void   sub_pixels_xy2( DCTELEM  *block, const UINT8 *pixels, int line_size, int h)
+{
+  DCTELEM  *p;
+  const UINT8 *pix;
+  const UINT8 *pix1;
+  p = block;
+  pix = pixels; pix1 = pixels + line_size;
+  do {
+   p[0]  -=   (( pix[0] +  pix[1] +  pix1[0] +  pix1[1] +2)>>2)   ;
+   p[1]  -=   (( pix[1] +  pix[2] +  pix1[1] +  pix1[2] +2)>>2)   ;
+   p[2]  -=   (( pix[2] +  pix[3] +  pix1[2] +  pix1[3] +2)>>2)   ;
+   p[3]  -=   (( pix[3] +  pix[4] +  pix1[3] +  pix1[4] +2)>>2)   ;
+   p[4]  -=   (( pix[4] +  pix[5] +  pix1[4] +  pix1[5] +2)>>2)   ;
+   p[5]  -=   (( pix[5] +  pix[6] +  pix1[5] +  pix1[6] +2)>>2)   ;
+   p[6]  -=   (( pix[6] +  pix[7] +  pix1[6] +  pix1[7] +2)>>2)   ;
+   p[7]  -=   (( pix[7] +  pix[8] +  pix1[7] +  pix1[8] +2)>>2)   ;
+   pix += line_size;
+   pix1 += line_size;
+   p +=   8 ;
+  } while(--h);
+}
+
+void (* sub_pixels_tab[4])(DCTELEM *block, const UINT8 *pixels, int line_size, int h) =
+{   sub_pixels,   sub_pixels_x2,   sub_pixels_y2,   sub_pixels_xy2, };




More information about the MPlayer-cvslog mailing list