[Mplayer-cvslog] CVS: main/libavcodec/i386 sad_mmx.s,1.1,1.2

Nick Kurshev nick at mplayer.dev.hu
Mon Jul 9 10:31:17 CEST 2001


Update of /cvsroot/mplayer/main/libavcodec/i386
In directory mplayer:/var/tmp.root/cvs-serv9810/i386

Modified Files:
	sad_mmx.s 
Log Message:
Minor optimization and K7 support

Index: sad_mmx.s
===================================================================
RCS file: /cvsroot/mplayer/main/libavcodec/i386/sad_mmx.s,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- sad_mmx.s	6 Jul 2001 03:32:40 -0000	1.1
+++ sad_mmx.s	9 Jul 2001 08:31:10 -0000	1.2
@@ -1,4 +1,4 @@
-;  MMX/SSE optimized routines for SAD of 16*16 macroblocks
+;  MMX/MMX2 optimized routines for SAD of 16*16 macroblocks
 ;	Copyright (C) Juan J. Sierralta P. <juanjo at atmlab.utfsm.cl>
 ;
 ;  dist1_* Original Copyright (C) 2000 Chris Atenasio <chris at crud.net>
@@ -169,7 +169,7 @@
 	;emms								; clear mmx registers
 	ret								; return
 
-global pix_abs16x16_sse
+global pix_abs16x16_mmx2
 
 ; int  pix_abs16x16_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h);
 ; esi = p1 (init:		blk1)
@@ -188,7 +188,7 @@
 
 
 align 32
-pix_abs16x16_sse:
+pix_abs16x16_mmx2:
 	push				ebp					; save frame pointer
 	mov				ebp, esp
 
@@ -202,6 +202,8 @@
 	pxor				mm1, mm1
 	mov				esi, [ebp+8]		; get pix1
 	mov				edi, [ebp+12]		; get pix2
+	prefetchnta			[esi]
+	prefetchnta			[edi]
 	mov				edx, [ebp+16]		; get lx
 	mov				ecx, [ebp+20]		; get rowsleft
 	jmp				.next4row
@@ -210,6 +212,8 @@
 .next4row:
 	; First row
 	
+	prefetchnta			[esi+edx]
+	prefetchnta			[edi+edx]
 	movq				mm4, [edi]		; load first 8 bytes of pix2 row 
 	movq				mm5, [edi+8]	; load last 8 bytes of pix2 row
 	psadbw			mm4, [esi]		; SAD of first 8 bytes
@@ -221,6 +225,8 @@
 
 	add				edi, edx;
 	add				esi, edx;
+	prefetchnta			[esi+edx]
+	prefetchnta			[edi+edx]
 	
 	movq				mm6, [edi]		; load first 8 bytes of pix2 row 
 	movq				mm7, [edi+8]	; load last 8 bytes of pix2 row
@@ -233,6 +239,8 @@
 	
 	add				edi, edx;
 	add				esi, edx;
+	prefetchnta			[esi+edx]
+	prefetchnta			[edi+edx]
 	
 	movq				mm4, [edi]		; load first 8 bytes of pix2 row 
 	movq				mm5, [edi+8]	; load last 8 bytes of pix2 row
@@ -242,9 +250,10 @@
 	paddw				mm1, mm5
 		
 	; Fourth row	
-
 	add				edi, edx;
 	add				esi, edx;
+	prefetchnta			[esi+edx]
+	prefetchnta			[edi+edx]
 	
 	movq				mm6, [edi]		; load first 8 bytes of pix2 row 
 	movq				mm7, [edi+8]	; load last 8 bytes of pix2 row
@@ -254,9 +263,11 @@
 	paddw				mm1, mm7
 	
 	; Loop termination
-	
+
 	add				esi, edx		; update pointers to next row
 	add				edi, edx		
+	prefetchnta			[esi+edx]
+	prefetchnta			[edi+edx]
 	sub				ecx,4
 	test				ecx, ecx		; check rowsleft
 	jnz				near .next4row




More information about the MPlayer-cvslog mailing list