[Mplayer-cvslog] CVS: main/mp3lib decode_k7.s,1.5,1.6
Nick Kurshev
nickols_k at users.sourceforge.net
Fri Jun 8 11:41:24 CEST 2001
Update of /cvsroot/mplayer/main/mp3lib
In directory usw-pr-cvs1:/tmp/cvs-serv6994/main/mp3lib
Modified Files:
decode_k7.s
Log Message:
Last minute improvements before release
Index: decode_k7.s
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/decode_k7.s,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -r1.5 -r1.6
*** decode_k7.s 2001/05/24 09:42:18 1.5
--- decode_k7.s 2001/06/08 09:41:22 1.6
***************
*** 8,11 ****
--- 8,15 ----
/// - decreased number of opcodes (as it was suggested by k7 manual)
/// (using memory reference as operand of instructions)
+ /// - added PREFETCHW opcode. It has different semantic than k6-2
+ /// and saves 15-25 cpu clocks for athlon.
+ /// - partial unrolling loops for removing slower MOVW insns.
+ /// (Note probably same operation should be done for decode_3dnow)
/// - change function name for support 3DNowEx! automatic detect
///
***************
*** 43,49 ****
.comm buffs,4352,4
.data
! .align 4
! bo:
! .long 1
.text
/* int synth_1to1(real *bandPtr,int channel,unsigned char *out) */
--- 47,54 ----
.comm buffs,4352,4
.data
! .align 8
! null_one: .long 0x0000ffff, 0x0000ffff
! one_null: .long 0xffff0000, 0xffff0000
! bo: .long 1
.text
/* int synth_1to1(real *bandPtr,int channel,unsigned char *out) */
***************
*** 101,105 ****
movl %eax,%ecx
subl %edx,%ecx
! movl $16,%ebp
.L55:
--- 106,111 ----
movl %eax,%ecx
subl %edx,%ecx
! movl $8,%ebp
! prefetchw (%esi)
.L55:
***************
*** 107,150 ****
movq (%ecx),%mm0
pfmul (%ebx),%mm0
movq 8(%ecx),%mm1
pfmul 8(%ebx),%mm1
pfadd %mm1,%mm0
movq 16(%ebx),%mm2
pfmul 16(%ecx),%mm2
pfadd %mm2,%mm0
movq 24(%ecx),%mm3
pfmul 24(%ebx),%mm3
pfadd %mm3,%mm0
! movq 32(%ebx),%mm4
! pfmul 32(%ecx),%mm4
! pfadd %mm4,%mm0
!
! movq 40(%ecx),%mm5
! pfmul 40(%ebx),%mm5
! pfadd %mm5,%mm0
!
! movq 48(%ebx),%mm6
! pfmul 48(%ecx),%mm6
! pfadd %mm6,%mm0
!
! movq 56(%ecx),%mm7
! pfmul 56(%ebx),%mm7
! pfadd %mm7,%mm0
!
! pfnacc %mm0, %mm0
!
! pf2id %mm0,%mm0
! movd %mm0,%eax
! sar $16,%eax
! movw %ax,(%esi)
! addl $64,%ebx
! subl $-128,%ecx
! addl $4,%esi
decl %ebp
jnz .L55
--- 113,180 ----
movq (%ecx),%mm0
pfmul (%ebx),%mm0
+ movq 128(%ecx),%mm4
+ pfmul 64(%ebx),%mm4
movq 8(%ecx),%mm1
pfmul 8(%ebx),%mm1
pfadd %mm1,%mm0
+ movq 136(%ecx),%mm5
+ pfmul 72(%ebx),%mm5
+ pfadd %mm5,%mm4
movq 16(%ebx),%mm2
pfmul 16(%ecx),%mm2
pfadd %mm2,%mm0
+ movq 80(%ebx),%mm6
+ pfmul 144(%ecx),%mm6
+ pfadd %mm6,%mm4
movq 24(%ecx),%mm3
pfmul 24(%ebx),%mm3
pfadd %mm3,%mm0
+ movq 152(%ecx),%mm7
+ pfmul 88(%ebx),%mm7
+ pfadd %mm7,%mm4
! movq 32(%ebx),%mm1
! pfmul 32(%ecx),%mm1
! pfadd %mm1,%mm0
! movq 96(%ebx),%mm5
! pfmul 160(%ecx),%mm5
! pfadd %mm5,%mm4
!
! movq 40(%ecx),%mm2
! pfmul 40(%ebx),%mm2
! pfadd %mm2,%mm0
! movq 168(%ecx),%mm6
! pfmul 104(%ebx),%mm6
! pfadd %mm6,%mm4
! movq 48(%ebx),%mm3
! pfmul 48(%ecx),%mm3
! pfadd %mm3,%mm0
! movq 112(%ebx),%mm7
! pfmul 176(%ecx),%mm7
! pfadd %mm7,%mm4
! movq 56(%ecx),%mm1
! pfmul 56(%ebx),%mm1
! pfadd %mm1,%mm0
! movq 184(%ecx),%mm5
! pfmul 120(%ebx),%mm5
! pfadd %mm5,%mm4
!
! pfnacc %mm4, %mm0
! movq (%esi), %mm1
! pf2id %mm0, %mm0
! pand one_null, %mm1
! psrld $16,%mm0
! pand null_one, %mm0
! por %mm0, %mm1
! movq %mm1,(%esi)
!
! addl $128,%ebx
! addl $256,%ecx
! addl $8,%esi
decl %ebp
jnz .L55
***************
*** 152,155 ****
--- 182,187 ----
/ --- end of loop 1 ---
+ prefetchw (%esi) /* prefetching for writing this block and next loop */
+
movd (%ecx),%mm0
pfmul (%ebx),%mm0
***************
*** 190,213 ****
movw %ax,(%esi)
! addl $-64,%ebx
addl $4,%esi
addl $256,%ecx
! movl $15,%ebp
.L68:
pxor %mm0, %mm0
movq (%ecx),%mm1
pfmul (%ebx),%mm1
pfsub %mm1,%mm0
movq 8(%ecx),%mm2
pfmul 8(%ebx),%mm2
pfsub %mm2,%mm0
movq 16(%ecx),%mm3
pfmul 16(%ebx),%mm3
pfsub %mm3,%mm0
movq 24(%ecx),%mm4
pfmul 24(%ebx),%mm4
--- 222,321 ----
movw %ax,(%esi)
! subl $64,%ebx
addl $4,%esi
addl $256,%ecx
! movl $7,%ebp
.L68:
pxor %mm0, %mm0
+ pxor %mm4, %mm4
movq (%ecx),%mm1
pfmul (%ebx),%mm1
pfsub %mm1,%mm0
+ movq 128(%ecx),%mm5
+ pfmul -64(%ebx),%mm5
+ pfsub %mm5,%mm4
movq 8(%ecx),%mm2
pfmul 8(%ebx),%mm2
pfsub %mm2,%mm0
+ movq 136(%ecx),%mm6
+ pfmul -56(%ebx),%mm6
+ pfsub %mm6,%mm4
movq 16(%ecx),%mm3
pfmul 16(%ebx),%mm3
pfsub %mm3,%mm0
+ movq 144(%ecx),%mm7
+ pfmul -48(%ebx),%mm7
+ pfsub %mm7,%mm4
+ movq 24(%ecx),%mm1
+ pfmul 24(%ebx),%mm1
+ pfsub %mm1,%mm0
+ movq 152(%ecx),%mm5
+ pfmul -40(%ebx),%mm5
+ pfsub %mm5,%mm4
+
+ movq 32(%ecx),%mm2
+ pfmul 32(%ebx),%mm2
+ pfsub %mm2,%mm0
+ movq 160(%ecx),%mm6
+ pfmul -32(%ebx),%mm6
+ pfsub %mm6,%mm4
+
+ movq 40(%ecx),%mm3
+ pfmul 40(%ebx),%mm3
+ pfsub %mm3,%mm0
+ movq 168(%ecx),%mm7
+ pfmul -24(%ebx),%mm7
+ pfsub %mm7,%mm4
+
+ movq 48(%ecx),%mm1
+ pfmul 48(%ebx),%mm1
+ pfsub %mm1,%mm0
+ movq 176(%ecx),%mm5
+ pfmul -16(%ebx),%mm5
+ pfsub %mm5,%mm4
+
+ movq 56(%ecx),%mm2
+ pfmul 56(%ebx),%mm2
+ pfsub %mm2,%mm0
+ movq 184(%ecx),%mm6
+ pfmul -8(%ebx),%mm6
+ pfsub %mm6,%mm4
+
+ pfacc %mm4,%mm0
+ movq (%esi), %mm1
+ pf2id %mm0, %mm0
+ pand one_null, %mm1
+ psrld $16,%mm0
+ pand null_one, %mm0
+ por %mm0, %mm1
+ movq %mm1,(%esi)
+
+ subl $128,%ebx
+ addl $256,%ecx
+ addl $8,%esi
+ decl %ebp
+ jnz .L68
+
+ / --- end of loop 2
+
+ pxor %mm0, %mm0
+
+ movq (%ecx),%mm1
+ pfmul (%ebx),%mm1
+ pfsub %mm1,%mm0
+
+ movq 8(%ecx),%mm2
+ pfmul 8(%ebx),%mm2
+ pfsub %mm2,%mm0
+
+ movq 16(%ecx),%mm3
+ pfmul 16(%ebx),%mm3
+ pfsub %mm3,%mm0
+
movq 24(%ecx),%mm4
pfmul 24(%ebx),%mm4
***************
*** 238,249 ****
movw %ax,(%esi)
-
- addl $-64,%ebx
- subl $-128,%ecx
- addl $4,%esi
- decl %ebp
- jnz .L68
-
- / --- end of loop 2
femms
--- 346,349 ----
_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog
More information about the MPlayer-cvslog
mailing list