[Mplayer-cvslog] CVS: main/libac3/downmix downmix_3dnow.S,NONE,1.1
    Nick Kurshev 
    nickols_k at users.sourceforge.net
       
    Thu May 17 18:40:31 CEST 2001
    
    
  
Update of /cvsroot/mplayer/main/libac3/downmix
In directory usw-pr-cvs1:/tmp/cvs-serv25710/main/libac3/downmix
Added Files:
	downmix_3dnow.S 
Log Message:
libac3 3DNow! optimizations! They reduce the CPU load on my Duron from 4.5 to 3.5 percent.
--- NEW FILE ---
/*
 *  downmix_3dnow.S
 *
 *  Replacement of downmix_kni.S with AMD's 3DNow! SIMD operations support
 *
 *  Modified by Nick Kurshev <nickols_k at mail.ru>
 *
 *  Copyright (C) Yuqing Deng <Yuqing_Deng at brown.edu> - April 2000
 *
 *  downmix_3dnow.S is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  downmix_3dnow.S is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */
/* Read-only constants used by the routines in this file. */
.section .rodata
	.align 4
/* Single-precision scale factor 0.7071068, i.e. approximately 1/sqrt(2)
 * (despite the label name).  stream_sample_1ch_to_s16 loads it and
 * multiplies every input sample by it. */
sqrt2:	.float 0f0.7071068
	/* Pad the section with zero bytes up to the next 32-byte boundary. */
	.p2align 5,0,
	
	/* Everything that follows is code. */
	.section .text
	
	.align 4
	.global downmix_3f_2r_to_2ch
	.type downmix_3f_2r_to_2ch, @function
/*
 * downmix_3f_2r_to_2ch(samples, dm_par)
 *
 * Arguments are read from the stack:
 *    8(%ebp)  samples  base of the channel blocks, laid out 1024 bytes
 *                      apart: left, center, right, leftsur, rightsur
 *   12(%ebp)  dm_par   three consecutive floats: unit, clev, slev
 *
 * In-place downmix, two floats per iteration, 128 iterations:
 *   out_left  = unit*left  + clev*center + slev*leftsur   -> block 0
 *   out_right = unit*right + clev*center + slev*rightsur  -> block 1
 *
 * %eax, %ebx and %ecx are saved and restored; the MMX state is cleared
 * with femms on entry and on exit.
 */
downmix_3f_2r_to_2ch:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx
	femms

	movl	12(%ebp), %ebx		/* ebx = &dm_par */
	movl	8(%ebp), %eax		/* eax = samples */
	movd	(%ebx), %mm5
	movd	4(%ebx), %mm6
	movd	8(%ebx), %mm7
	punpckldq %mm5, %mm5		/* mm5 = unit | unit */
	punpckldq %mm6, %mm6		/* mm6 = clev | clev */
	punpckldq %mm7, %mm7		/* mm7 = slev | slev */
	movl	$128, %ecx		/* pairs of samples left to do */

.Ldm_3f_2r_pair:
	/* fetch one pair of samples from every channel */
	movq	(%eax), %mm0		/* left */
	movq	1024(%eax), %mm2	/* center */
	movq	2048(%eax), %mm1	/* right */
	movq	3072(%eax), %mm3	/* leftsur */
	movq	4096(%eax), %mm4	/* rightsur */

	/* apply the per-channel gains */
	pfmul	%mm5, %mm0
	pfmul	%mm5, %mm1
	pfmul	%mm6, %mm2
	pfmul	%mm7, %mm3
	pfmul	%mm7, %mm4

	/* mix: center first, then the matching surround channel */
	pfadd	%mm2, %mm0
	pfadd	%mm2, %mm1
	pfadd	%mm3, %mm0
	pfadd	%mm4, %mm1

	/* the right output reuses the center block, which was already read */
	movq	%mm0, (%eax)
	movq	%mm1, 1024(%eax)

	addl	$8, %eax
	decl	%ecx
	jnz	.Ldm_3f_2r_pair

	popl	%ecx
	popl	%ebx
	popl	%eax
	femms
	leave
	ret
	.size downmix_3f_2r_to_2ch, .-downmix_3f_2r_to_2ch
	.p2align 4,,7
	.global downmix_2f_2r_to_2ch
	.type downmix_2f_2r_to_2ch, @function
/*
 * downmix_2f_2r_to_2ch(samples, dm_par)
 *
 * Arguments are read from the stack:
 *    8(%ebp)  samples  base of the channel blocks, laid out 1024 bytes
 *                      apart: left, right, leftsur, rightsur
 *   12(%ebp)  dm_par   floats at offset 0 (unit) and offset 8 (slev)
 *
 * In-place downmix, two floats per iteration, 128 iterations:
 *   out_left  = unit*left  + slev*leftsur   -> block 0
 *   out_right = unit*right + slev*rightsur  -> block 1
 *
 * %eax, %ebx and %ecx are saved and restored; the MMX state is cleared
 * with femms on entry and on exit.
 */
downmix_2f_2r_to_2ch:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx
	femms

	movl	12(%ebp), %ebx		/* ebx = &dm_par */
	movl	8(%ebp), %eax		/* eax = samples */
	movd	(%ebx), %mm5
	movd	8(%ebx), %mm7
	punpckldq %mm5, %mm5		/* mm5 = unit | unit */
	punpckldq %mm7, %mm7		/* mm7 = slev | slev */
	movl	$128, %ecx		/* pairs of samples left to do */

.Ldm_2f_2r_pair:
	movq	(%eax), %mm0		/* left */
	movq	1024(%eax), %mm1	/* right */
	movq	2048(%eax), %mm3	/* leftsur */
	movq	3072(%eax), %mm4	/* rightsur */

	pfmul	%mm5, %mm0
	pfmul	%mm5, %mm1
	pfmul	%mm7, %mm3
	pfmul	%mm7, %mm4

	pfadd	%mm3, %mm0
	pfadd	%mm4, %mm1

	movq	%mm0, (%eax)
	movq	%mm1, 1024(%eax)

	addl	$8, %eax
	decl	%ecx
	jnz	.Ldm_2f_2r_pair

	popl	%ecx
	popl	%ebx
	popl	%eax
	femms
	leave
	ret
	.size downmix_2f_2r_to_2ch, .-downmix_2f_2r_to_2ch
	.p2align 4,,7

	.global downmix_3f_1r_to_2ch
	.type downmix_3f_1r_to_2ch, @function
/*
 * downmix_3f_1r_to_2ch(samples, dm_par)
 *
 * Arguments are read from the stack:
 *    8(%ebp)  samples  base of the channel blocks, laid out 1024 bytes
 *                      apart: left, center, right, sur
 *   12(%ebp)  dm_par   three consecutive floats: unit, clev, slev
 *
 * In-place downmix, two floats per iteration, 128 iterations.  The single
 * surround channel is subtracted from the left and added to the right:
 *   out_left  = unit*left  + clev*center - slev*sur  -> block 0
 *   out_right = unit*right + clev*center + slev*sur  -> block 1
 *
 * %eax, %ebx and %ecx are saved and restored; the MMX state is cleared
 * with femms on entry and on exit.
 */
downmix_3f_1r_to_2ch:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx
	femms

	movl	12(%ebp), %ebx		/* ebx = &dm_par */
	movl	8(%ebp), %eax		/* eax = samples */
	movd	(%ebx), %mm5
	movd	4(%ebx), %mm6
	movd	8(%ebx), %mm7
	punpckldq %mm5, %mm5		/* mm5 = unit | unit */
	punpckldq %mm6, %mm6		/* mm6 = clev | clev */
	punpckldq %mm7, %mm7		/* mm7 = slev | slev */
	movl	$128, %ecx		/* pairs of samples left to do */

.Ldm_3f_1r_pair:
	movq	(%eax), %mm0		/* left */
	movq	1024(%eax), %mm2	/* center */
	movq	2048(%eax), %mm1	/* right */
	movq	3072(%eax), %mm3	/* sur */

	pfmul	%mm5, %mm0
	pfmul	%mm5, %mm1
	pfmul	%mm6, %mm2
	pfmul	%mm7, %mm3

	pfadd	%mm2, %mm0
	pfadd	%mm2, %mm1
	pfsub	%mm3, %mm0		/* left gets the surround inverted */
	pfadd	%mm3, %mm1

	/* the right output reuses the center block, which was already read */
	movq	%mm0, (%eax)
	movq	%mm1, 1024(%eax)

	addl	$8, %eax
	decl	%ecx
	jnz	.Ldm_3f_1r_pair

	popl	%ecx
	popl	%ebx
	popl	%eax
	femms
	leave
	ret
	.size downmix_3f_1r_to_2ch, .-downmix_3f_1r_to_2ch
	.p2align 4,,7

	.global downmix_2f_1r_to_2ch
	.type downmix_2f_1r_to_2ch, @function
/*
 * downmix_2f_1r_to_2ch(samples, dm_par)
 *
 * Arguments are read from the stack:
 *    8(%ebp)  samples  base of the channel blocks, laid out 1024 bytes
 *                      apart: left, right, sur
 *   12(%ebp)  dm_par   floats at offset 0 (unit) and offset 8 (slev)
 *
 * In-place downmix, two floats per iteration, 128 iterations.  The single
 * surround channel is subtracted from the left and added to the right:
 *   out_left  = unit*left  - slev*sur  -> block 0
 *   out_right = unit*right + slev*sur  -> block 1
 *
 * %eax, %ebx and %ecx are saved and restored; the MMX state is cleared
 * with femms on entry and on exit.
 */
downmix_2f_1r_to_2ch:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx
	femms

	movl	12(%ebp), %ebx		/* ebx = &dm_par */
	movl	8(%ebp), %eax		/* eax = samples */
	movd	(%ebx), %mm5
	movd	8(%ebx), %mm7
	punpckldq %mm5, %mm5		/* mm5 = unit | unit */
	punpckldq %mm7, %mm7		/* mm7 = slev | slev */
	movl	$128, %ecx		/* pairs of samples left to do */

.Ldm_2f_1r_pair:
	movq	(%eax), %mm0		/* left */
	movq	1024(%eax), %mm1	/* right */
	movq	2048(%eax), %mm3	/* sur */

	pfmul	%mm5, %mm0
	pfmul	%mm5, %mm1
	pfmul	%mm7, %mm3

	pfsub	%mm3, %mm0		/* left gets the surround inverted */
	pfadd	%mm3, %mm1

	movq	%mm0, (%eax)
	movq	%mm1, 1024(%eax)

	addl	$8, %eax
	decl	%ecx
	jnz	.Ldm_2f_1r_pair

	popl	%ecx
	popl	%ebx
	popl	%eax
	femms
	leave
	ret
	.size downmix_2f_1r_to_2ch, .-downmix_2f_1r_to_2ch
	.p2align 4,,7

	.global downmix_3f_0r_to_2ch
	.type downmix_3f_0r_to_2ch, @function
/*
 * downmix_3f_0r_to_2ch(samples, dm_par)
 *
 * Arguments are read from the stack:
 *    8(%ebp)  samples  base of the channel blocks, laid out 1024 bytes
 *                      apart: left, center, right
 *   12(%ebp)  dm_par   floats at offset 0 (unit) and offset 4 (clev)
 *
 * In-place downmix, two floats per iteration, 128 iterations:
 *   out_left  = unit*left  + clev*center  -> block 0
 *   out_right = unit*right + clev*center  -> block 1
 *
 * %eax, %ebx and %ecx are saved and restored; the MMX state is cleared
 * with femms on entry and on exit.
 */
downmix_3f_0r_to_2ch:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx
	femms

	movl	12(%ebp), %ebx		/* ebx = &dm_par */
	movl	8(%ebp), %eax		/* eax = samples */
	movd	(%ebx), %mm5
	movd	4(%ebx), %mm6
	punpckldq %mm5, %mm5		/* mm5 = unit | unit */
	punpckldq %mm6, %mm6		/* mm6 = clev | clev */
	movl	$128, %ecx		/* pairs of samples left to do */

.Ldm_3f_0r_pair:
	movq	(%eax), %mm0		/* left */
	movq	1024(%eax), %mm2	/* center */
	movq	2048(%eax), %mm1	/* right */

	pfmul	%mm5, %mm0
	pfmul	%mm5, %mm1
	pfmul	%mm6, %mm2

	pfadd	%mm2, %mm0
	pfadd	%mm2, %mm1

	/* the right output reuses the center block, which was already read */
	movq	%mm0, (%eax)
	movq	%mm1, 1024(%eax)

	addl	$8, %eax
	decl	%ecx
	jnz	.Ldm_3f_0r_pair

	popl	%ecx
	popl	%ebx
	popl	%eax
	femms
	leave
	ret
	.size downmix_3f_0r_to_2ch, .-downmix_3f_0r_to_2ch
	.p2align 4,,7

	.global stream_sample_2ch_to_s16
	.type stream_sample_2ch_to_s16, @function
/*
 * stream_sample_2ch_to_s16(s16_samples, left, right)
 *
 * Arguments are read from the stack:
 *    8(%ebp)  s16_samples  destination for interleaved 16-bit samples
 *   12(%ebp)  left         source floats, left channel
 *   16(%ebp)  right        source floats, right channel
 *
 * Each of the 128 iterations reads two floats from each channel, converts
 * them to 32-bit integers with pf2id, packs them to 16 bits with signed
 * saturation and stores them in the order l0, r0, l1, r1.
 *
 * %eax, %ebx, %ecx and %edx are saved and restored; the MMX state is
 * cleared with femms on entry and on exit.
 */
stream_sample_2ch_to_s16:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%edx
	pushl	%ecx
	femms

	movl	16(%ebp), %edx		/* edx = right */
	movl	12(%ebp), %ebx		/* ebx = left */
	movl	8(%ebp), %eax		/* eax = s16_samples */
	movl	$128, %ecx		/* pairs of samples left to do */

.Lstereo_s16_pair:
	movq	(%ebx), %mm0		/* l1 | l0 */
	movq	(%edx), %mm1		/* r1 | r0 */
	movq	%mm0, %mm2		/* l1 | l0 */
	punpckldq %mm1, %mm0		/* r0 | l0 */
	punpckhdq %mm1, %mm2		/* r1 | l1 */
	pf2id	%mm0, %mm0		/* r0 | l0 as int32 */
	pf2id	%mm2, %mm2		/* r1 | l1 as int32 */
	packssdw %mm2, %mm0		/* r1 l1 r0 l0 as int16 */
	movq	%mm0, (%eax)

	addl	$8, %ebx
	addl	$8, %edx
	addl	$8, %eax
	decl	%ecx
	jnz	.Lstereo_s16_pair

	popl	%ecx
	popl	%edx
	popl	%ebx
	popl	%eax
	femms
	leave
	ret
	.size stream_sample_2ch_to_s16, .-stream_sample_2ch_to_s16
	.p2align 4,,7

	.global stream_sample_1ch_to_s16
	.type stream_sample_1ch_to_s16, @function
/*
 * stream_sample_1ch_to_s16(s16_samples, center)
 *
 * Arguments are read from the stack:
 *    8(%ebp)  s16_samples  destination for 16-bit samples
 *   12(%ebp)  center       source floats of the single channel
 *
 * Each of the 128 iterations reads two floats, scales them by the constant
 * stored at the label sqrt2 (0.7071068), converts them to 32-bit integers
 * with pf2id, packs them to 16 bits with signed saturation and writes every
 * sample twice, so the stored order is c0, c0, c1, c1.
 *
 * %eax, %ebx and %ecx are saved and restored; the MMX state is cleared
 * with femms on entry and on exit.
 */
stream_sample_1ch_to_s16:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx
	femms

	movd	sqrt2, %mm7
	punpckldq %mm7, %mm7		/* mm7 = scale | scale */
	movl	8(%ebp), %eax		/* eax = s16_samples */
	movl	12(%ebp), %ebx		/* ebx = center */
	movl	$128, %ecx		/* pairs of samples left to do */

.Lmono_s16_pair:
	movq	(%ebx), %mm0		/* c1 | c0 */
	pfmul	%mm7, %mm0
	pf2id	%mm0, %mm0		/* c1 | c0 as int32 */
	/*
	 * Packing the register with itself yields the words c1 c0 c1 c0,
	 * which would store the pair alternating instead of duplicated.
	 * Interleaving the low two words with themselves gives the
	 * intended c1 c1 c0 c0.
	 */
	packssdw %mm0, %mm0		/* c1 c0 c1 c0 as int16 */
	punpcklwd %mm0, %mm0		/* c1 c1 c0 c0 */
	movq	%mm0, (%eax)

	addl	$8, %eax
	addl	$8, %ebx
	decl	%ecx
	jnz	.Lmono_s16_pair

	popl	%ecx
	popl	%ebx
	popl	%eax
	femms
	leave
	ret
	.size stream_sample_1ch_to_s16, .-stream_sample_1ch_to_s16
_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog
    
    
More information about the MPlayer-cvslog
mailing list