[Mplayer-cvslog] CVS: main/libac3/downmix downmix_3dnow.S,NONE,1.1
Nick Kurshev
nickols_k at users.sourceforge.net
Thu May 17 18:40:31 CEST 2001
Update of /cvsroot/mplayer/main/libac3/downmix
In directory usw-pr-cvs1:/tmp/cvs-serv25710/main/libac3/downmix
Added Files:
downmix_3dnow.S
Log Message:
libac3 3DNow! optimizations! They reduce the CPU load on my Duron from 4.5 to 3.5 percent.
--- NEW FILE ---
/*
* downmix_3dnow.S
*
* Replacement of downmix_kni.S with AMD's 3DNow! SIMD operations support
*
* Modified by Nick Kurshev <nickols_k at mail.ru>
*
* Copyright (C) Yuqing Deng <Yuqing_Deng at brown.edu> - April 2000
*
* downmix_3dnow.S is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* downmix_3dnow.S is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
/* Read-only constant used by the sample-conversion code in this file. */
.section .rodata
.align 4
/*
 * NOTE: despite its name, this symbol holds 0.7071068, i.e. approximately
 * 1/sqrt(2), not sqrt(2).  stream_sample_1ch_to_s16 loads it and multiplies
 * every input sample by it before converting to 16-bit integers.
 */
sqrt2: .float 0f0.7071068
/* Pad to a 2^5 = 32-byte boundary using zero fill bytes. */
.p2align 5,0,
.section .text
.align 4

/*
 * downmix_3f_2r_to_2ch -- in-place downmix of five sample planes to two,
 * using packed-float 3DNow! arithmetic.
 *
 * Stack arguments (frame pointer in %ebp):
 *    8(%ebp)  samples  base address of the float sample planes
 *   12(%ebp)  dm_par   three floats: unit (+0), clev (+4), slev (+8)
 *
 * Planes are 1024 bytes apart:
 *   +0 left, +1024 center, +2048 right, +3072 left surround,
 *   +4096 right surround
 *
 * Written back over the first two planes:
 *   [+0]    = unit*left  + clev*center + slev*left_surround
 *   [+1024] = unit*right + clev*center + slev*right_surround
 *
 * The loop runs 128 times and handles two floats per pass.
 * %eax, %ebx and %ecx are saved and restored.  MMX registers are
 * clobbered; FEMMS is executed on entry and again before returning.
 */
.global downmix_3f_2r_to_2ch
.type downmix_3f_2r_to_2ch, @function
downmix_3f_2r_to_2ch:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %eax
        pushl   %ebx
        pushl   %ecx
        femms

        movl    12(%ebp), %ebx                  /* ebx = dm_par */
        movl    8(%ebp), %eax                   /* eax = samples (fixed base) */

        /* Duplicate each coefficient into both halves of a register. */
        movd    (%ebx), %mm5
        movd    4(%ebx), %mm6
        movd    8(%ebx), %mm7
        punpckldq %mm5, %mm5                    /* unit | unit */
        punpckldq %mm6, %mm6                    /* clev | clev */
        punpckldq %mm7, %mm7                    /* slev | slev */

        xorl    %ecx, %ecx                      /* ecx = float-pair index */
.Ldm_3f_2r_pair:
        movq    (%eax,%ecx,8), %mm0             /* left */
        movq    1024(%eax,%ecx,8), %mm2         /* center */
        movq    2048(%eax,%ecx,8), %mm1         /* right */
        movq    3072(%eax,%ecx,8), %mm3         /* left surround */
        movq    4096(%eax,%ecx,8), %mm4         /* right surround */

        pfmul   %mm5, %mm0                      /* unit * left */
        pfmul   %mm5, %mm1                      /* unit * right */
        pfmul   %mm6, %mm2                      /* clev * center */
        pfmul   %mm7, %mm3                      /* slev * left surround */
        pfmul   %mm7, %mm4                      /* slev * right surround */

        /* Center is added first, then surround, on both outputs. */
        pfadd   %mm2, %mm0
        pfadd   %mm2, %mm1
        pfadd   %mm3, %mm0
        pfadd   %mm4, %mm1

        movq    %mm0, (%eax,%ecx,8)             /* output 0 replaces left */
        movq    %mm1, 1024(%eax,%ecx,8)         /* output 1 replaces center */

        incl    %ecx
        cmpl    $128, %ecx
        jne     .Ldm_3f_2r_pair

        popl    %ecx
        popl    %ebx
        popl    %eax
        femms
        leave
        ret
.p2align 4,,7

/*
 * downmix_2f_2r_to_2ch -- in-place downmix of four sample planes to two,
 * using packed-float 3DNow! arithmetic.
 *
 * Stack arguments (frame pointer in %ebp):
 *    8(%ebp)  samples  base address of the float sample planes
 *   12(%ebp)  dm_par   floats read here: unit (+0) and slev (+8)
 *
 * Planes are 1024 bytes apart:
 *   +0 left, +1024 right, +2048 left surround, +3072 right surround
 *
 * Written back over the first two planes:
 *   [+0]    = unit*left  + slev*left_surround
 *   [+1024] = unit*right + slev*right_surround
 *
 * The loop runs 128 times and handles two floats per pass.
 * %eax, %ebx and %ecx are saved and restored.  MMX registers are
 * clobbered; FEMMS is executed on entry and again before returning.
 */
.global downmix_2f_2r_to_2ch
.type downmix_2f_2r_to_2ch, @function
downmix_2f_2r_to_2ch:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %eax
        pushl   %ebx
        pushl   %ecx
        femms

        movl    12(%ebp), %ebx                  /* ebx = dm_par */
        movl    8(%ebp), %eax                   /* eax = samples (fixed base) */

        /* Duplicate each coefficient into both halves of a register. */
        movd    (%ebx), %mm5
        movd    8(%ebx), %mm7
        punpckldq %mm5, %mm5                    /* unit | unit */
        punpckldq %mm7, %mm7                    /* slev | slev */

        xorl    %ecx, %ecx                      /* ecx = float-pair index */
.Ldm_2f_2r_pair:
        movq    (%eax,%ecx,8), %mm0             /* left */
        movq    1024(%eax,%ecx,8), %mm1         /* right */
        movq    2048(%eax,%ecx,8), %mm3         /* left surround */
        movq    3072(%eax,%ecx,8), %mm4         /* right surround */

        pfmul   %mm5, %mm0                      /* unit * left */
        pfmul   %mm5, %mm1                      /* unit * right */
        pfmul   %mm7, %mm3                      /* slev * left surround */
        pfmul   %mm7, %mm4                      /* slev * right surround */

        pfadd   %mm3, %mm0
        pfadd   %mm4, %mm1

        movq    %mm0, (%eax,%ecx,8)             /* output 0 replaces left */
        movq    %mm1, 1024(%eax,%ecx,8)         /* output 1 replaces right */

        incl    %ecx
        cmpl    $128, %ecx
        jne     .Ldm_2f_2r_pair

        popl    %ecx
        popl    %ebx
        popl    %eax
        femms
        leave
        ret
.p2align 4,,7

/*
 * downmix_3f_1r_to_2ch -- in-place downmix of four sample planes to two,
 * using packed-float 3DNow! arithmetic.  The single surround plane is
 * subtracted from the first output and added to the second.
 *
 * Stack arguments (frame pointer in %ebp):
 *    8(%ebp)  samples  base address of the float sample planes
 *   12(%ebp)  dm_par   three floats: unit (+0), clev (+4), slev (+8)
 *
 * Planes are 1024 bytes apart:
 *   +0 left, +1024 center, +2048 right, +3072 surround
 *
 * Written back over the first two planes:
 *   [+0]    = unit*left  + clev*center - slev*surround
 *   [+1024] = unit*right + clev*center + slev*surround
 *
 * The loop runs 128 times and handles two floats per pass.
 * %eax, %ebx and %ecx are saved and restored.  MMX registers are
 * clobbered; FEMMS is executed on entry and again before returning.
 */
.global downmix_3f_1r_to_2ch
.type downmix_3f_1r_to_2ch, @function
downmix_3f_1r_to_2ch:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %eax
        pushl   %ebx
        pushl   %ecx
        femms

        movl    12(%ebp), %ebx                  /* ebx = dm_par */
        movl    8(%ebp), %eax                   /* eax = samples (fixed base) */

        /* Duplicate each coefficient into both halves of a register. */
        movd    (%ebx), %mm5
        movd    4(%ebx), %mm6
        movd    8(%ebx), %mm7
        punpckldq %mm5, %mm5                    /* unit | unit */
        punpckldq %mm6, %mm6                    /* clev | clev */
        punpckldq %mm7, %mm7                    /* slev | slev */

        xorl    %ecx, %ecx                      /* ecx = float-pair index */
.Ldm_3f_1r_pair:
        movq    (%eax,%ecx,8), %mm0             /* left */
        movq    1024(%eax,%ecx,8), %mm2         /* center */
        movq    2048(%eax,%ecx,8), %mm1         /* right */
        movq    3072(%eax,%ecx,8), %mm3         /* surround */

        pfmul   %mm5, %mm0                      /* unit * left */
        pfmul   %mm5, %mm1                      /* unit * right */
        pfmul   %mm6, %mm2                      /* clev * center */
        pfmul   %mm7, %mm3                      /* slev * surround */

        /* Center first, then the surround term with opposite signs. */
        pfadd   %mm2, %mm0
        pfadd   %mm2, %mm1
        pfsub   %mm3, %mm0                      /* mm0 -= slev * surround */
        pfadd   %mm3, %mm1                      /* mm1 += slev * surround */

        movq    %mm0, (%eax,%ecx,8)             /* output 0 replaces left */
        movq    %mm1, 1024(%eax,%ecx,8)         /* output 1 replaces center */

        incl    %ecx
        cmpl    $128, %ecx
        jne     .Ldm_3f_1r_pair

        popl    %ecx
        popl    %ebx
        popl    %eax
        femms
        leave
        ret
.p2align 4,,7

/*
 * downmix_2f_1r_to_2ch -- in-place downmix of three sample planes to two,
 * using packed-float 3DNow! arithmetic.  The single surround plane is
 * subtracted from the first output and added to the second.
 *
 * Stack arguments (frame pointer in %ebp):
 *    8(%ebp)  samples  base address of the float sample planes
 *   12(%ebp)  dm_par   floats read here: unit (+0) and slev (+8)
 *
 * Planes are 1024 bytes apart:
 *   +0 left, +1024 right, +2048 surround
 *
 * Written back over the first two planes:
 *   [+0]    = unit*left  - slev*surround
 *   [+1024] = unit*right + slev*surround
 *
 * The loop runs 128 times and handles two floats per pass.
 * %eax, %ebx and %ecx are saved and restored.  MMX registers are
 * clobbered; FEMMS is executed on entry and again before returning.
 */
.global downmix_2f_1r_to_2ch
.type downmix_2f_1r_to_2ch, @function
downmix_2f_1r_to_2ch:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %eax
        pushl   %ebx
        pushl   %ecx
        femms

        movl    12(%ebp), %ebx                  /* ebx = dm_par */
        movl    8(%ebp), %eax                   /* eax = samples (fixed base) */

        /* Duplicate each coefficient into both halves of a register. */
        movd    (%ebx), %mm5
        movd    8(%ebx), %mm7
        punpckldq %mm5, %mm5                    /* unit | unit */
        punpckldq %mm7, %mm7                    /* slev | slev */

        xorl    %ecx, %ecx                      /* ecx = float-pair index */
.Ldm_2f_1r_pair:
        movq    (%eax,%ecx,8), %mm0             /* left */
        movq    1024(%eax,%ecx,8), %mm1         /* right */
        movq    2048(%eax,%ecx,8), %mm3         /* surround */

        pfmul   %mm5, %mm0                      /* unit * left */
        pfmul   %mm5, %mm1                      /* unit * right */
        pfmul   %mm7, %mm3                      /* slev * surround */

        pfsub   %mm3, %mm0                      /* mm0 -= slev * surround */
        pfadd   %mm3, %mm1                      /* mm1 += slev * surround */

        movq    %mm0, (%eax,%ecx,8)             /* output 0 replaces left */
        movq    %mm1, 1024(%eax,%ecx,8)         /* output 1 replaces right */

        incl    %ecx
        cmpl    $128, %ecx
        jne     .Ldm_2f_1r_pair

        popl    %ecx
        popl    %ebx
        popl    %eax
        femms
        leave
        ret
.p2align 4,,7

/*
 * downmix_3f_0r_to_2ch -- in-place downmix of three sample planes to two,
 * using packed-float 3DNow! arithmetic.
 *
 * Stack arguments (frame pointer in %ebp):
 *    8(%ebp)  samples  base address of the float sample planes
 *   12(%ebp)  dm_par   floats read here: unit (+0) and clev (+4)
 *
 * Planes are 1024 bytes apart:
 *   +0 left, +1024 center, +2048 right
 *
 * Written back over the first two planes:
 *   [+0]    = unit*left  + clev*center
 *   [+1024] = unit*right + clev*center
 *
 * The loop runs 128 times and handles two floats per pass.
 * %eax, %ebx and %ecx are saved and restored.  MMX registers are
 * clobbered; FEMMS is executed on entry and again before returning.
 */
.global downmix_3f_0r_to_2ch
.type downmix_3f_0r_to_2ch, @function
downmix_3f_0r_to_2ch:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %eax
        pushl   %ebx
        pushl   %ecx
        femms

        movl    12(%ebp), %ebx                  /* ebx = dm_par */
        movl    8(%ebp), %eax                   /* eax = samples (fixed base) */

        /* Duplicate each coefficient into both halves of a register. */
        movd    (%ebx), %mm5
        movd    4(%ebx), %mm6
        punpckldq %mm5, %mm5                    /* unit | unit */
        punpckldq %mm6, %mm6                    /* clev | clev */

        xorl    %ecx, %ecx                      /* ecx = float-pair index */
.Ldm_3f_0r_pair:
        movq    (%eax,%ecx,8), %mm0             /* left */
        movq    1024(%eax,%ecx,8), %mm2         /* center */
        movq    2048(%eax,%ecx,8), %mm1         /* right */

        pfmul   %mm5, %mm0                      /* unit * left */
        pfmul   %mm5, %mm1                      /* unit * right */
        pfmul   %mm6, %mm2                      /* clev * center */

        pfadd   %mm2, %mm0
        pfadd   %mm2, %mm1

        movq    %mm0, (%eax,%ecx,8)             /* output 0 replaces left */
        movq    %mm1, 1024(%eax,%ecx,8)         /* output 1 replaces center */

        incl    %ecx
        cmpl    $128, %ecx
        jne     .Ldm_3f_0r_pair

        popl    %ecx
        popl    %ebx
        popl    %eax
        femms
        leave
        ret
.p2align 4,,7

/*
 * stream_sample_2ch_to_s16 -- convert two float planes into one
 * interleaved stream of signed 16-bit samples.
 *
 * Stack arguments (frame pointer in %ebp):
 *    8(%ebp)  s16_samples  destination, receives l0 r0 l1 r1 ...
 *   12(%ebp)  left         source floats for the first channel
 *   16(%ebp)  right        source floats for the second channel
 *
 * Each of the 128 passes reads two floats from each source, converts
 * them to 32-bit integers with PF2ID, narrows them to 16 bits with signed
 * saturation (PACKSSDW) and stores four 16-bit values (8 bytes).
 *
 * %eax, %ebx, %ecx and %edx are saved and restored.  MMX registers are
 * clobbered; FEMMS is executed on entry and again before returning.
 */
.global stream_sample_2ch_to_s16
.type stream_sample_2ch_to_s16, @function
stream_sample_2ch_to_s16:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %eax
        pushl   %ebx
        pushl   %edx
        pushl   %ecx
        femms

        movl    16(%ebp), %edx                  /* edx = right */
        movl    12(%ebp), %ebx                  /* ebx = left */
        movl    8(%ebp), %eax                   /* eax = s16_samples */

        xorl    %ecx, %ecx                      /* ecx = sample-pair index */
.Ls16_2ch_pair:
        movq    (%ebx,%ecx,8), %mm0             /* l1 | l0 */
        movq    (%edx,%ecx,8), %mm1             /* r1 | r0 */
        movq    %mm0, %mm2                      /* l1 | l0 */
        punpckldq %mm1, %mm0                    /* r0 | l0 */
        punpckhdq %mm1, %mm2                    /* r1 | l1 */
        pf2id   %mm0, %mm0                      /* r0 | l0 as int32 */
        pf2id   %mm2, %mm2                      /* r1 | l1 as int32 */
        packssdw %mm2, %mm0                     /* r1 l1 r0 l0 as int16 */
        movq    %mm0, (%eax,%ecx,8)

        incl    %ecx
        cmpl    $128, %ecx
        jne     .Ls16_2ch_pair

        popl    %ecx
        popl    %edx
        popl    %ebx
        popl    %eax
        femms
        leave
        ret
.p2align 4,,7

/*
 * stream_sample_1ch_to_s16 -- convert one float plane into an interleaved
 * two-channel stream of signed 16-bit samples, writing every sample twice
 * (c0 c0 c1 c1 ...).
 *
 * Stack arguments (frame pointer in %ebp):
 *    8(%ebp)  s16_samples  destination buffer
 *   12(%ebp)  source floats (single channel)
 *
 * Every input sample is multiplied by the constant stored at `sqrt2`
 * (0.7071068), converted to a 32-bit integer with PF2ID and narrowed to
 * 16 bits with signed saturation.  Each of the 128 passes consumes two
 * floats and stores four 16-bit values (8 bytes).
 *
 * Fix over the previous version: PACKSSDW with the same register as
 * source and destination yields the word order c0 c1 c0 c1, so each
 * output frame carried two different samples and the pair was emitted
 * twice.  A PUNPCKLWD is now applied afterwards to obtain c0 c0 c1 c1.
 *
 * %eax, %ebx and %ecx are saved and restored.  MMX registers are
 * clobbered; FEMMS is executed on entry and again before returning.
 */
.global stream_sample_1ch_to_s16
.type stream_sample_1ch_to_s16, @function
stream_sample_1ch_to_s16:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %eax
        pushl   %ebx
        pushl   %ecx
        femms

        movd    sqrt2, %mm7                     /* scale factor (see .rodata) */
        punpckldq %mm7, %mm7                    /* scale | scale */

        movl    8(%ebp), %eax                   /* eax = s16_samples */
        movl    12(%ebp), %ebx                  /* ebx = source floats */
        movl    $128, %ecx                      /* 128 passes, 2 samples each */
.Ls16_1ch_pair:
        movq    (%ebx), %mm0                    /* c1 | c0 */
        pfmul   %mm7, %mm0                      /* apply scale factor */
        pf2id   %mm0, %mm0                      /* c1 | c0 as int32 */
        packssdw %mm0, %mm0                     /* words low->high: c0 c1 c0 c1 */
        punpcklwd %mm0, %mm0                    /* words low->high: c0 c0 c1 c1 */
        movq    %mm0, (%eax)
        addl    $8, %eax
        addl    $8, %ebx
        decl    %ecx
        jnz     .Ls16_1ch_pair

        popl    %ecx
        popl    %ebx
        popl    %eax
        femms
        leave
        ret
.size stream_sample_1ch_to_s16, .-stream_sample_1ch_to_s16
_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog
More information about the MPlayer-cvslog
mailing list