[MPlayer-dev-eng] MPlayer and gcc ia32 intrinsics
Michael Niedermayer
michaelni at gmx.at
Wed Nov 23 02:19:15 CET 2005
Hi
On Tue, Nov 22, 2005 at 11:09:42PM +0100, Aurelien Jacobs wrote:
> On Tue, 22 Nov 2005 16:54:52 -0500
> Jason Tackaberry <tack at sault.org> wrote:
>
> > On Tue, 2005-11-22 at 22:34 +0200, Jan Knutar wrote:
> > > They don't work with gcc 2.95.3?
> >
> > What if the SIMD code was simply disabled (fall back to C) for gcc < 3?
>
> IIRC gcc is somewhat buggy about intrinsics and sometimes produces
> very slow code.
Yes, here's an example:
typedef short mmxw __attribute__ ((mode(V4HI)));
typedef int mmxdw __attribute__ ((mode(V2SI)));
mmxdw dw;
mmxw w;
void test(){
w+=w;
dw= (mmxdw)w;
}
gcc 3.4.0:
movq w, %mm1
psllw $1, %mm1
movq %mm1, w
movq w, %mm0
movq %mm0, dw
ret
human:
movq w, %mm1
paddw %mm1,%mm1
movq %mm1, w
movq %mm1,dw
ret
gcc-4.1.0:
test: subl $20, %esp
movl w, %eax
movl w+4, %edx
movl %ebx, 8(%esp)
movl %esi, 12(%esp)
movl %eax, (%esp)
movl %edx, 4(%esp)
movswl (%esp),%esi
movl %edi, 16(%esp)
movswl 4(%esp),%ecx
movswl 2(%esp),%edi
movswl 6(%esp),%ebx
addl %esi, %esi
addl %ecx, %ecx
movzwl %si, %esi
sall $17, %edi
movzwl %cx, %ecx
sall $17, %ebx
movl %edi, %eax
movl 16(%esp), %edi
movl %ebx, %edx
orl %esi, %eax
movl 8(%esp), %ebx
orl %ecx, %edx
movl 12(%esp), %esi
movl %eax, w
movl %edx, w+4
movl w, %eax
movl w+4, %edx
movl %eax, dw
movl %edx, dw+4
addl $20, %esp
ret
gcc 4.1.0/20051113 with x87/mmx mode switch patch produces:
test: movq w, %mm0
paddw %mm0, %mm0
movq %mm0, w
movl w, %eax
movl w+4, %edx
movl %eax, dw
movl %edx, dw+4
emms
ret
Note: in this case there are partial memory stalls, which are VERY slow
(about 10-20 CPU cycles).
I think this demonstrates the problem.
examples taken from
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14552
[...]
--
Michael
More information about the MPlayer-dev-eng
mailing list