[FFmpeg-devel] [PATCH 3/3] x86: sbrdsp: implement SSE2 hf_apply_noise
Michael Niedermayer
michaelni at gmx.at
Fri Apr 19 00:59:49 CEST 2013
On Mon, Apr 15, 2013 at 08:54:32PM +0200, Christophe Gisquet wrote:
> 2013/4/14 Michael Niedermayer <michaelni at gmx.at>:
> > Seems to crash on linux x86_64 shared
>
> Indeed, noise vector was loaded (not shown in that stack trace)
> through r9 and thus r9 was overwritten.
The patch you attached is identical to the last one; here's a more complete
disassembly of the code that crashes:
0x00007ffff6cd9c80 <ff_sbr_hf_apply_noise_0_sse2+0>: movdqa 0x14fcf8(%rip),%xmm0 # 0x7ffff6e29980 <ps_noise0>
0x00007ffff6cd9c88 <ff_sbr_hf_apply_noise_0_sse2+8>: jmp 0x7ffff6cd9cd5 <apply_noise_main>
0x00007ffff6cd9c8a <ff_sbr_hf_apply_noise_0_sse2+10>: nopw 0x0(%rax,%rax,1)
0x00007ffff6cd9c90 <ff_sbr_hf_apply_noise_1_sse2+0>: and $0x1,%r8
0x00007ffff6cd9c94 <ff_sbr_hf_apply_noise_1_sse2+4>: shl $0x4,%r8
0x00007ffff6cd9c98 <ff_sbr_hf_apply_noise_1_sse2+8>: lea 0x14fd01(%rip),%r9 # 0x7ffff6e299a0 <ps_noise13>
0x00007ffff6cd9c9f <ff_sbr_hf_apply_noise_1_sse2+15>: movdqa (%r8,%r9,1),%xmm0
0x00007ffff6cd9ca5 <ff_sbr_hf_apply_noise_1_sse2+21>: jmp 0x7ffff6cd9cd5 <apply_noise_main>
0x00007ffff6cd9ca7 <ff_sbr_hf_apply_noise_1_sse2+23>: nopw 0x0(%rax,%rax,1)
0x00007ffff6cd9cb0 <ff_sbr_hf_apply_noise_2_sse2+0>: movdqa 0x14fcd8(%rip),%xmm0 # 0x7ffff6e29990 <ps_noise2>
0x00007ffff6cd9cb8 <ff_sbr_hf_apply_noise_2_sse2+8>: jmp 0x7ffff6cd9cd5 <apply_noise_main>
0x00007ffff6cd9cba <ff_sbr_hf_apply_noise_2_sse2+10>: nopw 0x0(%rax,%rax,1)
0x00007ffff6cd9cc0 <ff_sbr_hf_apply_noise_3_sse2+0>: and $0x1,%r8
0x00007ffff6cd9cc4 <ff_sbr_hf_apply_noise_3_sse2+4>: shl $0x4,%r8
0x00007ffff6cd9cc8 <ff_sbr_hf_apply_noise_3_sse2+8>: lea 0x14fce1(%rip),%r9 # 0x7ffff6e299b0 <ps_noise13+16>
0x00007ffff6cd9ccf <ff_sbr_hf_apply_noise_3_sse2+15>: movdqa (%r8,%r9,1),%xmm0
0x00007ffff6cd9cd5 <apply_noise_main+0>: dec %rcx
0x00007ffff6cd9cd8 <apply_noise_main+3>: shl $0x2,%r9
0x00007ffff6cd9cdc <apply_noise_main+7>: lea 0x1a9bd(%rip),%rax # 0x7ffff6cf46a0 <ff_sbr_noise_table>
0x00007ffff6cd9ce3 <apply_noise_main+14>: lea (%rdi,%r9,2),%rdi
0x00007ffff6cd9ce7 <apply_noise_main+18>: add %r9,%rsi
0x00007ffff6cd9cea <apply_noise_main+21>: add %r9,%rdx
0x00007ffff6cd9ced <apply_noise_main+24>: shl $0x3,%rcx
0x00007ffff6cd9cf1 <apply_noise_main+28>: pxor %xmm5,%xmm5
0x00007ffff6cd9cf5 <apply_noise_main+32>: neg %r9
0x00007ffff6cd9cf8 <apply_noise_main.loop+0>: movdqa (%rdx,%r9,1),%xmm1
0x00007ffff6cd9cfe <apply_noise_main.loop+6>: movdqu 0x10(%rcx,%rax,1),%xmm3
0x00007ffff6cd9d04 <apply_noise_main.loop+12>: movdqu 0x20(%rcx,%rax,1),%xmm4
0x00007ffff6cd9d0a <apply_noise_main.loop+18>: add $0x20,%rcx
0x00007ffff6cd9d0e <apply_noise_main.loop+22>: and $0xff8,%rcx
0x00007ffff6cd9d15 <apply_noise_main.loop+29>: movdqa %xmm1,%xmm2
0x00007ffff6cd9d19 <apply_noise_main.loop+33>: punpckhdq %xmm1,%xmm2
0x00007ffff6cd9d1d <apply_noise_main.loop+37>: punpckldq %xmm1,%xmm1
0x00007ffff6cd9d21 <apply_noise_main.loop+41>: mulps %xmm3,%xmm1
0x00007ffff6cd9d24 <apply_noise_main.loop+44>: mulps %xmm4,%xmm2
=> 0x00007ffff6cd9d27 <apply_noise_main.loop+47>: movdqa (%rsi,%r9,1),%xmm3
0x00007ffff6cd9d2d <apply_noise_main.loop+53>: movdqa %xmm3,%xmm4
0x00007ffff6cd9d31 <apply_noise_main.loop+57>: punpckhdq %xmm3,%xmm4
0x00007ffff6cd9d35 <apply_noise_main.loop+61>: punpckldq %xmm3,%xmm3
0x00007ffff6cd9d39 <apply_noise_main.loop+65>: movdqa %xmm3,%xmm6
0x00007ffff6cd9d3d <apply_noise_main.loop+69>: pcmpeqd %xmm5,%xmm6
0x00007ffff6cd9d41 <apply_noise_main.loop+73>: movdqa %xmm4,%xmm7
0x00007ffff6cd9d45 <apply_noise_main.loop+77>: pcmpeqd %xmm5,%xmm7
0x00007ffff6cd9d49 <apply_noise_main.loop+81>: mulps %xmm0,%xmm3
0x00007ffff6cd9d4c <apply_noise_main.loop+84>: mulps %xmm0,%xmm4
0x00007ffff6cd9d4f <apply_noise_main.loop+87>: pand %xmm6,%xmm1
0x00007ffff6cd9d53 <apply_noise_main.loop+91>: pand %xmm7,%xmm2
0x00007ffff6cd9d57 <apply_noise_main.loop+95>: movdqu (%rdi,%r9,2),%xmm6
0x00007ffff6cd9d5d <apply_noise_main.loop+101>: movdqu 0x10(%rdi,%r9,2),%xmm7
0x00007ffff6cd9d64 <apply_noise_main.loop+108>: addps %xmm1,%xmm3
0x00007ffff6cd9d67 <apply_noise_main.loop+111>: addps %xmm2,%xmm4
0x00007ffff6cd9d6a <apply_noise_main.loop+114>: addps %xmm3,%xmm6
0x00007ffff6cd9d6d <apply_noise_main.loop+117>: addps %xmm4,%xmm7
0x00007ffff6cd9d70 <apply_noise_main.loop+120>: movdqu %xmm6,(%rdi,%r9,2)
0x00007ffff6cd9d76 <apply_noise_main.loop+126>: movdqu %xmm7,0x10(%rdi,%r9,2)
0x00007ffff6cd9d7d <apply_noise_main.loop+133>: add $0x10,%r9
0x00007ffff6cd9d81 <apply_noise_main.loop+137>: jl 0x7ffff6cd9cf8 <apply_noise_main.loop>
0x00007ffff6cd9d87 <apply_noise_main.loop+143>: retq
rax 0x7ffff6cf46a0 140737334167200
rbx 0x23 35
rcx 0x590 1424
rdx 0x27fffd3814f50 703686695276368
rsi 0x27fffd3854c50 703686695537744
rdi 0x47fffaf0b4bd0 1266636036983760
rbp 0x7fffffffd640 0x7fffffffd640
rsp 0x7fffffffd3f8 0x7fffffffd3f8
r8 0x0 0
r9 0xfffe0000247743b0 -562949341625424
r10 0x2b 43
r11 0x23 35
r12 0x7ffff7f5b720 140737353463584
r13 0x7ffff7f5b780 140737353463680
r14 0x1 1
r15 0x23 35
rip 0x7ffff6cd9d27 0x7ffff6cd9d27 <apply_noise_main.loop+47>
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
In a rich man's house there is no place to spit but his face.
-- Diogenes of Sinope
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130419/467f2589/attachment.asc>
More information about the ffmpeg-devel
mailing list