[FFmpeg-devel] [FFmpeg-commits] Implement a SIMD version of emulated_edge_mc() for x86.
Daniel Verkamp
daniel
Mon Feb 7 08:18:06 CET 2011
On Mon, Jan 31, 2011 at 7:01 PM, Ronald S. Bultje <git at ffmpeg.org> wrote:
> Module: ffmpeg
> Branch: master
> Commit: 81f2a3f4ffcc6935b8b8ada4954700b3f333ae4f
>
> Author: Ronald S. Bultje <rsbultje at gmail.com>
> Date:   Mon Jan 31 20:55:56 2011 -0500
>
> Implement a SIMD version of emulated_edge_mc() for x86.
This crashes on a mingw-w64 build run on Win7 x64:
GNU gdb (GDB) 7.2
Copyright (C) 2010 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-w64-mingw32".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from Z:\src\ffmpeg\ffmpeg-git\build-mingw-w64/ffplay_g.exe...done.
(gdb) r G:\files\video\1337.mp4
Starting program: Z:\src\ffmpeg\ffmpeg-git\build-mingw-w64/ffplay_g.exe G:\files\video\1337.mp4
[New Thread 4348.0x11dc]
[New Thread 4348.0x898]
[New Thread 4348.0xec8]
[New Thread 4348.0x710]
[New Thread 4348.0x10e4]
[New Thread 4348.0x121c]
[New Thread 4348.0x438]
[New Thread 4348.0xd10]
[New Thread 4348.0x124c]
[New Thread 4348.0x115c]
[New Thread 4348.0x1004]
Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 4348.0x115c]
0x00000000008715eb in _ff_emu_edge_core_sse.emuedge_extend_bottom_15_loop ()
(gdb) display /i $pc
1: x/i $pc
=> 0x8715eb <_ff_emu_edge_core_sse.emuedge_extend_bottom_15_loop>: movq %mm0,(%rcx)
(gdb) disas
Dump of assembler code for function _ff_emu_edge_core_sse.emuedge_extend_bottom_15_loop:
=> 0x00000000008715eb <+0>: movq %mm0,(%rcx)
0x00000000008715ee <+3>: movd %mm1,0x8(%rcx)
0x00000000008715f2 <+7>: mov %r9w,0xc(%rcx)
0x00000000008715f7 <+12>: mov %al,0xe(%rcx)
0x00000000008715fa <+15>: add %r8,%rcx
0x00000000008715fd <+18>: dec %rsi
0x0000000000871600 <+21>: jne 0x8715eb <_ff_emu_edge_core_sse.emuedge_extend_bottom_15_loop>
End of assembler dump.
(gdb) info all-registers
rax 0x870e00 8850944
rbx 0x4449040 71602240
rcx 0x5130011 85131281
rdx 0x6b36d00 112422144
rsi 0xfffffffffffff640 -2496
rdi 0x11 17
rbp 0x1 0x1
rsp 0x679f5b0 0x679f5b0
r8 0x160 352
r9 0x0 0
r10 0x871600 8852992
r11 0x11 17
r12 0x6b36cff 112422143
r13 0x0 0
r14 0x0 0
r15 0xb0 176
rip 0x8715eb 0x8715eb <_ff_emu_edge_core_sse.emuedge_extend_bottom_15_loop>
eflags 0x10282 [ SF IF RF ]
cs 0x33 51
ss 0x282002b 42074155
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x2b0000 2818048
st0 -nan(0x8787878787878787) (raw 0xffff8787878787878787)
st1 -nan(0x87008700870087) (raw 0xffff0087008700870087)
st2 -nan(0x8888888888888888) (raw 0xffff8888888888888888)
st3 -nan(0x88008800880088) (raw 0xffff0088008800880088)
st4 -nan(0x86008600860086) (raw 0xffff0086008600860086)
st5 -nan(0x86008600860086) (raw 0xffff0086008600860086)
st6 -nan(0x86008600860086) (raw 0xffff0086008600860086)
st7 -inf (raw 0xffff0000000000000000)
fctrl 0x27f 639
fstat 0xff0000 16711680
ftag 0xff 255
fiseg 0x0 0
fioff 0x0 0
foseg 0x0 0
fooff 0x0 0
fop 0x0 0
xmm0 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
uint128 = 0x00000000000000000000000000000000}
xmm1 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0x34, 0x32, 0x34, 0x33, 0x33, 0x34, 0x32, 0x34, 0x34, 0x35,
0x35, 0x34, 0x34, 0x36, 0x35, 0x33}, v8_int16 = {0x3234, 0x3334, 0x3433,
0x3432, 0x3534, 0x3435, 0x3634, 0x3335}, v4_int32 = {0x33343234,
0x34323433, 0x34353534, 0x33353634}, v2_int64 = {0x3432343333343234,
0x3335363434353534}, uint128 = 0x33353634343535343432343333343234}
xmm2 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0x33, 0x33, 0x34, 0x33, 0x33, 0x34, 0x33, 0x33, 0x33, 0x35,
0x36, 0x35, 0x33, 0x33, 0x34, 0x35}, v8_int16 = {0x3333, 0x3334, 0x3433,
0x3333, 0x3533, 0x3536, 0x3333, 0x3534}, v4_int32 = {0x33343333,
0x33333433, 0x35363533, 0x35343333}, v2_int64 = {0x3333343333343333,
0x3534333335363533}, uint128 = 0x35343333353635333333343333343333}
xmm3 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0x34, 0x36, 0x34, 0x35, 0x35, 0x34, 0x36, 0x34, 0x35, 0x33,
0x33, 0x34, 0x35, 0x34, 0x34, 0x34}, v8_int16 = {0x3634, 0x3534, 0x3435,
0x3436, 0x3335, 0x3433, 0x3435, 0x3434}, v4_int32 = {0x35343634,
0x34363435, 0x34333335, 0x34343435}, v2_int64 = {0x3436343535343634,
0x3434343534333335}, uint128 = 0x34343435343333353436343535343634}
xmm4 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
uint128 = 0x00000000000000000000000000000000}
xmm5 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
uint128 = 0x00000000000000000000000000000000}
xmm6 {v4_float = {0x1, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0x0, 0x0, 0x80, 0x3f, 0x0 <repeats 12 times>}, v8_int16 = {0x0,
0x3f80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x3f800000, 0x0, 0x0,
0x0}, v2_int64 = {0x3f800000, 0x0},
uint128 = 0x0000000000000000000000003f800000}
xmm7 {v4_float = {0x0, 0xa, 0x0, 0x0}, v2_double = {0xf4240, 0x0},
v16_int8 = {0x0, 0x0, 0x0, 0x0, 0x80, 0x84, 0x2e, 0x41, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x0, 0x0, 0x8480, 0x412e, 0x0, 0x0, 0x0,
0x0}, v4_int32 = {0x0, 0x412e8480, 0x0, 0x0}, v2_int64 = {
0x412e848000000000, 0x0}, uint128 = 0x0000000000000000412e848000000000}
xmm8 {v4_float = {0x0, 0x1, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xe0, 0x3f, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x3fe0, 0x0, 0x0, 0x0,
0x0}, v4_int32 = {0x0, 0x3fe00000, 0x0, 0x0}, v2_int64 = {
0x3fe0000000000000, 0x0}, uint128 = 0x00000000000000003fe0000000000000}
xmm9 {v4_float = {0xfdcb9e00, 0xffffffff, 0x0, 0x0}, v2_double = {
0x0, 0x0}, v16_int8 = {0x80, 0x18, 0xd, 0xcc, 0xbe, 0xbb, 0xa6, 0xbf,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x1880, 0xcc0d,
0xbbbe, 0xbfa6, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0xcc0d1880, 0xbfa6bbbe,
0x0, 0x0}, v2_int64 = {0xbfa6bbbecc0d1880, 0x0},
uint128 = 0x0000000000000000bfa6bbbecc0d1880}
xmm10 {v4_float = {0x0, 0x2, 0x0, 0x0}, v2_double = {0xa, 0x0},
v16_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x24, 0x40, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x4024, 0x0, 0x0, 0x0,
0x0}, v4_int32 = {0x0, 0x40240000, 0x0, 0x0}, v2_int64 = {
0x4024000000000000, 0x0}, uint128 = 0x00000000000000004024000000000000}
xmm11 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {
0x8000000000000000, 0x0}, v16_int8 = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0x7f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0xffff,
0xffff, 0xffff, 0x7fff, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0xffffffff,
0x7fffffff, 0x0, 0x0}, v2_int64 = {0x7fffffffffffffff, 0x0},
uint128 = 0x00000000000000007fffffffffffffff}
xmm12 {v4_float = {0x15c28, 0x1, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0x7b, 0x14, 0xae, 0x47, 0xe1, 0x7a, 0x84, 0x3f, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x147b, 0x47ae, 0x7ae1, 0x3f84,
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x47ae147b, 0x3f847ae1, 0x0, 0x0},
v2_int64 = {0x3f847ae147ae147b, 0x0},
uint128 = 0x00000000000000003f847ae147ae147b}
xmm13 {v4_float = {0x0, 0xa, 0x0, 0x0}, v2_double = {0xf4240, 0x0},
v16_int8 = {0x0, 0x0, 0x0, 0x0, 0x80, 0x84, 0x2e, 0x41, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x0, 0x0, 0x8480, 0x412e, 0x0, 0x0, 0x0,
0x0}, v4_int32 = {0x0, 0x412e8480, 0x0, 0x0}, v2_int64 = {
0x412e848000000000, 0x0}, uint128 = 0x0000000000000000412e848000000000}
xmm14 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x8000, 0x0, 0x0, 0x0,
0x0}, v4_int32 = {0x0, 0x80000000, 0x0, 0x0}, v2_int64 = {
0x8000000000000000, 0x0}, uint128 = 0x00000000000000008000000000000000}
xmm15 {v4_float = {0x0, 0xfffffff6, 0x0, 0x0}, v2_double = {
0xfffffffffff0bdc0, 0x0}, v16_int8 = {0x0, 0x0, 0x0, 0x0, 0x80, 0x84,
0x2e, 0xc1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x0,
0x0, 0x8480, 0xc12e, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0xc12e8480,
0x0, 0x0}, v2_int64 = {0xc12e848000000000, 0x0},
uint128 = 0x0000000000000000c12e848000000000}
mxcsr 0x1fa0 [ PE IM DM ZM OM UM PM ]
(gdb)
Crash occurs in normal ffmpeg conversion as well.
Sample: http://drv.nu/temp/1337.mp4 - plays fine with that patch reverted.
Thanks,
-- Daniel Verkamp
More information about the ffmpeg-devel
mailing list