[FFmpeg-devel] FATE & Regressions (and PPC is broken)
Michael Niedermayer
michaelni
Sat Mar 15 02:24:41 CET 2008
On Sat, Mar 15, 2008 at 12:36:36AM +0000, M?ns Rullg?rd wrote:
> Mike Melanson <mike at multimedia.cx> writes:
>
> > Hi,
> >
> > The following regression test is broken on PowerPC (gcc 4.1.2):
> >
> > diff -u -w
> > "/home/melanson/ffmpeg/ffmpeg-main"/tests/ffmpeg.regression.ref
> > tests/data/vsynth.regression
> > --- /home/melanson/ffmpeg/ffmpeg-main/tests/ffmpeg.regression.ref
> > 2008-03-08 19:10:07.000000000 -0800
> > +++ tests/data/vsynth.regression 2008-03-12 14:27:53.000000000 -0700
> > @@ -197,7 +197,7 @@
> > 353368 ./tests/data/a-flac.flac
> > c4228df189aad9567a037727d0e763e4 *./tests/data/flac.vsynth.out.wav
> > stddev: 33.31 PSNR:65.87 bytes:1040384
> > -a1e56bb034c2773438ba1c830c4cea07 *./tests/data/a-wmav1.asf
> > +59575b4756d674cd8c8d53d86c08e8cb *./tests/data/a-wmav1.asf
> > 106004 ./tests/data/a-wmav1.asf
> > stddev:12251.50 PSNR:14.56 bytes:1056768
> > stddev:2106.00 PSNR:29.85 bytes:1048576
> > make: *** [codectest] Error 1
>
> OK, I tracked this one down. It turns out lrint() rounds slightly
> differently for whatever reason (bug or otherwise). Attached patch
> changes it to lround() (which has well-defined rounding behaviour).
> With this change, the results are the same on x86_64 and ppc64.
>
> This raises the broader question of lrint() usage in FFmpeg. The
> documentation for the lrint() family says:
>
> These functions shall round their argument to the nearest integer
> value, rounding according to the current rounding direction.
>
> Clearly, in a library we do not know the current rounding direction,
> and it would be rude of us to change it. The lround() family, on the
> other hand, always rounds the same way:
>
> These functions shall round their argument to the nearest integer
> value, rounding halfway cases away from zero, regardless of the
> current rounding direction.
We have to use lrint* because its faster see:
Dump of assembler code for function lrint:
<lrint+0>: fldl 0x4(%esp)
<lrint+4>: sub $0x4,%esp
<lrint+7>: fistpl (%esp)
<lrint+10>: fwait
<lrint+11>: pop %eax
<lrint+12>: ret
Dump of assembler code for function lround:
<lround+0>: push %ebp
<lround+1>: mov %esp,%ebp
<lround+3>: push %edi
<lround+4>: push %esi
<lround+5>: sub $0x28,%esp
<lround+8>: fldl 0x8(%ebp)
<lround+11>: fstl 0xffffffd8(%ebp)
<lround+14>: fstpl 0xffffffe0(%ebp)
<lround+17>: mov 0xffffffe4(%ebp),%eax
<lround+20>: mov 0xffffffe0(%ebp),%edx
<lround+23>: mov %eax,%edi
<lround+25>: mov %edx,0xffffffec(%ebp)
<lround+28>: mov %eax,%edx
<lround+30>: and $0xfffff,%eax
<lround+35>: shr $0x14,%edx
<lround+38>: and $0x7ff,%edx
<lround+44>: sar $0x1f,%edi
<lround+47>: lea 0xfffffc01(%edx),%esi
<lround+53>: or $0x1,%edi
<lround+56>: mov %eax,0xffffffd4(%ebp)
<lround+59>: orl $0x100000,0xffffffd4(%ebp)
<lround+66>: cmp $0x13,%esi
<lround+69>: jg <lround+112>
<lround+71>: test %esi,%esi
<lround+73>: js <lround+211>
<lround+79>: mov %esi,%ecx
<lround+81>: mov $0x80000,%eax
<lround+86>: sar %cl,%eax
<lround+88>: mov $0x14,%ecx
<lround+93>: add 0xffffffd4(%ebp),%eax
<lround+96>: sub %esi,%ecx
<lround+98>: shr %cl,%eax
<lround+100>: imul %edi,%eax
<lround+103>: add $0x28,%esp
<lround+106>: pop %esi
<lround+107>: pop %edi
<lround+108>: pop %ebp
<lround+109>: ret
<lround+110>: data16
<lround+111>: nop
<lround+112>: cmp $0x1e,%esi
<lround+115>: jg <lround+176>
<lround+117>: sub $0x413,%edx
<lround+123>: mov $0x80000000,%eax
<lround+128>: mov %edx,%ecx
<lround+130>: shr %cl,%eax
<lround+132>: mov 0xffffffec(%ebp),%ecx
<lround+135>: mov %edx,0xffffffe8(%ebp)
<lround+138>: lea (%eax,%ecx,1),%edx
<lround+141>: cmp %ecx,%edx
<lround+143>: adcl $0x0,0xffffffd4(%ebp)
<lround+147>: cmp $0x14,%esi
<lround+150>: mov 0xffffffd4(%ebp),%eax
<lround+153>: je <lround+100>
<lround+155>: movzbl 0xffffffe8(%ebp),%ecx
<lround+159>: shl %cl,%eax
<lround+161>: mov $0x34,%ecx
<lround+166>: sub %esi,%ecx
<lround+168>: shr %cl,%edx
<lround+170>: or %edx,%eax
<lround+172>: jmp <lround+100>
<lround+174>: data16
<lround+175>: nop
<lround+176>: fnstcw 0xfffffff6(%ebp)
<lround+179>: fldl 0xffffffd8(%ebp)
<lround+182>: movzwl 0xfffffff6(%ebp),%eax
<lround+186>: mov $0xc,%ah
<lround+188>: mov %ax,0xfffffff4(%ebp)
<lround+192>: fldcw 0xfffffff4(%ebp)
<lround+195>: fistpl 0xfffffff0(%ebp)
<lround+198>: fldcw 0xfffffff6(%ebp)
<lround+201>: mov 0xfffffff0(%ebp),%eax
<lround+204>: add $0x28,%esp
<lround+207>: pop %esi
<lround+208>: pop %edi
<lround+209>: pop %ebp
<lround+210>: ret
<lround+211>: xor %eax,%eax
<lround+213>: cmp $0xffffffff,%esi
<lround+216>: setne %al
<lround+219>: add $0x28,%esp
<lround+222>: sub $0x1,%eax
<lround+225>: and %edi,%eax
<lround+227>: pop %esi
<lround+228>: pop %edi
<lround+229>: pop %ebp
<lround+230>: ret
-------------
so the problem has to be solved differently, maybe a
if(flags & CODEC_FLAG_BITEXACT)
for()
lround()
else
for()
lrint()
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Good people do not need laws to tell them to act responsibly, while bad
people will find a way around the laws. -- Plato
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080315/d1f0cc73/attachment.pgp>
More information about the ffmpeg-devel
mailing list