[FFmpeg-devel] [PATCH 1/2] checkasm: add sbrdsp tests
James Almer
jamrial at gmail.com
Tue Jul 4 21:15:56 EEST 2017
On 7/4/2017 2:31 PM, Michael Niedermayer wrote:
> On Mon, Jul 03, 2017 at 02:32:28PM +0200, Matthieu Bouron wrote:
>> On Fri, Jun 30, 2017 at 05:16:37PM +0200, Matthieu Bouron wrote:
>>> On Fri, Jun 30, 2017 at 03:55:52PM +0200, Michael Niedermayer wrote:
>>>> On Thu, Jun 29, 2017 at 10:53:06PM -0300, James Almer wrote:
>>>>> On 6/29/2017 10:14 PM, Henrik Gramner wrote:
>>>>>> On Fri, Jun 30, 2017 at 1:58 AM, Michael Niedermayer
>>>>>> <michael at niedermayer.cc> wrote:
>>>>>>> Program received signal SIGSEGV, Segmentation fault.
>>>>>>> 0x0000000000684919 in ff_sbr_hf_gen_sse ()
>>>>>>
>>>>>>> 0x0000000000684909 <ff_sbr_hf_gen_sse+25>: sub %r9,%r8
>>>>>>
>>>>>>> => 0x0000000000684919 <ff_sbr_hf_gen_sse+41>: movaps (%rsi,%r8,1),%xmm0
>>>>>>
>>>>>>> r9 0xdeadbeef00000080 -2401053092612145024
>>>>>>
>>>>>> Another case of a 32-bit int being used as part of a 64-bit operation.
>>>>>
>>>>> I can't reproduce it on my ArchLinux x86_64 environment for some reason,
>>>>> but based on what you said I assume the attached patch should fix it.
>>>>
>>>> no crash occurs here with this, so it seems fixed
>>>
>>> Should I push the patchset or wait a little bit longer?
>>
>> Patchset applied.
>
> It seems there's still some issue in this:
>
> checkasm: using random seed 3655967467
> MMX:
> - audiodsp.audiodsp [OK]
> - blockdsp.blockdsp [OK]
> - h264dsp.idct [OK]
> - h264pred.pred4x4 [OK]
> - h264pred.pred8x8 [OK]
> - h264pred.pred16x16 [OK]
> - pixblockdsp.get_pixels [OK]
> - pixblockdsp.diff_pixels [OK]
> - vp8dsp.idct [OK]
> - vp8dsp.mc [OK]
> - vp9dsp.ipred [OK]
> - vp9dsp.itxfm [OK]
> - vp9dsp.mc [OK]
> MMXEXT:
> - audiodsp.audiodsp [OK]
> - h264dsp.idct [OK]
> - h264pred.pred4x4 [OK]
> - h264pred.pred8x8 [OK]
> - h264pred.pred16x16 [OK]
> - h264pred.pred8x8l [OK]
> - h264qpel.put [OK]
> - h264qpel.avg [OK]
> - hevc_add_res.add_residual [OK]
> - hevc_idct.idct_dc [OK]
> - vp8dsp.mc [OK]
> - vp9dsp.ipred [OK]
> - vp9dsp.itxfm [OK]
> - vp9dsp.loopfilter [OK]
> - vp9dsp.mc [OK]
> SSE:
> - aacpsdsp.add_squares [OK]
> - aacpsdsp.mul_pair_single [OK]
> - aacpsdsp.hybrid_analysis [OK]
> - sbrdsp.sum64x5 [OK]
> - sbrdsp.sum_square [OK]
> - sbrdsp.neg_odd_64 [OK]
> - sbrdsp.qmf_post_shuffle [OK]
> - sbrdsp.qmf_deint_neg [OK]
> - sbrdsp.qmf_deint_bfly [OK]
> - sbrdsp.autocorrelate [OK]
> - sbrdsp.hf_gen [OK]
> - sbrdsp.hf_g_filt [OK]
> - audiodsp.audiodsp [OK]
> - blockdsp.blockdsp [OK]
> - fmtconvert.fmtconvert [OK]
> - h264pred.pred16x16 [OK]
> - vp8dsp.idct [OK]
> - vp8dsp.mc [OK]
> - vp9dsp.ipred [OK]
> - vp9dsp.mc [OK]
> - float_dsp.vector_fmul [OK]
> - float_dsp.vector_fmac [OK]
> - float_dsp.butterflies_float [OK]
> - float_dsp.scalarproduct_float [OK]
> SSE2:
> - sbrdsp.qmf_pre_shuffle [OK]
> - sbrdsp.qmf_deint_bfly [OK]
>
> Program received signal SIGSEGV, Segmentation fault.
> apply_noise_main.loop () at libavcodec/x86/sbrdsp.asm:418
> 418 movu m7, [Yq + 2*count + mmsize]
> (gdb) bt
> Python Exception <type 'exceptions.ImportError'> No module named gdb.frames:
> #0 apply_noise_main.loop () at libavcodec/x86/sbrdsp.asm:418
> #1 0x000000000043659b in checkasm_checked_call () at tests/checkasm/x86/checkasm.asm:77
> #2 0xdeadbeefdeadbeef in ?? ()
> #3 0xdeadbeefdeadbeef in ?? ()
> #4 0xdeadbeefdeadbeef in ?? ()
> #5 0xdeadbeefdeadbeef in ?? ()
> #6 0xdeadbeefdeadbeef in ?? ()
> #7 0xdeadbeefdeadbeef in ?? ()
> #8 0xdeadbeefdeadbeef in ?? ()
> #9 0xdeadbeefdeadbeef in ?? ()
> #10 0xdeadbeefdeadbeef in ?? ()
> #11 0xdeadbeefdeadbeef in ?? ()
> #12 0xdeadbeefdeadbeef in ?? ()
> #13 0xdeadbeefdeadbeef in ?? ()
> #14 0xdeadbeefdeadbeef in ?? ()
> #15 0xdeadbeefdeadbeef in ?? ()
> #16 0xdeadbeefdeadbeef in ?? ()
> #17 0xdeadbeefdeadbeef in ?? ()
> #18 0xdeadbeefdeadbeef in ?? ()
> #19 0x00007fffffffd870 in ?? ()
> #20 0x00007fffffffcc70 in ?? ()
> #21 0x00007fffffffce70 in ?? ()
> #22 0x0000000000000000 in ?? ()
> (gdb) info all-registers
> rax 0x0 0
> rbx 0xed56bb2dcb3c7736 -1344681633365854410
> rcx 0x8e8 2280
> rdx 0x7ab77bbbffffd070 8842672440749314160
> rsi 0x7ab77bbbffffce70 8842672440749313648
> rdi 0xf56e7777ffffdc70 -761539929699263376
> rbp 0x8bda43d3fd1a7e06 0x8bda43d3fd1a7e06
> rsp 0x7fffffffcae8 0x7fffffffcae8
> r8 0xdeadbeef00000000 -2401053092612145152
> r9 0x85490444000009c0 -8842531703260968512
> r10 0x684bf0 6835184
> r11 0x1 1
> r12 0x4a75479abd64e097 5365273261009854615
> r13 0x249214109d5d1c88 2635190793557318792
> r14 0xb64a9c9e5d318408 -5311260606547786744
> r15 0xdf9a54b303f1d3a3 -2334460328996121693
> rip 0x684cc9 0x684cc9 <apply_noise_main.loop+105>
> eflags 0x10206 [ PF IF RF ]
> cs 0x33 51
> ss 0x2b 43
> ds 0x0 0
> es 0x0 0
> fs 0x0 0
> gs 0x0 0
> st0 -nan(0x0fffb0005) (raw 0xffff00000000fffb0005)
> st1 -nan(0x334fe50ff28fc84) (raw 0xffff0334fe50ff28fc84)
> st2 -nan(0x0ff640150) (raw 0xffff00000000ff640150)
> st3 -nan(0x0005e005a) (raw 0xffff00000000005e005a)
> st4 -nan(0x0ff5bffe7) (raw 0xffff00000000ff5bffe7)
> st5 -nan(0xff63fc2cfe94fee5) (raw 0xffffff63fc2cfe94fee5)
> st6 -nan(0x01c4df38a) (raw 0xffff000000001c4df38a)
> st7 -nan(0x06215436f) (raw 0xffff000000006215436f)
>
Does the attached patch fix it?
-------------- next part --------------
From 14c4b77569af06ae181e521330aef6290f29fca1 Mon Sep 17 00:00:00 2001
From: James Almer <jamrial at gmail.com>
Date: Tue, 4 Jul 2017 15:05:47 -0300
Subject: [PATCH] x86/sbrdsp: zero extend m_max in apply_noise_main
Signed-off-by: James Almer <jamrial at gmail.com>
---
libavcodec/x86/sbrdsp.asm | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index c716184b14..62bbe512ec 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -378,24 +378,24 @@ cglobal sbr_hf_apply_noise_3, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
apply_noise_main:
%if ARCH_X86_64 == 0 || WIN64
mov kxd, m_maxm
-%define count kxq
+ DEFINE_ARGS Y, s_m, q_filt, noise, count
%else
-%define count m_maxq
+ DEFINE_ARGS Y, s_m, q_filt, noise, kx, count
%endif
movsxdifnidn noiseq, noised
dec noiseq
- shl count, 2
+ shl countd, 2
%ifdef PIC
lea NOISE_TABLE, [sbr_noise_table]
%endif
- lea Yq, [Yq + 2*count]
- add s_mq, count
- add q_filtq, count
+ lea Yq, [Yq + 2*countq]
+ add s_mq, countq
+ add q_filtq, countq
shl noiseq, 3
pxor m5, m5
- neg count
+ neg countq
.loop:
- mova m1, [q_filtq + count]
+ mova m1, [q_filtq + countq]
movu m3, [noiseq + NOISE_TABLE + 1*mmsize]
movu m4, [noiseq + NOISE_TABLE + 2*mmsize]
add noiseq, 2*mmsize
@@ -404,7 +404,7 @@ apply_noise_main:
punpckldq m1, m1
mulps m1, m3 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
mulps m2, m4 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
- mova m3, [s_mq + count]
+ mova m3, [s_mq + countq]
; TODO: replace by a vpermd in AVX2
punpckhdq m4, m3, m3
punpckldq m3, m3
@@ -414,15 +414,15 @@ apply_noise_main:
mulps m4, m0 ; s_m[m] * phi_sign
pand m1, m6
pand m2, m7
- movu m6, [Yq + 2*count]
- movu m7, [Yq + 2*count + mmsize]
+ movu m6, [Yq + 2*countq]
+ movu m7, [Yq + 2*countq + mmsize]
addps m3, m1
addps m4, m2
addps m6, m3
addps m7, m4
- movu [Yq + 2*count], m6
- movu [Yq + 2*count + mmsize], m7
- add count, mmsize
+ movu [Yq + 2*countq], m6
+ movu [Yq + 2*countq + mmsize], m7
+ add countq, mmsize
jl .loop
RET
--
2.13.0
More information about the ffmpeg-devel
mailing list