[FFmpeg-devel] [PATCH] avfilter: add hflip x86 SIMD

Martin Vignali martin.vignali at gmail.com
Sun Dec 3 21:48:38 EET 2017


Maybe the problem comes from the skip part:

> +INIT_XMM ssse3
> +cglobal hflip_byte, 3, 5, 3, src, dst, w, x, v
> +    mova    m0, [pb_flip_byte]
> +    mov     xq, 0
> +    mov     wd, dword wm
> +    sub     wq, 2 * mmsize
> +    cmp     wq, mmsize
> +    jl .skip
> +
> +    .loop0:
> +        neg     xq
> +        movu    m1, [srcq + xq -     mmsize + 1]
> +        movu    m2, [srcq + xq - 2 * mmsize + 1]
> +        pshufb  m1, m0
> +        pshufb  m2, m0
> +        neg     xq
> +        movu    [dstq + xq         ], m1
> +        movu    [dstq + xq + mmsize], m2
> +        add     xq, mmsize * 2
> +        cmp     xq, wq
> +        jl .loop0
> +
> +.skip:
> +    add     wq, 2 * mmsize
>

==> Should this use xq instead of wq?


> +    .loop1:
> +        neg    xq
> +        mov    vb, [srcq + xq]
> +        neg    xq
> +        mov    [dstq + xq], vb
> +        add    xq, 1
> +        cmp    xq, wq
> +        jl .loop1
> +RET
> +
> +cglobal hflip_short, 3, 5, 3, src, dst, w, x, v
> +    mova    m0, [pb_flip_short]
> +    mov     xq, 0
> +    mov     wd, dword wm
> +    add     wq, wq
> +    sub     wq, 2 * mmsize
> +    cmp     wq, mmsize
> +    jl .skip
> +
> +    .loop0:
> +        neg     xq
> +        movu    m1, [srcq + xq -     mmsize + 2]
> +        movu    m2, [srcq + xq - 2 * mmsize + 2]
> +        pshufb  m1, m0
> +        pshufb  m2, m0
> +        neg     xq
> +        movu    [dstq + xq         ], m1
> +        movu    [dstq + xq + mmsize], m2
> +        add     xq, mmsize
> +        cmp     xq, wq
> +        jl .loop0
> +
> +.skip:
> +    add     wq, 2 * mmsize
>


==> Same here?


> +    .loop1:
> +        neg    xq
> +        mov    vw, [srcq + xq]
> +        neg    xq
> +        mov    [dstq + xq], vw
> +        add    xq, 2
> +        cmp    xq, wq
> +        jl .loop1
> +RET
>


More information about the ffmpeg-devel mailing list