[FFmpeg-devel] [PATCH] avfilter: add hflip x86 SIMD
Martin Vignali
martin.vignali at gmail.com
Sun Dec 3 21:48:38 EET 2017
Maybe the problem comes from the skip part:
> +INIT_XMM ssse3
> +cglobal hflip_byte, 3, 5, 3, src, dst, w, x, v
> + mova m0, [pb_flip_byte]
> + mov xq, 0
> + mov wd, dword wm
> + sub wq, 2 * mmsize
> + cmp wq, mmsize
> + jl .skip
> +
> + .loop0:
> + neg xq
> + movu m1, [srcq + xq - mmsize + 1]
> + movu m2, [srcq + xq - 2 * mmsize + 1]
> + pshufb m1, m0
> + pshufb m2, m0
> + neg xq
> + movu [dstq + xq ], m1
> + movu [dstq + xq + mmsize], m2
> + add xq, mmsize * 2
> + cmp xq, wq
> + jl .loop0
> +
> +.skip:
> + add wq, 2 * mmsize
>
==> Should this use xq instead of wq?
> + .loop1:
> + neg xq
> + mov vb, [srcq + xq]
> + neg xq
> + mov [dstq + xq], vb
> + add xq, 1
> + cmp xq, wq
> + jl .loop1
> +RET
> +
> +cglobal hflip_short, 3, 5, 3, src, dst, w, x, v
> + mova m0, [pb_flip_short]
> + mov xq, 0
> + mov wd, dword wm
> + add wq, wq
> + sub wq, 2 * mmsize
> + cmp wq, mmsize
> + jl .skip
> +
> + .loop0:
> + neg xq
> + movu m1, [srcq + xq - mmsize + 2]
> + movu m2, [srcq + xq - 2 * mmsize + 2]
> + pshufb m1, m0
> + pshufb m2, m0
> + neg xq
> + movu [dstq + xq ], m1
> + movu [dstq + xq + mmsize], m2
> + add xq, mmsize
> + cmp xq, wq
> + jl .loop0
> +
> +.skip:
> + add wq, 2 * mmsize
>
==> Same question here: xq instead of wq?
> + .loop1:
> + neg xq
> + mov vw, [srcq + xq]
> + neg xq
> + mov [dstq + xq], vw
> + add xq, 2
> + cmp xq, wq
> + jl .loop1
> +RET
>
More information about the ffmpeg-devel
mailing list