[FFmpeg-devel] [PATCH 1/2] pixblockdsp: x86: Condense diff_pixels_* to a shared macro
Ronald S. Bultje
rsbultje at gmail.com
Sat Nov 7 04:11:51 CET 2015
Hi,
On Sun, Nov 1, 2015 at 11:59 AM, Timothy Gu <timothygu99 at gmail.com> wrote:
> ---
> libavcodec/x86/pixblockdsp.asm | 66
> ++++++++++++++++++++----------------------
> 1 file changed, 31 insertions(+), 35 deletions(-)
>
> diff --git a/libavcodec/x86/pixblockdsp.asm
> b/libavcodec/x86/pixblockdsp.asm
> index 7c5377b..a7d9816 100644
> --- a/libavcodec/x86/pixblockdsp.asm
> +++ b/libavcodec/x86/pixblockdsp.asm
> @@ -80,54 +80,50 @@ cglobal get_pixels, 3, 4, 5
> mova [r0+0x70], m3
> RET
>
> -INIT_MMX mmx
> ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const
> uint8_t *s2,
> ; int stride);
> -cglobal diff_pixels, 4,5
> - movsxdifnidn r3, r3d
> - pxor m7, m7
> - add r0, 128
> - mov r4, -128
> -.loop:
> - mova m0, [r1]
> - mova m2, [r2]
> - mova m1, m0
> - mova m3, m2
> - punpcklbw m0, m7
> - punpckhbw m1, m7
> - punpcklbw m2, m7
> - punpckhbw m3, m7
> - psubw m0, m2
> - psubw m1, m3
> - mova [r0+r4+0], m0
> - mova [r0+r4+8], m1
> - add r1, r3
> - add r2, r3
> - add r4, 16
> - jne .loop
> - REP_RET
> -
> -INIT_XMM sse2
> -cglobal diff_pixels, 4, 5, 5
> +%macro DIFF_PIXELS 0
> +cglobal diff_pixels, 4,5,5
> movsxdifnidn r3, r3d
> pxor m4, m4
> add r0, 128
> mov r4, -128
> .loop:
> - movh m0, [r1]
> - movh m2, [r2]
> - movh m1, [r1+r3]
> - movh m3, [r2+r3]
> + movq m0, [r1]
> + movq m2, [r2]
> +%if mmsize == 8
> + movq m1, m0
> + movq m3, m2
> + punpcklbw m0, m4
> + punpckhbw m1, m4
> + punpcklbw m2, m4
> + punpckhbw m3, m4
> +%else
> + movq m1, [r1+r3]
> + movq m3, [r2+r3]
> punpcklbw m0, m4
> punpcklbw m1, m4
> punpcklbw m2, m4
> punpcklbw m3, m4
> +%endif
> psubw m0, m2
> psubw m1, m3
> - mova [r0+r4+0 ], m0
> - mova [r0+r4+16], m1
> + mova [r0+r4+0], m0
> + mova [r0+r4+mmsize], m1
> +%if mmsize == 8
> + add r1, r3
> + add r2, r3
> +%else
> lea r1, [r1+r3*2]
> lea r2, [r2+r3*2]
> - add r4, 32
> +%endif
> + add r4, 2 * mmsize
> jne .loop
> - RET
> + REP_RET
>
Use RET here instead of REP_RET. We don't use REP_RET anymore.
Rest is fine.
Ronald
More information about the ffmpeg-devel
mailing list