[FFmpeg-devel] [PATCH 1/2] pixblockdsp: x86: Condense diff_pixels_* to a shared macro

Ronald S. Bultje rsbultje at gmail.com
Sat Nov 7 04:11:51 CET 2015


Hi,

On Sun, Nov 1, 2015 at 11:59 AM, Timothy Gu <timothygu99 at gmail.com> wrote:

> ---
>  libavcodec/x86/pixblockdsp.asm | 66 ++++++++++++++++++++----------------------
>  1 file changed, 31 insertions(+), 35 deletions(-)
>
> diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm
> index 7c5377b..a7d9816 100644
> --- a/libavcodec/x86/pixblockdsp.asm
> +++ b/libavcodec/x86/pixblockdsp.asm
> @@ -80,54 +80,50 @@ cglobal get_pixels, 3, 4, 5
>      mova  [r0+0x70], m3
>      RET
>
> -INIT_MMX mmx
>  ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
>  ;                         int stride);
> -cglobal diff_pixels, 4,5
> -    movsxdifnidn r3, r3d
> -    pxor         m7, m7
> -    add          r0,  128
> -    mov          r4, -128
> -.loop:
> -    mova         m0, [r1]
> -    mova         m2, [r2]
> -    mova         m1, m0
> -    mova         m3, m2
> -    punpcklbw    m0, m7
> -    punpckhbw    m1, m7
> -    punpcklbw    m2, m7
> -    punpckhbw    m3, m7
> -    psubw        m0, m2
> -    psubw        m1, m3
> -    mova  [r0+r4+0], m0
> -    mova  [r0+r4+8], m1
> -    add          r1, r3
> -    add          r2, r3
> -    add          r4, 16
> -    jne .loop
> -    REP_RET
> -
> -INIT_XMM sse2
> -cglobal diff_pixels, 4, 5, 5
> +%macro DIFF_PIXELS 0
> +cglobal diff_pixels, 4,5,5
>      movsxdifnidn r3, r3d
>      pxor         m4, m4
>      add          r0,  128
>      mov          r4, -128
>  .loop:
> -    movh         m0, [r1]
> -    movh         m2, [r2]
> -    movh         m1, [r1+r3]
> -    movh         m3, [r2+r3]
> +    movq         m0, [r1]
> +    movq         m2, [r2]
> +%if mmsize == 8
> +    movq         m1, m0
> +    movq         m3, m2
> +    punpcklbw    m0, m4
> +    punpckhbw    m1, m4
> +    punpcklbw    m2, m4
> +    punpckhbw    m3, m4
> +%else
> +    movq         m1, [r1+r3]
> +    movq         m3, [r2+r3]
>      punpcklbw    m0, m4
>      punpcklbw    m1, m4
>      punpcklbw    m2, m4
>      punpcklbw    m3, m4
> +%endif
>      psubw        m0, m2
>      psubw        m1, m3
> -    mova [r0+r4+0 ], m0
> -    mova [r0+r4+16], m1
> +    mova  [r0+r4+0], m0
> +    mova  [r0+r4+mmsize], m1
> +%if mmsize == 8
> +    add          r1, r3
> +    add          r2, r3
> +%else
>      lea          r1, [r1+r3*2]
>      lea          r2, [r2+r3*2]
> -    add          r4, 32
> +%endif
> +    add          r4, 2 * mmsize
>      jne .loop
> -    RET
> +    REP_RET
>

Please use RET here instead. We don't use REP_RET anymore.

Rest is fine.

Ronald


More information about the ffmpeg-devel mailing list