[FFmpeg-devel] [PATCH 14/15] vp9/x86: make filter_48/84/88_h work on 32-bit.

Clément Bœsch u at pkh.me
Sat Dec 27 19:32:00 CET 2014


On Sat, Dec 27, 2014 at 11:02:49AM -0500, Ronald S. Bultje wrote:
> ---
>  libavcodec/x86/vp9dsp_init.c | 12 +++------
>  libavcodec/x86/vp9lpf.asm    | 62 ++++++++++++++++++++++++++++++++------------
>  2 files changed, 48 insertions(+), 26 deletions(-)
> 
> diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
> index c5decfa..cdbf1e9 100644
> --- a/libavcodec/x86/vp9dsp_init.c
> +++ b/libavcodec/x86/vp9dsp_init.c
> @@ -355,17 +355,11 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
>      dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
>      dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
>      dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
> -    if (ARCH_X86_64) { \
> -        dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
> -    } \
> +    dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
>      dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
> -    if (ARCH_X86_64) { \
> -        dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
> -    } \
> +    dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
>      dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
> -    if (ARCH_X86_64) { \
> -        dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
> -    } \
> +    dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
>      dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
>  } while (0)
>  
> diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
> index c111f48..b8c75e9 100644
> --- a/libavcodec/x86/vp9lpf.asm
> +++ b/libavcodec/x86/vp9lpf.asm
> @@ -935,9 +935,12 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
>      mova                    m3, [P0]
>      mova                    m4, [Q0]
>      mova                    m5, [Q1]
> +%if ARCH_X86_64
>      mova                    m6, [Q2]
> +%endif
>      mova                    m7, [Q3]
>      DEFINE_REAL_P7_TO_Q7
> +%if ARCH_X86_64
>      SBUTTERFLY  bw,  0,  1, 8
>      SBUTTERFLY  bw,  2,  3, 8
>      SBUTTERFLY  bw,  4,  5, 8
> @@ -950,22 +953,47 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
>      SBUTTERFLY  dq,  1,  5, 8
>      SBUTTERFLY  dq,  2,  6, 8
>      SBUTTERFLY  dq,  3,  7, 8
> -    movh   [P7], m0
> -    movhps [P6], m0
> -    movh   [Q0], m1
> -    movhps [Q1], m1
> -    movh   [P3], m2
> -    movhps [P2], m2
> -    movh   [Q4], m3
> -    movhps [Q5], m3
> -    movh   [P5], m4
> -    movhps [P4], m4
> -    movh   [Q2], m5
> -    movhps [Q3], m5
> -    movh   [P1], m6
> -    movhps [P0], m6
> -    movh   [Q6], m7
> -    movhps [Q7], m7
> +%else
> +    SBUTTERFLY  bw,  0,  1, 6
> +    mova  [rsp+64], m1
> +    mova        m6, [rsp+96]
> +    SBUTTERFLY  bw,  2,  3, 1
> +    SBUTTERFLY  bw,  4,  5, 1
> +    SBUTTERFLY  bw,  6,  7, 1
> +    SBUTTERFLY  wd,  0,  2, 1
> +    mova  [rsp+96], m2
> +    mova        m1, [rsp+64]
> +    SBUTTERFLY  wd,  1,  3, 2
> +    SBUTTERFLY  wd,  4,  6, 2
> +    SBUTTERFLY  wd,  5,  7, 2
> +    SBUTTERFLY  dq,  0,  4, 2
> +    SBUTTERFLY  dq,  1,  5, 2
> +    movh      [Q0], m1
> +    movhps    [Q1], m1
> +    mova        m2, [rsp+96]
> +    SBUTTERFLY  dq,  2,  6, 1
> +    SBUTTERFLY  dq,  3,  7, 1
> +%endif
> +    SWAP         3, 6
> +    SWAP         1, 4
> +    movh      [P7], m0
> +    movhps    [P6], m0
> +    movh      [P5], m1
> +    movhps    [P4], m1
> +    movh      [P3], m2
> +    movhps    [P2], m2
> +    movh      [P1], m3
> +    movhps    [P0], m3
> +%if ARCH_X86_64
> +    movh      [Q0], m4
> +    movhps    [Q1], m4
> +%endif
> +    movh      [Q2], m5
> +    movhps    [Q3], m5
> +    movh      [Q4], m6
> +    movhps    [Q5], m6
> +    movh      [Q6], m7
> +    movhps    [Q7], m7
>  %endif
>  %endif
>  
> @@ -975,7 +1003,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
>  %macro LPF_16_VH 5
>  INIT_XMM %5
>  LOOPFILTER v, %1, %2,  0, %4
> -%if ARCH_X86_64 || %1 == 44
> +%if ARCH_X86_64 || %1 != 16
>  LOOPFILTER h, %1, %2, %3, %4
>  %endif
>  %endmacro

OK

-- 
Clément B.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 473 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20141227/529049fb/attachment.asc>


More information about the ffmpeg-devel mailing list