[FFmpeg-devel] [PATCH] x86/vf_w3fdif: 32-bit compatibility for w3fdif_simple_high

Hendrik Leppkes h.leppkes at gmail.com
Fri Jan 8 12:02:14 CET 2016


On Thu, Jan 7, 2016 at 2:13 PM, Hendrik Leppkes <h.leppkes at gmail.com> wrote:
> ---
>  libavfilter/x86/vf_w3fdif.asm    | 35 +++++++++++++++++++++++++++++++++--
>  libavfilter/x86/vf_w3fdif_init.c |  2 +-
>  2 files changed, 34 insertions(+), 3 deletions(-)
>
> diff --git a/libavfilter/x86/vf_w3fdif.asm b/libavfilter/x86/vf_w3fdif.asm
> index c3c73ea..52628c3 100644
> --- a/libavfilter/x86/vf_w3fdif.asm
> +++ b/libavfilter/x86/vf_w3fdif.asm
> @@ -102,14 +102,22 @@ cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize
>  REP_RET
>
>  %if ARCH_X86_64
> -
>  cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
> +%else
> +cglobal w3fdif_simple_high, 4, 7, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
> +%endif
>      movq                  m2, [coefq]
> +%if ARCH_X86_64
>      DEFINE_ARGS    work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, linesize, offset, in_lines_cur2, in_lines_adj1, in_lines_adj2
> +    xor              offsetq, offsetq
> +%else
> +    DEFINE_ARGS    work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, in_lines_cur2, in_lines_adj1, in_lines_adj2
> +    %define linesized r4mp
> +%endif
> +
>      pshufd                m0, m2, q0000
>      SPLATW                m2, m2, 2
>      pxor                  m7, m7
> -    mov              offsetq, 0
>      mov       in_lines_cur2q, [in_lines_cur0q+gprsize*2]
>      mov       in_lines_cur1q, [in_lines_cur0q+gprsize]
>      mov       in_lines_cur0q, [in_lines_cur0q]
> @@ -117,8 +125,21 @@ cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
>      mov       in_lines_adj1q, [in_lines_adj0q+gprsize]
>      mov       in_lines_adj0q, [in_lines_adj0q]
>
> +%if ARCH_X86_32
> +    sub in_lines_cur1q, in_lines_cur0q
> +    sub in_lines_cur2q, in_lines_cur0q
> +    sub in_lines_adj0q, in_lines_cur0q
> +    sub in_lines_adj1q, in_lines_cur0q
> +    sub in_lines_adj2q, in_lines_cur0q
> +    %define offsetq in_lines_cur0q
> +%endif
> +
>  .loop:
> +%if ARCH_X86_64
>      movh                                   m3, [in_lines_cur0q+offsetq]
> +%else
> +    movh                                   m3, [in_lines_cur0q]
> +%endif
>      movh                                   m4, [in_lines_cur1q+offsetq]
>      punpcklbw                              m3, m7
>      punpcklbw                              m4, m7
> @@ -143,15 +164,25 @@ cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
>      pmaddwd                                m6, m2
>      paddd                                  m3, m5
>      paddd                                  m4, m6
> +%if ARCH_X86_64
>      paddd                                  m3, [work_lineq+offsetq*4]
>      paddd                                  m4, [work_lineq+offsetq*4+mmsize]
>      mova               [work_lineq+offsetq*4], m3
>      mova        [work_lineq+offsetq*4+mmsize], m4
> +%else
> +    paddd                                  m3, [work_lineq]
> +    paddd                                  m4, [work_lineq+mmsize]
> +    mova                         [work_lineq], m3
> +    mova                  [work_lineq+mmsize], m4
> +    add                            work_lineq, mmsize*2
> +%endif
>      add                               offsetq, mmsize/2
>      sub                             linesized, mmsize/2
>      jg .loop
>  REP_RET
>
> +%if ARCH_X86_64
> +
>  cglobal w3fdif_complex_high, 5, 13, 10, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
>      movq                  m0, [coefq+0]
>      movd                  m4, [coefq+8]
> diff --git a/libavfilter/x86/vf_w3fdif_init.c b/libavfilter/x86/vf_w3fdif_init.c
> index 72ea657..9bf06e8 100644
> --- a/libavfilter/x86/vf_w3fdif_init.c
> +++ b/libavfilter/x86/vf_w3fdif_init.c
> @@ -51,12 +51,12 @@ av_cold void ff_w3fdif_init_x86(W3FDIFDSPContext *dsp)
>
>      if (EXTERNAL_SSE2(cpu_flags)) {
>          dsp->filter_simple_low   = ff_w3fdif_simple_low_sse2;
> +        dsp->filter_simple_high  = ff_w3fdif_simple_high_sse2;
>          dsp->filter_complex_low  = ff_w3fdif_complex_low_sse2;
>          dsp->filter_scale        = ff_w3fdif_scale_sse2;
>      }
>
>      if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) {
> -        dsp->filter_simple_high  = ff_w3fdif_simple_high_sse2;
>          dsp->filter_complex_high = ff_w3fdif_complex_high_sse2;
>      }
>  }
> --
> 2.6.2.windows.1
>

Patch applied.


More information about the ffmpeg-devel mailing list