[FFmpeg-devel] [PATCH] x86/vf_w3fdif: 32-bit compatibility for w3fdif_simple_high

Hendrik Leppkes h.leppkes at gmail.com
Thu Jan 7 03:54:30 CET 2016


---
Based on an idea from Ronald mentioend in an earlier thread about this function.

It works and passes FATE, however I'm sure some aspects can be done easier or cleaner, so please let me know.


 libavfilter/x86/vf_w3fdif.asm    | 37 ++++++++++++++++++++++++++++++++++---
 libavfilter/x86/vf_w3fdif_init.c |  2 +-
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/libavfilter/x86/vf_w3fdif.asm b/libavfilter/x86/vf_w3fdif.asm
index c3c73ea..35768c3 100644
--- a/libavfilter/x86/vf_w3fdif.asm
+++ b/libavfilter/x86/vf_w3fdif.asm
@@ -102,14 +102,22 @@ cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize
 REP_RET
 
 %if ARCH_X86_64
-
 cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
+%else
+cglobal w3fdif_simple_high, 4, 7, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
+%endif
     movq                  m2, [coefq]
-    DEFINE_ARGS    work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, linesize, offset, in_lines_cur2, in_lines_adj1, in_lines_adj2
+%if ARCH_X86_64
+    DEFINE_ARGS    work_line, in_lines_cur0, in_lines_adj0, linesize, offset, in_lines_cur1, in_lines_cur2, in_lines_adj1, in_lines_adj2
+    mov              offsetq, 0
+%else
+    DEFINE_ARGS    work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, in_lines_cur2, in_lines_adj1, in_lines_adj2
+    %define linesized dword r4m
+%endif
+
     pshufd                m0, m2, q0000
     SPLATW                m2, m2, 2
     pxor                  m7, m7
-    mov              offsetq, 0
     mov       in_lines_cur2q, [in_lines_cur0q+gprsize*2]
     mov       in_lines_cur1q, [in_lines_cur0q+gprsize]
     mov       in_lines_cur0q, [in_lines_cur0q]
@@ -117,8 +125,21 @@ cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
     mov       in_lines_adj1q, [in_lines_adj0q+gprsize]
     mov       in_lines_adj0q, [in_lines_adj0q]
 
+%if ARCH_X86_32
+    sub in_lines_cur1q, in_lines_cur0q
+    sub in_lines_cur2q, in_lines_cur0q
+    sub in_lines_adj0q, in_lines_cur0q
+    sub in_lines_adj1q, in_lines_cur0q
+    sub in_lines_adj2q, in_lines_cur0q
+    %define offsetq in_lines_cur0q
+%endif
+
 .loop:
+%if ARCH_X86_64
     movh                                   m3, [in_lines_cur0q+offsetq]
+%else
+    movh                                   m3, [in_lines_cur0q]
+%endif
     movh                                   m4, [in_lines_cur1q+offsetq]
     punpcklbw                              m3, m7
     punpcklbw                              m4, m7
@@ -143,15 +164,25 @@ cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
     pmaddwd                                m6, m2
     paddd                                  m3, m5
     paddd                                  m4, m6
+%if ARCH_X86_64
     paddd                                  m3, [work_lineq+offsetq*4]
     paddd                                  m4, [work_lineq+offsetq*4+mmsize]
     mova               [work_lineq+offsetq*4], m3
     mova        [work_lineq+offsetq*4+mmsize], m4
+%else
+    paddd                                  m3, [work_lineq]
+    paddd                                  m4, [work_lineq+mmsize]
+    mova                         [work_lineq], m3
+    mova                  [work_lineq+mmsize], m4
+    add                            work_lineq, mmsize*2
+%endif
     add                               offsetq, mmsize/2
     sub                             linesized, mmsize/2
     jg .loop
 REP_RET
 
+%if ARCH_X86_64
+
 cglobal w3fdif_complex_high, 5, 13, 10, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
     movq                  m0, [coefq+0]
     movd                  m4, [coefq+8]
diff --git a/libavfilter/x86/vf_w3fdif_init.c b/libavfilter/x86/vf_w3fdif_init.c
index 72ea657..9bf06e8 100644
--- a/libavfilter/x86/vf_w3fdif_init.c
+++ b/libavfilter/x86/vf_w3fdif_init.c
@@ -51,12 +51,12 @@ av_cold void ff_w3fdif_init_x86(W3FDIFDSPContext *dsp)
 
     if (EXTERNAL_SSE2(cpu_flags)) {
         dsp->filter_simple_low   = ff_w3fdif_simple_low_sse2;
+        dsp->filter_simple_high  = ff_w3fdif_simple_high_sse2;
         dsp->filter_complex_low  = ff_w3fdif_complex_low_sse2;
         dsp->filter_scale        = ff_w3fdif_scale_sse2;
     }
 
     if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) {
-        dsp->filter_simple_high  = ff_w3fdif_simple_high_sse2;
         dsp->filter_complex_high = ff_w3fdif_complex_high_sse2;
     }
 }
-- 
2.6.2.windows.1



More information about the ffmpeg-devel mailing list