[FFmpeg-cvslog] avfilter/x86/vf_interlace : add AVX2 version

Martin Vignali git at videolan.org
Thu Jan 11 22:04:47 EET 2018


ffmpeg | branch: master | Martin Vignali <martin.vignali at gmail.com> | Sat Dec 30 19:30:56 2017 +0100| [b94cd55155d8c061f1e1faca9076afe540149c27] | committer: Martin Vignali

avfilter/x86/vf_interlace : add AVX2 version

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b94cd55155d8c061f1e1faca9076afe540149c27
---

 libavfilter/x86/vf_interlace.asm     | 23 ++++++++++++++++++++++-
 libavfilter/x86/vf_interlace_init.c  | 12 ++++++++++++
 libavfilter/x86/vf_tinterlace_init.c | 16 ++++++++++++++++
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
index 06b269828a..a6c65b805d 100644
--- a/libavfilter/x86/vf_interlace.asm
+++ b/libavfilter/x86/vf_interlace.asm
@@ -39,6 +39,20 @@ SECTION .text
 
     pcmpeq%1 m6, m6
 
+    test hq, mmsize
+    je .loop
+
+    ;process 1 * mmsize
+    movu m0, [mrefq+hq]
+    pavg%1 m0, [prefq+hq]
+    pxor m0, m6
+    pxor m2, m6, [srcq+hq]
+    pavg%1 m0, m2
+    pxor m0, m6
+    mova [dstq+hq], m0
+    add hq, mmsize
+    jge .end
+
 .loop:
     movu m0, [mrefq+hq]
     movu m1, [mrefq+hq+mmsize]
@@ -57,7 +71,9 @@ SECTION .text
 
     add hq, 2*mmsize
     jl .loop
-REP_RET
+
+.end:
+    REP_RET
 %endmacro
 
 %macro LOWPASS_LINE 0
@@ -201,5 +217,10 @@ LOWPASS_LINE
 INIT_XMM avx
 LOWPASS_LINE
 
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+LOWPASS_LINE
+%endif
+
 INIT_XMM sse2
 LOWPASS_LINE_COMPLEX
diff --git a/libavfilter/x86/vf_interlace_init.c b/libavfilter/x86/vf_interlace_init.c
index b024b61735..0de0fea382 100644
--- a/libavfilter/x86/vf_interlace_init.c
+++ b/libavfilter/x86/vf_interlace_init.c
@@ -32,6 +32,9 @@ void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize,
 void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
                           const uint8_t *srcp, ptrdiff_t mref,
                           ptrdiff_t pref, int clip_max);
+void ff_lowpass_line_avx2 (uint8_t *dstp, ptrdiff_t linesize,
+                          const uint8_t *srcp, ptrdiff_t mref,
+                          ptrdiff_t pref, int clip_max);
 
 void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
                              const uint8_t *srcp, ptrdiff_t mref,
@@ -39,6 +42,9 @@ void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
 void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize,
                              const uint8_t *srcp, ptrdiff_t mref,
                              ptrdiff_t pref, int clip_max);
+void ff_lowpass_line_16_avx2 (uint8_t *dstp, ptrdiff_t linesize,
+                             const uint8_t *srcp, ptrdiff_t mref,
+                             ptrdiff_t pref, int clip_max);
 
 void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
                                   const uint8_t *srcp, ptrdiff_t mref,
@@ -62,6 +68,9 @@ av_cold void ff_interlace_init_x86(InterlaceContext *s, int depth)
         if (EXTERNAL_AVX(cpu_flags))
             if (s->lowpass == VLPF_LIN)
                 s->lowpass_line = ff_lowpass_line_16_avx;
+        if (EXTERNAL_AVX2_FAST(cpu_flags))
+            if (s->lowpass == VLPF_LIN)
+                s->lowpass_line = ff_lowpass_line_16_avx2;
     } else {
         if (EXTERNAL_SSE2(cpu_flags)) {
             if (s->lowpass == VLPF_LIN)
@@ -72,5 +81,8 @@ av_cold void ff_interlace_init_x86(InterlaceContext *s, int depth)
         if (EXTERNAL_AVX(cpu_flags))
             if (s->lowpass == VLPF_LIN)
                 s->lowpass_line = ff_lowpass_line_avx;
+        if (EXTERNAL_AVX2_FAST(cpu_flags))
+            if (s->lowpass == VLPF_LIN)
+                s->lowpass_line = ff_lowpass_line_avx2;
     }
 }
diff --git a/libavfilter/x86/vf_tinterlace_init.c b/libavfilter/x86/vf_tinterlace_init.c
index 209812964d..2c9b1de581 100644
--- a/libavfilter/x86/vf_tinterlace_init.c
+++ b/libavfilter/x86/vf_tinterlace_init.c
@@ -33,6 +33,9 @@ void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize,
 void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
                           const uint8_t *srcp, ptrdiff_t mref,
                           ptrdiff_t pref, int clip_max);
+void ff_lowpass_line_avx2 (uint8_t *dstp, ptrdiff_t linesize,
+                          const uint8_t *srcp, ptrdiff_t mref,
+                          ptrdiff_t pref, int clip_max);
 
 void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
                              const uint8_t *srcp, ptrdiff_t mref,
@@ -40,6 +43,9 @@ void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
 void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize,
                              const uint8_t *srcp, ptrdiff_t mref,
                              ptrdiff_t pref, int clip_max);
+void ff_lowpass_line_16_avx2 (uint8_t *dstp, ptrdiff_t linesize,
+                             const uint8_t *srcp, ptrdiff_t mref,
+                             ptrdiff_t pref, int clip_max);
 
 void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
                                   const uint8_t *srcp, ptrdiff_t mref,
@@ -63,6 +69,11 @@ av_cold void ff_tinterlace_init_x86(TInterlaceContext *s)
         if (EXTERNAL_AVX(cpu_flags))
             if (!(s->flags & TINTERLACE_FLAG_CVLPF))
                 s->lowpass_line = ff_lowpass_line_16_avx;
+        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+            if (!(s->flags & TINTERLACE_FLAG_CVLPF)) {
+                s->lowpass_line = ff_lowpass_line_16_avx2;
+            }
+        }
     } else {
         if (EXTERNAL_SSE2(cpu_flags)) {
             if (!(s->flags & TINTERLACE_FLAG_CVLPF))
@@ -73,5 +84,10 @@ av_cold void ff_tinterlace_init_x86(TInterlaceContext *s)
         if (EXTERNAL_AVX(cpu_flags))
             if (!(s->flags & TINTERLACE_FLAG_CVLPF))
                 s->lowpass_line = ff_lowpass_line_avx;
+        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+            if (!(s->flags & TINTERLACE_FLAG_CVLPF)) {
+                s->lowpass_line = ff_lowpass_line_avx2;
+            }
+        }
     }
 }



More information about the ffmpeg-cvslog mailing list