[FFmpeg-cvslog] avfilter/x86/vf_threshold : add threshold16 SIMD (SSE4 and AVX2)
    Martin Vignali (git at videolan.org)
    Sat Dec  9 15:48:46 EET 2017
    
    
  
ffmpeg | branch: master | Martin Vignali <martin.vignali at gmail.com> | Thu Dec  7 22:01:54 2017 +0100| [869efbf971208faccfdd88680178afaf5b1d4e77] | committer: Martin Vignali
avfilter/x86/vf_threshold : add threshold16 SIMD (SSE4 and AVX2)
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=869efbf971208faccfdd88680178afaf5b1d4e77
---
 libavfilter/x86/vf_threshold.asm    | 21 ++++++++++++++-------
 libavfilter/x86/vf_threshold_init.c | 34 ++++++++++++++++++++--------------
 2 files changed, 34 insertions(+), 21 deletions(-)
diff --git a/libavfilter/x86/vf_threshold.asm b/libavfilter/x86/vf_threshold.asm
index 56a6c242d8..098069b083 100644
--- a/libavfilter/x86/vf_threshold.asm
+++ b/libavfilter/x86/vf_threshold.asm
@@ -25,16 +25,18 @@
 SECTION_RODATA
 
 pb_128: times 16 db 128
+pb_128_0 : times 8 db 0, 128
 
 SECTION .text
 
-%macro THRESHOLD_8 0
+;%1 depth (8 or 16) ; %2 b or w ; %3 constant
+%macro THRESHOLD 3
 %if ARCH_X86_64
-cglobal threshold8, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x
+cglobal threshold%1, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x
     mov             wd, dword wm
     mov             hd, dword hm
 %else
-cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
+cglobal threshold%1, 5, 7, 5, in, threshold, min, max, out, w, x
     mov             wd, r10m
 %define     ilinesizeq  r5mp
 %define     tlinesizeq  r6mp
@@ -43,7 +45,10 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
 %define     olinesizeq  r9mp
 %define             hd  r11mp
 %endif
-    VBROADCASTI128  m4, [pb_128]
+    VBROADCASTI128  m4, [%3]
+%if %1 == 16
+    add             wq, wq ; w *= 2 (16 bits instead of 8)
+%endif
     add            inq, wq
     add     thresholdq, wq
     add           minq, wq
@@ -60,7 +65,7 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
         movu            m3, [maxq + xq]
         pxor            m0, m4
         pxor            m1, m4
-        pcmpgtb         m0, m1
+        pcmpgt%2        m0, m1
         PBLENDVB        m3, m2, m0
         movu   [outq + xq], m3
         add             xq, mmsize
@@ -77,9 +82,11 @@ RET
 %endmacro
 
 INIT_XMM sse4
-THRESHOLD_8
+THRESHOLD 8, b, pb_128
+THRESHOLD 16, w, pb_128_0
 
 %if HAVE_AVX2_EXTERNAL
 INIT_YMM avx2
-THRESHOLD_8
+THRESHOLD 8, b, pb_128
+THRESHOLD 16, w, pb_128_0
 %endif
diff --git a/libavfilter/x86/vf_threshold_init.c b/libavfilter/x86/vf_threshold_init.c
index db0559533d..8e42296791 100644
--- a/libavfilter/x86/vf_threshold_init.c
+++ b/libavfilter/x86/vf_threshold_init.c
@@ -23,20 +23,19 @@
 #include "libavutil/x86/cpu.h"
 #include "libavfilter/threshold.h"
 
-void ff_threshold8_sse4(const uint8_t *in, const uint8_t *threshold,
-                        const uint8_t *min, const uint8_t *max,
-                        uint8_t *out,
-                        ptrdiff_t ilinesize, ptrdiff_t tlinesize,
-                        ptrdiff_t flinesize, ptrdiff_t slinesize,
-                        ptrdiff_t olinesize,
-                        int w, int h);
-void ff_threshold8_avx2(const uint8_t *in, const uint8_t *threshold,
-                        const uint8_t *min, const uint8_t *max,
-                        uint8_t *out,
-                        ptrdiff_t ilinesize, ptrdiff_t tlinesize,
-                        ptrdiff_t flinesize, ptrdiff_t slinesize,
-                        ptrdiff_t olinesize,
-                        int w, int h);
+#define THRESHOLD_FUNC(depth, opt) \
+void ff_threshold##depth##_##opt(const uint8_t *in, const uint8_t *threshold,\
+                                const uint8_t *min, const uint8_t *max,     \
+                                uint8_t *out,                               \
+                                ptrdiff_t ilinesize, ptrdiff_t tlinesize,   \
+                                ptrdiff_t flinesize, ptrdiff_t slinesize,   \
+                                ptrdiff_t olinesize,                        \
+                                int w, int h);
+
+THRESHOLD_FUNC(8, sse4)
+THRESHOLD_FUNC(8, avx2)
+THRESHOLD_FUNC(16, sse4)
+THRESHOLD_FUNC(16, avx2)
 
 av_cold void ff_threshold_init_x86(ThresholdContext *s)
 {
@@ -49,5 +48,12 @@ av_cold void ff_threshold_init_x86(ThresholdContext *s)
         if (EXTERNAL_AVX2_FAST(cpu_flags)) {
             s->threshold = ff_threshold8_avx2;
         }
+    } else if (s->depth == 16) {
+        if (EXTERNAL_SSE4(cpu_flags)) {
+            s->threshold = ff_threshold16_sse4;
+        }
+        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+            s->threshold = ff_threshold16_avx2;
+        }
     }
 }
    
    
More information about the ffmpeg-cvslog mailing list