[FFmpeg-devel] [PATCH 1/2] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512

Alan Kelly alankelly at google.com
Wed Sep 6 17:24:29 EEST 2023


---
 libswscale/x86/swscale.c    |  7 +++++++
 libswscale/x86/yuv2yuvX.asm | 19 ++++++++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index ff16398988..00e42b4bec 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -225,6 +225,9 @@ YUV2YUVX_FUNC(sse3, 32)
 #if HAVE_AVX2_EXTERNAL
 YUV2YUVX_FUNC(avx2, 64)
 #endif
+#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
+YUV2YUVX_FUNC(avx512, 128) /* second argument doubles again: sse3 uses 32, avx2 uses 64 */
+#endif
 
 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
@@ -467,6 +470,10 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
 #if HAVE_AVX2_EXTERNAL
         if (EXTERNAL_AVX2_FAST(cpu_flags))
             c->yuv2planeX = yuv2yuvX_avx2;
+#endif
+#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
+        if (EXTERNAL_AVX512ICL(cpu_flags)) /* NOTE(review): gated on the ICL flag, while the asm is instantiated as plain avx512 */
+            c->yuv2planeX = yuv2yuvX_avx512; /* runs after the AVX2 check, so this assignment wins when both match */
 #endif
     }
 #if ARCH_X86_32 && !HAVE_ALIGNED_STACK
diff --git a/libswscale/x86/yuv2yuvX.asm b/libswscale/x86/yuv2yuvX.asm
index 369c850674..57bfa09d66 100644
--- a/libswscale/x86/yuv2yuvX.asm
+++ b/libswscale/x86/yuv2yuvX.asm
@@ -22,6 +22,10 @@
 
 %include "libavutil/x86/x86util.asm"
 
+SECTION_RODATA 64
+
+permutation: dq 0, 2, 4, 6, 1, 3, 5, 7 ; qword indices for vpermt2q (loaded into m15): even source qwords first, then odd
+
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -50,6 +54,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
 %else
     movq                 xm3, [ditherq]
 %endif ; avx2
+
+%if cpuflag(avx512)
+    mova                 m15, [permutation]   ; qword index vector, used as the vpermt2q index operand on m3/m6
+%endif
     cmp                  offsetd, 0
     jz                   .offset
 
@@ -109,7 +117,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
     packuswb             m6, m6, m1
 %endif
     mov                  srcq, [filterq]
-%if cpuflag(avx2)
+%if cpuflag(avx512)
+    vpermt2q             m3, m15, m3          ; both tables are m3 and the indices are 0-7, so this is a plain qword permute
+    vpermt2q             m6, m15, m6          ; same reorder for m6: qwords 0,2,4,6 then 1,3,5,7
+%elif cpuflag(avx2)
     vpermq               m3, m3, 216          ; 216 = 11_01_10_00b -> qword order 0,2,1,3
     vpermq               m6, m6, 216          ; same reorder for m6
 %endif
@@ -131,4 +142,10 @@ YUV2YUVX_FUNC
 %if HAVE_AVX2_EXTERNAL
 INIT_YMM avx2
 YUV2YUVX_FUNC
+%if HAVE_AVX512_EXTERNAL
+%if ARCH_X86_64
+INIT_ZMM avx512      ; select ZMM registers and the avx512 cpuflag for the expansion below
+YUV2YUVX_FUNC        ; nested in the HAVE_AVX2_EXTERNAL conditional, so only built when that is set too
+%endif
+%endif
 %endif
-- 
2.42.0.283.g2d96d420d3-goog



More information about the ffmpeg-devel mailing list