[FFmpeg-devel] [PATCH 1/3] x86/vf_gblur: fix postscale_slice prologue

James Almer jamrial at gmail.com
Wed Feb 17 18:41:04 EET 2021


The x86_32 ABI does not pass float arguments directly in xmm regs, and the
Win64 ABI uses only the first four argument slots for register passing, so
the fifth argument (max) has to be loaded from memory there as well.

Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavfilter/vf_gblur.c       |  2 +-
 libavfilter/x86/vf_gblur.asm | 29 +++++++++++++----------------
 2 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
index 109a7a95f9..40956e122d 100644
--- a/libavfilter/vf_gblur.c
+++ b/libavfilter/vf_gblur.c
@@ -234,8 +234,8 @@ void ff_gblur_init(GBlurContext *s)
 {
     s->horiz_slice = horiz_slice_c;
     s->postscale_slice = postscale_c;
-    if (ARCH_X86_64)
+    if (ARCH_X86)
         ff_gblur_init_x86(s);
 }
 
 static int config_input(AVFilterLink *inlink)
diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm
index c29ecba889..c2b2998202 100644
--- a/libavfilter/x86/vf_gblur.asm
+++ b/libavfilter/x86/vf_gblur.asm
@@ -185,27 +185,24 @@ HORIZ_SLICE
 %endif
 
 %macro POSTSCALE_SLICE 0
-%if UNIX64
-cglobal postscale_slice, 2, 2, 4, ptr, length
-%else
-cglobal postscale_slice, 5, 5, 4, ptr, length, postscale, min, max
-%endif
+cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max
     shl lengthd, 2
     add ptrq, lengthq
     neg lengthq
-%if WIN64
+%if ARCH_X86_32 ; float args are not passed in xmm regs, load them from memory
+    VBROADCASTSS m0, postscalem
+    VBROADCASTSS m1, minm
+    VBROADCASTSS m2, maxm
+%elif WIN64 ; postscale/min arrive in xmm2/xmm3, max is not in a register
     SWAP 0, 2
     SWAP 1, 3
-    SWAP 2, 4
-%endif
-%if cpuflag(avx2)
-    vbroadcastss  m0, xm0
-    vbroadcastss  m1, xm1
-    vbroadcastss  m2, xm2
-%else
-    shufps   xm0, xm0, 0
-    shufps   xm1, xm1, 0
-    shufps   xm2, xm2, 0
+    VBROADCASTSS m0, xm0
+    VBROADCASTSS m1, xm1
+    VBROADCASTSS m2, maxm
+%else ; UNIX64
+    VBROADCASTSS m0, xm0
+    VBROADCASTSS m1, xm1
+    VBROADCASTSS m2, xm2 ; third float argument (max) is in xmm2
 %endif
 
     .loop:
-- 
2.30.0



More information about the ffmpeg-devel mailing list