[FFmpeg-devel] [PATCH 1/3] x86/vf_gblur: fix postscale_slice prologue
James Almer
jamrial at gmail.com
Wed Feb 17 18:41:04 EET 2021
The x86_32 ABI does not pass float arguments directly in xmm regs, and the Win64
ABI uses only the first four regs for this purpose.
Signed-off-by: James Almer <jamrial at gmail.com>
---
libavfilter/vf_gblur.c | 3 +--
libavfilter/x86/vf_gblur.asm | 29 +++++++++++++----------------
2 files changed, 14 insertions(+), 18 deletions(-)
diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
index 109a7a95f9..40956e122d 100644
--- a/libavfilter/vf_gblur.c
+++ b/libavfilter/vf_gblur.c
@@ -234,8 +234,7 @@ void ff_gblur_init(GBlurContext *s)
{
s->horiz_slice = horiz_slice_c;
s->postscale_slice = postscale_c;
- if (ARCH_X86_64)
- ff_gblur_init_x86(s);
+ ff_gblur_init_x86(s);
}
static int config_input(AVFilterLink *inlink)
diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm
index c29ecba889..c2b2998202 100644
--- a/libavfilter/x86/vf_gblur.asm
+++ b/libavfilter/x86/vf_gblur.asm
@@ -185,27 +185,24 @@ HORIZ_SLICE
%endif
%macro POSTSCALE_SLICE 0
-%if UNIX64
-cglobal postscale_slice, 2, 2, 4, ptr, length
-%else
-cglobal postscale_slice, 5, 5, 4, ptr, length, postscale, min, max
-%endif
+cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max
shl lengthd, 2
add ptrq, lengthq
neg lengthq
-%if WIN64
+%if ARCH_X86_32
+ VBROADCASTSS m0, postscalem
+ VBROADCASTSS m1, minm
+ VBROADCASTSS m2, maxm
+%elif WIN64
SWAP 0, 2
SWAP 1, 3
- SWAP 2, 4
-%endif
-%if cpuflag(avx2)
- vbroadcastss m0, xm0
- vbroadcastss m1, xm1
- vbroadcastss m2, xm2
-%else
- shufps xm0, xm0, 0
- shufps xm1, xm1, 0
- shufps xm2, xm2, 0
+ VBROADCASTSS m0, xm0
+ VBROADCASTSS m1, xm1
+ VBROADCASTSS m2, maxm
+%else ; UNIX64
+ VBROADCASTSS m0, xm0
+ VBROADCASTSS m1, xm1
+ VBROADCASTSS m2, xm2
%endif
.loop:
--
2.30.0
More information about the ffmpeg-devel
mailing list