[FFmpeg-devel] [PATCH] avfilter/x86/af_afir: add avx version of fcmul_add
Paul B Mahol
onemda at gmail.com
Sun Dec 30 19:48:17 EET 2018
Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
libavfilter/x86/af_afir.asm | 15 ++++++++++++---
libavfilter/x86/af_afir_init.c | 6 ++++++
2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
index 849d85e70f..e770420a21 100644
--- a/libavfilter/x86/af_afir.asm
+++ b/libavfilter/x86/af_afir.asm
@@ -27,7 +27,7 @@ SECTION .text
; void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
;------------------------------------------------------------------------------
-INIT_XMM sse3
+%macro VECTOR_FCMUL_ADD 0
cglobal fcmul_add, 4,4,6, sum, t, c, len
shl lend, 3
add lend, mmsize*2
@@ -43,8 +43,8 @@ ALIGN 16
movaps m4, [cq + lenq+mmsize]
mulps m0, m1
mulps m3, m4
- shufps m1, m1, 0xb1
- shufps m4, m4, 0xb1
+ shufps m1, m1, m1, 0xb1
+ shufps m4, m4, m4, 0xb1
movshdup m2, [tq + lenq]
movshdup m5, [tq + lenq+mmsize]
mulps m2, m1
@@ -58,3 +58,12 @@ ALIGN 16
add lenq, mmsize*2
jl .loop
REP_RET
+%endmacro
+
+INIT_XMM sse3
+VECTOR_FCMUL_ADD
+
+%if HAVE_AVX_EXTERNAL
+INIT_YMM avx
+VECTOR_FCMUL_ADD
+%endif
diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c
index 6a652b9b83..214aaf9719 100644
--- a/libavfilter/x86/af_afir_init.c
+++ b/libavfilter/x86/af_afir_init.c
@@ -25,6 +25,9 @@
void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
ptrdiff_t len);
+void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
+ ptrdiff_t len);
+
av_cold void ff_afir_init_x86(AudioFIRContext *s)
{
int cpu_flags = av_get_cpu_flags();
@@ -32,4 +35,7 @@ av_cold void ff_afir_init_x86(AudioFIRContext *s)
if (EXTERNAL_SSE3(cpu_flags)) {
s->fcmul_add = ff_fcmul_add_sse3;
}
+ if (EXTERNAL_AVX_FAST(cpu_flags)) {
+ s->fcmul_add = ff_fcmul_add_avx;
+ }
}
--
2.17.1
More information about the ffmpeg-devel mailing list