[FFmpeg-devel] [PATCH 4/4] x86/af_afir: add ff_fcmul_add_avx()
James Almer
jamrial at gmail.com
Thu Jan 3 15:15:49 EET 2019
On 1/3/2019 5:47 AM, Paul B Mahol wrote:
> On 1/3/19, James Almer <jamrial at gmail.com> wrote:
>> fcmul_add_c: 1228.8
>> fcmul_add_sse3: 334.3
>> fcmul_add_avx: 186.3
>>
>> Signed-off-by: James Almer <jamrial at gmail.com>
>> ---
>> libavfilter/x86/af_afir.asm | 8 +++++++-
>> libavfilter/x86/af_afir_init.c | 5 +++++
>> 2 files changed, 12 insertions(+), 1 deletion(-)
>>
>> diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
>> index fcc1f426db..8054ac5f10 100644
>> --- a/libavfilter/x86/af_afir.asm
>> +++ b/libavfilter/x86/af_afir.asm
>> @@ -27,7 +27,7 @@ SECTION .text
>> ; void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
>> ;------------------------------------------------------------------------------
>>
>> -INIT_XMM sse3
>> +%macro FCMUL_ADD 0
>> cglobal fcmul_add, 4,4,6, sum, t, c, len
>> shl lend, 3
>> add tq, lenq
>> @@ -61,3 +61,9 @@ ALIGN 16
>> addss xm0, [sumq + lenq]
>> movss [sumq + lenq], xm0
>> RET
>> +%endmacro
>> +
>> +INIT_XMM sse3
>> +FCMUL_ADD
>> +INIT_YMM avx
>> +FCMUL_ADD
>> diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c
>> index 29e6f976b2..c37212c381 100644
>> --- a/libavfilter/x86/af_afir_init.c
>> +++ b/libavfilter/x86/af_afir_init.c
>> @@ -24,6 +24,8 @@
>>
>> void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
>> ptrdiff_t len);
>> +void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
>> + ptrdiff_t len);
>>
>> av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
>> {
>> @@ -32,4 +34,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
>> if (EXTERNAL_SSE3(cpu_flags)) {
>> s->fcmul_add = ff_fcmul_add_sse3;
>> }
>> + if (EXTERNAL_AVX_FAST(cpu_flags)) {
>> + s->fcmul_add = ff_fcmul_add_avx;
>> + }
>> }
>> --
>> 2.20.1
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel at ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>
> OK
>
> Also mention the CPU on which you tested it.
Patch set pushed with that addition.
Thanks.
More information about the ffmpeg-devel mailing list