[FFmpeg-devel] [PATCH 08/15] avfilter/vf_bwdif: Add neon for filter_edge
John Cox
jc at kynesim.co.uk
Sun Jul 2 13:50:52 EEST 2023
On Sun, 2 Jul 2023 00:40:09 +0300 (EEST), you wrote:
>On Thu, 29 Jun 2023, John Cox wrote:
>
>> Signed-off-by: John Cox <jc at kynesim.co.uk>
>> ---
>> libavfilter/aarch64/vf_bwdif_init_aarch64.c | 20 ++++
>> libavfilter/aarch64/vf_bwdif_neon.S | 104 ++++++++++++++++++++
>> 2 files changed, 124 insertions(+)
>>
>> diff --git a/libavfilter/aarch64/vf_bwdif_init_aarch64.c b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
>> index 3ffaa07ab3..e75cf2f204 100644
>> --- a/libavfilter/aarch64/vf_bwdif_init_aarch64.c
>> +++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
>> @@ -24,10 +24,29 @@
>> #include "libavfilter/bwdif.h"
>> #include "libavutil/aarch64/cpu.h"
>>
>> +void ff_bwdif_filter_edge_neon(void *dst1, void *prev1, void *cur1, void *next1,
>> + int w, int prefs, int mrefs, int prefs2, int mrefs2,
>> + int parity, int clip_max, int spat);
>> +
>> void ff_bwdif_filter_intra_neon(void *dst1, void *cur1, int w, int prefs, int mrefs,
>> int prefs3, int mrefs3, int parity, int clip_max);
>>
>>
>> +static void filter_edge_helper(void *dst1, void *prev1, void *cur1, void *next1,
>> + int w, int prefs, int mrefs, int prefs2, int mrefs2,
>> + int parity, int clip_max, int spat)
>> +{
>> + const int w0 = clip_max != 255 ? 0 : w & ~15;
>> +
>> + ff_bwdif_filter_edge_neon(dst1, prev1, cur1, next1, w0, prefs, mrefs, prefs2, mrefs2,
>> + parity, clip_max, spat);
>> +
>> + if (w0 < w)
>> + ff_bwdif_filter_edge_c((char *)dst1 + w0, (char *)prev1 + w0, (char *)cur1 + w0, (char *)next1 + w0,
>> + w - w0, prefs, mrefs, prefs2, mrefs2,
>> + parity, clip_max, spat);
>> +}
>> +
>> static void filter_intra_helper(void *dst1, void *cur1, int w, int prefs, int mrefs,
>> int prefs3, int mrefs3, int parity, int clip_max)
>> {
>> @@ -52,5 +71,6 @@ ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth)
>> return;
>>
>> s->filter_intra = filter_intra_helper;
>> + s->filter_edge = filter_edge_helper;
>> }
>>
>> diff --git a/libavfilter/aarch64/vf_bwdif_neon.S b/libavfilter/aarch64/vf_bwdif_neon.S
>> index 6c5d1598f4..a33b235882 100644
>> --- a/libavfilter/aarch64/vf_bwdif_neon.S
>> +++ b/libavfilter/aarch64/vf_bwdif_neon.S
>> @@ -128,6 +128,110 @@ coeffs:
>> .hword 5570, 3801, 1016, -3801 // hf[0] = v0.h[2], -hf[1] = v0.h[5]
>> .hword 5077, 981 // sp[0] = v0.h[6]
>>
>> +// ============================================================================
>> +//
>> +// void ff_bwdif_filter_edge_neon(
>> +// void *dst1, // x0
>> +// void *prev1, // x1
>> +// void *cur1, // x2
>> +// void *next1, // x3
>> +// int w, // w4
>> +// int prefs, // w5
>> +// int mrefs, // w6
>> +// int prefs2, // w7
>> +// int mrefs2, // [sp, #0]
>> +// int parity, // [sp, #8]
>> +// int clip_max, // [sp, #16] unused
>> +// int spat); // [sp, #24]
>
>This doesn't hold for macOS targets (and the checkasm tests fail on that
>platform).
>
>On macOS, arguments that are passed on the stack rather than in registers
>are tightly packed. Since mrefs2 and parity are both 32 bits wide, parity
>is available at [sp, #4].
>
>Therefore, for portability it's usually simplest to pass any arguments
>after the first 8 as intptr_t or ptrdiff_t; being 64 bits wide, they end
>up at the same stack offsets on all platforms.
Not my interface - this is existing code. What do you suggest I do?
I'm happy either to change the interface or to fix my stack offsets, if
there is some way for me to detect this ABI. My personal preference
would be the latter.
I don't have easy access to a Mac. Is there an easy way of getting this
tested before resubmission?
Thanks
JC
>// Martin
More information about the ffmpeg-devel mailing list