[FFmpeg-devel] [PATCHv2 4/4] avfilter/vf_framerate: add SIMD functions for frame blending
James Almer
jamrial at gmail.com
Fri Jan 19 00:31:23 EET 2018
On 1/18/2018 6:16 PM, James Almer wrote:
> On 1/18/2018 6:06 PM, Marton Balint wrote:
>> Blend function speedups on x86_64 Core i5 4460:
>>
>> ffmpeg -f lavfi -i allyuv -vf framerate=60:threads=1 -f null none
>>
>> C: 447548411 decicycles in Blend, 2048 runs, 0 skips
>> SSSE3: 130020087 decicycles in Blend, 2048 runs, 0 skips
>> AVX2: 128508221 decicycles in Blend, 2048 runs, 0 skips
>>
>> ffmpeg -f lavfi -i allyuv -vf format=yuv420p12,framerate=60:threads=1 -f null none
>>
>> C: 228932745 decicycles in Blend, 2048 runs, 0 skips
>> SSE4: 123357781 decicycles in Blend, 2048 runs, 0 skips
>> AVX2: 121215353 decicycles in Blend, 2048 runs, 0 skips
>>
>> Signed-off-by: Marton Balint <cus at passwd.hu>
>> ---
>> libavfilter/vf_framerate.c | 24 ++++++-
>> libavfilter/x86/Makefile | 1 +
>> libavfilter/x86/vf_framerate.asm | 136 +++++++++++++++++++++++++++++++++++++++
>> 3 files changed, 158 insertions(+), 3 deletions(-)
>> create mode 100644 libavfilter/x86/vf_framerate.asm
>>
>> diff --git a/libavfilter/vf_framerate.c b/libavfilter/vf_framerate.c
>> index d315ef5d09..6a3b85910f 100644
>> --- a/libavfilter/vf_framerate.c
>> +++ b/libavfilter/vf_framerate.c
>> @@ -29,11 +29,13 @@
>> #define DEBUG
>>
>> #include "libavutil/avassert.h"
>> +#include "libavutil/cpu.h"
>> #include "libavutil/imgutils.h"
>> #include "libavutil/internal.h"
>> #include "libavutil/opt.h"
>> #include "libavutil/pixdesc.h"
>> #include "libavutil/pixelutils.h"
>> +#include "libavutil/x86/cpu.h"
>>
>> #include "avfilter.h"
>> #include "internal.h"
>> @@ -246,7 +248,7 @@ static int blend_frames(AVFilterContext *ctx, int interpolate)
>> av_frame_copy_props(s->work, s->f0);
>>
>> ff_dlog(ctx, "blend_frames() INTERPOLATE to create work frame\n");
>> - ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
>> + ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(FFMAX(1, outlink->h >> 2), ff_filter_get_nb_threads(ctx)));
>> return 1;
>> }
>> return 0;
>> @@ -347,6 +349,11 @@ static void blend_frames_c(BLEND_FUNC_PARAMS)
>> }
>> }
>>
>> +void ff_blend_frames_ssse3(BLEND_FUNC_PARAMS);
>> +void ff_blend_frames_avx2(BLEND_FUNC_PARAMS);
>> +void ff_blend_frames16_sse4(BLEND_FUNC_PARAMS);
>> +void ff_blend_frames16_avx2(BLEND_FUNC_PARAMS);
>> +
>> static void blend_frames16_c(BLEND_FUNC_PARAMS)
>> {
>> int line, pixel;
>> @@ -371,6 +378,7 @@ static int config_input(AVFilterLink *inlink)
>> AVFilterContext *ctx = inlink->dst;
>> FrameRateContext *s = ctx->priv;
>> const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
>> + int cpu_flags = av_get_cpu_flags();
>> int plane;
>>
>> for (plane = 0; plane < 4; plane++) {
>> @@ -389,10 +397,20 @@ static int config_input(AVFilterLink *inlink)
>>
>> if (s->bitdepth == 8) {
>> s->blend_factor_max = 1 << BLEND_FACTOR_DEPTH8;
>> - s->blend = blend_frames_c;
>> + if (ARCH_X86 && EXTERNAL_AVX2_FAST(cpu_flags))
>> + s->blend = ff_blend_frames_avx2;
>> + else if (ARCH_X86 && EXTERNAL_SSSE3(cpu_flags))
>> + s->blend = ff_blend_frames_ssse3;
>> + else
>> + s->blend = blend_frames_c;
>> } else {
>> s->blend_factor_max = 1 << BLEND_FACTOR_DEPTH16;
>> - s->blend = blend_frames16_c;
>> + if (ARCH_X86 && EXTERNAL_AVX2_FAST(cpu_flags))
>> + s->blend = ff_blend_frames16_avx2;
>> + else if (ARCH_X86 && EXTERNAL_SSE4(cpu_flags))
>> + s->blend = ff_blend_frames16_sse4;
>> + else
>> + s->blend = blend_frames16_c;
>
> The simd function pointer initialization and the respective prototypes
> should be in a separate file in the x86 folder. In here you should only
> have something like
>
> if (ARCH_X86)
> ff_blend_frames_init_x86(s);
On second thought, since this is the framerate filter, a more appropriate
name would be ff_framerate_init_x86(). Blend may not be the only
function the filter could optimize with assembly in the future.
>
> Then do the corresponding pointer initialization inside that function. The
> prototype for ff_blend_frames_init_x86() should be in a new header.
>
> See how vf_blend (and many other filters) do it.
>
>> }
>>
>> return 0;
More information about the ffmpeg-devel
mailing list