[FFmpeg-devel] [PATCHv2 4/4] avfilter/vf_framerate: add SIMD functions for frame blending
James Almer
jamrial at gmail.com
Thu Jan 18 23:16:13 EET 2018
On 1/18/2018 6:06 PM, Marton Balint wrote:
> Blend function speedups on x86_64 Core i5 4460:
>
> ffmpeg -f lavfi -i allyuv -vf framerate=60:threads=1 -f null none
>
> C: 447548411 decicycles in Blend, 2048 runs, 0 skips
> SSSE3: 130020087 decicycles in Blend, 2048 runs, 0 skips
> AVX2: 128508221 decicycles in Blend, 2048 runs, 0 skips
>
> ffmpeg -f lavfi -i allyuv -vf format=yuv420p12,framerate=60:threads=1 -f null none
>
> C: 228932745 decicycles in Blend, 2048 runs, 0 skips
> SSE4: 123357781 decicycles in Blend, 2048 runs, 0 skips
> AVX2: 121215353 decicycles in Blend, 2048 runs, 0 skips
>
> Signed-off-by: Marton Balint <cus at passwd.hu>
> ---
> libavfilter/vf_framerate.c | 24 ++++++-
> libavfilter/x86/Makefile | 1 +
> libavfilter/x86/vf_framerate.asm | 136 +++++++++++++++++++++++++++++++++++++++
> 3 files changed, 158 insertions(+), 3 deletions(-)
> create mode 100644 libavfilter/x86/vf_framerate.asm
>
> diff --git a/libavfilter/vf_framerate.c b/libavfilter/vf_framerate.c
> index d315ef5d09..6a3b85910f 100644
> --- a/libavfilter/vf_framerate.c
> +++ b/libavfilter/vf_framerate.c
> @@ -29,11 +29,13 @@
> #define DEBUG
>
> #include "libavutil/avassert.h"
> +#include "libavutil/cpu.h"
> #include "libavutil/imgutils.h"
> #include "libavutil/internal.h"
> #include "libavutil/opt.h"
> #include "libavutil/pixdesc.h"
> #include "libavutil/pixelutils.h"
> +#include "libavutil/x86/cpu.h"
>
> #include "avfilter.h"
> #include "internal.h"
> @@ -246,7 +248,7 @@ static int blend_frames(AVFilterContext *ctx, int interpolate)
> av_frame_copy_props(s->work, s->f0);
>
> ff_dlog(ctx, "blend_frames() INTERPOLATE to create work frame\n");
> - ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
> + ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(FFMAX(1, outlink->h >> 2), ff_filter_get_nb_threads(ctx)));
> return 1;
> }
> return 0;
> @@ -347,6 +349,11 @@ static void blend_frames_c(BLEND_FUNC_PARAMS)
> }
> }
>
> +void ff_blend_frames_ssse3(BLEND_FUNC_PARAMS);
> +void ff_blend_frames_avx2(BLEND_FUNC_PARAMS);
> +void ff_blend_frames16_sse4(BLEND_FUNC_PARAMS);
> +void ff_blend_frames16_avx2(BLEND_FUNC_PARAMS);
> +
> static void blend_frames16_c(BLEND_FUNC_PARAMS)
> {
> int line, pixel;
> @@ -371,6 +378,7 @@ static int config_input(AVFilterLink *inlink)
> AVFilterContext *ctx = inlink->dst;
> FrameRateContext *s = ctx->priv;
> const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
> + int cpu_flags = av_get_cpu_flags();
> int plane;
>
> for (plane = 0; plane < 4; plane++) {
> @@ -389,10 +397,20 @@ static int config_input(AVFilterLink *inlink)
>
> if (s->bitdepth == 8) {
> s->blend_factor_max = 1 << BLEND_FACTOR_DEPTH8;
> - s->blend = blend_frames_c;
> + if (ARCH_X86 && EXTERNAL_AVX2_FAST(cpu_flags))
> + s->blend = ff_blend_frames_avx2;
> + else if (ARCH_X86 && EXTERNAL_SSSE3(cpu_flags))
> + s->blend = ff_blend_frames_ssse3;
> + else
> + s->blend = blend_frames_c;
> } else {
> s->blend_factor_max = 1 << BLEND_FACTOR_DEPTH16;
> - s->blend = blend_frames16_c;
> + if (ARCH_X86 && EXTERNAL_AVX2_FAST(cpu_flags))
> + s->blend = ff_blend_frames16_avx2;
> + else if (ARCH_X86 && EXTERNAL_SSE4(cpu_flags))
> + s->blend = ff_blend_frames16_sse4;
> + else
> + s->blend = blend_frames16_c;
The SIMD function pointer initialization and the respective prototypes
should be in a separate file in the x86 folder. In here you should only
have something like:
if (ARCH_X86)
ff_blend_frames_init_x86(s);
Then do the corresponding pointer initialization inside that function. The
prototype for ff_blend_frames_init_x86() should be in a new header.
See how vf_blend (and many other filters) do it.
> }
>
> return 0;
More information about the ffmpeg-devel
mailing list