[FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x

James Almer jamrial at gmail.com
Fri Feb 18 17:48:10 EET 2022



On 2/18/2022 12:24 PM, Victoria Zhislina wrote:
> By implementing ffmpeg threading support via frame slicing, and by calling
> zimg_filter_graph_build (which used to take 30-60% of each frame's processing
> time) only when necessary (i.e. when some parameters have changed),
> a performance increase over the original version of >4x
> is seen in video downscaling and color conversion
> on a 64-core Intel Xeon, and 3x on an i7-6700K (4 cores with HT).
> 
> Signed-off-by: Victoria Zhislina <Victoria.Zhislina at intel.com>
> ---
>   libavfilter/vf_zscale.c | 787 ++++++++++++++++++++++++----------------
>   1 file changed, 475 insertions(+), 312 deletions(-)
> 
> diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
> index 1288c5efc1..ea2565025f 100644
> --- a/libavfilter/vf_zscale.c
> +++ b/libavfilter/vf_zscale.c
> @@ -1,6 +1,7 @@
>   /*
>    * Copyright (c) 2015 Paul B Mahol
> - *
> + * 2022 Victoria Zhislina, Intel
> +
>    * This file is part of FFmpeg.
>    *
>    * FFmpeg is free software; you can redistribute it and/or
> @@ -44,6 +45,8 @@
>   #include "libavutil/imgutils.h"
>   
>   #define ZIMG_ALIGNMENT 32
> +#define MIN_TILESIZE 64
> +#define MAX_THREADS 64
>   
>   static const char *const var_names[] = {
>       "in_w",   "iw",
> @@ -113,13 +116,17 @@ typedef struct ZScaleContext {
>   
>       int force_original_aspect_ratio;
>   
> -    void *tmp;
> -    size_t tmp_size;
> +    void *tmp[MAX_THREADS]; //separate for each thread;
> +    int nb_threads;
> +    int slice_h;
>   
>       zimg_image_format src_format, dst_format;
>       zimg_image_format alpha_src_format, alpha_dst_format;
> +    zimg_image_format src_format_tmp, dst_format_tmp;
> +    zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
>       zimg_graph_builder_params alpha_params, params;
> -    zimg_filter_graph *alpha_graph, *graph;
> +    zimg_graph_builder_params alpha_params_tmp, params_tmp;
> +    zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
>   
>       enum AVColorSpace in_colorspace, out_colorspace;
>       enum AVColorTransferCharacteristic in_trc, out_trc;
> @@ -128,10 +135,181 @@ typedef struct ZScaleContext {
>       enum AVChromaLocation in_chromal, out_chromal;
>   } ZScaleContext;
>   
> +
> +typedef struct ThreadData {
> +    const AVPixFmtDescriptor *desc, *odesc;
> +    AVFrame *in, *out;
> +} ThreadData;
> +
> +static int convert_chroma_location(enum AVChromaLocation chroma_location)
> +{
> +    switch (chroma_location) {
> +    case AVCHROMA_LOC_UNSPECIFIED:
> +    case AVCHROMA_LOC_LEFT:
> +        return ZIMG_CHROMA_LEFT;
> +    case AVCHROMA_LOC_CENTER:
> +        return ZIMG_CHROMA_CENTER;
> +    case AVCHROMA_LOC_TOPLEFT:
> +        return ZIMG_CHROMA_TOP_LEFT;
> +    case AVCHROMA_LOC_TOP:
> +        return ZIMG_CHROMA_TOP;
> +    case AVCHROMA_LOC_BOTTOMLEFT:
> +        return ZIMG_CHROMA_BOTTOM_LEFT;
> +    case AVCHROMA_LOC_BOTTOM:
> +        return ZIMG_CHROMA_BOTTOM;
> +    }
> +    return ZIMG_CHROMA_LEFT;
> +}
> +
> +static int convert_matrix(enum AVColorSpace colorspace)
> +{
> +    switch (colorspace) {
> +    case AVCOL_SPC_RGB:
> +        return ZIMG_MATRIX_RGB;
> +    case AVCOL_SPC_BT709:
> +        return ZIMG_MATRIX_709;
> +    case AVCOL_SPC_UNSPECIFIED:
> +        return ZIMG_MATRIX_UNSPECIFIED;
> +    case AVCOL_SPC_FCC:
> +        return ZIMG_MATRIX_FCC;
> +    case AVCOL_SPC_BT470BG:
> +        return ZIMG_MATRIX_470BG;
> +    case AVCOL_SPC_SMPTE170M:
> +        return ZIMG_MATRIX_170M;
> +    case AVCOL_SPC_SMPTE240M:
> +        return ZIMG_MATRIX_240M;
> +    case AVCOL_SPC_YCGCO:
> +        return ZIMG_MATRIX_YCGCO;
> +    case AVCOL_SPC_BT2020_NCL:
> +        return ZIMG_MATRIX_2020_NCL;
> +    case AVCOL_SPC_BT2020_CL:
> +        return ZIMG_MATRIX_2020_CL;
> +    case AVCOL_SPC_CHROMA_DERIVED_NCL:
> +        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
> +    case AVCOL_SPC_CHROMA_DERIVED_CL:
> +        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
> +    case AVCOL_SPC_ICTCP:
> +        return ZIMG_MATRIX_ICTCP;
> +    }
> +    return ZIMG_MATRIX_UNSPECIFIED;
> +}
> +
> +static int convert_trc(enum AVColorTransferCharacteristic color_trc)
> +{
> +    switch (color_trc) {
> +    case AVCOL_TRC_UNSPECIFIED:
> +        return ZIMG_TRANSFER_UNSPECIFIED;
> +    case AVCOL_TRC_BT709:
> +        return ZIMG_TRANSFER_709;
> +    case AVCOL_TRC_GAMMA22:
> +        return ZIMG_TRANSFER_470_M;
> +    case AVCOL_TRC_GAMMA28:
> +        return ZIMG_TRANSFER_470_BG;
> +    case AVCOL_TRC_SMPTE170M:
> +        return ZIMG_TRANSFER_601;
> +    case AVCOL_TRC_SMPTE240M:
> +        return ZIMG_TRANSFER_240M;
> +    case AVCOL_TRC_LINEAR:
> +        return ZIMG_TRANSFER_LINEAR;
> +    case AVCOL_TRC_LOG:
> +        return ZIMG_TRANSFER_LOG_100;
> +    case AVCOL_TRC_LOG_SQRT:
> +        return ZIMG_TRANSFER_LOG_316;
> +    case AVCOL_TRC_IEC61966_2_4:
> +        return ZIMG_TRANSFER_IEC_61966_2_4;
> +    case AVCOL_TRC_BT2020_10:
> +        return ZIMG_TRANSFER_2020_10;
> +    case AVCOL_TRC_BT2020_12:
> +        return ZIMG_TRANSFER_2020_12;
> +    case AVCOL_TRC_SMPTE2084:
> +        return ZIMG_TRANSFER_ST2084;
> +    case AVCOL_TRC_ARIB_STD_B67:
> +        return ZIMG_TRANSFER_ARIB_B67;
> +    case AVCOL_TRC_IEC61966_2_1:
> +        return ZIMG_TRANSFER_IEC_61966_2_1;
> +    }
> +    return ZIMG_TRANSFER_UNSPECIFIED;
> +}
> +
> +static int convert_primaries(enum AVColorPrimaries color_primaries)
> +{
> +    switch (color_primaries) {
> +    case AVCOL_PRI_UNSPECIFIED:
> +        return ZIMG_PRIMARIES_UNSPECIFIED;
> +    case AVCOL_PRI_BT709:
> +        return ZIMG_PRIMARIES_709;
> +    case AVCOL_PRI_BT470M:
> +        return ZIMG_PRIMARIES_470_M;
> +    case AVCOL_PRI_BT470BG:
> +        return ZIMG_PRIMARIES_470_BG;
> +    case AVCOL_PRI_SMPTE170M:
> +        return ZIMG_PRIMARIES_170M;
> +    case AVCOL_PRI_SMPTE240M:
> +        return ZIMG_PRIMARIES_240M;
> +    case AVCOL_PRI_FILM:
> +        return ZIMG_PRIMARIES_FILM;
> +    case AVCOL_PRI_BT2020:
> +        return ZIMG_PRIMARIES_2020;
> +    case AVCOL_PRI_SMPTE428:
> +        return ZIMG_PRIMARIES_ST428;
> +    case AVCOL_PRI_SMPTE431:
> +        return ZIMG_PRIMARIES_ST431_2;
> +    case AVCOL_PRI_SMPTE432:
> +        return ZIMG_PRIMARIES_ST432_1;
> +    case AVCOL_PRI_JEDEC_P22:
> +        return ZIMG_PRIMARIES_EBU3213_E;
> +    }
> +    return ZIMG_PRIMARIES_UNSPECIFIED;
> +}
> +
> +static int convert_range(enum AVColorRange color_range)
> +{
> +    switch (color_range) {
> +    case AVCOL_RANGE_UNSPECIFIED:
> +    case AVCOL_RANGE_MPEG:
> +        return ZIMG_RANGE_LIMITED;
> +    case AVCOL_RANGE_JPEG:
> +        return ZIMG_RANGE_FULL;
> +    }
> +    return ZIMG_RANGE_LIMITED;
> +}
> +
> +static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
> +{
> +    switch (color_range) {
> +    case ZIMG_RANGE_LIMITED:
> +        return AVCOL_RANGE_MPEG;
> +    case ZIMG_RANGE_FULL:
> +        return AVCOL_RANGE_JPEG;
> +    }
> +    return AVCOL_RANGE_UNSPECIFIED;
> +}
> +
>   static av_cold int init(AVFilterContext *ctx)
>   {
>       ZScaleContext *s = ctx->priv;
>       int ret;
> +    int i;
> +
> +    for (i = 0; i < MAX_THREADS; i++) {
> +        s->tmp[i] = NULL;
> +        s->graph[i] = NULL;
> +        s->alpha_graph[i] = NULL;
> +    }
> +    zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
> +
> +    zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
> +
> +    zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
>   
>       if (s->size_str && (s->w_expr || s->h_expr)) {
>           av_log(ctx, AV_LOG_ERROR,
> @@ -158,7 +336,6 @@ static av_cold int init(AVFilterContext *ctx)
>           av_opt_set(s, "w", "iw", 0);
>       if (!s->h_expr)
>           av_opt_set(s, "h", "ih", 0);
> -
>       return 0;
>   }
>   
> @@ -194,6 +371,153 @@ static int query_formats(AVFilterContext *ctx)
>       return ff_formats_ref(ff_make_format_list(pixel_fmts), &ctx->outputs[0]->incfg.formats);
>   }
>   
> +/* returns 0 if image formats are the same and 1 otherwise */
> +static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
> +{
> +    return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
> +#if ZIMG_API_VERSION >= 0x204
> +        (img_fmt0->alpha != img_fmt1->alpha) ||
> +#endif
> +        (img_fmt0->color_family != img_fmt1->color_family) ||
> +        (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
> +        (img_fmt0->depth != img_fmt1->depth) ||
> +        (img_fmt0->field_parity != img_fmt1->field_parity) ||
> +        (img_fmt0->height != img_fmt1->height) ||
> +        (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
> +        (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
> +        (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
> +        (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
> +        (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
> +        (img_fmt0->transfer_characteristics != img_fmt1->transfer_characteristics) ||
> +        (img_fmt0->width != img_fmt1->width));
> +}
> +
> +/* returns 0 if graph builder parameters are the same and 1 otherwise */
> +static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
> +{
> +    /* the parameters that could be changed inside a single ffmpeg zscale invocation  are checked only
> +    and NaN values that are default for some params are treated properly*/
> +    int ret = (parm0->allow_approximate_gamma != parm1->allow_approximate_gamma) ||
> +        (parm0->dither_type != parm1->dither_type) ||
> +        (parm0->resample_filter != parm1->resample_filter) ||
> +        (parm0->resample_filter_uv != parm1->resample_filter_uv);
> +
> +    if ((isnan(parm0->nominal_peak_luminance) == 0) || (isnan(parm1->nominal_peak_luminance) == 0))
> +        ret = ret || (parm0->nominal_peak_luminance != parm1->nominal_peak_luminance);
> +    if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a) == 0))
> +        ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
> +    if ((isnan(parm0->filter_param_a_uv) == 0) || (isnan(parm1->filter_param_a_uv) == 0))
> +        ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
> +    if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b) == 0))
> +        ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
> +    if ((isnan(parm0->filter_param_b_uv) == 0) || (isnan(parm1->filter_param_b_uv) == 0))
> +        ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
> +
> +    return ret;
> +}
> +
> +static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
> +    int colorspace, int primaries, int transfer, int range, int location)
> +{
> +    format->width = frame->width;
> +    format->height = frame->height;
> +    format->subsample_w = desc->log2_chroma_w;
> +    format->subsample_h = desc->log2_chroma_h;
> +    format->depth = desc->comp[0].depth;
> +    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
> +    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
> +    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
> +    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
> +    format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
> +    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
> +    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
> +}

Why are you moving all these functions up in the file? They make the 
patch much harder to read.

If moving them is necessary, then please split this patch in two. One 
moving the functions, then one applying the actual changes to them and 
the rest of the file. It will make reviewing much easier.


More information about the ffmpeg-devel mailing list