[FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
Paul B Mahol
onemda at gmail.com
Fri Feb 18 13:43:44 EET 2022
On Thu, Feb 10, 2022 at 01:08:04PM +0300, Victoria Zhislina wrote:
> By implementing ffmpeg threading support via frame slicing, and by calling
> zimg_filter_graph_build (which used to take 30-60% of each frame's processing time)
> only when necessary (i.e. when some parameters have changed),
> a performance increase of more than 4x over the original version
> is seen in video downscaling and color conversion
> on a 64-core Intel Xeon, and 3x on an i7-6700K (4 cores with HT).
>
> Signed-off-by: Victoria Zhislina <Victoria.Zhislina at intel.com>
> ---
> libavfilter/vf_zscale.c | 786 ++++++++++++++++++++++++----------------
> 1 file changed, 475 insertions(+), 311 deletions(-)
>
> diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
> index 1288c5efc1..ce4c0b2c76 100644
> --- a/libavfilter/vf_zscale.c
> +++ b/libavfilter/vf_zscale.c
> @@ -1,6 +1,7 @@
> /*
> * Copyright (c) 2015 Paul B Mahol
> - *
> + * * 2022 Victoria Zhislina, Intel - performance optimization
Just the name, please, without the extra stuff; see the line above.
> +
> * This file is part of FFmpeg.
> *
> * FFmpeg is free software; you can redistribute it and/or
> @@ -44,6 +45,8 @@
> #include "libavutil/imgutils.h"
>
> #define ZIMG_ALIGNMENT 32
> +#define MIN_TILESIZE 64
> +#define MAX_THREADS 64
>
> static const char *const var_names[] = {
> "in_w", "iw",
> @@ -113,13 +116,17 @@ typedef struct ZScaleContext {
>
> int force_original_aspect_ratio;
>
> - void *tmp;
> - size_t tmp_size;
> + void *tmp[MAX_THREADS]; //separate for each thread;
> + int nb_threads;
Sorry, but tab characters are generally forbidden in FFmpeg source code.
> + int slice_h;
>
> zimg_image_format src_format, dst_format;
> zimg_image_format alpha_src_format, alpha_dst_format;
> + zimg_image_format src_format_tmp, dst_format_tmp;
> + zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
> zimg_graph_builder_params alpha_params, params;
> - zimg_filter_graph *alpha_graph, *graph;
> + zimg_graph_builder_params alpha_params_tmp, params_tmp;
> + zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
>
> enum AVColorSpace in_colorspace, out_colorspace;
> enum AVColorTransferCharacteristic in_trc, out_trc;
> @@ -128,10 +135,181 @@ typedef struct ZScaleContext {
> enum AVChromaLocation in_chromal, out_chromal;
> } ZScaleContext;
>
> +
> +typedef struct ThreadData {
> + const AVPixFmtDescriptor *desc, *odesc;
> + AVFrame *in, *out;
> +} ThreadData;
> +
> +static int convert_chroma_location(enum AVChromaLocation chroma_location)
> +{
> + switch (chroma_location) {
> + case AVCHROMA_LOC_UNSPECIFIED:
> + case AVCHROMA_LOC_LEFT:
> + return ZIMG_CHROMA_LEFT;
> + case AVCHROMA_LOC_CENTER:
> + return ZIMG_CHROMA_CENTER;
> + case AVCHROMA_LOC_TOPLEFT:
> + return ZIMG_CHROMA_TOP_LEFT;
> + case AVCHROMA_LOC_TOP:
> + return ZIMG_CHROMA_TOP;
> + case AVCHROMA_LOC_BOTTOMLEFT:
> + return ZIMG_CHROMA_BOTTOM_LEFT;
> + case AVCHROMA_LOC_BOTTOM:
> + return ZIMG_CHROMA_BOTTOM;
> + }
> + return ZIMG_CHROMA_LEFT;
> +}
> +
> +static int convert_matrix(enum AVColorSpace colorspace)
> +{
> + switch (colorspace) {
> + case AVCOL_SPC_RGB:
> + return ZIMG_MATRIX_RGB;
> + case AVCOL_SPC_BT709:
> + return ZIMG_MATRIX_709;
> + case AVCOL_SPC_UNSPECIFIED:
> + return ZIMG_MATRIX_UNSPECIFIED;
> + case AVCOL_SPC_FCC:
> + return ZIMG_MATRIX_FCC;
> + case AVCOL_SPC_BT470BG:
> + return ZIMG_MATRIX_470BG;
> + case AVCOL_SPC_SMPTE170M:
> + return ZIMG_MATRIX_170M;
> + case AVCOL_SPC_SMPTE240M:
> + return ZIMG_MATRIX_240M;
> + case AVCOL_SPC_YCGCO:
> + return ZIMG_MATRIX_YCGCO;
> + case AVCOL_SPC_BT2020_NCL:
> + return ZIMG_MATRIX_2020_NCL;
> + case AVCOL_SPC_BT2020_CL:
> + return ZIMG_MATRIX_2020_CL;
> + case AVCOL_SPC_CHROMA_DERIVED_NCL:
> + return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
> + case AVCOL_SPC_CHROMA_DERIVED_CL:
> + return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
> + case AVCOL_SPC_ICTCP:
> + return ZIMG_MATRIX_ICTCP;
> + }
> + return ZIMG_MATRIX_UNSPECIFIED;
> +}
> +
> +static int convert_trc(enum AVColorTransferCharacteristic color_trc)
> +{
> + switch (color_trc) {
> + case AVCOL_TRC_UNSPECIFIED:
> + return ZIMG_TRANSFER_UNSPECIFIED;
> + case AVCOL_TRC_BT709:
> + return ZIMG_TRANSFER_709;
> + case AVCOL_TRC_GAMMA22:
> + return ZIMG_TRANSFER_470_M;
> + case AVCOL_TRC_GAMMA28:
> + return ZIMG_TRANSFER_470_BG;
> + case AVCOL_TRC_SMPTE170M:
> + return ZIMG_TRANSFER_601;
> + case AVCOL_TRC_SMPTE240M:
> + return ZIMG_TRANSFER_240M;
> + case AVCOL_TRC_LINEAR:
> + return ZIMG_TRANSFER_LINEAR;
> + case AVCOL_TRC_LOG:
> + return ZIMG_TRANSFER_LOG_100;
> + case AVCOL_TRC_LOG_SQRT:
> + return ZIMG_TRANSFER_LOG_316;
> + case AVCOL_TRC_IEC61966_2_4:
> + return ZIMG_TRANSFER_IEC_61966_2_4;
> + case AVCOL_TRC_BT2020_10:
> + return ZIMG_TRANSFER_2020_10;
> + case AVCOL_TRC_BT2020_12:
> + return ZIMG_TRANSFER_2020_12;
> + case AVCOL_TRC_SMPTE2084:
> + return ZIMG_TRANSFER_ST2084;
> + case AVCOL_TRC_ARIB_STD_B67:
> + return ZIMG_TRANSFER_ARIB_B67;
> + case AVCOL_TRC_IEC61966_2_1:
> + return ZIMG_TRANSFER_IEC_61966_2_1;
> + }
> + return ZIMG_TRANSFER_UNSPECIFIED;
> +}
> +
> +static int convert_primaries(enum AVColorPrimaries color_primaries)
> +{
> + switch (color_primaries) {
> + case AVCOL_PRI_UNSPECIFIED:
> + return ZIMG_PRIMARIES_UNSPECIFIED;
> + case AVCOL_PRI_BT709:
> + return ZIMG_PRIMARIES_709;
> + case AVCOL_PRI_BT470M:
> + return ZIMG_PRIMARIES_470_M;
> + case AVCOL_PRI_BT470BG:
> + return ZIMG_PRIMARIES_470_BG;
> + case AVCOL_PRI_SMPTE170M:
> + return ZIMG_PRIMARIES_170M;
> + case AVCOL_PRI_SMPTE240M:
> + return ZIMG_PRIMARIES_240M;
> + case AVCOL_PRI_FILM:
> + return ZIMG_PRIMARIES_FILM;
> + case AVCOL_PRI_BT2020:
> + return ZIMG_PRIMARIES_2020;
> + case AVCOL_PRI_SMPTE428:
> + return ZIMG_PRIMARIES_ST428;
> + case AVCOL_PRI_SMPTE431:
> + return ZIMG_PRIMARIES_ST431_2;
> + case AVCOL_PRI_SMPTE432:
> + return ZIMG_PRIMARIES_ST432_1;
> + case AVCOL_PRI_JEDEC_P22:
> + return ZIMG_PRIMARIES_EBU3213_E;
> + }
> + return ZIMG_PRIMARIES_UNSPECIFIED;
> +}
> +
> +static int convert_range(enum AVColorRange color_range)
> +{
> + switch (color_range) {
> + case AVCOL_RANGE_UNSPECIFIED:
> + case AVCOL_RANGE_MPEG:
> + return ZIMG_RANGE_LIMITED;
> + case AVCOL_RANGE_JPEG:
> + return ZIMG_RANGE_FULL;
> + }
> + return ZIMG_RANGE_LIMITED;
> +}
> +
> +static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
> +{
> + switch (color_range) {
> + case ZIMG_RANGE_LIMITED:
> + return AVCOL_RANGE_MPEG;
> + case ZIMG_RANGE_FULL:
> + return AVCOL_RANGE_JPEG;
> + }
> + return AVCOL_RANGE_UNSPECIFIED;
> +}
> +
> static av_cold int init(AVFilterContext *ctx)
> {
> ZScaleContext *s = ctx->priv;
> int ret;
> + int i;
> +
> + for (i = 0; i < MAX_THREADS; i++) {
> + s->tmp[i] = NULL;
> + s->graph[i] = NULL;
> + s->alpha_graph[i] = NULL;
> + }
> + zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
> + zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
> + zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
> + zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
> +
> + zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
> + zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
> + zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
> + zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
> +
> + zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
> + zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
> + zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
> + zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
>
> if (s->size_str && (s->w_expr || s->h_expr)) {
> av_log(ctx, AV_LOG_ERROR,
> @@ -194,6 +372,153 @@ static int query_formats(AVFilterContext *ctx)
> return ff_formats_ref(ff_make_format_list(pixel_fmts), &ctx->outputs[0]->incfg.formats);
> }
>
> +/* returns 0 if image formats are the same and 1 otherwise */
> +static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
> +{
> + return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
> +#if ZIMG_API_VERSION >= 0x204
> + (img_fmt0->alpha != img_fmt1->alpha) ||
> +#endif
> + (img_fmt0->color_family != img_fmt1->color_family) ||
> + (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
> + (img_fmt0->depth != img_fmt1->depth) ||
> + (img_fmt0->field_parity != img_fmt1->field_parity) ||
> + (img_fmt0->height != img_fmt1->height) ||
> + (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
> + (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
> + (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
> + (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
> + (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
> + (img_fmt0->transfer_characteristics != img_fmt1->transfer_characteristics) ||
> + (img_fmt0->width != img_fmt1->width));
> +}
> +
> +/* returns 0 if graph builder parameters are the same and 1 otherwise */
> +static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
> +{
> + /* the parameters that could be changed inside a single ffmpeg zscale invocation are checked only
> + and NaN values that are default for some params are treated properly*/
> + int ret = (parm0->allow_approximate_gamma != parm1->allow_approximate_gamma) ||
> + (parm0->dither_type != parm1->dither_type) ||
> + (parm0->resample_filter != parm1->resample_filter) ||
> + (parm0->resample_filter_uv != parm1->resample_filter_uv);
> +
> + if ((isnan(parm0->nominal_peak_luminance) == 0) || (isnan(parm1->nominal_peak_luminance) == 0))
> + ret = ret || (parm0->nominal_peak_luminance != parm1->nominal_peak_luminance);
> + if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a) == 0))
> + ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
> + if ((isnan(parm0->filter_param_a_uv) == 0) || (isnan(parm1->filter_param_a_uv) == 0))
> + ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
> + if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b) == 0))
> + ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
> + if ((isnan(parm0->filter_param_b_uv) == 0) || (isnan(parm1->filter_param_b_uv) == 0))
> + ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
> +
> + return ret;
> +}
> +
> +static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
> + int colorspace, int primaries, int transfer, int range, int location)
> +{
> + format->width = frame->width;
> + format->height = frame->height;
> + format->subsample_w = desc->log2_chroma_w;
> + format->subsample_h = desc->log2_chroma_h;
> + format->depth = desc->comp[0].depth;
> + format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
> + format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
> + format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
> + format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
> + format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
> + format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
> + format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
> +}
> +
> +static int print_zimg_error(AVFilterContext *ctx)
> +{
> + char err_msg[1024];
> + int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
> +
> + av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
> +
> + return AVERROR_EXTERNAL;
> +}
> +
> +static int graphs_build(AVFrame *in, AVFrame *out, const AVPixFmtDescriptor *desc, const AVPixFmtDescriptor *out_desc,
> + ZScaleContext *s, int job_nr)
> +{
> + int ret;
> + size_t size;
> + zimg_image_format src_format;
> + zimg_image_format dst_format;
> + zimg_image_format alpha_src_format;
> + zimg_image_format alpha_dst_format;
> +
> + src_format = s->src_format;
> + dst_format = s->dst_format;
> + /* The input slice is specified through the active_region field,
> + unlike the output slice.
> + according to zimg requirements input and output slices should have even dimentions */
> + src_format.active_region.width = in->width;
> + src_format.active_region.height = s->slice_h;
> + src_format.active_region.left = 0;
> + src_format.active_region.top = job_nr * src_format.active_region.height;
> + //dst now is the single tile only!!
> + dst_format.width = out->width;
> + dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
> +
> + //the last slice could differ from the previous ones due to the slices division "tail"
> + if (job_nr == (s->nb_threads - 1)) {
> + src_format.active_region.height = src_format.height - src_format.active_region.top;
> + dst_format.height = out->height - job_nr * dst_format.height;
> + }
> +
> + if (s->graph[job_nr]) {
> + zimg_filter_graph_free(s->graph[job_nr]);
> + }
> + s->graph[job_nr] = zimg_filter_graph_build(&src_format, &dst_format, &s->params);
> + if (!s->graph[job_nr])
> + return print_zimg_error(NULL);
> +
> + ret = zimg_filter_graph_get_tmp_size(s->graph[job_nr], &size);
> + if (ret)
> + return print_zimg_error(NULL);
> +
> + if (s->tmp[job_nr])
> + av_freep(&s->tmp[job_nr]);
> + s->tmp[job_nr] = av_malloc(size);
> + if (!s->tmp[job_nr])
> + return AVERROR(ENOMEM);
> +
> + if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && out_desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> + alpha_src_format = s->alpha_src_format;
> + alpha_dst_format = s->alpha_dst_format;
> + /* The input slice is specified through the active_region field, unlike the output slice.
> + according to zimg requirements input and output slices should have even dimentions */
> + alpha_src_format.active_region.width = in->width;
> + alpha_src_format.active_region.height = s->slice_h;
> + alpha_src_format.active_region.left = 0;
> + alpha_src_format.active_region.top = job_nr * alpha_src_format.active_region.height;
> + //dst now is the single tile only!!
> + alpha_dst_format.width = out->width;
> + alpha_dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
> +
> + //the last slice could differ from the previous ones due to the slices division "tail"
> + if (job_nr == (s->nb_threads - 1)) {
> + alpha_src_format.active_region.height = alpha_src_format.height - alpha_src_format.active_region.top;
> + alpha_dst_format.height = out->height - job_nr * alpha_dst_format.height;
> + }
> +
> + if (s->alpha_graph[job_nr]) {
> + zimg_filter_graph_free(s->alpha_graph[job_nr]);
> + }
> + s->alpha_graph[job_nr] = zimg_filter_graph_build(&alpha_src_format, &alpha_dst_format, &s->alpha_params);
> + if (!s->alpha_graph[job_nr])
> + return print_zimg_error(NULL);
> + }
> + return 0;
> +}
> +
> static int config_props(AVFilterLink *outlink)
> {
> AVFilterContext *ctx = outlink->src;
> @@ -317,212 +642,15 @@ fail:
> return ret;
> }
>
> -static int print_zimg_error(AVFilterContext *ctx)
> -{
> - char err_msg[1024];
> - int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
> -
> - av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
> -
> - return AVERROR_EXTERNAL;
> -}
> -
> -static int convert_chroma_location(enum AVChromaLocation chroma_location)
> -{
> - switch (chroma_location) {
> - case AVCHROMA_LOC_UNSPECIFIED:
> - case AVCHROMA_LOC_LEFT:
> - return ZIMG_CHROMA_LEFT;
> - case AVCHROMA_LOC_CENTER:
> - return ZIMG_CHROMA_CENTER;
> - case AVCHROMA_LOC_TOPLEFT:
> - return ZIMG_CHROMA_TOP_LEFT;
> - case AVCHROMA_LOC_TOP:
> - return ZIMG_CHROMA_TOP;
> - case AVCHROMA_LOC_BOTTOMLEFT:
> - return ZIMG_CHROMA_BOTTOM_LEFT;
> - case AVCHROMA_LOC_BOTTOM:
> - return ZIMG_CHROMA_BOTTOM;
> - }
> - return ZIMG_CHROMA_LEFT;
> -}
> -
> -static int convert_matrix(enum AVColorSpace colorspace)
> -{
> - switch (colorspace) {
> - case AVCOL_SPC_RGB:
> - return ZIMG_MATRIX_RGB;
> - case AVCOL_SPC_BT709:
> - return ZIMG_MATRIX_709;
> - case AVCOL_SPC_UNSPECIFIED:
> - return ZIMG_MATRIX_UNSPECIFIED;
> - case AVCOL_SPC_FCC:
> - return ZIMG_MATRIX_FCC;
> - case AVCOL_SPC_BT470BG:
> - return ZIMG_MATRIX_470BG;
> - case AVCOL_SPC_SMPTE170M:
> - return ZIMG_MATRIX_170M;
> - case AVCOL_SPC_SMPTE240M:
> - return ZIMG_MATRIX_240M;
> - case AVCOL_SPC_YCGCO:
> - return ZIMG_MATRIX_YCGCO;
> - case AVCOL_SPC_BT2020_NCL:
> - return ZIMG_MATRIX_2020_NCL;
> - case AVCOL_SPC_BT2020_CL:
> - return ZIMG_MATRIX_2020_CL;
> - case AVCOL_SPC_CHROMA_DERIVED_NCL:
> - return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
> - case AVCOL_SPC_CHROMA_DERIVED_CL:
> - return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
> - case AVCOL_SPC_ICTCP:
> - return ZIMG_MATRIX_ICTCP;
> - }
> - return ZIMG_MATRIX_UNSPECIFIED;
> -}
> -
> -static int convert_trc(enum AVColorTransferCharacteristic color_trc)
> -{
> - switch (color_trc) {
> - case AVCOL_TRC_UNSPECIFIED:
> - return ZIMG_TRANSFER_UNSPECIFIED;
> - case AVCOL_TRC_BT709:
> - return ZIMG_TRANSFER_709;
> - case AVCOL_TRC_GAMMA22:
> - return ZIMG_TRANSFER_470_M;
> - case AVCOL_TRC_GAMMA28:
> - return ZIMG_TRANSFER_470_BG;
> - case AVCOL_TRC_SMPTE170M:
> - return ZIMG_TRANSFER_601;
> - case AVCOL_TRC_SMPTE240M:
> - return ZIMG_TRANSFER_240M;
> - case AVCOL_TRC_LINEAR:
> - return ZIMG_TRANSFER_LINEAR;
> - case AVCOL_TRC_LOG:
> - return ZIMG_TRANSFER_LOG_100;
> - case AVCOL_TRC_LOG_SQRT:
> - return ZIMG_TRANSFER_LOG_316;
> - case AVCOL_TRC_IEC61966_2_4:
> - return ZIMG_TRANSFER_IEC_61966_2_4;
> - case AVCOL_TRC_BT2020_10:
> - return ZIMG_TRANSFER_2020_10;
> - case AVCOL_TRC_BT2020_12:
> - return ZIMG_TRANSFER_2020_12;
> - case AVCOL_TRC_SMPTE2084:
> - return ZIMG_TRANSFER_ST2084;
> - case AVCOL_TRC_ARIB_STD_B67:
> - return ZIMG_TRANSFER_ARIB_B67;
> - case AVCOL_TRC_IEC61966_2_1:
> - return ZIMG_TRANSFER_IEC_61966_2_1;
> - }
> - return ZIMG_TRANSFER_UNSPECIFIED;
> -}
> -
> -static int convert_primaries(enum AVColorPrimaries color_primaries)
> -{
> - switch (color_primaries) {
> - case AVCOL_PRI_UNSPECIFIED:
> - return ZIMG_PRIMARIES_UNSPECIFIED;
> - case AVCOL_PRI_BT709:
> - return ZIMG_PRIMARIES_709;
> - case AVCOL_PRI_BT470M:
> - return ZIMG_PRIMARIES_470_M;
> - case AVCOL_PRI_BT470BG:
> - return ZIMG_PRIMARIES_470_BG;
> - case AVCOL_PRI_SMPTE170M:
> - return ZIMG_PRIMARIES_170M;
> - case AVCOL_PRI_SMPTE240M:
> - return ZIMG_PRIMARIES_240M;
> - case AVCOL_PRI_FILM:
> - return ZIMG_PRIMARIES_FILM;
> - case AVCOL_PRI_BT2020:
> - return ZIMG_PRIMARIES_2020;
> - case AVCOL_PRI_SMPTE428:
> - return ZIMG_PRIMARIES_ST428;
> - case AVCOL_PRI_SMPTE431:
> - return ZIMG_PRIMARIES_ST431_2;
> - case AVCOL_PRI_SMPTE432:
> - return ZIMG_PRIMARIES_ST432_1;
> - case AVCOL_PRI_JEDEC_P22:
> - return ZIMG_PRIMARIES_EBU3213_E;
> - }
> - return ZIMG_PRIMARIES_UNSPECIFIED;
> -}
> -
> -static int convert_range(enum AVColorRange color_range)
> -{
> - switch (color_range) {
> - case AVCOL_RANGE_UNSPECIFIED:
> - case AVCOL_RANGE_MPEG:
> - return ZIMG_RANGE_LIMITED;
> - case AVCOL_RANGE_JPEG:
> - return ZIMG_RANGE_FULL;
> - }
> - return ZIMG_RANGE_LIMITED;
> -}
> -
> -static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
> -{
> - switch (color_range) {
> - case ZIMG_RANGE_LIMITED:
> - return AVCOL_RANGE_MPEG;
> - case ZIMG_RANGE_FULL:
> - return AVCOL_RANGE_JPEG;
> - }
> - return AVCOL_RANGE_UNSPECIFIED;
> -}
> -
> -static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
> - int colorspace, int primaries, int transfer, int range, int location)
> -{
> - format->width = frame->width;
> - format->height = frame->height;
> - format->subsample_w = desc->log2_chroma_w;
> - format->subsample_h = desc->log2_chroma_h;
> - format->depth = desc->comp[0].depth;
> - format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
> - format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
> - format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
> - format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
> - format->transfer_characteristics = transfer == - 1 ? convert_trc(frame->color_trc) : transfer;
> - format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
> - format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
> -}
> -
> -static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *params,
> - zimg_image_format *src_format, zimg_image_format *dst_format,
> - void **tmp, size_t *tmp_size)
> -{
> - int ret;
> - size_t size;
> -
> - zimg_filter_graph_free(*graph);
> - *graph = zimg_filter_graph_build(src_format, dst_format, params);
> - if (!*graph)
> - return print_zimg_error(NULL);
> -
> - ret = zimg_filter_graph_get_tmp_size(*graph, &size);
> - if (ret)
> - return print_zimg_error(NULL);
> -
> - if (size > *tmp_size) {
> - av_freep(tmp);
> - *tmp = av_malloc(size);
> - if (!*tmp)
> - return AVERROR(ENOMEM);
> -
> - *tmp_size = size;
> - }
> -
> - return 0;
> -}
>
> static int realign_frame(const AVPixFmtDescriptor *desc, AVFrame **frame)
> {
> AVFrame *aligned = NULL;
> - int ret = 0, plane;
> + int ret = 0, plane, planes;
>
> /* Realign any unaligned input frame. */
> - for (plane = 0; plane < 3; plane++) {
> + planes = av_pix_fmt_count_planes(desc->nb_components);
> + for (plane = 0; plane < planes; plane++) {
> int p = desc->comp[plane].plane;
> if ((uintptr_t)(*frame)->data[p] % ZIMG_ALIGNMENT || (*frame)->linesize[p] % ZIMG_ALIGNMENT) {
> if (!(aligned = av_frame_alloc())) {
> @@ -554,6 +682,7 @@ fail:
> return ret;
> }
>
> +
This newline is not needed.
> static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
> {
> if (s->colorspace != -1)
> @@ -572,20 +701,77 @@ static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
> frame->chroma_location = (int)s->dst_format.chroma_location + 1;
> }
>
> +static int filter_slice(AVFilterContext *ctx, void *data, int job_nr, int n_jobs)
> +{
> + ThreadData *td = data;
> + int ret = 0;
> + int p;
> + int out_sampl;
> + int need_gb;
> + ZScaleContext *s = ctx->priv;
> + zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
> + zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
> + int dst_tile_height = ((unsigned int)(td->out->height / n_jobs)) & 0xfffffffe;
> +
> + /* create zimg filter graphs for each thread
> + only if not created earlier or there is some change in frame parameters */
> + need_gb = compare_zimg_image_formats(&s->src_format, &s->src_format_tmp) ||
> + compare_zimg_image_formats(&s->dst_format, &s->dst_format_tmp) ||
> + compare_zimg_graph_builder_params(&s->params, &s->params_tmp);
> + if(td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA)
> + need_gb = need_gb || compare_zimg_image_formats(&s->alpha_src_format, &s->alpha_src_format_tmp) ||
> + compare_zimg_image_formats(&s->alpha_dst_format, &s->alpha_dst_format_tmp) ||
> + compare_zimg_graph_builder_params(&s->alpha_params, &s->alpha_params_tmp);
> +
> + if (need_gb){
> + ret = graphs_build(td->in, td->out, td->desc, td->odesc, s, job_nr);
> + if (ret < 0)
> + return print_zimg_error(ctx);
> + }
> + out_sampl = FFMAX3(td->out->linesize[0], td->out->linesize[1], td->out->linesize[2]);
> + for (int i = 0; i < 3; i++) {
> + p = td->desc->comp[i].plane;
> +
> + src_buf.plane[i].data = td->in->data[p];
> + src_buf.plane[i].stride = td->in->linesize[p];
> + src_buf.plane[i].mask = -1;
> +
> + p = td->odesc->comp[i].plane;
> + dst_buf.plane[i].data = td->out->data[p] + td->out->linesize[p] * dst_tile_height * td->out->linesize[p] / out_sampl * job_nr;
> + dst_buf.plane[i].stride = td->out->linesize[p];
> + dst_buf.plane[i].mask = -1;
> + }
> + ret = zimg_filter_graph_process(s->graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
> + if (ret)
> + return print_zimg_error(ctx);
> +
> + if (td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> + src_buf.plane[0].data = td->in->data[3];
> + src_buf.plane[0].stride = td->in->linesize[3];
> + src_buf.plane[0].mask = -1;
> +
> + dst_buf.plane[0].data = td->out->data[3] + td->out->linesize[3] * dst_tile_height * job_nr;
> + dst_buf.plane[0].stride = td->out->linesize[3];
> + dst_buf.plane[0].mask = -1;
> +
> + ret = zimg_filter_graph_process(s->alpha_graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
> + if (ret)
> + return print_zimg_error(ctx);
> + }
> + return 0;
> +}
> +
> static int filter_frame(AVFilterLink *link, AVFrame *in)
> {
> - ZScaleContext *s = link->dst->priv;
> - AVFilterLink *outlink = link->dst->outputs[0];
> + AVFilterContext *ctx = link->dst;
> + ZScaleContext *s = ctx->priv;
> + AVFilterLink *outlink = ctx->outputs[0];
> const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
> const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
> - zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
> - zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
> char buf[32];
> - int ret = 0, plane;
> + int ret = 0;
> AVFrame *out = NULL;
> -
> - if ((ret = realign_frame(desc, &in)) < 0)
> - goto fail;
> + ThreadData td;
>
> if (!(out = ff_get_video_buffer(outlink, outlink->w, outlink->h))) {
> ret = AVERROR(ENOMEM);
> @@ -596,35 +782,60 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
> out->width = outlink->w;
> out->height = outlink->h;
>
> - if( in->width != link->w
> - || in->height != link->h
> - || in->format != link->format
> - || s->in_colorspace != in->colorspace
> - || s->in_trc != in->color_trc
> - || s->in_primaries != in->color_primaries
> - || s->in_range != in->color_range
> - || s->out_colorspace != out->colorspace
> - || s->out_trc != out->color_trc
> - || s->out_primaries != out->color_primaries
> - || s->out_range != out->color_range
> - || s->in_chromal != in->chroma_location
> - || s->out_chromal != out->chroma_location) {
> + //we need to use this filter if something is different for an input and output only
> + //otherwise - just copy the input frame to the output
> + if ((link->w != outlink->w) ||
> + (link->h != outlink->h) ||
> + (s->src_format.chroma_location != s->dst_format.chroma_location)||
Please put a space between ')' and '||' here and everywhere else.
> + (s->src_format.color_family !=s->dst_format.color_family)||
> + (s->src_format.color_primaries !=s->dst_format.color_primaries)||
> + (s->src_format.depth !=s->dst_format.depth)||
> + (s->src_format.matrix_coefficients !=s->dst_format.matrix_coefficients)||
> + (s->src_format.field_parity !=s->dst_format.field_parity)||
> + (s->src_format.pixel_range !=s->dst_format.pixel_range)||
> + (s->src_format.pixel_type !=s->dst_format.pixel_type)||
> + (s->src_format.transfer_characteristics !=s->dst_format.transfer_characteristics)
> + ){
> + if ((ret = realign_frame(desc, &in)) < 0)
> + goto fail;
> +
> snprintf(buf, sizeof(buf)-1, "%d", outlink->w);
> av_opt_set(s, "w", buf, 0);
> snprintf(buf, sizeof(buf)-1, "%d", outlink->h);
> av_opt_set(s, "h", buf, 0);
>
> +
This newline is not needed.
> link->dst->inputs[0]->format = in->format;
> link->dst->inputs[0]->w = in->width;
> link->dst->inputs[0]->h = in->height;
>
> - if ((ret = config_props(outlink)) < 0)
> - goto fail;
> + update_output_color_information(s, out);
> +
> + s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), link->h / MIN_TILESIZE);
> + s->slice_h = ((unsigned int)(link->h / s->nb_threads)) & 0xfffffffe; // slice_h should be even for zimg
> + s->in_colorspace = in->colorspace;
> + s->in_trc = in->color_trc;
> + s->in_primaries = in->color_primaries;
> + s->in_range = in->color_range;
> + s->out_colorspace = out->colorspace;
> + s->out_trc = out->color_trc;
> + s->out_primaries = out->color_primaries;
> + s->out_range = out->color_range;
> +
> + av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
> + (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
> + (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
> + INT_MAX);
>
> zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
> zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
> zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
>
> + format_init(&s->src_format, in, desc, s->colorspace_in,
> + s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
> + format_init(&s->dst_format, out, odesc, s->colorspace,
> + s->primaries, s->trc, s->range, s->chromal);
> +
> s->params.dither_type = s->dither;
> s->params.cpu_type = ZIMG_CPU_AUTO;
> s->params.resample_filter = s->filter;
> @@ -634,27 +845,6 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
> s->params.filter_param_a = s->params.filter_param_a_uv = s->param_a;
> s->params.filter_param_b = s->params.filter_param_b_uv = s->param_b;
>
> - format_init(&s->src_format, in, desc, s->colorspace_in,
> - s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
> - format_init(&s->dst_format, out, odesc, s->colorspace,
> - s->primaries, s->trc, s->range, s->chromal);
> -
> - update_output_color_information(s, out);
> -
> - ret = graph_build(&s->graph, &s->params, &s->src_format, &s->dst_format,
> - &s->tmp, &s->tmp_size);
> - if (ret < 0)
> - goto fail;
> -
> - s->in_colorspace = in->colorspace;
> - s->in_trc = in->color_trc;
> - s->in_primaries = in->color_primaries;
> - s->in_range = in->color_range;
> - s->out_colorspace = out->colorspace;
> - s->out_trc = out->color_trc;
> - s->out_primaries = out->color_primaries;
> - s->out_range = out->color_range;
> -
> if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
> zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
> @@ -670,76 +860,48 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
> s->alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
> s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
>
> - s->alpha_dst_format.width = out->width;
> - s->alpha_dst_format.height = out->height;
> s->alpha_dst_format.depth = odesc->comp[0].depth;
> s->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
> s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
> -
> - zimg_filter_graph_free(s->alpha_graph);
> - s->alpha_graph = zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format, &s->alpha_params);
> - if (!s->alpha_graph) {
> - ret = print_zimg_error(link->dst);
> - goto fail;
> - }
> }
> - }
>
> - update_output_color_information(s, out);
> + td.in = in;
> + td.out = out;
> + td.desc = desc;
> + td.odesc = odesc;
>
> - av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
> - (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
> - (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
> - INT_MAX);
> -
> - for (plane = 0; plane < 3; plane++) {
> - int p = desc->comp[plane].plane;
> - src_buf.plane[plane].data = in->data[p];
> - src_buf.plane[plane].stride = in->linesize[p];
> - src_buf.plane[plane].mask = -1;
> -
> - p = odesc->comp[plane].plane;
> - dst_buf.plane[plane].data = out->data[p];
> - dst_buf.plane[plane].stride = out->linesize[p];
> - dst_buf.plane[plane].mask = -1;
> - }
> -
> - ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
> - if (ret) {
> - ret = print_zimg_error(link->dst);
> - goto fail;
> - }
> + ff_filter_execute(ctx, filter_slice, &td, NULL, s->nb_threads);
>
> - if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> - src_buf.plane[0].data = in->data[3];
> - src_buf.plane[0].stride = in->linesize[3];
> - src_buf.plane[0].mask = -1;
> -
> - dst_buf.plane[0].data = out->data[3];
> - dst_buf.plane[0].stride = out->linesize[3];
> - dst_buf.plane[0].mask = -1;
> -
> - ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
> - if (ret) {
> - ret = print_zimg_error(link->dst);
> - goto fail;
> + s->src_format_tmp = s->src_format;
> + s->dst_format_tmp = s->dst_format;
> + s->params_tmp = s->params;
> + if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> + s->alpha_src_format_tmp = s->alpha_src_format;
> + s->alpha_dst_format_tmp = s->alpha_dst_format;
> + s->alpha_params_tmp = s->alpha_params;
> }
> - } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> - int x, y;
> -
> - if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
> - for (y = 0; y < out->height; y++) {
> - for (x = 0; x < out->width; x++) {
> - AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
> - av_float2int(1.0f));
> +
> + if ((!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) && (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) ){
> + int x, y;
> + if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
> + for (y = 0; y < out->height; y++) {
> + for (x = 0; x < out->width; x++) {
> + AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
> + av_float2int(1.0f));
> + }
> }
> + } else {
> + for (y = 0; y < outlink->h; y++)
> + memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
> }
> - } else {
> - for (y = 0; y < outlink->h; y++)
> - memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
> }
> }
> -
> + else {
> + /*no need for any filtering */
> + ret = av_frame_copy(out, in);
> + if (ret < 0)
> + return ret;
> + }
> fail:
> av_frame_free(&in);
> if (ret) {
> @@ -753,11 +915,12 @@ fail:
> static av_cold void uninit(AVFilterContext *ctx)
> {
> ZScaleContext *s = ctx->priv;
> -
> - zimg_filter_graph_free(s->graph);
> - zimg_filter_graph_free(s->alpha_graph);
> - av_freep(&s->tmp);
> - s->tmp_size = 0;
> + int i;
> + for (i = 0; i < s->nb_threads; i++) {
> + if (s->tmp[i]) av_freep(&s->tmp[i]);
This non-NULL check is not needed; av_freep() is already a no-op for a NULL pointer.
> + if (s->graph[i]) zimg_filter_graph_free(s->graph[i]);
> + if (s->alpha_graph[i]) zimg_filter_graph_free(s->alpha_graph[i]);
> + }
> }
>
> static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
> @@ -941,4 +1104,5 @@ const AVFilter ff_vf_zscale = {
> FILTER_OUTPUTS(avfilter_vf_zscale_outputs),
> FILTER_QUERY_FUNC(query_formats),
> .process_command = process_command,
> + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
Timeline support does not work if w/h changes, so just remove the AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC flag from here.
> };
> --
> 2.31.1.windows.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
More information about the ffmpeg-devel
mailing list