[FFmpeg-devel] [PATCH] vf_tonemap.c: Support for slice thread for performance

Sun May 5 11:02:43 EEST 2019

On 5/5/19, lance.lmwang at gmail.com <lance.lmwang at gmail.com> wrote:
> From: Limin Wang <lance.lmwang at gmail.com>
>
> ---
>  libavfilter/vf_tonemap.c | 109 +++++++++++++++++++++++++++++----------
>  1 file changed, 81 insertions(+), 28 deletions(-)
>
> diff --git a/libavfilter/vf_tonemap.c b/libavfilter/vf_tonemap.c
> index efd4af5466..0b26dd5e7f 100644
> --- a/libavfilter/vf_tonemap.c
> +++ b/libavfilter/vf_tonemap.c
> @@ -1,5 +1,6 @@
>  /*
>   * Copyright (c) 2017 Vittorio Giovara <vittorio.giovara at gmail.com>
> + * Copyright (c) 2019 Limin Wang <lance.lmwang at gmail.com>
>   *
>   * This file is part of FFmpeg.
>   *
> @@ -71,6 +72,13 @@ typedef struct TonemapContext {
>      const struct LumaCoefficients *coeffs;
>  } TonemapContext;
>
> +typedef struct ThreadData {
> +    AVFrame *in, *out;
> +    double peak;
> +    const struct AVPixFmtDescriptor *desc;
> +    const struct AVPixFmtDescriptor *odesc;
> +} ThreadData;
> +
>  static const enum AVPixelFormat pix_fmts[] = {
>      AV_PIX_FMT_GBRPF32,
>      AV_PIX_FMT_GBRAPF32,
> @@ -127,15 +135,10 @@ static float mobius(float in, float j, double peak)
>  }
>
>  #define MIX(x,y,a) (x) * (1 - (a)) + (y) * (a)
> -static void tonemap(TonemapContext *s, AVFrame *out, const AVFrame *in,
> -                    const AVPixFmtDescriptor *desc, int x, int y, double
> peak)
> +static void tonemap(TonemapContext *s, float *r_out, float *b_out, float
> *g_out,
> +                    const float *r_in, const float *b_in, const float
> *g_in,
> +                    const AVPixFmtDescriptor *desc, double peak)
>  {
> -    const float *r_in = (const float *)(in->data[0] + x *
> desc->comp[0].step + y * in->linesize[0]);
> -    const float *b_in = (const float *)(in->data[1] + x *
> desc->comp[1].step + y * in->linesize[1]);
> -    const float *g_in = (const float *)(in->data[2] + x *
> desc->comp[2].step + y * in->linesize[2]);
> -    float *r_out = (float *)(out->data[0] + x * desc->comp[0].step + y *
> out->linesize[0]);
> -    float *b_out = (float *)(out->data[1] + x * desc->comp[1].step + y *
> out->linesize[1]);
> -    float *g_out = (float *)(out->data[2] + x * desc->comp[2].step + y *
> out->linesize[2]);
>      float sig, sig_orig;
>
>      /* load values */
> @@ -189,17 +192,78 @@ static void tonemap(TonemapContext *s, AVFrame *out,
> const AVFrame *in,
>      *r_out *= sig / sig_orig;
>      *g_out *= sig / sig_orig;
>      *b_out *= sig / sig_orig;
> +
> +}
> +
> +static int do_tonemap_slice(AVFilterContext *ctx, void *arg, int jobnr, int
> nb_jobs)
> +{
> +    TonemapContext *s = ctx->priv;
> +    const ThreadData *td = arg;
> +    const AVFrame *in = td->in;
> +    AVFrame *out = td->out;
> +    int x, y;
> +    const AVPixFmtDescriptor *desc = td->desc;
> +    const AVPixFmtDescriptor *odesc = td->odesc;
> +    const int slice_start = (out->height *  jobnr   ) / nb_jobs;
> +    const int slice_end   = (out->height * (jobnr+1)) / nb_jobs;
> +    const int slice_h   = slice_end - slice_start;
> +    uint8_t *dstr = out->data[0] + slice_start * out->linesize[0];
> +    uint8_t *dstb = out->data[1] + slice_start * out->linesize[1];
> +    uint8_t *dstg = out->data[2] + slice_start * out->linesize[2];
> +    const uint8_t *srcr = in->data[0] + slice_start * in->linesize[0];
> +    const uint8_t *srcb = in->data[1] + slice_start * in->linesize[1];
> +    const uint8_t *srcg = in->data[2] + slice_start * in->linesize[2];
> +    uint8_t *dsta = out->data[3] + slice_start * out->linesize[3];
> +    const uint8_t *srca = in ->data[3] + slice_start * in->linesize[3];
> +
> +    /* do the tone map */
> +    for (y = slice_start; y < slice_end; y++) {
> +        for (x = 0; x < out->width; x++) {
> +            const float *r_in = (const float *)(srcr + x *
> desc->comp[0].step);
> +            const float *b_in = (const float *)(srcb + x *
> desc->comp[1].step);
> +            const float *g_in = (const float *)(srcg + x *
> desc->comp[2].step);
> +            float *r_out = (float *)(dstr + x * desc->comp[0].step);
> +            float *b_out = (float *)(dstb + x * desc->comp[1].step);
> +            float *g_out = (float *)(dstg + x * desc->comp[2].step);
> +
> +            tonemap(s, r_out, b_out, g_out, r_in, b_in, g_in, desc,
> td->peak);
> +        }
> +        srcr += in->linesize[0];
> +        srcg += in->linesize[1];
> +        srcb += in->linesize[2];
> +        dstr += out->linesize[0];
> +        dstg += out->linesize[1];
> +        dstb += out->linesize[2];
> +    }
> +
> +    /* copy/generate alpha if needed */
> +    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags &
> AV_PIX_FMT_FLAG_ALPHA) {
> +        av_image_copy_plane(dsta, out->linesize[3],
> +                srca, in->linesize[3],
> +                out->linesize[3], slice_h);
> +    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +        for (y = slice_start; y < slice_end; y++) {
> +            for (x = 0; x < out->width; x++) {
> +                AV_WN32(dsta + x * odesc->comp[3].step + y *
> out->linesize[3],
> +                        av_float2int(1.0f));
> +            }
> +        }
> +    }
> +
> +    return 0;
>  }
>
>  static int filter_frame(AVFilterLink *link, AVFrame *in)
>  {
> -    TonemapContext *s = link->dst->priv;
> +    AVFilterContext *ctx = link->dst;
> +    TonemapContext *s = ctx->priv;
>      AVFilterLink *outlink = link->dst->outputs[0];
>      AVFrame *out;
>      const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
>      const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
> -    int ret, x, y;
> +    int ret;
>      double peak = s->peak;
> +    ThreadData td;
>
>      if (!desc || !odesc) {
>          av_frame_free(&in);
> @@ -244,24 +308,12 @@ static int filter_frame(AVFilterLink *link, AVFrame
> *in)
>          s->desat = 0;
>      }
>
> -    /* do the tone map */
> -    for (y = 0; y < out->height; y++)
> -        for (x = 0; x < out->width; x++)
> -            tonemap(s, out, in, desc, x, y, peak);
> -
> -    /* copy/generate alpha if needed */
> -    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags &
> AV_PIX_FMT_FLAG_ALPHA) {
> -        av_image_copy_plane(out->data[3], out->linesize[3],
> -                            in->data[3], in->linesize[3],
> -                            out->linesize[3], outlink->h);
> -    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> -        for (y = 0; y < out->height; y++) {
> -            for (x = 0; x < out->width; x++) {
> -                AV_WN32(out->data[3] + x * odesc->comp[3].step + y *
> out->linesize[3],
> -                        av_float2int(1.0f));
> -            }
> -        }
> -    }
> +    td.in = in;
> +    td.out = out;
> +    td.desc = desc;
> +    td.odesc = odesc;
> +    td.peak = peak;
> +    ctx->internal->execute(ctx, do_tonemap_slice, &td, NULL,
> FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
>
>      av_frame_free(&in);
>
> @@ -315,4 +367,5 @@ AVFilter ff_vf_tonemap = {
>      .priv_class      = &tonemap_class,
>      .inputs          = tonemap_inputs,
>      .outputs         = tonemap_outputs,
> +    .flags           = AVFILTER_FLAG_SLICE_THREADS,
>  };
> --
> 2.21.0

Another patch that does the same thing was already applied.