[FFmpeg-devel] [PATCH] vf_tonemap.c: Support for slice thread for performance
Paul B Mahol
onemda at gmail.com
Sun May 5 11:02:43 EEST 2019
On 5/5/19, lance.lmwang at gmail.com <lance.lmwang at gmail.com> wrote:
> From: Limin Wang <lance.lmwang at gmail.com>
>
> ---
> libavfilter/vf_tonemap.c | 109 +++++++++++++++++++++++++++++----------
> 1 file changed, 81 insertions(+), 28 deletions(-)
>
> diff --git a/libavfilter/vf_tonemap.c b/libavfilter/vf_tonemap.c
> index efd4af5466..0b26dd5e7f 100644
> --- a/libavfilter/vf_tonemap.c
> +++ b/libavfilter/vf_tonemap.c
> @@ -1,5 +1,6 @@
> /*
> * Copyright (c) 2017 Vittorio Giovara <vittorio.giovara at gmail.com>
> + * Copyright (c) 2019 Limin Wang <lance.lmwang at gmail.com>
> *
> * This file is part of FFmpeg.
> *
> @@ -71,6 +72,13 @@ typedef struct TonemapContext {
> const struct LumaCoefficients *coeffs;
> } TonemapContext;
>
> +typedef struct ThreadData {
> + AVFrame *in, *out;
> + double peak;
> + const struct AVPixFmtDescriptor *desc;
> + const struct AVPixFmtDescriptor *odesc;
> +} ThreadData;
> +
> static const enum AVPixelFormat pix_fmts[] = {
> AV_PIX_FMT_GBRPF32,
> AV_PIX_FMT_GBRAPF32,
> @@ -127,15 +135,10 @@ static float mobius(float in, float j, double peak)
> }
>
> #define MIX(x,y,a) (x) * (1 - (a)) + (y) * (a)
> -static void tonemap(TonemapContext *s, AVFrame *out, const AVFrame *in,
> - const AVPixFmtDescriptor *desc, int x, int y, double
> peak)
> +static void tonemap(TonemapContext *s, float *r_out, float *b_out, float
> *g_out,
> + const float *r_in, const float *b_in, const float
> *g_in,
> + const AVPixFmtDescriptor *desc, double peak)
> {
> - const float *r_in = (const float *)(in->data[0] + x *
> desc->comp[0].step + y * in->linesize[0]);
> - const float *b_in = (const float *)(in->data[1] + x *
> desc->comp[1].step + y * in->linesize[1]);
> - const float *g_in = (const float *)(in->data[2] + x *
> desc->comp[2].step + y * in->linesize[2]);
> - float *r_out = (float *)(out->data[0] + x * desc->comp[0].step + y *
> out->linesize[0]);
> - float *b_out = (float *)(out->data[1] + x * desc->comp[1].step + y *
> out->linesize[1]);
> - float *g_out = (float *)(out->data[2] + x * desc->comp[2].step + y *
> out->linesize[2]);
> float sig, sig_orig;
>
> /* load values */
> @@ -189,17 +192,78 @@ static void tonemap(TonemapContext *s, AVFrame *out,
> const AVFrame *in,
> *r_out *= sig / sig_orig;
> *g_out *= sig / sig_orig;
> *b_out *= sig / sig_orig;
> +
> +}
> +
> +static int do_tonemap_slice(AVFilterContext *ctx, void *arg, int jobnr, int
> nb_jobs)
> +{
> + TonemapContext *s = ctx->priv;
> + const ThreadData *td = arg;
> + const AVFrame *in = td->in;
> + AVFrame *out = td->out;
> + int x, y;
> + const AVPixFmtDescriptor *desc = td->desc;
> + const AVPixFmtDescriptor *odesc = td->odesc;
> + const int slice_start = (out->height * jobnr ) / nb_jobs;
> + const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
> + const int slice_h = slice_end - slice_start;
> + uint8_t *dstr = out->data[0] + slice_start * out->linesize[0];
> + uint8_t *dstb = out->data[1] + slice_start * out->linesize[1];
> + uint8_t *dstg = out->data[2] + slice_start * out->linesize[2];
> + const uint8_t *srcr = in->data[0] + slice_start * in->linesize[0];
> + const uint8_t *srcb = in->data[1] + slice_start * in->linesize[1];
> + const uint8_t *srcg = in->data[2] + slice_start * in->linesize[2];
> + uint8_t *dsta = out->data[3] + slice_start * out->linesize[3];
> + const uint8_t *srca = in ->data[3] + slice_start * in->linesize[3];
> +
> + /* do the tone map */
> + for (y = slice_start; y < slice_end; y++) {
> + for (x = 0; x < out->width; x++) {
> + const float *r_in = (const float *)(srcr + x *
> desc->comp[0].step);
> + const float *b_in = (const float *)(srcb + x *
> desc->comp[1].step);
> + const float *g_in = (const float *)(srcg + x *
> desc->comp[2].step);
> + float *r_out = (float *)(dstr + x * desc->comp[0].step);
> + float *b_out = (float *)(dstb + x * desc->comp[1].step);
> + float *g_out = (float *)(dstg + x * desc->comp[2].step);
> +
> + tonemap(s, r_out, b_out, g_out, r_in, b_in, g_in, desc,
> td->peak);
> + }
> + srcr += in->linesize[0];
> + srcg += in->linesize[1];
> + srcb += in->linesize[2];
> + dstr += out->linesize[0];
> + dstg += out->linesize[1];
> + dstb += out->linesize[2];
> + }
> +
> + /* copy/generate alpha if needed */
> + if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags &
> AV_PIX_FMT_FLAG_ALPHA) {
> + av_image_copy_plane(dsta, out->linesize[3],
> + srca, in->linesize[3],
> + out->linesize[3], slice_h);
> + } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> + for (y = slice_start; y < slice_end; y++) {
> + for (x = 0; x < out->width; x++) {
> + AV_WN32(dsta + x * odesc->comp[3].step + y *
> out->linesize[3],
> + av_float2int(1.0f));
> + }
> + }
> + }
> +
> + return 0;
> }
>
> static int filter_frame(AVFilterLink *link, AVFrame *in)
> {
> - TonemapContext *s = link->dst->priv;
> + AVFilterContext *ctx = link->dst;
> + TonemapContext *s = ctx->priv;
> AVFilterLink *outlink = link->dst->outputs[0];
> AVFrame *out;
> const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
> const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
> - int ret, x, y;
> + int ret;
> double peak = s->peak;
> + ThreadData td;
>
> if (!desc || !odesc) {
> av_frame_free(&in);
> @@ -244,24 +308,12 @@ static int filter_frame(AVFilterLink *link, AVFrame
> *in)
> s->desat = 0;
> }
>
> - /* do the tone map */
> - for (y = 0; y < out->height; y++)
> - for (x = 0; x < out->width; x++)
> - tonemap(s, out, in, desc, x, y, peak);
> -
> - /* copy/generate alpha if needed */
> - if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags &
> AV_PIX_FMT_FLAG_ALPHA) {
> - av_image_copy_plane(out->data[3], out->linesize[3],
> - in->data[3], in->linesize[3],
> - out->linesize[3], outlink->h);
> - } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> - for (y = 0; y < out->height; y++) {
> - for (x = 0; x < out->width; x++) {
> - AV_WN32(out->data[3] + x * odesc->comp[3].step + y *
> out->linesize[3],
> - av_float2int(1.0f));
> - }
> - }
> - }
> + td.in = in;
> + td.out = out;
> + td.desc = desc;
> + td.odesc = odesc;
> + td.peak = peak;
> + ctx->internal->execute(ctx, do_tonemap_slice, &td, NULL,
> FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
>
> av_frame_free(&in);
>
> @@ -315,4 +367,5 @@ AVFilter ff_vf_tonemap = {
> .priv_class = &tonemap_class,
> .inputs = tonemap_inputs,
> .outputs = tonemap_outputs,
> + .flags = AVFILTER_FLAG_SLICE_THREADS,
> };
> --
> 2.21.0
Another patch that does the same thing was already applied.
More information about the ffmpeg-devel
mailing list