[FFmpeg-devel] [PATCH] Parallelize vf_lut

Michael Niedermayer michael at niedermayer.cc
Wed Feb 27 17:28:05 EET 2019


On Mon, Feb 25, 2019 at 03:25:30PM -0500, Britt Cyr wrote:
> ---
>  libavfilter/vf_lut.c | 106 ++++++++++++++++++++++++++++---------------
>  1 file changed, 70 insertions(+), 36 deletions(-)
> 
> diff --git a/libavfilter/vf_lut.c b/libavfilter/vf_lut.c
> index c815ddc194..14386938be 100644
> --- a/libavfilter/vf_lut.c
> +++ b/libavfilter/vf_lut.c
> @@ -72,6 +72,12 @@ typedef struct LutContext {
>      int negate_alpha; /* only used by negate */
>  } LutContext;
>  
> +typedef struct ThreadData {
> +  AVFrame *in;
> +  AVFrame *out;
> +  AVFilterLink *link;
> +} ThreadData;

indentation depth is inconsistent


[...]
> @@ -366,11 +359,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>          const int in_linesize  =  in->linesize[0] / 2;
>          const int out_linesize = out->linesize[0] / 2;
>          const int step = s->step;
> +        const int row_min = jobnr / nb_jobs * h;
> +        const int row_max = (jobnr + 1) / nb_jobs * h;
>  
>          inrow0  = (uint16_t*) in ->data[0];
>          outrow0 = (uint16_t*) out->data[0];
>  
> -        for (i = 0; i < h; i ++) {
> +        for (i = row_min; i < row_max; i ++) {
>              inrow  = inrow0;
>              outrow = outrow0;
>              for (j = 0; j < w; j++) {
> @@ -403,11 +398,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>          const int in_linesize  =  in->linesize[0];
>          const int out_linesize = out->linesize[0];
>          const int step = s->step;
> +        const int row_min = jobnr / nb_jobs * h;
> +        const int row_max = (jobnr + 1) / nb_jobs * h;
>  
>          inrow0  = in ->data[0];
>          outrow0 = out->data[0];
>  
> -        for (i = 0; i < h; i ++) {
> +        for (i = row_min; i < row_max; i ++) {
>              inrow  = inrow0;
>              outrow = outrow0;
>              for (j = 0; j < w; j++) {
> @@ -435,11 +432,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>              const uint16_t *tab = s->lut[plane];
>              const int in_linesize  =  in->linesize[plane] / 2;
>              const int out_linesize = out->linesize[plane] / 2;
> +            const int row_min = jobnr / nb_jobs * h;
> +            const int row_max = (jobnr + 1) / nb_jobs * h;
>  
>              inrow  = (uint16_t *)in ->data[plane];
>              outrow = (uint16_t *)out->data[plane];
>  
> -            for (i = 0; i < h; i++) {
> +            for (i = row_min; i < row_max; i++) {
>                  for (j = 0; j < w; j++) {
>  #if HAVE_BIGENDIAN
>                      outrow[j] = av_bswap16(tab[av_bswap16(inrow[j])]);
> @@ -463,11 +462,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>              const uint16_t *tab = s->lut[plane];
>              const int in_linesize  =  in->linesize[plane];
>              const int out_linesize = out->linesize[plane];
> +            const int row_min = jobnr / nb_jobs * h;
> +            const int row_max = (jobnr + 1) / nb_jobs * h;
>  
>              inrow  = in ->data[plane];
>              outrow = out->data[plane];
>  
> -            for (i = 0; i < h; i++) {
> +            for (i = row_min; i < row_max; i++) {
>                  for (j = 0; j < w; j++)
>                      outrow[j] = tab[inrow[j]];
>                  inrow  += in_linesize;

unrelated to your patch, I just spotted this as it makes it obvious:
replicating this code 4 times is a bit ugly


> @@ -476,9 +477,42 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>          }
>      }
>  
> -    if (!direct)
> +    return 0;
> +}
> +
> +static AVFrame *apply_lut(AVFilterLink *inlink, AVFrame *in) {
> +    AVFilterContext *ctx = inlink->dst;
> +    AVFilterLink *outlink = ctx->outputs[0];
> +    AVFrame *out;
> +    ThreadData td;
> +
> +    if (av_frame_is_writable(in)) {
> +        out = in;
> +    } else {
> +        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> +        if (!out) {
> +            av_frame_free(&in);
> +            return NULL;
> +        }
> +        av_frame_copy_props(out, in);
> +    }
> +    td.in  = in;
> +    td.out = out;
> +    td.link = inlink;

> +    ctx->internal->execute(ctx, lookup_slice, &td, NULL, FFMIN(outlink->h, 1));

How many tasks does this run in parallel, and how much faster is it?

thanks

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

It's not that you shouldn't use gotos, but rather that you should write
readable code, and code with gotos often, but not always, is less readable
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20190227/325cbee5/attachment.sig>


More information about the ffmpeg-devel mailing list