[FFmpeg-devel] [PATCH] added slicing yuv, rgb
Clément Bœsch
u at pkh.me
Thu Dec 18 01:09:19 CET 2014
> Subject: Re: [FFmpeg-devel] [PATCH] added slicing yuv, rgb
You probably want to say what part it affects, as well as mentioning
threading. So probably something like "avfilter/lut: support rgb and yuv
slice threading"
You might also want to mention in the commit description what performance
boost you observe.
Also, from what we discussed already, you can get a huge performance boost
by tweaking a bit the current C code before the addition of the threading
(at least for the rgb path). Please do that before threading.
On Tue, Dec 16, 2014 at 10:44:27PM -0800, Yayoi wrote:
> ---
> libavfilter/allfilters.c | 2 +-
> libavfilter/vf_lut.c | 119 ++++++++++++++++++++++++++++++++++-------------
> 2 files changed, 88 insertions(+), 33 deletions(-)
>
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index adb86be..76341bd 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -218,7 +218,7 @@ void avfilter_register_all(void)
> REGISTER_FILTER(YADIF, yadif, vf);
> REGISTER_FILTER(ZMQ, zmq, vf);
> REGISTER_FILTER(ZOOMPAN, zoompan, vf);
> -
> +
This change is unrelated and should not be part of this commit (you are
adding trailing whitespaces). You might want to have a look at
http://ffmpeg.org/developer.html#Editor-configuration
> REGISTER_FILTER(CELLAUTO, cellauto, vsrc);
> REGISTER_FILTER(COLOR, color, vsrc);
> REGISTER_FILTER(FREI0R, frei0r_src, vsrc);
> diff --git a/libavfilter/vf_lut.c b/libavfilter/vf_lut.c
> index 0b7a2ca..3ab7f40 100644
> --- a/libavfilter/vf_lut.c
> +++ b/libavfilter/vf_lut.c
> @@ -69,6 +69,13 @@ typedef struct LutContext {
> int negate_alpha; /* only used by negate */
> } LutContext;
>
> +typedef struct ThreadData {
> + AVFrame *in, *out;
> + int plane;
> + int h,w;
Style nit: missing space
> + AVFilterLink *inlink;
It doesn't look like you are using this one
> +} ThreadData;
> +
> #define Y 0
> #define U 1
> #define V 2
> @@ -276,14 +283,76 @@ static int config_props(AVFilterLink *inlink)
> return 0;
> }
>
> +static int process_slice_rgb(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
> +{
> + int x, y;
> + LutContext *s = ctx->priv;
> + const ThreadData *td = arg;
> + const AVFrame *in = td->in;
> + const AVFrame *out = td->out;
> + const int step = s->step;
> + const uint8_t (*tab)[256] = (const uint8_t (*)[256])s->lut;
> +
> +
> + const int slice_start = (in->height * jobnr ) / nb_jobs;
> + const int slice_end = (in->height * (jobnr+1)) / nb_jobs;
> + uint8_t *outrow = out->data[0] + slice_start * out->linesize[0];
> + const uint8_t *inrow = in->data[0] + slice_start * in->linesize[0];
style: weird vertical alignment (same in yuv variant)
> + int w = td->w;
> + for (y = slice_start; y < slice_end; y++) {
Here and a few other places below you have trailing whitespaces again (I
won't mention them again)
> +
> + for (x = 0; x < w; x++) {
> + switch (step) {
> + case 4: outrow[3] = tab[3][inrow[3]]; // Fall-through
> + case 3: outrow[2] = tab[2][inrow[2]]; // Fall-through
> + case 2: outrow[1] = tab[1][inrow[1]]; // Fall-through
> + default: outrow[0] = tab[0][inrow[0]];
> + }
> + outrow += step;
> + inrow += step;
> + }
> + }
> + return 0;
> +
> +}
> +
> +
> +
nit style: drop the 2 extra lines
> +static int process_slice_yuv(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
> +{
> + int x, y;
> + LutContext *s = ctx->priv;
> + const ThreadData *td = arg;
> + int plane = td->plane;
> + int h = td->h;
> + int w = td->w;
> +
nit: you might want to mark them as const
> + const AVFrame *in = td->in;
> + const AVFrame *out = td->out;
> + int slice_start = (h * jobnr ) / nb_jobs;
> + int slice_end = (h * (jobnr+1)) / nb_jobs;
> + uint8_t *outrow = out->data[plane] + slice_start * out->linesize[plane];
> + const uint8_t *inrow = in->data[plane] + slice_start * in->linesize[plane];
> +
> + for (y = slice_start; y < slice_end; y++) {
> + const uint8_t *tab = s->lut[plane];
> + for (x = 0; x < w; x++)
> + outrow[x] = tab[inrow[x]];
> + inrow += in ->linesize[plane];
> + outrow += out->linesize[plane];
> + }
> + return 0;
> +
> +}
> +
> static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> {
> AVFilterContext *ctx = inlink->dst;
> LutContext *s = ctx->priv;
> AVFilterLink *outlink = ctx->outputs[0];
> AVFrame *out;
> - uint8_t *inrow, *outrow, *inrow0, *outrow0;
> - int i, j, plane, direct = 0;
> + ThreadData td;
You might want to zero init this with ThreadData td = {0};
Because typically, in the RGB case, the plane field will stay
uninitialized and the execute function might want to copy the ThreadData
or something like that (probably not the case but just in case...)
> + int plane, direct = 0;
>
> if (av_frame_is_writable(in)) {
> direct = 1;
> @@ -297,47 +366,33 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> av_frame_copy_props(out, in);
> }
>
> + td.in = in;
> + td.out = out;
> + td.inlink = inlink;
> +
> +
> if (s->is_rgb) {
> /* packed */
> - inrow0 = in ->data[0];
> - outrow0 = out->data[0];
> -
> - for (i = 0; i < in->height; i ++) {
> - int w = inlink->w;
> - const uint8_t (*tab)[256] = (const uint8_t (*)[256])s->lut;
> - inrow = inrow0;
> - outrow = outrow0;
> - for (j = 0; j < w; j++) {
> - switch (s->step) {
> - case 4: outrow[3] = tab[3][inrow[3]]; // Fall-through
> - case 3: outrow[2] = tab[2][inrow[2]]; // Fall-through
> - case 2: outrow[1] = tab[1][inrow[1]]; // Fall-through
> - default: outrow[0] = tab[0][inrow[0]];
> - }
> - outrow += s->step;
> - inrow += s->step;
> - }
> - inrow0 += in ->linesize[0];
> - outrow0 += out->linesize[0];
> - }
> +
> + int w = inlink->w;
Is there a reason for this intermediate step?
Also, you can't declare a new variable here since it's not the beginning
of a scope (it will choke on some compilers)
> + td.w = w;
> + ctx->internal->execute(ctx, process_slice_rgb, &td, NULL, FFMIN(outlink->h, ctx->graph->nb_threads));
> +
> } else {
> /* planar */
> +
> for (plane = 0; plane < 4 && in->data[plane] && in->linesize[plane]; plane++) {
> int vsub = plane == 1 || plane == 2 ? s->vsub : 0;
> int hsub = plane == 1 || plane == 2 ? s->hsub : 0;
> int h = FF_CEIL_RSHIFT(inlink->h, vsub);
> int w = FF_CEIL_RSHIFT(inlink->w, hsub);
>
> - inrow = in ->data[plane];
> - outrow = out->data[plane];
> + td.plane = plane;
> + td.h = h;
> + td.w = w;
> +
> + ctx->internal->execute(ctx, process_slice_yuv, &td, NULL, FFMIN(h, ctx->graph->nb_threads));
>
> - for (i = 0; i < h; i++) {
> - const uint8_t *tab = s->lut[plane];
> - for (j = 0; j < w; j++)
> - outrow[j] = tab[inrow[j]];
> - inrow += in ->linesize[plane];
> - outrow += out->linesize[plane];
> - }
> }
> }
Also missing a .flags update.
Looking forward next iteration, thank you.
--
Clément B.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 473 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20141218/909e690d/attachment.asc>
More information about the ffmpeg-devel
mailing list