[FFmpeg-devel] [PATCH] avfilter/vf_overlay: add slice threading
Paul B Mahol
onemda at gmail.com
Sun Apr 29 11:04:12 EEST 2018
On 4/29/18, Michael Niedermayer <michael at niedermayer.cc> wrote:
> On Sat, Apr 28, 2018 at 12:00:46PM +0200, Paul B Mahol wrote:
>> Signed-off-by: Paul B Mahol <onemda at gmail.com>
>> ---
>> libavfilter/vf_overlay.c | 281
>> ++++++++++++++++++++++++++++++++---------------
>> 1 file changed, 190 insertions(+), 91 deletions(-)
>>
>> diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
>> index c6a6ac82f3..cb304e9522 100644
>> --- a/libavfilter/vf_overlay.c
>> +++ b/libavfilter/vf_overlay.c
>> @@ -40,6 +40,10 @@
>> #include "framesync.h"
>> #include "video.h"
>>
>> +typedef struct ThreadData {
>> + AVFrame *dst, *src;
>> +} ThreadData;
>> +
>> static const char *const var_names[] = {
>> "main_w", "W", ///< width of the main video
>> "main_h", "H", ///< height of the main video
>> @@ -124,7 +128,7 @@ typedef struct OverlayContext {
>>
>> AVExpr *x_pexpr, *y_pexpr;
>>
>> - void (*blend_image)(AVFilterContext *ctx, AVFrame *dst, const AVFrame
>> *src, int x, int y);
>> + int (*blend_slice)(AVFilterContext *ctx, void *arg, int jobnr, int
>> nb_jobs);
>> } OverlayContext;
>>
>> static av_cold void uninit(AVFilterContext *ctx)
>> @@ -403,10 +407,10 @@ static int config_output(AVFilterLink *outlink)
>> * Blend image in src to destination buffer dst at position (x, y).
>> */
>>
>> -static av_always_inline void blend_image_packed_rgb(AVFilterContext
>> *ctx,
>> +static av_always_inline void blend_slice_packed_rgb(AVFilterContext
>> *ctx,
>> AVFrame *dst, const AVFrame *src,
>> int main_has_alpha, int x, int y,
>> - int is_straight)
>> + int is_straight, int jobnr, int
>> nb_jobs)
>> {
>> OverlayContext *s = ctx->priv;
>> int i, imax, j, jmax;
>> @@ -425,13 +429,19 @@ static av_always_inline void
>> blend_image_packed_rgb(AVFilterContext *ctx,
>> const int sb = s->overlay_rgba_map[B];
>> const int sa = s->overlay_rgba_map[A];
>> const int sstep = s->overlay_pix_step[0];
>> + int slice_start, slice_end;
>> uint8_t *S, *sp, *d, *dp;
>>
>> i = FFMAX(-y, 0);
>> - sp = src->data[0] + i * src->linesize[0];
>> - dp = dst->data[0] + (y+i) * dst->linesize[0];
>> + imax = FFMIN(-y + dst_h, src_h);
>> +
>> + slice_start = (imax * jobnr) / nb_jobs;
>> + slice_end = (imax * (jobnr+1)) / nb_jobs;
>> +
>> + sp = src->data[0] + (i + slice_start) * src->linesize[0];
>> + dp = dst->data[0] + (y + i + slice_start) * dst->linesize[0];
>>
>> - for (imax = FFMIN(-y + dst_h, src_h); i < imax; i++) {
>> + for (i = i + slice_start; i < slice_end; i++) {
>> j = FFMAX(-x, 0);
>> S = sp + j * sstep;
>> d = dp + (x+j) * dstep;
>> @@ -495,7 +505,9 @@ static av_always_inline void
>> blend_plane(AVFilterContext *ctx,
>> int dst_offset,
>> int dst_step,
>> int straight,
>> - int yuv)
>> + int yuv,
>> + int jobnr,
>> + int nb_jobs)
>> {
>> int src_wp = AV_CEIL_RSHIFT(src_w, hsub);
>> int src_hp = AV_CEIL_RSHIFT(src_h, vsub);
>> @@ -505,16 +517,22 @@ static av_always_inline void
>> blend_plane(AVFilterContext *ctx,
>> int xp = x>>hsub;
>> uint8_t *s, *sp, *d, *dp, *dap, *a, *da, *ap;
>> int jmax, j, k, kmax;
>> + int slice_start, slice_end;
>>
>> j = FFMAX(-yp, 0);
>> - sp = src->data[i] + j * src->linesize[i];
>> + jmax = FFMIN(-yp + dst_hp, src_hp);
>> +
>> + slice_start = (jmax * jobnr) / nb_jobs;
>> + slice_end = ((jmax * (jobnr+1)) / nb_jobs);
>> +
>> + sp = src->data[i] + slice_start * src->linesize[i];
>> dp = dst->data[dst_plane]
>> - + (yp+j) * dst->linesize[dst_plane]
>> + + (yp + slice_start) * dst->linesize[dst_plane]
>> + dst_offset;
>> - ap = src->data[3] + (j<<vsub) * src->linesize[3];
>> - dap = dst->data[3] + ((yp+j) << vsub) * dst->linesize[3];
>> + ap = src->data[3] + (slice_start << vsub) * src->linesize[3];
>> + dap = dst->data[3] + ((yp + slice_start) << vsub) *
>> dst->linesize[3];
>>
>> - for (jmax = FFMIN(-yp + dst_hp, src_hp); j < jmax; j++) {
>> + for (j = j + slice_start; j < slice_end; j++) {
>> k = FFMAX(-xp, 0);
>> d = dp + (xp+k) * dst_step;
>> s = sp + k;
>> @@ -577,17 +595,23 @@ static av_always_inline void
>> blend_plane(AVFilterContext *ctx,
>> static inline void alpha_composite(const AVFrame *src, const AVFrame
>> *dst,
>> int src_w, int src_h,
>> int dst_w, int dst_h,
>> - int x, int y)
>> + int x, int y,
>> + int jobnr, int nb_jobs)
>> {
>> uint8_t alpha; ///< the amount of overlay to blend on to
>> main
>> uint8_t *s, *sa, *d, *da;
>> int i, imax, j, jmax;
>> + int slice_start, slice_end;
>> +
>> + imax = FFMIN(-y + dst_h, src_h);
>> + slice_start = (imax * jobnr) / nb_jobs;
>> + slice_end = ((imax * (jobnr+1)) / nb_jobs);
>>
>> i = FFMAX(-y, 0);
>> - sa = src->data[3] + i * src->linesize[3];
>> - da = dst->data[3] + (y+i) * dst->linesize[3];
>> + sa = src->data[3] + (i + slice_start) * src->linesize[3];
>> + da = dst->data[3] + (y + i + slice_start) * dst->linesize[3];
>>
>> - for (imax = FFMIN(-y + dst_h, src_h); i < imax; i++) {
>
>> + for (i = i + slice_start; i < imax; i++) {
>
> Shouldn't this loop use slice_end instead of imax as its upper bound?
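Yes. The loop condition should be "i < slice_end", as in the other two
slice loops; with "i < imax" every job keeps going to the last row instead
of stopping at the end of its own slice, so jobs overlap each other's rows.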
More information about the ffmpeg-devel
mailing list