[FFmpeg-devel] [PATCH v3] lavfi/qsvvpp: support async depth
Fei Wang
fei.w.wang at intel.com
Wed Mar 31 05:07:44 EEST 2021
Async depth allows the QSV filter to cache a few frames, which avoids
forcing the filter task to switch and end frame by frame. This change
improves performance for some multi-task cases, for example 1:N transcode
(decode + vpp + encode) with all QSV plugins.
Performance data measured on my Coffee Lake desktop (i7-8700K) using
the following 1:8 transcode test case shows these improvements:
1. Fps improved from 55 to 130.
2. Render/Video usage improved from ~61%/~38% to ~100%/~70% (data taken
from intel_gpu_top).
Test command:
ffmpeg -v verbose -init_hw_device qsv=hw:/dev/dri/renderD128 -filter_hw_device \
hw -hwaccel qsv -hwaccel_output_format qsv -c:v h264_qsv -i 1920x1080.264 \
-vf 'vpp_qsv=w=1280:h=720:async_depth=4' -c:v h264_qsv -r:v 30 -preset 7 -g 33 -refs 2 -bf 3 -q 24 -f null - \
-vf 'vpp_qsv=w=1280:h=720:async_depth=4' -c:v h264_qsv -r:v 30 -preset 7 -g 33 -refs 2 -bf 3 -q 24 -f null - \
-vf 'vpp_qsv=w=1280:h=720:async_depth=4' -c:v h264_qsv -r:v 30 -preset 7 -g 33 -refs 2 -bf 3 -q 24 -f null - \
-vf 'vpp_qsv=w=1280:h=720:async_depth=4' -c:v h264_qsv -r:v 30 -preset 7 -g 33 -refs 2 -bf 3 -q 24 -f null - \
-vf 'vpp_qsv=w=1280:h=720:async_depth=4' -c:v h264_qsv -r:v 30 -preset 7 -g 33 -refs 2 -bf 3 -q 24 -f null - \
-vf 'vpp_qsv=w=1280:h=720:async_depth=4' -c:v h264_qsv -r:v 30 -preset 7 -g 33 -refs 2 -bf 3 -q 24 -f null - \
-vf 'vpp_qsv=w=1280:h=720:async_depth=4' -c:v h264_qsv -r:v 30 -preset 7 -g 33 -refs 2 -bf 3 -q 24 -f null -
Signed-off-by: Fei Wang <fei.w.wang at intel.com>
---
Changes:
1. Add test data to the commit message.
2. Remove some duplicate code.
libavfilter/qsvvpp.c | 153 ++++++++++++++++++-------------
libavfilter/qsvvpp.h | 39 +++++++-
libavfilter/vf_deinterlace_qsv.c | 14 +--
libavfilter/vf_vpp_qsv.c | 75 ++++++++++++---
4 files changed, 191 insertions(+), 90 deletions(-)
diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
index f216b3f248..4768f6208b 100644
--- a/libavfilter/qsvvpp.c
+++ b/libavfilter/qsvvpp.c
@@ -37,37 +37,6 @@
#define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME)
#define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY)
-typedef struct QSVFrame {
- AVFrame *frame;
- mfxFrameSurface1 *surface;
- mfxFrameSurface1 surface_internal; /* for system memory */
- struct QSVFrame *next;
-} QSVFrame;
-
-/* abstract struct for all QSV filters */
-struct QSVVPPContext {
- mfxSession session;
- int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/* callback */
- enum AVPixelFormat out_sw_format; /* Real output format */
- mfxVideoParam vpp_param;
- mfxFrameInfo *frame_infos; /* frame info for each input */
-
- /* members related to the input/output surface */
- int in_mem_mode;
- int out_mem_mode;
- QSVFrame *in_frame_list;
- QSVFrame *out_frame_list;
- int nb_surface_ptrs_in;
- int nb_surface_ptrs_out;
- mfxFrameSurface1 **surface_ptrs_in;
- mfxFrameSurface1 **surface_ptrs_out;
-
- /* MFXVPP extern parameters */
- mfxExtOpaqueSurfaceAlloc opaque_alloc;
- mfxExtBuffer **ext_buffers;
- int nb_ext_buffers;
-};
-
static const mfxHandleType handle_types[] = {
MFX_HANDLE_VA_DISPLAY,
MFX_HANDLE_D3D9_DEVICE_MANAGER,
@@ -336,9 +305,11 @@ static int fill_frameinfo_by_link(mfxFrameInfo *frameinfo, AVFilterLink *link)
static void clear_unused_frames(QSVFrame *list)
{
while (list) {
- if (list->surface && !list->surface->Data.Locked) {
- list->surface = NULL;
+ /* list->queued==1 means the frame is not cached in VPP
+ * process any more, it can be released to pool. */
+ if ((list->queued == 1) && !list->surface.Data.Locked) {
av_frame_free(&list->frame);
+ list->queued = 0;
}
list = list->next;
}
@@ -361,8 +332,10 @@ static QSVFrame *get_free_frame(QSVFrame **list)
QSVFrame *out = *list;
for (; out; out = out->next) {
- if (!out->surface)
+ if (!out->queued) {
+ out->queued = 1;
break;
+ }
}
if (!out) {
@@ -371,8 +344,9 @@ static QSVFrame *get_free_frame(QSVFrame **list)
av_log(NULL, AV_LOG_ERROR, "Can't alloc new output frame.\n");
return NULL;
}
- out->next = *list;
- *list = out;
+ out->queued = 1;
+ out->next = *list;
+ *list = out;
}
return out;
@@ -402,7 +376,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p
return NULL;
}
qsv_frame->frame = av_frame_clone(picref);
- qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3];
+ qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame->frame->data[3];
} else {
/* make a copy if the input is not padded as libmfx requires */
if (picref->height & 31 || picref->linesize[0] & 31) {
@@ -425,27 +399,26 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p
qsv_frame->frame = av_frame_clone(picref);
if (map_frame_to_surface(qsv_frame->frame,
- &qsv_frame->surface_internal) < 0) {
+ &qsv_frame->surface) < 0) {
av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n");
return NULL;
}
- qsv_frame->surface = &qsv_frame->surface_internal;
}
- qsv_frame->surface->Info = s->frame_infos[FF_INLINK_IDX(inlink)];
- qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts,
+ qsv_frame->surface.Info = s->frame_infos[FF_INLINK_IDX(inlink)];
+ qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts,
inlink->time_base, default_tb);
- qsv_frame->surface->Info.PicStruct =
+ qsv_frame->surface.Info.PicStruct =
!qsv_frame->frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE :
(qsv_frame->frame->top_field_first ? MFX_PICSTRUCT_FIELD_TFF :
MFX_PICSTRUCT_FIELD_BFF);
if (qsv_frame->frame->repeat_pict == 1)
- qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
+ qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
else if (qsv_frame->frame->repeat_pict == 2)
- qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
+ qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
else if (qsv_frame->frame->repeat_pict == 4)
- qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
+ qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
return qsv_frame;
}
@@ -476,7 +449,7 @@ static QSVFrame *query_frame(QSVVPPContext *s, AVFilterLink *outlink)
return NULL;
}
- out_frame->surface = (mfxFrameSurface1 *)out_frame->frame->data[3];
+ out_frame->surface = *(mfxFrameSurface1 *)out_frame->frame->data[3];
} else {
/* Get a frame with aligned dimensions.
* Libmfx need system memory being 128x64 aligned */
@@ -490,14 +463,12 @@ static QSVFrame *query_frame(QSVVPPContext *s, AVFilterLink *outlink)
out_frame->frame->height = outlink->h;
ret = map_frame_to_surface(out_frame->frame,
- &out_frame->surface_internal);
+ &out_frame->surface);
if (ret < 0)
return NULL;
-
- out_frame->surface = &out_frame->surface_internal;
}
- out_frame->surface->Info = s->vpp_param.vpp.Out;
+ out_frame->surface.Info = s->vpp_param.vpp.Out;
return out_frame;
}
@@ -666,6 +637,16 @@ static int init_vpp_session(AVFilterContext *avctx, QSVVPPContext *s)
return 0;
}
+static unsigned int qsv_fifo_item_size(void)
+{
+ return sizeof(mfxSyncPoint) + sizeof(QSVFrame*);
+}
+
+static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
+{
+ return av_fifo_size(fifo)/qsv_fifo_item_size();
+}
+
int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *param)
{
int i;
@@ -738,7 +719,17 @@ int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *p
s->vpp_param.ExtParam = param->ext_buf;
}
- s->vpp_param.AsyncDepth = 1;
+ s->got_frame = 0;
+
+ /** keep fifo size at least 1. Even when async_depth is 0, fifo is used. */
+ s->async_fifo = av_fifo_alloc((param->async_depth + 1) * qsv_fifo_item_size());
+ s->async_depth = param->async_depth;
+ if (!s->async_fifo) {
+ ret = AVERROR(ENOMEM);
+ goto failed;
+ }
+
+ s->vpp_param.AsyncDepth = param->async_depth;
if (IS_SYSTEM_MEMORY(s->in_mem_mode))
s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY;
@@ -793,6 +784,7 @@ int ff_qsvvpp_free(QSVVPPContext **vpp)
av_freep(&s->surface_ptrs_out);
av_freep(&s->ext_buffers);
av_freep(&s->frame_infos);
+ av_fifo_free(s->async_fifo);
av_freep(vpp);
return 0;
@@ -803,9 +795,29 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr
AVFilterContext *ctx = inlink->dst;
AVFilterLink *outlink = ctx->outputs[0];
mfxSyncPoint sync;
- QSVFrame *in_frame, *out_frame;
+ QSVFrame *in_frame, *out_frame, *tmp;
int ret, filter_ret;
+ while (s->eof && qsv_fifo_size(s->async_fifo)) {
+ av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL);
+ av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL);
+ if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
+ av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
+
+ filter_ret = s->filter_frame(outlink, tmp->frame);
+ if (filter_ret < 0) {
+ av_frame_free(&tmp->frame);
+ ret = filter_ret;
+ break;
+ }
+ tmp->queued--;
+ s->got_frame = 1;
+ tmp->frame = NULL;
+ };
+
+ if (!picref)
+ return 0;
+
in_frame = submit_frame(s, inlink, picref);
if (!in_frame) {
av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on input[%d]\n",
@@ -821,8 +833,8 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr
}
do {
- ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame->surface,
- out_frame->surface, NULL, &sync);
+ ret = MFXVideoVPP_RunFrameVPPAsync(s->session, &in_frame->surface,
+ &out_frame->surface, NULL, &sync);
if (ret == MFX_WRN_DEVICE_BUSY)
av_usleep(500);
} while (ret == MFX_WRN_DEVICE_BUSY);
@@ -833,20 +845,33 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr
ret = AVERROR(EAGAIN);
break;
}
+ out_frame->frame->pts = av_rescale_q(out_frame->surface.Data.TimeStamp,
+ default_tb, outlink->time_base);
- if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
- av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
+ out_frame->queued++;
+ av_fifo_generic_write(s->async_fifo, &out_frame, sizeof(out_frame), NULL);
+ av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync), NULL);
- out_frame->frame->pts = av_rescale_q(out_frame->surface->Data.TimeStamp,
- default_tb, outlink->time_base);
- filter_ret = s->filter_frame(outlink, out_frame->frame);
- if (filter_ret < 0) {
- av_frame_free(&out_frame->frame);
- ret = filter_ret;
- break;
+ if (qsv_fifo_size(s->async_fifo) > s->async_depth) {
+ av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL);
+ av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL);
+
+ do {
+ ret = MFXVideoCORE_SyncOperation(s->session, sync, 1000);
+ } while (ret == MFX_WRN_IN_EXECUTION);
+
+ filter_ret = s->filter_frame(outlink, tmp->frame);
+ if (filter_ret < 0) {
+ av_frame_free(&tmp->frame);
+ ret = filter_ret;
+ break;
+ }
+
+ tmp->queued--;
+ s->got_frame = 1;
+ tmp->frame = NULL;
}
- out_frame->frame = NULL;
} while(ret == MFX_ERR_MORE_SURFACE);
return ret;
diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h
index b4baeedf9e..e0f4c8f5bb 100644
--- a/libavfilter/qsvvpp.h
+++ b/libavfilter/qsvvpp.h
@@ -27,6 +27,7 @@
#include <mfx/mfxvideo.h>
#include "avfilter.h"
+#include "libavutil/fifo.h"
#define FF_INLINK_IDX(link) ((int)((link)->dstpad - (link)->dst->input_pads))
#define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src->output_pads))
@@ -39,7 +40,41 @@
((MFX_VERSION.Major > (MAJOR)) || \
(MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR)))
-typedef struct QSVVPPContext QSVVPPContext;
+typedef struct QSVFrame {
+ AVFrame *frame;
+ mfxFrameSurface1 surface;
+ struct QSVFrame *next;
+ int queued;
+} QSVFrame;
+
+typedef struct QSVVPPContext {
+ mfxSession session;
+ int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame); /**< callback */
+ enum AVPixelFormat out_sw_format; /**< Real output format */
+ mfxVideoParam vpp_param;
+ mfxFrameInfo *frame_infos; /**< frame info for each input */
+
+ /** members related to the input/output surface */
+ int in_mem_mode;
+ int out_mem_mode;
+ QSVFrame *in_frame_list;
+ QSVFrame *out_frame_list;
+ int nb_surface_ptrs_in;
+ int nb_surface_ptrs_out;
+ mfxFrameSurface1 **surface_ptrs_in;
+ mfxFrameSurface1 **surface_ptrs_out;
+
+ /** MFXVPP extern parameters */
+ mfxExtOpaqueSurfaceAlloc opaque_alloc;
+ mfxExtBuffer **ext_buffers;
+ int nb_ext_buffers;
+
+ int got_frame;
+ int async_depth;
+ int eof;
+ /** order with frame_out, sync */
+ AVFifoBuffer *async_fifo;
+} QSVVPPContext;
typedef struct QSVVPPCrop {
int in_idx; ///< Input index
@@ -60,6 +95,8 @@ typedef struct QSVVPPParam {
/* Crop information for each input, if needed */
int num_crop;
QSVVPPCrop *crop;
+
+ int async_depth;
} QSVVPPParam;
/* create and initialize the QSV session */
diff --git a/libavfilter/vf_deinterlace_qsv.c b/libavfilter/vf_deinterlace_qsv.c
index 89a282f99e..34feb616ab 100644
--- a/libavfilter/vf_deinterlace_qsv.c
+++ b/libavfilter/vf_deinterlace_qsv.c
@@ -47,14 +47,6 @@ enum {
QSVDEINT_MORE_INPUT,
};
-typedef struct QSVFrame {
- AVFrame *frame;
- mfxFrameSurface1 surface;
- int used;
-
- struct QSVFrame *next;
-} QSVFrame;
-
typedef struct QSVDeintContext {
const AVClass *class;
@@ -376,7 +368,7 @@ static void clear_unused_frames(QSVDeintContext *s)
while (cur) {
if (!cur->surface.Data.Locked) {
av_frame_free(&cur->frame);
- cur->used = 0;
+ cur->queued = 0;
}
cur = cur->next;
}
@@ -391,7 +383,7 @@ static int get_free_frame(QSVDeintContext *s, QSVFrame **f)
frame = s->work_frames;
last = &s->work_frames;
while (frame) {
- if (!frame->used) {
+ if (!frame->queued) {
*f = frame;
return 0;
}
@@ -453,7 +445,7 @@ static int submit_frame(AVFilterContext *ctx, AVFrame *frame,
(AVRational){1, 90000});
*surface = &qf->surface;
- qf->used = 1;
+ qf->queued = 1;
return 0;
}
diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
index 5d57707455..d9c27ce43e 100644
--- a/libavfilter/vf_vpp_qsv.c
+++ b/libavfilter/vf_vpp_qsv.c
@@ -32,6 +32,7 @@
#include "formats.h"
#include "internal.h"
#include "avfilter.h"
+#include "filters.h"
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
@@ -93,6 +94,9 @@ typedef struct VPPContext{
char *cx, *cy, *cw, *ch;
char *ow, *oh;
char *output_format_str;
+
+ int async_depth;
+ int eof;
} VPPContext;
static const AVOption options[] = {
@@ -128,6 +132,7 @@ static const AVOption options[] = {
{ "h", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
{ "height", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
{ "format", "Output pixel format", OFFSET(output_format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
+ { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
{ NULL }
};
@@ -303,6 +308,7 @@ static int config_output(AVFilterLink *outlink)
param.filter_frame = NULL;
param.num_ext_buf = 0;
param.ext_buf = ext_buf;
+ param.async_depth = vpp->async_depth;
if (inlink->format == AV_PIX_FMT_QSV) {
if (!inlink->hw_frames_ctx || !inlink->hw_frames_ctx->data)
@@ -467,23 +473,64 @@ static int config_output(AVFilterLink *outlink)
return 0;
}
-static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
+static int activate(AVFilterContext *ctx)
{
- int ret = 0;
- AVFilterContext *ctx = inlink->dst;
- VPPContext *vpp = inlink->dst->priv;
- AVFilterLink *outlink = ctx->outputs[0];
-
- if (vpp->qsv) {
- ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref);
- av_frame_free(&picref);
+ AVFilterLink *inlink = ctx->inputs[0];
+ AVFilterLink *outlink = ctx->outputs[0];
+ VPPContext *s =ctx->priv;
+ QSVVPPContext *qsv = s->qsv;
+ AVFrame *in = NULL;
+ int ret, status;
+ int64_t pts;
+
+ FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+
+ if (!s->eof) {
+ ret = ff_inlink_consume_frame(inlink, &in);
+ if (ret < 0)
+ return ret;
+
+ if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
+ if (status == AVERROR_EOF) {
+ s->eof = 1;
+ }
+ }
+ }
+
+ if (qsv) {
+ if (in || s->eof) {
+ qsv->eof = s->eof;
+ ret = ff_qsvvpp_filter_frame(qsv, inlink, in);
+ av_frame_free(&in);
+
+ if (s->eof) {
+ ff_outlink_set_status(outlink, status, pts);
+ return 0;
+ }
+
+ if (qsv->got_frame) {
+ qsv->got_frame = 0;
+ return ret;
+ }
+ }
} else {
- if (picref->pts != AV_NOPTS_VALUE)
- picref->pts = av_rescale_q(picref->pts, inlink->time_base, outlink->time_base);
- ret = ff_filter_frame(outlink, picref);
+ if (in) {
+ if (in->pts != AV_NOPTS_VALUE)
+ in->pts = av_rescale_q(in->pts, inlink->time_base, outlink->time_base);
+
+ ret = ff_filter_frame(outlink, in);
+ return ret;
+ }
}
- return ret;
+ if (s->eof) {
+ ff_outlink_set_status(outlink, status, pts);
+ return 0;
+ } else {
+ FF_FILTER_FORWARD_WANTED(outlink, inlink);
+ }
+
+ return FFERROR_NOT_READY;
}
static int query_formats(AVFilterContext *ctx)
@@ -531,7 +578,6 @@ static const AVFilterPad vpp_inputs[] = {
.name = "default",
.type = AVMEDIA_TYPE_VIDEO,
.config_props = config_input,
- .filter_frame = filter_frame,
},
{ NULL }
};
@@ -554,6 +600,7 @@ AVFilter ff_vf_vpp_qsv = {
.uninit = vpp_uninit,
.inputs = vpp_inputs,
.outputs = vpp_outputs,
+ .activate = activate,
.priv_class = &vpp_class,
.flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
};
--
2.17.1
More information about the ffmpeg-devel
mailing list