[FFmpeg-devel] [PATCH V3 3/3] libavcodec/vaapi_encode: Add async_depth to vaapi_encoder to increase performance
Xiang, Haihao
haihao.xiang at intel.com
Fri Feb 11 06:43:19 EET 2022
> Add async_depth to increase the encoder's performance. Reuse encode_fifo as
> the async buffer. The encoder submits all reordered frames to the HW and then
> checks the fifo size. If the fifo size < async_depth and the top frame is not
> ready, it will return AVERROR(EAGAIN) to request more frames.
>
> 1080p transcoding (no B frames) with -async_depth=4 can increase
> performance by 20% in my environment.
> Async encoding increases performance but also introduces frame delay.
>
> Signed-off-by: Wenbin Chen <wenbin.chen at intel.com>
> ---
> libavcodec/vaapi_encode.c | 16 ++++++++++++----
> libavcodec/vaapi_encode.h | 12 ++++++++++--
> 2 files changed, 22 insertions(+), 6 deletions(-)
>
> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> index 15ddbbaa4a..432abf31f7 100644
> --- a/libavcodec/vaapi_encode.c
> +++ b/libavcodec/vaapi_encode.c
> @@ -1158,7 +1158,8 @@ static int vaapi_encode_send_frame(AVCodecContext
> *avctx, AVFrame *frame)
> if (ctx->input_order == ctx->decode_delay)
> ctx->dts_pts_diff = pic->pts - ctx->first_pts;
> if (ctx->output_delay > 0)
> - ctx->ts_ring[ctx->input_order % (3 * ctx->output_delay)] = pic-
> >pts;
> + ctx->ts_ring[ctx->input_order %
> + (3 * ctx->output_delay + ctx->async_depth)] = pic-
> >pts;
>
> pic->display_order = ctx->input_order;
> ++ctx->input_order;
> @@ -1214,7 +1215,7 @@ int ff_vaapi_encode_receive_packet(AVCodecContext
> *avctx, AVPacket *pkt)
>
> #if VA_CHECK_VERSION(1, 9, 0)
> if (ctx->has_sync_buffer_func) {
> - while (av_fifo_can_read(ctx->encode_fifo) <= MAX_PICTURE_REFERENCES)
> {
> + while (av_fifo_can_read(ctx->encode_fifo) <= MAX_ASYNC_DEPTH) {
> pic = NULL;
> err = vaapi_encode_pick_next(avctx, &pic);
> if (err < 0)
> @@ -1232,6 +1233,13 @@ int ff_vaapi_encode_receive_packet(AVCodecContext
> *avctx, AVPacket *pkt)
> }
> if (!av_fifo_can_read(ctx->encode_fifo))
> return err;
> + if (av_fifo_can_read(ctx->encode_fifo) < ctx->async_depth &&
> + !ctx->end_of_stream) {
> + av_fifo_peek(ctx->encode_fifo, &pic, 1, 0);
> + err = vaapi_encode_wait(avctx, pic, 0);
> + if (err < 0)
> + return err;
> + }
> av_fifo_read(ctx->encode_fifo, &pic, 1);
> ctx->encode_order = pic->encode_order + 1;
> } else
> @@ -1267,7 +1275,7 @@ int ff_vaapi_encode_receive_packet(AVCodecContext
> *avctx, AVPacket *pkt)
> pkt->dts = ctx->ts_ring[pic->encode_order] - ctx->dts_pts_diff;
> } else {
> pkt->dts = ctx->ts_ring[(pic->encode_order - ctx->decode_delay) %
> - (3 * ctx->output_delay)];
> + (3 * ctx->output_delay + ctx->async_depth)];
> }
> av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64" dts
> %"PRId64".\n",
> pkt->pts, pkt->dts);
> @@ -2588,7 +2596,7 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
> vas = vaSyncBuffer(ctx->hwctx->display, 0, 0);
> if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
> ctx->has_sync_buffer_func = 1;
> - ctx->encode_fifo = av_fifo_alloc2(MAX_PICTURE_REFERENCES + 1,
> + ctx->encode_fifo = av_fifo_alloc2(MAX_ASYNC_DEPTH,
> sizeof(VAAPIEncodePicture *),
> 0);
> if (!ctx->encode_fifo)
> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> index d33a486cb8..691521387d 100644
> --- a/libavcodec/vaapi_encode.h
> +++ b/libavcodec/vaapi_encode.h
> @@ -48,6 +48,7 @@ enum {
> MAX_TILE_ROWS = 22,
> // A.4.1: table A.6 allows at most 20 tile columns for any level.
> MAX_TILE_COLS = 20,
> + MAX_ASYNC_DEPTH = 64,
> };
>
> extern const AVCodecHWConfigInternal *const ff_vaapi_encode_hw_configs[];
> @@ -298,7 +299,8 @@ typedef struct VAAPIEncodeContext {
> // Timestamp handling.
> int64_t first_pts;
> int64_t dts_pts_diff;
> - int64_t ts_ring[MAX_REORDER_DELAY * 3];
> + int64_t ts_ring[MAX_REORDER_DELAY * 3 +
> + MAX_ASYNC_DEPTH];
>
> // Slice structure.
> int slice_block_rows;
> @@ -350,6 +352,8 @@ typedef struct VAAPIEncodeContext {
> AVFifo *encode_fifo;
> //Whether the driver support vaSyncBuffer
> int has_sync_buffer_func;
> + //Max number of frame buffered in encoder.
> + int async_depth;
> } VAAPIEncodeContext;
>
> enum {
> @@ -460,7 +464,11 @@ int ff_vaapi_encode_close(AVCodecContext *avctx);
> { "b_depth", \
> "Maximum B-frame reference depth", \
> OFFSET(common.desired_b_depth), AV_OPT_TYPE_INT, \
> - { .i64 = 1 }, 1, INT_MAX, FLAGS }
> + { .i64 = 1 }, 1, INT_MAX, FLAGS }, \
> + { "async_depth", "Maximum processing parallelism. " \
> + "Increase this to improve single channel performance", \
async_depth is not available if vaSyncBuffer is not implemented, so it would be
better to note this limitation in the help string.
Thanks
Haihao
> + OFFSET(common.async_depth), AV_OPT_TYPE_INT, \
> + { .i64 = 4 }, 0, MAX_ASYNC_DEPTH, FLAGS }
>
> #define VAAPI_ENCODE_RC_MODE(name, desc) \
> { #name, desc, 0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_ ## name }, \
More information about the ffmpeg-devel
mailing list