[FFmpeg-devel] [PATCH 3/3] libavcodec/vaapi_encode: Add async_depth to vaapi_encoder to increase performance
Ed Martin
lists at edman007.com
Sat Dec 25 01:22:42 EET 2021
On 10/31/21 22:14, Chen, Wenbin wrote:
>> Add async_depth to increase the encoder's performance. Reuse encode_fifo as
>> the async buffer. The encoder submits all reordered frames to the HW and then
>> checks the fifo size. If fifo < async_depth and the top frame is not ready, it
>> returns AVERROR(EAGAIN) to request more frames.
>>
>> 1080p transcoding (no B-frames) with -async_depth=4 improves performance
>> by 20% in my environment.
>> Asynchronous encoding increases performance but also introduces frame delay.
>>
>> Signed-off-by: Wenbin Chen <wenbin.chen at intel.com>
>> ---
>> libavcodec/vaapi_encode.c | 20 +++++++++++++++-----
>> libavcodec/vaapi_encode.h | 12 ++++++++++--
>> 2 files changed, 25 insertions(+), 7 deletions(-)
>>
>> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
>> index db0ae136a1..616fb7c089 100644
>> --- a/libavcodec/vaapi_encode.c
>> +++ b/libavcodec/vaapi_encode.c
>> @@ -1158,7 +1158,8 @@ static int
>> vaapi_encode_send_frame(AVCodecContext *avctx, AVFrame *frame)
>> if (ctx->input_order == ctx->decode_delay)
>> ctx->dts_pts_diff = pic->pts - ctx->first_pts;
>> if (ctx->output_delay > 0)
>> - ctx->ts_ring[ctx->input_order % (3 * ctx->output_delay)] = pic->pts;
>> + ctx->ts_ring[ctx->input_order %
>> + (3 * ctx->output_delay + ctx->async_depth)] = pic->pts;
>>
>> pic->display_order = ctx->input_order;
>> ++ctx->input_order;
>> @@ -1212,7 +1213,8 @@ int
>> ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
>> return AVERROR(EAGAIN);
>> }
>>
>> - while (av_fifo_size(ctx->encode_fifo) <= MAX_PICTURE_REFERENCES *
>> sizeof(VAAPIEncodePicture *)) {
>> + while (av_fifo_size(ctx->encode_fifo) <
>> + MAX_ASYNC_DEPTH * sizeof(VAAPIEncodePicture *)) {
>> pic = NULL;
>> err = vaapi_encode_pick_next(avctx, &pic);
>> if (err < 0)
>> @@ -1234,6 +1236,14 @@ int
>> ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
>> if (!av_fifo_size(ctx->encode_fifo))
>> return err;
>>
>> + if (av_fifo_size(ctx->encode_fifo) < ctx->async_depth *
>> sizeof(VAAPIEncodePicture *) &&
>> + !ctx->end_of_stream) {
>> + av_fifo_generic_peek(ctx->encode_fifo, &pic, sizeof(pic), NULL);
>> + err = vaapi_encode_wait(avctx, pic, 0);
>> + if (err < 0)
>> + return err;
>> + }
>> +
>> av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
>> ctx->encode_order = pic->encode_order + 1;
>>
>> @@ -1252,7 +1262,7 @@ int
>> ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
>> pkt->dts = ctx->ts_ring[pic->encode_order] - ctx->dts_pts_diff;
>> } else {
>> pkt->dts = ctx->ts_ring[(pic->encode_order - ctx->decode_delay) %
>> - (3 * ctx->output_delay)];
>> + (3 * ctx->output_delay + ctx->async_depth)];
>> }
>> av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64"
>> dts %"PRId64".\n",
>> pkt->pts, pkt->dts);
>> @@ -2566,8 +2576,8 @@ av_cold int ff_vaapi_encode_init(AVCodecContext
>> *avctx)
>> }
>> }
>>
>> - ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
>> - sizeof(VAAPIEncodePicture *));
>> + ctx->encode_fifo = av_fifo_alloc(MAX_ASYNC_DEPTH *
>> + sizeof(VAAPIEncodePicture *));
>> if (!ctx->encode_fifo)
>> return AVERROR(ENOMEM);
>>
>> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
>> index 89fe8de466..1bf5d7c337 100644
>> --- a/libavcodec/vaapi_encode.h
>> +++ b/libavcodec/vaapi_encode.h
>> @@ -48,6 +48,7 @@ enum {
>> MAX_TILE_ROWS = 22,
>> // A.4.1: table A.6 allows at most 20 tile columns for any level.
>> MAX_TILE_COLS = 20,
>> + MAX_ASYNC_DEPTH = 64,
>> };
>>
>> extern const AVCodecHWConfigInternal *const
>> ff_vaapi_encode_hw_configs[];
>> @@ -298,7 +299,8 @@ typedef struct VAAPIEncodeContext {
>> // Timestamp handling.
>> int64_t first_pts;
>> int64_t dts_pts_diff;
>> - int64_t ts_ring[MAX_REORDER_DELAY * 3];
>> + int64_t ts_ring[MAX_REORDER_DELAY * 3 +
>> + MAX_ASYNC_DEPTH];
>>
>> // Slice structure.
>> int slice_block_rows;
>> @@ -348,6 +350,8 @@ typedef struct VAAPIEncodeContext {
>> AVFrame *frame;
>>
>> AVFifoBuffer *encode_fifo;
>> +
>> + int async_depth;
>> } VAAPIEncodeContext;
>>
>> enum {
>> @@ -458,7 +462,11 @@ int ff_vaapi_encode_close(AVCodecContext *avctx);
>> { "b_depth", \
>> "Maximum B-frame reference depth", \
>> OFFSET(common.desired_b_depth), AV_OPT_TYPE_INT, \
>> - { .i64 = 1 }, 1, INT_MAX, FLAGS }
>> + { .i64 = 1 }, 1, INT_MAX, FLAGS }, \
>> + { "async_depth", "Maximum processing parallelism. " \
>> + "Increase this to improve single channel performance", \
>> + OFFSET(common.async_depth), AV_OPT_TYPE_INT, \
>> + { .i64 = 4 }, 0, MAX_ASYNC_DEPTH, FLAGS }
>>
>> #define VAAPI_ENCODE_RC_MODE(name, desc) \
>> { #name, desc, 0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_ ## name }, \
>> --
>> 2.25.1
> ping
I tested this patchset and can confirm that it fixes a bug that I had
thought was a Mesa bug
(https://gitlab.freedesktop.org/mesa/mesa/-/issues/1235).
I would love it if this feature were incorporated into FFmpeg.
More information about the ffmpeg-devel
mailing list