[FFmpeg-devel] [PATCH v2 3/3] avcodec/cuvid: introduce a ringbuffer to reattach additional data

Fri Nov 29 22:46:53 EET 2024

On Fri, 29 Nov 2024 at 20:06, Timo Rothenpieler <timo at rothenpieler.org> wrote:
>
> On 01.11.2024 18:21, Clément Péron wrote:
> > From: Troy Benson <t.benson-c at paravision.ai>
> >
> > Cuvid data packet have specific format that don't contain any side data.
> > In order to keep additional information and metadata, we need to implement
> > a ring buffer and reattach side data to decoded frame.
> >
> > Signed-off-by: Troy Benson <t.benson-c at paravision.ai>
> > Signed-off-by: Clément Péron <peron.clem at gmail.com>
> > ---
> >   libavcodec/cuviddec.c | 175 ++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 175 insertions(+)
> >
> > diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
> > index 3fae9c12eb..464da4d2f4 100644
> > --- a/libavcodec/cuviddec.c
> > +++ b/libavcodec/cuviddec.c
> > @@ -51,6 +51,162 @@
> >   #define CUVID_HAS_AV1_SUPPORT
> >   #endif
> >
> > +/// @brief Structure to store source packets.
> > +/// This struct implements a circular buffer to store source packets.
> > +/// The packets are packets that have already been sent to the decoder.
> > +/// The reason we need the packets is because they may contain additional information and metadata,
> > +/// that we need to attach to the decoded frame so that the muxers and filters can use it.
> > +typedef struct sourcePkts
> > +{
> > +    /// @brief Array of AVPackets.
> > +    AVPacket **pkts;
> > +    /// @brief Number of packets to store.
> > +    int capacity;
> > +    /// @brief Index of the first packet.
> > +    int start_idx;
> > +    /// @brief Number of packets stored. (typically we found while testing that this hovers around 8)
> > +    int count;
> > +} sourcePkts;
> > +
> > +/// @brief Allocates a sourcePkts structure.
> > +/// @param avctx - AVCodecContext (for logging)
> > +/// @param source_pkts (output)
> > +/// @param capacity - number of pkts to store
> > +/// @return int = 0 on success, < 0 on error
> > +int sourcePkts_alloc(AVCodecContext *avctx, sourcePkts *source_pkts, int capacity);
> > +
> > +/// @brief Clears the sourcePkts structure of pkts with pts less than the given pts.
> > +/// @param avctx - AVCodecContext (for logging)
> > +/// @param source_pkts - sourcePkts structure
> > +/// @param pts - pts to clear pkts before
> > +/// @return int = 0 on success, < 0 on error
> > +int sourcePkts_clear(AVCodecContext *avctx, sourcePkts *source_pkts, int64_t pts);
> > +
> > +/// @brief Converts a logical index to a physical index.
> > +/// @param source_pkts - sourcePkts structure
> > +/// @param idx - logical index
> > +/// @return int - physical index
> > +int sourcePkts_idx(sourcePkts *source_pkts, int idx);
> > +
> > +/// @brief Adds a pkt to the sourcePkts structure.
> > +/// @param avctx - AVCodecContext (for logging)
> > +/// @param source_pkts - sourcePkts structure
> > +/// @param pkt - AVPacket to add
> > +/// @return int = 0 on success, < 0 on error
> > +int sourcePkts_add(AVCodecContext *avctx, sourcePkts *source_pkts, AVPacket *pkt);
> > +
> > +/// @brief Clears the sourcePkts structure.
> > +/// @param avctx - AVCodecContext (for logging)
> > +/// @param source_pkts - sourcePkts structure
> > +/// @return int = 0 on success, < 0 on error
> > +int sourcePkts_free(AVCodecContext *avctx, sourcePkts *source_pkts);
> > +
> > +/// @brief Finds a pkt with the given pts.
> > +/// @param avctx - AVCodecContext (for logging)
> > +/// @param source_pkts - sourcePkts structure
> > +/// @param pts - pts to find
> > +/// @return int - physical index of pkt, -1 if not found
> > +int sourcePkts_find(AVCodecContext *avctx, sourcePkts *source_pkts, int64_t pts);
> > +
> > +int sourcePkts_alloc(AVCodecContext *avctx, sourcePkts *source_pkts, int capacity)
> > +{
> > +    assert(source_pkts->pkts == NULL);
> > +    source_pkts->pkts = av_malloc_array(capacity, sizeof(AVPacket *));
> > +    if (!source_pkts->pkts)
> > +        return AVERROR(ENOMEM);
> > +
> > +    source_pkts->capacity = capacity;
> > +    source_pkts->start_idx = 0;
> > +    source_pkts->count = 0;
> > +
> > +    av_log(avctx, AV_LOG_TRACE, "sourcePkts_alloc: capacity: %d\n", capacity);
> > +
> > +    return 0;
> > +}
> > +
> > +// Converts a logical index to a physical index.
> > +int sourcePkts_idx(sourcePkts *source_pkts, int idx)
> > +{
> > +    return (source_pkts->start_idx + idx) % source_pkts->capacity;
> > +}
> > +
> > +int sourcePkts_find(AVCodecContext *avctx, sourcePkts *source_pkts, int64_t pts)
> > +{
> > +    for (int i = 0; i < source_pkts->count; i++)
> > +    {
> > +        int idx = sourcePkts_idx(source_pkts, i);
> > +        av_log(avctx, AV_LOG_TRACE, "sourcePkts_find: idx: %d, pts: %ld == %ld\n", idx, source_pkts->pkts[idx]->pts, pts);
> > +        if (source_pkts->pkts[idx]->pts == pts)
> > +            return idx;
> > +    }
> > +
> > +    return -1;
> > +}
> > +
> > +int sourcePkts_add(AVCodecContext *avctx, sourcePkts *source_pkts, AVPacket *pkt)
> > +{
> > +    int ret = 0;
> > +    int idx = 0;
> > +
> > +    if (!pkt || pkt->pts == AV_NOPTS_VALUE) return ret;
> > +
> > +    if (sourcePkts_find(avctx, source_pkts, pkt->pts) >= 0)
> > +    {
> > +        av_log(avctx, AV_LOG_TRACE, "sourcePkts_add: pkt already exists, pts: %ld\n", pkt->pts);
> > +        return ret;
> > +    }
> > +
> > +    if (source_pkts->capacity == source_pkts->count)
> > +    {
> > +        av_log(avctx, AV_LOG_TRACE, "sourcePkts_add: capacity reached, clearing old pkts\n");
> > +        ret = sourcePkts_clear(avctx, source_pkts, pkt->pts);
> > +        if (ret < 0) return ret;
> > +    }
> > +
> > +    idx = sourcePkts_idx(source_pkts, source_pkts->count);
> > +    source_pkts->pkts[idx] = av_packet_clone(pkt);
> > +    if (!source_pkts->pkts[idx]) return AVERROR(ENOMEM);
> > +
> > +    source_pkts->count++;
> > +
> > +    av_log(avctx, AV_LOG_TRACE, "sourcePkts_add: added pkt, pts: %ld, size: %d\n", pkt->pts, source_pkts->count);
> > +
> > +    return ret;
> > +}
> > +
> > +int sourcePkts_clear(AVCodecContext *avctx, sourcePkts *source_pkts, int64_t pts)
> > +{
> > +    for (int i = 0; i < source_pkts->count; i++)
> > +    {
> > +        if (!source_pkts->pkts[source_pkts->start_idx] || source_pkts->pkts[source_pkts->start_idx]->pts > pts)
> > +            break;
> > +
> > +        av_log(avctx, AV_LOG_TRACE, "sourcePkts_clear: clearing pkt, pts: %ld\n", source_pkts->pkts[source_pkts->start_idx]->pts);
> > +
> > +        av_packet_unref(source_pkts->pkts[source_pkts->start_idx]);
> > +        source_pkts->pkts[source_pkts->start_idx] = NULL;
> > +        source_pkts->count--;
> > +        source_pkts->start_idx = sourcePkts_idx(source_pkts, 1);
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +int sourcePkts_free(AVCodecContext *avctx, sourcePkts *source_pkts)
> > +{
> > +    for (int i = 0; i < source_pkts->count; i++)
> > +    {
> > +        if (source_pkts->pkts[i]) {
> > +            av_log(avctx, AV_LOG_TRACE, "sourcePkts_free: freeing pkt, pts: %ld\n", source_pkts->pkts[i]->pts);
> > +            av_packet_unref(source_pkts->pkts[i]);
> > +        }
> > +    }
> > +
> > +    av_freep(&source_pkts->pkts);
> > +
> > +    return 0;
> > +}
> > +
> >   typedef struct CuvidContext
> >   {
> >       AVClass *avclass;
> > @@ -95,6 +251,8 @@ typedef struct CuvidContext
> >
> >       int *key_frame;
> >
> > +    sourcePkts source_pkts;
> > +
> >       cudaVideoCodec codec_type;
> >       cudaVideoChromaFormat chroma_format;
> >
> > @@ -512,6 +670,8 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
> >           if (ret < 0 && ret != AVERROR_EOF)
> >               return ret;
> >           ret = cuvid_decode_packet(avctx, pkt);
> > +
> > +        ret = ret < 0 ? ret : sourcePkts_add(avctx, & ctx->source_pkts, pkt);
> >           av_packet_unref(pkt);
> >           // cuvid_is_buffer_full() should avoid this.
> >           if (ret == AVERROR(EAGAIN))
> > @@ -530,6 +690,7 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
> >           unsigned int pitch = 0;
> >           int offset = 0;
> >           int i;
> > +        int source_pkt_idx;
> >
> >           memset(&params, 0, sizeof(params));
> >           params.progressive_frame = parsed_frame.dispinfo.progressive_frame;
> > @@ -655,6 +816,16 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
> >               }
> >           }
> >
> > +        source_pkt_idx = sourcePkts_find(avctx, &ctx->source_pkts, frame->pts);
> > +        if (source_pkt_idx >= 0)
> > +        {
> > +            av_log(avctx, AV_LOG_TRACE, "found source_pkt for frame, pts: %ld\n", frame->pts);
> > +            ff_decode_frame_props_from_pkt(avctx, frame, ctx->source_pkts.pkts[source_pkt_idx]);
> > +            sourcePkts_clear(avctx, & ctx->source_pkts, frame->pts);
> > +        } else {
> > +            av_log(avctx, AV_LOG_TRACE, "no source_pkt for frame, pts: %ld\n", frame->pts);
> > +        }
> > +
> >           /* CUVIDs opaque reordering breaks the internal pkt logic.
> >            * So set pkt_pts and clear all the other pkt_ fields.
> >            */
> > @@ -722,6 +893,7 @@ static av_cold int cuvid_decode_end(AVCodecContext *avctx)
> >       av_freep(&ctx->cuparse_ext);
> >
> >       cuvid_free_functions(&ctx->cvdl);
> > +    sourcePkts_free(avctx, & ctx->source_pkts);
> >
> >       return 0;
> >   }
> > @@ -948,6 +1120,9 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
> >       cuda_ctx = device_hwctx->cuda_ctx;
> >       ctx->cudl = device_hwctx->internal->cuda_dl;
> >
> > +    // To be safe we allocate twice the number of surfaces to avoid the buffer running full with a minimum of 32.
> > +    sourcePkts_alloc(avctx, &ctx->source_pkts, ctx->nb_surfaces * 2 <= 32 ? 32 : ctx->nb_surfaces * 2);
> > +
> >       memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
> >       memset(&seq_pkt, 0, sizeof(seq_pkt));
> >
>
> This whole patch feels out of place for cuviddec. It adds a lot of
> complexity, and also breaks with typical naming conventions.
> Why is all this neccesary in cuviddec?

I agree with you that it adds a lot of complexity that shouldn't be
managed by cuviddec.
But as I tried to explain in the commit message, the cuvid format
doesn't allow to have any custom data.

So if we want to reattach side data when we receive a decoded frame we
need to have a mechanism to do so.
This patch proposes to introduce a ring buffer to be able to do this
reattachment.
What would be the proper way to achieve this if it's not in the cuviddec ?

For the naming convention I will fix this.

>
> Generally, the only point of cuviddec is as a validation thing to
> compare the results of the proper hwaccel against it.
> So I'm against adding any additional features and complexity, if the one
> can just use the nvdec hwaccel instead.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".