[FFmpeg-devel] [PATCH v9 6/6] lavc/hevcdec: Parse DOVI RPU NALs

Sun Jan 2 06:31:30 EET 2022

Niklas Haas:
> From: Niklas Haas <git at haasn.dev>
> 
> And expose the parsed values as frame side data. Update FATE results to
> match.
> 
> It's worth documenting that this relies on the dovi configuration record
> being present on the first AVPacket fed to the decoder, which in
> practice is the case if the API user has called something like
> av_format_inject_global_side_data, which is unfortunately not the
> default.
> 
> This commit is not the time and place to change that behavior, though.
> 
> Signed-off-by: Niklas Haas <git at haasn.dev>
> ---
>  configure                  |   2 +-
>  libavcodec/hevcdec.c       |  63 +++++++++--
>  libavcodec/hevcdec.h       |   3 +
>  tests/ref/fate/hevc-dv-rpu | 224 +++++++++++++++++++++++++++++++++++++
>  4 files changed, 283 insertions(+), 9 deletions(-)
> 
> diff --git a/configure b/configure
> index 68658a847f..7803aa47af 100755
> --- a/configure
> +++ b/configure
> @@ -2826,7 +2826,7 @@ h264_decoder_suggest="error_resilience"
>  hap_decoder_select="snappy texturedsp"
>  hap_encoder_deps="libsnappy"
>  hap_encoder_select="texturedspenc"
> -hevc_decoder_select="atsc_a53 bswapdsp cabac golomb hevcparse videodsp"
> +hevc_decoder_select="atsc_a53 bswapdsp cabac dovi_rpu golomb hevcparse videodsp"
>  huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp"
>  huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llvidencdsp"
>  hymt_decoder_select="huffyuv_decoder"
> diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
> index 46d9edf8eb..298d89fea6 100644
> --- a/libavcodec/hevcdec.c
> +++ b/libavcodec/hevcdec.c
> @@ -2723,6 +2723,7 @@ error:
>  static int set_side_data(HEVCContext *s)
>  {
>      AVFrame *out = s->ref->frame;
> +    int ret;
>  
>      if (s->sei.frame_packing.present &&
>          s->sei.frame_packing.arrangement_type >= 3 &&
> @@ -2967,6 +2968,9 @@ static int set_side_data(HEVCContext *s)
>          s->rpu_buf = NULL;
>      }
>  
> +    if ((ret = ff_dovi_attach_side_data(&s->dovi_ctx, out)) < 0)
> +        return ret;
> +
>      return 0;
>  }
>  
> @@ -3298,16 +3302,24 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
>      if (s->pkt.nb_nals > 1 && s->pkt.nals[s->pkt.nb_nals - 1].type == HEVC_NAL_UNSPEC62 &&
>          s->pkt.nals[s->pkt.nb_nals - 1].size > 2 && !s->pkt.nals[s->pkt.nb_nals - 1].nuh_layer_id
>          && !s->pkt.nals[s->pkt.nb_nals - 1].temporal_id) {
> +        H2645NAL *nal = &s->pkt.nals[s->pkt.nb_nals - 1];
>          if (s->rpu_buf) {
>              av_buffer_unref(&s->rpu_buf);
>              av_log(s->avctx, AV_LOG_WARNING, "Multiple Dolby Vision RPUs found in one AU. Skipping previous.\n");
>          }
>  
> -        s->rpu_buf = av_buffer_alloc(s->pkt.nals[s->pkt.nb_nals - 1].raw_size - 2);
> +        s->rpu_buf = av_buffer_alloc(nal->raw_size - 2);
>          if (!s->rpu_buf)
>              return AVERROR(ENOMEM);
> +        memcpy(s->rpu_buf->data, nal->raw_data + 2, nal->raw_size - 2);
>  
> -        memcpy(s->rpu_buf->data, s->pkt.nals[s->pkt.nb_nals - 1].raw_data + 2, s->pkt.nals[s->pkt.nb_nals - 1].raw_size - 2);
> +        s->dovi_ctx.config = s->dovi_cfg ? (void *) s->dovi_cfg->data : NULL;
> +        ret = ff_dovi_rpu_parse(&s->dovi_ctx, nal->data + 2, nal->size - 2);
> +        if (ret < 0) {
> +            av_buffer_unref(&s->rpu_buf);
> +            av_log(s->avctx, AV_LOG_WARNING, "Error parsing DOVI NAL unit.\n");
> +            /* ignore */
> +        }
>      }
>  
>      /* decode the NAL units */
> @@ -3440,8 +3452,8 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
>                               AVPacket *avpkt)
>  {
>      int ret;
> -    size_t new_extradata_size;
> -    uint8_t *new_extradata;
> +    uint8_t *sd;
> +    size_t sd_size;
>      HEVCContext *s = avctx->priv_data;
>  
>      if (!avpkt->size) {
> @@ -3453,14 +3465,37 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
>          return 0;
>      }
>  
> -    new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
> -                                            &new_extradata_size);
> -    if (new_extradata && new_extradata_size > 0) {
> -        ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
> +    sd = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA, &sd_size);
> +    if (sd && sd_size > 0) {
> +        ret = hevc_decode_extradata(s, sd, sd_size, 0);
>          if (ret < 0)
>              return ret;
>      }
>  
> +    sd = av_packet_get_side_data(avpkt, AV_PKT_DATA_DOVI_CONF, &sd_size);
> +    if (sd && sd_size > 0) {
> +        if (s->dovi_cfg) {
> +            /* Reuse existing buffer */
> +            if ((ret = av_buffer_make_writable(&s->dovi_cfg)) < 0)
> +                return ret;
> +        } else {
> +            /* Allocate new buffer */
> +            AVDOVIDecoderConfigurationRecord *cfg;
> +            size_t cfg_size;
> +            cfg = av_dovi_alloc(&cfg_size);
> +            if (!cfg)
> +                return AVERROR(ENOMEM);
> +            s->dovi_cfg = av_buffer_create((uint8_t *) cfg, cfg_size, NULL, NULL, 0);
> +            if (!s->dovi_cfg) {
> +                av_free(cfg);
> +                return AVERROR(ENOMEM);
> +            }
> +        }
> +
> +        av_assert0(sd_size >= s->dovi_cfg->size);
> +        memcpy(s->dovi_cfg->data, sd, s->dovi_cfg->size);

dovi_cfg is used for exactly one thing: to read its dv_profile in
ff_dovi_rpu_parse() later. That is quite a lot of effort to get this bit
of data. Why not add an ff_dovi_parse_config() that just copies the
needed field(s) from the user-supplied AVDOVIDecoderConfigurationRecord
to the DOVIContext instead?
(Furthermore, said config is explicitly intended to survive an unref, as
happens in a flush, which leads to a dangling pointer if the user
unrefs dovi_cfg in flush. (Yes, the hevc decoder ensures that config is
always set correctly before every ff_dovi_rpu_parse() call, but it is
nevertheless nowhere stated that the profile is intended to be persistent.))

> +    }
> +
>      s->ref = NULL;
>      ret    = decode_nal_units(s, avpkt->data, avpkt->size);
>      if (ret < 0)
> @@ -3553,6 +3588,8 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
>  
>      pic_arrays_free(s);
>  
> +    ff_dovi_ctx_unref(&s->dovi_ctx);
> +    av_buffer_unref(&s->dovi_cfg);
>      av_buffer_unref(&s->rpu_buf);
>  
>      av_freep(&s->md5_ctx);
> @@ -3637,6 +3674,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx)
>  
>      ff_bswapdsp_init(&s->bdsp);
>  
> +    s->dovi_ctx.logctx = avctx;
>      s->context_initialized = 1;
>      s->eos = 0;
>  
> @@ -3745,6 +3783,14 @@ static int hevc_update_thread_context(AVCodecContext *dst,
>      if (ret < 0)
>          return ret;
>  
> +    ret = av_buffer_replace(&s->dovi_cfg, s0->dovi_cfg);
> +    if (ret < 0)
> +        return ret;
> +
> +    ret = ff_dovi_ctx_replace(&s->dovi_ctx, &s0->dovi_ctx);
> +    if (ret < 0)
> +        return ret;
> +
>      s->sei.frame_packing        = s0->sei.frame_packing;
>      s->sei.display_orientation  = s0->sei.display_orientation;
>      s->sei.mastering_display    = s0->sei.mastering_display;
> @@ -3801,6 +3847,7 @@ static void hevc_decode_flush(AVCodecContext *avctx)
>      HEVCContext *s = avctx->priv_data;
>      ff_hevc_flush_dpb(s);
>      ff_hevc_reset_sei(&s->sei);
> +    ff_dovi_ctx_unref(&s->dovi_ctx);
>      av_buffer_unref(&s->rpu_buf);
>      s->max_ra = INT_MAX;
>      s->eos = 1;
> diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
> index 870ff178d4..c8dde6fd17 100644
> --- a/libavcodec/hevcdec.h
> +++ b/libavcodec/hevcdec.h
> @@ -32,6 +32,7 @@
>  #include "avcodec.h"
>  #include "bswapdsp.h"
>  #include "cabac.h"
> +#include "dovi_rpu.h"
>  #include "get_bits.h"
>  #include "hevcpred.h"
>  #include "h2645_parse.h"
> @@ -574,6 +575,8 @@ typedef struct HEVCContext {
>      int nuh_layer_id;
>  
>      AVBufferRef *rpu_buf;       ///< 0 or 1 Dolby Vision RPUs.
> +    AVBufferRef *dovi_cfg;      ///< contains AVDOVIDecoderConfigurationRecord
> +    DOVIContext dovi_ctx;       ///< Dolby Vision decoding context
>  } HEVCContext;
>  
>  /**