[FFmpeg-devel] [PATCH v2 1/2] avcodec/cbs_h2645: fix parsing and storing Picture Header references in the context

James Almer jamrial at gmail.com
Sun Jul 2 16:01:54 EEST 2023


On 7/1/2023 10:06 PM, Nuo Mi wrote:
> On Sun, Jul 2, 2023 at 7:32 AM James Almer <jamrial at gmail.com> wrote:
> 
>> On 7/1/2023 8:51 AM, Nuo Mi wrote:
>>> On Fri, Jun 30, 2023 at 6:45 AM James Almer<jamrial at gmail.com>  wrote:
>>>
>>>> Signed-off-by: James Almer<jamrial at gmail.com>
>>>> ---
>>>>    libavcodec/cbs_h2645.c                | 35 ++++++++++++++++-----------
>>>>    libavcodec/cbs_h266.h                 | 17 +++++++------
>>>>    libavcodec/cbs_h266_syntax_template.c | 17 ++++++-------
>>>>    libavcodec/h266_metadata_bsf.c        | 13 +++++-----
>>>>    libavcodec/vvc_parser.c               | 10 ++++----
>>>>    5 files changed, 50 insertions(+), 42 deletions(-)
>>>>
>>>> diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c
>>>> index cdd7901518..68ccf6a7eb 100644
>>>> --- a/libavcodec/cbs_h2645.c
>>>> +++ b/libavcodec/cbs_h2645.c
>>>> @@ -525,12 +525,6 @@ static int
>>>> cbs_h2645_split_fragment(CodedBitstreamContext *ctx,
>>>>        if (frag->data_size == 0)
>>>>            return 0;
>>>>
>>>> -    if (codec_id == AV_CODEC_ID_VVC) {
>>>> -        //we deactive picture header here to avoid reuse previous au's
>> ph.
>>>> -        CodedBitstreamH266Context *h266 = ctx->priv_data;
>>>> -        h266->priv.ph = NULL;
>>>> -    }
>>>> -
>>>>        if (header && frag->data[0] && codec_id == AV_CODEC_ID_H264) {
>>>>            // AVCC header.
>>>>            size_t size, start, end;
>>>> @@ -793,19 +787,20 @@ cbs_h266_replace_ps(6, SPS, sps,
>>>> sps_seq_parameter_set_id)
>>>>    cbs_h266_replace_ps(6, PPS, pps, pps_pic_parameter_set_id)
>>>>
>>>>    static int cbs_h266_replace_ph(CodedBitstreamContext *ctx,
>>>> -                               CodedBitstreamUnit *unit)
>>>> +                               CodedBitstreamUnit *unit,
>>>> +                               H266RawPictureHeader *ph)
>>>>    {
>>>>        CodedBitstreamH266Context *h266 = ctx->priv_data;
>>>>        int err;
>>>>
>>>> -    h266->priv.ph = NULL;
>>>>        err = ff_cbs_make_unit_refcounted(ctx, unit);
>>>>        if (err < 0)
>>>>            return err;
>>>> -    err = av_buffer_replace(&h266->priv.ph_ref, unit->content_ref);
>>>> +    av_assert0(unit->content_ref);
>>>> +    err = av_buffer_replace(&h266->ph_ref, unit->content_ref);
>>>>        if (err < 0)
>>>>            return err;
>>>> -    h266->priv.ph = (H266RawPH*)h266->priv.ph_ref->data;
>>>> +    h266->ph = ph;
>>>>        return 0;
>>>>    }
>>>>
>>>> @@ -1111,7 +1106,7 @@ static int
>>>> cbs_h266_read_nal_unit(CodedBitstreamContext *ctx,
>>>>                err = cbs_h266_read_ph(ctx, &gbc, ph);
>>>>                if (err < 0)
>>>>                    return err;
>>>> -            err = cbs_h266_replace_ph(ctx, unit);
>>>> +            err = cbs_h266_replace_ph(ctx, unit,
>> &ph->ph_picture_header);
>>>>                if (err < 0)
>>>>                    return err;
>>>>            }
>>>> @@ -1139,6 +1134,12 @@ static int
>>>> cbs_h266_read_nal_unit(CodedBitstreamContext *ctx,
>>>>                pos = get_bits_count(&gbc);
>>>>                len = unit->data_size;
>>>>
>>>> +            if (slice->header.sh_picture_header_in_slice_header_flag) {
>>>> +                err = cbs_h266_replace_ph(ctx, unit,
>>>> &slice->header.sh_picture_header);
>>>> +                if (err < 0)
>>>> +                    return err;
>>>> +            }
>>>> +
>>>>                slice->data_size = len - pos / 8;
>>>>                slice->data_ref  = av_buffer_ref(unit->data_ref);
>>>>                if (!slice->data_ref)
>>>> @@ -1640,7 +1641,7 @@ static int
>>>> cbs_h266_write_nal_unit(CodedBitstreamContext *ctx,
>>>>                if (err < 0)
>>>>                    return err;
>>>>
>>>> -            err = cbs_h266_replace_ph(ctx, unit);
>>>> +            err = cbs_h266_replace_ph(ctx, unit,
>> &ph->ph_picture_header);
>>>>                if (err < 0)
>>>>                    return err;
>>>>            }
>>>> @@ -1661,6 +1662,12 @@ static int
>>>> cbs_h266_write_nal_unit(CodedBitstreamContext *ctx,
>>>>                if (err < 0)
>>>>                    return err;
>>>>
>>>> +            if (slice->header.sh_picture_header_in_slice_header_flag) {
>>>> +                err = cbs_h266_replace_ph(ctx, unit,
>>>> &slice->header.sh_picture_header);
>>>> +                if (err < 0)
>>>> +                    return err;
>>>> +            }
>>>> +
>>>>                if (slice->data) {
>>>>                    err = cbs_h2645_write_slice_data(ctx, pbc,
>> slice->data,
>>>>                                                     slice->data_size,
>>>> @@ -1884,8 +1891,8 @@ static void cbs_h266_flush(CodedBitstreamContext
>>>> *ctx)
>>>>            av_buffer_unref(&h266->pps_ref[i]);
>>>>            h266->pps[i] = NULL;
>>>>        }
>>>> -    av_buffer_unref(&h266->priv.ph_ref);
>>>> -    h266->priv.ph = NULL;
>>>> +    av_buffer_unref(&h266->ph_ref);
>>>> +    h266->ph = NULL;
>>>>    }
>>>>
>>>>    static void cbs_h266_close(CodedBitstreamContext *ctx)
>>>> diff --git a/libavcodec/cbs_h266.h b/libavcodec/cbs_h266.h
>>>> index 03dfd4a954..54590748c3 100644
>>>> --- a/libavcodec/cbs_h266.h
>>>> +++ b/libavcodec/cbs_h266.h
>>>> @@ -581,8 +581,7 @@ typedef struct H266RawPredWeightTable {
>>>>        int16_t  delta_chroma_offset_l1[15][2];
>>>>    } H266RawPredWeightTable;
>>>>
>>>> -typedef struct  H266RawPH {
>>>> -    H266RawNALUnitHeader nal_unit_header;
>>>> +typedef struct  H266RawPictureHeader {
>>>>        uint8_t  ph_gdr_or_irap_pic_flag;
>>>>        uint8_t  ph_non_ref_pic_flag;
>>>>        uint8_t  ph_gdr_pic_flag;
>>>> @@ -670,12 +669,17 @@ typedef struct  H266RawPH {
>>>>
>>>>        uint8_t  ph_extension_length;
>>>>        uint8_t  ph_extension_data_byte[256];
>>>> +} H266RawPictureHeader;
>>>> +
>>>> +typedef struct H266RawPH {
>>>> +    H266RawNALUnitHeader nal_unit_header;
>>>> +    H266RawPictureHeader ph_picture_header;
>>>>    } H266RawPH;
>>>>
>>>>    typedef struct  H266RawSliceHeader {
>>>>        H266RawNALUnitHeader nal_unit_header;
>>>>        uint8_t  sh_picture_header_in_slice_header_flag;
>>>> -    H266RawPH sh_picture_header;
>>>> +    H266RawPictureHeader sh_picture_header;
>>>>
>>>>        uint16_t sh_subpic_id;
>>>>        uint16_t sh_slice_address;
>>>> @@ -770,14 +774,11 @@ typedef struct CodedBitstreamH266Context {
>>>>        AVBufferRef *vps_ref[VVC_MAX_VPS_COUNT];
>>>>        AVBufferRef *sps_ref[VVC_MAX_SPS_COUNT];
>>>>        AVBufferRef *pps_ref[VVC_MAX_PPS_COUNT];
>>>> +    AVBufferRef *ph_ref;
>>>>        H266RawVPS  *vps[VVC_MAX_SPS_COUNT];
>>>>        H266RawSPS  *sps[VVC_MAX_SPS_COUNT];
>>>>        H266RawPPS  *pps[VVC_MAX_PPS_COUNT];
>>>> -
>>>> -    struct {
>>>> -        AVBufferRef *ph_ref;
>>>> -        H266RawPH   *ph;
>>>> -    } priv;
>>>> +    H266RawPictureHeader *ph;
>>>>    } CodedBitstreamH266Context;
>>>>
>>>>    #endif /* AVCODEC_CBS_H266_H */
>>>> diff --git a/libavcodec/cbs_h266_syntax_template.c
>>>> b/libavcodec/cbs_h266_syntax_template.c
>>>> index 06f9f29e08..6d826eba49 100644
>>>> --- a/libavcodec/cbs_h266_syntax_template.c
>>>> +++ b/libavcodec/cbs_h266_syntax_template.c
>>>> @@ -2231,8 +2231,8 @@ static int FUNC(pred_weight_table)
>>>> (CodedBitstreamContext *ctx, RWContext *rw,
>>>>        return 0;
>>>>    }
>>>>
>>>> -static int FUNC(picture_header) (CodedBitstreamContext *ctx, RWContext
>>>> *rw,
>>>> -                                 H266RawPH *current){
>>>> +static int FUNC(picture_header_structure)(CodedBitstreamContext *ctx,
>>>> RWContext *rw,
>>>> +                                          H266RawPictureHeader
>> *current) {
>>>>        CodedBitstreamH266Context *h266 = ctx->priv_data;
>>>>        const H266RawVPS *vps;
>>>>        const H266RawSPS *sps;
>>>> @@ -2651,7 +2651,7 @@ static int FUNC(ph) (CodedBitstreamContext *ctx,
>>>> RWContext *rw,
>>>>        HEADER("Picture Header");
>>>>
>>>>        CHECK(FUNC(nal_unit_header) (ctx, rw, &current->nal_unit_header,
>>>> VVC_PH_NUT));
>>>> -    CHECK(FUNC(picture_header) (ctx, rw, current));
>>>> +    CHECK(FUNC(picture_header_structure) (ctx, rw,
>>>> &current->ph_picture_header));
>>>>        CHECK(FUNC(rbsp_trailing_bits) (ctx, rw));
>>>>        return 0;
>>>>    }
>>>> @@ -2662,7 +2662,7 @@ static int FUNC(slice_header)
>> (CodedBitstreamContext
>>>> *ctx, RWContext *rw,
>>>>        CodedBitstreamH266Context *h266 = ctx->priv_data;
>>>>        const H266RawSPS *sps;
>>>>        const H266RawPPS *pps;
>>>> -    const H266RawPH *ph;
>>>> +    const H266RawPictureHeader *ph;
>>>>        const H266RefPicLists *ref_pic_lists;
>>>>        int err, i;
>>>>        uint8_t nal_unit_type, qp_bd_offset;
>>>> @@ -2675,12 +2675,11 @@ static int FUNC(slice_header)
>>>> (CodedBitstreamContext *ctx, RWContext *rw,
>>>>
>>>>        flag(sh_picture_header_in_slice_header_flag);
>>>>        if (current->sh_picture_header_in_slice_header_flag) {
>>>> -        CHECK(FUNC(picture_header) (ctx, rw,
>>>> &current->sh_picture_header));
>>>> +        //7.4.8 if sh_picture_header_in_slice_header_flag is true, we
>> do
>>>> not have a PH NAL unit
>>>> +        CHECK(FUNC(picture_header_structure) (ctx, rw,
>>>> &current->sh_picture_header));
>>>>            ph = &current->sh_picture_header;
>>>> -        //7.4.8 if sh_picture_header_in_slice_header_flag is true, we
>> do
>>>> not have PH NAL unit
>>>> -        h266->priv.ph = NULL;
>>>>        } else {
>>>> -        ph = h266->priv.ph;
>>>> +        ph = h266->ph;
>>>>
>>> Based on the following items in the spec, all slices will have the same
>>> picture header.  Maybe we can remove sh_picture_header and just keep
>>> h266->ph.
>>>
>>> 1. The PH syntax structure contains information that is common for all
>>> slices of the current picture.
>>> 2. It is a requirement of bitstream conformance that the value of
>>> sh_picture_header_in_slice_header_flag shall be the same in all coded
>>> slices in a CLVS.
>>> 3. When sh_picture_header_in_slice_header_flag is equal to 1 for a coded
>>> slice, it is a requirement of bitstream conformance that no NAL unit with
>>> nal_unit_type equal to PH_NUT shall be present in the CLVS.
>>
>> CodedBitstreamH266Context holds the state of an hypothetical decoder
>> after the last unit fed to CBS was parsed. If you were to feed it two
>> PUs in a row, h266->ph will be a pointer to the H266RawPictureHeader
>> relevant to the second PU (Either picture_header from the last PH NALU,
>> or picture_header from the last Slice NALU).
>>
>> We can't store values read from the bitstream there, since if they are
>> overwritten, then they will be unavailable to callers. It's only meant
>> to store pointers and references to values and structs stored in units
>> within the fragment, or derived values.
>>
> All slices in one picture will have the same picture header content.
> if sh_picture_header_in_slice_header_flag  == 0, we only have one PH NAL.
> so we will not overwrite.
> if sh_picture_header_in_slice_header_flag  == 1, we only need to parse the
> PH in the first slice, then use memcpy to do a sanity check for later
> slices.

We need to parse the entire slice header in all cases, picture header 
included if present, to advance the getbitcontext and reach the 
following fields.

> 
> BTW, do we need constify the raw vps, sps, pps and ph
> in CodedBitstreamH266Context

Yeah, probably a good idea.


More information about the ffmpeg-devel mailing list