[FFmpeg-devel] [PATCH v2 06/11] avcodec: add cbs for h266/vvc

Sat Jan 9 23:28:58 EET 2021

On 09/01/2021 07:34, Nuo Mi wrote:
> ---
>   configure                             |    2 +
>   libavcodec/Makefile                   |    1 +
>   libavcodec/cbs.c                      |    6 +
>   libavcodec/cbs_h2645.c                |  373 ++++
>   libavcodec/cbs_h266.h                 |  840 ++++++++
>   libavcodec/cbs_h266_syntax_template.c | 2761 +++++++++++++++++++++++++
>   libavcodec/cbs_internal.h             |    3 +-
>   7 files changed, 3985 insertions(+), 1 deletion(-)
>   create mode 100644 libavcodec/cbs_h266.h
>   create mode 100644 libavcodec/cbs_h266_syntax_template.c
> 
> ...
> @@ -920,6 +934,135 @@ static int cbs_h265_read_nal_unit(CodedBitstreamContext *ctx,
>       return 0;
>   }
>   
> +static int cbs_h266_replace_ph(CodedBitstreamContext *ctx,
> +                               CodedBitstreamUnit *unit)
> +{
> +    CodedBitstreamH266Context *priv = ctx->priv_data;
> +    int err;
> +    err = ff_cbs_make_unit_refcounted(ctx, unit);
> +    if (err < 0)
> +        return err;
> +    av_buffer_unref(&priv->ph_ref);
> +    av_assert0(unit->content_ref);
> +    priv->ph_ref = av_buffer_ref(unit->content_ref);
> +    if (!priv->ph_ref)
> +        return AVERROR(ENOMEM);
> +    priv->active_ph = priv->ph = (H266RawPH *)priv->ph_ref->data;

Why are there too variables here?  They seem to always be the same.

> +    return 0;
> +}
> +
> ...
> +
>   static int cbs_h2645_assemble_fragment(CodedBitstreamContext *ctx,
>                                          CodedBitstreamFragment *frag)
>   {
> @@ -1248,6 +1494,11 @@ static int cbs_h2645_assemble_fragment(CodedBitstreamContext *ctx,
>                (unit->type == HEVC_NAL_VPS ||
>                 unit->type == HEVC_NAL_SPS ||
>                 unit->type == HEVC_NAL_PPS)) ||
> +            (ctx->codec->codec_id == AV_CODEC_ID_VVC &&
> +             (unit->type == VVC_VPS_NUT ||
> +              unit->type == VVC_SPS_NUT ||
> +              unit->type == VVC_PPS_NUT ||
> +              unit->type == VVC_PREFIX_APS_NUT)) ||

Also various other things, which might be here since passthrough does not require decomposition to be implemented.

This test is getting unwieldy - maybe it should be moved to a new function cbs_h2645_unit_requires_zero_byte().

>               i == 0 /* (Assume this is the start of an access unit.) */) {
>               // zero_byte
>               data[dp++] = 0;
> @@ -1362,6 +1613,41 @@ static void cbs_h265_close(CodedBitstreamContext *ctx)
>           av_buffer_unref(&h265->pps_ref[i]);
>   }
>   
> ...
> @@ -1506,6 +1792,77 @@ static const CodedBitstreamUnitTypeDescriptor cbs_h265_unit_types[] = {
>       CBS_UNIT_TYPE_END_OF_LIST
>   };
>   
> +static void cbs_h266_free_sei(void *opaque, uint8_t *content)
> +{
> +}

So as implemented currently it is POD?

> +
> +static const CodedBitstreamUnitTypeDescriptor cbs_h266_unit_types[] = {
> ...
> +
> +typedef struct H266RawNALUnitHeader {
> +    uint8_t nuh_layer_id;
> +    uint8_t nal_unit_type;
> +    uint8_t nuh_temporal_id_plus1;
> +} H266RawNALUnitHeader;
> +
> +typedef struct H266GeneralConstraintsInfo {
> +    uint8_t gci_present_flag;
> +    
> ...
> +
> +    /* loop filter */
> +    uint8_t gci_no_sao_constraint_flag;
> +    uint8_t gci_no_alf_constraint_flag;
> +    uint8_t gci_no_ccalf_constraint_flag;
> +    uint8_t gci_no_lmcs_constraint_flag;
> +    uint8_t gci_no_ladf_constraint_flag;
> +    uint8_t gci_no_virtual_boundaries_constraint_flag;
> +    uint8_t gci_num_reserved_bits;

Also needs gci_reserved_zero_bit[], so that we can handle streams with future constraints rather than just rejecting them.

"Although the value of gci_num_reserved_bits is required to be equal to 0 in this version
of this Specification, decoders conforming to this version of this Specification shall allow the value of
gci_num_reserved_bits greater than 0 to appear in the syntax and shall ignore the values of all the gci_reserved_zero_bit[ i ]
syntax elements when gci_num_reserved_bits is greater than 0."

> +} H266GeneralConstraintsInfo;
> +
> ...
> +
> +typedef struct H266RawVPS {
> +    H266RawNALUnitHeader nal_unit_header;
> +
> +    uint8_t vps_video_parameter_set_id;
> +
> +    uint8_t vps_max_layers_minus1;
> +    uint8_t vps_max_sublayers_minus1;
> +    /*TODO add more*/
> +    H266RawExtensionData extension_data;
> +} H266RawVPS;

You don't actually use the VPS structure at all, so given that it isn't parsed I think it would be cleaner to just not include it here at all.

> +//
> ...
> +
> +typedef struct H266RawSEIBufferingPeriod {
> +    uint8_t  bp_seq_parameter_set_id;
> +} H266RawSEIBufferingPeriod;
> +
> +typedef struct H266RawSEIPicTiming {
> +} H266RawSEIPicTiming;
> +
> +typedef struct H266RawSEIPanScanRect {
> +} H266RawSEIPanScanRect;
> +
> +typedef struct H266RawSEIUserDataRegistered {
> +
> +} H266RawSEIUserDataRegistered;
> +
> +typedef struct H266RawSEIUserDataUnregistered {
> +} H266RawSEIUserDataUnregistered;
> +
> +typedef struct H266RawSEIRecoveryPoint {
> +} H266RawSEIRecoveryPoint;
> +
> +typedef struct H266RawSEIDisplayOrientation {
> +} H266RawSEIDisplayOrientation;
> +
> +typedef struct H266RawSEIActiveParameterSets {
> +} H266RawSEIActiveParameterSets;
> +
> +typedef struct H266RawSEIDecodedPictureHash {
> +    uint8_t  dph_sei_hash_type;
> +    uint8_t  dph_sei_single_component_flag;
> +    uint8_t  dph_sei_picture_md5[3][16];
> +    uint16_t dph_sei_picture_crc[3];
> +    uint32_t dph_sei_picture_checksum[3];
> +} H266RawSEIDecodedPictureHash;
> +
> +typedef struct H266RawSEITimeCode {
> +} H266RawSEITimeCode;
> +
> +typedef struct H266RawSEIMasteringDisplayColourVolume {
> +} H266RawSEIMasteringDisplayColourVolume;
> +

Don't include all of these empty structures.

(Half of them are implemented in the common SEI patches on the ML now anyway.)

> ...
> +#endif /* AVCODEC_CBS_H266_H */
> diff --git a/libavcodec/cbs_h266_syntax_template.c b/libavcodec/cbs_h266_syntax_template.c
> new file mode 100644
> index 0000000000..6a6defc8a5
> --- /dev/null
> +++ b/libavcodec/cbs_h266_syntax_template.c
> @@ -0,0 +1,2761 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef H266_CEIL
> +#define H266_CEIL(v, a) (((v) + (a) - 1) / (a))
> +#endif

If it's useful to have this separately then put it in cbs_h2645.c rather than doubled in the template.  (See for example cbs_av1_tile_log2().)

> +
> +#ifndef H266_HAS_MULTI_SLICES_IN_TILE
> +#define H266_HAS_MULTI_SLICES_IN_TILE(pps, tile_y, slice) \
> +    (pps->pps_slice_width_in_tiles_minus1[slice] == 0 && \
> +    pps->pps_slice_height_in_tiles_minus1[slice] == 0 && \
> +    pps->pps_tile_row_height_minus1[tile_y] > 0)
> +#endif

This seems to be used exactly once and match the syntax in the standard, so I don't think it should be separate.

> +
> ...
> +
> +static int FUNC(byte_alignment)(CodedBitstreamContext *ctx, RWContext *rw)
> +{
> +    int err;
> +
> +    fixed(1, alignment_bit_equal_to_one, 1);
> +    while (byte_alignment(rw) != 0)
> +        fixed(1, alignment_bit_equal_to_zero, 0);

Names aren't right.

> +    return 0;
> +}
> +
> +static int FUNC(general_constraints_info)(CodedBitstreamContext *ctx,
> +                                          RWContext *rw,
> +                                          H266GeneralConstraintsInfo *current)
> +{
> +    int err, i;
> +
> +    flag(gci_present_flag);
> +    if (current->gci_present_flag) {
> +        ...
> +
> +        /* loop filter */
> +        flag(gci_no_sao_constraint_flag);
> +        flag(gci_no_alf_constraint_flag);
> +        flag(gci_no_ccalf_constraint_flag);
> +        flag(gci_no_lmcs_constraint_flag);
> +        flag(gci_no_ladf_constraint_flag);
> +        flag(gci_no_virtual_boundaries_constraint_flag);
> +        ub(8, gci_num_reserved_bits);
> +        for (i = 0; i < current->gci_num_reserved_bits; i++) {
> +            fixed(1, gci_reserved_zero_bit, 1);

As noted above, needs to be passed through for future compatibility.

> +        }
> +    }
> +    while (byte_alignment(rw) != 0)
> +        fixed(1, gci_alignment_zero_bit, 0);
> +    return 0;
> +}
> +
> +static int FUNC(profile_tier_level)(CodedBitstreamContext *ctx, RWContext *rw,
> +                                    H266RawProfileTierLevel *current,
> +                                    int profile_tier_present_flag,
> +                                    int max_num_sub_layers_minus1)
> +{
> +    int err, i;
> +
> +    if (profile_tier_present_flag) {
> +        ub(7, general_profile_idc);
> +        flag(general_tier_flag);
> +    }
> +    ub(8, general_level_idc);
> +    flag(ptl_frame_only_constraint_flag);
> +    flag(ptl_multilayer_enabled_flag);
> +    if (profile_tier_present_flag) {
> +        CHECK(FUNC(general_constraints_info)(ctx, rw,
> +                                             &current->general_constraints_info));
> +    }
> +    for (i = max_num_sub_layers_minus1 - 1; i >= 0; i--)
> +        flags(ptl_sublayer_level_present_flag[i], 1, i);
> +    while (byte_alignment(rw) != 0)
> +        fixed(1, ptl_reserved_zero_bit, 0);

Need not be zero, has to be passed through.

> +    for (i = max_num_sub_layers_minus1 - 1; i >= 0; i--)
> +        if (current->ptl_sublayer_level_present_flag[i])
> +            ubs(8, sublayer_level_idc[i], 1, i);
> +    if (profile_tier_present_flag) {
> +        ub(8, ptl_num_sub_profiles);
> +        for (i = 0; i < current->ptl_num_sub_profiles; i++)
> +            ubs(32, general_sub_profile_idc[i], 1, i);
> +    }
> +    return 0;
> +}
> +
> +static int FUNC(vui_parameters)(CodedBitstreamContext *ctx, RWContext *rw,
> +                                H266RawVUI *current)
> +{
> +    int err;
> +
> +    flag(vui_progressive_source_flag);
> +    flag(vui_interlaced_source_flag);
> +    flag(vui_non_packed_constraint_flag);
> +    flag(vui_non_projected_constraint_flag);
> +    flag(vui_aspect_ratio_info_present_flag);
> +    if (current->vui_aspect_ratio_info_present_flag) {
> +        flag(vui_aspect_ratio_constant_flag);
> +        ub(8, vui_aspect_ratio_idc);
> +        if (current->vui_aspect_ratio_idc == 255) {
> +            ub(16, vui_sar_width);
> +            ub(16, vui_sar_height);
> +        }
> +    } else {
> +        infer(vui_aspect_ratio_constant_flag, 0);
> +        infer(vui_aspect_ratio_idc, 0);
> +    }
> +    flag(vui_overscan_info_present_flag);
> +    if (current->vui_overscan_info_present_flag)
> +        flag(vui_overscan_appropriate_flag);
> +    flag(vui_colour_description_present_flag);
> +    if (current->vui_colour_description_present_flag) {
> +        ub(8, vui_colour_primaries);
> +        ub(8, vui_transfer_characteristics);
> +        ub(8, vui_matrix_coeffs);
> +        flag(vui_full_range_flag);
> +    } else {
> +        infer(vui_colour_primaries, 2);
> +        infer(vui_transfer_characteristics, 2);
> +        infer(vui_matrix_coeffs, 2);
> +        infer(vui_full_range_flag, 0);
> +    }
> +    flag(vui_chroma_loc_info_present_flag);
> +    if (current->vui_chroma_loc_info_present_flag) {
> +        if (current->vui_progressive_source_flag &&
> +            !current->vui_interlaced_source_flag) {
> +            ue(vui_chroma_sample_loc_type_frame, 0, 6);
> +        } else {
> +            ue(vui_chroma_sample_loc_type_top_field, 0, 6);
> +            ue(vui_chroma_sample_loc_type_bottom_field,  0, 6);
> +        }
> +    }

These are inferred as 6 when not present?

> +
> +    return 0;
> +}
> +
> ...
> +
> +static int FUNC(vui_payload)(CodedBitstreamContext *ctx, RWContext *rw,
> +                             H266RawVUI *current, uint16_t vui_payload_size)
> +{
> +    int err;
> +    int start_position, current_position;
> +#ifdef READ
> +    start_position = get_bits_count(rw);
> +#else
> +    start_position = put_bits_count(rw);
> +#endif
> +    CHECK(FUNC(vui_parameters)(ctx, rw, current));
> +
> +#ifdef READ
> +    current_position = get_bits_count(rw) - start_position;
> +#else
> +    current_position = put_bits_count(rw) - start_position;
> +#endif

Looks like another case for bit_position(RWContext) as in <https://lists.ffmpeg.org/pipermail/ffmpeg-devel/2021-January/274208.html>.

> +    if (current_position < 8 * vui_payload_size) {
> +        CHECK(FUNC(payload_extension)(ctx, rw, &current->extension_data,
> +                                      vui_payload_size, current_position));
> +        fixed(1, vui_payload_bit_equal_to_one, 1);
> +        while (byte_alignment(rw) != 0)
> +            fixed(1, vui_payload_bit_equal_to_zero, 0);
> +    }
> +    return 0;
> +}
> +
> ...
> +
> +static int FUNC(ref_pic_list_struct)(CodedBitstreamContext *ctx, RWContext *rw,
> +                                     H266RefPicListStruct *current,
> +                                     uint8_t list_idx, uint8_t rpls_idx,
> +                                     const H266RawSPS *sps)
> +{
> +
> +    int err, i, j;
> +    ue(num_ref_entries, 0, VVC_MAX_DPB_SIZE + 13);

The + 13 is appearing in the sizes too.  Can we give it a better name?

> +    if (sps->sps_long_term_ref_pics_flag &&
> +        rpls_idx < sps->sps_num_ref_pic_lists[list_idx] &&
> +        current->num_ref_entries > 0)
> +        flag(ltrp_in_header_flag);
> +    if (sps->sps_long_term_ref_pics_flag &&
> +        rpls_idx == sps->sps_num_ref_pic_lists[list_idx])
> +        infer(ltrp_in_header_flag, 1);
> +    for (i = 0, j = 0; i < current->num_ref_entries; i++) {
> +        if (sps->sps_inter_layer_prediction_enabled_flag)
> +            flags(inter_layer_ref_pic_flag[i], 1, i);
> +        else
> +            infer(inter_layer_ref_pic_flag[i], 0);
> +
> +        if (!current->inter_layer_ref_pic_flag[i]) {
> +            if (sps->sps_long_term_ref_pics_flag)
> +                flags(st_ref_pic_flag[i],  1, i);
> +            else
> +                infer(st_ref_pic_flag[i], 1);
> +            if (current->st_ref_pic_flag[i]) {
> +                int abs_delta_poc_st;
> +                ues(abs_delta_poc_st[i], 0, MAX_UINT_BITS(15), 1, i);
> +                if ((sps->sps_weighted_pred_flag ||
> +                    sps->sps_weighted_bipred_flag) && i != 0)
> +                    abs_delta_poc_st = current->abs_delta_poc_st[i];
> +                else
> +                    abs_delta_poc_st = current->abs_delta_poc_st[i] + 1;
> +                if (abs_delta_poc_st > 0)
> +                    flags(strp_entry_sign_flag[i], 1, i);
> +            } else {
> +                if (!current->ltrp_in_header_flag) {
> +                    uint8_t bits = sps->sps_log2_max_pic_order_cnt_lsb_minus4 + 4;
> +                    ubs(bits, rpls_poc_lsb_lt[j], 1, j);
> +                    j++;
> +                }
> +            }
> +        } else {
> +            //todo: check range when vps parser is ready.
> +            ues(ilrp_idx[i], 0, MAX_UINT_BITS(8), 1, i);
> +        }
> +    }
> +    return 0;
> +}
> +
> ...
> +
> +static int FUNC(sublayer_hrd_parameters)(CodedBitstreamContext *ctx, RWContext *rw,
> +                     H266RawSubLayerHRDParameters *current, int sublayer_id,
> +                     const H266RawGeneralTimingHrdParameters *general)
> +{
> +    int err, i;
> +    for (i = 0; i <= general->hrd_cpb_cnt_minus1; i++) {
> +        ues(bit_rate_value_minus1[i], 0, UINT32_MAX - 1, 2, sublayer_id, i);
> +        ues(cpb_size_value_minus1[i], 0, UINT32_MAX - 1, 2, sublayer_id, i);
> +        if (general->general_du_hrd_params_present_flag) {
> +            ues(cpb_size_du_value_minus1[i],
> +                0, UINT32_MAX - 1, 2, sublayer_id, i);
> +            ues(bit_rate_du_value_minus1[i],
> +                0, UINT32_MAX - 1, 2, sublayer_id, i);
> +        }
> +        flags(cbr_flag[i], 2, sublayer_id, i);
> +    }

On everything here you've correctly got that there are two subscripts, but there is actually only one in the variable - perhaps the argument shouldn't be a substructure?

> +    return 0;
> +}
> +
> +static int FUNC(ols_timing_hrd_parameters)(CodedBitstreamContext *ctx,
> +                RWContext *rw, H266RawOlsTimingHrdParameters *current,
> +                uint8_t first_sublayer, uint8_t max_sublayers_minus1,
> +                const H266RawGeneralTimingHrdParameters *general)
> +{
> +    int err, i;
> +    for (i = first_sublayer; i <= max_sublayers_minus1; i++) {
> +        flags(fixed_pic_rate_general_flag[i], 1, i);
> +        if (!current->fixed_pic_rate_general_flag[i])
> +            flags(fixed_pic_rate_within_cvs_flag[i], 1, i);
> +        else
> +            infer(fixed_pic_rate_within_cvs_flag[i], 1);
> +        if (current->fixed_pic_rate_within_cvs_flag[i]) {
> +            ues(elemental_duration_in_tc_minus1[i], 0, 2047, 1, i);
> +            infer(low_delay_hrd_flag[i], 0);
> +        } else if ((general->general_nal_hrd_params_present_flag ||
> +            general->general_vcl_hrd_params_present_flag) &&
> +            general->hrd_cpb_cnt_minus1 == 0) {
> +            flags(low_delay_hrd_flag[i], 1, i);
> +        } else {
> +            infer(low_delay_hrd_flag[i], 0);
> +        }
> +        if (general->general_nal_hrd_params_present_flag)
> +            CHECK(FUNC(sublayer_hrd_parameters)(ctx, rw,
> +                                                &current->nal_sub_layer_hrd_parameters[i],
> +                                                i, general));
> +        if (general->general_vcl_hrd_params_present_flag)
> +            CHECK(FUNC(sublayer_hrd_parameters)(ctx, rw,
> +                       &current->nal_sub_layer_hrd_parameters[i], i, general));
> +    }
> +    return 0;
> +}
> +
> +static int FUNC(sps_subpic)(CodedBitstreamContext *ctx, RWContext *rw,
> +                     H266RawSPS *current)

This function doesn't actually exist, and all splitting it out seems to be doing is making the template look less like the standard?

> +{
> ...
> +}
> +
> +static int FUNC(sps)(CodedBitstreamContext *ctx, RWContext *rw,
> +                     H266RawSPS *current)
> +{
> +    CodedBitstreamH266Context *h266 = ctx->priv_data;
> +    const H266RawVPS *vps;
> +    int err, i, j;
> +    unsigned int ctb_log2_size_y, min_cb_log2_size_y,
> +                 min_qt_log2_size_intra_y, min_qt_log2_size_inter_y,
> +                 ctb_size_y, max_num_merge_cand;
> +    uint8_t qp_bd_offset;
> +
> +    static const uint8_t h266_sub_width_c[] = {
> +        1, 2, 2, 1
> +    };
> +    static const uint8_t h266_sub_height_c[] = {
> +        1, 2, 1, 1
> +    };
> +
> +    HEADER("Sequence Parameter Set");
> +
> +    CHECK(FUNC(nal_unit_header)(ctx, rw,
> +                                &current->nal_unit_header, VVC_SPS_NUT));
> +
> +    ub(4, sps_seq_parameter_set_id);
> +    ub(4, sps_video_parameter_set_id);
> +    if (!current->sps_video_parameter_set_id && !h266->vps[0]) {
> +        H266RawVPS *vps0;
> +        h266->vps_ref[0] = av_buffer_alloc(sizeof(H266RawVPS));
> +        if (!h266->vps_ref[0])
> +            return AVERROR(ENOMEM);
> +        vps0 = h266->vps[0] = (H266RawVPS *)h266->vps_ref[0]->data;
> +        vps0->vps_max_layers_minus1 = 0;
> +        //todo: infer all things in 7.4.3.4 sps_video_parameter_set_id paragraph
> +    }
> +    h266->active_vps = vps = h266->vps[current->sps_video_parameter_set_id];

You don't actually use the vps at all, so why bother with this?

> +
> +    u(3, sps_max_sublayers_minus1, 0, VVC_MAX_SUBLAYERS - 1);
> +    u(2, sps_chroma_format_idc, 0, 3);
> +    u(2, sps_log2_ctu_size_minus5, 0, 3);
> +    ctb_log2_size_y = current->sps_log2_ctu_size_minus5 + 5;
> +    ctb_size_y = 1 << ctb_log2_size_y;
> +
> ...
> +
> +    u(2, sps_num_extra_ph_bytes, 0, 2);
> +    for (i = 0; i < (current->sps_num_extra_ph_bytes * 8); i++) {
> +        flags(sps_extra_ph_bit_present_flag[i], 1, i);
> +    }
> +
> +    u(2, sps_num_extra_sh_bytes, 0, 0);

Between zero and zero?

> +    for (i = 0; i < (current->sps_num_extra_sh_bytes * 8); i++) {
> +        flags(sps_extra_sh_bit_present_flag[i], 1, i);
> +    }
> +
> +    ...
> +
> +    flag(sps_affine_enabled_flag);
> +    if (current->sps_affine_enabled_flag) {
> +        ue(sps_five_minus_max_num_subblock_merge_cand,
> +           0, 5 - current->sps_sbtmvp_enabled_flag);
> +        flag(sps_6param_affine_enabled_flag);
> +        if (current->sps_amvr_enabled_flag)
> +            flag(sps_affine_amvr_enabled_flag);
> +        else
> +            infer(sps_affine_amvr_enabled_flag, 0);
> +        flag(sps_affine_prof_enabled_flag);
> +        if (current->sps_affine_prof_enabled_flag)
> +            flag(sps_prof_control_present_in_ph_flag);
> +        else
> +            infer(sps_prof_control_present_in_ph_flag, 0);
> +    } else {
> +        infer(sps_five_minus_max_num_subblock_merge_cand, 0);

This inference isn't right - there is also a temporal subblock merge candidate.

> +        infer(sps_6param_affine_enabled_flag, 0);
> +        infer(sps_affine_amvr_enabled_flag, 0);
> +        infer(sps_affine_prof_enabled_flag, 0);
> +        infer(sps_prof_control_present_in_ph_flag, 0);
> +    }
> +
> ...
> +
> +    flag(sps_field_seq_flag);
> +    flag(sps_vui_parameters_present_flag);
> +    if (current->sps_vui_parameters_present_flag) {
> +        ue(sps_vui_payload_size_minus1, 0, 1023);
> +        while (byte_alignment(rw) != 0)
> +            fixed(1, sps_vui_alignment_zero_bit, 0);
> +        CHECK(FUNC(vui_payload)(ctx, rw, &current->vui,
> +                                current->sps_vui_payload_size_minus1 + 1));
> +    }
> +
> +    flag(sps_extension_flag);
> +    if (current->sps_extension_flag)
> +        CHECK(FUNC(extension_data)(ctx, rw, &current->extension_data));
> +
> +    CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
> +
> +    return 0;
> +}

(I stopped here on trying to read this in detail.  Even above, I haven't checked all of the bounds.)

> ...
> +
> +static int FUNC(aud)(CodedBitstreamContext *ctx, RWContext *rw,
> +                     H266RawAUD *current)
> +{
> +    int err;
> +
> +    HEADER("Access Unit Delimiter");
> +
> +    CHECK(FUNC(nal_unit_header)(ctx, rw,
> +                                &current->nal_unit_header, VVC_AUD_NUT));
> +
> +    flag(aud_irap_or_gdr_flag);
> +    u(3, aud_pic_type, 0, 2);

Another one for future-extension compatibility:

"Decoders conforming to this version of this Specification shall ignore reserved values of aud_pic_type."

> +
> +    CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
> +    return 0;
> +}
> +
> ...
> +
> +static int FUNC(slice_header)(CodedBitstreamContext *ctx, RWContext *rw,
> +                              H266RawSliceHeader *current)
> +{
> +    CodedBitstreamH266Context *h266 = ctx->priv_data;
> +    const H266RawSPS *sps;
> +    const H266RawPPS *pps;
> +    const H266RawPH  *ph;
> +    const H266RefPicLists *ref_pic_lists;
> +    int      err, i;
> +    uint8_t  nal_unit_type, qp_bd_offset;
> +    uint16_t curr_subpic_idx;
> +    uint16_t num_slices_in_subpic;
> +
> +    HEADER("Slice Header");
> +
> +    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header, -1));
> +
> +    flag(sh_picture_header_in_slice_header_flag);
> +    if (current->sh_picture_header_in_slice_header_flag){
> +        CHECK(FUNC(picture_header)(ctx, rw, &current->sh_picture_header));
> +        if (!h266->ph_ref) {
> +            h266->ph_ref = av_buffer_allocz(sizeof(H266RawPH));
> +            if (!h266->ph_ref)
> +                return AVERROR(ENOMEM);
> +            h266->active_ph = h266->ph = (H266RawPH*)h266->ph_ref->data;
> +        }
> +        memcpy(h266->ph, &current->sh_picture_header, sizeof(H266RawPH));
> +    }
> +    sps = h266->active_sps;
> +    pps = h266->active_pps;
> +    ph  = h266->active_ph;

This is going to do something horrible if you have a frame where you lose the NAL unit containing the picture header but it still has other slices.

Can we detect and avoid that case?

> +
> +    if (!sps || !pps || !ph) {
> +        av_log(ctx->log_ctx, AV_LOG_ERROR, "no sps(%p)/pps(%p)/ph(%p).\n", sps, pps, ph);

This message is definitely user-facing - it should be clearer.

Maybe "SPS id %d not available", "PPS id %d not available", "Picture header not available"?

> +        return AVERROR_INVALIDDATA;
> +    }
> +
> ...
> +
> +    return 0;
> +}
> +
> +static int FUNC(sei_decoded_picture_hash)(CodedBitstreamContext *ctx, RWContext *rw,
> +                                          H266RawSEIDecodedPictureHash *current)
> +{
> +    int err, c, i;
> +
> +    HEADER("Decoded Picture Hash");
> +
> +    u(8, dph_sei_hash_type, 0, 2);
> +    flag(dph_sei_single_component_flag);
> +    fixed(7, ph_sei_reserved_zero_7bits, 0);

"shall ignore"

> +
> +    for (c = 0; c < (current->dph_sei_single_component_flag ? 1 : 3); c++) {

This variable is called cIdx, and it does appear in strings.

> +        if (current->dph_sei_hash_type == 0) {
> +            for (i = 0; i < 16; i++)
> +                us(8, dph_sei_picture_md5[c][i], 0x00, 0xff, 2, c, i);
> +        } else if (current->dph_sei_hash_type == 1) {
> +            us(16, dph_sei_picture_crc[c], 0x0000, 0xffff, 1, c);
> +        } else if (current->dph_sei_hash_type == 2) {
> +            us(32, dph_sei_picture_checksum[c], 0x00000000, 0xffffffff, 1, c);
> +        }
> +    }
> +    return 0;
> +}
> +
> ...

Thanks,

- Mark