[FFmpeg-devel] [PATCH v2 06/11] avcodec: add cbs for h266/vvc
Nuo Mi
nuomi2021 at gmail.com
Sun Jan 10 11:10:33 EET 2021
On Sun, Jan 10, 2021 at 5:34 AM Mark Thompson <sw at jkqxz.net> wrote:
> On 09/01/2021 07:34, Nuo Mi wrote:
> > ---
> > configure | 2 +
> > libavcodec/Makefile | 1 +
> > libavcodec/cbs.c | 6 +
> > libavcodec/cbs_h2645.c | 373 ++++
> > libavcodec/cbs_h266.h | 840 ++++++++
> > libavcodec/cbs_h266_syntax_template.c | 2761 +++++++++++++++++++++++++
> > libavcodec/cbs_internal.h | 3 +-
> > 7 files changed, 3985 insertions(+), 1 deletion(-)
> > create mode 100644 libavcodec/cbs_h266.h
> > create mode 100644 libavcodec/cbs_h266_syntax_template.c
> >
> > ...
> > @@ -920,6 +934,135 @@ static int
> cbs_h265_read_nal_unit(CodedBitstreamContext *ctx,
> > return 0;
> > }
> >
> > +static int cbs_h266_replace_ph(CodedBitstreamContext *ctx,
> > + CodedBitstreamUnit *unit)
> > +{
> > + CodedBitstreamH266Context *priv = ctx->priv_data;
> > + int err;
> > + err = ff_cbs_make_unit_refcounted(ctx, unit);
> > + if (err < 0)
> > + return err;
> > + av_buffer_unref(&priv->ph_ref);
> > + av_assert0(unit->content_ref);
> > + priv->ph_ref = av_buffer_ref(unit->content_ref);
> > + if (!priv->ph_ref)
> > + return AVERROR(ENOMEM);
> > + priv->active_ph = priv->ph = (H266RawPH *)priv->ph_ref->data;
>
> Why are there too variables here? They seem to always be the same.
>
priv->active_ph is read-only, priv->ph is writeable pointer. I can change
to priv->ph only if you prefer.
>
> > + return 0;
> > +}
> > +
> > ...
> > +
> > static int cbs_h2645_assemble_fragment(CodedBitstreamContext *ctx,
> > CodedBitstreamFragment *frag)
> > {
> > @@ -1248,6 +1494,11 @@ static int
> cbs_h2645_assemble_fragment(CodedBitstreamContext *ctx,
> > (unit->type == HEVC_NAL_VPS ||
> > unit->type == HEVC_NAL_SPS ||
> > unit->type == HEVC_NAL_PPS)) ||
> > + (ctx->codec->codec_id == AV_CODEC_ID_VVC &&
> > + (unit->type == VVC_VPS_NUT ||
> > + unit->type == VVC_SPS_NUT ||
> > + unit->type == VVC_PPS_NUT ||
> > + unit->type == VVC_PREFIX_APS_NUT)) ||
>
> Also various other things, which might be here since passthrough does not
> require decomposition to be implemented.
>
> This test is getting unwieldy - maybe it should be moved to a new function
> cbs_h2645_unit_requires_zero_byte().
>
done
> > i == 0 /* (Assume this is the start of an access unit.)
> */) {
> > // zero_byte
> > data[dp++] = 0;
> > @@ -1362,6 +1613,41 @@ static void cbs_h265_close(CodedBitstreamContext
> *ctx)
> > av_buffer_unref(&h265->pps_ref[i]);
> > }
> >
> > ...
> > @@ -1506,6 +1792,77 @@ static const CodedBitstreamUnitTypeDescriptor
> cbs_h265_unit_types[] = {
> > CBS_UNIT_TYPE_END_OF_LIST
> > };
> >
> > +static void cbs_h266_free_sei(void *opaque, uint8_t *content)
> > +{
> > +}
>
> So as implemented currently it is POD?
>
Yes, currently only md5 sei implemented. We can do more after your react
patch merged.
>
> > +
> > +static const CodedBitstreamUnitTypeDescriptor cbs_h266_unit_types[] = {
> > ...
> > +
> > +typedef struct H266RawNALUnitHeader {
> > + uint8_t nuh_layer_id;
> > + uint8_t nal_unit_type;
> > + uint8_t nuh_temporal_id_plus1;
> > +} H266RawNALUnitHeader;
> > +
> > +typedef struct H266GeneralConstraintsInfo {
> > + uint8_t gci_present_flag;
> > +
> > ...
> > +
> > + /* loop filter */
> > + uint8_t gci_no_sao_constraint_flag;
> > + uint8_t gci_no_alf_constraint_flag;
> > + uint8_t gci_no_ccalf_constraint_flag;
> > + uint8_t gci_no_lmcs_constraint_flag;
> > + uint8_t gci_no_ladf_constraint_flag;
> > + uint8_t gci_no_virtual_boundaries_constraint_flag;
> > + uint8_t gci_num_reserved_bits;
>
> Also needs gci_reserved_zero_bit[], so that we can handle streams with
> future constraints rather than just rejecting them.
>
> "Although the value of gci_num_reserved_bits is required to be equal to 0
> in this version
> of this Specification, decoders conforming to this version of this
> Specification shall allow the value of
> gci_num_reserved_bits greater than 0 to appear in the syntax and shall
> ignore the values of all the gci_reserved_zero_bit[ i ]
> syntax elements when gci_num_reserved_bits is greater than 0."
>
This just follows the same pattern as h265.
How about we create a separate patch set for this, fix h265 as well.
> > +} H266GeneralConstraintsInfo;
> > +
> > ...
> > +
> > +typedef struct H266RawVPS {
> > + H266RawNALUnitHeader nal_unit_header;
> > +
> > + uint8_t vps_video_parameter_set_id;
> > +
> > + uint8_t vps_max_layers_minus1;
> > + uint8_t vps_max_sublayers_minus1;
> > + /*TODO add more*/
> > + H266RawExtensionData extension_data;
> > +} H266RawVPS;
>
> You don't actually use the VPS structure at all, so given that it isn't
> parsed I think it would be cleaner to just not include it here at all.
>
Done. The current conformance stream has no VPS.
>
> > +//
> > ...
> > +
> > +typedef struct H266RawSEIBufferingPeriod {
> > + uint8_t bp_seq_parameter_set_id;
> > +} H266RawSEIBufferingPeriod;
> > +
> > +typedef struct H266RawSEIPicTiming {
> > +} H266RawSEIPicTiming;
> > +
> > +typedef struct H266RawSEIPanScanRect {
> > +} H266RawSEIPanScanRect;
> > +
> > +typedef struct H266RawSEIUserDataRegistered {
> > +
> > +} H266RawSEIUserDataRegistered;
> > +
> > +typedef struct H266RawSEIUserDataUnregistered {
> > +} H266RawSEIUserDataUnregistered;
> > +
> > +typedef struct H266RawSEIRecoveryPoint {
> > +} H266RawSEIRecoveryPoint;
> > +
> > +typedef struct H266RawSEIDisplayOrientation {
> > +} H266RawSEIDisplayOrientation;
> > +
> > +typedef struct H266RawSEIActiveParameterSets {
> > +} H266RawSEIActiveParameterSets;
> > +
> > +typedef struct H266RawSEIDecodedPictureHash {
> > + uint8_t dph_sei_hash_type;
> > + uint8_t dph_sei_single_component_flag;
> > + uint8_t dph_sei_picture_md5[3][16];
> > + uint16_t dph_sei_picture_crc[3];
> > + uint32_t dph_sei_picture_checksum[3];
> > +} H266RawSEIDecodedPictureHash;
> > +
> > +typedef struct H266RawSEITimeCode {
> > +} H266RawSEITimeCode;
> > +
> > +typedef struct H266RawSEIMasteringDisplayColourVolume {
> > +} H266RawSEIMasteringDisplayColourVolume;
> > +
>
> Don't include all of these empty structures.
>
> (Half of them are implemented in the common SEI patches on the ML now
> anyway.)
>
Fixed
>
> > ...
> > +#endif /* AVCODEC_CBS_H266_H */
> > diff --git a/libavcodec/cbs_h266_syntax_template.c
> b/libavcodec/cbs_h266_syntax_template.c
> > new file mode 100644
> > index 0000000000..6a6defc8a5
> > --- /dev/null
> > +++ b/libavcodec/cbs_h266_syntax_template.c
> > @@ -0,0 +1,2761 @@
> > +/*
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +
> > +#ifndef H266_CEIL
> > +#define H266_CEIL(v, a) (((v) + (a) - 1) / (a))
> > +#endif
>
> If it's useful to have this separately then put it in cbs_h2645.c rather
> than doubled in the template. (See for example cbs_av1_tile_log2().)
>
It's protected by ifdef, not doubled.
> > +
> > +#ifndef H266_HAS_MULTI_SLICES_IN_TILE
> > +#define H266_HAS_MULTI_SLICES_IN_TILE(pps, tile_y, slice) \
> > + (pps->pps_slice_width_in_tiles_minus1[slice] == 0 && \
> > + pps->pps_slice_height_in_tiles_minus1[slice] == 0 && \
> > + pps->pps_tile_row_height_minus1[tile_y] > 0)
> > +#endif
>
> This seems to be used exactly once and match the syntax in the standard,
> so I don't think it should be separate.
>
Done
>
> > +
> > ...
> > +
> > +static int FUNC(byte_alignment)(CodedBitstreamContext *ctx, RWContext
> *rw)
> > +{
> > + int err;
> > +
> > + fixed(1, alignment_bit_equal_to_one, 1);
> > + while (byte_alignment(rw) != 0)
> > + fixed(1, alignment_bit_equal_to_zero, 0);
>
> Names aren't right.
>
Fixed.
>
> > + return 0;
> > +}
> > +
> > +static int FUNC(general_constraints_info)(CodedBitstreamContext *ctx,
> > + RWContext *rw,
> > + H266GeneralConstraintsInfo
> *current)
> > +{
> > + int err, i;
> > +
> > + flag(gci_present_flag);
> > + if (current->gci_present_flag) {
> > + ...
> > +
> > + /* loop filter */
> > + flag(gci_no_sao_constraint_flag);
> > + flag(gci_no_alf_constraint_flag);
> > + flag(gci_no_ccalf_constraint_flag);
> > + flag(gci_no_lmcs_constraint_flag);
> > + flag(gci_no_ladf_constraint_flag);
> > + flag(gci_no_virtual_boundaries_constraint_flag);
> > + ub(8, gci_num_reserved_bits);
> > + for (i = 0; i < current->gci_num_reserved_bits; i++) {
> > + fixed(1, gci_reserved_zero_bit, 1);
>
> As noted above, needs to be passed through for future compatibility.
>
Yes, but it just follows h265 cbs, how about we handle it in a separate
patch set.
>
> > + }
> > + }
> > + while (byte_alignment(rw) != 0)
> > + fixed(1, gci_alignment_zero_bit, 0);
> > + return 0;
> > +}
> > +
> > +static int FUNC(profile_tier_level)(CodedBitstreamContext *ctx,
> RWContext *rw,
> > + H266RawProfileTierLevel *current,
> > + int profile_tier_present_flag,
> > + int max_num_sub_layers_minus1)
> > +{
> > + int err, i;
> > +
> > + if (profile_tier_present_flag) {
> > + ub(7, general_profile_idc);
> > + flag(general_tier_flag);
> > + }
> > + ub(8, general_level_idc);
> > + flag(ptl_frame_only_constraint_flag);
> > + flag(ptl_multilayer_enabled_flag);
> > + if (profile_tier_present_flag) {
> > + CHECK(FUNC(general_constraints_info)(ctx, rw,
> > +
> ¤t->general_constraints_info));
> > + }
> > + for (i = max_num_sub_layers_minus1 - 1; i >= 0; i--)
> > + flags(ptl_sublayer_level_present_flag[i], 1, i);
> > + while (byte_alignment(rw) != 0)
> > + fixed(1, ptl_reserved_zero_bit, 0);
>
> Need not be zero, has to be passed through.
>
Ditto
>
> > + for (i = max_num_sub_layers_minus1 - 1; i >= 0; i--)
> > + if (current->ptl_sublayer_level_present_flag[i])
> > + ubs(8, sublayer_level_idc[i], 1, i);
> > + if (profile_tier_present_flag) {
> > + ub(8, ptl_num_sub_profiles);
> > + for (i = 0; i < current->ptl_num_sub_profiles; i++)
> > + ubs(32, general_sub_profile_idc[i], 1, i);
> > + }
> > + return 0;
> > +}
> > +
> > +static int FUNC(vui_parameters)(CodedBitstreamContext *ctx, RWContext
> *rw,
> > + H266RawVUI *current)
> > +{
> > + int err;
> > +
> > + flag(vui_progressive_source_flag);
> > + flag(vui_interlaced_source_flag);
> > + flag(vui_non_packed_constraint_flag);
> > + flag(vui_non_projected_constraint_flag);
> > + flag(vui_aspect_ratio_info_present_flag);
> > + if (current->vui_aspect_ratio_info_present_flag) {
> > + flag(vui_aspect_ratio_constant_flag);
> > + ub(8, vui_aspect_ratio_idc);
> > + if (current->vui_aspect_ratio_idc == 255) {
> > + ub(16, vui_sar_width);
> > + ub(16, vui_sar_height);
> > + }
> > + } else {
> > + infer(vui_aspect_ratio_constant_flag, 0);
> > + infer(vui_aspect_ratio_idc, 0);
> > + }
> > + flag(vui_overscan_info_present_flag);
> > + if (current->vui_overscan_info_present_flag)
> > + flag(vui_overscan_appropriate_flag);
> > + flag(vui_colour_description_present_flag);
> > + if (current->vui_colour_description_present_flag) {
> > + ub(8, vui_colour_primaries);
> > + ub(8, vui_transfer_characteristics);
> > + ub(8, vui_matrix_coeffs);
> > + flag(vui_full_range_flag);
> > + } else {
> > + infer(vui_colour_primaries, 2);
> > + infer(vui_transfer_characteristics, 2);
> > + infer(vui_matrix_coeffs, 2);
> > + infer(vui_full_range_flag, 0);
> > + }
> > + flag(vui_chroma_loc_info_present_flag);
> > + if (current->vui_chroma_loc_info_present_flag) {
> > + if (current->vui_progressive_source_flag &&
> > + !current->vui_interlaced_source_flag) {
> > + ue(vui_chroma_sample_loc_type_frame, 0, 6);
> > + } else {
> > + ue(vui_chroma_sample_loc_type_top_field, 0, 6);
> > + ue(vui_chroma_sample_loc_type_bottom_field, 0, 6);
> > + }
> > + }
>
> These are inferred as 6 when not present?
>
6 only happened when ChromaFormatIdc = 1, others are not defined.
and 6 is the unspecific value in the spec...
Do we really need to infer it :)
>
> > +
> > + return 0;
> > +}
> > +
> > ...
> > +
> > +static int FUNC(vui_payload)(CodedBitstreamContext *ctx, RWContext *rw,
> > + H266RawVUI *current, uint16_t
> vui_payload_size)
> > +{
> > + int err;
> > + int start_position, current_position;
> > +#ifdef READ
> > + start_position = get_bits_count(rw);
> > +#else
> > + start_position = put_bits_count(rw);
> > +#endif
> > + CHECK(FUNC(vui_parameters)(ctx, rw, current));
> > +
> > +#ifdef READ
> > + current_position = get_bits_count(rw) - start_position;
> > +#else
> > + current_position = put_bits_count(rw) - start_position;
> > +#endif
>
> Looks like another case for bit_position(RWContext) as in <
> https://lists.ffmpeg.org/pipermail/ffmpeg-devel/2021-January/274208.html>.
>
yeah, we can change it after your patch merged.
>
> > + if (current_position < 8 * vui_payload_size) {
> > + CHECK(FUNC(payload_extension)(ctx, rw, ¤t->extension_data,
> > + vui_payload_size,
> current_position));
> > + fixed(1, vui_payload_bit_equal_to_one, 1);
> > + while (byte_alignment(rw) != 0)
> > + fixed(1, vui_payload_bit_equal_to_zero, 0);
> > + }
> > + return 0;
> > +}
> > +
> > ...
> > +
> > +static int FUNC(ref_pic_list_struct)(CodedBitstreamContext *ctx,
> RWContext *rw,
> > + H266RefPicListStruct *current,
> > + uint8_t list_idx, uint8_t rpls_idx,
> > + const H266RawSPS *sps)
> > +{
> > +
> > + int err, i, j;
> > + ue(num_ref_entries, 0, VVC_MAX_DPB_SIZE + 13);
>
> The + 13 is appearing in the sizes too. Can we give it a better name?
>
fixed with VVC_MAX_REF_ENTRIES
>
> > + if (sps->sps_long_term_ref_pics_flag &&
> > + rpls_idx < sps->sps_num_ref_pic_lists[list_idx] &&
> > + current->num_ref_entries > 0)
> > + flag(ltrp_in_header_flag);
> > + if (sps->sps_long_term_ref_pics_flag &&
> > + rpls_idx == sps->sps_num_ref_pic_lists[list_idx])
> > + infer(ltrp_in_header_flag, 1);
> > + for (i = 0, j = 0; i < current->num_ref_entries; i++) {
> > + if (sps->sps_inter_layer_prediction_enabled_flag)
> > + flags(inter_layer_ref_pic_flag[i], 1, i);
> > + else
> > + infer(inter_layer_ref_pic_flag[i], 0);
> > +
> > + if (!current->inter_layer_ref_pic_flag[i]) {
> > + if (sps->sps_long_term_ref_pics_flag)
> > + flags(st_ref_pic_flag[i], 1, i);
> > + else
> > + infer(st_ref_pic_flag[i], 1);
> > + if (current->st_ref_pic_flag[i]) {
> > + int abs_delta_poc_st;
> > + ues(abs_delta_poc_st[i], 0, MAX_UINT_BITS(15), 1, i);
> > + if ((sps->sps_weighted_pred_flag ||
> > + sps->sps_weighted_bipred_flag) && i != 0)
> > + abs_delta_poc_st = current->abs_delta_poc_st[i];
> > + else
> > + abs_delta_poc_st = current->abs_delta_poc_st[i] + 1;
> > + if (abs_delta_poc_st > 0)
> > + flags(strp_entry_sign_flag[i], 1, i);
> > + } else {
> > + if (!current->ltrp_in_header_flag) {
> > + uint8_t bits =
> sps->sps_log2_max_pic_order_cnt_lsb_minus4 + 4;
> > + ubs(bits, rpls_poc_lsb_lt[j], 1, j);
> > + j++;
> > + }
> > + }
> > + } else {
> > + //todo: check range when vps parser is ready.
> > + ues(ilrp_idx[i], 0, MAX_UINT_BITS(8), 1, i);
> > + }
> > + }
> > + return 0;
> > +}
> > +
> > ...
> > +
> > +static int FUNC(sublayer_hrd_parameters)(CodedBitstreamContext *ctx,
> RWContext *rw,
> > + H266RawSubLayerHRDParameters *current, int
> sublayer_id,
> > + const H266RawGeneralTimingHrdParameters *general)
> > +{
> > + int err, i;
> > + for (i = 0; i <= general->hrd_cpb_cnt_minus1; i++) {
> > + ues(bit_rate_value_minus1[i], 0, UINT32_MAX - 1, 2,
> sublayer_id, i);
> > + ues(cpb_size_value_minus1[i], 0, UINT32_MAX - 1, 2,
> sublayer_id, i);
> > + if (general->general_du_hrd_params_present_flag) {
> > + ues(cpb_size_du_value_minus1[i],
> > + 0, UINT32_MAX - 1, 2, sublayer_id, i);
> > + ues(bit_rate_du_value_minus1[i],
> > + 0, UINT32_MAX - 1, 2, sublayer_id, i);
> > + }
> > + flags(cbr_flag[i], 2, sublayer_id, i);
> > + }
>
> On everything here you've correctly got that there are two subscripts, but
> there is actually only one in the variable - perhaps the argument shouldn't
> be a substructure?
>
fixed
>
> > + return 0;
> > +}
> > +
> > +static int FUNC(ols_timing_hrd_parameters)(CodedBitstreamContext *ctx,
> > + RWContext *rw, H266RawOlsTimingHrdParameters *current,
> > + uint8_t first_sublayer, uint8_t max_sublayers_minus1,
> > + const H266RawGeneralTimingHrdParameters *general)
> > +{
> > + int err, i;
> > + for (i = first_sublayer; i <= max_sublayers_minus1; i++) {
> > + flags(fixed_pic_rate_general_flag[i], 1, i);
> > + if (!current->fixed_pic_rate_general_flag[i])
> > + flags(fixed_pic_rate_within_cvs_flag[i], 1, i);
> > + else
> > + infer(fixed_pic_rate_within_cvs_flag[i], 1);
> > + if (current->fixed_pic_rate_within_cvs_flag[i]) {
> > + ues(elemental_duration_in_tc_minus1[i], 0, 2047, 1, i);
> > + infer(low_delay_hrd_flag[i], 0);
> > + } else if ((general->general_nal_hrd_params_present_flag ||
> > + general->general_vcl_hrd_params_present_flag) &&
> > + general->hrd_cpb_cnt_minus1 == 0) {
> > + flags(low_delay_hrd_flag[i], 1, i);
> > + } else {
> > + infer(low_delay_hrd_flag[i], 0);
> > + }
> > + if (general->general_nal_hrd_params_present_flag)
> > + CHECK(FUNC(sublayer_hrd_parameters)(ctx, rw,
> > +
> ¤t->nal_sub_layer_hrd_parameters[i],
> > + i, general));
> > + if (general->general_vcl_hrd_params_present_flag)
> > + CHECK(FUNC(sublayer_hrd_parameters)(ctx, rw,
> > + ¤t->nal_sub_layer_hrd_parameters[i], i,
> general));
> > + }
> > + return 0;
> > +}
> > +
> > +static int FUNC(sps_subpic)(CodedBitstreamContext *ctx, RWContext *rw,
> > + H266RawSPS *current)
>
> This function doesn't actually exist, and all splitting it out seems to be
> doing is making the template look less like the standard?
>
VVC is more complex than hevc. If we do not use some sub-functions. It will
be too large. not friendly for reader and indent.
>
> > +{
> > ...
> > +}
> > +
> > +static int FUNC(sps)(CodedBitstreamContext *ctx, RWContext *rw,
> > + H266RawSPS *current)
> > +{
> > + CodedBitstreamH266Context *h266 = ctx->priv_data;
> > + const H266RawVPS *vps;
> > + int err, i, j;
> > + unsigned int ctb_log2_size_y, min_cb_log2_size_y,
> > + min_qt_log2_size_intra_y, min_qt_log2_size_inter_y,
> > + ctb_size_y, max_num_merge_cand;
> > + uint8_t qp_bd_offset;
> > +
> > + static const uint8_t h266_sub_width_c[] = {
> > + 1, 2, 2, 1
> > + };
> > + static const uint8_t h266_sub_height_c[] = {
> > + 1, 2, 1, 1
> > + };
> > +
> > + HEADER("Sequence Parameter Set");
> > +
> > + CHECK(FUNC(nal_unit_header)(ctx, rw,
> > + ¤t->nal_unit_header,
> VVC_SPS_NUT));
> > +
> > + ub(4, sps_seq_parameter_set_id);
> > + ub(4, sps_video_parameter_set_id);
> > + if (!current->sps_video_parameter_set_id && !h266->vps[0]) {
> > + H266RawVPS *vps0;
> > + h266->vps_ref[0] = av_buffer_alloc(sizeof(H266RawVPS));
> > + if (!h266->vps_ref[0])
> > + return AVERROR(ENOMEM);
> > + vps0 = h266->vps[0] = (H266RawVPS *)h266->vps_ref[0]->data;
> > + vps0->vps_max_layers_minus1 = 0;
> > + //todo: infer all things in 7.4.3.4 sps_video_parameter_set_id
> paragraph
> > + }
> > + h266->active_vps = vps =
> h266->vps[current->sps_video_parameter_set_id];
>
> You don't actually use the vps at all, so why bother with this?
>
> > +
> > + u(3, sps_max_sublayers_minus1, 0, VVC_MAX_SUBLAYERS - 1);
> > + u(2, sps_chroma_format_idc, 0, 3);
> > + u(2, sps_log2_ctu_size_minus5, 0, 3);
> > + ctb_log2_size_y = current->sps_log2_ctu_size_minus5 + 5;
> > + ctb_size_y = 1 << ctb_log2_size_y;
> > +
> > ...
> > +
> > + u(2, sps_num_extra_ph_bytes, 0, 2);
> > + for (i = 0; i < (current->sps_num_extra_ph_bytes * 8); i++) {
> > + flags(sps_extra_ph_bit_present_flag[i], 1, i);
> > + }
> > +
> > + u(2, sps_num_extra_sh_bytes, 0, 0);
>
> Between zero and zero?
>
For the current version of spec, yes.
Spec always says we need to open for some bits/values.
But, from my personal experience, when we have other values in the reserved
one. We usually cant correctly decode a stream.
So, instead of ignoring the value, another approach is to report the error
for it, so we can get the bitstream earlier and fix it.
>
> > + for (i = 0; i < (current->sps_num_extra_sh_bytes * 8); i++) {
> > + flags(sps_extra_sh_bit_present_flag[i], 1, i);
> > + }
> > +
> > + ...
> > +
> > + flag(sps_affine_enabled_flag);
> > + if (current->sps_affine_enabled_flag) {
> > + ue(sps_five_minus_max_num_subblock_merge_cand,
> > + 0, 5 - current->sps_sbtmvp_enabled_flag);
> > + flag(sps_6param_affine_enabled_flag);
> > + if (current->sps_amvr_enabled_flag)
> > + flag(sps_affine_amvr_enabled_flag);
> > + else
> > + infer(sps_affine_amvr_enabled_flag, 0);
> > + flag(sps_affine_prof_enabled_flag);
> > + if (current->sps_affine_prof_enabled_flag)
> > + flag(sps_prof_control_present_in_ph_flag);
> > + else
> > + infer(sps_prof_control_present_in_ph_flag, 0);
> > + } else {
> > + infer(sps_five_minus_max_num_subblock_merge_cand, 0);
>
> This inference isn't right - there is also a temporal subblock merge
> candidate.
>
How to get it? Spec did not define how to infer
sps_five_minus_max_num_subblock_merge_cand,
I just guess it from sps_6param_affine_enabled_flag
>
> > + infer(sps_6param_affine_enabled_flag, 0);
> > + infer(sps_affine_amvr_enabled_flag, 0);
> > + infer(sps_affine_prof_enabled_flag, 0);
> > + infer(sps_prof_control_present_in_ph_flag, 0);
> > + }
> > +
> > ...
> > +
> > + flag(sps_field_seq_flag);
> > + flag(sps_vui_parameters_present_flag);
> > + if (current->sps_vui_parameters_present_flag) {
> > + ue(sps_vui_payload_size_minus1, 0, 1023);
> > + while (byte_alignment(rw) != 0)
> > + fixed(1, sps_vui_alignment_zero_bit, 0);
> > + CHECK(FUNC(vui_payload)(ctx, rw, ¤t->vui,
> > + current->sps_vui_payload_size_minus1 +
> 1));
> > + }
> > +
> > + flag(sps_extension_flag);
> > + if (current->sps_extension_flag)
> > + CHECK(FUNC(extension_data)(ctx, rw, ¤t->extension_data));
> > +
> > + CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
> > +
> > + return 0;
> > +}
>
> (I stopped here on trying to read this in detail. Even above, I haven't
> checked all of the bounds.)
>
> > ...
> > +
> > +static int FUNC(aud)(CodedBitstreamContext *ctx, RWContext *rw,
> > + H266RawAUD *current)
> > +{
> > + int err;
> > +
> > + HEADER("Access Unit Delimiter");
> > +
> > + CHECK(FUNC(nal_unit_header)(ctx, rw,
> > + ¤t->nal_unit_header,
> VVC_AUD_NUT));
> > +
> > + flag(aud_irap_or_gdr_flag);
> > + u(3, aud_pic_type, 0, 2);
>
> Another one for future-extension compatibility:
>
> "Decoders conforming to this version of this Specification shall ignore
> reserved values of aud_pic_type."
>
yeah, we may need a separate patch to define a common macro, and fix
h265/h266 together.
>
> > +
> > + CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
> > + return 0;
> > +}
> > +
> > ...
> > +
> > +static int FUNC(slice_header)(CodedBitstreamContext *ctx, RWContext *rw,
> > + H266RawSliceHeader *current)
> > +{
> > + CodedBitstreamH266Context *h266 = ctx->priv_data;
> > + const H266RawSPS *sps;
> > + const H266RawPPS *pps;
> > + const H266RawPH *ph;
> > + const H266RefPicLists *ref_pic_lists;
> > + int err, i;
> > + uint8_t nal_unit_type, qp_bd_offset;
> > + uint16_t curr_subpic_idx;
> > + uint16_t num_slices_in_subpic;
> > +
> > + HEADER("Slice Header");
> > +
> > + CHECK(FUNC(nal_unit_header)(ctx, rw, ¤t->nal_unit_header,
> -1));
> > +
> > + flag(sh_picture_header_in_slice_header_flag);
> > + if (current->sh_picture_header_in_slice_header_flag){
> > + CHECK(FUNC(picture_header)(ctx, rw,
> ¤t->sh_picture_header));
> > + if (!h266->ph_ref) {
> > + h266->ph_ref = av_buffer_allocz(sizeof(H266RawPH));
> > + if (!h266->ph_ref)
> > + return AVERROR(ENOMEM);
> > + h266->active_ph = h266->ph = (H266RawPH*)h266->ph_ref->data;
> > + }
> > + memcpy(h266->ph, ¤t->sh_picture_header,
> sizeof(H266RawPH));
> > + }
> > + sps = h266->active_sps;
> > + pps = h266->active_pps;
> > + ph = h266->active_ph;
>
> This is going to do something horrible if you have a frame where you lose
> the NAL unit containing the picture header but it still has other slices.
>
> Can we detect and avoid that case?
>
In this case, we will have a parser to split the frames, the parser code
will make sure it always has a picture header in every frame.
> > +
> > + if (!sps || !pps || !ph) {
> > + av_log(ctx->log_ctx, AV_LOG_ERROR, "no
> sps(%p)/pps(%p)/ph(%p).\n", sps, pps, ph);
>
> This message is definitely user-facing - it should be clearer.
>
> Maybe "SPS id %d not available", "PPS id %d not available", "Picture
> header not available"?
>
actually, it's checked by picture_header(), once we got ph, we always got
the sps and pps.
will check ph and assert sps, pps.
>
> > + return AVERROR_INVALIDDATA;
> > + }
> > +
> > ...
> > +
> > + return 0;
> > +}
> > +
> > +static int FUNC(sei_decoded_picture_hash)(CodedBitstreamContext *ctx,
> RWContext *rw,
> > + H266RawSEIDecodedPictureHash
> *current)
> > +{
> > + int err, c, i;
> > +
> > + HEADER("Decoded Picture Hash");
> > +
> > + u(8, dph_sei_hash_type, 0, 2);
> > + flag(dph_sei_single_component_flag);
> > + fixed(7, ph_sei_reserved_zero_7bits, 0);
>
> "shall ignore"
>
> > +
> > + for (c = 0; c < (current->dph_sei_single_component_flag ? 1 : 3);
> c++) {
>
> This variable is called cIdx, and it does appear in strings.
>
> > + if (current->dph_sei_hash_type == 0) {
> > + for (i = 0; i < 16; i++)
> > + us(8, dph_sei_picture_md5[c][i], 0x00, 0xff, 2, c, i);
> > + } else if (current->dph_sei_hash_type == 1) {
> > + us(16, dph_sei_picture_crc[c], 0x0000, 0xffff, 1, c);
> > + } else if (current->dph_sei_hash_type == 2) {
> > + us(32, dph_sei_picture_checksum[c], 0x00000000, 0xffffffff,
> 1, c);
> > + }
> > + }
> > + return 0;
> > +}
> > +
> > ...
>
> Thanks,
>
> - Mark
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
More information about the ffmpeg-devel
mailing list