[FFmpeg-devel] [PATCH 08/11] avformat/hevc: add a function to write a lhvC box
James Almer
jamrial at gmail.com
Thu Jul 4 00:26:43 EEST 2024
Signed-off-by: James Almer <jamrial at gmail.com>
---
libavformat/hevc.c | 163 +++++++++++++++++++++++++++++++++++----------
libavformat/hevc.h | 17 +++++
2 files changed, 145 insertions(+), 35 deletions(-)
diff --git a/libavformat/hevc.c b/libavformat/hevc.c
index f3e5542699..69e6f719e5 100644
--- a/libavformat/hevc.c
+++ b/libavformat/hevc.c
@@ -43,12 +43,16 @@ enum {
#define FLAG_ARRAY_COMPLETENESS (1 << 0)
#define FLAG_IS_NALFF (1 << 1)
+#define FLAG_IS_LHVC (1 << 2)
typedef struct HVCCNALUnit {
uint8_t nuh_layer_id;
uint8_t parameter_set_id;
uint16_t nalUnitLength;
const uint8_t *nalUnit;
+
+ // VPS
+ uint8_t vps_max_sub_layers_minus1;
} HVCCNALUnit;
typedef struct HVCCNALUnitArray {
@@ -383,8 +387,6 @@ static void skip_sub_layer_ordering_info(GetBitContext *gb)
static int hvcc_parse_vps(GetBitContext *gb, HVCCNALUnit *nal,
HEVCDecoderConfigurationRecord *hvcc)
{
- unsigned int vps_max_sub_layers_minus1;
-
nal->parameter_set_id = get_bits(gb, 4);
/*
* vps_reserved_three_2bits u(2)
@@ -392,7 +394,7 @@ static int hvcc_parse_vps(GetBitContext *gb, HVCCNALUnit *nal,
*/
skip_bits(gb, 8);
- vps_max_sub_layers_minus1 = get_bits(gb, 3);
+ nal->vps_max_sub_layers_minus1 = get_bits(gb, 3);
/*
* numTemporalLayers greater than 1 indicates that the stream to which this
@@ -403,7 +405,7 @@ static int hvcc_parse_vps(GetBitContext *gb, HVCCNALUnit *nal,
* that it is unknown whether the stream is temporally scalable.
*/
hvcc->numTemporalLayers = FFMAX(hvcc->numTemporalLayers,
- vps_max_sub_layers_minus1 + 1);
+ nal->vps_max_sub_layers_minus1 + 1);
/*
* vps_temporal_id_nesting_flag u(1)
@@ -411,7 +413,7 @@ static int hvcc_parse_vps(GetBitContext *gb, HVCCNALUnit *nal,
*/
skip_bits(gb, 17);
- hvcc_parse_ptl(gb, hvcc, vps_max_sub_layers_minus1);
+ hvcc_parse_ptl(gb, hvcc, nal->vps_max_sub_layers_minus1);
/* nothing useful for hvcC past this point */
return 0;
@@ -508,10 +510,33 @@ static int hvcc_parse_sps(GetBitContext *gb, HVCCNALUnit *nal,
{
unsigned int i, sps_max_sub_layers_minus1, log2_max_pic_order_cnt_lsb_minus4;
unsigned int num_short_term_ref_pic_sets, num_delta_pocs[HEVC_MAX_SHORT_TERM_REF_PIC_SETS];
+ unsigned int sps_ext_or_max_sub_layers_minus1, multi_layer_ext_sps_flag;
+
+ unsigned int sps_video_parameter_set_id = get_bits(gb, 4);
- skip_bits(gb, 4); // sps_video_parameter_set_id
+ if (nal->nuh_layer_id == 0) {
+ sps_ext_or_max_sub_layers_minus1 = 0;
+ sps_max_sub_layers_minus1 = get_bits(gb, 3);
+ } else {
+ sps_ext_or_max_sub_layers_minus1 = get_bits(gb, 3);
+ if (sps_ext_or_max_sub_layers_minus1 == 7) {
+ const HVCCNALUnitArray *array = &hvcc->arrays[VPS_INDEX];
+ const HVCCNALUnit *vps = NULL;
+
+ for (i = 0; i < array->numNalus; i++)
+ if (sps_video_parameter_set_id == array->nal[i].parameter_set_id) {
+ vps = &array->nal[i];
+ break;
+ }
+ if (!vps)
+ return AVERROR_INVALIDDATA;
- sps_max_sub_layers_minus1 = get_bits (gb, 3);
+ sps_max_sub_layers_minus1 = vps->vps_max_sub_layers_minus1;
+ } else
+ sps_max_sub_layers_minus1 = sps_ext_or_max_sub_layers_minus1;
+ }
+ multi_layer_ext_sps_flag = nal->nuh_layer_id &&
+ sps_ext_or_max_sub_layers_minus1 == 7;
/*
* numTemporalLayers greater than 1 indicates that the stream to which this
@@ -524,12 +549,17 @@ static int hvcc_parse_sps(GetBitContext *gb, HVCCNALUnit *nal,
hvcc->numTemporalLayers = FFMAX(hvcc->numTemporalLayers,
sps_max_sub_layers_minus1 + 1);
- hvcc->temporalIdNested = get_bits1(gb);
-
- hvcc_parse_ptl(gb, hvcc, sps_max_sub_layers_minus1);
+ if (!multi_layer_ext_sps_flag) {
+ hvcc->temporalIdNested = get_bits1(gb);
+ hvcc_parse_ptl(gb, hvcc, sps_max_sub_layers_minus1);
+ }
nal->parameter_set_id = get_ue_golomb_long(gb);
+ if (multi_layer_ext_sps_flag) {
+ if (get_bits1(gb)) // update_rep_format_flag
+ skip_bits(gb, 8); // sps_rep_format_idx
+ } else {
hvcc->chromaFormat = get_ue_golomb_long(gb);
if (hvcc->chromaFormat == 3)
@@ -547,12 +577,15 @@ static int hvcc_parse_sps(GetBitContext *gb, HVCCNALUnit *nal,
hvcc->bitDepthLumaMinus8 = get_ue_golomb_long(gb);
hvcc->bitDepthChromaMinus8 = get_ue_golomb_long(gb);
+ }
log2_max_pic_order_cnt_lsb_minus4 = get_ue_golomb_long(gb);
+ if (!multi_layer_ext_sps_flag) {
/* sps_sub_layer_ordering_info_present_flag */
i = get_bits1(gb) ? 0 : sps_max_sub_layers_minus1;
for (; i <= sps_max_sub_layers_minus1; i++)
skip_sub_layer_ordering_info(gb);
+ }
get_ue_golomb_long(gb); // log2_min_luma_coding_block_size_minus3
get_ue_golomb_long(gb); // log2_diff_max_min_luma_coding_block_size
@@ -561,9 +594,15 @@ static int hvcc_parse_sps(GetBitContext *gb, HVCCNALUnit *nal,
get_ue_golomb_long(gb); // max_transform_hierarchy_depth_inter
get_ue_golomb_long(gb); // max_transform_hierarchy_depth_intra
- if (get_bits1(gb) && // scaling_list_enabled_flag
- get_bits1(gb)) // sps_scaling_list_data_present_flag
- skip_scaling_list_data(gb);
+ if (get_bits1(gb)) { // scaling_list_enabled_flag
+ int sps_infer_scaling_list_flag = 0;
+ if (multi_layer_ext_sps_flag)
+ sps_infer_scaling_list_flag = get_bits1(gb);
+ if (sps_infer_scaling_list_flag)
+ skip_bits(gb, 6); // sps_scaling_list_ref_layer_id
+ else if (get_bits1(gb)) // sps_scaling_list_data_present_flag
+ skip_scaling_list_data(gb);
+ }
skip_bits1(gb); // amp_enabled_flag
skip_bits1(gb); // sample_adaptive_offset_enabled_flag
@@ -703,6 +742,7 @@ static int hvcc_add_nal_unit(const uint8_t *nal_buf, uint32_t nal_size,
{
int ret = 0;
int is_nalff = !!(flags & FLAG_IS_NALFF);
+ int is_lhvc = !!(flags & FLAG_IS_LHVC);
int ps_array_completeness = !!(flags & FLAG_ARRAY_COMPLETENESS);
HVCCNALUnitArray *const array = &hvcc->arrays[array_idx];
HVCCNALUnit *nal;
@@ -722,7 +762,7 @@ static int hvcc_add_nal_unit(const uint8_t *nal_buf, uint32_t nal_size,
goto end;
nal_unit_parse_header(&gbc, &nal_type, &nuh_layer_id);
- if (nuh_layer_id > 0)
+ if (!is_lhvc && nuh_layer_id > 0)
goto end;
/*
@@ -798,9 +838,12 @@ static void hvcc_close(HEVCDecoderConfigurationRecord *hvcc)
}
}
-static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
+static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc,
+ int flags)
{
- uint16_t vps_count, sps_count, pps_count;
+ uint16_t numNalus[NB_ARRAYS] = { 0 };
+ int is_lhvc = !!(flags & FLAG_IS_LHVC);
+ int numOfArrays = 0;
/*
* We only support writing HEVCDecoderConfigurationRecord version 1.
@@ -826,10 +869,32 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
* let's always set them to values meaning 'unspecified'.
*/
hvcc->avgFrameRate = 0;
- hvcc->constantFrameRate = 0;
+ /*
+ * lhvC doesn't store this field. It instead reserves the bits, setting them
+ * to '11'b.
+ */
+ hvcc->constantFrameRate = is_lhvc * 0x3;
+
+ /*
+ * Skip all NALUs with nuh_layer_id == 0 if writing lhvC. We do it here and
+ * not before parsing them as some parameter sets with nuh_layer_id > 0
+ * may reference base layer parameter sets.
+ */
+ for (unsigned i = 0; i < FF_ARRAY_ELEMS(hvcc->arrays); i++) {
+ const HVCCNALUnitArray *const array = &hvcc->arrays[i];
+
+ if (array->numNalus == 0)
+ continue;
+ for (unsigned j = 0; j < array->numNalus; j++)
+ numNalus[i] += !is_lhvc || (array->nal[j].nuh_layer_id != 0);
+ numOfArrays += (numNalus[i] > 0);
+ }
+
+ av_log(NULL, AV_LOG_TRACE, "%s\n", is_lhvc ? "lhvC" : "hvcC");
av_log(NULL, AV_LOG_TRACE, "configurationVersion: %"PRIu8"\n",
hvcc->configurationVersion);
+ if (!is_lhvc) {
av_log(NULL, AV_LOG_TRACE, "general_profile_space: %"PRIu8"\n",
hvcc->general_profile_space);
av_log(NULL, AV_LOG_TRACE, "general_tier_flag: %"PRIu8"\n",
@@ -842,10 +907,12 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
hvcc->general_constraint_indicator_flags);
av_log(NULL, AV_LOG_TRACE, "general_level_idc: %"PRIu8"\n",
hvcc->general_level_idc);
+ }
av_log(NULL, AV_LOG_TRACE, "min_spatial_segmentation_idc: %"PRIu16"\n",
hvcc->min_spatial_segmentation_idc);
av_log(NULL, AV_LOG_TRACE, "parallelismType: %"PRIu8"\n",
hvcc->parallelismType);
+ if (!is_lhvc) {
av_log(NULL, AV_LOG_TRACE, "chromaFormat: %"PRIu8"\n",
hvcc->chromaFormat);
av_log(NULL, AV_LOG_TRACE, "bitDepthLumaMinus8: %"PRIu8"\n",
@@ -856,6 +923,7 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
hvcc->avgFrameRate);
av_log(NULL, AV_LOG_TRACE, "constantFrameRate: %"PRIu8"\n",
hvcc->constantFrameRate);
+ }
av_log(NULL, AV_LOG_TRACE, "numTemporalLayers: %"PRIu8"\n",
hvcc->numTemporalLayers);
av_log(NULL, AV_LOG_TRACE, "temporalIdNested: %"PRIu8"\n",
@@ -863,11 +931,11 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
av_log(NULL, AV_LOG_TRACE, "lengthSizeMinusOne: %"PRIu8"\n",
hvcc->lengthSizeMinusOne);
av_log(NULL, AV_LOG_TRACE, "numOfArrays: %"PRIu8"\n",
- hvcc->numOfArrays);
+ numOfArrays);
for (unsigned i = 0, j = 0; i < FF_ARRAY_ELEMS(hvcc->arrays); i++) {
const HVCCNALUnitArray *const array = &hvcc->arrays[i];
- if (array->numNalus == 0)
+ if (numNalus[i] == 0)
continue;
av_log(NULL, AV_LOG_TRACE, "array_completeness[%u]: %"PRIu8"\n",
@@ -875,28 +943,31 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
av_log(NULL, AV_LOG_TRACE, "NAL_unit_type[%u]: %"PRIu8"\n",
j, array->NAL_unit_type);
av_log(NULL, AV_LOG_TRACE, "numNalus[%u]: %"PRIu16"\n",
- j, array->numNalus);
- for (unsigned k = 0; k < array->numNalus; k++)
+ j, numNalus[i]);
+ for (unsigned k = 0; k < array->numNalus; k++) {
+ if (is_lhvc && array->nal[k].nuh_layer_id == 0)
+ continue;
+
av_log(NULL, AV_LOG_TRACE,
"nalUnitLength[%u][%u]: %"PRIu16"\n",
j, k, array->nal[k].nalUnitLength);
+ }
j++;
}
/*
* We need at least one of each: VPS, SPS and PPS.
*/
- vps_count = hvcc->arrays[VPS_INDEX].numNalus;
- sps_count = hvcc->arrays[SPS_INDEX].numNalus;
- pps_count = hvcc->arrays[PPS_INDEX].numNalus;
- if (!vps_count || vps_count > HEVC_MAX_VPS_COUNT ||
- !sps_count || sps_count > HEVC_MAX_SPS_COUNT ||
- !pps_count || pps_count > HEVC_MAX_PPS_COUNT)
+ if ((!numNalus[VPS_INDEX] || numNalus[VPS_INDEX] > HEVC_MAX_VPS_COUNT) && !is_lhvc)
+ return AVERROR_INVALIDDATA;
+ if (!numNalus[SPS_INDEX] || numNalus[SPS_INDEX] > HEVC_MAX_SPS_COUNT ||
+ !numNalus[PPS_INDEX] || numNalus[PPS_INDEX] > HEVC_MAX_PPS_COUNT)
return AVERROR_INVALIDDATA;
/* unsigned int(8) configurationVersion = 1; */
avio_w8(pb, hvcc->configurationVersion);
+ if (!is_lhvc) {
/*
* unsigned int(2) general_profile_space;
* unsigned int(1) general_tier_flag;
@@ -915,6 +986,7 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
/* unsigned int(8) general_level_idc; */
avio_w8(pb, hvcc->general_level_idc);
+ }
/*
* bit(4) reserved = '1111'b;
@@ -928,6 +1000,7 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
*/
avio_w8(pb, hvcc->parallelismType | 0xfc);
+ if (!is_lhvc) {
/*
* bit(6) reserved = '111111'b;
* unsigned int(2) chromaFormat;
@@ -948,9 +1021,13 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
/* bit(16) avgFrameRate; */
avio_wb16(pb, hvcc->avgFrameRate);
+ }
/*
+ * if (!is_lhvc)
* bit(2) constantFrameRate;
+ * else
+ * bit(2) reserved = '11'b;
* bit(3) numTemporalLayers;
* bit(1) temporalIdNested;
* unsigned int(2) lengthSizeMinusOne;
@@ -961,12 +1038,12 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
hvcc->lengthSizeMinusOne);
/* unsigned int(8) numOfArrays; */
- avio_w8(pb, hvcc->numOfArrays);
+ avio_w8(pb, numOfArrays);
for (unsigned i = 0; i < FF_ARRAY_ELEMS(hvcc->arrays); i++) {
const HVCCNALUnitArray *const array = &hvcc->arrays[i];
- if (!array->numNalus)
+ if (!numNalus[i])
continue;
/*
* bit(1) array_completeness;
@@ -977,11 +1054,14 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
array->NAL_unit_type & 0x3f);
/* unsigned int(16) numNalus; */
- avio_wb16(pb, array->numNalus);
+ avio_wb16(pb, numNalus[i]);
for (unsigned j = 0; j < array->numNalus; j++) {
HVCCNALUnit *nal = &array->nal[j];
+ if (is_lhvc && nal->nuh_layer_id == 0)
+ continue;
+
/* unsigned int(16) nalUnitLength; */
avio_wb16(pb, nal->nalUnitLength);
@@ -1082,12 +1162,11 @@ static int hvcc_parse_nal_unit(const uint8_t *buf, uint32_t len, int type,
return 0;
}
-int ff_isom_write_hvcc(AVIOContext *pb, const uint8_t *data,
- int size, int ps_array_completeness)
+static int write_configuration_record(AVIOContext *pb, const uint8_t *data,
+ int size, int flags)
{
HEVCDecoderConfigurationRecord hvcc;
uint8_t *buf, *end, *start = NULL;
- int flags = !!ps_array_completeness * FLAG_ARRAY_COMPLETENESS;
int ret;
if (size < 6) {
@@ -1154,7 +1233,7 @@ int ff_isom_write_hvcc(AVIOContext *pb, const uint8_t *data,
}
}
- ret = hvcc_write(pb, &hvcc);
+ ret = hvcc_write(pb, &hvcc, flags);
goto end;
} else if (!(AV_RB24(data) == 1 || AV_RB32(data) == 1)) {
/* Not a valid Annex B start code prefix */
@@ -1183,10 +1262,24 @@ int ff_isom_write_hvcc(AVIOContext *pb, const uint8_t *data,
buf += len;
}
- ret = hvcc_write(pb, &hvcc);
+ ret = hvcc_write(pb, &hvcc, flags);
end:
hvcc_close(&hvcc);
av_free(start);
return ret;
}
+
+int ff_isom_write_hvcc(AVIOContext *pb, const uint8_t *data,
+ int size, int ps_array_completeness)
+{
+ return write_configuration_record(pb, data, size,
+ !!ps_array_completeness * FLAG_ARRAY_COMPLETENESS);
+}
+
+int ff_isom_write_lhvc(AVIOContext *pb, const uint8_t *data,
+ int size, int ps_array_completeness)
+{
+ return write_configuration_record(pb, data, size,
+ (!!ps_array_completeness * FLAG_ARRAY_COMPLETENESS) | FLAG_IS_LHVC);
+}
diff --git a/libavformat/hevc.h b/libavformat/hevc.h
index cb66ac66ac..12000b25a5 100644
--- a/libavformat/hevc.h
+++ b/libavformat/hevc.h
@@ -97,4 +97,21 @@ int ff_hevc_annexb2mp4_buf(const uint8_t *buf_in, uint8_t **buf_out,
int ff_isom_write_hvcc(AVIOContext *pb, const uint8_t *data,
int size, int ps_array_completeness);
+/**
+ * Writes L-HEVC extradata (parameter sets with nuh_layer_id > 0, as an
+ * LHEVCDecoderConfigurationRecord) to the provided AVIOContext.
+ *
+ * If the extradata is Annex B format, it gets converted to lhvC format before
+ * writing. Otherwise, hvcC formatted extradata is expected, not lhvC.
+ *
+ * @param pb address of the AVIOContext where the lhvC shall be written
+ * @param data address of the buffer holding the data needed to write the lhvC
+ * @param size size (in bytes) of the data buffer
+ * @param ps_array_completeness whether all parameter sets are in the lhvC (1)
+ * or there may be additional parameter sets in the bitstream (0)
+ * @return >=0 in case of success, a negative value corresponding to an AVERROR
+ * code in case of failure
+ */
+int ff_isom_write_lhvc(AVIOContext *pb, const uint8_t *data,
+ int size, int ps_array_completeness);
#endif /* AVFORMAT_HEVC_H */
--
2.45.2
More information about the ffmpeg-devel
mailing list