[FFmpeg-devel] [PATCH v2 1/2] avformat/flvdec: implement support for parsing ModEx data

Fri Jan 17 01:32:54 EET 2025

On 17.01.2025 00:16, Leo Izen wrote:
> On 1/15/25 5:41 PM, Timo Rothenpieler wrote:
>> ---
>>   libavformat/flv.h    |  5 ++++
>>   libavformat/flvdec.c | 68 ++++++++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 73 insertions(+)
>>
>> diff --git a/libavformat/flv.h b/libavformat/flv.h
>> index 74d3b8de8b..d8f7980f2e 100644
>> --- a/libavformat/flv.h
>> +++ b/libavformat/flv.h
>> @@ -130,6 +130,7 @@ enum {
>>       PacketTypeMetadata              = 4,
>>       PacketTypeMPEG2TSSequenceStart  = 5,
>>       PacketTypeMultitrack            = 6,
>> +    PacketTypeModEx                 = 7,
>>   };
>>   enum {
>> @@ -139,6 +140,10 @@ enum {
>>       AudioPacketTypeMultitrack         = 5,
>>   };
>> +enum {
>> +    PacketModExTypeTimestampOffsetNano = 0,
>> +};
>> +
>>   enum {
>>       AudioChannelOrderUnspecified = 0,
>>       AudioChannelOrderNative      = 1,
>> diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
>> index 2f46475d08..61dd19a691 100644
>> --- a/libavformat/flvdec.c
>> +++ b/libavformat/flvdec.c
>> @@ -1279,6 +1279,62 @@ static int 
>> flv_update_video_color_info(AVFormatContext *s, AVStream *st)
>>       return 0;
>>   }
>> +static int flv_parse_mod_ex_data(AVFormatContext *s, int *pkt_type, 
>> int *size, int64_t *dts)
>> +{
>> +    int ex_type, ret;
>> +    uint8_t *ex_data;
>> +
>> +    int ex_size = (uint8_t)avio_r8(s->pb) + 1;
>> +    *size -= 1;
>> +
>> +    if (ex_size == 256) {
>> +        ex_size = (uint16_t)avio_rb16(s->pb) + 1;
>> +        *size -= 2;
>> +    }
>> +
>> +    if (ex_size >= *size) {
>> +        av_log(s, AV_LOG_WARNING, "ModEx size larger than remaining 
>> data!\n");
>> +        return AVERROR(EINVAL);
>> +    }
>> +
>> +    ex_data = av_malloc(ex_size);
>> +    if (!ex_data)
>> +        return AVERROR(ENOMEM);
>> +
>> +    ret = avio_read(s->pb, ex_data, ex_size);
>> +    if (ret < 0) {
>> +        av_free(ex_data);
>> +        return ret;
>> +    }
>> +    *size -= ex_size;
>> +
>> +    ex_type = (uint8_t)avio_r8(s->pb);
>> +    *size -= 1;
>> +
>> +    *pkt_type = ex_type & 0x0f;
>> +    ex_type &= 0xf0;
>> +
>> +    if (ex_type == PacketModExTypeTimestampOffsetNano) {
>> +        uint32_t nano_offset;
>> +
>> +        if (ex_size != 3) {
>> +            av_log(s, AV_LOG_WARNING, "Invalid ModEx size for Type 
>> TimestampOffsetNano!\n");
>> +            nano_offset = 0;
>> +        } else {
>> +            nano_offset = (ex_data[0] << 16) | (ex_data[1] << 8) | 
>> ex_data[2];
>> +        }
>> +
>> +        // this is not likely to ever add anything, but right now 
>> timestamps are with ms precision
>> +        *dts += nano_offset / 1000000;
>> +    } else {
>> +        av_log(s, AV_LOG_INFO, "Unknown ModEx type: %d", ex_type);
>> +    }
>> +
>> +    av_free(ex_data);
>> +
>> +    return 0;
>> +}
>> +
>>   static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
>>   {
>>       FLVContext *flv = s->priv_data;
>> @@ -1347,6 +1403,12 @@ retry:
>>               enhanced_flv = 1;
>>               pkt_type = flags & ~FLV_AUDIO_CODECID_MASK;
>> +            while (pkt_type == PacketTypeModEx) {
>> +                ret = flv_parse_mod_ex_data(s, &pkt_type, &size, &dts);
>> +                if (ret < 0)
>> +                    goto leave;
>> +            }
>> +
> 
> 
> Just to double check, is this a while loop instead of an if statement 
> because multiple of these can exist in a row? If so, should we in theory 
> add nano_offset together multiple times? i.e. if we have 500 us twice, 
> wouldn't that add together to 1 ms of DTS offset, rather than 0, per 
> current implementation? Or is this really not an issue in practice?
> 
The while loop is straight from the spec.
There can be multiple of this in a row. But I'm pretty sure the intent 
of that is to prepare for additional future types, not to send the nano 
offset multiple times.
That'd be pretty pointless, given you can add more than 1ms via it, and 
at that point you could just increase the regular ms accuracy timestamp.