[FFmpeg-devel] [PATCH] avformat/matroskaenc: Allow changing the time stamp precision via option

Fri May 21 01:12:38 EEST 2021

On 5/20/2021 7:04 PM, Michael Fabian 'Xaymar' Dirks wrote:
> On 2021-05-20 23:47, James Almer wrote:
>> On 5/20/2021 4:35 PM, Michael Fabian 'Xaymar' Dirks wrote:
>>> On 2021-05-20 21:16, michael.dirks at xaymar.com wrote:
>>>> From: Michael Fabian 'Xaymar' Dirks <michael.dirks at xaymar.com>
>>>>
>>>> Adds "timestamp_precision" to the available option for Matroska/WebM
>>>> muxing. The option enables users and developers to change the precision
>>>> of the time stamps in the Matroska/WebM container up to 1 nanosecond,
>>>> which can aid with the proper detection of constant and variable rate
>>>> content.
>>>>
>>>> Work-around fix for: 259, 6406, 7927, 8909 and 9124.
>>>>
>>>> Signed-off-by: Michael Fabian 'Xaymar' Dirks <michael.dirks at xaymar.com>
>>>> ---
>>>>   doc/muxers.texi           |  8 ++++++++
>>>>   libavformat/matroskaenc.c | 28 ++++++++++++++++++++--------
>>>>   2 files changed, 28 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/doc/muxers.texi b/doc/muxers.texi
>>>> index e1c6ad0829..8655be94ff 100644
>>>> --- a/doc/muxers.texi
>>>> +++ b/doc/muxers.texi
>>>> @@ -1583,6 +1583,14 @@ bitmap is stored bottom-up. Note that this 
>>>> option does not flip the bitmap
>>>>   which has to be done manually beforehand, e.g. by using the vflip 
>>>> filter.
>>>>   Default is @var{false} and indicates bitmap is stored top down.
>>>> + at item timestamp_precision
>>>> +Sets the timestamp precision up to 1 nanosecond for Matroska/WebM, 
>>>> which can
>>>> +improve detection of constant rate content in demuxers. Note that 
>>>> some poorly
>>>> +implemented demuxers may require a timestamp precision of 1 
>>>> millisecond, so
>>>> +increasing it past that point may result in playback issues. Higher 
>>>> precision
>>>> +also reduces the maximum possible timestamp significantly.
>>>> +Default is @var{1/1000} (1 millisecond).
>>>> +
>>>>   @end table
>>>>   @anchor{md5}
>>>> diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
>>>> index 186a25d920..2417ac12cc 100644
>>>> --- a/libavformat/matroskaenc.c
>>>> +++ b/libavformat/matroskaenc.c
>>>> @@ -20,6 +20,7 @@
>>>>    */
>>>>   #include <stdint.h>
>>>> +#include <float.h>
>>>>   #include "av1.h"
>>>>   #include "avc.h"
>>>> @@ -158,6 +159,8 @@ typedef struct MatroskaMuxContext {
>>>>       int                 default_mode;
>>>>       uint32_t            segment_uid[4];
>>>> +
>>>> +    AVRational          time_base;
>>>>   } MatroskaMuxContext;
>>>>   /** 2 bytes * 7 for EBML IDs, 7 1-byte EBML lengths, 6 1-byte uint,
>>>> @@ -1814,6 +1817,7 @@ static int mkv_write_header(AVFormatContext *s)
>>>>       const AVDictionaryEntry *tag;
>>>>       int ret, i, version = 2;
>>>>       int64_t creation_time;
>>>> +    int64_t time_base = 1;
>>>>       if (mkv->mode != MODE_WEBM ||
>>>>           av_dict_get(s->metadata, "stereo_mode", NULL, 0) ||
>>>> @@ -1850,7 +1854,10 @@ static int mkv_write_header(AVFormatContext *s)
>>>>           return ret;
>>>>       pb = mkv->info.bc;
>>>> -    put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, 1000000);
>>>> +    time_base = av_rescale_q(time_base, mkv->time_base, 
>>>> (AVRational){1, 1000000000});
>>>> +    av_log(s, AV_LOG_DEBUG, "TimestampScale is: %" PRId64 " ns\n", 
>>>> time_base);
>>>> +    put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, time_base);
>>>> +
>>>>       if ((tag = av_dict_get(s->metadata, "title", NULL, 0)))
>>>>           put_ebml_string(pb, MATROSKA_ID_TITLE, tag->value);
>>>>       if (!(s->flags & AVFMT_FLAG_BITEXACT)) {
>>>> @@ -1883,11 +1890,11 @@ static int mkv_write_header(AVFormatContext *s)
>>>>           int64_t metadata_duration = get_metadata_duration(s);
>>>>           if (s->duration > 0) {
>>>> -            int64_t scaledDuration = av_rescale(s->duration, 1000, 
>>>> AV_TIME_BASE);
>>>> +            int64_t scaledDuration = av_rescale_q(s->duration, 
>>>> AV_TIME_BASE_Q, mkv->time_base);
>>>>               put_ebml_float(pb, MATROSKA_ID_DURATION, scaledDuration);
>>>>               av_log(s, AV_LOG_DEBUG, "Write early duration from 
>>>> recording time = %" PRIu64 "\n", scaledDuration);
>>>>           } else if (metadata_duration > 0) {
>>>> -            int64_t scaledDuration = av_rescale(metadata_duration, 
>>>> 1000, AV_TIME_BASE);
>>>> +            int64_t scaledDuration = 
>>>> av_rescale_q(metadata_duration, AV_TIME_BASE_Q, mkv->time_base);
>>>>               put_ebml_float(pb, MATROSKA_ID_DURATION, scaledDuration);
>>>>               av_log(s, AV_LOG_DEBUG, "Write early duration from 
>>>> metadata = %" PRIu64 "\n", scaledDuration);
>>>>           } else if (s->pb->seekable & AVIO_SEEKABLE_NORMAL) {
>>>> @@ -1948,12 +1955,12 @@ static int mkv_write_header(AVFormatContext *s)
>>>>       // after 4k and on a keyframe
>>>>       if (IS_SEEKABLE(pb, mkv)) {
>>>>           if (mkv->cluster_time_limit < 0)
>>>> -            mkv->cluster_time_limit = 5000;
>>>> +            mkv->cluster_time_limit = av_rescale_q(5000, 
>>>> (AVRational){1, 1000}, mkv->time_base);
>>>>           if (mkv->cluster_size_limit < 0)
>>>>               mkv->cluster_size_limit = 5 * 1024 * 1024;
>>>>       } else {
>>>>           if (mkv->cluster_time_limit < 0)
>>>> -            mkv->cluster_time_limit = 1000;
>>>> +            mkv->cluster_time_limit = av_rescale_q(1000, 
>>>> (AVRational){1, 1000}, mkv->time_base);
>>>>           if (mkv->cluster_size_limit < 0)
>>>>               mkv->cluster_size_limit = 32 * 1024;
>>>>       }
>>>> @@ -2323,7 +2330,7 @@ static int 
>>>> mkv_write_packet_internal(AVFormatContext *s, const AVPacket *pkt)
>>>>               ret = mkv_end_cluster(s);
>>>>               if (ret < 0)
>>>>                   return ret;
>>>> -            av_log(s, AV_LOG_WARNING, "Starting new cluster due to 
>>>> timestamp\n");
>>>> +            av_log(s, AV_LOG_VERBOSE, "Starting new cluster as 
>>>> timestamp offset exceeds signed 16-bit integer range.\n");
>>>
>>> I've marked this as verbose, as the problem is not exactly something 
>>> we should even bother warning about. So far in my testing, creating 
>>> new clusters has not caused issues.
>>
>> It's not about having issues with the resulting file, it's about size 
>> overhead present in it (Each cluster is a top level element, so one 
>> per block is a lot of bytes), and about meeting the requested limit 
>> set with cluster_size_limit and cluster_time_limit or not.
>> Clusters delimitation is what's used to split the file in Dash Webm 
>> after all, so honoring the requested limits is important and this log 
>> message is useful to let the user know if the values they requested 
>> can be achieved.
> Ah then I misunderstood the purpose of the warning. I will revert this 
> part once I hear back on the latter comment.
>>
>>>
>>>>           }
>>>>       }
>>>> @@ -2738,8 +2745,8 @@ static int mkv_init(struct AVFormatContext *s)
>>>>               track->uid = mkv_get_uid(mkv->tracks, i, &c);
>>>>           }
>>>> -        // ms precision is the de-facto standard timescale for mkv 
>>>> files
>>>> -        avpriv_set_pts_info(st, 64, 1, 1000);
>>>> +        // Use user-defined timescale.
>>>> +        avpriv_set_pts_info(st, 64, mkv->time_base.num, 
>>>> mkv->time_base.den);
>>>>           if (st->codecpar->codec_type == AVMEDIA_TYPE_ATTACHMENT) {
>>>>               if (mkv->mode == MODE_WEBM) {
>>>> @@ -2759,6 +2766,10 @@ static int mkv_init(struct AVFormatContext *s)
>>>>           track->track_num_size = ebml_num_size(track->track_num);
>>>>       }
>>>> +    // Scale the configured cluster_time_limit.
>>>> +    if (mkv->cluster_time_limit >= 0)
>>>> +        mkv->cluster_time_limit = 
>>>> av_rescale_q(mkv->cluster_time_limit, (AVRational){1, 1000}, 
>>>> mkv->time_base);
>>>> +
>>>>       if (mkv->is_dash && nb_tracks != 1)
>>>>           return AVERROR(EINVAL);
>>>> @@ -2826,6 +2837,7 @@ static const AVOption options[] = {
>>>>       { "infer", "For each track type, mark the first track of 
>>>> disposition default as default; if none exists, mark the first track 
>>>> as default.", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_INFER }, 
>>>> 0, 0, FLAGS, "default_mode" },
>>>>       { "infer_no_subs", "For each track type, mark the first track 
>>>> of disposition default as default; for audio and video: if none 
>>>> exists, mark the first track as default.", 0, AV_OPT_TYPE_CONST, { 
>>>> .i64 = DEFAULT_MODE_INFER_NO_SUBS }, 0, 0, FLAGS, "default_mode" },
>>>>       { "passthrough", "Use the disposition flag as-is", 0, 
>>>> AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_PASSTHROUGH }, 0, 0, FLAGS, 
>>>> "default_mode" },
>>>> +    { "timestamp_precision", "Timestamp precision to use for the 
>>>> entire container", OFFSET(time_base), AV_OPT_TYPE_RATIONAL, { .dbl = 
>>>> 0.001 }, 0.000000001, DBL_MAX, FLAGS},
>>
>> Is DBL_MAX useful? Can't another max value be used instead?
> 
> It's not really useful, as the actual limit for AVRational is 
> 2147483647/1, while Matroska/WebM could support as much as 
> 18446744073/1. So perhaps INT_MAX is a better choice here, which is 
> already being used in other places. As I personally do not need lower 
> precision, I have no opinion on what upper limit would be ideal to 
> present to the user.

INT_MAX sounds good, yes.

> 
>>
>>> Reverted the default to 1ms as per discussion. In the future, 
>>> Matroska hopefully adopts rational per-track time bases so that this 
>>> parameter can be deprecated.
>>
>> Ok.
>>
>>>>       { NULL },
>>>>   };
> 
>