[MPlayer-dev-eng] Lots of stuff for NUT

Luca Barbato lu_zero at gentoo.org
Tue Jan 3 21:20:44 CET 2006


Oded Shimon wrote:
> 
> Index: DOCS/tech/mpcf.txt
> ===================================================================
> RCS file: /cvsroot/mplayer/main/DOCS/tech/mpcf.txt,v
> retrieving revision 1.86
> diff -u -r1.86 mpcf.txt
> --- DOCS/tech/mpcf.txt	22 Dec 2005 05:48:04 -0000	1.86
> +++ DOCS/tech/mpcf.txt	3 Jan 2006 19:28:16 -0000
> @@ -1,5 +1,5 @@
>  ========================================
> -NUT Open Container Format DRAFT 20051118
> +NUT Open Container Format DRAFT 20060103
>  ========================================
>  
>  
> @@ -21,13 +21,13 @@
>  
>  Compact
>      ~0.2% overhead, for normal bitrates

s/sec/s

> -    index is <10kb per hour (1 keyframe every 3sec)
> +    index is <100kb per hour (1 keyframe every 3sec)
>      a usual header for a file is about 100 bytes (audio + video headers together)
> -    a packet header is about ~1-8 bytes
> +    a packet header is about ~1-5 bytes
>  
>  Error resistant

s:/:and

>      seeking / playback without an index
> -    headers & index can be repeated
> +    headers can be repeated
>      damaged files can be played back with minimal data loss and fast
>      resync times
>  
> @@ -134,26 +134,29 @@
>      version                             v
>      stream_count                        v
>      max_distance                        v
> -    max_index_distance                  v
> -    global_time_base_nom                v
> -    global_time_base_denom              v
>      for(i=0; i<256; ){
>          tmp_flag                        v
>          tmp_fields                      v
> -        if(tmp_fields>0) tmp_pts        s
> -        if(tmp_fields>1) tmp_mul        v
> -        if(tmp_fields>2) tmp_stream     v
> -        if(tmp_fields>3) tmp_size       v
> +        if(tmp_fields>0) tmp_mul        v
> +        else tmp_mul=1
> +        if(tmp_fields>1) tmp_sflag      v
> +        else tmp_sflag=0
> +        if(tmp_fields>2) tmp_pts        s
> +        else tmp_pts=0
> +        if(tmp_fields>3) tmp_stream     v
> +        else tmp_stream=0
> +        if(tmp_fields>4) tmp_size       v
>          else tmp_size=0
> -        if(tmp_fields>4) tmp_res        v
> +        if(tmp_fields>5) tmp_res        v
>          else tmp_res=0
> -        if(tmp_fields>5) count          v
> +        if(tmp_fields>6) count          v
>          else count= tmp_mul - tmp_size
> -        for(j=6; j<tmp_fields; j++){
> +        for(j=7; j<tmp_fields; j++){
>              tmp_reserved[i]             v
>          }
>          for(j=0; j<count && i<256; j++, i++){
>              flags[i]= tmp_flag;
> +            stream_flags[i]= tmp_sflag;
>              stream_id_plus1[i]= tmp_stream;
>              data_size_mul[i]= tmp_mul;
>              data_size_lsb[i]= tmp_size + j;
> @@ -214,6 +217,9 @@
>      if(flags[frame_code]&1){
>          data_size_msb                   v
>      }
> +    if(flags[frame_code]&2){
> +        coded_stream_flags              v
> +    }
>      for(i=0; i<reserved_count[frame_code]; i++)
>          reserved                        v
>      data
> @@ -221,12 +227,36 @@
>  index:
>      index_startcode                     f(64)
>      packet header
> -    stream_id                           v
>      max_pts                             v
> -    index_length                        v
> -    for(i=0; i<index_length; i++){
> -        index_pts                       v
> -        index_position                  v
> +    syncpoints                          v
> +    for(i=0; i<syncpoints; i++){
> +        syncpoint_pos_div8              v
> +    }

wordy description needed ^^;

> +    for(i=0; i<stream_count; i++) {
> +        j = 0
> +        while (j < syncpoints) {
> +            repeat                      v
> +            type = repeat & 1
> +            repeat = repeat >> 1
> +            b = repeat & 1
> +            repeat = (repeat >> 1) + 1
> +            if (type) {
> +                key_pts                 v
> +                key_pts += syncpoint[j-1].stream[i].key_pts
> +                for(k=0; k<repeat; k++) {
> +                    syncpoint[j+k].stream[i].back_ptr = syncpoint[j-b].pos_div8
> +                    syncpoint[j+k].stream[i].key_pts = key_pts
> +                }
> +            } else {
> +                for(k=0; k<repeat; k++) {
> +                    syncpoint[j+k].stream[i].back_ptr = syncpoint[j+k-b].pos_div8
> +                    key_pts             v
> +                    key_pts += syncpoint[j+k-1].stream[i].key_pts
> +                    syncpoint[j+k].stream[i].key_pts = key_pts
> +                }
> +            }
> +            j += repeat
> +        }
>      }
>      reserved_bytes
>      checksum                            u(32)
> @@ -243,6 +273,8 @@
>              name                        vb
>          if(type=="v")
>              value                       v
> +        else if(type=="s")
> +            value                       s
>          else
>              value                       vb
>      }
> @@ -254,10 +286,26 @@
>      packet header
>      info_frame
>  
> -sync_point:
> -    frame_startcode                     f(64)
> -    global_timestamp                    v
> -    back_ptr                            v
> +syncpoint:
> +    syncpoint_startcode                 f(64)
> +    coded_pts                           v
> +    stream = coded_pts % stream_count
> +    back_ptr_div8[0]                    v
> +    back_ptr[stream] = back_ptr_div8[0]
> +    global_key_pts = coded_pts/stream_count
> +    key_pts[stream] = global_key_pts
> +    n=1
> +    for (i=0; i<stream_count; i++) {
> +        if (i == stream) continue
> +        coded_pts                       v
> +        A= coded_pts % (n+1)
> +        B= coded_pts / (n+1)
> +        if(A == n)
> +            back_ptr_div8[n++]          v
> +        back_ptr[i]= back_ptr_div8[A]
> +        key_pts[i] = convert_ts(global_key_pts, timebase[stream], timebase[i])
> +        key_pts[i] -= B
> +    }
>  
>              Complete definition:
>  
> @@ -277,15 +325,13 @@
>              info_packet
>          }
>          while(next_code != main_startcode){
> -            if(next_code == frame_startcode)
> -                sync_point
> +            if(next_code == syncpoint_startcode)
> +                syncpoint
>              frame
>          }
>      }
>      if (next_code == index_startcode){
> -        while(!eof){
> -            index
> -        }
> +        index
>          index_ptr                       u(64)
>      }
>  
> @@ -297,12 +343,33 @@
>      size of the packet data (exactly the distance from the first byte
>      after the forward_ptr to the first byte of the next packet)
>  
> -back_ptr
> +back_ptr[stream]

First say what a back_ptr is, then what the constraints are, and then
how to calculate the correct address.

>      real_back_ptr = back_ptr * 8 + 7
> -    real_back_ptr must point to a position such that a syncpoint
> -    startcode begins within the next 8 bytes, and such that at least
> -    one keyframe for each stream lies between the syncpoint to which
> -    real_back_ptr points, and the current syncpoint.
> +    real_back_ptr must point to a position within 8 bytes of a syncpoint
> +    startcode. This syncpoint MUST be the closest syncpoint such that at
> +    least one keyframe for this stream lies between it and the current
> +    syncpoint, or immediately after the current syncpoint.
> +

Is End Of Relevance (EOR) defined before this point?

> +    Note: back_ptr can be zero, when the frame immediately following is
> +    a keyframe of this stream, or EOR has been set for this stream.
> +    back_ptr of a stream where EOR is set MUST be zero.
> +
> +    Note: SOR is a keyframe like any other and back_ptr must point to it if
> +    necessary.
> +
> +global_key_pts
> +    After a syncpoint, last_pts of each stream is to be set to:

convert_ts isn't defined before this point; I'd move its definition up.

> +    last_pts[i] = convert_ts(global_key_pts, timebase[stream], timebase[i])
> +
> +    To be able to code key_pts for every stream, global_key_pts MUST be the
> +    max key_pts across all streams.
> +
> +key_pts[stream]
> +    The pts of the last keyframe in the stream until the syncpoint
> +    including the frame immediately following the syncpoint.
> +
> +    Note: After an EOR, key_pts MUST be set to global_key_pts in correct
> +    timebase. This is to be done by using coded_pts of 0.
>  
>  file_id_string
>      "nut/multimedia container\0"
> @@ -316,13 +383,9 @@
>  stream_starcode
>      0x11405BF2F9DBULL + (((uint64_t)('N'<<8) + 'S')<<48)
>  
> -frame_startcode
> +syncpoint_startcode
>      0xE4ADEECA4569ULL + (((uint64_t)('N'<<8) + 'K')<<48)
>  
> -    frame_startcodes SHOULD be placed immediately before a keyframe if the
> -    previous frame of the same stream was a non-keyframe, unless such
> -    non-keyframe - keyframe transitions are very frequent
> -
>  index_startcode
>      0xDD672F23E64EULL + (((uint64_t)('N'<<8) + 'X')<<48)
>  
> @@ -333,22 +396,21 @@
>      NUT version. The current value is 2.
>  
>  max_distance
> -    max distance of frame_startcodes, the distance may only be larger if
> -    there is only a single frame between the two frame_startcodes this can
> +    max distance of syncpoints, the distance may only be larger if
> +    there is no more than a single frame between the two syncpoints. This can
>      be used by the demuxer to detect damaged frame headers if the damage
>      results in too long of a chain
>  
> +    Syncpoints MUST be placed immediately before a non-EOR keyframe if the
> +    back_ptr of this stream in the last syncpoint is greater than
> +    max_distance.
> +
> +    The beginning of a frame is defined by the first byte of the frame header.
> +
>      SHOULD be set to <=32768 or at least <=65536 unless there is a very
>      good reason to set it higher, otherwise reasonable error recovery will
>      be impossible
>  
> -max_index_distance
> -    max distance of keyframes which are represented in the index, the
> -    distance between consecutive entries A and B may only be larger if
> -    there are no keyframes within this stream between A and B
> -    SHOULD be set to <=32768 or at least <=65536 unless there is a very
> -    good reason to set it higher
> -
>  stream_id
>      Stream identifier
>      stream_id MUST be < stream_count
> @@ -381,22 +443,15 @@
>          29.97     1001             30000
>          23.976    1001             24000
>  
> -global_time_base_nom / global_time_base_denom = global_time_base
> -    the length of a timer tick in seconds
> -    global_time_base_nom and global_time_base_denom MUST NOT be 0
> -    global_time_base_nom and global_time_base_denom MUST be relatively prime
> -    global_time_base_denom MUST be < 2^31
> -
> -global_timestamp
> -    timestamp in global_time_base units
> -    when a global_timestamp is encountered the last_pts of all
> -    streams is set to the following:
> -
> -    ln       = global_time_base_nom*time_base_denom
> -    sn       = global_timestamp
> -    d1       = global_time_base_denom
> -    d2       = time_base_nom
> -    last_pts = (ln/d1*sn + ln%d1*sn/d1)/d2
> +convert_ts
> +    To convert a timestamp between 2 different timebases, the following calculation is
> +    defined:
> +
> +    ln        = from_time_base_nom*to_time_base_denom
> +    sn        = from_timestamp
> +    d1        = from_time_base_denom
> +    d2        = to_time_base_nom
> +    timestamp = (ln/d1*sn + ln%d1*sn/d1)/d2
>      Note: this calculation MUST be done with unsigned 64 bit integers, and
>      is equivalent to (ln*sn)/(d1*d2) but this would require a 96bit integer
>  
> @@ -422,17 +477,32 @@
>      different from the first byte of any startcode
>  
>  flags[frame_code]
> -    first of the flags from MSB to LSB are called KD
> -    if D is 1 then data_size_msb is coded, otherwise data_size_msb is 0
> -    K is the keyframe_type
> -        0 -> no keyframe,
> -        1 -> keyframe,
> -    flags=4 can be used to mark illegal frame_code bytes
> -    frame_code=78 must have flags=4
> -    Note: frames MUST NOT depend(1) upon frames prior to the last
> -          frame_startcode
> -    Important: depend(1) means dependency on the container level (NUT) not
> -    dependency on the codec level
> +    Bit  Name             Description
> +      1  data_size_msb    if set, data_size_msb is at frame header,
> +                          otherwise data_size_msb is 0
> +      2  more_flags       if set, stream control flags are at frame header.
> +      4  invalid          if set, frame_code is invalid.
> +
> +    frame_code=78 ('N') MUST have flags=4 (invalid)
> +
> +stream_flags
> +    stream_flags is "stream_flags[frame_code] | coded_stream_flags"
> +
> +    Bit  Name               Description
> +      1  is_key             if set, frame is keyframe
> +      2  end_of_relevance   if set, stream has no relevance on
> +                            presentation. (EOR)
> +      4  start_of_relevance if set, unsets EOR. (SOR)
> +
> +    EOR and SOR frames MUST be zero-length and MUST be flagged as keyframes.
> +    All streams SHOULD end with EOR, where the pts of the EOR indicates the
> +    end presentation time of the final frame.
> +    An EOR set stream MUST be unset by an SOR before any content frames.
> +    An SOR sets the dts_cache of the stream to the pts of the SOR.
> +    The dts of an SOR is its pts. SOR pts MUST be smaller than the pts of all
> +    subsequent frames on this stream.
> +    Note: SOR can and SHOULD immediately precede the first content frame
> +    of its stream.
>  
>  stream_id_plus1[frame_code]
>      must be <250
> @@ -479,9 +549,10 @@
>      stream, into which the current pts is inserted and the element with
>      the smallest value is removed, this is then the current dts
>      this buffer is initalized with decode_delay -1 elements
> -    all frames must be monotone, that means a frame
> -    which occurs later in the stream must have a larger or equal dts
> -    than an earlier frame
> +
> +    Pts of all frames in all streams MUST be bigger than or equal to dts of all
> +    previous frames in all streams, compared in common timebase. (SOR and
> +    EOR frames are NOT exempt from this rule)
>  
>  width/height
>      MUST be set to the coded width/height
> @@ -508,23 +579,22 @@
>      forward_ptr until last byte before the checksum).
>  
>  max_pts
> -    The highest pts in the stream.
> -
> -index_pts
> -    value of the pts of a keyframe relative to the last keyframe
> -    stored in this index
> -
> -index_position
> -    position in bytes of the first byte of a keyframe, relative to the
> -    last keyframe stored in this index
> -    there MUST be no keyframe with the same stream_id as this index between
> -    two consecutive index entries if they are more than max_index_distance
> -    apart
> +    s = max_pts % stream_count
> +    pts = max_pts / stream_count
> +    The highest pts in the entire file in the timebase of stream 's'.
> +
> +syncpoints
> +    number of syncpoints in the file.
> +
> +syncpoint_pos_div8
> +    offset from beginning of file to up to 7 bytes before the syncpoint
> +    referred to in this index entry. Relative to position of last
> +    syncpoint.
>  
>  index_ptr
> -    Length in bytes from the first byte of the first index startcode
> -    to the first byte of the index_ptr. If there is no index, index_ptr
> -    MUST NOT be written.
> +    Length in bytes from the first byte of the index startcode to the first
> +    byte of the index_ptr. If there is no index, index_ptr MUST NOT be
> +    written.
>  
>  id
>      the ID of the type/name pair, so it is more compact
> 

Looks ok, with just some minor changes and maybe a paragraph to explain 
back pointers, relevance, etc.

lu

-- 

Luca Barbato

Gentoo/linux Developer		Gentoo/PPC Operational Leader
http://dev.gentoo.org/~lu_zero




More information about the MPlayer-dev-eng mailing list