[Ffmpeg-devel] [PATCH] FLV decoder metadata reading

Allan Hsu allan
Sat Dec 9 03:43:49 CET 2006


>From the secret journal of Michael Niedermayer:
[...]
> > +            for(keylen = get_be16(ioc); keylen != 0; keylen = get_be16(ioc)) {
> > +                url_fskip(ioc, keylen); //skip key string
> > +                if(amf_skip_object(ioc, NULL) < 0) //skip the following object
> > +                    return -1; //if we couldn't skip, bomb out.
> > +            }
> 
> while(keylen = get_be16(ioc)){
> }
> 
> is slightly simpler (no keylen = get_be16(ioc) duplication)

Changed.

> > +            if(get_byte(ioc) != AMF_END_OF_OBJECT)
> > +                return -1;
> > +        }
> > +            break;
> > +        case        AMF_DATA_TYPE_NULL:
> > +        case   AMF_DATA_TYPE_UNDEFINED:
> > +        case AMF_DATA_TYPE_UNSUPPORTED:
> > +            break; //these take up no additional space
> > +        case       AMF_DATA_TYPE_ARRAY: {
> > +            unsigned int arraylen, i;
> > +
> > +            arraylen = get_be32(ioc);
> > +            for(i = 0; i < arraylen; i++)
> > +                amf_skip_object(ioc, NULL);
> > +        }
> > +            break;
> > +        case        AMF_DATA_TYPE_DATE:
> > +            url_fskip(ioc, 8 + 2); //timestamp (double) and UTC offset (int16)
> > +            break;
> > +        default: //unsupported, we couldn't skip.
> > +            return -1;
> > +    }
> > +
> > +    return 0;
> > +}
> 
> is amf_skip_object() really needed? isnt a simple loop which reads
> double/string/bool/date and either assigns it to something or not
> depending on what it was enough?

The AMF array types and object types are both used in onMetaData tags;
these types contain an arbitrary number of nested AMF objects.
Since there is no object index information, it isn't possible to find the
next object without parsing these objects and any nested objects. We
don't care about any of the complex types, but it is necessary to parse
and skip in order to get to any of the simpler types we do care about.

[...]
> > +static int amf_get_string(ByteIOContext *ioc, char *buffer, int buffsize) {
> > +    int length;
> > +
> > +    length = get_be16(ioc);
> > +    if(length > buffsize)
> > +        return -1; //string will not fit in buffer
> > +
> > +    get_buffer(ioc, buffer, length);
> > +
> > +    buffer[length] = '\0';
> > +
> > +    return length;
> > +}
> 
> shouldnt it rather be?
> 
> if(length >= buffsize){
>     url_fskip(ioc, length);
>     return -1;
> }

More like:

if(length >= buffsize - 1)
...

Fixed.

[...]
> > +    if(   flv_read_tag_header(&s->pb, &taginfo)  < 0 || taginfo.type != FLV_TAG_TYPE_META
> > +       || flv_read_metabody(s, taginfo.next_pos) < 0 || flv_validate_context(s) < 0
> > +       || flv_create_streams(s) < 0) {
> > +        //error reading first tag header, first tag is not metadata, or metadata incomplete.
> > +        s->ctx_flags |= AVFMTCTX_NOHEADER;
> > +
> 
> isnt it simpler to simple read all the metadata in flv_read_packet() and just
> call flv_read_packet() from flv_read_header() once if needed?
> 
> >      if(!url_is_streamed(&s->pb)){
> >          const int fsize= url_fsize(&s->pb);
> >          url_fseek(&s->pb, fsize-4, SEEK_SET);
> > @@ -62,7 +387,8 @@
> >          }
> >      }
> >  
> > -    url_fseek(&s->pb, offset, SEEK_SET);
> > +        url_fseek(&s->pb, offset, SEEK_SET);
> 
> this seeks backward or? if so its a matter of luck if it works, if theres
> too much metadata then it will fail if the stream is not "seekable"
[...]

I tried to implement your suggested changes to flv_read_header() and
flv_read_packet(), but it only made the backward-seeking issue worse.
Since metadata packets don't have a stream, they don't have a valid
stream index and so the next audio/video packet is returned by
flv_read_packet(), which makes the maximum length of a backward seek
in flv_read_header() quite large.

Instead, I've opted to try to reduce the size of any backward seeks in
the attached revision of the patch. Backward seeks will only occur when
there is not enough metadata. In that case, if the first
tag was a metadata tag, the backward seek should go to the second
packet, where flv_read_metabody() should have stopped, which should
be effectively no movement. If the first tag wasn't a metadata tag,
the backward seek should be 15 bytes (the size of the FLV tag header
read by flv_read_tag_header()).

This revision also includes Aurelien's suggestions from earlier today.

	-Allan
-- 
Allan Hsu <allan at counterpop dot net>
1E64 E20F 34D9 CBA7 1300 1457 AC37 CBBB 0E92 C779
-------------- next part --------------
Index: libavformat/flvdec.c
===================================================================
--- libavformat/flvdec.c	(revision 7260)
+++ libavformat/flvdec.c	(working copy)
@@ -27,6 +27,311 @@
 #include "avformat.h"
 #include "flv.h"
 
+typedef struct { //demuxer private state: stream properties gathered from the FLV header flags and the onMetaData tag
+    int has_video; //set in flv_read_header() when FLV_HEADER_FLAG_HASVIDEO is present
+    int has_audio; //set in flv_read_header() when FLV_HEADER_FLAG_HASAUDIO is present
+    int is_stereo; //"stereo" metadata key (boolean)
+    int videocodecid; //"videocodecid" metadata key
+    int audiocodecid; //"audiocodecid" metadata key; preset to -1 in flv_read_header() because 0 is a valid id
+    int width; //"width" metadata key
+    int height; //"height" metadata key
+    int samplerate; //"audiosamplerate" metadata key; snapped to an exact rate by flv_validate_context()
+    int samplesize; //"audiosamplesize" metadata key; flv_validate_context() accepts only 8 or 16
+} FLVDemuxContext;
+
+typedef struct { //fields of one FLV tag header, as filled in by flv_read_tag_header()
+    unsigned int pos; //stream position (url_ftell) at which the header read started
+    unsigned int prev_tag_size; //32-bit big-endian value read first, ahead of the tag type byte
+    unsigned int next_pos; //position just past the tag body: body_size + position after the header
+    int type; //tag type byte; callers compare it against FLV_TAG_TYPE_*
+    int body_size; //24-bit big-endian body length
+    int pts; //24-bit big-endian timestamp; flv_read_packet() uses it as the packet pts
+} FLVTagInfo;
+
+inline static void create_vp6_extradata(AVStream *stream) { //make sure the stream's codec has a 1-byte extradata buffer (no-op if extradata_size is already 1)
+    if(stream->codec->extradata_size != 1) {
+        stream->codec->extradata = av_mallocz(1); //zeroed, so the byte has a defined value even if no caller writes it afterwards
+        stream->codec->extradata_size = stream->codec->extradata ? 1 : 0; //never advertise a buffer that failed to allocate
+    }
+}
+
+static int amf_skip_object(ByteIOContext *ioc, AMFDataType *type) { //skip one AMF value, recursing into objects and arrays; *type is an already-read type byte, or NULL to read it here; returns 0, or -1 if the value cannot be skipped
+    AMFDataType objectType;
+
+    objectType = (type != NULL ? *type : get_byte(ioc));
+    switch(objectType) {
+        case AMF_DATA_TYPE_NUMBER:
+            url_fskip(ioc, 8); break; //double precision float
+        case AMF_DATA_TYPE_BOOL:
+            url_fskip(ioc, 1); break; //byte
+        case AMF_DATA_TYPE_STRING:
+            url_fskip(ioc, get_be16(ioc)); break; //16-bit length, then that many bytes
+        case AMF_DATA_TYPE_OBJECT: {
+            unsigned int keylen;
+
+            while((keylen = get_be16(ioc))) { //a zero-length key ends the member list
+                url_fskip(ioc, keylen); //skip key string
+                if(amf_skip_object(ioc, NULL) < 0) //skip the following object
+                    return -1; //if we couldn't skip, bomb out.
+            }
+            if(get_byte(ioc) != AMF_END_OF_OBJECT)
+                return -1;
+        }
+            break;
+        case AMF_DATA_TYPE_NULL:
+        case AMF_DATA_TYPE_UNDEFINED:
+        case AMF_DATA_TYPE_UNSUPPORTED:
+            break; //these take up no additional space
+        case AMF_DATA_TYPE_ARRAY: {
+            unsigned int arraylen, i;
+
+            arraylen = get_be32(ioc);
+            for(i = 0; i < arraylen && !url_feof(ioc); i++) //arraylen comes from the file: stop at EOF instead of iterating a bogus count
+                if(amf_skip_object(ioc, NULL) < 0) return -1; //propagate a failed nested skip, as the object case does
+        }
+            break;
+        case AMF_DATA_TYPE_DATE:
+            url_fskip(ioc, 8 + 2); //timestamp (double) and UTC offset (int16)
+            break;
+        default: //unsupported, we couldn't skip.
+            return -1;
+    }
+
+    return 0;
+}
+
+static int amf_get_object(ByteIOContext *ioc, AMFDataType type, void *dest) { //read one AMF value of the expected type into *dest; returns 0 on success, -1 if the type differs or cannot be stored
+    AMFDataType actualType = get_byte(ioc); //type byte that precedes every value
+
+    if(actualType != type) {
+        //type was not the one we expected; skip object, don't touch dest, return error.
+        amf_skip_object(ioc, &actualType); //result ignored: -1 is returned either way
+        return -1;
+    }
+
+    //we currently only need these two types for metadata parsing.
+    switch(type) {
+        case AMF_DATA_TYPE_NUMBER:
+            *(double *)dest = av_int2dbl(get_be64(ioc)); //dest must point to a double
+            break;
+        case AMF_DATA_TYPE_BOOL:
+            *(unsigned char *)dest = get_byte(ioc); //dest must point to an unsigned char
+            break;
+        default:
+            return -1; //type matched but is not storable; the value bytes are not consumed here
+    }
+
+    return 0;
+}
+
+static int amf_get_string(ByteIOContext *ioc, char *buffer, int buffsize) { //read a 16-bit-length-prefixed string into buffer and NUL-terminate it; returns its length, or -1 (after skipping the string) if it is considered too long
+    int length;
+
+    length = get_be16(ioc); //length prefix; no type byte is read here
+    if(length >= buffsize - 1) { //NOTE(review): also rejects length == buffsize - 1, which would still fit together with its terminator
+        url_fskip(ioc, length); //keep the stream positioned after the string even on failure
+        return -1; //string will not fit in buffer
+    }
+
+    get_buffer(ioc, buffer, length); //NOTE(review): return value is not checked, so a short read goes unnoticed
+
+    buffer[length] = '\0';
+
+    return length;
+}
+
+static int flv_read_tag_header(ByteIOContext *ioc, FLVTagInfo *info) { //parse one tag header at the current position into *info; returns 0, or AVERROR_IO if EOF was hit while reading it
+    info->pos = url_ftell(ioc); //remember where this tag header starts
+    info->prev_tag_size = get_be32(ioc);
+    info->type = get_byte(ioc);
+    info->body_size = get_be24(ioc);
+    info->pts = get_be24(ioc);
+//    av_log(s, AV_LOG_DEBUG, "type:%d, size:%d, pts:%d\n", info->type, info->body_size, info->pts);
+
+    if(url_feof(ioc)) //checked after the reads above; next_pos is left unset on this path
+        return AVERROR_IO;
+
+    url_fskip(ioc, 4); /* reserved */
+
+    info->next_pos = info->body_size + url_ftell(ioc); //the body starts here, so this is the position just past it
+
+    return 0;
+}
+
+static int flv_read_metabody(AVFormatContext *s, unsigned int next_pos) { //parse an onMetaData tag body into s->duration and the FLVDemuxContext; always leaves the stream at next_pos; returns 0 on success, -1 if the body is not well-formed metadata
+    FLVDemuxContext *context;
+    AMFDataType type;
+    ByteIOContext *ioc;
+    int keylen = 0; //stays 0 if the loop condition below fails before amf_get_string() is ever called
+    unsigned int itemcount;
+    char buffer[256];
+    double dbl;
+    unsigned char boolval; //not named "bool": that identifier clashes with <stdbool.h> and C23
+
+    context = s->priv_data;
+    ioc = &s->pb;
+
+    //first object needs to be "onMetaData" string
+    type = get_byte(ioc);
+    if(type != AMF_DATA_TYPE_STRING || amf_get_string(ioc, buffer, sizeof(buffer)) < 0 || strcmp(buffer, "onMetaData") != 0)
+        goto bail;
+
+    //second object needs to be a mixedarray
+    type = get_byte(ioc);
+    if(type != AMF_DATA_TYPE_MIXEDARRAY)
+        goto bail;
+
+    //this number has been known to misreport the number of items in the mixed array, so we don't use it.
+    itemcount = get_be32(ioc); //read only to consume the 4 bytes
+
+    while(url_ftell(ioc) < next_pos - 2 && (keylen = amf_get_string(ioc, buffer, sizeof(buffer))) > 0) { //each entry is a key string followed by a typed value; an empty key ends the list
+        if(!strcmp(buffer, "stereo")) {
+            if(!amf_get_object(ioc, AMF_DATA_TYPE_BOOL, &boolval)) context->is_stereo = boolval;
+        } else if(!amf_get_object(ioc, AMF_DATA_TYPE_NUMBER, &dbl)) { //non-number values are skipped inside amf_get_object()
+            if(!strcmp(buffer, "duration"))             s->duration           = dbl * AV_TIME_BASE;
+            else if(!strcmp(buffer, "width"))           context->width        = dbl;
+            else if(!strcmp(buffer, "height"))          context->height       = dbl;
+            else if(!strcmp(buffer, "audiocodecid"))    context->audiocodecid = dbl;
+            else if(!strcmp(buffer, "videocodecid"))    context->videocodecid = dbl;
+            else if(!strcmp(buffer, "audiosamplerate")) context->samplerate   = dbl;
+            else if(!strcmp(buffer, "audiosamplesize")) context->samplesize   = dbl;
+        }
+    }
+
+    if(keylen < 0 || get_byte(ioc) != AMF_END_OF_OBJECT) //an over-long key, or a list that is not followed by the end marker
+        goto bail;
+
+    url_fseek(ioc, next_pos, SEEK_SET);
+    return 0;
+
+bail:
+    url_fseek(ioc, next_pos, SEEK_SET); //resynchronize on the next tag even when parsing failed
+    return -1;
+}
+
+static int flv_validate_context(AVFormatContext *s) { //check that the collected metadata is complete and plausible; returns 0 if it is, -1 otherwise; also snaps approximate sample rates to exact ones
+    FLVDemuxContext *context = s->priv_data;
+
+    //if any values do not validate, assume metadata tool was brain dead and fail.
+    if(s->duration <= 0)
+        return -1;
+
+    if(context->has_audio) {
+        switch((unsigned)context->audiocodecid << FLV_AUDIO_CODECID_OFFSET) { //shifted as unsigned: audiocodecid is -1 when unset, and left-shifting a negative int is undefined
+            case FLV_CODECID_PCM_BE:
+            case FLV_CODECID_ADPCM:
+            case FLV_CODECID_MP3:
+            case FLV_CODECID_PCM_LE:
+            case FLV_CODECID_NELLYMOSER_8HZ_MONO:
+            case FLV_CODECID_NELLYMOSER:
+                break;
+            default:
+                return -1; //unknown or never-set audio codec id
+        }
+
+        //flvtool (and maybe others) writes approximate sample rates for some awesome reason.
+        switch(context->samplerate) {
+            case 44100: case 44000: context->samplerate = 44100; break;
+            case 22050: case 22000: context->samplerate = 22050; break;
+            case 11025: case 11000: context->samplerate = 11025; break;
+            case  5512: case  5500: context->samplerate =  5512; break;
+            default:
+                return -1;
+        }
+
+        if(context->samplesize != 8 && context->samplesize != 16)
+            return -1;
+    }
+
+    if(context->has_video) {
+        switch(context->videocodecid) {
+            case FLV_CODECID_H263:
+            case FLV_CODECID_SCREEN:
+            case FLV_CODECID_VP6:
+                break;
+            default:
+                return -1;
+        }
+
+        if(context->height == 0 || context->width == 0) //dimensions missing from the metadata
+            return -1;
+    }
+
+    return 0;
+}
+
+static int flv_create_streams(AVFormatContext *s) { //create the video and/or audio AVStream described by the FLVDemuxContext; returns 0, or -1 if a stream could not be allocated
+    FLVDemuxContext *context;
+    AVStream *audioStream, *videoStream;
+    
+    context = s->priv_data;
+    audioStream = NULL;
+    videoStream = NULL;
+
+    if(context->has_video) {
+        videoStream = av_new_stream(s, 0); //video stream gets id 0
+        if(videoStream == NULL)
+            return -1;
+
+        av_set_pts_info(videoStream, 24, 1, 1000); //24-bit pts with a 1/1000 time base
+
+        videoStream->codec->codec_type = CODEC_TYPE_VIDEO;
+        videoStream->codec->width = context->width;
+        videoStream->codec->height = context->height;
+
+        switch(context->videocodecid) { //map the FLV video codec id to a CODEC_ID_*
+            case FLV_CODECID_H263:
+                videoStream->codec->codec_id = CODEC_ID_FLV1;
+                break;
+            case FLV_CODECID_SCREEN:
+                videoStream->codec->codec_id = CODEC_ID_FLASHSV;
+                break;
+            case FLV_CODECID_VP6:
+                videoStream->codec->codec_id = CODEC_ID_VP6F;
+                create_vp6_extradata(videoStream); //NOTE(review): only allocates the buffer; the extradata byte is not written in this function
+                break;
+            default:
+                av_log(s, AV_LOG_INFO, "Unsupported video codec in META tag: (%x)\n", context->videocodecid);
+                videoStream->codec->codec_tag = context->videocodecid; //keep the raw id as the codec tag
+        }
+    }
+
+    if(context->has_audio) {
+        audioStream = av_new_stream(s, 1); //audio stream gets id 1
+        if(audioStream == NULL)
+            return -1; //a video stream created above is not removed on this path
+
+        av_set_pts_info(audioStream, 24, 1, 1000); //same timestamp setup as the video stream
+
+        audioStream->codec->codec_type = CODEC_TYPE_AUDIO;
+        audioStream->codec->channels = context->is_stereo ? 2 : 1;
+        audioStream->codec->bits_per_sample = context->samplesize;
+        audioStream->codec->sample_rate = context->samplerate;
+
+        switch(context->audiocodecid << FLV_AUDIO_CODECID_OFFSET) { //the FLV_CODECID_* audio constants are compared against the shifted id
+            case FLV_CODECID_PCM_BE:
+                audioStream->codec->codec_id = context->samplesize == 16 ? CODEC_ID_PCM_S16BE : CODEC_ID_PCM_S8;
+                break;
+            case FLV_CODECID_ADPCM:
+                audioStream->codec->codec_id = CODEC_ID_ADPCM_SWF;
+                break;
+            case FLV_CODECID_MP3:
+                audioStream->codec->codec_id = CODEC_ID_MP3;
+                break;
+            case FLV_CODECID_PCM_LE:
+                audioStream->codec->codec_id = context->samplesize == 16 ? CODEC_ID_PCM_S16LE : CODEC_ID_PCM_S8;
+                break;
+            case FLV_CODECID_NELLYMOSER_8HZ_MONO:
+            case FLV_CODECID_NELLYMOSER: //both Nellymoser ids share the default branch and are reported as unsupported
+            default:
+                av_log(s, AV_LOG_INFO, "Unsupported audio codec in META tag: (%x)\n", context->audiocodecid);
+                audioStream->codec->codec_tag = context->audiocodecid; //keep the raw id as the codec tag
+        }
+    }
+
+    return 0;
+}
+
 static int flv_probe(AVProbeData *p)
 {
     const uint8_t *d;
@@ -43,15 +348,38 @@
 static int flv_read_header(AVFormatContext *s,
                            AVFormatParameters *ap)
 {
+    FLVTagInfo taginfo;
+    FLVDemuxContext *context = s->priv_data;
     int offset, flags, size;
 
-    s->ctx_flags |= AVFMTCTX_NOHEADER; //ok we have a header but theres no fps, codec type, sample_rate, ...
-
     url_fskip(&s->pb, 4);
     flags = get_byte(&s->pb);
 
     offset = get_be32(&s->pb);
 
+    if(flags & FLV_HEADER_FLAG_HASVIDEO)
+        context->has_video = 1;
+    if(flags & FLV_HEADER_FLAG_HASAUDIO)
+        context->has_audio = 1;
+
+    //0 is a valid audio codec id, so set it to something that will cause a validation error if it does not get set in flv_read_metabody
+    context->audiocodecid = -1;
+
+    if(flv_read_tag_header(&s->pb, &taginfo) < 0)
+        return -1; //could not even read the first tag header.
+
+    if(taginfo.type == FLV_TAG_TYPE_META) {
+        flv_read_metabody(s, taginfo.next_pos);
+        offset = taginfo.next_pos; //no need to rewind to before the meta tag.
+    }
+
+    if(flv_validate_context(s) == 0) {
+        if(flv_create_streams(s) < 0)
+            return -1;
+    } else {
+        //not enough metadata to create streams.
+        s->ctx_flags |= AVFMTCTX_NOHEADER;
+
     if(!url_is_streamed(&s->pb)){
         const int fsize= url_fsize(&s->pb);
         url_fseek(&s->pb, fsize-4, SEEK_SET);
@@ -63,6 +391,7 @@
     }
 
     url_fseek(&s->pb, offset, SEEK_SET);
+    }
 
     s->start_time = 0;
 
@@ -71,76 +400,32 @@
 
 static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
-    int ret, i, type, size, pts, flags, is_audio, next, pos;
+    FLVTagInfo taginfo;
+    int ret, i, flags, is_audio;
     AVStream *st = NULL;
 
  for(;;){
-    pos = url_ftell(&s->pb);
-    url_fskip(&s->pb, 4); /* size of previous packet */
-    type = get_byte(&s->pb);
-    size = get_be24(&s->pb);
-    pts = get_be24(&s->pb);
-//    av_log(s, AV_LOG_DEBUG, "type:%d, size:%d, pts:%d\n", type, size, pts);
-    if (url_feof(&s->pb))
+    if(flv_read_tag_header(&s->pb, &taginfo) < 0)
         return AVERROR_IO;
-    url_fskip(&s->pb, 4); /* reserved */
+
     flags = 0;
 
-    if(size == 0)
+    if(taginfo.body_size == 0)
         continue;
 
-    next= size + url_ftell(&s->pb);
-
-    if (type == FLV_TAG_TYPE_AUDIO) {
+    if (taginfo.type == FLV_TAG_TYPE_AUDIO) {
         is_audio=1;
         flags = get_byte(&s->pb);
-    } else if (type == FLV_TAG_TYPE_VIDEO) {
+    } else if (taginfo.type == FLV_TAG_TYPE_VIDEO) {
         is_audio=0;
         flags = get_byte(&s->pb);
-    } else if (type == FLV_TAG_TYPE_META && size > 13+1+4) {
-        url_fskip(&s->pb, 13); //onMetaData blah
-        if(get_byte(&s->pb) == 8){
-            url_fskip(&s->pb, 4);
-        }
-        while(url_ftell(&s->pb) + 5 < next){
-            char tmp[128];
-            int type, len;
-            double d= 0;
-
-            len= get_be16(&s->pb);
-            if(len >= sizeof(tmp) || !len)
-                break;
-            get_buffer(&s->pb, tmp, len);
-            tmp[len]=0;
-
-            type= get_byte(&s->pb);
-            if(type == AMF_DATA_TYPE_NUMBER){
-                d= av_int2dbl(get_be64(&s->pb));
-            }else if(type == AMF_DATA_TYPE_STRING){
-                len= get_be16(&s->pb);
-                if(len >= sizeof(tmp))
-                    break;
-                url_fskip(&s->pb, len);
-            }else if(type == AMF_DATA_TYPE_MIXEDARRAY){
-                //array
-                break;
-            }else if(type == AMF_DATA_TYPE_DATE){
-                d= av_int2dbl(get_be64(&s->pb));
-                get_be16(&s->pb);
-            }
-
-            if(!strcmp(tmp, "duration")){
-                s->duration = d*AV_TIME_BASE;
-            }else if(!strcmp(tmp, "videodatarate")){
-            }else if(!strcmp(tmp, "audiodatarate")){
-            }
-        }
-        url_fseek(&s->pb, next, SEEK_SET);
+    } else if (taginfo.type == FLV_TAG_TYPE_META && taginfo.body_size > 13+1+4) {
+        flv_read_metabody(s, taginfo.next_pos);
         continue;
     } else {
         /* skip packet */
-        av_log(s, AV_LOG_ERROR, "skipping flv packet: type %d, size %d, flags %d\n", type, size, flags);
-        url_fseek(&s->pb, next, SEEK_SET);
+        av_log(s, AV_LOG_ERROR, "skipping flv packet: type %d, size %d, flags %d\n", taginfo.type, taginfo.body_size, flags);
+        url_fseek(&s->pb, taginfo.next_pos, SEEK_SET);
         continue;
     }
 
@@ -163,11 +448,11 @@
        ||(st->discard >= AVDISCARD_BIDIR  &&  ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_DISP_INTER && !is_audio))
        || st->discard >= AVDISCARD_ALL
        ){
-        url_fseek(&s->pb, next, SEEK_SET);
+        url_fseek(&s->pb, taginfo.next_pos, SEEK_SET);
         continue;
     }
     if ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY)
-        av_add_index_entry(st, pos, pts, size, 0, AVINDEX_KEYFRAME);
+        av_add_index_entry(st, taginfo.pos, taginfo.pts, taginfo.body_size, 0, AVINDEX_KEYFRAME);
     break;
  }
 
@@ -200,13 +485,10 @@
             case FLV_CODECID_SCREEN: st->codec->codec_id = CODEC_ID_FLASHSV; break;
             case FLV_CODECID_VP6   :
                 st->codec->codec_id = CODEC_ID_VP6F;
-                if (st->codec->extradata_size != 1) {
-                    st->codec->extradata_size = 1;
-                    st->codec->extradata = av_malloc(1);
-                }
+                create_vp6_extradata(st);
                 /* width and height adjustment */
                 st->codec->extradata[0] = get_byte(&s->pb);
-                size--;
+                taginfo.body_size--;
                 break;
             default:
                 av_log(s, AV_LOG_INFO, "Unsupported video codec (%x)\n", flags & FLV_VIDEO_CODECID_MASK);
@@ -214,14 +496,14 @@
             }
     }
 
-    ret= av_get_packet(&s->pb, pkt, size - 1);
+    ret= av_get_packet(&s->pb, pkt, taginfo.body_size - 1);
     if (ret <= 0) {
         return AVERROR_IO;
     }
     /* note: we need to modify the packet size here to handle the last
        packet */
     pkt->size = ret;
-    pkt->pts = pts;
+    pkt->pts = taginfo.pts;
     pkt->stream_index = st->index;
 
     if (is_audio || ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY))
@@ -249,7 +531,7 @@
 AVInputFormat flv_demuxer = {
     "flv",
     "flv format",
-    0,
+    sizeof(FLVDemuxContext),
     flv_probe,
     flv_read_header,
     flv_read_packet,



More information about the ffmpeg-devel mailing list