[FFmpeg-cvslog] avformat/mov: improve HEIF parsing

James Almer git at videolan.org
Mon Jan 15 20:10:21 EET 2024


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Tue Jan  9 16:05:53 2024 -0300| [d9fed9df2a9e70c9375d3b2591db35c09303d369] | committer: James Almer

avformat/mov: improve HEIF parsing

Parse iprp and iinf boxes and its child boxes to get the actual codec used
(AV1 for avif, HEVC for heic), and properly export extradata and other
properties in a generic way.
The avif tests reference files are updated as the extradata is now exported.

Based on a patch by Swaraj Hota

Co-authored-by: Swaraj Hota <swarajhota353 at gmail.com>
Signed-off-by: James Almer <jamrial at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d9fed9df2a9e70c9375d3b2591db35c09303d369
---

 libavformat/isom.h                                 |  22 +-
 libavformat/mov.c                                  | 351 ++++++++++++++++-----
 tests/ref/fate/mov-avif-demux-still-image-1-item   |   2 +-
 .../fate/mov-avif-demux-still-image-multiple-items |   2 +-
 4 files changed, 282 insertions(+), 95 deletions(-)

diff --git a/libavformat/isom.h b/libavformat/isom.h
index 90c4fb5530..2cf456fee1 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -262,12 +262,24 @@ typedef struct MOVStreamContext {
     } cenc;
 } MOVStreamContext;
 
+typedef struct HEIFItem {
+    AVStream *st;
+    int item_id;
+    int64_t extent_length;
+    int64_t extent_offset;
+    int64_t size;
+    int width;
+    int height;
+    int type;
+} HEIFItem;
+
 typedef struct MOVContext {
     const AVClass *class; ///< class for private options
     AVFormatContext *fc;
     int time_scale;
     int64_t duration;     ///< duration of the longest track
     int found_moov;       ///< 'moov' atom has been found
+    int found_iloc;       ///< 'iloc' atom has been found
     int found_mdat;       ///< 'mdat' atom has been found
     int found_hdlr_mdta;  ///< 'hdlr' atom with type 'mdta' has been found
     int trak_index;       ///< Index of the current 'trak'
@@ -319,16 +331,10 @@ typedef struct MOVContext {
     int have_read_mfra_size;
     uint32_t mfra_size;
     uint32_t max_stts_delta;
-    int is_still_picture_avif;
     int primary_item_id;
-    struct {
-        int item_id;
-        int extent_length;
-        int64_t extent_offset;
-    } *heif_info;
+    int cur_item_id;
+    HEIFItem *heif_info;
     int heif_info_size;
-    int64_t hvcC_offset;
-    int hvcC_size;
     int interleaved_read;
 } MOVContext;
 
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 12e82c66a9..53a3d22726 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -1233,8 +1233,6 @@ static int mov_read_ftyp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         c->isom = 1;
     av_log(c->fc, AV_LOG_DEBUG, "ISO: File Type Major Brand: %.4s\n",(char *)&type);
     av_dict_set(&c->fc->metadata, "major_brand", type, 0);
-    c->is_still_picture_avif = !strncmp(type, "avif", 4) ||
-                               !strncmp(type, "mif1", 4);
     minor_ver = avio_rb32(pb); /* minor version */
     av_dict_set_int(&c->fc->metadata, "minor_version", minor_ver, 0);
 
@@ -4641,10 +4639,6 @@ static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     MOVStreamContext *sc;
     int ret;
 
-    if (c->is_still_picture_avif) {
-        return AVERROR_INVALIDDATA;
-    }
-
     st = avformat_new_stream(c->fc, NULL);
     if (!st) return AVERROR(ENOMEM);
     st->id = -1;
@@ -4916,44 +4910,23 @@ static int mov_read_custom(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return ret;
 }
 
-static int heif_add_stream(MOVContext *c, int item_id)
+static int heif_add_stream(MOVContext *c, HEIFItem *item)
 {
     MOVStreamContext *sc;
     AVStream *st;
-    int item_index = -1;
-    if (c->fc->nb_streams)
-        return AVERROR_INVALIDDATA;
-    for (int i = 0; i < c->heif_info_size; i++)
-        if (c->heif_info[i].item_id == item_id) {
-            item_index = i;
-            break;
-        }
-    if (item_index < 0)
-        return AVERROR_INVALIDDATA;
+
     st = avformat_new_stream(c->fc, NULL);
     if (!st)
         return AVERROR(ENOMEM);
-    st->id = c->fc->nb_streams;
     sc = av_mallocz(sizeof(MOVStreamContext));
     if (!sc)
         return AVERROR(ENOMEM);
 
+    item->st = st;
+    st->id = item->item_id;
     st->priv_data = sc;
     st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
-    st->codecpar->codec_id = AV_CODEC_ID_AV1;
-    if (c->hvcC_offset >= 0) {
-        int ret;
-        int64_t pos = avio_tell(c->fc->pb);
-        st->codecpar->codec_id = AV_CODEC_ID_HEVC;
-        if (avio_seek(c->fc->pb, c->hvcC_offset, SEEK_SET) != c->hvcC_offset) {
-            av_log(c->fc, AV_LOG_ERROR, "Failed to seek to hvcC data.\n");
-            return AVERROR_UNKNOWN;
-        }
-        ret = ff_get_extradata(c->fc, st->codecpar, c->fc->pb, c->hvcC_size);
-        if (ret < 0)
-            return ret;
-        avio_seek(c->fc->pb, pos, SEEK_SET);
-    }
+    st->codecpar->codec_id = mov_codec_id(st, item->type);
     sc->ffindex = st->index;
     c->trak_index = st->index;
     st->avg_frame_rate.num = st->avg_frame_rate.den = 1;
@@ -4987,17 +4960,12 @@ static int heif_add_stream(MOVContext *c, int item_id)
     sc->stts_data[0].count = 1;
     // Not used for still images. But needed by mov_build_index.
     sc->stts_data[0].duration = 0;
-    sc->sample_sizes[0] = c->heif_info[item_index].extent_length;
-    sc->chunk_offsets[0] = c->heif_info[item_index].extent_offset;
 
-    mov_build_index(c, st);
     return 0;
 }
 
 static int mov_read_meta(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
-    c->hvcC_offset = -1;
-    c->hvcC_size = 0;
     while (atom.size > 8) {
         uint32_t tag;
         if (avio_feof(pb))
@@ -5005,23 +4973,9 @@ static int mov_read_meta(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         tag = avio_rl32(pb);
         atom.size -= 4;
         if (tag == MKTAG('h','d','l','r')) {
-            int ret;
             avio_seek(pb, -8, SEEK_CUR);
             atom.size += 8;
-            if ((ret = mov_read_default(c, pb, atom)) < 0)
-                return ret;
-            if (c->is_still_picture_avif) {
-                int ret;
-                // Add a stream for the YUV planes (primary item).
-                if ((ret = heif_add_stream(c, c->primary_item_id)) < 0)
-                    return ret;
-                // For still AVIF images, the meta box contains all the
-                // necessary information that would generally be provided by the
-                // moov box. So simply mark that we have found the moov box so
-                // that parsing can continue.
-                c->found_moov = 1;
-            }
-            return ret;
+            return mov_read_default(c, pb, atom);
         }
     }
     return 0;
@@ -7803,6 +7757,7 @@ static int mov_read_pitm(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     avio_rb32(pb);  // version & flags.
     c->primary_item_id = avio_rb16(pb);
+    av_log(c->fc, AV_LOG_TRACE, "pitm: primary_item_id %d\n", c->primary_item_id);
     return atom.size;
 }
 
@@ -7813,18 +7768,10 @@ static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     uint64_t base_offset, extent_offset, extent_length;
     uint8_t value;
 
-    if (!c->is_still_picture_avif) {
-        // * For non-avif, we simply ignore the iloc box.
-        // * For animated avif, we don't care about the iloc box as all the
-        //   necessary information can be found in the moov box.
-        return 0;
-    }
-
-    if (c->heif_info) {
+    if (c->found_iloc) {
         av_log(c->fc, AV_LOG_INFO, "Duplicate iloc box found\n");
         return 0;
     }
-    av_assert0(!c->fc->nb_streams);
 
     version = avio_r8(pb);
     avio_rb24(pb);  // flags.
@@ -7841,26 +7788,33 @@ static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     }
     item_count = (version < 2) ? avio_rb16(pb) : avio_rb32(pb);
 
-    c->heif_info = av_malloc_array(item_count, sizeof(*c->heif_info));
-    if (!c->heif_info)
-        return AVERROR(ENOMEM);
-    c->heif_info_size = item_count;
+    if (!c->heif_info) {
+        c->heif_info = av_calloc(item_count, sizeof(*c->heif_info));
+        if (!c->heif_info)
+            return AVERROR(ENOMEM);
+        c->heif_info_size = item_count;
+    }
 
+    av_log(c->fc, AV_LOG_TRACE, "iloc: item_count %d\n", item_count);
     for (int i = 0; i < item_count; i++) {
         int item_id = (version < 2) ? avio_rb16(pb) : avio_rb32(pb);
+        int offset_type = (version > 0) ? avio_rb16(pb) & 0xf : 0;
+
         if (avio_feof(pb))
             return AVERROR_INVALIDDATA;
+        if (offset_type > 1) {
+            avpriv_request_sample(c->fc, "iloc offset type %d", offset_type);
+            return AVERROR_PATCHWELCOME;
+        }
         c->heif_info[i].item_id = item_id;
 
-        if (version > 0)
-            avio_rb16(pb);  // construction_method.
         avio_rb16(pb);  // data_reference_index.
         if (rb_size(pb, &base_offset, base_offset_size) < 0)
             return AVERROR_INVALIDDATA;
         extent_count = avio_rb16(pb);
         if (extent_count > 1) {
             // For still AVIF images, we only support one extent item.
-            av_log(c->fc, AV_LOG_ERROR, "iloc: extent_count > 1 not supported.\n");
+            av_log(c->fc, AV_LOG_ERROR, "iloc: extent_count > 1 not supported\n");
             return AVERROR_PATCHWELCOME;
         }
         for (int j = 0; j < extent_count; j++) {
@@ -7869,32 +7823,236 @@ static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
                 return AVERROR_INVALIDDATA;
             c->heif_info[i].extent_length = extent_length;
             c->heif_info[i].extent_offset = base_offset + extent_offset;
+            av_log(c->fc, AV_LOG_TRACE, "iloc: item_idx %d, offset_type %d, "
+                                        "extent_offset %"PRId64", extent_length %"PRId64"\n",
+                   i, offset_type, c->heif_info[i].extent_offset, c->heif_info[i].extent_length);
         }
     }
 
+    c->found_iloc = 1;
     return atom.size;
 }
 
+static int mov_read_infe(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    char item_name[128];
+    int64_t size = atom.size;
+    uint32_t item_type;
+    int item_id;
+    int version, ret;
+
+    version = avio_r8(pb);
+    avio_rb24(pb);  // flags.
+    size -= 4;
+
+    if (version != 2) {
+        av_log(c->fc, AV_LOG_ERROR, "infe: version != 2 not supported\n");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    item_id = avio_rb16(pb);
+    avio_rb16(pb); // item_protection_index
+    item_type = avio_rl32(pb);
+    size -= 8;
+    size -= avio_get_str(pb, INT_MAX, item_name, sizeof(item_name));
+
+    av_log(c->fc, AV_LOG_TRACE, "infe: item_id %d, item_type %s, item_name %s\n",
+           item_id, av_fourcc2str(item_type), item_name);
+
+    // Skip all but the primary item until support is added
+    if (item_id != c->primary_item_id)
+        return 0;
+
+    if (size > 0)
+        avio_skip(pb, size);
+
+    c->heif_info[c->cur_item_id].item_id = item_id;
+    c->heif_info[c->cur_item_id].type    = item_type;
+
+    switch (item_type) {
+    case MKTAG('a','v','0','1'):
+    case MKTAG('h','v','c','1'):
+        ret = heif_add_stream(c, &c->heif_info[c->cur_item_id]);
+        if (ret < 0)
+            return ret;
+        break;
+    default:
+        av_log(c->fc, AV_LOG_TRACE, "infe: ignoring item_type %s\n", av_fourcc2str(item_type));
+        break;
+    }
+
+    c->cur_item_id++;
+
+    return 0;
+}
+
+static int mov_read_iinf(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    int entry_count;
+    int version, ret;
+
+    version = avio_r8(pb);
+    avio_rb24(pb);  // flags.
+    entry_count = version ? avio_rb32(pb) : avio_rb16(pb);
+
+    if (!c->heif_info) {
+        c->heif_info = av_calloc(entry_count, sizeof(*c->heif_info));
+        if (!c->heif_info)
+            return AVERROR(ENOMEM);
+        c->heif_info_size = entry_count;
+    }
+
+    c->cur_item_id = 0;
+
+    for (int i = 0; i < entry_count; i++) {
+        MOVAtom infe;
+
+        infe.size = avio_rb32(pb) - 8;
+        infe.type = avio_rl32(pb);
+        ret = mov_read_infe(c, pb, infe);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+static int mov_read_iref(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    avio_rb32(pb);  /* version and flags */
+    atom.size -= 4;
+    return mov_read_default(c, pb, atom);
+}
+
+static int mov_read_ispe(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    uint32_t width, height;
+    avio_r8(pb);  /* version */
+    avio_rb24(pb);  /* flags */
+    width  = avio_rb32(pb);
+    height = avio_rb32(pb);
+
+    av_log(c->fc, AV_LOG_TRACE, "ispe: item_id %d, width %u, height %u\n",
+           c->cur_item_id, width, height);
+
+    for (int i = 0; i < c->heif_info_size; i++) {
+        if (c->heif_info[i].item_id == c->cur_item_id) {
+            c->heif_info[i].width  = width;
+            c->heif_info[i].height = height;
+            break;
+        }
+    }
+
+    return 0;
+}
+
 static int mov_read_iprp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
-    int size = avio_rb32(pb);
-    if (avio_rl32(pb) != MKTAG('i','p','c','o'))
+    typedef struct MOVAtoms {
+        FFIOContext b;
+        uint32_t type;
+        int64_t  size;
+        uint8_t *data;
+    } MOVAtoms;
+    MOVAtoms *atoms = NULL;
+    MOVAtom a;
+    unsigned count;
+    int nb_atoms = 0;
+    int version, flags;
+    int ret;
+
+    a.size = avio_rb32(pb);
+    a.type = avio_rl32(pb);
+
+    if (a.size < 8 || a.type != MKTAG('i','p','c','o'))
         return AVERROR_INVALIDDATA;
-    size -= 8;
-    while (size > 0) {
-        int sub_size, sub_type;
-        sub_size = avio_rb32(pb);
-        sub_type = avio_rl32(pb);
-        sub_size -= 8;
-        size -= sub_size + 8;
-        if (sub_type == MKTAG('h','v','c','C')) {
-            c->hvcC_offset = avio_tell(pb);
-            c->hvcC_size = sub_size;
+
+    a.size -= 8;
+    while (a.size >= 8) {
+        MOVAtoms *ref = av_dynarray2_add((void**)&atoms, &nb_atoms, sizeof(MOVAtoms), NULL);
+        if (!ref) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        ref->data = NULL;
+        ref->size = avio_rb32(pb);
+        ref->type = avio_rl32(pb);
+        if (ref->size > a.size || ref->size < 8)
             break;
+        ref->data = av_malloc(ref->size);
+        if (!ref->data) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
         }
-        avio_skip(pb, sub_size);
+        av_log(c->fc, AV_LOG_TRACE, "ipco: index %d, box type %s\n", nb_atoms, av_fourcc2str(ref->type));
+        avio_seek(pb, -8, SEEK_CUR);
+        if (avio_read(pb, ref->data, ref->size) != ref->size) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+        ffio_init_read_context(&ref->b, ref->data, ref->size);
+        a.size -= ref->size;
     }
-    return atom.size;
+
+    if (a.size) {
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    a.size = avio_rb32(pb);
+    a.type = avio_rl32(pb);
+
+    if (a.size < 8 || a.type != MKTAG('i','p','m','a')) {
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    version = avio_r8(pb);
+    flags   = avio_rb24(pb);
+    count   = avio_rb32(pb);
+
+    for (int i = 0; i < count; i++) {
+        int item_id = version ? avio_rb32(pb) : avio_rb16(pb);
+        int assoc_count = avio_r8(pb);
+
+        for (int j = 0; j < assoc_count; j++) {
+            MOVAtoms *ref;
+            int index = avio_r8(pb) & 0x7f;
+            if (flags & 1) {
+                index <<= 8;
+                index |= avio_r8(pb);
+            }
+            if (index > nb_atoms || index <= 0) {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            ref = &atoms[--index];
+
+            av_log(c->fc, AV_LOG_TRACE, "ipma: property_index %d, item_id %d, item_type %s\n",
+                   index + 1, item_id, av_fourcc2str(ref->type));
+
+            // Skip properties referencing items other than the primary item until support is added
+            if (item_id != c->primary_item_id)
+                continue;
+
+            c->cur_item_id = item_id;
+
+            ret = mov_read_default(c, &ref->b.pub,
+                                   (MOVAtom) { .size = ref->size,
+                                               .type = MKTAG('i','p','c','o') });
+            if (ret < 0)
+                goto fail;
+            ffio_init_read_context(&ref->b, ref->data, ref->size);
+        }
+    }
+
+    ret = 0;
+fail:
+    for (int i = 0; i < nb_atoms; i++)
+        av_free(atoms[i].data);
+    av_free(atoms);
+
+    return ret;
 }
 
 static const MOVParseTableEntry mov_default_parse_table[] = {
@@ -8004,7 +8162,10 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('p','c','m','C'), mov_read_pcmc }, /* PCM configuration box */
 { MKTAG('p','i','t','m'), mov_read_pitm },
 { MKTAG('e','v','c','C'), mov_read_glbl },
+{ MKTAG('i','r','e','f'), mov_read_iref },
+{ MKTAG('i','s','p','e'), mov_read_ispe },
 { MKTAG('i','p','r','p'), mov_read_iprp },
+{ MKTAG('i','i','n','f'), mov_read_iinf },
 { 0, NULL }
 };
 
@@ -8059,7 +8220,7 @@ static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom)
             a.size = avio_rb64(pb) - 8;
             total_size += 8;
         }
-        av_log(c->fc, AV_LOG_TRACE, "type:'%s' parent:'%s' sz: %"PRId64" %"PRId64" %"PRId64"\n",
+        av_log(c->fc, AV_LOG_DEBUG, "type:'%s' parent:'%s' sz: %"PRId64" %"PRId64" %"PRId64"\n",
                av_fourcc2str(a.type), av_fourcc2str(atom.type), a.size, total_size, atom.size);
         if (a.size == 0) {
             a.size = atom.size - total_size + 8;
@@ -8672,13 +8833,33 @@ static int mov_read_header(AVFormatContext *s)
             av_log(s, AV_LOG_ERROR, "error reading header\n");
             return err;
         }
-    } while ((pb->seekable & AVIO_SEEKABLE_NORMAL) && !mov->found_moov && !mov->moov_retry++);
-    if (!mov->found_moov) {
+    } while ((pb->seekable & AVIO_SEEKABLE_NORMAL) && !mov->found_moov && !mov->found_iloc && !mov->moov_retry++);
+    if (!mov->found_moov && !mov->found_iloc) {
         av_log(s, AV_LOG_ERROR, "moov atom not found\n");
         return AVERROR_INVALIDDATA;
     }
     av_log(mov->fc, AV_LOG_TRACE, "on_parse_exit_offset=%"PRId64"\n", avio_tell(pb));
 
+    if (mov->found_iloc) {
+        for (i = 0; i < mov->heif_info_size; i++) {
+            HEIFItem *item = &mov->heif_info[i];
+            MOVStreamContext *sc;
+            AVStream *st;
+
+            if (!item->st)
+                continue;
+
+            st = item->st;
+            sc = st->priv_data;
+            st->codecpar->width  = item->width;
+            st->codecpar->height = item->height;
+            sc->sample_sizes[0]  = item->extent_length;
+            sc->chunk_offsets[0] = item->extent_offset;
+
+            mov_build_index(mov, st);
+        }
+    }
+
     if (pb->seekable & AVIO_SEEKABLE_NORMAL) {
         if (mov->nb_chapter_tracks > 0 && !mov->ignore_chapters)
             mov_read_chapters(s);
diff --git a/tests/ref/fate/mov-avif-demux-still-image-1-item b/tests/ref/fate/mov-avif-demux-still-image-1-item
index b74a2d9dbc..1ead593caa 100644
--- a/tests/ref/fate/mov-avif-demux-still-image-1-item
+++ b/tests/ref/fate/mov-avif-demux-still-image-1-item
@@ -1,7 +1,7 @@
 #format: frame checksums
 #version: 2
 #hash: MD5
-#extradata 0,                              13, b52ae298d37128862ef1918cf916239c
+#extradata 0,                               4, b24b71499a8480fa4469bcbcba2140aa
 #tb 0: 1/1
 #media_type 0: video
 #codec_id 0: av1
diff --git a/tests/ref/fate/mov-avif-demux-still-image-multiple-items b/tests/ref/fate/mov-avif-demux-still-image-multiple-items
index b74a2d9dbc..1ead593caa 100644
--- a/tests/ref/fate/mov-avif-demux-still-image-multiple-items
+++ b/tests/ref/fate/mov-avif-demux-still-image-multiple-items
@@ -1,7 +1,7 @@
 #format: frame checksums
 #version: 2
 #hash: MD5
-#extradata 0,                              13, b52ae298d37128862ef1918cf916239c
+#extradata 0,                               4, b24b71499a8480fa4469bcbcba2140aa
 #tb 0: 1/1
 #media_type 0: video
 #codec_id 0: av1



More information about the ffmpeg-cvslog mailing list