[FFmpeg-devel] [PATCH] aadec: alternate mp3 seek handling

Tue Jul 31 22:16:32 EEST 2018

After seeking, determine the offset of the next frame in the decrypted
buffer by scanning the first few bytes for a valid mp3 header.
This significantly improves the listening experience for audio content
with untypical encoding.
---
This is a refinement of an earlier patch iteration, according to lessons
learned and discussions on the mailing list: Scan a limited range to find
the first shifted frame only, check for a very specific bit pattern, and
add extra checks and guards for better code maintainability.

Unfortunately, this variant violates the architectural layering between
demuxer and codec. But I did some more testing with untypical encodings,
where the current estimation mechanism fails, and the resulting audio on
seek was just too horribly annoying.

I believe the better listening experience outweighs the architectural
uglyness, so this should be in the official code base. But if you think
otherwise, just let me know, and I will keep this a private patch.

 libavformat/aadec.c | 45 ++++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/libavformat/aadec.c b/libavformat/aadec.c
index d83f283ffe..9b1495c218 100644
--- a/libavformat/aadec.c
+++ b/libavformat/aadec.c
@@ -37,7 +37,7 @@
 #define TEA_BLOCK_SIZE 8
 #define CHAPTER_HEADER_SIZE 8
 #define TIMEPREC 1000
-#define MP3_FRAME_SIZE 104
+#define MP3_FRAME_SIZE 105
 
 typedef struct AADemuxContext {
     AVClass *class;
@@ -51,7 +51,7 @@ typedef struct AADemuxContext {
     int64_t current_chapter_size;
     int64_t content_start;
     int64_t content_end;
-    int seek_offset;
+    int did_seek;
 } AADemuxContext;
 
 static int get_second_size(char *codec_name)
@@ -179,7 +179,7 @@ static int aa_read_header(AVFormatContext *s)
         st->codecpar->sample_rate = 22050;
         st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
         avpriv_set_pts_info(st, 64, 8, 32000 * TIMEPREC);
-        // encoded audio frame is MP3_FRAME_SIZE bytes (+1 with padding, unlikely)
+        // encoded audio frame is MP3_FRAME_SIZE bytes (-1 without padding)
     } else if (!strcmp(codec_name, "acelp85")) {
         st->codecpar->codec_id = AV_CODEC_ID_SIPR;
         st->codecpar->block_align = 19;
@@ -231,7 +231,7 @@ static int aa_read_header(AVFormatContext *s)
     ff_update_cur_dts(s, st, 0);
     avio_seek(pb, start, SEEK_SET);
     c->current_chapter_size = 0;
-    c->seek_offset = 0;
+    c->did_seek = 0;
 
     return 0;
 }
@@ -244,7 +244,7 @@ static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
     int trailing_bytes;
     int blocks;
     uint8_t buf[MAX_CODEC_SECOND_SIZE * 2];
-    int written = 0;
+    int written = 0, offset = 0;
     int ret;
     AADemuxContext *c = s->priv_data;
     uint64_t pos = avio_tell(s->pb);
@@ -297,16 +297,33 @@ static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
     if (c->current_chapter_size <= 0)
         c->current_chapter_size = 0;
 
-    if (c->seek_offset > written)
-        c->seek_offset = 0; // ignore wrong estimate
+    if (c->did_seek) {
+        c->did_seek = 0;
+
+        if (s->streams[0]->codecpar->codec_id == AV_CODEC_ID_MP3 && written >= MP3_FRAME_SIZE + 3) {
+            for (offset = 0; offset < MP3_FRAME_SIZE; ++offset) {
+                // find mp3 header: sync, v2, layer3, no crc, 32kbps, 22kHz, mono
+                if ((buf[offset + 0]       ) == 0xff &&
+                    (buf[offset + 1]       ) == 0xf3 &&
+                    (buf[offset + 2] & 0xfc) == 0x40 &&
+                    (buf[offset + 3] & 0xf0) == 0xc0)
+                        break;
+            }
+            if (offset == MP3_FRAME_SIZE) offset = 0; // not found, just use as is
+        }
+
+        ff_update_cur_dts(s, s->streams[0],
+            (pos + offset - c->content_start - CHAPTER_HEADER_SIZE * (c->chapter_idx - 1))
+            * TIMEPREC);
+    }
 
-    ret = av_new_packet(pkt, written - c->seek_offset);
+    if (offset > written) offset = 0;
+    ret = av_new_packet(pkt, written - offset);
     if (ret < 0)
         return ret;
-    memcpy(pkt->data, buf + c->seek_offset, written - c->seek_offset);
+    memcpy(pkt->data, buf + offset, written - offset);
     pkt->pos = pos;
 
-    c->seek_offset = 0;
     return 0;
 }
 
@@ -349,13 +366,7 @@ static int aa_read_seek(AVFormatContext *s,
     c->current_codec_second_size = c->codec_second_size;
     c->current_chapter_size = chapter_size - chapter_pos;
     c->chapter_idx = 1 + chapter_idx;
-
-    // for unaligned frames, estimate offset of first frame in block (assume no padding)
-    if (s->streams[0]->codecpar->codec_id == AV_CODEC_ID_MP3) {
-        c->seek_offset = (MP3_FRAME_SIZE - chapter_pos % MP3_FRAME_SIZE) % MP3_FRAME_SIZE;
-    }
-
-    ff_update_cur_dts(s, s->streams[0], ch->start + (chapter_pos + c->seek_offset) * TIMEPREC);
+    c->did_seek = 1;
 
     return 1;
 }
-- 
2.14.3 (Apple Git-98)