[FFmpeg-devel] [PATCH] aadec: alternate mp3 seek handling

Wed Aug 8 11:41:02 EEST 2018

Ping. Any thoughts on this?

Cheers, Karsten

> Am 31.07.2018 um 21:16 schrieb Karsten Otto <ottoka at posteo.de>:
> 
> After seeking, determine the offset of the next frame in the decrypted
> buffer by scanning the first few bytes for a valid mp3 header.
> This significantly improves the listening experience for audio content
> with untypical encoding.
> ---
> This is a refinement of an earlier patch iteration, according to lessons
> learned and discussions on the mailing list: Scan a limited range to find
> the first shifted frame only, check for a very specific bit pattern, and
> add extra checks and guards for better code maintainability.
> 
> Unfortunately, this variant violates the architectural layering between
> demuxer and codec. But I did some more testing with untypical encodings,
> where the current estimation mechanism fails, and the resulting audio on
> seek was just too horribly annoying.
> 
> I believe the better listening experience outweighs the architectural
> uglyness, so this should be in the official code base. But if you think
> otherwise, just let me know, and I will keep this a private patch.
> 
> libavformat/aadec.c | 45 ++++++++++++++++++++++++++++-----------------
> 1 file changed, 28 insertions(+), 17 deletions(-)
> 
> diff --git a/libavformat/aadec.c b/libavformat/aadec.c
> index d83f283ffe..9b1495c218 100644
> --- a/libavformat/aadec.c
> +++ b/libavformat/aadec.c
> @@ -37,7 +37,7 @@
> #define TEA_BLOCK_SIZE 8
> #define CHAPTER_HEADER_SIZE 8
> #define TIMEPREC 1000
> -#define MP3_FRAME_SIZE 104
> +#define MP3_FRAME_SIZE 105
> 
> typedef struct AADemuxContext {
>     AVClass *class;
> @@ -51,7 +51,7 @@ typedef struct AADemuxContext {
>     int64_t current_chapter_size;
>     int64_t content_start;
>     int64_t content_end;
> -    int seek_offset;
> +    int did_seek;
> } AADemuxContext;
> 
> static int get_second_size(char *codec_name)
> @@ -179,7 +179,7 @@ static int aa_read_header(AVFormatContext *s)
>         st->codecpar->sample_rate = 22050;
>         st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
>         avpriv_set_pts_info(st, 64, 8, 32000 * TIMEPREC);
> -        // encoded audio frame is MP3_FRAME_SIZE bytes (+1 with padding, unlikely)
> +        // encoded audio frame is MP3_FRAME_SIZE bytes (-1 without padding)
>     } else if (!strcmp(codec_name, "acelp85")) {
>         st->codecpar->codec_id = AV_CODEC_ID_SIPR;
>         st->codecpar->block_align = 19;
> @@ -231,7 +231,7 @@ static int aa_read_header(AVFormatContext *s)
>     ff_update_cur_dts(s, st, 0);
>     avio_seek(pb, start, SEEK_SET);
>     c->current_chapter_size = 0;
> -    c->seek_offset = 0;
> +    c->did_seek = 0;
> 
>     return 0;
> }
> @@ -244,7 +244,7 @@ static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
>     int trailing_bytes;
>     int blocks;
>     uint8_t buf[MAX_CODEC_SECOND_SIZE * 2];
> -    int written = 0;
> +    int written = 0, offset = 0;
>     int ret;
>     AADemuxContext *c = s->priv_data;
>     uint64_t pos = avio_tell(s->pb);
> @@ -297,16 +297,33 @@ static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
>     if (c->current_chapter_size <= 0)
>         c->current_chapter_size = 0;
> 
> -    if (c->seek_offset > written)
> -        c->seek_offset = 0; // ignore wrong estimate
> +    if (c->did_seek) {
> +        c->did_seek = 0;
> +
> +        if (s->streams[0]->codecpar->codec_id == AV_CODEC_ID_MP3 && written >= MP3_FRAME_SIZE + 3) {
> +            for (offset = 0; offset < MP3_FRAME_SIZE; ++offset) {
> +                // find mp3 header: sync, v2, layer3, no crc, 32kbps, 22kHz, mono
> +                if ((buf[offset + 0]       ) == 0xff &&
> +                    (buf[offset + 1]       ) == 0xf3 &&
> +                    (buf[offset + 2] & 0xfc) == 0x40 &&
> +                    (buf[offset + 3] & 0xf0) == 0xc0)
> +                        break;
> +            }
> +            if (offset == MP3_FRAME_SIZE) offset = 0; // not found, just use as is
> +        }
> +
> +        ff_update_cur_dts(s, s->streams[0],
> +            (pos + offset - c->content_start - CHAPTER_HEADER_SIZE * (c->chapter_idx - 1))
> +            * TIMEPREC);
> +    }
> 
> -    ret = av_new_packet(pkt, written - c->seek_offset);
> +    if (offset > written) offset = 0;
> +    ret = av_new_packet(pkt, written - offset);
>     if (ret < 0)
>         return ret;
> -    memcpy(pkt->data, buf + c->seek_offset, written - c->seek_offset);
> +    memcpy(pkt->data, buf + offset, written - offset);
>     pkt->pos = pos;
> 
> -    c->seek_offset = 0;
>     return 0;
> }
> 
> @@ -349,13 +366,7 @@ static int aa_read_seek(AVFormatContext *s,
>     c->current_codec_second_size = c->codec_second_size;
>     c->current_chapter_size = chapter_size - chapter_pos;
>     c->chapter_idx = 1 + chapter_idx;
> -
> -    // for unaligned frames, estimate offset of first frame in block (assume no padding)
> -    if (s->streams[0]->codecpar->codec_id == AV_CODEC_ID_MP3) {
> -        c->seek_offset = (MP3_FRAME_SIZE - chapter_pos % MP3_FRAME_SIZE) % MP3_FRAME_SIZE;
> -    }
> -
> -    ff_update_cur_dts(s, s->streams[0], ch->start + (chapter_pos + c->seek_offset) * TIMEPREC);
> +    c->did_seek = 1;
> 
>     return 1;
> }
> -- 
> 2.14.3 (Apple Git-98)
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel