[FFmpeg-devel] [PATCH v6] Add support for Audible AA files

Sun Aug 2 06:42:58 CEST 2015

On 30/07/15 7:46 AM, Vesselin Bontchev wrote:
> From 06b0c0013404a67c72ea14a3c90730c0c4bd5b9a Mon Sep 17 00:00:00 2001
> From: Vesselin Bontchev <vesselin.bontchev at yandex.com>
> Date: Sun, 19 Jul 2015 23:16:36 +0200
> Subject: [PATCH] Add support for Audible AA files
> 
> https://en.wikipedia.org/wiki/Audible.com#Quality
> ---
>  doc/demuxers.texi        |   6 +
>  doc/general.texi         |   2 +
>  libavformat/Makefile     |   1 +
>  libavformat/aadec.c      | 325 +++++++++++++++++++++++++++++++++++++++++++++++
>  libavformat/allformats.c |   1 +
>  5 files changed, 335 insertions(+)
>  create mode 100644 libavformat/aadec.c
> 
> diff --git a/doc/demuxers.texi b/doc/demuxers.texi
> index e45e1af..c86e8a4 100644
> --- a/doc/demuxers.texi
> +++ b/doc/demuxers.texi
> @@ -18,6 +18,12 @@ enabled demuxers.
>  
>  The description of some of the currently available demuxers follows.
>  
> +@section aa
> +
> +Audible Format 2, 3, and 4 demuxer.
> +
> +This demuxer is used to demux Audible Format 2, 3, and 4 (.aa) files.
> +
>  @section applehttp
>  
>  Apple HTTP Live Streaming demuxer.
> diff --git a/doc/general.texi b/doc/general.texi
> index a260e79..2b782e0 100644
> --- a/doc/general.texi
> +++ b/doc/general.texi
> @@ -228,6 +228,8 @@ library:
>  @item 8088flex TMV              @tab   @tab X
>  @item AAX                       @tab   @tab X
>      @tab Audible Enhanced Audio format, used in audiobooks.
> +@item AA                        @tab   @tab X
> +    @tab Audible Format 2, 3, and 4, used in audiobooks.
>  @item ACT Voice                 @tab   @tab X
>      @tab contains G.729 audio
>  @item Adobe Filmstrip           @tab X @tab X
> diff --git a/libavformat/Makefile b/libavformat/Makefile
> index cc73fd8..466da51 100644
> --- a/libavformat/Makefile
> +++ b/libavformat/Makefile
> @@ -59,6 +59,7 @@ OBJS-$(CONFIG_SHARED)                    += log2_tab.o golomb_tab.o
>  
>  # muxers/demuxers
>  OBJS-$(CONFIG_A64_MUXER)                 += a64.o rawenc.o
> +OBJS-$(CONFIG_AA_DEMUXER)                += aadec.o
>  OBJS-$(CONFIG_AAC_DEMUXER)               += aacdec.o apetag.o img2.o rawdec.o
>  OBJS-$(CONFIG_AC3_DEMUXER)               += ac3dec.o rawdec.o
>  OBJS-$(CONFIG_AC3_MUXER)                 += rawenc.o
> diff --git a/libavformat/aadec.c b/libavformat/aadec.c
> new file mode 100644
> index 0000000..771bcfd
> --- /dev/null
> +++ b/libavformat/aadec.c
> @@ -0,0 +1,325 @@
> +/*
> + * Audible AA demuxer
> + * Copyright (c) 2015 Vesselin Bontchev
> + *
> + * Header parsing is borrowed from https://github.com/jteeuwen/audible project.
> + * Copyright (c) 2001-2014, Jim Teeuwen
> + *
> + * Redistribution and use in source and binary forms, with or without modification,
> + * are permitted provided that the following conditions are met:
> + *
> + * 1. Redistributions of source code must retain the above copyright notice, this
> + *    list of conditions and the following disclaimer.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
> + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
> + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
> + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
> + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
> + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include "avformat.h"
> +#include "internal.h"
> +#include "libavutil/intreadwrite.h"
> +#include "libavutil/tea.h"
> +#include "libavutil/opt.h"
> +
> +#define AA_MAGIC 1469084982 /* this identifies an audible .aa file */
> +#define MAX_CODEC_SECOND_SIZE 3982
> +#define MAX_TOC_ENTRIES 16
> +#define MAX_DICTIONARY_ENTRIES 128
> +#define TEA_BLOCK_SIZE 8
> +
> +typedef struct AADemuxContext {
> +    AVClass *class;
> +    void *aa_fixed_key;

This should probably be char * rather than void *.

> +    int aa_fixed_key_size;
> +    int32_t codec_second_size;
> +    struct AVTEA *tea_ctx;
> +    uint8_t file_key[16];
> +    int64_t current_chapter_size;
> +    int64_t current_codec_second_size;
> +    int chapter_idx;
> +} AADemuxContext;
> +
> +static int32_t get_second_size(char *codec_name)
> +{
> +    int32_t result = -1;
> +
> +    if (!strcmp(codec_name, "mp332")) {
> +        result = 3982;
> +    } else if (!strcmp(codec_name, "acelp16")) {
> +        result = 2000;
> +    } else if (!strcmp(codec_name, "acelp85")) {
> +        result = 1045;
> +    }
> +
> +    return result;
> +}
> +
> +static int aa_read_header(AVFormatContext *s)
> +{
> +    int i, j, idx;
> +    uint32_t nkey;
> +    uint32_t nval;
> +    char key[128];
> +    char val[128];
> +    uint8_t output[24];
> +    uint8_t dst[8];
> +    uint8_t src[8];

Any reason some are char and others uint8_t? Also, try to put more than one variable
of the same type per line.

> +    int largest_idx = -1;
> +    int64_t largest_size = -1;
> +    int64_t current_size = -1;
> +    uint32_t start;
> +    char codec_name[64] = {0};
> +    int ret = 0;
> +    struct toc_entry {
> +        uint32_t offset;
> +        uint32_t size;
> +    } TOC[MAX_TOC_ENTRIES];
> +    uint32_t toc_size;
> +    uint32_t npairs;
> +    uint32_t header_seed;
> +    union {
> +        uint8_t key[16];
> +        uint32_t part[4];
> +    } header_key;
> +    AADemuxContext *c = s->priv_data;
> +    AVIOContext *pb = s->pb;
> +    AVStream *st = avformat_new_stream(s, NULL);
> +    if (!st)
> +        return AVERROR(ENOMEM);
> +    c->tea_ctx = av_tea_alloc();

You could move this down, to right before the av_tea_init() call, and replace all
of the gotos below with simple returns.

> +    if (!c->tea_ctx) {
> +        ret =  AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
> +    avio_skip(pb, 4); // file size
> +    avio_skip(pb, 4); // magic string
> +    toc_size = avio_rb32(pb); // TOC size
> +    avio_skip(pb, 4); // unidentified integer
> +    if (toc_size > MAX_TOC_ENTRIES) {
> +        ret = AVERROR_INVALIDDATA;
> +        goto fail;
> +    }
> +    for (i = 0; i < toc_size; i++) { // read TOC
> +        avio_skip(pb, 4); // TOC entry index
> +        TOC[i].offset = avio_rb32(pb); // block offset
> +        TOC[i].size = avio_rb32(pb); // block size
> +    }
> +    avio_skip(pb, 24); // header termination block (ignored)
> +    npairs = avio_rb32(pb); // read dictionary entries
> +    if (npairs > MAX_DICTIONARY_ENTRIES) {
> +        ret = AVERROR_INVALIDDATA;
> +        goto fail;
> +    }
> +    for (i = 0; i < npairs; i++) {
> +        memset(val, 0, sizeof(val));
> +        memset(key, 0, sizeof(key));
> +        avio_skip(pb, 1); // unidentified integer
> +        nkey = avio_rb32(pb); // key string length
> +        nval = avio_rb32(pb); // value string length
> +        if (nkey > sizeof(key)) {
> +            avio_skip(pb, nkey);
> +        } else {
> +            avio_read(pb, key, nkey); // key string
> +        }
> +        if (nval > sizeof(val)) {
> +            avio_skip(pb, nval);
> +        } else {
> +            avio_read(pb, val, nval); // value string
> +        }
> +        if (!strcmp(key, "codec")) {
> +            strncpy(codec_name, val, sizeof(codec_name) - 1);
> +        }
> +        if (!strcmp(key, "HeaderSeed")) {
> +            header_seed = atoi(val);
> +        }
> +        if (!strcmp(key, "HeaderKey")) {
> +            sscanf(val, "%d%d%d%d", &header_key.part[0], &header_key.part[1], &header_key.part[2], &header_key.part[3]);
> +            for (idx = 0; idx < 4; idx++) {
> +                header_key.part[idx] = AV_RB32(&header_key.part[idx]); // convert to BE!
> +            }
> +        }
> +    }
> +
> +    /* verify fixed key */
> +    if (c->aa_fixed_key_size != 16) {

This is zeroed during init but apparently never touched after that.
You should probably check the length of the AVOption aa_fixed_key instead.

> +        av_log(s, AV_LOG_FATAL, "[aa] aa_fixed_key value needs to be 16 bytes!\n");
> +        ret = AVERROR(EINVAL);
> +        goto fail;
> +    }
> +
> +    /* decryption key derivation */
> +    av_tea_init(c->tea_ctx, c->aa_fixed_key, 16);
> +    c->codec_second_size = get_second_size(codec_name);

You check for the codec name below again. Maybe you could combine both?

> +    if (c->codec_second_size == -1) {
> +        av_log(s, AV_LOG_FATAL, "[aa] unknown codec <%s>!\n", codec_name);

AV_LOG_ERROR is enough, and there's no need to add "[aa]" since you're passing the
AVFormatContext as argument.

> +        return AVERROR_INVALIDDATA;

goto fail, or move the tea alloc and init functions right below this.

> +    }
> +    output[0] = output[1] = 0; // purely for padding purposes
> +    memcpy(output + 2, &header_key, 16);
> +    idx = 0;
> +    for (i = 0; i < 3; i++) { // TEA CBC with weird mixed endianness
> +        AV_WB32(src, header_seed);
> +        AV_WB32(src + 4, header_seed + 1);
> +        header_seed += 2;
> +        av_tea_crypt(c->tea_ctx, dst, src, 1, NULL, 0); // TEA ECB encrypt
> +        for (j = 0; j < TEA_BLOCK_SIZE && idx < 18; j+=1, idx+=1) {
> +            output[idx] = output[idx] ^ dst[j];
> +        }
> +    }
> +    memcpy(c->file_key, output + 2, 16); // skip first 2 bytes of output
> +
> +    /* decoder setup */
> +    st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
> +    if (!strcmp(codec_name, "mp332")) {
> +        st->codec->codec_id = AV_CODEC_ID_MP3;
> +        st->codec->sample_rate = 22050;
> +        st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
> +        st->start_time = 0;
> +    } else if (!strcmp(codec_name, "acelp85")) {
> +        st->codec->codec_id = AV_CODEC_ID_SIPR;
> +        st->codec->block_align = 19;
> +        st->codec->channels = 1;
> +        st->codec->sample_rate = 8500;
> +    } else if (!strcmp(codec_name, "acelp16")) {
> +        st->codec->codec_id = AV_CODEC_ID_SIPR;
> +        st->codec->block_align = 20;
> +        st->codec->channels = 1;
> +        st->codec->sample_rate = 16000;
> +    }
> +
> +    /* determine, and jump to audio start offset */
> +    for (i = 1; i < toc_size; i++) { // skip the first entry!
> +        current_size = TOC[i].size;
> +        if (current_size > largest_size) {
> +            largest_idx = i;
> +            largest_size = current_size;
> +        }
> +    }
> +    start = TOC[largest_idx].offset;
> +    avio_seek(pb, start, SEEK_SET);
> +    c->current_chapter_size = 0;
> +
> +    return 0;
> +
> +fail:

If you move the av_tea_alloc() and av_tea_init() calls down, this will not be
needed anymore.

> +    av_freep(&c->tea_ctx);
> +    av_free(st);
> +    return ret;
> +}
> +
> +static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
> +{
> +    uint8_t dst[TEA_BLOCK_SIZE];
> +    uint8_t src[TEA_BLOCK_SIZE];
> +    int i;
> +    int trailing_bytes;
> +    int blocks;
> +    uint8_t buf[MAX_CODEC_SECOND_SIZE * 2];
> +    int written = 0;
> +    int ret;
> +    AADemuxContext *c = s->priv_data;
> +
> +    // are we at the start of a chapter?
> +    if (c->current_chapter_size == 0) {
> +        c->current_chapter_size = avio_rb32(s->pb);
> +        if (c->current_chapter_size == 0) {
> +            return AVERROR_EOF;
> +        }
> +        av_log(s, AV_LOG_DEBUG, "[aa] chapter %d (%" PRId64 " bytes)\n", c->chapter_idx, c->current_chapter_size);

Same, no need for [aa].

> +        c->chapter_idx = c->chapter_idx + 1;
> +        avio_skip(s->pb, 4); // data start offset
> +        c->current_codec_second_size = c->codec_second_size;
> +    }
> +
> +    // is this the last block in this chapter?
> +    if (c->current_chapter_size / c->current_codec_second_size == 0) {
> +        c->current_codec_second_size = c->current_chapter_size % c->current_codec_second_size;
> +    }
> +
> +    // decrypt c->current_codec_second_size bytes
> +    blocks = c->current_codec_second_size / TEA_BLOCK_SIZE;
> +    for (i = 0; i < blocks; i++) {
> +        avio_read(s->pb, src, TEA_BLOCK_SIZE);
> +        av_tea_init(c->tea_ctx, c->file_key, 16);
> +        av_tea_crypt(c->tea_ctx, dst, src, 1, NULL, 1);
> +        memcpy(buf + written, dst, TEA_BLOCK_SIZE);
> +        written = written + TEA_BLOCK_SIZE;
> +    }
> +    trailing_bytes = c->current_codec_second_size % TEA_BLOCK_SIZE;
> +    if (trailing_bytes != 0) { // trailing bytes are left unencrypted!
> +        avio_read(s->pb, src, trailing_bytes);
> +        memcpy(buf + written, src, trailing_bytes);
> +        written = written + trailing_bytes;
> +    }
> +
> +    // update state
> +    c->current_chapter_size = c->current_chapter_size - c->current_codec_second_size;
> +    if (c->current_chapter_size <= 0)
> +        c->current_chapter_size = 0;
> +
> +    ret = av_new_packet(pkt, written);
> +    if (ret < 0)
> +        return ret;
> +    memcpy(pkt->data, buf, written);
> +
> +    return 0;
> +}
> +
> +static int aa_probe(AVProbeData *p)
> +{
> +    uint8_t *buf = p->buf;
> +
> +    // first 4 bytes are file size, next 4 bytes are the magic
> +    if (AV_RB32(buf+4) != AA_MAGIC)
> +        return 0;
> +
> +    return AVPROBE_SCORE_MAX / 2;
> +}
> +
> +static int aa_read_close(AVFormatContext *s)
> +{
> +    AADemuxContext *c = s->priv_data;
> +
> +    av_freep(&c->tea_ctx);
> +
> +    return 0;
> +}
> +
> +#define OFFSET(x) offsetof(AADemuxContext, x)
> +#define FLAGS AV_OPT_FLAG_DECODING_PARAM

You're defining this but not using it.

> +static const AVOption aa_options[] = {
> +    { "aa_fixed_key", // extracted from libAAX_SDK.so and AAXSDKWin.dll files!
> +        "Fixed key used for handling Audible AA files", OFFSET(aa_fixed_key),
> +        AV_OPT_TYPE_BINARY, {.str="77214d4b196a87cd520045fd2a51d673"},

This should probably be AV_OPT_TYPE_STRING.

> +        .flags = AV_OPT_FLAG_DECODING_PARAM },
> +    { NULL },
> +};
> +
> +static const AVClass aa_class = {
> +    .class_name = "aa",
> +    .item_name  = av_default_item_name,
> +    .option     = aa_options,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> +AVInputFormat ff_aa_demuxer = {
> +    .name           = "aa",
> +    .long_name      = NULL_IF_CONFIG_SMALL("Audible AA format files"),
> +    .priv_class     = &aa_class,
> +    .priv_data_size = sizeof(AADemuxContext),
> +    .extensions     = "aa",
> +    .read_probe     = aa_probe,
> +    .read_header    = aa_read_header,
> +    .read_packet    = aa_read_packet,
> +    .read_close     = aa_read_close,
> +    .flags          = AVFMT_GENERIC_INDEX,
> +};
> diff --git a/libavformat/allformats.c b/libavformat/allformats.c
> index 181cb9e..0a24ac7 100644
> --- a/libavformat/allformats.c
> +++ b/libavformat/allformats.c
> @@ -60,6 +60,7 @@ void av_register_all(void)
>  
>      /* (de)muxers */
>      REGISTER_MUXER   (A64,              a64);
> +    REGISTER_DEMUXER (AA,               aa);
>      REGISTER_DEMUXER (AAC,              aac);
>      REGISTER_MUXDEMUX(AC3,              ac3);
>      REGISTER_DEMUXER (ACT,              act);
> -- 2.1.4

I'd say wait for more comments from other people before sending the patch again;
I probably missed some things.