[FFmpeg-devel] [PATCH 6/6] lavc/audiotoolboxdec: fix a number of config and timestamp issues

Sun Mar 27 19:20:27 CEST 2016

- ADTS-formatted AAC didn't work
- Channel layouts were never exported
- Channel mappings were incorrect beyond stereo
- Channel counts weren't updated after packets were decoded
- Timestamps were exported incorrectly
---
 libavcodec/audiotoolboxdec.c | 248 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 185 insertions(+), 63 deletions(-)

diff --git a/libavcodec/audiotoolboxdec.c b/libavcodec/audiotoolboxdec.c
index 1fa6f16..d95fc0f 100644
--- a/libavcodec/audiotoolboxdec.c
+++ b/libavcodec/audiotoolboxdec.c
@@ -39,7 +39,6 @@ typedef struct ATDecodeContext {
     AVPacket new_in_pkt;
     AVBitStreamFilterContext *bsf;
 
-    unsigned pkt_size;
     int64_t last_pts;
     int eof;
 } ATDecodeContext;
@@ -81,20 +80,126 @@ static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
     }
 }
 
-static void ffat_update_ctx(AVCodecContext *avctx)
+static int ffat_get_channel_id(AudioChannelLabel label)
+{   /* Map an AudioToolbox channel label to the bit index used in the channel_layout mask; -1 if unmapped. */
+    if (label == 0)
+        return -1;
+    else if (label <= kAudioChannelLabel_LFEScreen)
+        return label - 1;
+    else if (label <= kAudioChannelLabel_RightSurround)
+        return label + 4;
+    else if (label <= kAudioChannelLabel_CenterSurround)
+        return label + 1; /* NOTE(review): results of this range appear to collide with the +4 range above -- verify against the AV_CH_* bit order */
+    else if (label <= kAudioChannelLabel_RightSurroundDirect)
+        return label + 23;
+    else if (label <= kAudioChannelLabel_TopBackRight)
+        return label - 1;
+    else if (label < kAudioChannelLabel_RearSurroundLeft)
+        return -1; /* labels in this gap are not mapped */
+    else if (label <= kAudioChannelLabel_RearSurroundRight)
+        return label - 29;
+    else if (label <= kAudioChannelLabel_RightWide)
+        return label - 4;
+    else if (label == kAudioChannelLabel_LFE2)
+        return ff_ctzll(AV_CH_LOW_FREQUENCY_2);
+    else if (label == kAudioChannelLabel_Mono)
+        return ff_ctzll(AV_CH_FRONT_CENTER); /* mono is reported as front center */
+    else
+        return -1;
+}
+
+static int ffat_compare_channel_descriptions(const void* a, const void* b)
+{   /* qsort() comparator: ascending by the id ffat_get_channel_id() assigns to each label */
+    int lhs_id = ffat_get_channel_id(((const AudioChannelDescription *)a)->mChannelLabel);
+    int rhs_id = ffat_get_channel_id(((const AudioChannelDescription *)b)->mChannelLabel);
+    return lhs_id - rhs_id;
+}
+
+static AudioChannelLayout *ffat_convert_layout(AudioChannelLayout *layout, UInt32* size)
+{   /* Return a channel-description form of `layout`; a bitmap/tag layout is freed and replaced. NULL on allocation failure. */
+    AudioChannelLayoutTag tag = layout->mChannelLayoutTag;
+    AudioChannelLayout *new_layout;
+    if (tag == kAudioChannelLayoutTag_UseChannelDescriptions)
+        return layout; /* already description-based: returned as-is, *size untouched */
+    else if (tag == kAudioChannelLayoutTag_UseChannelBitmap)
+        AudioFormatGetPropertyInfo(kAudioFormatProperty_ChannelLayoutForBitmap,
+                                   sizeof(UInt32), &layout->mChannelBitmap, size);
+    else
+        AudioFormatGetPropertyInfo(kAudioFormatProperty_ChannelLayoutForTag,
+                                   sizeof(AudioChannelLayoutTag), &tag, size);
+    new_layout = av_malloc(*size); /* NOTE(review): the GetPropertyInfo status above is ignored, so *size may still hold the caller's value */
+    if (!new_layout) {
+        av_free(layout); /* the input layout is consumed even on failure */
+        return NULL;
+    }
+    if (tag == kAudioChannelLayoutTag_UseChannelBitmap)
+        AudioFormatGetProperty(kAudioFormatProperty_ChannelLayoutForBitmap,
+                               sizeof(UInt32), &layout->mChannelBitmap, size, new_layout);
+    else
+        AudioFormatGetProperty(kAudioFormatProperty_ChannelLayoutForTag,
+                               sizeof(AudioChannelLayoutTag), &tag, size, new_layout);
+    new_layout->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions; /* NOTE(review): GetProperty status is unchecked; assumes new_layout was filled */
+    av_free(layout); /* caller now owns new_layout instead */
+    return new_layout;
+}
+
+static int ffat_update_ctx(AVCodecContext *avctx) /* Sync avctx rate/channels/layout/frame_size with the converter; returns 0 or AVERROR(ENOMEM). */
 {
     ATDecodeContext *at = avctx->priv_data;
-    AudioStreamBasicDescription in_format;
-    UInt32 size = sizeof(in_format);
+    AudioStreamBasicDescription format;
+    UInt32 size = sizeof(format);
     if (!AudioConverterGetProperty(at->converter,
                                    kAudioConverterCurrentInputStreamDescription,
-                                   &size, &in_format)) {
-        avctx->channels = in_format.mChannelsPerFrame;
-        at->pkt_size = in_format.mFramesPerPacket;
+                                   &size, &format)) {
+        if (format.mSampleRate)
+            avctx->sample_rate = format.mSampleRate;
+        avctx->channels = format.mChannelsPerFrame;
+        avctx->channel_layout = av_get_default_channel_layout(avctx->channels); /* default; replaced below if the converter's layout maps cleanly */
+        avctx->frame_size = format.mFramesPerPacket;
     }
 
-    if (!at->pkt_size)
-        at->pkt_size = 2048;
+    if (!AudioConverterGetProperty(at->converter,
+                                   kAudioConverterCurrentOutputStreamDescription,
+                                   &size, &format)) {
+        format.mSampleRate = avctx->sample_rate; /* write avctx's rate and channel count into the output description */
+        format.mChannelsPerFrame = avctx->channels;
+        AudioConverterSetProperty(at->converter,
+                                  kAudioConverterCurrentOutputStreamDescription,
+                                  size, &format);
+    }
+
+    if (!AudioConverterGetPropertyInfo(at->converter, kAudioConverterInputChannelLayout,
+                                       &size, NULL) && size) {
+        AudioChannelLayout *layout = av_malloc(size);
+        uint64_t layout_mask = 0;
+        int i;
+        if (!layout)
+            return AVERROR(ENOMEM);
+        AudioConverterGetProperty(at->converter, kAudioConverterInputChannelLayout,
+                                  &size, layout);
+        if (!(layout = ffat_convert_layout(layout, &size))) /* frees the old layout itself on failure */
+            return AVERROR(ENOMEM);
+        for (i = 0; i < layout->mNumberChannelDescriptions; i++) {
+            int id = ffat_get_channel_id(layout->mChannelDescriptions[i].mChannelLabel);
+            if (id < 0)
+                goto done; /* unmapped label: leave channel_layout as it was */
+            layout_mask |= 1ULL << id; /* 64-bit shift: ids above 31 are possible, and shifting a 32-bit int that far is undefined */
+        }
+        qsort(layout->mChannelDescriptions, layout->mNumberChannelDescriptions,
+              sizeof(AudioChannelDescription), &ffat_compare_channel_descriptions); /* presumably so output channels follow ascending mask-bit order */
+        if (AudioConverterSetProperty(at->converter, kAudioConverterOutputChannelLayout,
+                                      size, layout))
+            av_log(avctx, AV_LOG_WARNING, "Couldn't remap channel layout\n");
+        else
+            avctx->channel_layout = layout_mask;
+done:
+        av_free(layout);
+    }
+
+    if (!avctx->frame_size)
+        avctx->frame_size = 2048; /* fallback when no frames-per-packet value was obtained */
+
+    return 0;
 }
 
 static void put_descr(PutByteContext *pb, int tag, unsigned int size)
@@ -106,42 +211,11 @@ static void put_descr(PutByteContext *pb, int tag, unsigned int size)
     bytestream2_put_byte(pb, size & 0x7F);
 }
 
-static av_cold int ffat_init_decoder(AVCodecContext *avctx)
+static int ffat_set_extradata(AVCodecContext *avctx)
 {
     ATDecodeContext *at = avctx->priv_data;
-    OSStatus status;
-
-    enum AVSampleFormat sample_fmt = (avctx->bits_per_raw_sample == 32) ?
-                                     AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S16;
-
-    AudioStreamBasicDescription in_format = {
-        .mSampleRate = avctx->sample_rate ? avctx->sample_rate : 44100,
-        .mFormatID = ffat_get_format_id(avctx->codec_id, avctx->profile),
-        .mBytesPerPacket = avctx->block_align,
-        .mChannelsPerFrame = avctx->channels ? avctx->channels : 1,
-    };
-    AudioStreamBasicDescription out_format = {
-        .mSampleRate = in_format.mSampleRate,
-        .mFormatID = kAudioFormatLinearPCM,
-        .mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked,
-        .mFramesPerPacket = 1,
-        .mChannelsPerFrame = in_format.mChannelsPerFrame,
-        .mBitsPerChannel = av_get_bytes_per_sample(sample_fmt) * 8,
-    };
-
-    avctx->sample_fmt = sample_fmt;
-
-    if (avctx->codec_id == AV_CODEC_ID_ADPCM_IMA_QT)
-        in_format.mFramesPerPacket = 64;
-
-    status = AudioConverterNew(&in_format, &out_format, &at->converter);
-
-    if (status != 0) {
-        av_log(avctx, AV_LOG_ERROR, "AudioToolbox init error: %i\n", (int)status);
-        return AVERROR_UNKNOWN;
-    }
-
     if (avctx->extradata_size) {
+        OSStatus status;
         char *extradata = avctx->extradata;
         int extradata_size = avctx->extradata_size;
         if (avctx->codec_id == AV_CODEC_ID_AAC) {
@@ -180,7 +254,50 @@ static av_cold int ffat_init_decoder(AVCodecContext *avctx)
                                            extradata_size, extradata);
         if (status != 0)
             av_log(avctx, AV_LOG_WARNING, "AudioToolbox cookie error: %i\n", (int)status);
+
+        if (avctx->codec_id == AV_CODEC_ID_AAC)
+            av_free(extradata);
     }
+    return 0;
+}
+
+static av_cold int ffat_create_decoder(AVCodecContext *avctx)
+{
+    ATDecodeContext *at = avctx->priv_data;
+    OSStatus status;
+
+    enum AVSampleFormat sample_fmt = (avctx->bits_per_raw_sample == 32) ?
+                                     AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S16;
+
+    AudioStreamBasicDescription in_format = {
+        .mSampleRate = avctx->sample_rate ? avctx->sample_rate : 44100,
+        .mFormatID = ffat_get_format_id(avctx->codec_id, avctx->profile),
+        .mBytesPerPacket = avctx->block_align,
+        .mChannelsPerFrame = avctx->channels ? avctx->channels : 1,
+    };
+    AudioStreamBasicDescription out_format = {
+        .mSampleRate = in_format.mSampleRate,
+        .mFormatID = kAudioFormatLinearPCM,
+        .mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked,
+        .mFramesPerPacket = 1,
+        .mChannelsPerFrame = in_format.mChannelsPerFrame,
+        .mBitsPerChannel = av_get_bytes_per_sample(sample_fmt) * 8,
+    };
+
+    avctx->sample_fmt = sample_fmt;
+
+    if (avctx->codec_id == AV_CODEC_ID_ADPCM_IMA_QT)
+        in_format.mFramesPerPacket = 64;
+
+    status = AudioConverterNew(&in_format, &out_format, &at->converter);
+
+    if (status != 0) {
+        av_log(avctx, AV_LOG_ERROR, "AudioToolbox init error: %i\n", (int)status);
+        return AVERROR_UNKNOWN;
+    }
+
+    if ((status = ffat_set_extradata(avctx)) < 0)
+        return status;
 
     ffat_update_ctx(avctx);
 
@@ -189,6 +306,14 @@ static av_cold int ffat_init_decoder(AVCodecContext *avctx)
     return 0;
 }
 
+static av_cold int ffat_init_decoder(AVCodecContext *avctx)
+{
+    /* Neither channel count nor extradata known yet: ffat_decode() creates the converter later. */
+    if (!avctx->channels && !avctx->extradata_size)
+        return 0;
+    return ffat_create_decoder(avctx);
+}
+
 static OSStatus ffat_decode_callback(AudioConverterRef converter, UInt32 *nb_packets,
                                      AudioBufferList *data,
                                      AudioStreamPacketDescription **packets,
@@ -237,24 +362,13 @@ static int ffat_decode(AVCodecContext *avctx, void *data,
     int pkt_size = avpkt->size;
     AVPacket filtered_packet;
     OSStatus ret;
-
-    AudioBufferList out_buffers = {
-        .mNumberBuffers = 1,
-        .mBuffers = {
-            {
-                .mNumberChannels = avctx->channels,
-                .mDataByteSize = av_get_bytes_per_sample(avctx->sample_fmt) * at->pkt_size * avctx->channels,
-            }
-        }
-    };
+    AudioBufferList out_buffers;
 
     if (avctx->codec_id == AV_CODEC_ID_AAC && avpkt->size > 2 &&
         (AV_RB16(avpkt->data) & 0xfff0) == 0xfff0) {
-        int first = 0;
         uint8_t *p_filtered = NULL;
         int      n_filtered = 0;
         if (!at->bsf) {
-            first = 1;
             if(!(at->bsf = av_bitstream_filter_init("aac_adtstoasc")))
                 return AVERROR(ENOMEM);
         }
@@ -267,16 +381,24 @@ static int ffat_decode(AVCodecContext *avctx, void *data,
             avpkt->data = p_filtered;
             avpkt->size = n_filtered;
         }
+    }
 
-        if (first) {
-            if ((ret = ffat_set_extradata(avctx)) < 0)
-                return ret;
-            ffat_update_ctx(avctx);
-            out_buffers.mBuffers[0].mNumberChannels = avctx->channels;
-            out_buffers.mBuffers[0].mDataByteSize = av_get_bytes_per_sample(avctx->sample_fmt) * at->pkt_size * avctx->channels;
-        }
+    if (!at->converter) {
+        if ((ret = ffat_create_decoder(avctx)) < 0)
+            return ret;
     }
 
+    out_buffers = (AudioBufferList){
+        .mNumberBuffers = 1,
+        .mBuffers = {
+            {
+                .mNumberChannels = avctx->channels,
+                .mDataByteSize = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->frame_size
+                                 * avctx->channels,
+            }
+        }
+    };
+
     av_packet_unref(&at->new_in_pkt);
 
     if (avpkt->size) {
@@ -289,7 +411,7 @@ static int ffat_decode(AVCodecContext *avctx, void *data,
 
     frame->sample_rate = avctx->sample_rate;
 
-    frame->nb_samples = at->pkt_size;
+    frame->nb_samples = avctx->frame_size;
     ff_get_buffer(avctx, frame, 0);
 
     out_buffers.mBuffers[0].mData = frame->data[0];
@@ -299,7 +421,7 @@ static int ffat_decode(AVCodecContext *avctx, void *data,
     if ((!ret || ret == 1) && frame->nb_samples) {
         *got_frame_ptr = 1;
         if (at->last_pts != AV_NOPTS_VALUE) {
-            frame->pts = at->last_pts;
+            frame->pkt_pts = at->last_pts;
             at->last_pts = avpkt->pts;
         }
     } else if (ret && ret != 1) {
-- 
2.7.3