[FFmpeg-devel] [PATCH 1/2] lavc/audiotoolboxdec: fix a number of config and timestamp issues
Rodger Combs
rodger.combs at gmail.com
Thu Mar 31 04:00:46 CEST 2016
- ADTS-formatted AAC didn't work
- Channel layouts were never exported
- Channel mappings were incorrect beyond stereo
- Channel counts weren't updated after packets were decoded
- Timestamps were exported incorrectly
---
libavcodec/audiotoolboxdec.c | 286 +++++++++++++++++++++++++++++++++----------
1 file changed, 221 insertions(+), 65 deletions(-)
diff --git a/libavcodec/audiotoolboxdec.c b/libavcodec/audiotoolboxdec.c
index 1fa6f16..4ff46ea 100644
--- a/libavcodec/audiotoolboxdec.c
+++ b/libavcodec/audiotoolboxdec.c
@@ -38,8 +38,9 @@ typedef struct ATDecodeContext {
AVPacket in_pkt;
AVPacket new_in_pkt;
AVBitStreamFilterContext *bsf;
+ char *decoded_data;
+ int channel_map[64];
- unsigned pkt_size;
int64_t last_pts;
int eof;
} ATDecodeContext;
@@ -81,20 +82,127 @@ static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
}
}
-static void ffat_update_ctx(AVCodecContext *avctx)
+static int ffat_get_channel_id(AudioChannelLabel label) // Map a CoreAudio channel label to a channel-mask bit index; -1 when there is no mapping.
+{
+ if (label == 0) // label 0 has no mapping
+ return -1;
+ else if (label <= kAudioChannelLabel_LFEScreen)
+ return label - 1; // NOTE(review): the per-range offsets presumably align label values with AV_CH_* bit positions - verify against both headers
+ else if (label <= kAudioChannelLabel_RightSurround)
+ return label + 4;
+ else if (label <= kAudioChannelLabel_CenterSurround)
+ return label + 1;
+ else if (label <= kAudioChannelLabel_RightSurroundDirect)
+ return label + 23;
+ else if (label <= kAudioChannelLabel_TopBackRight)
+ return label - 1;
+ else if (label < kAudioChannelLabel_RearSurroundLeft) // labels between TopBackRight and RearSurroundLeft are unmapped
+ return -1;
+ else if (label <= kAudioChannelLabel_RearSurroundRight)
+ return label - 29;
+ else if (label <= kAudioChannelLabel_RightWide)
+ return label - 4;
+ else if (label == kAudioChannelLabel_LFE2)
+ return ff_ctzll(AV_CH_LOW_FREQUENCY_2); // index of the flag's set bit
+ else if (label == kAudioChannelLabel_Mono)
+ return ff_ctzll(AV_CH_FRONT_CENTER); // mono is reported as front center
+ else // any other label is unmapped
+ return -1;
+}
+
+static int ffat_compare_channel_descriptions(const void* a, const void* b) // qsort() comparator: orders AudioChannelDescription entries by ascending ffat_get_channel_id() of their label
+{
+ const AudioChannelDescription* da = a;
+ const AudioChannelDescription* db = b;
+ return ffat_get_channel_id(da->mChannelLabel) - ffat_get_channel_id(db->mChannelLabel); // negative when a's id is the smaller one
+}
+
+static AudioChannelLayout *ffat_convert_layout(AudioChannelLayout *layout, UInt32* size) // Return the layout in UseChannelDescriptions form; frees the input unless it is returned as-is; NULL on allocation failure.
+{
+ AudioChannelLayoutTag tag = layout->mChannelLayoutTag;
+ AudioChannelLayout *new_layout;
+ if (tag == kAudioChannelLayoutTag_UseChannelDescriptions) // already in the wanted form: hand the input back untouched
+ return layout;
+ else if (tag == kAudioChannelLayoutTag_UseChannelBitmap)
+ AudioFormatGetPropertyInfo(kAudioFormatProperty_ChannelLayoutForBitmap, // ask for the size of the expanded layout; result lands in *size
+ sizeof(UInt32), &layout->mChannelBitmap, size);
+ else
+ AudioFormatGetPropertyInfo(kAudioFormatProperty_ChannelLayoutForTag, // NOTE(review): the OSStatus of these AudioFormat* calls is ignored - consider checking it
+ sizeof(AudioChannelLayoutTag), &tag, size);
+ new_layout = av_malloc(*size);
+ if (!new_layout) {
+ av_free(layout); // the input is released on the failure path too
+ return NULL;
+ }
+ if (tag == kAudioChannelLayoutTag_UseChannelBitmap)
+ AudioFormatGetProperty(kAudioFormatProperty_ChannelLayoutForBitmap,
+ sizeof(UInt32), &layout->mChannelBitmap, size, new_layout);
+ else
+ AudioFormatGetProperty(kAudioFormatProperty_ChannelLayoutForTag,
+ sizeof(AudioChannelLayoutTag), &tag, size, new_layout);
+ new_layout->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions; // presumably the property call filled mChannelDescriptions - TODO confirm
+ av_free(layout);
+ return new_layout;
+}
+
+static int ffat_update_ctx(AVCodecContext *avctx) // Sync avctx (rate, channels, layout, frame_size) and at->channel_map with the converter; returns 0 or AVERROR(ENOMEM).
{
ATDecodeContext *at = avctx->priv_data;
- AudioStreamBasicDescription in_format;
- UInt32 size = sizeof(in_format);
+ AudioStreamBasicDescription format;
+ UInt32 size = sizeof(format);
if (!AudioConverterGetProperty(at->converter,
kAudioConverterCurrentInputStreamDescription,
- &size, &in_format)) {
- avctx->channels = in_format.mChannelsPerFrame;
- at->pkt_size = in_format.mFramesPerPacket;
+ &size, &format)) {
+ if (format.mSampleRate) // keep the existing rate when the converter reports 0
+ avctx->sample_rate = format.mSampleRate;
+ avctx->channels = format.mChannelsPerFrame;
+ avctx->channel_layout = av_get_default_channel_layout(avctx->channels); // default; replaced below if the converter reports a usable layout
+ avctx->frame_size = format.mFramesPerPacket;
+ }
+
+ if (!AudioConverterGetProperty(at->converter,
+ kAudioConverterCurrentOutputStreamDescription,
+ &size, &format)) {
+ format.mSampleRate = avctx->sample_rate; // make the output description follow the values read above
+ format.mChannelsPerFrame = avctx->channels;
+ AudioConverterSetProperty(at->converter,
+ kAudioConverterCurrentOutputStreamDescription,
+ size, &format);
+ }
+
+ if (!AudioConverterGetPropertyInfo(at->converter, kAudioConverterOutputChannelLayout,
+ &size, NULL) && size) {
+ AudioChannelLayout *layout = av_malloc(size);
+ uint64_t layout_mask = 0;
+ int i;
+ if (!layout)
+ return AVERROR(ENOMEM);
+ AudioConverterGetProperty(at->converter, kAudioConverterOutputChannelLayout,
+ &size, layout);
+ if (!(layout = ffat_convert_layout(layout, &size))) // on failure the old layout was already freed by ffat_convert_layout()
+ return AVERROR(ENOMEM);
+ for (i = 0; i < layout->mNumberChannelDescriptions; i++) {
+ int id = ffat_get_channel_id(layout->mChannelDescriptions[i].mChannelLabel);
+ if (id < 0) // unmappable label: leave the default layout and current channel_map
+ goto done;
+ if (layout_mask & (1ULL << id)) // duplicate id: same fallback; 1ULL because ids can be >= 31 and an int shift would overflow
+ goto done;
+ layout_mask |= 1ULL << id;
+ layout->mChannelDescriptions[i].mChannelFlags = i; // Abusing flags as index
+ }
+ avctx->channel_layout = layout_mask;
+ qsort(layout->mChannelDescriptions, layout->mNumberChannelDescriptions, // sort by mask bit so channel_map[n] is the converter index of the n-th mask channel
+ sizeof(AudioChannelDescription), &ffat_compare_channel_descriptions);
+ for (i = 0; i < layout->mNumberChannelDescriptions; i++)
+ at->channel_map[i] = layout->mChannelDescriptions[i].mChannelFlags;
+done:
+ av_free(layout);
}
- if (!at->pkt_size)
- at->pkt_size = 2048;
+ if (!avctx->frame_size) // fall back to 2048 when no frames-per-packet value was obtained
+ avctx->frame_size = 2048;
+
+ return 0;
}
static void put_descr(PutByteContext *pb, int tag, unsigned int size)
@@ -106,42 +214,11 @@ static void put_descr(PutByteContext *pb, int tag, unsigned int size)
bytestream2_put_byte(pb, size & 0x7F);
}
-static av_cold int ffat_init_decoder(AVCodecContext *avctx)
+static int ffat_set_extradata(AVCodecContext *avctx)
{
ATDecodeContext *at = avctx->priv_data;
- OSStatus status;
-
- enum AVSampleFormat sample_fmt = (avctx->bits_per_raw_sample == 32) ?
- AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S16;
-
- AudioStreamBasicDescription in_format = {
- .mSampleRate = avctx->sample_rate ? avctx->sample_rate : 44100,
- .mFormatID = ffat_get_format_id(avctx->codec_id, avctx->profile),
- .mBytesPerPacket = avctx->block_align,
- .mChannelsPerFrame = avctx->channels ? avctx->channels : 1,
- };
- AudioStreamBasicDescription out_format = {
- .mSampleRate = in_format.mSampleRate,
- .mFormatID = kAudioFormatLinearPCM,
- .mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked,
- .mFramesPerPacket = 1,
- .mChannelsPerFrame = in_format.mChannelsPerFrame,
- .mBitsPerChannel = av_get_bytes_per_sample(sample_fmt) * 8,
- };
-
- avctx->sample_fmt = sample_fmt;
-
- if (avctx->codec_id == AV_CODEC_ID_ADPCM_IMA_QT)
- in_format.mFramesPerPacket = 64;
-
- status = AudioConverterNew(&in_format, &out_format, &at->converter);
-
- if (status != 0) {
- av_log(avctx, AV_LOG_ERROR, "AudioToolbox init error: %i\n", (int)status);
- return AVERROR_UNKNOWN;
- }
-
if (avctx->extradata_size) {
+ OSStatus status;
char *extradata = avctx->extradata;
int extradata_size = avctx->extradata_size;
if (avctx->codec_id == AV_CODEC_ID_AAC) {
@@ -180,15 +257,74 @@ static av_cold int ffat_init_decoder(AVCodecContext *avctx)
extradata_size, extradata);
if (status != 0)
av_log(avctx, AV_LOG_WARNING, "AudioToolbox cookie error: %i\n", (int)status);
+
+ if (avctx->codec_id == AV_CODEC_ID_AAC)
+ av_free(extradata);
+ }
+ return 0;
+}
+
+static av_cold int ffat_create_decoder(AVCodecContext *avctx) // Create the AudioConverter, apply extradata, reset the channel map and allocate the decode buffer; 0 or a negative error code.
+{
+ ATDecodeContext *at = avctx->priv_data;
+ OSStatus status;
+ int i;
+
+ enum AVSampleFormat sample_fmt = (avctx->bits_per_raw_sample == 32) ? // S32 only when explicitly 32-bit, otherwise S16
+ AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S16;
+
+ AudioStreamBasicDescription in_format = {
+ .mSampleRate = avctx->sample_rate ? avctx->sample_rate : 44100, // 44100 Hz is used when no rate is set
+ .mFormatID = ffat_get_format_id(avctx->codec_id, avctx->profile),
+ .mBytesPerPacket = avctx->block_align,
+ .mChannelsPerFrame = avctx->channels ? avctx->channels : 1, // 1 channel is used when no count is set
+ };
+ AudioStreamBasicDescription out_format = {
+ .mSampleRate = in_format.mSampleRate,
+ .mFormatID = kAudioFormatLinearPCM,
+ .mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked,
+ .mFramesPerPacket = 1,
+ .mChannelsPerFrame = in_format.mChannelsPerFrame,
+ .mBitsPerChannel = av_get_bytes_per_sample(sample_fmt) * 8,
+ };
+
+ avctx->sample_fmt = sample_fmt;
+
+ if (avctx->codec_id == AV_CODEC_ID_ADPCM_IMA_QT)
+ in_format.mFramesPerPacket = 64;
+
+ status = AudioConverterNew(&in_format, &out_format, &at->converter);
+
+ if (status != 0) {
+ av_log(avctx, AV_LOG_ERROR, "AudioToolbox init error: %i\n", (int)status);
+ return AVERROR_UNKNOWN;
}
+ if ((status = ffat_set_extradata(avctx)) < 0)
+ return status;
+
+ for (i = 0; i < (sizeof(at->channel_map) / sizeof(at->channel_map[0])); i++) // start from an identity map; ffat_update_ctx() may overwrite entries
+ at->channel_map[i] = i;
+
ffat_update_ctx(avctx);
+ if(!(at->decoded_data = av_malloc(av_get_bytes_per_sample(avctx->sample_fmt) // buffer for frame_size interleaved samples; NOTE(review): the result of ffat_update_ctx() above is ignored - consider checking it
+ * avctx->frame_size * avctx->channels)))
+ return AVERROR(ENOMEM);
+
at->last_pts = AV_NOPTS_VALUE;
return 0;
}
+static av_cold int ffat_init_decoder(AVCodecContext *avctx)
+{
+ /* With neither a channel count nor extradata, do nothing here; ffat_decode() creates the decoder when it finds no converter. */
+ if (!avctx->channels && !avctx->extradata_size)
+ return 0;
+ return ffat_create_decoder(avctx);
+}
+
static OSStatus ffat_decode_callback(AudioConverterRef converter, UInt32 *nb_packets,
AudioBufferList *data,
AudioStreamPacketDescription **packets,
@@ -229,6 +365,26 @@ static OSStatus ffat_decode_callback(AudioConverterRef converter, UInt32 *nb_pac
return 0;
}
+#define COPY_SAMPLES(type) /* copy frame->nb_samples interleaved sample frames from at->decoded_data to frame->data[0]; expects at, frame and avctx in scope */ \
+ type *in_ptr = (type*)at->decoded_data; \
+ type *end_ptr = in_ptr + frame->nb_samples * avctx->channels; \
+ type *out_ptr = (type*)frame->data[0]; \
+ for (; in_ptr < end_ptr; in_ptr += avctx->channels, out_ptr += avctx->channels) { /* one step per sample frame */ \
+ int c; \
+ for (c = 0; c < avctx->channels; c++) \
+ out_ptr[c] = in_ptr[at->channel_map[c]]; /* output channel c is read from input channel channel_map[c] */ \
+ }
+
+static void ffat_copy_samples(AVCodecContext *avctx, AVFrame *frame) // Copy the decoded samples into frame->data[0] in channel_map order, using the sample width selected by avctx->sample_fmt.
+{
+ ATDecodeContext *at = avctx->priv_data;
+ if (avctx->sample_fmt == AV_SAMPLE_FMT_S32) {
+ COPY_SAMPLES(int32_t);
+ } else { // every other sample_fmt is copied as 16-bit samples
+ COPY_SAMPLES(int16_t);
+ }
+}
+
static int ffat_decode(AVCodecContext *avctx, void *data,
int *got_frame_ptr, AVPacket *avpkt)
{
@@ -237,24 +393,13 @@ static int ffat_decode(AVCodecContext *avctx, void *data,
int pkt_size = avpkt->size;
AVPacket filtered_packet;
OSStatus ret;
-
- AudioBufferList out_buffers = {
- .mNumberBuffers = 1,
- .mBuffers = {
- {
- .mNumberChannels = avctx->channels,
- .mDataByteSize = av_get_bytes_per_sample(avctx->sample_fmt) * at->pkt_size * avctx->channels,
- }
- }
- };
+ AudioBufferList out_buffers;
if (avctx->codec_id == AV_CODEC_ID_AAC && avpkt->size > 2 &&
(AV_RB16(avpkt->data) & 0xfff0) == 0xfff0) {
- int first = 0;
uint8_t *p_filtered = NULL;
int n_filtered = 0;
if (!at->bsf) {
- first = 1;
if(!(at->bsf = av_bitstream_filter_init("aac_adtstoasc")))
return AVERROR(ENOMEM);
}
@@ -267,16 +412,24 @@ static int ffat_decode(AVCodecContext *avctx, void *data,
avpkt->data = p_filtered;
avpkt->size = n_filtered;
}
+ }
- if (first) {
- if ((ret = ffat_set_extradata(avctx)) < 0)
- return ret;
- ffat_update_ctx(avctx);
- out_buffers.mBuffers[0].mNumberChannels = avctx->channels;
- out_buffers.mBuffers[0].mDataByteSize = av_get_bytes_per_sample(avctx->sample_fmt) * at->pkt_size * avctx->channels;
- }
+ if (!at->converter) {
+ if ((ret = ffat_create_decoder(avctx)) < 0)
+ return ret;
}
+ out_buffers = (AudioBufferList){
+ .mNumberBuffers = 1,
+ .mBuffers = {
+ {
+ .mNumberChannels = avctx->channels,
+ .mDataByteSize = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->frame_size
+ * avctx->channels,
+ }
+ }
+ };
+
av_packet_unref(&at->new_in_pkt);
if (avpkt->size) {
@@ -289,17 +442,19 @@ static int ffat_decode(AVCodecContext *avctx, void *data,
frame->sample_rate = avctx->sample_rate;
- frame->nb_samples = at->pkt_size;
- ff_get_buffer(avctx, frame, 0);
+ frame->nb_samples = avctx->frame_size;
- out_buffers.mBuffers[0].mData = frame->data[0];
+ out_buffers.mBuffers[0].mData = at->decoded_data;
ret = AudioConverterFillComplexBuffer(at->converter, ffat_decode_callback, avctx,
&frame->nb_samples, &out_buffers, NULL);
if ((!ret || ret == 1) && frame->nb_samples) {
+ if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+ return ret;
+ ffat_copy_samples(avctx, frame);
*got_frame_ptr = 1;
if (at->last_pts != AV_NOPTS_VALUE) {
- frame->pts = at->last_pts;
+ frame->pkt_pts = at->last_pts;
at->last_pts = avpkt->pts;
}
} else if (ret && ret != 1) {
@@ -325,6 +480,7 @@ static av_cold int ffat_close_decoder(AVCodecContext *avctx)
AudioConverterDispose(at->converter);
av_packet_unref(&at->new_in_pkt);
av_packet_unref(&at->in_pkt);
+ av_free(at->decoded_data);
return 0;
}
--
2.7.3
More information about the ffmpeg-devel
mailing list