[FFmpeg-devel] [PATCH v5 1/3] libavdevice/avfoundation.m: use AudioConvert, extend supported formats
Romain Beauxis
toots at rastageeks.org
Fri Dec 17 17:58:29 EET 2021
(Apologies for the previously mangled message, I’m still honing my tools here!)
This is the first patch of a series of 3 that cleanup and enhance the
avfoundation implementation for libavdevice.
Changes:
* v2: None
* v3: None
* v4: None
* v5: Fix indentation/wrapping
* Implement support for AudioConverter
* Switch to AudioConverter's API to convert unsupported PCM
formats (non-interleaved, non-packed) to supported formats
* Minimize data copy.
This fixes: https://trac.ffmpeg.org/ticket/9502
API ref:
https://developer.apple.com/documentation/audiotoolbox/audio_converter_services
Signed-off-by: Romain Beauxis <toots at rastageeks.org>
---
libavdevice/avfoundation.m | 250 +++++++++++++++++++++----------------
1 file changed, 144 insertions(+), 106 deletions(-)
diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
index 0cd6e646d5..79c9207cfa 100644
--- a/libavdevice/avfoundation.m
+++ b/libavdevice/avfoundation.m
@@ -111,16 +111,10 @@
int num_video_devices;
- int audio_channels;
- int audio_bits_per_sample;
- int audio_float;
- int audio_be;
- int audio_signed_integer;
- int audio_packed;
- int audio_non_interleaved;
-
- int32_t *audio_buffer;
- int audio_buffer_size;
+ UInt32 audio_buffers;
+ UInt32 audio_channels;
+ UInt32 bytes_per_sample;
+ AudioConverterRef audio_converter;
enum AVPixelFormat pixel_format;
@@ -299,7 +293,10 @@ static void destroy_context(AVFContext* ctx)
ctx->avf_delegate = NULL;
ctx->avf_audio_delegate = NULL;
- av_freep(&ctx->audio_buffer);
+ if (ctx->audio_converter) {
+ AudioConverterDispose(ctx->audio_converter);
+ ctx->audio_converter = NULL;
+ }
pthread_mutex_destroy(&ctx->frame_lock);
@@ -673,6 +670,10 @@ static int get_audio_config(AVFormatContext *s)
AVFContext *ctx = (AVFContext*)s->priv_data;
CMFormatDescriptionRef format_desc;
AVStream* stream = avformat_new_stream(s, NULL);
+ AudioStreamBasicDescription output_format = {0};
+ int audio_bits_per_sample, audio_float, audio_be;
+ int audio_signed_integer, audio_packed, audio_non_interleaved;
+ int must_convert = 0;
if (!stream) {
return 1;
@@ -690,60 +691,95 @@ static int get_audio_config(AVFormatContext *s)
avpriv_set_pts_info(stream, 64, 1, avf_time_base);
format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
- const AudioStreamBasicDescription *basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
+ const AudioStreamBasicDescription *input_format = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
- if (!basic_desc) {
+ if (!input_format) {
unlock_frames(ctx);
av_log(s, AV_LOG_ERROR, "audio format not available\n");
return 1;
}
+ if (input_format->mFormatID != kAudioFormatLinearPCM) {
+ unlock_frames(ctx);
+ av_log(s, AV_LOG_ERROR, "only PCM audio format are supported at the moment\n");
+ return 1;
+ }
+
stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
- stream->codecpar->sample_rate = basic_desc->mSampleRate;
- stream->codecpar->channels = basic_desc->mChannelsPerFrame;
+ stream->codecpar->sample_rate = input_format->mSampleRate;
+ stream->codecpar->channels = input_format->mChannelsPerFrame;
stream->codecpar->channel_layout = av_get_default_channel_layout(stream->codecpar->channels);
- ctx->audio_channels = basic_desc->mChannelsPerFrame;
- ctx->audio_bits_per_sample = basic_desc->mBitsPerChannel;
- ctx->audio_float = basic_desc->mFormatFlags & kAudioFormatFlagIsFloat;
- ctx->audio_be = basic_desc->mFormatFlags & kAudioFormatFlagIsBigEndian;
- ctx->audio_signed_integer = basic_desc->mFormatFlags & kAudioFormatFlagIsSignedInteger;
- ctx->audio_packed = basic_desc->mFormatFlags & kAudioFormatFlagIsPacked;
- ctx->audio_non_interleaved = basic_desc->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
-
- if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
- ctx->audio_float &&
- ctx->audio_bits_per_sample == 32 &&
- ctx->audio_packed) {
- stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
- } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
- ctx->audio_signed_integer &&
- ctx->audio_bits_per_sample == 16 &&
- ctx->audio_packed) {
- stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
- } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
- ctx->audio_signed_integer &&
- ctx->audio_bits_per_sample == 24 &&
- ctx->audio_packed) {
- stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
- } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
- ctx->audio_signed_integer &&
- ctx->audio_bits_per_sample == 32 &&
- ctx->audio_packed) {
- stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
+ audio_bits_per_sample = input_format->mBitsPerChannel;
+ audio_float = input_format->mFormatFlags & kAudioFormatFlagIsFloat;
+ audio_be = input_format->mFormatFlags & kAudioFormatFlagIsBigEndian;
+ audio_signed_integer = input_format->mFormatFlags & kAudioFormatFlagIsSignedInteger;
+ audio_packed = input_format->mFormatFlags & kAudioFormatFlagIsPacked;
+ audio_non_interleaved = input_format->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
+
+ ctx->bytes_per_sample = input_format->mBitsPerChannel >> 3;
+ ctx->audio_channels = input_format->mChannelsPerFrame;
+
+ if (audio_non_interleaved) {
+ ctx->audio_buffers = input_format->mChannelsPerFrame;
} else {
- unlock_frames(ctx);
- av_log(s, AV_LOG_ERROR, "audio format is not supported\n");
- return 1;
+ ctx->audio_buffers = 1;
+ }
+
+ if (audio_non_interleaved || !audio_packed) {
+ must_convert = 1;
+ }
+
+ output_format.mBitsPerChannel = input_format->mBitsPerChannel;
+ output_format.mChannelsPerFrame = ctx->audio_channels;
+ output_format.mFramesPerPacket = 1;
+ output_format.mBytesPerFrame = output_format.mChannelsPerFrame * ctx->bytes_per_sample;
+ output_format.mBytesPerPacket = output_format.mFramesPerPacket * output_format.mBytesPerFrame;
+ output_format.mFormatFlags = kAudioFormatFlagIsPacked | audio_be;
+ output_format.mFormatID = kAudioFormatLinearPCM;
+ output_format.mReserved = 0;
+ output_format.mSampleRate = input_format->mSampleRate;
+
+ if (audio_float &&
+ audio_bits_per_sample == 32) {
+ stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
+ output_format.mFormatFlags |= kAudioFormatFlagIsFloat;
+ } else if (audio_float &&
+ audio_bits_per_sample == 64) {
+ stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_F64BE : AV_CODEC_ID_PCM_F64LE;
+ output_format.mFormatFlags |= kAudioFormatFlagIsFloat;
+ } else if (audio_signed_integer &&
+ audio_bits_per_sample == 8) {
+ stream->codecpar->codec_id = AV_CODEC_ID_PCM_S8;
+ output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
+ } else if (audio_signed_integer &&
+ audio_bits_per_sample == 16) {
+ stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
+ output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
+ } else if (audio_signed_integer &&
+ audio_bits_per_sample == 24) {
+ stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
+ output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
+ } else if (audio_signed_integer &&
+ audio_bits_per_sample == 32) {
+ stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
+ output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
+ } else if (audio_signed_integer &&
+ audio_bits_per_sample == 64) {
+ stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S64BE : AV_CODEC_ID_PCM_S64LE;
+ output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
+ } else {
+ stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
+ output_format.mBitsPerChannel = 32;
+ output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
+ must_convert = 1;
}
- if (ctx->audio_non_interleaved) {
- CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
- ctx->audio_buffer_size = CMBlockBufferGetDataLength(block_buffer);
- ctx->audio_buffer = av_malloc(ctx->audio_buffer_size);
- if (!ctx->audio_buffer) {
+ if (must_convert) {
+ OSStatus ret = AudioConverterNew(input_format, &output_format, &ctx->audio_converter);
+ if (ret != noErr) {
unlock_frames(ctx);
- av_log(s, AV_LOG_ERROR, "error allocating audio buffer\n");
+ av_log(s, AV_LOG_ERROR, "Error while allocating audio converter\n");
return 1;
}
}
@@ -1048,6 +1084,7 @@ static int copy_cvpixelbuffer(AVFormatContext *s,
static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
{
+ OSStatus ret;
AVFContext* ctx = (AVFContext*)s->priv_data;
do {
@@ -1091,7 +1128,7 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
status = copy_cvpixelbuffer(s, image_buffer, pkt);
} else {
status = 0;
- OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
+ ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
if (ret != kCMBlockBufferNoErr) {
status = AVERROR(EIO);
}
@@ -1105,82 +1142,83 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
}
} else if (ctx->current_audio_frame != nil) {
CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
- int block_buffer_size = CMBlockBufferGetDataLength(block_buffer);
- if (!block_buffer || !block_buffer_size) {
- unlock_frames(ctx);
- return AVERROR(EIO);
- }
+ size_t input_size = CMBlockBufferGetDataLength(block_buffer);
+ int buffer_size = input_size / ctx->audio_buffers;
+ int nb_samples = input_size / (ctx->audio_channels * ctx->bytes_per_sample);
+ int output_size = buffer_size;
- if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
+ UInt32 size = sizeof(output_size);
+ ret = AudioConverterGetProperty(ctx->audio_converter, kAudioConverterPropertyCalculateOutputBufferSize, &size, &output_size);
+ if (ret != noErr) {
unlock_frames(ctx);
- return AVERROR_BUFFER_TOO_SMALL;
+ return AVERROR(EIO);
}
- if (av_new_packet(pkt, block_buffer_size) < 0) {
+ if (av_new_packet(pkt, output_size) < 0) {
unlock_frames(ctx);
return AVERROR(EIO);
}
- CMItemCount count;
- CMSampleTimingInfo timing_info;
+ if (ctx->audio_converter) {
+ size_t input_buffer_size = offsetof(AudioBufferList, mBuffers[0]) + (sizeof(AudioBuffer) * ctx->audio_buffers);
+ AudioBufferList *input_buffer = av_malloc(input_buffer_size);
- if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) {
- AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
- pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
- }
+ input_buffer->mNumberBuffers = ctx->audio_buffers;
- pkt->stream_index = ctx->audio_stream_index;
- pkt->flags |= AV_PKT_FLAG_KEY;
+ for (int c = 0; c < ctx->audio_buffers; c++) {
+ input_buffer->mBuffers[c].mNumberChannels = 1;
- if (ctx->audio_non_interleaved) {
- int sample, c, shift, num_samples;
+ ret = CMBlockBufferGetDataPointer(block_buffer, c * buffer_size, (size_t *)&input_buffer->mBuffers[c].mDataByteSize, NULL, (void *)&input_buffer->mBuffers[c].mData);
- OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer);
- if (ret != kCMBlockBufferNoErr) {
- unlock_frames(ctx);
- return AVERROR(EIO);
+ if (ret != kCMBlockBufferNoErr) {
+ av_free(input_buffer);
+ unlock_frames(ctx);
+ return AVERROR(EIO);
+ }
}
- num_samples = pkt->size / (ctx->audio_channels * (ctx->audio_bits_per_sample >> 3));
-
- // transform decoded frame into output format
- #define INTERLEAVE_OUTPUT(bps) \
- { \
- int##bps##_t **src; \
- int##bps##_t *dest; \
- src = av_malloc(ctx->audio_channels * sizeof(int##bps##_t*)); \
- if (!src) { \
- unlock_frames(ctx); \
- return AVERROR(EIO); \
- } \
- \
- for (c = 0; c < ctx->audio_channels; c++) { \
- src[c] = ((int##bps##_t*)ctx->audio_buffer) + c * num_samples; \
- } \
- dest = (int##bps##_t*)pkt->data; \
- shift = bps - ctx->audio_bits_per_sample; \
- for (sample = 0; sample < num_samples; sample++) \
- for (c = 0; c < ctx->audio_channels; c++) \
- *dest++ = src[c][sample] << shift; \
- av_freep(&src); \
- }
+ AudioBufferList output_buffer = {
+ .mNumberBuffers = 1,
+ .mBuffers[0] = {
+ .mNumberChannels = ctx->audio_channels,
+ .mDataByteSize = pkt->size,
+ .mData = pkt->data
+ }
+ };
- if (ctx->audio_bits_per_sample <= 16) {
- INTERLEAVE_OUTPUT(16)
- } else {
- INTERLEAVE_OUTPUT(32)
- }
- } else {
- OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
- if (ret != kCMBlockBufferNoErr) {
+ ret = AudioConverterConvertComplexBuffer(ctx->audio_converter, nb_samples, input_buffer, &output_buffer);
+ av_free(input_buffer);
+
+ if (ret != noErr) {
unlock_frames(ctx);
return AVERROR(EIO);
}
+
+ pkt->size = output_buffer.mBuffers[0].mDataByteSize;
+ } else {
+ ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
+ if (ret != kCMBlockBufferNoErr) {
+ unlock_frames(ctx);
+ return AVERROR(EIO);
+ }
}
+ CMItemCount count;
+ CMSampleTimingInfo timing_info;
+
+ if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) {
+ AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
+ pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
+ }
+
+ pkt->stream_index = ctx->audio_stream_index;
+ pkt->flags |= AV_PKT_FLAG_KEY;
+
CFRelease(ctx->current_audio_frame);
ctx->current_audio_frame = nil;
+
+ unlock_frames(ctx);
} else {
pkt->data = NULL;
unlock_frames(ctx);
--
2.32.0 (Apple Git-132)
More information about the ffmpeg-devel
mailing list