[FFmpeg-user] Audio captured via AVFoundation is unusable

Wed Jun 4 10:09:05 EEST 2025

On 2025-06-04 03:20 am, FFmpeg via ffmpeg-user wrote:
>> With AVFoundation, sometimes audio parameters aren't initialized at start of capture. The OS initializes the audio very shortly afterwards, but ffmpeg uses the parameters advertised in the beginning, which will be incorrect.
> This definitely tracks with the behavior I'm witnessing.
>
>> I wrote a patch few years back to fix this - I'll find it and update it.
>> I'll post it if you can test it.
> That would be excellent, I'd be happy to test it.

Diff patch attached. Apply it with `git apply`.

Initially test with

-f avfoundation \
-audio_probe_wait 2 \
-i ... \


Try values up to 15 or so.

Regards,
Gyan
-------------- next part --------------
From 06c626a01170d05fb5b67c0d3d9e89bfc385381f Mon Sep 17 00:00:00 2001
From: Gyan Doshi <ffmpeg at gyani.pro>
Date: Wed, 4 Jun 2025 12:31:08 +0530
Subject: [PATCH] lavd/avfoundation: allow delay for audio format probing

Set -audio_probe_wait X to probe format from frame #X
---
 libavdevice/avfoundation.m | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
index 6f15e2837e..4d609c10ae 100644
--- a/libavdevice/avfoundation.m
+++ b/libavdevice/avfoundation.m
@@ -121,6 +121,7 @@ typedef struct
     int             audio_signed_integer;
     int             audio_packed;
     int             audio_non_interleaved;
+    int             audio_probe_wait;
 
     int32_t         *audio_buffer;
     int             audio_buffer_size;
@@ -685,7 +686,7 @@ static int get_audio_config(AVFormatContext *s)
     }
 
     // Take stream info from the first frame.
-    while (ctx->audio_frames_captured < 1) {
+    while (ctx->audio_frames_captured < ctx->audio_probe_wait) {
         CFRunLoopRunInMode(kCFRunLoopDefaultMode, 0.1, YES);
     }
 
@@ -716,6 +717,13 @@ static int get_audio_config(AVFormatContext *s)
     ctx->audio_packed          = basic_desc->mFormatFlags & kAudioFormatFlagIsPacked;
     ctx->audio_non_interleaved = basic_desc->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
 
+    av_log(s, AV_LOG_VERBOSE, "Detected audio format:\n"
+                              "mFormatID: %d | float: %d | s_int: %d | depth: %d | packed: %d | non_interleaved: %d \n"
+                              "mSampleRate: %d | channels: %d | mBitsPerChannel: %d | audio_be: %d\n",
+           (int)basic_desc->mFormatID, ctx->audio_float, ctx->audio_signed_integer, ctx->audio_bits_per_sample, ctx->audio_packed,
+           ctx->audio_non_interleaved, (int)basic_desc->mSampleRate, (int)basic_desc->mChannelsPerFrame,
+           (int)basic_desc->mBitsPerChannel, ctx->audio_be);
+
     if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
         ctx->audio_float &&
         ctx->audio_bits_per_sample == 32 &&
@@ -1285,6 +1293,7 @@ static const AVOption options[] = {
     { "capture_mouse_clicks", "capture the screen mouse clicks", offsetof(AVFContext, capture_mouse_clicks), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
     { "capture_raw_data", "capture the raw data from device connection", offsetof(AVFContext, capture_raw_data), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
     { "drop_late_frames", "drop frames that are available later than expected", offsetof(AVFContext, drop_late_frames), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
+    { "audio_probe_wait", "number of packets to wait for audio config probe", offsetof(AVFContext, audio_probe_wait), AV_OPT_TYPE_INT, {.i64=1}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
 
     { NULL },
 };
-- 
2.49.0