[FFmpeg-devel] [PATCH] libavdevice/avfoundation: add buffer fifo and output packets in order they arrive
Mark Reid
mindmark at gmail.com
Sat Mar 13 22:45:28 EET 2021
On Sun., Feb. 28, 2021, 8:30 p.m. Mark Reid, <mindmark at gmail.com> wrote:
>
>
> On Sat, Feb 13, 2021 at 10:04 PM <mindmark at gmail.com> wrote:
>
>> From: Mark Reid <mindmark at gmail.com>
>>
>> Hi,
>> This patch fixes audio issues I've had with some capture devices. The
>> audio
>> gets really choppy and stops working. This seems to be because
>> avf_read_packet
>> stops outputting the audio frames because a video frame happens to be
>> available first.
>>
>> It is based on the approach used in a patch from #4437
>> https://trac.ffmpeg.org/ticket/4437
>>
>> My approach uses an AVFifoBuffer instead of NSMutableArray and also
>> outputs the packets in the same order they arrive from AVFoundation.
>>
>> should fix tickets #4437 and #4513
>>
>>
>> ---
>> libavdevice/avfoundation.m | 160 ++++++++++++++++++++++++++++---------
>> 1 file changed, 124 insertions(+), 36 deletions(-)
>>
>> diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
>> index 59d5b0af4f..5ac6ec4183 100644
>> --- a/libavdevice/avfoundation.m
>> +++ b/libavdevice/avfoundation.m
>> @@ -31,13 +31,17 @@
>> #include "libavutil/pixdesc.h"
>> #include "libavutil/opt.h"
>> #include "libavutil/avstring.h"
>> +#include "libavutil/avassert.h"
>> #include "libavformat/internal.h"
>> #include "libavutil/internal.h"
>> #include "libavutil/parseutils.h"
>> #include "libavutil/time.h"
>> #include "libavutil/imgutils.h"
>> +#include "libavutil/fifo.h"
>> #include "avdevice.h"
>>
>> +#define FIFO_SIZE 4
>> +
>> static const int avf_time_base = 1000000;
>>
>> static const AVRational avf_time_base_q = {
>> @@ -128,8 +132,8 @@ typedef struct
>> AVCaptureSession *capture_session;
>> AVCaptureVideoDataOutput *video_output;
>> AVCaptureAudioDataOutput *audio_output;
>> - CMSampleBufferRef current_frame;
>> - CMSampleBufferRef current_audio_frame;
>> + AVFifoBuffer *video_fifo;
>> + AVFifoBuffer *audio_fifo;
>>
>> AVCaptureDevice *observed_device;
>> #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
>> @@ -138,6 +142,11 @@ typedef struct
>> int observed_quit;
>> } AVFContext;
>>
>> +typedef struct {
>> + int64_t ts;
>> + CMSampleBufferRef frame;
>> +} BufferRef;
>> +
>> static void lock_frames(AVFContext* ctx)
>> {
>> pthread_mutex_lock(&ctx->frame_lock);
>> @@ -148,6 +157,48 @@ static void unlock_frames(AVFContext* ctx)
>> pthread_mutex_unlock(&ctx->frame_lock);
>> }
>>
>> +static inline void fifo_write(AVFifoBuffer* f, int64_t ts,
>> CMSampleBufferRef frame)
>> +{
>> + BufferRef buf = {
>> + .ts = ts,
>> + .frame = frame,
>> + };
>> +
>> + CFRetain(frame);
>> + av_fifo_generic_write(f, &buf, sizeof(BufferRef), NULL);
>> +}
>> +
>> +static inline void fifo_peek(AVFifoBuffer* f, BufferRef *buf)
>> +{
>> + if (av_fifo_size(f)) {
>> + av_fifo_generic_peek(f, buf, sizeof(BufferRef), NULL);
>> + return;
>> + }
>> + buf->frame = nil;
>> + return;
>> +}
>> +
>> +static inline void fifo_drain(AVFifoBuffer* f, int release)
>> +{
>> + av_assert2(av_fifo_size(f) >= sizeof(BufferRef));
>> + if (release) {
>> + BufferRef buf;
>> + fifo_peek(f, &buf);
>> + CFRelease(buf.frame);
>> + }
>> + av_fifo_drain(f, sizeof(BufferRef));
>> +}
>> +
>> +static inline void fifo_freep(AVFifoBuffer **f)
>> +{
>> + if (f) {
>> + while (av_fifo_size(*f)) {
>> + fifo_drain(*f, 1);
>> + }
>> + av_fifo_freep(f);
>> + }
>> +}
>> +
>> /** FrameReciever class - delegate for AVCaptureSession
>> */
>> @interface AVFFrameReceiver : NSObject
>> @@ -225,13 +276,16 @@ static void unlock_frames(AVFContext* ctx)
>> didOutputSampleBuffer:(CMSampleBufferRef)videoFrame
>> fromConnection:(AVCaptureConnection *)connection
>> {
>> + AVFifoBuffer *fifo = _context->video_fifo;
>> + int64_t ts = av_gettime_relative();
>> lock_frames(_context);
>>
>> - if (_context->current_frame != nil) {
>> - CFRelease(_context->current_frame);
>> + if (av_fifo_space(fifo) == 0) {
>> + av_log(_context, AV_LOG_DEBUG, "video fifo is full, the oldest
>> frame has been dropped\n");
>> + fifo_drain(fifo, 1);
>> }
>>
>> - _context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame);
>> + fifo_write(fifo, ts, videoFrame);
>>
>> unlock_frames(_context);
>>
>> @@ -269,13 +323,16 @@ static void unlock_frames(AVFContext* ctx)
>> didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
>> fromConnection:(AVCaptureConnection *)connection
>> {
>> + AVFifoBuffer *fifo = _context->audio_fifo;
>> + int64_t ts = av_gettime_relative();
>> lock_frames(_context);
>>
>> - if (_context->current_audio_frame != nil) {
>> - CFRelease(_context->current_audio_frame);
>> + if (!av_fifo_space(fifo)) {
>> + av_log(_context, AV_LOG_DEBUG, "audio fifo is full, the oldest
>> frame has been dropped\n");
>> + fifo_drain(fifo, 1);
>> }
>>
>> - _context->current_audio_frame =
>> (CMSampleBufferRef)CFRetain(audioFrame);
>> + fifo_write(fifo, ts, audioFrame);
>>
>> unlock_frames(_context);
>>
>> @@ -301,12 +358,10 @@ static void destroy_context(AVFContext* ctx)
>> ctx->avf_audio_delegate = NULL;
>>
>> av_freep(&ctx->audio_buffer);
>> + fifo_freep(&ctx->video_fifo);
>> + fifo_freep(&ctx->audio_fifo);
>>
>> pthread_mutex_destroy(&ctx->frame_lock);
>> -
>> - if (ctx->current_frame) {
>> - CFRelease(ctx->current_frame);
>> - }
>> }
>>
>> static void parse_device_name(AVFormatContext *s)
>> @@ -624,6 +679,7 @@ static int add_audio_device(AVFormatContext *s,
>> AVCaptureDevice *audio_device)
>> static int get_video_config(AVFormatContext *s)
>> {
>> AVFContext *ctx = (AVFContext*)s->priv_data;
>> + BufferRef buf;
>> CVImageBufferRef image_buffer;
>> CMBlockBufferRef block_buffer;
>> CGSize image_buffer_size;
>> @@ -644,8 +700,13 @@ static int get_video_config(AVFormatContext *s)
>>
>> avpriv_set_pts_info(stream, 64, 1, avf_time_base);
>>
>> - image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
>> - block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
>> + fifo_peek(ctx->video_fifo, &buf);
>> + if (buf.frame == nil) {
>> + return 1;
>> + }
>> +
>> + image_buffer = CMSampleBufferGetImageBuffer(buf.frame);
>> + block_buffer = CMSampleBufferGetDataBuffer(buf.frame);
>>
>> if (image_buffer) {
>> image_buffer_size = CVImageBufferGetEncodedSize(image_buffer);
>> @@ -661,9 +722,6 @@ static int get_video_config(AVFormatContext *s)
>> stream->codecpar->format = ctx->pixel_format;
>> }
>>
>> - CFRelease(ctx->current_frame);
>> - ctx->current_frame = nil;
>> -
>> unlock_frames(ctx);
>>
>> return 0;
>> @@ -672,6 +730,7 @@ static int get_video_config(AVFormatContext *s)
>> static int get_audio_config(AVFormatContext *s)
>> {
>> AVFContext *ctx = (AVFContext*)s->priv_data;
>> + BufferRef buf;
>> CMFormatDescriptionRef format_desc;
>> AVStream* stream = avformat_new_stream(s, NULL);
>>
>> @@ -690,7 +749,12 @@ static int get_audio_config(AVFormatContext *s)
>>
>> avpriv_set_pts_info(stream, 64, 1, avf_time_base);
>>
>> - format_desc =
>> CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
>> + fifo_peek(ctx->audio_fifo, &buf);
>> + if (buf.frame == nil) {
>> + return 1;
>> + }
>> +
>> + format_desc = CMSampleBufferGetFormatDescription(buf.frame);
>> const AudioStreamBasicDescription *basic_desc =
>> CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
>>
>> if (!basic_desc) {
>> @@ -737,7 +801,7 @@ static int get_audio_config(AVFormatContext *s)
>> }
>>
>> if (ctx->audio_non_interleaved) {
>> - CMBlockBufferRef block_buffer =
>> CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
>> + CMBlockBufferRef block_buffer =
>> CMSampleBufferGetDataBuffer(buf.frame);
>> ctx->audio_buffer_size =
>> CMBlockBufferGetDataLength(block_buffer);
>> ctx->audio_buffer =
>> av_malloc(ctx->audio_buffer_size);
>> if (!ctx->audio_buffer) {
>> @@ -746,9 +810,6 @@ static int get_audio_config(AVFormatContext *s)
>> }
>> }
>>
>> - CFRelease(ctx->current_audio_frame);
>> - ctx->current_audio_frame = nil;
>> -
>> unlock_frames(ctx);
>>
>> return 0;
>> @@ -771,6 +832,9 @@ static int avf_read_header(AVFormatContext *s)
>>
>> pthread_mutex_init(&ctx->frame_lock, NULL);
>>
>> + ctx->video_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
>> + ctx->audio_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
>> +
>> #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
>> CGGetActiveDisplayList(0, NULL, &num_screens);
>> #endif
>> @@ -1051,33 +1115,52 @@ static int avf_read_packet(AVFormatContext *s,
>> AVPacket *pkt)
>> AVFContext* ctx = (AVFContext*)s->priv_data;
>>
>> do {
>> + BufferRef video;
>> + BufferRef audio;
>> CVImageBufferRef image_buffer;
>> CMBlockBufferRef block_buffer;
>> lock_frames(ctx);
>>
>> - if (ctx->current_frame != nil) {
>> + fifo_peek(ctx->video_fifo, &video);
>> + fifo_peek(ctx->audio_fifo, &audio);
>> +
>> + if (video.frame != nil && audio.frame != nil) {
>> + // process oldest CMSampleBufferRef first
>> + if (audio.ts <= video.ts) {
>> + video.frame = nil;
>> + } else {
>> + audio.frame = nil;
>> + }
>> + }
>> +
>> + if (video.frame != nil) {
>> int status;
>> int length = 0;
>>
>> - image_buffer =
>> CMSampleBufferGetImageBuffer(ctx->current_frame);
>> - block_buffer =
>> CMSampleBufferGetDataBuffer(ctx->current_frame);
>> + fifo_drain(ctx->video_fifo, 0);
>> + unlock_frames(ctx);
>> +
>> + image_buffer = CMSampleBufferGetImageBuffer(video.frame);
>> + block_buffer = CMSampleBufferGetDataBuffer(video.frame);
>>
>> if (image_buffer != nil) {
>> length = (int)CVPixelBufferGetDataSize(image_buffer);
>> } else if (block_buffer != nil) {
>> length = (int)CMBlockBufferGetDataLength(block_buffer);
>> } else {
>> + CFRelease(video.frame);
>> return AVERROR(EINVAL);
>> }
>>
>> if (av_new_packet(pkt, length) < 0) {
>> + CFRelease(video.frame);
>> return AVERROR(EIO);
>> }
>>
>> CMItemCount count;
>> CMSampleTimingInfo timing_info;
>>
>> - if
>> (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_frame, 1,
>> &timing_info, &count) == noErr) {
>> + if
>> (CMSampleBufferGetOutputSampleTimingInfoArray(video.frame, 1, &timing_info,
>> &count) == noErr) {
>> AVRational timebase_q = av_make_q(1,
>> timing_info.presentationTimeStamp.timescale);
>> pkt->pts = pkt->dts =
>> av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q,
>> avf_time_base_q);
>> }
>> @@ -1094,31 +1177,37 @@ static int avf_read_packet(AVFormatContext *s,
>> AVPacket *pkt)
>> status = AVERROR(EIO);
>> }
>> }
>> - CFRelease(ctx->current_frame);
>> - ctx->current_frame = nil;
>> + CFRelease(video.frame);
>>
>> - if (status < 0)
>> + if (status < 0) {
>> return status;
>> - } else if (ctx->current_audio_frame != nil) {
>> - CMBlockBufferRef block_buffer =
>> CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
>> + }
>> + } else if (audio.frame != nil) {
>> + CMBlockBufferRef block_buffer =
>> CMSampleBufferGetDataBuffer(audio.frame);
>> int block_buffer_size =
>> CMBlockBufferGetDataLength(block_buffer);
>>
>> + fifo_drain(ctx->audio_fifo, 0);
>> + unlock_frames(ctx);
>> +
>> if (!block_buffer || !block_buffer_size) {
>> + CFRelease(audio.frame);
>> return AVERROR(EIO);
>> }
>>
>> if (ctx->audio_non_interleaved && block_buffer_size >
>> ctx->audio_buffer_size) {
>> + CFRelease(audio.frame);
>> return AVERROR_BUFFER_TOO_SMALL;
>> }
>>
>> if (av_new_packet(pkt, block_buffer_size) < 0) {
>> + CFRelease(audio.frame);
>> return AVERROR(EIO);
>> }
>>
>> CMItemCount count;
>> CMSampleTimingInfo timing_info;
>>
>> - if
>> (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1,
>> &timing_info, &count) == noErr) {
>> + if
>> (CMSampleBufferGetOutputSampleTimingInfoArray(audio.frame, 1, &timing_info,
>> &count) == noErr) {
>> AVRational timebase_q = av_make_q(1,
>> timing_info.presentationTimeStamp.timescale);
>> pkt->pts = pkt->dts =
>> av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q,
>> avf_time_base_q);
>> }
>> @@ -1131,6 +1220,7 @@ static int avf_read_packet(AVFormatContext *s,
>> AVPacket *pkt)
>>
>> OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer,
>> 0, pkt->size, ctx->audio_buffer);
>> if (ret != kCMBlockBufferNoErr) {
>> + CFRelease(audio.frame);
>> return AVERROR(EIO);
>> }
>>
>> @@ -1162,12 +1252,12 @@ static int avf_read_packet(AVFormatContext *s,
>> AVPacket *pkt)
>> } else {
>> OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer,
>> 0, pkt->size, pkt->data);
>> if (ret != kCMBlockBufferNoErr) {
>> + CFRelease(audio.frame);
>> return AVERROR(EIO);
>> }
>> }
>>
>> - CFRelease(ctx->current_audio_frame);
>> - ctx->current_audio_frame = nil;
>> + CFRelease(audio.frame);
>> } else {
>> pkt->data = NULL;
>> unlock_frames(ctx);
>> @@ -1177,8 +1267,6 @@ static int avf_read_packet(AVFormatContext *s,
>> AVPacket *pkt)
>> return AVERROR(EAGAIN);
>> }
>> }
>> -
>> - unlock_frames(ctx);
>> } while (!pkt->data);
>>
>> return 0;
>> --
>> 2.29.2
>>
>>
> ping
>
ping
>
More information about the ffmpeg-devel
mailing list