[FFmpeg-devel] [PATCH] videotoolbox: allow to enable the async decoding.

Sebastien Zwickert dilaroga at gmail.com
Sun Aug 9 13:11:44 CEST 2015


This patch makes it possible to use the Videotoolbox API in asynchronous mode.
Note that when async decoding is used, the user is responsible for
releasing the async frame.
Moreover, an option called videotoolbox_async was added to enable
async decoding with ffmpeg CLI.

---
 ffmpeg.h                  |   1 +
 ffmpeg_opt.c              |   1 +
 ffmpeg_videotoolbox.c     |  69 +++++++++++++----
 libavcodec/videotoolbox.c | 186 ++++++++++++++++++++++++++++++++++++++++------
 libavcodec/videotoolbox.h |  73 ++++++++++++++++++
 5 files changed, 294 insertions(+), 36 deletions(-)

diff --git a/ffmpeg.h b/ffmpeg.h
index 6544e6f..73a1031 100644
--- a/ffmpeg.h
+++ b/ffmpeg.h
@@ -522,6 +522,7 @@ extern AVIOContext *progress_avio;
 extern float max_error_rate;
 extern int vdpau_api_ver;
 extern char *videotoolbox_pixfmt;
+extern int videotoolbox_async;
 
 extern const AVIOInterruptCB int_cb;
 
diff --git a/ffmpeg_opt.c b/ffmpeg_opt.c
index 28d3051..91be9b9 100644
--- a/ffmpeg_opt.c
+++ b/ffmpeg_opt.c
@@ -3238,6 +3238,7 @@ const OptionDef options[] = {
 #endif
 #if CONFIG_VDA || CONFIG_VIDEOTOOLBOX
     { "videotoolbox_pixfmt", HAS_ARG | OPT_STRING | OPT_EXPERT, { &videotoolbox_pixfmt}, "" },
+    { "videotoolbox_async", HAS_ARG | OPT_INT | OPT_EXPERT, { &videotoolbox_async}, "" },
 #endif
     { "autorotate",       HAS_ARG | OPT_BOOL | OPT_SPEC |
                           OPT_EXPERT | OPT_INPUT,                                { .off = OFFSET(autorotate) },
diff --git a/ffmpeg_videotoolbox.c b/ffmpeg_videotoolbox.c
index 6688452..0bb0600 100644
--- a/ffmpeg_videotoolbox.c
+++ b/ffmpeg_videotoolbox.c
@@ -34,21 +34,42 @@ typedef struct VTContext {
 } VTContext;
 
 char *videotoolbox_pixfmt;
+int videotoolbox_async;
 
 static int videotoolbox_retrieve_data(AVCodecContext *s, AVFrame *frame)
 {
     InputStream *ist = s->opaque;
     VTContext  *vt = ist->hwaccel_ctx;
-    CVPixelBufferRef pixbuf = (CVPixelBufferRef)frame->data[3];
-    OSType pixel_format = CVPixelBufferGetPixelFormatType(pixbuf);
+    AVVideotoolboxContext *videotoolbox = s->hwaccel_context;
+    AVVideotoolboxAsyncFrame *async_frame = NULL;
+    CVPixelBufferRef pixbuf;
+    OSType pixel_format;
     CVReturn err;
     uint8_t *data[4] = { 0 };
     int linesize[4] = { 0 };
     int planes, ret, i;
     char codec_str[32];
+    int width, height;
 
     av_frame_unref(vt->tmp_frame);
 
+    if (videotoolbox->useAsyncDecoding) {
+        async_frame = av_videotoolbox_pop_async_frame(videotoolbox);
+
+        if (!async_frame)
+            return -1;
+
+        pixbuf = async_frame->cv_buffer;
+        width  = CVPixelBufferGetWidth(pixbuf);
+        height = CVPixelBufferGetHeight(pixbuf);
+    } else {
+        pixbuf = (CVPixelBufferRef)frame->data[3];
+        width  = frame->width;
+        height = frame->height;
+    }
+
+    pixel_format = CVPixelBufferGetPixelFormatType(pixbuf);
+
     switch (pixel_format) {
     case kCVPixelFormatType_420YpCbCr8Planar: vt->tmp_frame->format = AV_PIX_FMT_YUV420P; break;
     case kCVPixelFormatType_422YpCbCr8:       vt->tmp_frame->format = AV_PIX_FMT_UYVY422; break;
@@ -60,19 +81,21 @@ static int videotoolbox_retrieve_data(AVCodecContext *s, AVFrame *frame)
         av_get_codec_tag_string(codec_str, sizeof(codec_str), s->codec_tag);
         av_log(NULL, AV_LOG_ERROR,
                "%s: Unsupported pixel format: %s\n", codec_str, videotoolbox_pixfmt);
-        return AVERROR(ENOSYS);
+        ret = AVERROR(ENOSYS);
+        goto fail;
     }
 
-    vt->tmp_frame->width  = frame->width;
-    vt->tmp_frame->height = frame->height;
+    vt->tmp_frame->width  = width;
+    vt->tmp_frame->height = height;
     ret = av_frame_get_buffer(vt->tmp_frame, 32);
-    if (ret < 0)
-        return ret;
-
+    if (ret < 0) {
+        goto fail;
+    }
     err = CVPixelBufferLockBaseAddress(pixbuf, kCVPixelBufferLock_ReadOnly);
     if (err != kCVReturnSuccess) {
         av_log(NULL, AV_LOG_ERROR, "Error locking the pixel buffer.\n");
-        return AVERROR_UNKNOWN;
+        ret = AVERROR_UNKNOWN;
+        goto fail;
     }
 
     if (CVPixelBufferIsPlanar(pixbuf)) {
@@ -89,17 +112,27 @@ static int videotoolbox_retrieve_data(AVCodecContext *s, AVFrame *frame)
 
     av_image_copy(vt->tmp_frame->data, vt->tmp_frame->linesize,
                   (const uint8_t **)data, linesize, vt->tmp_frame->format,
-                  frame->width, frame->height);
+                  width, height);
 
     ret = av_frame_copy_props(vt->tmp_frame, frame);
     CVPixelBufferUnlockBaseAddress(pixbuf, kCVPixelBufferLock_ReadOnly);
-    if (ret < 0)
-        return ret;
+    if (ret < 0) {
+        goto fail;
+    }
 
     av_frame_unref(frame);
     av_frame_move_ref(frame, vt->tmp_frame);
 
+    if (videotoolbox->useAsyncDecoding) {
+        av_videotoolbox_release_async_frame(async_frame);
+    }
+
     return 0;
+fail:
+    if (videotoolbox->useAsyncDecoding) {
+        av_videotoolbox_release_async_frame(async_frame);
+    }
+    return ret;
 }
 
 static void videotoolbox_uninit(AVCodecContext *s)
@@ -147,10 +180,18 @@ int videotoolbox_init(AVCodecContext *s)
 
     if (ist->hwaccel_id == HWACCEL_VIDEOTOOLBOX) {
 #if CONFIG_VIDEOTOOLBOX
+        AVVideotoolboxContext *vtctx = NULL;
         if (!videotoolbox_pixfmt) {
-            ret = av_videotoolbox_default_init(s);
+            if (videotoolbox_async) {
+                vtctx = av_videotoolbox_alloc_async_context();
+            }
+            ret = av_videotoolbox_default_init2(s, vtctx);
         } else {
-            AVVideotoolboxContext *vtctx = av_videotoolbox_alloc_context();
+            if (videotoolbox_async) {
+                vtctx = av_videotoolbox_alloc_async_context();
+            } else {
+                vtctx = av_videotoolbox_alloc_context();
+            }
             CFStringRef pixfmt_str = CFStringCreateWithCString(kCFAllocatorDefault,
                                                                videotoolbox_pixfmt,
                                                                kCFStringEncodingUTF8);
diff --git a/libavcodec/videotoolbox.c b/libavcodec/videotoolbox.c
index b78238a..7047257 100644
--- a/libavcodec/videotoolbox.c
+++ b/libavcodec/videotoolbox.c
@@ -22,6 +22,7 @@
 
 #include "config.h"
 #if CONFIG_VIDEOTOOLBOX
+#  include <pthread.h>
 #  include "videotoolbox.h"
 #else
 #  include "vda.h"
@@ -177,6 +178,41 @@ int ff_videotoolbox_uninit(AVCodecContext *avctx)
 }
 
 #if CONFIG_VIDEOTOOLBOX
+static int videotoolbox_lock_operation(void **mtx, enum AVLockOp op)
+{
+    switch(op) {
+    case AV_LOCK_CREATE:
+        *mtx = av_malloc(sizeof(pthread_mutex_t));
+        if(!*mtx)
+            return 1;
+        return !!pthread_mutex_init(*mtx, NULL);
+    case AV_LOCK_OBTAIN:
+        return !!pthread_mutex_lock(*mtx);
+    case AV_LOCK_RELEASE:
+        return !!pthread_mutex_unlock(*mtx);
+    case AV_LOCK_DESTROY:
+        pthread_mutex_destroy(*mtx);
+        av_freep(mtx);
+        return 0;
+    }
+    return 1;
+}
+
+static void videotoolbox_clear_queue(struct AVVideotoolboxContext *videotoolbox)
+{
+    AVVideotoolboxAsyncFrame *top_frame;
+
+    videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_OBTAIN);
+
+    while (videotoolbox->queue != NULL) {
+        top_frame = videotoolbox->queue;
+        videotoolbox->queue = top_frame->next_frame;
+        av_videotoolbox_release_async_frame(top_frame);
+    }
+
+    videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_RELEASE);
+}
+
 static void videotoolbox_write_mp4_descr_length(PutByteContext *pb, int length)
 {
     int i;
@@ -244,11 +280,17 @@ static CFDataRef videotoolbox_esds_extradata_create(AVCodecContext *avctx)
 
 static CMSampleBufferRef videotoolbox_sample_buffer_create(CMFormatDescriptionRef fmt_desc,
                                                            void *buffer,
-                                                           int size)
+                                                           int size,
+                                                           int64_t frame_pts)
 {
     OSStatus status;
     CMBlockBufferRef  block_buf;
     CMSampleBufferRef sample_buf;
+    CMSampleTimingInfo timeInfo;
+    CMSampleTimingInfo timeInfoArray[1];
+
+    timeInfo.presentationTimeStamp = CMTimeMake(frame_pts, 1);
+    timeInfoArray[0] = timeInfo;
 
     block_buf  = NULL;
     sample_buf = NULL;
@@ -271,8 +313,8 @@ static CMSampleBufferRef videotoolbox_sample_buffer_create(CMFormatDescriptionRe
                                       0,                    // makeDataReadyRefcon
                                       fmt_desc,             // formatDescription
                                       1,                    // numSamples
-                                      0,                    // numSampleTimingEntries
-                                      NULL,                 // sampleTimingArray
+                                      1,                    // numSampleTimingEntries
+                                      timeInfoArray,        // sampleTimingArray
                                       0,                    // numSampleSizeEntries
                                       NULL,                 // sampleSizeArray
                                       &sample_buf);
@@ -293,41 +335,88 @@ static void videotoolbox_decoder_callback(void *opaque,
                                           CMTime duration)
 {
     AVCodecContext *avctx = opaque;
-    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+    AVVideotoolboxContext *videotoolbox = avctx->hwaccel_context;
 
-    if (vtctx->frame) {
-        CVPixelBufferRelease(vtctx->frame);
-        vtctx->frame = NULL;
-    }
+    if (!videotoolbox->useAsyncDecoding) {
+        VTContext *vtctx = avctx->internal->hwaccel_priv_data;
 
-    if (!image_buffer) {
-        av_log(NULL, AV_LOG_DEBUG, "vt decoder cb: output image buffer is null\n");
-        return;
-    }
+        if (vtctx->frame) {
+            CVPixelBufferRelease(vtctx->frame);
+            vtctx->frame = NULL;
+        }
 
-    vtctx->frame = CVPixelBufferRetain(image_buffer);
+        if (!image_buffer) {
+            av_log(NULL, AV_LOG_DEBUG, "vt decoder cb: output image buffer is null\n");
+            return;
+        }
+
+        vtctx->frame = CVPixelBufferRetain(image_buffer);
+    } else { // async decoding
+        AVVideotoolboxAsyncFrame *new_frame;
+        AVVideotoolboxAsyncFrame *queue_walker;
+
+        if (!image_buffer) {
+            av_log(NULL, AV_LOG_DEBUG, "vt decoder cb: output image buffer is null\n");
+            return;
+        }
+
+        new_frame = (AVVideotoolboxAsyncFrame *)av_mallocz(sizeof(AVVideotoolboxAsyncFrame));
+        new_frame->next_frame = NULL;
+        new_frame->cv_buffer = CVPixelBufferRetain(image_buffer);
+        new_frame->pts = pts.value;
+
+        videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_OBTAIN);
+
+        queue_walker = videotoolbox->queue;
+
+        if (!queue_walker || (new_frame->pts < queue_walker->pts)) {
+            /* we have an empty queue, or this frame earlier than the current queue head */
+            new_frame->next_frame = queue_walker;
+            videotoolbox->queue = new_frame;
+        } else {
+            /* walk the queue and insert this frame where it belongs in display order */
+            AVVideotoolboxAsyncFrame *next_frame;
+
+            while (1) {
+                next_frame = queue_walker->next_frame;
+
+                if (!next_frame || (new_frame->pts < next_frame->pts)) {
+                    new_frame->next_frame = next_frame;
+                    queue_walker->next_frame = new_frame;
+                    break;
+                }
+                queue_walker = next_frame;
+            }
+        }
+
+        videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_RELEASE);
+    }
 }
 
-static OSStatus videotoolbox_session_decode_frame(AVCodecContext *avctx)
+static OSStatus videotoolbox_session_decode_frame(AVCodecContext *avctx, AVFrame *frame)
 {
     OSStatus status;
     CMSampleBufferRef sample_buf;
     AVVideotoolboxContext *videotoolbox = avctx->hwaccel_context;
     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+    VTDecodeFrameFlags decodeFlags = videotoolbox->useAsyncDecoding ?
+                                         kVTDecodeFrame_EnableAsynchronousDecompression : 0;
 
     sample_buf = videotoolbox_sample_buffer_create(videotoolbox->cm_fmt_desc,
                                                    vtctx->bitstream,
-                                                   vtctx->bitstream_size);
+                                                   vtctx->bitstream_size,
+                                                   frame->pkt_pts);
 
     if (!sample_buf)
         return -1;
 
     status = VTDecompressionSessionDecodeFrame(videotoolbox->session,
                                                sample_buf,
-                                               0,       // decodeFlags
+                                               decodeFlags,
                                                NULL,    // sourceFrameRefCon
                                                0);      // infoFlagsOut
-    if (status == noErr)
+
+    if (status == noErr && !videotoolbox->useAsyncDecoding)
         status = VTDecompressionSessionWaitForAsynchronousFrames(videotoolbox->session);
 
     CFRelease(sample_buf);
@@ -344,17 +433,21 @@ static int videotoolbox_common_end_frame(AVCodecContext *avctx, AVFrame *frame)
     if (!videotoolbox->session || !vtctx->bitstream)
         return AVERROR_INVALIDDATA;
 
-    status = videotoolbox_session_decode_frame(avctx);
+    status = videotoolbox_session_decode_frame(avctx, frame);
 
     if (status) {
         av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%d)\n", status);
         return AVERROR_UNKNOWN;
     }
 
-    if (!vtctx->frame)
-        return AVERROR_UNKNOWN;
+    if (!videotoolbox->useAsyncDecoding) {
+        if (!vtctx->frame)
+            return AVERROR_UNKNOWN;
 
-    return ff_videotoolbox_buffer_create(vtctx, frame);
+        status = ff_videotoolbox_buffer_create(vtctx, frame);
+    }
+
+    return status;
 }
 
 static int videotoolbox_h264_end_frame(AVCodecContext *avctx)
@@ -508,6 +601,13 @@ static int videotoolbox_default_init(AVCodecContext *avctx)
         return -1;
     }
 
+    if (videotoolbox->useAsyncDecoding) {
+        if (av_lockmgr_register(videotoolbox_lock_operation))
+            return -1;
+
+        videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_CREATE);
+    }
+
     switch( avctx->codec_id ) {
     case AV_CODEC_ID_H263 :
         videotoolbox->cm_codec_type = kCMVideoCodecType_H263;
@@ -586,6 +686,15 @@ static void videotoolbox_default_free(AVCodecContext *avctx)
         if (videotoolbox->cm_fmt_desc)
             CFRelease(videotoolbox->cm_fmt_desc);
 
+        if (videotoolbox->useAsyncDecoding) {
+            VTDecompressionSessionWaitForAsynchronousFrames(videotoolbox->session);
+
+            videotoolbox_clear_queue(videotoolbox);
+
+            if (videotoolbox->queue_mutex != NULL)
+                videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_DESTROY);
+        }
+
         if (videotoolbox->session)
             VTDecompressionSessionInvalidate(videotoolbox->session);
     }
@@ -668,6 +777,17 @@ AVVideotoolboxContext *av_videotoolbox_alloc_context(void)
     return ret;
 }
 
+AVVideotoolboxContext *av_videotoolbox_alloc_async_context(void)
+{
+    AVVideotoolboxContext *ret = av_videotoolbox_alloc_context();
+
+    if (ret) {
+        ret->useAsyncDecoding = 1;
+    }
+
+    return ret;
+}
+
 int av_videotoolbox_default_init(AVCodecContext *avctx)
 {
     return av_videotoolbox_default_init2(avctx, NULL);
@@ -683,8 +803,30 @@ int av_videotoolbox_default_init2(AVCodecContext *avctx, AVVideotoolboxContext *
 
 void av_videotoolbox_default_free(AVCodecContext *avctx)
 {
-
     videotoolbox_default_free(avctx);
     av_freep(&avctx->hwaccel_context);
 }
+
+AVVideotoolboxAsyncFrame *av_videotoolbox_pop_async_frame(AVVideotoolboxContext *videotoolbox)
+{
+    AVVideotoolboxAsyncFrame *top_frame;
+
+    if (!videotoolbox->queue)
+        return NULL;
+
+    videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_OBTAIN);
+    top_frame = videotoolbox->queue;
+    videotoolbox->queue = top_frame->next_frame;
+    videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_RELEASE);
+
+    return top_frame;
+}
+
+void av_videotoolbox_release_async_frame(AVVideotoolboxAsyncFrame *frame)
+{
+    if (frame != NULL) {
+        CVPixelBufferRelease(frame->cv_buffer);
+        av_freep(&frame);
+    }
+}
 #endif /* CONFIG_VIDEOTOOLBOX */
diff --git a/libavcodec/videotoolbox.h b/libavcodec/videotoolbox.h
index a48638e..b5bf030 100644
--- a/libavcodec/videotoolbox.h
+++ b/libavcodec/videotoolbox.h
@@ -38,6 +38,29 @@
 #include "libavcodec/avcodec.h"
 
 /**
+ *  This structure is used to store a decoded frame information and data
+ *  when using the Videotoolbox Async API.
+ */
+typedef struct AVVideotoolboxAsyncFrame
+{
+    /**
+     * The PTS of the frame.
+     */
+    int64_t             pts;
+
+    /**
+     * The CoreVideo buffer that contains the decoded data.
+     */
+    CVPixelBufferRef    cv_buffer;
+
+    /**
+     * A pointer to the next frame.
+     */
+    struct AVVideotoolboxAsyncFrame *next_frame;
+
+} AVVideotoolboxAsyncFrame;
+
+/**
  * This struct holds all the information that needs to be passed
  * between the caller and libavcodec for initializing Videotoolbox decoding.
  * Its size is not a part of the public ABI, it must be allocated with
@@ -73,6 +96,23 @@ typedef struct AVVideotoolboxContext {
      * Set by the caller.
      */
     int cm_codec_type;
+
+    /**
+     * Enable the async decoding mode.
+     * Set by av_videotoolbox_alloc_async_context()
+     */
+    int useAsyncDecoding;
+
+    /**
+     * Videotoolbox async frames queue ordered by presentation timestamp.
+     */
+    AVVideotoolboxAsyncFrame *queue;
+
+    /**
+     * Mutex for locking queue operations when async decoding is enabled.
+     */
+    void *queue_mutex;
+
 } AVVideotoolboxContext;
 
 /**
@@ -91,6 +131,21 @@ typedef struct AVVideotoolboxContext {
 AVVideotoolboxContext *av_videotoolbox_alloc_context(void);
 
 /**
+ * Allocate and initialize an async Videotoolbox context.
+ *
+ * This function should be called from the get_format() callback when the caller
+ * selects the AV_PIX_FMT_VIDEOTOOLBOX format. The caller must then create
+ * the decoder object (using the output callback provided by libavcodec) that
+ * will be used for Videotoolbox-accelerated decoding.
+ *
+ * When decoding with Videotoolbox is finished, the caller must destroy the decoder
+ * object and free the Videotoolbox context using av_free().
+ *
+ * @return the newly allocated context or NULL on failure
+ */
+AVVideotoolboxContext *av_videotoolbox_alloc_async_context(void);
+
+/**
  * This is a convenience function that creates and sets up the Videotoolbox context using
  * an internal implementation.
  *
@@ -120,6 +175,24 @@ int av_videotoolbox_default_init2(AVCodecContext *avctx, AVVideotoolboxContext *
 void av_videotoolbox_default_free(AVCodecContext *avctx);
 
 /**
+ * This function must be called to retrieve the top frame of the queue when async decoding
+ * is enabled.
+ *
+ * @param vtctx the corresponding videotoolbox context
+ *
+ * @return the top async frame from the queue, or NULL if the queue is empty.
+ */
+AVVideotoolboxAsyncFrame *av_videotoolbox_pop_async_frame(AVVideotoolboxContext *vtctx);
+
+/**
+ * This function must be called to release the top frame returned by
+ * av_videotoolbox_pop_async_frame().
+ *
+ * @param frame the frame to release
+ */
+void av_videotoolbox_release_async_frame(AVVideotoolboxAsyncFrame *frame);
+
+/**
  * @}
  */
 
-- 
2.3.2 (Apple Git-55)



More information about the ffmpeg-devel mailing list