[FFmpeg-devel] [PATCH] videotoolbox: allow to enable the async decoding.
wm4
nfxjfg at googlemail.com
Sun Aug 9 13:30:09 CEST 2015
On Sun, 9 Aug 2015 13:11:44 +0200
Sebastien Zwickert <dilaroga at gmail.com> wrote:
> This patch allows using the Videotoolbox API in asynchronous mode.
> Note that when using async decoding the user is responsible for
> releasing the async frame.
What does this mean?
> Moreover, an option called videotoolbox_async was added to enable
> async decoding with the ffmpeg CLI.
>
> ---
> ffmpeg.h | 1 +
> ffmpeg_opt.c | 1 +
> ffmpeg_videotoolbox.c | 69 +++++++++++++----
> libavcodec/videotoolbox.c | 186 ++++++++++++++++++++++++++++++++++++++++------
> libavcodec/videotoolbox.h | 73 ++++++++++++++++++
> 5 files changed, 294 insertions(+), 36 deletions(-)
>
> diff --git a/ffmpeg.h b/ffmpeg.h
> index 6544e6f..73a1031 100644
> --- a/ffmpeg.h
> +++ b/ffmpeg.h
> @@ -522,6 +522,7 @@ extern AVIOContext *progress_avio;
> extern float max_error_rate;
> extern int vdpau_api_ver;
> extern char *videotoolbox_pixfmt;
> +extern int videotoolbox_async;
>
> extern const AVIOInterruptCB int_cb;
>
> diff --git a/ffmpeg_opt.c b/ffmpeg_opt.c
> index 28d3051..91be9b9 100644
> --- a/ffmpeg_opt.c
> +++ b/ffmpeg_opt.c
> @@ -3238,6 +3238,7 @@ const OptionDef options[] = {
> #endif
> #if CONFIG_VDA || CONFIG_VIDEOTOOLBOX
> { "videotoolbox_pixfmt", HAS_ARG | OPT_STRING | OPT_EXPERT, { &videotoolbox_pixfmt}, "" },
> + { "videotoolbox_async", HAS_ARG | OPT_INT | OPT_EXPERT, { &videotoolbox_async}, "" },
> #endif
> { "autorotate", HAS_ARG | OPT_BOOL | OPT_SPEC |
> OPT_EXPERT | OPT_INPUT, { .off = OFFSET(autorotate) },
> diff --git a/ffmpeg_videotoolbox.c b/ffmpeg_videotoolbox.c
> index 6688452..0bb0600 100644
> --- a/ffmpeg_videotoolbox.c
> +++ b/ffmpeg_videotoolbox.c
> @@ -34,21 +34,42 @@ typedef struct VTContext {
> } VTContext;
>
> char *videotoolbox_pixfmt;
> +int videotoolbox_async;
>
> static int videotoolbox_retrieve_data(AVCodecContext *s, AVFrame *frame)
> {
> InputStream *ist = s->opaque;
> VTContext *vt = ist->hwaccel_ctx;
> - CVPixelBufferRef pixbuf = (CVPixelBufferRef)frame->data[3];
> - OSType pixel_format = CVPixelBufferGetPixelFormatType(pixbuf);
> + AVVideotoolboxContext *videotoolbox = s->hwaccel_context;
> + AVVideotoolboxAsyncFrame *async_frame = NULL;
> + CVPixelBufferRef pixbuf;
> + OSType pixel_format;
> CVReturn err;
> uint8_t *data[4] = { 0 };
> int linesize[4] = { 0 };
> int planes, ret, i;
> char codec_str[32];
> + int width, height;
>
> av_frame_unref(vt->tmp_frame);
>
> + if (videotoolbox->useAsyncDecoding) {
> + async_frame = av_videotoolbox_pop_async_frame(videotoolbox);
> +
> + if (!async_frame)
> + return -1;
> +
> + pixbuf = async_frame->cv_buffer;
> + width = CVPixelBufferGetWidth(pixbuf);
> + height = CVPixelBufferGetHeight(pixbuf);
> + } else {
> + pixbuf = (CVPixelBufferRef)frame->data[3];
> + width = frame->width;
> + height = frame->height;
> + }
> +
> + pixel_format = CVPixelBufferGetPixelFormatType(pixbuf);
> +
> switch (pixel_format) {
> case kCVPixelFormatType_420YpCbCr8Planar: vt->tmp_frame->format = AV_PIX_FMT_YUV420P; break;
> case kCVPixelFormatType_422YpCbCr8: vt->tmp_frame->format = AV_PIX_FMT_UYVY422; break;
> @@ -60,19 +81,21 @@ static int videotoolbox_retrieve_data(AVCodecContext *s, AVFrame *frame)
> av_get_codec_tag_string(codec_str, sizeof(codec_str), s->codec_tag);
> av_log(NULL, AV_LOG_ERROR,
> "%s: Unsupported pixel format: %s\n", codec_str, videotoolbox_pixfmt);
> - return AVERROR(ENOSYS);
> + ret = AVERROR(ENOSYS);
> + goto fail;
> }
>
> - vt->tmp_frame->width = frame->width;
> - vt->tmp_frame->height = frame->height;
> + vt->tmp_frame->width = width;
> + vt->tmp_frame->height = height;
> ret = av_frame_get_buffer(vt->tmp_frame, 32);
> - if (ret < 0)
> - return ret;
> -
> + if (ret < 0) {
> + goto fail;
> + }
> err = CVPixelBufferLockBaseAddress(pixbuf, kCVPixelBufferLock_ReadOnly);
> if (err != kCVReturnSuccess) {
> av_log(NULL, AV_LOG_ERROR, "Error locking the pixel buffer.\n");
> - return AVERROR_UNKNOWN;
> + ret = AVERROR_UNKNOWN;
> + goto fail;
> }
>
> if (CVPixelBufferIsPlanar(pixbuf)) {
> @@ -89,17 +112,27 @@ static int videotoolbox_retrieve_data(AVCodecContext *s, AVFrame *frame)
>
> av_image_copy(vt->tmp_frame->data, vt->tmp_frame->linesize,
> (const uint8_t **)data, linesize, vt->tmp_frame->format,
> - frame->width, frame->height);
> + width, height);
>
> ret = av_frame_copy_props(vt->tmp_frame, frame);
> CVPixelBufferUnlockBaseAddress(pixbuf, kCVPixelBufferLock_ReadOnly);
> - if (ret < 0)
> - return ret;
> + if (ret < 0) {
> + goto fail;
> + }
>
> av_frame_unref(frame);
> av_frame_move_ref(frame, vt->tmp_frame);
>
> + if (videotoolbox->useAsyncDecoding) {
> + av_videotoolbox_release_async_frame(async_frame);
> + }
> +
> return 0;
> +fail:
> + if (videotoolbox->useAsyncDecoding) {
> + av_videotoolbox_release_async_frame(async_frame);
> + }
> + return ret;
> }
>
> static void videotoolbox_uninit(AVCodecContext *s)
> @@ -147,10 +180,18 @@ int videotoolbox_init(AVCodecContext *s)
>
> if (ist->hwaccel_id == HWACCEL_VIDEOTOOLBOX) {
> #if CONFIG_VIDEOTOOLBOX
> + AVVideotoolboxContext *vtctx = NULL;
> if (!videotoolbox_pixfmt) {
> - ret = av_videotoolbox_default_init(s);
> + if (videotoolbox_async) {
> + vtctx = av_videotoolbox_alloc_async_context();
> + }
> + ret = av_videotoolbox_default_init2(s, vtctx);
> } else {
> - AVVideotoolboxContext *vtctx = av_videotoolbox_alloc_context();
> + if (videotoolbox_async) {
> + vtctx = av_videotoolbox_alloc_async_context();
> + } else {
> + vtctx = av_videotoolbox_alloc_context();
> + }
> CFStringRef pixfmt_str = CFStringCreateWithCString(kCFAllocatorDefault,
> videotoolbox_pixfmt,
> kCFStringEncodingUTF8);
> diff --git a/libavcodec/videotoolbox.c b/libavcodec/videotoolbox.c
> index b78238a..7047257 100644
> --- a/libavcodec/videotoolbox.c
> +++ b/libavcodec/videotoolbox.c
> @@ -22,6 +22,7 @@
>
> #include "config.h"
> #if CONFIG_VIDEOTOOLBOX
> +# include <pthread.h>
> # include "videotoolbox.h"
> #else
> # include "vda.h"
> @@ -177,6 +178,41 @@ int ff_videotoolbox_uninit(AVCodecContext *avctx)
> }
>
> #if CONFIG_VIDEOTOOLBOX
> +static int videotoolbox_lock_operation(void **mtx, enum AVLockOp op)
> +{
> + switch(op) {
> + case AV_LOCK_CREATE:
> + *mtx = av_malloc(sizeof(pthread_mutex_t));
> + if(!*mtx)
> + return 1;
> + return !!pthread_mutex_init(*mtx, NULL);
> + case AV_LOCK_OBTAIN:
> + return !!pthread_mutex_lock(*mtx);
> + case AV_LOCK_RELEASE:
> + return !!pthread_mutex_unlock(*mtx);
> + case AV_LOCK_DESTROY:
> + pthread_mutex_destroy(*mtx);
> + av_freep(mtx);
> + return 0;
> + }
> + return 1;
> +}
This is ugly and seems to serve no purpose as far as I can see. Use
pthread directly.
> +
> +static void videotoolbox_clear_queue(struct AVVideotoolboxContext *videotoolbox)
> +{
> + AVVideotoolboxAsyncFrame *top_frame;
> +
> + videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_OBTAIN);
> +
> + while (videotoolbox->queue != NULL) {
> + top_frame = videotoolbox->queue;
> + videotoolbox->queue = top_frame->next_frame;
> + av_videotoolbox_release_async_frame(top_frame);
> + }
> +
> + videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_RELEASE);
> +}
> +
> static void videotoolbox_write_mp4_descr_length(PutByteContext *pb, int length)
> {
> int i;
> @@ -244,11 +280,17 @@ static CFDataRef videotoolbox_esds_extradata_create(AVCodecContext *avctx)
>
> static CMSampleBufferRef videotoolbox_sample_buffer_create(CMFormatDescriptionRef fmt_desc,
> void *buffer,
> - int size)
> + int size,
> + int64_t frame_pts)
> {
> OSStatus status;
> CMBlockBufferRef block_buf;
> CMSampleBufferRef sample_buf;
> + CMSampleTimingInfo timeInfo;
> + CMSampleTimingInfo timeInfoArray[1];
> +
> + timeInfo.presentationTimeStamp = CMTimeMake(frame_pts, 1);
> + timeInfoArray[0] = timeInfo;
>
> block_buf = NULL;
> sample_buf = NULL;
> @@ -271,8 +313,8 @@ static CMSampleBufferRef videotoolbox_sample_buffer_create(CMFormatDescriptionRe
> 0, // makeDataReadyRefcon
> fmt_desc, // formatDescription
> 1, // numSamples
> - 0, // numSampleTimingEntries
> - NULL, // sampleTimingArray
> + 1, // numSampleTimingEntries
> + timeInfoArray, // sampleTimingArray
> 0, // numSampleSizeEntries
> NULL, // sampleSizeArray
> &sample_buf);
> @@ -293,41 +335,88 @@ static void videotoolbox_decoder_callback(void *opaque,
> CMTime duration)
> {
> AVCodecContext *avctx = opaque;
> - VTContext *vtctx = avctx->internal->hwaccel_priv_data;
> + AVVideotoolboxContext *videotoolbox = avctx->hwaccel_context;
>
> - if (vtctx->frame) {
> - CVPixelBufferRelease(vtctx->frame);
> - vtctx->frame = NULL;
> - }
> + if (!videotoolbox->useAsyncDecoding) {
> + VTContext *vtctx = avctx->internal->hwaccel_priv_data;
>
> - if (!image_buffer) {
> - av_log(NULL, AV_LOG_DEBUG, "vt decoder cb: output image buffer is null\n");
> - return;
> - }
> + if (vtctx->frame) {
> + CVPixelBufferRelease(vtctx->frame);
> + vtctx->frame = NULL;
> + }
>
> - vtctx->frame = CVPixelBufferRetain(image_buffer);
> + if (!image_buffer) {
> + av_log(NULL, AV_LOG_DEBUG, "vt decoder cb: output image buffer is null\n");
> + return;
> + }
> +
> + vtctx->frame = CVPixelBufferRetain(image_buffer);
> + } else { // async decoding
> + AVVideotoolboxAsyncFrame *new_frame;
> + AVVideotoolboxAsyncFrame *queue_walker;
> +
> + if (!image_buffer) {
> + av_log(NULL, AV_LOG_DEBUG, "vt decoder cb: output image buffer is null\n");
> + return;
> + }
> +
> + new_frame = (AVVideotoolboxAsyncFrame *)av_mallocz(sizeof(AVVideotoolboxAsyncFrame));
> + new_frame->next_frame = NULL;
Unchecked malloc.
> + new_frame->cv_buffer = CVPixelBufferRetain(image_buffer);
> + new_frame->pts = pts.value;
> +
> + videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_OBTAIN);
> +
> + queue_walker = videotoolbox->queue;
> +
> + if (!queue_walker || (new_frame->pts < queue_walker->pts)) {
> + /* we have an empty queue, or this frame earlier than the current queue head */
> + new_frame->next_frame = queue_walker;
> + videotoolbox->queue = new_frame;
> + } else {
> + /* walk the queue and insert this frame where it belongs in display order */
> + AVVideotoolboxAsyncFrame *next_frame;
> +
> + while (1) {
> + next_frame = queue_walker->next_frame;
> +
> + if (!next_frame || (new_frame->pts < next_frame->pts)) {
> + new_frame->next_frame = next_frame;
> + queue_walker->next_frame = new_frame;
> + break;
> + }
As Hendrik Leppkes said, this is fragile.
> + queue_walker = next_frame;
> + }
> + }
> +
> + videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_RELEASE);
> + }
> }
>
> -static OSStatus videotoolbox_session_decode_frame(AVCodecContext *avctx)
> +static OSStatus videotoolbox_session_decode_frame(AVCodecContext *avctx, AVFrame *frame)
> {
> OSStatus status;
> CMSampleBufferRef sample_buf;
> AVVideotoolboxContext *videotoolbox = avctx->hwaccel_context;
> VTContext *vtctx = avctx->internal->hwaccel_priv_data;
> + VTDecodeFrameFlags decodeFlags = videotoolbox->useAsyncDecoding ?
> + kVTDecodeFrame_EnableAsynchronousDecompression : 0;
>
> sample_buf = videotoolbox_sample_buffer_create(videotoolbox->cm_fmt_desc,
> vtctx->bitstream,
> - vtctx->bitstream_size);
> + vtctx->bitstream_size,
> + frame->pkt_pts);
>
> if (!sample_buf)
> return -1;
>
> status = VTDecompressionSessionDecodeFrame(videotoolbox->session,
> sample_buf,
> - 0, // decodeFlags
> + decodeFlags,
> NULL, // sourceFrameRefCon
> 0); // infoFlagsOut
> - if (status == noErr)
> +
> + if (status == noErr && !videotoolbox->useAsyncDecoding)
> status = VTDecompressionSessionWaitForAsynchronousFrames(videotoolbox->session);
>
> CFRelease(sample_buf);
> @@ -344,17 +433,21 @@ static int videotoolbox_common_end_frame(AVCodecContext *avctx, AVFrame *frame)
> if (!videotoolbox->session || !vtctx->bitstream)
> return AVERROR_INVALIDDATA;
>
> - status = videotoolbox_session_decode_frame(avctx);
> + status = videotoolbox_session_decode_frame(avctx, frame);
>
> if (status) {
> av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%d)\n", status);
> return AVERROR_UNKNOWN;
> }
>
> - if (!vtctx->frame)
> - return AVERROR_UNKNOWN;
> + if (!videotoolbox->useAsyncDecoding) {
> + if (!vtctx->frame)
> + return AVERROR_UNKNOWN;
>
> - return ff_videotoolbox_buffer_create(vtctx, frame);
> + status = ff_videotoolbox_buffer_create(vtctx, frame);
> + }
> +
> + return status;
> }
>
> static int videotoolbox_h264_end_frame(AVCodecContext *avctx)
> @@ -508,6 +601,13 @@ static int videotoolbox_default_init(AVCodecContext *avctx)
> return -1;
> }
>
> + if (videotoolbox->useAsyncDecoding) {
> + if (av_lockmgr_register(videotoolbox_lock_operation))
> + return -1;
> +
> + videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_CREATE);
> + }
> +
> switch( avctx->codec_id ) {
> case AV_CODEC_ID_H263 :
> videotoolbox->cm_codec_type = kCMVideoCodecType_H263;
> @@ -586,6 +686,15 @@ static void videotoolbox_default_free(AVCodecContext *avctx)
> if (videotoolbox->cm_fmt_desc)
> CFRelease(videotoolbox->cm_fmt_desc);
>
> + if (videotoolbox->useAsyncDecoding) {
> + VTDecompressionSessionWaitForAsynchronousFrames(videotoolbox->session);
> +
> + videotoolbox_clear_queue(videotoolbox);
> +
> + if (videotoolbox->queue_mutex != NULL)
> + videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_DESTROY);
> + }
> +
> if (videotoolbox->session)
> VTDecompressionSessionInvalidate(videotoolbox->session);
> }
> @@ -668,6 +777,17 @@ AVVideotoolboxContext *av_videotoolbox_alloc_context(void)
> return ret;
> }
>
> +AVVideotoolboxContext *av_videotoolbox_alloc_async_context(void)
> +{
> + AVVideotoolboxContext *ret = av_videotoolbox_alloc_context();
> +
> + if (ret) {
> + ret->useAsyncDecoding = 1;
> + }
> +
> + return ret;
> +}
> +
> int av_videotoolbox_default_init(AVCodecContext *avctx)
> {
> return av_videotoolbox_default_init2(avctx, NULL);
> @@ -683,8 +803,30 @@ int av_videotoolbox_default_init2(AVCodecContext *avctx, AVVideotoolboxContext *
>
> void av_videotoolbox_default_free(AVCodecContext *avctx)
> {
> -
> videotoolbox_default_free(avctx);
> av_freep(&avctx->hwaccel_context);
> }
> +
> +AVVideotoolboxAsyncFrame *av_videotoolbox_pop_async_frame(AVVideotoolboxContext *videotoolbox)
> +{
> + AVVideotoolboxAsyncFrame *top_frame;
> +
> + if (!videotoolbox->queue)
> + return NULL;
> +
> + videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_OBTAIN);
> + top_frame = videotoolbox->queue;
> + videotoolbox->queue = top_frame->next_frame;
> + videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_RELEASE);
> +
> + return top_frame;
> +}
> +
> +void av_videotoolbox_release_async_frame(AVVideotoolboxAsyncFrame *frame)
> +{
> + if (frame != NULL) {
> + CVPixelBufferRelease(frame->cv_buffer);
> + av_freep(&frame);
> + }
> +}
> #endif /* CONFIG_VIDEOTOOLBOX */
> diff --git a/libavcodec/videotoolbox.h b/libavcodec/videotoolbox.h
> index a48638e..b5bf030 100644
> --- a/libavcodec/videotoolbox.h
> +++ b/libavcodec/videotoolbox.h
> @@ -38,6 +38,29 @@
> #include "libavcodec/avcodec.h"
>
> /**
> + * This structure is used to store a decoded frame information and data
> + * when using the Videotoolbox Async API.
> + */
> +typedef struct AVVideotoolboxAsyncFrame
> +{
> + /**
> + * The PTS of the frame.
> + */
> + int64_t pts;
> +
> + /**
> + * The CoreVideo buffer that contains the decoded data.
> + */
> + CVPixelBufferRef cv_buffer;
> +
> + /**
> + * A pointer to the next frame.
> + */
> + struct AVVideotoolboxAsyncFrame *next_frame;
> +
> +} AVVideotoolboxAsyncFrame;
> +
> +/**
> * This struct holds all the information that needs to be passed
> * between the caller and libavcodec for initializing Videotoolbox decoding.
> * Its size is not a part of the public ABI, it must be allocated with
> @@ -73,6 +96,23 @@ typedef struct AVVideotoolboxContext {
> * Set by the caller.
> */
> int cm_codec_type;
> +
> + /**
> + * Enable the async decoding mode.
> + * Set by av_videotoolbox_alloc_async_context()
> + */
> + int useAsyncDecoding;
> +
> + /**
> + * Videotoolbox async frames queue ordered by presentation timestamp.
> + */
> + AVVideotoolboxAsyncFrame *queue;
> +
> + /**
> + * Mutex for locking queue operations when async decoding is enabled.
> + */
> + void *queue_mutex;
> +
> } AVVideotoolboxContext;
>
> /**
> @@ -91,6 +131,21 @@ typedef struct AVVideotoolboxContext {
> AVVideotoolboxContext *av_videotoolbox_alloc_context(void);
>
> /**
> + * Allocate and initialize an async Videotoolbox context.
> + *
> + * This function should be called from the get_format() callback when the caller
> + * selects the AV_PIX_FMT_VIDEOTOOLBOX format. The caller must then create
> + * the decoder object (using the output callback provided by libavcodec) that
> + * will be used for Videotoolbox-accelerated decoding.
> + *
> + * When decoding with Videotoolbox is finished, the caller must destroy the decoder
> + * object and free the Videotoolbox context using av_free().
> + *
> + * @return the newly allocated context or NULL on failure
> + */
> +AVVideotoolboxContext *av_videotoolbox_alloc_async_context(void);
> +
> +/**
> * This is a convenience function that creates and sets up the Videotoolbox context using
> * an internal implementation.
> *
> @@ -120,6 +175,24 @@ int av_videotoolbox_default_init2(AVCodecContext *avctx, AVVideotoolboxContext *
> void av_videotoolbox_default_free(AVCodecContext *avctx);
>
> /**
> + * This function must be called to retrieve the top frame of the queue when async decoding
> + * is enabled.
> + *
> + * @param vtctx the corresponding videotoolbox context
> + *
> + * @return the top async frame from the queue.
> + */
> +AVVideotoolboxAsyncFrame *av_videotoolbox_pop_async_frame(AVVideotoolboxContext *vtctx);
> +
> +/**
> + * This function must be called to release the top frame returned by
> + * av_videotoolbox_pop_async_frame().
> + *
> + * @param frame the frame to release
> + */
> +void av_videotoolbox_release_async_frame(AVVideotoolboxAsyncFrame *frame);
> +
> +/**
> * @}
> */
>
So I'm not sure if I understand this. Is the API user supposed to use
these functions to get decoded frames, instead of using the AVFrame?
More information about the ffmpeg-devel
mailing list