[FFmpeg-cvslog] hwcontext: add a CUDA implementation
Anton Khirnov
git at videolan.org
Wed Feb 24 16:23:50 CET 2016
ffmpeg | branch: master | Anton Khirnov <anton at khirnov.net> | Wed Jan 13 14:25:58 2016 +0100| [ad884d100259e55cb51a4239cd8a4fd5154c2073] | committer: Anton Khirnov
hwcontext: add a CUDA implementation
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ad884d100259e55cb51a4239cd8a4fd5154c2073
---
doc/APIchanges | 2 +
libavutil/Makefile | 2 +
libavutil/hwcontext.c | 3 +
libavutil/hwcontext.h | 1 +
libavutil/hwcontext_cuda.c | 270 ++++++++++++++++++++++++++++++++++++++++
libavutil/hwcontext_cuda.h | 46 +++++++
libavutil/hwcontext_internal.h | 1 +
7 files changed, 325 insertions(+)
diff --git a/doc/APIchanges b/doc/APIchanges
index d815d9f..d42868e 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -17,6 +17,8 @@ API changes, most recent first:
xxxxxxx buffer.h - Add av_buffer_pool_init2().
xxxxxxx hwcontext.h - Add a new installed header hwcontext.h with a new API
for handling hwaccel frames.
+ xxxxxxx hwcontext_cuda.h - Add a new installed header hwcontext_cuda.h with
+ CUDA-specific hwcontext definitions.
xxxxxxx hwcontext_vdpau.h - Add a new installed header hwcontext_vdpau.h with
VDPAU-specific hwcontext definitions.
xxxxxxx pixfmt.h - Add AV_PIX_FMT_CUDA.
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 180c37e..bc85925 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -24,6 +24,7 @@ HEADERS = adler32.h \
frame.h \
hmac.h \
hwcontext.h \
+ hwcontext_cuda.h \
hwcontext_vdpau.h \
imgutils.h \
intfloat.h \
@@ -106,6 +107,7 @@ OBJS = adler32.o \
xtea.o \
OBJS-$(CONFIG_LZO) += lzo.o
+OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o
OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o
OBJS += $(COMPAT_OBJS:%=../compat/%)
diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index 2aa712e..b6d0518 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
@@ -29,6 +29,9 @@
#include "pixfmt.h"
static const HWContextType *hw_table[] = {
+#if CONFIG_CUDA
+ &ff_hwcontext_type_cuda,
+#endif
#if CONFIG_VDPAU
&ff_hwcontext_type_vdpau,
#endif
diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
index b30a20a..81ae817 100644
--- a/libavutil/hwcontext.h
+++ b/libavutil/hwcontext.h
@@ -26,6 +26,7 @@
enum AVHWDeviceType {
AV_HWDEVICE_TYPE_VDPAU,
+ AV_HWDEVICE_TYPE_CUDA,
};
typedef struct AVHWDeviceInternal AVHWDeviceInternal;
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
new file mode 100644
index 0000000..6b87b61
--- /dev/null
+++ b/libavutil/hwcontext_cuda.c
@@ -0,0 +1,270 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "buffer.h"
+#include "common.h"
+#include "hwcontext.h"
+#include "hwcontext_internal.h"
+#include "hwcontext_cuda.h"
+#include "mem.h"
+#include "pixdesc.h"
+#include "pixfmt.h"
+
+/**
+ * Private per-frames-context state of the CUDA hwcontext implementation.
+ */
+typedef struct CUDAFramesContext {
+    /* Chroma shift amounts for sw_format, as reported by
+     * av_pix_fmt_get_chroma_sub_sample() in cuda_frames_init(). */
+    int shift_width;
+    int shift_height;
+} CUDAFramesContext;
+
+/* Software pixel formats accepted as AVHWFramesContext.sw_format;
+ * cuda_frames_init() rejects any format that is not listed here. */
+static const enum AVPixelFormat supported_formats[] = {
+ AV_PIX_FMT_NV12,
+ AV_PIX_FMT_YUV420P,
+ AV_PIX_FMT_YUV444P,
+};
+
+/**
+ * AVBuffer free callback for the buffers created by cuda_pool_alloc().
+ *
+ * @param opaque the AVHWFramesContext that was handed to av_buffer_create()
+ * @param data   the buffer's data pointer, which carries a CUdeviceptr
+ */
+static void cuda_buffer_free(void *opaque, uint8_t *data)
+{
+    AVHWFramesContext   *frames_ctx = opaque;
+    AVCUDADeviceContext *device     = frames_ctx->device_ctx->hwctx;
+    CUdeviceptr          devptr     = (CUdeviceptr)data;
+    CUcontext            popped;
+
+    /* Make the device's CUDA context current for the duration of the free.
+     * The results of the CUDA calls are not checked here. */
+    cuCtxPushCurrent(device->cuda_ctx);
+    cuMemFree(devptr);
+    cuCtxPopCurrent(&popped);
+}
+
+/**
+ * Buffer pool allocation callback: allocate size bytes of CUDA device memory
+ * and wrap them in an AVBufferRef that is released by cuda_buffer_free().
+ *
+ * @param opaque the AVHWFramesContext the pool was created for
+ * @param size   number of bytes to allocate
+ * @return a new buffer reference, or NULL if the context could not be made
+ *         current, the device allocation failed, or the reference could not
+ *         be created
+ */
+static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
+{
+    AVHWFramesContext   *frames_ctx = opaque;
+    AVCUDADeviceContext *device     = frames_ctx->device_ctx->hwctx;
+    AVBufferRef         *buf        = NULL;
+    CUcontext            popped     = NULL;
+    CUdeviceptr          devptr;
+
+    if (cuCtxPushCurrent(device->cuda_ctx) != CUDA_SUCCESS) {
+        av_log(frames_ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
+        return NULL;
+    }
+
+    if (cuMemAlloc(&devptr, size) == CUDA_SUCCESS) {
+        buf = av_buffer_create((uint8_t*)devptr, size, cuda_buffer_free, frames_ctx, 0);
+        /* Without a reference nothing would ever free the device memory */
+        if (!buf)
+            cuMemFree(devptr);
+    }
+
+    /* Balance the push above on every path that got this far */
+    cuCtxPopCurrent(&popped);
+    return buf;
+}
+
+/**
+ * Validate the frames context and, if the caller supplied no pool, create
+ * the internal buffer pool.
+ *
+ * @param ctx frames context being initialized; sw_format, width and height
+ *            are read, and ctx->internal->priv receives the chroma shifts
+ * @return 0 on success, AVERROR(ENOSYS) if sw_format is not listed in
+ *         supported_formats, AVERROR(ENOMEM) if the pool cannot be created,
+ *         AVERROR_BUG if a supported format has no size rule below
+ */
+static int cuda_frames_init(AVHWFramesContext *ctx)
+{
+    CUDAFramesContext *priv = ctx->internal->priv;
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
+        if (ctx->sw_format == supported_formats[i])
+            break;
+    }
+    if (i == FF_ARRAY_ELEMS(supported_formats)) {
+        av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
+               av_get_pix_fmt_name(ctx->sw_format));
+        return AVERROR(ENOSYS);
+    }
+
+    /* Stored for the transfer functions, which shift the plane height by it */
+    av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
+
+    if (!ctx->pool) {
+        int size;
+
+        /* All planes of a frame live in one tightly packed allocation */
+        switch (ctx->sw_format) {
+        case AV_PIX_FMT_NV12:
+        case AV_PIX_FMT_YUV420P:
+            size = ctx->width * ctx->height * 3 / 2;
+            break;
+        case AV_PIX_FMT_YUV444P:
+            size = ctx->width * ctx->height * 3;
+            break;
+        default:
+            /* Only reachable if supported_formats gains an entry without a
+             * matching case here; never use an indeterminate size. */
+            return AVERROR_BUG;
+        }
+
+        ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
+        if (!ctx->internal->pool_internal)
+            return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+/**
+ * Take a buffer from the frames context's pool and set up the frame's plane
+ * pointers and line sizes inside it according to sw_format.
+ *
+ * @param ctx   frames context providing the pool, sw_format and dimensions
+ * @param frame frame to fill; receives buf[0], data[], linesize[], format,
+ *              width and height
+ * @return 0 on success, AVERROR(ENOMEM) if no buffer could be obtained,
+ *         AVERROR_BUG (after unreferencing the frame) for an unhandled
+ *         sw_format
+ */
+static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
+{
+    const int w = ctx->width;
+    const int h = ctx->height;
+    uint8_t *base;
+
+    frame->buf[0] = av_buffer_pool_get(ctx->pool);
+    if (!frame->buf[0])
+        return AVERROR(ENOMEM);
+    base = frame->buf[0]->data;
+
+    if (ctx->sw_format == AV_PIX_FMT_NV12) {
+        frame->data[0]     = base;
+        frame->data[1]     = frame->data[0] + w * h;
+        frame->linesize[0] = w;
+        frame->linesize[1] = w;
+    } else if (ctx->sw_format == AV_PIX_FMT_YUV420P) {
+        /* Plane 2 is placed directly after plane 0, plane 1 after plane 2 */
+        frame->data[0]     = base;
+        frame->data[2]     = frame->data[0] + w * h;
+        frame->data[1]     = frame->data[2] + w * h / 4;
+        frame->linesize[0] = w;
+        frame->linesize[1] = w / 2;
+        frame->linesize[2] = w / 2;
+    } else if (ctx->sw_format == AV_PIX_FMT_YUV444P) {
+        frame->data[0]     = base;
+        frame->data[1]     = frame->data[0] + w * h;
+        frame->data[2]     = frame->data[1] + w * h;
+        frame->linesize[0] = w;
+        frame->linesize[1] = w;
+        frame->linesize[2] = w;
+    } else {
+        av_frame_unref(frame);
+        return AVERROR_BUG;
+    }
+
+    frame->format = AV_PIX_FMT_CUDA;
+    frame->width  = w;
+    frame->height = h;
+
+    return 0;
+}
+
+/**
+ * Report the pixel formats usable for transfers: a newly allocated list
+ * holding sw_format followed by the AV_PIX_FMT_NONE terminator.
+ *
+ * @param ctx     frames context whose sw_format is reported
+ * @param dir     transfer direction; not consulted, the list is the same
+ *                for both directions
+ * @param formats receives the allocated list on success
+ * @return 0 on success, AVERROR(ENOMEM) if the list cannot be allocated
+ */
+static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
+                                     enum AVHWFrameTransferDirection dir,
+                                     enum AVPixelFormat **formats)
+{
+    enum AVPixelFormat *list = av_malloc_array(2, sizeof(*list));
+
+    if (!list)
+        return AVERROR(ENOMEM);
+
+    list[0]  = ctx->sw_format;
+    list[1]  = AV_PIX_FMT_NONE;
+    *formats = list;
+
+    return 0;
+}
+
+/**
+ * Download a CUDA frame into a frame in host memory, one plane at a time.
+ *
+ * @param ctx frames context that src belongs to
+ * @param dst destination frame; its data pointers are used as host memory
+ * @param src source frame; its data pointers are used as CUdeviceptr values
+ * @return 0 on success, AVERROR_UNKNOWN if the CUDA context cannot be made
+ *         current or a plane copy fails
+ */
+static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
+                                   const AVFrame *src)
+{
+    CUDAFramesContext           *priv = ctx->internal->priv;
+    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+
+    CUcontext dummy;
+    CUresult err;
+    int ret = 0;
+    int i;
+
+    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    if (err != CUDA_SUCCESS)
+        return AVERROR_UNKNOWN;
+
+    /* Planes are walked until the first NULL source data pointer */
+    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
+        CUDA_MEMCPY2D cpy = {
+            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
+            .dstMemoryType = CU_MEMORYTYPE_HOST,
+            .srcDevice     = (CUdeviceptr)src->data[i],
+            .dstHost       = dst->data[i],
+            .srcPitch      = src->linesize[i],
+            .dstPitch      = dst->linesize[i],
+            /* Never copy past the end of the shorter of the two rows */
+            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
+            /* Planes after the first have their height chroma-shifted */
+            .Height        = src->height >> (i ? priv->shift_height : 0),
+        };
+
+        err = cuMemcpy2D(&cpy);
+        if (err != CUDA_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
+            ret = AVERROR_UNKNOWN;
+            break;
+        }
+    }
+
+    /* Balance the push above on failure too, so that the context is not
+     * left on the calling thread's context stack. */
+    cuCtxPopCurrent(&dummy);
+
+    return ret;
+}
+
+/**
+ * Upload a frame in host memory into a CUDA frame, one plane at a time.
+ *
+ * @param ctx frames context that dst belongs to
+ * @param dst destination frame; its data pointers are used as CUdeviceptr
+ *            values
+ * @param src source frame; its data pointers are used as host memory
+ * @return 0 on success, AVERROR_UNKNOWN if the CUDA context cannot be made
+ *         current or a plane copy fails
+ */
+static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
+                                 const AVFrame *src)
+{
+    CUDAFramesContext           *priv = ctx->internal->priv;
+    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+
+    CUcontext dummy;
+    CUresult err;
+    int ret = 0;
+    int i;
+
+    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    if (err != CUDA_SUCCESS)
+        return AVERROR_UNKNOWN;
+
+    /* Planes are walked until the first NULL source data pointer */
+    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
+        CUDA_MEMCPY2D cpy = {
+            .srcMemoryType = CU_MEMORYTYPE_HOST,
+            .dstMemoryType = CU_MEMORYTYPE_DEVICE,
+            .srcHost       = src->data[i],
+            .dstDevice     = (CUdeviceptr)dst->data[i],
+            .srcPitch      = src->linesize[i],
+            .dstPitch      = dst->linesize[i],
+            /* Never copy past the end of the shorter of the two rows */
+            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
+            /* Planes after the first have their height chroma-shifted */
+            .Height        = src->height >> (i ? priv->shift_height : 0),
+        };
+
+        err = cuMemcpy2D(&cpy);
+        if (err != CUDA_SUCCESS) {
+            /* This is the upload path, so the message says "to" */
+            av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n");
+            ret = AVERROR_UNKNOWN;
+            break;
+        }
+    }
+
+    /* Balance the push above on failure too, so that the context is not
+     * left on the calling thread's context stack. */
+    cuCtxPopCurrent(&dummy);
+
+    return ret;
+}
+
+/* HWContextType descriptor for AV_HWDEVICE_TYPE_CUDA; it wires the callbacks
+ * defined in this file into the generic hwcontext code. */
+const HWContextType ff_hwcontext_type_cuda = {
+ .type = AV_HWDEVICE_TYPE_CUDA,
+ .name = "CUDA",
+
+ /* sizes of the API-specific device context and the private frames state */
+ .device_hwctx_size = sizeof(AVCUDADeviceContext),
+ .frames_priv_size = sizeof(CUDAFramesContext),
+
+ .frames_init = cuda_frames_init,
+ .frames_get_buffer = cuda_get_buffer,
+ .transfer_get_formats = cuda_transfer_get_formats,
+ .transfer_data_to = cuda_transfer_data_to,
+ .transfer_data_from = cuda_transfer_data_from,
+
+ /* AV_PIX_FMT_NONE-terminated list; AV_PIX_FMT_CUDA is the only entry */
+ .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
+};
diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
new file mode 100644
index 0000000..7f067c7
--- /dev/null
+++ b/libavutil/hwcontext_cuda.h
@@ -0,0 +1,46 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#ifndef AVUTIL_HWCONTEXT_CUDA_H
+#define AVUTIL_HWCONTEXT_CUDA_H
+
+#include <cuda.h>
+
+#include "pixfmt.h"
+
+/**
+ * @file
+ * An API-specific header for AV_HWDEVICE_TYPE_CUDA.
+ *
+ * This API supports dynamic frame pools. AVHWFramesContext.pool must return
+ * AVBufferRefs whose data pointer is a CUdeviceptr.
+ */
+
+/**
+ * This struct is allocated as AVHWDeviceContext.hwctx
+ */
+typedef struct AVCUDADeviceContext {
+ /**
+ * CUDA context that the hwcontext CUDA implementation makes current
+ * (via cuCtxPushCurrent) around its CUDA calls.
+ */
+ CUcontext cuda_ctx;
+} AVCUDADeviceContext;
+
+/**
+ * AVHWFramesContext.hwctx is currently not used
+ */
+
+#endif /* AVUTIL_HWCONTEXT_CUDA_H */
diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h
index 54f8d10..641232f 100644
--- a/libavutil/hwcontext_internal.h
+++ b/libavutil/hwcontext_internal.h
@@ -86,6 +86,7 @@ struct AVHWFramesInternal {
AVBufferPool *pool_internal;
};
+extern const HWContextType ff_hwcontext_type_cuda;
extern const HWContextType ff_hwcontext_type_vdpau;
#endif /* AVUTIL_HWCONTEXT_INTERNAL_H */
More information about the ffmpeg-cvslog
mailing list