[FFmpeg-cvslog] hwcontext: add a CUDA implementation

Anton Khirnov git at videolan.org
Wed Feb 24 16:23:50 CET 2016


ffmpeg | branch: master | Anton Khirnov <anton at khirnov.net> | Wed Jan 13 14:25:58 2016 +0100| [ad884d100259e55cb51a4239cd8a4fd5154c2073] | committer: Anton Khirnov

hwcontext: add a CUDA implementation

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ad884d100259e55cb51a4239cd8a4fd5154c2073
---

 doc/APIchanges                 |    2 +
 libavutil/Makefile             |    2 +
 libavutil/hwcontext.c          |    3 +
 libavutil/hwcontext.h          |    1 +
 libavutil/hwcontext_cuda.c     |  270 ++++++++++++++++++++++++++++++++++++++++
 libavutil/hwcontext_cuda.h     |   46 +++++++
 libavutil/hwcontext_internal.h |    1 +
 7 files changed, 325 insertions(+)

diff --git a/doc/APIchanges b/doc/APIchanges
index d815d9f..d42868e 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -17,6 +17,8 @@ API changes, most recent first:
   xxxxxxx buffer.h - Add av_buffer_pool_init2().
   xxxxxxx hwcontext.h - Add a new installed header hwcontext.h with a new API
                         for handling hwaccel frames.
+  xxxxxxx hwcontext_cuda.h - Add a new installed header hwcontext_cuda.h with
+                             CUDA-specific hwcontext definitions.
   xxxxxxx hwcontext_vdpau.h - Add a new installed header hwcontext_vdpau.h with
                               VDPAU-specific hwcontext definitions.
   xxxxxxx pixfmt.h - Add AV_PIX_FMT_CUDA.
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 180c37e..bc85925 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -24,6 +24,7 @@ HEADERS = adler32.h                                                     \
           frame.h                                                       \
           hmac.h                                                        \
           hwcontext.h                                                   \
+          hwcontext_cuda.h                                              \
           hwcontext_vdpau.h                                             \
           imgutils.h                                                    \
           intfloat.h                                                    \
@@ -106,6 +107,7 @@ OBJS = adler32.o                                                        \
        xtea.o                                                           \
 
 OBJS-$(CONFIG_LZO)                      += lzo.o
+OBJS-$(CONFIG_CUDA)                     += hwcontext_cuda.o
 OBJS-$(CONFIG_VDPAU)                    += hwcontext_vdpau.o
 
 OBJS += $(COMPAT_OBJS:%=../compat/%)
diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index 2aa712e..b6d0518 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
@@ -29,6 +29,9 @@
 #include "pixfmt.h"
 
 static const HWContextType *hw_table[] = {
+#if CONFIG_CUDA
+    &ff_hwcontext_type_cuda,
+#endif
 #if CONFIG_VDPAU
     &ff_hwcontext_type_vdpau,
 #endif
diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
index b30a20a..81ae817 100644
--- a/libavutil/hwcontext.h
+++ b/libavutil/hwcontext.h
@@ -26,6 +26,7 @@
 
 enum AVHWDeviceType {
     AV_HWDEVICE_TYPE_VDPAU,
+    AV_HWDEVICE_TYPE_CUDA,
 };
 
 typedef struct AVHWDeviceInternal AVHWDeviceInternal;
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
new file mode 100644
index 0000000..6b87b61
--- /dev/null
+++ b/libavutil/hwcontext_cuda.c
@@ -0,0 +1,270 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "buffer.h"
+#include "common.h"
+#include "hwcontext.h"
+#include "hwcontext_internal.h"
+#include "hwcontext_cuda.h"
+#include "mem.h"
+#include "pixdesc.h"
+#include "pixfmt.h"
+
+typedef struct CUDAFramesContext { /* per-frames-context private data of the CUDA backend */
+    int shift_width, shift_height; /* chroma shifts of sw_format, filled in by cuda_frames_init() */
+} CUDAFramesContext;
+
+static const enum AVPixelFormat supported_formats[] = { /* sw_format values accepted by cuda_frames_init() */
+    AV_PIX_FMT_NV12,
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUV444P,
+};
+
+static void cuda_buffer_free(void *opaque, uint8_t *data)
+{
+    /* free callback of the buffers created in cuda_pool_alloc() */
+    AVHWFramesContext *frames_ctx = opaque;
+    AVCUDADeviceContext *cu_dev   = frames_ctx->device_ctx->hwctx;
+    CUdeviceptr dptr              = (CUdeviceptr)data;
+    CUcontext popped;
+
+    /* release the device memory while the device's CUDA context is current */
+    cuCtxPushCurrent(cu_dev->cuda_ctx);
+    cuMemFree(dptr);
+    cuCtxPopCurrent(&popped);
+}
+
+static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
+{
+    AVHWFramesContext *frames_ctx = opaque;
+    AVCUDADeviceContext *cu_dev   = frames_ctx->device_ctx->hwctx;
+
+    AVBufferRef *buf = NULL;
+    CUcontext popped = NULL;
+    CUdeviceptr dptr;
+
+    /* Pool allocator callback: hands out a buffer wrapping size bytes of
+     * device memory, or NULL if anything fails. */
+    if (cuCtxPushCurrent(cu_dev->cuda_ctx) != CUDA_SUCCESS) {
+        av_log(frames_ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
+        return NULL;
+    }
+
+    if (cuMemAlloc(&dptr, size) == CUDA_SUCCESS) {
+        /* wrap the device pointer; cuda_buffer_free() is registered as the free callback */
+        buf = av_buffer_create((uint8_t*)dptr, size, cuda_buffer_free, frames_ctx, 0);
+
+        /* no wrapper could be created, so release the device memory right away */
+        if (!buf)
+            cuMemFree(dptr);
+    }
+
+    /* undo the push above on success and on failure alike */
+    cuCtxPopCurrent(&popped);
+
+    return buf;
+}
+
+static int cuda_frames_init(AVHWFramesContext *ctx)
+{
+    CUDAFramesContext *priv = ctx->internal->priv;
+    int idx = 0, size;
+
+    /* reject any sw_format that is not listed in supported_formats[] */
+    while (idx < FF_ARRAY_ELEMS(supported_formats) &&
+           ctx->sw_format != supported_formats[idx])
+        idx++;
+    if (idx >= FF_ARRAY_ELEMS(supported_formats)) {
+        av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
+               av_get_pix_fmt_name(ctx->sw_format));
+        return AVERROR(ENOSYS);
+    }
+
+    /* remember the chroma shifts; the transfer functions use them for plane heights */
+    av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
+
+    /* a pool that is already set on the context is left untouched */
+    if (ctx->pool)
+        return 0;
+
+    /*
+     * Otherwise set up an internal pool whose buffers hold one whole frame:
+     * 3 bytes per pixel for YUV444P, 3/2 bytes per pixel for NV12 and YUV420P.
+     */
+    if (ctx->sw_format == AV_PIX_FMT_YUV444P)
+        size = ctx->width * ctx->height * 3;
+    else
+        size = ctx->width * ctx->height * 3 / 2;
+
+    ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
+    if (!ctx->internal->pool_internal)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
+{
+    const int stride = ctx->width, luma_size = ctx->width * ctx->height;
+    if (!(frame->buf[0] = av_buffer_pool_get(ctx->pool)))
+        return AVERROR(ENOMEM);
+    /* all planes are carved out of the single pool buffer, rows are not padded */
+    switch (ctx->sw_format) {
+    case AV_PIX_FMT_NV12: /* plane 0 followed by plane 1, both with a full-width stride */
+        frame->linesize[0] = stride;
+        frame->linesize[1] = stride;
+        frame->data[0]     = frame->buf[0]->data;
+        frame->data[1]     = frame->buf[0]->data + luma_size;
+        break;
+    case AV_PIX_FMT_YUV420P: /* plane 2 is placed before plane 1 inside the buffer */
+        frame->linesize[0] = stride;
+        frame->linesize[1] = stride / 2;
+        frame->linesize[2] = stride / 2;
+        frame->data[0]     = frame->buf[0]->data;
+        frame->data[2]     = frame->buf[0]->data + luma_size;
+        frame->data[1]     = frame->buf[0]->data + luma_size + luma_size / 4;
+        break;
+    case AV_PIX_FMT_YUV444P: /* three full-size planes back to back */
+        frame->linesize[0] = stride;
+        frame->linesize[1] = stride;
+        frame->linesize[2] = stride;
+        frame->data[0]     = frame->buf[0]->data;
+        frame->data[1]     = frame->buf[0]->data + luma_size;
+        frame->data[2]     = frame->buf[0]->data + luma_size + luma_size;
+        break;
+    default:
+        av_frame_unref(frame);
+        return AVERROR_BUG;
+    }
+
+    frame->width  = ctx->width;
+    frame->height = ctx->height;
+    frame->format = AV_PIX_FMT_CUDA;
+
+    return 0;
+}
+
+static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
+                                     enum AVHWFrameTransferDirection dir,
+                                     enum AVPixelFormat **formats)
+{
+    /* sw_format is the only transfer format; dir is not consulted; list ends with AV_PIX_FMT_NONE */
+    enum AVPixelFormat *list = av_malloc_array(2, sizeof(*list));
+
+    if (!list)
+        return AVERROR(ENOMEM);
+
+    list[1] = AV_PIX_FMT_NONE;
+    list[0] = ctx->sw_format;
+
+    *formats = list;
+
+    return 0;
+}
+
+/* Copy every plane of the CUDA frame src into the host-memory frame dst; 0 on success, AVERROR_UNKNOWN on failure. */
+static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
+{
+    CUDAFramesContext           *priv = ctx->internal->priv;
+    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+
+    CUcontext dummy;
+    CUresult err;
+    int i, ret = 0;
+
+    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    if (err != CUDA_SUCCESS)
+        return AVERROR_UNKNOWN;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
+        CUDA_MEMCPY2D cpy = {
+            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
+            .dstMemoryType = CU_MEMORYTYPE_HOST,
+            .srcDevice     = (CUdeviceptr)src->data[i],
+            .dstHost       = dst->data[i],
+            .srcPitch      = src->linesize[i],
+            .dstPitch      = dst->linesize[i],
+            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
+            .Height        = src->height >> (i ? priv->shift_height : 0), /* planes after the first use the chroma shift */
+        };
+
+        err = cuMemcpy2D(&cpy);
+        if (err != CUDA_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
+            ret = AVERROR_UNKNOWN;
+            break; /* do not return here: the context pushed above must still be popped */
+        }
+    }
+
+    cuCtxPopCurrent(&dummy);
+    return ret;
+}
+
+/* Copy every plane of the host-memory frame src into the CUDA frame dst; 0 on success, AVERROR_UNKNOWN on failure. */
+static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
+{
+    CUDAFramesContext           *priv = ctx->internal->priv;
+    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+
+    CUcontext dummy;
+    CUresult err;
+    int i, ret = 0;
+
+    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    if (err != CUDA_SUCCESS)
+        return AVERROR_UNKNOWN;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
+        CUDA_MEMCPY2D cpy = {
+            .srcMemoryType = CU_MEMORYTYPE_HOST,
+            .dstMemoryType = CU_MEMORYTYPE_DEVICE,
+            .srcHost       = src->data[i],
+            .dstDevice     = (CUdeviceptr)dst->data[i],
+            .srcPitch      = src->linesize[i],
+            .dstPitch      = dst->linesize[i],
+            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
+            .Height        = src->height >> (i ? priv->shift_height : 0), /* planes after the first use the chroma shift */
+        };
+
+        err = cuMemcpy2D(&cpy);
+        if (err != CUDA_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n");
+            ret = AVERROR_UNKNOWN;
+            break; /* do not return here: the context pushed above must still be popped */
+        }
+    }
+
+    cuCtxPopCurrent(&dummy);
+    return ret;
+}
+
+const HWContextType ff_hwcontext_type_cuda = { /* CUDA entry referenced from hw_table[] in hwcontext.c */
+    .type                 = AV_HWDEVICE_TYPE_CUDA,
+    .name                 = "CUDA",
+    /* sizes of the device hwctx and of the per-frames private data */
+    .device_hwctx_size    = sizeof(AVCUDADeviceContext),
+    .frames_priv_size     = sizeof(CUDAFramesContext),
+    /* callbacks implemented in this file */
+    .frames_init          = cuda_frames_init,
+    .frames_get_buffer    = cuda_get_buffer,
+    .transfer_get_formats = cuda_transfer_get_formats,
+    .transfer_data_to     = cuda_transfer_data_to,
+    .transfer_data_from   = cuda_transfer_data_from,
+    /* AV_PIX_FMT_CUDA is the only entry; the list ends with AV_PIX_FMT_NONE */
+    .pix_fmts             = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
+};
diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
new file mode 100644
index 0000000..7f067c7
--- /dev/null
+++ b/libavutil/hwcontext_cuda.h
@@ -0,0 +1,46 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#ifndef AVUTIL_HWCONTEXT_CUDA_H
+#define AVUTIL_HWCONTEXT_CUDA_H
+
+#include <cuda.h>
+
+#include "pixfmt.h"
+
+/**
+ * @file
+ * An API-specific header for AV_HWDEVICE_TYPE_CUDA.
+ *
+ * This API supports dynamic frame pools. AVHWFramesContext.pool must return
+ * AVBufferRefs whose data pointer is a CUdeviceptr.
+ */
+
+/**
+ * This struct is allocated as AVHWDeviceContext.hwctx
+ */
+typedef struct AVCUDADeviceContext {
+    CUcontext cuda_ctx; /* pushed as the current context around the CUDA calls made in hwcontext_cuda.c */
+} AVCUDADeviceContext;
+
+/**
+ * AVHWFramesContext.hwctx is currently not used
+ */
+
+#endif /* AVUTIL_HWCONTEXT_CUDA_H */
diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h
index 54f8d10..641232f 100644
--- a/libavutil/hwcontext_internal.h
+++ b/libavutil/hwcontext_internal.h
@@ -86,6 +86,7 @@ struct AVHWFramesInternal {
     AVBufferPool *pool_internal;
 };
 
+extern const HWContextType ff_hwcontext_type_cuda;
 extern const HWContextType ff_hwcontext_type_vdpau;
 
 #endif /* AVUTIL_HWCONTEXT_INTERNAL_H */



More information about the ffmpeg-cvslog mailing list