[FFmpeg-devel] [PATCH] Vulkan hwcontext and filters
Mark Thompson
sw at jkqxz.net
Sat Jan 18 22:23:00 EET 2020
On 10/01/2020 21:05, Lynne wrote:
> From d5f1bbc61fab452803443511b1241931169359b7 Mon Sep 17 00:00:00 2001
> From: Lynne <dev at lynne.ee>
> Date: Wed, 28 Aug 2019 21:58:10 +0100
> Subject: [PATCH 2/9] lavu: add Vulkan hwcontext code
>
> This commit adds the necessary code to initialize and use a Vulkan device
> within the hwcontext libavutil framework.
> Currently direct mapping to VAAPI and DRM frames is functional, and
> transfers to CUDA and native frames are supported.
>
> Lets hope the future Vulkan video decode extension fits well within this
> framework.
> ---
Yay!
> configure | 17 +-
> doc/APIchanges | 4 +
> libavutil/Makefile | 3 +
> libavutil/hwcontext.c | 4 +
> libavutil/hwcontext.h | 1 +
> libavutil/hwcontext_cuda.c | 121 ++
> libavutil/hwcontext_internal.h | 1 +
> libavutil/hwcontext_vulkan.c | 2804 ++++++++++++++++++++++++++++++++
> libavutil/hwcontext_vulkan.h | 152 ++
> libavutil/pixdesc.c | 4 +
> libavutil/pixfmt.h | 7 +
> 11 files changed, 3112 insertions(+), 6 deletions(-)
> create mode 100644 libavutil/hwcontext_vulkan.c
> create mode 100644 libavutil/hwcontext_vulkan.h
The CUDA parts look like they could be split off into a separate commit? (It's already huge.)
>
> diff --git a/configure b/configure
> index 46f2038627..3113ebfdd8 100755
> --- a/configure
> +++ b/configure
> @@ -309,6 +309,7 @@ External library support:
> --enable-openssl enable openssl, needed for https support
> if gnutls, libtls or mbedtls is not used [no]
> --enable-pocketsphinx enable PocketSphinx, needed for asr filter [no]
> + --enable-vulkan enable Vulkan code [no]
Alphabetical order.
> --disable-sndio disable sndio support [autodetect]
> --disable-schannel disable SChannel SSP, needed for TLS support on
> Windows if openssl and gnutls are not used [autodetect]
> @@ -1549,11 +1550,11 @@ require_cc(){
> }
>
> require_cpp(){
> - name="$1"
> - headers="$2"
> - classes="$3"
> - shift 3
> - check_lib_cpp "$headers" "$classes" "$@" || die "ERROR: $name not found"
> + log require_cpp "$@"
> + name_version="$1"
> + name="${1%% *}"
> + shift
> + check_lib_cpp "$name" "$@" || die "ERROR: $name_version not found"
> }
This change looks unrelated. (require_cpp isn't used in this patch at all.)
>
> require_headers(){
> @@ -1854,6 +1855,7 @@ HWACCEL_LIBRARY_LIST="
> mmal
> omx
> opencl
> + vulkan
> "
>
> DOCUMENT_LIST="
> @@ -3639,7 +3641,7 @@ avformat_deps="avcodec avutil"
> avformat_suggest="libm network zlib"
> avresample_deps="avutil"
> avresample_suggest="libm"
> -avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt"
> +avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi vulkan videotoolbox corefoundation corevideo coremedia bcrypt"
> postproc_deps="avutil gpl"
> postproc_suggest="libm"
> swresample_deps="avutil"
> @@ -6626,6 +6628,9 @@ enabled vdpau &&
>
> enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.h" DtsCrystalHDVersion -lcrystalhd
>
> +enabled vulkan &&
> + require_pkg_config vulkan "vulkan >= 1.1.97" "vulkan/vulkan.h" vkCreateInstance
Presumably you have some specific requirement in mind which wants this version - can you note it somewhere? (Either here or in the commit message.)
> +
> if enabled x86; then
> case $target_os in
> mingw32*|mingw64*|win32|win64|linux|cygwin*)
> ...
> diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
> index f5a4b62387..f874af9f8f 100644
> --- a/libavutil/hwcontext.h
> +++ b/libavutil/hwcontext.h
> @@ -36,6 +36,7 @@ enum AVHWDeviceType {
> AV_HWDEVICE_TYPE_DRM,
> AV_HWDEVICE_TYPE_OPENCL,
> AV_HWDEVICE_TYPE_MEDIACODEC,
> + AV_HWDEVICE_TYPE_VULKAN,
> };
>
> typedef struct AVHWDeviceInternal AVHWDeviceInternal;
> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> index 30611b1912..18abb87bbd 100644
> --- a/libavutil/hwcontext_cuda.c
> +++ b/libavutil/hwcontext_cuda.c
> @@ -21,6 +21,9 @@
> #include "hwcontext.h"
> #include "hwcontext_internal.h"
> #include "hwcontext_cuda_internal.h"
> +#if CONFIG_VULKAN
> +#include "hwcontext_vulkan.h"
> +#endif
> #include "cuda_check.h"
> #include "mem.h"
> #include "pixdesc.h"
> @@ -42,6 +45,9 @@ static const enum AVPixelFormat supported_formats[] = {
> AV_PIX_FMT_YUV444P16,
> AV_PIX_FMT_0RGB32,
> AV_PIX_FMT_0BGR32,
> +#if CONFIG_VULKAN
> + AV_PIX_FMT_VULKAN,
> +#endif
Do all devices we can do CUDA on also support Vulkan? If not, this should probably filter it out in get_constraints() to avoid exposing something which can't possibly work.
> };
>
> #define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x)
> @@ -205,6 +211,10 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
> CUcontext dummy;
> int i, ret;
>
> + /* We don't support transfers to HW devices. */
> + if (dst->hw_frames_ctx)
> + return AVERROR(ENOSYS);
> +
> ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
> if (ret < 0)
> return ret;
> @@ -247,6 +257,10 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
> CUcontext dummy;
> int i, ret;
>
> + /* We don't support transfers from HW devices. */
> + if (src->hw_frames_ctx)
> + return AVERROR(ENOSYS);
> +
> ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
> if (ret < 0)
> return ret;
> @@ -389,6 +403,112 @@ error:
> return AVERROR_UNKNOWN;
> }
>
> +static int cuda_device_derive(AVHWDeviceContext *device_ctx,
> + AVHWDeviceContext *src_ctx,
> + int flags) {
> + AVCUDADeviceContext *hwctx = device_ctx->hwctx;
> + CudaFunctions *cu;
> + const char *src_uuid = NULL;
> + CUcontext dummy;
> + int ret, i, device_count, dev_active = 0;
> + unsigned int dev_flags = 0;
> +
> + const unsigned int desired_flags = CU_CTX_SCHED_BLOCKING_SYNC;
> +
> + switch (src_ctx->type) {
> +#if CONFIG_VULKAN
> + case AV_HWDEVICE_TYPE_VULKAN: {
> + AVVulkanDeviceContext *vkctx = src_ctx->hwctx;
> + src_uuid = vkctx->device_uuid;
> + break;
> + }
> +#endif
> + default:
> + return AVERROR(ENOSYS);
> + }
> +
> + if (!src_uuid) {
> + av_log(device_ctx, AV_LOG_ERROR,
> + "Failed to get UUID of source device.\n");
> + goto error;
> + }
> +
> + if (cuda_device_init(device_ctx) < 0)
> + goto error;
> +
> + cu = hwctx->internal->cuda_dl;
> +
> + ret = CHECK_CU(cu->cuInit(0));
> + if (ret < 0)
> + goto error;
> +
> + ret = CHECK_CU(cu->cuDeviceGetCount(&device_count));
> + if (ret < 0)
> + goto error;
> +
> + hwctx->internal->cuda_device = -1;
> + for (i = 0; i < device_count; i++) {
> + CUdevice dev;
> + CUuuid uuid;
> +
> + ret = CHECK_CU(cu->cuDeviceGet(&dev, i));
> + if (ret < 0)
> + goto error;
> +
> + ret = CHECK_CU(cu->cuDeviceGetUuid(&uuid, dev));
> + if (ret < 0)
> + goto error;
> +
> + if (memcmp(src_uuid, uuid.bytes, sizeof (uuid.bytes)) == 0) {
> + hwctx->internal->cuda_device = dev;
> + break;
> + }
> + }
> +
> + if (hwctx->internal->cuda_device == -1) {
> + av_log(device_ctx, AV_LOG_ERROR, "Could not derive CUDA device.\n");
This error message would be clearer as something like "Can't find a CUDA device matching the supplied Vulkan device".
> + goto error;
> + }
> +
> + hwctx->internal->flags = flags;
> +
> + if (flags & AV_CUDA_USE_PRIMARY_CONTEXT) {
> + ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device, &dev_flags, &dev_active));
> + if (ret < 0)
> + goto error;
> +
> + if (dev_active && dev_flags != desired_flags) {
> + av_log(device_ctx, AV_LOG_ERROR, "Primary context already active with incompatible flags.\n");
> + goto error;
> + } else if (dev_flags != desired_flags) {
> + ret = CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device, desired_flags));
> + if (ret < 0)
> + goto error;
> + }
> +
> + ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->internal->cuda_device));
> + if (ret < 0)
> + goto error;
> + } else {
> + ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, desired_flags, hwctx->internal->cuda_device));
> + if (ret < 0)
> + goto error;
> +
> + CHECK_CU(cu->cuCtxPopCurrent(&dummy));
> + }
> +
> + hwctx->internal->is_allocated = 1;
> +
> + // Setting stream to NULL will make functions automatically use the default CUstream
> + hwctx->stream = NULL;
> +
> + return 0;
> +
> +error:
> + cuda_device_uninit(device_ctx);
> + return AVERROR_UNKNOWN;
> +}
> +
> const HWContextType ff_hwcontext_type_cuda = {
> .type = AV_HWDEVICE_TYPE_CUDA,
> .name = "CUDA",
> @@ -397,6 +517,7 @@ const HWContextType ff_hwcontext_type_cuda = {
> .frames_priv_size = sizeof(CUDAFramesContext),
>
> .device_create = cuda_device_create,
> + .device_derive = cuda_device_derive,
> .device_init = cuda_device_init,
> .device_uninit = cuda_device_uninit,
> .frames_get_constraints = cuda_frames_get_constraints,
> ...
> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
> new file mode 100644
> index 0000000000..d4eb8ffd35
> --- /dev/null
> +++ b/libavutil/hwcontext_vulkan.c
> @@ -0,0 +1,2804 @@
> ...
> +
> +static const struct {
> + enum AVPixelFormat pixfmt;
> + const VkFormat vkfmts[3];
> +} vk_pixfmt_map[] = {
> + { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } },
> + { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } },
> + { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
> +
> + { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
> + { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
Is P010 still safe when the low bits might have any value?
> + { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
> +
> + { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
> + { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
> + { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
> +
> + { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
> + { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
> + { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
> +
> + { AV_PIX_FMT_ABGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
> + { AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } },
> + { AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } },
> + { AV_PIX_FMT_RGB24, { VK_FORMAT_R8G8B8_UNORM } },
> + { AV_PIX_FMT_BGR24, { VK_FORMAT_B8G8R8_UNORM } },
> + { AV_PIX_FMT_RGB48, { VK_FORMAT_R16G16B16_UNORM } },
> + { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
> + { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
> + { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
> + { AV_PIX_FMT_BGR0, { VK_FORMAT_B8G8R8A8_UNORM } },
> + { AV_PIX_FMT_0BGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
> + { AV_PIX_FMT_RGB0, { VK_FORMAT_R8G8B8A8_UNORM } },
> +
> + { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
> +};
> +
> ...
> +static int check_extensions(AVHWDeviceContext *ctx, int dev,
> + const char * const **dst, uint32_t *num, int debug)
> +{
> + const char *tstr;
> + const char **extension_names = NULL;
> + VulkanDevicePriv *p = ctx->internal->priv;
> + AVVulkanDeviceContext *hwctx = ctx->hwctx;
> + int err = 0, found, extensions_found = 0;
> +
> + const char *mod;
> + int optional_exts_num;
> + uint32_t sup_ext_count;
> + VkExtensionProperties *sup_ext;
> + const VulkanOptExtension *optional_exts;
> +
> + if (!dev) {
> + mod = "instance";
> + optional_exts = optional_instance_exts;
> + optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
> + vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
> + sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
> + if (!sup_ext)
> + return AVERROR(ENOMEM);
> + vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
> + } else {
> + mod = "device";
> + optional_exts = optional_device_exts;
> + optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
> + vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
> + &sup_ext_count, NULL);
> + sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
> + if (!sup_ext)
> + return AVERROR(ENOMEM);
> + vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
> + &sup_ext_count, sup_ext);
> + }
> +
> + for (int i = 0; i < optional_exts_num; i++) {
> + int req = optional_exts[i].flag & EXT_REQUIRED;
> + tstr = optional_exts[i].name;
> +
> + found = 0;
> + for (int j = 0; j < sup_ext_count; j++) {
> + if (!strcmp(tstr, sup_ext[j].extensionName)) {
> + found = 1;
> + break;
> + }
> + }
> + if (!found) {
> + int lvl = req ? AV_LOG_ERROR : AV_LOG_VERBOSE;
> + av_log(ctx, lvl, "Extension \"%s\" not found!\n", tstr);
> + if (req) {
> + err = AVERROR(EINVAL);
> + goto end;
> + }
> + continue;
> + }
> + if (!req)
> + p->extensions |= optional_exts[i].flag;
> +
> + av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
> +
> + ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
> + }
> +
> + if (debug && !dev) {
> + tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
> + found = 0;
> + for (int j = 0; j < sup_ext_count; j++) {
> + if (!strcmp(tstr, sup_ext[j].extensionName)) {
> + found = 1;
> + break;
> + }
> + }
> + if (found) {
> + ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
> + } else {
> + av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
> + tstr);
> + err = AVERROR(EINVAL);
> + goto end;
> + }
> + }
> +
> + *dst = extension_names;
> + *num = extensions_found;
> +
> +end:
> + av_free(sup_ext);
I think this leaks the extension_names array on some of the later failure paths.
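Perhaps something like this at the end (untested, and assuming ADD_VAL_TO_LIST strdups the entries it adds):

end:
    if (err) {
        for (int i = 0; i < extensions_found; i++)
            av_free((void *)extension_names[i]);
        av_free(extension_names);
    }
    av_free(sup_ext);
    return err;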
> + return err;
> +}
> +
> ...
> +
> +typedef struct VulkanDeviceSelection {
> + uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
> + int has_uuid;
> + const char *name; /* Will use this second unless NULL */
> + uint32_t pci_device; /* Will use this second unless 0x0 */
> + uint32_t vendor_id; /* Last resort to find something deterministic */
> + int index; /* Finally fall back to index */
> +} VulkanDeviceSelection;
> +
> +static const char *vk_dev_type(enum VkPhysicalDeviceType type)
> +{
> + switch (type) {
> + case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
> + case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: return "discrete";
> + case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: return "virtual";
> + case VK_PHYSICAL_DEVICE_TYPE_CPU: return "software";
> + default: return "unknown";
> + }
> +}
> +
> +/* Finds a device */
> +static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
> +{
> ...
Yay, I like the improvement to the selection options :)
> +}
> +
> ...
> +
> +static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
> +{
> + AVVulkanDeviceContext *hwctx = ctx->hwctx;
> +
> + vkDestroyFence(hwctx->act_dev, cmd->fence, hwctx->alloc);
> +
> + if (cmd->buf)
> + vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, 1, &cmd->buf);
> + if (cmd->pool)
> + vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
> +}
> +
> +static void vulkan_device_free(AVHWDeviceContext *ctx)
> +{
> + VulkanDevicePriv *p = ctx->internal->priv;
> + AVVulkanDeviceContext *hwctx = ctx->hwctx;
> +
> + free_exec_ctx(ctx, &p->cmd);
A device destroyed before it is fully created need not have a valid exec_ctx.
(E.g. "./ffmpeg_g -init_hw_device vulkan:123456789" segfaults here.)
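Guarding the call might be enough (untested sketch - or track explicitly whether create_exec_ctx() actually ran):

    /* The exec ctx only exists once device creation got far enough. */
    if (hwctx->act_dev)
        free_exec_ctx(ctx, &p->cmd);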
> ...
> +
> +static int vulkan_device_init(AVHWDeviceContext *ctx)
> +{
> + int err;
> + uint32_t queue_num;
> + AVVulkanDeviceContext *hwctx = ctx->hwctx;
> + VulkanDevicePriv *p = ctx->internal->priv;
> +
> + vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
> + if (!queue_num) {
> + av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
> + return AVERROR_EXTERNAL;
> + }
> +
> + if (hwctx->queue_family_index >= queue_num ||
> + hwctx->queue_family_tx_index >= queue_num ||
> + hwctx->queue_family_comp_index >= queue_num) {
> + av_log(ctx, AV_LOG_ERROR, "Invalid queue index!\n");
Maybe include the invalid indices in the message.
> + return AVERROR_EXTERNAL;
I think this should be EINVAL - the user supplied an invalid queue index.
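E.g. (sketch):

    av_log(ctx, AV_LOG_ERROR, "Invalid queue index: graphics %i, transfer %i, "
           "compute %i (device only has %i queue families)!\n",
           hwctx->queue_family_index, hwctx->queue_family_tx_index,
           hwctx->queue_family_comp_index, (int)queue_num);
    return AVERROR(EINVAL);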
> + }
> +
> + /* Create exec context - if there's something invalid this will error out */
> + err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index);
> + if (err)
> + return err;
> +
> + /* Get device capabilities */
> + vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
> +
> + return 0;
> +}
> +
> +static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
> + AVDictionary *opts, int flags)
> +{
> + VulkanDeviceSelection dev_select = { 0 };
> + if (device && device[0]) {
> + char *end = NULL;
> + dev_select.index = strtol(device, &end, 10);
> + if (end == device) {
> + dev_select.index = 0;
> + dev_select.name = device;
> + }
> + }
Is it worth making uuid=f00 work here as well? (From opts rather than device: "-init_hw_device vulkan:,uuid=f00".)
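Something like this would do it, I think (rough sketch - parse_hex_uuid() is a made-up helper that would need writing):

    AVDictionaryEntry *opt_uuid = av_dict_get(opts, "uuid", NULL, 0);
    if (opt_uuid && parse_hex_uuid(opt_uuid->value, dev_select.uuid,
                                   VK_UUID_SIZE) >= 0)
        dev_select.has_uuid = 1;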
> +
> + return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
> +}
> +
> +static int vulkan_device_derive(AVHWDeviceContext *ctx,
> + AVHWDeviceContext *src_ctx, int flags)
> +{
> + av_unused VulkanDeviceSelection dev_select = { 0 };
> +
> + /* If there's only one device on the system, then even if its not covered
> + * by the following checks (e.g. non-PCIe ARM GPU), having an empty
> + * dev_select will mean it'll get picked. */
Kind of evil, but makes sense.
> + switch(src_ctx->type) {
> +#if CONFIG_LIBDRM
> +#if CONFIG_VAAPI
> + case AV_HWDEVICE_TYPE_VAAPI: {
> + AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
> +
> + const char *vendor = vaQueryVendorString(src_hwctx->display);
> + if (!vendor) {
> + av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
> + return AVERROR_EXTERNAL;
> + }
> +
> + if (strstr(vendor, "Intel"))
> + dev_select.vendor_id = 0x8086;
> + if (strstr(vendor, "AMD"))
> + dev_select.vendor_id = 0x1002;
Yuck, but I don't see a better way :(
I might look into adding something to libva to allow this to work properly, since this combination will happen.
> +
> + return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
> + }
> +#endif
> + case AV_HWDEVICE_TYPE_DRM: {
> + AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;
> +
> + drmDevice *drm_dev_info;
> + int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
> + if (err) {
> + av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
> + return AVERROR_EXTERNAL;
> + }
> +
> + if (drm_dev_info->bustype == DRM_BUS_PCI)
> + dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
> +
> + drmFreeDevice(&drm_dev_info);
> +
> + return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
> + }
> +#endif
> +#if CONFIG_CUDA
> + case AV_HWDEVICE_TYPE_CUDA: {
> + AVHWDeviceContext *cuda_cu = src_ctx;
> + AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
> + AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
> + CudaFunctions *cu = cu_internal->cuda_dl;
> +
> + int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
> + cu_internal->cuda_device));
> + if (ret < 0) {
> + av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
> + return AVERROR_EXTERNAL;
> + }
> +
> + dev_select.has_uuid = 1;
> +
> + return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
> + }
> +#endif
> + default:
> + return AVERROR(ENOSYS);
> + }
> +}
> +
> +static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
> + const void *hwconfig,
> + AVHWFramesConstraints *constraints)
> +{
> + int count = 0;
> + AVVulkanDeviceContext *hwctx = ctx->hwctx;
> + VulkanDevicePriv *p = ctx->internal->priv;
> +
> + for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
> + count += pixfmt_is_supported(hwctx, i, p->use_linear_images);
> +
> +#if CONFIG_CUDA
> + count++;
I think you should be able to test whether a device supports CUDA here, so it isn't included for non-Nvidia devices?
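Maybe keying off the vendor ID is enough (sketch, assuming CUDA interop only ever works on Nvidia devices):

#if CONFIG_CUDA
    if (p->props.vendorID == 0x10de /* Nvidia */)
        count++;
#endif

(with the same condition around the AV_PIX_FMT_CUDA entry below.)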
> +#endif
> +
> + constraints->valid_sw_formats = av_malloc_array(count + 1,
> + sizeof(enum AVPixelFormat));
> + if (!constraints->valid_sw_formats)
> + return AVERROR(ENOMEM);
> +
> + count = 0;
> + for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
> + if (pixfmt_is_supported(hwctx, i, p->use_linear_images))
> + constraints->valid_sw_formats[count++] = i;
> +
> +#if CONFIG_CUDA
> + constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
> +#endif
> + constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
> +
> + constraints->min_width = 0;
> + constraints->min_height = 0;
> + constraints->max_width = p->props.limits.maxImageDimension2D;
> + constraints->max_height = p->props.limits.maxImageDimension2D;
> +
> + constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
> + if (!constraints->valid_hw_formats)
> + return AVERROR(ENOMEM);
> +
> + constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
> + constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
> +
> + return 0;
> +}
> +
> ...
> +
> +typedef struct VulkanFramesPriv {
> + VulkanExecCtx cmd;
> +} VulkanFramesPriv;
I think this definition should go at the top, so that it's easy to find what the priv on a Vulkan HWFC is.
> ...
> +
> +static void vulkan_frame_free(void *opaque, uint8_t *data)
> +{
> + AVVkFrame *f = (AVVkFrame *)data;
> + AVHWFramesContext *hwfc = opaque;
> + AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
> + int planes = av_pix_fmt_count_planes(hwfc->sw_format);
> +
> + if (!f)
> + return;
When can you have !f? That seems invalid in an "assert that f is not null" type of way.
> +
> + vulkan_free_internal(f->internal);
> +
> + for (int i = 0; i < planes; i++) {
> + vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
> + vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
> + vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
> + }
> +
> + av_free(f);
> +}
> +
> ...
> +
> +static int vulkan_frames_init(AVHWFramesContext *hwfc)
> +{
> + int err;
> + AVVkFrame *f;
> + AVVulkanFramesContext *hwctx = hwfc->hwctx;
> + VulkanFramesPriv *fp = hwfc->internal->priv;
> + AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
> + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
> +
> + if (hwfc->pool)
> + return 0;
> +
> + /* Default pool flags */
> + hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
> + VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
> +
> + hwctx->usage |= DEFAULT_USAGE_FLAGS;
Is it possible that this disallows some use-cases on a device where those default flags might not be supported? For example, some sort of magic image-writer like a video decoder where the output images can only ever be used as a source by non-magic operations. Baking that into the API (as in the comment on usage in the header) seems bad if so.
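Maybe only apply the defaults when the user hasn't asked for anything, something like:

    /* Only fall back to the default usage if the user didn't set any. */
    if (!hwctx->usage)
        hwctx->usage = DEFAULT_USAGE_FLAGS;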
> +
> + err = create_exec_ctx(hwfc->device_ctx, &fp->cmd,
> + dev_hwctx->queue_family_tx_index);
> + if (err)
> + return err;
> +
> + /* Test to see if allocation will fail */
> + err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
> + hwctx->create_pnext);
> + if (err)
> + return err;
> +
> + vulkan_frame_free(hwfc, (uint8_t *)f);
> +
> + hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
> + hwfc, vulkan_pool_alloc,
> + NULL);
> + if (!hwfc->internal->pool_internal)
> + return AVERROR(ENOMEM);
> +
> + return 0;
> +}
> +
> ...
> +
> +static const struct {
> + uint32_t va_fourcc;
va_fourcc?
> + VkFormat vk_format;
> +} vulkan_drm_format_map[] = {
> + { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM },
> + { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM },
> + { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM },
> + { DRM_FORMAT_RG88, VK_FORMAT_R8G8_UNORM },
> + { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM },
> + { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM },
Are RG88 and RG1616 right? I thought you would always want them reversed.
> + { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
> + { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
> + { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
> + { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
> +};
> +
> +static inline VkFormat drm_to_vulkan_fmt(uint32_t va_fourcc)
va_fourcc?
> +{
> + for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
> + if (vulkan_drm_format_map[i].va_fourcc == va_fourcc)
> + return vulkan_drm_format_map[i].vk_format;
> + return VK_FORMAT_UNDEFINED;
> +}
> +
> +static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
> + AVDRMFrameDescriptor *desc)
> +{
> + int err = 0;
> + VkResult ret;
> + AVVkFrame *f;
> + AVHWDeviceContext *ctx = hwfc->device_ctx;
> + AVVulkanDeviceContext *hwctx = ctx->hwctx;
> + VulkanDevicePriv *p = ctx->internal->priv;
> + const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
> + const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format);
> + const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS;
> + VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS];
> + VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS];
> + VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
> +
> + VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR);
> +
> + for (int i = 0; i < desc->nb_layers; i++) {
> + if (desc->layers[i].nb_planes > 1) {
> + av_log(ctx, AV_LOG_ERROR, "Cannot import DMABUFS with more than 1 "
> + "plane per layer!\n");
> + return AVERROR(EINVAL);
> + }
> +
> + if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
> + av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format!\n");
Maybe say what the unsupported format is, to help anyone reporting the message - this is probably relatively easy to hit (e.g. YUYV).
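E.g.:

    av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
           desc->layers[i].format);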
> + return AVERROR(EINVAL);
> + }
> + }
> +
> + if (!(f = av_mallocz(sizeof(*f)))) {
> + av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
> + err = AVERROR(ENOMEM);
> + goto fail;
> + }
> +
> + for (int i = 0; i < desc->nb_objects; i++) {
> + VkMemoryFdPropertiesKHR fdmp = {
> + .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
> + };
> + VkMemoryRequirements req = {
> + .size = desc->objects[i].size,
> + };
> + VkImportMemoryFdInfoKHR idesc = {
> + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
> + .handleType = htype,
> + .fd = desc->objects[i].fd,
> + };
> +
> + ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
> + desc->objects[i].fd, &fdmp);
> + if (ret != VK_SUCCESS) {
> + av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
> + vk_ret2str(ret));
> + err = AVERROR_EXTERNAL;
> + goto fail;
> + }
> +
> + req.memoryTypeBits = fdmp.memoryTypeBits;
> +
> + err = alloc_mem(ctx, &req, 0x0, &idesc, &f->flags, &f->mem[i]);
> + if (err)
> + return err;
> +
> + f->size[i] = desc->objects[i].size;
> + }
> +
> + f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
> + desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
> + VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
> +
> + for (int i = 0; i < desc->nb_layers; i++) {
> + VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
> + .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
> + .drmFormatModifier = desc->objects[0].format_modifier,
> + .drmFormatModifierPlaneCount = desc->layers[i].nb_planes,
> + .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data,
> + };
> +
> + VkExternalMemoryImageCreateInfo einfo = {
> + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
> + .pNext = has_modifiers ? &drm_info : NULL,
> + .handleTypes = htype,
> + };
> +
> + VkSemaphoreCreateInfo sem_spawn = {
> + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
> + };
> +
> + const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width;
> + const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height;
> +
> + VkImageCreateInfo image_create_info = {
> + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
> + .pNext = &einfo,
> + .imageType = VK_IMAGE_TYPE_2D,
> + .format = drm_to_vulkan_fmt(desc->layers[i].format),
> + .extent.width = p_w,
> + .extent.height = p_h,
> + .extent.depth = 1,
> + .mipLevels = 1,
> + .arrayLayers = 1,
> + .flags = VK_IMAGE_CREATE_ALIAS_BIT,
> + .tiling = f->tiling,
> + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
> + .usage = DEFAULT_USAGE_FLAGS,
> + .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
> + .samples = VK_SAMPLE_COUNT_1_BIT,
> + };
> +
> + for (int j = 0; j < desc->layers[i].nb_planes; j++) {
> + plane_data[j].offset = desc->layers[i].planes[j].offset;
> + plane_data[j].rowPitch = desc->layers[i].planes[j].pitch;
> + plane_data[j].size = 0; /* The specs say so for all 3 */
> + plane_data[j].arrayPitch = 0;
> + plane_data[j].depthPitch = 0;
> + }
> +
> + /* Create image */
> + ret = vkCreateImage(hwctx->act_dev, &image_create_info,
> + hwctx->alloc, &f->img[i]);
> + if (ret != VK_SUCCESS) {
> + av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
> + vk_ret2str(ret));
> + err = AVERROR(EINVAL);
> + goto fail;
> + }
> +
> + ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
> + hwctx->alloc, &f->sem[i]);
> + if (ret != VK_SUCCESS) {
> + av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
> + vk_ret2str(ret));
> + return AVERROR_EXTERNAL;
> + }
> +
> + /* We'd import a semaphore onto the one we created using
> + * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI
> + * offer us anything we could import and sync with, so instead
> + * leave the semaphore unsignalled and enjoy the validation spam. */
I have some vague intent to look into this subject myself. VAAPI needs proper async, and interop with Vulkan is an important use of that.
> +
> + f->layout[i] = image_create_info.initialLayout;
> + f->access[i] = 0x0;
> +
> + /* TODO: Fix to support more than 1 plane per layer */
> + bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
> + bind_info[i].pNext = NULL;
> + bind_info[i].image = f->img[i];
> + bind_info[i].memory = f->mem[desc->layers[i].planes[0].object_index];
> + bind_info[i].memoryOffset = desc->layers[i].planes[0].offset;
> + }
> +
> + /* Bind the allocated memory to the images */
> + ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info);
> + if (ret != VK_SUCCESS) {
> + av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
> + vk_ret2str(ret));
> + return AVERROR_EXTERNAL;
> + }
> +
> + *frame = f;
> +
> + return 0;
> +
> +fail:
> + for (int i = 0; i < planes; i++) {
> + vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
> + vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
> + vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
> + }
> +
> + av_free(f);
> +
> + return err;
> +}
> +
> ...
> +
> +static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
> + const AVFrame *src, int flags)
> +{
> + int err = 0;
> + VkResult ret;
> + AVVkFrame *f = (AVVkFrame *)src->data[0];
> + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
> + AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
> + const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
> + VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
> + VkImageDrmFormatModifierPropertiesEXT drm_mod = {
> + .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
> + };
Do you need a sync here for any writing being finished, or is it implicit somehow below?
> +
> + AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
> + if (!drm_desc)
> + return AVERROR(ENOMEM);
> +
> + err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
> + if (err < 0)
> + goto end;
> +
> + if (p->extensions & EXT_DRM_MODIFIER_FLAGS) {
> + VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT);
> + ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
> + &drm_mod);
> + if (ret != VK_SUCCESS) {
> + av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
> + err = AVERROR_EXTERNAL;
> + goto end;
> + }
> + }
> +
> + for (int i = 0; (i < planes) && (f->mem[i]); i++) {
> + VkMemoryGetFdInfoKHR export_info = {
> + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
> + .memory = f->mem[i],
> + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
> + };
> +
> + ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
> + &drm_desc->objects[i].fd);
> + if (ret != VK_SUCCESS) {
> + av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
> + err = AVERROR_EXTERNAL;
> + goto end;
> + }
> +
> + drm_desc->nb_objects++;
> + drm_desc->objects[i].size = f->size[i];
> + drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
> + }
> +
> + drm_desc->nb_layers = planes;
> + for (int i = 0; i < drm_desc->nb_layers; i++) {
> + VkSubresourceLayout layout;
> + VkImageSubresource sub = {
> + .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ?
> + VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
> + VK_IMAGE_ASPECT_COLOR_BIT,
> + };
> + VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];
> +
> + drm_desc->layers[i].format = vulkan_fmt_to_drm(plane_vkfmt);
> + drm_desc->layers[i].nb_planes = 1;
> +
> + if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
> + av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
> + err = AVERROR_PATCHWELCOME;
> + goto end;
> + }
> +
> + drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
> +
> + if (f->tiling != VK_IMAGE_TILING_OPTIMAL)
> + continue;
> +
> + vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
> + drm_desc->layers[i].planes[0].offset = layout.offset;
> + drm_desc->layers[i].planes[0].pitch = layout.rowPitch;
> + }
> +
> + dst->width = src->width;
> + dst->height = src->height;
> + dst->data[0] = (uint8_t *)drm_desc;
> +
> + av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");
> +
> + return 0;
> +
> +end:
> + av_free(drm_desc);
> + return err;
> +}
> +
> ...
> +
> +/* Technically we can use VK_EXT_external_memory_host to upload and download,
> + * however the alignment requirements make this unfeasible as both the pointer
> + * and the size of each plane need to be aligned to the minimum alignment
> + * requirement, which on all current implementations (anv, radv) is 4096.
> + * If the requirement gets relaxed (unlikely) this can easily be implemented. */
What does the pointer alignment requirement actually apply to?
(Could we lie about the start of the image by rounding to a page, and then do the transfer with an offset?)
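Rounding down seems workable in principle, something like (sketch - host_props / plane_data / plane_size are stand-in names, with the alignment taken from VkPhysicalDeviceExternalMemoryHostPropertiesEXT):

    size_t    align  = host_props.minImportedHostPointerAlignment; /* 4096 on anv/radv */
    uintptr_t start  = (uintptr_t)plane_data & ~(uintptr_t)(align - 1);
    size_t    offset = (uintptr_t)plane_data - start;
    size_t    size   = FFALIGN(offset + plane_size, align);
    /* Import [start, start + size) via VkImportMemoryHostPointerInfoEXT,
     * then do the copy at 'offset' within the imported buffer. */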
> ...
> +
> +static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
> + const AVFrame *src)
> +{
> + int err = 0;
> + AVFrame tmp;
> + AVVkFrame *f = (AVVkFrame *)src->data[0];
> + AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
> + ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
> + const int planes = av_pix_fmt_count_planes(dst->format);
> + int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h;
> +
> + if (dst->width > hwfc->width || dst->height > hwfc->height)
> + return AVERROR(EINVAL);
> +
> + /* For linear, host visiable images */
> + if (f->tiling == VK_IMAGE_TILING_LINEAR &&
> + f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
Is it generally expected that this is actually faster than the next option? (Because evil uncached memory is a thing.)
> + AVFrame *map = av_frame_alloc();
> + if (!map)
> + return AVERROR(ENOMEM);
> + map->format = dst->format;
> +
> + err = vulkan_map_frame_to_mem(hwfc, map, src, AV_HWFRAME_MAP_READ);
> + if (err)
> + return err;
> +
> + err = av_frame_copy(dst, map);
> + av_frame_free(&map);
> + return err;
> + }
> +
> + /* Create buffers */
> + for (int i = 0; i < planes; i++) {
> + int h = dst->height;
> + int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
> +
> + tmp.linesize[i] = dst->linesize[i];
> + err = create_buf(dev_ctx, &buf[i], p_height,
> + &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
> + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
> + }
> +
> + /* Copy image to buffer */
> + if ((err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
> + dst->width, dst->height, dst->format, 1)))
> + goto end;
> +
> + /* Map, copy buffer to frame, unmap */
> + if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 1)))
> + goto end;
> +
> + av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data,
> + tmp.linesize, dst->format, dst->width, dst->height);
> +
> + err = unmap_buffers(dev_ctx, buf, planes, 0);
> +
> +end:
> + for (int i = 0; i < planes; i++)
> + free_buf(dev_ctx, &buf[i]);
> +
> + return err;
> +}
> ...
> diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
> new file mode 100644
> index 0000000000..4146f14d6e
> --- /dev/null
> +++ b/libavutil/hwcontext_vulkan.h
> @@ -0,0 +1,152 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVUTIL_HWCONTEXT_VULKAN_H
> +#define AVUTIL_HWCONTEXT_VULKAN_H
> +
> +#include <vulkan/vulkan.h>
> +
> +/**
> + * @file
> + * API-specific header for AV_HWDEVICE_TYPE_VULKAN.
> + *
> + * For user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs
> + * with the data pointer set to an AVVkFrame.
> + */
> +
> +/**
> + * Main Vulkan context, allocated as AVHWDeviceContext.hwctx.
> + * All of these can be set before init to change what the context uses
> + */
> +typedef struct AVVulkanDeviceContext {
> + /**
> + * Custom memory allocator, else NULL
> + */
> + const VkAllocationCallbacks *alloc;
Do you have some specific use-case in mind for this? (You haven't used it in anything so far.)
> + /**
> + * Instance
> + */
> + VkInstance inst;
> + /**
> + * Physical device
> + */
> + VkPhysicalDevice phys_dev;
> + /**
> + * Activated physical device
> + */
> + VkDevice act_dev;
I weakly argue for not abbreviating names in the public API like this (but feel free to ignore me).
> + /**
> + * Queue family index for graphics
> + */
> + int queue_family_index;
> + /**
> + * Queue family index for transfer ops only. By default, the priority order
> + * is dedicated transfer > dedicated compute > graphics.
> + */
> + int queue_family_tx_index;
In my experience "tx" is always short for "transmit", not for "transfer".
> + /**
> + * Queue family index for compute ops. Will be equal to the graphics
> + * one unless a dedicated transfer queue is found.
> + */
> + int queue_family_comp_index;
> + /**
> + * The UUID of the selected physical device.
> + */
> + uint8_t device_uuid[VK_UUID_SIZE];
> +} AVVulkanDeviceContext;
> +
> +/**
> + * Allocated as AVHWFramesContext.hwctx, used to set pool-specific options
> + */
> +typedef struct AVVulkanFramesContext {
> + /**
> + * Controls the tiling of output frames.
> + */
> + VkImageTiling tiling;
> + /**
> + * Defines extra usage of output frames. This is bitwise OR'd with the
> + * standard usage flags (SAMPLED, STORAGE, TRANSFER_SRC and TRANSFER_DST).
(Referred to somewhere above.)
> + */
> + VkImageUsageFlagBits usage;
> + /**
> + * Extension data for image creation. By default, if the extension is
> + * available, this will be chained to a VkImageFormatListCreateInfoKHR.
> + */
> + void *create_pnext;
> + /**
> + * Extension data for memory allocation. Must have as many entries as
> + * the number of planes of the sw_format.
> + * This will be chained to VkExportMemoryAllocateInfo, which is used
> + * to make all pool images exportable to other APIs.
> + */
> + void *alloc_pnext[AV_NUM_DATA_POINTERS];
> +} AVVulkanFramesContext;
> +
> +/*
> + * Frame structure, the VkFormat of the image will always match
> + * the pool's sw_format.
> + * All frames, imported or allocated, will be created with the
> + * VK_IMAGE_CREATE_ALIAS_BIT flag set, so the memory may be aliased if needed.
> + */
> +typedef struct AVVkFrame {
> + /**
> + * Vulkan images to which the memory is bound to.
> + */
> + VkImage img[AV_NUM_DATA_POINTERS];
> +
> + /**
> + * Same tiling must be used for all images.
> + */
> + VkImageTiling tiling;
> +
> + /**
> + * Memory backing the images. Could be less than the amount of images
> + * if importing from a DRM or VAAPI frame.
Or absent entirely?
> + */
> + VkDeviceMemory mem[AV_NUM_DATA_POINTERS];
> + size_t size[AV_NUM_DATA_POINTERS];
> +
> + /**
> + * OR'd flags for all memory allocated
> + */
> + VkMemoryPropertyFlagBits flags;
> +
> + /**
> + * Updated after every barrier
> + */
> + VkAccessFlagBits access[AV_NUM_DATA_POINTERS];
> + VkImageLayout layout[AV_NUM_DATA_POINTERS];
> +
> + /**
> + * Per-image semaphores. Must not be freed manually. Must be waited on
> + * and signalled at every queue submission.
Perhaps a little more explanation of exactly what is needed on reads/writes would be helpful here. As written it sounds like multiple readers must be serialised by it, which I'm not sure is intended.
> + */
> + VkSemaphore sem[AV_NUM_DATA_POINTERS];
> +
> + /**
> + * Internal data.
> + */
> + struct AVVkFrameInternal *internal;
> +} AVVkFrame;
> +
> +/**
> + * Returns the format of each image up to the number of planes for a given sw_format.
> + */
> +const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p);
> +
> +#endif /* AVUTIL_HWCONTEXT_VULKAN_H */
> ...
> diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
> index 37ecebd501..5640c9f23d 100644
> --- a/libavutil/pixfmt.h
> +++ b/libavutil/pixfmt.h
> @@ -348,6 +348,13 @@ enum AVPixelFormat {
> AV_PIX_FMT_NV24, ///< planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
> AV_PIX_FMT_NV42, ///< as above, but U and V bytes are swapped
>
> + /**
> + * Vulkan hardware images.
> + *
> + * data[0] contain an AVVkFrame
points to an AVVkFrame?
> + */
> + AV_PIX_FMT_VULKAN,
> +
> AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
> };
>
> --
> 2.25.0.rc2
>
Thanks,
- Mark