[FFmpeg-devel] [PATCH] Vulkan hwcontext and filters

Sat Jan 18 22:23:00 EET 2020

On 10/01/2020 21:05, Lynne wrote:
> From d5f1bbc61fab452803443511b1241931169359b7 Mon Sep 17 00:00:00 2001
> From: Lynne <dev at lynne.ee>
> Date: Wed, 28 Aug 2019 21:58:10 +0100
> Subject: [PATCH 2/9] lavu: add Vulkan hwcontext code
> 
> This commit adds the necessary code to initialize and use a Vulkan device
> within the hwcontext libavutil framework.
> Currently direct mapping to VAAPI and DRM frames is functional, and
> transfers to CUDA and native frames are supported.
> 
> Lets hope the future Vulkan video decode extension fits well within this
> framework.
> ---

Yay!

>  configure                      |   17 +-
>  doc/APIchanges                 |    4 +
>  libavutil/Makefile             |    3 +
>  libavutil/hwcontext.c          |    4 +
>  libavutil/hwcontext.h          |    1 +
>  libavutil/hwcontext_cuda.c     |  121 ++
>  libavutil/hwcontext_internal.h |    1 +
>  libavutil/hwcontext_vulkan.c   | 2804 ++++++++++++++++++++++++++++++++
>  libavutil/hwcontext_vulkan.h   |  152 ++
>  libavutil/pixdesc.c            |    4 +
>  libavutil/pixfmt.h             |    7 +
>  11 files changed, 3112 insertions(+), 6 deletions(-)
>  create mode 100644 libavutil/hwcontext_vulkan.c
>  create mode 100644 libavutil/hwcontext_vulkan.h

The CUDA parts look like they could be split off into a separate commit?  (It's already huge.)

> 
> diff --git a/configure b/configure
> index 46f2038627..3113ebfdd8 100755
> --- a/configure
> +++ b/configure
> @@ -309,6 +309,7 @@ External library support:
>    --enable-openssl         enable openssl, needed for https support
>                             if gnutls, libtls or mbedtls is not used [no]
>    --enable-pocketsphinx    enable PocketSphinx, needed for asr filter [no]
> +  --enable-vulkan          enable Vulkan code [no]

Alphabetical order.

>    --disable-sndio          disable sndio support [autodetect]
>    --disable-schannel       disable SChannel SSP, needed for TLS support on
>                             Windows if openssl and gnutls are not used [autodetect]
> @@ -1549,11 +1550,11 @@ require_cc(){
>  }
>  
>  require_cpp(){
> -    name="$1"
> -    headers="$2"
> -    classes="$3"
> -    shift 3
> -    check_lib_cpp "$headers" "$classes" "$@" || die "ERROR: $name not found"
> +    log require_cpp "$@"
> +    name_version="$1"
> +    name="${1%% *}"
> +    shift
> +    check_lib_cpp "$name" "$@" || die "ERROR: $name_version not found"
>  }

This change looks unrelated.  (require_cpp isn't used in this patch at all.)

>  
>  require_headers(){
> @@ -1854,6 +1855,7 @@ HWACCEL_LIBRARY_LIST="
>      mmal
>      omx
>      opencl
> +    vulkan
>  "
>  
>  DOCUMENT_LIST="
> @@ -3639,7 +3641,7 @@ avformat_deps="avcodec avutil"
>  avformat_suggest="libm network zlib"
>  avresample_deps="avutil"
>  avresample_suggest="libm"
> -avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt"
> +avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi vulkan videotoolbox corefoundation corevideo coremedia bcrypt"
>  postproc_deps="avutil gpl"
>  postproc_suggest="libm"
>  swresample_deps="avutil"
> @@ -6626,6 +6628,9 @@ enabled vdpau &&
>  
>  enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.h" DtsCrystalHDVersion -lcrystalhd
>  
> +enabled vulkan &&
> +    require_pkg_config vulkan "vulkan >= 1.1.97" "vulkan/vulkan.h" vkCreateInstance

Presumably you have some specific requirement in mind which wants this version - can you note it somewhere?  (Either here or in the commit message.)

> +
>  if enabled x86; then
>      case $target_os in
>          mingw32*|mingw64*|win32|win64|linux|cygwin*)
> ...
> diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
> index f5a4b62387..f874af9f8f 100644
> --- a/libavutil/hwcontext.h
> +++ b/libavutil/hwcontext.h
> @@ -36,6 +36,7 @@ enum AVHWDeviceType {
>      AV_HWDEVICE_TYPE_DRM,
>      AV_HWDEVICE_TYPE_OPENCL,
>      AV_HWDEVICE_TYPE_MEDIACODEC,
> +    AV_HWDEVICE_TYPE_VULKAN,
>  };
>  
>  typedef struct AVHWDeviceInternal AVHWDeviceInternal;
> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> index 30611b1912..18abb87bbd 100644
> --- a/libavutil/hwcontext_cuda.c
> +++ b/libavutil/hwcontext_cuda.c
> @@ -21,6 +21,9 @@
>  #include "hwcontext.h"
>  #include "hwcontext_internal.h"
>  #include "hwcontext_cuda_internal.h"
> +#if CONFIG_VULKAN
> +#include "hwcontext_vulkan.h"
> +#endif
>  #include "cuda_check.h"
>  #include "mem.h"
>  #include "pixdesc.h"
> @@ -42,6 +45,9 @@ static const enum AVPixelFormat supported_formats[] = {
>      AV_PIX_FMT_YUV444P16,
>      AV_PIX_FMT_0RGB32,
>      AV_PIX_FMT_0BGR32,
> +#if CONFIG_VULKAN
> +    AV_PIX_FMT_VULKAN,
> +#endif

Do all devices we can do CUDA on also support Vulkan?  If not, this should probably filter it out in get_constraints() to avoid exposing something which can't possibly work.

>  };
>  
>  #define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x)
> @@ -205,6 +211,10 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
>      CUcontext dummy;
>      int i, ret;
>  
> +    /* We don't support transfers to HW devices. */
> +    if (dst->hw_frames_ctx)
> +        return AVERROR(ENOSYS);
> +
>      ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
>      if (ret < 0)
>          return ret;
> @@ -247,6 +257,10 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
>      CUcontext dummy;
>      int i, ret;
>  
> +    /* We don't support transfers from HW devices. */
> +    if (src->hw_frames_ctx)
> +        return AVERROR(ENOSYS);
> +
>      ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
>      if (ret < 0)
>          return ret;
> @@ -389,6 +403,112 @@ error:
>      return AVERROR_UNKNOWN;
>  }
>  
> +static int cuda_device_derive(AVHWDeviceContext *device_ctx,
> +                              AVHWDeviceContext *src_ctx,
> +                              int flags) {
> +    AVCUDADeviceContext *hwctx = device_ctx->hwctx;
> +    CudaFunctions *cu;
> +    const char *src_uuid = NULL;
> +    CUcontext dummy;
> +    int ret, i, device_count, dev_active = 0;
> +    unsigned int dev_flags = 0;
> +
> +    const unsigned int desired_flags = CU_CTX_SCHED_BLOCKING_SYNC;
> +
> +    switch (src_ctx->type) {
> +#if CONFIG_VULKAN
> +    case AV_HWDEVICE_TYPE_VULKAN: {
> +        AVVulkanDeviceContext *vkctx = src_ctx->hwctx;
> +        src_uuid = vkctx->device_uuid;
> +        break;
> +    }
> +#endif
> +    default:
> +        return AVERROR(ENOSYS);
> +    }
> +
> +    if (!src_uuid) {
> +        av_log(device_ctx, AV_LOG_ERROR,
> +               "Failed to get UUID of source device.\n");
> +        goto error;
> +    }
> +
> +    if (cuda_device_init(device_ctx) < 0)
> +        goto error;
> +
> +    cu = hwctx->internal->cuda_dl;
> +
> +    ret = CHECK_CU(cu->cuInit(0));
> +    if (ret < 0)
> +        goto error;
> +
> +    ret = CHECK_CU(cu->cuDeviceGetCount(&device_count));
> +    if (ret < 0)
> +        goto error;
> +
> +    hwctx->internal->cuda_device = -1;
> +    for (i = 0; i < device_count; i++) {
> +        CUdevice dev;
> +        CUuuid uuid;
> +
> +        ret = CHECK_CU(cu->cuDeviceGet(&dev, i));
> +        if (ret < 0)
> +            goto error;
> +
> +        ret = CHECK_CU(cu->cuDeviceGetUuid(&uuid, dev));
> +        if (ret < 0)
> +            goto error;
> +
> +        if (memcmp(src_uuid, uuid.bytes, sizeof (uuid.bytes)) == 0) {
> +            hwctx->internal->cuda_device = dev;
> +            break;
> +        }
> +    }
> +
> +    if (hwctx->internal->cuda_device == -1) {
> +        av_log(device_ctx, AV_LOG_ERROR, "Could not derive CUDA device.\n");

This error is maybe more like "Can't find the matching CUDA device to the supplied Vulkan device".

> +        goto error;
> +    }
> +
> +    hwctx->internal->flags = flags;
> +
> +    if (flags & AV_CUDA_USE_PRIMARY_CONTEXT) {
> +        ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device, &dev_flags, &dev_active));
> +        if (ret < 0)
> +            goto error;
> +
> +        if (dev_active && dev_flags != desired_flags) {
> +            av_log(device_ctx, AV_LOG_ERROR, "Primary context already active with incompatible flags.\n");
> +            goto error;
> +        } else if (dev_flags != desired_flags) {
> +            ret = CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device, desired_flags));
> +            if (ret < 0)
> +                goto error;
> +        }
> +
> +        ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->internal->cuda_device));
> +        if (ret < 0)
> +            goto error;
> +    } else {
> +        ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, desired_flags, hwctx->internal->cuda_device));
> +        if (ret < 0)
> +            goto error;
> +
> +        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
> +    }
> +
> +    hwctx->internal->is_allocated = 1;
> +
> +    // Setting stream to NULL will make functions automatically use the default CUstream
> +    hwctx->stream = NULL;
> +
> +    return 0;
> +
> +error:
> +    cuda_device_uninit(device_ctx);
> +    return AVERROR_UNKNOWN;
> +}
> +
>  const HWContextType ff_hwcontext_type_cuda = {
>      .type                 = AV_HWDEVICE_TYPE_CUDA,
>      .name                 = "CUDA",
> @@ -397,6 +517,7 @@ const HWContextType ff_hwcontext_type_cuda = {
>      .frames_priv_size     = sizeof(CUDAFramesContext),
>  
>      .device_create        = cuda_device_create,
> +    .device_derive        = cuda_device_derive,
>      .device_init          = cuda_device_init,
>      .device_uninit        = cuda_device_uninit,
>      .frames_get_constraints = cuda_frames_get_constraints,
> ...
> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
> new file mode 100644
> index 0000000000..d4eb8ffd35
> --- /dev/null
> +++ b/libavutil/hwcontext_vulkan.c
> @@ -0,0 +1,2804 @@
> ...
> +
> +static const struct {
> +    enum AVPixelFormat pixfmt;
> +    const VkFormat vkfmts[3];
> +} vk_pixfmt_map[] = {
> +    { AV_PIX_FMT_GRAY8,   { VK_FORMAT_R8_UNORM } },
> +    { AV_PIX_FMT_GRAY16,  { VK_FORMAT_R16_UNORM } },
> +    { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
> +
> +    { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
> +    { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

Is P010 still safe when the low bits might have any value?

> +    { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
> +
> +    { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
> +    { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
> +    { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
> +
> +    { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
> +    { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
> +    { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
> +
> +    { AV_PIX_FMT_ABGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
> +    { AV_PIX_FMT_BGRA,   { VK_FORMAT_B8G8R8A8_UNORM } },
> +    { AV_PIX_FMT_RGBA,   { VK_FORMAT_R8G8B8A8_UNORM } },
> +    { AV_PIX_FMT_RGB24,  { VK_FORMAT_R8G8B8_UNORM } },
> +    { AV_PIX_FMT_BGR24,  { VK_FORMAT_B8G8R8_UNORM } },
> +    { AV_PIX_FMT_RGB48,  { VK_FORMAT_R16G16B16_UNORM } },
> +    { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
> +    { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
> +    { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
> +    { AV_PIX_FMT_BGR0,   { VK_FORMAT_B8G8R8A8_UNORM } },
> +    { AV_PIX_FMT_0BGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
> +    { AV_PIX_FMT_RGB0,   { VK_FORMAT_R8G8B8A8_UNORM } },
> +
> +    { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
> +};
> +
> ...
> +static int check_extensions(AVHWDeviceContext *ctx, int dev,
> +                            const char * const **dst, uint32_t *num, int debug)
> +{
> +    const char *tstr;
> +    const char **extension_names = NULL;
> +    VulkanDevicePriv *p = ctx->internal->priv;
> +    AVVulkanDeviceContext *hwctx = ctx->hwctx;
> +    int err = 0, found, extensions_found = 0;
> +
> +    const char *mod;
> +    int optional_exts_num;
> +    uint32_t sup_ext_count;
> +    VkExtensionProperties *sup_ext;
> +    const VulkanOptExtension *optional_exts;
> +
> +    if (!dev) {
> +        mod = "instance";
> +        optional_exts = optional_instance_exts;
> +        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
> +        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
> +        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
> +        if (!sup_ext)
> +            return AVERROR(ENOMEM);
> +        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
> +    } else {
> +        mod = "device";
> +        optional_exts = optional_device_exts;
> +        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
> +        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
> +                                             &sup_ext_count, NULL);
> +        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
> +        if (!sup_ext)
> +            return AVERROR(ENOMEM);
> +        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
> +                                             &sup_ext_count, sup_ext);
> +    }
> +
> +    for (int i = 0; i < optional_exts_num; i++) {
> +        int req = optional_exts[i].flag & EXT_REQUIRED;
> +        tstr = optional_exts[i].name;
> +
> +        found = 0;
> +        for (int j = 0; j < sup_ext_count; j++) {
> +            if (!strcmp(tstr, sup_ext[j].extensionName)) {
> +                found = 1;
> +                break;
> +            }
> +        }
> +        if (!found) {
> +            int lvl = req ? AV_LOG_ERROR : AV_LOG_VERBOSE;
> +            av_log(ctx, lvl, "Extension \"%s\" not found!\n", tstr);
> +            if (req) {
> +                err = AVERROR(EINVAL);
> +                goto end;
> +            }
> +            continue;
> +        }
> +        if (!req)
> +            p->extensions |= optional_exts[i].flag;
> +
> +        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
> +
> +        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
> +    }
> +
> +    if (debug && !dev) {
> +        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
> +        found = 0;
> +        for (int j = 0; j < sup_ext_count; j++) {
> +            if (!strcmp(tstr, sup_ext[j].extensionName)) {
> +                found = 1;
> +                break;
> +            }
> +        }
> +        if (found) {
> +            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
> +        } else {
> +            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
> +                   tstr);
> +            err = AVERROR(EINVAL);
> +            goto end;
> +        }
> +    }
> +
> +    *dst = extension_names;
> +    *num = extensions_found;
> +
> +end:
> +    av_free(sup_ext);

I think this leaks the extension_names array with some of the later failures.

> +    return err;
> +}
> +
> ...
> +
> +typedef struct VulkanDeviceSelection {
> +    uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
> +    int has_uuid;
> +    const char *name; /* Will use this second unless NULL */
> +    uint32_t pci_device; /* Will use this second unless 0x0 */
> +    uint32_t vendor_id; /* Last resort to find something deterministic */
> +    int index; /* Finally fall back to index */
> +} VulkanDeviceSelection;
> +
> +static const char *vk_dev_type(enum VkPhysicalDeviceType type)
> +{
> +    switch (type) {
> +    case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
> +    case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
> +    case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
> +    case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
> +    default:                                     return "unknown";
> +    }
> +}
> +
> +/* Finds a device */
> +static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
> +{
> ...

Yay, I like the improvement to the selection options :)

> +}
> +
> ...
> +
> +static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
> +{
> +    AVVulkanDeviceContext *hwctx = ctx->hwctx;
> +
> +    vkDestroyFence(hwctx->act_dev, cmd->fence, hwctx->alloc);
> +
> +    if (cmd->buf)
> +        vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, 1, &cmd->buf);
> +    if (cmd->pool)
> +        vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
> +}
> +
> +static void vulkan_device_free(AVHWDeviceContext *ctx)
> +{
> +    VulkanDevicePriv *p = ctx->internal->priv;
> +    AVVulkanDeviceContext *hwctx = ctx->hwctx;
> +
> +    free_exec_ctx(ctx, &p->cmd);

A device destroyed before it is fully created need not have a valid exec_ctx.

(E.g. "./ffmpeg_g -init_hw_device vulkan:123456789" segfaults here.)

> ...
> +
> +static int vulkan_device_init(AVHWDeviceContext *ctx)
> +{
> +    int err;
> +    uint32_t queue_num;
> +    AVVulkanDeviceContext *hwctx = ctx->hwctx;
> +    VulkanDevicePriv *p = ctx->internal->priv;
> +
> +    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
> +    if (!queue_num) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
> +        return AVERROR_EXTERNAL;
> +    }
> +
> +    if (hwctx->queue_family_index      >= queue_num ||
> +        hwctx->queue_family_tx_index   >= queue_num ||
> +        hwctx->queue_family_comp_index >= queue_num) {
> +        av_log(ctx, AV_LOG_ERROR, "Invalid queue index!\n");

Maybe say the invalid indices.

> +        return AVERROR_EXTERNAL;

I think this is EINVAL - the user supplied the invalid queue index.

> +    }
> +
> +    /* Create exec context - if there's something invalid this will error out */
> +    err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index);
> +    if (err)
> +        return err;
> +
> +    /* Get device capabilities */
> +    vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
> +
> +    return 0;
> +}
> +
> +static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
> +                                AVDictionary *opts, int flags)
> +{
> +    VulkanDeviceSelection dev_select = { 0 };
> +    if (device && device[0]) {
> +        char *end = NULL;
> +        dev_select.index = strtol(device, &end, 10);
> +        if (end == device) {
> +            dev_select.index = 0;
> +            dev_select.name  = device;
> +        }
> +    }

Is it worth making uuid=f00 work here as well?  (From opts rather than device: "-init_hw_device vulkan:,uuid=f00".)

> +
> +    return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
> +}
> +
> +static int vulkan_device_derive(AVHWDeviceContext *ctx,
> +                                AVHWDeviceContext *src_ctx, int flags)
> +{
> +    av_unused VulkanDeviceSelection dev_select = { 0 };
> +
> +    /* If there's only one device on the system, then even if its not covered
> +     * by the following checks (e.g. non-PCIe ARM GPU), having an empty
> +     * dev_select will mean it'll get picked. */

Kindof evil, but makes sense.

> +    switch(src_ctx->type) {
> +#if CONFIG_LIBDRM
> +#if CONFIG_VAAPI
> +    case AV_HWDEVICE_TYPE_VAAPI: {
> +        AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
> +
> +        const char *vendor = vaQueryVendorString(src_hwctx->display);
> +        if (!vendor) {
> +            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
> +            return AVERROR_EXTERNAL;
> +        }
> +
> +        if (strstr(vendor, "Intel"))
> +            dev_select.vendor_id = 0x8086;
> +        if (strstr(vendor, "AMD"))
> +            dev_select.vendor_id = 0x1002;

Yuck, but I don't see a better way :(

I might look into adding something to libva to allow this to work properly, since this combination will happen.

> +
> +        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
> +    }
> +#endif
> +    case AV_HWDEVICE_TYPE_DRM: {
> +        AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;
> +
> +        drmDevice *drm_dev_info;
> +        int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
> +        if (err) {
> +            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
> +            return AVERROR_EXTERNAL;
> +        }
> +
> +        if (drm_dev_info->bustype == DRM_BUS_PCI)
> +            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
> +
> +        drmFreeDevice(&drm_dev_info);
> +
> +        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
> +    }
> +#endif
> +#if CONFIG_CUDA
> +    case AV_HWDEVICE_TYPE_CUDA: {
> +        AVHWDeviceContext *cuda_cu = src_ctx;
> +        AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
> +        AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
> +        CudaFunctions *cu = cu_internal->cuda_dl;
> +
> +        int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
> +                                               cu_internal->cuda_device));
> +        if (ret < 0) {
> +            av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
> +            return AVERROR_EXTERNAL;
> +        }
> +
> +        dev_select.has_uuid = 1;
> +
> +        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
> +    }
> +#endif
> +    default:
> +        return AVERROR(ENOSYS);
> +    }
> +}
> +
> +static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
> +                                         const void *hwconfig,
> +                                         AVHWFramesConstraints *constraints)
> +{
> +    int count = 0;
> +    AVVulkanDeviceContext *hwctx = ctx->hwctx;
> +    VulkanDevicePriv *p = ctx->internal->priv;
> +
> +    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
> +        count += pixfmt_is_supported(hwctx, i, p->use_linear_images);
> +
> +#if CONFIG_CUDA
> +    count++;

I think you should be able to test whether a device supports CUDA here, so it isn't included for non-Nvidia devices?

> +#endif
> +
> +    constraints->valid_sw_formats = av_malloc_array(count + 1,
> +                                                    sizeof(enum AVPixelFormat));
> +    if (!constraints->valid_sw_formats)
> +        return AVERROR(ENOMEM);
> +
> +    count = 0;
> +    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
> +        if (pixfmt_is_supported(hwctx, i, p->use_linear_images))
> +            constraints->valid_sw_formats[count++] = i;
> +
> +#if CONFIG_CUDA
> +    constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
> +#endif
> +    constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
> +
> +    constraints->min_width  = 0;
> +    constraints->min_height = 0;
> +    constraints->max_width  = p->props.limits.maxImageDimension2D;
> +    constraints->max_height = p->props.limits.maxImageDimension2D;
> +
> +    constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
> +    if (!constraints->valid_hw_formats)
> +        return AVERROR(ENOMEM);
> +
> +    constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
> +    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
> +
> +    return 0;
> +}
> +
> ...
> +
> +typedef struct VulkanFramesPriv {
> +    VulkanExecCtx cmd;
> +} VulkanFramesPriv;

I think put this definition at the top so that it's easy to find what priv on a Vulkan HWFC is.

> ...
> +
> +static void vulkan_frame_free(void *opaque, uint8_t *data)
> +{
> +    AVVkFrame *f = (AVVkFrame *)data;
> +    AVHWFramesContext *hwfc = opaque;
> +    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
> +    int planes = av_pix_fmt_count_planes(hwfc->sw_format);
> +
> +    if (!f)
> +        return;

When can you have !f?  That seems invalid in an "assert that f is not null" type of way.

> +
> +    vulkan_free_internal(f->internal);
> +
> +    for (int i = 0; i < planes; i++) {
> +        vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
> +        vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
> +        vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
> +    }
> +
> +    av_free(f);
> +}
> +
> ...
> +
> +static int vulkan_frames_init(AVHWFramesContext *hwfc)
> +{
> +    int err;
> +    AVVkFrame *f;
> +    AVVulkanFramesContext *hwctx = hwfc->hwctx;
> +    VulkanFramesPriv *fp = hwfc->internal->priv;
> +    AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
> +    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
> +
> +    if (hwfc->pool)
> +        return 0;
> +
> +    /* Default pool flags */
> +    hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
> +                    VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
> +
> +    hwctx->usage |= DEFAULT_USAGE_FLAGS;

Is it possible that this disallows some use-cases in a device where those default flags need not be not supported?  For example, some sort of magic image-writer like a video decoder where the output images can only ever be used as a source by non-magic operations.  Baking that into the API (as in the comment on usage in the header) seems bad if so.

> +
> +    err = create_exec_ctx(hwfc->device_ctx, &fp->cmd,
> +                          dev_hwctx->queue_family_tx_index);
> +    if (err)
> +        return err;
> +
> +    /* Test to see if allocation will fail */
> +    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
> +                       hwctx->create_pnext);
> +    if (err)
> +        return err;
> +
> +    vulkan_frame_free(hwfc, (uint8_t *)f);
> +
> +    hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
> +                                                         hwfc, vulkan_pool_alloc,
> +                                                         NULL);
> +    if (!hwfc->internal->pool_internal)
> +        return AVERROR(ENOMEM);
> +
> +    return 0;
> +}
> +
> ...
> +
> +static const struct {
> +    uint32_t va_fourcc;

va_fourcc?

> +    VkFormat vk_format;
> +} vulkan_drm_format_map[] = {
> +    { DRM_FORMAT_R8,       VK_FORMAT_R8_UNORM       },
> +    { DRM_FORMAT_R16,      VK_FORMAT_R16_UNORM      },
> +    { DRM_FORMAT_GR88,     VK_FORMAT_R8G8_UNORM     },
> +    { DRM_FORMAT_RG88,     VK_FORMAT_R8G8_UNORM     },
> +    { DRM_FORMAT_GR1616,   VK_FORMAT_R16G16_UNORM   },
> +    { DRM_FORMAT_RG1616,   VK_FORMAT_R16G16_UNORM   },

Are RG88 and RG1616 right?  I thought you would always want them reversed.

> +    { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
> +    { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
> +    { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
> +    { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
> +};
> +
> +static inline VkFormat drm_to_vulkan_fmt(uint32_t va_fourcc)

va_fourcc?

> +{
> +    for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
> +        if (vulkan_drm_format_map[i].va_fourcc == va_fourcc)
> +            return vulkan_drm_format_map[i].vk_format;
> +    return VK_FORMAT_UNDEFINED;
> +}
> +
> +static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
> +                                          AVDRMFrameDescriptor *desc)
> +{
> +    int err = 0;
> +    VkResult ret;
> +    AVVkFrame *f;
> +    AVHWDeviceContext *ctx = hwfc->device_ctx;
> +    AVVulkanDeviceContext *hwctx = ctx->hwctx;
> +    VulkanDevicePriv *p = ctx->internal->priv;
> +    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
> +    const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format);
> +    const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS;
> +    VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS];
> +    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS];
> +    VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
> +
> +    VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR);
> +
> +    for (int i = 0; i < desc->nb_layers; i++) {
> +        if (desc->layers[i].nb_planes > 1) {
> +            av_log(ctx, AV_LOG_ERROR, "Cannot import DMABUFS with more than 1 "
> +                                      "plane per layer!\n");
> +            return AVERROR(EINVAL);
> +        }
> +
> +        if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
> +            av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format!\n");

Maybe say what the unsupported format is for someone reporting the message, since this is probably relatively easy to hit (e.g. YUYV).

> +            return AVERROR(EINVAL);
> +        }
> +    }
> +
> +    if (!(f = av_mallocz(sizeof(*f)))) {
> +        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
> +        err = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
> +    for (int i = 0; i < desc->nb_objects; i++) {
> +        VkMemoryFdPropertiesKHR fdmp = {
> +            .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
> +        };
> +        VkMemoryRequirements req = {
> +            .size = desc->objects[i].size,
> +        };
> +        VkImportMemoryFdInfoKHR idesc = {
> +            .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
> +            .handleType = htype,
> +            .fd         = desc->objects[i].fd,
> +        };
> +
> +        ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
> +                                             desc->objects[i].fd, &fdmp);
> +        if (ret != VK_SUCCESS) {
> +            av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
> +                   vk_ret2str(ret));
> +            err = AVERROR_EXTERNAL;
> +            goto fail;
> +        }
> +
> +        req.memoryTypeBits = fdmp.memoryTypeBits;
> +
> +        err = alloc_mem(ctx, &req, 0x0, &idesc, &f->flags, &f->mem[i]);
> +        if (err)
> +            return err;
> +
> +        f->size[i] = desc->objects[i].size;
> +    }
> +
> +    f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
> +                desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
> +                VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
> +
> +    for (int i = 0; i < desc->nb_layers; i++) {
> +        VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
> +            .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
> +            .drmFormatModifier = desc->objects[0].format_modifier,
> +            .drmFormatModifierPlaneCount = desc->layers[i].nb_planes,
> +            .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data,
> +        };
> +
> +        VkExternalMemoryImageCreateInfo einfo = {
> +            .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
> +            .pNext       = has_modifiers ? &drm_info : NULL,
> +            .handleTypes = htype,
> +        };
> +
> +        VkSemaphoreCreateInfo sem_spawn = {
> +            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
> +        };
> +
> +        const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width;
> +        const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height;
> +
> +        VkImageCreateInfo image_create_info = {
> +            .sType         = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
> +            .pNext         = &einfo,
> +            .imageType     = VK_IMAGE_TYPE_2D,
> +            .format        = drm_to_vulkan_fmt(desc->layers[i].format),
> +            .extent.width  = p_w,
> +            .extent.height = p_h,
> +            .extent.depth  = 1,
> +            .mipLevels     = 1,
> +            .arrayLayers   = 1,
> +            .flags         = VK_IMAGE_CREATE_ALIAS_BIT,
> +            .tiling        = f->tiling,
> +            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
> +            .usage         = DEFAULT_USAGE_FLAGS,
> +            .sharingMode   = VK_SHARING_MODE_EXCLUSIVE,
> +            .samples       = VK_SAMPLE_COUNT_1_BIT,
> +        };
> +
> +        for (int j = 0; j < desc->layers[i].nb_planes; j++) {
> +            plane_data[j].offset     = desc->layers[i].planes[j].offset;
> +            plane_data[j].rowPitch   = desc->layers[i].planes[j].pitch;
> +            plane_data[j].size       = 0; /* The specs say so for all 3 */
> +            plane_data[j].arrayPitch = 0;
> +            plane_data[j].depthPitch = 0;
> +        }
> +
> +        /* Create image */
> +        ret = vkCreateImage(hwctx->act_dev, &image_create_info,
> +                            hwctx->alloc, &f->img[i]);
> +        if (ret != VK_SUCCESS) {
> +            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
> +                   vk_ret2str(ret));
> +            err = AVERROR(EINVAL);
> +            goto fail;
> +        }
> +
> +        ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
> +                                hwctx->alloc, &f->sem[i]);
> +        if (ret != VK_SUCCESS) {
> +            av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
> +                   vk_ret2str(ret));
> +            return AVERROR_EXTERNAL;
> +        }
> +
> +        /* We'd import a semaphore onto the one we created using
> +         * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI
> +         * offer us anything we could import and sync with, so instead
> +         * leave the semaphore unsignalled and enjoy the validation spam. */

I have some vague intent to look into this subject myself.  VAAPI needs proper async, and interop with Vulkan is an important use of that.

> +
> +        f->layout[i] = image_create_info.initialLayout;
> +        f->access[i] = 0x0;
> +
> +        /* TODO: Fix to support more than 1 plane per layer */
> +        bind_info[i].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
> +        bind_info[i].pNext  = NULL;
> +        bind_info[i].image  = f->img[i];
> +        bind_info[i].memory = f->mem[desc->layers[i].planes[0].object_index];
> +        bind_info[i].memoryOffset = desc->layers[i].planes[0].offset;
> +    }
> +
> +    /* Bind the allocated memory to the images */
> +    ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info);
> +    if (ret != VK_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
> +               vk_ret2str(ret));
> +        return AVERROR_EXTERNAL;
> +    }
> +
> +    *frame = f;
> +
> +    return 0;
> +
> +fail:
> +    for (int i = 0; i < planes; i++) {
> +        vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
> +        vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
> +        vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
> +    }
> +
> +    av_free(f);
> +
> +    return err;
> +}
> +
> ...
> +
> +static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
> +                             const AVFrame *src, int flags)
> +{
> +    int err = 0;
> +    VkResult ret;
> +    AVVkFrame *f = (AVVkFrame *)src->data[0];
> +    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
> +    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
> +    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
> +    VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
> +    VkImageDrmFormatModifierPropertiesEXT drm_mod = {
> +        .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
> +    };

Do you need a sync here for any writing being finished, or is it implicit somehow below?

> +
> +    AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
> +    if (!drm_desc)
> +        return AVERROR(ENOMEM);
> +
> +    err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
> +    if (err < 0)
> +        goto end;
> +
> +    if (p->extensions & EXT_DRM_MODIFIER_FLAGS) {
> +        VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT);
> +        ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
> +                                                           &drm_mod);
> +        if (ret != VK_SUCCESS) {
> +            av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
> +            err = AVERROR_EXTERNAL;
> +            goto end;
> +        }
> +    }
> +
> +    for (int i = 0; (i < planes) && (f->mem[i]); i++) {
> +        VkMemoryGetFdInfoKHR export_info = {
> +            .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
> +            .memory     = f->mem[i],
> +            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
> +        };
> +
> +        ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
> +                                   &drm_desc->objects[i].fd);
> +        if (ret != VK_SUCCESS) {
> +            av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
> +            err = AVERROR_EXTERNAL;
> +            goto end;
> +        }
> +
> +        drm_desc->nb_objects++;
> +        drm_desc->objects[i].size = f->size[i];
> +        drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
> +    }
> +
> +    drm_desc->nb_layers = planes;
> +    for (int i = 0; i < drm_desc->nb_layers; i++) {
> +        VkSubresourceLayout layout;
> +        VkImageSubresource sub = {
> +            .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ?
> +                          VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
> +                          VK_IMAGE_ASPECT_COLOR_BIT,
> +        };
> +        VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];
> +
> +        drm_desc->layers[i].format    = vulkan_fmt_to_drm(plane_vkfmt);
> +        drm_desc->layers[i].nb_planes = 1;
> +
> +        if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
> +            av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
> +            err = AVERROR_PATCHWELCOME;
> +            goto end;
> +        }
> +
> +        drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
> +
> +        if (f->tiling != VK_IMAGE_TILING_OPTIMAL)
> +            continue;
> +
> +        vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
> +        drm_desc->layers[i].planes[0].offset       = layout.offset;
> +        drm_desc->layers[i].planes[0].pitch        = layout.rowPitch;
> +    }
> +
> +    dst->width   = src->width;
> +    dst->height  = src->height;
> +    dst->data[0] = (uint8_t *)drm_desc;
> +
> +    av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");
> +
> +    return 0;
> +
> +end:
> +    av_free(drm_desc);
> +    return err;
> +}
> +
> ...
> +
> +/* Technically we can use VK_EXT_external_memory_host to upload and download,
> + * however the alignment requirements make this unfeasible as both the pointer
> + * and the size of each plane need to be aligned to the minimum alignment
> + * requirement, which on all current implementations (anv, radv) is 4096.
> + * If the requirement gets relaxed (unlikely) this can easily be implemented. */

What does the pointer alignment requirement actually apply to?

(Could we lie about the start of the image by rounding to a page, and then do the transfer with an offset?)

> ...
> +
> +static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
> +                                       const AVFrame *src)
> +{
> +    int err = 0;
> +    AVFrame tmp;
> +    AVVkFrame *f = (AVVkFrame *)src->data[0];
> +    AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
> +    ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
> +    const int planes = av_pix_fmt_count_planes(dst->format);
> +    int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h;
> +
> +    if (dst->width > hwfc->width || dst->height > hwfc->height)
> +        return AVERROR(EINVAL);
> +
> +    /* For linear, host visiable images */
> +    if (f->tiling == VK_IMAGE_TILING_LINEAR &&
> +        f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {

Is it generally expected that this is actually faster than the next option?  (Because evil uncached memory is a thing.)

> +        AVFrame *map = av_frame_alloc();
> +        if (!map)
> +            return AVERROR(ENOMEM);
> +        map->format = dst->format;
> +
> +        err = vulkan_map_frame_to_mem(hwfc, map, src, AV_HWFRAME_MAP_READ);
> +        if (err)
> +            return err;
> +
> +        err = av_frame_copy(dst, map);
> +        av_frame_free(&map);
> +        return err;
> +    }
> +
> +    /* Create buffers */
> +    for (int i = 0; i < planes; i++) {
> +        int h = dst->height;
> +        int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
> +
> +        tmp.linesize[i] = dst->linesize[i];
> +        err = create_buf(dev_ctx, &buf[i], p_height,
> +                         &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
> +                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
> +    }
> +
> +    /* Copy image to buffer */
> +    if ((err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
> +                                  dst->width, dst->height, dst->format, 1)))
> +        goto end;
> +
> +    /* Map, copy buffer to frame, unmap */
> +    if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 1)))
> +        goto end;
> +
> +    av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data,
> +                  tmp.linesize, dst->format, dst->width, dst->height);
> +
> +    err = unmap_buffers(dev_ctx, buf, planes, 0);
> +
> +end:
> +    for (int i = 0; i < planes; i++)
> +        free_buf(dev_ctx, &buf[i]);
> +
> +    return err;
> +}
> ...
> diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
> new file mode 100644
> index 0000000000..4146f14d6e
> --- /dev/null
> +++ b/libavutil/hwcontext_vulkan.h
> @@ -0,0 +1,152 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVUTIL_HWCONTEXT_VULKAN_H
> +#define AVUTIL_HWCONTEXT_VULKAN_H
> +
> +#include <vulkan/vulkan.h>
> +
> +/**
> + * @file
> + * API-specific header for AV_HWDEVICE_TYPE_VULKAN.
> + *
> + * For user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs
> + * with the data pointer set to an AVVkFrame.
> + */
> +
> +/**
> + * Main Vulkan context, allocated as AVHWDeviceContext.hwctx.
> + * All of these can be set before init to change what the context uses
> + */
> +typedef struct AVVulkanDeviceContext {
> +    /**
> +     * Custom memory allocator, else NULL
> +     */
> +    const VkAllocationCallbacks *alloc;

Do you have some specific use-case in mind for this?  (You haven't used it in anything so far.)

> +    /**
> +     * Instance
> +     */
> +    VkInstance inst;
> +    /**
> +     * Physical device
> +     */
> +    VkPhysicalDevice phys_dev;
> +    /**
> +     * Activated physical device
> +     */
> +    VkDevice act_dev;

I weakly argue for not abbreviating names in the public API like this (but feel free to ignore me).

> +    /**
> +     * Queue family index for graphics
> +     */
> +    int queue_family_index;
> +    /**
> +     * Queue family index for transfer ops only. By default, the priority order
> +     * is dedicated transfer > dedicated compute > graphics.
> +     */
> +    int queue_family_tx_index;

In my experience "tx" is always short for "transmit", not for "transfer".

> +    /**
> +     * Queue family index for compute ops. Will be equal to the graphics
> +     * one unless a dedicated transfer queue is found.
> +     */
> +    int queue_family_comp_index;
> +    /**
> +     * The UUID of the selected physical device.
> +     */
> +    uint8_t device_uuid[VK_UUID_SIZE];
> +} AVVulkanDeviceContext;
> +
> +/**
> + * Allocated as AVHWFramesContext.hwctx, used to set pool-specific options
> + */
> +typedef struct AVVulkanFramesContext {
> +    /**
> +     * Controls the tiling of output frames.
> +     */
> +    VkImageTiling tiling;
> +    /**
> +     * Defines extra usage of output frames. This is bitwise OR'd with the
> +     * standard usage flags (SAMPLED, STORAGE, TRANSFER_SRC and TRANSFER_DST).

(Referred to somewhere above.)

> +     */
> +    VkImageUsageFlagBits usage;
> +    /**
> +     * Extension data for image creation. By default, if the extension is
> +     * available, this will be chained to a VkImageFormatListCreateInfoKHR.
> +     */
> +    void *create_pnext;
> +    /**
> +     * Extension data for memory allocation. Must have as many entries as
> +     * the number of planes of the sw_format.
> +     * This will be chained to VkExportMemoryAllocateInfo, which is used
> +     * to make all pool images exportable to other APIs.
> +     */
> +    void *alloc_pnext[AV_NUM_DATA_POINTERS];
> +} AVVulkanFramesContext;
> +
> +/*
> + * Frame structure, the VkFormat of the image will always match
> + * the pool's sw_format.
> + * All frames, imported or allocated, will be created with the
> + * VK_IMAGE_CREATE_ALIAS_BIT flag set, so the memory may be aliased if needed.
> + */
> +typedef struct AVVkFrame {
> +    /**
> +     * Vulkan images to which the memory is bound to.
> +     */
> +    VkImage img[AV_NUM_DATA_POINTERS];
> +
> +    /**
> +     * Same tiling must be used for all images.
> +     */
> +    VkImageTiling tiling;
> +
> +    /**
> +     * Memory backing the images. Could be less than the amount of images
> +     * if importing from a DRM or VAAPI frame.

Or absent entirely?

> +     */
> +    VkDeviceMemory mem[AV_NUM_DATA_POINTERS];
> +    size_t size[AV_NUM_DATA_POINTERS];
> +
> +    /**
> +     * OR'd flags for all memory allocated
> +     */
> +    VkMemoryPropertyFlagBits flags;
> +
> +    /**
> +     * Updated after every barrier
> +     */
> +    VkAccessFlagBits access[AV_NUM_DATA_POINTERS];
> +    VkImageLayout layout[AV_NUM_DATA_POINTERS];
> +
> +    /**
> +     * Per-image semaphores. Must not be freed manually. Must be waited on
> +     * and signalled at every queue submission.

Perhaps a little more explanation of exactly what is needed on reads/writes would be helpful here.  As written it sounds like multiple readers must be serialised by it, which I'm not sure is intended.

> +     */
> +    VkSemaphore sem[AV_NUM_DATA_POINTERS];
> +
> +    /**
> +     * Internal data.
> +     */
> +    struct AVVkFrameInternal *internal;
> +} AVVkFrame;
> +
> +/**
> + * Returns the format of each image up to the number of planes for a given sw_format.
> + */
> +const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p);
> +
> +#endif /* AVUTIL_HWCONTEXT_VULKAN_H */
> ...
> diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
> index 37ecebd501..5640c9f23d 100644
> --- a/libavutil/pixfmt.h
> +++ b/libavutil/pixfmt.h
> @@ -348,6 +348,13 @@ enum AVPixelFormat {
>      AV_PIX_FMT_NV24,      ///< planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
>      AV_PIX_FMT_NV42,      ///< as above, but U and V bytes are swapped
>  
> +    /**
> +     * Vulkan hardware images.
> +     *
> +     * data[0] contain an AVVkFrame

points to an AVVkFrame?

> +     */
> +    AV_PIX_FMT_VULKAN,
> +
>      AV_PIX_FMT_NB         ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
>  };
>  
> -- 
> 2.25.0.rc2
> 

Thanks,

- Mark