[FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering

Lynne dev at lynne.ee
Thu May 11 21:55:40 EEST 2023


May 11, 2023, 18:04 by anton at khirnov.net:

> Quoting Lynne (2023-04-24 17:56:38)
>
>> From b0c429d0d77d1789b6349bc6b296449ae1f8e9da Mon Sep 17 00:00:00 2001
>> From: Lynne <dev at lynne.ee>
>> Date: Tue, 15 Mar 2022 23:00:32 +0100
>> Subject: [PATCH 26/97] hwcontext_vulkan: support threadsafe queue and frame
>>  operations
>>
>> ---
>>  libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++----------
>>  libavutil/hwcontext_vulkan.h |  40 +++++++-
>>  2 files changed, 167 insertions(+), 49 deletions(-)
>>
>> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
>> index 894b4b83f3..b0db59b2d8 100644
>> --- a/libavutil/hwcontext_vulkan.c
>> +++ b/libavutil/hwcontext_vulkan.c
>> @@ -27,6 +27,7 @@
>>  #include <dlfcn.h>
>>  #endif
>>  
>> +#include <pthread.h>
>>  #include <unistd.h>
>>  
>>  #include "config.h"
>> @@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
>>  VkPhysicalDeviceVulkan13Features device_features_1_3;
>>  
>>  /* Queues */
>> -    uint32_t qfs[5];
>> -    int num_qfs;
>> +    pthread_mutex_t **qf_mutex;
>> +    int nb_tot_qfs;
>> +    uint32_t img_qfs[5];
>> +    int nb_img_qfs;
>>
>
> This patch would be so much more readable without random renamings.
>

They're not random, the meaning of each variable is different
to what they meant before.
nb_img_qfs is the total number of enabled queue families.
nb_tot_qfs is the total number of queue families listed by the driver.


>> /* Debug callback */
>>  VkDebugUtilsMessengerEXT debug_ctx;
>> @@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
>>  } VulkanFramesPriv;
>>  
>>  typedef struct AVVkFrameInternal {
>> +    pthread_mutex_t update_mutex;
>>
>
> As far as I can see, none of the mutices you're adding here are
> ever destroyed.
>

Fixed.


>> +
>>  #if CONFIG_CUDA
>>  /* Importing external memory into cuda is really expensive so we keep the
>>  * memory imported all the time */
>> @@ -1304,6 +1309,10 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
>>  if (p->libvulkan)
>>  dlclose(p->libvulkan);
>>  
>> +    for (int i = 0; i < p->nb_tot_qfs; i++)
>> +        av_freep(&p->qf_mutex[i]);
>> +    av_freep(&p->qf_mutex);
>> +
>>  RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
>>  RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
>>  }
>> @@ -1436,13 +1445,26 @@ end:
>>  return err;
>>  }
>>  
>> +static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
>>
>
> It'd be nice to be consistent with types.
> These are uint32 in vulkan, no?
>

Fixed. Though, they're more closely related to the
number of queue families given in the hwcontext, which
are 32-bit ints.


>> +{
>> +    VulkanDevicePriv *p = ctx->internal->priv;
>> +    pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
>> +}
>> +
>> +static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
>> +{
>> +    VulkanDevicePriv *p = ctx->internal->priv;
>> +    pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
>> +}
>> +
>>  static int vulkan_device_init(AVHWDeviceContext *ctx)
>>  {
>>  int err;
>> -    uint32_t queue_num;
>> +    uint32_t qf_num;
>>  AVVulkanDeviceContext *hwctx = ctx->hwctx;
>>  VulkanDevicePriv *p = ctx->internal->priv;
>>  FFVulkanFunctions *vk = &p->vkfn;
>> +    VkQueueFamilyProperties *qf;
>>  int graph_index, comp_index, tx_index, enc_index, dec_index;
>>  
>>  /* Set device extension flags */
>> @@ -1481,12 +1503,31 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>>  p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
>>  p->dev_is_intel  = (p->props.properties.vendorID == 0x8086);
>>  
>> -    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
>> -    if (!queue_num) {
>> +    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
>> +    if (!qf_num) {
>>  av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
>>  return AVERROR_EXTERNAL;
>>  }
>>  
>> +    qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
>> +    if (!qf)
>> +        return AVERROR(ENOMEM);
>> +
>> +    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
>> +
>> +    p->qf_mutex = av_mallocz(qf_num*sizeof(*p->qf_mutex));
>>
>
> av_calloc()
>
>> +    if (!p->qf_mutex)
>> +        return AVERROR(ENOMEM);
>> +    p->nb_tot_qfs = qf_num;
>> +
>> +    for (int i = 0; i < qf_num; i++) {
>> +        p->qf_mutex[i] = av_mallocz(qf[i].queueCount*sizeof(**p->qf_mutex));
>>
>
> av_calloc()
>
>> +        if (!p->qf_mutex[i])
>> +            return AVERROR(ENOMEM);
>> +        for (int j = 0; j < qf[i].queueCount; j++)
>> +            pthread_mutex_init(&p->qf_mutex[i][j], NULL);
>>
>
> Should be checked.
>

Fixed all three.


>> +    }
>> +
>>  graph_index = hwctx->queue_family_index;
>>  comp_index  = hwctx->queue_family_comp_index;
>>  tx_index    = hwctx->queue_family_tx_index;
>> @@ -1501,9 +1542,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>>  return AVERROR(EINVAL);                                                             \
>>  } else if (fidx < 0 || ctx_qf < 0) {                                                    \
>>  break;                                                                              \
>> -        } else if (ctx_qf >= queue_num) {                                                       \
>> +        } else if (ctx_qf >= qf_num) {                                                          \
>>  av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
>> -                   type, ctx_qf, queue_num);                                                    \
>> +                   type, ctx_qf, qf_num);                                                       \
>>  return AVERROR(EINVAL);                                                             \
>>  }                                                                                       \
>>  \
>> @@ -1520,7 +1561,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>>  tx_index    = (ctx_qf == tx_index)    ? -1 : tx_index;                                  \
>>  enc_index   = (ctx_qf == enc_index)   ? -1 : enc_index;                                 \
>>  dec_index   = (ctx_qf == dec_index)   ? -1 : dec_index;                                 \
>> -        p->qfs[p->num_qfs++] = ctx_qf;                                                          \
>> +        p->img_qfs[p->nb_img_qfs++] = ctx_qf;                                                   \
>>  } while (0)
>>  
>>  CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index,        hwctx->nb_graphics_queues);
>> @@ -1531,6 +1572,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>>  
>>  #undef CHECK_QUEUE
>>  
>> +    if (!hwctx->lock_queue)
>> +        hwctx->lock_queue = lock_queue;
>> +    if (!hwctx->unlock_queue)
>> +        hwctx->unlock_queue = unlock_queue;
>> +
>>  /* Get device capabilities */
>>  vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
>>  
>> @@ -1732,9 +1778,6 @@ static void vulkan_free_internal(AVVkFrame *f)
>>  {
>>  AVVkFrameInternal *internal = f->internal;
>>  
>> -    if (!internal)
>> -        return;
>> -
>>  #if CONFIG_CUDA
>>  if (internal->cuda_fc_ref) {
>>  AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
>> @@ -1923,9 +1966,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
>>  uint32_t src_qf, dst_qf;
>>  VkImageLayout new_layout;
>>  VkAccessFlags new_access;
>> +    AVVulkanFramesContext *vkfc = hwfc->hwctx;
>>  const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
>>  VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
>>  FFVulkanFunctions *vk = &p->vkfn;
>> +    AVFrame tmp = { .data[0] = (uint8_t *)frame };
>>
>
> ???
>

This enables us to use the common dependency/dispatch code.
The prepare_frame function is used for both frame initialization
and frame import/export queue family transfer operations.
In the former case, no AVFrame exists yet, so, as this is purely
libavutil code, we create a temporary frame on the stack. Otherwise,
we'd need to allocate multiple frames somewhere, one for each
possible command buffer dispatch.

Comment added to commit message.


More information about the ffmpeg-devel mailing list