[FFmpeg-cvslog] vulkan: use push descriptors where possible
Lynne
git at videolan.org
Mon Sep 23 14:42:59 EEST 2024
ffmpeg | branch: master | Lynne <dev at lynne.ee> | Sun Sep 22 13:43:33 2024 +0200| [bc36fe6f1fc5244d5fc85c3b763015f58d50b358] | committer: Lynne
vulkan: use push descriptors where possible
Push descriptors are in theory slightly faster, but come with
limitations for which we have to check.
Either way, they're not difficult to implement, so even though
no one should be using peasant-tier descriptors, do it anyway.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bc36fe6f1fc5244d5fc85c3b763015f58d50b358
---
libavfilter/vf_nlmeans_vulkan.c | 16 +++---
libavutil/hwcontext_vulkan.c | 2 +-
libavutil/vulkan.c | 124 +++++++++++++++++++++++++++-------------
libavutil/vulkan.h | 1 +
libavutil/vulkan_functions.h | 2 +
libavutil/vulkan_loader.h | 1 +
6 files changed, 97 insertions(+), 49 deletions(-)
diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c
index 9d96efa27b..f0a7353d5c 100644
--- a/libavfilter/vf_nlmeans_vulkan.c
+++ b/libavfilter/vf_nlmeans_vulkan.c
@@ -687,14 +687,16 @@ static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec,
VkBufferMemoryBarrier2 buf_bar[8];
int nb_buf_bar = 0;
+ DenoisePushData pd = {
+ { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
+ };
+
/* Denoise pass pipeline */
ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_denoise);
/* Push data */
ff_vk_update_push_exec(vkctx, exec, &s->pl_denoise, VK_SHADER_STAGE_COMPUTE_BIT,
- 0, sizeof(DenoisePushData), &(DenoisePushData) {
- { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
- });
+ 0, sizeof(pd), &pd);
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
@@ -970,6 +972,10 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
offsets_dispatched,
};
+ /* Push data */
+ ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, sizeof(pd), &pd);
+
if (offsets_dispatched) {
nb_buf_bar = 0;
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
@@ -995,10 +1001,6 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
integral_vk->access = buf_bar[1].dstAccessMask;
}
- /* Push data */
- ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT,
- 0, sizeof(pd), &pd);
-
wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 7e7d9cb70b..6317ab7d0e 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -422,7 +422,7 @@ static const VulkanOptExtension optional_instance_exts[] = {
static const VulkanOptExtension optional_device_exts[] = {
/* Misc or required by other extensions */
{ VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
- { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
+ { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR },
{ VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, },
{ VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM },
{ VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT },
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 7ea4c33619..046ac5d67e 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1520,12 +1520,8 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
FFVulkanDescriptorSetBinding *desc, int nb,
int singular, int print_to_shader_only)
{
- VkResult ret;
int has_sampler = 0;
- FFVulkanFunctions *vk = &s->vkfn;
FFVulkanDescriptorSet *set;
- VkDescriptorSetLayout *layout;
- VkDescriptorSetLayoutCreateInfo desc_create_layout;
if (print_to_shader_only)
goto print;
@@ -1537,14 +1533,7 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
return AVERROR(ENOMEM);
pl->desc_set = set;
- layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
- pl->nb_descriptor_sets + 1);
- if (!layout)
- return AVERROR(ENOMEM);
- pl->desc_layout = layout;
-
set = &set[pl->nb_descriptor_sets];
- layout = &layout[pl->nb_descriptor_sets];
memset(set, 0, sizeof(*set));
set->binding = av_calloc(nb, sizeof(*set->binding));
@@ -1557,14 +1546,6 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
return AVERROR(ENOMEM);
}
- desc_create_layout = (VkDescriptorSetLayoutCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .bindingCount = nb,
- .pBindings = set->binding,
- .flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ?
- VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : 0x0,
- };
-
for (int i = 0; i < nb; i++) {
set->binding[i].binding = i;
set->binding[i].descriptorType = desc[i].type;
@@ -1582,22 +1563,7 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
if (has_sampler)
set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT;
- ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
- s->hwctx->alloc, layout);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to init descriptor set layout: %s",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
-
- if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
- vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, *layout, &set->layout_size);
- set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment);
-
- for (int i = 0; i < nb; i++)
- vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, *layout,
- i, &set->binding_offset[i]);
- } else {
+ if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) {
for (int i = 0; i < nb; i++) {
int j;
VkDescriptorPoolSize *desc_pool_size;
@@ -1606,8 +1572,8 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
break;
if (j >= pl->nb_desc_pool_size) {
desc_pool_size = av_realloc_array(pl->desc_pool_size,
- sizeof(*desc_pool_size),
- pl->nb_desc_pool_size + 1);
+ sizeof(*desc_pool_size),
+ pl->nb_desc_pool_size + 1);
if (!desc_pool_size)
return AVERROR(ENOMEM);
@@ -1703,7 +1669,7 @@ int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool,
pl->bound_buffer_indices[i] = i;
}
- } else {
+ } else if (!pl->use_push) {
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
VkDescriptorSetLayout *tmp_layouts;
@@ -1796,8 +1762,16 @@ static inline void update_set_pool_write(FFVulkanContext *s,
vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
}
} else {
- write_info->dstSet = pl->desc_sets[e->idx*pl->nb_descriptor_sets + set];
- vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
+ if (pl->use_push) {
+ vk->CmdPushDescriptorSetKHR(e->buf,
+ pl->bind_point,
+ pl->pipeline_layout,
+ set, 1,
+ write_info);
+ } else {
+ write_info->dstSet = pl->desc_sets[e->idx*pl->nb_descriptor_sets + set];
+ vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
+ }
}
}
@@ -1954,6 +1928,70 @@ void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
stage, offset, size, src);
}
+static int init_descriptors(FFVulkanContext *s, FFVulkanPipeline *pl)
+{
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ pl->desc_layout = av_malloc_array(pl->nb_descriptor_sets,
+ sizeof(*pl->desc_layout));
+ if (!pl->desc_layout)
+ return AVERROR(ENOMEM);
+
+ if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) {
+ int has_singular = 0;
+ for (int i = 0; i < pl->nb_descriptor_sets; i++) {
+ if (pl->desc_set[i].singular) {
+ has_singular = 1;
+ break;
+ }
+ }
+ pl->use_push = (s->extensions & FF_VK_EXT_PUSH_DESCRIPTOR) &&
+ (pl->nb_descriptor_sets == 1) &&
+ !has_singular;
+ }
+
+ for (int i = 0; i < pl->nb_descriptor_sets; i++) {
+ FFVulkanDescriptorSet *set = &pl->desc_set[i];
+ VkDescriptorSetLayoutCreateInfo desc_layout_create = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .bindingCount = set->nb_bindings,
+ .pBindings = set->binding,
+ .flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ?
+ VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT :
+ (pl->use_push) ?
+ VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR :
+ 0x0,
+ };
+
+ ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev,
+ &desc_layout_create,
+ s->hwctx->alloc,
+ &pl->desc_layout[i]);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to create descriptor set layout: %s",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
+ vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, pl->desc_layout[i],
+ &set->layout_size);
+
+ set->aligned_size = FFALIGN(set->layout_size,
+ s->desc_buf_props.descriptorBufferOffsetAlignment);
+
+ for (int j = 0; j < set->nb_bindings; j++)
+ vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev,
+ pl->desc_layout[i],
+ j,
+ &set->binding_offset[j]);
+ }
+ }
+
+ return 0;
+}
+
static int init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
{
VkResult ret;
@@ -1989,6 +2027,10 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
VkComputePipelineCreateInfo pipeline_create_info;
+ err = init_descriptors(s, pl);
+ if (err < 0)
+ return err;
+
err = init_pipeline_layout(s, pl);
if (err < 0)
return err;
@@ -2038,7 +2080,7 @@ void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e,
vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout,
0, pl->nb_descriptor_sets,
pl->bound_buffer_indices, offsets);
- } else {
+ } else if (!pl->use_push) {
vk->CmdBindDescriptorSets(e->buf, pl->bind_point, pl->pipeline_layout,
0, pl->nb_descriptor_sets,
&pl->desc_sets[e->idx*pl->nb_descriptor_sets],
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 7009104a8f..e03fd702ca 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -226,6 +226,7 @@ typedef struct FFVulkanPipeline {
int nb_descriptor_sets;
/* Descriptor pool */
+ int use_push;
VkDescriptorSet *desc_sets;
VkDescriptorPool desc_pool;
VkDescriptorPoolSize *desc_pool_size;
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index da555b37c7..91dd8b91e0 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -48,6 +48,7 @@ typedef enum FFVulkanExtensions {
FF_VK_EXT_COOP_MATRIX = 1ULL << 16, /* VK_KHR_cooperative_matrix */
FF_VK_EXT_OPTICAL_FLOW = 1ULL << 17, /* VK_NV_optical_flow */
FF_VK_EXT_SHADER_OBJECT = 1ULL << 18, /* VK_EXT_shader_object */
+ FF_VK_EXT_PUSH_DESCRIPTOR = 1ULL << 19, /* VK_KHR_push_descriptor */
FF_VK_EXT_VIDEO_MAINTENANCE_1 = 1ULL << 27, /* VK_KHR_video_maintenance1 */
FF_VK_EXT_VIDEO_ENCODE_QUEUE = 1ULL << 28, /* VK_KHR_video_encode_queue */
@@ -179,6 +180,7 @@ typedef enum FFVulkanExtensions {
\
/* Descriptors */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSets) \
+ MACRO(1, 1, FF_VK_EXT_PUSH_DESCRIPTOR, CmdPushDescriptorSetKHR) \
\
/* Queries */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateQueryPool) \
diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h
index 82ed16c1f0..2022eb320b 100644
--- a/libavutil/vulkan_loader.h
+++ b/libavutil/vulkan_loader.h
@@ -65,6 +65,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
{ VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 },
{ VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 },
{ VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_AV1 },
+ { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR },
};
FFVulkanExtensions mask = 0x0;
More information about the ffmpeg-cvslog
mailing list