[FFmpeg-cvslog] bwdif_vulkan: convert to storage images

Lynne git at videolan.org
Tue Feb 18 11:45:23 EET 2025


ffmpeg | branch: master | Lynne <dev at lynne.ee> | Mon Feb 17 18:22:45 2025 +0100| [779763181f85bf25c53af5789d8b9b99d3bb0d1d] | committer: Lynne

bwdif_vulkan: convert to storage images

texture() uses bilinear scaling; imageLoad() accesses the image directly.
The reason why texture() was used throughout Vulkan filters is that
back when they were written, they were targeting old Intel hardware,
which had a texel cache only for sampled images.

These days, GPUs have a generic cache that doesn't care what source it
gets populated with. Additionally, bypassing the sampling circuitry saves
us some performance.

Finally, all the old texture() code had an issue where unnormalized
coordinates were used, but an offset of 0.5 was not added, hence each
pixel ended up being interpolated. Switching to imageLoad() fixes this.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=779763181f85bf25c53af5789d8b9b99d3bb0d1d
---

 libavfilter/vf_bwdif_vulkan.c | 26 +++++++----------
 libavfilter/vulkan/bwdif.comp | 68 +++++++++++++++++++++----------------------
 2 files changed, 45 insertions(+), 49 deletions(-)

diff --git a/libavfilter/vf_bwdif_vulkan.c b/libavfilter/vf_bwdif_vulkan.c
index 0afe8ac0ed..549e814886 100644
--- a/libavfilter/vf_bwdif_vulkan.c
+++ b/libavfilter/vf_bwdif_vulkan.c
@@ -34,7 +34,6 @@ typedef struct BWDIFVulkanContext {
     int initialized;
     FFVkExecPool e;
     AVVulkanDeviceQueueFamily *qf;
-    VkSampler sampler;
     FFVulkanShader shd;
 } BWDIFVulkanContext;
 
@@ -73,7 +72,6 @@ static av_cold int init_filter(AVFilterContext *ctx)
     }
 
     RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL));
-    RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
 
     RET(ff_vk_shader_init(vkctx, &s->shd, "bwdif",
                           VK_SHADER_STAGE_COMPUTE_BIT,
@@ -85,27 +83,30 @@ static av_cold int init_filter(AVFilterContext *ctx)
     desc = (FFVulkanDescriptorSetBinding []) {
         {
             .name       = "prev",
-            .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT),
+            .mem_quali  = "readonly",
             .dimensions = 2,
             .elems      = planes,
             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-            .samplers   = DUP_SAMPLER(s->sampler),
         },
         {
             .name       = "cur",
-            .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT),
+            .mem_quali  = "readonly",
             .dimensions = 2,
             .elems      = planes,
             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-            .samplers   = DUP_SAMPLER(s->sampler),
         },
         {
             .name       = "next",
-            .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT),
+            .mem_quali  = "readonly",
             .dimensions = 2,
             .elems      = planes,
             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-            .samplers   = DUP_SAMPLER(s->sampler),
         },
         {
             .name       = "dst",
@@ -166,7 +167,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
             GLSLC(2, if (!IS_WITHIN(pos, size))                                    );
             GLSLC(3,     return;                                                   );
         }
-        GLSLF(2,     imageStore(dst[%i], pos, texture(cur[%i], pos));              ,i, i);
+        GLSLF(2,     imageStore(dst[%i], pos, imageLoad(cur[%i], pos));            ,i, i);
     }
     GLSLC(1,     }                                                                 );
     GLSLC(0, }                                                                     );
@@ -201,7 +202,7 @@ static void bwdif_vulkan_filter_frame(AVFilterContext *ctx, AVFrame *dst,
 
     ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->shd, dst,
                              (AVFrame *[]){ y->prev, y->cur, y->next }, 3,
-                             s->sampler, &params, sizeof(params));
+                             VK_NULL_HANDLE, &params, sizeof(params));
 
     if (y->current_field == YADIF_FIELD_END)
         y->current_field = YADIF_FIELD_NORMAL;
@@ -211,15 +212,10 @@ static void bwdif_vulkan_uninit(AVFilterContext *avctx)
 {
     BWDIFVulkanContext *s = avctx->priv;
     FFVulkanContext *vkctx = &s->vkctx;
-    FFVulkanFunctions *vk = &vkctx->vkfn;
 
     ff_vk_exec_pool_free(vkctx, &s->e);
     ff_vk_shader_free(vkctx, &s->shd);
 
-    if (s->sampler)
-        vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
-                           vkctx->hwctx->alloc);
-
     ff_vk_uninit(&s->vkctx);
 
     ff_yadif_uninit(avctx);
diff --git a/libavfilter/vulkan/bwdif.comp b/libavfilter/vulkan/bwdif.comp
index aec58c656b..5c988f472e 100644
--- a/libavfilter/vulkan/bwdif.comp
+++ b/libavfilter/vulkan/bwdif.comp
@@ -30,10 +30,10 @@ vec4 process_intra(vec4 cur[4])
 void process_plane_intra(int idx, ivec2 pos)
 {
     vec4 dcur[4];
-    dcur[0] = texture(cur[idx], pos - ivec2(0, 3));
-    dcur[1] = texture(cur[idx], pos - ivec2(0, 1));
-    dcur[2] = texture(cur[idx], pos + ivec2(0, 1));
-    dcur[3] = texture(cur[idx], pos + ivec2(0, 3));
+    dcur[0] = imageLoad(cur[idx], pos - ivec2(0, 3));
+    dcur[1] = imageLoad(cur[idx], pos - ivec2(0, 1));
+    dcur[2] = imageLoad(cur[idx], pos + ivec2(0, 1));
+    dcur[3] = imageLoad(cur[idx], pos + ivec2(0, 3));
     imageStore(dst[idx], pos, process_intra(dcur));
 }
 
@@ -81,41 +81,41 @@ void process_plane(int idx, const ivec2 pos, bool filter_field,
     vec4 prev2[5];
     vec4 next2[5];
 
-    dcur[0] = texture(cur[idx], pos - ivec2(0, 3));
-    dcur[1] = texture(cur[idx], pos - ivec2(0, 1));
-    dcur[2] = texture(cur[idx], pos + ivec2(0, 1));
-    dcur[3] = texture(cur[idx], pos + ivec2(0, 3));
+    dcur[0] = imageLoad(cur[idx], pos - ivec2(0, 3));
+    dcur[1] = imageLoad(cur[idx], pos - ivec2(0, 1));
+    dcur[2] = imageLoad(cur[idx], pos + ivec2(0, 1));
+    dcur[3] = imageLoad(cur[idx], pos + ivec2(0, 3));
 
-    prev1[0] = texture(prev[idx], pos - ivec2(0, 1));
-    prev1[1] = texture(prev[idx], pos + ivec2(0, 1));
+    prev1[0] = imageLoad(prev[idx], pos - ivec2(0, 1));
+    prev1[1] = imageLoad(prev[idx], pos + ivec2(0, 1));
 
-    next1[0] = texture(next[idx], pos - ivec2(0, 1));
-    next1[1] = texture(next[idx], pos + ivec2(0, 1));
+    next1[0] = imageLoad(next[idx], pos - ivec2(0, 1));
+    next1[1] = imageLoad(next[idx], pos + ivec2(0, 1));
 
     if (field_parity) {
-        prev2[0] = texture(prev[idx], pos - ivec2(0, 4));
-        prev2[1] = texture(prev[idx], pos - ivec2(0, 2));
-        prev2[2] = texture(prev[idx], pos);
-        prev2[3] = texture(prev[idx], pos + ivec2(0, 2));
-        prev2[4] = texture(prev[idx], pos + ivec2(0, 4));
-
-        next2[0] = texture(cur[idx], pos - ivec2(0, 4));
-        next2[1] = texture(cur[idx], pos - ivec2(0, 2));
-        next2[2] = texture(cur[idx], pos);
-        next2[3] = texture(cur[idx], pos + ivec2(0, 2));
-        next2[4] = texture(cur[idx], pos + ivec2(0, 4));
+        prev2[0] = imageLoad(prev[idx], pos - ivec2(0, 4));
+        prev2[1] = imageLoad(prev[idx], pos - ivec2(0, 2));
+        prev2[2] = imageLoad(prev[idx], pos);
+        prev2[3] = imageLoad(prev[idx], pos + ivec2(0, 2));
+        prev2[4] = imageLoad(prev[idx], pos + ivec2(0, 4));
+
+        next2[0] = imageLoad(cur[idx], pos - ivec2(0, 4));
+        next2[1] = imageLoad(cur[idx], pos - ivec2(0, 2));
+        next2[2] = imageLoad(cur[idx], pos);
+        next2[3] = imageLoad(cur[idx], pos + ivec2(0, 2));
+        next2[4] = imageLoad(cur[idx], pos + ivec2(0, 4));
     } else {
-        prev2[0] = texture(cur[idx], pos - ivec2(0, 4));
-        prev2[1] = texture(cur[idx], pos - ivec2(0, 2));
-        prev2[2] = texture(cur[idx], pos);
-        prev2[3] = texture(cur[idx], pos + ivec2(0, 2));
-        prev2[4] = texture(cur[idx], pos + ivec2(0, 4));
-
-        next2[0] = texture(next[idx], pos - ivec2(0, 4));
-        next2[1] = texture(next[idx], pos - ivec2(0, 2));
-        next2[2] = texture(next[idx], pos);
-        next2[3] = texture(next[idx], pos + ivec2(0, 2));
-        next2[4] = texture(next[idx], pos + ivec2(0, 4));
+        prev2[0] = imageLoad(cur[idx], pos - ivec2(0, 4));
+        prev2[1] = imageLoad(cur[idx], pos - ivec2(0, 2));
+        prev2[2] = imageLoad(cur[idx], pos);
+        prev2[3] = imageLoad(cur[idx], pos + ivec2(0, 2));
+        prev2[4] = imageLoad(cur[idx], pos + ivec2(0, 4));
+
+        next2[0] = imageLoad(next[idx], pos - ivec2(0, 4));
+        next2[1] = imageLoad(next[idx], pos - ivec2(0, 2));
+        next2[2] = imageLoad(next[idx], pos);
+        next2[3] = imageLoad(next[idx], pos + ivec2(0, 2));
+        next2[4] = imageLoad(next[idx], pos + ivec2(0, 4));
     }
 
     imageStore(dst[idx], pos, process_line(prev2, prev1, dcur, next1, next2));



More information about the ffmpeg-cvslog mailing list