[FFmpeg-cvslog] vulkan_decode: use a single execution pool

Lynne git at videolan.org
Sun Dec 22 21:25:54 EET 2024


ffmpeg | branch: master | Lynne <dev at lynne.ee> | Tue Dec  3 18:11:16 2024 +0900| [7239be07bea6cce5a9e09d5dc7ebb1a14e34121f] | committer: Lynne

vulkan_decode: use a single execution pool

Originally, the decoder had a single execution pool, with one
execution context per thread. Execution pools were always intended
to be thread-safe, as long as there were enough execution contexts
in the pool to satisfy all threads.

Due to synchronization issues, the threading part was removed at some
point, and, for decoding, each thread had its own execution pool.
Having a separate execution pool for each thread's context is hacky,
not to mention wasteful.
Most importantly, we *cannot* associate single shaders across multiple
execution pools for a single application. This means that we cannot
use shaders to either apply film grain, or use this framework for
software-defined decoders.

The recent commits added threading capabilities back to the execution
pool, and the number of contexts in each pool was increased. This was
done with the assumption that the execution pool was singular, which
it was not. This led to increased parallelism and number of frames
in flight, which is taxing on memory.

This commit finally restores proper threading behaviour.
The validation layer has issues, which have been reported and are
addressed in the earlier commit.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7239be07bea6cce5a9e09d5dc7ebb1a14e34121f
---

 libavcodec/vulkan_decode.c | 47 +++++++++++++++-------------------------------
 libavcodec/vulkan_decode.h |  2 +-
 2 files changed, 16 insertions(+), 33 deletions(-)

diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 5936c0bc4a..1a5e70b2d6 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -83,25 +83,6 @@ int ff_vk_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
     FFVulkanDecodeContext *src_ctx = src->internal->hwaccel_priv_data;
     FFVulkanDecodeContext *dst_ctx = dst->internal->hwaccel_priv_data;
 
-    if (!dst_ctx->exec_pool.cmd_bufs) {
-        FFVulkanDecodeShared *ctx = src_ctx->shared_ctx;
-
-        const VkVideoProfileInfoKHR *profile = get_video_profile(ctx, dst->codec_id);
-        if (!profile) {
-            av_log(dst, AV_LOG_ERROR, "Video profile missing from frames context!\n");
-            return AVERROR(EINVAL);
-        }
-
-        err = ff_vk_exec_pool_init(&ctx->s, &ctx->qf,
-                                   &dst_ctx->exec_pool,
-                                   src_ctx->exec_pool.pool_size,
-                                   src_ctx->exec_pool.nb_queries,
-                                   VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR, 0,
-                                   profile);
-        if (err < 0)
-            return err;
-    }
-
     av_refstruct_replace(&dst_ctx->shared_ctx, src_ctx->shared_ctx);
 
     if (src_ctx->session_params) {
@@ -293,8 +274,11 @@ void ff_vk_decode_flush(AVCodecContext *avctx)
     };
 
     VkCommandBuffer cmd_buf;
-    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &dec->exec_pool);
-    int had_submission = exec->had_submission;
+    FFVkExecContext *exec;
+    int had_submission;
+
+    exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
+    had_submission = exec->had_submission;
     ff_vk_exec_start(&ctx->s, exec);
     cmd_buf = exec->buf;
 
@@ -345,7 +329,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
     size_t data_size = FFALIGN(vp->slices_size,
                                ctx->caps.minBitstreamBufferSizeAlignment);
 
-    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &dec->exec_pool);
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
 
     /* The current decoding reference has to be bound as an inactive reference */
     VkVideoReferenceSlotInfoKHR *cur_vk_ref;
@@ -354,7 +338,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
     cur_vk_ref[0].slotIndex = -1;
     decode_start.referenceSlotCount++;
 
-    if (dec->exec_pool.nb_queries && exec->had_submission) {
+    if (ctx->exec_pool.nb_queries && exec->had_submission) {
         uint32_t *result;
         ret = ff_vk_exec_get_query(&ctx->s, exec, (void **)&result,
                                    VK_QUERY_RESULT_WAIT_BIT);
@@ -525,14 +509,14 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
     vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start);
 
     /* Start status query */
-    if (dec->exec_pool.nb_queries)
-        vk->CmdBeginQuery(cmd_buf, dec->exec_pool.query_pool, exec->query_idx + 0, 0);
+    if (ctx->exec_pool.nb_queries)
+        vk->CmdBeginQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0, 0);
 
     vk->CmdDecodeVideoKHR(cmd_buf, &vp->decode_info);
 
     /* End status query */
-    if (dec->exec_pool.nb_queries)
-        vk->CmdEndQuery(cmd_buf, dec->exec_pool.query_pool, exec->query_idx + 0);
+    if (ctx->exec_pool.nb_queries)
+        vk->CmdEndQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0);
 
     vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end);
 
@@ -577,6 +561,9 @@ static void free_common(AVRefStructOpaque unused, void *obj)
     FFVulkanContext *s = &ctx->s;
     FFVulkanFunctions *vk = &ctx->s.vkfn;
 
+    /* Wait on and free execution pool */
+    ff_vk_exec_pool_free(&ctx->s, &ctx->exec_pool);
+
     /* This also frees all references from this pool */
     av_frame_free(&ctx->common.layered_frame);
 
@@ -1066,10 +1053,6 @@ int ff_vk_decode_create_params(AVBufferRef **par_ref, void *logctx, FFVulkanDeco
 int ff_vk_decode_uninit(AVCodecContext *avctx)
 {
     FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
-    FFVulkanDecodeShared *ctx = dec->shared_ctx;
-
-    /* Wait on and free execution pool */
-    ff_vk_exec_pool_free(&ctx->s, &dec->exec_pool);
 
     av_freep(&dec->hevc_headers);
     av_buffer_unref(&dec->session_params);
@@ -1159,7 +1142,7 @@ int ff_vk_decode_init(AVCodecContext *avctx)
     /* Create decode exec context for this specific main thread.
      * 2 async contexts per thread was experimentally determined to be optimal
      * for a majority of streams. */
-    err = ff_vk_exec_pool_init(s, &ctx->qf, &dec->exec_pool,
+    err = ff_vk_exec_pool_init(s, &ctx->qf, &ctx->exec_pool,
                                FFMAX(2*ctx->qf.nb_queues, avctx->thread_count),
                                nb_q, VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR, 0,
                                profile);
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index 76e60836b5..f29cc5b162 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -47,6 +47,7 @@ typedef struct FFVulkanDecodeShared {
     FFVulkanContext s;
     FFVkVideoCommon common;
     FFVkQueueFamilyCtx qf;
+    FFVkExecPool exec_pool;
 
     AVBufferPool *buf_pool;
 
@@ -59,7 +60,6 @@ typedef struct FFVulkanDecodeShared {
 typedef struct FFVulkanDecodeContext {
     FFVulkanDecodeShared *shared_ctx;
     AVBufferRef *session_params;
-    FFVkExecPool exec_pool;
 
     int dedicated_dpb; /* Oddity  #1 - separate DPB images */
     int external_fg;   /* Oddity  #2 - hardware can't apply film grain */



More information about the ffmpeg-cvslog mailing list