[FFmpeg-devel] [PATCH v3 2/2] fftools/ffplay: add hwaccel decoding support

Zhao Zhili quinkblack at foxmail.com
Sat Oct 28 21:13:40 EEST 2023


From: Zhao Zhili <zhilizhao at tencent.com>

---
v3: shared vulkan instance between libplacebo and hwcontext

 fftools/ffplay.c          |  43 ++++++
 fftools/ffplay_renderer.c | 316 +++++++++++++++++++++++++++++++++++++-
 fftools/ffplay_renderer.h |   2 +
 3 files changed, 355 insertions(+), 6 deletions(-)

diff --git a/fftools/ffplay.c b/fftools/ffplay.c
index 305d72d8b8..29b37a5e46 100644
--- a/fftools/ffplay.c
+++ b/fftools/ffplay.c
@@ -352,6 +352,7 @@ static int autorotate = 1;
 static int find_stream_info = 1;
 static int filter_nbthreads = 0;
 static int enable_vulkan = 0;
+static const char *hwaccel = NULL;
 
 /* current context */
 static int is_full_screen;
@@ -2557,6 +2558,37 @@ static int audio_open(void *opaque, AVChannelLayout *wanted_channel_layout, int
     return spec.size;
 }
 
+static int create_hwaccel(AVBufferRef **device_ctx)
+{
+    enum AVHWDeviceType type;
+    int ret;
+    AVBufferRef *vk_dev;
+
+    *device_ctx = NULL;
+
+    if (!hwaccel)
+        return 0;
+
+    type = av_hwdevice_find_type_by_name(hwaccel);
+    if (type == AV_HWDEVICE_TYPE_NONE)
+        return AVERROR(ENOTSUP);
+
+    ret = vk_renderer_get_hw_dev(vk_renderer, &vk_dev);
+    if (ret < 0)
+        return ret;
+
+    ret = av_hwdevice_ctx_create_derived(device_ctx, type, vk_dev, 0);
+    if (!ret)
+        return 0;
+
+    if (ret != AVERROR(ENOSYS))
+        return ret;
+
+    av_log(NULL, AV_LOG_WARNING, "Deriving %s from Vulkan is not supported.\n", hwaccel);
+    ret = av_hwdevice_ctx_create(device_ctx, type, NULL, NULL, 0);
+    return ret;
+}
+
 /* open a given stream. Return 0 if OK */
 static int stream_component_open(VideoState *is, int stream_index)
 {
@@ -2624,6 +2656,12 @@ static int stream_component_open(VideoState *is, int stream_index)
 
     av_dict_set(&opts, "flags", "+copy_opaque", AV_DICT_MULTIKEY);
 
+    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+        ret = create_hwaccel(&avctx->hw_device_ctx);
+        if (ret < 0)
+            goto fail;
+    }
+
     if ((ret = avcodec_open2(avctx, codec, &opts)) < 0) {
         goto fail;
     }
@@ -3625,6 +3663,7 @@ static const OptionDef options[] = {
         "read and decode the streams to fill missing information with heuristics" },
     { "filter_threads", HAS_ARG | OPT_INT | OPT_EXPERT, { &filter_nbthreads }, "number of filter threads per graph" },
     { "enable_vulkan", OPT_BOOL, { &enable_vulkan }, "enable vulkan render" },
+    { "hwaccel", HAS_ARG | OPT_STRING | OPT_EXPERT, { &hwaccel }, "use HW accelerated decoding" },
     { NULL, },
 };
 
@@ -3739,6 +3778,10 @@ int main(int argc, char **argv)
 #ifdef SDL_HINT_VIDEO_X11_NET_WM_BYPASS_COMPOSITOR
         SDL_SetHint(SDL_HINT_VIDEO_X11_NET_WM_BYPASS_COMPOSITOR, "0");
 #endif
+        if (hwaccel && !enable_vulkan) {
+            av_log(NULL, AV_LOG_INFO, "Enabling vulkan renderer to support hwaccel %s\n", hwaccel);
+            enable_vulkan = 1;
+        }
         if (enable_vulkan) {
             vk_renderer = vk_get_renderer();
             if (vk_renderer) {
diff --git a/fftools/ffplay_renderer.c b/fftools/ffplay_renderer.c
index 49e5516d5d..497d758ec7 100644
--- a/fftools/ffplay_renderer.c
+++ b/fftools/ffplay_renderer.c
@@ -35,6 +35,8 @@ struct VkRenderer {
 
     int (*create)(VkRenderer *renderer, SDL_Window *window);
 
+    int (*get_hw_dev)(VkRenderer *renderer, AVBufferRef **dev);
+
     int (*display)(VkRenderer *renderer, AVFrame *frame);
 
     int (*resize)(VkRenderer *renderer, int width, int height);
@@ -54,6 +56,13 @@ typedef struct RendererContext {
     pl_tex tex[4];
 
     pl_log vk_log;
+
+    AVBufferRef *hw_device_ref;
+    AVBufferRef *hw_frame_ref;
+    enum AVPixelFormat *transfer_formats;
+    AVHWFramesConstraints *constraints;
+
+    AVFrame *vk_frame;
 } RendererContext;
 
 static void vk_log_cb(void *log_priv, enum pl_log_level level, const char *msg) {
@@ -74,35 +83,46 @@ static void vk_log_cb(void *log_priv, enum pl_log_level level, const char *msg)
 static int create(VkRenderer *renderer, SDL_Window *window)
 {
     int ret = 0;
-    unsigned ext_num = 0;
+    unsigned num_ext = 0;
     const char **ext = NULL;
     int w, h;
     struct pl_log_params vk_log_params = {
             .log_cb = vk_log_cb,
-            .log_level = PL_LOG_WARN,
+            .log_level = PL_LOG_DEBUG,
             .log_priv = renderer,
     };
     RendererContext *ctx = (RendererContext *)renderer;
 
-    if (!SDL_Vulkan_GetInstanceExtensions(window, &ext_num, NULL)) {
+    static const char *opt_exts[] = {
+        VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,
+        VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME,
+        VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME,
+        VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME,
+        VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME,
+        VK_KHR_VIDEO_QUEUE_EXTENSION_NAME,
+        "VK_MESA_video_decode_av1",
+    };
+    const int num_opt_exts = FF_ARRAY_ELEMS(opt_exts);
+
+    if (!SDL_Vulkan_GetInstanceExtensions(window, &num_ext, NULL)) {
         av_log(NULL, AV_LOG_FATAL, "Failed to get vulkan extensions: %s\n", SDL_GetError());
         return AVERROR_EXTERNAL;
     }
 
-    ext = av_calloc(ext_num, sizeof(*ext));
+    ext = av_calloc(num_ext, sizeof(*ext));
     if (!ext) {
         ret = AVERROR(ENOMEM);
         goto out;
     }
 
-    SDL_Vulkan_GetInstanceExtensions(window, &ext_num, ext);
+    SDL_Vulkan_GetInstanceExtensions(window, &num_ext, ext);
 
     ctx->vk_log = pl_log_create(PL_API_VER, &vk_log_params);
     ctx->vk_inst = pl_vk_inst_create(ctx->vk_log, pl_vk_inst_params(
             .get_proc_addr = SDL_Vulkan_GetVkGetInstanceProcAddr(),
             .debug = false,
             .extensions = ext,
-            .num_extensions = ext_num,
+            .num_extensions = num_ext
     ));
     if (!ctx->vk_inst) {
         ret = AVERROR_EXTERNAL;
@@ -119,6 +139,9 @@ static int create(VkRenderer *renderer, SDL_Window *window)
             .get_proc_addr = ctx->vk_inst->get_proc_addr,
             .surface = ctx->vk_surface,
             .allow_software = false,
+            .opt_extensions = opt_exts,
+            .num_opt_extensions = num_opt_exts,
+            .extra_queues = VK_QUEUE_VIDEO_DECODE_BIT_KHR,
     ));
     if (!ctx->pl_vk) {
         ret = AVERROR_EXTERNAL;
@@ -137,6 +160,13 @@ static int create(VkRenderer *renderer, SDL_Window *window)
         ret = AVERROR_EXTERNAL;
         goto out;
     }
+
+    ctx->vk_frame = av_frame_alloc();
+    if (!ctx->vk_frame) {
+        ret = AVERROR(ENOMEM);
+        goto out;
+    }
+
     ret = 0;
 
 out:
@@ -144,6 +174,264 @@ out:
     return ret;
 }
 
+static void lock_queue(struct AVHWDeviceContext *dev_ctx, uint32_t queue_family, uint32_t index)
+{
+    RendererContext *ctx = dev_ctx->user_opaque;
+    pl_vulkan vk = ctx->pl_vk;
+    vk->lock_queue(vk, queue_family, index);
+}
+
+static void unlock_queue(struct AVHWDeviceContext *dev_ctx, uint32_t queue_family, uint32_t index)
+{
+    RendererContext *ctx = dev_ctx->user_opaque;
+    pl_vulkan vk = ctx->pl_vk;
+    vk->unlock_queue(vk, queue_family, index);
+}
+
+static int get_decode_queue(VkRenderer *renderer, int *index, int *count)
+{
+    RendererContext *ctx = (RendererContext *) renderer;
+    VkQueueFamilyProperties *queue_family_prop = NULL;
+    uint32_t num_queue_family_prop = 0;
+    PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties;
+
+    *index = -1;
+    *count = 0;
+    vkGetPhysicalDeviceQueueFamilyProperties = (PFN_vkGetPhysicalDeviceQueueFamilyProperties)
+            ctx->vk_inst->get_proc_addr(ctx->vk_inst->instance,
+                                        "vkGetPhysicalDeviceQueueFamilyProperties");
+    vkGetPhysicalDeviceQueueFamilyProperties(ctx->pl_vk->phys_device, &num_queue_family_prop, NULL);
+    if (!num_queue_family_prop)
+        return AVERROR_EXTERNAL;
+
+    queue_family_prop = av_calloc(num_queue_family_prop, sizeof(*queue_family_prop));
+    if (!queue_family_prop)
+        return AVERROR(ENOMEM);
+
+    vkGetPhysicalDeviceQueueFamilyProperties(ctx->pl_vk->phys_device, &num_queue_family_prop, queue_family_prop);
+
+    for (int i = 0; i < num_queue_family_prop; i++) {
+        if (queue_family_prop[i].queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
+            *index = i;
+            *count = queue_family_prop[i].queueCount;
+            break;
+        }
+    }
+    av_free(queue_family_prop);
+
+    return 0;
+}
+
+static int get_hw_dev(VkRenderer *renderer, AVBufferRef **dev)
+{
+    RendererContext *ctx = (RendererContext *)renderer;
+    AVHWDeviceContext *device_ctx;
+    AVVulkanDeviceContext *vk_dev_ctx;
+    int decode_index;
+    int decode_count;
+    int ret;
+
+    *dev = NULL;
+    if (ctx->hw_device_ref) {
+        *dev = ctx->hw_device_ref;
+        return 0;
+    }
+
+    ctx->hw_device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN);
+    if (!ctx->hw_device_ref) {
+        return AVERROR(ENOMEM);
+    }
+
+    device_ctx = (AVHWDeviceContext *)ctx->hw_device_ref->data;
+    device_ctx->user_opaque = ctx;
+
+    vk_dev_ctx = device_ctx->hwctx;
+    vk_dev_ctx->lock_queue = lock_queue;
+    vk_dev_ctx->unlock_queue = unlock_queue;
+
+    vk_dev_ctx->get_proc_addr = ctx->vk_inst->get_proc_addr;
+
+    vk_dev_ctx->inst = ctx->vk_inst->instance;
+    vk_dev_ctx->phys_dev = ctx->pl_vk->phys_device;
+    vk_dev_ctx->act_dev = ctx->pl_vk->device;
+
+    vk_dev_ctx->device_features = *ctx->pl_vk->features;
+
+    vk_dev_ctx->enabled_inst_extensions = ctx->vk_inst->extensions;
+    vk_dev_ctx->nb_enabled_inst_extensions = ctx->vk_inst->num_extensions;
+
+    vk_dev_ctx->enabled_dev_extensions = ctx->pl_vk->extensions;
+    vk_dev_ctx->nb_enabled_dev_extensions = ctx->pl_vk->num_extensions;
+
+    vk_dev_ctx->queue_family_index = ctx->pl_vk->queue_graphics.index;
+    vk_dev_ctx->nb_graphics_queues = ctx->pl_vk->queue_graphics.count;
+
+    vk_dev_ctx->queue_family_tx_index = ctx->pl_vk->queue_transfer.index;
+    vk_dev_ctx->nb_tx_queues = ctx->pl_vk->queue_transfer.count;
+
+    vk_dev_ctx->queue_family_comp_index = ctx->pl_vk->queue_compute.index;
+    vk_dev_ctx->nb_comp_queues = ctx->pl_vk->queue_compute.count;
+
+    ret = get_decode_queue(renderer, &decode_index, &decode_count);
+    if (ret < 0)
+        return ret;
+
+    vk_dev_ctx->queue_family_decode_index = decode_index;
+    vk_dev_ctx->nb_decode_queues = decode_count;
+
+    ret = av_hwdevice_ctx_init(ctx->hw_device_ref);
+    if (ret < 0)
+        return ret;
+
+    *dev = ctx->hw_device_ref;
+    return 0;
+}
+
+static int create_hw_frame(VkRenderer *renderer, AVFrame *frame)
+{
+    RendererContext *ctx = (RendererContext *)renderer;
+    AVHWFramesContext *src_hw_frame = (AVHWFramesContext *)frame->hw_frames_ctx->data;
+    AVHWFramesContext *hw_frame;
+    AVVulkanFramesContext *vk_frame_ctx;
+    int ret;
+
+    if (ctx->hw_frame_ref) {
+        hw_frame = (AVHWFramesContext *)ctx->hw_frame_ref->data;
+
+        if (hw_frame->width == frame->width &&
+            hw_frame->height == frame->height &&
+            hw_frame->sw_format == src_hw_frame->sw_format)
+            return 0;
+
+        av_buffer_unref(&ctx->hw_frame_ref);
+    }
+
+    if (!ctx->constraints) {
+        ctx->constraints = av_hwdevice_get_hwframe_constraints(ctx->hw_device_ref, NULL);
+        if (!ctx->constraints)
+            return AVERROR(ENOMEM);
+    }
+
+    // Check the constraints and skip creating the hwframe if they aren't met.
+    // Don't treat this as an error, since we can fall back to a memory copy from GPU to CPU.
+    if ((ctx->constraints->max_width && ctx->constraints->max_width < frame->width) ||
+        (ctx->constraints->max_height && ctx->constraints->max_height < frame->height) ||
+        (ctx->constraints->min_width && ctx->constraints->min_width > frame->width) ||
+        (ctx->constraints->min_height && ctx->constraints->min_height > frame->height))
+        return 0;
+
+    if (ctx->constraints->valid_sw_formats) {
+        enum AVPixelFormat *sw_formats = ctx->constraints->valid_sw_formats;
+        while (*sw_formats != AV_PIX_FMT_NONE) {
+            if (*sw_formats == src_hw_frame->sw_format)
+                break;
+            sw_formats++;
+        }
+        if (*sw_formats == AV_PIX_FMT_NONE)
+            return 0;
+    }
+
+    ctx->hw_frame_ref = av_hwframe_ctx_alloc(ctx->hw_device_ref);
+    if (!ctx->hw_frame_ref)
+        return AVERROR(ENOMEM);
+
+    hw_frame = (AVHWFramesContext *)ctx->hw_frame_ref->data;
+    hw_frame->format = AV_PIX_FMT_VULKAN;
+    hw_frame->sw_format = src_hw_frame->sw_format;
+    hw_frame->width = frame->width;
+    hw_frame->height = frame->height;
+
+    if (frame->format == AV_PIX_FMT_CUDA) {
+        vk_frame_ctx = hw_frame->hwctx;
+        vk_frame_ctx->flags = AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE;
+    }
+
+    ret = av_hwframe_ctx_init(ctx->hw_frame_ref);
+    if (ret < 0) {
+        av_log(renderer, AV_LOG_ERROR, "Create hwframe context failed, %s\n", av_err2str(ret));
+        return ret;
+    }
+
+    av_hwframe_transfer_get_formats(ctx->hw_frame_ref, AV_HWFRAME_TRANSFER_DIRECTION_TO,
+                                    &ctx->transfer_formats, 0);
+
+    return 0;
+}
+
+static inline int check_hw_transfer(RendererContext *ctx, AVFrame *frame)
+{
+    if (!ctx->hw_frame_ref || !ctx->transfer_formats)
+        return 0;
+
+    for (int i = 0; ctx->transfer_formats[i] != AV_PIX_FMT_NONE; i++)
+        if (ctx->transfer_formats[i] == frame->format)
+            return 1;
+
+    return 0;
+}
+
+static int transfer_frame(VkRenderer *renderer, AVFrame *frame)
+{
+    RendererContext *ctx = (RendererContext *)renderer;
+    int ret;
+
+    if (!frame->hw_frames_ctx)
+        return 0;
+
+    if (frame->format == AV_PIX_FMT_VULKAN)
+        return 0;
+
+    ret = create_hw_frame(renderer, frame);
+    if (ret < 0)
+        return ret;
+
+    // Try mapping the data first
+    av_frame_unref(ctx->vk_frame);
+    if (ctx->hw_frame_ref) {
+        ctx->vk_frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frame_ref);
+        ctx->vk_frame->format = AV_PIX_FMT_VULKAN;
+    }
+    ret = av_hwframe_map(ctx->vk_frame, frame, AV_HWFRAME_MAP_READ);
+    if (!ret)
+        goto out;
+
+    if (ret != AVERROR(ENOSYS)) {
+        av_log(NULL, AV_LOG_FATAL, "Map data to vulkan failed: %s\n", av_err2str(ret));
+        return ret;
+    }
+
+    // Try transferring to vulkan
+    if (check_hw_transfer(ctx, frame)) {
+        av_frame_unref(ctx->vk_frame);
+        av_hwframe_get_buffer(ctx->hw_frame_ref, ctx->vk_frame, 0);
+        ret = av_hwframe_transfer_data(ctx->vk_frame, frame, 1);
+        if (!ret)
+            goto out;
+
+        if (ret < 0 && ret != AVERROR(ENOSYS)) {
+            av_log(NULL, AV_LOG_FATAL, "Transfer data to vulkan failed: %s\n", av_err2str(ret));
+            return ret;
+        }
+    }
+
+    // Transfer to CPU
+    av_frame_unref(ctx->vk_frame);
+    ret = av_hwframe_transfer_data(ctx->vk_frame, frame, 0);
+    if (ret < 0) {
+        av_log(NULL, AV_LOG_FATAL, "Transfer data to CPU failed: %s\n", av_err2str(ret));
+        return ret;
+    }
+
+out:
+    ret = av_frame_copy_props(ctx->vk_frame, frame);
+    if (ret < 0)
+        return ret;
+    av_frame_unref(frame);
+    av_frame_move_ref(frame, ctx->vk_frame);
+
+    return 0;
+}
+
 static int display(VkRenderer *renderer, AVFrame *frame)
 {
     struct pl_swapchain_frame swap_frame = {0};
@@ -152,6 +440,10 @@ static int display(VkRenderer *renderer, AVFrame *frame)
     RendererContext *ctx = (RendererContext *)renderer;
     int ret = 0;
 
+    ret = transfer_frame(renderer, frame);
+    if (ret < 0)
+        return ret;
+
     if (!pl_map_avframe_ex(ctx->pl_vk->gpu, &pl_frame, pl_avframe_params(
             .frame = frame,
             .tex = ctx->tex))) {
@@ -198,6 +490,12 @@ static void destroy(VkRenderer *renderer)
     PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR;
     RendererContext *ctx = (RendererContext *)renderer;
 
+    av_freep(&ctx->transfer_formats);
+    av_hwframe_constraints_free(&ctx->constraints);
+    av_buffer_unref(&ctx->hw_frame_ref);
+    av_buffer_unref(&ctx->hw_device_ref);
+    av_frame_free(&ctx->vk_frame);
+
     if (ctx->pl_vk) {
         for (int i = 0; i < FF_ARRAY_ELEMS(ctx->tex); i++)
             pl_tex_destroy(ctx->pl_vk->gpu, &ctx->tex[i]);
@@ -233,6 +531,7 @@ VkRenderer *vk_get_renderer(void)
 
     renderer = &ctx->api;
     renderer->class = &vulkan_renderer_class;
+    renderer->get_hw_dev = get_hw_dev;
     renderer->create = create;
     renderer->display = display;
     renderer->resize = resize;
@@ -255,6 +554,11 @@ int vk_renderer_create(VkRenderer *renderer, SDL_Window *window)
     return renderer->create(renderer, window);
 }
 
+int vk_renderer_get_hw_dev(VkRenderer *renderer, AVBufferRef **dev)
+{
+    return renderer->get_hw_dev(renderer, dev);
+}
+
 int vk_renderer_display(VkRenderer *renderer, AVFrame *frame)
 {
     return renderer->display(renderer, frame);
diff --git a/fftools/ffplay_renderer.h b/fftools/ffplay_renderer.h
index 4029f154a9..b982ba0192 100644
--- a/fftools/ffplay_renderer.h
+++ b/fftools/ffplay_renderer.h
@@ -29,6 +29,8 @@ VkRenderer *vk_get_renderer(void);
 
 int vk_renderer_create(VkRenderer *renderer, SDL_Window *window);
 
+int vk_renderer_get_hw_dev(VkRenderer *renderer, AVBufferRef **dev);
+
 int vk_renderer_display(VkRenderer *renderer, AVFrame *frame);
 
 int vk_renderer_resize(VkRenderer *renderer, int width, int height);
-- 
2.42.0



More information about the ffmpeg-devel mailing list