[FFmpeg-devel] [PATCH v3 2/2] fftools/ffplay: add hwaccel decoding support
Zhao Zhili
quinkblack at foxmail.com
Sat Oct 28 21:13:40 EEST 2023
From: Zhao Zhili <zhilizhao at tencent.com>
---
v3: share the Vulkan instance between libplacebo and the hwcontext
fftools/ffplay.c | 43 ++++++
fftools/ffplay_renderer.c | 316 +++++++++++++++++++++++++++++++++++++-
fftools/ffplay_renderer.h | 2 +
3 files changed, 355 insertions(+), 6 deletions(-)
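Design overview: the decoder's hardware device is derived from the renderer's Vulkan device, so decoded frames can stay on the GPU; where derivation or zero-copy mapping is unavailable, the renderer falls back to a device-to-device copy or, as a last resort, a download to system memory. Below is a minimal standalone sketch of the libavutil derive-with-fallback mechanism that create_hwaccel() in this patch builds on; the helper name is hypothetical and VAAPI is used purely as an example:

    #include <libavutil/hwcontext.h>

    /* Sketch only: obtain a decoder device derived from an existing
     * Vulkan device, mirroring the logic of create_hwaccel(). */
    static int make_decoder_device(AVBufferRef *vk_dev, AVBufferRef **out)
    {
        int ret = av_hwdevice_ctx_create_derived(out, AV_HWDEVICE_TYPE_VAAPI,
                                                 vk_dev, 0);
        if (ret != AVERROR(ENOSYS))
            return ret; /* 0 on success, anything else is a real error */

        /* Derivation is not implemented for this pair of device types:
         * create an independent device instead; frames will be mapped
         * or copied into Vulkan at display time. */
        return av_hwdevice_ctx_create(out, AV_HWDEVICE_TYPE_VAAPI,
                                      NULL, NULL, 0);
    }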
diff --git a/fftools/ffplay.c b/fftools/ffplay.c
index 305d72d8b8..29b37a5e46 100644
--- a/fftools/ffplay.c
+++ b/fftools/ffplay.c
@@ -352,6 +352,7 @@ static int autorotate = 1;
static int find_stream_info = 1;
static int filter_nbthreads = 0;
static int enable_vulkan = 0;
+static const char *hwaccel = NULL;
/* current context */
static int is_full_screen;
@@ -2557,6 +2558,37 @@ static int audio_open(void *opaque, AVChannelLayout *wanted_channel_layout, int
return spec.size;
}
+static int create_hwaccel(AVBufferRef **device_ctx)
+{
+ enum AVHWDeviceType type;
+ int ret;
+ AVBufferRef *vk_dev;
+
+ *device_ctx = NULL;
+
+ if (!hwaccel)
+ return 0;
+
+ type = av_hwdevice_find_type_by_name(hwaccel);
+ if (type == AV_HWDEVICE_TYPE_NONE)
+ return AVERROR(ENOTSUP);
+
+ ret = vk_renderer_get_hw_dev(vk_renderer, &vk_dev);
+ if (ret < 0)
+ return ret;
+
+ ret = av_hwdevice_ctx_create_derived(device_ctx, type, vk_dev, 0);
+ if (!ret)
+ return 0;
+
+ if (ret != AVERROR(ENOSYS))
+ return ret;
+
+ av_log(NULL, AV_LOG_WARNING, "Deriving %s from Vulkan is not supported.\n", hwaccel);
+ ret = av_hwdevice_ctx_create(device_ctx, type, NULL, NULL, 0);
+ return ret;
+}
+
/* open a given stream. Return 0 if OK */
static int stream_component_open(VideoState *is, int stream_index)
{
@@ -2624,6 +2656,12 @@ static int stream_component_open(VideoState *is, int stream_index)
av_dict_set(&opts, "flags", "+copy_opaque", AV_DICT_MULTIKEY);
+ if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+ ret = create_hwaccel(&avctx->hw_device_ctx);
+ if (ret < 0)
+ goto fail;
+ }
+
if ((ret = avcodec_open2(avctx, codec, &opts)) < 0) {
goto fail;
}
@@ -3625,6 +3663,7 @@ static const OptionDef options[] = {
"read and decode the streams to fill missing information with heuristics" },
{ "filter_threads", HAS_ARG | OPT_INT | OPT_EXPERT, { &filter_nbthreads }, "number of filter threads per graph" },
{ "enable_vulkan", OPT_BOOL, { &enable_vulkan }, "enable vulkan render" },
+ { "hwaccel", HAS_ARG | OPT_STRING | OPT_EXPERT, { &hwaccel }, "use HW accelerated decoding" },
{ NULL, },
};
@@ -3739,6 +3778,10 @@ int main(int argc, char **argv)
#ifdef SDL_HINT_VIDEO_X11_NET_WM_BYPASS_COMPOSITOR
SDL_SetHint(SDL_HINT_VIDEO_X11_NET_WM_BYPASS_COMPOSITOR, "0");
#endif
+ if (hwaccel && !enable_vulkan) {
+ av_log(NULL, AV_LOG_INFO, "Enabling the Vulkan renderer to support hwaccel %s\n", hwaccel);
+ enable_vulkan = 1;
+ }
if (enable_vulkan) {
vk_renderer = vk_get_renderer();
if (vk_renderer) {
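Note that, per the hunk above, specifying -hwaccel implicitly enables the Vulkan renderer, so with this patch applied an invocation would look like, for example, "ffplay -hwaccel cuda input.mkv" or "ffplay -hwaccel videotoolbox input.mp4"; which device types actually work depends on the build and the platform.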
diff --git a/fftools/ffplay_renderer.c b/fftools/ffplay_renderer.c
index 49e5516d5d..497d758ec7 100644
--- a/fftools/ffplay_renderer.c
+++ b/fftools/ffplay_renderer.c
@@ -35,6 +35,8 @@ struct VkRenderer {
int (*create)(VkRenderer *renderer, SDL_Window *window);
+ int (*get_hw_dev)(VkRenderer *renderer, AVBufferRef **dev);
+
int (*display)(VkRenderer *renderer, AVFrame *frame);
int (*resize)(VkRenderer *renderer, int width, int height);
@@ -54,6 +56,13 @@ typedef struct RendererContext {
pl_tex tex[4];
pl_log vk_log;
+
+ AVBufferRef *hw_device_ref;
+ AVBufferRef *hw_frame_ref;
+ enum AVPixelFormat *transfer_formats;
+ AVHWFramesConstraints *constraints;
+
+ AVFrame *vk_frame;
} RendererContext;
static void vk_log_cb(void *log_priv, enum pl_log_level level, const char *msg) {
@@ -74,35 +83,46 @@ static void vk_log_cb(void *log_priv, enum pl_log_level level, const char *msg)
static int create(VkRenderer *renderer, SDL_Window *window)
{
int ret = 0;
- unsigned ext_num = 0;
+ unsigned num_ext = 0;
const char **ext = NULL;
int w, h;
struct pl_log_params vk_log_params = {
.log_cb = vk_log_cb,
- .log_level = PL_LOG_WARN,
+ .log_level = PL_LOG_DEBUG,
.log_priv = renderer,
};
RendererContext *ctx = (RendererContext *)renderer;
- if (!SDL_Vulkan_GetInstanceExtensions(window, &ext_num, NULL)) {
+ static const char *opt_exts[] = {
+ VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,
+ VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME,
+ VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME,
+ VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME,
+ VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME,
+ VK_KHR_VIDEO_QUEUE_EXTENSION_NAME,
+ "VK_MESA_video_decode_av1",
+ };
+ const int num_opt_exts = FF_ARRAY_ELEMS(opt_exts);
+
+ if (!SDL_Vulkan_GetInstanceExtensions(window, &num_ext, NULL)) {
av_log(NULL, AV_LOG_FATAL, "Failed to get vulkan extensions: %s\n", SDL_GetError());
return AVERROR_EXTERNAL;
}
- ext = av_calloc(ext_num, sizeof(*ext));
+ ext = av_calloc(num_ext, sizeof(*ext));
if (!ext) {
ret = AVERROR(ENOMEM);
goto out;
}
- SDL_Vulkan_GetInstanceExtensions(window, &ext_num, ext);
+ SDL_Vulkan_GetInstanceExtensions(window, &num_ext, ext);
ctx->vk_log = pl_log_create(PL_API_VER, &vk_log_params);
ctx->vk_inst = pl_vk_inst_create(ctx->vk_log, pl_vk_inst_params(
.get_proc_addr = SDL_Vulkan_GetVkGetInstanceProcAddr(),
.debug = false,
.extensions = ext,
- .num_extensions = ext_num,
+ .num_extensions = num_ext
));
if (!ctx->vk_inst) {
ret = AVERROR_EXTERNAL;
@@ -119,6 +139,9 @@ static int create(VkRenderer *renderer, SDL_Window *window)
.get_proc_addr = ctx->vk_inst->get_proc_addr,
.surface = ctx->vk_surface,
.allow_software = false,
+ .opt_extensions = opt_exts,
+ .num_opt_extensions = num_opt_exts,
+ .extra_queues = VK_QUEUE_VIDEO_DECODE_BIT_KHR,
));
if (!ctx->pl_vk) {
ret = AVERROR_EXTERNAL;
@@ -137,6 +160,13 @@ static int create(VkRenderer *renderer, SDL_Window *window)
ret = AVERROR_EXTERNAL;
goto out;
}
+
+ ctx->vk_frame = av_frame_alloc();
+ if (!ctx->vk_frame) {
+ ret = AVERROR(ENOMEM);
+ goto out;
+ }
+
ret = 0;
out:
@@ -144,6 +174,264 @@ out:
return ret;
}
+static void lock_queue(struct AVHWDeviceContext *dev_ctx, uint32_t queue_family, uint32_t index)
+{
+ RendererContext *ctx = dev_ctx->user_opaque;
+ pl_vulkan vk = ctx->pl_vk;
+ vk->lock_queue(vk, queue_family, index);
+}
+
+static void unlock_queue(struct AVHWDeviceContext *dev_ctx, uint32_t queue_family, uint32_t index)
+{
+ RendererContext *ctx = dev_ctx->user_opaque;
+ pl_vulkan vk = ctx->pl_vk;
+ vk->unlock_queue(vk, queue_family, index);
+}
+
+static int get_decode_queue(VkRenderer *renderer, int *index, int *count)
+{
+ RendererContext *ctx = (RendererContext *) renderer;
+ VkQueueFamilyProperties *queue_family_prop = NULL;
+ uint32_t num_queue_family_prop = 0;
+ PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties;
+
+ *index = -1;
+ *count = 0;
+ vkGetPhysicalDeviceQueueFamilyProperties = (PFN_vkGetPhysicalDeviceQueueFamilyProperties)
+ ctx->vk_inst->get_proc_addr(ctx->vk_inst->instance,
+ "vkGetPhysicalDeviceQueueFamilyProperties");
+ vkGetPhysicalDeviceQueueFamilyProperties(ctx->pl_vk->phys_device, &num_queue_family_prop, NULL);
+ if (!num_queue_family_prop)
+ return AVERROR_EXTERNAL;
+
+ queue_family_prop = av_calloc(num_queue_family_prop, sizeof(*queue_family_prop));
+ if (!queue_family_prop)
+ return AVERROR(ENOMEM);
+
+ vkGetPhysicalDeviceQueueFamilyProperties(ctx->pl_vk->phys_device, &num_queue_family_prop, queue_family_prop);
+
+ for (int i = 0; i < num_queue_family_prop; i++) {
+ if (queue_family_prop[i].queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
+ *index = i;
+ *count = queue_family_prop[i].queueCount;
+ break;
+ }
+ }
+ av_free(queue_family_prop);
+
+ return 0;
+}
+
+static int get_hw_dev(VkRenderer *renderer, AVBufferRef **dev)
+{
+ RendererContext *ctx = (RendererContext *)renderer;
+ AVHWDeviceContext *device_ctx;
+ AVVulkanDeviceContext *vk_dev_ctx;
+ int decode_index;
+ int decode_count;
+ int ret;
+
+ *dev = NULL;
+ if (ctx->hw_device_ref) {
+ *dev = ctx->hw_device_ref;
+ return 0;
+ }
+
+ ctx->hw_device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN);
+ if (!ctx->hw_device_ref) {
+ return AVERROR(ENOMEM);
+ }
+
+ device_ctx = (AVHWDeviceContext *)ctx->hw_device_ref->data;
+ device_ctx->user_opaque = ctx;
+
+ vk_dev_ctx = device_ctx->hwctx;
+ vk_dev_ctx->lock_queue = lock_queue;
+ vk_dev_ctx->unlock_queue = unlock_queue;
+
+ vk_dev_ctx->get_proc_addr = ctx->vk_inst->get_proc_addr;
+
+ vk_dev_ctx->inst = ctx->vk_inst->instance;
+ vk_dev_ctx->phys_dev = ctx->pl_vk->phys_device;
+ vk_dev_ctx->act_dev = ctx->pl_vk->device;
+
+ vk_dev_ctx->device_features = *ctx->pl_vk->features;
+
+ vk_dev_ctx->enabled_inst_extensions = ctx->vk_inst->extensions;
+ vk_dev_ctx->nb_enabled_inst_extensions = ctx->vk_inst->num_extensions;
+
+ vk_dev_ctx->enabled_dev_extensions = ctx->pl_vk->extensions;
+ vk_dev_ctx->nb_enabled_dev_extensions = ctx->pl_vk->num_extensions;
+
+ vk_dev_ctx->queue_family_index = ctx->pl_vk->queue_graphics.index;
+ vk_dev_ctx->nb_graphics_queues = ctx->pl_vk->queue_graphics.count;
+
+ vk_dev_ctx->queue_family_tx_index = ctx->pl_vk->queue_transfer.index;
+ vk_dev_ctx->nb_tx_queues = ctx->pl_vk->queue_transfer.count;
+
+ vk_dev_ctx->queue_family_comp_index = ctx->pl_vk->queue_compute.index;
+ vk_dev_ctx->nb_comp_queues = ctx->pl_vk->queue_compute.count;
+
+ ret = get_decode_queue(renderer, &decode_index, &decode_count);
+ if (ret < 0)
+ return ret;
+
+ vk_dev_ctx->queue_family_decode_index = decode_index;
+ vk_dev_ctx->nb_decode_queues = decode_count;
+
+ ret = av_hwdevice_ctx_init(ctx->hw_device_ref);
+ if (ret < 0)
+ return ret;
+
+ *dev = ctx->hw_device_ref;
+ return 0;
+}
+
+static int create_hw_frame(VkRenderer *renderer, AVFrame *frame)
+{
+ RendererContext *ctx = (RendererContext *)renderer;
+ AVHWFramesContext *src_hw_frame = (AVHWFramesContext *)frame->hw_frames_ctx->data;
+ AVHWFramesContext *hw_frame;
+ AVVulkanFramesContext *vk_frame_ctx;
+ int ret;
+
+ if (ctx->hw_frame_ref) {
+ hw_frame = (AVHWFramesContext *)ctx->hw_frame_ref->data;
+
+ if (hw_frame->width == frame->width &&
+ hw_frame->height == frame->height &&
+ hw_frame->sw_format == src_hw_frame->sw_format)
+ return 0;
+
+ av_buffer_unref(&ctx->hw_frame_ref);
+ }
+
+ if (!ctx->constraints) {
+ ctx->constraints = av_hwdevice_get_hwframe_constraints(ctx->hw_device_ref, NULL);
+ if (!ctx->constraints)
+ return AVERROR(ENOMEM);
+ }
+
+ // Check the constraints and skip creating the hwframe context if they
+ // are not met. This is not an error, since we can fall back to a copy
+ // through CPU memory.
+ if ((ctx->constraints->max_width && ctx->constraints->max_width < frame->width) ||
+ (ctx->constraints->max_height && ctx->constraints->max_height < frame->height) ||
+ (ctx->constraints->min_width && ctx->constraints->min_width > frame->width) ||
+ (ctx->constraints->min_height && ctx->constraints->min_height > frame->height))
+ return 0;
+
+ if (ctx->constraints->valid_sw_formats) {
+ enum AVPixelFormat *sw_formats = ctx->constraints->valid_sw_formats;
+ while (*sw_formats != AV_PIX_FMT_NONE) {
+ if (*sw_formats == src_hw_frame->sw_format)
+ break;
+ sw_formats++;
+ }
+ if (*sw_formats == AV_PIX_FMT_NONE)
+ return 0;
+ }
+
+ ctx->hw_frame_ref = av_hwframe_ctx_alloc(ctx->hw_device_ref);
+ if (!ctx->hw_frame_ref)
+ return AVERROR(ENOMEM);
+
+ hw_frame = (AVHWFramesContext *)ctx->hw_frame_ref->data;
+ hw_frame->format = AV_PIX_FMT_VULKAN;
+ hw_frame->sw_format = src_hw_frame->sw_format;
+ hw_frame->width = frame->width;
+ hw_frame->height = frame->height;
+
+ if (frame->format == AV_PIX_FMT_CUDA) {
+ vk_frame_ctx = hw_frame->hwctx;
+ vk_frame_ctx->flags = AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE;
+ }
+
+ ret = av_hwframe_ctx_init(ctx->hw_frame_ref);
+ if (ret < 0) {
+ av_log(renderer, AV_LOG_ERROR, "Failed to create hwframe context: %s\n", av_err2str(ret));
+ return ret;
+ }
+
+ av_hwframe_transfer_get_formats(ctx->hw_frame_ref, AV_HWFRAME_TRANSFER_DIRECTION_TO,
+ &ctx->transfer_formats, 0);
+
+ return 0;
+}
+
+static inline int check_hw_transfer(RendererContext *ctx, AVFrame *frame)
+{
+ if (!ctx->hw_frame_ref || !ctx->transfer_formats)
+ return 0;
+
+ for (int i = 0; ctx->transfer_formats[i] != AV_PIX_FMT_NONE; i++)
+ if (ctx->transfer_formats[i] == frame->format)
+ return 1;
+
+ return 0;
+}
+
+static int transfer_frame(VkRenderer *renderer, AVFrame *frame)
+{
+ RendererContext *ctx = (RendererContext *)renderer;
+ int ret;
+
+ if (!frame->hw_frames_ctx)
+ return 0;
+
+ if (frame->format == AV_PIX_FMT_VULKAN)
+ return 0;
+
+ ret = create_hw_frame(renderer, frame);
+ if (ret < 0)
+ return ret;
+
+ // Try mapping the data first
+ av_frame_unref(ctx->vk_frame);
+ if (ctx->hw_frame_ref) {
+ ctx->vk_frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frame_ref);
+ ctx->vk_frame->format = AV_PIX_FMT_VULKAN;
+ }
+ ret = av_hwframe_map(ctx->vk_frame, frame, AV_HWFRAME_MAP_READ);
+ if (!ret)
+ goto out;
+
+ if (ret != AVERROR(ENOSYS)) {
+ av_log(NULL, AV_LOG_FATAL, "Map data to vulkan failed: %s\n", av_err2str(ret));
+ return ret;
+ }
+
+ // Try a hardware transfer to Vulkan
+ if (check_hw_transfer(ctx, frame)) {
+ av_frame_unref(ctx->vk_frame);
+ ret = av_hwframe_get_buffer(ctx->hw_frame_ref, ctx->vk_frame, 0);
+ if (ret < 0)
+ return ret;
+ ret = av_hwframe_transfer_data(ctx->vk_frame, frame, 1);
+ if (!ret)
+ goto out;
+
+ if (ret < 0 && ret != AVERROR(ENOSYS)) {
+ av_log(NULL, AV_LOG_FATAL, "Transfer data to vulkan failed: %s\n", av_err2str(ret));
+ return ret;
+ }
+ }
+
+ // Last resort: transfer to CPU memory
+ av_frame_unref(ctx->vk_frame);
+ ret = av_hwframe_transfer_data(ctx->vk_frame, frame, 0);
+ if (ret < 0) {
+ av_log(NULL, AV_LOG_FATAL, "Transfer data to CPU failed: %s\n", av_err2str(ret));
+ return ret;
+ }
+
+out:
+ ret = av_frame_copy_props(ctx->vk_frame, frame);
+ if (ret < 0)
+ return ret;
+ av_frame_unref(frame);
+ av_frame_move_ref(frame, ctx->vk_frame);
+
+ return 0;
+}
+
static int display(VkRenderer *renderer, AVFrame *frame)
{
struct pl_swapchain_frame swap_frame = {0};
@@ -152,6 +440,10 @@ static int display(VkRenderer *renderer, AVFrame *frame)
RendererContext *ctx = (RendererContext *)renderer;
int ret = 0;
+ ret = transfer_frame(renderer, frame);
+ if (ret < 0)
+ return ret;
+
if (!pl_map_avframe_ex(ctx->pl_vk->gpu, &pl_frame, pl_avframe_params(
.frame = frame,
.tex = ctx->tex))) {
@@ -198,6 +490,12 @@ static void destroy(VkRenderer *renderer)
PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR;
RendererContext *ctx = (RendererContext *)renderer;
+ av_freep(&ctx->transfer_formats);
+ av_hwframe_constraints_free(&ctx->constraints);
+ av_buffer_unref(&ctx->hw_frame_ref);
+ av_buffer_unref(&ctx->hw_device_ref);
+ av_frame_free(&ctx->vk_frame);
+
if (ctx->pl_vk) {
for (int i = 0; i < FF_ARRAY_ELEMS(ctx->tex); i++)
pl_tex_destroy(ctx->pl_vk->gpu, &ctx->tex[i]);
@@ -233,6 +531,7 @@ VkRenderer *vk_get_renderer(void)
renderer = &ctx->api;
renderer->class = &vulkan_renderer_class;
+ renderer->get_hw_dev = get_hw_dev;
renderer->create = create;
renderer->display = display;
renderer->resize = resize;
@@ -255,6 +554,11 @@ int vk_renderer_create(VkRenderer *renderer, SDL_Window *window)
return renderer->create(renderer, window);
}
+int vk_renderer_get_hw_dev(VkRenderer *renderer, AVBufferRef **dev)
+{
+ return renderer->get_hw_dev(renderer, dev);
+}
+
int vk_renderer_display(VkRenderer *renderer, AVFrame *frame)
{
return renderer->display(renderer, frame);
diff --git a/fftools/ffplay_renderer.h b/fftools/ffplay_renderer.h
index 4029f154a9..b982ba0192 100644
--- a/fftools/ffplay_renderer.h
+++ b/fftools/ffplay_renderer.h
@@ -29,6 +29,8 @@ VkRenderer *vk_get_renderer(void);
int vk_renderer_create(VkRenderer *renderer, SDL_Window *window);
+int vk_renderer_get_hw_dev(VkRenderer *renderer, AVBufferRef **dev);
+
int vk_renderer_display(VkRenderer *renderer, AVFrame *frame);
int vk_renderer_resize(VkRenderer *renderer, int width, int height);
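Two details of the renderer changes deserve a remark. First, the lock_queue()/unlock_queue() callbacks are installed because libplacebo and FFmpeg's Vulkan code now submit work to the same VkQueues from different threads; routing the hwcontext's queue locking through pl_vulkan's own locks keeps vkQueueSubmit calls on a given queue serialized. Second, transfer_frame() tries three paths in order: zero-copy mapping, a device-to-device copy into a freshly allocated Vulkan frame, and a download to system memory. A condensed sketch of that chain follows; the helper name is hypothetical and error handling is simplified relative to the patch:

    #include <libavutil/frame.h>
    #include <libavutil/hwcontext.h>

    /* src: decoded hardware frame; vk_frames: initialized
     * AV_PIX_FMT_VULKAN hwframes context; dst: empty AVFrame. */
    static int to_vulkan(AVBufferRef *vk_frames, AVFrame *src, AVFrame *dst)
    {
        int ret;

        /* 1. Zero-copy: import the foreign surface as a Vulkan image. */
        dst->format = AV_PIX_FMT_VULKAN;
        dst->hw_frames_ctx = av_buffer_ref(vk_frames);
        if (!dst->hw_frames_ctx)
            return AVERROR(ENOMEM);
        if (av_hwframe_map(dst, src, AV_HWFRAME_MAP_READ) == 0)
            return 0;

        /* 2. GPU-to-GPU copy (the patch additionally consults
         * av_hwframe_transfer_get_formats() before taking this path). */
        av_frame_unref(dst);
        ret = av_hwframe_get_buffer(vk_frames, dst, 0);
        if (ret >= 0 && av_hwframe_transfer_data(dst, src, 0) == 0)
            return 0;

        /* 3. Last resort: download to system memory; the renderer
         * uploads it again when mapping the frame for display. */
        av_frame_unref(dst);
        return av_hwframe_transfer_data(dst, src, 0);
    }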
--
2.42.0