[FFmpeg-devel] [PATCH] avcodec/d3d12va_encode: texture array support for HEVC
Araz Iusubov
primeadvice at gmail.com
Tue May 6 19:54:59 EEST 2025
This patch adds support for the texture array feature
used by AMD boards in the D3D12 HEVC encoder.
In texture array mode, a single texture array is shared by all
reference and reconstructed pictures, each using a different subresource.
The existing per-picture texture path is kept for devices that do not
require texture arrays, and the patch has been successfully tested on
AMD, Intel, and NVIDIA GPUs.
---
libavcodec/d3d12va_encode.c | 241 +++++++++++++++++++++++++------
libavcodec/d3d12va_encode.h | 29 ++++
libavcodec/d3d12va_encode_hevc.c | 5 +-
3 files changed, 231 insertions(+), 44 deletions(-)
diff --git a/libavcodec/d3d12va_encode.c b/libavcodec/d3d12va_encode.c
index 4d738200fe..580d2ea383 100644
--- a/libavcodec/d3d12va_encode.c
+++ b/libavcodec/d3d12va_encode.c
@@ -264,6 +264,11 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
av_log(avctx, AV_LOG_DEBUG, "Input surface is %p.\n", pic->input_surface->texture);
+ if (ctx->is_texture_array) {
+ base_pic->recon_image->data[0] = ctx->texture_array_frame;
+ pic->subresource_index = (ctx->subresource_used_index++) % ctx->max_subresource_array_size;
+ }
+
pic->recon_surface = (AVD3D12VAFrame *)base_pic->recon_image->data[0];
av_log(avctx, AV_LOG_DEBUG, "Recon surface is %p.\n",
pic->recon_surface->texture);
@@ -325,11 +330,28 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
goto fail;
}
+ if (ctx->is_texture_array) {
+ d3d12_refs.pSubresources = av_calloc(d3d12_refs.NumTexture2Ds,
+ sizeof(*d3d12_refs.pSubresources));
+ if (!d3d12_refs.pSubresources) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+ }
+
i = 0;
- for (j = 0; j < base_pic->nb_refs[0]; j++)
- d3d12_refs.ppTexture2Ds[i++] = ((D3D12VAEncodePicture *)base_pic->refs[0][j]->priv)->recon_surface->texture;
- for (j = 0; j < base_pic->nb_refs[1]; j++)
- d3d12_refs.ppTexture2Ds[i++] = ((D3D12VAEncodePicture *)base_pic->refs[1][j]->priv)->recon_surface->texture;
+ for (j = 0; j < base_pic->nb_refs[0]; j++) {
+ d3d12_refs.ppTexture2Ds[i] = ((D3D12VAEncodePicture *)base_pic->refs[0][j]->priv)->recon_surface->texture;
+ if (ctx->is_texture_array)
+ d3d12_refs.pSubresources[i] = ((D3D12VAEncodePicture *)base_pic->refs[0][j]->priv)->subresource_index;
+ i++;
+ }
+ for (j = 0; j < base_pic->nb_refs[1]; j++) {
+ d3d12_refs.ppTexture2Ds[i] = ((D3D12VAEncodePicture *)base_pic->refs[1][j]->priv)->recon_surface->texture;
+ if (ctx->is_texture_array)
+ d3d12_refs.pSubresources[i] = ((D3D12VAEncodePicture *)base_pic->refs[1][j]->priv)->subresource_index;
+ i++;
+ }
}
input_args.PictureControlDesc.IntraRefreshFrameIndex = 0;
@@ -343,7 +365,11 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
output_args.Bitstream.pBuffer = pic->output_buffer;
output_args.Bitstream.FrameStartOffset = pic->aligned_header_size;
output_args.ReconstructedPicture.pReconstructedPicture = pic->recon_surface->texture;
- output_args.ReconstructedPicture.ReconstructedPictureSubresource = 0;
+ if (ctx->is_texture_array) {
+ output_args.ReconstructedPicture.ReconstructedPictureSubresource = pic->subresource_index;
+ } else {
+ output_args.ReconstructedPicture.ReconstructedPictureSubresource = 0;
+ }
output_args.EncoderOutputMetadata.pBuffer = pic->encoded_metadata;
output_args.EncoderOutputMetadata.Offset = 0;
@@ -381,35 +407,87 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
}, \
}
+/*
+ * Build a D3D12_RESOURCE_BARRIER of type TRANSITION for one subresource
+ * (subres) of resource res, moving it from state `before` to state `after`.
+ * Arguments are parenthesized so that any expression can be passed safely.
+ */
+#define TRANSITION_BARRIER_SUBRESOURCE(res, subres, before, after) \
+    (D3D12_RESOURCE_BARRIER) {                                     \
+        .Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,           \
+        .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,                 \
+        .Transition = {                                            \
+            .pResource   = (res),                                  \
+            .Subresource = (subres),                               \
+            .StateBefore = (before),                               \
+            .StateAfter  = (after),                                \
+        },                                                         \
+    }
+
barriers[0] = TRANSITION_BARRIER(pic->input_surface->texture,
D3D12_RESOURCE_STATE_COMMON,
D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
barriers[1] = TRANSITION_BARRIER(pic->output_buffer,
D3D12_RESOURCE_STATE_COMMON,
D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
- barriers[2] = TRANSITION_BARRIER(pic->recon_surface->texture,
+ barriers[2] = TRANSITION_BARRIER(pic->encoded_metadata,
D3D12_RESOURCE_STATE_COMMON,
D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
- barriers[3] = TRANSITION_BARRIER(pic->encoded_metadata,
+ barriers[3] = TRANSITION_BARRIER(pic->resolved_metadata,
D3D12_RESOURCE_STATE_COMMON,
D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
- barriers[4] = TRANSITION_BARRIER(pic->resolved_metadata,
- D3D12_RESOURCE_STATE_COMMON,
- D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
-
- ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 5, barriers);
-
- if (d3d12_refs.NumTexture2Ds) {
- D3D12_RESOURCE_BARRIER refs_barriers[3];
-
- for (i = 0; i < d3d12_refs.NumTexture2Ds; i++)
- refs_barriers[i] = TRANSITION_BARRIER(d3d12_refs.ppTexture2Ds[i],
- D3D12_RESOURCE_STATE_COMMON,
- D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
-
- ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, d3d12_refs.NumTexture2Ds,
- refs_barriers);
+ ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 4, barriers);
+
+ // Set the transition barriers for the reference pictures and the reconstructed picture.
+ int barriers_ref_index = 0;
+ D3D12_RESOURCE_BARRIER *barriers_ref = NULL;
+ if (ctx->is_texture_array) {
+ barriers_ref = av_calloc(ctx->max_subresource_array_size * ctx->plane_count,
+ sizeof(D3D12_RESOURCE_BARRIER));
+ } else {
+ barriers_ref = av_calloc(MAX_DPB_SIZE,sizeof(D3D12_RESOURCE_BARRIER));
+ }
+
+ if (ctx->is_texture_array) {
+ // In Texture array mode, the D3D12 uses the same texture array for all the input
+ // reference pics in ppTexture2Ds and also for the pReconstructedPicture output allocations,
+ //just different subresources.
+ D3D12_RESOURCE_DESC references_tex_array_desc = { 0 };
+ pic->recon_surface->texture->lpVtbl->GetDesc(pic->recon_surface->texture, &references_tex_array_desc);
+
+ for (uint32_t reference_subresource = 0; reference_subresource < references_tex_array_desc.DepthOrArraySize;
+ reference_subresource++) {
+
+ //D3D12 DecomposeSubresource
+ uint32_t mip_slice, plane_slice, array_slice, array_size;
+ array_size = references_tex_array_desc.DepthOrArraySize;
+ mip_slice = reference_subresource % references_tex_array_desc.MipLevels;
+ array_slice = (reference_subresource / references_tex_array_desc.MipLevels) % array_size;
+
+ for (plane_slice = 0; plane_slice < ctx->plane_count; plane_slice++) {
+ //Calculate the subresource index
+ uint32_t planeOutputSubresource = mip_slice + array_slice * references_tex_array_desc.MipLevels +
+ plane_slice * references_tex_array_desc.MipLevels * array_size;
+
+ if (reference_subresource == pic->subresource_index) {
+ barriers_ref[barriers_ref_index++] = TRANSITION_BARRIER_SUBRESOURCE(pic->recon_surface->texture, planeOutputSubresource,
+ D3D12_RESOURCE_STATE_COMMON,
+ D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
+ } else {
+ barriers_ref[barriers_ref_index++] = TRANSITION_BARRIER_SUBRESOURCE(pic->recon_surface->texture, planeOutputSubresource,
+ D3D12_RESOURCE_STATE_COMMON,
+ D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
+ }
+ }
+ }
+ } else {
+ barriers_ref[barriers_ref_index++] = TRANSITION_BARRIER(pic->recon_surface->texture,
+ D3D12_RESOURCE_STATE_COMMON,
+ D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
+
+ if (d3d12_refs.NumTexture2Ds) {
+ for (i = 0; i < d3d12_refs.NumTexture2Ds; i++)
+ barriers_ref[barriers_ref_index++] = TRANSITION_BARRIER(d3d12_refs.ppTexture2Ds[i],
+ D3D12_RESOURCE_STATE_COMMON,
+ D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
+ }
}
+ ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, barriers_ref_index, barriers_ref);
ID3D12VideoEncodeCommandList2_EncodeFrame(cmd_list, ctx->encoder, ctx->encoder_heap,
&input_args, &output_args);
@@ -422,16 +500,15 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
ID3D12VideoEncodeCommandList2_ResolveEncoderOutputMetadata(cmd_list, &input_metadata, &output_metadata);
- if (d3d12_refs.NumTexture2Ds) {
- D3D12_RESOURCE_BARRIER refs_barriers[3];
-
- for (i = 0; i < d3d12_refs.NumTexture2Ds; i++)
- refs_barriers[i] = TRANSITION_BARRIER(d3d12_refs.ppTexture2Ds[i],
- D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ,
- D3D12_RESOURCE_STATE_COMMON);
-
- ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, d3d12_refs.NumTexture2Ds,
- refs_barriers);
+ //swap the barriers_ref transition state
+ if (barriers_ref_index > 0) {
+ for (i = 0; i < barriers_ref_index; i++) {
+ D3D12_RESOURCE_STATES temp_statue = barriers_ref[i].Transition.StateBefore;
+ barriers_ref[i].Transition.StateBefore = barriers_ref[i].Transition.StateAfter;
+ barriers_ref[i].Transition.StateAfter = temp_statue;
+ }
+ ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, barriers_ref_index,
+ barriers_ref);
}
barriers[0] = TRANSITION_BARRIER(pic->input_surface->texture,
@@ -440,17 +517,14 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
barriers[1] = TRANSITION_BARRIER(pic->output_buffer,
D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE,
D3D12_RESOURCE_STATE_COMMON);
- barriers[2] = TRANSITION_BARRIER(pic->recon_surface->texture,
- D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE,
- D3D12_RESOURCE_STATE_COMMON);
- barriers[3] = TRANSITION_BARRIER(pic->encoded_metadata,
+ barriers[2] = TRANSITION_BARRIER(pic->encoded_metadata,
D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ,
D3D12_RESOURCE_STATE_COMMON);
- barriers[4] = TRANSITION_BARRIER(pic->resolved_metadata,
+ barriers[3] = TRANSITION_BARRIER(pic->resolved_metadata,
D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE,
D3D12_RESOURCE_STATE_COMMON);
- ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 5, barriers);
+ ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 4, barriers);
hr = ID3D12VideoEncodeCommandList2_Close(cmd_list);
if (FAILED(hr)) {
@@ -489,6 +563,14 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
if (d3d12_refs.ppTexture2Ds)
av_freep(&d3d12_refs.ppTexture2Ds);
+ if (ctx->is_texture_array) {
+ if (d3d12_refs.pSubresources)
+ av_freep(&d3d12_refs.pSubresources);
+ }
+
+ if (barriers_ref)
+ av_freep(&barriers_ref);
+
return 0;
fail:
@@ -498,6 +580,14 @@ fail:
if (d3d12_refs.ppTexture2Ds)
av_freep(&d3d12_refs.ppTexture2Ds);
+ if (ctx->is_texture_array) {
+ if (d3d12_refs.pSubresources)
+ av_freep(&d3d12_refs.pSubresources);
+ }
+
+ if (barriers_ref)
+ av_freep(&barriers_ref);
+
if (ctx->codec->free_picture_params)
ctx->codec->free_picture_params(pic);
@@ -1088,13 +1178,15 @@ static int d3d12va_encode_init_gop_structure(AVCodecContext *avctx)
switch (ctx->codec->d3d12_codec) {
case D3D12_VIDEO_ENCODER_CODEC_H264:
ref_l0 = FFMIN(support.PictureSupport.pH264Support->MaxL0ReferencesForP,
- support.PictureSupport.pH264Support->MaxL1ReferencesForB);
+ support.PictureSupport.pHEVCSupport->MaxL1ReferencesForB ?
+ support.PictureSupport.pHEVCSupport->MaxL1ReferencesForB : UINT_MAX);
ref_l1 = support.PictureSupport.pH264Support->MaxL1ReferencesForB;
break;
case D3D12_VIDEO_ENCODER_CODEC_HEVC:
ref_l0 = FFMIN(support.PictureSupport.pHEVCSupport->MaxL0ReferencesForP,
- support.PictureSupport.pHEVCSupport->MaxL1ReferencesForB);
+ support.PictureSupport.pHEVCSupport->MaxL1ReferencesForB ?
+ support.PictureSupport.pHEVCSupport->MaxL1ReferencesForB : UINT_MAX);
ref_l1 = support.PictureSupport.pHEVCSupport->MaxL1ReferencesForB;
break;
@@ -1336,6 +1428,47 @@ fail:
return err;
}
+/**
+ * Allocate the texture array used in texture array mode, together with the
+ * fence and event of the AVD3D12VAFrame that wraps it.
+ *
+ * encode_context->max_subresource_array_size is set to the number of array
+ * slices (MAX_DPB_SIZE + D3D12VA_VIDEO_ENC_ASYNC_DEPTH + 1). On success the
+ * new frame is stored in encode_context->texture_array_frame. On failure
+ * every object created here is released again and texture_array_frame is
+ * not modified.
+ *
+ * @param ctx             frames context providing width, height, format and
+ *                        resource flags of the texture array
+ * @param encode_context  encoder context receiving the texture array frame
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int d3d12va_create_texture_array(AVHWFramesContext *ctx, D3D12VAEncodeContext *encode_context)
+{
+    AVD3D12VAFramesContext *hwctx = ctx->hwctx;
+    AVD3D12VADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+    const int array_size = MAX_DPB_SIZE + D3D12VA_VIDEO_ENC_ASYNC_DEPTH + 1;
+    D3D12_HEAP_PROPERTIES props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
+    D3D12_RESOURCE_DESC desc = {
+        .Dimension        = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
+        .Alignment        = 0,
+        .Width            = ctx->width,
+        .Height           = ctx->height,
+        .DepthOrArraySize = array_size,
+        .MipLevels        = 1,
+        .Format           = hwctx->format,
+        .SampleDesc       = { .Count = 1, .Quality = 0 },
+        .Layout           = D3D12_TEXTURE_LAYOUT_UNKNOWN,
+        .Flags            = hwctx->flags,
+    };
+    AVD3D12VAFrame *frame;
+    int err;
+
+    encode_context->max_subresource_array_size = array_size;
+
+    frame = av_mallocz(sizeof(*frame));
+    if (!frame)
+        return AVERROR(ENOMEM);
+
+    if (FAILED(ID3D12Device_CreateCommittedResource(device_hwctx->device, &props, D3D12_HEAP_FLAG_NONE, &desc,
+        D3D12_RESOURCE_STATE_COMMON, NULL, &IID_ID3D12Resource, (void **)&frame->texture))) {
+        av_log(ctx, AV_LOG_ERROR, "Could not create the texture\n");
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    /* The fence and event results were previously ignored; a failure here
+     * would leave a half-initialized frame behind. */
+    if (FAILED(ID3D12Device_CreateFence(device_hwctx->device, 0, D3D12_FENCE_FLAG_NONE,
+        &IID_ID3D12Fence, (void **)&frame->sync_ctx.fence))) {
+        av_log(ctx, AV_LOG_ERROR, "Could not create the fence for the texture array\n");
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    frame->sync_ctx.event = CreateEvent(NULL, FALSE, FALSE, NULL);
+    if (!frame->sync_ctx.event) {
+        av_log(ctx, AV_LOG_ERROR, "Could not create the event for the texture array\n");
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    encode_context->texture_array_frame = frame;
+    return 0;
+
+fail:
+    /* The event is created last, so only the COM objects can exist here. */
+    D3D12_OBJECT_RELEASE(frame->sync_ctx.fence);
+    D3D12_OBJECT_RELEASE(frame->texture);
+    av_free(frame);
+    return err;
+}
+
static int d3d12va_encode_create_recon_frames(AVCodecContext *avctx)
{
FFHWBaseEncodeContext *base_ctx = avctx->priv_data;
@@ -1394,6 +1527,7 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx)
FFHWBaseEncodeContext *base_ctx = avctx->priv_data;
D3D12VAEncodeContext *ctx = avctx->priv_data;
D3D12_FEATURE_DATA_VIDEO_FEATURE_AREA_SUPPORT support = { 0 };
+ D3D12_FEATURE_DATA_FORMAT_INFO format_info = {0};
int err;
HRESULT hr;
@@ -1429,6 +1563,15 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx)
goto fail;
}
+ format_info.Format = ((AVD3D12VAFramesContext*)base_ctx->input_frames->hwctx)->format;
+ if (FAILED(ID3D12VideoDevice_CheckFeatureSupport(ctx->hwctx->device, D3D12_FEATURE_FORMAT_INFO,
+ &format_info, sizeof(format_info)))) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to query format plane count: 0x%x\n", hr);
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+ ctx->plane_count = format_info.PlaneCount;
+
err = d3d12va_encode_set_profile(avctx);
if (err < 0)
goto fail;
@@ -1485,6 +1628,10 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx)
goto fail;
}
+ if (ctx->is_texture_array) {
+ d3d12va_create_texture_array(base_ctx->recon_frames, avctx->priv_data);
+ }
+
base_ctx->output_delay = base_ctx->b_per_p;
base_ctx->decode_delay = base_ctx->max_b_depth;
@@ -1528,6 +1675,18 @@ int ff_d3d12va_encode_close(AVCodecContext *avctx)
av_buffer_pool_uninit(&ctx->output_buffer_pool);
+ if (ctx->is_texture_array) {
+ ID3D12Resource *pResource = ctx->texture_array_frame->texture;
+ if (pResource) {
+ D3D12_OBJECT_RELEASE(pResource);
+ ctx->texture_array_frame->texture = NULL;
+ }
+ D3D12_OBJECT_RELEASE(ctx->texture_array_frame->sync_ctx.fence);
+ if (ctx->texture_array_frame->sync_ctx.event)
+ CloseHandle(ctx->texture_array_frame->sync_ctx.event);
+ av_free(ctx->texture_array_frame);
+ }
+
D3D12_OBJECT_RELEASE(ctx->command_list);
D3D12_OBJECT_RELEASE(ctx->command_queue);
diff --git a/libavcodec/d3d12va_encode.h b/libavcodec/d3d12va_encode.h
index 3b0b8153d5..fc31857f1a 100644
--- a/libavcodec/d3d12va_encode.h
+++ b/libavcodec/d3d12va_encode.h
@@ -52,6 +52,8 @@ typedef struct D3D12VAEncodePicture {
ID3D12Resource *encoded_metadata;
ID3D12Resource *resolved_metadata;
+ int subresource_index;
+
D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA pic_ctl;
int fence_value;
@@ -189,6 +191,33 @@ typedef struct D3D12VAEncodeContext {
*/
AVBufferPool *output_buffer_pool;
+ /**
+ * Flag indicating whether the hardware uses texture array mode.
+ */
+ int is_texture_array;
+
+ /**
+ * In texture array mode, the D3D12 uses the same texture array for all the input
+ * reference pics in ppTexture2Ds and also for the pReconstructedPicture output
+ * allocations, just different subresources.
+ */
+ AVD3D12VAFrame *texture_array_frame;
+
+ /**
+ * The max number of subresources in the texture array.
+ */
+ int max_subresource_array_size;
+
+ /**
+ * Running counter of assigned subresources; taken modulo
+ * max_subresource_array_size it gives a picture's index in the texture array.
+ */
+ int subresource_used_index;
+
+ /**
+ * The number of planes in the input DXGI format.
+ */
+ int plane_count;
+
/**
* D3D12 video encoder.
*/
diff --git a/libavcodec/d3d12va_encode_hevc.c b/libavcodec/d3d12va_encode_hevc.c
index 938ba01f54..7e1d973f7e 100644
--- a/libavcodec/d3d12va_encode_hevc.c
+++ b/libavcodec/d3d12va_encode_hevc.c
@@ -280,9 +280,8 @@ static int d3d12va_encode_hevc_init_sequence_params(AVCodecContext *avctx)
}
if (support.SupportFlags & D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RECONSTRUCTED_FRAMES_REQUIRE_TEXTURE_ARRAYS) {
- av_log(avctx, AV_LOG_ERROR, "D3D12 video encode on this device requires texture array support, "
- "but it's not implemented.\n");
- return AVERROR_PATCHWELCOME;
+ ctx->is_texture_array = 1;
+ av_log(avctx, AV_LOG_DEBUG, "D3D12 video encode on this device uses texture array mode.\n");
}
desc = av_pix_fmt_desc_get(base_ctx->input_frames->sw_format);
--
2.45.2.windows.1
More information about the ffmpeg-devel
mailing list