[FFmpeg-devel] [PATCH 3/4] avcodec/h264dec: implement export of video coding info for H.264
Timothée Regaud
timothee.informatique at regaud-chapuy.fr
Fri Jul 18 13:30:54 EEST 2025
From: Timothee Regaud <timothee.informatique at regaud-chapuy.fr>
Hooks into the H.264 decoder to populate the new generic video coding info structures. It handles allocation of the side data buffer, collection of modes/MVs/refs for all macroblock types, and attachment of the final side data buffer to the output frame.
This should serve as a template for adding support for other codecs down the line.
Signed-off-by: Timothee Regaud <timothee.informatique at regaud-chapuy.fr>
---
Changelog | 1 +
libavcodec/h264_mb.c | 150 ++++++++++++++++++++++++++++++++++
libavcodec/h264_mb_template.c | 3 +
libavcodec/h264_picture.c | 3 +
libavcodec/h264_slice.c | 19 +++++
libavcodec/h264dec.c | 17 ++++
libavcodec/h264dec.h | 12 +++
7 files changed, 205 insertions(+)
diff --git a/Changelog b/Changelog
index ad2361a481..360f6fd28a 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,7 @@ Entries are sorted chronologically from oldest to youngest within each release,
releases are sorted from youngest to oldest.
version <next>:
+- avcodec: add generic side data export for video coding info
- Drop support for OpenSSL < 1.1.0
- yasm support dropped, users need to use nasm
- VVC VAAPI decoder
diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c
index 0d6562b583..af790fd854 100644
--- a/libavcodec/h264_mb.c
+++ b/libavcodec/h264_mb.c
@@ -37,6 +37,156 @@
#include "rectangle.h"
#include "threadframe.h"
+/**
+ * Collects detailed mode, reference, and motion vector information for the
+ * current macroblock and stores it in the picture's coding_info buffer.
+ * This populates the generic AVVideoCodingInfoBlock structure for an H.264 macroblock.
+ *
+ * The buffer holds, after the AVVideoCodingInfo header: mb_num parent blocks,
+ * a pool of 4 child blocks per macroblock, and H264_MAX_SUB_DATA_PER_MB bytes
+ * of sub-data (MVs, reference indices, intra modes) per macroblock. All
+ * offsets written into the blocks are relative to the start of the header.
+ *
+ * @param h  decoder context; nothing is done if the current picture has no
+ *           coding_info buffer
+ * @param sl slice context holding the current macroblock's position and caches
+ */
+static void ff_h264_collect_coding_info(const H264Context *h, H264SliceContext *sl)
+{
+    AVVideoCodingInfo *coding_info;
+    AVVideoCodingInfoBlock *blocks_array, *child_blocks_pool, *block;
+    uint8_t *mb_sub_data_base;
+    int mb_type, mb_idx;
+    int i, j, list;
+
+    if (!h->cur_pic_ptr || !h->cur_pic_ptr->coding_info_ref)
+        return;
+
+    if (sl->mb_x < 0 || sl->mb_x >= h->mb_width ||
+        sl->mb_y < 0 || sl->mb_y >= h->mb_height)
+        return;
+
+    /* sl->mb_xy addresses the decoder's per-MB tables, which are laid out with
+     * a row stride of mb_stride (mb_width + 1). The exported arrays only have
+     * mb_num (mb_width * mb_height) entries, so index them by raster position;
+     * indexing by mb_xy would leave gaps and drop MBs near the bottom rows. */
+    mb_idx = sl->mb_y * h->mb_width + sl->mb_x;
+
+    coding_info       = (AVVideoCodingInfo *)h->cur_pic_ptr->coding_info_ref->data;
+    blocks_array      = (AVVideoCodingInfoBlock *)((uint8_t *)coding_info + coding_info->blocks_offset);
+    child_blocks_pool = blocks_array + h->mb_num;
+    mb_sub_data_base  = (uint8_t *)(child_blocks_pool + h->mb_num * 4) +
+                        mb_idx * H264_MAX_SUB_DATA_PER_MB;
+    block             = &blocks_array[mb_idx];
+    mb_type           = h->cur_pic.mb_type[sl->mb_xy];
+
+    block->x                   = sl->mb_x * 16;
+    block->y                   = sl->mb_y * 16;
+    block->w                   = 16;
+    block->h                   = 16;
+    block->is_intra            = IS_INTRA(mb_type);
+    block->codec_specific_type = mb_type;
+    block->num_children        = 0;
+    block->children_offset     = 0;
+
+    if (IS_8X8(mb_type)) {
+        AVVideoCodingInfoBlock *children = child_blocks_pool + mb_idx * 4;
+        const size_t sub_data_per_child = H264_MAX_SUB_DATA_PER_MB >> 2;
+
+        block->num_children    = 4;
+        block->children_offset = (uint8_t *)children - (uint8_t *)coding_info;
+
+        for (i = 0; i < 4; i++) {
+            AVVideoCodingInfoBlock *child = &children[i];
+            uint8_t *child_sub_data_base = mb_sub_data_base + i * sub_data_per_child;
+            int sub_mb_type = sl->sub_mb_type[i];
+            int num_partitions = 1;
+            int16_t (*mv[2])[2];
+            int8_t *ref_idx[2];
+
+            if (IS_SUB_8X4(sub_mb_type) || IS_SUB_4X8(sub_mb_type))
+                num_partitions = 2;
+            if (IS_SUB_4X4(sub_mb_type))
+                num_partitions = 4;
+
+            // 8x8 sub-block position from the raster-scan index 'i'.
+            child->x                   = block->x + (i & 1) * 8; // bit 0: column
+            child->y                   = block->y + (i & 2) * 4; // bit 1: row
+            child->w                   = 8;
+            child->h                   = 8;
+            child->is_intra            = 0;
+            child->codec_specific_type = sub_mb_type;
+            child->num_children        = 0;
+            child->children_offset     = 0;
+
+            /* Sub-data layout: both MV arrays first, then both ref_idx arrays.
+             * Placing the int8_t ref indices between the MV arrays would put
+             * the list-1 MVs at an odd address when num_partitions is 1. */
+            mv[0]      = (void *)child_sub_data_base;
+            mv[1]      = mv[0] + num_partitions;
+            ref_idx[0] = (int8_t *)(mv[1] + num_partitions);
+            ref_idx[1] = ref_idx[0] + num_partitions;
+
+            for (list = 0; list < 2; list++) {
+                if (!USES_LIST(sub_mb_type, list)) {
+                    child->inter.num_mv[list]         = 0;
+                    child->inter.mv_offset[list]      = 0;
+                    child->inter.ref_idx_offset[list] = 0;
+                    continue;
+                }
+
+                child->inter.num_mv[list]         = num_partitions;
+                child->inter.mv_offset[list]      = (uint8_t *)mv[list] - (uint8_t *)coding_info;
+                child->inter.ref_idx_offset[list] = (uint8_t *)ref_idx[list] - (uint8_t *)coding_info;
+
+                for (j = 0; j < num_partitions; j++) {
+                    // 8x4 halves sit at 4x4 blocks 0 and 2; other splits are consecutive.
+                    int block_idx = i * 4 + (IS_SUB_8X4(sub_mb_type) ? j * 2 : j);
+
+                    ref_idx[list][j] = sl->ref_cache[list][scan8[block_idx]];
+                    mv[list][j][0]   = sl->mv_cache[list][scan8[block_idx]][0];
+                    mv[list][j][1]   = sl->mv_cache[list][scan8[block_idx]][1];
+                }
+            }
+        }
+    } else if (block->is_intra) {
+        int8_t *pred_mode = (int8_t *)mb_sub_data_base;
+
+        block->intra.pred_mode_offset = mb_sub_data_base - (uint8_t *)coding_info;
+        if (IS_INTRA4x4(mb_type)) {
+            block->intra.num_pred_modes = 16;
+            for (i = 0; i < 16; i++)
+                pred_mode[i] = sl->intra4x4_pred_mode_cache[scan8[i]];
+        } else {
+            block->intra.num_pred_modes = 1;
+            pred_mode[0] = sl->intra16x16_pred_mode;
+        }
+        block->intra.chroma_pred_mode = sl->chroma_pred_mode;
+    } else { // Non-8x8 inter modes
+        int num_mvs = 0;
+        int16_t (*mv[2])[2];
+        int8_t *ref_idx[2];
+
+        if (IS_16X16(mb_type))
+            num_mvs = 1;
+        else if (IS_16X8(mb_type) || IS_8X16(mb_type))
+            num_mvs = 2;
+
+        // Same layout as for sub-macroblocks: MV arrays first, then ref indices.
+        mv[0]      = (void *)mb_sub_data_base;
+        mv[1]      = mv[0] + num_mvs;
+        ref_idx[0] = (int8_t *)(mv[1] + num_mvs);
+        ref_idx[1] = ref_idx[0] + num_mvs;
+
+        for (list = 0; list < 2; list++) {
+            if (!USES_LIST(mb_type, list)) {
+                block->inter.num_mv[list]         = 0;
+                block->inter.mv_offset[list]      = 0;
+                block->inter.ref_idx_offset[list] = 0;
+                continue;
+            }
+
+            block->inter.num_mv[list]         = num_mvs;
+            block->inter.mv_offset[list]      = (uint8_t *)mv[list] - (uint8_t *)coding_info;
+            block->inter.ref_idx_offset[list] = (uint8_t *)ref_idx[list] - (uint8_t *)coding_info;
+
+            for (i = 0; i < num_mvs; i++) {
+                // Second partition starts at 4x4 block 8 (16x8) or 4 (8x16).
+                int block_idx = IS_16X8(mb_type) ? i * 8 : IS_8X16(mb_type) ? i * 4 : 0;
+
+                ref_idx[list][i] = sl->ref_cache[list][scan8[block_idx]];
+                mv[list][i][0]   = sl->mv_cache[list][scan8[block_idx]][0];
+                mv[list][i][1]   = sl->mv_cache[list][scan8[block_idx]][1];
+            }
+        }
+    }
+}
+
static inline int get_lowest_part_list_y(H264SliceContext *sl,
int n, int height, int y_offset, int list)
{
diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c
index d5ea26a6e3..6dc09f0611 100644
--- a/libavcodec/h264_mb_template.c
+++ b/libavcodec/h264_mb_template.c
@@ -53,6 +53,9 @@ static av_noinline void FUNC(hl_decode_mb)(const H264Context *h, H264SliceContex
const int block_h = 16 >> h->chroma_y_shift;
const int chroma422 = CHROMA422(h);
+ // Collect macroblock information after decoding
+ ff_h264_collect_coding_info(h, sl);
+
dest_y = h->cur_pic.f->data[0] + ((mb_x << PIXEL_SHIFT) + mb_y * sl->linesize) * 16;
dest_cb = h->cur_pic.f->data[1] + (mb_x << PIXEL_SHIFT) * 8 + mb_y * sl->uvlinesize * block_h;
dest_cr = h->cur_pic.f->data[2] + (mb_x << PIXEL_SHIFT) * 8 + mb_y * sl->uvlinesize * block_h;
diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index f5d2b31cd6..5572f45fae 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -35,6 +35,7 @@
#include "libavutil/refstruct.h"
#include "thread.h"
#include "threadframe.h"
+#include "libavutil/mem.h"
void ff_h264_unref_picture(H264Picture *pic)
{
@@ -56,6 +57,7 @@ void ff_h264_unref_picture(H264Picture *pic)
av_refstruct_unref(&pic->ref_index[i]);
}
av_refstruct_unref(&pic->decode_error_flags);
+ av_buffer_unref(&pic->coding_info_ref);
memset((uint8_t*)pic + off, 0, sizeof(*pic) - off);
}
@@ -103,6 +105,7 @@ static void h264_copy_picture_params(H264Picture *dst, const H264Picture *src)
dst->mb_height = src->mb_height;
dst->mb_stride = src->mb_stride;
dst->needs_fg = src->needs_fg;
+ av_buffer_replace(&dst->coding_info_ref, src->coding_info_ref);
}
int ff_h264_ref_picture(H264Picture *dst, const H264Picture *src)
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 7e53e38cca..4398cf2f98 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -266,6 +266,25 @@ static int alloc_picture(H264Context *h, H264Picture *pic)
pic->mb_height = h->mb_height;
pic->mb_stride = h->mb_stride;
+ // Allocate the coding info buffer for this picture.
+ if (h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_CODING_INFO) {
+ // Total size must account for the main struct, the array of parent blocks,
+ // a pool for all potential child blocks, and the sub-data for all blocks.
+ // For H.264, the max children per MB is 4 (for 8x8 mode).
+ size_t coding_info_size = sizeof(AVVideoCodingInfo) +
+ h->mb_num * sizeof(AVVideoCodingInfoBlock) + // Parent blocks
+ h->mb_num * 4 * sizeof(AVVideoCodingInfoBlock) + // Pool for child blocks
+ h->mb_num * H264_MAX_SUB_DATA_PER_MB; // Pool for sub-data (MVs, modes)
+
+
+ pic->coding_info_ref = av_buffer_allocz(coding_info_size);
+ if (!pic->coding_info_ref)
+ goto fail;
+ AVVideoCodingInfo *info = (AVVideoCodingInfo*)pic->coding_info_ref->data;
+ info->nb_blocks = h->mb_num;
+ info->blocks_offset = sizeof(AVVideoCodingInfo);
+ }
+
return 0;
fail:
ff_h264_unref_picture(pic);
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 82b85b3387..a7b9e56db3 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -887,6 +887,23 @@ static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
goto fail;
}
+ // Attach the coding info from the main context.
+ if (srcp->coding_info_ref) {
+ AVFrameSideData *side_data;
+
+ av_log(h->avctx, AV_LOG_DEBUG, "Attaching coding_info to frame %"PRId64"\n", dst->pts);
+
+ // Create a new side data entry.
+ side_data = av_frame_new_side_data_from_buf(dst, AV_FRAME_DATA_VIDEO_CODING_INFO, srcp->coding_info_ref);
+ if (!side_data) {
+ av_log(h->avctx, AV_LOG_ERROR, "Failed to allocate side data for coding info.\n");
+ } else {
+ // The AVFrame now owns the buffer, so we release our reference to it.
+ // It will be freed when the frame is unreferenced.
+ srcp->coding_info_ref = NULL;
+ }
+ }
+
if (!(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN))
av_frame_remove_side_data(dst, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index c28d278240..e52f737766 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -45,6 +45,7 @@
#include "mpegutils.h"
#include "threadframe.h"
#include "videodsp.h"
+#include "libavutil/video_coding_info.h"
#define H264_MAX_PICTURE_COUNT 36
@@ -102,6 +103,14 @@
// does this mb use listX, note does not work if subMBs
#define USES_LIST(a, list) ((a) & ((MB_TYPE_P0L0 | MB_TYPE_P1L0) << (2 * (list))))
+/* Sizes used to allocate the H.264 AVVideoCodingInfo buffer (worst case: 16 inter partitions).
+ * Macros, not static const objects, so they are constant expressions and add no per-TU data. */
+#define H264_MAX_MV_SIZE_PER_LIST  (16 * sizeof(int16_t[2]))
+#define H264_MAX_REF_SIZE_PER_LIST (16 * sizeof(int8_t))
+#define H264_INTER_SUB_DATA_SIZE   (2 * (H264_MAX_MV_SIZE_PER_LIST + H264_MAX_REF_SIZE_PER_LIST))
+#define H264_INTRA_SUB_DATA_SIZE   (16 * sizeof(int8_t))
+#define H264_MAX_SUB_DATA_PER_MB   FFMAX(H264_INTER_SUB_DATA_SIZE, H264_INTRA_SUB_DATA_SIZE)
+
/**
* Memory management control operation.
*/
@@ -164,6 +173,9 @@ typedef struct H264Picture {
atomic_int *decode_error_flags;
int gray;
+
+ // Buffer to store macroblock mode information for this picture.
+ AVBufferRef *coding_info_ref;
} H264Picture;
typedef struct H264Ref {
--
2.39.5
More information about the ffmpeg-devel
mailing list