[FFmpeg-devel] [PATCH 3/4] avcodec/h264dec: implement export of video coding info for H.264

Fri Jul 18 13:30:54 EEST 2025

From: Timothee Regaud <timothee.informatique at regaud-chapuy.fr>

Hooks into the H.264 decoder to populate the new generic video coding info structures. It handles allocation of the side data buffer, collection of modes/MVs/refs for all macroblock types, and attachment of the final side data buffer to the output frame.

This should serve as a template for adding support for other codecs down the line.

Signed-off-by: Timothee Regaud <timothee.informatique at regaud-chapuy.fr>
---
 Changelog                     |   1 +
 libavcodec/h264_mb.c          | 150 ++++++++++++++++++++++++++++++++++
 libavcodec/h264_mb_template.c |   3 +
 libavcodec/h264_picture.c     |   3 +
 libavcodec/h264_slice.c       |  19 +++++
 libavcodec/h264dec.c          |  17 ++++
 libavcodec/h264dec.h          |  12 +++
 7 files changed, 205 insertions(+)

diff --git a/Changelog b/Changelog
index ad2361a481..360f6fd28a 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,7 @@ Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
 version <next>:
+- avcodec: add generic side data export for video coding info
 - Drop support for OpenSSL < 1.1.0
 - yasm support dropped, users need to use nasm
 - VVC VAAPI decoder
diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c
index 0d6562b583..af790fd854 100644
--- a/libavcodec/h264_mb.c
+++ b/libavcodec/h264_mb.c
@@ -37,6 +37,156 @@
 #include "rectangle.h"
 #include "threadframe.h"
 
+/**
+ * Record the mode, reference-index and motion-vector data of the current
+ * macroblock (sl->mb_xy) in the coding info buffer of h->cur_pic_ptr.
+ * Returns without writing when that buffer is absent or mb_xy >= h->mb_num.
+ * Layout used: h->mb_num parent blocks at blocks_offset, then 4 child block
+ * slots per MB, then H264_MAX_SUB_DATA_PER_MB bytes of sub-data per MB.
+ */
+static void ff_h264_collect_coding_info(const H264Context *h, H264SliceContext *sl)
+{
+    AVVideoCodingInfo *coding_info;
+    AVVideoCodingInfoBlock *block;
+    AVVideoCodingInfoBlock *blocks_array;
+    uint8_t *mb_sub_data_base;
+    int mb_type;
+    int i, j, list;
+
+    if (!h->cur_pic_ptr || !h->cur_pic_ptr->coding_info_ref)
+        return;
+    if (sl->mb_xy >= h->mb_num)
+        return;
+
+    coding_info = (AVVideoCodingInfo*)h->cur_pic_ptr->coding_info_ref->data;
+    blocks_array = (AVVideoCodingInfoBlock*)((uint8_t*)coding_info + coding_info->blocks_offset);
+    block = &blocks_array[sl->mb_xy];
+    mb_type = h->cur_pic.mb_type[sl->mb_xy];
+
+    AVVideoCodingInfoBlock *child_blocks_pool = (AVVideoCodingInfoBlock*)(blocks_array + h->mb_num);
+    uint8_t *sub_data_pool_start = (uint8_t*)(child_blocks_pool + h->mb_num * 4);
+    mb_sub_data_base = sub_data_pool_start + sl->mb_xy * H264_MAX_SUB_DATA_PER_MB;
+
+    block->x = sl->mb_x * 16;
+    block->y = sl->mb_y * 16;
+    block->w = 16;
+    block->h = 16;
+    // IS_INTRA() yields a bit mask; normalize it so a strict 0/1 flag is stored.
+    block->is_intra = !!IS_INTRA(mb_type);
+    block->codec_specific_type = mb_type;
+    block->num_children = 0;
+    block->children_offset = 0;
+
+    if (IS_8X8(mb_type)) {
+        block->num_children = 4;
+        block->children_offset = (uint8_t*)(child_blocks_pool + (sl->mb_xy * 4)) - (uint8_t*)coding_info;
+        const size_t sub_data_per_child = H264_MAX_SUB_DATA_PER_MB >> 2;
+
+        for (i = 0; i < 4; i++) {
+            AVVideoCodingInfoBlock *child = &((AVVideoCodingInfoBlock*)((uint8_t*)coding_info + block->children_offset))[i];
+            uint8_t *child_sub_data_base = mb_sub_data_base + i * sub_data_per_child;
+            int sub_mb_type = sl->sub_mb_type[i];
+            // Calculate 8x8 sub-block offsets from the raster-scan index 'i'.
+            int part_x = (i & 1) * 8; // Isolates bit 0 for horizontal position.
+            int part_y = (i & 2) * 4; // Isolates bit 1 for vertical position.
+
+            child->x = block->x + part_x;
+            child->y = block->y + part_y;
+            child->w = 8;
+            child->h = 8;
+            child->is_intra = 0;
+            child->codec_specific_type = sub_mb_type;
+            child->num_children = 0;
+            child->children_offset = 0;
+
+            int num_partitions = 1;
+            if (IS_SUB_8X4(sub_mb_type) || IS_SUB_4X8(sub_mb_type)) num_partitions = 2;
+            if (IS_SUB_4X4(sub_mb_type)) num_partitions = 4;
+
+            // Sub-data layout for this child: both MV arrays first, then both
+            // ref_idx arrays, so no int16_t MV starts at an odd offset behind a
+            // lone ref_idx byte (same ordering as the non-8x8 branch below).
+            int16_t (*mv_l0)[2]   = (void*)child_sub_data_base;
+            int16_t (*mv_l1)[2]   = mv_l0 + num_partitions;
+            int8_t  *ref_idx_l0 = (int8_t*)(mv_l1 + num_partitions);
+            int8_t  *ref_idx_l1 = ref_idx_l0 + num_partitions;
+
+            for (list = 0; list < 2; list++) {
+                if (USES_LIST(sub_mb_type, list)) {
+                    child->inter.num_mv[list] = num_partitions;
+                    child->inter.mv_offset[list] = !list ? ((uint8_t*)mv_l0 - (uint8_t*)coding_info) : ((uint8_t*)mv_l1 - (uint8_t*)coding_info);
+                    child->inter.ref_idx_offset[list] = !list ? ((uint8_t*)ref_idx_l0 - (uint8_t*)coding_info) : ((uint8_t*)ref_idx_l1 - (uint8_t*)coding_info);
+
+                    // Reconstruct pointers to write data
+                    int16_t (*current_mv)[2] = (int16_t (*)[2])((uint8_t*)coding_info + child->inter.mv_offset[list]);
+                    int8_t *current_ref_idx = (int8_t*)((uint8_t*)coding_info + child->inter.ref_idx_offset[list]);
+
+                    for (j = 0; j < num_partitions; j++) {
+                        // 8x4 partitions advance by 2 in the 4x4 index; all other shapes by 1.
+                        int block_idx = i * 4 + (IS_SUB_8X4(sub_mb_type) ? 2 * j : j);
+
+                        current_ref_idx[j] = sl->ref_cache[list][scan8[block_idx]];
+                        current_mv[j][0]   = sl->mv_cache[list][scan8[block_idx]][0];
+                        current_mv[j][1]   = sl->mv_cache[list][scan8[block_idx]][1];
+                    }
+                } else {
+                    child->inter.num_mv[list] = 0;
+                    child->inter.mv_offset[list] = 0;
+                    child->inter.ref_idx_offset[list] = 0;
+                }
+            }
+        }
+    } else if (block->is_intra) {
+        block->intra.pred_mode_offset = (uint8_t*)mb_sub_data_base - (uint8_t*)coding_info;
+        int8_t *pred_mode = (int8_t*)mb_sub_data_base; // Keep temporary pointer to write data
+        if (IS_INTRA4x4(mb_type)) {
+            block->intra.num_pred_modes = 16;
+            for (i = 0; i < 16; i++)
+                pred_mode[i] = sl->intra4x4_pred_mode_cache[scan8[i]];
+        } else {
+            block->intra.num_pred_modes = 1;
+            pred_mode[0] = sl->intra16x16_pred_mode;
+        }
+        block->intra.chroma_pred_mode = sl->chroma_pred_mode;
+    } else { // Non-8x8 Inter modes
+        int num_mvs = 0;
+        if (IS_16X16(mb_type)) num_mvs = 1;
+        else if (IS_16X8(mb_type) || IS_8X16(mb_type)) num_mvs = 2;
+
+        // Define the memory layout for this block's sub-data
+        int16_t (*mv_l0)[2]   = (void*)mb_sub_data_base;
+        int16_t (*mv_l1)[2]   = mv_l0 + num_mvs;
+        int8_t  *ref_idx_l0 = (int8_t*)(mv_l1 + num_mvs);
+        int8_t  *ref_idx_l1 = ref_idx_l0 + num_mvs;
+
+        for (list = 0; list < 2; list++) {
+            if (USES_LIST(mb_type, list)) {
+                block->inter.num_mv[list] = num_mvs;
+                block->inter.mv_offset[list] = !list ? ((uint8_t*)mv_l0 - (uint8_t*)coding_info) : ((uint8_t*)mv_l1 - (uint8_t*)coding_info);
+                block->inter.ref_idx_offset[list] = !list ? ((uint8_t*)ref_idx_l0 - (uint8_t*)coding_info) : ((uint8_t*)ref_idx_l1 - (uint8_t*)coding_info);
+
+                // Reconstruct pointers to write data
+                int16_t (*current_mv)[2] = (int16_t (*)[2])((uint8_t*)coding_info + block->inter.mv_offset[list]);
+                int8_t *current_ref_idx = (int8_t*)((uint8_t*)coding_info + block->inter.ref_idx_offset[list]);
+
+                for (i = 0; i < num_mvs; i++) {
+                    int block_idx = 0;
+                    if (IS_16X8(mb_type)) block_idx = i * 8;
+                    else if (IS_8X16(mb_type)) block_idx = i * 4;
+
+                    current_ref_idx[i] = sl->ref_cache[list][scan8[block_idx]];
+                    current_mv[i][0]   = sl->mv_cache[list][scan8[block_idx]][0];
+                    current_mv[i][1]   = sl->mv_cache[list][scan8[block_idx]][1];
+                }
+            } else {
+                block->inter.num_mv[list] = 0;
+                block->inter.mv_offset[list] = 0;
+                block->inter.ref_idx_offset[list] = 0;
+            }
+        }
+    }
+}
+
 static inline int get_lowest_part_list_y(H264SliceContext *sl,
                                          int n, int height, int y_offset, int list)
 {
diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c
index d5ea26a6e3..6dc09f0611 100644
--- a/libavcodec/h264_mb_template.c
+++ b/libavcodec/h264_mb_template.c
@@ -53,6 +53,9 @@ static av_noinline void FUNC(hl_decode_mb)(const H264Context *h, H264SliceContex
     const int block_h   = 16 >> h->chroma_y_shift;
     const int chroma422 = CHROMA422(h);
 
+    // Collect macroblock information after decoding
+    ff_h264_collect_coding_info(h, sl);
+
     dest_y  = h->cur_pic.f->data[0] + ((mb_x << PIXEL_SHIFT)     + mb_y * sl->linesize)  * 16;
     dest_cb = h->cur_pic.f->data[1] +  (mb_x << PIXEL_SHIFT) * 8 + mb_y * sl->uvlinesize * block_h;
     dest_cr = h->cur_pic.f->data[2] +  (mb_x << PIXEL_SHIFT) * 8 + mb_y * sl->uvlinesize * block_h;
diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index f5d2b31cd6..5572f45fae 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -35,6 +35,7 @@
 #include "libavutil/refstruct.h"
 #include "thread.h"
 #include "threadframe.h"
+#include "libavutil/mem.h"
 
 void ff_h264_unref_picture(H264Picture *pic)
 {
@@ -56,6 +57,7 @@ void ff_h264_unref_picture(H264Picture *pic)
         av_refstruct_unref(&pic->ref_index[i]);
     }
     av_refstruct_unref(&pic->decode_error_flags);
+    av_buffer_unref(&pic->coding_info_ref);
 
     memset((uint8_t*)pic + off, 0, sizeof(*pic) - off);
 }
@@ -103,6 +105,7 @@ static void h264_copy_picture_params(H264Picture *dst, const H264Picture *src)
     dst->mb_height     = src->mb_height;
     dst->mb_stride     = src->mb_stride;
     dst->needs_fg      = src->needs_fg;
+    av_buffer_replace(&dst->coding_info_ref, src->coding_info_ref);
 }
 
 int ff_h264_ref_picture(H264Picture *dst, const H264Picture *src)
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 7e53e38cca..4398cf2f98 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -266,6 +266,25 @@ static int alloc_picture(H264Context *h, H264Picture *pic)
     pic->mb_height = h->mb_height;
     pic->mb_stride = h->mb_stride;
 
+    // Allocate the coding info buffer for this picture.
+    if (h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_CODING_INFO) {
+        // Total size must account for the main struct, the array of parent blocks,
+        // a pool for all potential child blocks, and the sub-data for all blocks.
+        // For H.264, the max children per MB is 4 (for 8x8 mode).
+        size_t coding_info_size = sizeof(AVVideoCodingInfo) +
+                              h->mb_num * sizeof(AVVideoCodingInfoBlock) +      // Parent blocks
+                              h->mb_num * 4 * sizeof(AVVideoCodingInfoBlock) +  // Pool for child blocks
+                              h->mb_num * H264_MAX_SUB_DATA_PER_MB;             // Pool for sub-data (MVs, modes)
+
+
+        pic->coding_info_ref = av_buffer_allocz(coding_info_size);
+        if (!pic->coding_info_ref)
+            goto fail;
+        AVVideoCodingInfo *info = (AVVideoCodingInfo*)pic->coding_info_ref->data;
+        info->nb_blocks = h->mb_num;
+        info->blocks_offset = sizeof(AVVideoCodingInfo);
+    }
+
     return 0;
 fail:
     ff_h264_unref_picture(pic);
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 82b85b3387..a7b9e56db3 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -887,6 +887,23 @@ static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
             goto fail;
     }
 
+    // Attach the coding info from the main context.
+    if (srcp->coding_info_ref) {
+        AVFrameSideData *side_data;
+
+        av_log(h->avctx, AV_LOG_DEBUG, "Attaching coding_info to frame %"PRId64"\n", dst->pts);
+
+        // Create a new side data entry.
+        side_data = av_frame_new_side_data_from_buf(dst, AV_FRAME_DATA_VIDEO_CODING_INFO, srcp->coding_info_ref);
+        if (!side_data) {
+            av_log(h->avctx, AV_LOG_ERROR, "Failed to allocate side data for coding info.\n");
+        } else {
+            // The AVFrame now owns the buffer, so we release our reference to it.
+            // It will be freed when the frame is unreferenced.
+            srcp->coding_info_ref = NULL;
+        }
+    }
+
     if (!(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN))
         av_frame_remove_side_data(dst, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
 
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index c28d278240..e52f737766 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -45,6 +45,7 @@
 #include "mpegutils.h"
 #include "threadframe.h"
 #include "videodsp.h"
+#include "libavutil/video_coding_info.h"
 
 #define H264_MAX_PICTURE_COUNT 36
 
@@ -102,6 +103,14 @@
 // does this mb use listX, note does not work if subMBs
 #define USES_LIST(a, list) ((a) & ((MB_TYPE_P0L0 | MB_TYPE_P1L0) << (2 * (list))))
 
+/* Constants for AVVideoCodingInfo buffer allocation for H.264.
+ * Max sub-data per MB is for inter prediction with 16 partitions. */
+static const size_t H264_MAX_MV_SIZE_PER_LIST = 16 * sizeof(int16_t[2]);
+static const size_t H264_MAX_REF_SIZE_PER_LIST = 16 * sizeof(int8_t);
+static const size_t H264_INTER_SUB_DATA_SIZE = 2 * (H264_MAX_MV_SIZE_PER_LIST + H264_MAX_REF_SIZE_PER_LIST);
+static const size_t H264_INTRA_SUB_DATA_SIZE = 16 * sizeof(int8_t);
+static const size_t H264_MAX_SUB_DATA_PER_MB = FFMAX(H264_INTER_SUB_DATA_SIZE, H264_INTRA_SUB_DATA_SIZE);
+
 /**
  * Memory management control operation.
  */
@@ -164,6 +173,9 @@ typedef struct H264Picture {
     atomic_int *decode_error_flags;
 
     int gray;
+
+    // Buffer to store macroblock mode information for this picture.
+    AVBufferRef *coding_info_ref;
 } H264Picture;
 
 typedef struct H264Ref {
-- 
2.39.5