[FFmpeg-devel] [PATCH] avcodec/h264_slice: compute and export film grain seed

Wed Aug 4 20:00:48 EEST 2021

From: Niklas Haas <git at haasn.dev>

>From SMPTE RDD 5-2006, the grain seed is to be computed from the
following definition of `pic_offset`:

> When decoding H.264 | MPEG-4 AVC bitstreams, pic_offset is defined as
> follows:
>   - pic_offset = PicOrderCnt(CurrPic) + (PicOrderCnt_offset << 5)
> where:
>   - PicOrderCnt(CurrPic) is the picture order count of the current frame,
>     which shall be derived from [the video stream].
>
>   - PicOrderCnt_offset is set to idr_pic_id on IDR frames. idr_pic_id
>     shall be read from the slice header of [the video stream]. On non-IDR I
>     frames, PicOrderCnt_offset is set to 0. A frame shall be classified as I
>     frame when all its slices are I slices, which may be optionally
>     designated by setting primary_pic_type to 0 in the access delimiter NAL
>     unit. Otherwise, PicOrderCnt_offset it not changed. PicOrderCnt_offset is
>     updated in decoding order.

To forward this information correctly, we need to make a number of
annoying changes to h264_slice:

- We need to keep track of the `PicOrderCnt_offset`, and update it to
  the decoded value of `idr_pic_id` on IDR frames.
- We need to keep track of whether or not a frame contained any non-I
  slices, so we can detect the "non-IDR I frame" condition, and reset
  `PicOrderCnt_offset` back to 0. This is *not* synonymous with existing
  fields such as `pic->f->pict_type` because that is somehow only set
  based on the first slice in a frame, so we introduce a new field
  `picture_intra_only` which is set to 0 when decoding any non-I slice.
- We need to compute this derived `pic_offset` at the *end* of the
  frame, rather than at the beginning (where the SEI is decoded/set),
  because it now depends on the types of the slices present in the same
  frame.

If there's a less ugly way to accomplish the above, I don't see it.
Suggestions welcome(tm).

Signed-off-by: Niklas Haas <git at haasn.dev>
---
 libavcodec/h264_picture.c     | 13 ++++++++++++-
 libavcodec/h264_slice.c       | 11 ++++++++++-
 libavcodec/h264dec.h          |  7 +++++++
 libavutil/film_grain_params.h |  3 +++
 4 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index eec5e9fb9a..18daf423b5 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -27,6 +27,7 @@
 
 #include "libavutil/avassert.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/film_grain_params.h"
 #include "internal.h"
 #include "cabac.h"
 #include "cabac_functions.h"
@@ -159,6 +160,8 @@ void ff_h264_set_erpic(ERPicture *dst, H264Picture *src)
 int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup)
 {
     AVCodecContext *const avctx = h->avctx;
+    H264Picture *cur = h->cur_pic_ptr;
+    AVFrameSideData *sd;
     int err = 0;
     h->mb_y = 0;
 
@@ -172,6 +175,9 @@ int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup)
         h->poc.prev_frame_num        = h->poc.frame_num;
     }
 
+    if (!h->picture_idr && h->picture_intra_only)
+        h->poc_offset = 0;
+
     if (avctx->hwaccel) {
         err = avctx->hwaccel->end_frame(avctx);
         if (err < 0)
@@ -179,8 +185,13 @@ int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup)
                    "hardware accelerator failed to decode picture\n");
     }
 
+    if ((sd = av_frame_get_side_data(cur->f, AV_FRAME_DATA_FILM_GRAIN_PARAMS))) {
+        AVFilmGrainParams *fgp = (AVFilmGrainParams *) sd->data;
+        fgp->seed = cur->poc + h->poc_offset << 5;
+    }
+
     if (!in_setup && !h->droppable)
-        ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
+        ff_thread_report_progress(&cur->tf, INT_MAX,
                                   h->picture_structure == PICT_BOTTOM_FIELD);
     emms_c();
 
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 41338fbcb6..19283fb4ec 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -410,6 +410,7 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
 
     h->next_output_pic   = h1->next_output_pic;
     h->next_outputed_poc = h1->next_outputed_poc;
+    h->poc_offset        = h1->poc_offset;
 
     memcpy(h->mmco, h1->mmco, sizeof(h->mmco));
     h->nb_mmco         = h1->nb_mmco;
@@ -514,6 +515,7 @@ static int h264_frame_start(H264Context *h)
     pic->sei_recovery_frame_cnt = h->sei.recovery_point.recovery_frame_cnt;
 
     pic->f->pict_type = h->slice_ctx[0].slice_type;
+    h->picture_intra_only = 1; // set to 0 by h264_slice_init
 
     pic->f->crop_left   = h->crop_left;
     pic->f->crop_right  = h->crop_right;
@@ -1339,6 +1341,7 @@ static int h264_export_frame_props(H264Context *h)
             return AVERROR(ENOMEM);
 
         fgp->type = AV_FILM_GRAIN_PARAMS_H274;
+        /* fgp->seed is set by ff_h264_field_end */
 
         fgp->codec.h274.model_id = fgc->model_id;
         if (fgc->separate_colour_description_present_flag) {
@@ -1547,6 +1550,9 @@ static int h264_field_start(H264Context *h, const H264SliceContext *sl,
     h->poc.delta_poc[0]     = sl->delta_poc[0];
     h->poc.delta_poc[1]     = sl->delta_poc[1];
 
+    if (nal->type == H264_NAL_IDR_SLICE)
+        h->poc_offset = sl->idr_pic_id;
+
     /* Shorten frame num gaps so we don't have to allocate reference
      * frames just to throw them away */
     if (h->poc.frame_num != h->poc.prev_frame_num) {
@@ -1895,7 +1901,7 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl,
     }
 
     if (nal->type == H264_NAL_IDR_SLICE)
-        get_ue_golomb_long(&sl->gb); /* idr_pic_id */
+        sl->idr_pic_id = get_ue_golomb_long(&sl->gb);
 
     if (sps->poc_type == 0) {
         sl->poc_lsb = get_bits(&sl->gb, sps->log2_max_poc_lsb);
@@ -2025,6 +2031,9 @@ static int h264_slice_init(H264Context *h, H264SliceContext *sl,
         return AVERROR_INVALIDDATA;
     }
 
+    if (sl->slice_type_nos != AV_PICTURE_TYPE_I)
+        h->picture_intra_only = 0;
+
     av_assert1(h->mb_num == h->mb_width * h->mb_height);
     if (sl->first_mb_addr << FIELD_OR_MBAFF_PICTURE(h) >= h->mb_num ||
         sl->first_mb_addr >= h->mb_num) {
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index 8954b74795..3a1dc5d239 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -331,6 +331,7 @@ typedef struct H264SliceContext {
     int explicit_ref_marking;
 
     int frame_num;
+    int idr_pic_id;
     int poc_lsb;
     int delta_poc_bottom;
     int delta_poc[2];
@@ -384,6 +385,11 @@ typedef struct H264Context {
      */
     int picture_idr;
 
+    /*
+     * Set to 1 when the current picture contains only I slices, 0 otherwise.
+     */
+    int picture_intra_only;
+
     int crop_left;
     int crop_right;
     int crop_top;
@@ -473,6 +479,7 @@ typedef struct H264Context {
     int last_pocs[MAX_DELAYED_PIC_COUNT];
     H264Picture *next_output_pic;
     int next_outputed_poc;
+    int poc_offset;         ///< PicOrderCnt_offset from SMPTE RDD-2006
 
     /**
      * memory management control operations buffer.
diff --git a/libavutil/film_grain_params.h b/libavutil/film_grain_params.h
index 7350dfc5b8..f3bd0a4a6a 100644
--- a/libavutil/film_grain_params.h
+++ b/libavutil/film_grain_params.h
@@ -221,6 +221,9 @@ typedef struct AVFilmGrainParams {
 
     /**
      * Seed to use for the synthesis process, if the codec allows for it.
+     *
+     * @note For H.264, this refers to `pic_offset` as defined in
+     *       SMPTE RDD 5-2006.
      */
     uint64_t seed;
 
-- 
2.32.0