[FFmpeg-devel] [PATCH v2] libavformat/mpegtsenc.c: correctly re-emit extradata ahead of IDR pictures

John Coiner jpcoiner at gmail.com
Mon Feb 13 01:06:07 EET 2023


This is v2 of the patch for https://trac.ffmpeg.org/ticket/10148.

It implements the handling described at http://ffmpeg.org/pipermail/ffmpeg-devel/2023-February/306542.html, that is:
  * If we receive [AUD][IDR], we emit [AUD][SPS][PPS][IDR]
  * If we receive [SEI][AUD][IDR], we emit [AUD][SPS][PPS][SEI][IDR]

This is speculative; it would be good to hear from Marton or others about whether this is the right handling.

The other possible handling would be to simply prepend the extradata to the existing frame's bitstream, without moving the AUD to the front. So if we received:
  [AUD][IDR]
We would emit:
  [SPS][PPS][AUD][IDR]
That's not quite compliant with the H.264 spec, which says that an AUD, when present, shall be the first NAL unit in an access unit (frame). I doubt real decoders care, and this alternative would be simpler to implement. Let me know if you'd prefer the simpler handling.

---
 libavformat/mpegtsenc.c | 55 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 6 deletions(-)

diff --git a/libavformat/mpegtsenc.c b/libavformat/mpegtsenc.c
index 00ad426086..a4a2d8cdaf 100644
--- a/libavformat/mpegtsenc.c
+++ b/libavformat/mpegtsenc.c
@@ -1835,6 +1835,13 @@ static int opus_get_packet_samples(AVFormatContext *s, AVPacket *pkt)
     return duration;
 }
 
+// Copies `size_bytes` bytes from `source` to `dest` with memcpy(), so the two ranges must not overlap.
+// Returns `dest + size_bytes`, i.e. the address just past the bytes written, so calls can be chained.
+static uint8_t* append(uint8_t* dest, const uint8_t* source, int size_bytes) {
+  memcpy(dest, source, size_bytes);
+  return dest + size_bytes;
+}
+
 static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
 {
     AVStream *st = s->streams[pkt->stream_index];
@@ -1877,6 +1884,7 @@ static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
 
     if (st->codecpar->codec_id == AV_CODEC_ID_H264) {
         const uint8_t *p = buf, *buf_end = p + size;
+        const uint8_t *found_aud = NULL, *found_aud_end = NULL;
         uint32_t state = -1;
         int extradd = (pkt->flags & AV_PKT_FLAG_KEY) ? st->codecpar->extradata_size : 0;
         int ret = ff_check_h264_startcode(s, st, pkt);
@@ -1886,17 +1894,34 @@ static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
         if (extradd && AV_RB24(st->codecpar->extradata) > 1)
             extradd = 0;
 
+        // Ensure that all pictures are prefixed with an AUD, and that
+        // IDR pictures are also prefixed with SPS and PPS. SPS and PPS
+        // are assumed to be available in 'extradata' if not found in-band.
         do {
             p = avpriv_find_start_code(p, buf_end, &state);
             av_log(s, AV_LOG_TRACE, "nal %"PRId32"\n", state & 0x1f);
-            if ((state & 0x1f) == 7)
+            if ((state & 0x1f) == 7)  // SPS NAL
                 extradd = 0;
-        } while (p < buf_end && (state & 0x1f) != 9 &&
-                 (state & 0x1f) != 5 && (state & 0x1f) != 1);
-
-        if ((state & 0x1f) != 5)
+            if ((state & 0x1f) == 9) {  // AUD NAL
+                found_aud = p - 4;  // start of the 0x000001 start code.
+                found_aud_end = p + 1; // first byte past the AUD.
+
+                if (found_aud < buf)
+                    found_aud = buf;
+                if (buf_end < found_aud_end)
+                    found_aud_end = buf_end;
+            }
+        } while (p < buf_end
+                 && (state & 0x1f) != 5  // IDR picture
+                 && (state & 0x1f) != 1  // non-IDR picture
+                 && (extradd > 0 || !found_aud));
+        if ((state & 0x1f) != 5) {
+            // Did not find an IDR picture; do not emit extradata.
             extradd = 0;
-        if ((state & 0x1f) != 9) { // AUD NAL
+        }
+
+        if (!found_aud) {
+            // Prefix 'buf' with the missing AUD, and extradata if needed.
             data = av_malloc(pkt->size + 6 + extradd);
             if (!data)
                 return AVERROR(ENOMEM);
@@ -1907,6 +1932,24 @@ static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
             data[5] = 0xf0; // any slice type (0xe) + rbsp stop one bit
             buf     = data;
             size    = pkt->size + 6 + extradd;
+        } else if (extradd != 0) {
+            // Move the AUD up to the beginning of the frame, where the H.264
+            // spec requires it to appear. Emit the extradata after it.
+            const int aud_size = found_aud_end - found_aud;
+            const int new_pkt_size = pkt->size + 1 + extradd;
+            uint8_t *pos;
+            data = av_malloc(new_pkt_size);
+            if (!data)
+                return AVERROR(ENOMEM);
+            AV_WB8(data, 0x0);
+            pos = data + 1;
+            pos = append(pos, found_aud, aud_size);
+            pos = append(pos, st->codecpar->extradata, extradd);
+            pos = append(pos, pkt->data, found_aud - pkt->data);
+            pos = append(pos, found_aud_end, buf_end - found_aud_end);
+            av_assert0(data + new_pkt_size == pos);
+            buf     = data;
+            size    = new_pkt_size;
         }
     } else if (st->codecpar->codec_id == AV_CODEC_ID_AAC) {
         if (pkt->size < 2) {
-- 
2.39.1.581.gbfd45094c4-goog



More information about the ffmpeg-devel mailing list