[FFmpeg-devel] [PATCH] rtpdec_vp9: Update header parsing to RFC 9628
Parallelc
realparallelc at gmail.com
Sat Apr 19 15:52:45 EEST 2025
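Update the VP9 RTP depacketizer from the draft-02 header layout to the
layout in RFC 9628: the layer indices byte is read using the TID/U/SID/D
layout, TL0PICIDX is skipped in non-flexible mode, reference indices are
read as up to three P_DIFF/N bytes, and the quality layer and extended
PID reference handling are removed.
Signed-off-by: Parallelc <realparallelc at gmail.com>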
---
libavformat/rtpdec_vp9.c | 141 ++++++++++++++++++++-------------------
1 file changed, 72 insertions(+), 69 deletions(-)
diff --git a/libavformat/rtpdec_vp9.c b/libavformat/rtpdec_vp9.c
index 6bbdf4847a..924065b5da 100644
--- a/libavformat/rtpdec_vp9.c
+++ b/libavformat/rtpdec_vp9.c
@@ -1,5 +1,5 @@
/*
- * RTP parser for VP9 payload format (draft version 02) - experimental
+ * RTP parser for VP9 payload format (RFC 9628) - experimental
* Copyright (c) 2015 Thomas Volkert <thomas at homer-conferencing.com>
*
* This file is part of FFmpeg.
@@ -47,8 +47,7 @@ static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx,
{
int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data;
av_unused int pic_id = 0, non_key_frame = 0, inter_picture_layer_frame;
- av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1;
- int ref_fields = 0, has_ref_field_ext_pic_id = 0;
+ av_unused int layer_temporal = -1, layer_spatial = -1;
int first_fragment, last_fragment;
int rtp_m;
int res = 0;
@@ -68,16 +67,17 @@ static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx,
*
* 0 1 2 3 4 5 6 7
* +-+-+-+-+-+-+-+-+
- * |I|P|L|F|B|E|V|-| (REQUIRED)
+ * |I|P|L|F|B|E|V|Z| (REQUIRED)
* +-+-+-+-+-+-+-+-+
*
- * I: PictureID present
- * P: Inter-picture predicted layer frame
+ * I: Picture ID (PID) present
+ * P: Inter-picture predicted frame
* L: Layer indices present
* F: Flexible mode
- * B: Start of VP9 frame
- * E: End of picture
- * V: Scalability Structure (SS) present
+ * B: Start of Frame
+ * E: End of Frame
+ * V: Scalability Structure (SS) data present
+ * Z: Not a reference frame for upper spatial layers
*/
has_pic_id = !!(buf[0] & 0x80);
inter_picture_layer_frame = !!(buf[0] & 0x40);
@@ -89,7 +89,7 @@ static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx,
rtp_m = !!(flags & RTP_FLAG_MARKER);
- /* sanity check for markers: B should always be equal to the RTP M marker */
+ /* sanity check for markers: E should always be equal to the RTP M marker */
if (last_fragment != rtp_m) {
av_log(ctx, AV_LOG_ERROR, "Invalid combination of B and M marker (%d != %d)\n", last_fragment, rtp_m);
return AVERROR_INVALIDDATA;
@@ -134,72 +134,70 @@ static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx,
*
* 0 1 2 3 4 5 6 7
* +-+-+-+-+-+-+-+-+
- * L: | T | S | Q | R | (CONDITIONALLY RECOMMENDED)
+ * L: | TID |U| SID |D| (Conditionally RECOMMENDED)
+ * +-+-+-+-+-+-+-+-+
+ * | TL0PICIDX | (Conditionally REQUIRED)
* +-+-+-+-+-+-+-+-+
*
- * T, S and Q are 2-bit indices for temporal, spatial, and quality layers.
- * If "F" is set in the initial octet, R is 2 bits representing the number
- * of reference fields this frame refers to.
+ * TID: Temporal layer ID (3 bits)
+ * U: Switching up point (1 bit)
+ * SID: Spatial layer ID (3 bits)
+ * D: Inter-layer dependency used (1 bit)
+ * TL0PICIDX: Temporal Layer 0 Picture Index (8 bits, non-flexible mode only)
*/
if (has_layer_idc) {
if (len < 1) {
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
return AVERROR_INVALIDDATA;
}
- layer_temporal = buf[0] & 0xC0;
- layer_spatial = buf[0] & 0x30;
- layer_quality = buf[0] & 0x0C;
- if (has_ref_idc) {
- ref_fields = buf[0] & 0x03;
- if (ref_fields)
- non_key_frame = 1;
- }
+ layer_temporal = buf[0] >> 5;
+ layer_spatial = (buf[0] >> 1) & 0x07;
buf++;
len--;
+
+ if (!has_ref_idc) {
+ if (len < 1) {
+ av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
+ return AVERROR_INVALIDDATA;
+ }
+ /* ignore TL0PICIDX */
+ buf++;
+ len--;
+ }
}
/*
- * decode the reference fields
+ * decode reference indices
*
* 0 1 2 3 4 5 6 7
- * +-+-+-+-+-+-+-+-+ -\
- * F: | PID |X| RS| RQ| (OPTIONAL) .
- * +-+-+-+-+-+-+-+-+ . - R times
- * X: | EXTENDED PID | (OPTIONAL) .
- * +-+-+-+-+-+-+-+-+ -/
+ * +-+-+-+-+-+-+-+-+ -\
+ * P,F: | P_DIFF |N| (Conditionally REQUIRED) - up to 3 times
+ * +-+-+-+-+-+-+-+-+ -/
*
- * PID: The relative Picture ID referred to by this frame.
- * RS and RQ: The spatial and quality layer IDs.
- * X: 1 if this layer index has an extended relative Picture ID.
+ * P_DIFF: Relative Picture ID (7 bits)
+ * N: 1 if another P_DIFF follows
*/
- if (has_ref_idc) {
- while (ref_fields) {
+ if (has_ref_idc && inter_picture_layer_frame) {
+ int i, p_diff, has_more;
+ for (i = 0; i < 3; i++) {
if (len < 1) {
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
return AVERROR_INVALIDDATA;
}
- has_ref_field_ext_pic_id = buf[0] & 0x10;
-
- /* pass ref. field */
- if (has_ref_field_ext_pic_id) {
- if (len < 2) {
- av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
- return AVERROR_INVALIDDATA;
- }
-
- /* ignore ref. data */
+ p_diff = buf[0] >> 1;
+ has_more = buf[0] & 0x01;
- buf += 2;
- len -= 2;
- } else {
+ if (!p_diff) {
+ av_log(ctx, AV_LOG_ERROR, "Invalid P_DIFF value 0\n");
+ return AVERROR_INVALIDDATA;
+ }
- /* ignore ref. data */
+ buf++;
+ len--;
- buf++;
- len--;
- }
- ref_fields--;
+ if (!has_more)
+ break;
}
}
@@ -208,18 +206,30 @@ static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx,
*
* 0 1 2 3 4 5 6 7
* +-+-+-+-+-+-+-+-+
- * V: | PATTERN LENGTH|
- * +-+-+-+-+-+-+-+-+ -\
- * | T | S | Q | R | (OPTIONAL) .
- * +-+-+-+-+-+-+-+-+ -\ .
- * | PID |X| RS| RQ| (OPTIONAL) . . - PAT. LEN. times
- * +-+-+-+-+-+-+-+-+ . - R times .
- * X: | EXTENDED PID | (OPTIONAL) . .
- * +-+-+-+-+-+-+-+-+ -/ -/
+ * V: | N_S |Y|G|-|-|-|
+ * +-+-+-+-+-+-+-+-+ -\
+ * Y: | WIDTH | (OPTIONAL) .
+ * + + .
+ * | | (OPTIONAL) .
+ * +-+-+-+-+-+-+-+-+ . - N_S + 1 times
+ * | HEIGHT | (OPTIONAL) .
+ * + + .
+ * | | (OPTIONAL) .
+ * +-+-+-+-+-+-+-+-+ -/
+ * G: | N_G | (OPTIONAL)
+ * +-+-+-+-+-+-+-+-+ -\
+ * N_G: | TID |U| R |-|-| (OPTIONAL) .
+ * +-+-+-+-+-+-+-+-+ -\ . - N_G times
+ * | P_DIFF | (OPTIONAL) . - R times .
+ * +-+-+-+-+-+-+-+-+ -/ -/
*
- * PID: The relative Picture ID referred to by this frame.
- * RS and RQ: The spatial and quality layer IDs.
- * X: 1 if this layer index has an extended relative Picture ID.
+ * N_S: Number of spatial layers minus 1
+ * Y: Each spatial layer's resolution present
+ * G: Picture Group description present
+ * N_G: Number of pictures in Picture Group
+ * TID: Temporal layer ID
+ * U: Switching up point
+ * R: Number of P_DIFF fields
*/
if (has_ss_data) {
int n_s, y, g, i;
@@ -282,13 +292,6 @@ static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx,
}
}
- /*
- * decode the VP9 payload header
- *
- * spec. is tbd
- */
- //XXX: implement when specified
-
/* sanity check: 1 byte payload as minimum */
if (len < 1) {
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
--
2.43.0
More information about the ffmpeg-devel mailing list