[FFmpeg-devel] [PATCH] avformat: add AV1 RTP depacketizer and packetizer

Chris Hodges Chris.Hodges at axis.com
Fri Nov 22 09:48:01 EET 2024


Add an RTP packetizer and depacketizer that implement most
of the official AV1 RTP specification. This enables
streaming via RTSP between two ffmpeg instances and has
also been tested to work with AV1 RTSP streams via
GStreamer.

It also adds the required SDP additions.

Signed-off-by: Chris Hodges <chrishod at axis.com>
---
  libavformat/Makefile         |   2 +
  libavformat/demux.c          |   1 +
  libavformat/rtp_av1.h        | 128 +++++++++++
  libavformat/rtpdec.c         |   1 +
  libavformat/rtpdec_av1.c     | 417 +++++++++++++++++++++++++++++++++++
  libavformat/rtpdec_formats.h |   1 +
  libavformat/rtpenc.c         |   4 +
  libavformat/rtpenc.h         |   1 +
  libavformat/rtpenc_av1.c     | 305 +++++++++++++++++++++++++
  libavformat/sdp.c            |  30 +++
  10 files changed, 890 insertions(+)
  create mode 100644 libavformat/rtp_av1.h
  create mode 100644 libavformat/rtpdec_av1.c
  create mode 100644 libavformat/rtpenc_av1.c

diff --git a/libavformat/Makefile b/libavformat/Makefile
index 7ca68a7036..1200668a2f 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -47,6 +47,7 @@ OBJS-$(CONFIG_RTPDEC)                    += rdt.o 
                  \
                                              rtpdec_ac3.o                \
                                              rtpdec_amr.o                \
                                              rtpdec_asf.o                \
+                                            rtpdec_av1.o                \
                                              rtpdec_dv.o                 \
                                              rtpdec_g726.o               \
                                              rtpdec_h261.o               \
@@ -515,6 +516,7 @@ OBJS-$(CONFIG_RTP_MUXER)                 += rtp.o 
      \
                                              rtpenc_aac.o     \
                                              rtpenc_latm.o    \
                                              rtpenc_amr.o     \
+                                            rtpenc_av1.o     \
                                              rtpenc_h261.o    \
                                              rtpenc_h263.o    \
                                              rtpenc_h263_rfc2190.o \
diff --git a/libavformat/demux.c b/libavformat/demux.c
index cba1f2e4df..8357a3bff1 100644
--- a/libavformat/demux.c
+++ b/libavformat/demux.c
@@ -111,6 +111,7 @@ static int set_codec_from_probe_data(AVFormatContext *s, AVStream *st,
          { "aac",        AV_CODEC_ID_AAC,          AVMEDIA_TYPE_AUDIO    },
          { "ac3",        AV_CODEC_ID_AC3,          AVMEDIA_TYPE_AUDIO    },
          { "aptx",       AV_CODEC_ID_APTX,         AVMEDIA_TYPE_AUDIO    },
+        { "av1",        AV_CODEC_ID_AV1,          AVMEDIA_TYPE_VIDEO    },
          { "dts",        AV_CODEC_ID_DTS,          AVMEDIA_TYPE_AUDIO    },
          { "dvbsub",     AV_CODEC_ID_DVB_SUBTITLE, AVMEDIA_TYPE_SUBTITLE },
          { "dvbtxt",     AV_CODEC_ID_DVB_TELETEXT, AVMEDIA_TYPE_SUBTITLE },
diff --git a/libavformat/rtp_av1.h b/libavformat/rtp_av1.h
new file mode 100644
index 0000000000..a353fc0e4e
--- /dev/null
+++ b/libavformat/rtp_av1.h
@@ -0,0 +1,128 @@
+/*
+ * Shared definitions and helper functions for
+ * AV1 (de)packetization.
+ * Copyright (c) 2024 Axis Communications
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * @brief shared defines and functions for AV1 RTP dec/enc
+ * @author Chris Hodges <chris.hodges at axis.com>
+ */
+
+#ifndef AVFORMAT_RTP_AV1_H
+#define AVFORMAT_RTP_AV1_H
+
+// define a couple of flags and bit fields
+// Prefix convention used below:
+//   AV1B_* bit position, AV1F_* single-bit mask built as (1u << AV1B_*),
+//   AV1S_* right-shift amount, AV1M_* mask applied after shifting by AV1S_*
+// Fields of the OBU header byte:
+#define AV1B_OBU_FORBIDDEN      7
+#define AV1F_OBU_FORBIDDEN      (1u << AV1B_OBU_FORBIDDEN)
+#define AV1S_OBU_TYPE           3
+#define AV1M_OBU_TYPE           15
+#define AV1B_OBU_EXTENSION_FLAG 2
+#define AV1F_OBU_EXTENSION_FLAG (1u << AV1B_OBU_EXTENSION_FLAG)
+#define AV1B_OBU_HAS_SIZE_FIELD 1
+#define AV1F_OBU_HAS_SIZE_FIELD (1u << AV1B_OBU_HAS_SIZE_FIELD)
+#define AV1B_OBU_RESERVED_1BIT  0
+#define AV1F_OBU_RESERVED_1BIT  (1u << AV1B_OBU_RESERVED_1BIT)
+
+// Fields of the one-byte RTP aggregation header (bit layout: Z Y W W N - - -):
+// FRAG_CONT is Z, LAST_FRAG is Y, NUM_OBUS is W, FIRST_PKT is N
+#define AV1B_AGGR_HDR_FRAG_CONT 7
+#define AV1F_AGGR_HDR_FRAG_CONT (1u << AV1B_AGGR_HDR_FRAG_CONT)
+#define AV1B_AGGR_HDR_LAST_FRAG 6
+#define AV1F_AGGR_HDR_LAST_FRAG (1u << AV1B_AGGR_HDR_LAST_FRAG)
+#define AV1S_AGGR_HDR_NUM_OBUS  4
+#define AV1M_AGGR_HDR_NUM_OBUS  3
+#define AV1B_AGGR_HDR_FIRST_PKT 3
+#define AV1F_AGGR_HDR_FIRST_PKT (1u << AV1B_AGGR_HDR_FIRST_PKT)
+
+/// number of LEB128 bytes needed to encode the given length (always >= 1)
+static inline unsigned int calc_leb_size(uint32_t length) {
+    unsigned int count = 1;
+    // every further group of 7 significant bits costs one more byte
+    for (length >>= 7; length; length >>= 7)
+        count++;
+    return count;
+}
+
+/// encode length as LEB128 using the minimal number of bytes;
+/// returns how many bytes were written to lebptr
+static inline unsigned int write_leb(uint8_t *lebptr, uint32_t length) {
+    unsigned int written = 1;
+    // emit 7 bits per byte with the continuation bit set while more follow;
+    // storing into uint8_t truncates, so no explicit masking is needed
+    while (length >= 0x80) {
+        *lebptr++ = length | 0x80;
+        length >>= 7;
+        written++;
+    }
+    // final byte: remaining bits, continuation bit clear
+    *lebptr = length;
+    return written;
+}
+
+/**
+ * Write length as LEB128 using exactly num_lebs bytes.
+ *
+ * All bytes but the last get the continuation bit set, so the encoding may
+ * contain "unused" high bytes holding only zero payload bits. Bits of length
+ * that do not fit into num_lebs * 7 bits are dropped. Nothing is written
+ * when num_lebs is 0.
+ */
+static inline void write_leb_n(uint8_t *lebptr, uint32_t length, unsigned int num_lebs) {
+    // unsigned index: avoids a signed/unsigned comparison against num_lebs
+    for (unsigned int i = 0; i < num_lebs; i++) {
+        if (i + 1 == num_lebs) {
+            // last byte: continuation bit must be clear
+            *lebptr = length & 0x7f;
+        } else {
+            *lebptr++ = length | 0x80; // no need to mask out
+        }
+        length >>= 7;
+    }
+}
+
+/**
+ * Parse a LEB128-encoded size with bounds checking.
+ *
+ * @param ctx         context used for error logging
+ * @param buf_ptr     first LEB128 byte
+ * @param buffer_size number of bytes that may be read from buf_ptr
+ * @param obu_size    receives the decoded value (reset to 0 on entry)
+ * @return number of LEB128 bytes consumed, or 0 on error (ran out of data,
+ *         value does not fit in 32 bits, or the 8th byte still signals
+ *         continuation)
+ */
+static inline unsigned int parse_leb(AVFormatContext *ctx, const uint8_t *buf_ptr,
+                                     uint32_t buffer_size, uint32_t *obu_size) {
+    uint8_t leb128;
+    unsigned int num_lebs = 0;
+    *obu_size = 0;
+    do {
+        uint32_t leb7;
+        if (!buffer_size) {
+            av_log(ctx, AV_LOG_ERROR, "AV1: Out of data in OBU size field AV1 RTP packet\n");
+            return 0;
+        }
+        leb128 = *buf_ptr++;
+        leb7 = leb128 & 0x7f;
+        buffer_size--;
+        /* AV1 spec says that the maximum value returned from leb128 must fit in
+         * 32 bits, so if the next byte will shift data out, we have some kind
+         * of violation here. It is legal, though, to have the most significant
+         * bytes with all zero bits (in the lower 7 bits). */
+        if (((num_lebs == 4) && (leb7 >= 0x10)) || ((num_lebs > 4) && leb7)) {
+            av_log(ctx, AV_LOG_ERROR, "AV1: OBU size field exceeds 32 bit in AV1 RTP packet\n");
+            return 0;
+        }
+        if ((num_lebs == 7) && (leb128 >= 0x80)) {
+            /* leb128 is defined to be up to 8 bytes (why???), 8th byte MUST NOT
+             * indicate continuation */
+            av_log(ctx, AV_LOG_ERROR, "AV1: OBU size field consists of too many bytes in AV1 RTP packet\n");
+            return 0;
+        }
+        // shifts >= 32 are undefined in C!
+        if (num_lebs <= 4) {
+            *obu_size |= leb7 << (7 * num_lebs);
+        }
+        num_lebs++;
+    } while (leb128 >= 0x80);
+    return num_lebs;
+}
+
+#endif /* AVFORMAT_RTP_AV1_H */
diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c
index 729bf83685..a7d5a79a83 100644
--- a/libavformat/rtpdec.c
+++ b/libavformat/rtpdec.c
@@ -83,6 +83,7 @@ static const RTPDynamicProtocolHandler *const rtp_dynamic_protocol_handler_list[
      &ff_ac3_dynamic_handler,
      &ff_amr_nb_dynamic_handler,
      &ff_amr_wb_dynamic_handler,
+    &ff_av1_dynamic_handler,
      &ff_dv_dynamic_handler,
      &ff_g726_16_dynamic_handler,
      &ff_g726_24_dynamic_handler,
diff --git a/libavformat/rtpdec_av1.c b/libavformat/rtpdec_av1.c
new file mode 100644
index 0000000000..af8df368fe
--- /dev/null
+++ b/libavformat/rtpdec_av1.c
@@ -0,0 +1,417 @@
+/*
+ * Depacketization for RTP Payload Format For AV1 (v1.0)
+ * https://aomediacodec.github.io/av1-rtp-spec/
+ * Copyright (c) 2024 Axis Communications
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * @brief AV1 / RTP depacketization code (RTP Payload Format For AV1 (v1.0))
+ * @author Chris Hodges <chris.hodges at axis.com>
+ * @note The process will restore TDs and put back size fields into headers.
+ *       It will also try to keep complete OBUs and remove partial OBUs
+ *       caused by packet drops and thus keep the stream syntactically intact.
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/mem.h"
+#include "avformat.h"
+
+#include "rtpdec.h"
+#include "libavcodec/av1.h"
+#include "rtp_av1.h"
+
+/**
+ * RTP/AV1 specific private data.
+ *
+ * Holds the SDP-provided stream parameters plus the state needed to
+ * reassemble an OBU that is fragmented across several RTP packets.
+ */
+struct PayloadContext {
+    uint32_t timestamp; ///< last received timestamp for frame
+    uint8_t profile;    ///< profile (main/high/professional)
+    uint8_t level_idx;  ///< level (0-31)
+    uint8_t tier;       ///< main tier or high tier
+    uint16_t prev_seq;  ///< sequence number of previous packet
+    unsigned int frag_obu_size;     ///< current total size of fragmented OBU
+    unsigned int frag_pkt_leb_pos;  ///< offset in buffer where OBU LEB starts
+    unsigned int frag_lebs_res;     ///< number of bytes reserved for LEB
+    unsigned int frag_header_size;  ///< size of OBU header (1 or 2)
+    int needs_td;                   ///< indicates that a TD should be output
+    int drop_fragment;              ///< drop all fragments until next frame
+};
+
+/**
+ * fmtp attribute callback: store the optional AV1 parameters "profile",
+ * "level-idx" and "tier" in the payload context. Unknown attributes are
+ * ignored.
+ *
+ * @return always 0
+ */
+static int sdp_parse_fmtp_config_av1(AVFormatContext *s,
+                                     AVStream *stream,
+                                     PayloadContext *av1_data,
+                                     const char *attr, const char *value) {
+    if (!strcmp(attr, "profile")) {
+        av1_data->profile = atoi(value);
+        av_log(s, AV_LOG_DEBUG, "RTP AV1 profile: %u\n", av1_data->profile);
+    } else if (!strcmp(attr, "level-idx")) {
+        av1_data->level_idx = atoi(value);
+        // log the value just parsed (level_idx), not the profile
+        av_log(s, AV_LOG_DEBUG, "RTP AV1 level: %u\n", av1_data->level_idx);
+    } else if (!strcmp(attr, "tier")) {
+        av1_data->tier = atoi(value);
+        av_log(s, AV_LOG_DEBUG, "RTP AV1 tier: %u\n", av1_data->tier);
+    }
+    return 0;
+}
+
+/**
+ * Depacketize one AV1 RTP payload and append its OBU elements to pkt.
+ *
+ * Temporal delimiters are re-inserted where a new temporal unit starts and
+ * OBU size fields are restored; the size field of an OBU fragmented across
+ * RTP packets is patched once its last fragment has arrived.
+ *
+ * @param ctx       context used for logging
+ * @param data      depacketizer state carried between RTP packets
+ * @param st        stream the output packet belongs to
+ * @param pkt       output packet, grown as OBU data is appended
+ * @param timestamp RTP timestamp of this packet
+ * @param buf       RTP payload
+ * @param len       payload length in bytes
+ * @param seq       RTP sequence number
+ * @param flags     RTP flags (RTP_FLAG_MARKER is evaluated)
+ * @return 0 on complete packet, -1 on partial packet (the last OBU element
+ *         continues in the next RTP packet), another negative value on error
+ */
+static int av1_handle_packet(AVFormatContext *ctx, PayloadContext *data,
+                             AVStream *st, AVPacket *pkt, uint32_t *timestamp,
+                             const uint8_t *buf, int len, uint16_t seq,
+                             int flags) {
+    uint8_t aggr_hdr;
+    int result = 0;
+    int is_frag_cont;
+    int is_last_fragmented;
+    int is_first_pkt;
+    unsigned int num_obus;
+    unsigned int obu_cnt = 1;
+    unsigned int rem_pkt_size = len;
+    unsigned int pktpos;
+    const uint8_t *buf_ptr = buf;
+    uint16_t expected_seq = data->prev_seq + 1;
+
+    data->prev_seq = seq;
+
+    if (!len) {
+        av_log(ctx, AV_LOG_ERROR, "Empty AV1 RTP packet\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (len < 2) {
+        av_log(ctx, AV_LOG_ERROR, "AV1 RTP packet too short\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* The payload structure is supposed to be straight-forward, but there are a
+     * couple of edge cases which need to be tackled and make things a bit more
+     * complex.
+     * These are mainly due to:
+     * - To reconstruct the OBU size for fragmented packets and place it the OBU
+     *   header, the final size will not be known until the last fragment has
+     *   been parsed. However, the number LEBs in the header is variable
+     *   depending on the length of the payload.
+     * - We are increasing the out-packet size while we are getting fragmented
+     *   OBUs. If an RTP packet gets dropped, we would create corrupted OBUs.
+     *   In this case we decide to drop the whole frame.
+     */
+
+    av_log(ctx, AV_LOG_DEBUG, "RTP Packet %d in (%x), len=%d:\n",
+           seq, flags, len);
+    av_hex_dump_log(ctx, AV_LOG_TRACE, buf, FFMIN(len, 128));
+
+    /* 8 bit aggregate header: Z Y W W N - - - */
+    aggr_hdr = *buf_ptr++;
+    rem_pkt_size--;
+
+    /* Z: MUST be set to 1 if the first OBU element is an OBU fragment that is a
+     * continuation of an OBU fragment from the previous packet, and MUST be set
+     * to 0 otherwise */
+    is_frag_cont = (aggr_hdr >> AV1B_AGGR_HDR_FRAG_CONT) & 1;
+
+    /* Y: MUST be set to 1 if the last OBU element is an OBU fragment that will
+     * continue in the next packet, and MUST be set to 0 otherwise */
+    is_last_fragmented = (aggr_hdr >> AV1B_AGGR_HDR_LAST_FRAG) & 1;
+
+    /* W: two bit field that describes the number of OBU elements in the packet.
+     * This field MUST be set equal to 0 or equal to the number of OBU elements
+     * contained in the packet.
+     * If set to 0, each OBU element MUST be preceded by a length field.
+     * If not set to 0 (i.e., W = 1, 2 or 3) the last OBU element MUST NOT be
+     * preceded by a length field (it's derived from RTP packet size minus other
+     * known lengths). */
+    num_obus = (aggr_hdr >> AV1S_AGGR_HDR_NUM_OBUS) & AV1M_AGGR_HDR_NUM_OBUS;
+
+    /* N: MUST be set to 1 if the packet is the first packet of a coded video
+     * sequence, and MUST be set to 0 otherwise.*/
+    is_first_pkt = (aggr_hdr >> AV1B_AGGR_HDR_FIRST_PKT) & 1;
+
+    if (is_frag_cont) {
+        int16_t seq_diff = seq - expected_seq;
+
+        if (data->drop_fragment) {
+            return AVERROR_INVALIDDATA;
+        }
+        if (is_first_pkt) {
+            av_log(ctx, AV_LOG_ERROR, "Illegal aggregation header in first AV1 RTP packet\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if (seq_diff) {
+            av_log(ctx, AV_LOG_WARNING, "AV1 RTP packet sequence mismatch (%d != %d), dropping fragment\n",
+                   seq, expected_seq);
+            goto drop_fragment;
+        }
+        if (!pkt->size || !data->frag_obu_size) {
+            av_log(ctx, AV_LOG_WARNING, "Unexpected fragment continuation in AV1 RTP packet\n");
+            goto drop_fragment; // avoid repeated output for the same fragment
+        }
+    } else {
+        data->drop_fragment = 0;
+        if ((data->timestamp != *timestamp) || is_first_pkt) {
+            av_log(ctx, AV_LOG_TRACE, "Timestamp changed to %d (or first pkt), forcing TD\n", *timestamp);
+            data->needs_td = 1;
+            data->frag_obu_size = 0; // new temporal unit might have been caused by dropped packets
+        }
+        if (data->frag_obu_size) {
+            data->frag_obu_size = 0; // make sure we recover
+            av_log(ctx, AV_LOG_ERROR, "Missing fragment continuation in AV1 RTP packet\n");
+            return AVERROR_INVALIDDATA;
+        }
+        // update the timestamp in the frame packet with the one from the RTP packet
+        data->timestamp = *timestamp;
+    }
+    pktpos = pkt->size;
+    av_log(ctx, AV_LOG_DEBUG, "Input buffer size %d, aggr head 0x%02x fc %d, lf %d, no %d, fp %d\n",
+           len, aggr_hdr, is_frag_cont, is_last_fragmented, num_obus, is_first_pkt);
+
+    // loop over OBU elements
+    while (rem_pkt_size) {
+        uint32_t obu_size;
+        int num_lebs;
+        int needs_size_field;
+        int output_size;
+        unsigned int obu_payload_size;
+        uint8_t obu_hdr;
+
+        obu_size = rem_pkt_size;
+        if (!num_obus || obu_cnt < num_obus) {
+            // read out explicit OBU element size (which almost corresponds to the original OBU size)
+            num_lebs = parse_leb(ctx, buf_ptr, rem_pkt_size, &obu_size);
+            if (!num_lebs) {
+                return AVERROR_INVALIDDATA;
+            }
+            rem_pkt_size -= num_lebs;
+            buf_ptr += num_lebs;
+        }
+        if (obu_size > rem_pkt_size) {
+            av_log(ctx, AV_LOG_ERROR, "AV1 OBU size %u larger than remaining pkt size %d\n", obu_size, rem_pkt_size);
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (!obu_size) {
+            av_log(ctx, AV_LOG_ERROR, "Unreasonable AV1 OBU size %u\n", obu_size);
+            return AVERROR_INVALIDDATA;
+        }
+
+        // read first byte (which is the header byte only for non-fragmented elements);
+        // done only after the size checks, so at least one payload byte is
+        // known to remain (the size field may have ended the packet)
+        obu_hdr = *buf_ptr;
+
+        if (!is_frag_cont) {
+            uint8_t obu_type = (obu_hdr >> AV1S_OBU_TYPE) & AV1M_OBU_TYPE;
+            if (obu_hdr & AV1F_OBU_FORBIDDEN) {
+                av_log(ctx, AV_LOG_ERROR, "Forbidden bit set in AV1 OBU header (0x%02x)\n", obu_hdr);
+                return AVERROR_INVALIDDATA;
+            }
+            // ignore and remove OBUs according to spec
+            if ((obu_type == AV1_OBU_TEMPORAL_DELIMITER) ||
+                (obu_type == AV1_OBU_TILE_LIST)) {
+                // skip the element in the INPUT; the output position must not
+                // move because nothing is written for a removed OBU
+                buf_ptr += obu_size;
+                rem_pkt_size -= obu_size;
+                // TODO: This probably breaks if the OBU_TILE_LIST is fragmented
+                // into the next RTP packet, so at least check and fail here
+                if (rem_pkt_size == 0 && is_last_fragmented) {
+                    av_log(ctx, AV_LOG_ERROR, "AV1 OBU_TILE_LIST fragmented, unsupported\n");
+                    return AVERROR_INVALIDDATA;
+                }
+                obu_cnt++;
+                continue;
+            }
+        }
+
+        // If we need to add a size field, out size will be different
+        output_size = obu_size;
+        // Spec says the OBUs should have their size fields removed,
+        // but this is not mandatory
+        if (is_frag_cont || (obu_hdr & AV1F_OBU_HAS_SIZE_FIELD)) {
+            needs_size_field = 0;
+        } else {
+            needs_size_field = 1;
+            // (re)calculate number of LEB bytes needed (if it was implicit, there were no LEBs)
+            output_size += calc_leb_size(obu_size - (1 + ((obu_hdr & AV1F_OBU_EXTENSION_FLAG) ? 1 : 0)));
+        }
+
+        if (!is_frag_cont && (obu_cnt == 1)) {
+            if (data->needs_td) {
+                av_log(ctx, AV_LOG_TRACE, "TD will be added\n");
+                output_size += 2; // for Temporal Delimiter (TD)
+            }
+            if (pkt->data) {
+                if ((result = av_grow_packet(pkt, output_size)) < 0)
+                    return result;
+            } else {
+                // compare the assigned value, not the call, so the real
+                // error code is propagated
+                if ((result = av_new_packet(pkt, output_size)) < 0)
+                    return result;
+            }
+
+            if (data->needs_td) {
+                // restore TD
+                pkt->data[pktpos++] = 0x12;
+                pkt->data[pktpos++] = 0x00;
+            }
+            data->needs_td = 0;
+        } else {
+            if ((result = av_grow_packet(pkt, output_size)) < 0)
+                return result;
+        }
+
+        obu_payload_size = obu_size;
+        // do we need to restore the OBU size field?
+        if (needs_size_field) {
+            // set obu_has_size_field in header byte
+            pkt->data[pktpos++] = *buf_ptr++ | AV1F_OBU_HAS_SIZE_FIELD;
+            data->frag_header_size = 1;
+            obu_payload_size--;
+
+            // copy extension byte, if available
+            if (obu_hdr & AV1F_OBU_EXTENSION_FLAG) {
+                /* TODO we cannot handle the edge case where last element is a
+                 * fragment of exactly one byte AND the header has the extension
+                 * flag set. Note that it would be more efficient to not send a
+                 * fragment of one byte and instead drop the size field of the
+                 * prior element */
+                if (!obu_payload_size) {
+                    av_log(ctx, AV_LOG_ERROR, "AV1 OBU too short for extension byte (0x%02x)\n",
+                           obu_hdr);
+                    return AVERROR_INVALIDDATA;
+                }
+                pkt->data[pktpos++] = *buf_ptr++;
+                data->frag_header_size = 2;
+                obu_payload_size--;
+            }
+
+            // remember start position of LEB for possibly fragmented packet to
+            // fixup OBU size later
+            data->frag_pkt_leb_pos = pktpos;
+            // write intermediate OBU size field
+            num_lebs = write_leb(pkt->data + pktpos, obu_payload_size);
+            data->frag_lebs_res = num_lebs;
+            pktpos += num_lebs;
+        }
+        // copy verbatim or without above header size patch
+        memcpy(pkt->data + pktpos, buf_ptr, obu_payload_size);
+        pktpos += obu_payload_size;
+        buf_ptr += obu_payload_size;
+        rem_pkt_size -= obu_size;
+
+        // if we were handling a fragmented packet and this was the last
+        // fragment, correct OBU size field
+        if (data->frag_obu_size && (rem_pkt_size || !is_last_fragmented)) {
+            uint32_t final_obu_size = data->frag_obu_size + obu_size - data->frag_header_size;
+            uint8_t *lebptr = pkt->data + data->frag_pkt_leb_pos;
+            num_lebs = calc_leb_size(final_obu_size);
+
+            // check if we had allocated enough LEB bytes in header,
+            // otherwise make some extra space
+            if (num_lebs > data->frag_lebs_res) {
+                int extra_bytes = num_lebs - data->frag_lebs_res;
+                if ((result = av_grow_packet(pkt, extra_bytes)) < 0)
+                    return result;
+                // update pointer in case buffer address changed
+                lebptr = pkt->data + data->frag_pkt_leb_pos;
+                // move existing data for OBU back a bit
+                memmove(lebptr + extra_bytes, lebptr,
+                        pkt->size - extra_bytes - data->frag_pkt_leb_pos);
+            }
+
+            // update OBU size field
+            write_leb(lebptr, final_obu_size);
+
+            data->frag_obu_size = 0; // signal end of fragment
+        } else if (is_last_fragmented && !rem_pkt_size) {
+            // add to total OBU size, so we can fix that in OBU header
+            // (but only if the OBU size was missing!)
+            if (needs_size_field || data->frag_obu_size) {
+                data->frag_obu_size += obu_size;
+            }
+            // packet not yet finished!
+            result = -1;
+        }
+        is_frag_cont = 0;
+
+        // only a non-zero W announces an element count that can be checked
+        if (!rem_pkt_size && num_obus && (num_obus != obu_cnt)) {
+            av_log(ctx, AV_LOG_WARNING, "AV1 aggregation header indicated %u OBU elements, was %u\n",
+                   num_obus, obu_cnt);
+        }
+        obu_cnt++;
+    }
+
+    if (flags & RTP_FLAG_MARKER) {
+        av_log(ctx, AV_LOG_TRACE, "TD on next packet due to marker\n");
+        data->needs_td = 1;
+    }
+
+    if (!is_last_fragmented) {
+        data->frag_obu_size = 0;
+        data->frag_pkt_leb_pos = 0;
+    }
+
+    if (!result) {
+        av_log(ctx, AV_LOG_DEBUG, "AV1 out pkt-size: %d\n", pkt->size);
+        av_hex_dump_log(ctx, AV_LOG_TRACE, pkt->data, FFMIN(pkt->size, 128));
+    }
+    pkt->stream_index = st->index;
+
+    return result;
+
+drop_fragment:
+    data->drop_fragment = 1;
+    data->frag_obu_size = 0;
+    data->needs_td = 1;
+    if (pkt->size) {
+        av_log(ctx, AV_LOG_TRACE, "Dumping current AV1 frame packet\n");
+        // we can't seem to deallocate the fragmented packet, but we can shrink it to 0
+        av_shrink_packet(pkt, 0);
+    }
+    return AVERROR_INVALIDDATA;
+}
+
+/// Close callback of the AV1 depacketizer; intentionally a no-op, as
+/// PayloadContext consists of plain scalar fields only.
+static void av1_close_context(PayloadContext *data) {
+}
+
+/**
+ * Handle one SDP attribute line for an AV1 stream.
+ *
+ * Only "fmtp:" lines are evaluated; their parameters are passed to
+ * sdp_parse_fmtp_config_av1() via ff_parse_fmtp().
+ *
+ * @param s        format context (used for logging and stream lookup)
+ * @param st_index index of the stream in s->streams; negative values are
+ *                 ignored
+ * @param av1_data payload context receiving the parsed parameters
+ * @param line     SDP attribute line
+ * @return 0 if the line was ignored, otherwise the result of ff_parse_fmtp()
+ */
+static int parse_av1_sdp_line(AVFormatContext *s, int st_index,
+                              PayloadContext *av1_data, const char *line) {
+    AVStream * stream;
+    const char *p = line;
+    int result = 0;
+
+    if (st_index < 0)
+        return 0;
+
+    stream = s->streams[st_index];
+
+    /* Optional parameters are profile, level-idx, and tier.
+     * See Section 7.2.1 of https://aomediacodec.github.io/av1-rtp-spec/ */
+    if (av_strstart(p, "fmtp:", &p)) {
+        result = ff_parse_fmtp(s, stream, av1_data, p, sdp_parse_fmtp_config_av1);
+        av_log(s, AV_LOG_DEBUG,"RTP AV1 Profile: %u, Level: %u, Tier: %u\n",
+               av1_data->profile, av1_data->level_idx, av1_data->tier);
+    }
+
+    return result;
+}
+
+/// RTP dynamic payload handler for the encoding name "AV1": hooks up the
+/// SDP line parser, the packet depacketizer and the close callback defined
+/// in this file.
+const RTPDynamicProtocolHandler ff_av1_dynamic_handler = {
+        .enc_name         = "AV1",
+        .codec_type       = AVMEDIA_TYPE_VIDEO,
+        .codec_id         = AV_CODEC_ID_AV1,
+        .need_parsing     = AVSTREAM_PARSE_FULL,
+        .priv_data_size   = sizeof(PayloadContext),
+        .parse_sdp_a_line = parse_av1_sdp_line,
+        .close            = av1_close_context,
+        .parse_packet     = av1_handle_packet,
+};
diff --git a/libavformat/rtpdec_formats.h b/libavformat/rtpdec_formats.h
index dad2b8ac1b..72a8f16a90 100644
--- a/libavformat/rtpdec_formats.h
+++ b/libavformat/rtpdec_formats.h
@@ -50,6 +50,7 @@ void ff_h264_parse_framesize(AVCodecParameters *par, const char *p);
  extern const RTPDynamicProtocolHandler ff_ac3_dynamic_handler;
  extern const RTPDynamicProtocolHandler ff_amr_nb_dynamic_handler;
  extern const RTPDynamicProtocolHandler ff_amr_wb_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_av1_dynamic_handler;
  extern const RTPDynamicProtocolHandler ff_dv_dynamic_handler;
  extern const RTPDynamicProtocolHandler ff_g726_16_dynamic_handler;
  extern const RTPDynamicProtocolHandler ff_g726_24_dynamic_handler;
diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c
index 7b4ae37d13..ecef92d75d 100644
--- a/libavformat/rtpenc.c
+++ b/libavformat/rtpenc.c
@@ -79,6 +79,7 @@ static int is_supported(enum AVCodecID id)
      case AV_CODEC_ID_THEORA:
      case AV_CODEC_ID_VP8:
      case AV_CODEC_ID_VP9:
+    case AV_CODEC_ID_AV1:
      case AV_CODEC_ID_ADPCM_G722:
      case AV_CODEC_ID_ADPCM_G726:
      case AV_CODEC_ID_ADPCM_G726LE:
@@ -579,6 +580,9 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket *pkt)
      case AV_CODEC_ID_AMR_WB:
          ff_rtp_send_amr(s1, pkt->data, size);
          break;
+    case AV_CODEC_ID_AV1:
+        ff_rtp_send_av1(s1, pkt->data, size);
+        break;
      case AV_CODEC_ID_MPEG2TS:
          rtp_send_mpegts_raw(s1, pkt->data, size);
          break;
diff --git a/libavformat/rtpenc.h b/libavformat/rtpenc.h
index 854bf07f0e..24925a6ba8 100644
--- a/libavformat/rtpenc.h
+++ b/libavformat/rtpenc.h
@@ -94,6 +94,7 @@ void ff_rtp_send_xiph(AVFormatContext *s1, const 
uint8_t *buff, int size);
  void ff_rtp_send_vc2hq(AVFormatContext *s1, const uint8_t *buf, int 
size, int interlaced);
  void ff_rtp_send_vp8(AVFormatContext *s1, const uint8_t *buff, int size);
  void ff_rtp_send_vp9(AVFormatContext *s1, const uint8_t *buff, int size);
+void ff_rtp_send_av1(AVFormatContext *s1, const uint8_t *buf1, int size);
  void ff_rtp_send_jpeg(AVFormatContext *s1, const uint8_t *buff, int size);
  void ff_rtp_send_raw_rfc4175(AVFormatContext *s1, const uint8_t *buf, 
int size, int interlaced, int field);
  diff --git a/libavformat/rtpenc_av1.c b/libavformat/rtpenc_av1.c
new file mode 100644
index 0000000000..812ff9ba30
--- /dev/null
+++ b/libavformat/rtpenc_av1.c
@@ -0,0 +1,305 @@
+/*
+ * Packetization for RTP Payload Format For AV1 (v1.0)
+ * https://aomediacodec.github.io/av1-rtp-spec/
+ * Copyright (c) 2024 Axis Communications
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * @brief AV1 / RTP packetization code (RTP Payload Format For AV1 (v1.0))
+ * @author Chris Hodges <chris.hodges at axis.com>
+ * @note This will remove TDs and OBU size fields
+ */
+
+#include "avformat.h"
+#include "rtpenc.h"
+#include "libavcodec/av1.h"
+#include "rtp_av1.h"
+
+void ff_rtp_send_av1(AVFormatContext *ctx, const uint8_t *frame_buf, 
int frame_size) {
+    uint8_t aggr_hdr = 0; // aggregation header byte, stored in buf[0] of every packet
+    int last_packet_of_frame = 0; // set once all input is parsed; passed on to ff_rtp_send_data()
+    RTPMuxContext *rtp_ctx = ctx->priv_data;
+    const uint8_t *obu_ptr = frame_buf; // read position within the input frame
+    int start_new_packet = 0;
+    unsigned int num_obus = 0; // OBU elements written to the current packet so far
+    unsigned int rem_pkt_size = rtp_ctx->max_payload_size - 1;
+    uint8_t *pkt_ptr = NULL; // write position within rtp_ctx->buf
+
+    const uint8_t *curr_obu_ptr = NULL; // curr_*: most recently parsed OBU, not yet written
+    uint32_t curr_elem_size = 0;
+    int curr_obu_hdr = -1; // -1: no header byte to write (continued fragment)
+    int curr_obu_ext = -1; // -1: no extension byte
+    const uint8_t *last_obu_ptr = NULL; // last_*: the OBU before curr, written lazily
+    uint32_t last_elem_size = 0;
+    int last_obu_hdr = -1;
+    int last_obu_ext = -1;
+
+    rtp_ctx->timestamp = rtp_ctx->cur_timestamp;
+
+    /* Packetizes one AV1 frame (frame_buf / frame_size) into RTP payloads:
+     * OBU size fields are stripped, temporal delimiter and tile list OBUs
+     * are dropped, and the remaining OBUs are aggregated into, or fragmented
+     * across, packets of at most max_payload_size bytes (one aggregation
+     * header byte followed by the OBU elements). On malformed input the
+     * function logs an error and returns without sending the rest.
+     *
+     * The payload structure is supposed to be straightforward, but there are
+     * a couple of edge cases that make things very complex.
+     * These are mainly due to:
+     * - the OBU element size being optional for the last element, but
+     *   MANDATORY if there are more than 3 elements
+     * - the size field of the element being made up of a variable number of
+     *   LEB bytes
+     * - the latter, in combination with the desire to fill the max packet
+     *   size, possibly causing a catch-22
+     * - if there are fewer than 2 bytes remaining (depending on the required
+     *   LEB), there is no space for the payload of an element and the next
+     *   packet must be started instead
+     * - if there are fewer than 3 bytes remaining, the header byte plus the
+     *   optional extension byte will not fit in the fragment, making the
+     *   handling even more complicated
+     * - as some OBU types are supposed to be filtered out, it is hard to
+     *   decide via the remaining length whether the outputted OBU element
+     *   will actually be the last one
+     *
+     * There are two major ways to tackle that: pre-parsing of all OBUs within
+     * a frame (adds memory complexity) or lazy copying of the prior element.
+     * Here, the latter is implemented: an OBU is only written to the packet
+     * once the following OBU has been parsed (or the frame has ended).
+     */
+
+    /* this actually doesn't seem to work (flag is apparently never set?),
+     * but it should be the way to do it
+     * NOTE(review): AV_PKT_FLAG_KEY is a packet flag, while this tests the
+     * flags field of the muxer context; whether that field ever carries
+     * packet flags cannot be seen from here -- confirm. */
+    if (rtp_ctx->flags & AV_PKT_FLAG_KEY) {
+        av_log(ctx, AV_LOG_DEBUG, "Marking FIRST packet\n");
+        aggr_hdr |= AV1F_AGGR_HDR_FIRST_PKT;
+    }
+
+    rem_pkt_size = rtp_ctx->max_payload_size - 1; // one byte is reserved for the aggregation header
+    pkt_ptr = rtp_ctx->buf + 1; // buf[0] is filled in right before sending
+
+    av_log(ctx, AV_LOG_TRACE, "AV1 Frame %d in (%x), size=%d:\n",
+           rtp_ctx->seq, rtp_ctx->flags, frame_size);
+    av_hex_dump_log(ctx, AV_LOG_TRACE, frame_buf, FFMIN(frame_size, 128));
+
+    while (frame_size) {
+        uint32_t obu_size;
+        int num_lebs = 0;
+        int ext_byte = -1; // stays -1 when the OBU has no extension byte
+
+        uint8_t obu_hdr = *obu_ptr++;
+        uint8_t obu_type = (obu_hdr >> AV1S_OBU_TYPE) & AV1M_OBU_TYPE;
+        frame_size--;
+
+        if (obu_hdr & AV1F_OBU_FORBIDDEN) {
+            av_log(ctx, AV_LOG_ERROR, "Forbidden bit set in AV1 OBU 
header (0x%02x)\n", obu_hdr);
+            return;
+        }
+
+        if (obu_hdr & AV1F_OBU_EXTENSION_FLAG) {
+            if (!frame_size) {
+                av_log(ctx, AV_LOG_ERROR, "Out of data for AV1 OBU 
header extension byte\n");
+                return;
+            }
+            ext_byte = *obu_ptr++;
+            frame_size--;
+        }
+
+        if (obu_hdr & AV1F_OBU_HAS_SIZE_FIELD) {
+            obu_hdr &= ~AV1F_OBU_HAS_SIZE_FIELD; // clear the flag: the size field is not transmitted
+            // read out explicit OBU size; a return value of 0 is treated as failure
+            num_lebs = parse_leb(ctx, obu_ptr, frame_size, &obu_size);
+            if (!num_lebs) {
+                return;
+            }
+            obu_ptr += num_lebs;
+            frame_size -= num_lebs;
+        } else {
+            av_log(ctx, AV_LOG_ERROR, "Cannot handle AV1 OBUs without 
size fields\n");
+            return;
+        }
+
+        if ((long) obu_size > frame_size) {
+            av_log(ctx, AV_LOG_ERROR, "AV1 OBU size %d larger than 
remaining frame size %d\n", obu_size, frame_size);
+            return;
+        }
+
+        if (obu_size > 0xfffffffd) { // keeps obu_size + 2 (header + extension byte) within 32 bits
+            av_log(ctx, AV_LOG_ERROR, "AV1 OBU size 0x%x might overflow 
(attack?)\n", obu_size);
+            return;
+        }
+
+        frame_size -= obu_size;
+
+        if ((obu_type == AV1_OBU_TEMPORAL_DELIMITER) ||
+            (obu_type == AV1_OBU_TILE_LIST)) {
+            // ignore and remove according to spec (skip the payload, keep curr_/last_ state)
+            obu_ptr += obu_size;
+            continue;
+        }
+
+        /* Workaround: If there is a sequence header in the frame, assume
+         * it's a keyframe. This may not be strictly true for all streams, as
+         * it is allowed to repeat the (identical) sequence header in every
+         * frame, but no "normal" AV1 encoder does that. */
+        if (obu_type == AV1_OBU_SEQUENCE_HEADER) {
+            av_log(ctx, AV_LOG_DEBUG, "Marking FIRST packet 
(workaround)\n");
+            aggr_hdr |= AV1F_AGGR_HDR_FIRST_PKT;
+        }
+
+        /* if the last OBU had a temporal or spatial ID, they need to match
+         * the current one; otherwise start a new packet
+         * NOTE(review): at this point curr_obu_ext still holds the extension
+         * byte of the previously parsed OBU (it is only updated from ext_byte
+         * below), so ext_byte of the OBU just parsed is not part of this
+         * comparison -- confirm this is intended. */
+        if ((last_obu_ext >= 0) && (curr_obu_ext != last_obu_ext)) {
+            start_new_packet = 1;
+        }
+
+flush_last_packet: // also entered via goto once the whole frame has been parsed
+        last_obu_ptr = curr_obu_ptr;
+        last_elem_size = curr_elem_size;
+        last_obu_hdr = curr_obu_hdr;
+        last_obu_ext = curr_obu_ext;
+
+        curr_obu_ptr = obu_ptr; // behind header, extension byte and size field
+        curr_elem_size = obu_size + 1 + ((ext_byte >= 0) ? 1 : 0);
+        curr_obu_hdr = obu_hdr;
+        curr_obu_ext = ext_byte;
+
+        obu_ptr += obu_size;
+
+        if (last_obu_ptr) { // nothing to write yet on the first OBU of the frame
+            unsigned int first_elem_with_size = last_elem_size + 
calc_leb_size(last_elem_size);
+            // check if last packet fits completely and has reasonable 
space for
+            // at least a fragment of the next
+            if (!last_packet_of_frame && (first_elem_with_size + 10 < 
rem_pkt_size)) {
+                num_lebs = write_leb(pkt_ptr, last_elem_size);
+                pkt_ptr += num_lebs;
+                rem_pkt_size -= num_lebs;
+            } else {
+                if ((num_obus >= 3) && (last_packet_of_frame || 
(first_elem_with_size <= rem_pkt_size))) {
+                    // last fits with forced size, but nothing else
+                    num_lebs = write_leb(pkt_ptr, last_elem_size);
+                    pkt_ptr += num_lebs;
+                    rem_pkt_size -= num_lebs;
+                }
+                // force new packet after this element has been written
+                start_new_packet = 1;
+            }
+
+            // write header and optional extension byte (if not a 
continued fragment)
+            if (last_obu_hdr >= 0) {
+                *pkt_ptr++ = last_obu_hdr;
+                last_elem_size--;
+                rem_pkt_size--;
+                if (last_obu_ext >= 0) {
+                    *pkt_ptr++ = last_obu_ext;
+                    last_elem_size--;
+                    rem_pkt_size--;
+                }
+            }
+            // copy payload (last_elem_size no longer includes the header bytes written above)
+            memcpy(pkt_ptr, last_obu_ptr, last_elem_size);
+            pkt_ptr += last_elem_size;
+            rem_pkt_size -= last_elem_size;
+            num_obus++;
+        }
+
+        if (start_new_packet || last_packet_of_frame) {
+            if (num_obus < 4) { // counts of 4 and above are not stored in the header
+                aggr_hdr |= num_obus << AV1S_AGGR_HDR_NUM_OBUS;
+            }
+            rtp_ctx->buf[0] = aggr_hdr;
+            av_log(ctx, AV_LOG_TRACE, "Sending NON-FRAG packet %ld/%d, 
%d OBUs\n",
+                   pkt_ptr - rtp_ctx->buf, rtp_ctx->max_payload_size, 
num_obus);
+            av_hex_dump_log(ctx, AV_LOG_TRACE, rtp_ctx->buf, 
FFMIN(pkt_ptr - rtp_ctx->buf, 128));
+
+            ff_rtp_send_data(ctx, rtp_ctx->buf, pkt_ptr - rtp_ctx->buf, 
last_packet_of_frame);
+
+            rem_pkt_size = rtp_ctx->max_payload_size - 1; // reset state for an empty packet
+            pkt_ptr = rtp_ctx->buf + 1;
+            aggr_hdr = 0;
+            num_obus = 0;
+        }
+
+        if (last_packet_of_frame) {
+            break;
+        }
+
+        // check if element needs to be fragmented, otherwise we will 
deal with
+        // it in the next iteration
+        if ((curr_elem_size > rem_pkt_size) ||
+            ((num_obus >= 3) && (curr_elem_size + 
calc_leb_size(curr_elem_size)) > rem_pkt_size)) {
+            uint32_t frag_size = rem_pkt_size;
+
+            // if there are going to be more than 3 OBU elements, we are 
obliged to
+            // have the length field for the last
+            if (num_obus >= 3) {
+                // that's an upper limit of LEBs
+                num_lebs = calc_leb_size(rem_pkt_size - 1);
+                frag_size -= num_lebs;
+
+                // write a fixed number of LEBs, in case the frag_size 
could
+                // now be specified with one less byte
+                write_leb_n(pkt_ptr, frag_size, num_lebs);
+                pkt_ptr += num_lebs;
+                rem_pkt_size -= num_lebs;
+            }
+
+            // write header and optional extension byte
+            *pkt_ptr++ = curr_obu_hdr;
+            curr_elem_size--;
+            rem_pkt_size--;
+            if (curr_obu_ext >= 0) {
+                *pkt_ptr++ = curr_obu_ext;
+                curr_elem_size--;
+                rem_pkt_size--;
+            }
+
+            // disable header writing for final fragment (it is emitted by the lazy copy above)
+            curr_obu_hdr = -1;
+            curr_obu_ext = -1;
+
+            // send more full packet sized fragments
+            do {
+                // copy payload, filling the packet completely
+                memcpy(pkt_ptr, curr_obu_ptr, rem_pkt_size);
+                pkt_ptr += rem_pkt_size;
+                curr_obu_ptr += rem_pkt_size;
+                curr_elem_size -= rem_pkt_size;
+                num_obus++;
+
+                aggr_hdr |= AV1F_AGGR_HDR_LAST_FRAG;
+                if (num_obus < 4) {
+                    aggr_hdr |= num_obus << AV1S_AGGR_HDR_NUM_OBUS;
+                }
+                rtp_ctx->buf[0] = aggr_hdr;
+
+                av_log(ctx, AV_LOG_TRACE, "Sending FRAG packet %ld/%d, 
%d OBUs\n",
+                       pkt_ptr - rtp_ctx->buf, 
rtp_ctx->max_payload_size, num_obus);
+                av_hex_dump_log(ctx, AV_LOG_TRACE, rtp_ctx->buf, 
FFMIN(pkt_ptr - rtp_ctx->buf, 128));
+
+                ff_rtp_send_data(ctx, rtp_ctx->buf, pkt_ptr - 
rtp_ctx->buf, 0);
+                rem_pkt_size = rtp_ctx->max_payload_size - 1;
+                pkt_ptr = rtp_ctx->buf + 1;
+
+                aggr_hdr = AV1F_AGGR_HDR_FRAG_CONT; // the next packet starts with the rest of this OBU
+                num_obus = 0;
+            } while (curr_elem_size > rem_pkt_size);
+            start_new_packet = 0;
+        }
+
+        if (!frame_size) {
+            // we're done, flush the last packet, set RTP marker bit
+            last_packet_of_frame = 1;
+            goto flush_last_packet;
+        }
+    }
+}
diff --git a/libavformat/sdp.c b/libavformat/sdp.c
index a9e964bae1..2738bb2056 100644
--- a/libavformat/sdp.c
+++ b/libavformat/sdp.c
@@ -31,6 +31,7 @@
  #include "libavcodec/mpeg4audio.h"
  #include "avformat.h"
  #include "internal.h"
+#include "av1.h"
  #include "avc.h"
  #include "hevc.h"
  #include "nal.h"
@@ -155,6 +156,26 @@ static int sdp_get_address(char *dest_addr, int 
size, int *ttl, const char *url)
      return port;
  }
  +/**
+ * Build the AV1 "a=fmtp" parameter string (profile, level-idx, tier) from
+ * the sequence header found in the codec extradata.
+ *
+ * @param s    context used for logging
+ * @param par  codec parameters providing extradata / extradata_size
+ * @param out  receives the string allocated with av_mallocz() on success
+ * @return 0 on success, AVERROR_INVALIDDATA if the sequence header cannot be
+ *         parsed, AVERROR(ENOMEM) if the allocation fails
+ */
+static int extradata2psets_av1(AVFormatContext *s, const AVCodecParameters *par,
+                               char **out)
+{
+    const int fmtp_size = 64;
+    AV1SequenceParameters seq_params;
+    char *fmtp;
+    int parse_ret;
+
+    parse_ret = ff_av1_parse_seq_header(&seq_params, par->extradata,
+                                        par->extradata_size);
+    if (parse_ret < 0)
+        return AVERROR_INVALIDDATA;
+
+    /* The buffer is zero-filled, so av_strlcatf() appends to an empty string. */
+    fmtp = av_mallocz(fmtp_size);
+    if (fmtp == NULL) {
+        av_log(s, AV_LOG_ERROR, "Cannot allocate memory for the parameter sets.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    av_strlcatf(fmtp, fmtp_size, "profile=%u;level-idx=%u;tier=%u",
+                seq_params.profile, seq_params.level, seq_params.tier);
+
+    *out = fmtp;
+    return 0;
+}
+
  #define MAX_PSET_SIZE 1024
  static int extradata2psets(AVFormatContext *s, const AVCodecParameters 
*par,
                             char **out)
@@ -522,6 +543,15 @@ static int sdp_write_media_attributes(char *buff, 
int size, const AVStream *st,
      int ret = 0;
       switch (p->codec_id) {
+    case AV_CODEC_ID_AV1:
+        av_strlcatf(buff, size, "a=rtpmap:%d AV1/90000\r\n", payload_type);
+        if (p->extradata_size) {
+            ret = extradata2psets_av1(fmt, p, &config);
+            if (ret < 0)
+                return ret;
+            av_strlcatf(buff, size, "a=fmtp:%d %s\r\n", payload_type, 
config);
+        }
+        break;
      case AV_CODEC_ID_DIRAC:
          av_strlcatf(buff, size, "a=rtpmap:%d VC2/90000\r\n", 
payload_type);
          break;
-- 
2.39.2



More information about the ffmpeg-devel mailing list