diff --git a/libavformat/Makefile b/libavformat/Makefile index 074efc118a..7730e7c4e6 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -47,6 +47,7 @@ OBJS-$(CONFIG_RTPDEC) += rdt.o \ rtpdec_ac3.o \ rtpdec_amr.o \ rtpdec_asf.o \ + rtpdec_av1.o \ rtpdec_dv.o \ rtpdec_g726.o \ rtpdec_h261.o \ @@ -517,6 +518,7 @@ OBJS-$(CONFIG_RTP_MUXER) += rtp.o \ rtpenc_aac.o \ rtpenc_latm.o \ rtpenc_amr.o \ + rtpenc_av1.o \ rtpenc_h261.o \ rtpenc_h263.o \ rtpenc_h263_rfc2190.o \ diff --git a/libavformat/demux.c b/libavformat/demux.c index 0e39346f62..9c37be0852 100644 --- a/libavformat/demux.c +++ b/libavformat/demux.c @@ -111,6 +111,7 @@ static int set_codec_from_probe_data(AVFormatContext *s, AVStream *st, { "aac", AV_CODEC_ID_AAC, AVMEDIA_TYPE_AUDIO }, { "ac3", AV_CODEC_ID_AC3, AVMEDIA_TYPE_AUDIO }, { "aptx", AV_CODEC_ID_APTX, AVMEDIA_TYPE_AUDIO }, + { "av1", AV_CODEC_ID_AV1, AVMEDIA_TYPE_VIDEO }, { "dts", AV_CODEC_ID_DTS, AVMEDIA_TYPE_AUDIO }, { "dvbsub", AV_CODEC_ID_DVB_SUBTITLE, AVMEDIA_TYPE_SUBTITLE }, { "dvbtxt", AV_CODEC_ID_DVB_TELETEXT, AVMEDIA_TYPE_SUBTITLE }, diff --git a/libavformat/rtp_av1.h b/libavformat/rtp_av1.h new file mode 100644 index 0000000000..a353fc0e4e --- /dev/null +++ b/libavformat/rtp_av1.h @@ -0,0 +1,128 @@ +/* + * Shared definitions and helper functions for + * AV1 (de)packetization. + * Copyright (c) 2024 Axis Communications + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
#include <stdint.h>

/*
 * Flags and bit fields of the OBU header byte and of the RTP aggregation
 * header byte.
 *
 * Naming scheme:
 *   AV1B_* - bit position of a single-bit flag
 *   AV1F_* - mask of that flag (1u << AV1B_*)
 *   AV1S_* - right-shift that moves a multi-bit field to bit 0
 *   AV1M_* - mask to apply after shifting by AV1S_*
 */
#define AV1B_OBU_FORBIDDEN           7
#define AV1F_OBU_FORBIDDEN           (1u << AV1B_OBU_FORBIDDEN)
#define AV1S_OBU_TYPE                3
#define AV1M_OBU_TYPE                15
#define AV1B_OBU_EXTENSION_FLAG      2
#define AV1F_OBU_EXTENSION_FLAG      (1u << AV1B_OBU_EXTENSION_FLAG)
#define AV1B_OBU_HAS_SIZE_FIELD      1
#define AV1F_OBU_HAS_SIZE_FIELD      (1u << AV1B_OBU_HAS_SIZE_FIELD)
#define AV1B_OBU_RESERVED_1BIT       0
#define AV1F_OBU_RESERVED_1BIT       (1u << AV1B_OBU_RESERVED_1BIT)

#define AV1B_AGGR_HDR_FRAG_CONT      7
#define AV1F_AGGR_HDR_FRAG_CONT      (1u << AV1B_AGGR_HDR_FRAG_CONT)
#define AV1B_AGGR_HDR_LAST_FRAG      6
#define AV1F_AGGR_HDR_LAST_FRAG      (1u << AV1B_AGGR_HDR_LAST_FRAG)
#define AV1S_AGGR_HDR_NUM_OBUS       4
#define AV1M_AGGR_HDR_NUM_OBUS       3
#define AV1B_AGGR_HDR_FIRST_PKT      3
#define AV1F_AGGR_HDR_FIRST_PKT      (1u << AV1B_AGGR_HDR_FIRST_PKT)

/**
 * Calculate the number of LEB128 bytes needed to encode a length.
 *
 * @param length value to be encoded
 * @return number of bytes (1..5); a length of 0 still needs one byte
 */
static inline unsigned int calc_leb_size(uint32_t length)
{
    unsigned int num_lebs = 1;

    while (length >>= 7)
        num_lebs++;
    return num_lebs;
}

/**
 * Write a length as the shortest possible LEB128 sequence.
 *
 * @param lebptr destination; must have room for calc_leb_size(length) bytes
 * @param length value to encode
 * @return number of bytes written (same value as calc_leb_size(length))
 */
static inline unsigned int write_leb(uint8_t *lebptr, uint32_t length)
{
    unsigned int num_lebs = 1;

    // every byte except the last carries the continuation bit
    while (length >= 0x80) {
        *lebptr++ = (length & 0x7f) | 0x80;
        length >>= 7;
        num_lebs++;
    }
    *lebptr = length;
    return num_lebs;
}

/**
 * Write a length as a LEB128 sequence of exactly num_lebs bytes.
 *
 * The encoding may contain "unused" high bytes (payload bits all zero), which
 * allows a size field of a previously reserved width to be filled in.
 * Bits of length that do not fit into 7 * num_lebs bits are silently dropped;
 * the caller has to pass a sufficiently large num_lebs.
 *
 * @param lebptr   destination; must have room for num_lebs bytes
 * @param length   value to encode
 * @param num_lebs exact number of bytes to emit; 0 writes nothing
 */
static inline void write_leb_n(uint8_t *lebptr, uint32_t length, unsigned int num_lebs)
{
    // counter has the same (unsigned) type as the bound it is compared with
    for (unsigned int i = 0; i < num_lebs; i++) {
        uint8_t leb = length & 0x7f;

        if (i + 1 < num_lebs)
            leb |= 0x80; // not the last byte: set continuation bit
        lebptr[i] = leb;
        length >>= 7;
    }
}
+ if (num_lebs <= 4) { + *obu_size |= leb7 << (7 * num_lebs); + } + num_lebs++; + } while (leb128 >= 0x80); + return num_lebs; +} + +#endif /* AVFORMAT_RTP_AV1_H */ diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c index 729bf83685..a7d5a79a83 100644 --- a/libavformat/rtpdec.c +++ b/libavformat/rtpdec.c @@ -83,6 +83,7 @@ static const RTPDynamicProtocolHandler *const rtp_dynamic_protocol_handler_list[ &ff_ac3_dynamic_handler, &ff_amr_nb_dynamic_handler, &ff_amr_wb_dynamic_handler, + &ff_av1_dynamic_handler, &ff_dv_dynamic_handler, &ff_g726_16_dynamic_handler, &ff_g726_24_dynamic_handler, diff --git a/libavformat/rtpdec_av1.c b/libavformat/rtpdec_av1.c new file mode 100644 index 0000000000..af3f9e0f09 --- /dev/null +++ b/libavformat/rtpdec_av1.c @@ -0,0 +1,461 @@ +/* + * Depacketization for RTP Payload Format For AV1 (v1.0) + * https://aomediacodec.github.io/av1-rtp-spec/ + * Copyright (c) 2024 Axis Communications + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * @brief AV1 / RTP depacketization code (RTP Payload Format For AV1 (v1.0)) + * @author Chris Hodges + * @note The process will restore TDs and put back size fields into headers. 
+ * It will also try to keep complete OBUs and remove partial OBUs + * caused by packet drops and thus keep the stream syntactically intact. + */ + +#include "libavutil/avstring.h" +#include "libavutil/mem.h" +#include "avformat.h" + +#include "rtpdec.h" +#include "libavcodec/av1.h" +#include "rtp_av1.h" + +// enable tracing of packet data +//#define RTPDEC_AV1_VERBOSE_TRACE + +/** + * RTP/AV1 specific private data. + */ +struct PayloadContext { + uint32_t timestamp; ///< last received timestamp for frame + uint8_t profile; ///< profile (main/high/professional) + uint8_t level_idx; ///< level (0-31) + uint8_t tier; ///< main tier or high tier + uint16_t prev_seq; ///< sequence number of previous packet + unsigned int frag_obu_size; ///< current total size of fragmented OBU + unsigned int frag_pkt_leb_pos; ///< offset in buffer where OBU LEB starts + unsigned int frag_lebs_res; ///< number of bytes reserved for LEB + unsigned int frag_header_size; ///< size of OBU header (1 or 2) + int needs_td; ///< indicates that a TD should be output + int drop_fragment; ///< drop all fragments until next frame + int keyframe_seen; ///< keyframe was seen + int wait_for_keyframe; ///< message about waiting for keyframe has been issued +}; + +static int sdp_parse_fmtp_config_av1(AVFormatContext *s, + AVStream *stream, + PayloadContext *av1_data, + const char *attr, const char *value) { + if (!strcmp(attr, "profile")) { + av1_data->profile = atoi(value); + av_log(s, AV_LOG_DEBUG, "RTP AV1 profile: %u\n", av1_data->profile); + } else if (!strcmp(attr, "level-idx")) { + av1_data->level_idx = atoi(value); + av_log(s, AV_LOG_DEBUG, "RTP AV1 level: %u\n", av1_data->profile); + } else if (!strcmp(attr, "tier")) { + av1_data->tier = atoi(value); + av_log(s, AV_LOG_DEBUG, "RTP AV1 tier: %u\n", av1_data->tier); + } + return 0; +} + +// return 0 on complete packet, -1 on partial packet +static int av1_handle_packet(AVFormatContext *ctx, PayloadContext *data, + AVStream *st, AVPacket *pkt, 
uint32_t *timestamp, + const uint8_t *buf, int len, uint16_t seq, + int flags) { + uint8_t aggr_hdr; + int result = 0; + int is_frag_cont; + int is_last_fragmented; + int is_first_pkt; + unsigned int num_obus; + unsigned int obu_cnt = 1; + unsigned int rem_pkt_size = len; + unsigned int pktpos; + const uint8_t *buf_ptr = buf; + uint16_t expected_seq = data->prev_seq + 1; + int16_t seq_diff = seq - expected_seq; + + data->prev_seq = seq; + + if (!len) { + av_log(ctx, AV_LOG_ERROR, "Empty AV1 RTP packet\n"); + return AVERROR_INVALIDDATA; + } + if (len < 2) { + av_log(ctx, AV_LOG_ERROR, "AV1 RTP packet too short\n"); + return AVERROR_INVALIDDATA; + } + + /* The payload structure is supposed to be straight-forward, but there are a + * couple of edge cases which need to be tackled and make things a bit more + * complex. + * These are mainly due to: + * - To reconstruct the OBU size for fragmented packets and place it the OBU + * header, the final size will not be known until the last fragment has + * been parsed. However, the number LEBs in the header is variable + * depending on the length of the payload. + * - We are increasing the out-packet size while we are getting fragmented + * OBUs. If an RTP packet gets dropped, we would create corrupted OBUs. + * In this case we decide to drop the whole frame. + */ + +#ifdef RTPDEC_AV1_VERBOSE_TRACE + av_log(ctx, AV_LOG_TRACE, "RTP Packet %d in (%x), len=%d:\n", + seq, flags, len); + av_hex_dump_log(ctx, AV_LOG_TRACE, buf, FFMIN(len, 64)); + av_log(ctx, AV_LOG_TRACE, "... 
end at offset %x:\n", FFMAX(len - 64, 0)); + av_hex_dump_log(ctx, AV_LOG_TRACE, buf + FFMAX(len - 64, 0), FFMIN(len - 64, 64)); +#endif + + /* 8 bit aggregate header: Z Y W W N - - - */ + aggr_hdr = *buf_ptr++; + rem_pkt_size--; + + /* Z: MUST be set to 1 if the first OBU element is an OBU fragment that is a + * continuation of an OBU fragment from the previous packet, and MUST be set + * to 0 otherwise */ + is_frag_cont = (aggr_hdr >> AV1B_AGGR_HDR_FRAG_CONT) & 1; + + /* Y: MUST be set to 1 if the last OBU element is an OBU fragment that will + * continue in the next packet, and MUST be set to 0 otherwise */ + is_last_fragmented = (aggr_hdr >> AV1B_AGGR_HDR_LAST_FRAG) & 1; + + /* W: two bit field that describes the number of OBU elements in the packet. + * This field MUST be set equal to 0 or equal to the number of OBU elements + * contained in the packet. + * If set to 0, each OBU element MUST be preceded by a length field. + * If not set to 0 (i.e., W = 1, 2 or 3) the last OBU element MUST NOT be + * preceded by a length field (it's derived from RTP packet size minus other + * known lengths). 
*/ + num_obus = (aggr_hdr >> AV1S_AGGR_HDR_NUM_OBUS) & AV1M_AGGR_HDR_NUM_OBUS; + + /* N: MUST be set to 1 if the packet is the first packet of a coded video + * sequence, and MUST be set to 0 otherwise.*/ + is_first_pkt = (aggr_hdr >> AV1B_AGGR_HDR_FIRST_PKT) & 1; + + if (is_frag_cont) { + if (data->drop_fragment) { + return AVERROR_INVALIDDATA; + } + if (is_first_pkt) { + av_log(ctx, AV_LOG_ERROR, "Illegal aggregation header in first AV1 RTP packet\n"); + return AVERROR_INVALIDDATA; + } + if (seq_diff) { + av_log(ctx, AV_LOG_WARNING, "AV1 RTP frag packet sequence mismatch (%d != %d), dropping temporal unit\n", + seq, expected_seq); + goto drop_fragment; + } + if (!pkt->size || !data->frag_obu_size) { + av_log(ctx, AV_LOG_WARNING, "Unexpected fragment continuation in AV1 RTP packet\n"); + goto drop_fragment; // avoid repeated output for the same fragment + } + } else { + if (!is_first_pkt && !data->keyframe_seen) { + if (!data->wait_for_keyframe) { + data->wait_for_keyframe = 1; + av_log(ctx, AV_LOG_WARNING, "AV1 RTP packet before keyframe, dropping and waiting for next keyframe\n"); + } + goto drop_fragment; + } + if (seq_diff && !is_first_pkt) { + av_log(ctx, AV_LOG_WARNING, "AV1 RTP unfrag packet sequence mismatch (%d != %d), dropping temporal unit\n", + seq, expected_seq); + goto drop_fragment; + } + data->drop_fragment = 0; + if (!data->needs_td && ((data->timestamp != *timestamp) || is_first_pkt)) { + av_log(ctx, AV_LOG_TRACE, "Timestamp changed to %u (or first pkt %d), forcing TD\n", *timestamp, is_first_pkt); + data->needs_td = 1; + data->frag_obu_size = 0; // new temporal unit might have been caused by dropped packets + } + if (data->frag_obu_size) { + data->frag_obu_size = 0; // make sure we recover + av_log(ctx, AV_LOG_ERROR, "Missing fragment continuation in AV1 RTP packet\n"); + return AVERROR_INVALIDDATA; + } + // update the timestamp in the frame packet with the one from the RTP packet + data->timestamp = *timestamp; + } + pktpos = pkt->size; + 
+#ifdef RTPDEC_AV1_VERBOSE_TRACE + av_log(ctx, AV_LOG_TRACE, "Input buffer size %d, aggr head 0x%02x fc %d, lf %d, no %d, fp %d\n", + len, aggr_hdr, is_frag_cont, is_last_fragmented, num_obus, is_first_pkt); +#endif + + if (is_first_pkt) { + pkt->flags |= AV_PKT_FLAG_KEY; + data->keyframe_seen = 1; + data->wait_for_keyframe = 0; + } + + // loop over OBU elements + while (rem_pkt_size) { + uint32_t obu_size; + int num_lebs; + int needs_size_field; + int output_size; + unsigned int obu_payload_size; + uint8_t obu_hdr; + + obu_size = rem_pkt_size; + if (!num_obus || obu_cnt < num_obus) { + // read out explicit OBU element size (which almost corresponds to the original OBU size) + num_lebs = parse_leb(ctx, buf_ptr, rem_pkt_size, &obu_size); + if (!num_lebs) { + return AVERROR_INVALIDDATA; + } + rem_pkt_size -= num_lebs; + buf_ptr += num_lebs; + } + // read first byte (which is the header byte only for non-fragmented elements) + obu_hdr = *buf_ptr; + if (obu_size > rem_pkt_size) { + av_log(ctx, AV_LOG_ERROR, "AV1 OBU size %u larger than remaining pkt size %d\n", obu_size, rem_pkt_size); + return AVERROR_INVALIDDATA; + } + + if (!obu_size) { + av_log(ctx, AV_LOG_ERROR, "Unreasonable AV1 OBU size %u\n", obu_size); + return AVERROR_INVALIDDATA; + } + + if (!is_frag_cont) { + uint8_t obu_type = (obu_hdr >> AV1S_OBU_TYPE) & AV1M_OBU_TYPE; + if (obu_hdr & AV1F_OBU_FORBIDDEN) { + av_log(ctx, AV_LOG_ERROR, "Forbidden bit set in AV1 OBU header (0x%02x)\n", obu_hdr); + return AVERROR_INVALIDDATA; + } + // ignore and remove OBUs according to spec + if ((obu_type == AV1_OBU_TEMPORAL_DELIMITER) || + (obu_type == AV1_OBU_TILE_LIST)) { + pktpos += obu_size; + rem_pkt_size -= obu_size; + // TODO: This probably breaks if the OBU_TILE_LIST is fragmented + // into the next RTP packet, so at least check and fail here + if (rem_pkt_size == 0 && is_last_fragmented) { + avpriv_report_missing_feature(ctx, "AV1 OBU_TILE_LIST (should not be there!) 
to be ignored but is fragmented\n"); + return AVERROR_PATCHWELCOME; + } + obu_cnt++; + continue; + } + } + + // If we need to add a size field, out size will be different + output_size = obu_size; + // Spec says the OBUs should have their size fields removed, + // but this is not mandatory + if (is_frag_cont || (obu_hdr & AV1F_OBU_HAS_SIZE_FIELD)) { + needs_size_field = 0; + } else { + needs_size_field = 1; + // (re)calculate number of LEB bytes needed (if it was implicit, there were no LEBs) + output_size += calc_leb_size(obu_size - (1 + ((obu_hdr & AV1F_OBU_EXTENSION_FLAG) ? 1 : 0))); + } + + if (!is_frag_cont && (obu_cnt == 1)) { + if (data->needs_td) { + output_size += 2; // for Temporal Delimiter (TD) + } + if (pkt->data) { + if ((result = av_grow_packet(pkt, output_size)) < 0) + return result; + } else { + if ((result = av_new_packet(pkt, output_size) < 0)) + return result; + } + + if (data->needs_td) { + // restore TD + pkt->data[pktpos++] = 0x12; + pkt->data[pktpos++] = 0x00; + } + data->needs_td = 0; + } else { + if ((result = av_grow_packet(pkt, output_size)) < 0) + return result; + } + + obu_payload_size = obu_size; + // do we need to restore the OBU size field? + if (needs_size_field) { + // set obu_has_size_field in header byte + pkt->data[pktpos++] = *buf_ptr++ | AV1F_OBU_HAS_SIZE_FIELD; + data->frag_header_size = 1; + obu_payload_size--; + + // copy extension byte, if available + if (obu_hdr & AV1F_OBU_EXTENSION_FLAG) { + /* TODO we cannot handle the edge case where last element is a + * fragment of exactly one byte AND the header has the extension + * flag set. 
Note that it would be more efficient to not send a + * fragment of one byte and instead drop the size field of the + * prior element */ + if (!obu_payload_size) { + av_log(ctx, AV_LOG_ERROR, "AV1 OBU too short for extension byte (0x%02x)\n", + obu_hdr); + return AVERROR_INVALIDDATA; + } + pkt->data[pktpos++] = *buf_ptr++; + data->frag_header_size = 2; + obu_payload_size--; + } + + // remember start position of LEB for possibly fragmented packet to + // fixup OBU size later + data->frag_pkt_leb_pos = pktpos; + // write intermediate OBU size field + num_lebs = write_leb(pkt->data + pktpos, obu_payload_size); + data->frag_lebs_res = num_lebs; + pktpos += num_lebs; + } + // copy verbatim or without above header size patch + memcpy(pkt->data + pktpos, buf_ptr, obu_payload_size); + pktpos += obu_payload_size; + buf_ptr += obu_payload_size; + rem_pkt_size -= obu_size; + + // if we were handling a fragmented packet and this was the last + // fragment, correct OBU size field + if (data->frag_obu_size && (rem_pkt_size || !is_last_fragmented)) { + uint32_t final_obu_size = data->frag_obu_size + obu_size - data->frag_header_size; + uint8_t *lebptr = pkt->data + data->frag_pkt_leb_pos; + num_lebs = calc_leb_size(final_obu_size); + + // check if we had allocated enough LEB bytes in header, + // otherwise make some extra space + if (num_lebs > data->frag_lebs_res) { + int extra_bytes = num_lebs - data->frag_lebs_res; + if ((result = av_grow_packet(pkt, extra_bytes)) < 0) + return result; + // update pointer in case buffer address changed + lebptr = pkt->data + data->frag_pkt_leb_pos; + // move existing data for OBU back a bit + memmove(lebptr + extra_bytes, lebptr, + pkt->size - extra_bytes - data->frag_pkt_leb_pos); + // move pktpos further down for following OBUs in same packet. 
+ pktpos += extra_bytes; + } + + // update OBU size field + write_leb(lebptr, final_obu_size); + + data->frag_obu_size = 0; // signal end of fragment + } else if (is_last_fragmented && !rem_pkt_size) { + // add to total OBU size, so we can fix that in OBU header + // (but only if the OBU size was missing!) + if (needs_size_field || data->frag_obu_size) { + data->frag_obu_size += obu_size; + } + // fragment not yet finished! + result = -1; + } + is_frag_cont = 0; + + if (!rem_pkt_size && !num_obus && (num_obus != obu_cnt)) { + av_log(ctx, AV_LOG_WARNING, "AV1 aggregation header indicated %u OBU elements, was %u\n", + num_obus, obu_cnt); + } + obu_cnt++; + } + + if (flags & RTP_FLAG_MARKER) { + av_log(ctx, AV_LOG_TRACE, "TD on next packet due to marker\n"); + data->needs_td = 1; + } else { + // fragment may be complete, but temporal unit is not yet finished + result = -1; + } + + if (!is_last_fragmented) { + data->frag_obu_size = 0; + data->frag_pkt_leb_pos = 0; + } + +#ifdef RTPDEC_AV1_VERBOSE_TRACE + if (!result) { + av_log(ctx, AV_LOG_TRACE, "AV1 out pkt-size: %d\n", pkt->size); + av_hex_dump_log(ctx, AV_LOG_TRACE, pkt->data, FFMIN(pkt->size, 64)); + av_log(ctx, AV_LOG_TRACE, "... 
end at offset %x:\n", FFMAX(pkt->size - 64, 0)); + av_hex_dump_log(ctx, AV_LOG_TRACE, pkt->data + FFMAX(pkt->size - 64, 0), FFMIN(pkt->size, 64)); + } +#endif + pkt->stream_index = st->index; + + return result; + +drop_fragment: + data->keyframe_seen = 0; + data->drop_fragment = 1; + data->frag_obu_size = 0; + data->needs_td = 1; + if (pkt->size) { + av_log(ctx, AV_LOG_TRACE, "Dumping current AV1 frame packet\n"); + // we can't seem to deallocate the fragmented packet, but we can shrink it to 0 + av_shrink_packet(pkt, 0); + } + return AVERROR_INVALIDDATA; +} + +static void av1_close_context(PayloadContext *data) { +} + +static int av1_need_keyframe(PayloadContext *data) +{ + return !data->keyframe_seen; +} + +static int parse_av1_sdp_line(AVFormatContext *s, int st_index, + PayloadContext *av1_data, const char *line) { + AVStream * stream; + const char *p = line; + int result = 0; + + if (st_index < 0) + return 0; + + stream = s->streams[st_index]; + + /* Optional parameters are profile, level-idx, and tier. 
+ * See Section 7.2.1 of https://aomediacodec.github.io/av1-rtp-spec/ */ + if (av_strstart(p, "fmtp:", &p)) { + result = ff_parse_fmtp(s, stream, av1_data, p, sdp_parse_fmtp_config_av1); + av_log(s, AV_LOG_DEBUG, "RTP AV1 Profile: %u, Level: %u, Tier: %u\n", + av1_data->profile, av1_data->level_idx, av1_data->tier); + } + + return result; +} + +const RTPDynamicProtocolHandler ff_av1_dynamic_handler = { + .enc_name = "AV1", + .codec_type = AVMEDIA_TYPE_VIDEO, + .codec_id = AV_CODEC_ID_AV1, + .need_parsing = AVSTREAM_PARSE_FULL, + .priv_data_size = sizeof(PayloadContext), + .parse_sdp_a_line = parse_av1_sdp_line, + .close = av1_close_context, + .parse_packet = av1_handle_packet, + .need_keyframe = av1_need_keyframe, +}; diff --git a/libavformat/rtpdec_formats.h b/libavformat/rtpdec_formats.h index dad2b8ac1b..72a8f16a90 100644 --- a/libavformat/rtpdec_formats.h +++ b/libavformat/rtpdec_formats.h @@ -50,6 +50,7 @@ void ff_h264_parse_framesize(AVCodecParameters *par, const char *p); extern const RTPDynamicProtocolHandler ff_ac3_dynamic_handler; extern const RTPDynamicProtocolHandler ff_amr_nb_dynamic_handler; extern const RTPDynamicProtocolHandler ff_amr_wb_dynamic_handler; +extern const RTPDynamicProtocolHandler ff_av1_dynamic_handler; extern const RTPDynamicProtocolHandler ff_dv_dynamic_handler; extern const RTPDynamicProtocolHandler ff_g726_16_dynamic_handler; extern const RTPDynamicProtocolHandler ff_g726_24_dynamic_handler; diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c index 7b4ae37d13..3db13ee0b2 100644 --- a/libavformat/rtpenc.c +++ b/libavformat/rtpenc.c @@ -79,6 +79,7 @@ static int is_supported(enum AVCodecID id) case AV_CODEC_ID_THEORA: case AV_CODEC_ID_VP8: case AV_CODEC_ID_VP9: + case AV_CODEC_ID_AV1: case AV_CODEC_ID_ADPCM_G722: case AV_CODEC_ID_ADPCM_G726: case AV_CODEC_ID_ADPCM_G726LE: @@ -228,6 +229,16 @@ static int rtp_write_header(AVFormatContext *s1) goto fail; } break; + case AV_CODEC_ID_AV1: + if (s1->strict_std_compliance > 
FF_COMPLIANCE_EXPERIMENTAL) { + av_log(s, AV_LOG_ERROR, + "Packetizing AV1 is experimental and its specification is " + "still in draft state. " + "Please set -strict experimental in order to enable it.\n"); + ret = AVERROR_EXPERIMENTAL; + goto fail; + } + break; case AV_CODEC_ID_VORBIS: case AV_CODEC_ID_THEORA: s->max_frames_per_packet = 15; @@ -579,6 +590,9 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket *pkt) case AV_CODEC_ID_AMR_WB: ff_rtp_send_amr(s1, pkt->data, size); break; + case AV_CODEC_ID_AV1: + ff_rtp_send_av1(s1, pkt->data, size, (pkt->flags & AV_PKT_FLAG_KEY) ? 1 : 0); + break; case AV_CODEC_ID_MPEG2TS: rtp_send_mpegts_raw(s1, pkt->data, size); break; diff --git a/libavformat/rtpenc.h b/libavformat/rtpenc.h index 854bf07f0e..ba88bfefc0 100644 --- a/libavformat/rtpenc.h +++ b/libavformat/rtpenc.h @@ -94,6 +94,7 @@ void ff_rtp_send_xiph(AVFormatContext *s1, const uint8_t *buff, int size); void ff_rtp_send_vc2hq(AVFormatContext *s1, const uint8_t *buf, int size, int interlaced); void ff_rtp_send_vp8(AVFormatContext *s1, const uint8_t *buff, int size); void ff_rtp_send_vp9(AVFormatContext *s1, const uint8_t *buff, int size); +void ff_rtp_send_av1(AVFormatContext *s1, const uint8_t *buf1, int size, int is_keyframe); void ff_rtp_send_jpeg(AVFormatContext *s1, const uint8_t *buff, int size); void ff_rtp_send_raw_rfc4175(AVFormatContext *s1, const uint8_t *buf, int size, int interlaced, int field); diff --git a/libavformat/rtpenc_av1.c b/libavformat/rtpenc_av1.c new file mode 100644 index 0000000000..fbf9212216 --- /dev/null +++ b/libavformat/rtpenc_av1.c @@ -0,0 +1,357 @@ +/* + * Packetization for RTP Payload Format For AV1 (v1.0) + * https://aomediacodec.github.io/av1-rtp-spec/ + * Copyright (c) 2024 Axis Communications + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * @brief AV1 / RTP packetization code (RTP Payload Format For AV1 (v1.0)) + * @author Chris Hodges + * @note This will remove TDs and OBU size fields + */ + +#include "avformat.h" +#include "rtpenc.h" +#include "libavcodec/av1.h" +#include "rtp_av1.h" + +// enable tracing of packet data +//#define RTPENC_AV1_VERBOSE_TRACE + +// enable searching for sequence header as workaround for AV1 encoders +// that do not set AV_PKT_FLAG_KEY correctly +#define RTPENC_AV1_SEARCH_SEQ_HEADER 1 + +void ff_rtp_send_av1(AVFormatContext *ctx, const uint8_t *frame_buf, int frame_size, int is_keyframe) { + uint8_t aggr_hdr = 0; + int last_packet_of_frame = 0; + RTPMuxContext *rtp_ctx = ctx->priv_data; + const uint8_t *obu_ptr = frame_buf; + int start_new_packet = 0; + unsigned int num_obus = 0; + unsigned int rem_pkt_size = rtp_ctx->max_payload_size - 1; + uint8_t *pkt_ptr = NULL; + + const uint8_t *curr_obu_ptr = NULL; + uint32_t curr_elem_size = 0; + int curr_obu_hdr = -1; + int curr_obu_ext = -1; + const uint8_t *last_obu_ptr = NULL; + uint32_t last_elem_size = 0; + int last_obu_hdr = -1; + int last_obu_ext = -1; + + rtp_ctx->timestamp = rtp_ctx->cur_timestamp; + + /* The payload structure is supposed to be straight-forward, but there 
are a + * couple of edge cases to be tackled and make things very complex. + * These are mainly due to: + * - the OBU element size being optional for the last element, but MANDATORY + * if there are more than 3 elements + * - the size field of the element is made up of a variable number of + * LEB bytes + * - the latter in combination with the desire to fill the max packet size + * could cause a catch22 + * - if there's less than 2 bytes remaining (depending on the required LEB), + * one would not have space for the payload of an element and must instead + * start the next packet + * - if there's less than 3 bytes remaining, the header byte plus the + * optional extension byte will not fit in the fragment making the + * handling even more complicated + * - as some OBU types are supposed to be filtered out, it is hard to decide + * via the remaining length whether the outputted OBU element will + * actually be the last one + * + * There are two major ways to tackle that: Pre-parsing of all OBUs within a + * frame (adds memory complexity) or lazy copying of the prior element. + * Here, the latter is implemented. + */ + + if (is_keyframe) { +#if RTPENC_AV1_SEARCH_SEQ_HEADER + /* search for OBU_SEQUENCE_HEADER to get a better indication that + * the frame was marked as keyframe is really a KEY_FRAME and not + * a INTRA_ONLY frame. This might be unnecessary if the AV1 parser/ + * encoder always correctly specifies AV_PKT_FLAG_KEY. + * + * Note: Spec does NOT prohibit resending bit-identical + * OBU_SEQUENCE_HEADER for ANY kind of frame, though! 
+ */ + int rem_size = frame_size; + const uint8_t *buf_ptr = frame_buf; + while (rem_size > 0) { + uint32_t obu_size; + uint8_t obu_hdr = *buf_ptr++; + uint8_t obu_type = (obu_hdr >> AV1S_OBU_TYPE) & AV1M_OBU_TYPE; + int num_lebs; + + if (obu_type == AV1_OBU_SEQUENCE_HEADER) { + av_log(ctx, AV_LOG_DEBUG, "Marking FIRST packet\n"); + aggr_hdr |= AV1F_AGGR_HDR_FIRST_PKT; + break; + } + if (!(obu_hdr & AV1F_OBU_HAS_SIZE_FIELD)) { + break; + } + rem_size--; + // read out explicit OBU size + num_lebs = parse_leb(ctx, buf_ptr, rem_size, &obu_size); + if (!num_lebs) { + break; + } + buf_ptr += num_lebs + obu_size; + rem_size -= num_lebs + obu_size; + } +#else // RTPENC_AV1_SEARCH_SEQ_HEADER + av_log(ctx, AV_LOG_DEBUG, "Marking FIRST packet\n"); + aggr_hdr |= AV1F_AGGR_HDR_FIRST_PKT; +#endif // RTPENC_AV1_SEARCH_SEQ_HEADER + } + rem_pkt_size = rtp_ctx->max_payload_size - 1; + pkt_ptr = rtp_ctx->buf + 1; + +#ifdef RTPENC_AV1_VERBOSE_TRACE + av_log(ctx, AV_LOG_TRACE, "AV1 Frame %d in (%x), size=%d:\n", + rtp_ctx->seq, rtp_ctx->flags, frame_size); + av_hex_dump_log(ctx, AV_LOG_TRACE, frame_buf, FFMIN(frame_size, 128)); +#endif + + while (frame_size) { + uint32_t obu_size; + int num_lebs = 0; + int ext_byte = -1; + + uint8_t obu_hdr = *obu_ptr++; + uint8_t obu_type = (obu_hdr >> AV1S_OBU_TYPE) & AV1M_OBU_TYPE; + frame_size--; + + if (obu_hdr & AV1F_OBU_FORBIDDEN) { + av_log(ctx, AV_LOG_ERROR, "Forbidden bit set in AV1 OBU header (0x%02x)\n", obu_hdr); + return; + } + + if (obu_hdr & AV1F_OBU_EXTENSION_FLAG) { + if (!frame_size) { + av_log(ctx, AV_LOG_ERROR, "Out of data for AV1 OBU header extension byte\n"); + return; + } + ext_byte = *obu_ptr++; + frame_size--; + } + + if (obu_hdr & AV1F_OBU_HAS_SIZE_FIELD) { + obu_hdr &= ~AV1F_OBU_HAS_SIZE_FIELD; // remove size field + // read out explicit OBU size + num_lebs = parse_leb(ctx, obu_ptr, frame_size, &obu_size); + if (!num_lebs) { + return; + } + obu_ptr += num_lebs; + frame_size -= num_lebs; + } else { + av_log(ctx, 
AV_LOG_ERROR, "Cannot handle AV1 OBUs without size fields\n"); + return; + } + + if ((long) obu_size > frame_size) { + av_log(ctx, AV_LOG_ERROR, "AV1 OBU size %d larger than remaining frame size %d\n", obu_size, frame_size); + return; + } + + if (obu_size > 0xfffffffd) { + av_log(ctx, AV_LOG_ERROR, "AV1 OBU size 0x%x might overflow (attack?)\n", obu_size); + return; + } + + frame_size -= obu_size; + + if ((obu_type == AV1_OBU_TEMPORAL_DELIMITER) || + (obu_type == AV1_OBU_TILE_LIST) || + (obu_type == AV1_OBU_PADDING)) { + // ignore and remove according to spec (note that OBU_PADDING is not + // mentioned in spec, but it does not make sense to transmit it). + obu_ptr += obu_size; + // additional handling if the ignored OBU was the last one + if (!frame_size) { + // we're done, flush the last packet, set RTP marker bit + last_packet_of_frame = 1; + goto flush_last_packet; + } + continue; + } + + /* if the last OBU had a temporal or spatial ID, they need to match to + * current; otherwise start new packet */ + if ((last_obu_ext >= 0) && (curr_obu_ext != last_obu_ext)) { + start_new_packet = 1; + } + +flush_last_packet: + last_obu_ptr = curr_obu_ptr; + last_elem_size = curr_elem_size; + last_obu_hdr = curr_obu_hdr; + last_obu_ext = curr_obu_ext; + + curr_obu_ptr = obu_ptr; // behind header + curr_elem_size = obu_size + 1 + ((ext_byte >= 0) ? 
1 : 0); + curr_obu_hdr = obu_hdr; + curr_obu_ext = ext_byte; + + obu_ptr += obu_size; + + if (last_obu_ptr) { + unsigned int first_elem_with_size = last_elem_size + calc_leb_size(last_elem_size); + // check if last packet fits completely and has reasonable space for + // at least a fragment of the next + if (!last_packet_of_frame && (first_elem_with_size + 10 < rem_pkt_size)) { + num_lebs = write_leb(pkt_ptr, last_elem_size); + pkt_ptr += num_lebs; + rem_pkt_size -= num_lebs; + } else { + if ((num_obus >= 3) && (last_packet_of_frame || (first_elem_with_size <= rem_pkt_size))) { + // last fits with forced size, but nothing else + num_lebs = write_leb(pkt_ptr, last_elem_size); + pkt_ptr += num_lebs; + rem_pkt_size -= num_lebs; + } + // force new packet + start_new_packet = 1; + } + + // write header and optional extension byte (if not a continued fragment) + if (last_obu_hdr >= 0) { + *pkt_ptr++ = last_obu_hdr; + last_elem_size--; + rem_pkt_size--; + if (last_obu_ext >= 0) { + *pkt_ptr++ = last_obu_ext; + last_elem_size--; + rem_pkt_size--; + } + } + // copy payload + memcpy(pkt_ptr, last_obu_ptr, last_elem_size); + pkt_ptr += last_elem_size; + rem_pkt_size -= last_elem_size; + num_obus++; + } + + if (start_new_packet || last_packet_of_frame) { + if (num_obus < 4) { + aggr_hdr |= num_obus << AV1S_AGGR_HDR_NUM_OBUS; + } + rtp_ctx->buf[0] = aggr_hdr; + +#ifdef RTPENC_AV1_VERBOSE_TRACE + av_log(ctx, AV_LOG_TRACE, "Sending NON-FRAG packet no %d, %ld/%d, %d OBUs (marker=%d)\n", + ((RTPMuxContext *) ctx->priv_data)->seq, + pkt_ptr - rtp_ctx->buf, rtp_ctx->max_payload_size, num_obus, last_packet_of_frame); + av_hex_dump_log(ctx, AV_LOG_TRACE, rtp_ctx->buf, FFMIN(pkt_ptr - rtp_ctx->buf, 64)); + av_log(ctx, AV_LOG_TRACE, "... 
end at offset %lx:\n", FFMAX((pkt_ptr - rtp_ctx->buf) - 64, 0)); + av_hex_dump_log(ctx, AV_LOG_TRACE, rtp_ctx->buf + FFMAX((pkt_ptr - rtp_ctx->buf) - 64, 0), FFMIN(pkt_ptr - rtp_ctx->buf, 64)); +#endif + + ff_rtp_send_data(ctx, rtp_ctx->buf, pkt_ptr - rtp_ctx->buf, last_packet_of_frame); + + rem_pkt_size = rtp_ctx->max_payload_size - 1; + pkt_ptr = rtp_ctx->buf + 1; + aggr_hdr = 0; + num_obus = 0; + } + + if (last_packet_of_frame) { + break; + } + + // check if element needs to be fragmented, otherwise we will deal with + // it in the next iteration + if ((curr_elem_size > rem_pkt_size) || + ((num_obus >= 3) && (curr_elem_size + calc_leb_size(curr_elem_size)) > rem_pkt_size)) { + uint32_t frag_size = rem_pkt_size; + + // if there are going more than 3 OBU elements, we are obliged to + // have the length field for the last + if (num_obus >= 3) { + // that's an upper limit of LEBs + num_lebs = calc_leb_size(rem_pkt_size - 1); + frag_size -= num_lebs; + + // write a fixed number of LEBs, in case the frag_size could + // now be specified with one less byte + write_leb_n(pkt_ptr, frag_size, num_lebs); + pkt_ptr += num_lebs; + rem_pkt_size -= num_lebs; + } + + // write header and optional extension byte + *pkt_ptr++ = curr_obu_hdr; + curr_elem_size--; + rem_pkt_size--; + if (curr_obu_ext >= 0) { + *pkt_ptr++ = curr_obu_ext; + curr_elem_size--; + rem_pkt_size--; + } + + // disable header writing for final fragment + curr_obu_hdr = -1; + curr_obu_ext = -1; + + // send more full packet sized fragments + do { + // copy payload + memcpy(pkt_ptr, curr_obu_ptr, rem_pkt_size); + pkt_ptr += rem_pkt_size; + curr_obu_ptr += rem_pkt_size; + curr_elem_size -= rem_pkt_size; + num_obus++; + + aggr_hdr |= AV1F_AGGR_HDR_LAST_FRAG; + if (num_obus < 4) { + aggr_hdr |= num_obus << AV1S_AGGR_HDR_NUM_OBUS; + } + rtp_ctx->buf[0] = aggr_hdr; + +#ifdef RTPENC_AV1_VERBOSE_TRACE + av_log(ctx, AV_LOG_DEBUG, "Sending FRAG packet no %d, %ld/%d, %d OBUs\n", + ((RTPMuxContext *) ctx->priv_data)->seq, + 
pkt_ptr - rtp_ctx->buf, rtp_ctx->max_payload_size, num_obus); + av_hex_dump_log(ctx, AV_LOG_TRACE, rtp_ctx->buf, FFMIN(pkt_ptr - rtp_ctx->buf, 64)); + av_log(ctx, AV_LOG_TRACE, "... end at offset %lx:\n", FFMAX((pkt_ptr - rtp_ctx->buf) - 64, 0)); + av_hex_dump_log(ctx, AV_LOG_TRACE, rtp_ctx->buf + FFMAX((pkt_ptr - rtp_ctx->buf) - 64, 0), FFMIN(pkt_ptr - rtp_ctx->buf, 64)); +#endif + + ff_rtp_send_data(ctx, rtp_ctx->buf, pkt_ptr - rtp_ctx->buf, 0); + rem_pkt_size = rtp_ctx->max_payload_size - 1; + pkt_ptr = rtp_ctx->buf + 1; + + aggr_hdr = AV1F_AGGR_HDR_FRAG_CONT; + num_obus = 0; + } while (curr_elem_size > rem_pkt_size); + start_new_packet = 0; + } + + if (!frame_size) { + // we're done, flush the last packet, set RTP marker bit + last_packet_of_frame = 1; + goto flush_last_packet; + } + } +} diff --git a/libavformat/sdp.c b/libavformat/sdp.c index 28490d77f3..215e38f8fc 100644 --- a/libavformat/sdp.c +++ b/libavformat/sdp.c @@ -31,6 +31,7 @@ #include "libavcodec/mpeg4audio.h" #include "avformat.h" #include "internal.h" +#include "av1.h" #include "avc.h" #include "hevc.h" #include "nal.h" @@ -155,6 +156,26 @@ static int sdp_get_address(char *dest_addr, int size, int *ttl, const char *url) return port; } +static int extradata2psets_av1(AVFormatContext *s, const AVCodecParameters *par, + char **out) +{ + char *psets; + AV1SequenceParameters seq; + + if (ff_av1_parse_seq_header(&seq, par->extradata, par->extradata_size) < 0) + return AVERROR_INVALIDDATA; + + psets = av_mallocz(64); + if (!psets) { + av_log(s, AV_LOG_ERROR, "Cannot allocate memory for the parameter sets.\n"); + return AVERROR(ENOMEM); + } + av_strlcatf(psets, 64, "profile=%u;level-idx=%u;tier=%u", + seq.profile, seq.level, seq.tier); + *out = psets; + return 0; +} + #define MAX_PSET_SIZE 1024 static int extradata2psets(AVFormatContext *s, const AVCodecParameters *par, char **out) @@ -523,6 +544,15 @@ static int sdp_write_media_attributes(char *buff, int size, const AVStream *st, int ret = 0; switch 
(p->codec_id) { + case AV_CODEC_ID_AV1: + av_strlcatf(buff, size, "a=rtpmap:%d AV1/90000\r\n", payload_type); + if (p->extradata_size) { + ret = extradata2psets_av1(fmt, p, &config); + if (ret < 0) + return ret; + av_strlcatf(buff, size, "a=fmtp:%d %s\r\n", payload_type, config); + } + break; case AV_CODEC_ID_DIRAC: av_strlcatf(buff, size, "a=rtpmap:%d VC2/90000\r\n", payload_type); break;