From cbfbacff20a8477a3a15fbd20e7f5ceddb8ba615 Mon Sep 17 00:00:00 2001
From: James Almer <jamrial@gmail.com>
Date: Sat, 15 Jun 2024 14:50:03 -0300
Subject: [PATCH] avformat/hevc: add a function to write a lhvC box

Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavformat/hevc.c | 163 +++++++++++++++++++++++++++++++++++----------
 libavformat/hevc.h |  17 +++++
 2 files changed, 145 insertions(+), 35 deletions(-)

diff --git a/libavformat/hevc.c b/libavformat/hevc.c
index f3e5542699..69e6f719e5 100644
--- a/libavformat/hevc.c
+++ b/libavformat/hevc.c
@@ -43,12 +43,16 @@ enum {
 
 #define FLAG_ARRAY_COMPLETENESS (1 << 0)
 #define FLAG_IS_NALFF           (1 << 1)
+#define FLAG_IS_LHVC            (1 << 2)
 
 typedef struct HVCCNALUnit {
     uint8_t nuh_layer_id;
     uint8_t parameter_set_id;
     uint16_t nalUnitLength;
     const uint8_t *nalUnit;
+
+    // VPS only: stored by hvcc_parse_vps(), read back by hvcc_parse_sps()
+    uint8_t vps_max_sub_layers_minus1;
 } HVCCNALUnit;
 
 typedef struct HVCCNALUnitArray {
@@ -383,8 +387,6 @@ static void skip_sub_layer_ordering_info(GetBitContext *gb)
 static int hvcc_parse_vps(GetBitContext *gb, HVCCNALUnit *nal,
                           HEVCDecoderConfigurationRecord *hvcc)
 {
-    unsigned int vps_max_sub_layers_minus1;
-
     nal->parameter_set_id = get_bits(gb, 4);
     /*
      * vps_reserved_three_2bits   u(2)
@@ -392,7 +394,7 @@ static int hvcc_parse_vps(GetBitContext *gb, HVCCNALUnit *nal,
      */
     skip_bits(gb, 8);
 
-    vps_max_sub_layers_minus1 = get_bits(gb, 3);
+    nal->vps_max_sub_layers_minus1 = get_bits(gb, 3);
 
     /*
      * numTemporalLayers greater than 1 indicates that the stream to which this
@@ -403,7 +405,7 @@ static int hvcc_parse_vps(GetBitContext *gb, HVCCNALUnit *nal,
      * that it is unknown whether the stream is temporally scalable.
      */
     hvcc->numTemporalLayers = FFMAX(hvcc->numTemporalLayers,
-                                    vps_max_sub_layers_minus1 + 1);
+                                    nal->vps_max_sub_layers_minus1 + 1);
 
     /*
      * vps_temporal_id_nesting_flag u(1)
@@ -411,7 +413,7 @@ static int hvcc_parse_vps(GetBitContext *gb, HVCCNALUnit *nal,
      */
     skip_bits(gb, 17);
 
-    hvcc_parse_ptl(gb, hvcc, vps_max_sub_layers_minus1);
+    hvcc_parse_ptl(gb, hvcc, nal->vps_max_sub_layers_minus1);
 
     /* nothing useful for hvcC past this point */
     return 0;
@@ -508,10 +510,33 @@ static int hvcc_parse_sps(GetBitContext *gb, HVCCNALUnit *nal,
 {
     unsigned int i, sps_max_sub_layers_minus1, log2_max_pic_order_cnt_lsb_minus4;
     unsigned int num_short_term_ref_pic_sets, num_delta_pocs[HEVC_MAX_SHORT_TERM_REF_PIC_SETS];
+    unsigned int sps_ext_or_max_sub_layers_minus1, multi_layer_ext_sps_flag;
 
-    skip_bits(gb, 4); // sps_video_parameter_set_id
+    unsigned int sps_video_parameter_set_id = get_bits(gb, 4);
 
-    sps_max_sub_layers_minus1 = get_bits (gb, 3);
+    if (nal->nuh_layer_id == 0) {
+        sps_ext_or_max_sub_layers_minus1 = 0;
+        sps_max_sub_layers_minus1 = get_bits(gb, 3);
+    } else {
+        sps_ext_or_max_sub_layers_minus1 = get_bits(gb, 3);
+        if (sps_ext_or_max_sub_layers_minus1 == 7) {
+            const HVCCNALUnitArray *array = &hvcc->arrays[VPS_INDEX];
+            const HVCCNALUnit *vps = NULL;
+
+            for (i = 0; i < array->numNalus; i++)
+                if (sps_video_parameter_set_id == array->nal[i].parameter_set_id) {
+                    vps = &array->nal[i];
+                    break;
+                }
+            if (!vps)
+                return AVERROR_INVALIDDATA;
+
+            sps_max_sub_layers_minus1 = vps->vps_max_sub_layers_minus1;
+        } else
+            sps_max_sub_layers_minus1 = sps_ext_or_max_sub_layers_minus1;
+    }
+    multi_layer_ext_sps_flag = nal->nuh_layer_id &&
+                               sps_ext_or_max_sub_layers_minus1 == 7;
 
     /*
      * numTemporalLayers greater than 1 indicates that the stream to which this
@@ -524,12 +549,17 @@ static int hvcc_parse_sps(GetBitContext *gb, HVCCNALUnit *nal,
     hvcc->numTemporalLayers = FFMAX(hvcc->numTemporalLayers,
                                     sps_max_sub_layers_minus1 + 1);
 
-    hvcc->temporalIdNested = get_bits1(gb);
-
-    hvcc_parse_ptl(gb, hvcc, sps_max_sub_layers_minus1);
+    if (!multi_layer_ext_sps_flag) {
+        hvcc->temporalIdNested = get_bits1(gb);
+        hvcc_parse_ptl(gb, hvcc, sps_max_sub_layers_minus1);
+    }
 
     nal->parameter_set_id = get_ue_golomb_long(gb);
 
+    if (multi_layer_ext_sps_flag) {
+        if (get_bits1(gb)) // update_rep_format_flag
+            skip_bits(gb, 8); // sps_rep_format_idx
+    } else {
     hvcc->chromaFormat = get_ue_golomb_long(gb);
 
     if (hvcc->chromaFormat == 3)
@@ -547,12 +577,15 @@ static int hvcc_parse_sps(GetBitContext *gb, HVCCNALUnit *nal,
 
     hvcc->bitDepthLumaMinus8          = get_ue_golomb_long(gb);
     hvcc->bitDepthChromaMinus8        = get_ue_golomb_long(gb);
+    }
     log2_max_pic_order_cnt_lsb_minus4 = get_ue_golomb_long(gb);
 
+    if (!multi_layer_ext_sps_flag) {
     /* sps_sub_layer_ordering_info_present_flag */
     i = get_bits1(gb) ? 0 : sps_max_sub_layers_minus1;
     for (; i <= sps_max_sub_layers_minus1; i++)
         skip_sub_layer_ordering_info(gb);
+    }
 
     get_ue_golomb_long(gb); // log2_min_luma_coding_block_size_minus3
     get_ue_golomb_long(gb); // log2_diff_max_min_luma_coding_block_size
@@ -561,9 +594,15 @@ static int hvcc_parse_sps(GetBitContext *gb, HVCCNALUnit *nal,
     get_ue_golomb_long(gb); // max_transform_hierarchy_depth_inter
     get_ue_golomb_long(gb); // max_transform_hierarchy_depth_intra
 
-    if (get_bits1(gb) && // scaling_list_enabled_flag
-        get_bits1(gb))   // sps_scaling_list_data_present_flag
-        skip_scaling_list_data(gb);
+    if (get_bits1(gb)) { // scaling_list_enabled_flag
+        int sps_infer_scaling_list_flag = 0;
+        if (multi_layer_ext_sps_flag)
+            sps_infer_scaling_list_flag = get_bits1(gb);
+        if (sps_infer_scaling_list_flag)
+            skip_bits(gb, 6);   // sps_scaling_list_ref_layer_id
+        else if (get_bits1(gb)) // sps_scaling_list_data_present_flag
+            skip_scaling_list_data(gb);
+    }
 
     skip_bits1(gb); // amp_enabled_flag
     skip_bits1(gb); // sample_adaptive_offset_enabled_flag
@@ -703,6 +742,7 @@ static int hvcc_add_nal_unit(const uint8_t *nal_buf, uint32_t nal_size,
 {
     int ret = 0;
     int is_nalff = !!(flags & FLAG_IS_NALFF);
+    int is_lhvc = !!(flags & FLAG_IS_LHVC);
     int ps_array_completeness = !!(flags & FLAG_ARRAY_COMPLETENESS);
     HVCCNALUnitArray *const array = &hvcc->arrays[array_idx];
     HVCCNALUnit *nal;
@@ -722,7 +762,7 @@ static int hvcc_add_nal_unit(const uint8_t *nal_buf, uint32_t nal_size,
         goto end;
 
     nal_unit_parse_header(&gbc, &nal_type, &nuh_layer_id);
-    if (nuh_layer_id > 0)
+    if (!is_lhvc && nuh_layer_id > 0)
         goto end;
 
     /*
@@ -798,9 +838,12 @@ static void hvcc_close(HEVCDecoderConfigurationRecord *hvcc)
     }
 }
 
-static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
+static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc,
+                      int flags)
 {
-    uint16_t vps_count, sps_count, pps_count;
+    uint16_t numNalus[NB_ARRAYS] = { 0 };
+    int is_lhvc = !!(flags & FLAG_IS_LHVC);
+    int numOfArrays = 0;
 
     /*
      * We only support writing HEVCDecoderConfigurationRecord version 1.
@@ -826,10 +869,32 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
      * let's always set them to values meaning 'unspecified'.
      */
     hvcc->avgFrameRate      = 0;
-    hvcc->constantFrameRate = 0;
+    /*
+     * lhvC doesn't store this field. It instead reserves the bits, setting them
+     * to '11'b.
+     */
+    hvcc->constantFrameRate = is_lhvc * 0x3;
 
+    /*
+     * Skip all NALUs with nuh_layer_id == 0 if writing lhvC. We do it here and
+     * not before parsing them as some parameter sets with nuh_layer_id > 0
+     * may reference base layer parameter sets.
+     */
+    for (unsigned i = 0; i < FF_ARRAY_ELEMS(hvcc->arrays); i++) {
+        const HVCCNALUnitArray *const array = &hvcc->arrays[i];
+
+        if (array->numNalus == 0)
+            continue;
+
+        for (unsigned j = 0; j < array->numNalus; j++)
+            numNalus[i] += !is_lhvc || (array->nal[j].nuh_layer_id != 0);
+        numOfArrays += (numNalus[i] > 0);
+    }
+
+    av_log(NULL, AV_LOG_TRACE,  "%s\n", is_lhvc ? "lhvC" : "hvcC");
     av_log(NULL, AV_LOG_TRACE,  "configurationVersion:                %"PRIu8"\n",
             hvcc->configurationVersion);
+    if (!is_lhvc) {
     av_log(NULL, AV_LOG_TRACE,  "general_profile_space:               %"PRIu8"\n",
             hvcc->general_profile_space);
     av_log(NULL, AV_LOG_TRACE,  "general_tier_flag:                   %"PRIu8"\n",
@@ -842,10 +907,12 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
             hvcc->general_constraint_indicator_flags);
     av_log(NULL, AV_LOG_TRACE,  "general_level_idc:                   %"PRIu8"\n",
             hvcc->general_level_idc);
+    }
     av_log(NULL, AV_LOG_TRACE,  "min_spatial_segmentation_idc:        %"PRIu16"\n",
             hvcc->min_spatial_segmentation_idc);
     av_log(NULL, AV_LOG_TRACE,  "parallelismType:                     %"PRIu8"\n",
             hvcc->parallelismType);
+    if (!is_lhvc) {
     av_log(NULL, AV_LOG_TRACE,  "chromaFormat:                        %"PRIu8"\n",
             hvcc->chromaFormat);
     av_log(NULL, AV_LOG_TRACE,  "bitDepthLumaMinus8:                  %"PRIu8"\n",
@@ -856,6 +923,7 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
             hvcc->avgFrameRate);
     av_log(NULL, AV_LOG_TRACE,  "constantFrameRate:                   %"PRIu8"\n",
             hvcc->constantFrameRate);
+    }
     av_log(NULL, AV_LOG_TRACE,  "numTemporalLayers:                   %"PRIu8"\n",
             hvcc->numTemporalLayers);
     av_log(NULL, AV_LOG_TRACE,  "temporalIdNested:                    %"PRIu8"\n",
@@ -863,11 +931,11 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
     av_log(NULL, AV_LOG_TRACE,  "lengthSizeMinusOne:                  %"PRIu8"\n",
             hvcc->lengthSizeMinusOne);
     av_log(NULL, AV_LOG_TRACE,  "numOfArrays:                         %"PRIu8"\n",
-            hvcc->numOfArrays);
+            numOfArrays);
     for (unsigned i = 0, j = 0; i < FF_ARRAY_ELEMS(hvcc->arrays); i++) {
         const HVCCNALUnitArray *const array = &hvcc->arrays[i];
 
-        if (array->numNalus == 0)
+        if (numNalus[i] == 0)
             continue;
 
         av_log(NULL, AV_LOG_TRACE, "array_completeness[%u]:               %"PRIu8"\n",
@@ -875,28 +943,31 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
         av_log(NULL, AV_LOG_TRACE, "NAL_unit_type[%u]:                    %"PRIu8"\n",
                j, array->NAL_unit_type);
         av_log(NULL, AV_LOG_TRACE, "numNalus[%u]:                         %"PRIu16"\n",
-               j, array->numNalus);
-        for (unsigned k = 0; k < array->numNalus; k++)
+               j, numNalus[i]);
+        for (unsigned k = 0; k < array->numNalus; k++) {
+            if (is_lhvc && array->nal[k].nuh_layer_id == 0)
+                continue;
+
             av_log(NULL, AV_LOG_TRACE,
                     "nalUnitLength[%u][%u]:                 %"PRIu16"\n",
                    j, k, array->nal[k].nalUnitLength);
+        }
         j++;
     }
 
     /*
      * We need at least one of each: VPS, SPS and PPS.
      */
-    vps_count = hvcc->arrays[VPS_INDEX].numNalus;
-    sps_count = hvcc->arrays[SPS_INDEX].numNalus;
-    pps_count = hvcc->arrays[PPS_INDEX].numNalus;
-    if (!vps_count || vps_count > HEVC_MAX_VPS_COUNT ||
-        !sps_count || sps_count > HEVC_MAX_SPS_COUNT ||
-        !pps_count || pps_count > HEVC_MAX_PPS_COUNT)
+    if ((!numNalus[VPS_INDEX] || numNalus[VPS_INDEX] > HEVC_MAX_VPS_COUNT) && !is_lhvc)
+        return AVERROR_INVALIDDATA;
+    if (!numNalus[SPS_INDEX] || numNalus[SPS_INDEX] > HEVC_MAX_SPS_COUNT ||
+        !numNalus[PPS_INDEX] || numNalus[PPS_INDEX] > HEVC_MAX_PPS_COUNT)
         return AVERROR_INVALIDDATA;
 
     /* unsigned int(8) configurationVersion = 1; */
     avio_w8(pb, hvcc->configurationVersion);
 
+    if (!is_lhvc) {
     /*
      * unsigned int(2) general_profile_space;
      * unsigned int(1) general_tier_flag;
@@ -915,6 +986,7 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
 
     /* unsigned int(8) general_level_idc; */
     avio_w8(pb, hvcc->general_level_idc);
+    }
 
     /*
      * bit(4) reserved = '1111'b;
@@ -928,6 +1000,7 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
      */
     avio_w8(pb, hvcc->parallelismType | 0xfc);
 
+    if (!is_lhvc) {
     /*
      * bit(6) reserved = '111111'b;
      * unsigned int(2) chromaFormat;
@@ -948,9 +1021,13 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
 
     /* bit(16) avgFrameRate; */
     avio_wb16(pb, hvcc->avgFrameRate);
+    }
 
     /*
+     * if (!is_lhvc)
      * bit(2) constantFrameRate;
+     * else
+     *     bit(2) reserved = '11'b;
      * bit(3) numTemporalLayers;
      * bit(1) temporalIdNested;
      * unsigned int(2) lengthSizeMinusOne;
@@ -961,12 +1038,12 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
                 hvcc->lengthSizeMinusOne);
 
     /* unsigned int(8) numOfArrays; */
-    avio_w8(pb, hvcc->numOfArrays);
+    avio_w8(pb, numOfArrays);
 
     for (unsigned i = 0; i < FF_ARRAY_ELEMS(hvcc->arrays); i++) {
         const HVCCNALUnitArray *const array = &hvcc->arrays[i];
 
-        if (!array->numNalus)
+        if (!numNalus[i])
             continue;
         /*
          * bit(1) array_completeness;
@@ -977,11 +1054,14 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc)
                     array->NAL_unit_type & 0x3f);
 
         /* unsigned int(16) numNalus; */
-        avio_wb16(pb, array->numNalus);
+        avio_wb16(pb, numNalus[i]);
 
         for (unsigned j = 0; j < array->numNalus; j++) {
             HVCCNALUnit *nal = &array->nal[j];
 
+            if (is_lhvc && nal->nuh_layer_id == 0)
+                continue;
+
             /* unsigned int(16) nalUnitLength; */
             avio_wb16(pb, nal->nalUnitLength);
 
@@ -1082,12 +1162,11 @@ static int hvcc_parse_nal_unit(const uint8_t *buf, uint32_t len, int type,
     return 0;
 }
 
-int ff_isom_write_hvcc(AVIOContext *pb, const uint8_t *data,
-                       int size, int ps_array_completeness)
+static int write_configuration_record(AVIOContext *pb, const uint8_t *data,
+                                      int size, int flags)
 {
     HEVCDecoderConfigurationRecord hvcc;
     uint8_t *buf, *end, *start = NULL;
-    int flags = !!ps_array_completeness * FLAG_ARRAY_COMPLETENESS;
     int ret;
 
     if (size < 6) {
@@ -1154,7 +1233,7 @@ int ff_isom_write_hvcc(AVIOContext *pb, const uint8_t *data,
             }
         }
 
-        ret = hvcc_write(pb, &hvcc);
+        ret = hvcc_write(pb, &hvcc, flags);
         goto end;
     } else if (!(AV_RB24(data) == 1 || AV_RB32(data) == 1)) {
         /* Not a valid Annex B start code prefix */
@@ -1183,10 +1262,24 @@ int ff_isom_write_hvcc(AVIOContext *pb, const uint8_t *data,
         buf += len;
     }
 
-    ret = hvcc_write(pb, &hvcc);
+    ret = hvcc_write(pb, &hvcc, flags);
 
 end:
     hvcc_close(&hvcc);
     av_free(start);
     return ret;
 }
+
+int ff_isom_write_hvcc(AVIOContext *pb, const uint8_t *data,
+                       int size, int ps_array_completeness)
+{
+    int flags = ps_array_completeness ? FLAG_ARRAY_COMPLETENESS : 0; // hvcC: no FLAG_IS_LHVC
+    return write_configuration_record(pb, data, size, flags);
+}
+
+int ff_isom_write_lhvc(AVIOContext *pb, const uint8_t *data,
+                       int size, int ps_array_completeness)
+{
+    int flags = FLAG_IS_LHVC | (ps_array_completeness ? FLAG_ARRAY_COMPLETENESS : 0);
+    return write_configuration_record(pb, data, size, flags); // lhvC variant of the record
+}
diff --git a/libavformat/hevc.h b/libavformat/hevc.h
index cb66ac66ac..12000b25a5 100644
--- a/libavformat/hevc.h
+++ b/libavformat/hevc.h
@@ -97,4 +97,21 @@ int ff_hevc_annexb2mp4_buf(const uint8_t *buf_in, uint8_t **buf_out,
 int ff_isom_write_hvcc(AVIOContext *pb, const uint8_t *data,
                        int size, int ps_array_completeness);
 
+/**
+ * Writes L-HEVC extradata (parameter sets with nuh_layer_id > 0, as a
+ * LHEVCDecoderConfigurationRecord) to the provided AVIOContext.
+ *
+ * If the extradata is in Annex B format, it gets converted to lhvC format
+ * before writing. Otherwise, hvcC formatted extradata is expected, not lhvC.
+ *
+ * @param pb address of the AVIOContext where the lhvC shall be written
+ * @param data address of the buffer holding the data needed to write the lhvC
+ * @param size size (in bytes) of the data buffer
+ * @param ps_array_completeness whether all parameter sets are in the lhvC (1)
+ *        or there may be additional parameter sets in the bitstream (0)
+ * @return >=0 in case of success, a negative value corresponding to an AVERROR
+ *         code in case of failure
+ */
+int ff_isom_write_lhvc(AVIOContext *pb, const uint8_t *data,
+                       int size, int ps_array_completeness);
 #endif /* AVFORMAT_HEVC_H */