From f282c34c009e3653ec160c3880e64fc1a9300d0e Mon Sep 17 00:00:00 2001
From: Paul B Mahol
Date: Thu, 30 Sep 2021 14:18:22 +0200
Subject: [PATCH] avcodec/amr*dec: add multichannel support

---
 libavcodec/amr_parser.c | 44 +++++++++++++++++++-------
 libavcodec/amrnbdec.c   | 54 ++++++++++++++++++++-----------
 libavcodec/amrwbdec.c   | 59 +++++++++++++++++++++-------------
 libavformat/amr.c       | 70 ++++++++++++++++++++++++++++-------------
 4 files changed, 154 insertions(+), 73 deletions(-)

diff --git a/libavcodec/amr_parser.c b/libavcodec/amr_parser.c
index 2659cb40d7..79258d4d0c 100644
--- a/libavcodec/amr_parser.c
+++ b/libavcodec/amr_parser.c
@@ -39,9 +39,17 @@ typedef struct AMRParseContext {
     ParseContext pc;
     uint64_t cumulated_size;
     uint64_t block_count;
+    int current_channel;
     int remaining;
 } AMRParseContext;
 
+static av_cold int amr_parse_init(AVCodecParserContext *s1)
+{
+    AMRParseContext *s = s1->priv_data;
+    s->remaining = -1;
+    return 0;
+}
+
 static int amr_parse(AVCodecParserContext *s1,
                      AVCodecContext *avctx,
                      const uint8_t **poutbuf, int *poutbuf_size,
@@ -57,21 +65,34 @@ static int amr_parse(AVCodecParserContext *s1,
     if (s1->flags & PARSER_FLAG_COMPLETE_FRAMES) {
         next = buf_size;
     } else {
-        if (s->remaining) {
-            next = s->remaining;
-        } else {
-            int mode = (buf[0] >> 3) & 0x0F;
+        int ch, offset = 0;
 
-            if (avctx->codec_id == AV_CODEC_ID_AMR_NB) {
-                next = amrnb_packed_size[mode];
-            } else if (avctx->codec_id == AV_CODEC_ID_AMR_WB) {
-                next = amrwb_packed_size[mode];
+        for (ch = s->current_channel; ch < avctx->channels; ch++) {
+            if (s->remaining >= 0) {
+                next = s->remaining;
+            } else {
+                int mode = (buf[offset] >> 3) & 0x0F;
+
+                if (avctx->codec_id == AV_CODEC_ID_AMR_NB) {
+                    next = amrnb_packed_size[mode];
+                } else if (avctx->codec_id == AV_CODEC_ID_AMR_WB) {
+                    next = amrwb_packed_size[mode];
+                }
+            }
+
+            offset += next;
+            if (offset >= buf_size) {
+                s->remaining = offset - buf_size;
+                next = END_NOT_FOUND;
+                break;
+            } else {
+                s->remaining = -1;
             }
         }
 
-        s->remaining = next - FFMIN(buf_size, next);
-        if (s->remaining)
-            next = END_NOT_FOUND;
+        s->current_channel = ch % avctx->channels;
+        if (s->remaining < 0)
+            next = offset;
     }
     if (next != END_NOT_FOUND) {
         if (s->cumulated_size < UINT64_MAX - next) {
@@ -98,6 +119,7 @@ static int amr_parse(AVCodecParserContext *s1,
 const AVCodecParser ff_amr_parser = {
     .codec_ids      = { AV_CODEC_ID_AMR_NB, AV_CODEC_ID_AMR_WB },
     .priv_data_size = sizeof(AMRParseContext),
+    .parser_init    = amr_parse_init,
     .parser_parse   = amr_parse,
     .parser_close   = ff_parse_close,
 };
diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c
index e366a09976..472fa85f87 100644
--- a/libavcodec/amrnbdec.c
+++ b/libavcodec/amrnbdec.c
@@ -145,6 +145,10 @@ typedef struct AMRContext {
 
 } AMRContext;
 
+typedef struct AMRChannelsContext {
+    AMRContext ch[2];
+} AMRChannelsContext;
+
 /** Double version of ff_weighted_vector_sumf() */
 static void weighted_vector_sumd(double *out, const double *in_a,
                                  const double *in_b, double weight_coeff_a,
@@ -159,20 +163,24 @@ static av_cold int amrnb_decode_init(AVCodecContext *avctx)
 {
-    AMRContext *p = avctx->priv_data;
+    AMRChannelsContext *s = avctx->priv_data;
     int i;
 
-    if (avctx->channels > 1) {
-        avpriv_report_missing_feature(avctx, "multi-channel AMR");
+    if (avctx->channels > 2) {
+        avpriv_report_missing_feature(avctx, ">2 channel AMR");
         return AVERROR_PATCHWELCOME;
     }
 
-    avctx->channels       = 1;
-    avctx->channel_layout = AV_CH_LAYOUT_MONO;
+    if (!avctx->channels) {
+        avctx->channels       = 1;
+        avctx->channel_layout = AV_CH_LAYOUT_MONO;
+    }
     if (!avctx->sample_rate)
         avctx->sample_rate = 8000;
-    avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+
+    for (int ch = 0; ch < avctx->channels; ch++) {
+        AMRContext *p = &s->ch[ch];
 
     // p->excitation always points to the same position in p->excitation_buf
     p->excitation = &p->excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1];
@@ -188,6 +196,7 @@ static av_cold int amrnb_decode_init(AVCodecContext *avctx)
     ff_acelp_vectors_init(&p->acelpv_ctx);
     ff_celp_filter_init(&p->celpf_ctx);
     ff_celp_math_init(&p->celpm_ctx);
+    }
 
     return 0;
 }
@@ -949,25 +958,30 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
                               int *got_frame_ptr, AVPacket *avpkt)
 {
-    AMRContext *p = avctx->priv_data;         // pointer to private data
+    AMRChannelsContext *s = avctx->priv_data; // pointer to private data
     AVFrame *frame     = data;
     const uint8_t *buf = avpkt->data;
     int buf_size       = avpkt->size;
-    float *buf_out;                          // pointer to the output data buffer
-    int i, subframe, ret;
-    float fixed_gain_factor;
-    AMRFixed fixed_sparse = {0};             // fixed vector up to anti-sparseness processing
-    float spare_vector[AMR_SUBFRAME_SIZE];   // extra stack space to hold result from anti-sparseness processing
-    float synth_fixed_gain;                  // the fixed gain that synthesis should use
-    const float *synth_fixed_vector;         // pointer to the fixed vector that synthesis should use
+    int ret;
 
     /* get output buffer */
    frame->nb_samples = AMR_BLOCK_SIZE;
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    buf_out = (float *)frame->data[0];
+
+    for (int ch = 0; ch < avctx->channels; ch++) {
+        AMRContext *p = &s->ch[ch];
+        float fixed_gain_factor;
+        AMRFixed fixed_sparse = {0};           // fixed vector up to anti-sparseness processing
+        float spare_vector[AMR_SUBFRAME_SIZE]; // extra stack space to hold result from anti-sparseness processing
+        float synth_fixed_gain;                // the fixed gain that synthesis should use
+        const float *synth_fixed_vector;       // pointer to the fixed vector that synthesis should use
+        float *buf_out = (float *)frame->extended_data[ch];
+        int channel_size;
+        int i, subframe;
 
     p->cur_frame_mode = unpack_bitstream(p, buf, buf_size);
+    channel_size = frame_sizes_nb[p->cur_frame_mode] + 1; // +7 for rounding and +8 for TOC
     if (p->cur_frame_mode == NO_DATA) {
         av_log(avctx, AV_LOG_ERROR, "Corrupt bitstream\n");
         return AVERROR_INVALIDDATA;
     }
@@ -1072,11 +1086,13 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
      * qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71.
      */
     p->acelpv_ctx.weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3],
                                        0.84, 0.16, LP_FILTER_ORDER);
+
+        buf += channel_size;
+        buf_size -= channel_size;
+    }
 
     *got_frame_ptr = 1;
 
-    /* return the amount of bytes consumed if everything was OK */
-    return frame_sizes_nb[p->cur_frame_mode] + 1; // +7 for rounding and +8 for TOC
+    return avpkt->size;
 }
@@ -1085,10 +1101,10 @@ const AVCodec ff_amrnb_decoder = {
     .long_name      = NULL_IF_CONFIG_SMALL("AMR-NB (Adaptive Multi-Rate NarrowBand)"),
     .type           = AVMEDIA_TYPE_AUDIO,
     .id             = AV_CODEC_ID_AMR_NB,
-    .priv_data_size = sizeof(AMRContext),
+    .priv_data_size = sizeof(AMRChannelsContext),
     .init           = amrnb_decode_init,
     .decode         = amrnb_decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
-    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
+    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
                                                      AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index a6c1d894d7..998dd82791 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -93,21 +93,30 @@
 
 } AMRWBContext;
 
+typedef struct AMRWBChannelsContext {
+    AMRWBContext ch[2];
+} AMRWBChannelsContext;
+
 static av_cold int amrwb_decode_init(AVCodecContext *avctx)
 {
-    AMRWBContext *ctx = avctx->priv_data;
+    AMRWBChannelsContext *s = avctx->priv_data;
     int i;
 
-    if (avctx->channels > 1) {
-        avpriv_report_missing_feature(avctx, "multi-channel AMR");
+    if (avctx->channels > 2) {
+        avpriv_report_missing_feature(avctx, ">2 channel AMR");
         return AVERROR_PATCHWELCOME;
     }
 
-    avctx->channels       = 1;
-    avctx->channel_layout = AV_CH_LAYOUT_MONO;
+    if (!avctx->channels) {
+        avctx->channels       = 1;
+        avctx->channel_layout = AV_CH_LAYOUT_MONO;
+    }
     if (!avctx->sample_rate)
         avctx->sample_rate = 16000;
-    avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+
+    for (int ch = 0; ch < avctx->channels; ch++) {
+        AMRWBContext *ctx = &s->ch[ch];
 
     av_lfg_init(&ctx->prng, 1);
 
@@ -124,6 +133,7 @@ static av_cold int amrwb_decode_init(AVCodecContext *avctx)
     ff_acelp_vectors_init(&ctx->acelpv_ctx);
     ff_celp_filter_init(&ctx->celpf_ctx);
     ff_celp_math_init(&ctx->celpm_ctx);
+    }
 
     return 0;
 }
@@ -1094,13 +1104,21 @@ static void update_sub_state(AMRWBContext *ctx)
 static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
                               int *got_frame_ptr, AVPacket *avpkt)
 {
-    AMRWBContext *ctx  = avctx->priv_data;
+    AMRWBChannelsContext *s = avctx->priv_data;
     AVFrame *frame     = data;
-    AMRWBFrame *cf     = &ctx->frame;
     const uint8_t *buf = avpkt->data;
     int buf_size       = avpkt->size;
+    int sub, i, ret;
+
+    /* get output buffer */
+    frame->nb_samples = 4 * AMRWB_SFR_SIZE_16k;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
+    for (int ch = 0; ch < avctx->channels; ch++) {
+        AMRWBContext *ctx = &s->ch[ch];
+        AMRWBFrame *cf = &ctx->frame;
     int expected_fr_size, header_size;
-    float *buf_out;
     float spare_vector[AMRWB_SFR_SIZE];      // extra stack space to hold result from anti-sparseness processing
     float fixed_gain_factor;                 // fixed gain correction factor (gamma)
     float *synth_fixed_vector;               // pointer to the fixed vector that synthesis should use
@@ -1110,13 +1128,7 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
     float hb_exc[AMRWB_SFR_SIZE_16k];        // excitation for the high frequency band
     float hb_samples[AMRWB_SFR_SIZE_16k];    // filtered high-band samples from synthesis
     float hb_gain;
-    int sub, i, ret;
-
-    /* get output buffer */
-    frame->nb_samples = 4 * AMRWB_SFR_SIZE_16k;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
-        return ret;
-    buf_out = (float *)frame->data[0];
+    float *buf_out = (float *)frame->extended_data[ch];
 
     header_size      = decode_mime_header(ctx, buf);
     expected_fr_size = ((cf_sizes_wb[ctx->fr_cur_mode] + 7) >> 3) + 1;
@@ -1127,9 +1139,10 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
     if (ctx->fr_cur_mode == NO_DATA || !ctx->fr_quality) {
         /* The specification suggests a "random signal" and
            "a muting technique" to "gradually decrease the output level". */
-        av_samples_set_silence(&frame->data[0], 0, frame->nb_samples, 1, AV_SAMPLE_FMT_FLT);
-        *got_frame_ptr = 1;
-        return expected_fr_size;
+        av_samples_set_silence(&frame->extended_data[ch], 0, frame->nb_samples, 1, AV_SAMPLE_FMT_FLT);
+        buf += expected_fr_size;
+        buf_size -= expected_fr_size;
+        continue;
     }
     if (ctx->fr_cur_mode > MODE_SID) {
         av_log(avctx, AV_LOG_ERROR,
@@ -1270,9 +1283,13 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
     memcpy(ctx->isp_sub4_past, ctx->isp[3], LP_ORDER * sizeof(ctx->isp[3][0]));
     memcpy(ctx->isf_past_final, ctx->isf_cur, LP_ORDER * sizeof(float));
 
+        buf += expected_fr_size;
+        buf_size -= expected_fr_size;
+    }
+
     *got_frame_ptr = 1;
 
-    return expected_fr_size;
+    return avpkt->size;
 }
 
 const AVCodec ff_amrwb_decoder = {
@@ -1280,7 +1297,7 @@ const AVCodec ff_amrwb_decoder = {
     .long_name      = NULL_IF_CONFIG_SMALL("AMR-WB (Adaptive Multi-Rate WideBand)"),
     .type           = AVMEDIA_TYPE_AUDIO,
     .id             = AV_CODEC_ID_AMR_WB,
-    .priv_data_size = sizeof(AMRWBContext),
+    .priv_data_size = sizeof(AMRWBChannelsContext),
     .init           = amrwb_decode_init,
     .decode         = amrwb_decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
diff --git a/libavformat/amr.c b/libavformat/amr.c
index 8e79caee18..0a044d69af 100644
--- a/libavformat/amr.c
+++ b/libavformat/amr.c
@@ -21,13 +21,12 @@
 
 /*
 Write and read amr data according to RFC3267, http://www.ietf.org/rfc/rfc3267.txt?number=3267
-
-Only mono files are supported.
-
 */
 
 #include "libavutil/channel_layout.h"
+#include "libavutil/intreadwrite.h"
 #include "avformat.h"
+#include "avio_internal.h"
 #include "internal.h"
 #include "rawdec.h"
 #include "rawenc.h"
@@ -36,8 +35,10 @@ typedef struct AMRContext {
     FFRawDemuxerContext rawctx;
 } AMRContext;
 
-static const char AMR_header[]   = "#!AMR\n";
-static const char AMRWB_header[] = "#!AMR-WB\n";
+static const uint8_t AMR_header[6]      = "#!AMR\x0a";
+static const uint8_t AMRMC_header[12]   = "#!AMR_MC1.0\x0a";
+static const uint8_t AMRWB_header[9]    = "#!AMR-WB\x0a";
+static const uint8_t AMRWBMC_header[15] = "#!AMR-WB_MC1.0\x0a";
 
 static const uint8_t amrnb_packed_size[16] = {
     13, 14, 16, 18, 20, 21, 27, 32, 6, 1, 1, 1, 1, 1, 1, 1
@@ -69,7 +70,7 @@ static int amr_probe(const AVProbeData *p)
 {
     // Only check for "#!AMR" which could be amr-wb, amr-nb.
     // This will also trigger multichannel files: "#!AMR_MC1.0\n" and
-    // "#!AMR-WB_MC1.0\n" (not supported)
+    // "#!AMR-WB_MC1.0\n"
 
     if (!memcmp(p->buf, AMR_header, 5))
         return AVPROBE_SCORE_MAX;
@@ -82,35 +83,60 @@ static int amr_read_header(AVFormatContext *s)
 {
     AVIOContext *pb = s->pb;
     AVStream *st;
-    uint8_t header[9];
+    uint8_t header[19] = { 0 };
+    int read, back = 0, ret;
 
-    if (avio_read(pb, header, 6) != 6)
-        return AVERROR_INVALIDDATA;
+    ret = ffio_ensure_seekback(s->pb, sizeof(header));
+    if (ret < 0)
+        return ret;
+
+    read = avio_read(pb, header, sizeof(header));
+    if (read < 0)
+        return read;
 
     st = avformat_new_stream(s, NULL);
     if (!st)
         return AVERROR(ENOMEM);
 
-    if (memcmp(header, AMR_header, 6)) {
-        if (avio_read(pb, header + 6, 3) != 3)
-            return AVERROR_INVALIDDATA;
-        if (memcmp(header, AMRWB_header, 9)) {
-            return -1;
-        }
-
-        st->codecpar->codec_tag   = MKTAG('s', 'a', 'w', 'b');
-        st->codecpar->codec_id    = AV_CODEC_ID_AMR_WB;
-        st->codecpar->sample_rate = 16000;
-    } else {
+    if (!memcmp(header, AMR_header, sizeof(AMR_header))) {
         st->codecpar->codec_tag   = MKTAG('s', 'a', 'm', 'r');
         st->codecpar->codec_id    = AV_CODEC_ID_AMR_NB;
         st->codecpar->sample_rate = 8000;
+        st->codecpar->channels    = 1;
+        st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
+        back = read - sizeof(AMR_header);
+    } else if (!memcmp(header, AMRWB_header, sizeof(AMRWB_header))) {
+        st->codecpar->codec_tag   = MKTAG('s', 'a', 'w', 'b');
+        st->codecpar->codec_id    = AV_CODEC_ID_AMR_WB;
+        st->codecpar->sample_rate = 16000;
+        st->codecpar->channels    = 1;
+        st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
+        back = read - sizeof(AMRWB_header);
+    } else if (!memcmp(header, AMRMC_header, sizeof(AMRMC_header))) {
+        st->codecpar->codec_tag   = MKTAG('s', 'a', 'm', 'r');
+        st->codecpar->codec_id    = AV_CODEC_ID_AMR_NB;
+        st->codecpar->sample_rate = 8000;
+        st->codecpar->channels    = AV_RL32(header + 12);
+        back = read - 4 - sizeof(AMRMC_header);
+    } else if (!memcmp(header, AMRWBMC_header, sizeof(AMRWBMC_header))) {
+        st->codecpar->codec_tag   = MKTAG('s', 'a', 'w', 'b');
+        st->codecpar->codec_id    = AV_CODEC_ID_AMR_WB;
+        st->codecpar->sample_rate = 16000;
+        st->codecpar->channels    = AV_RL32(header + 15);
+        back = read - 4 - sizeof(AMRWBMC_header);
+    } else {
+        return AVERROR_INVALIDDATA;
     }
-    st->codecpar->channels       = 1;
-    st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
+
+    if (st->codecpar->channels < 1)
+        return AVERROR_INVALIDDATA;
+
     st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
     ffstream(st)->need_parsing = AVSTREAM_PARSE_FULL_RAW;
     avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+
+    if (back > 0)
+        avio_seek(pb, -back, SEEK_CUR);
+
     return 0;
 }
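
For reference, here is a standalone sketch (not part of the patch) of the interleaved frame layout the updated parser walks: in a multichannel AMR-NB stream each block carries one frame per channel back to back, and every frame starts with a ToC byte whose frame-type field (bits 3-6) indexes the packed-size table, copied below from libavcodec/amr_parser.c. The helper name and the sample buffer are illustrative assumptions, not FFmpeg API.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Packed frame sizes per AMR-NB frame type, as in libavcodec/amr_parser.c. */
static const uint8_t amrnb_packed_size[16] = {
    13, 14, 16, 18, 20, 21, 27, 32, 6, 1, 1, 1, 1, 1, 1, 1
};

/* Size in bytes of one block (one frame per channel), or 0 if the buffer
 * ends before every channel's frame is complete, i.e. the block continues
 * in the next input chunk. */
static size_t amrnb_block_size(const uint8_t *buf, size_t buf_size, int channels)
{
    size_t offset = 0;

    for (int ch = 0; ch < channels; ch++) {
        if (offset >= buf_size)
            return 0;
        int mode = (buf[offset] >> 3) & 0x0F;  /* frame type from the ToC byte */
        offset += amrnb_packed_size[mode];
    }
    return offset <= buf_size ? offset : 0;
}

int main(void)
{
    /* Two channels, both frame type 0 (4.75 kbit/s, 13 packed bytes each);
     * only the ToC bytes matter here, the payload is left zeroed. */
    uint8_t blob[26] = { 0x04 };
    blob[13] = 0x04;

    printf("block size: %zu bytes\n", amrnb_block_size(blob, sizeof(blob), 2));
    return 0;
}

This mirrors what amr_parse() does incrementally across calls: it remembers which channel it stopped at (current_channel) and how many bytes of the current frame are still missing (remaining), so a block split across input chunks is reassembled before being returned as a single packet.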