From f282c34c009e3653ec160c3880e64fc1a9300d0e Mon Sep 17 00:00:00 2001
From: Paul B Mahol
Date: Thu, 30 Sep 2021 14:18:22 +0200
Subject: [PATCH] avcodec/amr*dec: add multichannel support

---
 libavcodec/amr_parser.c | 44 +++++++++++++++++++-------
 libavcodec/amrnbdec.c   | 54 ++++++++++++++++++++-----------
 libavcodec/amrwbdec.c   | 59 +++++++++++++++++++++-------------
 libavformat/amr.c       | 70 ++++++++++++++++++++++++++++-------------
 4 files changed, 154 insertions(+), 73 deletions(-)

diff --git a/libavcodec/amr_parser.c b/libavcodec/amr_parser.c
index 2659cb40d7..79258d4d0c 100644
--- a/libavcodec/amr_parser.c
+++ b/libavcodec/amr_parser.c
@@ -39,9 +39,17 @@ typedef struct AMRParseContext {
     ParseContext pc;
     uint64_t cumulated_size;
     uint64_t block_count;
+    int current_channel;
     int remaining;
 } AMRParseContext;
 
+static av_cold int amr_parse_init(AVCodecParserContext *s1)
+{
+    AMRParseContext *s = s1->priv_data;
+    s->remaining = -1;
+    return 0;
+}
+
 static int amr_parse(AVCodecParserContext *s1,
                      AVCodecContext *avctx,
                      const uint8_t **poutbuf, int *poutbuf_size,
@@ -57,21 +65,34 @@ static int amr_parse(AVCodecParserContext *s1,
     if (s1->flags & PARSER_FLAG_COMPLETE_FRAMES) {
         next = buf_size;
     } else {
-        if (s->remaining) {
-            next = s->remaining;
-        } else {
-            int mode = (buf[0] >> 3) & 0x0F;
+        int ch, offset = 0;
 
-            if (avctx->codec_id == AV_CODEC_ID_AMR_NB) {
-                next = amrnb_packed_size[mode];
-            } else if (avctx->codec_id == AV_CODEC_ID_AMR_WB) {
-                next = amrwb_packed_size[mode];
+        for (ch = s->current_channel; ch < avctx->channels; ch++) {
+            if (s->remaining >= 0) {
+                next = s->remaining;
+            } else {
+                int mode = (buf[offset] >> 3) & 0x0F;
+
+                if (avctx->codec_id == AV_CODEC_ID_AMR_NB) {
+                    next = amrnb_packed_size[mode];
+                } else if (avctx->codec_id == AV_CODEC_ID_AMR_WB) {
+                    next = amrwb_packed_size[mode];
+                }
+            }
+
+            offset += next;
+            if (offset >= buf_size) {
+                s->remaining = offset - buf_size;
+                next = END_NOT_FOUND;
+                break;
+            } else {
+                s->remaining = -1;
             }
         }
 
-        s->remaining = next - FFMIN(buf_size, next);
-        if (s->remaining)
-            next = END_NOT_FOUND;
+        s->current_channel = ch % avctx->channels;
+        if (s->remaining < 0)
+            next = offset;
     }
     if (next != END_NOT_FOUND) {
         if (s->cumulated_size < UINT64_MAX - next) {
@@ -98,6 +119,7 @@ static int amr_parse(AVCodecParserContext *s1,
 const AVCodecParser ff_amr_parser = {
     .codec_ids      = { AV_CODEC_ID_AMR_NB, AV_CODEC_ID_AMR_WB },
     .priv_data_size = sizeof(AMRParseContext),
+    .parser_init    = amr_parse_init,
     .parser_parse   = amr_parse,
     .parser_close   = ff_parse_close,
 };
diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c
index e366a09976..472fa85f87 100644
--- a/libavcodec/amrnbdec.c
+++ b/libavcodec/amrnbdec.c
@@ -145,6 +145,10 @@ typedef struct AMRContext {
 
 } AMRContext;
 
+typedef struct AMRChannelsContext {
+    AMRContext ch[2];
+} AMRChannelsContext;
+
 /** Double version of ff_weighted_vector_sumf() */
 static void weighted_vector_sumd(double *out, const double *in_a,
                                  const double *in_b, double weight_coeff_a,
@@ -159,20 +163,24 @@ static av_cold int amrnb_decode_init(AVCodecContext *avctx)
 {
-    AMRContext *p = avctx->priv_data;
+    AMRChannelsContext *s = avctx->priv_data;
     int i;
 
-    if (avctx->channels > 1) {
-        avpriv_report_missing_feature(avctx, "multi-channel AMR");
+    if (avctx->channels > 2) {
+        avpriv_report_missing_feature(avctx, ">2 channel AMR");
         return AVERROR_PATCHWELCOME;
     }
 
-    avctx->channels       = 1;
-    avctx->channel_layout = AV_CH_LAYOUT_MONO;
+    if (!avctx->channels) {
+        avctx->channels       = 1;
+        avctx->channel_layout = AV_CH_LAYOUT_MONO;
+    }
     if (!avctx->sample_rate)
         avctx->sample_rate = 8000;
-    avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+
+    for (int ch = 0; ch < avctx->channels; ch++) {
+        AMRContext *p = &s->ch[ch];
 
     // p->excitation always points to the same position in p->excitation_buf
     p->excitation = &p->excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1];
@@ -188,6 +196,7 @@ static av_cold int amrnb_decode_init(AVCodecContext *avctx)
     ff_acelp_vectors_init(&p->acelpv_ctx);
     ff_celp_filter_init(&p->celpf_ctx);
     ff_celp_math_init(&p->celpm_ctx);
+    }
 
     return 0;
 }
@@ -949,25 +958,30 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
                               int *got_frame_ptr, AVPacket *avpkt)
 {
-    AMRContext *p = avctx->priv_data;         // pointer to private data
+    AMRChannelsContext *s = avctx->priv_data; // pointer to private data
     AVFrame *frame     = data;
     const uint8_t *buf = avpkt->data;
     int buf_size       = avpkt->size;
-    float *buf_out;                          // pointer to the output data buffer
-    int i, subframe, ret;
-    float fixed_gain_factor;
-    AMRFixed fixed_sparse = {0};             // fixed vector up to anti-sparseness processing
-    float spare_vector[AMR_SUBFRAME_SIZE];   // extra stack space to hold result from anti-sparseness processing
-    float synth_fixed_gain;                  // the fixed gain that synthesis should use
-    const float *synth_fixed_vector;         // pointer to the fixed vector that synthesis should use
+    int ret;
 
     /* get output buffer */
    frame->nb_samples = AMR_BLOCK_SIZE;
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    buf_out = (float *)frame->data[0];
+
+    for (int ch = 0; ch < avctx->channels; ch++) {
+        AMRContext *p = &s->ch[ch];
+        float fixed_gain_factor;
+        AMRFixed fixed_sparse = {0};           // fixed vector up to anti-sparseness processing
+        float spare_vector[AMR_SUBFRAME_SIZE]; // extra stack space to hold result from anti-sparseness processing
+        float synth_fixed_gain;                // the fixed gain that synthesis should use
+        const float *synth_fixed_vector;       // pointer to the fixed vector that synthesis should use
+        float *buf_out = (float *)frame->extended_data[ch];
+        int channel_size;
+        int i, subframe;
 
     p->cur_frame_mode = unpack_bitstream(p, buf, buf_size);
+    channel_size = frame_sizes_nb[p->cur_frame_mode] + 1; // +7 for rounding and +8 for TOC
     if (p->cur_frame_mode == NO_DATA) {
         av_log(avctx, AV_LOG_ERROR, "Corrupt bitstream\n");
         return AVERROR_INVALIDDATA;
     }
@@ -1072,11 +1086,13 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
      * qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71.
      */
     p->acelpv_ctx.weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3],
                                        0.84, 0.16, LP_FILTER_ORDER);
+
+        buf += channel_size;
+        buf_size -= channel_size;
+    }
 
     *got_frame_ptr = 1;
 
-    /* return the amount of bytes consumed if everything was OK */
-    return frame_sizes_nb[p->cur_frame_mode] + 1; // +7 for rounding and +8 for TOC
+    return avpkt->size;
 }
@@ -1085,10 +1101,10 @@ const AVCodec ff_amrnb_decoder = {
     .long_name      = NULL_IF_CONFIG_SMALL("AMR-NB (Adaptive Multi-Rate NarrowBand)"),
     .type           = AVMEDIA_TYPE_AUDIO,
     .id             = AV_CODEC_ID_AMR_NB,
-    .priv_data_size = sizeof(AMRContext),
+    .priv_data_size = sizeof(AMRChannelsContext),
     .init           = amrnb_decode_init,
     .decode         = amrnb_decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
-    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
+    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
                                                      AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index a6c1d894d7..998dd82791 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -93,21 +93,30 @@
 
 } AMRWBContext;
 
+typedef struct AMRWBChannelsContext {
+    AMRWBContext ch[2];
+} AMRWBChannelsContext;
+
 static av_cold int amrwb_decode_init(AVCodecContext *avctx)
 {
-    AMRWBContext *ctx = avctx->priv_data;
+    AMRWBChannelsContext *s = avctx->priv_data;
     int i;
 
-    if (avctx->channels > 1) {
-        avpriv_report_missing_feature(avctx, "multi-channel AMR");
+    if (avctx->channels > 2) {
+        avpriv_report_missing_feature(avctx, ">2 channel AMR");
         return AVERROR_PATCHWELCOME;
     }
 
-    avctx->channels       = 1;
-    avctx->channel_layout = AV_CH_LAYOUT_MONO;
+    if (!avctx->channels) {
+        avctx->channels       = 1;
+        avctx->channel_layout = AV_CH_LAYOUT_MONO;
+    }
     if (!avctx->sample_rate)
         avctx->sample_rate = 16000;
-    avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+
+    for (int ch = 0; ch < avctx->channels; ch++) {
+        AMRWBContext *ctx = &s->ch[ch];
 
     av_lfg_init(&ctx->prng, 1);
 
@@ -124,6 +133,7 @@ static av_cold int amrwb_decode_init(AVCodecContext *avctx)
     ff_acelp_vectors_init(&ctx->acelpv_ctx);
     ff_celp_filter_init(&ctx->celpf_ctx);
     ff_celp_math_init(&ctx->celpm_ctx);
+    }
 
     return 0;
 }
@@ -1094,13 +1104,21 @@ static void update_sub_state(AMRWBContext *ctx)
 static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
                               int *got_frame_ptr, AVPacket *avpkt)
 {
-    AMRWBContext *ctx  = avctx->priv_data;
+    AMRWBChannelsContext *s = avctx->priv_data;
     AVFrame *frame     = data;
-    AMRWBFrame *cf     = &ctx->frame;
     const uint8_t *buf = avpkt->data;
     int buf_size       = avpkt->size;
+    int sub, i, ret;
+
+    /* get output buffer */
+    frame->nb_samples = 4 * AMRWB_SFR_SIZE_16k;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
+    for (int ch = 0; ch < avctx->channels; ch++) {
+        AMRWBContext *ctx = &s->ch[ch];
+        AMRWBFrame *cf = &ctx->frame;
     int expected_fr_size, header_size;
-    float *buf_out;
     float spare_vector[AMRWB_SFR_SIZE];      // extra stack space to hold result from anti-sparseness processing
     float fixed_gain_factor;                 // fixed gain correction factor (gamma)
     float *synth_fixed_vector;               // pointer to the fixed vector that synthesis should use
@@ -1110,13 +1128,7 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
     float hb_exc[AMRWB_SFR_SIZE_16k];        // excitation for the high frequency band
     float hb_samples[AMRWB_SFR_SIZE_16k];    // filtered high-band samples from synthesis
     float hb_gain;
-    int sub, i, ret;
-
-    /* get output buffer */
-    frame->nb_samples = 4 * AMRWB_SFR_SIZE_16k;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
-        return ret;
-    buf_out = (float *)frame->data[0];
+    float *buf_out = (float *)frame->extended_data[ch];
 
     header_size      = decode_mime_header(ctx, buf);
     expected_fr_size = ((cf_sizes_wb[ctx->fr_cur_mode] + 7) >> 3) + 1;
@@ -1127,9 +1139,10 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
     if (ctx->fr_cur_mode == NO_DATA || !ctx->fr_quality) {
         /* The specification suggests a "random signal" and
            "a muting technique" to "gradually decrease the output level". */
-        av_samples_set_silence(&frame->data[0], 0, frame->nb_samples, 1, AV_SAMPLE_FMT_FLT);
-        *got_frame_ptr = 1;
-        return expected_fr_size;
+        av_samples_set_silence(&frame->extended_data[ch], 0, frame->nb_samples, 1, AV_SAMPLE_FMT_FLT);
+        buf += expected_fr_size;
+        buf_size -= expected_fr_size;
+        continue;
     }
     if (ctx->fr_cur_mode > MODE_SID) {
         av_log(avctx, AV_LOG_ERROR,
@@ -1270,9 +1283,13 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
     memcpy(ctx->isp_sub4_past, ctx->isp[3], LP_ORDER * sizeof(ctx->isp[3][0]));
     memcpy(ctx->isf_past_final, ctx->isf_cur, LP_ORDER * sizeof(float));
 
+        buf += expected_fr_size;
+        buf_size -= expected_fr_size;
+    }
+
     *got_frame_ptr = 1;
 
-    return expected_fr_size;
+    return avpkt->size;
 }
 
 const AVCodec ff_amrwb_decoder = {
@@ -1280,7 +1297,7 @@ const AVCodec ff_amrwb_decoder = {
     .long_name      = NULL_IF_CONFIG_SMALL("AMR-WB (Adaptive Multi-Rate WideBand)"),
     .type           = AVMEDIA_TYPE_AUDIO,
     .id             = AV_CODEC_ID_AMR_WB,
-    .priv_data_size = sizeof(AMRWBContext),
+    .priv_data_size = sizeof(AMRWBChannelsContext),
     .init           = amrwb_decode_init,
     .decode         = amrwb_decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
diff --git a/libavformat/amr.c b/libavformat/amr.c
index 8e79caee18..0a044d69af 100644
--- a/libavformat/amr.c
+++ b/libavformat/amr.c
@@ -21,13 +21,12 @@
 
 /*
 Write and read amr data according to RFC3267, http://www.ietf.org/rfc/rfc3267.txt?number=3267
-
-Only mono files are supported.
-
 */
 
 #include "libavutil/channel_layout.h"
+#include "libavutil/intreadwrite.h"
 #include "avformat.h"
+#include "avio_internal.h"
 #include "internal.h"
 #include "rawdec.h"
 #include "rawenc.h"
@@ -36,8 +35,10 @@ typedef struct AMRContext {
     FFRawDemuxerContext rawctx;
 } AMRContext;
 
-static const char AMR_header[]   = "#!AMR\n";
-static const char AMRWB_header[] = "#!AMR-WB\n";
+static const uint8_t AMR_header[6]      = "#!AMR\x0a";
+static const uint8_t AMRMC_header[12]   = "#!AMR_MC1.0\x0a";
+static const uint8_t AMRWB_header[9]    = "#!AMR-WB\x0a";
+static const uint8_t AMRWBMC_header[15] = "#!AMR-WB_MC1.0\x0a";
 
 static const uint8_t amrnb_packed_size[16] = {
     13, 14, 16, 18, 20, 21, 27, 32, 6, 1, 1, 1, 1, 1, 1, 1
@@ -69,7 +70,7 @@ static int amr_probe(const AVProbeData *p)
 {
     // Only check for "#!AMR" which could be amr-wb, amr-nb.
     // This will also trigger multichannel files: "#!AMR_MC1.0\n" and
-    // "#!AMR-WB_MC1.0\n" (not supported)
+    // "#!AMR-WB_MC1.0\n"
 
     if (!memcmp(p->buf, AMR_header, 5))
         return AVPROBE_SCORE_MAX;
@@ -82,35 +83,60 @@ static int amr_read_header(AVFormatContext *s)
 {
     AVIOContext *pb = s->pb;
     AVStream *st;
-    uint8_t header[9];
+    uint8_t header[19] = { 0 };
+    int read, back = 0, ret;
 
-    if (avio_read(pb, header, 6) != 6)
-        return AVERROR_INVALIDDATA;
+    ret = ffio_ensure_seekback(s->pb, sizeof(header));
+    if (ret < 0)
+        return ret;
+
+    read = avio_read(pb, header, sizeof(header));
+    if (read < 0)
+        return read;
 
     st = avformat_new_stream(s, NULL);
     if (!st)
         return AVERROR(ENOMEM);
 
-    if (memcmp(header, AMR_header, 6)) {
-        if (avio_read(pb, header + 6, 3) != 3)
-            return AVERROR_INVALIDDATA;
-        if (memcmp(header, AMRWB_header, 9)) {
-            return -1;
-        }
-
-        st->codecpar->codec_tag   = MKTAG('s', 'a', 'w', 'b');
-        st->codecpar->codec_id    = AV_CODEC_ID_AMR_WB;
-        st->codecpar->sample_rate = 16000;
-    } else {
+    if (!memcmp(header, AMR_header, sizeof(AMR_header))) {
         st->codecpar->codec_tag   = MKTAG('s', 'a', 'm', 'r');
         st->codecpar->codec_id    = AV_CODEC_ID_AMR_NB;
         st->codecpar->sample_rate = 8000;
+        st->codecpar->channels    = 1;
+        st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
+        back = read - sizeof(AMR_header);
+    } else if (!memcmp(header, AMRWB_header, sizeof(AMRWB_header))) {
+        st->codecpar->codec_tag   = MKTAG('s', 'a', 'w', 'b');
+        st->codecpar->codec_id    = AV_CODEC_ID_AMR_WB;
+        st->codecpar->sample_rate = 16000;
+        st->codecpar->channels    = 1;
+        st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
+        back = read - sizeof(AMRWB_header);
+    } else if (!memcmp(header, AMRMC_header, sizeof(AMRMC_header))) {
+        st->codecpar->codec_tag   = MKTAG('s', 'a', 'm', 'r');
+        st->codecpar->codec_id    = AV_CODEC_ID_AMR_NB;
+        st->codecpar->sample_rate = 8000;
+        st->codecpar->channels    = AV_RL32(header + 12);
+        back = read - 4 - sizeof(AMRMC_header);
+    } else if (!memcmp(header, AMRWBMC_header, sizeof(AMRWBMC_header))) {
+        st->codecpar->codec_tag   = MKTAG('s', 'a', 'w', 'b');
+        st->codecpar->codec_id    = AV_CODEC_ID_AMR_WB;
+        st->codecpar->sample_rate = 16000;
+        st->codecpar->channels    = AV_RL32(header + 15);
+        back = read - 4 - sizeof(AMRWBMC_header);
+    } else {
+        return AVERROR_INVALIDDATA;
     }
-    st->codecpar->channels       = 1;
-    st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
+
+    if (st->codecpar->channels < 1)
+        return AVERROR_INVALIDDATA;
+
     st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
     ffstream(st)->need_parsing = AVSTREAM_PARSE_FULL_RAW;
     avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+
+    if (back > 0)
+        avio_seek(pb, -back, SEEK_CUR);
+
     return 0;
 }
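
For reference, here is a standalone sketch (not part of the patch) of the interleaved frame layout the updated parser walks: in a multichannel AMR-NB stream each block carries one frame per channel back to back, and every frame starts with a ToC byte whose frame-type field (bits 3-6) indexes the packed-size table, copied below from libavcodec/amr_parser.c. The helper name and the sample buffer are illustrative assumptions, not FFmpeg API.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Packed frame sizes per AMR-NB frame type, as in libavcodec/amr_parser.c. */
static const uint8_t amrnb_packed_size[16] = {
    13, 14, 16, 18, 20, 21, 27, 32, 6, 1, 1, 1, 1, 1, 1, 1
};

/* Size in bytes of one block (one frame per channel), or 0 if the buffer
 * ends before every channel's frame is complete, i.e. the block continues
 * in the next input chunk. */
static size_t amrnb_block_size(const uint8_t *buf, size_t buf_size, int channels)
{
    size_t offset = 0;

    for (int ch = 0; ch < channels; ch++) {
        if (offset >= buf_size)
            return 0;
        int mode = (buf[offset] >> 3) & 0x0F;  /* frame type from the ToC byte */
        offset += amrnb_packed_size[mode];
    }
    return offset <= buf_size ? offset : 0;
}

int main(void)
{
    /* Two channels, both frame type 0 (4.75 kbit/s, 13 packed bytes each);
     * only the ToC bytes matter here, the payload is left zeroed. */
    uint8_t blob[26] = { 0x04 };
    blob[13] = 0x04;

    printf("block size: %zu bytes\n", amrnb_block_size(blob, sizeof(blob), 2));
    return 0;
}

This mirrors what amr_parse() does incrementally across calls: it remembers which channel it stopped at (current_channel) and how many bytes of the current frame are still missing (remaining), so a block split across input chunks is reassembled before being returned as a single packet.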