/*
 * WavArc audio decoder
 * Copyright (c) 2023 Paul B Mahol
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "avcodec.h"
#include "codec_internal.h"
#include "decode.h"
#include "get_bits.h"
#include "mathops.h"
#include "unary.h"

/**
 * Private decoder state shared by all WavArc compression modes handled
 * in this file.
 */
typedef struct WavArcContext {
    AVClass *av_class;

    /* Bit reader set up over the buffered input on every decode call. */
    GetBitContext gb;

    /* Shift value read from the bitstream; output samples are multiplied
     * by (1 << shift) and carried-over history is shifted right by it. */
    int shift;
    /* Samples per channel in the current block; initialised from the codec
     * tag and possibly changed by the bitstream. */
    int nb_samples;
    /* Index into samples[ch] where the output of a block starts
     * (0, 4 or 70 depending on the codec tag). */
    int offset;
    /* Bytes per sample across all channels (channels * bytes per sample). */
    int align;

    /* Set once an end-of-stream block was parsed; no more frames follow. */
    int eof;
    /* Bit offset into the first buffered byte, left over from the
     * previous decode call. */
    int skip;
    /* Input accumulation buffer of max_framesize + padding bytes. */
    uint8_t *bitstream;
    /* Amount of input, in bytes, gathered before decoding is attempted. */
    int64_t max_framesize;
    /* Number of valid bytes currently held in bitstream. */
    int bitstream_size;
    /* Offset of the first valid byte inside bitstream. */
    int bitstream_index;

    /* Tail samples saved by do_stereo() for use with the next block. */
    int pred[2][70];
    /* Per-channel prediction filter coefficients read from the bitstream. */
    int filter[2][70];
    /* Per-channel work buffer: carried-over history followed by the
     * samples of the current block. */
    int samples[2][640];
    /* Per-symbol frequencies of the arithmetic coder model. */
    uint8_t model[256];
    /* Cumulative frequencies derived from model[]; freqs[256] is the total. */
    uint16_t freqs[257];
    /* Arithmetic decoder: current code value and interval bounds. */
    uint16_t ac_value;
    uint16_t ac_low;
    uint16_t ac_high;
    /* Cumulative frequency bounds of the most recently mapped symbol. */
    uint16_t range_high;
    uint16_t range_low;
    /* Total frequency of the current model (copy of freqs[256]). */
    uint16_t freq_range;
    /* Scratch copy of the 70 history samples used by decode_5elp(). */
    int ac_pred[70];
    /* Arithmetic-decoded values (symbol - 0x80) of the current block. */
    int ac_out[570];
} WavArcContext;

/**
 * Initialise the decoder from the extradata and the codec tag.
 *
 * The extradata must be at least 52 bytes long and carry the tags
 * 'RIFF', 'WAVE' and 'fmt ' at byte offsets 16, 24 and 28. The channel
 * count (1 or 2), sample rate and bits per sample are read from offsets
 * 38, 40 and 50 respectively.
 * NOTE(review): presumably this is the original WAV header stored by the
 * demuxer - confirm against the demuxer.
 *
 * @return 0 on success, AVERROR_INVALIDDATA for malformed or unsupported
 *         parameters, AVERROR(ENOMEM) if the input buffer cannot be
 *         allocated
 */
static av_cold int wavarc_init(AVCodecContext *avctx)
{
    WavArcContext *s = avctx->priv_data;

    if (avctx->extradata_size < 52)
        return AVERROR_INVALIDDATA;
    if (AV_RL32(avctx->extradata + 16) != MKTAG('R','I','F','F'))
        return AVERROR_INVALIDDATA;
    if (AV_RL32(avctx->extradata + 24) != MKTAG('W','A','V','E'))
        return AVERROR_INVALIDDATA;
    if (AV_RL32(avctx->extradata + 28) != MKTAG('f','m','t',' '))
        return AVERROR_INVALIDDATA;
    if (AV_RL16(avctx->extradata + 38) != 1 &&
        AV_RL16(avctx->extradata + 38) != 2)
        return AVERROR_INVALIDDATA;

    av_channel_layout_uninit(&avctx->ch_layout);
    av_channel_layout_default(&avctx->ch_layout, AV_RL16(avctx->extradata + 38));
    avctx->sample_rate = AV_RL32(avctx->extradata + 40);

    s->align = avctx->ch_layout.nb_channels;

    switch (AV_RL16(avctx->extradata + 50)) {
    case  8: avctx->sample_fmt = AV_SAMPLE_FMT_U8P;  break;
    case 16: s->align *= 2;
             avctx->sample_fmt = AV_SAMPLE_FMT_S16P; break;
    default:
        /* Only 8 and 16 bit output is implemented; any other depth would
         * leave sample_fmt unset and make every decode call fail later. */
        av_log(avctx, AV_LOG_ERROR, "unsupported bits per sample: %d\n",
               (int)AV_RL16(avctx->extradata + 50));
        return AVERROR_INVALIDDATA;
    }

    s->shift = 0;
    /* The codec tag selects the compression mode, its default block
     * length and how many history samples precede the block output. */
    switch (avctx->codec_tag) {
    case MKTAG('0','C','P','Y'):
        s->nb_samples = 640;
        s->offset = 0;
        break;
    case MKTAG('1','D','I','F'):
        s->nb_samples = 256;
        s->offset = 4;
        break;
    case MKTAG('2','S','L','P'):
    case MKTAG('3','N','L','P'):
    case MKTAG('4','A','L','P'):
    case MKTAG('5','E','L','P'):
        s->nb_samples = 570;
        s->offset = 70;
        break;
    default:
        return AVERROR_INVALIDDATA;
    }

    /* Allocated last so that none of the error returns above leak it. */
    s->max_framesize = s->nb_samples * 16;
    s->bitstream = av_calloc(s->max_framesize + AV_INPUT_BUFFER_PADDING_SIZE, sizeof(*s->bitstream));
    if (!s->bitstream)
        return AVERROR(ENOMEM);

    return 0;
}

/**
 * Read an unsigned Rice-style code: a unary-coded prefix (bounded by the
 * number of bits left) followed by a k-bit suffix.
 *
 * @return (prefix << k) | suffix
 */
static unsigned get_urice(GetBitContext *gb, int k)
{
    const unsigned prefix = get_unary(gb, 1, get_bits_left(gb));
    const unsigned suffix = get_bits_long(gb, k);

    return (prefix << k) | suffix;
}

/**
 * Read a signed Rice-style code.
 *
 * The unsigned code from get_urice() carries the sign in its lowest bit:
 * even codes map to code / 2, odd codes to the bitwise complement of
 * code / 2.
 */
static int get_srice(GetBitContext *gb, int k)
{
    const unsigned code = get_urice(gb, k);
    const int half = code >> 1;

    if (code & 1)
        return ~half;

    return half;
}

/**
 * Stereo bookkeeping, called once after each channel of a block.
 *
 * After channel 0 (ch == 0): the last len samples of channel 0 are moved
 * to the start of its buffer, and the start of channel 1's buffer is
 * loaded from pred[1] (correlated) or pred[0] (not correlated); both are
 * shifted right by s->shift.
 *
 * After channel 1: if correlated, channel 0 is added onto channel 1 for
 * the whole block; then the last len samples of channel 1 are saved in
 * pred[0] and their difference to channel 0 in pred[1].
 *
 * @param len number of history samples kept in front of each block
 */
static void do_stereo(WavArcContext *s, int ch, int correlated, int len)
{
    const int count = s->nb_samples;
    const int shift = s->shift;
    int *const first  = s->samples[0];
    int *const second = s->samples[1];

    if (ch != 0) {
        if (correlated) {
            for (int i = len; i < count + len; i++)
                second[i] += (unsigned)first[i];
        }

        for (int i = 0; i < len; i++) {
            const int tail = second[count + i];

            s->pred[0][i] = tail;
            s->pred[1][i] = tail - (unsigned)first[count + i];
        }
        return;
    }

    {
        const int *const history = correlated ? s->pred[1] : s->pred[0];

        for (int i = 0; i < len; i++) {
            first[i]  = first[count + i] >> shift;
            second[i] = history[i] >> shift;
        }
    }
}

/**
 * Decode a block of uncompressed ('0CPY') audio.
 *
 * As many interleaved sample frames as are available (at most 640) are
 * read: 8-bit samples have 0x80 subtracted, 16-bit samples are
 * byte-swapped and sign-extended.
 *
 * @return always 0
 */
static int decode_0cpy(AVCodecContext *avctx,
                       WavArcContext *s, GetBitContext *gb)
{
    const int channels   = avctx->ch_layout.nb_channels;
    const int frame_bits = 8 * s->align;
    const int available  = get_bits_left(gb) / frame_bits;

    s->nb_samples = available < 640 ? available : 640;

    if (avctx->sample_fmt == AV_SAMPLE_FMT_U8P) {
        for (int i = 0; i < s->nb_samples; i++) {
            for (int c = 0; c < channels; c++)
                s->samples[c][i] = get_bits(gb, 8) - 0x80;
        }
    } else if (avctx->sample_fmt == AV_SAMPLE_FMT_S16P) {
        for (int i = 0; i < s->nb_samples; i++) {
            for (int c = 0; c < channels; c++)
                s->samples[c][i] = sign_extend(av_bswap16(get_bits(gb, 16)), 16);
        }
    }

    return 0;
}

/**
 * Decode one block of '1DIF' audio (all channels).
 *
 * Each channel buffer starts with 4 history samples followed by the new
 * samples. Blocks are introduced by a Rice-coded type:
 *   0     raw Rice-coded samples
 *   1-3   Rice-coded residual on a first, second or third order
 *         difference predictor
 *   4     all-zero block
 *   5     constant block
 *   6     set the sample shift
 *   7     set the block length
 *   8     end of stream
 * Types 6 and 7 only change state; parsing continues with the next block.
 *
 * @return 0 on success, AVERROR_EOF on the end-of-stream block,
 *         AVERROR_INVALIDDATA on damaged input
 */
static int decode_1dif(AVCodecContext *avctx,
                       WavArcContext *s, GetBitContext *gb)
{
    const int is_stereo = avctx->ch_layout.nb_channels == 2;
    const int is_mono   = avctx->ch_layout.nb_channels == 1;
    int correlated, fill;
    int ch = 0, done = 0;

    do {
        /* cur[0] is the first new sample, cur[-1] .. cur[-4] the history */
        int *cur = s->samples[ch] + 4;
        int rice_k, type;

        if (get_bits_left(gb) <= 0)
            return AVERROR_INVALIDDATA;

        type = get_urice(gb, 1);
        if (type >= 0 && type <= 3) {
            /* Rice parameter for the residual of this block */
            rice_k = get_urice(gb, 1 + (avctx->sample_fmt == AV_SAMPLE_FMT_S16P)) + 1;
            if (rice_k >= 32)
                return AVERROR_INVALIDDATA;
        }

        switch (type) {
        case 0:
            for (int i = 0; i < s->nb_samples; i++)
                cur[i] = get_srice(gb, rice_k);
            done = 1;
            break;
        case 1:
            for (int i = 0; i < s->nb_samples; i++)
                cur[i] = get_srice(gb, rice_k) + (unsigned)cur[i - 1];
            done = 1;
            break;
        case 2:
            for (int i = 0; i < s->nb_samples; i++)
                cur[i] = get_srice(gb, rice_k) + (cur[i - 1] * 2U - cur[i - 2]);
            done = 1;
            break;
        case 3:
            for (int i = 0; i < s->nb_samples; i++)
                cur[i] = get_srice(gb, rice_k) + (cur[i - 1] - (unsigned)cur[i - 2]) * 3 +
                         cur[i - 3];
            done = 1;
            break;
        case 4:
            for (int i = 0; i < s->nb_samples; i++)
                cur[i] = 0;
            done = 1;
            break;
        case 5:
            if (avctx->sample_fmt == AV_SAMPLE_FMT_U8P)
                fill = (int8_t)get_bits(gb, 8) - 0x80;
            else
                fill = (int16_t)get_bits(gb, 16) - 0x8000;

            for (int i = 0; i < s->nb_samples; i++)
                cur[i] = fill;
            done = 1;
            break;
        case 6:
            s->shift = get_urice(gb, 2);
            if ((unsigned)s->shift > 31) {
                s->shift = 0;
                return AVERROR_INVALIDDATA;
            }
            continue;
        case 7:
            s->nb_samples = get_bits(gb, 8);
            continue;
        case 8:
            s->eof = 1;
            return AVERROR_EOF;
        default:
            return AVERROR_INVALIDDATA;
        }

        if (done == 1 && is_stereo) {
            /* the correlation flag follows the first channel only */
            if (ch == 0)
                correlated = get_bits1(gb);
            done = ch != 0;
            do_stereo(s, ch, correlated, 4);
            ch = 1;
        }
    } while (!done);

    if (is_mono) {
        /* keep the last 4 samples as history for the next block */
        for (int i = 0; i < 4; i++)
            s->samples[0][i] = s->samples[0][s->nb_samples + i];
    }

    return 0;
}

/**
 * Decode one block of '2SLP' / '3NLP' / '4ALP' audio (all channels).
 *
 * Each channel buffer starts with 70 history samples followed by the new
 * samples. Blocks are introduced by a Rice-coded type:
 *   0     Rice-coded residual on a linear predictor whose order and
 *         coefficients are read from the bitstream
 *   1     Rice-coded residual on a first order difference predictor
 *   2     raw Rice-coded samples
 *   3, 4  Rice-coded residual on a second / third order difference
 *         predictor
 *   5     all-zero block
 *   6     constant block
 *   7     set the sample shift
 *   8     set the block length (at most 570)
 *   9     end of stream
 * Types 7 and 8 only change state; parsing continues with the next block.
 *
 * @return 0 on success, AVERROR_EOF on the end-of-stream block,
 *         AVERROR_INVALIDDATA on damaged input
 */
static int decode_2slp(AVCodecContext *avctx,
                       WavArcContext *s, GetBitContext *gb)
{
    const int is_stereo = avctx->ch_layout.nb_channels == 2;
    const int is_mono   = avctx->ch_layout.nb_channels == 1;
    int correlated, fill, taps;
    int ch = 0, done = 0;

    do {
        /* cur[0] is the first new sample, cur[-1] .. cur[-70] the history */
        int *cur = s->samples[ch] + 70;
        int rice_k, type;

        if (get_bits_left(gb) <= 0)
            return AVERROR_INVALIDDATA;

        type = get_urice(gb, 1);
        if (type >= 0 && type <= 4) {
            /* Rice parameter for the residual of this block */
            rice_k = get_urice(gb, 1 + (avctx->sample_fmt == AV_SAMPLE_FMT_S16P)) + 1;
            if (rice_k >= 32)
                return AVERROR_INVALIDDATA;
        }

        switch (type) {
        case 0:
            taps = get_urice(gb, 2);
            if ((unsigned)taps > FF_ARRAY_ELEMS(s->filter[ch]))
                return AVERROR_INVALIDDATA;
            for (int t = 0; t < taps; t++)
                s->filter[ch][t] = get_srice(gb, 2);

            for (int i = 0; i < s->nb_samples; i++) {
                int acc = 15;

                for (int t = 0; t < taps; t++)
                    acc += s->filter[ch][t] * (unsigned)cur[i - 1 - t];

                cur[i] = get_srice(gb, rice_k) + (unsigned)(acc >> 4);
            }
            done = 1;
            break;
        case 1:
            for (int i = 0; i < s->nb_samples; i++)
                cur[i] = get_srice(gb, rice_k) + (unsigned)cur[i - 1];
            done = 1;
            break;
        case 2:
            for (int i = 0; i < s->nb_samples; i++)
                cur[i] = get_srice(gb, rice_k);
            done = 1;
            break;
        case 3:
            for (int i = 0; i < s->nb_samples; i++)
                cur[i] = get_srice(gb, rice_k) + (cur[i - 1] * 2U - cur[i - 2]);
            done = 1;
            break;
        case 4:
            for (int i = 0; i < s->nb_samples; i++)
                cur[i] = get_srice(gb, rice_k) + (cur[i - 1] - (unsigned)cur[i - 2]) * 3 +
                         cur[i - 3];
            done = 1;
            break;
        case 5:
            for (int i = 0; i < s->nb_samples; i++)
                cur[i] = 0;
            done = 1;
            break;
        case 6:
            if (avctx->sample_fmt == AV_SAMPLE_FMT_U8P)
                fill = (int8_t)get_bits(gb, 8) - 0x80;
            else
                fill = (int16_t)get_bits(gb, 16) - 0x8000;

            for (int i = 0; i < s->nb_samples; i++)
                cur[i] = fill;
            done = 1;
            break;
        case 7:
            s->shift = get_urice(gb, 2);
            if ((unsigned)s->shift > 31) {
                s->shift = 0;
                return AVERROR_INVALIDDATA;
            }
            continue;
        case 8:
            s->nb_samples = get_urice(gb, 8);
            if (s->nb_samples > 570U) {
                s->nb_samples = 570;
                return AVERROR_INVALIDDATA;
            }
            continue;
        case 9:
            s->eof = 1;
            return AVERROR_EOF;
        default:
            return AVERROR_INVALIDDATA;
        }

        if (done == 1 && is_stereo) {
            /* the correlation flag follows the first channel only */
            if (ch == 0)
                correlated = get_bits1(gb);
            done = ch != 0;
            do_stereo(s, ch, correlated, 70);
            ch = 1;
        }
    } while (!done);

    if (is_mono) {
        /* keep the last 70 samples as history for the next block */
        for (int i = 0; i < 70; i++)
            s->samples[0][i] = s->samples[0][s->nb_samples + i];
    }

    return 0;
}

/**
 * Reset the arithmetic decoder: full [0, 0xffff] interval, first 16 bits
 * of the stream as code value, and the model total taken from freqs[256].
 *
 * @return 0 on success, AVERROR_INVALIDDATA if the model total is zero
 */
static int ac_init(AVCodecContext *avctx,
                   WavArcContext *s, GetBitContext *gb)
{
    const unsigned total = s->freqs[256];

    s->ac_low     = 0;
    s->ac_high    = 0xffffu;
    s->ac_value   = get_bits(gb, 16);
    s->freq_range = total;

    return total ? 0 : AVERROR_INVALIDDATA;
}

/**
 * Scale the position of the code value inside the current interval to the
 * cumulative frequency range of the model.
 */
static uint16_t ac_get_prob(WavArcContext *s)
{
    const unsigned total  = s->freq_range;
    const unsigned scaled = (total - 1) + (s->ac_value - s->ac_low) * total;
    const unsigned width  = (s->ac_high - s->ac_low) + 1U;

    return scaled / width;
}

/**
 * Find the highest symbol whose cumulative frequency does not exceed prob
 * and remember its cumulative frequency bounds for ac_normalize().
 *
 * @return the decoded symbol
 */
static uint8_t ac_map_symbol(WavArcContext *s, uint16_t prob)
{
    int sym;

    /* search downwards from the last symbol */
    for (sym = 255; prob < s->freqs[sym]; sym--)
        ;

    s->range_low  = s->freqs[sym];
    s->range_high = s->freqs[sym + 1];

    return sym;
}

/**
 * Narrow the arithmetic decoder interval to the sub-range of the symbol
 * selected by ac_map_symbol(), then renormalise it, shifting one new bit
 * into the code value for every doubling of the interval.
 *
 * @return 0 on success, AVERROR_INVALIDDATA if the decoder state becomes
 *         inconsistent or the input runs out
 */
static int ac_normalize(AVCodecContext *avctx, WavArcContext *s, GetBitContext *gb)
{
    unsigned span, total;

    if (s->ac_low > s->ac_high)
        goto fail;

    span  = (s->ac_high - s->ac_low) + 1;
    total = s->freq_range;
    /* the new upper bound is derived from the old lower bound, so it has
     * to be computed before ac_low is advanced */
    s->ac_high = span * s->range_high / total + s->ac_low - 1;
    s->ac_low += span * s->range_low  / total;

    if (s->ac_low > s->ac_high)
        goto fail;

    while (1) {
        const int top_differs = (s->ac_high & 0x8000) != (s->ac_low & 0x8000);

        if (top_differs) {
            /* finished unless low has bit 14 set and high has it clear */
            if (!(s->ac_low & 0x4000) || (s->ac_high & 0x4000))
                return 0;
            s->ac_value ^= 0x4000;
            s->ac_low   &= 0x3fff;
            s->ac_high  |= 0x4000;
        }

        s->ac_low <<= 1;
        s->ac_high  = (s->ac_high << 1) | 1;
        if (s->ac_low > s->ac_high)
            goto fail;

        if (get_bits_left(gb) <= 0) {
            av_log(avctx, AV_LOG_ERROR, "overread in arithmetic coder\n");
            goto fail;
        }

        s->ac_value = (s->ac_value << 1) + get_bits1(gb);
        if (s->ac_value < s->ac_low || s->ac_value > s->ac_high)
            goto fail;
    }

fail:
    av_log(avctx, AV_LOG_ERROR, "invalid state\n");
    return AVERROR_INVALIDDATA;
}

/**
 * Build the cumulative frequency table: freqs[0] is 0 and freqs[i + 1]
 * is the sum of model[0] .. model[i].
 */
static void ac_init_model(WavArcContext *s)
{
    uint16_t running = 0;

    memset(s->freqs, 0, sizeof(s->freqs));

    for (int sym = 0; sym < 256; sym++) {
        running += s->model[sym];
        s->freqs[sym + 1] = running;
    }
}

/**
 * Read the symbol frequency model of the arithmetic coder.
 *
 * The model is stored as runs: a first and a last symbol index (8 bits
 * each) followed by one 8-bit frequency per symbol of the run. A first
 * index of zero after a run ends the list. Symbols not covered by any run
 * keep a frequency of zero. The cumulative table is rebuilt afterwards.
 *
 * @return 0 on success, AVERROR_INVALIDDATA if the input is truncated
 */
static int ac_read_model(AVCodecContext *avctx,
                         WavArcContext *s,
                         GetBitContext *gb)
{
    unsigned first, last;

    memset(s->model, 0, sizeof(s->model));

    first = get_bits(gb, 8);
    do {
        last = get_bits(gb, 8);

        for (; first <= last; first++) {
            if (get_bits_left(gb) < 8)
                return AVERROR_INVALIDDATA;
            s->model[first] = get_bits(gb, 8);
        }

        if (get_bits_left(gb) < 8)
            return AVERROR_INVALIDDATA;

        first = get_bits(gb, 8);
    } while (first);

    ac_init_model(s);

    return 0;
}

/**
 * Decode one block of '5ELP' audio (all channels).
 *
 * Each channel buffer starts with 70 history samples followed by the new
 * samples. Blocks are introduced by a Rice-coded type. Types 0-7 carry a
 * Rice-coded residual, types 13-20 carry an arithmetic-coded residual
 * (stored in ac_out[]) and otherwise use the same predictor as the type
 * 13 lower:
 *   0 / 13  linear predictor read from the bitstream
 *   1 / 14  linear predictor run on a zeroed history, then first order
 *           integration
 *   2 / 15  linear predictor run on a zeroed history, then second order
 *           integration
 *   3 / 16  first order difference predictor
 *   4 / 17  no prediction
 *   5 / 18  second order difference predictor
 *   6 / 19  linear predictor run on a zeroed history, then third order
 *           integration
 *   7 / 20  third order difference predictor
 *   8       all-zero block
 *   9       constant block
 *   10      set the sample shift
 *   11      set the block length (at most 570)
 *   12      end of stream
 * Types 10 and 11 only change state; parsing continues with the next
 * block.
 *
 * All sample accumulation is carried out in unsigned arithmetic so that
 * damaged input wraps around instead of triggering signed overflow.
 *
 * @return 0 on success, AVERROR_EOF on the end-of-stream block,
 *         AVERROR_INVALIDDATA on damaged input
 */
static int decode_5elp(AVCodecContext *avctx,
                       WavArcContext *s, GetBitContext *gb)
{
    int ch, finished, fill, correlated, order = 0;

    ch = 0;
    finished = 0;
    while (!finished) {
        int *samples = s->samples[ch];
        int *ac_pred = s->ac_pred;
        int *ac_out = s->ac_out;
        int k, block_type;

        if (get_bits_left(gb) <= 0)
            return AVERROR_INVALIDDATA;

        /* Rice-coded blocks rely on ac_out[] being all zero */
        memset(s->ac_out, 0, sizeof(s->ac_out));

        block_type = get_urice(gb, 1);
        av_log(avctx, AV_LOG_DEBUG, "block_type : %d\n", block_type);

        if (block_type >= 0 && block_type <= 7) {
            /* Rice parameter for the residual of this block */
            k = 1 + (avctx->sample_fmt == AV_SAMPLE_FMT_S16P);
            k = get_urice(gb, k) + 1;
            if (k >= 32)
                return AVERROR_INVALIDDATA;
        }

        /* block types using the linear predictor carry its coefficients */
        if (block_type <=  2 || block_type ==  6 || block_type == 13 ||
            block_type == 14 || block_type == 15 || block_type == 19) {
            order = get_urice(gb, 2);
            if ((unsigned)order > FF_ARRAY_ELEMS(s->filter[ch]))
                return AVERROR_INVALIDDATA;
            for (int o = 0; o < order; o++)
                s->filter[ch][o] = get_srice(gb, 2);
        }

        if (block_type >= 0 && block_type <= 7) {
            for (int n = 0; n < s->nb_samples; n++)
                samples[n + 70] = get_srice(gb, k);
        } else {
            for (int n = 0; n < s->nb_samples; n++)
                samples[n + 70] = 0;
        }

        if (block_type >= 13 && block_type <= 20) {
            const int ac_size = get_bits(gb, 12);
            const int ac_pos = get_bits_count(gb);
            GetBitContext ac_gb = *gb;
            int ret;

            /* The arithmetic-coded payload is ac_size bits long; it is
             * parsed through a copy of the reader while the main reader
             * jumps past it. */
            skip_bits_long(gb, ac_size);
            ret = ac_read_model(avctx, s, &ac_gb);
            if (ret < 0) {
                av_log(avctx, AV_LOG_ERROR, "bad arithmetic model\n");
                return ret;
            }

            ret = ac_init(avctx, s, &ac_gb);
            if (ret < 0) {
                av_log(avctx, AV_LOG_ERROR, "cannot init arithmetic decoder\n");
                return ret;
            }

            for (int n = 0; n < s->nb_samples; n++) {
                uint16_t prob = ac_get_prob(s);
                int ac = ac_map_symbol(s, prob);
                ac_out[n] = ac - 0x80;
                if ((ret = ac_normalize(avctx, s, &ac_gb)) < 0)
                    return ret;
            }

            if (get_bits_count(&ac_gb) != ac_pos + ac_size) {
                av_log(avctx, AV_LOG_DEBUG, "over/under-read in arithmetic coder: %d\n",
                       ac_pos + ac_size - get_bits_count(&ac_gb));
            }
        }

        switch (block_type) {
        case 12:
            s->eof = 1;
            return AVERROR_EOF;
        case 11:
            s->nb_samples = get_urice(gb, 8);
            if (s->nb_samples > 570U) {
                s->nb_samples = 570;
                return AVERROR_INVALIDDATA;
            }
            continue;
        case 10:
            s->shift = get_urice(gb, 2);
            if ((unsigned)s->shift > 31) {
                s->shift = 0;
                return AVERROR_INVALIDDATA;
            }
            continue;
        case 9:
            if (avctx->sample_fmt == AV_SAMPLE_FMT_U8P) {
                fill = (int8_t)get_bits(gb, 8);
                fill -= 0x80;
            } else {
                fill = (int16_t)get_bits(gb, 16);
                fill -= 0x8000;
            }

            for (int n = 0; n < s->nb_samples; n++)
                samples[n + 70] = fill;
            finished = 1;
            break;
        case 8:
            for (int n = 0; n < s->nb_samples; n++)
                samples[n + 70] = 0;
            finished = 1;
            break;
        case 20:
        case 7:
            for (int n = 0; n < s->nb_samples; n++)
                samples[n + 70] += ac_out[n] + samples[n + 69] * 3U - samples[n + 68] * 3U + samples[n + 67];
            finished = 1;
            break;
        case 19:
        case 6:
            /* run the filter on a zeroed history; the real history is
             * parked in ac_pred[] and restored afterwards */
            for (int n = 0; n < 70; n++) {
                ac_pred[n] = samples[n];
                samples[n] = 0;
            }

            for (int n = 0; n < s->nb_samples; n++) {
                int sum = 15;

                for (int o = 0; o < order; o++)
                    sum += s->filter[ch][o] * (unsigned)samples[n + 70 - o - 1];

                /* unsigned to avoid signed overflow on damaged input */
                samples[n + 70] += (unsigned)ac_out[n] + (sum >> 4);
            }

            for (int n = 0; n < 70; n++)
                samples[n] = ac_pred[n];

            for (int n = 0; n < s->nb_samples; n++)
                samples[n + 70] += ac_out[n] + samples[n + 69] * 3U - samples[n + 68] * 3U + samples[n + 67];

            finished = 1;
            break;
        case 18:
        case 5:
            for (int n = 0; n < s->nb_samples; n++)
                samples[n + 70] += ac_out[n] + samples[n + 69] * 2U - samples[n + 68];
            finished = 1;
            break;
        case 17:
        case 4:
            for (int n = 0; n < s->nb_samples; n++)
                samples[n + 70] += ac_out[n];
            finished = 1;
            break;
        case 16:
        case 3:
            for (int n = 0; n < s->nb_samples; n++)
                samples[n + 70] += ac_out[n] + (unsigned)samples[n + 69];
            finished = 1;
            break;
        case 15:
        case 2:
            /* run the filter on a zeroed history; the real history is
             * parked in ac_pred[] and restored afterwards */
            for (int n = 0; n < 70; n++) {
                ac_pred[n] = samples[n];
                samples[n] = 0;
            }

            for (int n = 0; n < s->nb_samples; n++) {
                int sum = 15;

                for (int o = 0; o < order; o++)
                    sum += s->filter[ch][o] * (unsigned)samples[n + 70 - o - 1];

                /* unsigned to avoid signed overflow on damaged input */
                samples[n + 70] += (unsigned)ac_out[n] + (sum >> 4);
            }

            for (int n = 0; n < 70; n++)
                samples[n] = ac_pred[n];

            for (int n = 0; n < s->nb_samples; n++)
                samples[n + 70] += samples[n + 69] * 2U - samples[n + 68];

            finished = 1;
            break;
        case 14:
        case 1:
            /* run the filter on a zeroed history; the real history is
             * parked in ac_pred[] and restored afterwards */
            for (int n = 0; n < 70; n++) {
                ac_pred[n] = samples[n];
                samples[n] = 0;
            }

            for (int n = 0; n < s->nb_samples; n++) {
                int sum = 15;

                for (int o = 0; o < order; o++)
                    sum += s->filter[ch][o] * (unsigned)samples[n + 70 - o - 1];

                samples[n + 70] += (unsigned)ac_out[n] + (sum >> 4);
            }

            for (int n = 0; n < 70; n++)
                samples[n] = ac_pred[n];

            for (int n = 0; n < s->nb_samples; n++)
                samples[n + 70] += (unsigned)samples[n + 69];

            finished = 1;
            break;
        case 13:
        case 0:
            for (int n = 0; n < s->nb_samples; n++) {
                int sum = 15;

                for (int o = 0; o < order; o++)
                    sum += s->filter[ch][o] * (unsigned)samples[n + 70 - o - 1];

                samples[n + 70] += (unsigned)ac_out[n] + (sum >> 4);
            }
            finished = 1;
            break;
        default:
            return AVERROR_INVALIDDATA;
        }

        if (finished == 1 && avctx->ch_layout.nb_channels == 2) {
            /* the correlation flag follows the first channel only */
            if (ch == 0)
                correlated = get_bits1(gb);
            finished = ch != 0;
            do_stereo(s, ch, correlated, 70);
            ch = 1;
        }
    }

    if (avctx->ch_layout.nb_channels == 1) {
        /* keep the last 70 samples as history for the next block */
        for (int n = 0; n < 70; n++)
            s->samples[0][n] = s->samples[0][s->nb_samples + n];
    }

    return 0;
}

/**
 * Decode callback: buffer input packets and emit one block of audio.
 *
 * Packet data is appended to an internal buffer. While packets still
 * carry data, decoding only starts once max_framesize bytes are buffered;
 * a packet without data decodes whatever is left. Because blocks are not
 * byte aligned, the bit offset reached inside the last consumed byte is
 * kept in s->skip for the next call.
 *
 * @return number of input bytes consumed, 0 when the end-of-stream block
 *         was reached, or a negative error code (the internal buffer is
 *         dropped in the latter two cases)
 */
static int wavarc_decode(AVCodecContext *avctx, AVFrame *frame,
                         int *got_frame_ptr, AVPacket *pkt)
{
    WavArcContext *s = avctx->priv_data;
    GetBitContext *gb = &s->gb;
    int buf_size, input_buf_size;
    const uint8_t *buf;
    int ret, bits_read, consumed;

    if ((!pkt->size && !s->bitstream_size) || s->nb_samples == 0 || s->eof) {
        *got_frame_ptr = 0;
        return pkt->size;
    }

    buf_size = FFMIN(pkt->size, s->max_framesize - s->bitstream_size);
    input_buf_size = buf_size;
    /* move the pending data to the front when the new data would not fit */
    if (s->bitstream_index + s->bitstream_size + buf_size + AV_INPUT_BUFFER_PADDING_SIZE > s->max_framesize) {
        memmove(s->bitstream, &s->bitstream[s->bitstream_index], s->bitstream_size);
        s->bitstream_index = 0;
    }
    if (pkt->data)
        memcpy(&s->bitstream[s->bitstream_index + s->bitstream_size], pkt->data, buf_size);
    buf                = &s->bitstream[s->bitstream_index];
    buf_size          += s->bitstream_size;
    s->bitstream_size  = buf_size;
    if (buf_size < s->max_framesize && pkt->data) {
        /* keep collecting input */
        *got_frame_ptr = 0;
        return input_buf_size;
    }

    if ((ret = init_get_bits8(gb, buf, buf_size)) < 0)
        goto fail;
    skip_bits(gb, s->skip);

    switch (avctx->codec_tag) {
    case MKTAG('0','C','P','Y'):
        ret = decode_0cpy(avctx, s, gb);
        break;
    case MKTAG('1','D','I','F'):
        ret = decode_1dif(avctx, s, gb);
        break;
    case MKTAG('2','S','L','P'):
    case MKTAG('3','N','L','P'):
    case MKTAG('4','A','L','P'):
        ret = decode_2slp(avctx, s, gb);
        break;
    case MKTAG('5','E','L','P'):
        ret = decode_5elp(avctx, s, gb);
        break;
    default:
        ret = AVERROR_INVALIDDATA;
    }

    if (ret < 0)
        goto fail;

    bits_read = get_bits_count(gb);
    s->skip   = bits_read - 8 * (bits_read / 8);
    consumed  = bits_read / 8;

    if (consumed > buf_size) {
        ret = AVERROR_INVALIDDATA;
        goto fail;
    }

    /* An empty block comes from the bitstream, so report it as invalid
     * data rather than as whatever the buffer allocation would return. */
    if (s->nb_samples <= 0) {
        ret = AVERROR_INVALIDDATA;
        goto fail;
    }

    frame->nb_samples = s->nb_samples;
    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
        goto fail;

    switch (avctx->sample_fmt) {
    case AV_SAMPLE_FMT_U8P:
        for (int ch = 0; ch < avctx->ch_layout.nb_channels; ch++) {
            uint8_t *dst = (uint8_t *)frame->extended_data[ch];
            const int *src = s->samples[ch] + s->offset;

            for (int i = 0; i < frame->nb_samples; i++)
                dst[i] = src[i] * (1U << s->shift) + 0x80U;
        }
        break;
    case AV_SAMPLE_FMT_S16P:
        for (int ch = 0; ch < avctx->ch_layout.nb_channels; ch++) {
            int16_t *dst = (int16_t *)frame->extended_data[ch];
            const int *src = s->samples[ch] + s->offset;

            for (int i = 0; i < frame->nb_samples; i++)
                dst[i] = src[i] * (1U << s->shift);
        }
        break;
    }

    *got_frame_ptr = 1;

    if (s->bitstream_size) {
        s->bitstream_index += consumed;
        s->bitstream_size  -= consumed;
        return input_buf_size;
    }

    return consumed;

fail:
    /* ret is negative on every path leading here */
    s->bitstream_size = 0;
    s->bitstream_index = 0;
    if (ret == AVERROR_EOF)
        return 0;
    return ret;
}

/**
 * Release the input accumulation buffer and mark it as empty.
 *
 * @return always 0
 */
static av_cold int wavarc_close(AVCodecContext *avctx)
{
    WavArcContext *ctx = avctx->priv_data;

    ctx->bitstream_size = 0;
    av_freep(&ctx->bitstream);

    return 0;
}

/*
 * Codec registration entry for the WavArc decoder.
 * AV_CODEC_CAP_DELAY is set because wavarc_decode() buffers input and
 * still produces output when called with an empty packet.
 * Output is planar unsigned 8-bit or planar signed 16-bit audio.
 */
const FFCodec ff_wavarc_decoder = {
    .p.name           = "wavarc",
    CODEC_LONG_NAME("Waveform Archiver"),
    .p.type           = AVMEDIA_TYPE_AUDIO,
    .p.id             = AV_CODEC_ID_WAVARC,
    .priv_data_size   = sizeof(WavArcContext),
    .init             = wavarc_init,
    FF_CODEC_DECODE_CB(wavarc_decode),
    .close            = wavarc_close,
    .p.capabilities   = AV_CODEC_CAP_DR1 |
#if FF_API_SUBFRAMES
                        AV_CODEC_CAP_SUBFRAMES |
#endif
                        AV_CODEC_CAP_DELAY,
    .p.sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_U8P,
                                                        AV_SAMPLE_FMT_S16P,
                                                        AV_SAMPLE_FMT_NONE },
};