avcodec/wmalosslessdec: improve 24bit support

Signed-off-by: Paul B Mahol <onemda@gmail.com>
2025-03-03 14:32:16 +02:00 · 2016-04-13 20:36:26 +02:00 · 2016-04-13 20:36:26 +02:00 · 56759f69a6
commit 56759f69a6
parent 5ac71e9db8
3 changed files with 28 additions and 9 deletions
--- a/libavcodec/lossless_audiodsp.c
+++ b/libavcodec/lossless_audiodsp.c
@ -36,9 +36,23 @@ static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2,
    return res;
 }

+/**
+ * Compute the dot product of v1 and v2 while adding mul * v3 to v1 in place.
+ *
+ * Each element of v1 contributes its value from before the update to the
+ * returned sum. All arithmetic is carried out on unsigned 32-bit values so
+ * that large operands wrap modulo 2^32 instead of triggering signed-overflow
+ * undefined behavior; results are unchanged whenever nothing overflows.
+ *
+ * @param v1    vector that is multiplied with v2 and then updated in place
+ * @param v2    vector multiplied element-wise with v1 (read only)
+ * @param v3    vector scaled by mul and added to v1 (read only)
+ * @param order number of elements to process; values <= 0 process nothing
+ * @param mul   factor applied to every element of v3
+ * @return sum of the products v1[i] * v2[i], wrapped to 32 bits
+ */
+static int32_t scalarproduct_and_madd_int32_c(int32_t *v1, const int32_t *v2,
+                                              const int32_t *v3,
+                                              int order, int mul)
+{
+    uint32_t res = 0;
+
+    while (order-- > 0) {
+        /* Accumulate with the pre-update value of *v1. */
+        res += (uint32_t)*v1 * (uint32_t)*v2++;
+        /* Unsigned multiply-add, then convert back to the signed element. */
+        *v1  = (int32_t)((uint32_t)*v1 + (uint32_t)mul * (uint32_t)*v3++);
+        v1++;
+    }
+    return (int32_t)res;
+}
+
 av_cold void ff_llauddsp_init(LLAudDSPContext *c)
 {
    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
+    c->scalarproduct_and_madd_int32 = scalarproduct_and_madd_int32_c;

    if (ARCH_ARM)
        ff_llauddsp_init_arm(c);
--- a/libavcodec/lossless_audiodsp.h
+++ b/libavcodec/lossless_audiodsp.h
@ -36,6 +36,11 @@ typedef struct LLAudDSPContext {
                                            const int16_t *v2,
                                            const int16_t *v3,
                                            int len, int mul);
+
+    int32_t (*scalarproduct_and_madd_int32)(int32_t *v1 /* align 16 */,
+                                            const int32_t *v2,
+                                            const int32_t *v3,
+                                            int len, int mul);
 } LLAudDSPContext;

 void ff_llauddsp_init(LLAudDSPContext *c);
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@ -135,8 +135,8 @@ typedef struct WmallDecodeCtx {
    int8_t  mclms_scaling;
    int16_t mclms_coeffs[WMALL_MAX_CHANNELS * WMALL_MAX_CHANNELS * 32];
    int16_t mclms_coeffs_cur[WMALL_MAX_CHANNELS * WMALL_MAX_CHANNELS];
-    int16_t mclms_prevvalues[WMALL_MAX_CHANNELS * 2 * 32];
-    int16_t mclms_updates[WMALL_MAX_CHANNELS * 2 * 32];
+    int32_t mclms_prevvalues[WMALL_MAX_CHANNELS * 2 * 32];
+    int32_t mclms_updates[WMALL_MAX_CHANNELS * 2 * 32];
    int     mclms_recent;

    int     movave_scaling;
@ -147,9 +147,9 @@ typedef struct WmallDecodeCtx {
        int scaling;
        int coefsend;
        int bitsend;
-        DECLARE_ALIGNED(16, int16_t, coefs)[MAX_ORDER + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
-        DECLARE_ALIGNED(16, int16_t, lms_prevvalues)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
-        DECLARE_ALIGNED(16, int16_t, lms_updates)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
+        DECLARE_ALIGNED(16, int32_t, coefs)[MAX_ORDER + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
+        DECLARE_ALIGNED(16, int32_t, lms_prevvalues)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
+        DECLARE_ALIGNED(16, int32_t, lms_updates)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
        int recent;
    } cdlms[WMALL_MAX_CHANNELS][9];

@ -657,10 +657,10 @@ static void mclms_update(WmallDecodeCtx *s, int icoef, int *pred)
    if (s->mclms_recent == 0) {
        memcpy(&s->mclms_prevvalues[order * num_channels],
               s->mclms_prevvalues,
-               sizeof(int16_t) * order * num_channels);
+               sizeof(int32_t) * order * num_channels);
        memcpy(&s->mclms_updates[order * num_channels],
               s->mclms_updates,
-               sizeof(int16_t) * order * num_channels);
+               sizeof(int32_t) * order * num_channels);
        s->mclms_recent = num_channels * order;
    }
 }
@ -719,7 +719,7 @@ static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int input)
    s->cdlms[ich][ilms].lms_updates[recent + (order >> 3)] >>= 1;
    s->cdlms[ich][ilms].recent = recent;
    memset(s->cdlms[ich][ilms].lms_updates + recent + order, 0,
-           sizeof(s->cdlms[ich][ilms].lms_updates) - 2*(recent+order));
+           sizeof(s->cdlms[ich][ilms].lms_updates) - 4*(recent+order));
 }

 static void use_high_update_speed(WmallDecodeCtx *s, int ich)
@ -767,7 +767,7 @@ static void revert_cdlms(WmallDecodeCtx *s, int ch,
        for (icoef = coef_begin; icoef < coef_end; icoef++) {
            pred = 1 << (s->cdlms[ch][ilms].scaling - 1);
            residue = s->channel_residues[ch][icoef];
-            pred += s->dsp.scalarproduct_and_madd_int16(s->cdlms[ch][ilms].coefs,
+            pred += s->dsp.scalarproduct_and_madd_int32(s->cdlms[ch][ilms].coefs,
                                                        s->cdlms[ch][ilms].lms_prevvalues
                                                            + s->cdlms[ch][ilms].recent,
                                                        s->cdlms[ch][ilms].lms_updates