Merge commit '2008f76054906e9ff6bf744800af0e5a5bfe61be'

* commit '2008f76054906e9ff6bf744800af0e5a5bfe61be':
  dca: remove unused decode_hf function and quant_d tables

Merged-by: Hendrik Leppkes <h.leppkes@gmail.com>
2025-02-09 14:14:39 +02:00 · 2016-01-02 13:17:48 +01:00 · 2016-01-02 13:17:48 +01:00 · d03da3e240
commit d03da3e240
parent af1238f863 2008f76054
12 changed files with 8 additions and 294 deletions
--- a/libavcodec/aarch64/dcadsp_init.c
+++ b/libavcodec/aarch64/dcadsp_init.c
@@ -41,12 +41,6 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
                                float out[32], const float in[32],
                                float scale);
 void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8],
                       const int32_t vq_num[DCA_SUBBANDS],
                       const int8_t hf_vq[1024][32], intptr_t vq_offset,
                       int32_t scale[DCA_SUBBANDS][2],
                       intptr_t start, intptr_t end);
 av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
 {
    int cpu_flags = av_get_cpu_flags();
@@ -54,7 +48,6 @@ av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
    if (have_neon(cpu_flags)) {
        s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
        s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
        s->decode_hf  = ff_decode_hf_neon;
    }
 }
--- a/libavcodec/aarch64/dcadsp_neon.S
+++ b/libavcodec/aarch64/dcadsp_neon.S
@@ -21,66 +21,6 @@
 #include "libavutil/aarch64/asm.S"
 function ff_decode_hf_neon, export=1
        add             x2,  x2,  x3
        add             x0,  x0,  x5, lsl #5
        add             x1,  x1,  x5, lsl #2
        add             x4,  x4,  x5, lsl #3
        sub             x6,  x6,  x5
        ldr             w7,  [x1], #4
        add             x7,  x2,  x7, lsl #5
        subs            x6,  x6,  #1
        b.eq            1f
        b.gt            2f
        ret
 2:
        ldr             w8,  [x1], #4
        subs            x6,  x6,  #2
        add             x8,  x2,  x8, lsl #5
        ld1             {v2.4s},  [x4], #16
        ld1             {v0.8b},  [x7]
        ld1             {v4.8b},  [x8]
        sxtl            v3.8h,  v0.8b
        sxtl            v7.8h,  v4.8b
        scvtf           v2.4s,  v2.4s,  #4
        sxtl            v0.4s,  v3.4h
        sxtl2           v1.4s,  v3.8h
        sxtl            v4.4s,  v7.4h
        sxtl2           v5.4s,  v7.8h
        scvtf           v0.4s,  v0.4s
        scvtf           v1.4s,  v1.4s
        scvtf           v4.4s,  v4.4s
        scvtf           v5.4s,  v5.4s
        fmul            v0.4s,  v0.4s,  v2.s[0]
        fmul            v1.4s,  v1.4s,  v2.s[0]
        fmul            v4.4s,  v4.4s,  v2.s[2]
        fmul            v5.4s,  v5.4s,  v2.s[2]
        b.lt            10f
        ldr             w7,  [x1], #4
        add             x7,  x2,  x7, lsl #5
        st1             {v0.4s,v1.4s},  [x0], #32
        st1             {v4.4s,v5.4s},  [x0], #32
        b.gt            2b
 1:
        ldr             w9,  [x4]
        ld1             {v0.8b},  [x7]
        scvtf           s2,  w9,  #4
        sxtl            v3.8h,  v0.8b
        sxtl            v0.4s,  v3.4h
        sxtl2           v1.4s,  v3.8h
        scvtf           v0.4s,  v0.4s
        scvtf           v1.4s,  v1.4s
        fmul            v0.4s,  v0.4s,  v2.s[0]
        fmul            v1.4s,  v1.4s,  v2.s[0]
        st1             {v0.4s,v1.4s},  [x0]
        ret
 10:
        st1             {v0.4s,v1.4s},  [x0], #32
        st1             {v4.4s,v5.4s},  [x0]
        ret
 endfunc
 function ff_dca_lfe_fir0_neon, export=1
        mov             x3,  #32                // decifactor
        sub             x1,  x1,  #7*4
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/dcadsp_init_arm.c
@@ -49,12 +49,6 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
                                float out[32], const float in[32],
                                float scale);
 void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8],
                       const int32_t vq_num[DCA_SUBBANDS],
                       const int8_t hf_vq[1024][32], intptr_t vq_offset,
                       int32_t scale[DCA_SUBBANDS][2],
                       intptr_t start, intptr_t end);
 av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
 {
    int cpu_flags = av_get_cpu_flags();
@@ -67,7 +61,6 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
    if (have_neon(cpu_flags)) {
        s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
        s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
        s->decode_hf  = ff_decode_hf_neon;
    }
 }
--- a/libavcodec/arm/dcadsp_neon.S
+++ b/libavcodec/arm/dcadsp_neon.S
@@ -20,35 +20,6 @@
 #include "libavutil/arm/asm.S"
 function ff_decode_hf_neon, export=1
        push            {r4-r5,lr}
        add             r2,  r2,  r3
        ldr             r3,       [sp, #12]
        ldrd            r4,  r5,  [sp, #16]
        add             r3,  r3,  r4, lsl #3
        add             r1,  r1,  r4, lsl #2
        add             r0,  r0,  r4, lsl #5
 1:      ldr_post        lr,  r1,  #4
        add             r4,  r4,  #1
        add             lr,  r2,  lr, lsl #5
        cmp             r4,  r5
        vld1.32         {d7},     [r3]!
        vld1.8          {d0},     [lr,:64]
        vcvt.f32.s32    d7,  d7,  #4
        vmovl.s8        q1,  d0
        vmovl.s16       q0,  d2
        vmovl.s16       q1,  d3
        vcvt.f32.s32    q0,  q0
        vcvt.f32.s32    q1,  q1
        vmul.f32        q0,  q0,  d7[0]
        vmul.f32        q1,  q1,  d7[0]
        vst1.32         {q0-q1},  [r0,:128]!
        bne             1b
        pop             {r4-r5,pc}
 endfunc
 function ff_dca_lfe_fir0_neon, export=1
        push            {r4-r6,lr}
        mov             r3,  #32                @ decifactor
--- a/libavcodec/dcadata.c
+++ b/libavcodec/dcadata.c
@@ -4189,13 +4189,6 @@ const uint32_t ff_dca_lossy_quant[32] = {
        84,      42,      21,       0,       0,       0,       0,       0
 };
 const float ff_dca_lossy_quant_d[32] = {
          0,     1.6,      1.0,     0.8,    0.59,    0.50,    0.42,    0.34,
       0.19,    0.11,     0.06,   0.035,   0.019,   0.011,  0.0065,  0.0040,
     0.0025,  0.0014,   0.0008, 0.00045, 0.00030, 0.00017, 0.00008, 0.00004,
    0.00002, 0.00001, 0.000005,       0,       0,       0,       0,       0
 };
 /* 20bits unsigned fractional binary codes */
 const uint32_t ff_dca_lossless_quant[32] = {
         0, 4194304, 2097152, 1384120, 1048576, 696254, 524288, 348127,
--- a/libavcodec/dcadata.h
+++ b/libavcodec/dcadata.h
@@ -35,7 +35,6 @@ extern const uint32_t ff_dca_scale_factor_quant6[64];
 extern const uint32_t ff_dca_scale_factor_quant7[128];
 extern const uint32_t ff_dca_lossy_quant[32];
 extern const float ff_dca_lossy_quant_d[32];
 extern const uint32_t ff_dca_lossless_quant[32];
 extern const float ff_dca_lossless_quant_d[32];
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -992,12 +992,12 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
                s->debug_flag |= 0x01;
            }
-            s->dcadsp.decode_hf_int(subband_samples, s->dca_chan[k].high_freq_vq,
+            s->dcadsp.decode_hf(subband_samples, s->dca_chan[k].high_freq_vq,
-                                    ff_dca_high_freq_vq, subsubframe * SAMPLES_PER_SUBBAND,
+                                ff_dca_high_freq_vq,
-                                    s->dca_chan[k].scale_factor,
+                                subsubframe * SAMPLES_PER_SUBBAND,
-                                    s->audio_header.vq_start_subband[k],
+                                s->dca_chan[k].scale_factor,
-                                    s->audio_header.subband_activity[k]);
+                                s->audio_header.vq_start_subband[k],
-
+                                s->audio_header.subband_activity[k]);
        }
    }
--- a/libavcodec/dcadsp.c
+++ b/libavcodec/dcadsp.c
@@ -27,29 +27,11 @@
 #include "dcadsp.h"
 #include "dcamath.h"
-static void decode_hf_c(float dst[DCA_SUBBANDS][8],
+static void decode_hf_c(int32_t dst[DCA_SUBBANDS][8],
                        const int32_t vq_num[DCA_SUBBANDS],
                        const int8_t hf_vq[1024][32], intptr_t vq_offset,
                        int32_t scale[DCA_SUBBANDS][2],
                        intptr_t start, intptr_t end)
 {
    int i, l;
    for (l = start; l < end; l++) {
        /* 1 vector -> 32 samples but we only need the 8 samples
         * for this subsubframe. */
        const int8_t *ptr = &hf_vq[vq_num[l]][vq_offset];
        float fscale = scale[l][0] * (1 / 16.0);
        for (i = 0; i < 8; i++)
            dst[l][i] = ptr[i] * fscale;
    }
 }
 static void decode_hf_int_c(int32_t dst[DCA_SUBBANDS][8],
                            const int32_t vq_num[DCA_SUBBANDS],
                            const int8_t hf_vq[1024][32], intptr_t vq_offset,
                            int32_t scale[DCA_SUBBANDS][2],
                            intptr_t start, intptr_t end)
 {
    int i, j;
@@ -141,7 +123,6 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
    s->lfe_fir[1]      = dca_lfe_fir1_c;
    s->qmf_32_subbands = dca_qmf_32_subbands;
    s->decode_hf       = decode_hf_c;
    s->decode_hf_int   = decode_hf_int_c;
    s->dequantize      = dequantize_c;
    if (ARCH_AARCH64)
--- a/libavcodec/dcadsp.h
+++ b/libavcodec/dcadsp.h
@@ -32,16 +32,11 @@ typedef struct DCADSPContext {
                            int *synth_buf_offset, float synth_buf2[32],
                            const float window[512], float *samples_out,
                            float raXin[32], float scale);
-    void (*decode_hf)(float dst[DCA_SUBBANDS][8],
+    void (*decode_hf)(int32_t dst[DCA_SUBBANDS][8],
                      const int32_t vq_num[DCA_SUBBANDS],
                      const int8_t hf_vq[1024][32], intptr_t vq_offset,
                      int32_t scale[DCA_SUBBANDS][2],
                      intptr_t start, intptr_t end);
    void (*decode_hf_int)(int32_t dst[DCA_SUBBANDS][8],
                          const int32_t vq_num[DCA_SUBBANDS],
                          const int8_t hf_vq[1024][32], intptr_t vq_offset,
                          int32_t scale[DCA_SUBBANDS][2],
                          intptr_t start, intptr_t end);
    void (*dequantize)(int32_t *samples, uint32_t step_size, uint32_t scale);
 } DCADSPContext;
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -26,92 +26,6 @@ pf_inv16:  times 4 dd 0x3D800000 ; 1/16
 SECTION .text
 ; void decode_hf(float dst[DCA_SUBBANDS][8], const int32_t vq_num[DCA_SUBBANDS],
 ;                const int8_t hf_vq[1024][32], intptr_t vq_offset,
 ;                int32_t scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end)
 %macro DECODE_HF 0
 cglobal decode_hf, 6,6,5, dst, num, src, offset, scale, start, end
    lea       srcq, [srcq + offsetq]
    shl     startq, 2
    mov    offsetd, endm
 %define DICT offsetq
    shl    offsetq, 2
    mov       endm, offsetq
 .loop:
 %if ARCH_X86_64
    mov    offsetd, [scaleq + 2 * startq]
    cvtsi2ss    m0, offsetd
 %else
    cvtsi2ss    m0, [scaleq + 2 * startq]
 %endif
    mov    offsetd, [numq + startq]
    mulss       m0, [pf_inv16]
    shl       DICT, 5
    shufps      m0, m0, 0
 %if cpuflag(sse2)
 %if cpuflag(sse4)
    pmovsxbd    m1, [srcq + DICT + 0]
    pmovsxbd    m2, [srcq + DICT + 4]
 %else
    movq        m1, [srcq + DICT]
    punpcklbw   m1, m1
    mova        m2, m1
    punpcklwd   m1, m1
    punpckhwd   m2, m2
    psrad       m1, 24
    psrad       m2, 24
 %endif
    cvtdq2ps    m1, m1
    cvtdq2ps    m2, m2
 %else
    movd       mm0, [srcq + DICT + 0]
    movd       mm1, [srcq + DICT + 4]
    punpcklbw  mm0, mm0
    punpcklbw  mm1, mm1
    movq       mm2, mm0
    movq       mm3, mm1
    punpcklwd  mm0, mm0
    punpcklwd  mm1, mm1
    punpckhwd  mm2, mm2
    punpckhwd  mm3, mm3
    psrad      mm0, 24
    psrad      mm1, 24
    psrad      mm2, 24
    psrad      mm3, 24
    cvtpi2ps    m1, mm0
    cvtpi2ps    m2, mm1
    cvtpi2ps    m3, mm2
    cvtpi2ps    m4, mm3
    shufps      m0, m0, 0
    shufps      m1, m3, q1010
    shufps      m2, m4, q1010
 %endif
    mulps       m1, m0
    mulps       m2, m0
    mova [dstq + 8 * startq +  0], m1
    mova [dstq + 8 * startq + 16], m2
    add     startq, 4
    cmp     startq, endm
    jl       .loop
 .end:
 %if notcpuflag(sse2)
    emms
 %endif
    REP_RET
 %endmacro
 %if ARCH_X86_32
 INIT_XMM sse
 DECODE_HF
 %endif
 INIT_XMM sse2
 DECODE_HF
 INIT_XMM sse4
 DECODE_HF
 ; %1=v0/v1  %2=in1  %3=in2
 %macro FIR_LOOP 2-3
 .loop%1:
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -23,15 +23,6 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/dcadsp.h"
 void ff_decode_hf_sse(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
                      const int8_t hf_vq[1024][32], intptr_t vq_offset,
                      int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
 void ff_decode_hf_sse2(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
                       const int8_t hf_vq[1024][32], intptr_t vq_offset,
                       int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
 void ff_decode_hf_sse4(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
                       const int8_t hf_vq[1024][32], intptr_t vq_offset,
                       int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
 void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs);
 void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs);
 void ff_dca_lfe_fir0_fma3(float *out, const float *in, const float *coefs);
@@ -41,21 +32,10 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
    int cpu_flags = av_get_cpu_flags();
    if (EXTERNAL_SSE(cpu_flags)) {
 #if ARCH_X86_32
        s->decode_hf = ff_decode_hf_sse;
 #endif
        s->lfe_fir[0]        = ff_dca_lfe_fir0_sse;
        s->lfe_fir[1]        = ff_dca_lfe_fir1_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        s->decode_hf = ff_decode_hf_sse2;
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        s->decode_hf = ff_decode_hf_sse4;
    }
    if (EXTERNAL_FMA3(cpu_flags)) {
        s->lfe_fir[0]        = ff_dca_lfe_fir0_fma3;
    }
--- a/tests/checkasm/dcadsp.c
+++ b/tests/checkasm/dcadsp.c
@@ -75,16 +75,6 @@
        }                                                               \
    } while (0)
 #define randomize_decode_hf()                                   \
    do {                                                        \
        int i;                                                  \
        for (i = 0; i < DCA_SUBBANDS; i++) {                    \
            vq_num[i]   = rnd() >> 22;                          \
            scale[i][0] = rnd() >> 26;                          \
            scale[i][1] = INT32_MIN;                            \
        }                                                       \
    } while (0)
 void checkasm_check_dcadsp(void)
 {
    DCADSPContext c;
@@ -98,40 +88,5 @@ void checkasm_check_dcadsp(void)
    if (check_func(c.lfe_fir[1], "dca_lfe_fir1"))
        check_lfe_fir(64, 1.0e-6f);
    if (check_func(c.decode_hf,  "dca_decode_hf")) {
        LOCAL_ALIGNED_16(float,   dst0,   [DCA_SUBBANDS], [8]);
        LOCAL_ALIGNED_16(float,   dst1,   [DCA_SUBBANDS], [8]);
        LOCAL_ALIGNED_16(int32_t, scale,  [DCA_SUBBANDS], [2]);
        LOCAL_ALIGNED_16(int32_t, vq_num, [DCA_SUBBANDS]);
        intptr_t start, end = 32, offset;
        declare_func(void, float[DCA_SUBBANDS][8], const int32_t[DCA_SUBBANDS],
                     const int8_t[1024][DCA_SUBBANDS], intptr_t, int32_t[DCA_SUBBANDS][2],
                     intptr_t, intptr_t);
        for (start = 0; start < 32; start++) {
            for (offset = 0; offset < 32; offset += 8) {
                int j;
                for (j = 0; j < DCA_SUBBANDS; j++) {
                    memset(dst0[j], 0, sizeof(*(dst0[j])) * 8);
                    memset(dst1[j], 0, sizeof(*(dst1[j])) * 8);
                }
                randomize_decode_hf();
                call_ref(dst0, vq_num, ff_dca_high_freq_vq, offset, scale, start, end);
                call_new(dst1, vq_num, ff_dca_high_freq_vq, offset, scale, start, end);
                for (j = 0; j < 8 * DCA_SUBBANDS; j++) {
                    if (!float_near_ulp(dst0[j>>3][j&7], dst1[j>>3][j&7], 1)) {
                        fail();
                        break;
                    }
                }
                bench_new(dst1, vq_num, ff_dca_high_freq_vq, offset, scale, start, end);
            }
        }
    }
    report("dcadsp");
 }