mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-29 22:00:58 +02:00
dca: remove unused decode_hf function and quant_d tables
They were superseded by their integer equivalents. Rename the integer variant, decode_hf_int, to decode_hf.
This commit is contained in:
parent
aebf07075f
commit
2008f76054
@ -41,12 +41,6 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
|
||||
float out[32], const float in[32],
|
||||
float scale);
|
||||
|
||||
void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8],
|
||||
const int32_t vq_num[DCA_SUBBANDS],
|
||||
const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||
int32_t scale[DCA_SUBBANDS][2],
|
||||
intptr_t start, intptr_t end);
|
||||
|
||||
av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
@ -54,7 +48,6 @@ av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
|
||||
if (have_neon(cpu_flags)) {
|
||||
s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
|
||||
s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
|
||||
s->decode_hf = ff_decode_hf_neon;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -21,66 +21,6 @@
|
||||
|
||||
#include "libavutil/aarch64/asm.S"
|
||||
|
||||
function ff_decode_hf_neon, export=1
|
||||
add x2, x2, x3
|
||||
add x0, x0, x5, lsl #5
|
||||
add x1, x1, x5, lsl #2
|
||||
add x4, x4, x5, lsl #3
|
||||
sub x6, x6, x5
|
||||
ldr w7, [x1], #4
|
||||
add x7, x2, x7, lsl #5
|
||||
subs x6, x6, #1
|
||||
b.eq 1f
|
||||
b.gt 2f
|
||||
ret
|
||||
2:
|
||||
ldr w8, [x1], #4
|
||||
subs x6, x6, #2
|
||||
add x8, x2, x8, lsl #5
|
||||
ld1 {v2.4s}, [x4], #16
|
||||
ld1 {v0.8b}, [x7]
|
||||
ld1 {v4.8b}, [x8]
|
||||
sxtl v3.8h, v0.8b
|
||||
sxtl v7.8h, v4.8b
|
||||
scvtf v2.4s, v2.4s, #4
|
||||
sxtl v0.4s, v3.4h
|
||||
sxtl2 v1.4s, v3.8h
|
||||
sxtl v4.4s, v7.4h
|
||||
sxtl2 v5.4s, v7.8h
|
||||
scvtf v0.4s, v0.4s
|
||||
scvtf v1.4s, v1.4s
|
||||
scvtf v4.4s, v4.4s
|
||||
scvtf v5.4s, v5.4s
|
||||
fmul v0.4s, v0.4s, v2.s[0]
|
||||
fmul v1.4s, v1.4s, v2.s[0]
|
||||
fmul v4.4s, v4.4s, v2.s[2]
|
||||
fmul v5.4s, v5.4s, v2.s[2]
|
||||
b.lt 10f
|
||||
|
||||
ldr w7, [x1], #4
|
||||
add x7, x2, x7, lsl #5
|
||||
st1 {v0.4s,v1.4s}, [x0], #32
|
||||
st1 {v4.4s,v5.4s}, [x0], #32
|
||||
b.gt 2b
|
||||
1:
|
||||
ldr w9, [x4]
|
||||
ld1 {v0.8b}, [x7]
|
||||
scvtf s2, w9, #4
|
||||
sxtl v3.8h, v0.8b
|
||||
sxtl v0.4s, v3.4h
|
||||
sxtl2 v1.4s, v3.8h
|
||||
scvtf v0.4s, v0.4s
|
||||
scvtf v1.4s, v1.4s
|
||||
fmul v0.4s, v0.4s, v2.s[0]
|
||||
fmul v1.4s, v1.4s, v2.s[0]
|
||||
st1 {v0.4s,v1.4s}, [x0]
|
||||
ret
|
||||
10:
|
||||
st1 {v0.4s,v1.4s}, [x0], #32
|
||||
st1 {v4.4s,v5.4s}, [x0]
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_dca_lfe_fir0_neon, export=1
|
||||
mov x3, #32 // decifactor
|
||||
sub x1, x1, #7*4
|
||||
|
@ -49,12 +49,6 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
|
||||
float out[32], const float in[32],
|
||||
float scale);
|
||||
|
||||
void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8],
|
||||
const int32_t vq_num[DCA_SUBBANDS],
|
||||
const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||
int32_t scale[DCA_SUBBANDS][2],
|
||||
intptr_t start, intptr_t end);
|
||||
|
||||
av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
@ -67,7 +61,6 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
|
||||
if (have_neon(cpu_flags)) {
|
||||
s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
|
||||
s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
|
||||
s->decode_hf = ff_decode_hf_neon;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -20,35 +20,6 @@
|
||||
|
||||
#include "libavutil/arm/asm.S"
|
||||
|
||||
function ff_decode_hf_neon, export=1
|
||||
push {r4-r5,lr}
|
||||
add r2, r2, r3
|
||||
ldr r3, [sp, #12]
|
||||
ldrd r4, r5, [sp, #16]
|
||||
add r3, r3, r4, lsl #3
|
||||
add r1, r1, r4, lsl #2
|
||||
add r0, r0, r4, lsl #5
|
||||
|
||||
1: ldr_post lr, r1, #4
|
||||
add r4, r4, #1
|
||||
add lr, r2, lr, lsl #5
|
||||
cmp r4, r5
|
||||
vld1.32 {d7}, [r3]!
|
||||
vld1.8 {d0}, [lr,:64]
|
||||
vcvt.f32.s32 d7, d7, #4
|
||||
vmovl.s8 q1, d0
|
||||
vmovl.s16 q0, d2
|
||||
vmovl.s16 q1, d3
|
||||
vcvt.f32.s32 q0, q0
|
||||
vcvt.f32.s32 q1, q1
|
||||
vmul.f32 q0, q0, d7[0]
|
||||
vmul.f32 q1, q1, d7[0]
|
||||
vst1.32 {q0-q1}, [r0,:128]!
|
||||
bne 1b
|
||||
|
||||
pop {r4-r5,pc}
|
||||
endfunc
|
||||
|
||||
function ff_dca_lfe_fir0_neon, export=1
|
||||
push {r4-r6,lr}
|
||||
mov r3, #32 @ decifactor
|
||||
|
@ -4187,13 +4187,6 @@ const uint32_t ff_dca_lossy_quant[32] = {
|
||||
84, 42, 21, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
const float ff_dca_lossy_quant_d[32] = {
|
||||
0, 1.6, 1.0, 0.8, 0.59, 0.50, 0.42, 0.34,
|
||||
0.19, 0.11, 0.06, 0.035, 0.019, 0.011, 0.0065, 0.0040,
|
||||
0.0025, 0.0014, 0.0008, 0.00045, 0.00030, 0.00017, 0.00008, 0.00004,
|
||||
0.00002, 0.00001, 0.000005, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
/* 20bits unsigned fractional binary codes */
|
||||
const uint32_t ff_dca_lossless_quant[32] = {
|
||||
0, 4194304, 2097152, 1384120, 1048576, 696254, 524288, 348127,
|
||||
@ -4202,14 +4195,6 @@ const uint32_t ff_dca_lossless_quant[32] = {
|
||||
4, 2, 1, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
const float ff_dca_lossless_quant_d[32] = {
|
||||
0, 1.0, 0.5, 0.33, 0.25, 0.166, 0.125,
|
||||
0.083, 0.0625, 0.03125, 0.0156, 7.874E-3, 3.922E-3, 1.957E-3,
|
||||
9.775E-4, 4.885E-4, 2.442E-4, 1.221E-4, 6.104E-5, 3.052E-5, 1.526E-5,
|
||||
7.629E-6, 3.815E-6, 1.907E-6, 9.537E-7, 4.768E-7, 2.384E-7, 0,
|
||||
0, 0, 0, 0
|
||||
};
|
||||
|
||||
/* Vector quantization tables */
|
||||
|
||||
DECLARE_ALIGNED(8, const int8_t, ff_dca_high_freq_vq)[1024][32] = {
|
||||
|
@ -35,10 +35,8 @@ extern const uint32_t ff_dca_scale_factor_quant6[64];
|
||||
extern const uint32_t ff_dca_scale_factor_quant7[128];
|
||||
|
||||
extern const uint32_t ff_dca_lossy_quant[32];
|
||||
extern const float ff_dca_lossy_quant_d[32];
|
||||
|
||||
extern const uint32_t ff_dca_lossless_quant[32];
|
||||
extern const float ff_dca_lossless_quant_d[32];
|
||||
|
||||
extern const int8_t ff_dca_high_freq_vq[1024][32];
|
||||
|
||||
|
@ -913,12 +913,12 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
|
||||
s->debug_flag |= 0x01;
|
||||
}
|
||||
|
||||
s->dcadsp.decode_hf_int(subband_samples, s->dca_chan[k].high_freq_vq,
|
||||
ff_dca_high_freq_vq, subsubframe * SAMPLES_PER_SUBBAND,
|
||||
s->dca_chan[k].scale_factor,
|
||||
s->audio_header.vq_start_subband[k],
|
||||
s->audio_header.subband_activity[k]);
|
||||
|
||||
s->dcadsp.decode_hf(subband_samples, s->dca_chan[k].high_freq_vq,
|
||||
ff_dca_high_freq_vq,
|
||||
subsubframe * SAMPLES_PER_SUBBAND,
|
||||
s->dca_chan[k].scale_factor,
|
||||
s->audio_header.vq_start_subband[k],
|
||||
s->audio_header.subband_activity[k]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -27,29 +27,11 @@
|
||||
#include "dcadsp.h"
|
||||
#include "dcamath.h"
|
||||
|
||||
static void decode_hf_c(float dst[DCA_SUBBANDS][8],
|
||||
static void decode_hf_c(int32_t dst[DCA_SUBBANDS][8],
|
||||
const int32_t vq_num[DCA_SUBBANDS],
|
||||
const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||
int32_t scale[DCA_SUBBANDS][2],
|
||||
intptr_t start, intptr_t end)
|
||||
{
|
||||
int i, l;
|
||||
|
||||
for (l = start; l < end; l++) {
|
||||
/* 1 vector -> 32 samples but we only need the 8 samples
|
||||
* for this subsubframe. */
|
||||
const int8_t *ptr = &hf_vq[vq_num[l]][vq_offset];
|
||||
float fscale = scale[l][0] * (1 / 16.0);
|
||||
for (i = 0; i < 8; i++)
|
||||
dst[l][i] = ptr[i] * fscale;
|
||||
}
|
||||
}
|
||||
|
||||
static void decode_hf_int_c(int32_t dst[DCA_SUBBANDS][8],
|
||||
const int32_t vq_num[DCA_SUBBANDS],
|
||||
const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||
int32_t scale[DCA_SUBBANDS][2],
|
||||
intptr_t start, intptr_t end)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
@ -141,7 +123,6 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
|
||||
s->lfe_fir[1] = dca_lfe_fir1_c;
|
||||
s->qmf_32_subbands = dca_qmf_32_subbands;
|
||||
s->decode_hf = decode_hf_c;
|
||||
s->decode_hf_int = decode_hf_int_c;
|
||||
s->dequantize = dequantize_c;
|
||||
|
||||
if (ARCH_AARCH64)
|
||||
|
@ -32,17 +32,12 @@ typedef struct DCADSPContext {
|
||||
int *synth_buf_offset, float synth_buf2[32],
|
||||
const float window[512], float *samples_out,
|
||||
float raXin[32], float scale);
|
||||
void (*decode_hf)(float dst[DCA_SUBBANDS][8],
|
||||
void (*decode_hf)(int32_t dst[DCA_SUBBANDS][8],
|
||||
const int32_t vq_num[DCA_SUBBANDS],
|
||||
const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||
int32_t scale[DCA_SUBBANDS][2],
|
||||
intptr_t start, intptr_t end);
|
||||
void (*decode_hf_int)(int32_t dst[DCA_SUBBANDS][8],
|
||||
const int32_t vq_num[DCA_SUBBANDS],
|
||||
const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||
int32_t scale[DCA_SUBBANDS][2],
|
||||
intptr_t start, intptr_t end);
|
||||
void (*dequantize)(int32_t *samples, uint32_t step_size, uint64_t scale);
|
||||
void (*dequantize)(int32_t *samples, uint32_t step_size, uint32_t scale);
|
||||
} DCADSPContext;
|
||||
|
||||
void ff_dcadsp_init(DCADSPContext *s);
|
||||
|
@ -26,92 +26,6 @@ pf_inv16: times 4 dd 0x3D800000 ; 1/16
|
||||
|
||||
SECTION .text
|
||||
|
||||
; void decode_hf(float dst[DCA_SUBBANDS][8], const int32_t vq_num[DCA_SUBBANDS],
|
||||
; const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||
; int32_t scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end)
|
||||
|
||||
%macro DECODE_HF 0
|
||||
cglobal decode_hf, 6,6,5, dst, num, src, offset, scale, start, end
|
||||
lea srcq, [srcq + offsetq]
|
||||
shl startq, 2
|
||||
mov offsetd, endm
|
||||
%define DICT offsetq
|
||||
shl offsetq, 2
|
||||
mov endm, offsetq
|
||||
.loop:
|
||||
%if ARCH_X86_64
|
||||
mov offsetd, [scaleq + 2 * startq]
|
||||
cvtsi2ss m0, offsetd
|
||||
%else
|
||||
cvtsi2ss m0, [scaleq + 2 * startq]
|
||||
%endif
|
||||
mov offsetd, [numq + startq]
|
||||
mulss m0, [pf_inv16]
|
||||
shl DICT, 5
|
||||
shufps m0, m0, 0
|
||||
%if cpuflag(sse2)
|
||||
%if cpuflag(sse4)
|
||||
pmovsxbd m1, [srcq + DICT + 0]
|
||||
pmovsxbd m2, [srcq + DICT + 4]
|
||||
%else
|
||||
movq m1, [srcq + DICT]
|
||||
punpcklbw m1, m1
|
||||
mova m2, m1
|
||||
punpcklwd m1, m1
|
||||
punpckhwd m2, m2
|
||||
psrad m1, 24
|
||||
psrad m2, 24
|
||||
%endif
|
||||
cvtdq2ps m1, m1
|
||||
cvtdq2ps m2, m2
|
||||
%else
|
||||
movd mm0, [srcq + DICT + 0]
|
||||
movd mm1, [srcq + DICT + 4]
|
||||
punpcklbw mm0, mm0
|
||||
punpcklbw mm1, mm1
|
||||
movq mm2, mm0
|
||||
movq mm3, mm1
|
||||
punpcklwd mm0, mm0
|
||||
punpcklwd mm1, mm1
|
||||
punpckhwd mm2, mm2
|
||||
punpckhwd mm3, mm3
|
||||
psrad mm0, 24
|
||||
psrad mm1, 24
|
||||
psrad mm2, 24
|
||||
psrad mm3, 24
|
||||
cvtpi2ps m1, mm0
|
||||
cvtpi2ps m2, mm1
|
||||
cvtpi2ps m3, mm2
|
||||
cvtpi2ps m4, mm3
|
||||
shufps m0, m0, 0
|
||||
shufps m1, m3, q1010
|
||||
shufps m2, m4, q1010
|
||||
%endif
|
||||
mulps m1, m0
|
||||
mulps m2, m0
|
||||
mova [dstq + 8 * startq + 0], m1
|
||||
mova [dstq + 8 * startq + 16], m2
|
||||
add startq, 4
|
||||
cmp startq, endm
|
||||
jl .loop
|
||||
.end:
|
||||
%if notcpuflag(sse2)
|
||||
emms
|
||||
%endif
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
%if ARCH_X86_32
|
||||
INIT_XMM sse
|
||||
DECODE_HF
|
||||
%endif
|
||||
|
||||
INIT_XMM sse2
|
||||
DECODE_HF
|
||||
|
||||
INIT_XMM sse4
|
||||
DECODE_HF
|
||||
|
||||
; %1=v0/v1 %2=in1 %3=in2
|
||||
%macro FIR_LOOP 2-3
|
||||
.loop%1:
|
||||
|
@ -23,15 +23,6 @@
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavcodec/dcadsp.h"
|
||||
|
||||
void ff_decode_hf_sse(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
|
||||
const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||
int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
|
||||
void ff_decode_hf_sse2(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
|
||||
const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||
int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
|
||||
void ff_decode_hf_sse4(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
|
||||
const int8_t hf_vq[1024][32], intptr_t vq_offset,
|
||||
int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
|
||||
void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs);
|
||||
void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs);
|
||||
|
||||
@ -40,20 +31,9 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (EXTERNAL_SSE(cpu_flags)) {
|
||||
#if ARCH_X86_32
|
||||
s->decode_hf = ff_decode_hf_sse;
|
||||
#endif
|
||||
s->lfe_fir[0] = ff_dca_lfe_fir0_sse;
|
||||
s->lfe_fir[1] = ff_dca_lfe_fir1_sse;
|
||||
}
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
s->decode_hf = ff_decode_hf_sse2;
|
||||
}
|
||||
|
||||
if (EXTERNAL_SSE4(cpu_flags)) {
|
||||
s->decode_hf = ff_decode_hf_sse4;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -75,16 +75,6 @@
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define randomize_decode_hf() \
|
||||
do { \
|
||||
int i; \
|
||||
for (i = 0; i < DCA_SUBBANDS; i++) { \
|
||||
vq_num[i] = rnd() >> 22; \
|
||||
scale[i][0] = rnd() >> 26; \
|
||||
scale[i][1] = INT32_MIN; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
void checkasm_check_dcadsp(void)
|
||||
{
|
||||
DCADSPContext c;
|
||||
@ -98,40 +88,5 @@ void checkasm_check_dcadsp(void)
|
||||
if (check_func(c.lfe_fir[1], "dca_lfe_fir1"))
|
||||
check_lfe_fir(64, 1.0e-6f);
|
||||
|
||||
if (check_func(c.decode_hf, "dca_decode_hf")) {
|
||||
LOCAL_ALIGNED_16(float, dst0, [DCA_SUBBANDS], [8]);
|
||||
LOCAL_ALIGNED_16(float, dst1, [DCA_SUBBANDS], [8]);
|
||||
LOCAL_ALIGNED_16(int32_t, scale, [DCA_SUBBANDS], [2]);
|
||||
LOCAL_ALIGNED_16(int32_t, vq_num, [DCA_SUBBANDS]);
|
||||
intptr_t start, end = 32, offset;
|
||||
|
||||
declare_func(void, float[DCA_SUBBANDS][8], const int32_t[DCA_SUBBANDS],
|
||||
const int8_t[1024][DCA_SUBBANDS], intptr_t, int32_t[DCA_SUBBANDS][2],
|
||||
intptr_t, intptr_t);
|
||||
|
||||
for (start = 0; start < 32; start++) {
|
||||
for (offset = 0; offset < 32; offset += 8) {
|
||||
int j;
|
||||
for (j = 0; j < DCA_SUBBANDS; j++) {
|
||||
memset(dst0[j], 0, sizeof(*(dst0[j])) * 8);
|
||||
memset(dst1[j], 0, sizeof(*(dst1[j])) * 8);
|
||||
}
|
||||
randomize_decode_hf();
|
||||
|
||||
call_ref(dst0, vq_num, ff_dca_high_freq_vq, offset, scale, start, end);
|
||||
call_new(dst1, vq_num, ff_dca_high_freq_vq, offset, scale, start, end);
|
||||
|
||||
for (j = 0; j < 8 * DCA_SUBBANDS; j++) {
|
||||
if (!float_near_ulp(dst0[j>>3][j&7], dst1[j>>3][j&7], 1)) {
|
||||
fail();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bench_new(dst1, vq_num, ff_dca_high_freq_vq, offset, scale, start, end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
report("dcadsp");
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user