mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-03-17 20:17:55 +02:00
dsputil: remove shift parameter from scalarproduct_int16
There is only one caller, and it does not need the shift. Other potential use cases would need different rounding than this shift provides. The x86 and NEON versions are modified accordingly. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
This commit is contained in:
parent
dabf8dd34a
commit
7e1ce6a6ac
@@ -106,7 +106,7 @@ int16_t ff_acelp_decode_gain_code(
|
|||||||
mr_energy += quant_energy[i] * ma_prediction_coeff[i];
|
mr_energy += quant_energy[i] * ma_prediction_coeff[i];
|
||||||
|
|
||||||
mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) /
|
mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) /
|
||||||
sqrt(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size, 0));
|
sqrt(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size));
|
||||||
return mr_energy >> 12;
|
return mr_energy >> 12;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -171,8 +171,7 @@ void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
|
|||||||
|
|
||||||
void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
|
void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
|
||||||
|
|
||||||
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len,
|
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
|
||||||
int shift);
|
|
||||||
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
|
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
|
||||||
const int16_t *v3, int len, int mul);
|
const int16_t *v3, int len, int mul);
|
||||||
|
|
||||||
|
@@ -29,31 +29,7 @@ function ff_scalarproduct_int16_neon, export=1
|
|||||||
vmov.i16 q1, #0
|
vmov.i16 q1, #0
|
||||||
vmov.i16 q2, #0
|
vmov.i16 q2, #0
|
||||||
vmov.i16 q3, #0
|
vmov.i16 q3, #0
|
||||||
negs r3, r3
|
|
||||||
beq 2f
|
|
||||||
|
|
||||||
vdup.s32 q12, r3
|
|
||||||
1: vld1.16 {d16-d17}, [r0]!
|
1: vld1.16 {d16-d17}, [r0]!
|
||||||
vld1.16 {d20-d21}, [r1,:128]!
|
|
||||||
vmull.s16 q12, d16, d20
|
|
||||||
vld1.16 {d18-d19}, [r0]!
|
|
||||||
vmull.s16 q13, d17, d21
|
|
||||||
vld1.16 {d22-d23}, [r1,:128]!
|
|
||||||
vmull.s16 q14, d18, d22
|
|
||||||
vmull.s16 q15, d19, d23
|
|
||||||
vshl.s32 q8, q12, q12
|
|
||||||
vshl.s32 q9, q13, q12
|
|
||||||
vadd.s32 q0, q0, q8
|
|
||||||
vshl.s32 q10, q14, q12
|
|
||||||
vadd.s32 q1, q1, q9
|
|
||||||
vshl.s32 q11, q15, q12
|
|
||||||
vadd.s32 q2, q2, q10
|
|
||||||
vadd.s32 q3, q3, q11
|
|
||||||
subs r2, r2, #16
|
|
||||||
bne 1b
|
|
||||||
b 3f
|
|
||||||
|
|
||||||
2: vld1.16 {d16-d17}, [r0]!
|
|
||||||
vld1.16 {d20-d21}, [r1,:128]!
|
vld1.16 {d20-d21}, [r1,:128]!
|
||||||
vmlal.s16 q0, d16, d20
|
vmlal.s16 q0, d16, d20
|
||||||
vld1.16 {d18-d19}, [r0]!
|
vld1.16 {d18-d19}, [r0]!
|
||||||
@@ -62,9 +38,9 @@ function ff_scalarproduct_int16_neon, export=1
|
|||||||
vmlal.s16 q2, d18, d22
|
vmlal.s16 q2, d18, d22
|
||||||
vmlal.s16 q3, d19, d23
|
vmlal.s16 q3, d19, d23
|
||||||
subs r2, r2, #16
|
subs r2, r2, #16
|
||||||
bne 2b
|
bne 1b
|
||||||
|
|
||||||
3: vpadd.s32 d16, d0, d1
|
vpadd.s32 d16, d0, d1
|
||||||
vpadd.s32 d17, d2, d3
|
vpadd.s32 d17, d2, d3
|
||||||
vpadd.s32 d10, d4, d5
|
vpadd.s32 d10, d4, d5
|
||||||
vpadd.s32 d11, d6, d7
|
vpadd.s32 d11, d6, d7
|
||||||
|
@@ -2559,12 +2559,12 @@ static void vector_clipf_c(float *dst, const float *src, float min, float max, i
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
|
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
|
||||||
{
|
{
|
||||||
int res = 0;
|
int res = 0;
|
||||||
|
|
||||||
while (order--)
|
while (order--)
|
||||||
res += (*v1++ * *v2++) >> shift;
|
res += *v1++ * *v2++;
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@@ -536,9 +536,8 @@ typedef struct DSPContext {
|
|||||||
/**
|
/**
|
||||||
* Calculate scalar product of two vectors.
|
* Calculate scalar product of two vectors.
|
||||||
* @param len length of vectors, should be multiple of 16
|
* @param len length of vectors, should be multiple of 16
|
||||||
* @param shift number of bits to discard from product
|
|
||||||
*/
|
*/
|
||||||
int32_t (*scalarproduct_int16)(const int16_t *v1, const int16_t *v2/*align 16*/, int len, int shift);
|
int32_t (*scalarproduct_int16)(const int16_t *v1, const int16_t *v2/*align 16*/, int len);
|
||||||
/* ape functions */
|
/* ape functions */
|
||||||
/**
|
/**
|
||||||
* Calculate scalar product of v1 and v2,
|
* Calculate scalar product of v1 and v2,
|
||||||
|
@@ -35,13 +35,12 @@ pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
|
|||||||
SECTION_TEXT
|
SECTION_TEXT
|
||||||
|
|
||||||
%macro SCALARPRODUCT 1
|
%macro SCALARPRODUCT 1
|
||||||
; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order, int shift)
|
; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
|
||||||
cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift
|
cglobal scalarproduct_int16_%1, 3,3,3, v1, v2, order
|
||||||
shl orderq, 1
|
shl orderq, 1
|
||||||
add v1q, orderq
|
add v1q, orderq
|
||||||
add v2q, orderq
|
add v2q, orderq
|
||||||
neg orderq
|
neg orderq
|
||||||
movd m3, shiftm
|
|
||||||
pxor m2, m2
|
pxor m2, m2
|
||||||
.loop:
|
.loop:
|
||||||
movu m0, [v1q + orderq]
|
movu m0, [v1q + orderq]
|
||||||
@@ -55,10 +54,8 @@ cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift
|
|||||||
%if mmsize == 16
|
%if mmsize == 16
|
||||||
movhlps m0, m2
|
movhlps m0, m2
|
||||||
paddd m2, m0
|
paddd m2, m0
|
||||||
psrad m2, m3
|
|
||||||
pshuflw m0, m2, 0x4e
|
pshuflw m0, m2, 0x4e
|
||||||
%else
|
%else
|
||||||
psrad m2, m3
|
|
||||||
pshufw m0, m2, 0x4e
|
pshufw m0, m2, 0x4e
|
||||||
%endif
|
%endif
|
||||||
paddd m2, m0
|
paddd m2, m0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user