mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
celp: optimise ff_celp_lp_synthesis_filter()
Adding instead of subtracting the products in the loop allows the compiler to generate more efficient multiply-accumulate instructions when 16-bit multiply-subtract is not available. ARM has only multiply-accumulate for 16-bit operands. In general, if only one variant exists, it is usually accumulate rather than subtract. In the same spirit, using the dedicated saturation function allows any specially optimised version of that function to be used.

Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
parent
6c4975eaaf
commit
fddc5b9bea
@@ -63,17 +63,16 @@ int ff_celp_lp_synthesis_filter(int16_t *out, const int16_t *filter_coeffs,
|
||||
int i,n;
|
||||
|
||||
for (n = 0; n < buffer_length; n++) {
|
||||
int sum = rounder;
|
||||
int sum = -rounder, sum1;
|
||||
for (i = 1; i <= filter_length; i++)
|
||||
sum -= filter_coeffs[i-1] * out[n-i];
|
||||
sum += filter_coeffs[i-1] * out[n-i];
|
||||
|
||||
sum = ((sum >> 12) + in[n]) >> shift;
|
||||
sum1 = ((-sum >> 12) + in[n]) >> shift;
|
||||
sum = av_clip_int16(sum1);
|
||||
|
||||
if (stop_on_overflow && sum != sum1)
|
||||
return 1;
|
||||
|
||||
if (sum + 0x8000 > 0xFFFFU) {
|
||||
if (stop_on_overflow)
|
||||
return 1;
|
||||
sum = (sum >> 31) ^ 32767;
|
||||
}
|
||||
out[n] = sum;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user