mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
Remove unneeded add bias from 3 functions.
Affected functions:
DSPContext.vector_fmul_window()
DCADSPContext.lfe_fir()
SynthFilterContext.synth_filter_float()
Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
parent
8cb3c557a9
commit
80ba1ddb58
@ -1721,19 +1721,19 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
|
|||||||
*/
|
*/
|
||||||
if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
|
if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
|
||||||
(ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
|
(ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
|
||||||
ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, 0, 512);
|
ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512);
|
||||||
} else {
|
} else {
|
||||||
memcpy( out, saved, 448 * sizeof(float));
|
memcpy( out, saved, 448 * sizeof(float));
|
||||||
|
|
||||||
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
|
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
|
||||||
ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 0, 64);
|
ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64);
|
||||||
ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 0, 64);
|
ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64);
|
||||||
ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 0, 64);
|
ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64);
|
||||||
ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 0, 64);
|
ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64);
|
||||||
ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 0, 64);
|
ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64);
|
||||||
memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
|
memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
|
||||||
} else {
|
} else {
|
||||||
ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 0, 64);
|
ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
|
||||||
memcpy( out + 576, buf + 64, 448 * sizeof(float));
|
memcpy( out + 576, buf + 64, 448 * sizeof(float));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1741,9 +1741,9 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
|
|||||||
// buffer update
|
// buffer update
|
||||||
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
|
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
|
||||||
memcpy( saved, temp + 64, 64 * sizeof(float));
|
memcpy( saved, temp + 64, 64 * sizeof(float));
|
||||||
ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
|
ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
|
||||||
ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
|
ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
|
||||||
ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
|
ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
|
||||||
memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
|
memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
|
||||||
} else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
|
} else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
|
||||||
memcpy( saved, buf + 512, 448 * sizeof(float));
|
memcpy( saved, buf + 512, 448 * sizeof(float));
|
||||||
|
@ -628,13 +628,13 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
|
|||||||
for(i=0; i<128; i++)
|
for(i=0; i<128; i++)
|
||||||
x[i] = s->transform_coeffs[ch][2*i];
|
x[i] = s->transform_coeffs[ch][2*i];
|
||||||
ff_imdct_half(&s->imdct_256, s->tmp_output, x);
|
ff_imdct_half(&s->imdct_256, s->tmp_output, x);
|
||||||
s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 0, 128);
|
s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128);
|
||||||
for(i=0; i<128; i++)
|
for(i=0; i<128; i++)
|
||||||
x[i] = s->transform_coeffs[ch][2*i+1];
|
x[i] = s->transform_coeffs[ch][2*i+1];
|
||||||
ff_imdct_half(&s->imdct_256, s->delay[ch-1], x);
|
ff_imdct_half(&s->imdct_256, s->delay[ch-1], x);
|
||||||
} else {
|
} else {
|
||||||
ff_imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
|
ff_imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
|
||||||
s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 0, 128);
|
s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128);
|
||||||
memcpy(s->delay[ch-1], s->tmp_output+128, 128*sizeof(float));
|
memcpy(s->delay[ch-1], s->tmp_output+128, 128*sizeof(float));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
#include "libavcodec/dcadsp.h"
|
#include "libavcodec/dcadsp.h"
|
||||||
|
|
||||||
void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
|
void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
|
||||||
int decifactor, float scale, float bias);
|
int decifactor, float scale);
|
||||||
|
|
||||||
void av_cold ff_dcadsp_init_arm(DCADSPContext *s)
|
void av_cold ff_dcadsp_init_arm(DCADSPContext *s)
|
||||||
{
|
{
|
||||||
|
@ -29,7 +29,7 @@ function ff_dca_lfe_fir_neon, export=1
|
|||||||
cmp r3, #32
|
cmp r3, #32
|
||||||
moveq r6, #256/32
|
moveq r6, #256/32
|
||||||
movne r6, #256/64
|
movne r6, #256/64
|
||||||
NOVFP vldr d0, [sp, #16] @ scale, bias
|
NOVFP vldr s0, [sp, #16] @ scale
|
||||||
mov lr, #-16
|
mov lr, #-16
|
||||||
1:
|
1:
|
||||||
vmov.f32 q2, #0.0 @ v0
|
vmov.f32 q2, #0.0 @ v0
|
||||||
@ -51,8 +51,7 @@ NOVFP vldr d0, [sp, #16] @ scale, bias
|
|||||||
vadd.f32 d4, d4, d5
|
vadd.f32 d4, d4, d5
|
||||||
vadd.f32 d6, d6, d7
|
vadd.f32 d6, d6, d7
|
||||||
vpadd.f32 d4, d4, d6
|
vpadd.f32 d4, d4, d6
|
||||||
vdup.32 d5, d0[1]
|
vmul.f32 d5, d4, d0[0]
|
||||||
vmla.f32 d5, d4, d0[0]
|
|
||||||
vst1.32 {d5[0]}, [r0,:32]!
|
vst1.32 {d5[0]}, [r0,:32]!
|
||||||
vst1.32 {d5[1]}, [r4,:32]!
|
vst1.32 {d5[1]}, [r4,:32]!
|
||||||
bne 1b
|
bne 1b
|
||||||
|
@ -140,8 +140,7 @@ void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *);
|
|||||||
|
|
||||||
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
|
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
|
||||||
void ff_vector_fmul_window_neon(float *dst, const float *src0,
|
void ff_vector_fmul_window_neon(float *dst, const float *src0,
|
||||||
const float *src1, const float *win,
|
const float *src1, const float *win, int len);
|
||||||
float add_bias, int len);
|
|
||||||
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
|
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
|
||||||
int len);
|
int len);
|
||||||
void ff_vector_fmul_sv_scalar_2_neon(float *dst, const float *src,
|
void ff_vector_fmul_sv_scalar_2_neon(float *dst, const float *src,
|
||||||
|
@ -777,11 +777,8 @@ function ff_vector_fmul_neon, export=1
|
|||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
function ff_vector_fmul_window_neon, export=1
|
function ff_vector_fmul_window_neon, export=1
|
||||||
VFP vdup.32 q8, d0[0]
|
|
||||||
NOVFP vld1.32 {d16[],d17[]}, [sp,:32]
|
|
||||||
push {r4,r5,lr}
|
push {r4,r5,lr}
|
||||||
VFP ldr lr, [sp, #12]
|
ldr lr, [sp, #12]
|
||||||
NOVFP ldr lr, [sp, #16]
|
|
||||||
sub r2, r2, #8
|
sub r2, r2, #8
|
||||||
sub r5, lr, #2
|
sub r5, lr, #2
|
||||||
add r2, r2, r5, lsl #2
|
add r2, r2, r5, lsl #2
|
||||||
@ -793,14 +790,12 @@ NOVFP ldr lr, [sp, #16]
|
|||||||
vld1.64 {d4,d5}, [r3,:128]!
|
vld1.64 {d4,d5}, [r3,:128]!
|
||||||
vld1.64 {d6,d7}, [r4,:128], r5
|
vld1.64 {d6,d7}, [r4,:128], r5
|
||||||
1: subs lr, lr, #4
|
1: subs lr, lr, #4
|
||||||
vmov q11, q8
|
vmul.f32 d22, d0, d4
|
||||||
vmla.f32 d22, d0, d4
|
|
||||||
vmov q10, q8
|
|
||||||
vmla.f32 d23, d1, d5
|
|
||||||
vrev64.32 q3, q3
|
vrev64.32 q3, q3
|
||||||
vmla.f32 d20, d0, d7
|
vmul.f32 d23, d1, d5
|
||||||
vrev64.32 q1, q1
|
vrev64.32 q1, q1
|
||||||
vmla.f32 d21, d1, d6
|
vmul.f32 d20, d0, d7
|
||||||
|
vmul.f32 d21, d1, d6
|
||||||
beq 2f
|
beq 2f
|
||||||
vmla.f32 d22, d3, d7
|
vmla.f32 d22, d3, d7
|
||||||
vld1.64 {d0,d1}, [r1,:128]!
|
vld1.64 {d0,d1}, [r1,:128]!
|
||||||
|
@ -34,7 +34,7 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
|
|||||||
float *synth_buf_ptr, int *synth_buf_offset,
|
float *synth_buf_ptr, int *synth_buf_offset,
|
||||||
float synth_buf2[32], const float window[512],
|
float synth_buf2[32], const float window[512],
|
||||||
float out[32], const float in[32],
|
float out[32], const float in[32],
|
||||||
float scale, float bias);
|
float scale);
|
||||||
|
|
||||||
av_cold void ff_fft_init_arm(FFTContext *s)
|
av_cold void ff_fft_init_arm(FFTContext *s)
|
||||||
{
|
{
|
||||||
|
@ -42,7 +42,7 @@ VFP vpop {d0}
|
|||||||
|
|
||||||
ldr r5, [sp, #9*4] @ window
|
ldr r5, [sp, #9*4] @ window
|
||||||
ldr r2, [sp, #10*4] @ out
|
ldr r2, [sp, #10*4] @ out
|
||||||
NOVFP vldr d0, [sp, #12*4] @ scale, bias
|
NOVFP vldr s0, [sp, #12*4] @ scale
|
||||||
add r8, r9, #12*4
|
add r8, r9, #12*4
|
||||||
|
|
||||||
mov lr, #64*4
|
mov lr, #64*4
|
||||||
@ -90,10 +90,8 @@ NOVFP vldr d0, [sp, #12*4] @ scale, bias
|
|||||||
sub r11, r11, #512*4
|
sub r11, r11, #512*4
|
||||||
b 2b
|
b 2b
|
||||||
3:
|
3:
|
||||||
vdup.32 q8, d0[1]
|
vmul.f32 q8, q10, d0[0]
|
||||||
vdup.32 q9, d0[1]
|
vmul.f32 q9, q1, d0[0]
|
||||||
vmla.f32 q8, q10, d0[0]
|
|
||||||
vmla.f32 q9, q1, d0[0]
|
|
||||||
vst1.32 {q3}, [r3,:128]
|
vst1.32 {q3}, [r3,:128]
|
||||||
sub r3, r3, #16*4
|
sub r3, r3, #16*4
|
||||||
vst1.32 {q2}, [r3,:128]
|
vst1.32 {q2}, [r3,:128]
|
||||||
|
@ -141,7 +141,7 @@ static int at1_imdct_block(AT1SUCtx* su, AT1Ctx *q)
|
|||||||
|
|
||||||
/* overlap and window */
|
/* overlap and window */
|
||||||
q->dsp.vector_fmul_window(&q->bands[band_num][start_pos], prev_buf,
|
q->dsp.vector_fmul_window(&q->bands[band_num][start_pos], prev_buf,
|
||||||
&su->spectrum[0][ref_pos + start_pos], ff_sine_32, 0, 16);
|
&su->spectrum[0][ref_pos + start_pos], ff_sine_32, 16);
|
||||||
|
|
||||||
prev_buf = &su->spectrum[0][ref_pos+start_pos + 16];
|
prev_buf = &su->spectrum[0][ref_pos+start_pos + 16];
|
||||||
start_pos += block_size;
|
start_pos += block_size;
|
||||||
|
@ -896,7 +896,7 @@ static void qmf_32_subbands(DCAContext * s, int chans,
|
|||||||
s->synth.synth_filter_float(&s->imdct,
|
s->synth.synth_filter_float(&s->imdct,
|
||||||
s->subband_fir_hist[chans], &s->hist_index[chans],
|
s->subband_fir_hist[chans], &s->hist_index[chans],
|
||||||
s->subband_fir_noidea[chans], prCoeff,
|
s->subband_fir_noidea[chans], prCoeff,
|
||||||
samples_out, s->raXin, scale, 0);
|
samples_out, s->raXin, scale);
|
||||||
samples_out+= 32;
|
samples_out+= 32;
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -929,7 +929,7 @@ static void lfe_interpolation_fir(DCAContext *s, int decimation_select,
|
|||||||
/* Interpolation */
|
/* Interpolation */
|
||||||
for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
|
for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
|
||||||
s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor,
|
s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor,
|
||||||
scale, 0);
|
scale);
|
||||||
samples_in++;
|
samples_in++;
|
||||||
samples_out += 2 * decifactor;
|
samples_out += 2 * decifactor;
|
||||||
}
|
}
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
#include "dcadsp.h"
|
#include "dcadsp.h"
|
||||||
|
|
||||||
static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
|
static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
|
||||||
int decifactor, float scale, float bias)
|
int decifactor, float scale)
|
||||||
{
|
{
|
||||||
float *out2 = out + decifactor;
|
float *out2 = out + decifactor;
|
||||||
const float *cf0 = coefs;
|
const float *cf0 = coefs;
|
||||||
@ -39,8 +39,8 @@ static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
|
|||||||
v0 += s * *cf0++;
|
v0 += s * *cf0++;
|
||||||
v1 += s * *--cf1;
|
v1 += s * *--cf1;
|
||||||
}
|
}
|
||||||
*out++ = (v0 * scale) + bias;
|
*out++ = v0 * scale;
|
||||||
*out2++ = (v1 * scale) + bias;
|
*out2++ = v1 * scale;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
typedef struct DCADSPContext {
|
typedef struct DCADSPContext {
|
||||||
void (*lfe_fir)(float *out, const float *in, const float *coefs,
|
void (*lfe_fir)(float *out, const float *in, const float *coefs,
|
||||||
int decifactor, float scale, float bias);
|
int decifactor, float scale);
|
||||||
} DCADSPContext;
|
} DCADSPContext;
|
||||||
|
|
||||||
void ff_dcadsp_init(DCADSPContext *s);
|
void ff_dcadsp_init(DCADSPContext *s);
|
||||||
|
@ -3776,7 +3776,9 @@ static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
|
|||||||
dst[i] = src0[i] * src1[i] + src2[i];
|
dst[i] = src0[i] * src1[i] + src2[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
|
static void vector_fmul_window_c(float *dst, const float *src0,
|
||||||
|
const float *src1, const float *win, int len)
|
||||||
|
{
|
||||||
int i,j;
|
int i,j;
|
||||||
dst += len;
|
dst += len;
|
||||||
win += len;
|
win += len;
|
||||||
@ -3786,8 +3788,8 @@ void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, c
|
|||||||
float s1 = src1[j];
|
float s1 = src1[j];
|
||||||
float wi = win[i];
|
float wi = win[i];
|
||||||
float wj = win[j];
|
float wj = win[j];
|
||||||
dst[i] = s0*wj - s1*wi + add_bias;
|
dst[i] = s0*wj - s1*wi;
|
||||||
dst[j] = s0*wi + s1*wj + add_bias;
|
dst[j] = s0*wi + s1*wj;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4434,7 +4436,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->vector_fmul = vector_fmul_c;
|
c->vector_fmul = vector_fmul_c;
|
||||||
c->vector_fmul_reverse = vector_fmul_reverse_c;
|
c->vector_fmul_reverse = vector_fmul_reverse_c;
|
||||||
c->vector_fmul_add = vector_fmul_add_c;
|
c->vector_fmul_add = vector_fmul_add_c;
|
||||||
c->vector_fmul_window = ff_vector_fmul_window_c;
|
c->vector_fmul_window = vector_fmul_window_c;
|
||||||
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
|
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
|
||||||
c->vector_clipf = vector_clipf_c;
|
c->vector_clipf = vector_clipf_c;
|
||||||
c->float_to_int16 = ff_float_to_int16_c;
|
c->float_to_int16 = ff_float_to_int16_c;
|
||||||
|
@ -68,9 +68,6 @@ void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul);
|
|||||||
void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
|
void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
|
||||||
void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
|
void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
|
||||||
|
|
||||||
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
|
|
||||||
const float *win, float add_bias, int len);
|
|
||||||
|
|
||||||
/* encoding scans */
|
/* encoding scans */
|
||||||
extern const uint8_t ff_alternate_horizontal_scan[64];
|
extern const uint8_t ff_alternate_horizontal_scan[64];
|
||||||
extern const uint8_t ff_alternate_vertical_scan[64];
|
extern const uint8_t ff_alternate_vertical_scan[64];
|
||||||
@ -393,7 +390,7 @@ typedef struct DSPContext {
|
|||||||
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
|
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
|
||||||
void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
|
void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
|
||||||
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
|
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
|
||||||
void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
|
void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len);
|
||||||
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
|
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
|
||||||
void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
|
void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
|
||||||
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
|
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
|
||||||
|
@ -90,13 +90,9 @@ static void vector_fmul_add_altivec(float *dst, const float *src0,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)
|
static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, int len)
|
||||||
{
|
{
|
||||||
union {
|
vector float zero, t0, t1, s0, s1, wi, wj;
|
||||||
vector float v;
|
|
||||||
float s[4];
|
|
||||||
} vadd;
|
|
||||||
vector float vadd_bias, zero, t0, t1, s0, s1, wi, wj;
|
|
||||||
const vector unsigned char reverse = vcprm(3,2,1,0);
|
const vector unsigned char reverse = vcprm(3,2,1,0);
|
||||||
int i,j;
|
int i,j;
|
||||||
|
|
||||||
@ -104,8 +100,6 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa
|
|||||||
win += len;
|
win += len;
|
||||||
src0+= len;
|
src0+= len;
|
||||||
|
|
||||||
vadd.s[0] = add_bias;
|
|
||||||
vadd_bias = vec_splat(vadd.v, 0);
|
|
||||||
zero = (vector float)vec_splat_u32(0);
|
zero = (vector float)vec_splat_u32(0);
|
||||||
|
|
||||||
for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {
|
for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {
|
||||||
@ -117,9 +111,9 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa
|
|||||||
s1 = vec_perm(s1, s1, reverse);
|
s1 = vec_perm(s1, s1, reverse);
|
||||||
wj = vec_perm(wj, wj, reverse);
|
wj = vec_perm(wj, wj, reverse);
|
||||||
|
|
||||||
t0 = vec_madd(s0, wj, vadd_bias);
|
t0 = vec_madd(s0, wj, zero);
|
||||||
t0 = vec_nmsub(s1, wi, t0);
|
t0 = vec_nmsub(s1, wi, t0);
|
||||||
t1 = vec_madd(s0, wi, vadd_bias);
|
t1 = vec_madd(s0, wi, zero);
|
||||||
t1 = vec_madd(s1, wj, t1);
|
t1 = vec_madd(s1, wj, t1);
|
||||||
t1 = vec_perm(t1, t1, reverse);
|
t1 = vec_perm(t1, t1, reverse);
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@
|
|||||||
static void synth_filter_float(FFTContext *imdct,
|
static void synth_filter_float(FFTContext *imdct,
|
||||||
float *synth_buf_ptr, int *synth_buf_offset,
|
float *synth_buf_ptr, int *synth_buf_offset,
|
||||||
float synth_buf2[32], const float window[512],
|
float synth_buf2[32], const float window[512],
|
||||||
float out[32], const float in[32], float scale, float bias)
|
float out[32], const float in[32], float scale)
|
||||||
{
|
{
|
||||||
float *synth_buf= synth_buf_ptr + *synth_buf_offset;
|
float *synth_buf= synth_buf_ptr + *synth_buf_offset;
|
||||||
int i, j;
|
int i, j;
|
||||||
@ -48,8 +48,8 @@ static void synth_filter_float(FFTContext *imdct,
|
|||||||
c += window[i + j + 32]*( synth_buf[16 + i + j - 512]);
|
c += window[i + j + 32]*( synth_buf[16 + i + j - 512]);
|
||||||
d += window[i + j + 48]*( synth_buf[31 - i + j - 512]);
|
d += window[i + j + 48]*( synth_buf[31 - i + j - 512]);
|
||||||
}
|
}
|
||||||
out[i ] = a*scale + bias;
|
out[i ] = a*scale;
|
||||||
out[i + 16] = b*scale + bias;
|
out[i + 16] = b*scale;
|
||||||
synth_buf2[i ] = c;
|
synth_buf2[i ] = c;
|
||||||
synth_buf2[i + 16] = d;
|
synth_buf2[i + 16] = d;
|
||||||
}
|
}
|
||||||
|
@ -28,7 +28,7 @@ typedef struct SynthFilterContext {
|
|||||||
float *synth_buf_ptr, int *synth_buf_offset,
|
float *synth_buf_ptr, int *synth_buf_offset,
|
||||||
float synth_buf2[32], const float window[512],
|
float synth_buf2[32], const float window[512],
|
||||||
float out[32], const float in[32],
|
float out[32], const float in[32],
|
||||||
float scale, float bias);
|
float scale);
|
||||||
} SynthFilterContext;
|
} SynthFilterContext;
|
||||||
|
|
||||||
void ff_synth_filter_init(SynthFilterContext *c);
|
void ff_synth_filter_init(SynthFilterContext *c);
|
||||||
|
@ -646,7 +646,6 @@ static void imdct_and_window(TwinContext *tctx, enum FrameType ftype, int wtype,
|
|||||||
prev_buf + (bsize-wsize)/2,
|
prev_buf + (bsize-wsize)/2,
|
||||||
buf1 + bsize*j,
|
buf1 + bsize*j,
|
||||||
ff_sine_windows[av_log2(wsize)],
|
ff_sine_windows[av_log2(wsize)],
|
||||||
0.0,
|
|
||||||
wsize/2);
|
wsize/2);
|
||||||
out2 += wsize;
|
out2 += wsize;
|
||||||
|
|
||||||
|
@ -1575,13 +1575,13 @@ static int vorbis_parse_audio_packet(vorbis_context *vc)
|
|||||||
const float *win = vc->win[blockflag & previous_window];
|
const float *win = vc->win[blockflag & previous_window];
|
||||||
|
|
||||||
if (blockflag == previous_window) {
|
if (blockflag == previous_window) {
|
||||||
vc->dsp.vector_fmul_window(ret, saved, buf, win, 0, blocksize / 4);
|
vc->dsp.vector_fmul_window(ret, saved, buf, win, blocksize / 4);
|
||||||
} else if (blockflag > previous_window) {
|
} else if (blockflag > previous_window) {
|
||||||
vc->dsp.vector_fmul_window(ret, saved, buf, win, 0, bs0 / 4);
|
vc->dsp.vector_fmul_window(ret, saved, buf, win, bs0 / 4);
|
||||||
memcpy(ret+bs0/2, buf+bs0/4, ((bs1-bs0)/4) * sizeof(float));
|
memcpy(ret+bs0/2, buf+bs0/4, ((bs1-bs0)/4) * sizeof(float));
|
||||||
} else {
|
} else {
|
||||||
memcpy(ret, saved, ((bs1 - bs0) / 4) * sizeof(float));
|
memcpy(ret, saved, ((bs1 - bs0) / 4) * sizeof(float));
|
||||||
vc->dsp.vector_fmul_window(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, 0, bs0 / 4);
|
vc->dsp.vector_fmul_window(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, bs0 / 4);
|
||||||
}
|
}
|
||||||
memcpy(saved, buf + blocksize / 4, blocksize / 4 * sizeof(float));
|
memcpy(saved, buf + blocksize / 4, blocksize / 4 * sizeof(float));
|
||||||
}
|
}
|
||||||
|
@ -1031,7 +1031,7 @@ static void wmapro_window(WMAProDecodeCtx *s)
|
|||||||
winlen >>= 1;
|
winlen >>= 1;
|
||||||
|
|
||||||
s->dsp.vector_fmul_window(start, start, start + winlen,
|
s->dsp.vector_fmul_window(start, start, start + winlen,
|
||||||
window, 0, winlen);
|
window, winlen);
|
||||||
|
|
||||||
s->channel[c].prev_block_len = s->subframe_len;
|
s->channel[c].prev_block_len = s->subframe_len;
|
||||||
}
|
}
|
||||||
|
@ -2190,10 +2190,9 @@ static void vector_fmul_add_sse(float *dst, const float *src0, const float *src1
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
|
|
||||||
const float *win, float add_bias, int len){
|
|
||||||
#if HAVE_6REGS
|
#if HAVE_6REGS
|
||||||
if(add_bias == 0){
|
static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
|
||||||
|
const float *win, int len){
|
||||||
x86_reg i = -len*4;
|
x86_reg i = -len*4;
|
||||||
x86_reg j = len*4-8;
|
x86_reg j = len*4-8;
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
@ -2220,15 +2219,10 @@ static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float
|
|||||||
:"+r"(i), "+r"(j)
|
:"+r"(i), "+r"(j)
|
||||||
:"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
|
:"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
|
||||||
);
|
);
|
||||||
}else
|
|
||||||
#endif
|
|
||||||
ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1,
|
static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1,
|
||||||
const float *win, float add_bias, int len){
|
const float *win, int len){
|
||||||
#if HAVE_6REGS
|
|
||||||
if(add_bias == 0){
|
|
||||||
x86_reg i = -len*4;
|
x86_reg i = -len*4;
|
||||||
x86_reg j = len*4-16;
|
x86_reg j = len*4-16;
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
@ -2256,10 +2250,8 @@ static void vector_fmul_window_sse(float *dst, const float *src0, const float *s
|
|||||||
:"+r"(i), "+r"(j)
|
:"+r"(i), "+r"(j)
|
||||||
:"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
|
:"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
|
||||||
);
|
);
|
||||||
}else
|
|
||||||
#endif
|
|
||||||
ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
|
|
||||||
}
|
}
|
||||||
|
#endif /* HAVE_6REGS */
|
||||||
|
|
||||||
static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len)
|
static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len)
|
||||||
{
|
{
|
||||||
@ -2882,7 +2874,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
|||||||
}
|
}
|
||||||
if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
|
if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
|
||||||
c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
|
c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
|
||||||
|
#if HAVE_6REGS
|
||||||
c->vector_fmul_window = vector_fmul_window_3dnow2;
|
c->vector_fmul_window = vector_fmul_window_3dnow2;
|
||||||
|
#endif
|
||||||
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
||||||
c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
|
c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
|
||||||
}
|
}
|
||||||
@ -2899,7 +2893,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->vector_fmul = vector_fmul_sse;
|
c->vector_fmul = vector_fmul_sse;
|
||||||
c->vector_fmul_reverse = vector_fmul_reverse_sse;
|
c->vector_fmul_reverse = vector_fmul_reverse_sse;
|
||||||
c->vector_fmul_add = vector_fmul_add_sse;
|
c->vector_fmul_add = vector_fmul_add_sse;
|
||||||
|
#if HAVE_6REGS
|
||||||
c->vector_fmul_window = vector_fmul_window_sse;
|
c->vector_fmul_window = vector_fmul_window_sse;
|
||||||
|
#endif
|
||||||
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
|
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
|
||||||
c->vector_clipf = vector_clipf_sse;
|
c->vector_clipf = vector_clipf_sse;
|
||||||
c->float_to_int16 = float_to_int16_sse;
|
c->float_to_int16 = float_to_int16_sse;
|
||||||
|
Loading…
Reference in New Issue
Block a user