From 80ba1ddb58b5923b9f36a6acd542affc4ca722eb Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Mon, 31 Jan 2011 19:26:02 +0000 Subject: [PATCH] Remove unneeded add bias from 3 functions. DSPContext.vector_fmul_window() DCADSPContext.lfe_fir() SynthFilterContext.synth_filter_float() Signed-off-by: Mans Rullgard --- libavcodec/aacdec.c | 20 ++++++++++---------- libavcodec/ac3dec.c | 4 ++-- libavcodec/arm/dcadsp_init_arm.c | 2 +- libavcodec/arm/dcadsp_neon.S | 5 ++--- libavcodec/arm/dsputil_init_neon.c | 3 +-- libavcodec/arm/dsputil_neon.S | 15 +++++---------- libavcodec/arm/fft_init_arm.c | 2 +- libavcodec/arm/synth_filter_neon.S | 8 +++----- libavcodec/atrac1.c | 2 +- libavcodec/dca.c | 4 ++-- libavcodec/dcadsp.c | 6 +++--- libavcodec/dcadsp.h | 2 +- libavcodec/dsputil.c | 10 ++++++---- libavcodec/dsputil.h | 5 +---- libavcodec/ppc/float_altivec.c | 14 ++++---------- libavcodec/synth_filter.c | 6 +++--- libavcodec/synth_filter.h | 2 +- libavcodec/twinvq.c | 1 - libavcodec/vorbis_dec.c | 6 +++--- libavcodec/wmaprodec.c | 2 +- libavcodec/x86/dsputil_mmx.c | 20 ++++++++------------ 21 files changed, 59 insertions(+), 80 deletions(-) diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c index 21270994da..0ea7dc84a5 100644 --- a/libavcodec/aacdec.c +++ b/libavcodec/aacdec.c @@ -1721,19 +1721,19 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce) */ if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { - ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, 0, 512); + ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512); } else { memcpy( out, saved, 448 * sizeof(float)); if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { - ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 0, 64); - ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 0, 64); - ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 0, 64); - ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 0, 64); - ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 0, 64); + ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64); + ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64); + ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64); + ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64); + ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64); memcpy( out + 448 + 4*128, temp, 64 * sizeof(float)); } else { - ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 0, 64); + ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64); memcpy( out + 576, buf + 64, 448 * sizeof(float)); } } @@ -1741,9 +1741,9 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce) // buffer update if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { memcpy( saved, temp + 64, 64 * sizeof(float)); - ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64); - ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64); - ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64); + ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64); + ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64); + ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64); memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { memcpy( saved, buf + 512, 448 * sizeof(float)); diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c index ab738771b5..8e40ce1ccc 100644 --- a/libavcodec/ac3dec.c +++ b/libavcodec/ac3dec.c @@ -628,13 +628,13 @@ static inline void do_imdct(AC3DecodeContext *s, int channels) for(i=0; i<128; i++) x[i] = s->transform_coeffs[ch][2*i]; ff_imdct_half(&s->imdct_256, s->tmp_output, x); - s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 0, 128); + s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128); for(i=0; i<128; i++) x[i] = s->transform_coeffs[ch][2*i+1]; ff_imdct_half(&s->imdct_256, s->delay[ch-1], x); } else { ff_imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]); - s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 0, 128); + s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128); memcpy(s->delay[ch-1], s->tmp_output+128, 128*sizeof(float)); } } diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c index 816718d483..5663cd7fc2 100644 --- a/libavcodec/arm/dcadsp_init_arm.c +++ b/libavcodec/arm/dcadsp_init_arm.c @@ -23,7 +23,7 @@ #include "libavcodec/dcadsp.h" void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs, - int decifactor, float scale, float bias); + int decifactor, float scale); void av_cold ff_dcadsp_init_arm(DCADSPContext *s) { diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S index 19960ab193..c3bddd3e41 100644 --- a/libavcodec/arm/dcadsp_neon.S +++ b/libavcodec/arm/dcadsp_neon.S @@ -29,7 +29,7 @@ function ff_dca_lfe_fir_neon, export=1 cmp r3, #32 moveq r6, #256/32 movne r6, #256/64 -NOVFP vldr d0, [sp, #16] @ scale, bias +NOVFP vldr s0, [sp, #16] @ scale mov lr, #-16 1: vmov.f32 q2, #0.0 @ v0 @@ -51,8 +51,7 @@ NOVFP vldr d0, [sp, #16] @ scale, bias vadd.f32 d4, d4, d5 vadd.f32 d6, d6, d7 vpadd.f32 d4, d4, d6 - vdup.32 d5, d0[1] - vmla.f32 d5, d4, d0[0] + vmul.f32 d5, d4, d0[0] vst1.32 {d5[0]}, [r0,:32]! vst1.32 {d5[1]}, [r4,:32]! bne 1b diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 221183cef8..67982048f9 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -140,8 +140,7 @@ void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *); void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len); void ff_vector_fmul_window_neon(float *dst, const float *src0, - const float *src1, const float *win, - float add_bias, int len); + const float *src1, const float *win, int len); void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, int len); void ff_vector_fmul_sv_scalar_2_neon(float *dst, const float *src, diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index 42fb38de52..8329f6cc57 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -777,11 +777,8 @@ function ff_vector_fmul_neon, export=1 endfunc function ff_vector_fmul_window_neon, export=1 -VFP vdup.32 q8, d0[0] -NOVFP vld1.32 {d16[],d17[]}, [sp,:32] push {r4,r5,lr} -VFP ldr lr, [sp, #12] -NOVFP ldr lr, [sp, #16] + ldr lr, [sp, #12] sub r2, r2, #8 sub r5, lr, #2 add r2, r2, r5, lsl #2 @@ -793,14 +790,12 @@ NOVFP ldr lr, [sp, #16] vld1.64 {d4,d5}, [r3,:128]! vld1.64 {d6,d7}, [r4,:128], r5 1: subs lr, lr, #4 - vmov q11, q8 - vmla.f32 d22, d0, d4 - vmov q10, q8 - vmla.f32 d23, d1, d5 + vmul.f32 d22, d0, d4 vrev64.32 q3, q3 - vmla.f32 d20, d0, d7 + vmul.f32 d23, d1, d5 vrev64.32 q1, q1 - vmla.f32 d21, d1, d6 + vmul.f32 d20, d0, d7 + vmul.f32 d21, d1, d6 beq 2f vmla.f32 d22, d3, d7 vld1.64 {d0,d1}, [r1,:128]! diff --git a/libavcodec/arm/fft_init_arm.c b/libavcodec/arm/fft_init_arm.c index bde12400de..3f2d554413 100644 --- a/libavcodec/arm/fft_init_arm.c +++ b/libavcodec/arm/fft_init_arm.c @@ -34,7 +34,7 @@ void ff_synth_filter_float_neon(FFTContext *imdct, float *synth_buf_ptr, int *synth_buf_offset, float synth_buf2[32], const float window[512], float out[32], const float in[32], - float scale, float bias); + float scale); av_cold void ff_fft_init_arm(FFTContext *s) { diff --git a/libavcodec/arm/synth_filter_neon.S b/libavcodec/arm/synth_filter_neon.S index a7c23df02b..1464abe562 100644 --- a/libavcodec/arm/synth_filter_neon.S +++ b/libavcodec/arm/synth_filter_neon.S @@ -42,7 +42,7 @@ VFP vpop {d0} ldr r5, [sp, #9*4] @ window ldr r2, [sp, #10*4] @ out -NOVFP vldr d0, [sp, #12*4] @ scale, bias +NOVFP vldr s0, [sp, #12*4] @ scale add r8, r9, #12*4 mov lr, #64*4 @@ -90,10 +90,8 @@ NOVFP vldr d0, [sp, #12*4] @ scale, bias sub r11, r11, #512*4 b 2b 3: - vdup.32 q8, d0[1] - vdup.32 q9, d0[1] - vmla.f32 q8, q10, d0[0] - vmla.f32 q9, q1, d0[0] + vmul.f32 q8, q10, d0[0] + vmul.f32 q9, q1, d0[0] vst1.32 {q3}, [r3,:128] sub r3, r3, #16*4 vst1.32 {q2}, [r3,:128] diff --git a/libavcodec/atrac1.c b/libavcodec/atrac1.c index 4de1dcea04..be78445b8f 100644 --- a/libavcodec/atrac1.c +++ b/libavcodec/atrac1.c @@ -141,7 +141,7 @@ static int at1_imdct_block(AT1SUCtx* su, AT1Ctx *q) /* overlap and window */ q->dsp.vector_fmul_window(&q->bands[band_num][start_pos], prev_buf, - &su->spectrum[0][ref_pos + start_pos], ff_sine_32, 0, 16); + &su->spectrum[0][ref_pos + start_pos], ff_sine_32, 16); prev_buf = &su->spectrum[0][ref_pos+start_pos + 16]; start_pos += block_size; diff --git a/libavcodec/dca.c b/libavcodec/dca.c index fc5c0b5558..3861813556 100644 --- a/libavcodec/dca.c +++ b/libavcodec/dca.c @@ -896,7 +896,7 @@ static void qmf_32_subbands(DCAContext * s, int chans, s->synth.synth_filter_float(&s->imdct, s->subband_fir_hist[chans], &s->hist_index[chans], s->subband_fir_noidea[chans], prCoeff, - samples_out, s->raXin, scale, 0); + samples_out, s->raXin, scale); samples_out+= 32; } @@ -929,7 +929,7 @@ static void lfe_interpolation_fir(DCAContext *s, int decimation_select, /* Interpolation */ for (deciindex = 0; deciindex < num_deci_sample; deciindex++) { s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor, - scale, 0); + scale); samples_in++; samples_out += 2 * decifactor; } diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c index af48e3ce42..dd4994d276 100644 --- a/libavcodec/dcadsp.c +++ b/libavcodec/dcadsp.c @@ -23,7 +23,7 @@ #include "dcadsp.h" static void dca_lfe_fir_c(float *out, const float *in, const float *coefs, - int decifactor, float scale, float bias) + int decifactor, float scale) { float *out2 = out + decifactor; const float *cf0 = coefs; @@ -39,8 +39,8 @@ static void dca_lfe_fir_c(float *out, const float *in, const float *coefs, v0 += s * *cf0++; v1 += s * *--cf1; } - *out++ = (v0 * scale) + bias; - *out2++ = (v1 * scale) + bias; + *out++ = v0 * scale; + *out2++ = v1 * scale; } } diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h index 20020ae06c..bb157f7650 100644 --- a/libavcodec/dcadsp.h +++ b/libavcodec/dcadsp.h @@ -21,7 +21,7 @@ typedef struct DCADSPContext { void (*lfe_fir)(float *out, const float *in, const float *coefs, - int decifactor, float scale, float bias); + int decifactor, float scale); } DCADSPContext; void ff_dcadsp_init(DCADSPContext *s); diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index f604f0e7f9..2d4ec72026 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -3776,7 +3776,9 @@ static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, dst[i] = src0[i] * src1[i] + src2[i]; } -void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){ +static void vector_fmul_window_c(float *dst, const float *src0, + const float *src1, const float *win, int len) +{ int i,j; dst += len; win += len; @@ -3786,8 +3788,8 @@ void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, c float s1 = src1[j]; float wi = win[i]; float wj = win[j]; - dst[i] = s0*wj - s1*wi + add_bias; - dst[j] = s0*wi + s1*wj + add_bias; + dst[i] = s0*wj - s1*wi; + dst[j] = s0*wi + s1*wj; } } @@ -4434,7 +4436,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->vector_fmul = vector_fmul_c; c->vector_fmul_reverse = vector_fmul_reverse_c; c->vector_fmul_add = vector_fmul_add_c; - c->vector_fmul_window = ff_vector_fmul_window_c; + c->vector_fmul_window = vector_fmul_window_c; c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c; c->vector_clipf = vector_clipf_c; c->float_to_int16 = ff_float_to_int16_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 9a1172b1dc..b942e66a37 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -68,9 +68,6 @@ void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul); void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp); void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); -void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, - const float *win, float add_bias, int len); - /* encoding scans */ extern const uint8_t ff_alternate_horizontal_scan[64]; extern const uint8_t ff_alternate_vertical_scan[64]; @@ -393,7 +390,7 @@ typedef struct DSPContext { /* assume len is a multiple of 8, and src arrays are 16-byte aligned */ void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len); /* assume len is a multiple of 4, and arrays are 16-byte aligned */ - void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len); + void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len); /* assume len is a multiple of 8, and arrays are 16-byte aligned */ void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len); void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */); diff --git a/libavcodec/ppc/float_altivec.c b/libavcodec/ppc/float_altivec.c index 188e03ea2d..60bae9a757 100644 --- a/libavcodec/ppc/float_altivec.c +++ b/libavcodec/ppc/float_altivec.c @@ -90,13 +90,9 @@ static void vector_fmul_add_altivec(float *dst, const float *src0, } } -static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len) +static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, int len) { - union { - vector float v; - float s[4]; - } vadd; - vector float vadd_bias, zero, t0, t1, s0, s1, wi, wj; + vector float zero, t0, t1, s0, s1, wi, wj; const vector unsigned char reverse = vcprm(3,2,1,0); int i,j; @@ -104,8 +100,6 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa win += len; src0+= len; - vadd.s[0] = add_bias; - vadd_bias = vec_splat(vadd.v, 0); zero = (vector float)vec_splat_u32(0); for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) { @@ -117,9 +111,9 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa s1 = vec_perm(s1, s1, reverse); wj = vec_perm(wj, wj, reverse); - t0 = vec_madd(s0, wj, vadd_bias); + t0 = vec_madd(s0, wj, zero); t0 = vec_nmsub(s1, wi, t0); - t1 = vec_madd(s0, wi, vadd_bias); + t1 = vec_madd(s0, wi, zero); t1 = vec_madd(s1, wj, t1); t1 = vec_perm(t1, t1, reverse); diff --git a/libavcodec/synth_filter.c b/libavcodec/synth_filter.c index a0ae364d79..4af496d372 100644 --- a/libavcodec/synth_filter.c +++ b/libavcodec/synth_filter.c @@ -24,7 +24,7 @@ static void synth_filter_float(FFTContext *imdct, float *synth_buf_ptr, int *synth_buf_offset, float synth_buf2[32], const float window[512], - float out[32], const float in[32], float scale, float bias) + float out[32], const float in[32], float scale) { float *synth_buf= synth_buf_ptr + *synth_buf_offset; int i, j; @@ -48,8 +48,8 @@ static void synth_filter_float(FFTContext *imdct, c += window[i + j + 32]*( synth_buf[16 + i + j - 512]); d += window[i + j + 48]*( synth_buf[31 - i + j - 512]); } - out[i ] = a*scale + bias; - out[i + 16] = b*scale + bias; + out[i ] = a*scale; + out[i + 16] = b*scale; synth_buf2[i ] = c; synth_buf2[i + 16] = d; } diff --git a/libavcodec/synth_filter.h b/libavcodec/synth_filter.h index d6209d5dba..33edcc437f 100644 --- a/libavcodec/synth_filter.h +++ b/libavcodec/synth_filter.h @@ -28,7 +28,7 @@ typedef struct SynthFilterContext { float *synth_buf_ptr, int *synth_buf_offset, float synth_buf2[32], const float window[512], float out[32], const float in[32], - float scale, float bias); + float scale); } SynthFilterContext; void ff_synth_filter_init(SynthFilterContext *c); diff --git a/libavcodec/twinvq.c b/libavcodec/twinvq.c index 8334e83d7f..bd43104e6e 100644 --- a/libavcodec/twinvq.c +++ b/libavcodec/twinvq.c @@ -646,7 +646,6 @@ static void imdct_and_window(TwinContext *tctx, enum FrameType ftype, int wtype, prev_buf + (bsize-wsize)/2, buf1 + bsize*j, ff_sine_windows[av_log2(wsize)], - 0.0, wsize/2); out2 += wsize; diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c index 69b784c668..9fef5eb26f 100644 --- a/libavcodec/vorbis_dec.c +++ b/libavcodec/vorbis_dec.c @@ -1575,13 +1575,13 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) const float *win = vc->win[blockflag & previous_window]; if (blockflag == previous_window) { - vc->dsp.vector_fmul_window(ret, saved, buf, win, 0, blocksize / 4); + vc->dsp.vector_fmul_window(ret, saved, buf, win, blocksize / 4); } else if (blockflag > previous_window) { - vc->dsp.vector_fmul_window(ret, saved, buf, win, 0, bs0 / 4); + vc->dsp.vector_fmul_window(ret, saved, buf, win, bs0 / 4); memcpy(ret+bs0/2, buf+bs0/4, ((bs1-bs0)/4) * sizeof(float)); } else { memcpy(ret, saved, ((bs1 - bs0) / 4) * sizeof(float)); - vc->dsp.vector_fmul_window(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, 0, bs0 / 4); + vc->dsp.vector_fmul_window(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, bs0 / 4); } memcpy(saved, buf + blocksize / 4, blocksize / 4 * sizeof(float)); } diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c index bb1cd31146..d248ae4ac0 100644 --- a/libavcodec/wmaprodec.c +++ b/libavcodec/wmaprodec.c @@ -1031,7 +1031,7 @@ static void wmapro_window(WMAProDecodeCtx *s) winlen >>= 1; s->dsp.vector_fmul_window(start, start, start + winlen, - window, 0, winlen); + window, winlen); s->channel[c].prev_block_len = s->subframe_len; } diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 5ddfecae24..cd8d7471dd 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2190,10 +2190,9 @@ static void vector_fmul_add_sse(float *dst, const float *src0, const float *src1 ); } -static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1, - const float *win, float add_bias, int len){ #if HAVE_6REGS - if(add_bias == 0){ +static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1, + const float *win, int len){ x86_reg i = -len*4; x86_reg j = len*4-8; __asm__ volatile( @@ -2220,15 +2219,10 @@ static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float :"+r"(i), "+r"(j) :"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len) ); - }else -#endif - ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len); } static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1, - const float *win, float add_bias, int len){ -#if HAVE_6REGS - if(add_bias == 0){ + const float *win, int len){ x86_reg i = -len*4; x86_reg j = len*4-16; __asm__ volatile( @@ -2256,10 +2250,8 @@ static void vector_fmul_window_sse(float *dst, const float *src0, const float *s :"+r"(i), "+r"(j) :"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len) ); - }else -#endif - ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len); } +#endif /* HAVE_6REGS */ static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len) { @@ -2882,7 +2874,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } if(mm_flags & AV_CPU_FLAG_3DNOWEXT){ c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; +#if HAVE_6REGS c->vector_fmul_window = vector_fmul_window_3dnow2; +#endif if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ c->float_to_int16_interleave = float_to_int16_interleave_3dn2; } @@ -2899,7 +2893,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->vector_fmul = vector_fmul_sse; c->vector_fmul_reverse = vector_fmul_reverse_sse; c->vector_fmul_add = vector_fmul_add_sse; +#if HAVE_6REGS c->vector_fmul_window = vector_fmul_window_sse; +#endif c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse; c->vector_clipf = vector_clipf_sse; c->float_to_int16 = float_to_int16_sse;