diff --git a/libavcodec/i386/fft_sse.c b/libavcodec/i386/fft_sse.c index 966b8a93f5..39e64c7008 100644 --- a/libavcodec/i386/fft_sse.c +++ b/libavcodec/i386/fft_sse.c @@ -170,12 +170,12 @@ void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, asm volatile ( "movaps %0, %%xmm0 \n\t" // xmm0 = r0 X r1 X : in2 "movaps %1, %%xmm3 \n\t" // xmm3 = X i1 X i0: in1 - "movaps -16+%0, %%xmm4 \n\t" // xmm4 = r0 X r1 X : in2 - "movaps 16+%1, %%xmm7 \n\t" // xmm7 = X i1 X i0: in1 + "movaps -16+1*%0, %%xmm4 \n\t" // xmm4 = r0 X r1 X : in2 + "movaps 16+1*%1, %%xmm7 \n\t" // xmm7 = X i1 X i0: in1 "movlps %2, %%xmm1 \n\t" // xmm1 = X X R1 R0: tcos "movlps %3, %%xmm2 \n\t" // xmm2 = X X I1 I0: tsin - "movlps 8+%2, %%xmm5 \n\t" // xmm5 = X X R1 R0: tcos - "movlps 8+%3, %%xmm6 \n\t" // xmm6 = X X I1 I0: tsin + "movlps 8+1*%2, %%xmm5 \n\t" // xmm5 = X X R1 R0: tcos + "movlps 8+1*%3, %%xmm6 \n\t" // xmm6 = X X I1 I0: tsin "shufps $95, %%xmm0, %%xmm0 \n\t" // xmm0 = r1 r1 r0 r0 "shufps $160,%%xmm3, %%xmm3 \n\t" // xmm3 = i1 i1 i0 i0 "shufps $95, %%xmm4, %%xmm4 \n\t" // xmm4 = r1 r1 r0 r0 @@ -222,13 +222,13 @@ void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, for (k = 0; k < n4; k += 4) { asm ( "movaps %0, %%xmm0 \n\t" // xmm0 = i1 r1 i0 r0: z - "movaps 16+%0, %%xmm4 \n\t" // xmm4 = i1 r1 i0 r0: z + "movaps 16+1*%0, %%xmm4 \n\t" // xmm4 = i1 r1 i0 r0: z "movlps %1, %%xmm1 \n\t" // xmm1 = X X R1 R0: tcos - "movlps 8+%1, %%xmm5 \n\t" // xmm5 = X X R1 R0: tcos + "movlps 8+1*%1, %%xmm5 \n\t" // xmm5 = X X R1 R0: tcos "movaps %%xmm0, %%xmm3 \n\t" // xmm3 = i1 r1 i0 r0 "movaps %%xmm4, %%xmm7 \n\t" // xmm7 = i1 r1 i0 r0 "movlps %2, %%xmm2 \n\t" // xmm2 = X X I1 I0: tsin - "movlps 8+%2, %%xmm6 \n\t" // xmm6 = X X I1 I0: tsin + "movlps 8+1*%2, %%xmm6 \n\t" // xmm6 = X X I1 I0: tsin "shufps $160,%%xmm0, %%xmm0 \n\t" // xmm0 = r1 r1 r0 r0 "shufps $245,%%xmm3, %%xmm3 \n\t" // xmm3 = i1 i1 i0 i0 "shufps $160,%%xmm4, %%xmm4 \n\t" // xmm4 = r1 r1 r0 r0 @@ -248,7 +248,7 @@ void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, "addps %%xmm3, %%xmm0 \n\t" // xmm0 = result "addps %%xmm7, %%xmm4 \n\t" // xmm4 = result "movaps %%xmm0, %0 \n\t" - "movaps %%xmm4, 16+%0 \n\t" + "movaps %%xmm4, 16+1*%0\n\t" :"+m"(z[k]) :"m"(tcos[k]), "m"(tsin[k]) #ifndef ARCH_X86_64