x86: fft: fix imdct_half() for AVX

Some calculations were changed in b6a3849 to use mmsize, which was not correct for the AVX version, which uses INIT_YMM and therefore has mmsize == 32. Fixes Bug 341. Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
2025-11-23 21:54:53 +02:00 · 2012-08-02 12:15:46 -05:00
parent 150adea6da
commit c728518b3c
1 changed files with 10 additions and 3 deletions
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -1009,7 +1009,11 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
    push  rrevtab
 %endif
-    sub   r3, mmsize/4
+%if mmsize == 8
    sub   r3, 2
 %else
    sub   r3, 4
 %endif
 %if ARCH_X86_64 || mmsize == 8
    xor   r4, r4
    sub   r4, r3
@@ -1036,7 +1040,9 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
    mova [r1+r5*8], m0
    mova [r1+r6*8], m2
    add    r4, 2
-%elif ARCH_X86_64
+    sub    r3, 2        ; step r3 (not r4) so the flags seen by "jns .pre" come from r3, as with "sub r3, 4" in the %else path
 %else
 %if ARCH_X86_64
    movzx  r5,  word [rrevtab+r4-4]
    movzx  r6,  word [rrevtab+r4-2]
    movzx  r10, word [rrevtab+r3]
@@ -1057,7 +1063,8 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
    movlps [r1+r5*8], xmm1
    movhps [r1+r4*8], xmm1
 %endif
-    sub    r3, mmsize/4
+    sub    r3, 4
 %endif
    jns    .pre
    mov  r5, r0