You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-15 14:13:16 +02:00
x86: fft: replace call to memcpy by a loop
The function call was a mess to handle, and memcpy cannot make the assumptions we do in the new code.

Tested on an IMC sample: 430c -> 370c.

Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
committed by
Mans Rullgard
parent
75d339e044
commit
a5bfa66df5
@@ -615,8 +615,6 @@ cglobal fft_calc, 2,5,8
|
|||||||
.end:
|
.end:
|
||||||
REP_RET
|
REP_RET
|
||||||
|
|
||||||
cextern_naked memcpy
|
|
||||||
|
|
||||||
cglobal fft_permute, 2,7,1
|
cglobal fft_permute, 2,7,1
|
||||||
mov r4, [r0 + FFTContext.revtab]
|
mov r4, [r0 + FFTContext.revtab]
|
||||||
mov r5, [r0 + FFTContext.tmpbuf]
|
mov r5, [r0 + FFTContext.tmpbuf]
|
||||||
@@ -637,29 +635,18 @@ cglobal fft_permute, 2,7,1
|
|||||||
cmp r0, r2
|
cmp r0, r2
|
||||||
jl .loop
|
jl .loop
|
||||||
shl r2, 3
|
shl r2, 3
|
||||||
%if ARCH_X86_64
|
add r1, r2
|
||||||
mov r0, r1
|
add r5, r2
|
||||||
mov r1, r5
|
neg r2
|
||||||
%endif
|
; nbits >= 2 (FFT4) and sizeof(FFTComplex)=8 => at least 32B
|
||||||
%if WIN64
|
.loopcopy:
|
||||||
sub rsp, 8
|
movaps xmm0, [r5 + r2]
|
||||||
call memcpy
|
movaps xmm1, [r5 + r2 + 16]
|
||||||
add rsp, 8
|
movaps [r1 + r2], xmm0
|
||||||
RET
|
movaps [r1 + r2 + 16], xmm1
|
||||||
%elif ARCH_X86_64
|
add r2, 32
|
||||||
%ifdef PIC
|
jl .loopcopy
|
||||||
jmp memcpy wrt ..plt
|
REP_RET
|
||||||
%else
|
|
||||||
jmp memcpy
|
|
||||||
%endif
|
|
||||||
%else
|
|
||||||
push r2
|
|
||||||
push r5
|
|
||||||
push r1
|
|
||||||
call memcpy
|
|
||||||
add esp, 12
|
|
||||||
RET
|
|
||||||
%endif
|
|
||||||
|
|
||||||
cglobal imdct_calc, 3,5,3
|
cglobal imdct_calc, 3,5,3
|
||||||
mov r3d, [r0 + FFTContext.mdctsize]
|
mov r3d, [r0 + FFTContext.mdctsize]
|
||||||
|
Reference in New Issue
Block a user