mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-24 13:56:33 +02:00
x86/tx_float: save a branch during coefficient deinterleaving
Directly branch into the special 64-point deinterleave subroutine rather than going through the general deinterleave.
64-point transform timings on Zen 3:
Before: 1974 decicycles in av_tx (fft), 16776864 runs, 352 skips
After: 1956 decicycles in av_tx (fft), 16775378 runs, 1838 skips
This commit is contained in:
parent
5cdf4c0bed
commit
98b32ef462
@@ -1044,7 +1044,7 @@ ALIGN 16
|
||||
add lutq, (mmsize/2)*8
|
||||
%endif
|
||||
cmp tgtq, 64
|
||||
- je .deinterleave
|
||||
+ je .64pt_deint
|
||||
|
||||
SPLIT_RADIX_COMBINE_64
|
||||
|
||||
@@ -1190,9 +1190,6 @@ FFT_SPLIT_RADIX_DEF 131072
|
||||
; Final synthesis + deinterleaving code
|
||||
;===============================================================================
|
||||
.deinterleave:
|
||||
- cmp lenq, 64
|
||||
- je .64pt_deint
|
||||
|
||||
imul tmpq, lenq, 2
|
||||
lea lutq, [4*lenq + tmpq]
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user