You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-10 06:10:52 +02:00
x86/tx_float: save a branch during coefficient deinterleaving
Directly branch into the special 64-point deinterleave subroutine rather than going through the general deinterleave.

64-point transform timings on Zen 3:
Before: 1974 decicycles in av_tx (fft), 16776864 runs, 352 skips
After:  1956 decicycles in av_tx (fft), 16775378 runs, 1838 skips
This commit is contained in:
@@ -1044,7 +1044,7 @@ ALIGN 16
|
|||||||
add lutq, (mmsize/2)*8
|
add lutq, (mmsize/2)*8
|
||||||
%endif
|
%endif
|
||||||
cmp tgtq, 64
|
cmp tgtq, 64
|
||||||
je .deinterleave
|
je .64pt_deint
|
||||||
|
|
||||||
SPLIT_RADIX_COMBINE_64
|
SPLIT_RADIX_COMBINE_64
|
||||||
|
|
||||||
@@ -1190,9 +1190,6 @@ FFT_SPLIT_RADIX_DEF 131072
|
|||||||
; Final synthesis + deinterleaving code
|
; Final synthesis + deinterleaving code
|
||||||
;===============================================================================
|
;===============================================================================
|
||||||
.deinterleave:
|
.deinterleave:
|
||||||
cmp lenq, 64
|
|
||||||
je .64pt_deint
|
|
||||||
|
|
||||||
imul tmpq, lenq, 2
|
imul tmpq, lenq, 2
|
||||||
lea lutq, [4*lenq + tmpq]
|
lea lutq, [4*lenq + tmpq]
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user