Mirror of https://github.com/FFmpeg/FFmpeg.git (last synced 2025-01-13 21:28:01 +02:00)
lavu/x86/tx_float: fix FMA3 implying AVX2 is available
It's the other way around - AVX2 implies FMA3 is available.
This commit is contained in:
Parent: a5f996de4f
Commit: e448a4b4ea
@ -100,7 +100,7 @@ SECTION .text
|
|||||||
; %6 - temporary register (for avx only)
|
; %6 - temporary register (for avx only)
|
||||||
; %7 - temporary register (for avx only, enables vgatherdpd (AVX2) if FMA3 is set)
|
; %7 - temporary register (for avx only, enables vgatherdpd if AVX2 is set)
|
||||||
%macro LOAD64_LUT 5-7
|
%macro LOAD64_LUT 5-7
|
||||||
%if %0 > 6 && cpuflag(fma3)
|
%if %0 > 6 && cpuflag(avx2)
|
||||||
pcmpeqd %6, %6 ; pcmpeqq has a 0.5 throughput on Zen 3, this has 0.25
|
pcmpeqd %6, %6 ; pcmpeqq has a 0.5 throughput on Zen 3, this has 0.25
|
||||||
movapd xmm%7, [%3 + %4] ; float mov since vgatherdpd is a float instruction
|
movapd xmm%7, [%3 + %4] ; float mov since vgatherdpd is a float instruction
|
||||||
vgatherdpd %1, [%2 + xmm%7*8], %6 ; must use separate registers for args
|
vgatherdpd %1, [%2 + xmm%7*8], %6 ; must use separate registers for args
|
||||||
@ -1208,5 +1208,5 @@ FFT_SPLIT_RADIX_DEF 131072
|
|||||||
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
FFT_SPLIT_RADIX_FN avx
|
FFT_SPLIT_RADIX_FN avx
|
||||||
FFT_SPLIT_RADIX_FN fma3
|
FFT_SPLIT_RADIX_FN avx2
|
||||||
%endif
|
%endif
|
||||||
|
@ -32,7 +32,7 @@ void ff_fft32_float_avx (AVTXContext *s, void *out, void *in, ptrdiff_t stri
|
|||||||
void ff_fft32_float_fma3 (AVTXContext *s, void *out, void *in, ptrdiff_t stride);
|
void ff_fft32_float_fma3 (AVTXContext *s, void *out, void *in, ptrdiff_t stride);
|
||||||
|
|
||||||
void ff_split_radix_fft_float_avx (AVTXContext *s, void *out, void *in, ptrdiff_t stride);
|
void ff_split_radix_fft_float_avx (AVTXContext *s, void *out, void *in, ptrdiff_t stride);
|
||||||
void ff_split_radix_fft_float_fma3(AVTXContext *s, void *out, void *in, ptrdiff_t stride);
|
void ff_split_radix_fft_float_avx2(AVTXContext *s, void *out, void *in, ptrdiff_t stride);
|
||||||
|
|
||||||
av_cold void ff_tx_init_float_x86(AVTXContext *s, av_tx_fn *tx)
|
av_cold void ff_tx_init_float_x86(AVTXContext *s, av_tx_fn *tx)
|
||||||
{
|
{
|
||||||
@ -87,10 +87,15 @@ av_cold void ff_tx_init_float_x86(AVTXContext *s, av_tx_fn *tx)
|
|||||||
#if ARCH_X86_64
|
#if ARCH_X86_64
|
||||||
else if (s->m == 32)
|
else if (s->m == 32)
|
||||||
TXFN(ff_fft32_float_fma3, 1, 8, 2);
|
TXFN(ff_fft32_float_fma3, 1, 8, 2);
|
||||||
else if (s->m >= 64 && s->m <= 131072 && !(s->flags & AV_TX_INPLACE))
|
|
||||||
TXFN(ff_split_radix_fft_float_fma3, 1, 8, 2);
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if ARCH_X86_64
|
||||||
|
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
|
||||||
|
if (s->m >= 64 && s->m <= 131072 && !(s->flags & AV_TX_INPLACE))
|
||||||
|
TXFN(ff_split_radix_fft_float_avx2, 1, 8, 2);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gen_revtab)
|
if (gen_revtab)
|
||||||
|
Loading…
Reference in New Issue
Block a user