You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	x86/tx_float: enable AVX-only split-radix FFT codelets
Sandy Bridge, Ivy Bridge and Bulldozer cores support AVX but not FMA3, so they could not use the existing FMA3 split-radix codelets.
This commit is contained in:
		| @@ -1379,6 +1379,8 @@ cglobal fft_sr_ns_float, 4, 10, 16, 272, ctx, out, in, tmp, len, lut, itab, rtab | ||||
| %endmacro | ||||
|  | ||||
| %if ARCH_X86_64 | ||||
| FFT_SPLIT_RADIX_FN avx, 0 | ||||
| FFT_SPLIT_RADIX_FN avx, 1 | ||||
| FFT_SPLIT_RADIX_FN fma3, 0 | ||||
| FFT_SPLIT_RADIX_FN fma3, 1 | ||||
| %if HAVE_AVX2_EXTERNAL | ||||
|   | ||||
| @@ -38,6 +38,8 @@ TX_DECL_FN(fft32,     avx) | ||||
| TX_DECL_FN(fft32_ns,  avx) | ||||
| TX_DECL_FN(fft32,     fma3) | ||||
| TX_DECL_FN(fft32_ns,  fma3) | ||||
| TX_DECL_FN(fft_sr,    avx) | ||||
| TX_DECL_FN(fft_sr_ns, avx) | ||||
| TX_DECL_FN(fft_sr,    fma3) | ||||
| TX_DECL_FN(fft_sr_ns, fma3) | ||||
| TX_DECL_FN(fft_sr,    avx2) | ||||
| @@ -57,6 +59,7 @@ TX_DECL_FN(fft16_asm, avx) | ||||
| TX_DECL_FN(fft16_asm, fma3) | ||||
| TX_DECL_FN(fft32_asm, avx) | ||||
| TX_DECL_FN(fft32_asm, fma3) | ||||
| TX_DECL_FN(fft_sr_asm, avx) | ||||
| TX_DECL_FN(fft_sr_asm, fma3) | ||||
| TX_DECL_FN(fft_sr_asm, avx2) | ||||
|  | ||||
| @@ -214,6 +217,11 @@ const FFTXCodelet * const ff_tx_codelet_list_float_x86[] = { | ||||
|            AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, AV_CPU_FLAG_AVXSLOW), | ||||
|     TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE, | ||||
|            AV_CPU_FLAG_AVXSLOW), | ||||
|     TX_DEF(fft_sr,    FFT, 64, 131072, 2, 0, 256, b8_i2, avx, AVX,  0, AV_CPU_FLAG_AVXSLOW), | ||||
|     TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX, | ||||
|            AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, AV_CPU_FLAG_AVXSLOW), | ||||
|     TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX,  AV_TX_INPLACE | FF_TX_PRESHUFFLE, | ||||
|            AV_CPU_FLAG_AVXSLOW), | ||||
|     TX_DEF(fft_sr,    FFT, 64, 131072, 2, 0, 288, b8_i2, fma3,  FMA3,  0, AV_CPU_FLAG_AVXSLOW), | ||||
|     TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 352, b8_i2, fma3,  FMA3, | ||||
|            AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, AV_CPU_FLAG_AVXSLOW), | ||||
|   | ||||
		Reference in New Issue
	
	Block a user