From a2af8eddab75f1eac712411e4dde89823c0845e8 Mon Sep 17 00:00:00 2001 From: James Almer Date: Sat, 22 Feb 2014 02:47:01 -0300 Subject: [PATCH] x86: add detection for FMA3 instruction set Based on x264 code Signed-off-by: James Almer Signed-off-by: Michael Niedermayer --- configure | 5 +++++ libavutil/cpu.c | 4 ++++ libavutil/cpu.h | 1 + libavutil/x86/cpu.c | 5 ++++- libavutil/x86/cpu.h | 3 +++ 5 files changed, 17 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 6385038042..70f20b5538 100755 --- a/configure +++ b/configure @@ -323,6 +323,7 @@ Optimization options (experts only): --disable-sse42 disable SSE4.2 optimizations --disable-avx disable AVX optimizations --disable-xop disable XOP optimizations + --disable-fma3 disable FMA3 optimizations --disable-fma4 disable FMA4 optimizations --disable-avx2 disable AVX2 optimizations --disable-armv5te disable armv5te optimizations @@ -1455,6 +1456,7 @@ ARCH_EXT_LIST_X86=' avx avx2 cpunop + fma3 fma4 i686 mmx @@ -1828,6 +1830,7 @@ sse4_deps="ssse3" sse42_deps="sse4" avx_deps="sse42" xop_deps="avx" +fma3_deps="avx" fma4_deps="avx" avx2_deps="avx" @@ -4252,6 +4255,7 @@ EOF die "yasm/nasm not found or too old. Use --disable-yasm for a crippled build." check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx_external avresample check_yasm "vpmacsdd xmm0, xmm1, xmm2, xmm3" || disable xop_external + check_yasm "vfmadd132ps ymm0, ymm1, ymm2" || disable fma3_external check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4_external check_yasm "CPU amdnop" && enable cpunop fi @@ -4937,6 +4941,7 @@ if enabled x86; then echo "SSSE3 enabled ${ssse3-no}" echo "AVX enabled ${avx-no}" echo "XOP enabled ${xop-no}" + echo "FMA3 enabled ${fma3-no}" echo "FMA4 enabled ${fma4-no}" echo "i686 features enabled ${i686-no}" echo "CMOV is fast ${fast_cmov-no}" diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 9ac599826d..74de61e0b7 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -91,6 +91,7 @@ int av_parse_cpu_flags(const char *s) #define CPUFLAG_SSE42 (AV_CPU_FLAG_SSE42 | CPUFLAG_SSE4) #define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42) #define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX) +#define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX) #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX) #define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX) static const AVOption cpuflags_opts[] = { @@ -111,6 +112,7 @@ int av_parse_cpu_flags(const char *s) { "sse4.2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE42 }, .unit = "flags" }, { "avx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX }, .unit = "flags" }, { "xop" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_XOP }, .unit = "flags" }, + { "fma3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3 }, .unit = "flags" }, { "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4 }, .unit = "flags" }, { "avx2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX2 }, .unit = "flags" }, { "3dnow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOW }, .unit = "flags" }, @@ -166,6 +168,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) { "sse4.2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE42 }, .unit = "flags" }, { "avx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX }, .unit = "flags" }, { "xop" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_XOP }, .unit = "flags" }, + { "fma3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_FMA3 }, .unit = "flags" }, { "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_FMA4 }, .unit = "flags" }, { "avx2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX2 }, .unit = "flags" }, { "3dnow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOW }, .unit = "flags" }, @@ -279,6 +282,7 @@ static const struct { { AV_CPU_FLAG_SSE42, "sse4.2" }, { AV_CPU_FLAG_AVX, "avx" }, { AV_CPU_FLAG_XOP, "xop" }, + { AV_CPU_FLAG_FMA3, "fma3" }, { AV_CPU_FLAG_FMA4, "fma4" }, { AV_CPU_FLAG_3DNOW, "3dnow" }, { AV_CPU_FLAG_3DNOWEXT, "3dnowext" }, diff --git a/libavutil/cpu.h b/libavutil/cpu.h index 55c3ec9a06..1d0293fed3 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -51,6 +51,7 @@ // #define AV_CPU_FLAG_CMOV 0x1000 ///< supports cmov instruction // #endif #define AV_CPU_FLAG_AVX2 0x8000 ///< AVX2 functions: requires OS support even if YMM registers aren't used +#define AV_CPU_FLAG_FMA3 0x10000 ///< Haswell FMA3 functions #define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 18049eaead..333b0f805f 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -131,8 +131,11 @@ int ff_get_cpu_flags_x86(void) if ((ecx & 0x18000000) == 0x18000000) { /* Check for OS support */ xgetbv(0, eax, edx); - if ((eax & 0x6) == 0x6) + if ((eax & 0x6) == 0x6) { rval |= AV_CPU_FLAG_AVX; + if (ecx&0x00001000) + rval |= AV_CPU_FLAG_FMA3; + } } #if HAVE_AVX2 if (max_std_level >= 7) { diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h index a151c887d3..bc64b1b3bd 100644 --- a/libavutil/x86/cpu.h +++ b/libavutil/x86/cpu.h @@ -38,6 +38,7 @@ #define X86_SSE42(flags) CPUEXT(flags, SSE42) #define X86_AVX(flags) CPUEXT(flags, AVX) #define X86_XOP(flags) CPUEXT(flags, XOP) +#define X86_FMA3(flags) CPUEXT(flags, FMA3) #define X86_FMA4(flags) CPUEXT(flags, FMA4) #define X86_AVX2(flags) CPUEXT(flags, AVX2) @@ -53,6 +54,7 @@ #define EXTERNAL_SSE42(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE42) #define EXTERNAL_AVX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX) #define EXTERNAL_XOP(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, XOP) +#define EXTERNAL_FMA3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA3) #define EXTERNAL_FMA4(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4) #define EXTERNAL_AVX2(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX2) @@ -68,6 +70,7 @@ #define INLINE_SSE42(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE42) #define INLINE_AVX(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX) #define INLINE_XOP(flags) CPUEXT_SUFFIX(flags, _INLINE, XOP) +#define INLINE_FMA3(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA3) #define INLINE_FMA4(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA4) #define INLINE_AVX2(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX2)