From ab04fedaaac34df4eeb71cff0e63aaad9e9ff711 Mon Sep 17 00:00:00 2001 From: Shiyou Yin Date: Mon, 12 Apr 2021 23:37:05 +0800 Subject: [PATCH] mips: Fix potential illegal instruction error. MSA2 optimizations are attached to MSA macros in generic_macros_msa.h. It's difficult to do a runtime check for them. Removing this part of the code makes it more robust. H264 1080p decoding: 5.13x==>5.12x. Signed-off-by: Michael Niedermayer --- configure | 7 +----- libavutil/mips/generic_macros_msa.h | 37 ----------------------------- 2 files changed, 1 insertion(+), 43 deletions(-) diff --git a/configure b/configure index 9ece040c7b..2d7b1bb429 100755 --- a/configure +++ b/configure @@ -450,7 +450,6 @@ Optimization options (experts only): --disable-mipsdsp disable MIPS DSP ASE R1 optimizations --disable-mipsdspr2 disable MIPS DSP ASE R2 optimizations --disable-msa disable MSA optimizations - --disable-msa2 disable MSA2 optimizations --disable-mipsfpu disable floating point MIPS optimizations --disable-mmi disable Loongson SIMD optimizations --disable-fast-unaligned consider unaligned accesses slow @@ -2053,7 +2052,6 @@ ARCH_EXT_LIST_MIPS=" mipsdsp mipsdspr2 msa - msa2 " ARCH_EXT_LIST_LOONGSON=" @@ -2592,7 +2590,6 @@ mipsdsp_deps="mips" mipsdspr2_deps="mips" mmi_deps_any="loongson2 loongson3" msa_deps="mipsfpu" -msa2_deps="msa" cpunop_deps="i686" x86_64_select="i686" @@ -5932,9 +5929,8 @@ elif enabled mips; then enabled mipsdsp && check_inline_asm_flags mipsdsp '"addu.qb $t0, $t1, $t2"' '-mdsp' enabled mipsdspr2 && check_inline_asm_flags mipsdspr2 '"absq_s.qb $t0, $t1"' '-mdspr2' - # MSA and MSA2 can be detected at runtime so we supply extra flags here + # MSA can be detected at runtime so we supply extra flags here enabled mipsfpu && enabled msa && check_inline_asm msa '"addvi.b $w0, $w1, 1"' '-mmsa' && append MSAFLAGS '-mmsa' - enabled msa && enabled msa2 && check_inline_asm msa2 '"nxbits.any.b $w0, $w0"' '-mmsa2' && append MSAFLAGS '-mmsa2' # loongson2 have no switch cflag so we 
can only probe toolchain ability enabled loongson2 && check_inline_asm loongson2 '"dmult.g $8, $9, $10"' && disable loongson3 @@ -7363,7 +7359,6 @@ if enabled mips; then echo "MIPS DSP R1 enabled ${mipsdsp-no}" echo "MIPS DSP R2 enabled ${mipsdspr2-no}" echo "MIPS MSA enabled ${msa-no}" - echo "MIPS MSA2 enabled ${msa2-no}" echo "LOONGSON MMI enabled ${mmi-no}" fi if enabled ppc; then diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h index bb25e9fd74..1486f7296e 100644 --- a/libavutil/mips/generic_macros_msa.h +++ b/libavutil/mips/generic_macros_msa.h @@ -25,10 +25,6 @@ #include #include -#if HAVE_MSA2 -#include -#endif - #define ALIGNMENT 16 #define ALLOC_ALIGNED(align) __attribute__ ((aligned((align) << 1))) @@ -1119,15 +1115,6 @@ unsigned absolute diff values, even-odd pairs are added together to generate 8 halfword results. */ -#if HAVE_MSA2 -#define SAD_UB2_UH(in0, in1, ref0, ref1) \ -( { \ - v8u16 sad_m = { 0 }; \ - sad_m += __builtin_msa2_sad_adj2_u_w2x_b((v16u8) in0, (v16u8) ref0); \ - sad_m += __builtin_msa2_sad_adj2_u_w2x_b((v16u8) in1, (v16u8) ref1); \ - sad_m; \ -} ) -#else #define SAD_UB2_UH(in0, in1, ref0, ref1) \ ( { \ v16u8 diff0_m, diff1_m; \ @@ -1141,7 +1128,6 @@ \ sad_m; \ } ) -#endif // #if HAVE_MSA2 /* Description : Insert specified word elements from input vectors to 1 destination vector @@ -2183,12 +2169,6 @@ extracted and interleaved with same vector 'in0' to generate 4 word elements keeping sign intact */ -#if HAVE_MSA2 -#define UNPCK_R_SH_SW(in, out) \ -{ \ - out = (v4i32) __builtin_msa2_w2x_lo_s_h((v8i16) in); \ -} -#else #define UNPCK_R_SH_SW(in, out) \ { \ v8i16 sign_m; \ @@ -2196,7 +2176,6 @@ sign_m = __msa_clti_s_h((v8i16) in, 0); \ out = (v4i32) __msa_ilvr_h(sign_m, (v8i16) in); \ } -#endif // #if HAVE_MSA2 /* Description : Sign extend byte elements from input vector and return halfword results in pair of vectors @@ -2209,13 +2188,6 @@ Then interleaved left with same vector 'in0' to generate 8 
signed halfword elements in 'out1' */ -#if HAVE_MSA2 -#define UNPCK_SB_SH(in, out0, out1) \ -{ \ - out0 = (v4i32) __builtin_msa2_w2x_lo_s_b((v16i8) in); \ - out1 = (v4i32) __builtin_msa2_w2x_hi_s_b((v16i8) in); \ -} -#else #define UNPCK_SB_SH(in, out0, out1) \ { \ v16i8 tmp_m; \ @@ -2223,7 +2195,6 @@ tmp_m = __msa_clti_s_b((v16i8) in, 0); \ ILVRL_B2_SH(tmp_m, in, out0, out1); \ } -#endif // #if HAVE_MSA2 /* Description : Zero extend unsigned byte elements to halfword elements Arguments : Inputs - in (1 input unsigned byte vector) @@ -2250,13 +2221,6 @@ Then interleaved left with same vector 'in0' to generate 4 signed word elements in 'out1' */ -#if HAVE_MSA2 -#define UNPCK_SH_SW(in, out0, out1) \ -{ \ - out0 = (v4i32) __builtin_msa2_w2x_lo_s_h((v8i16) in); \ - out1 = (v4i32) __builtin_msa2_w2x_hi_s_h((v8i16) in); \ -} -#else #define UNPCK_SH_SW(in, out0, out1) \ { \ v8i16 tmp_m; \ @@ -2264,7 +2228,6 @@ tmp_m = __msa_clti_s_h((v8i16) in, 0); \ ILVRL_H2_SW(tmp_m, in, out0, out1); \ } -#endif // #if HAVE_MSA2 /* Description : Swap two variables Arguments : Inputs - in0, in1