mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-21 10:55:51 +02:00
mips: Fix potential illegal instruction error.
MSA2 optimizations are attached to the MSA macros in generic_macros_msa.h, so it is difficult to do a runtime check for them. Removing this part of the code makes it more robust. H264 1080p decoding: 5.13x ==> 5.12x. Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
parent
464d28c070
commit
ab04fedaaa
7
configure
vendored
7
configure
vendored
@ -450,7 +450,6 @@ Optimization options (experts only):
|
||||
--disable-mipsdsp disable MIPS DSP ASE R1 optimizations
|
||||
--disable-mipsdspr2 disable MIPS DSP ASE R2 optimizations
|
||||
--disable-msa disable MSA optimizations
|
||||
--disable-msa2 disable MSA2 optimizations
|
||||
--disable-mipsfpu disable floating point MIPS optimizations
|
||||
--disable-mmi disable Loongson SIMD optimizations
|
||||
--disable-fast-unaligned consider unaligned accesses slow
|
||||
@ -2053,7 +2052,6 @@ ARCH_EXT_LIST_MIPS="
|
||||
mipsdsp
|
||||
mipsdspr2
|
||||
msa
|
||||
msa2
|
||||
"
|
||||
|
||||
ARCH_EXT_LIST_LOONGSON="
|
||||
@ -2592,7 +2590,6 @@ mipsdsp_deps="mips"
|
||||
mipsdspr2_deps="mips"
|
||||
mmi_deps_any="loongson2 loongson3"
|
||||
msa_deps="mipsfpu"
|
||||
msa2_deps="msa"
|
||||
|
||||
cpunop_deps="i686"
|
||||
x86_64_select="i686"
|
||||
@ -5932,9 +5929,8 @@ elif enabled mips; then
|
||||
enabled mipsdsp && check_inline_asm_flags mipsdsp '"addu.qb $t0, $t1, $t2"' '-mdsp'
|
||||
enabled mipsdspr2 && check_inline_asm_flags mipsdspr2 '"absq_s.qb $t0, $t1"' '-mdspr2'
|
||||
|
||||
# MSA and MSA2 can be detected at runtime so we supply extra flags here
|
||||
# MSA can be detected at runtime so we supply extra flags here
|
||||
enabled mipsfpu && enabled msa && check_inline_asm msa '"addvi.b $w0, $w1, 1"' '-mmsa' && append MSAFLAGS '-mmsa'
|
||||
enabled msa && enabled msa2 && check_inline_asm msa2 '"nxbits.any.b $w0, $w0"' '-mmsa2' && append MSAFLAGS '-mmsa2'
|
||||
|
||||
# loongson2 have no switch cflag so we can only probe toolchain ability
|
||||
enabled loongson2 && check_inline_asm loongson2 '"dmult.g $8, $9, $10"' && disable loongson3
|
||||
@ -7363,7 +7359,6 @@ if enabled mips; then
|
||||
echo "MIPS DSP R1 enabled ${mipsdsp-no}"
|
||||
echo "MIPS DSP R2 enabled ${mipsdspr2-no}"
|
||||
echo "MIPS MSA enabled ${msa-no}"
|
||||
echo "MIPS MSA2 enabled ${msa2-no}"
|
||||
echo "LOONGSON MMI enabled ${mmi-no}"
|
||||
fi
|
||||
if enabled ppc; then
|
||||
|
@ -25,10 +25,6 @@
|
||||
#include <msa.h>
|
||||
#include <config.h>
|
||||
|
||||
#if HAVE_MSA2
|
||||
#include <msa2.h>
|
||||
#endif
|
||||
|
||||
#define ALIGNMENT 16
|
||||
#define ALLOC_ALIGNED(align) __attribute__ ((aligned((align) << 1)))
|
||||
|
||||
@ -1119,15 +1115,6 @@
|
||||
unsigned absolute diff values, even-odd pairs are added
|
||||
together to generate 8 halfword results.
|
||||
*/
|
||||
#if HAVE_MSA2
|
||||
#define SAD_UB2_UH(in0, in1, ref0, ref1) \
|
||||
( { \
|
||||
v8u16 sad_m = { 0 }; \
|
||||
sad_m += __builtin_msa2_sad_adj2_u_w2x_b((v16u8) in0, (v16u8) ref0); \
|
||||
sad_m += __builtin_msa2_sad_adj2_u_w2x_b((v16u8) in1, (v16u8) ref1); \
|
||||
sad_m; \
|
||||
} )
|
||||
#else
|
||||
#define SAD_UB2_UH(in0, in1, ref0, ref1) \
|
||||
( { \
|
||||
v16u8 diff0_m, diff1_m; \
|
||||
@ -1141,7 +1128,6 @@
|
||||
\
|
||||
sad_m; \
|
||||
} )
|
||||
#endif // #if HAVE_MSA2
|
||||
|
||||
/* Description : Insert specified word elements from input vectors to 1
|
||||
destination vector
|
||||
@ -2183,12 +2169,6 @@
|
||||
extracted and interleaved with same vector 'in0' to generate
|
||||
4 word elements keeping sign intact
|
||||
*/
|
||||
#if HAVE_MSA2
|
||||
#define UNPCK_R_SH_SW(in, out) \
|
||||
{ \
|
||||
out = (v4i32) __builtin_msa2_w2x_lo_s_h((v8i16) in); \
|
||||
}
|
||||
#else
|
||||
#define UNPCK_R_SH_SW(in, out) \
|
||||
{ \
|
||||
v8i16 sign_m; \
|
||||
@ -2196,7 +2176,6 @@
|
||||
sign_m = __msa_clti_s_h((v8i16) in, 0); \
|
||||
out = (v4i32) __msa_ilvr_h(sign_m, (v8i16) in); \
|
||||
}
|
||||
#endif // #if HAVE_MSA2
|
||||
|
||||
/* Description : Sign extend byte elements from input vector and return
|
||||
halfword results in pair of vectors
|
||||
@ -2209,13 +2188,6 @@
|
||||
Then interleaved left with same vector 'in0' to
|
||||
generate 8 signed halfword elements in 'out1'
|
||||
*/
|
||||
#if HAVE_MSA2
|
||||
#define UNPCK_SB_SH(in, out0, out1) \
|
||||
{ \
|
||||
out0 = (v4i32) __builtin_msa2_w2x_lo_s_b((v16i8) in); \
|
||||
out1 = (v4i32) __builtin_msa2_w2x_hi_s_b((v16i8) in); \
|
||||
}
|
||||
#else
|
||||
#define UNPCK_SB_SH(in, out0, out1) \
|
||||
{ \
|
||||
v16i8 tmp_m; \
|
||||
@ -2223,7 +2195,6 @@
|
||||
tmp_m = __msa_clti_s_b((v16i8) in, 0); \
|
||||
ILVRL_B2_SH(tmp_m, in, out0, out1); \
|
||||
}
|
||||
#endif // #if HAVE_MSA2
|
||||
|
||||
/* Description : Zero extend unsigned byte elements to halfword elements
|
||||
Arguments : Inputs - in (1 input unsigned byte vector)
|
||||
@ -2250,13 +2221,6 @@
|
||||
Then interleaved left with same vector 'in0' to
|
||||
generate 4 signed word elements in 'out1'
|
||||
*/
|
||||
#if HAVE_MSA2
|
||||
#define UNPCK_SH_SW(in, out0, out1) \
|
||||
{ \
|
||||
out0 = (v4i32) __builtin_msa2_w2x_lo_s_h((v8i16) in); \
|
||||
out1 = (v4i32) __builtin_msa2_w2x_hi_s_h((v8i16) in); \
|
||||
}
|
||||
#else
|
||||
#define UNPCK_SH_SW(in, out0, out1) \
|
||||
{ \
|
||||
v8i16 tmp_m; \
|
||||
@ -2264,7 +2228,6 @@
|
||||
tmp_m = __msa_clti_s_h((v8i16) in, 0); \
|
||||
ILVRL_H2_SW(tmp_m, in, out0, out1); \
|
||||
}
|
||||
#endif // #if HAVE_MSA2
|
||||
|
||||
/* Description : Swap two variables
|
||||
Arguments : Inputs - in0, in1
|
||||
|
Loading…
Reference in New Issue
Block a user