You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	Merge commit '652f5185945c8405fc57aed353286858df8d066f'
* commit '652f5185945c8405fc57aed353286858df8d066f':
  x86: mmx2 ---> mmxext in comments and messages

Conflicts:
	libswscale/x86/swscale_template.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
		| @@ -253,7 +253,7 @@ Optimization guide for ARM11 (used in Nokia N800 Internet Tablet): | ||||
| http://infocenter.arm.com/help/topic/com.arm.doc.ddi0211j/DDI0211J_arm1136_r1p5_trm.pdf | ||||
| Optimization guide for Intel XScale (used in Sharp Zaurus PDA): | ||||
| http://download.intel.com/design/intelxscale/27347302.pdf | ||||
| Intel Wireless MMX2 Coprocessor: Programmers Reference Manual | ||||
| Intel Wireless MMX 2 Coprocessor: Programmers Reference Manual | ||||
| http://download.intel.com/design/intelxscale/31451001.pdf | ||||
|  | ||||
| PowerPC-specific: | ||||
|   | ||||
| @@ -58,7 +58,7 @@ Input to YUV Converter | ||||
|  | ||||
| Horizontal scaler | ||||
|     There are several horizontal scalers. A special case worth mentioning is | ||||
|     the fast bilinear scaler that is made of runtime-generated MMX2 code | ||||
|     the fast bilinear scaler that is made of runtime-generated MMXEXT code | ||||
|     using specially tuned pshufw instructions. | ||||
|     The remaining scalers are specially-tuned for various filter lengths. | ||||
|     They scale 8-bit unsigned planar data to 16-bit signed planar data. | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| /* | ||||
|  * DSP utils : average functions are compiled twice for 3dnow/mmx2 | ||||
|  * DSP utils : average functions are compiled twice for 3dnow/mmxext | ||||
|  * Copyright (c) 2000, 2001 Fabrice Bellard | ||||
|  * Copyright (c) 2002-2004 Michael Niedermayer | ||||
|  * | ||||
|   | ||||
| @@ -206,11 +206,11 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; | ||||
| #undef OP_AVG | ||||
|  | ||||
| /***********************************/ | ||||
| /* MMX2 specific */ | ||||
| /* MMXEXT specific */ | ||||
|  | ||||
| #define DEF(x) x ## _mmx2 | ||||
|  | ||||
| /* Introduced only in MMX2 set */ | ||||
| /* Introduced only in MMXEXT set */ | ||||
| #define PAVGB "pavgb" | ||||
| #define OP_AVG PAVGB | ||||
|  | ||||
|   | ||||
| @@ -122,7 +122,7 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | ||||
| /* | ||||
|  * RGB15->RGB16 original by Strepto/Astral | ||||
|  * ported to gcc & bugfixed : A'rpi | ||||
|  * MMX2, 3DNOW optimization by Nick Kurshev | ||||
|  * MMXEXT, 3DNOW optimization by Nick Kurshev | ||||
|  * 32-bit C version, and and&add trick by Michael Niedermayer | ||||
|  */ | ||||
|  | ||||
|   | ||||
| @@ -77,7 +77,7 @@ static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, | ||||
| /* | ||||
|  * original by Strepto/Astral | ||||
|  * ported to gcc & bugfixed: A'rpi | ||||
|  * MMX2, 3DNOW optimization by Nick Kurshev | ||||
|  * MMXEXT, 3DNOW optimization by Nick Kurshev | ||||
|  * 32-bit C version, and and&add trick by Michael Niedermayer | ||||
|  */ | ||||
| static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size) | ||||
|   | ||||
| @@ -312,10 +312,10 @@ typedef struct SwsContext { | ||||
|     int vChrFilterSize;           ///< Vertical   filter size for chroma     pixels. | ||||
|     //@} | ||||
|  | ||||
|     int lumMmx2FilterCodeSize;    ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for luma/alpha planes. | ||||
|     int chrMmx2FilterCodeSize;    ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for chroma     planes. | ||||
|     uint8_t *lumMmx2FilterCode;   ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for luma/alpha planes. | ||||
|     uint8_t *chrMmx2FilterCode;   ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for chroma     planes. | ||||
|     int lumMmx2FilterCodeSize;    ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for luma/alpha planes. | ||||
|     int chrMmx2FilterCodeSize;    ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for chroma planes. | ||||
|     uint8_t *lumMmx2FilterCode;   ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for luma/alpha planes. | ||||
|     uint8_t *chrMmx2FilterCode;   ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes. | ||||
|  | ||||
|     int canMMX2BeUsed; | ||||
|  | ||||
|   | ||||
| @@ -643,7 +643,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, | ||||
|     int xpos, i; | ||||
|  | ||||
|     // create an optimized horizontal scaling routine | ||||
|     /* This scaler is made of runtime-generated MMX2 code using specially tuned | ||||
|     /* This scaler is made of runtime-generated MMXEXT code using specially tuned | ||||
|      * pshufw instructions. For every four output pixels, if four input pixels | ||||
|      * are enough for the fast bilinear scaling, then a chunk of fragmentB is | ||||
|      * used. If five input pixels are needed, then a chunk of fragmentA is used. | ||||
| @@ -1069,7 +1069,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, | ||||
|             && (flags & SWS_FAST_BILINEAR)) { | ||||
|             if (flags & SWS_PRINT_INFO) | ||||
|                 av_log(c, AV_LOG_INFO, | ||||
|                        "output width is not a multiple of 32 -> no MMX2 scaler\n"); | ||||
|                        "output width is not a multiple of 32 -> no MMXEXT scaler\n"); | ||||
|         } | ||||
|         if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) | ||||
|             c->canMMX2BeUsed=0; | ||||
| @@ -1313,7 +1313,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, | ||||
|                av_get_pix_fmt_name(dstFormat)); | ||||
|  | ||||
|         if (INLINE_MMXEXT(cpu_flags)) | ||||
|             av_log(c, AV_LOG_INFO, "using MMX2\n"); | ||||
|             av_log(c, AV_LOG_INFO, "using MMXEXT\n"); | ||||
|         else if (INLINE_AMD3DNOW(cpu_flags)) | ||||
|             av_log(c, AV_LOG_INFO, "using 3DNOW\n"); | ||||
|         else if (INLINE_MMX(cpu_flags)) | ||||
|   | ||||
| @@ -218,10 +218,10 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset | ||||
| %else ; %1 == 9/10 | ||||
| %if cpuflag(sse4) | ||||
|     packusdw        m2,  m1 | ||||
| %else ; mmx2/sse2 | ||||
| %else ; mmxext/sse2 | ||||
|     packssdw        m2,  m1 | ||||
|     pmaxsw          m2,  m6 | ||||
| %endif ; mmx2/sse2/sse4/avx | ||||
| %endif ; mmxext/sse2/sse4/avx | ||||
|     pminsw          m2, [yuv2yuvX_%1_upper] | ||||
| %endif ; %1 == 9/10/16 | ||||
|     mova   [dstq+r5*2],  m2 | ||||
|   | ||||
| @@ -87,7 +87,7 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid)    = 0x2080208020802080ULL; | ||||
| #define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) | ||||
| #define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5)) | ||||
|  | ||||
| //Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one. | ||||
| // Note: We have C, MMX, MMXEXT, 3DNOW versions, there is no 3DNOW + MMXEXT one. | ||||
|  | ||||
| #define COMPILE_TEMPLATE_MMXEXT 0 | ||||
| #define COMPILE_TEMPLATE_AMD3DNOW 0 | ||||
| @@ -98,7 +98,7 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid)    = 0x2080208020802080ULL; | ||||
| #define RENAME(a) a ## _MMX | ||||
| #include "rgb2rgb_template.c" | ||||
|  | ||||
| //MMX2 versions | ||||
| // MMXEXT versions | ||||
| #undef RENAME | ||||
| #undef COMPILE_TEMPLATE_MMXEXT | ||||
| #define COMPILE_TEMPLATE_MMXEXT 1 | ||||
| @@ -126,7 +126,7 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid)    = 0x2080208020802080ULL; | ||||
| /* | ||||
|  RGB15->RGB16 original by Strepto/Astral | ||||
|  ported to gcc & bugfixed : A'rpi | ||||
|  MMX2, 3DNOW optimization by Nick Kurshev | ||||
|  MMXEXT, 3DNOW optimization by Nick Kurshev | ||||
|  32-bit C version, and and&add trick by Michael Niedermayer | ||||
| */ | ||||
|  | ||||
|   | ||||
| @@ -178,7 +178,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr | ||||
| /* | ||||
|  original by Strepto/Astral | ||||
|  ported to gcc & bugfixed: A'rpi | ||||
|  MMX2, 3DNOW optimization by Nick Kurshev | ||||
|  MMXEXT, 3DNOW optimization by Nick Kurshev | ||||
|  32-bit C version, and and&add trick by Michael Niedermayer | ||||
| */ | ||||
| static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size) | ||||
|   | ||||
| @@ -80,7 +80,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL; | ||||
| #include "swscale_template.c" | ||||
| #endif | ||||
|  | ||||
| //MMX2 versions | ||||
| // MMXEXT versions | ||||
| #if HAVE_MMXEXT_INLINE | ||||
| #undef RENAME | ||||
| #undef COMPILE_TEMPLATE_MMXEXT | ||||
|   | ||||
| @@ -1690,7 +1690,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) | ||||
|     } | ||||
|  | ||||
|     if (c->srcBpc == 8 && c->dstBpc <= 14) { | ||||
|     // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one). | ||||
|     // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one). | ||||
| #if COMPILE_TEMPLATE_MMXEXT | ||||
|     if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed) | ||||
|     { | ||||
|   | ||||
| @@ -3,7 +3,7 @@ | ||||
|  * | ||||
|  * Copyright (C) 2009 Konstantin Shishkov | ||||
|  * | ||||
|  * MMX/MMX2 template stuff (needed for fast movntq support), | ||||
|  * MMX/MMXEXT template stuff (needed for fast movntq support), | ||||
|  * 1,4,8bpp support and context / deglobalize stuff | ||||
|  * by Michael Niedermayer (michaelni@gmx.at) | ||||
|  * | ||||
| @@ -58,7 +58,7 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; | ||||
| #include "yuv2rgb_template.c" | ||||
| #endif /* HAVE_MMX_INLINE */ | ||||
|  | ||||
| //MMX2 versions | ||||
| // MMXEXT versions | ||||
| #if HAVE_MMXEXT_INLINE | ||||
| #undef RENAME | ||||
| #undef COMPILE_TEMPLATE_MMXEXT | ||||
|   | ||||
		Reference in New Issue
	
	Block a user