	Convert asm keyword into __asm__.
Neither the asm() nor the __asm__() keyword is part of the C99 standard, but while GCC accepts the former in C89 mode, it rejects it in C99 mode unless GNU extensions are turned on (with -fasm). The latter form is accepted in either mode as an extension, without requiring further command-line options. The Sun Studio C99 compiler likewise rejects asm() while accepting __asm__(), albeit with warnings that it is not valid C99 syntax.

Originally committed as revision 15627 to svn://svn.ffmpeg.org/ffmpeg/trunk
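To see the compatibility difference concretely, consider a minimal test file (an illustrative sketch, not part of the commit; the file and function names are made up):

    /* keyword_test.c
     * gcc -std=gnu89 -c keyword_test.c       -> both functions compile
     * gcc -std=c99   -c keyword_test.c       -> error on plain_keyword()
     * gcc -std=c99 -fasm -c keyword_test.c   -> both compile again
     */
    void with_underscores(void)
    {
        __asm__ volatile ("nop");   /* accepted in any mode, as an extension */
    }

    void plain_keyword(void)
    {
        asm volatile ("nop");       /* GNU-C/C89 only; rejected under plain -std=c99 */
    }

The "nop" mnemonic assumes a target assembler that has one (x86 and ARM do).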
configure: 6 changed lines (3 additions, 3 deletions)
							| @@ -448,7 +448,7 @@ check_asm(){ | ||||
|     asm="$2" | ||||
|     shift 2 | ||||
|     check_cc "$@" <<EOF && enable $name || disable $name | ||||
| int foo(void){ asm volatile($asm); } | ||||
| int foo(void){ __asm__ volatile($asm); } | ||||
| EOF | ||||
| } | ||||
|  | ||||
| @@ -1574,7 +1574,7 @@ if enabled x86; then | ||||
|     # base pointer is cleared in the inline assembly code. | ||||
|     check_exec_crash <<EOF && enable ebp_available | ||||
|     volatile int i=0; | ||||
|     asm volatile ( | ||||
|     __asm__ volatile ( | ||||
|         "xorl %%ebp, %%ebp" | ||||
|     ::: "%ebp"); | ||||
|     return i; | ||||
| @@ -1934,7 +1934,7 @@ VHOOKCFLAGS="-fPIC" | ||||
| # Find out if the .align argument is a power of two or not. | ||||
| if test $asmalign_pot = "unknown"; then | ||||
|     disable asmalign_pot | ||||
|     echo 'asm (".align 3");' | check_cc && enable asmalign_pot | ||||
|     echo '__asm__ (".align 3");' | check_cc && enable asmalign_pot | ||||
| fi | ||||
|  | ||||
| enabled_any $DECODER_LIST      && enable decoders | ||||
|   | ||||
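For reference, the check_asm helper changed above works by compiling a throwaway translation unit and enabling the named capability only if compilation succeeds. Roughly (a sketch of what the here-document expands to; configure substitutes the probed statement for the "nop" placeholder):

    int foo(void)
    {
        __asm__ volatile ("nop");   /* $asm goes here */
    }

The missing return statement matches the real probe: compilers accept it, at worst with a warning, and only success or failure of the compile matters to the test.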
| @@ -154,17 +154,17 @@ The minimum guaranteed alignment is written in the .h files, for example: | ||||
| General Tips: | ||||
| ------------- | ||||
| Use asm loops like: | ||||
| asm( | ||||
| __asm__( | ||||
|     "1: .... | ||||
|     ... | ||||
|     "jump_instruciton .... | ||||
| Do not use C loops: | ||||
| do{ | ||||
|     asm( | ||||
|     __asm__( | ||||
|         ... | ||||
| }while() | ||||
|  | ||||
| Use asm() instead of intrinsics. The latter requires a good optimizing compiler | ||||
| Use __asm__() instead of intrinsics. The latter requires a good optimizing compiler | ||||
| which gcc is not. | ||||
|  | ||||
|  | ||||
|   | ||||
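The loop advice above deserves a concrete shape: with the loop inside a single __asm__ block the assembly owns the induction variable, whereas an asm statement inside a C loop pays operand setup on every iteration. A hypothetical x86 sketch of the recommended form (not code from the tree):

    /* Zero n bytes at p, loop entirely in assembly; caller ensures n > 0. */
    static void zero_bytes(unsigned char *p, long n)
    {
        __asm__ volatile (
            "1:                \n\t"
            "movb $0, (%0)     \n\t"
            "add  $1, %0       \n\t"
            "sub  $1, %1       \n\t"
            "jnz  1b           \n\t"
            : "+r"(p), "+r"(n)
            :
            : "memory");
    }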
| @@ -105,21 +105,21 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); | ||||
| #define implver         __builtin_alpha_implver | ||||
| #define rpcc            __builtin_alpha_rpcc | ||||
| #else | ||||
| #define prefetch(p)     asm volatile("ldl $31,%0"  : : "m"(*(const char *) (p)) : "memory") | ||||
| #define prefetch_en(p)  asm volatile("ldq $31,%0"  : : "m"(*(const char *) (p)) : "memory") | ||||
| #define prefetch_m(p)   asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory") | ||||
| #define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory") | ||||
| #define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define extql(a, b)  ({ uint64_t __r; asm ("extql   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define extwl(a, b)  ({ uint64_t __r; asm ("extwl   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define extqh(a, b)  ({ uint64_t __r; asm ("extqh   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define zap(a, b)    ({ uint64_t __r; asm ("zap     %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define zapnot(a, b) ({ uint64_t __r; asm ("zapnot  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define amask(a)     ({ uint64_t __r; asm ("amask   %1,%0"      : "=r" (__r) : "rI"  (a));           __r; }) | ||||
| #define implver()    ({ uint64_t __r; asm ("implver %0"         : "=r" (__r));                       __r; }) | ||||
| #define rpcc()       ({ uint64_t __r; asm volatile ("rpcc %0"   : "=r" (__r));                       __r; }) | ||||
| #define prefetch(p)     __asm__ volatile("ldl $31,%0"  : : "m"(*(const char *) (p)) : "memory") | ||||
| #define prefetch_en(p)  __asm__ volatile("ldq $31,%0"  : : "m"(*(const char *) (p)) : "memory") | ||||
| #define prefetch_m(p)   __asm__ volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory") | ||||
| #define prefetch_men(p) __asm__ volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory") | ||||
| #define cmpbge(a, b) ({ uint64_t __r; __asm__ ("cmpbge  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define extql(a, b)  ({ uint64_t __r; __asm__ ("extql   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define extwl(a, b)  ({ uint64_t __r; __asm__ ("extwl   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define extqh(a, b)  ({ uint64_t __r; __asm__ ("extqh   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define zap(a, b)    ({ uint64_t __r; __asm__ ("zap     %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define zapnot(a, b) ({ uint64_t __r; __asm__ ("zapnot  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; }) | ||||
| #define amask(a)     ({ uint64_t __r; __asm__ ("amask   %1,%0"      : "=r" (__r) : "rI"  (a));           __r; }) | ||||
| #define implver()    ({ uint64_t __r; __asm__ ("implver %0"         : "=r" (__r));                       __r; }) | ||||
| #define rpcc()       ({ uint64_t __r; __asm__ volatile ("rpcc %0"   : "=r" (__r));                       __r; }) | ||||
| #endif | ||||
| #define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory") | ||||
| #define wh64(p) __asm__ volatile("wh64 (%0)" : : "r"(p) : "memory") | ||||
|  | ||||
| #if GNUC_PREREQ(3,3) && defined(__alpha_max__) | ||||
| #define minub8  __builtin_alpha_minub8 | ||||
| @@ -136,19 +136,19 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); | ||||
| #define unpkbl  __builtin_alpha_unpkbl | ||||
| #define unpkbw  __builtin_alpha_unpkbw | ||||
| #else | ||||
| #define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define minuw4(a, b) ({ uint64_t __r; asm (".arch ev6; minuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define minsw4(a, b) ({ uint64_t __r; asm (".arch ev6; minsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define maxub8(a, b) ({ uint64_t __r; asm (".arch ev6; maxub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define maxsb8(a, b) ({ uint64_t __r; asm (".arch ev6; maxsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define perr(a, b)   ({ uint64_t __r; asm (".arch ev6; perr    %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) | ||||
| #define pklb(a)      ({ uint64_t __r; asm (".arch ev6; pklb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; }) | ||||
| #define pkwb(a)      ({ uint64_t __r; asm (".arch ev6; pkwb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; }) | ||||
| #define unpkbl(a)    ({ uint64_t __r; asm (".arch ev6; unpkbl  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; }) | ||||
| #define unpkbw(a)    ({ uint64_t __r; asm (".arch ev6; unpkbw  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; }) | ||||
| #define minub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define minsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define minuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define minsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define maxub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define maxsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define maxuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define maxsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define perr(a, b)   ({ uint64_t __r; __asm__ (".arch ev6; perr    %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) | ||||
| #define pklb(a)      ({ uint64_t __r; __asm__ (".arch ev6; pklb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; }) | ||||
| #define pkwb(a)      ({ uint64_t __r; __asm__ (".arch ev6; pkwb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; }) | ||||
| #define unpkbl(a)    ({ uint64_t __r; __asm__ (".arch ev6; unpkbl  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; }) | ||||
| #define unpkbw(a)    ({ uint64_t __r; __asm__ (".arch ev6; unpkbw  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; }) | ||||
| #endif | ||||
|  | ||||
| #elif defined(__DECC)           /* Digital/Compaq/hp "ccc" compiler */ | ||||
| @@ -158,31 +158,31 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); | ||||
| #define ldl(p) (*(const int32_t *)  (p)) | ||||
| #define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0) | ||||
| #define stl(l, p) do { *(int32_t *)  (p) = (l); } while (0) | ||||
| #define ldq_u(a)     asm ("ldq_u   %v0,0(%a0)", a) | ||||
| #define ldq_u(a)     __asm__ ("ldq_u   %v0,0(%a0)", a) | ||||
| #define uldq(a)      (*(const __unaligned uint64_t *) (a)) | ||||
| #define cmpbge(a, b) asm ("cmpbge  %a0,%a1,%v0", a, b) | ||||
| #define extql(a, b)  asm ("extql   %a0,%a1,%v0", a, b) | ||||
| #define extwl(a, b)  asm ("extwl   %a0,%a1,%v0", a, b) | ||||
| #define extqh(a, b)  asm ("extqh   %a0,%a1,%v0", a, b) | ||||
| #define zap(a, b)    asm ("zap     %a0,%a1,%v0", a, b) | ||||
| #define zapnot(a, b) asm ("zapnot  %a0,%a1,%v0", a, b) | ||||
| #define amask(a)     asm ("amask   %a0,%v0", a) | ||||
| #define implver()    asm ("implver %v0") | ||||
| #define rpcc()       asm ("rpcc           %v0") | ||||
| #define minub8(a, b) asm ("minub8  %a0,%a1,%v0", a, b) | ||||
| #define minsb8(a, b) asm ("minsb8  %a0,%a1,%v0", a, b) | ||||
| #define minuw4(a, b) asm ("minuw4  %a0,%a1,%v0", a, b) | ||||
| #define minsw4(a, b) asm ("minsw4  %a0,%a1,%v0", a, b) | ||||
| #define maxub8(a, b) asm ("maxub8  %a0,%a1,%v0", a, b) | ||||
| #define maxsb8(a, b) asm ("maxsb8  %a0,%a1,%v0", a, b) | ||||
| #define maxuw4(a, b) asm ("maxuw4  %a0,%a1,%v0", a, b) | ||||
| #define maxsw4(a, b) asm ("maxsw4  %a0,%a1,%v0", a, b) | ||||
| #define perr(a, b)   asm ("perr    %a0,%a1,%v0", a, b) | ||||
| #define pklb(a)      asm ("pklb    %a0,%v0", a) | ||||
| #define pkwb(a)      asm ("pkwb    %a0,%v0", a) | ||||
| #define unpkbl(a)    asm ("unpkbl  %a0,%v0", a) | ||||
| #define unpkbw(a)    asm ("unpkbw  %a0,%v0", a) | ||||
| #define wh64(a)      asm ("wh64    %a0", a) | ||||
| #define cmpbge(a, b) __asm__ ("cmpbge  %a0,%a1,%v0", a, b) | ||||
| #define extql(a, b)  __asm__ ("extql   %a0,%a1,%v0", a, b) | ||||
| #define extwl(a, b)  __asm__ ("extwl   %a0,%a1,%v0", a, b) | ||||
| #define extqh(a, b)  __asm__ ("extqh   %a0,%a1,%v0", a, b) | ||||
| #define zap(a, b)    __asm__ ("zap     %a0,%a1,%v0", a, b) | ||||
| #define zapnot(a, b) __asm__ ("zapnot  %a0,%a1,%v0", a, b) | ||||
| #define amask(a)     __asm__ ("amask   %a0,%v0", a) | ||||
| #define implver()    __asm__ ("implver %v0") | ||||
| #define rpcc()       __asm__ ("rpcc           %v0") | ||||
| #define minub8(a, b) __asm__ ("minub8  %a0,%a1,%v0", a, b) | ||||
| #define minsb8(a, b) __asm__ ("minsb8  %a0,%a1,%v0", a, b) | ||||
| #define minuw4(a, b) __asm__ ("minuw4  %a0,%a1,%v0", a, b) | ||||
| #define minsw4(a, b) __asm__ ("minsw4  %a0,%a1,%v0", a, b) | ||||
| #define maxub8(a, b) __asm__ ("maxub8  %a0,%a1,%v0", a, b) | ||||
| #define maxsb8(a, b) __asm__ ("maxsb8  %a0,%a1,%v0", a, b) | ||||
| #define maxuw4(a, b) __asm__ ("maxuw4  %a0,%a1,%v0", a, b) | ||||
| #define maxsw4(a, b) __asm__ ("maxsw4  %a0,%a1,%v0", a, b) | ||||
| #define perr(a, b)   __asm__ ("perr    %a0,%a1,%v0", a, b) | ||||
| #define pklb(a)      __asm__ ("pklb    %a0,%v0", a) | ||||
| #define pkwb(a)      __asm__ ("pkwb    %a0,%v0", a) | ||||
| #define unpkbl(a)    __asm__ ("unpkbl  %a0,%v0", a) | ||||
| #define unpkbw(a)    __asm__ ("unpkbw  %a0,%v0", a) | ||||
| #define wh64(a)      __asm__ ("wh64    %a0", a) | ||||
|  | ||||
| #else | ||||
| #error "Unknown compiler!" | ||||
|   | ||||
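Beyond __asm__ itself, the Alpha macros above lean on another GCC extension, statement expressions: ({ ...; __r; }) evaluates to __r, which lets a macro wrap an asm that produces a value. A generic sketch of the same pattern (x86 bswap chosen purely as an illustration):

    #include <stdint.h>

    /* The whole ({ ... }) is an expression whose value is __r. */
    #define BSWAP32(x) ({                        \
        uint32_t __r = (x);                      \
        __asm__ ("bswap %0" : "+r"(__r));        \
        __r; })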
| @@ -66,7 +66,7 @@ CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8) | ||||
|  | ||||
| static void add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size) | ||||
| { | ||||
|     asm volatile ( | ||||
|     __asm__ volatile ( | ||||
|                   "mov r10, #8 \n\t" | ||||
|  | ||||
|                   "1: \n\t" | ||||
| @@ -206,7 +206,7 @@ static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block) | ||||
| #ifdef HAVE_ARMV5TE | ||||
| static void prefetch_arm(void *mem, int stride, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "1:              \n\t" | ||||
|         "subs %0, %0, #1 \n\t" | ||||
|         "pld  [%1]       \n\t" | ||||
|   | ||||
| @@ -22,7 +22,7 @@ | ||||
| #include "libavcodec/dsputil.h" | ||||
|  | ||||
| #define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt | ||||
| #define SET_RND(regd)  asm volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12"); | ||||
| #define SET_RND(regd)  __asm__ volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12"); | ||||
| #define WAVG2B "wavg2b" | ||||
| #include "dsputil_iwmmxt_rnd.h" | ||||
| #undef DEF | ||||
| @@ -30,7 +30,7 @@ | ||||
| #undef WAVG2B | ||||
|  | ||||
| #define DEF(x, y) x ## _ ## y ##_iwmmxt | ||||
| #define SET_RND(regd)  asm volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12"); | ||||
| #define SET_RND(regd)  __asm__ volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12"); | ||||
| #define WAVG2B "wavg2br" | ||||
| #include "dsputil_iwmmxt_rnd.h" | ||||
| #undef DEF | ||||
| @@ -39,7 +39,7 @@ | ||||
|  | ||||
| // need scheduling | ||||
| #define OP(AVG)                                         \ | ||||
|     asm volatile (                                      \ | ||||
|     __asm__ volatile (                                      \ | ||||
|         /* alignment */                                 \ | ||||
|         "and r12, %[pixels], #7 \n\t"                   \ | ||||
|         "bic %[pixels], %[pixels], #7 \n\t"             \ | ||||
| @@ -89,7 +89,7 @@ void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_s | ||||
| { | ||||
|     uint8_t *pixels2 = pixels + line_size; | ||||
|  | ||||
|     asm volatile ( | ||||
|     __asm__ volatile ( | ||||
|         "mov            r12, #4                 \n\t" | ||||
|         "1:                                     \n\t" | ||||
|         "pld            [%[pixels], %[line_size2]]              \n\t" | ||||
| @@ -125,7 +125,7 @@ void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_s | ||||
|  | ||||
| static void clear_blocks_iwmmxt(DCTELEM *blocks) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|                 "wzero wr0                      \n\t" | ||||
|                 "mov r1, #(128 * 6 / 32)        \n\t" | ||||
|                 "1:                             \n\t" | ||||
|   | ||||
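The SET_RND macro above also shows the stringizing idiom these files use to splice a register name into an asm template: #regd turns the macro argument into a string literal that concatenates with the rest of the template. The MMX MOVQ_ZERO macro later in this diff is the simplest instance; as a standalone sketch:

    /* CLEAR_REG(mm7) emits the instruction:  pxor %mm7, %mm7 */
    #define CLEAR_REG(regd) \
        __asm__ volatile ("pxor %%" #regd ", %%" #regd " \n\t" ::)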
| @@ -26,7 +26,7 @@ | ||||
| void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) | ||||
| { | ||||
|     int stride = line_size; | ||||
|     asm volatile ( | ||||
|     __asm__ volatile ( | ||||
|         "and r12, %[pixels], #7 \n\t" | ||||
|         "bic %[pixels], %[pixels], #7 \n\t" | ||||
|         "tmcr wcgr1, r12 \n\t" | ||||
| @@ -60,7 +60,7 @@ void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_siz | ||||
| void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) | ||||
| { | ||||
|     int stride = line_size; | ||||
|     asm volatile ( | ||||
|     __asm__ volatile ( | ||||
|         "and r12, %[pixels], #7 \n\t" | ||||
|         "bic %[pixels], %[pixels], #7 \n\t" | ||||
|         "tmcr wcgr1, r12 \n\t" | ||||
| @@ -102,7 +102,7 @@ void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_siz | ||||
| void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) | ||||
| { | ||||
|     int stride = line_size; | ||||
|     asm volatile ( | ||||
|     __asm__ volatile ( | ||||
|         "and r12, %[pixels], #7 \n\t" | ||||
|         "bic %[pixels], %[pixels], #7 \n\t" | ||||
|         "tmcr wcgr1, r12 \n\t" | ||||
| @@ -142,7 +142,7 @@ void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_si | ||||
| void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) | ||||
| { | ||||
|     int stride = line_size; | ||||
|     asm volatile ( | ||||
|     __asm__ volatile ( | ||||
|         "pld [%[pixels]]                \n\t" | ||||
|         "pld [%[pixels], #32]           \n\t" | ||||
|         "pld [%[block]]                 \n\t" | ||||
| @@ -201,7 +201,7 @@ void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_ | ||||
|     // [wr0 wr1 wr2 wr3] for previous line | ||||
|     // [wr4 wr5 wr6 wr7] for current line | ||||
|     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pld [%[pixels]]                \n\t" | ||||
|         "pld [%[pixels], #32]           \n\t" | ||||
|         "and r12, %[pixels], #7         \n\t" | ||||
| @@ -250,7 +250,7 @@ void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line | ||||
|     // [wr0 wr1 wr2 wr3] for previous line | ||||
|     // [wr4 wr5 wr6 wr7] for current line | ||||
|     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pld [%[pixels]]                \n\t" | ||||
|         "pld [%[pixels], #32]           \n\t" | ||||
|         "and r12, %[pixels], #7         \n\t" | ||||
| @@ -311,7 +311,7 @@ void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_ | ||||
|     // [wr0 wr1 wr2 wr3] for previous line | ||||
|     // [wr4 wr5 wr6 wr7] for current line | ||||
|     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pld [%[pixels]]                \n\t" | ||||
|         "pld [%[pixels], #32]           \n\t" | ||||
|         "pld [%[block]]                 \n\t" | ||||
| @@ -372,7 +372,7 @@ void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line | ||||
|     // [wr0 wr1 wr2 wr3] for previous line | ||||
|     // [wr4 wr5 wr6 wr7] for current line | ||||
|     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pld [%[pixels]]                \n\t" | ||||
|         "pld [%[pixels], #32]           \n\t" | ||||
|         "pld [%[block]]                 \n\t" | ||||
| @@ -448,7 +448,7 @@ void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_ | ||||
|     int stride = line_size; | ||||
|     // [wr0 wr1 wr2 wr3] for previous line | ||||
|     // [wr4 wr5 wr6 wr7] for current line | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pld            [%[pixels]]                             \n\t" | ||||
|         "pld            [%[pixels], #32]                        \n\t" | ||||
|         "and            r12, %[pixels], #7                      \n\t" | ||||
| @@ -502,7 +502,7 @@ void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line | ||||
|     int stride = line_size; | ||||
|     // [wr0 wr1 wr2 wr3] for previous line | ||||
|     // [wr4 wr5 wr6 wr7] for current line | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pld [%[pixels]]                \n\t" | ||||
|         "pld [%[pixels], #32]           \n\t" | ||||
|         "and r12, %[pixels], #7         \n\t" | ||||
| @@ -559,7 +559,7 @@ void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line | ||||
|     int stride = line_size; | ||||
|     // [wr0 wr1 wr2 wr3] for previous line | ||||
|     // [wr4 wr5 wr6 wr7] for current line | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pld [%[pixels]]                \n\t" | ||||
|         "pld [%[pixels], #32]           \n\t" | ||||
|         "and r12, %[pixels], #7         \n\t" | ||||
| @@ -627,7 +627,7 @@ void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line | ||||
|     // [wr0 wr1 wr2 wr3] for previous line | ||||
|     // [wr4 wr5 wr6 wr7] for current line | ||||
|     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pld [%[pixels]]                \n\t" | ||||
|         "mov r12, #2                    \n\t" | ||||
|         "pld [%[pixels], #32]           \n\t" | ||||
| @@ -721,7 +721,7 @@ void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int lin | ||||
|     // [wr0 wr1 wr2 wr3] for previous line | ||||
|     // [wr4 wr5 wr6 wr7] for current line | ||||
|     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pld [%[pixels]]                \n\t" | ||||
|         "mov r12, #2                    \n\t" | ||||
|         "pld [%[pixels], #32]           \n\t" | ||||
| @@ -863,7 +863,7 @@ void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line | ||||
|     // [wr0 wr1 wr2 wr3] for previous line | ||||
|     // [wr4 wr5 wr6 wr7] for current line | ||||
|     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pld [%[block]]                 \n\t" | ||||
|         "pld [%[block], #32]            \n\t" | ||||
|         "pld [%[pixels]]                \n\t" | ||||
| @@ -967,7 +967,7 @@ void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int lin | ||||
|     // [wr0 wr1 wr2 wr3] for previous line | ||||
|     // [wr4 wr5 wr6 wr7] for current line | ||||
|     SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pld [%[block]]                 \n\t" | ||||
|         "pld [%[block], #32]            \n\t" | ||||
|         "pld [%[pixels]]                \n\t" | ||||
|   | ||||
| @@ -42,7 +42,7 @@ | ||||
| static void vector_fmul_vfp(float *dst, const float *src, int len) | ||||
| { | ||||
|     int tmp; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "fmrx       %[tmp], fpscr\n\t" | ||||
|         "orr        %[tmp], %[tmp], #(3 << 16)\n\t" /* set vector size to 4 */ | ||||
|         "fmxr       fpscr, %[tmp]\n\t" | ||||
| @@ -90,7 +90,7 @@ static void vector_fmul_vfp(float *dst, const float *src, int len) | ||||
| static void vector_fmul_reverse_vfp(float *dst, const float *src0, const float *src1, int len) | ||||
| { | ||||
|     src1 += len; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "fldmdbs    %[src1]!, {s0-s3}\n\t" | ||||
|         "fldmias    %[src0]!, {s8-s11}\n\t" | ||||
|         "fldmdbs    %[src1]!, {s4-s7}\n\t" | ||||
| @@ -149,7 +149,7 @@ static void vector_fmul_reverse_vfp(float *dst, const float *src0, const float * | ||||
|  */ | ||||
| void float_to_int16_vfp(int16_t *dst, const float *src, int len) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "fldmias    %[src]!, {s16-s23}\n\t" | ||||
|         "ftosis     s0, s16\n\t" | ||||
|         "ftosis     s1, s17\n\t" | ||||
|   | ||||
| @@ -25,7 +25,7 @@ | ||||
| #ifdef FRAC_BITS | ||||
| #   define MULL(a, b) \ | ||||
|         ({  int lo, hi;\ | ||||
|          asm("smull %0, %1, %2, %3     \n\t"\ | ||||
|          __asm__("smull %0, %1, %2, %3     \n\t"\ | ||||
|              "mov   %0, %0,     lsr %4\n\t"\ | ||||
|              "add   %1, %0, %1, lsl %5\n\t"\ | ||||
|              : "=&r"(lo), "=&r"(hi)\ | ||||
| @@ -37,21 +37,21 @@ | ||||
| static inline av_const int MULH(int a, int b) | ||||
| { | ||||
|     int r; | ||||
|     asm ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b)); | ||||
|     __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b)); | ||||
|     return r; | ||||
| } | ||||
| #define MULH MULH | ||||
| #else | ||||
| #define MULH(a, b) \ | ||||
|     ({ int lo, hi;\ | ||||
|      asm ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));\ | ||||
|      __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));\ | ||||
|      hi; }) | ||||
| #endif | ||||
|  | ||||
| static inline av_const int64_t MUL64(int a, int b) | ||||
| { | ||||
|     union { uint64_t x; unsigned hl[2]; } x; | ||||
|     asm ("smull %0, %1, %2, %3" | ||||
|     __asm__ ("smull %0, %1, %2, %3" | ||||
|          : "=r"(x.hl[0]), "=r"(x.hl[1]) : "r"(a), "r"(b)); | ||||
|     return x.x; | ||||
| } | ||||
| @@ -60,7 +60,7 @@ static inline av_const int64_t MUL64(int a, int b) | ||||
| static inline av_const int64_t MAC64(int64_t d, int a, int b) | ||||
| { | ||||
|     union { uint64_t x; unsigned hl[2]; } x = { d }; | ||||
|     asm ("smlal %0, %1, %2, %3" | ||||
|     __asm__ ("smlal %0, %1, %2, %3" | ||||
|          : "+r"(x.hl[0]), "+r"(x.hl[1]) : "r"(a), "r"(b)); | ||||
|     return x.x; | ||||
| } | ||||
| @@ -71,11 +71,11 @@ static inline av_const int64_t MAC64(int64_t d, int a, int b) | ||||
|  | ||||
| /* signed 16x16 -> 32 multiply add accumulate */ | ||||
| #   define MAC16(rt, ra, rb) \ | ||||
|         asm ("smlabb %0, %2, %3, %0" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); | ||||
|         __asm__ ("smlabb %0, %2, %3, %0" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); | ||||
| /* signed 16x16 -> 32 multiply */ | ||||
| #   define MUL16(ra, rb)                                                \ | ||||
|         ({ int __rt;                                                    \ | ||||
|          asm ("smulbb %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb));  \ | ||||
|          __asm__ ("smulbb %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb));  \ | ||||
|          __rt; }) | ||||
|  | ||||
| #endif | ||||
|   | ||||
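MULH, MUL64 and MAC64 above all map a widening 32x32 multiply onto ARM's smull/smlal, which deliver the full 64-bit product in a register pair. A portable C reference for what MULH returns (a sketch, useful when checking the asm against a known-good value):

    #include <stdint.h>

    /* High 32 bits of the signed 64-bit product. */
    static inline int32_t mulh_ref(int32_t a, int32_t b)
    {
        return (int32_t)(((int64_t)a * b) >> 32);
    }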
| @@ -65,7 +65,7 @@ static inline void dct_unquantize_h263_helper_c(DCTELEM *block, int qmul, int qa | ||||
| ({ DCTELEM *xblock = xxblock; \ | ||||
|    int xqmul = xxqmul, xqadd = xxqadd, xcount = xxcount, xtmp; \ | ||||
|    int xdata1, xdata2; \ | ||||
| asm volatile( \ | ||||
| __asm__ volatile( \ | ||||
|         "subs %[count], %[count], #2       \n\t" \ | ||||
|         "ble 2f                            \n\t" \ | ||||
|         "ldrd r4, [%[block], #0]           \n\t" \ | ||||
|   | ||||
| @@ -48,7 +48,7 @@ static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s, | ||||
|     else | ||||
|         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; | ||||
|  | ||||
|     asm volatile ( | ||||
|     __asm__ volatile ( | ||||
| /*      "movd %1, %%mm6                 \n\t" //qmul */ | ||||
| /*      "packssdw %%mm6, %%mm6          \n\t" */ | ||||
| /*      "packssdw %%mm6, %%mm6          \n\t" */ | ||||
|   | ||||
| @@ -77,7 +77,7 @@ static void bfin_clear_blocks (DCTELEM *blocks) | ||||
| { | ||||
|     // This is just a simple memset. | ||||
|     // | ||||
|     asm("P0=192; " | ||||
|     __asm__("P0=192; " | ||||
|         "I0=%0;  " | ||||
|         "R0=0;   " | ||||
|         "LSETUP(clear_blocks_blkfn_lab,clear_blocks_blkfn_lab)LC0=P0;" | ||||
|   | ||||
| @@ -24,7 +24,7 @@ | ||||
|  | ||||
| #ifdef CONFIG_MPEGAUDIO_HP | ||||
| #define MULH(X,Y) ({ int xxo;                           \ | ||||
|     asm (                                               \ | ||||
|     __asm__ (                                               \ | ||||
|         "a1 = %2.L * %1.L (FU);\n\t"                    \ | ||||
|         "a1 = a1 >> 16;\n\t"                            \ | ||||
|         "a1 += %2.H * %1.L (IS,M);\n\t"                 \ | ||||
| @@ -34,7 +34,7 @@ | ||||
|         : "=d" (xxo) : "d" (X), "d" (Y) : "A0","A1"); xxo; }) | ||||
| #else | ||||
| #define MULH(X,Y) ({ int xxo;                           \ | ||||
|     asm (                                               \ | ||||
|     __asm__ (                                               \ | ||||
|         "a1 = %2.H * %1.L (IS,M);\n\t"                  \ | ||||
|         "a0 = %1.H * %2.H, a1+= %1.H * %2.L (IS,M);\n\t"\ | ||||
|         "a1 = a1 >>> 16;\n\t"                           \ | ||||
| @@ -44,7 +44,7 @@ | ||||
|  | ||||
| /* signed 16x16 -> 32 multiply */ | ||||
| #define MUL16(a, b) ({ int xxo;                         \ | ||||
|     asm (                                               \ | ||||
|     __asm__ (                                               \ | ||||
|        "%0 = %1.l*%2.l (is);\n\t"                       \ | ||||
|        : "=W" (xxo) : "d" (a), "d" (b) : "A1");         \ | ||||
|     xxo; }) | ||||
|   | ||||
| @@ -88,7 +88,7 @@ static int dct_quantize_bfin (MpegEncContext *s, | ||||
|     /*      block[i] = level;                                 */ | ||||
|     /*  } */ | ||||
|  | ||||
|     asm volatile | ||||
|     __asm__ volatile | ||||
|         ("i2=%1;\n\t" | ||||
|          "r1=[%1++];                                                         \n\t" | ||||
|          "r0=r1>>>15 (v);                                                    \n\t" | ||||
| @@ -114,7 +114,7 @@ static int dct_quantize_bfin (MpegEncContext *s, | ||||
|  | ||||
|     PROF("zzscan",5); | ||||
|  | ||||
|     asm volatile | ||||
|     __asm__ volatile | ||||
|         ("r0=b[%1--] (x);         \n\t" | ||||
|          "lsetup (0f,1f) lc0=%3;  \n\t"     /*    for(i=63; i>=start_i; i--) { */ | ||||
|          "0: p0=r0;               \n\t"     /*        j = scantable[i];        */ | ||||
|   | ||||
| @@ -55,14 +55,14 @@ extern const uint8_t ff_reverse[256]; | ||||
| #if defined(ARCH_X86) | ||||
| // avoid +32 for shift optimization (gcc should do that ...) | ||||
| static inline  int32_t NEG_SSR32( int32_t a, int8_t s){ | ||||
|     asm ("sarl %1, %0\n\t" | ||||
|     __asm__ ("sarl %1, %0\n\t" | ||||
|          : "+r" (a) | ||||
|          : "ic" ((uint8_t)(-s)) | ||||
|     ); | ||||
|     return a; | ||||
| } | ||||
| static inline uint32_t NEG_USR32(uint32_t a, int8_t s){ | ||||
|     asm ("shrl %1, %0\n\t" | ||||
|     __asm__ ("shrl %1, %0\n\t" | ||||
|          : "+r" (a) | ||||
|          : "ic" ((uint8_t)(-s)) | ||||
|     ); | ||||
| @@ -248,7 +248,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) | ||||
| { | ||||
| #    ifdef ALIGNED_BITSTREAM_WRITER | ||||
| #        if defined(ARCH_X86) | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movl %0, %%ecx                 \n\t" | ||||
|         "xorl %%eax, %%eax              \n\t" | ||||
|         "shrdl %%cl, %1, %%eax          \n\t" | ||||
| @@ -279,7 +279,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) | ||||
| #        endif | ||||
| #    else //ALIGNED_BITSTREAM_WRITER | ||||
| #        if defined(ARCH_X86) | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movl $7, %%ecx                 \n\t" | ||||
|         "andl %0, %%ecx                 \n\t" | ||||
|         "addl %3, %%ecx                 \n\t" | ||||
| @@ -556,7 +556,7 @@ static inline void skip_bits_long(GetBitContext *s, int n){ | ||||
|  | ||||
| #if defined(ARCH_X86) | ||||
| #   define SKIP_CACHE(name, gb, num)\ | ||||
|         asm(\ | ||||
|         __asm__(\ | ||||
|             "shldl %2, %1, %0          \n\t"\ | ||||
|             "shll %2, %1               \n\t"\ | ||||
|             : "+r" (name##_cache0), "+r" (name##_cache1)\ | ||||
|   | ||||
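NEG_SSR32 and NEG_USR32 above encode a small x86 trick: the hardware masks shift counts modulo 32, so shifting by (uint8_t)(-s) behaves like shifting by 32-s without computing the subtraction. A portable statement of the intended semantics (a sketch; valid for s in 1..32, which is how the bitstream reader uses it):

    #include <stdint.h>

    /* Top s bits of a, i.e. what NEG_USR32(a, s) yields on x86. */
    static inline uint32_t neg_usr32_ref(uint32_t a, int8_t s)
    {
        return (uint32_t)((uint64_t)a >> (32 - s));
    }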
| @@ -304,7 +304,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ | ||||
|     int temp; | ||||
| #if 0 | ||||
|     //P3:683    athlon:475 | ||||
|     asm( | ||||
|     __asm__( | ||||
|         "lea -0x100(%0), %2         \n\t" | ||||
|         "shr $31, %2                \n\t"  //FIXME 31->63 for x86-64 | ||||
|         "shl %%cl, %0               \n\t" | ||||
| @@ -313,7 +313,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ | ||||
|     ); | ||||
| #elif 0 | ||||
|     //P3:680    athlon:474 | ||||
|     asm( | ||||
|     __asm__( | ||||
|         "cmp $0x100, %0             \n\t" | ||||
|         "setb %%cl                  \n\t"  //FIXME 31->63 for x86-64 | ||||
|         "shl %%cl, %0               \n\t" | ||||
| @@ -323,7 +323,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ | ||||
| #elif 1 | ||||
|     int temp2; | ||||
|     //P3:665    athlon:517 | ||||
|     asm( | ||||
|     __asm__( | ||||
|         "lea -0x100(%0), %%eax      \n\t" | ||||
|         "cltd                       \n\t" | ||||
|         "mov %0, %%eax              \n\t" | ||||
| @@ -336,7 +336,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ | ||||
| #elif 0 | ||||
|     int temp2; | ||||
|     //P3:673    athlon:509 | ||||
|     asm( | ||||
|     __asm__( | ||||
|         "cmp $0x100, %0             \n\t" | ||||
|         "sbb %%edx, %%edx           \n\t" | ||||
|         "mov %0, %%eax              \n\t" | ||||
| @@ -349,7 +349,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ | ||||
| #else | ||||
|     int temp2; | ||||
|     //P3:677    athlon:511 | ||||
|     asm( | ||||
|     __asm__( | ||||
|         "cmp $0x100, %0             \n\t" | ||||
|         "lea (%0, %0), %%eax        \n\t" | ||||
|         "lea (%1, %1), %%edx        \n\t" | ||||
| @@ -385,7 +385,7 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st | ||||
|     int bit; | ||||
|  | ||||
| #ifndef BRANCHLESS_CABAC_DECODER | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movzbl (%1), %0                        \n\t" | ||||
|         "movl "RANGE    "(%2), %%ebx            \n\t" | ||||
|         "movl "RANGE    "(%2), %%edx            \n\t" | ||||
| @@ -524,7 +524,7 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st | ||||
|         "add    "tmp"       , "low"                                     \n\t"\ | ||||
|         "1:                                                             \n\t" | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movl "RANGE    "(%2), %%esi            \n\t" | ||||
|         "movl "LOW      "(%2), %%ebx            \n\t" | ||||
|         BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl") | ||||
| @@ -591,7 +591,7 @@ static int av_unused get_cabac(CABACContext *c, uint8_t * const state){ | ||||
| static int av_unused get_cabac_bypass(CABACContext *c){ | ||||
| #if 0 //not faster | ||||
|     int bit; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movl "RANGE    "(%1), %%ebx            \n\t" | ||||
|         "movl "LOW      "(%1), %%eax            \n\t" | ||||
|         "shl $17, %%ebx                         \n\t" | ||||
| @@ -638,7 +638,7 @@ static int av_unused get_cabac_bypass(CABACContext *c){ | ||||
|  | ||||
| static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){ | ||||
| #if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movl "RANGE    "(%1), %%ebx            \n\t" | ||||
|         "movl "LOW      "(%1), %%eax            \n\t" | ||||
|         "shl $17, %%ebx                         \n\t" | ||||
|   | ||||
| @@ -177,7 +177,7 @@ static inline void mmx_emms(void) | ||||
| { | ||||
| #ifdef HAVE_MMX | ||||
|     if (cpu_flags & MM_MMX) | ||||
|         asm volatile ("emms\n\t"); | ||||
|         __asm__ volatile ("emms\n\t"); | ||||
| #endif | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -579,7 +579,7 @@ void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int li | ||||
|  | ||||
| static inline void emms(void) | ||||
| { | ||||
|     asm volatile ("emms;":::"memory"); | ||||
|     __asm__ volatile ("emms;":::"memory"); | ||||
| } | ||||
|  | ||||
|  | ||||
|   | ||||
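Both emms wrappers above matter because the MMX registers alias the x87 floating-point register stack; omitting emms before the next floating-point operation silently corrupts results. A sketch of the usage pattern around a hypothetical MMX helper:

    #include <stdint.h>

    /* Add 8 bytes of src into dst, element-wise, then release the FPU. */
    static void add8_mmx(uint8_t *dst, const uint8_t *src)
    {
        __asm__ volatile (
            "movq  (%1), %%mm0    \n\t"
            "paddb (%0), %%mm0    \n\t"   /* eight byte-wise adds at once */
            "movq  %%mm0, (%0)    \n\t"
            :: "r"(dst), "r"(src)
            : "memory");
        __asm__ volatile ("emms");        /* hand the register file back to x87 */
    }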
| @@ -35,7 +35,7 @@ | ||||
|  | ||||
| static inline void cavs_idct8_1d(int16_t *block, uint64_t bias) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq 112(%0), %%mm4  \n\t" /* mm4 = src7 */ | ||||
|         "movq  16(%0), %%mm5  \n\t" /* mm5 = src1 */ | ||||
|         "movq  80(%0), %%mm2  \n\t" /* mm2 = src5 */ | ||||
| @@ -120,7 +120,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | ||||
|  | ||||
|         cavs_idct8_1d(block+4*i, ff_pw_4); | ||||
|  | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "psraw     $3, %%mm7  \n\t" | ||||
|             "psraw     $3, %%mm6  \n\t" | ||||
|             "psraw     $3, %%mm5  \n\t" | ||||
| @@ -150,7 +150,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | ||||
|     for(i=0; i<2; i++){ | ||||
|         cavs_idct8_1d(b2+4*i, ff_pw_64); | ||||
|  | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "psraw     $7, %%mm7  \n\t" | ||||
|             "psraw     $7, %%mm6  \n\t" | ||||
|             "psraw     $7, %%mm5  \n\t" | ||||
| @@ -175,7 +175,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | ||||
|     add_pixels_clamped_mmx(b2, dst, stride); | ||||
|  | ||||
|     /* clear block */ | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|             "pxor %%mm7, %%mm7   \n\t" | ||||
|             "movq %%mm7, (%0)    \n\t" | ||||
|             "movq %%mm7, 8(%0)   \n\t" | ||||
| @@ -275,7 +275,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | ||||
|     src -= 2*srcStride;\ | ||||
|     \ | ||||
|     while(w--){\ | ||||
|       asm volatile(\ | ||||
|       __asm__ volatile(\ | ||||
|         "pxor %%mm7, %%mm7          \n\t"\ | ||||
|         "movd (%0), %%mm0           \n\t"\ | ||||
|         "add %2, %0                 \n\t"\ | ||||
| @@ -306,7 +306,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | ||||
|         : "memory"\ | ||||
|      );\ | ||||
|      if(h==16){\ | ||||
|         asm volatile(\ | ||||
|         __asm__ volatile(\ | ||||
|             VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ | ||||
|             VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ | ||||
|             VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ | ||||
| @@ -328,7 +328,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | ||||
| #define QPEL_CAVS(OPNAME, OP, MMX)\ | ||||
| static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||||
|     int h=8;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor %%mm7, %%mm7          \n\t"\ | ||||
|         "movq %5, %%mm6             \n\t"\ | ||||
|         "1:                         \n\t"\ | ||||
|   | ||||
| @@ -28,7 +28,7 @@ | ||||
|  | ||||
| /* ebx saving is necessary for PIC. gcc seems unable to see it alone */ | ||||
| #define cpuid(index,eax,ebx,ecx,edx)\ | ||||
|     asm volatile\ | ||||
|     __asm__ volatile\ | ||||
|         ("mov %%"REG_b", %%"REG_S"\n\t"\ | ||||
|          "cpuid\n\t"\ | ||||
|          "xchg %%"REG_b", %%"REG_S\ | ||||
| @@ -44,7 +44,7 @@ int mm_support(void) | ||||
|     int max_std_level, max_ext_level, std_caps=0, ext_caps=0; | ||||
|     x86_reg a, c; | ||||
|  | ||||
|     asm volatile ( | ||||
|     __asm__ volatile ( | ||||
|         /* See if CPUID instruction is supported ... */ | ||||
|         /* ... Get copies of EFLAGS into eax and ecx */ | ||||
|         "pushf\n\t" | ||||
|   | ||||
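The cpuid macro above saves and restores the base register because 32-bit PIC code reserves ebx for the GOT pointer, and GCC will not accept (or may miscompile) asm that clobbers it directly; routing the value through esi sidesteps that. A simplified 32-bit-only sketch of the same guard (hypothetical helper name):

    static void cpuid32(int index, int *eax, int *ebx, int *ecx, int *edx)
    {
        __asm__ volatile (
            "mov  %%ebx, %%esi  \n\t"   /* preserve the PIC register */
            "cpuid              \n\t"
            "xchg %%ebx, %%esi  \n\t"   /* ebx restored, result now in esi */
            : "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx)
            : "0"(index));
    }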
| @@ -47,7 +47,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|  | ||||
|         rnd_reg = rnd ? &ff_pw_4 : &ff_pw_3; | ||||
|  | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movd %0, %%mm5\n\t" | ||||
|             "movq %1, %%mm4\n\t" | ||||
|             "movq %2, %%mm6\n\t"         /* mm6 = rnd */ | ||||
| @@ -58,13 +58,13 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|             :: "rm"(x+y), "m"(ff_pw_8), "m"(*rnd_reg)); | ||||
|  | ||||
|         for(i=0; i<h; i++) { | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 /* mm0 = src[0..7], mm1 = src[1..8] */ | ||||
|                 "movq %0, %%mm0\n\t" | ||||
|                 "movq %1, %%mm2\n\t" | ||||
|                 :: "m"(src[0]), "m"(src[dxy])); | ||||
|  | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 /* [mm0,mm1] = A * src[0..7] */ | ||||
|                 /* [mm2,mm3] = B * src[1..8] */ | ||||
|                 "movq %%mm0, %%mm1\n\t" | ||||
| @@ -98,7 +98,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|  | ||||
|     /* general case, bilinear */ | ||||
|     rnd_reg = rnd ? &ff_pw_32.a : &ff_pw_28.a; | ||||
|     asm volatile("movd %2, %%mm4\n\t" | ||||
|     __asm__ volatile("movd %2, %%mm4\n\t" | ||||
|                  "movd %3, %%mm6\n\t" | ||||
|                  "punpcklwd %%mm4, %%mm4\n\t" | ||||
|                  "punpcklwd %%mm6, %%mm6\n\t" | ||||
| @@ -119,7 +119,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|                  "movq %%mm4, %0\n\t" | ||||
|                  : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64)); | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         /* mm0 = src[0..7], mm1 = src[1..8] */ | ||||
|         "movq %0, %%mm0\n\t" | ||||
|         "movq %1, %%mm1\n\t" | ||||
| @@ -128,7 +128,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|     for(i=0; i<h; i++) { | ||||
|         src += stride; | ||||
|  | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             /* mm2 = A * src[0..3] + B * src[1..4] */ | ||||
|             /* mm3 = A * src[4..7] + B * src[5..8] */ | ||||
|             "movq %%mm0, %%mm2\n\t" | ||||
| @@ -145,7 +145,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|             "paddw %%mm0, %%mm3\n\t" | ||||
|             : : "m" (AA)); | ||||
|  | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             /* [mm2,mm3] += C * src[0..7] */ | ||||
|             "movq %0, %%mm0\n\t" | ||||
|             "movq %%mm0, %%mm1\n\t" | ||||
| @@ -157,7 +157,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|             "paddw %%mm1, %%mm3\n\t" | ||||
|             : : "m" (src[0])); | ||||
|  | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             /* [mm2,mm3] += D * src[1..8] */ | ||||
|             "movq %1, %%mm1\n\t" | ||||
|             "movq %%mm1, %%mm0\n\t" | ||||
| @@ -171,7 +171,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|             "movq %0, %%mm0\n\t" | ||||
|             : : "m" (src[0]), "m" (src[1]), "m" (DD)); | ||||
|  | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             /* dst[0..7] = ([mm2,mm3] + 32) >> 6 */ | ||||
|             "paddw %1, %%mm2\n\t" | ||||
|             "paddw %1, %%mm3\n\t" | ||||
| @@ -187,7 +187,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|  | ||||
| static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor   %%mm7, %%mm7        \n\t" | ||||
|         "movd %5, %%mm2             \n\t" | ||||
|         "movd %6, %%mm3             \n\t" | ||||
| @@ -259,7 +259,7 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1* | ||||
|     int tmp = ((1<<16)-1)*x + 8; | ||||
|     int CD= tmp*y; | ||||
|     int AB= (tmp<<3) - CD; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         /* mm5 = {A,B,A,B} */ | ||||
|         /* mm6 = {C,D,C,D} */ | ||||
|         "movd %0, %%mm5\n\t" | ||||
| @@ -274,7 +274,7 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1* | ||||
|         :: "r"(AB), "r"(CD), "m"(src[0])); | ||||
|  | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "1:\n\t" | ||||
|         "add %4, %1\n\t" | ||||
|         /* mm1 = A * src[0,1] + B * src[1,2] */ | ||||
|   | ||||
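The chroma code above also illustrates a fragile idiom used throughout these files: one computation is split across several consecutive __asm__ statements that rely on MMX register contents surviving in between, which holds only because the compiler never allocates mm registers itself. A schematic sketch of the idiom (hypothetical function; it doubles its argument):

    static int carry_between_asms(int x)
    {
        int y;
        __asm__ volatile ("movd %0, %%mm0" :: "r"(x));   /* leave x in mm0 */
        /* no floating-point or MMX C code may run between these statements */
        __asm__ volatile ("paddd %%mm0, %%mm0" ::);      /* still sees mm0 */
        __asm__ volatile ("movd %%mm0, %0" : "=r"(y));
        __asm__ volatile ("emms");
        return y;
    }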
| @@ -37,7 +37,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|     if(y==0 || x==0) | ||||
|     { | ||||
|         /* 1 dimensional filter only */ | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movd %0, %%xmm7 \n\t" | ||||
|             "movq %1, %%xmm6 \n\t" | ||||
|             "pshuflw $0, %%xmm7, %%xmm7 \n\t" | ||||
| @@ -47,7 +47,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|         ); | ||||
|  | ||||
|         if(x) { | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "1: \n\t" | ||||
|                 "movq (%1), %%xmm0 \n\t" | ||||
|                 "movq 1(%1), %%xmm1 \n\t" | ||||
| @@ -75,7 +75,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|                 :"r"((x86_reg)stride) | ||||
|             ); | ||||
|         } else { | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "1: \n\t" | ||||
|                 "movq (%1), %%xmm0 \n\t" | ||||
|                 "movq (%1,%3), %%xmm1 \n\t" | ||||
| @@ -107,7 +107,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|     } | ||||
|  | ||||
|     /* general case, bilinear */ | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movd %0, %%xmm7 \n\t" | ||||
|         "movd %1, %%xmm6 \n\t" | ||||
|         "movdqa %2, %%xmm5 \n\t" | ||||
| @@ -118,7 +118,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|         :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(*(rnd?&ff_pw_32:&ff_pw_28)) | ||||
|     ); | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq (%1), %%xmm0 \n\t" | ||||
|         "movq 1(%1), %%xmm1 \n\t" | ||||
|         "punpcklbw %%xmm1, %%xmm0 \n\t" | ||||
| @@ -160,7 +160,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | ||||
|  | ||||
| static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movd %0, %%mm7 \n\t" | ||||
|         "movd %1, %%mm6 \n\t" | ||||
|         "movq %2, %%mm5 \n\t" | ||||
| @@ -169,7 +169,7 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1* | ||||
|         :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(ff_pw_32) | ||||
|     ); | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movd (%1), %%mm0 \n\t" | ||||
|         "punpcklbw 1(%1), %%mm0 \n\t" | ||||
|         "add %3, %1 \n\t" | ||||
|   | ||||
| @@ -70,28 +70,28 @@ DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL; | ||||
| DECLARE_ALIGNED_16(const double, ff_pd_1[2]) = { 1.0, 1.0 }; | ||||
| DECLARE_ALIGNED_16(const double, ff_pd_2[2]) = { 2.0, 2.0 }; | ||||
|  | ||||
| #define JUMPALIGN() asm volatile (ASMALIGN(3)::) | ||||
| #define MOVQ_ZERO(regd)  asm volatile ("pxor %%" #regd ", %%" #regd ::) | ||||
| #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::) | ||||
| #define MOVQ_ZERO(regd)  __asm__ volatile ("pxor %%" #regd ", %%" #regd ::) | ||||
|  | ||||
| #define MOVQ_BFE(regd) \ | ||||
|     asm volatile ( \ | ||||
|     __asm__ volatile ( \ | ||||
|     "pcmpeqd %%" #regd ", %%" #regd " \n\t"\ | ||||
|     "paddb %%" #regd ", %%" #regd " \n\t" ::) | ||||
|  | ||||
| #ifndef PIC | ||||
| #define MOVQ_BONE(regd)  asm volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone)) | ||||
| #define MOVQ_WTWO(regd)  asm volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo)) | ||||
| #define MOVQ_BONE(regd)  __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone)) | ||||
| #define MOVQ_WTWO(regd)  __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo)) | ||||
| #else | ||||
| // for shared library it's better to use this way for accessing constants | ||||
| // pcmpeqd -> -1 | ||||
| #define MOVQ_BONE(regd) \ | ||||
|     asm volatile ( \ | ||||
|     __asm__ volatile ( \ | ||||
|     "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ | ||||
|     "psrlw $15, %%" #regd " \n\t" \ | ||||
|     "packuswb %%" #regd ", %%" #regd " \n\t" ::) | ||||
|  | ||||
| #define MOVQ_WTWO(regd) \ | ||||
|     asm volatile ( \ | ||||
|     __asm__ volatile ( \ | ||||
|     "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ | ||||
|     "psrlw $15, %%" #regd " \n\t" \ | ||||
|     "psllw $1, %%" #regd " \n\t"::) | ||||
| @@ -223,7 +223,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size | ||||
|     p = block; | ||||
|     pix = pixels; | ||||
|     /* unrolled loop */ | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|                 "movq   %3, %%mm0               \n\t" | ||||
|                 "movq   8%3, %%mm1              \n\t" | ||||
|                 "movq   16%3, %%mm2             \n\t" | ||||
| @@ -248,7 +248,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size | ||||
|     // if here would be an exact copy of the code above | ||||
|     // compiler would generate some very strange code | ||||
|     // thus using "r" | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|             "movq       (%3), %%mm0             \n\t" | ||||
|             "movq       8(%3), %%mm1            \n\t" | ||||
|             "movq       16(%3), %%mm2           \n\t" | ||||
| @@ -299,7 +299,7 @@ void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size | ||||
|     MOVQ_ZERO(mm7); | ||||
|     i = 4; | ||||
|     do { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|                 "movq   (%2), %%mm0     \n\t" | ||||
|                 "movq   8(%2), %%mm1    \n\t" | ||||
|                 "movq   16(%2), %%mm2   \n\t" | ||||
| @@ -330,7 +330,7 @@ void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size | ||||
|  | ||||
| static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|          "lea (%3, %3), %%"REG_a"       \n\t" | ||||
|          ASMALIGN(3) | ||||
|          "1:                            \n\t" | ||||
| @@ -356,7 +356,7 @@ static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size | ||||
|  | ||||
| static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|          "lea (%3, %3), %%"REG_a"       \n\t" | ||||
|          ASMALIGN(3) | ||||
|          "1:                            \n\t" | ||||
| @@ -382,7 +382,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size | ||||
|  | ||||
| static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|          "lea (%3, %3), %%"REG_a"       \n\t" | ||||
|          ASMALIGN(3) | ||||
|          "1:                            \n\t" | ||||
| @@ -416,7 +416,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz | ||||
|  | ||||
| static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|          "1:                            \n\t" | ||||
|          "movdqu (%1), %%xmm0           \n\t" | ||||
|          "movdqu (%1,%3), %%xmm1        \n\t" | ||||
| @@ -438,7 +438,7 @@ static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si | ||||
|  | ||||
| static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|          "1:                            \n\t" | ||||
|          "movdqu (%1), %%xmm0           \n\t" | ||||
|          "movdqu (%1,%3), %%xmm1        \n\t" | ||||
| @@ -464,7 +464,7 @@ static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si | ||||
|  | ||||
| static void clear_blocks_mmx(DCTELEM *blocks) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|                 "pxor %%mm7, %%mm7              \n\t" | ||||
|                 "mov $-128*6, %%"REG_a"         \n\t" | ||||
|                 "1:                             \n\t" | ||||
| @@ -481,7 +481,7 @@ static void clear_blocks_mmx(DCTELEM *blocks) | ||||
|  | ||||
| static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ | ||||
|     x86_reg i=0; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "jmp 2f                         \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq  (%1, %0), %%mm0          \n\t" | ||||
| @@ -505,7 +505,7 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ | ||||
|  | ||||
| static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ | ||||
|     x86_reg i=0; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "jmp 2f                         \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq   (%2, %0), %%mm0         \n\t" | ||||
| @@ -600,7 +600,7 @@ static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){ | ||||
|     if(ENABLE_ANY_H263) { | ||||
|     const int strength= ff_h263_loop_filter_strength[qscale]; | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|  | ||||
|         H263_LOOP_FILTER | ||||
|  | ||||
| @@ -618,7 +618,7 @@ static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){ | ||||
| } | ||||
|  | ||||
| static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ | ||||
|     asm volatile( //FIXME could save 1 instruction if done as 8x4 ... | ||||
|     __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ... | ||||
|         "movd  %4, %%mm0                \n\t" | ||||
|         "movd  %5, %%mm1                \n\t" | ||||
|         "movd  %6, %%mm2                \n\t" | ||||
| @@ -656,7 +656,7 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ | ||||
|  | ||||
|     transpose4x4(btemp  , src           , 8, stride); | ||||
|     transpose4x4(btemp+4, src + 4*stride, 8, stride); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         H263_LOOP_FILTER // 5 3 4 6 | ||||
|  | ||||
|         : "+m" (temp[0]), | ||||
| @@ -666,7 +666,7 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ | ||||
|         : "g" (2*strength), "m"(ff_pb_FC) | ||||
|     ); | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq %%mm5, %%mm1              \n\t" | ||||
|         "movq %%mm4, %%mm0              \n\t" | ||||
|         "punpcklbw %%mm3, %%mm5         \n\t" | ||||
| @@ -711,7 +711,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) | ||||
|     ptr = buf; | ||||
|     if(w==8) | ||||
|     { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|                 "1:                             \n\t" | ||||
|                 "movd (%0), %%mm0               \n\t" | ||||
|                 "punpcklbw %%mm0, %%mm0         \n\t" | ||||
| @@ -732,7 +732,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|                 "1:                             \n\t" | ||||
|                 "movd (%0), %%mm0               \n\t" | ||||
|                 "punpcklbw %%mm0, %%mm0         \n\t" | ||||
| @@ -757,7 +757,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) | ||||
|     for(i=0;i<w;i+=4) { | ||||
|         /* top and bottom (and hopefully also the corners) */ | ||||
|         ptr= buf - (i + 1) * wrap - w; | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|                 "1:                             \n\t" | ||||
|                 "movq (%1, %0), %%mm0           \n\t" | ||||
|                 "movq %%mm0, (%0)               \n\t" | ||||
| @@ -771,7 +771,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) | ||||
|                 : "r" ((x86_reg)buf - (x86_reg)ptr - w), "r" ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w) | ||||
|         ); | ||||
|         ptr= last_line + (i + 1) * wrap - w; | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|                 "1:                             \n\t" | ||||
|                 "movq (%1, %0), %%mm0           \n\t" | ||||
|                 "movq %%mm0, (%0)               \n\t" | ||||
| @@ -792,7 +792,7 @@ static void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t * | ||||
| {\ | ||||
|     x86_reg i = -bpp;\ | ||||
|     x86_reg end = w-3;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor      %%mm7, %%mm7 \n"\ | ||||
|         "movd    (%1,%0), %%mm0 \n"\ | ||||
|         "movd    (%2,%0), %%mm1 \n"\ | ||||
| @@ -886,7 +886,7 @@ PAETH(ssse3, ABS3_SSSE3) | ||||
| static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ | ||||
|     uint64_t temp;\ | ||||
| \ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor %%mm7, %%mm7                \n\t"\ | ||||
|         "1:                               \n\t"\ | ||||
|         "movq  (%0), %%mm0                \n\t" /* ABCDEFGH */\ | ||||
| @@ -1025,7 +1025,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, i | ||||
|         temp[13]= (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]);\ | ||||
|         temp[14]= (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]);\ | ||||
|         temp[15]= (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]);\ | ||||
|         asm volatile(\ | ||||
|         __asm__ volatile(\ | ||||
|             "movq (%0), %%mm0               \n\t"\ | ||||
|             "movq 8(%0), %%mm1              \n\t"\ | ||||
|             "paddw %2, %%mm0                \n\t"\ | ||||
| @@ -1051,7 +1051,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, i | ||||
| }\ | ||||
| \ | ||||
| static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor %%mm7, %%mm7                \n\t"\ | ||||
|         "1:                               \n\t"\ | ||||
|         "movq  (%0), %%mm0                \n\t" /* ABCDEFGH */\ | ||||
| @@ -1128,7 +1128,7 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, in | ||||
|         temp[ 5]= (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 8]);\ | ||||
|         temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 8])*3 - (src[ 3]+src[ 7]);\ | ||||
|         temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 8])*6 + (src[ 5]+src[ 7])*3 - (src[ 4]+src[ 6]);\ | ||||
|         asm volatile(\ | ||||
|         __asm__ volatile(\ | ||||
|             "movq (%0), %%mm0           \n\t"\ | ||||
|             "movq 8(%0), %%mm1          \n\t"\ | ||||
|             "paddw %2, %%mm0            \n\t"\ | ||||
| @@ -1153,7 +1153,7 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, | ||||
|     int count= 17;\ | ||||
| \ | ||||
|     /*FIXME unroll */\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor %%mm7, %%mm7              \n\t"\ | ||||
|         "1:                             \n\t"\ | ||||
|         "movq (%0), %%mm0               \n\t"\ | ||||
| @@ -1181,7 +1181,7 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, | ||||
|     count=4;\ | ||||
|     \ | ||||
| /*FIXME reorder for speed */\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         /*"pxor %%mm7, %%mm7              \n\t"*/\ | ||||
|         "1:                             \n\t"\ | ||||
|         "movq (%0), %%mm0               \n\t"\ | ||||
| @@ -1231,7 +1231,7 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, | ||||
|     int count= 9;\ | ||||
| \ | ||||
|     /*FIXME unroll */\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor %%mm7, %%mm7              \n\t"\ | ||||
|         "1:                             \n\t"\ | ||||
|         "movq (%0), %%mm0               \n\t"\ | ||||
| @@ -1253,7 +1253,7 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, | ||||
|     count=2;\ | ||||
|     \ | ||||
| /*FIXME reorder for speed */\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         /*"pxor %%mm7, %%mm7              \n\t"*/\ | ||||
|         "1:                             \n\t"\ | ||||
|         "movq (%0), %%mm0               \n\t"\ | ||||
| @@ -1620,7 +1620,7 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int o | ||||
|         src = edge_buf; | ||||
|     } | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movd         %0, %%mm6 \n\t" | ||||
|         "pxor      %%mm7, %%mm7 \n\t" | ||||
|         "punpcklwd %%mm6, %%mm6 \n\t" | ||||
| @@ -1639,7 +1639,7 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int o | ||||
|                             oys - dyys + dyxs*(x+3) }; | ||||
|  | ||||
|         for(y=0; y<h; y++){ | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movq   %0,  %%mm4 \n\t" | ||||
|                 "movq   %1,  %%mm5 \n\t" | ||||
|                 "paddw  %2,  %%mm4 \n\t" | ||||
| @@ -1652,7 +1652,7 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int o | ||||
|                 : "m"(*dxy4), "m"(*dyy4) | ||||
|             ); | ||||
|  | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movq   %%mm6, %%mm2 \n\t" | ||||
|                 "movq   %%mm6, %%mm1 \n\t" | ||||
|                 "psubw  %%mm4, %%mm2 \n\t" | ||||
| @@ -1701,7 +1701,7 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int o | ||||
| static void name(void *mem, int stride, int h){\ | ||||
|     const uint8_t *p= mem;\ | ||||
|     do{\ | ||||
|         asm volatile(#op" %0" :: "m"(*p));\ | ||||
|         __asm__ volatile(#op" %0" :: "m"(*p));\ | ||||
|         p+= stride;\ | ||||
|     }while(--h);\ | ||||
| } | ||||
| @@ -1787,9 +1787,9 @@ static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block) | ||||
| static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize) | ||||
| { | ||||
|     int i; | ||||
|     asm volatile("pxor %%mm7, %%mm7":); | ||||
|     __asm__ volatile("pxor %%mm7, %%mm7":); | ||||
|     for(i=0; i<blocksize; i+=2) { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movq    %0,    %%mm0 \n\t" | ||||
|             "movq    %1,    %%mm1 \n\t" | ||||
|             "movq    %%mm0, %%mm2 \n\t" | ||||
| @@ -1809,18 +1809,18 @@ static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize) | ||||
|             ::"memory" | ||||
|         ); | ||||
|     } | ||||
|     asm volatile("femms"); | ||||
|     __asm__ volatile("femms"); | ||||
| } | ||||
| static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) | ||||
| { | ||||
|     int i; | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|             "movaps  %0,     %%xmm5 \n\t" | ||||
|         ::"m"(ff_pdw_80000000[0]) | ||||
|     ); | ||||
|     for(i=0; i<blocksize; i+=4) { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movaps  %0,     %%xmm0 \n\t" | ||||
|             "movaps  %1,     %%xmm1 \n\t" | ||||
|             "xorps   %%xmm2, %%xmm2 \n\t" | ||||
| @@ -1846,7 +1846,7 @@ static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) | ||||
| #define IF0(x) | ||||
|  | ||||
| #define MIX5(mono,stereo)\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "movss          0(%2), %%xmm5 \n"\ | ||||
|         "movss          8(%2), %%xmm6 \n"\ | ||||
|         "movss         24(%2), %%xmm7 \n"\ | ||||
| @@ -1879,7 +1879,7 @@ static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) | ||||
|     ); | ||||
|  | ||||
| #define MIX_MISC(stereo)\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "1: \n"\ | ||||
|         "movaps  (%3,%0), %%xmm0 \n"\ | ||||
|  stereo("movaps   %%xmm0, %%xmm1 \n")\ | ||||
| @@ -1919,7 +1919,7 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], int out_c | ||||
|     } else { | ||||
|         DECLARE_ALIGNED_16(float, matrix_simd[in_ch][2][4]); | ||||
|         j = 2*in_ch*sizeof(float); | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "1: \n" | ||||
|             "sub $8, %0 \n" | ||||
|             "movss     (%2,%0), %%xmm6 \n" | ||||
| @@ -1943,7 +1943,7 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], int out_c | ||||
|  | ||||
| static void vector_fmul_3dnow(float *dst, const float *src, int len){ | ||||
|     x86_reg i = (len-4)*4; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "1: \n\t" | ||||
|         "movq    (%1,%0), %%mm0 \n\t" | ||||
|         "movq   8(%1,%0), %%mm1 \n\t" | ||||
| @@ -1961,7 +1961,7 @@ static void vector_fmul_3dnow(float *dst, const float *src, int len){ | ||||
| } | ||||
| static void vector_fmul_sse(float *dst, const float *src, int len){ | ||||
|     x86_reg i = (len-8)*4; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "1: \n\t" | ||||
|         "movaps    (%1,%0), %%xmm0 \n\t" | ||||
|         "movaps  16(%1,%0), %%xmm1 \n\t" | ||||
| @@ -1979,7 +1979,7 @@ static void vector_fmul_sse(float *dst, const float *src, int len){ | ||||
|  | ||||
| static void vector_fmul_reverse_3dnow2(float *dst, const float *src0, const float *src1, int len){ | ||||
|     x86_reg i = len*4-16; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "1: \n\t" | ||||
|         "pswapd   8(%1), %%mm0 \n\t" | ||||
|         "pswapd    (%1), %%mm1 \n\t" | ||||
| @@ -1993,11 +1993,11 @@ static void vector_fmul_reverse_3dnow2(float *dst, const float *src0, const floa | ||||
|         :"+r"(i), "+r"(src1) | ||||
|         :"r"(dst), "r"(src0) | ||||
|     ); | ||||
|     asm volatile("femms"); | ||||
|     __asm__ volatile("femms"); | ||||
| } | ||||
| static void vector_fmul_reverse_sse(float *dst, const float *src0, const float *src1, int len){ | ||||
|     x86_reg i = len*4-32; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "1: \n\t" | ||||
|         "movaps        16(%1), %%xmm0 \n\t" | ||||
|         "movaps          (%1), %%xmm1 \n\t" | ||||
| @@ -2020,7 +2020,7 @@ static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float | ||||
|     x86_reg i = (len-4)*4; | ||||
|     if(step == 2 && src3 == 0){ | ||||
|         dst += (len-4)*2; | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "1: \n\t" | ||||
|             "movq   (%2,%0),  %%mm0 \n\t" | ||||
|             "movq  8(%2,%0),  %%mm1 \n\t" | ||||
| @@ -2043,7 +2043,7 @@ static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float | ||||
|         ); | ||||
|     } | ||||
|     else if(step == 1 && src3 == 0){ | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "1: \n\t" | ||||
|             "movq    (%2,%0), %%mm0 \n\t" | ||||
|             "movq   8(%2,%0), %%mm1 \n\t" | ||||
| @@ -2062,14 +2062,14 @@ static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float | ||||
|     } | ||||
|     else | ||||
|         ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step); | ||||
|     asm volatile("femms"); | ||||
|     __asm__ volatile("femms"); | ||||
| } | ||||
| static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *src1, | ||||
|                                     const float *src2, int src3, int len, int step){ | ||||
|     x86_reg i = (len-8)*4; | ||||
|     if(step == 2 && src3 == 0){ | ||||
|         dst += (len-8)*2; | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "1: \n\t" | ||||
|             "movaps   (%2,%0), %%xmm0 \n\t" | ||||
|             "movaps 16(%2,%0), %%xmm1 \n\t" | ||||
| @@ -2100,7 +2100,7 @@ static void vector_fmul_add_add_sse(float *dst, const float *src0, const float * | ||||
|         ); | ||||
|     } | ||||
|     else if(step == 1 && src3 == 0){ | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "1: \n\t" | ||||
|             "movaps   (%2,%0), %%xmm0 \n\t" | ||||
|             "movaps 16(%2,%0), %%xmm1 \n\t" | ||||
| @@ -2127,7 +2127,7 @@ static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float | ||||
|     if(add_bias == 0){ | ||||
|         x86_reg i = -len*4; | ||||
|         x86_reg j = len*4-8; | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "1: \n" | ||||
|             "pswapd  (%5,%1), %%mm1 \n" | ||||
|             "movq    (%5,%0), %%mm0 \n" | ||||
| @@ -2162,7 +2162,7 @@ static void vector_fmul_window_sse(float *dst, const float *src0, const float *s | ||||
|     if(add_bias == 0){ | ||||
|         x86_reg i = -len*4; | ||||
|         x86_reg j = len*4-16; | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "1: \n" | ||||
|             "movaps       (%5,%1), %%xmm1 \n" | ||||
|             "movaps       (%5,%0), %%xmm0 \n" | ||||
| @@ -2195,7 +2195,7 @@ static void vector_fmul_window_sse(float *dst, const float *src0, const float *s | ||||
| static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len) | ||||
| { | ||||
|     x86_reg i = -4*len; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movss  %3, %%xmm4 \n" | ||||
|         "shufps $0, %%xmm4, %%xmm4 \n" | ||||
|         "1: \n" | ||||
| @@ -2219,7 +2219,7 @@ static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul | ||||
| static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len) | ||||
| { | ||||
|     x86_reg i = -4*len; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movss  %3, %%xmm4 \n" | ||||
|         "shufps $0, %%xmm4, %%xmm4 \n" | ||||
|         "1: \n" | ||||
| @@ -2238,7 +2238,7 @@ static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mu | ||||
|  | ||||
| static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){ | ||||
|     // not bit-exact: pf2id uses different rounding than C and SSE | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "add        %0          , %0        \n\t" | ||||
|         "lea         (%2,%0,2)  , %2        \n\t" | ||||
|         "add        %0          , %1        \n\t" | ||||
| @@ -2259,7 +2259,7 @@ static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){ | ||||
|     ); | ||||
| } | ||||
| static void float_to_int16_sse(int16_t *dst, const float *src, long len){ | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "add        %0          , %0        \n\t" | ||||
|         "lea         (%2,%0,2)  , %2        \n\t" | ||||
|         "add        %0          , %1        \n\t" | ||||
| @@ -2281,7 +2281,7 @@ static void float_to_int16_sse(int16_t *dst, const float *src, long len){ | ||||
| } | ||||
|  | ||||
| static void float_to_int16_sse2(int16_t *dst, const float *src, long len){ | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "add        %0          , %0        \n\t" | ||||
|         "lea         (%2,%0,2)  , %2        \n\t" | ||||
|         "add        %0          , %1        \n\t" | ||||
| @@ -2326,7 +2326,7 @@ static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, lon | ||||
|     else if(channels==2){\ | ||||
|         const float *src0 = src[0];\ | ||||
|         const float *src1 = src[1];\ | ||||
|         asm volatile(\ | ||||
|         __asm__ volatile(\ | ||||
|             "shl $2, %0 \n"\ | ||||
|             "add %0, %1 \n"\ | ||||
|             "add %0, %2 \n"\ | ||||
| @@ -2412,7 +2412,7 @@ static void add_int16_sse2(int16_t * v1, int16_t * v2, int order) | ||||
|     x86_reg o = -(order << 1); | ||||
|     v1 += order; | ||||
|     v2 += order; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "1:                          \n\t" | ||||
|         "movdqu   (%1,%2),   %%xmm0  \n\t" | ||||
|         "movdqu 16(%1,%2),   %%xmm1  \n\t" | ||||
| @@ -2431,7 +2431,7 @@ static void sub_int16_sse2(int16_t * v1, int16_t * v2, int order) | ||||
|     x86_reg o = -(order << 1); | ||||
|     v1 += order; | ||||
|     v2 += order; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "1:                           \n\t" | ||||
|         "movdqa    (%0,%2),   %%xmm0  \n\t" | ||||
|         "movdqa  16(%0,%2),   %%xmm2  \n\t" | ||||
| @@ -2456,7 +2456,7 @@ static int32_t scalarproduct_int16_sse2(int16_t * v1, int16_t * v2, int order, i | ||||
|     v1 += order; | ||||
|     v2 += order; | ||||
|     sh = shift; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor      %%xmm7,  %%xmm7        \n\t" | ||||
|         "1:                               \n\t" | ||||
|         "movdqu    (%0,%3), %%xmm0        \n\t" | ||||
|   | ||||
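Every hunk in the block above makes the same one-token substitution inside GCC extended-asm statements. As a hedged illustration (a standalone sketch, not code from the tree), the __asm__ spelling compiles cleanly even under a strict -std=c99 build:

#include <stdio.h>

static int add_one(int x)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    /* extended asm: "+r" binds x to a register used as both input and output */
    __asm__ volatile("incl %0" : "+r"(x));
#else
    x += 1;                     /* fallback for other compilers/architectures */
#endif
    return x;
}

int main(void)
{
    printf("%d\n", add_one(41));   /* prints 42 */
    return 0;
}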
| @@ -127,7 +127,7 @@ extern const double ff_pd_2[2]; | ||||
| #endif | ||||
|  | ||||
| #define MOVQ_WONE(regd) \ | ||||
|     asm volatile ( \ | ||||
|     __asm__ volatile ( \ | ||||
|     "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ | ||||
|     "psrlw $15, %%" #regd ::) | ||||
|  | ||||
|   | ||||
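The MOVQ_WONE hunk above converts an asm statement hidden behind a macro. For reference, a sketch of how the stringizing works (SET_WONE is a hypothetical name): the # operator turns the macro argument into a string and adjacent literals concatenate, so SET_WONE(mm3) produces one template naming %%mm3 throughout. pcmpeqd reg,reg sets every bit (a register always equals itself) and psrlw $15 then leaves the value 1 in each 16-bit word:

#define SET_WONE(regd) \
    __asm__ volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
    "psrlw $15, %%" #regd ::)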
| @@ -33,7 +33,7 @@ | ||||
|  */ | ||||
| static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%3, %3), %%"REG_a"        \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq (%1), %%mm0               \n\t" | ||||
| @@ -61,7 +61,7 @@ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_ | ||||
|  | ||||
| static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "testl $1, %0                   \n\t" | ||||
|             " jz 1f                     \n\t" | ||||
|         "movd   (%1), %%mm0             \n\t" | ||||
| @@ -112,7 +112,7 @@ static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int | ||||
|  | ||||
| static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "testl $1, %0                   \n\t" | ||||
|             " jz 1f                     \n\t" | ||||
|         "movq   (%1), %%mm0             \n\t" | ||||
| @@ -162,7 +162,7 @@ static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int | ||||
|  | ||||
| static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pcmpeqb %%mm6, %%mm6           \n\t" | ||||
|         "testl $1, %0                   \n\t" | ||||
|             " jz 1f                     \n\t" | ||||
| @@ -232,7 +232,7 @@ static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src | ||||
|  | ||||
| static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "testl $1, %0                   \n\t" | ||||
|             " jz 1f                     \n\t" | ||||
|         "movd   (%1), %%mm0             \n\t" | ||||
| @@ -284,7 +284,7 @@ static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int | ||||
|  | ||||
| static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "testl $1, %0                   \n\t" | ||||
|             " jz 1f                     \n\t" | ||||
|         "movq   (%1), %%mm0             \n\t" | ||||
| @@ -339,7 +339,7 @@ static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int | ||||
|  | ||||
| static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%3, %3), %%"REG_a"        \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq (%1), %%mm0               \n\t" | ||||
| @@ -379,7 +379,7 @@ static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line | ||||
|  | ||||
| static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "testl $1, %0                   \n\t" | ||||
|             " jz 1f                     \n\t" | ||||
|         "movq   (%1), %%mm0             \n\t" | ||||
| @@ -427,7 +427,7 @@ static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int | ||||
|  | ||||
| static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "testl $1, %0                   \n\t" | ||||
|             " jz 1f                     \n\t" | ||||
|         "movq   (%1), %%mm0             \n\t" | ||||
| @@ -481,7 +481,7 @@ static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int | ||||
|  | ||||
| static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pcmpeqb %%mm6, %%mm6           \n\t" | ||||
|         "testl $1, %0                   \n\t" | ||||
|             " jz 1f                     \n\t" | ||||
| @@ -556,7 +556,7 @@ static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *sr | ||||
| static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     MOVQ_BONE(mm6); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%3, %3), %%"REG_a"        \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq (%1), %%mm0               \n\t" | ||||
| @@ -592,7 +592,7 @@ static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, in | ||||
|  | ||||
| static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%3, %3), %%"REG_a"        \n\t" | ||||
|         "movq (%1), %%mm0               \n\t" | ||||
|         "sub %3, %2                     \n\t" | ||||
| @@ -624,7 +624,7 @@ static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_ | ||||
| static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     MOVQ_BONE(mm6); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%3, %3), %%"REG_a"        \n\t" | ||||
|         "movq (%1), %%mm0               \n\t" | ||||
|         "sub %3, %2                     \n\t" | ||||
| @@ -656,7 +656,7 @@ static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, in | ||||
|  | ||||
| static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%3, %3), %%"REG_a"        \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq (%2), %%mm0               \n\t" | ||||
| @@ -684,7 +684,7 @@ static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_siz | ||||
|  | ||||
| static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%3, %3), %%"REG_a"        \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq (%1), %%mm0               \n\t" | ||||
| @@ -716,7 +716,7 @@ static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_ | ||||
|  | ||||
| static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%3, %3), %%"REG_a"        \n\t" | ||||
|         "movq (%1), %%mm0               \n\t" | ||||
|         "sub %3, %2                     \n\t" | ||||
| @@ -757,7 +757,7 @@ static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_ | ||||
| static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     MOVQ_BONE(mm6); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%3, %3), %%"REG_a"        \n\t" | ||||
|         "movq (%1), %%mm0               \n\t" | ||||
|         PAVGB" 1(%1), %%mm0             \n\t" | ||||
| @@ -798,7 +798,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line | ||||
| static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     do { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movd (%1), %%mm0               \n\t" | ||||
|             "movd (%1, %2), %%mm1           \n\t" | ||||
|             "movd (%1, %2, 2), %%mm2        \n\t" | ||||
| @@ -852,7 +852,7 @@ static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int lin | ||||
|  | ||||
| #define QPEL_2TAP_L3(OPNAME) \ | ||||
| static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "1:                    \n\t"\ | ||||
|         "movq   (%1,%2), %%mm0 \n\t"\ | ||||
|         "movq  8(%1,%2), %%mm1 \n\t"\ | ||||
| @@ -874,7 +874,7 @@ static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride | ||||
|     );\ | ||||
| }\ | ||||
| static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "1:                    \n\t"\ | ||||
|         "movq   (%1,%2), %%mm0 \n\t"\ | ||||
|         PAVGB"  (%1,%3), %%mm0 \n\t"\ | ||||
|   | ||||
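Nearly every statement in the file above is qualified volatile. A minimal, hypothetical sketch of why: GCC may delete an asm whose outputs go unused and may hoist a loop-invariant one, and stores performed behind the compiler's back need an "m" output or a "memory" clobber to stay ordered:

static void fill_bytes(unsigned char *dst, unsigned char v, long n)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    long i;
    for (i = 0; i < n; i++)
        __asm__ volatile("movb %1, %0"
                         : "=m"(dst[i])    /* the store is visible to GCC */
                         : "q"(v));        /* byte-addressable register */
#else
    while (n--)
        *dst++ = v;
#endif
}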
| @@ -36,7 +36,7 @@ static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[ | ||||
|     scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT; | ||||
|  | ||||
|     SET_RND(mm6); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor %%mm7, %%mm7              \n\t" | ||||
|         "movd  %4, %%mm5                \n\t" | ||||
|         "punpcklwd %%mm5, %%mm5         \n\t" | ||||
| @@ -77,7 +77,7 @@ static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale) | ||||
|     if(FFABS(scale) < MAX_ABS){ | ||||
|         scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT; | ||||
|         SET_RND(mm6); | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|                 "movd  %3, %%mm5        \n\t" | ||||
|                 "punpcklwd %%mm5, %%mm5 \n\t" | ||||
|                 "punpcklwd %%mm5, %%mm5 \n\t" | ||||
|   | ||||
| @@ -32,7 +32,7 @@ | ||||
| static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     MOVQ_BFE(mm6); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea    (%3, %3), %%"REG_a"     \n\t" | ||||
|         ASMALIGN(3) | ||||
|         "1:                             \n\t" | ||||
| @@ -64,7 +64,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line | ||||
| static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| { | ||||
|     MOVQ_BFE(mm6); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "testl $1, %0                   \n\t" | ||||
|         " jz 1f                         \n\t" | ||||
|         "movq   (%1), %%mm0             \n\t" | ||||
| @@ -114,7 +114,7 @@ static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t | ||||
| static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     MOVQ_BFE(mm6); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea        (%3, %3), %%"REG_a" \n\t" | ||||
|         ASMALIGN(3) | ||||
|         "1:                             \n\t" | ||||
| @@ -160,7 +160,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin | ||||
| static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| { | ||||
|     MOVQ_BFE(mm6); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "testl $1, %0                   \n\t" | ||||
|         " jz 1f                         \n\t" | ||||
|         "movq   (%1), %%mm0             \n\t" | ||||
| @@ -209,7 +209,7 @@ static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t | ||||
| static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     MOVQ_BFE(mm6); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%3, %3), %%"REG_a"        \n\t" | ||||
|         "movq (%1), %%mm0               \n\t" | ||||
|         ASMALIGN(3) | ||||
| @@ -239,7 +239,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin | ||||
| { | ||||
|     MOVQ_ZERO(mm7); | ||||
|     SET_RND(mm6); // =2 for rnd  and  =1 for no_rnd version | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq   (%1), %%mm0             \n\t" | ||||
|         "movq   1(%1), %%mm4            \n\t" | ||||
|         "movq   %%mm0, %%mm1            \n\t" | ||||
| @@ -307,7 +307,7 @@ static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, i | ||||
|     MOVQ_BFE(mm6); | ||||
|     JUMPALIGN(); | ||||
|     do { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|              "movd  %0, %%mm0           \n\t" | ||||
|              "movd  %1, %%mm1           \n\t" | ||||
|              PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | ||||
| @@ -327,7 +327,7 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si | ||||
|     MOVQ_BFE(mm6); | ||||
|     JUMPALIGN(); | ||||
|     do { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|              "movq  %0, %%mm0           \n\t" | ||||
|              "movq  %1, %%mm1           \n\t" | ||||
|              PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | ||||
| @@ -346,7 +346,7 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s | ||||
|     MOVQ_BFE(mm6); | ||||
|     JUMPALIGN(); | ||||
|     do { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|              "movq  %0, %%mm0           \n\t" | ||||
|              "movq  %1, %%mm1           \n\t" | ||||
|              PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | ||||
| @@ -369,7 +369,7 @@ static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line | ||||
|     MOVQ_BFE(mm6); | ||||
|     JUMPALIGN(); | ||||
|     do { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movq  %1, %%mm0            \n\t" | ||||
|             "movq  1%1, %%mm1           \n\t" | ||||
|             "movq  %0, %%mm3            \n\t" | ||||
| @@ -389,7 +389,7 @@ static av_unused void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t | ||||
|     MOVQ_BFE(mm6); | ||||
|     JUMPALIGN(); | ||||
|     do { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movq  %1, %%mm0            \n\t" | ||||
|             "movq  %2, %%mm1            \n\t" | ||||
|             "movq  %0, %%mm3            \n\t" | ||||
| @@ -410,7 +410,7 @@ static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin | ||||
|     MOVQ_BFE(mm6); | ||||
|     JUMPALIGN(); | ||||
|     do { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movq  %1, %%mm0            \n\t" | ||||
|             "movq  1%1, %%mm1           \n\t" | ||||
|             "movq  %0, %%mm3            \n\t" | ||||
| @@ -436,7 +436,7 @@ static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t | ||||
|     MOVQ_BFE(mm6); | ||||
|     JUMPALIGN(); | ||||
|     do { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movq  %1, %%mm0            \n\t" | ||||
|             "movq  %2, %%mm1            \n\t" | ||||
|             "movq  %0, %%mm3            \n\t" | ||||
| @@ -461,7 +461,7 @@ static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t | ||||
| static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|     MOVQ_BFE(mm6); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea    (%3, %3), %%"REG_a"     \n\t" | ||||
|         "movq   (%1), %%mm0             \n\t" | ||||
|         ASMALIGN(3) | ||||
| @@ -502,7 +502,7 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin | ||||
| { | ||||
|     MOVQ_ZERO(mm7); | ||||
|     SET_RND(mm6); // =2 for rnd  and  =1 for no_rnd version | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq   (%1), %%mm0             \n\t" | ||||
|         "movq   1(%1), %%mm4            \n\t" | ||||
|         "movq   %%mm0, %%mm1            \n\t" | ||||
|   | ||||
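The kernels above keep their loops inside a single statement with the local label pair "1:" / "jnz 1b", often driven by a negative index that counts up to zero (compare "x86_reg i = -4*len" earlier in this diff). A hypothetical byte copy in the same style, assuming n > 0:

static void copy_bytes(unsigned char *dst, const unsigned char *src, long n)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    long i = -n;                          /* counts up; hits 0 at the end */
    __asm__ volatile(
        "1:                        \n\t"
        "movzbl (%1,%0), %%eax     \n\t"  /* load  src[n+i] */
        "movb   %%al, (%2,%0)      \n\t"  /* store dst[n+i] */
        "add    $1, %0             \n\t"  /* sets ZF when i reaches 0 */
        "jnz    1b                 \n\t"
        : "+r"(i)
        : "r"(src + n), "r"(dst + n)
        : "%eax", "memory");
#else
    long k;
    for (k = 0; k < n; k++)
        dst[k] = src[k];
#endif
}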
| @@ -30,7 +30,7 @@ | ||||
|  | ||||
| static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "mov $-128, %%"REG_a"           \n\t" | ||||
|         "pxor %%mm7, %%mm7              \n\t" | ||||
|         ASMALIGN(4) | ||||
| @@ -58,7 +58,7 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) | ||||
|  | ||||
| static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor %%xmm7,      %%xmm7         \n\t" | ||||
|         "movq (%0),        %%xmm0         \n\t" | ||||
|         "movq (%0, %2),    %%xmm1         \n\t" | ||||
| @@ -92,7 +92,7 @@ static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size | ||||
|  | ||||
| static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor %%mm7, %%mm7              \n\t" | ||||
|         "mov $-128, %%"REG_a"           \n\t" | ||||
|         ASMALIGN(4) | ||||
| @@ -124,7 +124,7 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){ | ||||
|     int sum; | ||||
|     x86_reg index= -line_size*h; | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|                 "pxor %%mm7, %%mm7              \n\t" | ||||
|                 "pxor %%mm6, %%mm6              \n\t" | ||||
|                 "1:                             \n\t" | ||||
| @@ -159,7 +159,7 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){ | ||||
|  | ||||
| static int pix_norm1_mmx(uint8_t *pix, int line_size) { | ||||
|     int tmp; | ||||
|   asm volatile ( | ||||
|   __asm__ volatile ( | ||||
|       "movl $16,%%ecx\n" | ||||
|       "pxor %%mm0,%%mm0\n" | ||||
|       "pxor %%mm7,%%mm7\n" | ||||
| @@ -202,7 +202,7 @@ static int pix_norm1_mmx(uint8_t *pix, int line_size) { | ||||
|  | ||||
| static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { | ||||
|     int tmp; | ||||
|   asm volatile ( | ||||
|   __asm__ volatile ( | ||||
|       "movl %4,%%ecx\n" | ||||
|       "shr $1,%%ecx\n" | ||||
|       "pxor %%mm0,%%mm0\n"      /* mm0 = 0 */ | ||||
| @@ -263,7 +263,7 @@ static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int | ||||
|  | ||||
| static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { | ||||
|     int tmp; | ||||
|   asm volatile ( | ||||
|   __asm__ volatile ( | ||||
|       "movl %4,%%ecx\n" | ||||
|       "pxor %%mm0,%%mm0\n"      /* mm0 = 0 */ | ||||
|       "pxor %%mm7,%%mm7\n"      /* mm7 holds the sum */ | ||||
| @@ -323,7 +323,7 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int | ||||
|  | ||||
| static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { | ||||
|     int tmp; | ||||
|   asm volatile ( | ||||
|   __asm__ volatile ( | ||||
|       "shr $1,%2\n" | ||||
|       "pxor %%xmm0,%%xmm0\n"    /* mm0 = 0 */ | ||||
|       "pxor %%xmm7,%%xmm7\n"    /* mm7 holds the sum */ | ||||
| @@ -385,7 +385,7 @@ static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in | ||||
|  | ||||
| static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) { | ||||
|     int tmp; | ||||
|   asm volatile ( | ||||
|   __asm__ volatile ( | ||||
|       "movl %3,%%ecx\n" | ||||
|       "pxor %%mm7,%%mm7\n" | ||||
|       "pxor %%mm6,%%mm6\n" | ||||
| @@ -511,7 +511,7 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) { | ||||
| static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) { | ||||
|     int tmp; | ||||
|     uint8_t * pix= pix1; | ||||
|   asm volatile ( | ||||
|   __asm__ volatile ( | ||||
|       "movl %3,%%ecx\n" | ||||
|       "pxor %%mm7,%%mm7\n" | ||||
|       "pxor %%mm6,%%mm6\n" | ||||
| @@ -673,7 +673,7 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si | ||||
|       "paddw " #in0 ", %%mm6\n" | ||||
|  | ||||
|  | ||||
|   asm volatile ( | ||||
|   __asm__ volatile ( | ||||
|       "movl %3,%%ecx\n" | ||||
|       "pxor %%mm6,%%mm6\n" | ||||
|       "pxor %%mm7,%%mm7\n" | ||||
| @@ -719,7 +719,7 @@ static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_s | ||||
|       "paddw " #in1 ", " #in0 "\n"\ | ||||
|       "paddw " #in0 ", %%mm6\n" | ||||
|  | ||||
|   asm volatile ( | ||||
|   __asm__ volatile ( | ||||
|       "movl %3,%%ecx\n" | ||||
|       "pxor %%mm6,%%mm6\n" | ||||
|       "pxor %%mm7,%%mm7\n" | ||||
| @@ -782,7 +782,7 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in | ||||
|       "paddw " #in0 ", %%mm6\n" | ||||
|  | ||||
|  | ||||
|   asm volatile ( | ||||
|   __asm__ volatile ( | ||||
|       "movl %4,%%ecx\n" | ||||
|       "pxor %%mm6,%%mm6\n" | ||||
|       "pcmpeqw %%mm7,%%mm7\n" | ||||
| @@ -845,7 +845,7 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i | ||||
|       "paddw " #in1 ", " #in0 "\n"\ | ||||
|       "paddw " #in0 ", %%mm6\n" | ||||
|  | ||||
|   asm volatile ( | ||||
|   __asm__ volatile ( | ||||
|       "movl %4,%%ecx\n" | ||||
|       "pxor %%mm6,%%mm6\n" | ||||
|       "pcmpeqw %%mm7,%%mm7\n" | ||||
| @@ -881,7 +881,7 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i | ||||
|  | ||||
| static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ | ||||
|     x86_reg i=0; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "1:                             \n\t" | ||||
|         "movq  (%2, %0), %%mm0          \n\t" | ||||
|         "movq  (%1, %0), %%mm1          \n\t" | ||||
| @@ -905,7 +905,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t | ||||
|     x86_reg i=0; | ||||
|     uint8_t l, lt; | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "1:                             \n\t" | ||||
|         "movq  -1(%1, %0), %%mm0        \n\t" // LT | ||||
|         "movq  (%1, %0), %%mm1          \n\t" // T | ||||
| @@ -946,7 +946,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t | ||||
|  | ||||
| #define DIFF_PIXELS_8(m0,m1,mm,p1,p2,stride,temp) {\ | ||||
|     uint8_t *p1b=p1, *p2b=p2;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         DIFF_PIXELS_1(m0, mm##0, mm##7, (%1), (%2))\ | ||||
|         DIFF_PIXELS_1(m0, mm##1, mm##7, (%1,%3), (%2,%3))\ | ||||
|         DIFF_PIXELS_1(m0, mm##2, mm##7, (%1,%3,2), (%2,%3,2))\ | ||||
| @@ -1069,7 +1069,7 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid | ||||
| \ | ||||
|     DIFF_PIXELS_4x8(src1, src2, stride, temp[0]);\ | ||||
| \ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         HADAMARD48\ | ||||
| \ | ||||
|         "movq %%mm7, 96(%1)             \n\t"\ | ||||
| @@ -1087,7 +1087,7 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid | ||||
| \ | ||||
|     DIFF_PIXELS_4x8(src1+4, src2+4, stride, temp[4]);\ | ||||
| \ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         HADAMARD48\ | ||||
| \ | ||||
|         "movq %%mm7, 96(%1)             \n\t"\ | ||||
| @@ -1152,7 +1152,7 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid | ||||
| \ | ||||
|     DIFF_PIXELS_8x8(src1, src2, stride, temp[0]);\ | ||||
| \ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         HADAMARD8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)\ | ||||
|         TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%1))\ | ||||
|         HADAMARD8(%%xmm0, %%xmm5, %%xmm7, %%xmm3, %%xmm6, %%xmm4, %%xmm2, %%xmm1)\ | ||||
| @@ -1219,7 +1219,7 @@ HADAMARD8_DIFF_SSE2(ssse3) | ||||
| #define DCT_SAD_FUNC(cpu) \ | ||||
| static int sum_abs_dctelem_##cpu(DCTELEM *block){\ | ||||
|     int sum;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         DCT_SAD\ | ||||
|         :"=r"(sum)\ | ||||
|         :"r"(block)\ | ||||
| @@ -1256,7 +1256,7 @@ DCT_SAD_FUNC(ssse3) | ||||
| static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int size){ | ||||
|     int sum; | ||||
|     x86_reg i=size; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor %%mm4, %%mm4 \n" | ||||
|         "1: \n" | ||||
|         "sub $8, %0 \n" | ||||
|   | ||||
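DCT_SAD_FUNC above hands its result back through the "=r"(sum) output constraint; nothing crosses the asm boundary except what the constraints name. A minimal, hypothetical computation in the same shape (branchless absolute value; "=&r" marks the scratch as early-clobber because it is written before the input is finished):

static int asm_abs(int x)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    int r;
    __asm__("movl %1, %0   \n\t"
            "sarl $31, %0  \n\t"  /* r = x < 0 ? -1 : 0 */
            "xorl %0, %1   \n\t"  /* complement x if negative... */
            "subl %0, %1   \n\t"  /* ...and add 1 */
            "movl %1, %0   \n\t"
            : "=&r"(r), "+r"(x));
    return r;
#else
    return x < 0 ? -x : x;
#endif
}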
| @@ -371,7 +371,7 @@ FDCT_COL(sse2, xmm, movdqa) | ||||
|  | ||||
| static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
| #define FDCT_ROW_SSE2_H1(i,t)                    \ | ||||
|         "movq      " #i "(%0), %%xmm2      \n\t" \ | ||||
|         "movq      " #i "+8(%0), %%xmm0    \n\t" \ | ||||
|   | ||||
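fdct_row_sse2 above defines its helper macros in the middle of the asm argument list; each expands to string fragments that the preprocessor concatenates into one long template. A small, hypothetical equivalent:

#define LOAD64(off, reg) "movq " #off "(%0), " reg " \n\t"

static void touch_rows(const short *in)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    __asm__ volatile(
        LOAD64(0, "%%mm0")        /* expands to "movq 0(%0), %%mm0 \n\t" */
        LOAD64(8, "%%mm1")
        "emms                     \n\t"
        :: "r"(in)
        : "%mm0", "%mm1", "memory");
#endif
}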
| @@ -46,7 +46,7 @@ void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z) | ||||
|     int n = 1<<s->nbits; | ||||
|     int i; | ||||
|     ff_fft_dispatch_interleave_3dn2(z, s->nbits); | ||||
|     asm volatile("femms"); | ||||
|     __asm__ volatile("femms"); | ||||
|     if(n <= 8) | ||||
|         for(i=0; i<n; i+=2) | ||||
|             FFSWAP(FFTSample, z[i].im, z[i+1].re); | ||||
| @@ -69,11 +69,11 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu | ||||
|     in1 = input; | ||||
|     in2 = input + n2 - 1; | ||||
| #ifdef EMULATE_3DNOWEXT | ||||
|     asm volatile("movd %0, %%mm7" ::"r"(1<<31)); | ||||
|     __asm__ volatile("movd %0, %%mm7" ::"r"(1<<31)); | ||||
| #endif | ||||
|     for(k = 0; k < n4; k++) { | ||||
|         // FIXME a single block is faster, but gcc 2.95 and 3.4.x on 32bit can't compile it | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movd         %0, %%mm0 \n" | ||||
|             "movd         %2, %%mm1 \n" | ||||
|             "punpckldq    %1, %%mm0 \n" | ||||
| @@ -94,7 +94,7 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu | ||||
|             ::"m"(in2[-2*k]), "m"(in1[2*k]), | ||||
|               "m"(tcos[k]), "m"(tsin[k]) | ||||
|         ); | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movq    %%mm0, %0    \n\t" | ||||
|             :"=m"(z[revtab[k]]) | ||||
|         ); | ||||
| @@ -117,7 +117,7 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu | ||||
|     /* post rotation */ | ||||
|     j = -n2; | ||||
|     k = n2-8; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "1: \n" | ||||
|         CMUL(%0, %%mm0, %%mm1) | ||||
|         CMUL(%1, %%mm2, %%mm3) | ||||
| @@ -140,7 +140,7 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu | ||||
|         :"r"(z+n8), "r"(tcos+n8), "r"(tsin+n8) | ||||
|         :"memory" | ||||
|     ); | ||||
|     asm volatile("femms"); | ||||
|     __asm__ volatile("femms"); | ||||
| } | ||||
|  | ||||
| void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input) | ||||
| @@ -153,7 +153,7 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu | ||||
|  | ||||
|     j = -n; | ||||
|     k = n-8; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq %4, %%mm7 \n" | ||||
|         "1: \n" | ||||
|         PSWAPD((%2,%1), %%mm0) | ||||
| @@ -168,6 +168,6 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu | ||||
|         :"r"(output+n4), "r"(output+n4*3), | ||||
|          "m"(*m1m1) | ||||
|     ); | ||||
|     asm volatile("femms"); | ||||
|     __asm__ volatile("femms"); | ||||
| } | ||||
|  | ||||
|   | ||||
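Each 3DNow! routine above ends with femms because the MMX/3DNow! registers alias the x87 floating-point stack: until emms/femms executes, ordinary float math would see a full, invalid FPU stack. femms is AMD's fast variant of emms. A hypothetical guard in the same spirit:

static inline void exit_mmx_state(void)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    __asm__ volatile("emms");   /* use "femms" where 3DNow! is known to exist */
#endif
}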
| @@ -36,7 +36,7 @@ void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) | ||||
|  | ||||
|     if(n <= 16) { | ||||
|         x86_reg i = -8*n; | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "1: \n" | ||||
|             "movaps     (%0,%1), %%xmm0 \n" | ||||
|             "movaps      %%xmm0, %%xmm1 \n" | ||||
| @@ -58,7 +58,7 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z) | ||||
|     int n = 1 << s->nbits; | ||||
|     int i; | ||||
|     for(i=0; i<n; i+=2) { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movaps %2, %%xmm0 \n" | ||||
|             "movlps %%xmm0, %0 \n" | ||||
|             "movhps %%xmm0, %1 \n" | ||||
| @@ -84,7 +84,7 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input | ||||
|  | ||||
|     /* pre rotation */ | ||||
|     for(k=n8-2; k>=0; k-=2) { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movaps     (%2,%1,2), %%xmm0 \n" // { z[k].re,    z[k].im,    z[k+1].re,  z[k+1].im  } | ||||
|             "movaps  -16(%2,%0,2), %%xmm1 \n" // { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im } | ||||
|             "movaps        %%xmm0, %%xmm2 \n" | ||||
| @@ -111,7 +111,7 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input | ||||
| #ifdef ARCH_X86_64 | ||||
|         // if we have enough regs, don't let gcc make the luts latency-bound | ||||
|         // but if not, latency is faster than spilling | ||||
|         asm("movlps %%xmm0, %0 \n" | ||||
|         __asm__("movlps %%xmm0, %0 \n" | ||||
|             "movhps %%xmm0, %1 \n" | ||||
|             "movlps %%xmm1, %2 \n" | ||||
|             "movhps %%xmm1, %3 \n" | ||||
| @@ -121,10 +121,10 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input | ||||
|              "=m"(z[revtab[ k+1]]) | ||||
|         ); | ||||
| #else | ||||
|         asm("movlps %%xmm0, %0" :"=m"(z[revtab[-k-2]])); | ||||
|         asm("movhps %%xmm0, %0" :"=m"(z[revtab[-k-1]])); | ||||
|         asm("movlps %%xmm1, %0" :"=m"(z[revtab[ k  ]])); | ||||
|         asm("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]])); | ||||
|         __asm__("movlps %%xmm0, %0" :"=m"(z[revtab[-k-2]])); | ||||
|         __asm__("movhps %%xmm0, %0" :"=m"(z[revtab[-k-1]])); | ||||
|         __asm__("movlps %%xmm1, %0" :"=m"(z[revtab[ k  ]])); | ||||
|         __asm__("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]])); | ||||
| #endif | ||||
|     } | ||||
|  | ||||
| @@ -146,7 +146,7 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input | ||||
|  | ||||
|     j = -n2; | ||||
|     k = n2-16; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "1: \n" | ||||
|         CMUL(%0, %%xmm0, %%xmm1) | ||||
|         CMUL(%1, %%xmm4, %%xmm5) | ||||
| @@ -181,7 +181,7 @@ void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input | ||||
|  | ||||
|     j = -n; | ||||
|     k = n-16; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movaps %4, %%xmm7 \n" | ||||
|         "1: \n" | ||||
|         "movaps       (%2,%1), %%xmm0 \n" | ||||
|   | ||||
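ff_imdct_half_sse above fans one XMM register out to two unrelated destinations with movlps/movhps through "=m" outputs. A hedged, standalone sketch of that store pattern (assumes SSE; movups avoids any alignment requirement):

static void split_store(const float v[4], float lo[2], float hi[2])
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    __asm__("movups %2, %%xmm0 \n\t"
            "movlps %%xmm0, %0 \n\t"    /* low 8 bytes  -> lo[0..1] */
            "movhps %%xmm0, %1 \n\t"    /* high 8 bytes -> hi[0..1] */
            : "=m"(*(float (*)[2])lo), "=m"(*(float (*)[2])hi)
            : "m"(*(const float (*)[4])v)
            : "%xmm0");
#else
    lo[0] = v[0]; lo[1] = v[1];
    hi[0] = v[2]; hi[1] = v[3];
#endif
}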
| @@ -28,7 +28,7 @@ static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data | ||||
|     int n2 = len>>1; | ||||
|     x86_reg i = -n2*sizeof(int32_t); | ||||
|     x86_reg j =  n2*sizeof(int32_t); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movsd   %0,     %%xmm7                \n\t" | ||||
|         "movapd  "MANGLE(ff_pd_1)", %%xmm6     \n\t" | ||||
|         "movapd  "MANGLE(ff_pd_2)", %%xmm5     \n\t" | ||||
| @@ -38,7 +38,7 @@ static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data | ||||
|         ::"m"(c) | ||||
|     ); | ||||
| #define WELCH(MOVPD, offset)\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "1:                                    \n\t"\ | ||||
|         "movapd   %%xmm7,  %%xmm1              \n\t"\ | ||||
|         "mulpd    %%xmm1,  %%xmm1              \n\t"\ | ||||
| @@ -84,7 +84,7 @@ void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag, | ||||
|     for(j=0; j<lag; j+=2){ | ||||
|         x86_reg i = -len*sizeof(double); | ||||
|         if(j == lag-2) { | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movsd    "MANGLE(ff_pd_1)", %%xmm0 \n\t" | ||||
|                 "movsd    "MANGLE(ff_pd_1)", %%xmm1 \n\t" | ||||
|                 "movsd    "MANGLE(ff_pd_1)", %%xmm2 \n\t" | ||||
| @@ -113,7 +113,7 @@ void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag, | ||||
|                 :"r"(data1+len), "r"(data1+len-j) | ||||
|             ); | ||||
|         } else { | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movsd    "MANGLE(ff_pd_1)", %%xmm0 \n\t" | ||||
|                 "movsd    "MANGLE(ff_pd_1)", %%xmm1 \n\t" | ||||
|                 "1:                                 \n\t" | ||||
|   | ||||
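The MANGLE(ff_pd_1) uses above splice a global symbol name into the template so the assembler sees whatever prefix the target requires. A simplified sketch of such a macro (the platform test below is an assumption; the real build detects the prefix at configure time):

#if defined(__APPLE__) || (defined(_WIN32) && !defined(_WIN64))
#define EXTERN_ASM_PREFIX "_"     /* targets that underscore-prefix C symbols */
#else
#define EXTERN_ASM_PREFIX ""
#endif
#define MANGLE_SYM(a) EXTERN_ASM_PREFIX #a

/* usage, mirroring the code above:
 *   __asm__ volatile("movapd "MANGLE_SYM(ff_pd_1)", %%xmm6");
 */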
| @@ -43,7 +43,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | ||||
|     int minusstart= -(int)significant_coeff_ctx_base; | ||||
|     int minusindex= 4-(int)index; | ||||
|     int coeff_count; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movl "RANGE    "(%3), %%esi            \n\t" | ||||
|         "movl "LOW      "(%3), %%ebx            \n\t" | ||||
|  | ||||
| @@ -96,7 +96,7 @@ static int decode_significance_8x8_x86(CABACContext *c, | ||||
|     int minusindex= 4-(int)index; | ||||
|     int coeff_count; | ||||
|     x86_reg last=0; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movl "RANGE    "(%3), %%esi            \n\t" | ||||
|         "movl "LOW      "(%3), %%ebx            \n\t" | ||||
|  | ||||
|   | ||||
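The "RANGE" and "LOW" string macros above appear to hold byte offsets into CABACContext, letting the asm address struct fields off the single base pointer in %3. A hedged, generic version of the trick using offsetof and GCC's "%c" operand modifier, which prints an immediate without the leading '$':

#include <stddef.h>

struct bitreader { unsigned low; unsigned range; };  /* hypothetical layout */

static unsigned load_range(const struct bitreader *c)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    unsigned r;
    __asm__("movl %c1(%2), %0"
            : "=r"(r)
            : "i"(offsetof(struct bitreader, range)), "r"(c));
    return r;
#else
    return c->range;
#endif
}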
| @@ -57,14 +57,14 @@ DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_7_3  ) = 0x0307030703070307ULL; | ||||
| static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride) | ||||
| { | ||||
|     /* Load dct coeffs */ | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq   (%0), %%mm0 \n\t" | ||||
|         "movq  8(%0), %%mm1 \n\t" | ||||
|         "movq 16(%0), %%mm2 \n\t" | ||||
|         "movq 24(%0), %%mm3 \n\t" | ||||
|     :: "r"(block) ); | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         /* mm1=s02+s13  mm2=s02-s13  mm4=d02+d13  mm0=d02-d13 */ | ||||
|         IDCT4_1D( %%mm2, %%mm1, %%mm0, %%mm3, %%mm4 ) | ||||
|  | ||||
| @@ -80,7 +80,7 @@ static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride) | ||||
|         "pxor %%mm7, %%mm7    \n\t" | ||||
|     :: "m"(ff_pw_32)); | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|     STORE_DIFF_4P( %%mm0, %%mm1, %%mm7) | ||||
|         "add %1, %0             \n\t" | ||||
|     STORE_DIFF_4P( %%mm2, %%mm1, %%mm7) | ||||
| @@ -95,7 +95,7 @@ static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride) | ||||
|  | ||||
| static inline void h264_idct8_1d(int16_t *block) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq 112(%0), %%mm7  \n\t" | ||||
|         "movq  80(%0), %%mm0  \n\t" | ||||
|         "movq  48(%0), %%mm3  \n\t" | ||||
| @@ -166,7 +166,7 @@ static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | ||||
|  | ||||
|         h264_idct8_1d(block+4*i); | ||||
|  | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movq   %%mm7,    %0   \n\t" | ||||
|             TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 ) | ||||
|             "movq   %%mm0,  8(%1)  \n\t" | ||||
| @@ -188,7 +188,7 @@ static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | ||||
|     for(i=0; i<2; i++){ | ||||
|         h264_idct8_1d(b2+4*i); | ||||
|  | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "psraw     $6, %%mm7  \n\t" | ||||
|             "psraw     $6, %%mm6  \n\t" | ||||
|             "psraw     $6, %%mm5  \n\t" | ||||
| @@ -269,7 +269,7 @@ static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | ||||
|  | ||||
| static void ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movdqa   0x10(%1), %%xmm1 \n" | ||||
|         "movdqa   0x20(%1), %%xmm2 \n" | ||||
|         "movdqa   0x30(%1), %%xmm3 \n" | ||||
| @@ -304,7 +304,7 @@ static void ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride) | ||||
| static void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) | ||||
| { | ||||
|     int dc = (block[0] + 32) >> 6; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movd          %0, %%mm0 \n\t" | ||||
|         "pshufw $0, %%mm0, %%mm0 \n\t" | ||||
|         "pxor       %%mm1, %%mm1 \n\t" | ||||
| @@ -313,7 +313,7 @@ static void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) | ||||
|         "packuswb   %%mm1, %%mm1 \n\t" | ||||
|         ::"r"(dc) | ||||
|     ); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movd          %0, %%mm2 \n\t" | ||||
|         "movd          %1, %%mm3 \n\t" | ||||
|         "movd          %2, %%mm4 \n\t" | ||||
| @@ -341,7 +341,7 @@ static void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) | ||||
| { | ||||
|     int dc = (block[0] + 32) >> 6; | ||||
|     int y; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movd          %0, %%mm0 \n\t" | ||||
|         "pshufw $0, %%mm0, %%mm0 \n\t" | ||||
|         "pxor       %%mm1, %%mm1 \n\t" | ||||
| @@ -351,7 +351,7 @@ static void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) | ||||
|         ::"r"(dc) | ||||
|     ); | ||||
|     for(y=2; y--; dst += 4*stride){ | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq          %0, %%mm2 \n\t" | ||||
|         "movq          %1, %%mm3 \n\t" | ||||
|         "movq          %2, %%mm4 \n\t" | ||||
| @@ -463,7 +463,7 @@ static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alph | ||||
| { | ||||
|     DECLARE_ALIGNED_8(uint64_t, tmp0[2]); | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq    (%1,%3), %%mm0    \n\t" //p1 | ||||
|         "movq    (%1,%3,2), %%mm1  \n\t" //p0 | ||||
|         "movq    (%2),    %%mm2    \n\t" //q0 | ||||
| @@ -540,7 +540,7 @@ static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, in | ||||
|  | ||||
| static inline void h264_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq    (%0),    %%mm0     \n\t" //p1 | ||||
|         "movq    (%0,%2), %%mm1     \n\t" //p0 | ||||
|         "movq    (%1),    %%mm2     \n\t" //q0 | ||||
| @@ -586,7 +586,7 @@ static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, | ||||
|  | ||||
| static inline void h264_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha1, int beta1) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq    (%0),    %%mm0     \n\t" | ||||
|         "movq    (%0,%2), %%mm1     \n\t" | ||||
|         "movq    (%1),    %%mm2     \n\t" | ||||
| @@ -628,7 +628,7 @@ static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a | ||||
| static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], | ||||
|                                             int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) { | ||||
|     int dir; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor %%mm7, %%mm7 \n\t" | ||||
|         "movq %0, %%mm6 \n\t" | ||||
|         "movq %1, %%mm5 \n\t" | ||||
| @@ -636,7 +636,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] | ||||
|         ::"m"(ff_pb_1), "m"(ff_pb_3), "m"(ff_pb_7) | ||||
|     ); | ||||
|     if(field) | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movq %0, %%mm5 \n\t" | ||||
|             "movq %1, %%mm4 \n\t" | ||||
|             ::"m"(ff_pb_3_1), "m"(ff_pb_7_3) | ||||
| @@ -650,14 +650,14 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] | ||||
|         DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL; | ||||
|         int b_idx, edge, l; | ||||
|         for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) { | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "pand %0, %%mm0 \n\t" | ||||
|                 ::"m"(mask_dir) | ||||
|             ); | ||||
|             if(!(mask_mv & edge)) { | ||||
|                 asm volatile("pxor %%mm0, %%mm0 \n\t":); | ||||
|                 __asm__ volatile("pxor %%mm0, %%mm0 \n\t":); | ||||
|                 for( l = bidir; l >= 0; l-- ) { | ||||
|                     asm volatile( | ||||
|                     __asm__ volatile( | ||||
|                         "movd %0, %%mm1 \n\t" | ||||
|                         "punpckldq %1, %%mm1 \n\t" | ||||
|                         "movq %%mm1, %%mm2 \n\t" | ||||
| @@ -688,7 +688,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] | ||||
|                     ); | ||||
|                 } | ||||
|             } | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movd %0, %%mm1 \n\t" | ||||
|                 "por  %1, %%mm1 \n\t" | ||||
|                 "punpcklbw %%mm7, %%mm1 \n\t" | ||||
| @@ -696,7 +696,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] | ||||
|                 ::"m"(nnz[b_idx]), | ||||
|                   "m"(nnz[b_idx+d_idx]) | ||||
|             ); | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "pcmpeqw %%mm7, %%mm0 \n\t" | ||||
|                 "pcmpeqw %%mm7, %%mm0 \n\t" | ||||
|                 "psrlw $15, %%mm0 \n\t" // nonzero -> 1 | ||||
| @@ -713,7 +713,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] | ||||
|         edges = 4; | ||||
|         step = 1; | ||||
|     } | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq   (%0), %%mm0 \n\t" | ||||
|         "movq  8(%0), %%mm1 \n\t" | ||||
|         "movq 16(%0), %%mm2 \n\t" | ||||
| @@ -774,7 +774,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] | ||||
| static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||||
|     int h=4;\ | ||||
| \ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor %%mm7, %%mm7          \n\t"\ | ||||
|         "movq %5, %%mm4             \n\t"\ | ||||
|         "movq %6, %%mm5             \n\t"\ | ||||
| @@ -813,14 +813,14 @@ static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uin | ||||
| }\ | ||||
| static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ | ||||
|     int h=4;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor %%mm7, %%mm7          \n\t"\ | ||||
|         "movq %0, %%mm4             \n\t"\ | ||||
|         "movq %1, %%mm5             \n\t"\ | ||||
|         :: "m"(ff_pw_5), "m"(ff_pw_16)\ | ||||
|     );\ | ||||
|     do{\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "movd  -1(%0), %%mm1        \n\t"\ | ||||
|         "movd    (%0), %%mm2        \n\t"\ | ||||
|         "movd   1(%0), %%mm3        \n\t"\ | ||||
| @@ -857,7 +857,7 @@ static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, | ||||
| }\ | ||||
| static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||||
|     src -= 2*srcStride;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor %%mm7, %%mm7          \n\t"\ | ||||
|         "movd (%0), %%mm0           \n\t"\ | ||||
|         "add %2, %0                 \n\t"\ | ||||
| @@ -889,7 +889,7 @@ static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, in | ||||
|     int w=3;\ | ||||
|     src -= 2*srcStride+2;\ | ||||
|     while(w--){\ | ||||
|         asm volatile(\ | ||||
|         __asm__ volatile(\ | ||||
|             "pxor %%mm7, %%mm7      \n\t"\ | ||||
|             "movd (%0), %%mm0       \n\t"\ | ||||
|             "add %2, %0             \n\t"\ | ||||
| @@ -919,7 +919,7 @@ static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, in | ||||
|         src += 4 - 9*srcStride;\ | ||||
|     }\ | ||||
|     tmp -= 3*4;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "1:                         \n\t"\ | ||||
|         "movq     (%0), %%mm0       \n\t"\ | ||||
|         "paddw  10(%0), %%mm0       \n\t"\ | ||||
| @@ -948,7 +948,7 @@ static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, in | ||||
| \ | ||||
| static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||||
|     int h=8;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor %%mm7, %%mm7          \n\t"\ | ||||
|         "movq %5, %%mm6             \n\t"\ | ||||
|         "1:                         \n\t"\ | ||||
| @@ -1005,13 +1005,13 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uin | ||||
| \ | ||||
| static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ | ||||
|     int h=8;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor %%mm7, %%mm7          \n\t"\ | ||||
|         "movq %0, %%mm6             \n\t"\ | ||||
|         :: "m"(ff_pw_5)\ | ||||
|     );\ | ||||
|     do{\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "movq    (%0), %%mm0        \n\t"\ | ||||
|         "movq   1(%0), %%mm2        \n\t"\ | ||||
|         "movq %%mm0, %%mm1          \n\t"\ | ||||
| @@ -1071,7 +1071,7 @@ static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, | ||||
|     src -= 2*srcStride;\ | ||||
|     \ | ||||
|     while(w--){\ | ||||
|       asm volatile(\ | ||||
|       __asm__ volatile(\ | ||||
|         "pxor %%mm7, %%mm7          \n\t"\ | ||||
|         "movd (%0), %%mm0           \n\t"\ | ||||
|         "add %2, %0                 \n\t"\ | ||||
| @@ -1102,7 +1102,7 @@ static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, | ||||
|         : "memory"\ | ||||
|      );\ | ||||
|      if(h==16){\ | ||||
|         asm volatile(\ | ||||
|         __asm__ volatile(\ | ||||
|             QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ | ||||
|             QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ | ||||
|             QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ | ||||
| @@ -1125,7 +1125,7 @@ static av_always_inline void OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_ | ||||
|     int w = (size+8)>>2;\ | ||||
|     src -= 2*srcStride+2;\ | ||||
|     while(w--){\ | ||||
|         asm volatile(\ | ||||
|         __asm__ volatile(\ | ||||
|             "pxor %%mm7, %%mm7      \n\t"\ | ||||
|             "movd (%0), %%mm0       \n\t"\ | ||||
|             "add %2, %0             \n\t"\ | ||||
| @@ -1155,7 +1155,7 @@ static av_always_inline void OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_ | ||||
|             : "memory"\ | ||||
|         );\ | ||||
|         if(size==16){\ | ||||
|             asm volatile(\ | ||||
|             __asm__ volatile(\ | ||||
|                 QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1,  8*48)\ | ||||
|                 QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2,  9*48)\ | ||||
|                 QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\ | ||||
| @@ -1177,7 +1177,7 @@ static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_ | ||||
|     int w = size>>4;\ | ||||
|     do{\ | ||||
|     int h = size;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "1:                         \n\t"\ | ||||
|         "movq     (%0), %%mm0       \n\t"\ | ||||
|         "movq    8(%0), %%mm3       \n\t"\ | ||||
| @@ -1261,7 +1261,7 @@ static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, | ||||
| \ | ||||
| static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ | ||||
| {\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "movq      (%1), %%mm0          \n\t"\ | ||||
|         "movq    24(%1), %%mm1          \n\t"\ | ||||
|         "psraw      $5,  %%mm0          \n\t"\ | ||||
| @@ -1291,7 +1291,7 @@ static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_ | ||||
| static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ | ||||
| {\ | ||||
|     do{\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "movq      (%1), %%mm0          \n\t"\ | ||||
|         "movq     8(%1), %%mm1          \n\t"\ | ||||
|         "movq    48(%1), %%mm2          \n\t"\ | ||||
| @@ -1325,7 +1325,7 @@ static void OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, u | ||||
| #define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ | ||||
| static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ | ||||
|     int h=16;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor %%xmm15, %%xmm15      \n\t"\ | ||||
|         "movdqa %6, %%xmm14         \n\t"\ | ||||
|         "movdqa %7, %%xmm13         \n\t"\ | ||||
| @@ -1403,13 +1403,13 @@ static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, | ||||
| #define QPEL_H264_H_XMM(OPNAME, OP, MMX)\ | ||||
| static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ | ||||
|     int h=8;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor %%xmm7, %%xmm7        \n\t"\ | ||||
|         "movdqa %0, %%xmm6          \n\t"\ | ||||
|         :: "m"(ff_pw_5)\ | ||||
|     );\ | ||||
|     do{\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "lddqu   -5(%0), %%xmm1     \n\t"\ | ||||
|         "movdqa  %%xmm1, %%xmm0     \n\t"\ | ||||
|         "punpckhbw %%xmm7, %%xmm1   \n\t"\ | ||||
| @@ -1450,7 +1450,7 @@ QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ | ||||
| \ | ||||
| static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||||
|     int h=8;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor %%xmm7, %%xmm7        \n\t"\ | ||||
|         "movdqa %5, %%xmm6          \n\t"\ | ||||
|         "1:                         \n\t"\ | ||||
| @@ -1501,7 +1501,7 @@ static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, | ||||
| static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ | ||||
|     src -= 2*srcStride;\ | ||||
|     \ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|         "pxor %%xmm7, %%xmm7        \n\t"\ | ||||
|         "movq (%0), %%xmm0          \n\t"\ | ||||
|         "add %2, %0                 \n\t"\ | ||||
| @@ -1532,7 +1532,7 @@ static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, | ||||
|         : "memory"\ | ||||
|     );\ | ||||
|     if(h==16){\ | ||||
|         asm volatile(\ | ||||
|         __asm__ volatile(\ | ||||
|             QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\ | ||||
|             QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\ | ||||
|             QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\ | ||||
| @@ -1560,7 +1560,7 @@ static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, u | ||||
|     int w = (size+8)>>3; | ||||
|     src -= 2*srcStride+2; | ||||
|     while(w--){ | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "pxor %%xmm7, %%xmm7        \n\t" | ||||
|             "movq (%0), %%xmm0          \n\t" | ||||
|             "add %2, %0                 \n\t" | ||||
| @@ -1590,7 +1590,7 @@ static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, u | ||||
|             : "memory" | ||||
|         ); | ||||
|         if(size==16){ | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1,  8*48) | ||||
|                 QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2,  9*48) | ||||
|                 QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 10*48) | ||||
| @@ -1613,7 +1613,7 @@ static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, u | ||||
| static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\ | ||||
|     int h = size;\ | ||||
|     if(size == 16){\ | ||||
|         asm volatile(\ | ||||
|         __asm__ volatile(\ | ||||
|             "1:                         \n\t"\ | ||||
|             "movdqa 32(%0), %%xmm4      \n\t"\ | ||||
|             "movdqa 16(%0), %%xmm5      \n\t"\ | ||||
| @@ -1668,7 +1668,7 @@ static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_ | ||||
|             : "memory"\ | ||||
|         );\ | ||||
|     }else{\ | ||||
|         asm volatile(\ | ||||
|         __asm__ volatile(\ | ||||
|             "1:                         \n\t"\ | ||||
|             "movdqa 16(%0), %%xmm1      \n\t"\ | ||||
|             "movdqa   (%0), %%xmm0      \n\t"\ | ||||
| @@ -2022,7 +2022,7 @@ static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_de | ||||
|     int x, y; | ||||
|     offset <<= log2_denom; | ||||
|     offset += (1 << log2_denom) >> 1; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movd    %0, %%mm4        \n\t" | ||||
|         "movd    %1, %%mm5        \n\t" | ||||
|         "movd    %2, %%mm6        \n\t" | ||||
| @@ -2033,7 +2033,7 @@ static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_de | ||||
|     ); | ||||
|     for(y=0; y<h; y+=2){ | ||||
|         for(x=0; x<w; x+=4){ | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movd      %0,    %%mm0 \n\t" | ||||
|                 "movd      %1,    %%mm1 \n\t" | ||||
|                 "punpcklbw %%mm7, %%mm0 \n\t" | ||||
| @@ -2060,7 +2060,7 @@ static inline void ff_h264_biweight_WxH_mmx2(uint8_t *dst, uint8_t *src, int str | ||||
| { | ||||
|     int x, y; | ||||
|     offset = ((offset + 1) | 1) << log2_denom; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movd    %0, %%mm3        \n\t" | ||||
|         "movd    %1, %%mm4        \n\t" | ||||
|         "movd    %2, %%mm5        \n\t" | ||||
| @@ -2073,7 +2073,7 @@ static inline void ff_h264_biweight_WxH_mmx2(uint8_t *dst, uint8_t *src, int str | ||||
|     ); | ||||
|     for(y=0; y<h; y++){ | ||||
|         for(x=0; x<w; x+=4){ | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movd      %0,    %%mm0 \n\t" | ||||
|                 "movd      %1,    %%mm1 \n\t" | ||||
|                 "punpcklbw %%mm7, %%mm0 \n\t" | ||||
|   | ||||
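A pattern that recurs throughout the file above is a chain of consecutive __asm__ volatile statements handing state to one another in MMX registers (%mm7 zeroed once, consumed later). A self-contained sketch of the idiom, assuming x86 MMX; this is not FFmpeg code:

    #include <stdint.h>

    static void unpack_8u_to_16u(uint16_t *dst, const uint8_t *src)
    {
        __asm__ volatile ("pxor %%mm7, %%mm7":);      /* mm7 := 0 */
        __asm__ volatile (                            /* relies on mm7 above */
            "movq       (%1), %%mm0 \n\t"
            "movq      %%mm0, %%mm1 \n\t"
            "punpcklbw %%mm7, %%mm0 \n\t"             /* low 4 bytes -> words  */
            "punpckhbw %%mm7, %%mm1 \n\t"             /* high 4 bytes -> words */
            "movq      %%mm0,  (%0) \n\t"
            "movq      %%mm1, 8(%0) \n\t"
            :: "r" (dst), "r" (src)
            : "memory");
        __asm__ volatile ("emms");                    /* leave MMX mode */
    }

volatile plus the empty output lists is what keeps the compiler from reordering or discarding the blocks between which the register stays live.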
| @@ -483,7 +483,7 @@ DECLARE_ALIGNED(8, static const int16_t, tab_i_04_xmm[32*4]) = { | ||||
|  | ||||
|  | ||||
| void ff_idct_xvid_mmx(short *block){ | ||||
| asm volatile( | ||||
| __asm__ volatile( | ||||
|             //# Process each row | ||||
|     DCT_8_INV_ROW_MMX(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1)) | ||||
|     DCT_8_INV_ROW_MMX(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1)) | ||||
| @@ -506,7 +506,7 @@ asm volatile( | ||||
|  | ||||
|  | ||||
| void ff_idct_xvid_mmx2(short *block){ | ||||
| asm volatile( | ||||
| __asm__ volatile( | ||||
|             //# Process each row | ||||
|     DCT_8_INV_ROW_XMM(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1)) | ||||
|     DCT_8_INV_ROW_XMM(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1)) | ||||
|   | ||||
| @@ -341,7 +341,7 @@ DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders[]) = { | ||||
|  | ||||
| inline void ff_idct_xvid_sse2(short *block) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|     "movq     "MANGLE(m127)", %%mm0                              \n\t" | ||||
|     iMTX_MULT("(%0)",     MANGLE(iTab1), ROUND(walkenIdctRounders),      PUT_EVEN(ROW0)) | ||||
|     iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+1*16), PUT_ODD(ROW1)) | ||||
|   | ||||
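The MANGLE() wrapper seen in these IDCT templates splices the platform's external-symbol prefix onto a C identifier at preprocessing time. A sketch of the idiom; FFmpeg's real definition lives in a shared header, and the PREFIX test here is illustrative:

    #if defined(PREFIX)
    #   define MANGLE(a) "_" #a
    #else
    #   define MANGLE(a) #a
    #endif
    /* "movq "MANGLE(m127)", %%mm0\n\t" then pastes to
     * "movq m127, %%mm0\n\t" (or "_m127" where externs carry "_"). */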
| @@ -24,7 +24,7 @@ | ||||
|  | ||||
| #ifdef FRAC_BITS | ||||
| #   define MULL(ra, rb) \ | ||||
|         ({ int rt, dummy; asm (\ | ||||
|         ({ int rt, dummy; __asm__ (\ | ||||
|             "imull %3               \n\t"\ | ||||
|             "shrdl %4, %%edx, %%eax \n\t"\ | ||||
|             : "=a"(rt), "=d"(dummy)\ | ||||
| @@ -34,12 +34,12 @@ | ||||
|  | ||||
| #define MULH(ra, rb) \ | ||||
|     ({ int rt, dummy;\ | ||||
|      asm ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" ((int)ra), "rm" ((int)rb));\ | ||||
|      __asm__ ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" ((int)ra), "rm" ((int)rb));\ | ||||
|      rt; }) | ||||
|  | ||||
| #define MUL64(ra, rb) \ | ||||
|     ({ int64_t rt;\ | ||||
|      asm ("imull %2\n\t" : "=A"(rt) : "a" ((int)ra), "g" ((int)rb));\ | ||||
|      __asm__ ("imull %2\n\t" : "=A"(rt) : "a" ((int)ra), "g" ((int)rb));\ | ||||
|      rt; }) | ||||
|  | ||||
| #endif /* AVCODEC_I386_MATHOPS_H */ | ||||
|   | ||||
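The three macros above have compact portable meanings, which may be easier to read than the constraints. A sketch of C equivalents (names hypothetical):

    #include <stdint.h>

    /* mulh_c keeps the high half of the 32x32 product (what "=d" reads
     * after imull); mul64_c is the full EDX:EAX result ("=A"); mull_c
     * is the FRAC_BITS fixed-point product that imull+shrdl computes. */
    static inline int     mulh_c (int a, int b)        { return (int)(((int64_t)a * b) >> 32); }
    static inline int64_t mul64_c(int a, int b)        { return  (int64_t)a * b; }
    static inline int     mull_c (int a, int b, int s) { return (int)(((int64_t)a * b) >> s); }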
| @@ -43,25 +43,25 @@ typedef        union { | ||||
|  | ||||
|  | ||||
| #define         mmx_i2r(op,imm,reg) \ | ||||
|         asm volatile (#op " %0, %%" #reg \ | ||||
|         __asm__ volatile (#op " %0, %%" #reg \ | ||||
|                               : /* nothing */ \ | ||||
|                               : "i" (imm) ) | ||||
|  | ||||
| #define         mmx_m2r(op,mem,reg) \ | ||||
|         asm volatile (#op " %0, %%" #reg \ | ||||
|         __asm__ volatile (#op " %0, %%" #reg \ | ||||
|                               : /* nothing */ \ | ||||
|                               : "m" (mem)) | ||||
|  | ||||
| #define         mmx_r2m(op,reg,mem) \ | ||||
|         asm volatile (#op " %%" #reg ", %0" \ | ||||
|         __asm__ volatile (#op " %%" #reg ", %0" \ | ||||
|                               : "=m" (mem) \ | ||||
|                               : /* nothing */ ) | ||||
|  | ||||
| #define         mmx_r2r(op,regs,regd) \ | ||||
|         asm volatile (#op " %" #regs ", %" #regd) | ||||
|         __asm__ volatile (#op " %" #regs ", %" #regd) | ||||
|  | ||||
|  | ||||
| #define         emms() asm volatile ("emms") | ||||
| #define         emms() __asm__ volatile ("emms") | ||||
|  | ||||
| #define         movd_m2r(var,reg)           mmx_m2r (movd, var, reg) | ||||
| #define         movd_r2m(reg,var)           mmx_r2m (movd, reg, var) | ||||
| @@ -200,16 +200,16 @@ typedef        union { | ||||
|  | ||||
|  | ||||
| #define         mmx_m2ri(op,mem,reg,imm) \ | ||||
|         asm volatile (#op " %1, %0, %%" #reg \ | ||||
|         __asm__ volatile (#op " %1, %0, %%" #reg \ | ||||
|                               : /* nothing */ \ | ||||
|                               : "m" (mem), "i" (imm)) | ||||
| #define         mmx_r2ri(op,regs,regd,imm) \ | ||||
|         asm volatile (#op " %0, %%" #regs ", %%" #regd \ | ||||
|         __asm__ volatile (#op " %0, %%" #regs ", %%" #regd \ | ||||
|                               : /* nothing */ \ | ||||
|                               : "i" (imm) ) | ||||
|  | ||||
| #define         mmx_fetch(mem,hint) \ | ||||
|         asm volatile ("prefetch" #hint " %0" \ | ||||
|         __asm__ volatile ("prefetch" #hint " %0" \ | ||||
|                               : /* nothing */ \ | ||||
|                               : "m" (mem)) | ||||
|  | ||||
| @@ -240,7 +240,7 @@ typedef        union { | ||||
| #define         pminub_r2r(regs,regd)       mmx_r2r (pminub, regs, regd) | ||||
|  | ||||
| #define         pmovmskb(mmreg,reg) \ | ||||
|         asm volatile ("movmskps %" #mmreg ", %" #reg) | ||||
|         __asm__ volatile ("movmskps %" #mmreg ", %" #reg) | ||||
|  | ||||
| #define         pmulhuw_m2r(var,reg)        mmx_m2r (pmulhuw, var, reg) | ||||
| #define         pmulhuw_r2r(regs,regd)      mmx_r2r (pmulhuw, regs, regd) | ||||
| @@ -256,7 +256,7 @@ typedef        union { | ||||
| #define         pshufw_m2r(var,reg,imm)     mmx_m2ri(pshufw, var, reg, imm) | ||||
| #define         pshufw_r2r(regs,regd,imm)   mmx_r2ri(pshufw, regs, regd, imm) | ||||
|  | ||||
| #define         sfence() asm volatile ("sfence\n\t") | ||||
| #define         sfence() __asm__ volatile ("sfence\n\t") | ||||
|  | ||||
| /* SSE2 */ | ||||
| #define         pshufhw_m2r(var,reg,imm)    mmx_m2ri(pshufhw, var, reg, imm) | ||||
|   | ||||
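Callers of these wrappers are unaffected by the rename, since each macro still expands to a single instruction. A hypothetical usage sketch; it assumes movq_m2r/movq_r2m and paddw_r2r are defined via mmx_m2r/mmx_r2m/mmx_r2r alongside the movd variants shown:

    #include <stdint.h>

    static void add4_words(uint16_t *dst, const uint16_t *a, const uint16_t *b)
    {
        movq_m2r(*a, mm0);      /* movq  (a), %mm0           */
        movq_m2r(*b, mm1);      /* movq  (b), %mm1           */
        paddw_r2r(mm1, mm0);    /* four 16-bit adds into mm0 */
        movq_r2m(mm0, *dst);    /* movq  %mm0, (dst)         */
        emms();
    }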
| @@ -36,7 +36,7 @@ DECLARE_ASM_CONST(8, uint64_t, bone)= 0x0101010101010101LL; | ||||
| static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) | ||||
| { | ||||
|     x86_reg len= -(stride*h); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         ASMALIGN(4) | ||||
|         "1:                             \n\t" | ||||
|         "movq (%1, %%"REG_a"), %%mm0    \n\t" | ||||
| @@ -71,7 +71,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) | ||||
|  | ||||
| static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         ASMALIGN(4) | ||||
|         "1:                             \n\t" | ||||
|         "movq (%1), %%mm0               \n\t" | ||||
| @@ -92,7 +92,7 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | ||||
| static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) | ||||
| { | ||||
|     int ret; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor %%xmm6, %%xmm6            \n\t" | ||||
|         ASMALIGN(4) | ||||
|         "1:                             \n\t" | ||||
| @@ -109,7 +109,7 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) | ||||
|         : "+r" (h), "+r" (blk1), "+r" (blk2) | ||||
|         : "r" ((x86_reg)stride) | ||||
|     ); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movhlps %%xmm6, %%xmm0         \n\t" | ||||
|         "paddw   %%xmm0, %%xmm6         \n\t" | ||||
|         "movd    %%xmm6, %0             \n\t" | ||||
| @@ -120,7 +120,7 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) | ||||
|  | ||||
| static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         ASMALIGN(4) | ||||
|         "1:                             \n\t" | ||||
|         "movq (%1), %%mm0               \n\t" | ||||
| @@ -142,7 +142,7 @@ static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h | ||||
|  | ||||
| static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq (%1), %%mm0               \n\t" | ||||
|         "add %3, %1                     \n\t" | ||||
|         ASMALIGN(4) | ||||
| @@ -167,7 +167,7 @@ static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h | ||||
|  | ||||
| static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq "MANGLE(bone)", %%mm5     \n\t" | ||||
|         "movq (%1), %%mm0               \n\t" | ||||
|         "pavgb 1(%1), %%mm0             \n\t" | ||||
| @@ -198,7 +198,7 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | ||||
| static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) | ||||
| { | ||||
|     x86_reg len= -(stride*h); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         ASMALIGN(4) | ||||
|         "1:                             \n\t" | ||||
|         "movq (%1, %%"REG_a"), %%mm0    \n\t" | ||||
| @@ -236,7 +236,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int | ||||
| static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) | ||||
| { | ||||
|     x86_reg len= -(stride*h); | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq (%1, %%"REG_a"), %%mm0    \n\t" | ||||
|         "movq 1(%1, %%"REG_a"), %%mm2   \n\t" | ||||
|         "movq %%mm0, %%mm1              \n\t" | ||||
| @@ -289,7 +289,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) | ||||
| static inline int sum_mmx(void) | ||||
| { | ||||
|     int ret; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq %%mm6, %%mm0              \n\t" | ||||
|         "psrlq $32, %%mm6               \n\t" | ||||
|         "paddw %%mm0, %%mm6             \n\t" | ||||
| @@ -305,7 +305,7 @@ static inline int sum_mmx(void) | ||||
| static inline int sum_mmx2(void) | ||||
| { | ||||
|     int ret; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movd %%mm6, %0                 \n\t" | ||||
|         : "=r" (ret) | ||||
|     ); | ||||
| @@ -326,7 +326,7 @@ static inline void sad8_y2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) | ||||
| static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | ||||
|     assert(h==8);\ | ||||
|     asm volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|     __asm__ volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|                  "pxor %%mm6, %%mm6     \n\t":);\ | ||||
| \ | ||||
|     sad8_1_ ## suf(blk1, blk2, stride, 8);\ | ||||
| @@ -336,7 +336,7 @@ static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h | ||||
| static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | ||||
|     assert(h==8);\ | ||||
|     asm volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|     __asm__ volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|                  "pxor %%mm6, %%mm6     \n\t"\ | ||||
|                  "movq %0, %%mm5        \n\t"\ | ||||
|                  :: "m"(round_tab[1]) \ | ||||
| @@ -350,7 +350,7 @@ static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in | ||||
| static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | ||||
|     assert(h==8);\ | ||||
|     asm volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|     __asm__ volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|                  "pxor %%mm6, %%mm6     \n\t"\ | ||||
|                  "movq %0, %%mm5        \n\t"\ | ||||
|                  :: "m"(round_tab[1]) \ | ||||
| @@ -364,7 +364,7 @@ static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in | ||||
| static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | ||||
|     assert(h==8);\ | ||||
|     asm volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|     __asm__ volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|                  "pxor %%mm6, %%mm6     \n\t"\ | ||||
|                  ::);\ | ||||
| \ | ||||
| @@ -375,7 +375,7 @@ static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i | ||||
| \ | ||||
| static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | ||||
|     asm volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|     __asm__ volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|                  "pxor %%mm6, %%mm6     \n\t":);\ | ||||
| \ | ||||
|     sad8_1_ ## suf(blk1  , blk2  , stride, h);\ | ||||
| @@ -385,7 +385,7 @@ static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int | ||||
| }\ | ||||
| static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | ||||
|     asm volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|     __asm__ volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|                  "pxor %%mm6, %%mm6     \n\t"\ | ||||
|                  "movq %0, %%mm5        \n\t"\ | ||||
|                  :: "m"(round_tab[1]) \ | ||||
| @@ -398,7 +398,7 @@ static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i | ||||
| }\ | ||||
| static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | ||||
|     asm volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|     __asm__ volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|                  "pxor %%mm6, %%mm6     \n\t"\ | ||||
|                  "movq %0, %%mm5        \n\t"\ | ||||
|                  :: "m"(round_tab[1]) \ | ||||
| @@ -411,7 +411,7 @@ static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i | ||||
| }\ | ||||
| static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | ||||
|     asm volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|     __asm__ volatile("pxor %%mm7, %%mm7     \n\t"\ | ||||
|                  "pxor %%mm6, %%mm6     \n\t"\ | ||||
|                  ::);\ | ||||
| \ | ||||
|   | ||||
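All of the kernels above compute the same quantity with different addressing and rounding; a scalar reference, as a sketch rather than what the macros expand to:

    #include <stdint.h>
    #include <stdlib.h>

    /* Sum of absolute differences over a w x h block; the asm keeps
     * this accumulator in %mm6/%xmm6 and reads it out in sum_mmx(). */
    static int sad_c(const uint8_t *a, const uint8_t *b, int stride, int w, int h)
    {
        int sum = 0;
        for (int y = 0; y < h; y++) {
            for (int x = 0; x < w; x++)
                sum += abs(a[x] - b[x]);
            a += stride;
            b += stride;
        }
        return sum;
    }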
| @@ -55,7 +55,7 @@ static void dct_unquantize_h263_intra_mmx(MpegEncContext *s, | ||||
|     else | ||||
|         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; | ||||
| //printf("%d %d  ", qmul, qadd); | ||||
| asm volatile( | ||||
| __asm__ volatile( | ||||
|                 "movd %1, %%mm6                 \n\t" //qmul | ||||
|                 "packssdw %%mm6, %%mm6          \n\t" | ||||
|                 "packssdw %%mm6, %%mm6          \n\t" | ||||
| @@ -118,7 +118,7 @@ static void dct_unquantize_h263_inter_mmx(MpegEncContext *s, | ||||
|  | ||||
|     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; | ||||
| //printf("%d %d  ", qmul, qadd); | ||||
| asm volatile( | ||||
| __asm__ volatile( | ||||
|                 "movd %1, %%mm6                 \n\t" //qmul | ||||
|                 "packssdw %%mm6, %%mm6          \n\t" | ||||
|                 "packssdw %%mm6, %%mm6          \n\t" | ||||
| @@ -214,7 +214,7 @@ static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s, | ||||
|         block0 = block[0] * s->c_dc_scale; | ||||
|     /* XXX: only mpeg1 */ | ||||
|     quant_matrix = s->intra_matrix; | ||||
| asm volatile( | ||||
| __asm__ volatile( | ||||
|                 "pcmpeqw %%mm7, %%mm7           \n\t" | ||||
|                 "psrlw $15, %%mm7               \n\t" | ||||
|                 "movd %2, %%mm6                 \n\t" | ||||
| @@ -277,7 +277,7 @@ static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s, | ||||
|     nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; | ||||
|  | ||||
|         quant_matrix = s->inter_matrix; | ||||
| asm volatile( | ||||
| __asm__ volatile( | ||||
|                 "pcmpeqw %%mm7, %%mm7           \n\t" | ||||
|                 "psrlw $15, %%mm7               \n\t" | ||||
|                 "movd %2, %%mm6                 \n\t" | ||||
| @@ -349,7 +349,7 @@ static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s, | ||||
|     else | ||||
|         block0 = block[0] * s->c_dc_scale; | ||||
|     quant_matrix = s->intra_matrix; | ||||
| asm volatile( | ||||
| __asm__ volatile( | ||||
|                 "pcmpeqw %%mm7, %%mm7           \n\t" | ||||
|                 "psrlw $15, %%mm7               \n\t" | ||||
|                 "movd %2, %%mm6                 \n\t" | ||||
| @@ -410,7 +410,7 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s, | ||||
|     else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; | ||||
|  | ||||
|         quant_matrix = s->inter_matrix; | ||||
| asm volatile( | ||||
| __asm__ volatile( | ||||
|                 "pcmpeqw %%mm7, %%mm7           \n\t" | ||||
|                 "psrlq $48, %%mm7               \n\t" | ||||
|                 "movd %2, %%mm6                 \n\t" | ||||
| @@ -482,7 +482,7 @@ static void  denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){ | ||||
|  | ||||
|     s->dct_count[intra]++; | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor %%mm7, %%mm7                      \n\t" | ||||
|         "1:                                     \n\t" | ||||
|         "pxor %%mm0, %%mm0                      \n\t" | ||||
| @@ -536,7 +536,7 @@ static void  denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){ | ||||
|  | ||||
|     s->dct_count[intra]++; | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor %%xmm7, %%xmm7                    \n\t" | ||||
|         "1:                                     \n\t" | ||||
|         "pxor %%xmm0, %%xmm0                    \n\t" | ||||
|   | ||||
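For the H.263 unquantize kernels above, the pxor/psubw pairs implement a sign-preserving multiply-add; the scalar form they vectorize is roughly this (a sketch, DC special-casing omitted):

    #include <stdint.h>

    /* level*qmul + qadd applied away from zero, zeros left untouched;
     * nCoeffs is the index of the last coefficient, as in the asm. */
    static void unquantize_h263_c(int16_t *block, int qmul, int qadd, int nCoeffs)
    {
        for (int i = 0; i <= nCoeffs; i++) {
            int level = block[i];
            if (level > 0)       level = level * qmul + qadd;
            else if (level < 0)  level = level * qmul - qadd;
            block[i] = level;
        }
    }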
| @@ -117,13 +117,13 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | ||||
|         /* note: block[0] is assumed to be positive */ | ||||
|         if (!s->h263_aic) { | ||||
| #if 1 | ||||
|         asm volatile ( | ||||
|         __asm__ volatile ( | ||||
|                 "mul %%ecx                \n\t" | ||||
|                 : "=d" (level), "=a"(dummy) | ||||
|                 : "a" ((block[0]>>2) + q), "c" (ff_inverse[q<<1]) | ||||
|         ); | ||||
| #else | ||||
|         asm volatile ( | ||||
|         __asm__ volatile ( | ||||
|                 "xorl %%edx, %%edx        \n\t" | ||||
|                 "divw %%cx                \n\t" | ||||
|                 "movzwl %%ax, %%eax       \n\t" | ||||
| @@ -149,7 +149,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | ||||
|  | ||||
|     if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ | ||||
|  | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movd %%"REG_a", "MM"3              \n\t" // last_non_zero_p1 | ||||
|             SPREADW(MM"3") | ||||
|             "pxor "MM"7, "MM"7                  \n\t" // 0 | ||||
| @@ -182,7 +182,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | ||||
|               "r" (inv_zigzag_direct16+64), "r" (temp_block+64) | ||||
|         ); | ||||
|     }else{ // FMT_H263 | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movd %%"REG_a", "MM"3              \n\t" // last_non_zero_p1 | ||||
|             SPREADW(MM"3") | ||||
|             "pxor "MM"7, "MM"7                  \n\t" // 0 | ||||
| @@ -214,7 +214,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | ||||
|               "r" (inv_zigzag_direct16+64), "r" (temp_block+64) | ||||
|         ); | ||||
|     } | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movd %1, "MM"1                     \n\t" // max_qcoeff | ||||
|         SPREADW(MM"1") | ||||
|         "psubusw "MM"1, "MM"4               \n\t" | ||||
|   | ||||
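The `mul %%ecx` branch above divides by 2q without a div instruction: ff_inverse[] stores scaled reciprocals, so the quotient appears in the high half of a 32x32 product, which the asm reads straight from %edx. A sketch of the trick, assuming the table holds roughly ceil(2^32 / d):

    #include <stdint.h>

    /* inverse_tab[d] ~ (2^32 + d - 1) / d; the exact validity range is
     * a property of the real ff_inverse table, not checked here. */
    static inline unsigned div_by_mul(unsigned x, unsigned d, const uint32_t *inverse_tab)
    {
        return (unsigned)(((uint64_t)x * inverse_tab[d]) >> 32);
    }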
| @@ -212,7 +212,7 @@ static inline void idct(int16_t *block) | ||||
|         DECLARE_ALIGNED(8, int64_t, align_tmp[16]); | ||||
|         int16_t * const temp= (int16_t*)align_tmp; | ||||
|  | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
| #if 0 //Alternative, simpler variant | ||||
|  | ||||
| #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ | ||||
|   | ||||
| @@ -38,7 +38,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ | ||||
|         // calculate b[0] correctly afterwards. | ||||
|  | ||||
|         i = 0; | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "pcmpeqd   %%xmm7, %%xmm7         \n\t" | ||||
|             "pcmpeqd   %%xmm3, %%xmm3         \n\t" | ||||
|             "psllw         $1, %%xmm3         \n\t" | ||||
| @@ -46,7 +46,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ | ||||
|             "psllw        $13, %%xmm3         \n\t" | ||||
|         ::); | ||||
|         for(; i<w_l-15; i+=16){ | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movdqu   (%1), %%xmm1        \n\t" | ||||
|                 "movdqu 16(%1), %%xmm5        \n\t" | ||||
|                 "movdqu  2(%1), %%xmm2        \n\t" | ||||
| @@ -77,7 +77,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ | ||||
|             dst[i] = dst[i] - (b[i] + b[i + 1]); | ||||
|         } | ||||
|         for(; i<w_r-15; i+=16){ | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movdqu   (%1), %%xmm1        \n\t" | ||||
|                 "movdqu 16(%1), %%xmm5        \n\t" | ||||
|                 "movdqu  2(%1), %%xmm2        \n\t" | ||||
| @@ -102,14 +102,14 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ | ||||
|         IDWTELEM b_0 = b[0]; | ||||
|  | ||||
|         i = 0; | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "psllw         $15, %%xmm7        \n\t" | ||||
|             "pcmpeqw    %%xmm6, %%xmm6        \n\t" | ||||
|             "psrlw         $13, %%xmm6        \n\t" | ||||
|             "paddw      %%xmm7, %%xmm6        \n\t" | ||||
|         ::); | ||||
|         for(; i<w_l-15; i+=16){ | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movdqu   (%1), %%xmm0        \n\t" | ||||
|                 "movdqu 16(%1), %%xmm4        \n\t" | ||||
|                 "movdqu  2(%1), %%xmm1        \n\t" | ||||
| @@ -150,7 +150,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ | ||||
|             temp[i] = src[i] - ((-W_AM*(b[i] + b[i+1]))>>W_AS); | ||||
|         } | ||||
|         for(; i<w_r-7; i+=8){ | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movdqu  2(%1), %%xmm2        \n\t" | ||||
|                 "movdqu 18(%1), %%xmm6        \n\t" | ||||
|                 "paddw    (%1), %%xmm2        \n\t" | ||||
| @@ -180,7 +180,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ | ||||
|             b[i] = b[i>>1]; | ||||
|         } | ||||
|         for (i-=62; i>=0; i-=64){ | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movdqa      (%1), %%xmm0       \n\t" | ||||
|                 "movdqa    16(%1), %%xmm2       \n\t" | ||||
|                 "movdqa    32(%1), %%xmm4       \n\t" | ||||
| @@ -224,7 +224,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ | ||||
|  | ||||
|         i = 1; | ||||
|         b[0] = b[0] - ((W_DM * 2 * ref[1]+W_DO)>>W_DS); | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "pcmpeqw    %%mm7, %%mm7         \n\t" | ||||
|             "pcmpeqw    %%mm3, %%mm3         \n\t" | ||||
|             "psllw         $1, %%mm3         \n\t" | ||||
| @@ -232,7 +232,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ | ||||
|             "psllw        $13, %%mm3         \n\t" | ||||
|            ::); | ||||
|         for(; i<w_l-7; i+=8){ | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movq     (%1), %%mm2        \n\t" | ||||
|                 "movq    8(%1), %%mm6        \n\t" | ||||
|                 "paddw   2(%1), %%mm2        \n\t" | ||||
| @@ -257,7 +257,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ | ||||
|  | ||||
|         i = 0; | ||||
|         for(; i<w_r-7; i+=8){ | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movq     (%1), %%mm2        \n\t" | ||||
|                 "movq    8(%1), %%mm6        \n\t" | ||||
|                 "paddw   2(%1), %%mm2        \n\t" | ||||
| @@ -280,14 +280,14 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ | ||||
|  | ||||
|         i = 1; | ||||
|         b[0] = b[0] + (((2 * ref[1] + W_BO) + 4 * b[0]) >> W_BS); | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "psllw         $15, %%mm7        \n\t" | ||||
|             "pcmpeqw     %%mm6, %%mm6        \n\t" | ||||
|             "psrlw         $13, %%mm6        \n\t" | ||||
|             "paddw       %%mm7, %%mm6        \n\t" | ||||
|            ::); | ||||
|         for(; i<w_l-7; i+=8){ | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movq     (%1), %%mm0        \n\t" | ||||
|                 "movq    8(%1), %%mm4        \n\t" | ||||
|                 "movq    2(%1), %%mm1        \n\t" | ||||
| @@ -324,7 +324,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ | ||||
|         i = 0; | ||||
|  | ||||
|         for(; i<w_r-7; i+=8){ | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movq    2(%1), %%mm2        \n\t" | ||||
|                 "movq   10(%1), %%mm6        \n\t" | ||||
|                 "paddw    (%1), %%mm2        \n\t" | ||||
| @@ -354,7 +354,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ | ||||
|             b[i] = b[i>>1]; | ||||
|         } | ||||
|         for (i-=30; i>=0; i-=32){ | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movq        (%1), %%mm0       \n\t" | ||||
|                 "movq       8(%1), %%mm2       \n\t" | ||||
|                 "movq      16(%1), %%mm4       \n\t" | ||||
| @@ -448,7 +448,7 @@ void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, | ||||
|     } | ||||
|     i+=i; | ||||
|  | ||||
|          asm volatile ( | ||||
|          __asm__ volatile ( | ||||
|         "jmp 2f                                      \n\t" | ||||
|         "1:                                          \n\t" | ||||
|         snow_vertical_compose_sse2_load("%4","xmm0","xmm2","xmm4","xmm6") | ||||
| @@ -544,7 +544,7 @@ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I | ||||
|         b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | ||||
|     } | ||||
|     i+=i; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "jmp 2f                                      \n\t" | ||||
|         "1:                                          \n\t" | ||||
|  | ||||
| @@ -606,7 +606,7 @@ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I | ||||
| #define snow_inner_add_yblock_sse2_header \ | ||||
|     IDWTELEM * * dst_array = sb->line + src_y;\ | ||||
|     x86_reg tmp;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|              "mov  %7, %%"REG_c"             \n\t"\ | ||||
|              "mov  %6, %2                    \n\t"\ | ||||
|              "mov  %4, %%"REG_S"             \n\t"\ | ||||
| @@ -759,7 +759,7 @@ snow_inner_add_yblock_sse2_end_16 | ||||
| #define snow_inner_add_yblock_mmx_header \ | ||||
|     IDWTELEM * * dst_array = sb->line + src_y;\ | ||||
|     x86_reg tmp;\ | ||||
|     asm volatile(\ | ||||
|     __asm__ volatile(\ | ||||
|              "mov  %7, %%"REG_c"             \n\t"\ | ||||
|              "mov  %6, %2                    \n\t"\ | ||||
|              "mov  %4, %%"REG_S"             \n\t"\ | ||||
|   | ||||
| @@ -74,7 +74,7 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst, | ||||
|                                        const uint8_t *src, x86_reg stride, | ||||
|                                        int rnd, int64_t shift) | ||||
| { | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "mov       $3, %%"REG_c"           \n\t" | ||||
|         LOAD_ROUNDER_MMX("%5") | ||||
|         "movq      "MANGLE(ff_pw_9)", %%mm6 \n\t" | ||||
| @@ -114,7 +114,7 @@ static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride, | ||||
|  | ||||
|     src -= 1; | ||||
|     rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */ | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         LOAD_ROUNDER_MMX("%4") | ||||
|         "movq      "MANGLE(ff_pw_128)", %%mm6\n\t" | ||||
|         "movq      "MANGLE(ff_pw_9)", %%mm5 \n\t" | ||||
| @@ -155,7 +155,7 @@ static void vc1_put_shift2_mmx(uint8_t *dst, const uint8_t *src, | ||||
|                                x86_reg stride, int rnd, x86_reg offset) | ||||
| { | ||||
|     rnd = 8-rnd; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "mov       $8, %%"REG_c"           \n\t" | ||||
|         LOAD_ROUNDER_MMX("%5") | ||||
|         "movq      "MANGLE(ff_pw_9)", %%mm6\n\t" | ||||
| @@ -264,7 +264,7 @@ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src,      \ | ||||
| {                                                                       \ | ||||
|     int h = 8;                                                          \ | ||||
|     src -= src_stride;                                                  \ | ||||
|     asm volatile(                                                       \ | ||||
|     __asm__ volatile(                                                       \ | ||||
|         LOAD_ROUNDER_MMX("%5")                                          \ | ||||
|         "movq      "MANGLE(ff_pw_53)", %%mm5\n\t"                       \ | ||||
|         "movq      "MANGLE(ff_pw_18)", %%mm6\n\t"                       \ | ||||
| @@ -320,7 +320,7 @@ vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride,         \ | ||||
|     int h = 8;                                                          \ | ||||
|     src -= 1;                                                           \ | ||||
|     rnd -= (-4+58+13-3)*256; /* Add -256 bias */                        \ | ||||
|     asm volatile(                                                       \ | ||||
|     __asm__ volatile(                                                       \ | ||||
|         LOAD_ROUNDER_MMX("%4")                                          \ | ||||
|         "movq      "MANGLE(ff_pw_18)", %%mm6   \n\t"                    \ | ||||
|         "movq      "MANGLE(ff_pw_53)", %%mm5   \n\t"                    \ | ||||
| @@ -358,7 +358,7 @@ vc1_put_## NAME ## _mmx(uint8_t *dst, const uint8_t *src,               \ | ||||
|     int h = 8;                                                          \ | ||||
|     src -= offset;                                                      \ | ||||
|     rnd = 32-rnd;                                                       \ | ||||
|     asm volatile (                                                      \ | ||||
|     __asm__ volatile (                                                      \ | ||||
|         LOAD_ROUNDER_MMX("%6")                                          \ | ||||
|         "movq      "MANGLE(ff_pw_53)", %%mm5       \n\t"                \ | ||||
|         "movq      "MANGLE(ff_pw_18)", %%mm6       \n\t"                \ | ||||
| @@ -412,7 +412,7 @@ static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, | ||||
|     static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] = | ||||
|          { NULL, vc1_put_shift1_mmx, vc1_put_shift2_mmx, vc1_put_shift3_mmx }; | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor %%mm0, %%mm0         \n\t" | ||||
|         ::: "memory" | ||||
|     ); | ||||
|   | ||||
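The shift2 filters above implement the (-1, 9, 9, -1)/16 kernel; the ff_pw_9 constant and the pre-biased rnd values correspond to one output tap of the scalar form (clamping of the 8-bit paths omitted in this sketch):

    #include <stdint.h>

    static inline int vc1_shift2_tap(const uint8_t *src, int stride, int rnd)
    {
        return (-src[-stride] + 9 * src[0] + 9 * src[stride]
                - src[2 * stride] + rnd) >> 4;
    }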
| @@ -250,7 +250,7 @@ void ff_vp3_idct_mmx(int16_t *output_data) | ||||
| #define I(x) AV_STRINGIFY(16* x       )"(%0)" | ||||
| #define J(x) AV_STRINGIFY(16*(x-4) + 8)"(%0)" | ||||
|  | ||||
|     asm volatile ( | ||||
|     __asm__ volatile ( | ||||
|         RowIDCT() | ||||
|         Transpose() | ||||
|  | ||||
|   | ||||
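The I()/J() operand builders above rely on stringified arithmetic: the expression stays textual in the template and the assembler folds it. A sketch, with AV_STRINGIFY's real definition living in a common header:

    #define AV_TOSTRING(s)  #s
    #define AV_STRINGIFY(s) AV_TOSTRING(s)
    /* I(3) pastes to "16* 3(%0)"; gas evaluates the constant, so the
     * template addresses row 3 of the block at displacement 48. */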
| @@ -161,7 +161,7 @@ void ff_vp3_idct_sse2(int16_t *input_data) | ||||
| #define O(x) I(x) | ||||
| #define C(x) AV_STRINGIFY(16*(x-1))"(%1)" | ||||
|  | ||||
|     asm volatile ( | ||||
|     __asm__ volatile ( | ||||
|         VP3_1D_IDCT_SSE2(NOP, NOP) | ||||
|  | ||||
|         TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%0)) | ||||
|   | ||||
| @@ -654,7 +654,7 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n, | ||||
|        fact they decided to store the quantized DC (which would lead | ||||
|        to problems if Q could vary !) */ | ||||
| #if (defined(ARCH_X86)) && !defined PIC | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movl %3, %%eax         \n\t" | ||||
|         "shrl $1, %%eax         \n\t" | ||||
|         "addl %%eax, %2         \n\t" | ||||
|   | ||||
| @@ -66,7 +66,7 @@ int has_altivec(void) | ||||
| #elif defined(RUNTIME_CPUDETECT) | ||||
|     int proc_ver; | ||||
|     // Support of mfspr PVR emulation added in Linux 2.6.17. | ||||
|     asm volatile("mfspr %0, 287" : "=r" (proc_ver)); | ||||
|     __asm__ volatile("mfspr %0, 287" : "=r" (proc_ver)); | ||||
|     proc_ver >>= 16; | ||||
|     if (proc_ver  & 0x8000 || | ||||
|         proc_ver == 0x000c || | ||||
|   | ||||
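In isolation, the runtime-detection read above looks like the following sketch; SPR 287 is the PVR, which the kernel traps and emulates for user space as noted in the comment:

    static unsigned int read_pvr_version(void)
    {
        unsigned int proc_ver;
        __asm__ volatile ("mfspr %0, 287" : "=r" (proc_ver));
        return proc_ver >> 16;   /* keep the processor-version field */
    }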
| @@ -148,7 +148,7 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1); | ||||
|         i += 16; | ||||
|     } | ||||
|     for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) { | ||||
|         asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory"); | ||||
|         __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory"); | ||||
|     } | ||||
|     if (misal) { | ||||
|         ((unsigned long*)blocks)[188] = 0L; | ||||
| @@ -181,7 +181,7 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); | ||||
|     } | ||||
|     else | ||||
|         for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { | ||||
|             asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); | ||||
|             __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); | ||||
|         } | ||||
| #else | ||||
|     memset(blocks, 0, sizeof(DCTELEM)*6*64); | ||||
| @@ -219,7 +219,7 @@ long check_dcbzl_effect(void) | ||||
|  | ||||
|     /* below the constraint "b" seems to mean "Address base register" | ||||
|        in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */ | ||||
|     asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero)); | ||||
|     __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero)); | ||||
|  | ||||
|     for (i = 0; i < 1024 ; i ++) { | ||||
|         if (fakedata[i] == (char)0) | ||||
| @@ -241,7 +241,7 @@ static void prefetch_ppc(void *mem, int stride, int h) | ||||
| { | ||||
|     register const uint8_t *p = mem; | ||||
|     do { | ||||
|         asm volatile ("dcbt 0,%0" : : "r" (p)); | ||||
|         __asm__ volatile ("dcbt 0,%0" : : "r" (p)); | ||||
|         p+= stride; | ||||
|     } while(--h); | ||||
| } | ||||
|   | ||||
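check_dcbzl_effect() above measures the cache-line size empirically; a condensed sketch of that probe (return-value conventions simplified):

    #include <string.h>

    static long dcbzl_line_size(void)
    {
        char fakedata[1024];
        long count = 0, zero = 0;
        int i;
        memset(fakedata, 0xFF, sizeof(fakedata));
        /* zero one cache line in the middle, then count the damage */
        __asm__ volatile ("dcbzl %0, %1"
                          : : "b" (fakedata + 512), "r" (zero) : "memory");
        for (i = 0; i < 1024; i++)
            count += (fakedata[i] == 0);
        return count;   /* bytes zeroed == line size; 0 if unsupported */
    }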
| @@ -70,36 +70,36 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][ | ||||
|  | ||||
| #ifndef HAVE_PPC64 | ||||
| #define POWERP_PMC_DATATYPE unsigned long | ||||
| #define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 937" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC1(a) __asm__ volatile("mfspr %0, 937" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC2(a) __asm__ volatile("mfspr %0, 938" : "=r" (a)) | ||||
| #if (POWERPC_NUM_PMC_ENABLED > 2) | ||||
| #define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 942" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC3(a) __asm__ volatile("mfspr %0, 941" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC4(a) __asm__ volatile("mfspr %0, 942" : "=r" (a)) | ||||
| #else | ||||
| #define POWERPC_GET_PMC3(a) do {} while (0) | ||||
| #define POWERPC_GET_PMC4(a) do {} while (0) | ||||
| #endif | ||||
| #if (POWERPC_NUM_PMC_ENABLED > 4) | ||||
| #define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 929" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 930" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC5(a) __asm__ volatile("mfspr %0, 929" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC6(a) __asm__ volatile("mfspr %0, 930" : "=r" (a)) | ||||
| #else | ||||
| #define POWERPC_GET_PMC5(a) do {} while (0) | ||||
| #define POWERPC_GET_PMC6(a) do {} while (0) | ||||
| #endif | ||||
| #else /* HAVE_PPC64 */ | ||||
| #define POWERP_PMC_DATATYPE unsigned long long | ||||
| #define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 771" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 772" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC1(a) __asm__ volatile("mfspr %0, 771" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC2(a) __asm__ volatile("mfspr %0, 772" : "=r" (a)) | ||||
| #if (POWERPC_NUM_PMC_ENABLED > 2) | ||||
| #define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 773" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 774" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC3(a) __asm__ volatile("mfspr %0, 773" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC4(a) __asm__ volatile("mfspr %0, 774" : "=r" (a)) | ||||
| #else | ||||
| #define POWERPC_GET_PMC3(a) do {} while (0) | ||||
| #define POWERPC_GET_PMC4(a) do {} while (0) | ||||
| #endif | ||||
| #if (POWERPC_NUM_PMC_ENABLED > 4) | ||||
| #define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 775" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 776" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC5(a) __asm__ volatile("mfspr %0, 775" : "=r" (a)) | ||||
| #define POWERPC_GET_PMC6(a) __asm__ volatile("mfspr %0, 776" : "=r" (a)) | ||||
| #else | ||||
| #define POWERPC_GET_PMC5(a) do {} while (0) | ||||
| #define POWERPC_GET_PMC6(a) do {} while (0) | ||||
|   | ||||
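As a usage sketch, these PMC readers bracket a measured region. This assumes a 32-bit PowerPC whose performance monitor has already been configured to count something in PMC1 (SPR 937, as above; the 64-bit build uses SPR 771 instead):

    #define POWERPC_GET_PMC1(a) __asm__ volatile("mfspr %0, 937" : "=r" (a))

    /* Return the PMC1 delta across a call; what the counter means
     * depends on how the performance monitor was programmed. */
    static unsigned long pmc1_cost(void (*fn)(void))
    {
        unsigned long before, after;
        POWERPC_GET_PMC1(before);
        fn();
        POWERPC_GET_PMC1(after);
        return after - before;
    }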
| @@ -32,7 +32,7 @@ | ||||
| #if (__GNUC__ < 4) | ||||
| # define REG_v(a) | ||||
| #else | ||||
| # define REG_v(a) asm ( #a ) | ||||
| # define REG_v(a) __asm__ ( #a ) | ||||
| #endif | ||||
|  | ||||
| #if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) | ||||
|   | ||||
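REG_v wraps GCC's explicit-register variable extension, where __asm__("name") after a declaration pins the variable to a named register (and expands to nothing before GCC 4). A minimal sketch, assuming an AltiVec target built with -maltivec; the choice of v13 and the helper are illustrative:

    #include <altivec.h>

    #if (__GNUC__ < 4)
    # define REG_v(a)                 /* old GCC: let it allocate */
    #else
    # define REG_v(a) __asm__ ( #a )
    #endif

    /* Pin a permute vector to v13 so hand-written asm that follows
     * can refer to the register by name. */
    static vector unsigned char load_perm(const unsigned char *p)
    {
        register vector unsigned char perm REG_v(v13);
        perm = vec_lvsl(0, p);   /* permute vector for unaligned loads */
        return perm;
    }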
| @@ -26,12 +26,12 @@ | ||||
| #if defined(ARCH_POWERPC_405) | ||||
| /* signed 16x16 -> 32 multiply add accumulate */ | ||||
| #define MAC16(rt, ra, rb) \ | ||||
|     asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); | ||||
|     __asm__ ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); | ||||
|  | ||||
| /* signed 16x16 -> 32 multiply */ | ||||
| #define MUL16(ra, rb) \ | ||||
|     ({ int __rt; \ | ||||
|     asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \ | ||||
|     __asm__ ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \ | ||||
|     __rt; }) | ||||
| #endif | ||||
|  | ||||
|   | ||||
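These PPC405 macros map straight onto maclhw/mullhw, the signed 16x16 multiply with 32-bit accumulate. A hedged usage sketch, compiling only for 405-class cores; dot4 is an illustrative helper, not FFmpeg code:

    #define MAC16(rt, ra, rb) \
        __asm__ ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));

    /* 4-tap dot product: acc += a[i]*b[i] on the low halfwords. */
    static int dot4(const short *a, const short *b)
    {
        int acc = 0;
        MAC16(acc, a[0], b[0]);
        MAC16(acc, a[1], b[1]);
        MAC16(acc, a[2], b[2]);
        MAC16(acc, a[3], b[3]);
        return acc;
    }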
| @@ -31,7 +31,7 @@ void ff_mmi_idct(DCTELEM *block); | ||||
|  | ||||
| static void clear_blocks_mmi(DCTELEM * blocks) | ||||
| { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|         ".set noreorder    \n" | ||||
|         "addiu $9, %0, 768 \n" | ||||
|         "nop               \n" | ||||
| @@ -51,7 +51,7 @@ static void clear_blocks_mmi(DCTELEM * blocks) | ||||
|  | ||||
| static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size) | ||||
| { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|         ".set   push            \n\t" | ||||
|         ".set   mips3           \n\t" | ||||
|         "ld     $8, 0(%0)       \n\t" | ||||
| @@ -92,7 +92,7 @@ static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size) | ||||
|  | ||||
| static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|         ".set   push            \n\t" | ||||
|         ".set   mips3           \n\t" | ||||
|         "1:                     \n\t" | ||||
| @@ -111,7 +111,7 @@ static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size | ||||
|  | ||||
| static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | ||||
|         asm volatile ( | ||||
|         __asm__ volatile ( | ||||
|         ".set   push            \n\t" | ||||
|         ".set   mips3           \n\t" | ||||
|         "1:                     \n\t" | ||||
|   | ||||
| @@ -257,7 +257,7 @@ static short consttable[] align16 = { | ||||
|         pmaxh($2, $0, $2);      \ | ||||
|         ppacb($0, $2, $2);      \ | ||||
|         sd3(2, 0, 4);           \ | ||||
|         asm volatile ("add $4, $5, $4"); | ||||
|         __asm__ volatile ("add $4, $5, $4"); | ||||
|  | ||||
| #define DCT_8_INV_COL8_PUT() \ | ||||
|         PUT($16);        \ | ||||
| @@ -277,7 +277,7 @@ static short consttable[] align16 = { | ||||
|         pmaxh($2, $0, $2);   \ | ||||
|         ppacb($0, $2, $2);   \ | ||||
|         sd3(2, 0, 4); \ | ||||
|         asm volatile ("add $4, $5, $4"); | ||||
|         __asm__ volatile ("add $4, $5, $4"); | ||||
|  | ||||
| /*fixme: schedule*/ | ||||
| #define DCT_8_INV_COL8_ADD() \ | ||||
| @@ -294,7 +294,7 @@ static short consttable[] align16 = { | ||||
| void ff_mmi_idct(int16_t * block) | ||||
| { | ||||
|         /* $4 = block */ | ||||
|         asm volatile("la $24, %0"::"m"(consttable[0])); | ||||
|         __asm__ volatile("la $24, %0"::"m"(consttable[0])); | ||||
|         lq($24, ROUNDER_0, $8); | ||||
|         lq($24, ROUNDER_1, $7); | ||||
|         DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8); | ||||
| @@ -309,14 +309,14 @@ void ff_mmi_idct(int16_t * block) | ||||
|         DCT_8_INV_COL8_STORE($4); | ||||
|  | ||||
|         //let savedtemp regs be saved | ||||
|         asm volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | ||||
|         __asm__ volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | ||||
| } | ||||
|  | ||||
|  | ||||
| void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | ||||
| { | ||||
|         /* $4 = dest, $5 = line_size, $6 = block */ | ||||
|         asm volatile("la $24, %0"::"m"(consttable[0])); | ||||
|         __asm__ volatile("la $24, %0"::"m"(consttable[0])); | ||||
|         lq($24, ROUNDER_0, $8); | ||||
|         lq($24, ROUNDER_1, $7); | ||||
|         DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8); | ||||
| @@ -333,14 +333,14 @@ void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | ||||
|         DCT_8_INV_COL8_PUT(); | ||||
|  | ||||
|         //let savedtemp regs be saved | ||||
|         asm volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | ||||
|         __asm__ volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | ||||
| } | ||||
|  | ||||
|  | ||||
| void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | ||||
| { | ||||
|         /* $4 = dest, $5 = line_size, $6 = block */ | ||||
|         asm volatile("la $24, %0"::"m"(consttable[0])); | ||||
|         __asm__ volatile("la $24, %0"::"m"(consttable[0])); | ||||
|         lq($24, ROUNDER_0, $8); | ||||
|         lq($24, ROUNDER_1, $7); | ||||
|         DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8); | ||||
| @@ -357,6 +357,6 @@ void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | ||||
|         DCT_8_INV_COL8_ADD(); | ||||
|  | ||||
|         //let savedtemp regs be saved | ||||
|         asm volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | ||||
|         __asm__ volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | ||||
| } | ||||
|  | ||||
|   | ||||
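The empty __asm__ with a clobber list at the end of each idct above is worth noting: the hand-scheduled macros use the MIPS callee-saved registers $16..$23 behind the compiler's back, and naming them as clobbers makes GCC emit the prologue/epilogue saves. A minimal sketch of the idiom:

    /* The body (hand-written macros) silently uses $16..$23; the
     * empty asm forces GCC to save and restore them for us. */
    static void uses_saved_temps(void)
    {
        /* ... macro-generated code clobbering $16..$23 ... */
        __asm__ volatile(" " ::: "$16", "$17", "$18", "$19",
                                 "$20", "$21", "$22", "$23");
    }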
| @@ -60,112 +60,112 @@ | ||||
|  | ||||
|  | ||||
| #define         lq(base, off, reg)        \ | ||||
|         asm volatile ("lq " #reg ", %0("#base ")" : : "i" (off) ) | ||||
|         __asm__ volatile ("lq " #reg ", %0("#base ")" : : "i" (off) ) | ||||
|  | ||||
| #define         lq2(mem, reg)        \ | ||||
|         asm volatile ("lq " #reg ", %0" : : "r" (mem)) | ||||
|         __asm__ volatile ("lq " #reg ", %0" : : "r" (mem)) | ||||
|  | ||||
| #define         sq(reg, off, base)        \ | ||||
|         asm volatile ("sq " #reg ", %0("#base ")" : : "i" (off) ) | ||||
|         __asm__ volatile ("sq " #reg ", %0("#base ")" : : "i" (off) ) | ||||
|  | ||||
| /* | ||||
| #define         ld(base, off, reg)        \ | ||||
|         asm volatile ("ld " #reg ", " #off "("#base ")") | ||||
|         __asm__ volatile ("ld " #reg ", " #off "("#base ")") | ||||
| */ | ||||
|  | ||||
| #define         ld3(base, off, reg)        \ | ||||
|         asm volatile (".word %0" : : "i" ( 0xdc000000 | (base<<21) | (reg<<16) | (off))) | ||||
|         __asm__ volatile (".word %0" : : "i" ( 0xdc000000 | (base<<21) | (reg<<16) | (off))) | ||||
|  | ||||
| #define         ldr3(base, off, reg)        \ | ||||
|         asm volatile (".word %0" : : "i" ( 0x6c000000 | (base<<21) | (reg<<16) | (off))) | ||||
|         __asm__ volatile (".word %0" : : "i" ( 0x6c000000 | (base<<21) | (reg<<16) | (off))) | ||||
|  | ||||
| #define         ldl3(base, off, reg)        \ | ||||
|         asm volatile (".word %0" : : "i" ( 0x68000000 | (base<<21) | (reg<<16) | (off))) | ||||
|         __asm__ volatile (".word %0" : : "i" ( 0x68000000 | (base<<21) | (reg<<16) | (off))) | ||||
|  | ||||
| /* | ||||
| #define         sd(reg, off, base)        \ | ||||
|         asm volatile ("sd " #reg ", " #off "("#base ")") | ||||
|         __asm__ volatile ("sd " #reg ", " #off "("#base ")") | ||||
| */ | ||||
| // the assembler seems to have a bug encoding the mnemonic 'sd', so the word is encoded by hand | ||||
| #define         sd3(reg, off, base)        \ | ||||
|         asm volatile (".word %0" : : "i" ( 0xfc000000 | (base<<21) | (reg<<16) | (off))) | ||||
|         __asm__ volatile (".word %0" : : "i" ( 0xfc000000 | (base<<21) | (reg<<16) | (off))) | ||||
|  | ||||
| #define         sw(reg, off, base)        \ | ||||
|         asm volatile ("sw " #reg ", " #off "("#base ")") | ||||
|         __asm__ volatile ("sw " #reg ", " #off "("#base ")") | ||||
|  | ||||
| #define         sq2(reg, mem)        \ | ||||
|         asm volatile ("sq " #reg ", %0" : : "m" (*(mem))) | ||||
|         __asm__ volatile ("sq " #reg ", %0" : : "m" (*(mem))) | ||||
|  | ||||
| #define         pinth(rs, rt, rd) \ | ||||
|         asm volatile ("pinth  " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("pinth  " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         phmadh(rs, rt, rd) \ | ||||
|         asm volatile ("phmadh " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("phmadh " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         pcpyud(rs, rt, rd) \ | ||||
|         asm volatile ("pcpyud " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("pcpyud " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         pcpyld(rs, rt, rd) \ | ||||
|         asm volatile ("pcpyld " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("pcpyld " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         pcpyh(rt, rd) \ | ||||
|         asm volatile ("pcpyh  " #rd ", " #rt ) | ||||
|         __asm__ volatile ("pcpyh  " #rd ", " #rt ) | ||||
|  | ||||
| #define         paddw(rs, rt, rd) \ | ||||
|         asm volatile ("paddw  " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("paddw  " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         pextlw(rs, rt, rd) \ | ||||
|         asm volatile ("pextlw " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("pextlw " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         pextuw(rs, rt, rd) \ | ||||
|         asm volatile ("pextuw " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("pextuw " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         pextlh(rs, rt, rd) \ | ||||
|         asm volatile ("pextlh " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("pextlh " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         pextuh(rs, rt, rd) \ | ||||
|         asm volatile ("pextuh " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("pextuh " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         psubw(rs, rt, rd) \ | ||||
|         asm volatile ("psubw  " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("psubw  " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         psraw(rt, sa, rd) \ | ||||
|         asm volatile ("psraw  " #rd ", " #rt ", %0" : : "i"(sa) ) | ||||
|         __asm__ volatile ("psraw  " #rd ", " #rt ", %0" : : "i"(sa) ) | ||||
|  | ||||
| #define         ppach(rs, rt, rd) \ | ||||
|         asm volatile ("ppach  " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("ppach  " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         ppacb(rs, rt, rd) \ | ||||
|         asm volatile ("ppacb  " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("ppacb  " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         prevh(rt, rd) \ | ||||
|         asm volatile ("prevh  " #rd ", " #rt ) | ||||
|         __asm__ volatile ("prevh  " #rd ", " #rt ) | ||||
|  | ||||
| #define         pmulth(rs, rt, rd) \ | ||||
|         asm volatile ("pmulth " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("pmulth " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         pmaxh(rs, rt, rd) \ | ||||
|         asm volatile ("pmaxh " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("pmaxh " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         pminh(rs, rt, rd) \ | ||||
|         asm volatile ("pminh " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("pminh " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         pinteh(rs, rt, rd) \ | ||||
|         asm volatile ("pinteh  " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("pinteh  " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         paddh(rs, rt, rd) \ | ||||
|         asm volatile ("paddh  " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("paddh  " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         psubh(rs, rt, rd) \ | ||||
|         asm volatile ("psubh  " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("psubh  " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #define         psrah(rt, sa, rd) \ | ||||
|         asm volatile ("psrah  " #rd ", " #rt ", %0" : : "i"(sa) ) | ||||
|         __asm__ volatile ("psrah  " #rd ", " #rt ", %0" : : "i"(sa) ) | ||||
|  | ||||
| #define         pmfhl_uw(rd) \ | ||||
|         asm volatile ("pmfhl.uw  " #rd) | ||||
|         __asm__ volatile ("pmfhl.uw  " #rd) | ||||
|  | ||||
| #define         pextlb(rs, rt, rd) \ | ||||
|         asm volatile ("pextlb  " #rd ", " #rs ", " #rt ) | ||||
|         __asm__ volatile ("pextlb  " #rd ", " #rs ", " #rt ) | ||||
|  | ||||
| #endif /* AVCODEC_PS2_MMI_H */ | ||||
|   | ||||
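Two idioms recur in this header. The stringizing operator pastes register tokens into a mnemonic, and .word with an "i" constraint hand-encodes instructions the assembler cannot emit (the 'sd' workaround above). A sketch of both, using the definitions shown:

    /* paddh($8, $9, $10) expands to
     *     __asm__ volatile ("paddh  $10, $8, $9")                   */
    #define paddh(rs, rt, rd) \
            __asm__ volatile ("paddh  " #rd ", " #rs ", " #rt )

    /* Encode 'sd reg, off(base)' by hand: opcode 0xfc000000 with the
     * base, register and offset fields OR'd in, emitted as a raw word. */
    #define sd3(reg, off, base) \
            __asm__ volatile (".word %0" : : "i" ( 0xfc000000 | (base<<21) | (reg<<16) | (off)))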
| @@ -50,7 +50,7 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s, | ||||
|         nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; | ||||
|     } | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "add    $14, $0, %3     \n\t" | ||||
|         "pcpyld $8, %0, %0      \n\t" | ||||
|         "pcpyh  $8, $8          \n\t"   //r8 = qmul | ||||
|   | ||||
| @@ -28,7 +28,7 @@ static void memzero_align8(void *dst,size_t size) | ||||
| #if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) | ||||
|         (char*)dst+=size; | ||||
|         size/=8*4; | ||||
|         asm( | ||||
|         __asm__( | ||||
| #if defined(__SH4__) | ||||
|         " fschg\n"  //single float mode | ||||
| #endif | ||||
|   | ||||
| @@ -54,7 +54,7 @@ static const float odd_table[] __attribute__ ((aligned(8))) = { | ||||
| #if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) | ||||
|  | ||||
| #define         load_matrix(table) \ | ||||
|         asm volatile( \ | ||||
|         __asm__ volatile( \ | ||||
|         "       fschg\n" \ | ||||
|         "       fmov   @%0+,xd0\n" \ | ||||
|         "       fmov   @%0+,xd2\n" \ | ||||
| @@ -71,15 +71,15 @@ static const float odd_table[] __attribute__ ((aligned(8))) = { | ||||
|         ) | ||||
|  | ||||
| #define         ftrv() \ | ||||
|                 asm volatile("ftrv xmtrx,fv0" \ | ||||
|                 __asm__ volatile("ftrv xmtrx,fv0" \ | ||||
|                 : "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \ | ||||
|                 :  "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) ); | ||||
|  | ||||
| #define         DEFREG        \ | ||||
|         register float fr0 asm("fr0"); \ | ||||
|         register float fr1 asm("fr1"); \ | ||||
|         register float fr2 asm("fr2"); \ | ||||
|         register float fr3 asm("fr3") | ||||
|         register float fr0 __asm__("fr0"); \ | ||||
|         register float fr1 __asm__("fr1"); \ | ||||
|         register float fr2 __asm__("fr2"); \ | ||||
|         register float fr3 __asm__("fr3") | ||||
|  | ||||
| #else | ||||
|  | ||||
|   | ||||
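The DEFREG block exists because the SH4 ftrv instruction operates on the register quad fv0 = fr0..fr3: pinning four C floats to those registers lets the "=f"/"0" constraints move a vector in and out of the matrix multiply. A hedged sketch, assuming an SH4 FPU target and that xmtrx was already filled by load_matrix:

    /* Multiply one 4-vector by the current xmtrx via ftrv. */
    static void transform4(float v[4])
    {
        register float fr0 __asm__("fr0");
        register float fr1 __asm__("fr1");
        register float fr2 __asm__("fr2");
        register float fr3 __asm__("fr3");

        fr0 = v[0]; fr1 = v[1]; fr2 = v[2]; fr3 = v[3];
        __asm__ volatile("ftrv xmtrx,fv0"
            : "=f"(fr0), "=f"(fr1), "=f"(fr2), "=f"(fr3)
            :  "0"(fr0),  "1"(fr1),  "2"(fr2),  "3"(fr3));
        v[0] = fr0; v[1] = fr1; v[2] = fr2; v[3] = fr3;
    }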
| @@ -59,11 +59,11 @@ | ||||
|  | ||||
| /* signed 16x16 -> 32 multiply add accumulate */ | ||||
| #define MAC16(rt, ra, rb) \ | ||||
|     asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); | ||||
|     __asm__ ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); | ||||
|  | ||||
| /* signed 16x16 -> 32 multiply */ | ||||
| #define MUL16(rt, ra, rb) \ | ||||
|     asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb)); | ||||
|     __asm__ ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb)); | ||||
|  | ||||
| #else | ||||
|  | ||||
|   | ||||
| @@ -388,7 +388,7 @@ inline void ff_simple_idct_vis(DCTELEM *data) { | ||||
|     int out1, out2, out3, out4; | ||||
|     DECLARE_ALIGNED_8(int16_t, temp[8*8]); | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         INIT_IDCT | ||||
|  | ||||
| #define ADDROUNDER | ||||
| @@ -428,7 +428,7 @@ void ff_simple_idct_put_vis(uint8_t *dest, int line_size, DCTELEM *data) { | ||||
|     int out1, out2, out3, out4, out5; | ||||
|     int r1, r2, r3, r4, r5, r6, r7; | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "wr %%g0, 0x8, %%gsr \n\t" | ||||
|  | ||||
|         INIT_IDCT | ||||
| @@ -478,7 +478,7 @@ void ff_simple_idct_add_vis(uint8_t *dest, int line_size, DCTELEM *data) { | ||||
|     int out1, out2, out3, out4, out5, out6; | ||||
|     int r1, r2, r3, r4, r5, r6, r7; | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "wr %%g0, 0x8, %%gsr \n\t" | ||||
|  | ||||
|         INIT_IDCT | ||||
|   | ||||
| @@ -55,97 +55,97 @@ | ||||
| #define vis_rd_d(X)     (vis_dreg(X) << 25) | ||||
|  | ||||
| #define vis_ss2s(opf,rs1,rs2,rd) \ | ||||
|         asm volatile (".word %0" \ | ||||
|         __asm__ volatile (".word %0" \ | ||||
|                               : : "i" (vis_opc_base | vis_opf(opf) | \ | ||||
|                                        vis_rs1_s(rs1) | \ | ||||
|                                        vis_rs2_s(rs2) | \ | ||||
|                                        vis_rd_s(rd))) | ||||
|  | ||||
| #define vis_dd2d(opf,rs1,rs2,rd) \ | ||||
|         asm volatile (".word %0" \ | ||||
|         __asm__ volatile (".word %0" \ | ||||
|                               : : "i" (vis_opc_base | vis_opf(opf) | \ | ||||
|                                        vis_rs1_d(rs1) | \ | ||||
|                                        vis_rs2_d(rs2) | \ | ||||
|                                        vis_rd_d(rd))) | ||||
|  | ||||
| #define vis_ss2d(opf,rs1,rs2,rd) \ | ||||
|         asm volatile (".word %0" \ | ||||
|         __asm__ volatile (".word %0" \ | ||||
|                               : : "i" (vis_opc_base | vis_opf(opf) | \ | ||||
|                                        vis_rs1_s(rs1) | \ | ||||
|                                        vis_rs2_s(rs2) | \ | ||||
|                                        vis_rd_d(rd))) | ||||
|  | ||||
| #define vis_sd2d(opf,rs1,rs2,rd) \ | ||||
|         asm volatile (".word %0" \ | ||||
|         __asm__ volatile (".word %0" \ | ||||
|                               : : "i" (vis_opc_base | vis_opf(opf) | \ | ||||
|                                        vis_rs1_s(rs1) | \ | ||||
|                                        vis_rs2_d(rs2) | \ | ||||
|                                        vis_rd_d(rd))) | ||||
|  | ||||
| #define vis_d2s(opf,rs2,rd) \ | ||||
|         asm volatile (".word %0" \ | ||||
|         __asm__ volatile (".word %0" \ | ||||
|                               : : "i" (vis_opc_base | vis_opf(opf) | \ | ||||
|                                        vis_rs2_d(rs2) | \ | ||||
|                                        vis_rd_s(rd))) | ||||
|  | ||||
| #define vis_s2d(opf,rs2,rd) \ | ||||
|         asm volatile (".word %0" \ | ||||
|         __asm__ volatile (".word %0" \ | ||||
|                               : : "i" (vis_opc_base | vis_opf(opf) | \ | ||||
|                                        vis_rs2_s(rs2) | \ | ||||
|                                        vis_rd_d(rd))) | ||||
|  | ||||
| #define vis_d12d(opf,rs1,rd) \ | ||||
|         asm volatile (".word %0" \ | ||||
|         __asm__ volatile (".word %0" \ | ||||
|                               : : "i" (vis_opc_base | vis_opf(opf) | \ | ||||
|                                        vis_rs1_d(rs1) | \ | ||||
|                                        vis_rd_d(rd))) | ||||
|  | ||||
| #define vis_d22d(opf,rs2,rd) \ | ||||
|         asm volatile (".word %0" \ | ||||
|         __asm__ volatile (".word %0" \ | ||||
|                               : : "i" (vis_opc_base | vis_opf(opf) | \ | ||||
|                                        vis_rs2_d(rs2) | \ | ||||
|                                        vis_rd_d(rd))) | ||||
|  | ||||
| #define vis_s12s(opf,rs1,rd) \ | ||||
|         asm volatile (".word %0" \ | ||||
|         __asm__ volatile (".word %0" \ | ||||
|                               : : "i" (vis_opc_base | vis_opf(opf) | \ | ||||
|                                        vis_rs1_s(rs1) | \ | ||||
|                                        vis_rd_s(rd))) | ||||
|  | ||||
| #define vis_s22s(opf,rs2,rd) \ | ||||
|         asm volatile (".word %0" \ | ||||
|         __asm__ volatile (".word %0" \ | ||||
|                               : : "i" (vis_opc_base | vis_opf(opf) | \ | ||||
|                                        vis_rs2_s(rs2) | \ | ||||
|                                        vis_rd_s(rd))) | ||||
|  | ||||
| #define vis_s(opf,rd) \ | ||||
|         asm volatile (".word %0" \ | ||||
|         __asm__ volatile (".word %0" \ | ||||
|                               : : "i" (vis_opc_base | vis_opf(opf) | \ | ||||
|                                        vis_rd_s(rd))) | ||||
|  | ||||
| #define vis_d(opf,rd) \ | ||||
|         asm volatile (".word %0" \ | ||||
|         __asm__ volatile (".word %0" \ | ||||
|                               : : "i" (vis_opc_base | vis_opf(opf) | \ | ||||
|                                        vis_rd_d(rd))) | ||||
|  | ||||
| #define vis_r2m(op,rd,mem) \ | ||||
|         asm volatile (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) ) | ||||
|         __asm__ volatile (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) ) | ||||
|  | ||||
| #define vis_r2m_2(op,rd,mem1,mem2) \ | ||||
|         asm volatile (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) ) | ||||
|         __asm__ volatile (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) ) | ||||
|  | ||||
| #define vis_m2r(op,mem,rd) \ | ||||
|         asm volatile (#op "\t[%0], %%f" #rd : : "r" (&(mem)) ) | ||||
|         __asm__ volatile (#op "\t[%0], %%f" #rd : : "r" (&(mem)) ) | ||||
|  | ||||
| #define vis_m2r_2(op,mem1,mem2,rd) \ | ||||
|         asm volatile (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) ) | ||||
|         __asm__ volatile (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) ) | ||||
|  | ||||
| static inline void vis_set_gsr(unsigned int _val) | ||||
| { | ||||
|         register unsigned int val asm("g1"); | ||||
|         register unsigned int val __asm__("g1"); | ||||
|  | ||||
|         val = _val; | ||||
|         asm volatile(".word 0xa7804000" | ||||
|         __asm__ volatile(".word 0xa7804000" | ||||
|                              : : "r" (val)); | ||||
| } | ||||
|  | ||||
| @@ -164,9 +164,9 @@ static inline void vis_set_gsr(unsigned int _val) | ||||
| #define vis_st64_2(rs1,mem1,mem2)       vis_r2m_2(std, rs1, mem1, mem2) | ||||
|  | ||||
| #define vis_ldblk(mem, rd) \ | ||||
| do {        register void *__mem asm("g1"); \ | ||||
| do {        register void *__mem __asm__("g1"); \ | ||||
|         __mem = &(mem); \ | ||||
|         asm volatile(".word 0xc1985e00 | %1" \ | ||||
|         __asm__ volatile(".word 0xc1985e00 | %1" \ | ||||
|                              : \ | ||||
|                              : "r" (__mem), \ | ||||
|                                "i" (vis_rd_d(rd)) \ | ||||
| @@ -174,9 +174,9 @@ do {        register void *__mem asm("g1"); \ | ||||
| } while (0) | ||||
|  | ||||
| #define vis_stblk(rd, mem) \ | ||||
| do {        register void *__mem asm("g1"); \ | ||||
| do {        register void *__mem __asm__("g1"); \ | ||||
|         __mem = &(mem); \ | ||||
|         asm volatile(".word 0xc1b85e00 | %1" \ | ||||
|         __asm__ volatile(".word 0xc1b85e00 | %1" \ | ||||
|                              : \ | ||||
|                              : "r" (__mem), \ | ||||
|                                "i" (vis_rd_d(rd)) \ | ||||
| @@ -184,10 +184,10 @@ do {        register void *__mem asm("g1"); \ | ||||
| } while (0) | ||||
|  | ||||
| #define vis_membar_storestore()        \ | ||||
|         asm volatile(".word 0x8143e008" : : : "memory") | ||||
|         __asm__ volatile(".word 0x8143e008" : : : "memory") | ||||
|  | ||||
| #define vis_membar_sync()        \ | ||||
|         asm volatile(".word 0x8143e040" : : : "memory") | ||||
|         __asm__ volatile(".word 0x8143e040" : : : "memory") | ||||
|  | ||||
| /* 16 and 32 bit partitioned addition and subtraction.  The normal | ||||
|  * versions perform 4 16-bit or 2 32-bit additions or subtractions. | ||||
| @@ -226,11 +226,11 @@ do {        register void *__mem asm("g1"); \ | ||||
|  | ||||
| static inline void *vis_alignaddr(void *_ptr) | ||||
| { | ||||
|         register void *ptr asm("g1"); | ||||
|         register void *ptr __asm__("g1"); | ||||
|  | ||||
|         ptr = _ptr; | ||||
|  | ||||
|         asm volatile(".word %2" | ||||
|         __asm__ volatile(".word %2" | ||||
|                              : "=&r" (ptr) | ||||
|                              : "0" (ptr), | ||||
|                                "i" (vis_opc_base | vis_opf(0x18) | | ||||
| @@ -243,11 +243,11 @@ static inline void *vis_alignaddr(void *_ptr) | ||||
|  | ||||
| static inline void vis_alignaddr_g0(void *_ptr) | ||||
| { | ||||
|         register void *ptr asm("g1"); | ||||
|         register void *ptr __asm__("g1"); | ||||
|  | ||||
|         ptr = _ptr; | ||||
|  | ||||
|         asm volatile(".word %2" | ||||
|         __asm__ volatile(".word %2" | ||||
|                              : "=&r" (ptr) | ||||
|                              : "0" (ptr), | ||||
|                                "i" (vis_opc_base | vis_opf(0x18) | | ||||
| @@ -258,11 +258,11 @@ static inline void vis_alignaddr_g0(void *_ptr) | ||||
|  | ||||
| static inline void *vis_alignaddrl(void *_ptr) | ||||
| { | ||||
|         register void *ptr asm("g1"); | ||||
|         register void *ptr __asm__("g1"); | ||||
|  | ||||
|         ptr = _ptr; | ||||
|  | ||||
|         asm volatile(".word %2" | ||||
|         __asm__ volatile(".word %2" | ||||
|                              : "=&r" (ptr) | ||||
|                              : "0" (ptr), | ||||
|                                "i" (vis_opc_base | vis_opf(0x19) | | ||||
| @@ -275,11 +275,11 @@ static inline void *vis_alignaddrl(void *_ptr) | ||||
|  | ||||
| static inline void vis_alignaddrl_g0(void *_ptr) | ||||
| { | ||||
|         register void *ptr asm("g1"); | ||||
|         register void *ptr __asm__("g1"); | ||||
|  | ||||
|         ptr = _ptr; | ||||
|  | ||||
|         asm volatile(".word %2" | ||||
|         __asm__ volatile(".word %2" | ||||
|                              : "=&r" (ptr) | ||||
|                              : "0" (ptr), | ||||
|                                "i" (vis_opc_base | vis_opf(0x19) | | ||||
|   | ||||
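All of these VIS wrappers follow one pattern: build the instruction word from constant fields and emit it with .word, so the code assembles even where the system assembler lacks the VIS mnemonics; the "i" constraint keeps the word a compile-time constant. A stripped-down sketch of the pattern (the rd shift matches vis_rd_s() above; the opcode base and opf shift are placeholders, not the real SPARC encodings):

    #define MY_OPC_BASE  0x80000000u            /* placeholder opcode base */
    #define MY_OPF(x)    ((unsigned)(x) << 5)   /* placeholder opf field   */
    #define MY_RD(x)     ((unsigned)(x) << 25)  /* rd field, as in vis_rd_s */

    #define my_op_s(opf, rd) \
            __asm__ volatile (".word %0" \
                    : : "i" (MY_OPC_BASE | MY_OPF(opf) | MY_RD(rd)))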
| @@ -33,11 +33,11 @@ | ||||
| static av_always_inline av_const uint16_t bswap_16(uint16_t x) | ||||
| { | ||||
| #if defined(ARCH_X86) | ||||
|     asm("rorw $8, %0" : "+r"(x)); | ||||
|     __asm__("rorw $8, %0" : "+r"(x)); | ||||
| #elif defined(ARCH_SH4) | ||||
|     asm("swap.b %0,%0" : "=r"(x) : "0"(x)); | ||||
|     __asm__("swap.b %0,%0" : "=r"(x) : "0"(x)); | ||||
| #elif defined(HAVE_ARMV6) | ||||
|     asm("rev16 %0, %0" : "+r"(x)); | ||||
|     __asm__("rev16 %0, %0" : "+r"(x)); | ||||
| #else | ||||
|     x= (x>>8) | (x<<8); | ||||
| #endif | ||||
| @@ -48,30 +48,30 @@ static av_always_inline av_const uint32_t bswap_32(uint32_t x) | ||||
| { | ||||
| #if defined(ARCH_X86) | ||||
| #ifdef HAVE_BSWAP | ||||
|     asm("bswap   %0" : "+r" (x)); | ||||
|     __asm__("bswap   %0" : "+r" (x)); | ||||
| #else | ||||
|     asm("rorw    $8,  %w0 \n\t" | ||||
|     __asm__("rorw    $8,  %w0 \n\t" | ||||
|         "rorl    $16, %0  \n\t" | ||||
|         "rorw    $8,  %w0" | ||||
|         : "+r"(x)); | ||||
| #endif | ||||
| #elif defined(ARCH_SH4) | ||||
|     asm("swap.b %0,%0\n" | ||||
|     __asm__("swap.b %0,%0\n" | ||||
|         "swap.w %0,%0\n" | ||||
|         "swap.b %0,%0\n" | ||||
|         : "=r"(x) : "0"(x)); | ||||
| #elif defined(HAVE_ARMV6) | ||||
|     asm("rev %0, %0" : "+r"(x)); | ||||
|     __asm__("rev %0, %0" : "+r"(x)); | ||||
| #elif defined(ARCH_ARMV4L) | ||||
|     uint32_t t; | ||||
|     asm ("eor %1, %0, %0, ror #16 \n\t" | ||||
|     __asm__ ("eor %1, %0, %0, ror #16 \n\t" | ||||
|          "bic %1, %1, #0xFF0000   \n\t" | ||||
|          "mov %0, %0, ror #8      \n\t" | ||||
|          "eor %0, %0, %1, lsr #8  \n\t" | ||||
|          : "+r"(x), "+r"(t)); | ||||
| #elif defined(ARCH_BFIN) | ||||
|     unsigned tmp; | ||||
|     asm("%1 = %0 >> 8 (V);      \n\t" | ||||
|     __asm__("%1 = %0 >> 8 (V);      \n\t" | ||||
|         "%0 = %0 << 8 (V);      \n\t" | ||||
|         "%0 = %0 | %1;          \n\t" | ||||
|         "%0 = PACK(%0.L, %0.H); \n\t" | ||||
| @@ -90,7 +90,7 @@ static inline uint64_t av_const bswap_64(uint64_t x) | ||||
|     x= ((x<<16)&0xFFFF0000FFFF0000ULL) | ((x>>16)&0x0000FFFF0000FFFFULL); | ||||
|     return (x>>32) | (x<<32); | ||||
| #elif defined(ARCH_X86_64) | ||||
|   asm("bswap  %0": "=r" (x) : "0" (x)); | ||||
|   __asm__("bswap  %0": "=r" (x) : "0" (x)); | ||||
|   return x; | ||||
| #else | ||||
|     union { | ||||
|   | ||||
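For the bswap paths, the C fallback shown above is the reference every per-arch asm variant must match. A self-contained check (plain C99, runnable anywhere):

    #include <stdint.h>
    #include <assert.h>

    /* Portable halfword swap; each asm variant above must agree. */
    static uint16_t bswap_16_c(uint16_t x)
    {
        return (uint16_t)((x >> 8) | (x << 8));
    }

    int main(void)
    {
        assert(bswap_16_c(0x1234) == 0x3412);
        assert(bswap_16_c(0x00FF) == 0xFF00);
        return 0;
    }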
| @@ -154,7 +154,7 @@ static inline av_const int mid_pred(int a, int b, int c) | ||||
| { | ||||
| #ifdef HAVE_CMOV | ||||
|     int i=b; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "cmp    %2, %1 \n\t" | ||||
|         "cmovg  %1, %0 \n\t" | ||||
|         "cmovg  %2, %1 \n\t" | ||||
| @@ -327,7 +327,7 @@ static inline av_pure int ff_get_fourcc(const char *s){ | ||||
| static inline uint64_t read_time(void) | ||||
| { | ||||
|     uint64_t a, d; | ||||
|     asm volatile("rdtsc\n\t" | ||||
|     __asm__ volatile("rdtsc\n\t" | ||||
|                  : "=a" (a), "=d" (d)); | ||||
|     return (d << 32) | (a & 0xffffffff); | ||||
| } | ||||
| @@ -335,7 +335,7 @@ static inline uint64_t read_time(void) | ||||
| static inline long long read_time(void) | ||||
| { | ||||
|     long long l; | ||||
|     asm volatile("rdtsc\n\t" | ||||
|     __asm__ volatile("rdtsc\n\t" | ||||
|                  : "=A" (l)); | ||||
|     return l; | ||||
| } | ||||
| @@ -349,7 +349,7 @@ static inline uint64_t read_time(void) | ||||
|         } p; | ||||
|         unsigned long long c; | ||||
|     } t; | ||||
|     asm volatile ("%0=cycles; %1=cycles2;" : "=d" (t.p.lo), "=d" (t.p.hi)); | ||||
|     __asm__ volatile ("%0=cycles; %1=cycles2;" : "=d" (t.p.lo), "=d" (t.p.hi)); | ||||
|     return t.c; | ||||
| } | ||||
| #else //FIXME check ppc64 | ||||
| @@ -358,7 +358,7 @@ static inline uint64_t read_time(void) | ||||
|     uint32_t tbu, tbl, temp; | ||||
|  | ||||
|      /* from section 2.2.1 of the 32-bit PowerPC PEM */ | ||||
|      asm volatile( | ||||
|      __asm__ volatile( | ||||
|          "1:\n" | ||||
|          "mftbu  %2\n" | ||||
|          "mftb   %0\n" | ||||
|   | ||||
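A usage sketch for the rdtsc variant above (x86-64, GNU inline asm): the counter comes back split across EDX:EAX, which the "=a"/"=d" outputs stitch together; only the delta between two reads is meaningful:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t read_time(void)
    {
        uint64_t a, d;
        __asm__ volatile("rdtsc\n\t" : "=a" (a), "=d" (d));
        return (d << 32) | (a & 0xffffffff);
    }

    int main(void)
    {
        uint64_t t0 = read_time();
        /* ... code under test ... */
        uint64_t t1 = read_time();
        printf("elapsed: %llu cycles\n", (unsigned long long)(t1 - t0));
        return 0;
    }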
| @@ -130,7 +130,7 @@ extern const uint32_t ff_inverse[256]; | ||||
| #    define FASTDIV(a,b) \ | ||||
|     ({\ | ||||
|         int ret,dmy;\ | ||||
|         asm volatile(\ | ||||
|         __asm__ volatile(\ | ||||
|             "mull %3"\ | ||||
|             :"=d"(ret),"=a"(dmy)\ | ||||
|             :"1"(a),"g"(ff_inverse[b])\ | ||||
| @@ -141,7 +141,7 @@ extern const uint32_t ff_inverse[256]; | ||||
| static inline av_const int FASTDIV(int a, int b) | ||||
| { | ||||
|     int r; | ||||
|     asm volatile("cmp   %2, #0        \n\t" | ||||
|     __asm__ volatile("cmp   %2, #0        \n\t" | ||||
|                  "smmul %0, %1, %2    \n\t" | ||||
|                  "rsblt %0, %0, #0    \n\t" | ||||
|                  : "=r"(r) : "r"(a), "r"(ff_inverse[b])); | ||||
| @@ -151,7 +151,7 @@ static inline av_const int FASTDIV(int a, int b) | ||||
| #    define FASTDIV(a,b) \ | ||||
|     ({\ | ||||
|         int ret,dmy;\ | ||||
|         asm volatile(\ | ||||
|         __asm__ volatile(\ | ||||
|             "umull %1, %0, %2, %3"\ | ||||
|             :"=&r"(ret),"=&r"(dmy)\ | ||||
|             :"r"(a),"r"(ff_inverse[b])\ | ||||
| @@ -190,7 +190,7 @@ static inline av_const unsigned int ff_sqrt(unsigned int a) | ||||
|  | ||||
| #if defined(ARCH_X86) | ||||
| #define MASK_ABS(mask, level)\ | ||||
|             asm volatile(\ | ||||
|             __asm__ volatile(\ | ||||
|                 "cltd                   \n\t"\ | ||||
|                 "xorl %1, %0            \n\t"\ | ||||
|                 "subl %1, %0            \n\t"\ | ||||
| @@ -204,7 +204,7 @@ static inline av_const unsigned int ff_sqrt(unsigned int a) | ||||
|  | ||||
| #ifdef HAVE_CMOV | ||||
| #define COPY3_IF_LT(x,y,a,b,c,d)\ | ||||
| asm volatile (\ | ||||
| __asm__ volatile (\ | ||||
|     "cmpl %0, %3        \n\t"\ | ||||
|     "cmovl %3, %0       \n\t"\ | ||||
|     "cmovl %4, %1       \n\t"\ | ||||
|   | ||||
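FASTDIV replaces a division by b with a multiply by ff_inverse[b] (approximately 2^32/b) and keeps the high 32 bits of the product. A plain-C sketch of the arithmetic; the on-the-fly reciprocal below stands in for the ff_inverse[] table and, like FASTDIV itself, is exact only for the modest operand sizes FFmpeg feeds it (roughly 16-bit numerators), not for arbitrary 32-bit inputs:

    #include <stdint.h>
    #include <assert.h>

    static uint32_t fastdiv(uint32_t a, uint32_t b)
    {
        uint64_t inv = ((1ULL << 32) + b - 1) / b;     /* ~ceil(2^32 / b) */
        return (uint32_t)(((uint64_t)a * inv) >> 32);  /* high 32 bits    */
    }

    int main(void)
    {
        assert(fastdiv(1000, 3) == 333);
        assert(fastdiv(255, 5) == 51);
        return 0;
    }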
| @@ -156,28 +156,28 @@ static const char *replaceTable[]= | ||||
| #if defined(ARCH_X86) | ||||
| static inline void prefetchnta(void *p) | ||||
| { | ||||
|     asm volatile(   "prefetchnta (%0)\n\t" | ||||
|     __asm__ volatile(   "prefetchnta (%0)\n\t" | ||||
|         : : "r" (p) | ||||
|     ); | ||||
| } | ||||
|  | ||||
| static inline void prefetcht0(void *p) | ||||
| { | ||||
|     asm volatile(   "prefetcht0 (%0)\n\t" | ||||
|     __asm__ volatile(   "prefetcht0 (%0)\n\t" | ||||
|         : : "r" (p) | ||||
|     ); | ||||
| } | ||||
|  | ||||
| static inline void prefetcht1(void *p) | ||||
| { | ||||
|     asm volatile(   "prefetcht1 (%0)\n\t" | ||||
|     __asm__ volatile(   "prefetcht1 (%0)\n\t" | ||||
|         : : "r" (p) | ||||
|     ); | ||||
| } | ||||
|  | ||||
| static inline void prefetcht2(void *p) | ||||
| { | ||||
|     asm volatile(   "prefetcht2 (%0)\n\t" | ||||
|     __asm__ volatile(   "prefetcht2 (%0)\n\t" | ||||
|         : : "r" (p) | ||||
|     ); | ||||
| } | ||||
|   | ||||
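These prefetch wrappers are typically issued a fixed distance ahead of a streaming loop. A hedged usage sketch (x86, GNU inline asm); the helper name and the 256-byte lookahead are illustrative tuning choices:

    static inline void prefetchnta_c(const void *p)
    {
        __asm__ volatile("prefetchnta (%0)\n\t" : : "r" (p));
    }

    /* Stream over src, hinting the cache a few lines ahead. */
    static long sum_bytes(const unsigned char *src, long n)
    {
        long i, s = 0;
        for (i = 0; i < n; i++) {
            if ((i & 63) == 0 && i + 256 < n)
                prefetchnta_c(src + i + 256);
            s += src[i];
        }
        return s;
    }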
| @@ -63,13 +63,13 @@ | ||||
| static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ | ||||
|     int numEq= 0, dcOk; | ||||
|     src+= stride*4; // src points to begin of the 8x8 Block | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq %0, %%mm7                         \n\t" | ||||
|         "movq %1, %%mm6                         \n\t" | ||||
|         : : "m" (c->mmxDcOffset[c->nonBQP]),  "m" (c->mmxDcThreshold[c->nonBQP]) | ||||
|         ); | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%2, %3), %%"REG_a"                \n\t" | ||||
| //      0       1       2       3       4       5       6       7       8       9 | ||||
| //      %1      eax     eax+%2  eax+2%2 %1+4%2  ecx     ecx+%2  ecx+2%2 %1+8%2  ecx+4%2 | ||||
| @@ -181,7 +181,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) | ||||
| { | ||||
| #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | ||||
|     src+= stride*3; | ||||
|     asm volatile(        //"movv %0 %1 %2\n\t" | ||||
|     __asm__ volatile(        //"movv %0 %1 %2\n\t" | ||||
|         "movq %2, %%mm0                         \n\t"  // QP,..., QP | ||||
|         "pxor %%mm4, %%mm4                      \n\t" | ||||
|  | ||||
| @@ -367,7 +367,7 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) | ||||
| #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | ||||
|     src+= stride*3; | ||||
| // FIXME rounding | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor %%mm7, %%mm7                      \n\t" // 0 | ||||
|         "movq "MANGLE(b80)", %%mm6              \n\t" // MIN_SIGNED_BYTE | ||||
|         "leal (%0, %1), %%"REG_a"               \n\t" | ||||
| @@ -465,7 +465,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) | ||||
| #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | ||||
|     src+= stride*3; | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor %%mm7, %%mm7                      \n\t" // 0 | ||||
|         "lea (%0, %1), %%"REG_a"                \n\t" | ||||
|         "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t" | ||||
| @@ -604,7 +604,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext | ||||
|     memcpy(tmp+8, src+stride*8, 8); | ||||
| */ | ||||
|     src+= stride*4; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|  | ||||
| #if 0 //slightly more accurate and slightly slower | ||||
|         "pxor %%mm7, %%mm7                      \n\t" // 0 | ||||
| @@ -871,7 +871,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext | ||||
| */ | ||||
| #elif defined (HAVE_MMX) | ||||
|     src+= stride*4; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor %%mm7, %%mm7                      \n\t" | ||||
|         "lea -40(%%"REG_SP"), %%"REG_c"         \n\t" // make space for 4 8-byte vars | ||||
|         "and "ALIGN_MASK", %%"REG_c"            \n\t" // align | ||||
| @@ -1147,7 +1147,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext | ||||
| static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) | ||||
| { | ||||
| #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "pxor %%mm6, %%mm6                      \n\t" | ||||
|         "pcmpeqb %%mm7, %%mm7                   \n\t" | ||||
|         "movq %2, %%mm0                         \n\t" | ||||
| @@ -1431,7 +1431,7 @@ DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, | ||||
|                 f= (f + 8)>>4; | ||||
|  | ||||
| #ifdef DEBUG_DERING_THRESHOLD | ||||
|                     asm volatile("emms\n\t":); | ||||
|                     __asm__ volatile("emms\n\t":); | ||||
|                     { | ||||
|                     static long long numPixels=0; | ||||
|                     if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++; | ||||
| @@ -1501,7 +1501,7 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid | ||||
| { | ||||
| #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | ||||
|     src+= 4*stride; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%0, %1), %%"REG_a"                \n\t" | ||||
|         "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t" | ||||
| //      0       1       2       3       4       5       6       7       8       9 | ||||
| @@ -1554,7 +1554,7 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride | ||||
| { | ||||
| #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | ||||
|     src+= stride*3; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%0, %1), %%"REG_a"                \n\t" | ||||
|         "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t" | ||||
|         "lea (%%"REG_d", %1, 4), %%"REG_c"      \n\t" | ||||
| @@ -1618,7 +1618,7 @@ static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp | ||||
| { | ||||
| #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | ||||
|     src+= stride*4; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%0, %1), %%"REG_a"                \n\t" | ||||
|         "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t" | ||||
|         "pxor %%mm7, %%mm7                      \n\t" | ||||
| @@ -1697,7 +1697,7 @@ static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp | ||||
| { | ||||
| #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | ||||
|     src+= stride*4; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%0, %1), %%"REG_a"                \n\t" | ||||
|         "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t" | ||||
|         "pxor %%mm7, %%mm7                      \n\t" | ||||
| @@ -1798,7 +1798,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin | ||||
| { | ||||
| #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | ||||
|     src+= 4*stride; | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%0, %1), %%"REG_a"                \n\t" | ||||
|         "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t" | ||||
| //      0       1       2       3       4       5       6       7       8       9 | ||||
| @@ -1900,7 +1900,7 @@ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) | ||||
| #ifdef HAVE_MMX | ||||
|     src+= 4*stride; | ||||
| #ifdef HAVE_MMX2 | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%0, %1), %%"REG_a"                \n\t" | ||||
|         "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t" | ||||
| //      0       1       2       3       4       5       6       7       8       9 | ||||
| @@ -1949,7 +1949,7 @@ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) | ||||
|     ); | ||||
|  | ||||
| #else // MMX without MMX2 | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%0, %1), %%"REG_a"                \n\t" | ||||
|         "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t" | ||||
| //      0       1       2       3       4       5       6       7       8       9 | ||||
| @@ -2018,7 +2018,7 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) | ||||
|  */ | ||||
| static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride) | ||||
| { | ||||
|     asm( | ||||
|     __asm__( | ||||
|         "lea (%0, %1), %%"REG_a"                \n\t" | ||||
| //      0       1       2       3       4       5       6       7       8       9 | ||||
| //      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1 %0+8%1  edx+4%1 | ||||
| @@ -2103,7 +2103,7 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src | ||||
|  */ | ||||
| static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src) | ||||
| { | ||||
|     asm( | ||||
|     __asm__( | ||||
|         "lea (%0, %1), %%"REG_a"                \n\t" | ||||
|         "lea (%%"REG_a",%1,4), %%"REG_d"        \n\t" | ||||
| //      0       1       2       3       4       5       6       7       8       9 | ||||
| @@ -2192,7 +2192,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | ||||
| #define FAST_L2_DIFF | ||||
| //#define L1_DIFF // you should change the thresholds too if you try that one | ||||
| #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%2, %2, 2), %%"REG_a"             \n\t" // 3*stride | ||||
|         "lea (%2, %2, 4), %%"REG_d"             \n\t" // 5*stride | ||||
|         "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride | ||||
| @@ -2575,13 +2575,13 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st | ||||
|     int64_t sums[10*8*2]; | ||||
|     src+= step*3; // src points to begin of the 8x8 Block | ||||
| //START_TIMER | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq %0, %%mm7                         \n\t" | ||||
|         "movq %1, %%mm6                         \n\t" | ||||
|         : : "m" (c->mmxDcOffset[c->nonBQP]),  "m" (c->mmxDcThreshold[c->nonBQP]) | ||||
|         ); | ||||
|  | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%2, %3), %%"REG_a"                \n\t" | ||||
| //      0       1       2       3       4       5       6       7       8       9 | ||||
| //      %1      eax     eax+%2  eax+2%2 %1+4%2  ecx     ecx+%2  ecx+2%2 %1+8%2  ecx+4%2 | ||||
| @@ -2686,7 +2686,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st | ||||
|         long offset= -8*step; | ||||
|         int64_t *temp_sums= sums; | ||||
|  | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movq %2, %%mm0                         \n\t"  // QP,..., QP | ||||
|             "pxor %%mm4, %%mm4                      \n\t" | ||||
|  | ||||
| @@ -2825,7 +2825,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st | ||||
|  | ||||
|         src+= step; // src points to begin of the 8x8 Block | ||||
|  | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "movq %4, %%mm6                         \n\t" | ||||
|             "pcmpeqb %%mm5, %%mm5                   \n\t" | ||||
|             "pxor %%mm6, %%mm5                      \n\t" | ||||
| @@ -2864,7 +2864,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st | ||||
|  | ||||
|     if(eq_mask != -1LL){ | ||||
|         uint8_t *temp_src= src; | ||||
|         asm volatile( | ||||
|         __asm__ volatile( | ||||
|             "pxor %%mm7, %%mm7                      \n\t" | ||||
|             "lea -40(%%"REG_SP"), %%"REG_c"         \n\t" // make space for 4 8-byte vars | ||||
|             "and "ALIGN_MASK", %%"REG_c"            \n\t" // align | ||||
| @@ -3121,7 +3121,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t | ||||
| #endif | ||||
|     if(levelFix){ | ||||
| #ifdef HAVE_MMX | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq (%%"REG_a"), %%mm2        \n\t" // packedYOffset | ||||
|         "movq 8(%%"REG_a"), %%mm3       \n\t" // packedYScale | ||||
|         "lea (%2,%4), %%"REG_a"         \n\t" | ||||
| @@ -3204,7 +3204,7 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2)) | ||||
| #endif //HAVE_MMX | ||||
|     }else{ | ||||
| #ifdef HAVE_MMX | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "lea (%0,%2), %%"REG_a"                 \n\t" | ||||
|         "lea (%1,%3), %%"REG_d"                 \n\t" | ||||
|  | ||||
| @@ -3244,7 +3244,7 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) | ||||
| static inline void RENAME(duplicate)(uint8_t src[], int stride) | ||||
| { | ||||
| #ifdef HAVE_MMX | ||||
|     asm volatile( | ||||
|     __asm__ volatile( | ||||
|         "movq (%0), %%mm0               \n\t" | ||||
|         "add %1, %0                     \n\t" | ||||
|         "movq %%mm0, (%0)               \n\t" | ||||
| @@ -3392,7 +3392,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | ||||
|             prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); | ||||
| */ | ||||
|  | ||||
|             asm( | ||||
|             __asm__( | ||||
|                 "mov %4, %%"REG_a"              \n\t" | ||||
|                 "shr $2, %%"REG_a"              \n\t" | ||||
|                 "and $6, %%"REG_a"              \n\t" | ||||
| @@ -3508,7 +3508,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | ||||
|             } | ||||
|             c.QP= QP; | ||||
| #ifdef HAVE_MMX | ||||
|             asm volatile( | ||||
|             __asm__ volatile( | ||||
|                 "movd %1, %%mm7         \n\t" | ||||
|                 "packuswb %%mm7, %%mm7  \n\t" // 0, 0, 0, QP, 0, 0, 0, QP | ||||
|                 "packuswb %%mm7, %%mm7  \n\t" // 0,QP, 0, QP, 0,QP, 0, QP | ||||
| @@ -3528,7 +3528,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | ||||
|             prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); | ||||
| */ | ||||
|  | ||||
|             asm( | ||||
|             __asm__( | ||||
|                 "mov %4, %%"REG_a"              \n\t" | ||||
|                 "shr $2, %%"REG_a"              \n\t" | ||||
|                 "and $6, %%"REG_a"              \n\t" | ||||
| @@ -3700,9 +3700,9 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | ||||
|         }*/ | ||||
|     } | ||||
| #ifdef HAVE_3DNOW | ||||
|     asm volatile("femms"); | ||||
|     __asm__ volatile("femms"); | ||||
| #elif defined (HAVE_MMX) | ||||
|     asm volatile("emms"); | ||||
|     __asm__ volatile("emms"); | ||||
| #endif | ||||
|  | ||||
| #ifdef DEBUG_BRIGHTNESS | ||||
|   | ||||
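The closing femms/emms above is required because the MMX registers alias the x87 floating-point stack: any MMX block must be ended before float code runs again. A minimal sketch of the idiom, keyed off the same configuration macros:

    /* Leave MMX state; femms is the cheaper 3DNow! variant. */
    static void mmx_done(void)
    {
    #ifdef HAVE_3DNOW
        __asm__ volatile("femms");
    #elif defined (HAVE_MMX)
        __asm__ volatile("emms");
    #endif
    }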