You've already forked FFmpeg
							
							
				Mirror of https://github.com/FFmpeg/FFmpeg.git (synced 2025-10-30 23:18:11 +02:00)
			
		
		
		
	asm: FF_-prefix internal macros used in inline assembly
See merge commit '39d6d3618d48625decaff7d9bdbb45b44ef2a805'.
This commit is contained in:
		| @@ -45,7 +45,7 @@ | ||||
| #define END_CHECK(end) "" | ||||
| #else | ||||
| #define END_CHECK(end) \ | ||||
|         "cmp    "end"       , %%"REG_c"                                 \n\t"\ | ||||
|         "cmp    "end"       , %%"FF_REG_c"                              \n\t"\ | ||||
|         "jge    1f                                                      \n\t" | ||||
| #endif | ||||
|  | ||||
| @@ -92,11 +92,11 @@ | ||||
|         "mov    "tmpbyte"   , "statep"                                  \n\t"\ | ||||
|         "test   "lowword"   , "lowword"                                 \n\t"\ | ||||
|         "jnz    2f                                                      \n\t"\ | ||||
|         "mov    "byte"      , %%"REG_c"                                 \n\t"\ | ||||
|         "mov    "byte"      , %%"FF_REG_c"                              \n\t"\ | ||||
|         END_CHECK(end)\ | ||||
|         "add"OPSIZE" $2     , "byte"                                    \n\t"\ | ||||
|         "add"FF_OPSIZE" $2  , "byte"                                    \n\t"\ | ||||
|         "1:                                                             \n\t"\ | ||||
|         "movzwl (%%"REG_c") , "tmp"                                     \n\t"\ | ||||
|         "movzwl (%%"FF_REG_c") , "tmp"                                  \n\t"\ | ||||
|         "lea    -1("low")   , %%ecx                                     \n\t"\ | ||||
|         "xor    "low"       , %%ecx                                     \n\t"\ | ||||
|         "shr    $15         , %%ecx                                     \n\t"\ | ||||
| @@ -153,11 +153,11 @@ | ||||
|         "mov    "tmpbyte"   , "statep"                                  \n\t"\ | ||||
|         "test   "lowword"   , "lowword"                                 \n\t"\ | ||||
|         " jnz   2f                                                      \n\t"\ | ||||
|         "mov    "byte"      , %%"REG_c"                                 \n\t"\ | ||||
|         "mov    "byte"      , %%"FF_REG_c"                              \n\t"\ | ||||
|         END_CHECK(end)\ | ||||
|         "add"OPSIZE" $2     , "byte"                                    \n\t"\ | ||||
|         "add"FF_OPSIZE" $2  , "byte"                                    \n\t"\ | ||||
|         "1:                                                             \n\t"\ | ||||
|         "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\ | ||||
|         "movzwl (%%"FF_REG_c") , "tmp"                                  \n\t"\ | ||||
|         "lea    -1("low")   , %%ecx                                     \n\t"\ | ||||
|         "xor    "low"       , %%ecx                                     \n\t"\ | ||||
|         "shr    $15         , %%ecx                                     \n\t"\ | ||||
| @@ -203,7 +203,7 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c, | ||||
|           "i"(offsetof(CABACContext, bytestream_end)) | ||||
|           TABLES_ARG | ||||
|           ,"1"(c->low), "2"(c->range) | ||||
|         : "%"REG_c, "memory" | ||||
|         : "%"FF_REG_c, "memory" | ||||
|     ); | ||||
|     return bit & 1; | ||||
| } | ||||
| @@ -240,7 +240,7 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val) | ||||
|         "addl          %%edx, %%eax     \n\t" | ||||
|         "cmp         %c5(%2), %1        \n\t" | ||||
|         "jge              1f            \n\t" | ||||
|         "add"OPSIZE"      $2, %c4(%2)   \n\t" | ||||
|         "add"FF_OPSIZE"   $2, %c4(%2)   \n\t" | ||||
| #endif | ||||
|         "1:                             \n\t" | ||||
|         "movl          %%eax, %c3(%2)   \n\t" | ||||
| @@ -281,7 +281,7 @@ static av_always_inline int get_cabac_bypass_x86(CABACContext *c) | ||||
|         "addl          %%ecx, %%eax     \n\t" | ||||
|         "cmp         %c5(%2), %1        \n\t" | ||||
|         "jge              1f            \n\t" | ||||
|         "add"OPSIZE"      $2, %c4(%2)   \n\t" | ||||
|         "add"FF_OPSIZE"   $2, %c4(%2)   \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movl          %%eax, %c3(%2)   \n\t" | ||||
|  | ||||
|   | ||||
| @@ -91,13 +91,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | ||||
|         "sub  %10, %1                           \n\t" | ||||
|         "mov  %2, %0                            \n\t" | ||||
|         "movl %7, %%ecx                         \n\t" | ||||
|         "add  %1, %%"REG_c"                     \n\t" | ||||
|         "add  %1, %%"FF_REG_c"                  \n\t" | ||||
|         "movl %%ecx, (%0)                       \n\t" | ||||
|  | ||||
|         "test $1, %4                            \n\t" | ||||
|         " jnz 5f                                \n\t" | ||||
|  | ||||
|         "add"OPSIZE"  $4, %2                    \n\t" | ||||
|         "add"FF_OPSIZE"  $4, %2                 \n\t" | ||||
|  | ||||
|         "4:                                     \n\t" | ||||
|         "add  $1, %1                            \n\t" | ||||
| @@ -105,7 +105,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | ||||
|         " jb 3b                                 \n\t" | ||||
|         "mov  %2, %0                            \n\t" | ||||
|         "movl %7, %%ecx                         \n\t" | ||||
|         "add  %1, %%"REG_c"                     \n\t" | ||||
|         "add  %1, %%"FF_REG_c"                  \n\t" | ||||
|         "movl %%ecx, (%0)                       \n\t" | ||||
|         "5:                                     \n\t" | ||||
|         "add  %9, %k0                           \n\t" | ||||
| @@ -116,7 +116,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | ||||
|           "i"(offsetof(CABACContext, bytestream)), | ||||
|           "i"(offsetof(CABACContext, bytestream_end)) | ||||
|           TABLES_ARG | ||||
|         : "%"REG_c, "memory" | ||||
|         : "%"FF_REG_c, "memory" | ||||
|     ); | ||||
|     return coeff_count; | ||||
| } | ||||
| @@ -183,7 +183,7 @@ static int decode_significance_8x8_x86(CABACContext *c, | ||||
|         "test $1, %4                            \n\t" | ||||
|         " jnz 5f                                \n\t" | ||||
|  | ||||
|         "add"OPSIZE"  $4, %2                    \n\t" | ||||
|         "add"FF_OPSIZE"  $4, %2                 \n\t" | ||||
|  | ||||
|         "4:                                     \n\t" | ||||
|         "add $1, %6                             \n\t" | ||||
| @@ -202,7 +202,7 @@ static int decode_significance_8x8_x86(CABACContext *c, | ||||
|           "i"(offsetof(CABACContext, bytestream)), | ||||
|           "i"(offsetof(CABACContext, bytestream_end)), | ||||
|           "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG | ||||
|         : "%"REG_c, "memory" | ||||
|         : "%"FF_REG_c, "memory" | ||||
|     ); | ||||
|     return coeff_count; | ||||
| } | ||||
|   | ||||
| @@ -32,7 +32,7 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels | ||||
| { | ||||
|     MOVQ_BFE(mm6); | ||||
|     __asm__ volatile( | ||||
|         "lea    (%3, %3), %%"REG_a"     \n\t" | ||||
|         "lea    (%3, %3), %%"FF_REG_a"  \n\t" | ||||
|         ".p2align 3                     \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq   (%1), %%mm0             \n\t" | ||||
| @@ -42,8 +42,8 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels | ||||
|         PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5) | ||||
|         "movq   %%mm4, (%2)             \n\t" | ||||
|         "movq   %%mm5, (%2, %3)         \n\t" | ||||
|         "add    %%"REG_a", %1           \n\t" | ||||
|         "add    %%"REG_a", %2           \n\t" | ||||
|         "add    %%"FF_REG_a", %1        \n\t" | ||||
|         "add    %%"FF_REG_a", %2        \n\t" | ||||
|         "movq   (%1), %%mm0             \n\t" | ||||
|         "movq   1(%1), %%mm1            \n\t" | ||||
|         "movq   (%1, %3), %%mm2         \n\t" | ||||
| @@ -51,20 +51,20 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels | ||||
|         PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5) | ||||
|         "movq   %%mm4, (%2)             \n\t" | ||||
|         "movq   %%mm5, (%2, %3)         \n\t" | ||||
|         "add    %%"REG_a", %1           \n\t" | ||||
|         "add    %%"REG_a", %2           \n\t" | ||||
|         "add    %%"FF_REG_a", %1        \n\t" | ||||
|         "add    %%"FF_REG_a", %2        \n\t" | ||||
|         "subl   $4, %0                  \n\t" | ||||
|         "jnz    1b                      \n\t" | ||||
|         :"+g"(h), "+S"(pixels), "+D"(block) | ||||
|         :"r"((x86_reg)line_size) | ||||
|         :REG_a, "memory"); | ||||
|         :FF_REG_a, "memory"); | ||||
| } | ||||
|  | ||||
| av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | ||||
| { | ||||
|     MOVQ_BFE(mm6); | ||||
|     __asm__ volatile( | ||||
|         "lea        (%3, %3), %%"REG_a" \n\t" | ||||
|         "lea    (%3, %3), %%"FF_REG_a"  \n\t" | ||||
|         ".p2align 3                     \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq   (%1), %%mm0             \n\t" | ||||
| @@ -81,8 +81,8 @@ av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixel | ||||
|         PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5) | ||||
|         "movq   %%mm4, 8(%2)            \n\t" | ||||
|         "movq   %%mm5, 8(%2, %3)        \n\t" | ||||
|         "add    %%"REG_a", %1           \n\t" | ||||
|         "add    %%"REG_a", %2           \n\t" | ||||
|         "add    %%"FF_REG_a", %1        \n\t" | ||||
|         "add    %%"FF_REG_a", %2        \n\t" | ||||
|         "movq   (%1), %%mm0             \n\t" | ||||
|         "movq   1(%1), %%mm1            \n\t" | ||||
|         "movq   (%1, %3), %%mm2         \n\t" | ||||
| @@ -97,42 +97,42 @@ av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixel | ||||
|         PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5) | ||||
|         "movq   %%mm4, 8(%2)            \n\t" | ||||
|         "movq   %%mm5, 8(%2, %3)        \n\t" | ||||
|         "add    %%"REG_a", %1           \n\t" | ||||
|         "add    %%"REG_a", %2           \n\t" | ||||
|         "add    %%"FF_REG_a", %1        \n\t" | ||||
|         "add    %%"FF_REG_a", %2        \n\t" | ||||
|         "subl   $4, %0                  \n\t" | ||||
|         "jnz    1b                      \n\t" | ||||
|         :"+g"(h), "+S"(pixels), "+D"(block) | ||||
|         :"r"((x86_reg)line_size) | ||||
|         :REG_a, "memory"); | ||||
|         :FF_REG_a, "memory"); | ||||
| } | ||||
|  | ||||
| av_unused static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | ||||
| { | ||||
|     MOVQ_BFE(mm6); | ||||
|     __asm__ volatile( | ||||
|         "lea (%3, %3), %%"REG_a"        \n\t" | ||||
|         "lea (%3, %3), %%"FF_REG_a"     \n\t" | ||||
|         "movq (%1), %%mm0               \n\t" | ||||
|         ".p2align 3                     \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq   (%1, %3), %%mm1         \n\t" | ||||
|         "movq   (%1, %%"REG_a"),%%mm2   \n\t" | ||||
|         "movq   (%1, %%"FF_REG_a"),%%mm2\n\t" | ||||
|         PAVGBP(%%mm1, %%mm0, %%mm4,   %%mm2, %%mm1, %%mm5) | ||||
|         "movq   %%mm4, (%2)             \n\t" | ||||
|         "movq   %%mm5, (%2, %3)         \n\t" | ||||
|         "add    %%"REG_a", %1           \n\t" | ||||
|         "add    %%"REG_a", %2           \n\t" | ||||
|         "add    %%"FF_REG_a", %1        \n\t" | ||||
|         "add    %%"FF_REG_a", %2        \n\t" | ||||
|         "movq   (%1, %3), %%mm1         \n\t" | ||||
|         "movq   (%1, %%"REG_a"),%%mm0   \n\t" | ||||
|         "movq   (%1, %%"FF_REG_a"),%%mm0\n\t" | ||||
|         PAVGBP(%%mm1, %%mm2, %%mm4,   %%mm0, %%mm1, %%mm5) | ||||
|         "movq   %%mm4, (%2)             \n\t" | ||||
|         "movq   %%mm5, (%2, %3)         \n\t" | ||||
|         "add    %%"REG_a", %1           \n\t" | ||||
|         "add    %%"REG_a", %2           \n\t" | ||||
|         "add    %%"FF_REG_a", %1        \n\t" | ||||
|         "add    %%"FF_REG_a", %2        \n\t" | ||||
|         "subl   $4, %0                  \n\t" | ||||
|         "jnz    1b                      \n\t" | ||||
|         :"+g"(h), "+S"(pixels), "+D"(block) | ||||
|         :"r"((x86_reg)line_size) | ||||
|         :REG_a, "memory"); | ||||
|         :FF_REG_a, "memory"); | ||||
| } | ||||
|  | ||||
| av_unused static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | ||||
| @@ -166,12 +166,12 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels | ||||
| { | ||||
|     MOVQ_BFE(mm6); | ||||
|     __asm__ volatile( | ||||
|         "lea    (%3, %3), %%"REG_a"     \n\t" | ||||
|         "lea    (%3, %3), %%"FF_REG_a"  \n\t" | ||||
|         "movq   (%1), %%mm0             \n\t" | ||||
|         ".p2align 3                     \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq   (%1, %3), %%mm1         \n\t" | ||||
|         "movq   (%1, %%"REG_a"), %%mm2  \n\t" | ||||
|         "movq   (%1, %%"FF_REG_a"), %%mm2 \n\t" | ||||
|         PAVGBP(%%mm1, %%mm0, %%mm4,   %%mm2, %%mm1, %%mm5) | ||||
|         "movq   (%2), %%mm3             \n\t" | ||||
|         PAVGB_MMX(%%mm3, %%mm4, %%mm0, %%mm6) | ||||
| @@ -179,11 +179,11 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels | ||||
|         PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6) | ||||
|         "movq   %%mm0, (%2)             \n\t" | ||||
|         "movq   %%mm1, (%2, %3)         \n\t" | ||||
|         "add    %%"REG_a", %1           \n\t" | ||||
|         "add    %%"REG_a", %2           \n\t" | ||||
|         "add    %%"FF_REG_a", %1        \n\t" | ||||
|         "add    %%"FF_REG_a", %2        \n\t" | ||||
|  | ||||
|         "movq   (%1, %3), %%mm1         \n\t" | ||||
|         "movq   (%1, %%"REG_a"), %%mm0  \n\t" | ||||
|         "movq   (%1, %%"FF_REG_a"), %%mm0 \n\t" | ||||
|         PAVGBP(%%mm1, %%mm2, %%mm4,   %%mm0, %%mm1, %%mm5) | ||||
|         "movq   (%2), %%mm3             \n\t" | ||||
|         PAVGB_MMX(%%mm3, %%mm4, %%mm2, %%mm6) | ||||
| @@ -191,12 +191,12 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels | ||||
|         PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6) | ||||
|         "movq   %%mm2, (%2)             \n\t" | ||||
|         "movq   %%mm1, (%2, %3)         \n\t" | ||||
|         "add    %%"REG_a", %1           \n\t" | ||||
|         "add    %%"REG_a", %2           \n\t" | ||||
|         "add    %%"FF_REG_a", %1        \n\t" | ||||
|         "add    %%"FF_REG_a", %2        \n\t" | ||||
|  | ||||
|         "subl   $4, %0                  \n\t" | ||||
|         "jnz    1b                      \n\t" | ||||
|         :"+g"(h), "+S"(pixels), "+D"(block) | ||||
|         :"r"((x86_reg)line_size) | ||||
|         :REG_a, "memory"); | ||||
|         :FF_REG_a, "memory"); | ||||
| } | ||||
|   | ||||
| @@ -283,15 +283,15 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, | ||||
|     __asm__ volatile ( | ||||
|         ".p2align 4                     \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq (%1, %%"REG_a"), %%mm0    \n\t" | ||||
|         "movq (%2, %%"REG_a"), %%mm2    \n\t" | ||||
|         "movq (%2, %%"REG_a"), %%mm4    \n\t" | ||||
|         "add %3, %%"REG_a"              \n\t" | ||||
|         "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" | ||||
|         "movq (%2, %%"FF_REG_a"), %%mm2 \n\t" | ||||
|         "movq (%2, %%"FF_REG_a"), %%mm4 \n\t" | ||||
|         "add %3, %%"FF_REG_a"           \n\t" | ||||
|         "psubusb %%mm0, %%mm2           \n\t" | ||||
|         "psubusb %%mm4, %%mm0           \n\t" | ||||
|         "movq (%1, %%"REG_a"), %%mm1    \n\t" | ||||
|         "movq (%2, %%"REG_a"), %%mm3    \n\t" | ||||
|         "movq (%2, %%"REG_a"), %%mm5    \n\t" | ||||
|         "movq (%1, %%"FF_REG_a"), %%mm1 \n\t" | ||||
|         "movq (%2, %%"FF_REG_a"), %%mm3 \n\t" | ||||
|         "movq (%2, %%"FF_REG_a"), %%mm5 \n\t" | ||||
|         "psubusb %%mm1, %%mm3           \n\t" | ||||
|         "psubusb %%mm5, %%mm1           \n\t" | ||||
|         "por %%mm2, %%mm0               \n\t" | ||||
| @@ -306,7 +306,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, | ||||
|         "paddw %%mm3, %%mm2             \n\t" | ||||
|         "paddw %%mm2, %%mm0             \n\t" | ||||
|         "paddw %%mm0, %%mm6             \n\t" | ||||
|         "add %3, %%"REG_a"              \n\t" | ||||
|         "add %3, %%"FF_REG_a"           \n\t" | ||||
|         " js 1b                         \n\t" | ||||
|         : "+a" (len) | ||||
|         : "r" (blk1 - len), "r" (blk2 - len), "r" (stride)); | ||||
| @@ -319,18 +319,18 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, | ||||
|     __asm__ volatile ( | ||||
|         ".p2align 4                     \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq (%1, %%"REG_a"), %%mm0    \n\t" | ||||
|         "movq (%2, %%"REG_a"), %%mm1    \n\t" | ||||
|         "movq (%1, %%"REG_a"), %%mm2    \n\t" | ||||
|         "movq (%2, %%"REG_a"), %%mm3    \n\t" | ||||
|         "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" | ||||
|         "movq (%2, %%"FF_REG_a"), %%mm1 \n\t" | ||||
|         "movq (%1, %%"FF_REG_a"), %%mm2 \n\t" | ||||
|         "movq (%2, %%"FF_REG_a"), %%mm3 \n\t" | ||||
|         "punpcklbw %%mm7, %%mm0         \n\t" | ||||
|         "punpcklbw %%mm7, %%mm1         \n\t" | ||||
|         "punpckhbw %%mm7, %%mm2         \n\t" | ||||
|         "punpckhbw %%mm7, %%mm3         \n\t" | ||||
|         "paddw %%mm0, %%mm1             \n\t" | ||||
|         "paddw %%mm2, %%mm3             \n\t" | ||||
|         "movq (%3, %%"REG_a"), %%mm4    \n\t" | ||||
|         "movq (%3, %%"REG_a"), %%mm2    \n\t" | ||||
|         "movq (%3, %%"FF_REG_a"), %%mm4 \n\t" | ||||
|         "movq (%3, %%"FF_REG_a"), %%mm2 \n\t" | ||||
|         "paddw %%mm5, %%mm1             \n\t" | ||||
|         "paddw %%mm5, %%mm3             \n\t" | ||||
|         "psrlw $1, %%mm1                \n\t" | ||||
| @@ -344,7 +344,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, | ||||
|         "punpckhbw %%mm7, %%mm1         \n\t" | ||||
|         "paddw %%mm1, %%mm0             \n\t" | ||||
|         "paddw %%mm0, %%mm6             \n\t" | ||||
|         "add %4, %%"REG_a"              \n\t" | ||||
|         "add %4, %%"FF_REG_a"           \n\t" | ||||
|         " js 1b                         \n\t" | ||||
|         : "+a" (len) | ||||
|         : "r" (blk1a - len), "r" (blk1b - len), "r" (blk2 - len), | ||||
| @@ -356,8 +356,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, | ||||
| { | ||||
|     x86_reg len = -stride * h; | ||||
|     __asm__ volatile ( | ||||
|         "movq  (%1, %%"REG_a"), %%mm0   \n\t" | ||||
|         "movq 1(%1, %%"REG_a"), %%mm2   \n\t" | ||||
|         "movq  (%1, %%"FF_REG_a"), %%mm0\n\t" | ||||
|         "movq 1(%1, %%"FF_REG_a"), %%mm2\n\t" | ||||
|         "movq %%mm0, %%mm1              \n\t" | ||||
|         "movq %%mm2, %%mm3              \n\t" | ||||
|         "punpcklbw %%mm7, %%mm0         \n\t" | ||||
| @@ -368,8 +368,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, | ||||
|         "paddw %%mm3, %%mm1             \n\t" | ||||
|         ".p2align 4                     \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq  (%2, %%"REG_a"), %%mm2   \n\t" | ||||
|         "movq 1(%2, %%"REG_a"), %%mm4   \n\t" | ||||
|         "movq  (%2, %%"FF_REG_a"), %%mm2\n\t" | ||||
|         "movq 1(%2, %%"FF_REG_a"), %%mm4\n\t" | ||||
|         "movq %%mm2, %%mm3              \n\t" | ||||
|         "movq %%mm4, %%mm5              \n\t" | ||||
|         "punpcklbw %%mm7, %%mm2         \n\t" | ||||
| @@ -383,8 +383,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, | ||||
|         "paddw %%mm3, %%mm1             \n\t" | ||||
|         "paddw %%mm5, %%mm0             \n\t" | ||||
|         "paddw %%mm5, %%mm1             \n\t" | ||||
|         "movq (%3, %%"REG_a"), %%mm4    \n\t" | ||||
|         "movq (%3, %%"REG_a"), %%mm5    \n\t" | ||||
|         "movq (%3, %%"FF_REG_a"), %%mm4 \n\t" | ||||
|         "movq (%3, %%"FF_REG_a"), %%mm5 \n\t" | ||||
|         "psrlw $2, %%mm0                \n\t" | ||||
|         "psrlw $2, %%mm1                \n\t" | ||||
|         "packuswb %%mm1, %%mm0          \n\t" | ||||
| @@ -398,7 +398,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, | ||||
|         "paddw %%mm4, %%mm6             \n\t" | ||||
|         "movq  %%mm2, %%mm0             \n\t" | ||||
|         "movq  %%mm3, %%mm1             \n\t" | ||||
|         "add %4, %%"REG_a"              \n\t" | ||||
|         "add %4, %%"FF_REG_a"           \n\t" | ||||
|         " js 1b                         \n\t" | ||||
|         : "+a" (len) | ||||
|         : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), | ||||
|   | ||||
| @@ -188,13 +188,13 @@ __asm__ volatile( | ||||
|                 "movd %2, %%mm6                 \n\t" | ||||
|                 "packssdw %%mm6, %%mm6          \n\t" | ||||
|                 "packssdw %%mm6, %%mm6          \n\t" | ||||
|                 "mov %3, %%"REG_a"              \n\t" | ||||
|                 "mov %3, %%"FF_REG_a"           \n\t" | ||||
|                 ".p2align 4                     \n\t" | ||||
|                 "1:                             \n\t" | ||||
|                 "movq (%0, %%"REG_a"), %%mm0    \n\t" | ||||
|                 "movq 8(%0, %%"REG_a"), %%mm1   \n\t" | ||||
|                 "movq (%1, %%"REG_a"), %%mm4    \n\t" | ||||
|                 "movq 8(%1, %%"REG_a"), %%mm5   \n\t" | ||||
|                 "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" | ||||
|                 "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t" | ||||
|                 "movq (%1, %%"FF_REG_a"), %%mm4 \n\t" | ||||
|                 "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t" | ||||
|                 "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i] | ||||
|                 "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i] | ||||
|                 "pxor %%mm2, %%mm2              \n\t" | ||||
| @@ -209,8 +209,8 @@ __asm__ volatile( | ||||
|                 "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*q | ||||
|                 "pxor %%mm4, %%mm4              \n\t" | ||||
|                 "pxor %%mm5, %%mm5              \n\t" // FIXME slow | ||||
|                 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "psraw $3, %%mm0                \n\t" | ||||
|                 "psraw $3, %%mm1                \n\t" | ||||
|                 "psubw %%mm7, %%mm0             \n\t" | ||||
| @@ -223,13 +223,13 @@ __asm__ volatile( | ||||
|                 "psubw %%mm3, %%mm1             \n\t" | ||||
|                 "pandn %%mm0, %%mm4             \n\t" | ||||
|                 "pandn %%mm1, %%mm5             \n\t" | ||||
|                 "movq %%mm4, (%0, %%"REG_a")    \n\t" | ||||
|                 "movq %%mm5, 8(%0, %%"REG_a")   \n\t" | ||||
|                 "movq %%mm4, (%0, %%"FF_REG_a") \n\t" | ||||
|                 "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t" | ||||
|  | ||||
|                 "add $16, %%"REG_a"             \n\t" | ||||
|                 "add $16, %%"FF_REG_a"          \n\t" | ||||
|                 "js 1b                          \n\t" | ||||
|                 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs) | ||||
|                 : "%"REG_a, "memory" | ||||
|                 : "%"FF_REG_a, "memory" | ||||
|         ); | ||||
|     block[0]= block0; | ||||
| } | ||||
| @@ -251,13 +251,13 @@ __asm__ volatile( | ||||
|                 "movd %2, %%mm6                 \n\t" | ||||
|                 "packssdw %%mm6, %%mm6          \n\t" | ||||
|                 "packssdw %%mm6, %%mm6          \n\t" | ||||
|                 "mov %3, %%"REG_a"              \n\t" | ||||
|                 "mov %3, %%"FF_REG_a"           \n\t" | ||||
|                 ".p2align 4                     \n\t" | ||||
|                 "1:                             \n\t" | ||||
|                 "movq (%0, %%"REG_a"), %%mm0    \n\t" | ||||
|                 "movq 8(%0, %%"REG_a"), %%mm1   \n\t" | ||||
|                 "movq (%1, %%"REG_a"), %%mm4    \n\t" | ||||
|                 "movq 8(%1, %%"REG_a"), %%mm5   \n\t" | ||||
|                 "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" | ||||
|                 "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t" | ||||
|                 "movq (%1, %%"FF_REG_a"), %%mm4 \n\t" | ||||
|                 "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t" | ||||
|                 "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i] | ||||
|                 "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i] | ||||
|                 "pxor %%mm2, %%mm2              \n\t" | ||||
| @@ -276,8 +276,8 @@ __asm__ volatile( | ||||
|                 "pmullw %%mm5, %%mm1            \n\t" // (abs(block[i])*2 + 1)*q | ||||
|                 "pxor %%mm4, %%mm4              \n\t" | ||||
|                 "pxor %%mm5, %%mm5              \n\t" // FIXME slow | ||||
|                 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "psraw $4, %%mm0                \n\t" | ||||
|                 "psraw $4, %%mm1                \n\t" | ||||
|                 "psubw %%mm7, %%mm0             \n\t" | ||||
| @@ -290,13 +290,13 @@ __asm__ volatile( | ||||
|                 "psubw %%mm3, %%mm1             \n\t" | ||||
|                 "pandn %%mm0, %%mm4             \n\t" | ||||
|                 "pandn %%mm1, %%mm5             \n\t" | ||||
|                 "movq %%mm4, (%0, %%"REG_a")    \n\t" | ||||
|                 "movq %%mm5, 8(%0, %%"REG_a")   \n\t" | ||||
|                 "movq %%mm4, (%0, %%"FF_REG_a") \n\t" | ||||
|                 "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t" | ||||
|  | ||||
|                 "add $16, %%"REG_a"             \n\t" | ||||
|                 "add $16, %%"FF_REG_a"          \n\t" | ||||
|                 "js 1b                          \n\t" | ||||
|                 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs) | ||||
|                 : "%"REG_a, "memory" | ||||
|                 : "%"FF_REG_a, "memory" | ||||
|         ); | ||||
| } | ||||
|  | ||||
| @@ -326,13 +326,13 @@ __asm__ volatile( | ||||
|                 "movd %2, %%mm6                 \n\t" | ||||
|                 "packssdw %%mm6, %%mm6          \n\t" | ||||
|                 "packssdw %%mm6, %%mm6          \n\t" | ||||
|                 "mov %3, %%"REG_a"              \n\t" | ||||
|                 "mov %3, %%"FF_REG_a"           \n\t" | ||||
|                 ".p2align 4                     \n\t" | ||||
|                 "1:                             \n\t" | ||||
|                 "movq (%0, %%"REG_a"), %%mm0    \n\t" | ||||
|                 "movq 8(%0, %%"REG_a"), %%mm1   \n\t" | ||||
|                 "movq (%1, %%"REG_a"), %%mm4    \n\t" | ||||
|                 "movq 8(%1, %%"REG_a"), %%mm5   \n\t" | ||||
|                 "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" | ||||
|                 "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t" | ||||
|                 "movq (%1, %%"FF_REG_a"), %%mm4 \n\t" | ||||
|                 "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t" | ||||
|                 "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i] | ||||
|                 "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i] | ||||
|                 "pxor %%mm2, %%mm2              \n\t" | ||||
| @@ -347,8 +347,8 @@ __asm__ volatile( | ||||
|                 "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*q | ||||
|                 "pxor %%mm4, %%mm4              \n\t" | ||||
|                 "pxor %%mm5, %%mm5              \n\t" // FIXME slow | ||||
|                 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "psraw $4, %%mm0                \n\t" | ||||
|                 "psraw $4, %%mm1                \n\t" | ||||
|                 "pxor %%mm2, %%mm0              \n\t" | ||||
| @@ -357,13 +357,13 @@ __asm__ volatile( | ||||
|                 "psubw %%mm3, %%mm1             \n\t" | ||||
|                 "pandn %%mm0, %%mm4             \n\t" | ||||
|                 "pandn %%mm1, %%mm5             \n\t" | ||||
|                 "movq %%mm4, (%0, %%"REG_a")    \n\t" | ||||
|                 "movq %%mm5, 8(%0, %%"REG_a")   \n\t" | ||||
|                 "movq %%mm4, (%0, %%"FF_REG_a") \n\t" | ||||
|                 "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t" | ||||
|  | ||||
|                 "add $16, %%"REG_a"             \n\t" | ||||
|                 "add $16, %%"FF_REG_a"          \n\t" | ||||
|                 "jng 1b                         \n\t" | ||||
|                 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs) | ||||
|                 : "%"REG_a, "memory" | ||||
|                 : "%"FF_REG_a, "memory" | ||||
|         ); | ||||
|     block[0]= block0; | ||||
|         //Note, we do not do mismatch control for intra as errors cannot accumulate | ||||
| @@ -390,13 +390,13 @@ __asm__ volatile( | ||||
|                 "movd %2, %%mm6                 \n\t" | ||||
|                 "packssdw %%mm6, %%mm6          \n\t" | ||||
|                 "packssdw %%mm6, %%mm6          \n\t" | ||||
|                 "mov %3, %%"REG_a"              \n\t" | ||||
|                 "mov %3, %%"FF_REG_a"           \n\t" | ||||
|                 ".p2align 4                     \n\t" | ||||
|                 "1:                             \n\t" | ||||
|                 "movq (%0, %%"REG_a"), %%mm0    \n\t" | ||||
|                 "movq 8(%0, %%"REG_a"), %%mm1   \n\t" | ||||
|                 "movq (%1, %%"REG_a"), %%mm4    \n\t" | ||||
|                 "movq 8(%1, %%"REG_a"), %%mm5   \n\t" | ||||
|                 "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" | ||||
|                 "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t" | ||||
|                 "movq (%1, %%"FF_REG_a"), %%mm4 \n\t" | ||||
|                 "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t" | ||||
|                 "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i] | ||||
|                 "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i] | ||||
|                 "pxor %%mm2, %%mm2              \n\t" | ||||
| @@ -415,8 +415,8 @@ __asm__ volatile( | ||||
|                 "paddw %%mm5, %%mm1             \n\t" // (abs(block[i])*2 + 1)*q | ||||
|                 "pxor %%mm4, %%mm4              \n\t" | ||||
|                 "pxor %%mm5, %%mm5              \n\t" // FIXME slow | ||||
|                 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | ||||
|                 "psrlw $5, %%mm0                \n\t" | ||||
|                 "psrlw $5, %%mm1                \n\t" | ||||
|                 "pxor %%mm2, %%mm0              \n\t" | ||||
| @@ -427,10 +427,10 @@ __asm__ volatile( | ||||
|                 "pandn %%mm1, %%mm5             \n\t" | ||||
|                 "pxor %%mm4, %%mm7              \n\t" | ||||
|                 "pxor %%mm5, %%mm7              \n\t" | ||||
|                 "movq %%mm4, (%0, %%"REG_a")    \n\t" | ||||
|                 "movq %%mm5, 8(%0, %%"REG_a")   \n\t" | ||||
|                 "movq %%mm4, (%0, %%"FF_REG_a") \n\t" | ||||
|                 "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t" | ||||
|  | ||||
|                 "add $16, %%"REG_a"             \n\t" | ||||
|                 "add $16, %%"FF_REG_a"          \n\t" | ||||
|                 "jng 1b                         \n\t" | ||||
|                 "movd 124(%0, %3), %%mm0        \n\t" | ||||
|                 "movq %%mm7, %%mm6              \n\t" | ||||
| @@ -445,7 +445,7 @@ __asm__ volatile( | ||||
|                 "movd %%mm0, 124(%0, %3)        \n\t" | ||||
|  | ||||
|                 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "r" (-2*nCoeffs) | ||||
|                 : "%"REG_a, "memory" | ||||
|                 : "%"FF_REG_a, "memory" | ||||
|         ); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -150,32 +150,32 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | ||||
|     if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ | ||||
|  | ||||
|         __asm__ volatile( | ||||
|             "movd %%"REG_a", "MM"3              \n\t" // last_non_zero_p1 | ||||
|             "movd %%"FF_REG_a", "MM"3           \n\t" // last_non_zero_p1 | ||||
|             SPREADW(MM"3") | ||||
|             "pxor "MM"7, "MM"7                  \n\t" // 0 | ||||
|             "pxor "MM"4, "MM"4                  \n\t" // 0 | ||||
|             MOVQ" (%2), "MM"5                   \n\t" // qmat[0] | ||||
|             "pxor "MM"6, "MM"6                  \n\t" | ||||
|             "psubw (%3), "MM"6                  \n\t" // -bias[0] | ||||
|             "mov $-128, %%"REG_a"               \n\t" | ||||
|             "mov $-128, %%"FF_REG_a"            \n\t" | ||||
|             ".p2align 4                         \n\t" | ||||
|             "1:                                 \n\t" | ||||
|             MOVQ" (%1, %%"REG_a"), "MM"0        \n\t" // block[i] | ||||
|             MOVQ" (%1, %%"FF_REG_a"), "MM"0     \n\t" // block[i] | ||||
|             SAVE_SIGN(MM"1", MM"0")                   // ABS(block[i]) | ||||
|             "psubusw "MM"6, "MM"0               \n\t" // ABS(block[i]) + bias[0] | ||||
|             "pmulhw "MM"5, "MM"0                \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 | ||||
|             "por "MM"0, "MM"4                   \n\t" | ||||
|             RESTORE_SIGN(MM"1", MM"0")                // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) | ||||
|             MOVQ" "MM"0, (%5, %%"REG_a")        \n\t" | ||||
|             MOVQ" "MM"0, (%5, %%"FF_REG_a")     \n\t" | ||||
|             "pcmpeqw "MM"7, "MM"0               \n\t" // out==0 ? 0xFF : 0x00 | ||||
|             MOVQ" (%4, %%"REG_a"), "MM"1        \n\t" | ||||
|             MOVQ" "MM"7, (%1, %%"REG_a")        \n\t" // 0 | ||||
|             MOVQ" (%4, %%"FF_REG_a"), "MM"1     \n\t" | ||||
|             MOVQ" "MM"7, (%1, %%"FF_REG_a")     \n\t" // 0 | ||||
|             "pandn "MM"1, "MM"0                 \n\t" | ||||
|             PMAXW(MM"0", MM"3") | ||||
|             "add $"MMREG_WIDTH", %%"REG_a"      \n\t" | ||||
|             "add $"MMREG_WIDTH", %%"FF_REG_a"   \n\t" | ||||
|             " js 1b                             \n\t" | ||||
|             PMAX(MM"3", MM"0") | ||||
|             "movd "MM"3, %%"REG_a"              \n\t" | ||||
|             "movd "MM"3, %%"FF_REG_a"           \n\t" | ||||
|             "movzbl %%al, %%eax                 \n\t" // last_non_zero_p1 | ||||
|             : "+a" (last_non_zero_p1) | ||||
|             : "r" (block+64), "r" (qmat), "r" (bias), | ||||
| @@ -185,31 +185,31 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | ||||
|         ); | ||||
|     }else{ // FMT_H263 | ||||
|         __asm__ volatile( | ||||
|             "movd %%"REG_a", "MM"3              \n\t" // last_non_zero_p1 | ||||
|             "movd %%"FF_REG_a", "MM"3           \n\t" // last_non_zero_p1 | ||||
|             SPREADW(MM"3") | ||||
|             "pxor "MM"7, "MM"7                  \n\t" // 0 | ||||
|             "pxor "MM"4, "MM"4                  \n\t" // 0 | ||||
|             "mov $-128, %%"REG_a"               \n\t" | ||||
|             "mov $-128, %%"FF_REG_a"            \n\t" | ||||
|             ".p2align 4                         \n\t" | ||||
|             "1:                                 \n\t" | ||||
|             MOVQ" (%1, %%"REG_a"), "MM"0        \n\t" // block[i] | ||||
|             MOVQ" (%1, %%"FF_REG_a"), "MM"0     \n\t" // block[i] | ||||
|             SAVE_SIGN(MM"1", MM"0")                   // ABS(block[i]) | ||||
|             MOVQ" (%3, %%"REG_a"), "MM"6        \n\t" // bias[0] | ||||
|             MOVQ" (%3, %%"FF_REG_a"), "MM"6     \n\t" // bias[0] | ||||
|             "paddusw "MM"6, "MM"0               \n\t" // ABS(block[i]) + bias[0] | ||||
|             MOVQ" (%2, %%"REG_a"), "MM"5        \n\t" // qmat[i] | ||||
|             MOVQ" (%2, %%"FF_REG_a"), "MM"5     \n\t" // qmat[i] | ||||
|             "pmulhw "MM"5, "MM"0                \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 | ||||
|             "por "MM"0, "MM"4                   \n\t" | ||||
|             RESTORE_SIGN(MM"1", MM"0")                // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) | ||||
|             MOVQ" "MM"0, (%5, %%"REG_a")        \n\t" | ||||
|             MOVQ" "MM"0, (%5, %%"FF_REG_a")     \n\t" | ||||
|             "pcmpeqw "MM"7, "MM"0               \n\t" // out==0 ? 0xFF : 0x00 | ||||
|             MOVQ" (%4, %%"REG_a"), "MM"1        \n\t" | ||||
|             MOVQ" "MM"7, (%1, %%"REG_a")        \n\t" // 0 | ||||
|             MOVQ" (%4, %%"FF_REG_a"), "MM"1     \n\t" | ||||
|             MOVQ" "MM"7, (%1, %%"FF_REG_a")     \n\t" // 0 | ||||
|             "pandn "MM"1, "MM"0                 \n\t" | ||||
|             PMAXW(MM"0", MM"3") | ||||
|             "add $"MMREG_WIDTH", %%"REG_a"      \n\t" | ||||
|             "add $"MMREG_WIDTH", %%"FF_REG_a"   \n\t" | ||||
|             " js 1b                             \n\t" | ||||
|             PMAX(MM"3", MM"0") | ||||
|             "movd "MM"3, %%"REG_a"              \n\t" | ||||
|             "movd "MM"3, %%"FF_REG_a"           \n\t" | ||||
|             "movzbl %%al, %%eax                 \n\t" // last_non_zero_p1 | ||||
|             : "+a" (last_non_zero_p1) | ||||
|             : "r" (block+64), "r" (qmat+64), "r" (bias+64), | ||||
|   | ||||
| @@ -46,12 +46,12 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel | ||||
|         "punpckhbw %%mm7, %%mm5         \n\t" | ||||
|         "paddusw %%mm0, %%mm4           \n\t" | ||||
|         "paddusw %%mm1, %%mm5           \n\t" | ||||
|         "xor    %%"REG_a", %%"REG_a"    \n\t" | ||||
|         "xor    %%"FF_REG_a", %%"FF_REG_a" \n\t" | ||||
|         "add    %3, %1                  \n\t" | ||||
|         ".p2align 3                     \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq   (%1, %%"REG_a"), %%mm0  \n\t" | ||||
|         "movq   1(%1, %%"REG_a"), %%mm2 \n\t" | ||||
|         "movq   (%1, %%"FF_REG_a"), %%mm0  \n\t" | ||||
|         "movq   1(%1, %%"FF_REG_a"), %%mm2 \n\t" | ||||
|         "movq   %%mm0, %%mm1            \n\t" | ||||
|         "movq   %%mm2, %%mm3            \n\t" | ||||
|         "punpcklbw %%mm7, %%mm0         \n\t" | ||||
| @@ -67,11 +67,11 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel | ||||
|         "psrlw  $2, %%mm4               \n\t" | ||||
|         "psrlw  $2, %%mm5               \n\t" | ||||
|         "packuswb  %%mm5, %%mm4         \n\t" | ||||
|         "movq   %%mm4, (%2, %%"REG_a")  \n\t" | ||||
|         "add    %3, %%"REG_a"           \n\t" | ||||
|         "movq   %%mm4, (%2, %%"FF_REG_a")  \n\t" | ||||
|         "add    %3, %%"FF_REG_a"           \n\t" | ||||
|  | ||||
|         "movq   (%1, %%"REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3 | ||||
|         "movq   1(%1, %%"REG_a"), %%mm4 \n\t" | ||||
|         "movq   (%1, %%"FF_REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3 | ||||
|         "movq   1(%1, %%"FF_REG_a"), %%mm4 \n\t" | ||||
|         "movq   %%mm2, %%mm3            \n\t" | ||||
|         "movq   %%mm4, %%mm5            \n\t" | ||||
|         "punpcklbw %%mm7, %%mm2         \n\t" | ||||
| @@ -87,14 +87,14 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel | ||||
|         "psrlw  $2, %%mm0               \n\t" | ||||
|         "psrlw  $2, %%mm1               \n\t" | ||||
|         "packuswb  %%mm1, %%mm0         \n\t" | ||||
|         "movq   %%mm0, (%2, %%"REG_a")  \n\t" | ||||
|         "add    %3, %%"REG_a"           \n\t" | ||||
|         "movq   %%mm0, (%2, %%"FF_REG_a")  \n\t" | ||||
|         "add    %3, %%"FF_REG_a"        \n\t" | ||||
|  | ||||
|         "subl   $2, %0                  \n\t" | ||||
|         "jnz    1b                      \n\t" | ||||
|         :"+g"(h), "+S"(pixels) | ||||
|         :"D"(block), "r"((x86_reg)line_size) | ||||
|         :REG_a, "memory"); | ||||
|         :FF_REG_a, "memory"); | ||||
| } | ||||
|  | ||||
| // avg_pixels | ||||
| @@ -115,12 +115,12 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel | ||||
|         "punpckhbw %%mm7, %%mm5         \n\t" | ||||
|         "paddusw %%mm0, %%mm4           \n\t" | ||||
|         "paddusw %%mm1, %%mm5           \n\t" | ||||
|         "xor    %%"REG_a", %%"REG_a"    \n\t" | ||||
|         "xor    %%"FF_REG_a", %%"FF_REG_a" \n\t" | ||||
|         "add    %3, %1                  \n\t" | ||||
|         ".p2align 3                     \n\t" | ||||
|         "1:                             \n\t" | ||||
|         "movq   (%1, %%"REG_a"), %%mm0  \n\t" | ||||
|         "movq   1(%1, %%"REG_a"), %%mm2 \n\t" | ||||
|         "movq   (%1, %%"FF_REG_a"), %%mm0  \n\t" | ||||
|         "movq   1(%1, %%"FF_REG_a"), %%mm2 \n\t" | ||||
|         "movq   %%mm0, %%mm1            \n\t" | ||||
|         "movq   %%mm2, %%mm3            \n\t" | ||||
|         "punpcklbw %%mm7, %%mm0         \n\t" | ||||
| @@ -135,16 +135,16 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel | ||||
|         "paddusw %%mm1, %%mm5           \n\t" | ||||
|         "psrlw  $2, %%mm4               \n\t" | ||||
|         "psrlw  $2, %%mm5               \n\t" | ||||
|                 "movq   (%2, %%"REG_a"), %%mm3  \n\t" | ||||
|                 "movq   (%2, %%"FF_REG_a"), %%mm3  \n\t" | ||||
|         "packuswb  %%mm5, %%mm4         \n\t" | ||||
|                 "pcmpeqd %%mm2, %%mm2   \n\t" | ||||
|                 "paddb %%mm2, %%mm2     \n\t" | ||||
|                 PAVGB_MMX(%%mm3, %%mm4, %%mm5, %%mm2) | ||||
|                 "movq   %%mm5, (%2, %%"REG_a")  \n\t" | ||||
|         "add    %3, %%"REG_a"                \n\t" | ||||
|                 "movq   %%mm5, (%2, %%"FF_REG_a")  \n\t" | ||||
|         "add    %3, %%"FF_REG_a"        \n\t" | ||||
|  | ||||
|         "movq   (%1, %%"REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3 | ||||
|         "movq   1(%1, %%"REG_a"), %%mm4 \n\t" | ||||
|         "movq   (%1, %%"FF_REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3 | ||||
|         "movq   1(%1, %%"FF_REG_a"), %%mm4 \n\t" | ||||
|         "movq   %%mm2, %%mm3            \n\t" | ||||
|         "movq   %%mm4, %%mm5            \n\t" | ||||
|         "punpcklbw %%mm7, %%mm2         \n\t" | ||||
| @@ -159,17 +159,17 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel | ||||
|         "paddusw %%mm5, %%mm1           \n\t" | ||||
|         "psrlw  $2, %%mm0               \n\t" | ||||
|         "psrlw  $2, %%mm1               \n\t" | ||||
|                 "movq   (%2, %%"REG_a"), %%mm3  \n\t" | ||||
|                 "movq   (%2, %%"FF_REG_a"), %%mm3  \n\t" | ||||
|         "packuswb  %%mm1, %%mm0         \n\t" | ||||
|                 "pcmpeqd %%mm2, %%mm2   \n\t" | ||||
|                 "paddb %%mm2, %%mm2     \n\t" | ||||
|                 PAVGB_MMX(%%mm3, %%mm0, %%mm1, %%mm2) | ||||
|                 "movq   %%mm1, (%2, %%"REG_a")  \n\t" | ||||
|         "add    %3, %%"REG_a"           \n\t" | ||||
|                 "movq   %%mm1, (%2, %%"FF_REG_a")  \n\t" | ||||
|         "add    %3, %%"FF_REG_a"           \n\t" | ||||
|  | ||||
|         "subl   $2, %0                  \n\t" | ||||
|         "jnz    1b                      \n\t" | ||||
|         :"+g"(h), "+S"(pixels) | ||||
|         :"D"(block), "r"((x86_reg)line_size) | ||||
|         :REG_a, "memory"); | ||||
|         :FF_REG_a, "memory"); | ||||
| } | ||||
|   | ||||
| @@ -390,10 +390,10 @@ static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int w | ||||
|  | ||||
| #if HAVE_7REGS | ||||
| #define snow_vertical_compose_sse2_load_add(op,r,t0,t1,t2,t3)\ | ||||
|         ""op" ("r",%%"REG_d"), %%"t0"      \n\t"\ | ||||
|         ""op" 16("r",%%"REG_d"), %%"t1"    \n\t"\ | ||||
|         ""op" 32("r",%%"REG_d"), %%"t2"    \n\t"\ | ||||
|         ""op" 48("r",%%"REG_d"), %%"t3"    \n\t" | ||||
|         ""op" ("r",%%"FF_REG_d"), %%"t0"      \n\t"\ | ||||
|         ""op" 16("r",%%"FF_REG_d"), %%"t1"    \n\t"\ | ||||
|         ""op" 32("r",%%"FF_REG_d"), %%"t2"    \n\t"\ | ||||
|         ""op" 48("r",%%"FF_REG_d"), %%"t3"    \n\t" | ||||
|  | ||||
| #define snow_vertical_compose_sse2_load(r,t0,t1,t2,t3)\ | ||||
|         snow_vertical_compose_sse2_load_add("movdqa",r,t0,t1,t2,t3) | ||||
| @@ -408,10 +408,10 @@ static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int w | ||||
|         "psubw %%"s3", %%"t3" \n\t" | ||||
|  | ||||
| #define snow_vertical_compose_sse2_store(w,s0,s1,s2,s3)\ | ||||
|         "movdqa %%"s0", ("w",%%"REG_d")      \n\t"\ | ||||
|         "movdqa %%"s1", 16("w",%%"REG_d")    \n\t"\ | ||||
|         "movdqa %%"s2", 32("w",%%"REG_d")    \n\t"\ | ||||
|         "movdqa %%"s3", 48("w",%%"REG_d")    \n\t" | ||||
|         "movdqa %%"s0", ("w",%%"FF_REG_d")    \n\t"\ | ||||
|         "movdqa %%"s1", 16("w",%%"FF_REG_d")  \n\t"\ | ||||
|         "movdqa %%"s2", 32("w",%%"FF_REG_d")  \n\t"\ | ||||
|         "movdqa %%"s3", 48("w",%%"FF_REG_d")  \n\t" | ||||
|  | ||||
| #define snow_vertical_compose_sra(n,t0,t1,t2,t3)\ | ||||
|         "psraw $"n", %%"t0" \n\t"\ | ||||
| @@ -477,14 +477,14 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE | ||||
|         "psrlw $13, %%xmm5                           \n\t" | ||||
|         "paddw %%xmm7, %%xmm5                        \n\t" | ||||
|         snow_vertical_compose_r2r_add("xmm5","xmm5","xmm5","xmm5","xmm0","xmm2","xmm4","xmm6") | ||||
|         "movq   (%2,%%"REG_d"), %%xmm1        \n\t" | ||||
|         "movq  8(%2,%%"REG_d"), %%xmm3        \n\t" | ||||
|         "movq   (%2,%%"FF_REG_d"), %%xmm1            \n\t" | ||||
|         "movq  8(%2,%%"FF_REG_d"), %%xmm3            \n\t" | ||||
|         "paddw %%xmm7, %%xmm1                        \n\t" | ||||
|         "paddw %%xmm7, %%xmm3                        \n\t" | ||||
|         "pavgw %%xmm1, %%xmm0                        \n\t" | ||||
|         "pavgw %%xmm3, %%xmm2                        \n\t" | ||||
|         "movq 16(%2,%%"REG_d"), %%xmm1        \n\t" | ||||
|         "movq 24(%2,%%"REG_d"), %%xmm3        \n\t" | ||||
|         "movq 16(%2,%%"FF_REG_d"), %%xmm1            \n\t" | ||||
|         "movq 24(%2,%%"FF_REG_d"), %%xmm3            \n\t" | ||||
|         "paddw %%xmm7, %%xmm1                        \n\t" | ||||
|         "paddw %%xmm7, %%xmm3                        \n\t" | ||||
|         "pavgw %%xmm1, %%xmm4                        \n\t" | ||||
| @@ -504,17 +504,17 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE | ||||
|         snow_vertical_compose_sse2_store("%2","xmm0","xmm2","xmm4","xmm6") | ||||
|  | ||||
|         "2:                                          \n\t" | ||||
|         "sub $64, %%"REG_d"                          \n\t" | ||||
|         "sub $64, %%"FF_REG_d"                       \n\t" | ||||
|         "jge 1b                                      \n\t" | ||||
|         :"+d"(i) | ||||
|         :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5)); | ||||
| } | ||||
|  | ||||
| #define snow_vertical_compose_mmx_load_add(op,r,t0,t1,t2,t3)\ | ||||
|         ""op" ("r",%%"REG_d"), %%"t0"   \n\t"\ | ||||
|         ""op" 8("r",%%"REG_d"), %%"t1"  \n\t"\ | ||||
|         ""op" 16("r",%%"REG_d"), %%"t2" \n\t"\ | ||||
|         ""op" 24("r",%%"REG_d"), %%"t3" \n\t" | ||||
|         ""op" ("r",%%"FF_REG_d"), %%"t0"   \n\t"\ | ||||
|         ""op" 8("r",%%"FF_REG_d"), %%"t1"  \n\t"\ | ||||
|         ""op" 16("r",%%"FF_REG_d"), %%"t2" \n\t"\ | ||||
|         ""op" 24("r",%%"FF_REG_d"), %%"t3" \n\t" | ||||
|  | ||||
| #define snow_vertical_compose_mmx_load(r,t0,t1,t2,t3)\ | ||||
|         snow_vertical_compose_mmx_load_add("movq",r,t0,t1,t2,t3) | ||||
| @@ -523,10 +523,10 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE | ||||
|         snow_vertical_compose_mmx_load_add("paddw",r,t0,t1,t2,t3) | ||||
|  | ||||
| #define snow_vertical_compose_mmx_store(w,s0,s1,s2,s3)\ | ||||
|         "movq %%"s0", ("w",%%"REG_d")   \n\t"\ | ||||
|         "movq %%"s1", 8("w",%%"REG_d")  \n\t"\ | ||||
|         "movq %%"s2", 16("w",%%"REG_d") \n\t"\ | ||||
|         "movq %%"s3", 24("w",%%"REG_d") \n\t" | ||||
|         "movq %%"s0", ("w",%%"FF_REG_d")   \n\t"\ | ||||
|         "movq %%"s1", 8("w",%%"FF_REG_d")  \n\t"\ | ||||
|         "movq %%"s2", 16("w",%%"FF_REG_d") \n\t"\ | ||||
|         "movq %%"s3", 24("w",%%"FF_REG_d") \n\t" | ||||
|  | ||||
| #define snow_vertical_compose_mmx_move(s0,s1,s2,s3,t0,t1,t2,t3)\ | ||||
|         "movq %%"s0", %%"t0" \n\t"\ | ||||
| @@ -571,14 +571,14 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM | ||||
|         "psrlw $13, %%mm5                            \n\t" | ||||
|         "paddw %%mm7, %%mm5                          \n\t" | ||||
|         snow_vertical_compose_r2r_add("mm5","mm5","mm5","mm5","mm0","mm2","mm4","mm6") | ||||
|         "movq   (%2,%%"REG_d"), %%mm1         \n\t" | ||||
|         "movq  8(%2,%%"REG_d"), %%mm3         \n\t" | ||||
|         "movq   (%2,%%"FF_REG_d"), %%mm1             \n\t" | ||||
|         "movq  8(%2,%%"FF_REG_d"), %%mm3             \n\t" | ||||
|         "paddw %%mm7, %%mm1                          \n\t" | ||||
|         "paddw %%mm7, %%mm3                          \n\t" | ||||
|         "pavgw %%mm1, %%mm0                          \n\t" | ||||
|         "pavgw %%mm3, %%mm2                          \n\t" | ||||
|         "movq 16(%2,%%"REG_d"), %%mm1         \n\t" | ||||
|         "movq 24(%2,%%"REG_d"), %%mm3         \n\t" | ||||
|         "movq 16(%2,%%"FF_REG_d"), %%mm1             \n\t" | ||||
|         "movq 24(%2,%%"FF_REG_d"), %%mm3             \n\t" | ||||
|         "paddw %%mm7, %%mm1                          \n\t" | ||||
|         "paddw %%mm7, %%mm3                          \n\t" | ||||
|         "pavgw %%mm1, %%mm4                          \n\t" | ||||
| @@ -598,7 +598,7 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM | ||||
|         snow_vertical_compose_mmx_store("%2","mm0","mm2","mm4","mm6") | ||||
|  | ||||
|         "2:                                          \n\t" | ||||
|         "sub $32, %%"REG_d"                          \n\t" | ||||
|         "sub $32, %%"FF_REG_d"                       \n\t" | ||||
|         "jge 1b                                      \n\t" | ||||
|         :"+d"(i) | ||||
|         :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5)); | ||||
| @@ -610,39 +610,39 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM | ||||
|     IDWTELEM * * dst_array = sb->line + src_y;\ | ||||
|     x86_reg tmp;\ | ||||
|     __asm__ volatile(\ | ||||
|              "mov  %7, %%"REG_c"             \n\t"\ | ||||
|              "mov  %7, %%"FF_REG_c"          \n\t"\ | ||||
|              "mov  %6, %2                    \n\t"\ | ||||
|              "mov  %4, %%"REG_S"             \n\t"\ | ||||
|              "mov  %4, %%"FF_REG_S"          \n\t"\ | ||||
|              "pxor %%xmm7, %%xmm7            \n\t" /* 0 */\ | ||||
|              "pcmpeqd %%xmm3, %%xmm3         \n\t"\ | ||||
|              "psllw $15, %%xmm3              \n\t"\ | ||||
|              "psrlw $12, %%xmm3              \n\t" /* FRAC_BITS >> 1 */\ | ||||
|              "1:                             \n\t"\ | ||||
|              "mov %1, %%"REG_D"              \n\t"\ | ||||
|              "mov (%%"REG_D"), %%"REG_D"     \n\t"\ | ||||
|              "add %3, %%"REG_D"              \n\t" | ||||
|              "mov %1, %%"FF_REG_D"           \n\t"\ | ||||
|              "mov (%%"FF_REG_D"), %%"FF_REG_D" \n\t"\ | ||||
|              "add %3, %%"FF_REG_D"           \n\t" | ||||
|  | ||||
| #define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)\ | ||||
|              "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\ | ||||
|              "movq (%%"REG_d"), %%"out_reg1" \n\t"\ | ||||
|              "movq (%%"REG_d", %%"REG_c"), %%"out_reg2" \n\t"\ | ||||
|              "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\ | ||||
|              "movq (%%"FF_REG_d"), %%"out_reg1"                           \n\t"\ | ||||
|              "movq (%%"FF_REG_d", %%"FF_REG_c"), %%"out_reg2"             \n\t"\ | ||||
|              "punpcklbw %%xmm7, %%"out_reg1" \n\t"\ | ||||
|              "punpcklbw %%xmm7, %%"out_reg2" \n\t"\ | ||||
|              "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\ | ||||
|              "movq "s_offset"+16(%%"REG_S"), %%xmm4 \n\t"\ | ||||
|              "movq "s_offset"(%%"FF_REG_S"), %%xmm0    \n\t"\ | ||||
|              "movq "s_offset"+16(%%"FF_REG_S"), %%xmm4 \n\t"\ | ||||
|              "punpcklbw %%xmm7, %%xmm0       \n\t"\ | ||||
|              "punpcklbw %%xmm7, %%xmm4       \n\t"\ | ||||
|              "pmullw %%xmm0, %%"out_reg1"    \n\t"\ | ||||
|              "pmullw %%xmm4, %%"out_reg2"    \n\t" | ||||
|  | ||||
| #define snow_inner_add_yblock_sse2_start_16(out_reg1, out_reg2, ptr_offset, s_offset)\ | ||||
|              "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\ | ||||
|              "movq (%%"REG_d"), %%"out_reg1" \n\t"\ | ||||
|              "movq 8(%%"REG_d"), %%"out_reg2" \n\t"\ | ||||
|              "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\ | ||||
|              "movq (%%"FF_REG_d"), %%"out_reg1"                           \n\t"\ | ||||
|              "movq 8(%%"FF_REG_d"), %%"out_reg2"                          \n\t"\ | ||||
|              "punpcklbw %%xmm7, %%"out_reg1" \n\t"\ | ||||
|              "punpcklbw %%xmm7, %%"out_reg2" \n\t"\ | ||||
|              "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\ | ||||
|              "movq "s_offset"+8(%%"REG_S"), %%xmm4 \n\t"\ | ||||
|              "movq "s_offset"(%%"FF_REG_S"), %%xmm0   \n\t"\ | ||||
|              "movq "s_offset"+8(%%"FF_REG_S"), %%xmm4 \n\t"\ | ||||
|              "punpcklbw %%xmm7, %%xmm0       \n\t"\ | ||||
|              "punpcklbw %%xmm7, %%xmm4       \n\t"\ | ||||
|              "pmullw %%xmm0, %%"out_reg1"    \n\t"\ | ||||
| @@ -659,12 +659,12 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM | ||||
|              "paddusw %%xmm6, %%xmm5         \n\t" | ||||
|  | ||||
| #define snow_inner_add_yblock_sse2_end_common1\ | ||||
|              "add $32, %%"REG_S"             \n\t"\ | ||||
|              "add %%"REG_c", %0              \n\t"\ | ||||
|              "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\ | ||||
|              "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\ | ||||
|              "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\ | ||||
|              "add %%"REG_c", (%%"REG_a")     \n\t" | ||||
|              "add $32, %%"FF_REG_S"                            \n\t"\ | ||||
|              "add %%"FF_REG_c", %0                             \n\t"\ | ||||
|              "add %%"FF_REG_c", "FF_PTR_SIZE"*3(%%"FF_REG_a"); \n\t"\ | ||||
|              "add %%"FF_REG_c", "FF_PTR_SIZE"*2(%%"FF_REG_a"); \n\t"\ | ||||
|              "add %%"FF_REG_c", "FF_PTR_SIZE"*1(%%"FF_REG_a"); \n\t"\ | ||||
|              "add %%"FF_REG_c", (%%"FF_REG_a")                 \n\t" | ||||
|  | ||||
| #define snow_inner_add_yblock_sse2_end_common2\ | ||||
|              "jnz 1b                         \n\t"\ | ||||
| @@ -672,18 +672,18 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM | ||||
|              :\ | ||||
|              "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\ | ||||
|              XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", )\ | ||||
|              "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d""); | ||||
|              "%"FF_REG_c"","%"FF_REG_S"","%"FF_REG_D"","%"FF_REG_d""); | ||||
|  | ||||
| #define snow_inner_add_yblock_sse2_end_8\ | ||||
|              "sal $1, %%"REG_c"              \n\t"\ | ||||
|              "add"OPSIZE" $"PTR_SIZE"*2, %1  \n\t"\ | ||||
|              "sal $1, %%"FF_REG_c"                \n\t"\ | ||||
|              "add"FF_OPSIZE" $"FF_PTR_SIZE"*2, %1 \n\t"\ | ||||
|              snow_inner_add_yblock_sse2_end_common1\ | ||||
|              "sar $1, %%"REG_c"              \n\t"\ | ||||
|              "sar $1, %%"FF_REG_c"           \n\t"\ | ||||
|              "sub $2, %2                     \n\t"\ | ||||
|              snow_inner_add_yblock_sse2_end_common2 | ||||
|  | ||||
| #define snow_inner_add_yblock_sse2_end_16\ | ||||
|              "add"OPSIZE" $"PTR_SIZE"*1, %1  \n\t"\ | ||||
|              "add"FF_OPSIZE" $"FF_PTR_SIZE"*1, %1 \n\t"\ | ||||
|              snow_inner_add_yblock_sse2_end_common1\ | ||||
|              "dec %2                         \n\t"\ | ||||
|              snow_inner_add_yblock_sse2_end_common2 | ||||
| @@ -696,28 +696,28 @@ snow_inner_add_yblock_sse2_accum_8("2", "8") | ||||
| snow_inner_add_yblock_sse2_accum_8("1", "128") | ||||
| snow_inner_add_yblock_sse2_accum_8("0", "136") | ||||
|  | ||||
|              "mov %0, %%"REG_d"              \n\t" | ||||
|              "movdqa (%%"REG_D"), %%xmm0     \n\t" | ||||
|              "mov %0, %%"FF_REG_d"           \n\t" | ||||
|              "movdqa (%%"FF_REG_D"), %%xmm0  \n\t" | ||||
|              "movdqa %%xmm1, %%xmm2          \n\t" | ||||
|  | ||||
|              "punpckhwd %%xmm7, %%xmm1       \n\t" | ||||
|              "punpcklwd %%xmm7, %%xmm2       \n\t" | ||||
|              "paddd %%xmm2, %%xmm0           \n\t" | ||||
|              "movdqa 16(%%"REG_D"), %%xmm2   \n\t" | ||||
|              "movdqa 16(%%"FF_REG_D"), %%xmm2\n\t" | ||||
|              "paddd %%xmm1, %%xmm2           \n\t" | ||||
|              "paddd %%xmm3, %%xmm0           \n\t" | ||||
|              "paddd %%xmm3, %%xmm2           \n\t" | ||||
|  | ||||
|              "mov %1, %%"REG_D"              \n\t" | ||||
|              "mov "PTR_SIZE"(%%"REG_D"), %%"REG_D";\n\t" | ||||
|              "add %3, %%"REG_D"              \n\t" | ||||
|              "mov %1, %%"FF_REG_D"           \n\t" | ||||
|              "mov "FF_PTR_SIZE"(%%"FF_REG_D"), %%"FF_REG_D"; \n\t" | ||||
|              "add %3, %%"FF_REG_D"           \n\t" | ||||
|  | ||||
|              "movdqa (%%"REG_D"), %%xmm4     \n\t" | ||||
|              "movdqa (%%"FF_REG_D"), %%xmm4  \n\t" | ||||
|              "movdqa %%xmm5, %%xmm6          \n\t" | ||||
|              "punpckhwd %%xmm7, %%xmm5       \n\t" | ||||
|              "punpcklwd %%xmm7, %%xmm6       \n\t" | ||||
|              "paddd %%xmm6, %%xmm4           \n\t" | ||||
|              "movdqa 16(%%"REG_D"), %%xmm6   \n\t" | ||||
|              "movdqa 16(%%"FF_REG_D"), %%xmm6\n\t" | ||||
|              "paddd %%xmm5, %%xmm6           \n\t" | ||||
|              "paddd %%xmm3, %%xmm4           \n\t" | ||||
|              "paddd %%xmm3, %%xmm6           \n\t" | ||||
| @@ -726,13 +726,13 @@ snow_inner_add_yblock_sse2_accum_8("0", "136") | ||||
|              "psrad $8, %%xmm2               \n\t" /* FRAC_BITS. */ | ||||
|              "packssdw %%xmm2, %%xmm0        \n\t" | ||||
|              "packuswb %%xmm7, %%xmm0        \n\t" | ||||
|              "movq %%xmm0, (%%"REG_d")       \n\t" | ||||
|              "movq %%xmm0, (%%"FF_REG_d")    \n\t" | ||||
|  | ||||
|              "psrad $8, %%xmm4               \n\t" /* FRAC_BITS. */ | ||||
|              "psrad $8, %%xmm6               \n\t" /* FRAC_BITS. */ | ||||
|              "packssdw %%xmm6, %%xmm4        \n\t" | ||||
|              "packuswb %%xmm7, %%xmm4        \n\t" | ||||
|              "movq %%xmm4, (%%"REG_d",%%"REG_c");\n\t" | ||||
|              "movq %%xmm4, (%%"FF_REG_d",%%"FF_REG_c"); \n\t" | ||||
| snow_inner_add_yblock_sse2_end_8 | ||||
| } | ||||
|  | ||||
| @@ -744,18 +744,18 @@ snow_inner_add_yblock_sse2_accum_16("2", "16") | ||||
| snow_inner_add_yblock_sse2_accum_16("1", "512") | ||||
| snow_inner_add_yblock_sse2_accum_16("0", "528") | ||||
|  | ||||
|              "mov %0, %%"REG_d"              \n\t" | ||||
|              "mov %0, %%"FF_REG_d"           \n\t" | ||||
|              "psrlw $4, %%xmm1               \n\t" | ||||
|              "psrlw $4, %%xmm5               \n\t" | ||||
|              "paddw   (%%"REG_D"), %%xmm1    \n\t" | ||||
|              "paddw 16(%%"REG_D"), %%xmm5    \n\t" | ||||
|              "paddw   (%%"FF_REG_D"), %%xmm1 \n\t" | ||||
|              "paddw 16(%%"FF_REG_D"), %%xmm5 \n\t" | ||||
|              "paddw %%xmm3, %%xmm1           \n\t" | ||||
|              "paddw %%xmm3, %%xmm5           \n\t" | ||||
|              "psraw $4, %%xmm1               \n\t" /* FRAC_BITS. */ | ||||
|              "psraw $4, %%xmm5               \n\t" /* FRAC_BITS. */ | ||||
|              "packuswb %%xmm5, %%xmm1        \n\t" | ||||
|  | ||||
|              "movdqu %%xmm1, (%%"REG_d")       \n\t" | ||||
|              "movdqu %%xmm1, (%%"FF_REG_d")  \n\t" | ||||
|  | ||||
| snow_inner_add_yblock_sse2_end_16 | ||||
| } | ||||
| @@ -764,30 +764,30 @@ snow_inner_add_yblock_sse2_end_16 | ||||
|     IDWTELEM * * dst_array = sb->line + src_y;\ | ||||
|     x86_reg tmp;\ | ||||
|     __asm__ volatile(\ | ||||
|              "mov  %7, %%"REG_c"             \n\t"\ | ||||
|              "mov  %7, %%"FF_REG_c"          \n\t"\ | ||||
|              "mov  %6, %2                    \n\t"\ | ||||
|              "mov  %4, %%"REG_S"             \n\t"\ | ||||
|              "mov  %4, %%"FF_REG_S"          \n\t"\ | ||||
|              "pxor %%mm7, %%mm7              \n\t" /* 0 */\ | ||||
|              "pcmpeqd %%mm3, %%mm3           \n\t"\ | ||||
|              "psllw $15, %%mm3               \n\t"\ | ||||
|              "psrlw $12, %%mm3               \n\t" /* FRAC_BITS >> 1 */\ | ||||
|              "1:                             \n\t"\ | ||||
|              "mov %1, %%"REG_D"              \n\t"\ | ||||
|              "mov (%%"REG_D"), %%"REG_D"     \n\t"\ | ||||
|              "add %3, %%"REG_D"              \n\t" | ||||
|              "mov %1, %%"FF_REG_D"           \n\t"\ | ||||
|              "mov (%%"FF_REG_D"), %%"FF_REG_D" \n\t"\ | ||||
|              "add %3, %%"FF_REG_D"           \n\t" | ||||
|  | ||||
| #define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset)\ | ||||
|              "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\ | ||||
|              "movd "d_offset"(%%"REG_d"), %%"out_reg1" \n\t"\ | ||||
|              "movd "d_offset"+4(%%"REG_d"), %%"out_reg2" \n\t"\ | ||||
|              "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\ | ||||
|              "movd "d_offset"(%%"FF_REG_d"), %%"out_reg1"                 \n\t"\ | ||||
|              "movd "d_offset"+4(%%"FF_REG_d"), %%"out_reg2"               \n\t"\ | ||||
|              "punpcklbw %%mm7, %%"out_reg1" \n\t"\ | ||||
|              "punpcklbw %%mm7, %%"out_reg2" \n\t"\ | ||||
|              "movd "s_offset"(%%"REG_S"), %%mm0 \n\t"\ | ||||
|              "movd "s_offset"+4(%%"REG_S"), %%mm4 \n\t"\ | ||||
|              "movd "s_offset"(%%"FF_REG_S"), %%mm0   \n\t"\ | ||||
|              "movd "s_offset"+4(%%"FF_REG_S"), %%mm4 \n\t"\ | ||||
|              "punpcklbw %%mm7, %%mm0       \n\t"\ | ||||
|              "punpcklbw %%mm7, %%mm4       \n\t"\ | ||||
|              "pmullw %%mm0, %%"out_reg1"    \n\t"\ | ||||
|              "pmullw %%mm4, %%"out_reg2"    \n\t" | ||||
|              "pmullw %%mm0, %%"out_reg1"   \n\t"\ | ||||
|              "pmullw %%mm4, %%"out_reg2"   \n\t" | ||||
|  | ||||
| #define snow_inner_add_yblock_mmx_accum(ptr_offset, s_offset, d_offset) \ | ||||
|              snow_inner_add_yblock_mmx_start("mm2", "mm6", ptr_offset, s_offset, d_offset)\ | ||||
| @@ -795,32 +795,32 @@ snow_inner_add_yblock_sse2_end_16 | ||||
|              "paddusw %%mm6, %%mm5         \n\t" | ||||
|  | ||||
| #define snow_inner_add_yblock_mmx_mix(read_offset, write_offset)\ | ||||
|              "mov %0, %%"REG_d"              \n\t"\ | ||||
|              "mov %0, %%"FF_REG_d"           \n\t"\ | ||||
|              "psrlw $4, %%mm1                \n\t"\ | ||||
|              "psrlw $4, %%mm5                \n\t"\ | ||||
|              "paddw "read_offset"(%%"REG_D"), %%mm1 \n\t"\ | ||||
|              "paddw "read_offset"+8(%%"REG_D"), %%mm5 \n\t"\ | ||||
|              "paddw "read_offset"(%%"FF_REG_D"), %%mm1   \n\t"\ | ||||
|              "paddw "read_offset"+8(%%"FF_REG_D"), %%mm5 \n\t"\ | ||||
|              "paddw %%mm3, %%mm1             \n\t"\ | ||||
|              "paddw %%mm3, %%mm5             \n\t"\ | ||||
|              "psraw $4, %%mm1                \n\t"\ | ||||
|              "psraw $4, %%mm5                \n\t"\ | ||||
|              "packuswb %%mm5, %%mm1          \n\t"\ | ||||
|              "movq %%mm1, "write_offset"(%%"REG_d") \n\t" | ||||
|              "movq %%mm1, "write_offset"(%%"FF_REG_d") \n\t" | ||||
|  | ||||
| #define snow_inner_add_yblock_mmx_end(s_step)\ | ||||
|              "add $"s_step", %%"REG_S"             \n\t"\ | ||||
|              "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\ | ||||
|              "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\ | ||||
|              "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\ | ||||
|              "add %%"REG_c", (%%"REG_a")     \n\t"\ | ||||
|              "add"OPSIZE " $"PTR_SIZE"*1, %1 \n\t"\ | ||||
|              "add %%"REG_c", %0              \n\t"\ | ||||
|              "add $"s_step", %%"FF_REG_S"                      \n\t"\ | ||||
|              "add %%"FF_REG_c", "FF_PTR_SIZE"*3(%%"FF_REG_a"); \n\t"\ | ||||
|              "add %%"FF_REG_c", "FF_PTR_SIZE"*2(%%"FF_REG_a"); \n\t"\ | ||||
|              "add %%"FF_REG_c", "FF_PTR_SIZE"*1(%%"FF_REG_a"); \n\t"\ | ||||
|              "add %%"FF_REG_c", (%%"FF_REG_a")                 \n\t"\ | ||||
|              "add"FF_OPSIZE " $"FF_PTR_SIZE"*1, %1             \n\t"\ | ||||
|              "add %%"FF_REG_c", %0                             \n\t"\ | ||||
|              "dec %2                         \n\t"\ | ||||
|              "jnz 1b                         \n\t"\ | ||||
|              :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\ | ||||
|              :\ | ||||
|              "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\ | ||||
|              "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d""); | ||||
|              "%"FF_REG_c"","%"FF_REG_S"","%"FF_REG_D"","%"FF_REG_d""); | ||||
|  | ||||
| static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h, | ||||
|                       int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | ||||
|   | ||||
| @@ -84,7 +84,7 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\ | ||||
| {\ | ||||
|     rnd = 8-rnd;\ | ||||
|     __asm__ volatile(\ | ||||
|         "mov       $8, %%"REG_c"           \n\t"\ | ||||
|         "mov       $8, %%"FF_REG_c"        \n\t"\ | ||||
|         LOAD_ROUNDER_MMX("%5")\ | ||||
|         "movq      "MANGLE(ff_pw_9)", %%mm6\n\t"\ | ||||
|         "1:                                \n\t"\ | ||||
| @@ -119,13 +119,13 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\ | ||||
|         "movq      %%mm3, (%1)             \n\t"\ | ||||
|         "add       %6, %0                  \n\t"\ | ||||
|         "add       %4, %1                  \n\t"\ | ||||
|         "dec       %%"REG_c"               \n\t"\ | ||||
|         "dec       %%"FF_REG_c"            \n\t"\ | ||||
|         "jnz 1b                            \n\t"\ | ||||
|         : "+r"(src),  "+r"(dst)\ | ||||
|         : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\ | ||||
|           "g"(stride-offset)\ | ||||
|           NAMED_CONSTRAINTS_ADD(ff_pw_9)\ | ||||
|         : "%"REG_c, "memory"\ | ||||
|         : "%"FF_REG_c, "memory"\ | ||||
|     );\ | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -32,22 +32,22 @@ static void line_noise_mmx(uint8_t *dst, const uint8_t *src, | ||||
|     noise += shift; | ||||
|  | ||||
|     __asm__ volatile( | ||||
|             "mov %3, %%"REG_a"               \n\t" | ||||
|             "mov %3, %%"FF_REG_a"            \n\t" | ||||
|             "pcmpeqb %%mm7, %%mm7            \n\t" | ||||
|             "psllw $15, %%mm7                \n\t" | ||||
|             "packsswb %%mm7, %%mm7           \n\t" | ||||
|             ".p2align 4                      \n\t" | ||||
|             "1:                              \n\t" | ||||
|             "movq (%0, %%"REG_a"), %%mm0     \n\t" | ||||
|             "movq (%1, %%"REG_a"), %%mm1     \n\t" | ||||
|             "movq (%0, %%"FF_REG_a"), %%mm0  \n\t" | ||||
|             "movq (%1, %%"FF_REG_a"), %%mm1  \n\t" | ||||
|             "pxor %%mm7, %%mm0               \n\t" | ||||
|             "paddsb %%mm1, %%mm0             \n\t" | ||||
|             "pxor %%mm7, %%mm0               \n\t" | ||||
|             "movq %%mm0, (%2, %%"REG_a")     \n\t" | ||||
|             "add $8, %%"REG_a"               \n\t" | ||||
|             "movq %%mm0, (%2, %%"FF_REG_a")  \n\t" | ||||
|             "add $8, %%"FF_REG_a"            \n\t" | ||||
|             " js 1b                          \n\t" | ||||
|             :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len) | ||||
|             : "%"REG_a | ||||
|             : "%"FF_REG_a | ||||
|     ); | ||||
|     if (mmx_len != len) | ||||
|         ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0); | ||||
| @@ -60,13 +60,13 @@ static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src, | ||||
|     x86_reg mmx_len = len & (~7); | ||||
|  | ||||
|     __asm__ volatile( | ||||
|             "mov %5, %%"REG_a"              \n\t" | ||||
|             "mov %5, %%"FF_REG_a"           \n\t" | ||||
|             ".p2align 4                     \n\t" | ||||
|             "1:                             \n\t" | ||||
|             "movq (%1, %%"REG_a"), %%mm1    \n\t" | ||||
|             "movq (%0, %%"REG_a"), %%mm0    \n\t" | ||||
|             "paddb (%2, %%"REG_a"), %%mm1   \n\t" | ||||
|             "paddb (%3, %%"REG_a"), %%mm1   \n\t" | ||||
|             "movq (%1, %%"FF_REG_a"), %%mm1 \n\t" | ||||
|             "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" | ||||
|             "paddb (%2, %%"FF_REG_a"), %%mm1\n\t" | ||||
|             "paddb (%3, %%"FF_REG_a"), %%mm1\n\t" | ||||
|             "movq %%mm0, %%mm2              \n\t" | ||||
|             "movq %%mm1, %%mm3              \n\t" | ||||
|             "punpcklbw %%mm0, %%mm0         \n\t" | ||||
| @@ -82,12 +82,12 @@ static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src, | ||||
|             "psrlw $8, %%mm1                \n\t" | ||||
|             "psrlw $8, %%mm3                \n\t" | ||||
|             "packuswb %%mm3, %%mm1          \n\t" | ||||
|             "movq %%mm1, (%4, %%"REG_a")    \n\t" | ||||
|             "add $8, %%"REG_a"              \n\t" | ||||
|             "movq %%mm1, (%4, %%"FF_REG_a") \n\t" | ||||
|             "add $8, %%"FF_REG_a"           \n\t" | ||||
|             " js 1b                         \n\t" | ||||
|             :: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len), | ||||
|                "r" (dst+mmx_len), "g" (-mmx_len) | ||||
|             : "%"REG_a | ||||
|             : "%"FF_REG_a | ||||
|         ); | ||||
|  | ||||
|     if (mmx_len != len){ | ||||
| @@ -104,22 +104,22 @@ static void line_noise_mmxext(uint8_t *dst, const uint8_t *src, | ||||
|     noise += shift; | ||||
|  | ||||
|     __asm__ volatile( | ||||
|             "mov %3, %%"REG_a"                \n\t" | ||||
|             "mov %3, %%"FF_REG_a"             \n\t" | ||||
|             "pcmpeqb %%mm7, %%mm7             \n\t" | ||||
|             "psllw $15, %%mm7                 \n\t" | ||||
|             "packsswb %%mm7, %%mm7            \n\t" | ||||
|             ".p2align 4                       \n\t" | ||||
|             "1:                               \n\t" | ||||
|             "movq (%0, %%"REG_a"), %%mm0      \n\t" | ||||
|             "movq (%1, %%"REG_a"), %%mm1      \n\t" | ||||
|             "movq (%0, %%"FF_REG_a"), %%mm0   \n\t" | ||||
|             "movq (%1, %%"FF_REG_a"), %%mm1   \n\t" | ||||
|             "pxor %%mm7, %%mm0                \n\t" | ||||
|             "paddsb %%mm1, %%mm0              \n\t" | ||||
|             "pxor %%mm7, %%mm0                \n\t" | ||||
|             "movntq %%mm0, (%2, %%"REG_a")    \n\t" | ||||
|             "add $8, %%"REG_a"                \n\t" | ||||
|             "movntq %%mm0, (%2, %%"FF_REG_a") \n\t" | ||||
|             "add $8, %%"FF_REG_a"             \n\t" | ||||
|             " js 1b                           \n\t" | ||||
|             :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len) | ||||
|             : "%"REG_a | ||||
|             : "%"FF_REG_a | ||||
|             ); | ||||
|     if (mmx_len != len) | ||||
|         ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0); | ||||
|   | ||||
| @@ -28,46 +28,46 @@ typedef struct xmm_reg { uint64_t a, b; } xmm_reg; | ||||
| typedef struct ymm_reg { uint64_t a, b, c, d; } ymm_reg; | ||||
|  | ||||
| #if ARCH_X86_64 | ||||
| #    define OPSIZE "q" | ||||
| #    define REG_a "rax" | ||||
| #    define REG_b "rbx" | ||||
| #    define REG_c "rcx" | ||||
| #    define REG_d "rdx" | ||||
| #    define REG_D "rdi" | ||||
| #    define REG_S "rsi" | ||||
| #    define PTR_SIZE "8" | ||||
| #    define FF_OPSIZE "q" | ||||
| #    define FF_REG_a "rax" | ||||
| #    define FF_REG_b "rbx" | ||||
| #    define FF_REG_c "rcx" | ||||
| #    define FF_REG_d "rdx" | ||||
| #    define FF_REG_D "rdi" | ||||
| #    define FF_REG_S "rsi" | ||||
| #    define FF_PTR_SIZE "8" | ||||
| typedef int64_t x86_reg; | ||||
|  | ||||
| /* REG_SP is defined in Solaris sys headers, so use REG_sp */ | ||||
| #    define REG_sp "rsp" | ||||
| #    define REG_BP "rbp" | ||||
| #    define REGBP   rbp | ||||
| #    define REGa    rax | ||||
| #    define REGb    rbx | ||||
| #    define REGc    rcx | ||||
| #    define REGd    rdx | ||||
| #    define REGSP   rsp | ||||
| /* REG_SP is defined in Solaris sys headers, hence the lowercase name FF_REG_sp */ | ||||
| #    define FF_REG_sp "rsp" | ||||
| #    define FF_REG_BP "rbp" | ||||
| #    define FF_REGBP   rbp | ||||
| #    define FF_REGa    rax | ||||
| #    define FF_REGb    rbx | ||||
| #    define FF_REGc    rcx | ||||
| #    define FF_REGd    rdx | ||||
| #    define FF_REGSP   rsp | ||||
|  | ||||
| #elif ARCH_X86_32 | ||||
|  | ||||
| #    define OPSIZE "l" | ||||
| #    define REG_a "eax" | ||||
| #    define REG_b "ebx" | ||||
| #    define REG_c "ecx" | ||||
| #    define REG_d "edx" | ||||
| #    define REG_D "edi" | ||||
| #    define REG_S "esi" | ||||
| #    define PTR_SIZE "4" | ||||
| #    define FF_OPSIZE "l" | ||||
| #    define FF_REG_a "eax" | ||||
| #    define FF_REG_b "ebx" | ||||
| #    define FF_REG_c "ecx" | ||||
| #    define FF_REG_d "edx" | ||||
| #    define FF_REG_D "edi" | ||||
| #    define FF_REG_S "esi" | ||||
| #    define FF_PTR_SIZE "4" | ||||
| typedef int32_t x86_reg; | ||||
|  | ||||
| #    define REG_sp "esp" | ||||
| #    define REG_BP "ebp" | ||||
| #    define REGBP   ebp | ||||
| #    define REGa    eax | ||||
| #    define REGb    ebx | ||||
| #    define REGc    ecx | ||||
| #    define REGd    edx | ||||
| #    define REGSP   esp | ||||
| #    define FF_REG_sp "esp" | ||||
| #    define FF_REG_BP "ebp" | ||||
| #    define FF_REGBP   ebp | ||||
| #    define FF_REGa    eax | ||||
| #    define FF_REGb    ebx | ||||
| #    define FF_REGc    ecx | ||||
| #    define FF_REGd    edx | ||||
| #    define FF_REGSP   esp | ||||
| #else | ||||
| typedef int x86_reg; | ||||
| #endif | ||||
|   | ||||
| @@ -41,9 +41,9 @@ | ||||
| /* ebx saving is necessary for PIC. gcc seems unable to see it alone */ | ||||
| #define cpuid(index, eax, ebx, ecx, edx)                        \ | ||||
|     __asm__ volatile (                                          \ | ||||
|         "mov    %%"REG_b", %%"REG_S" \n\t"                      \ | ||||
|         "mov    %%"FF_REG_b", %%"FF_REG_S" \n\t"                \ | ||||
|         "cpuid                       \n\t"                      \ | ||||
|         "xchg   %%"REG_b", %%"REG_S                             \ | ||||
|         "xchg   %%"FF_REG_b", %%"FF_REG_S                       \ | ||||
|         : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)        \ | ||||
|         : "0" (index), "2"(0)) | ||||
|  | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -55,9 +55,9 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, | ||||
|         "jmp                         9f                 \n\t" | ||||
|         // Begin | ||||
|         "0:                                             \n\t" | ||||
|         "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t" | ||||
|         "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t" | ||||
|         "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t" | ||||
|         "movq    (%%"FF_REG_d", %%"FF_REG_a"), %%mm3    \n\t" | ||||
|         "movd    (%%"FF_REG_c", %%"FF_REG_S"), %%mm0    \n\t" | ||||
|         "movd   1(%%"FF_REG_c", %%"FF_REG_S"), %%mm1    \n\t" | ||||
|         "punpcklbw                %%mm7, %%mm1          \n\t" | ||||
|         "punpcklbw                %%mm7, %%mm0          \n\t" | ||||
|         "pshufw                   $0xFF, %%mm1, %%mm1   \n\t" | ||||
| @@ -65,14 +65,14 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, | ||||
|         "pshufw                   $0xFF, %%mm0, %%mm0   \n\t" | ||||
|         "2:                                             \n\t" | ||||
|         "psubw                    %%mm1, %%mm0          \n\t" | ||||
|         "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t" | ||||
|         "movl   8(%%"FF_REG_b", %%"FF_REG_a"), %%esi    \n\t" | ||||
|         "pmullw                   %%mm3, %%mm0          \n\t" | ||||
|         "psllw                       $7, %%mm1          \n\t" | ||||
|         "paddw                    %%mm1, %%mm0          \n\t" | ||||
|  | ||||
|         "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t" | ||||
|         "movq                     %%mm0, (%%"FF_REG_D", %%"FF_REG_a") \n\t" | ||||
|  | ||||
|         "add                         $8, %%"REG_a"      \n\t" | ||||
|         "add                         $8, %%"FF_REG_a"   \n\t" | ||||
|         // End | ||||
|         "9:                                             \n\t" | ||||
|         "lea       " LOCAL_MANGLE(0b) ", %0             \n\t" | ||||
| @@ -94,22 +94,22 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, | ||||
|         "jmp                         9f                 \n\t" | ||||
|         // Begin | ||||
|         "0:                                             \n\t" | ||||
|         "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t" | ||||
|         "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t" | ||||
|         "movq    (%%"FF_REG_d", %%"FF_REG_a"), %%mm3    \n\t" | ||||
|         "movd    (%%"FF_REG_c", %%"FF_REG_S"), %%mm0    \n\t" | ||||
|         "punpcklbw                %%mm7, %%mm0          \n\t" | ||||
|         "pshufw                   $0xFF, %%mm0, %%mm1   \n\t" | ||||
|         "1:                                             \n\t" | ||||
|         "pshufw                   $0xFF, %%mm0, %%mm0   \n\t" | ||||
|         "2:                                             \n\t" | ||||
|         "psubw                    %%mm1, %%mm0          \n\t" | ||||
|         "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t" | ||||
|         "movl   8(%%"FF_REG_b", %%"FF_REG_a"), %%esi    \n\t" | ||||
|         "pmullw                   %%mm3, %%mm0          \n\t" | ||||
|         "psllw                       $7, %%mm1          \n\t" | ||||
|         "paddw                    %%mm1, %%mm0          \n\t" | ||||
|  | ||||
|         "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t" | ||||
|         "movq                     %%mm0, (%%"FF_REG_D", %%"FF_REG_a") \n\t" | ||||
|  | ||||
|         "add                         $8, %%"REG_a"      \n\t" | ||||
|         "add                         $8, %%"FF_REG_a"   \n\t" | ||||
|         // End | ||||
|         "9:                                             \n\t" | ||||
|         "lea       " LOCAL_MANGLE(0b) ", %0             \n\t" | ||||
| @@ -206,39 +206,39 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst, | ||||
|  | ||||
|     __asm__ volatile( | ||||
| #if ARCH_X86_64 | ||||
|         "mov               -8(%%rsp), %%"REG_a" \n\t" | ||||
|         "mov               %%"REG_a", %5        \n\t"  // retsave | ||||
|         "mov               -8(%%rsp), %%"FF_REG_a"    \n\t" | ||||
|         "mov            %%"FF_REG_a", %5              \n\t"  // retsave | ||||
| #else | ||||
| #if defined(PIC) | ||||
|         "mov               %%"REG_b", %5        \n\t"  // ebxsave | ||||
|         "mov            %%"FF_REG_b", %5              \n\t"  // ebxsave | ||||
| #endif | ||||
| #endif | ||||
|         "pxor                  %%mm7, %%mm7     \n\t" | ||||
|         "mov                      %0, %%"REG_c" \n\t" | ||||
|         "mov                      %1, %%"REG_D" \n\t" | ||||
|         "mov                      %2, %%"REG_d" \n\t" | ||||
|         "mov                      %3, %%"REG_b" \n\t" | ||||
|         "xor               %%"REG_a", %%"REG_a" \n\t" // i | ||||
|         PREFETCH"        (%%"REG_c")            \n\t" | ||||
|         PREFETCH"      32(%%"REG_c")            \n\t" | ||||
|         PREFETCH"      64(%%"REG_c")            \n\t" | ||||
|         "pxor                  %%mm7, %%mm7           \n\t" | ||||
|         "mov                      %0, %%"FF_REG_c"    \n\t" | ||||
|         "mov                      %1, %%"FF_REG_D"    \n\t" | ||||
|         "mov                      %2, %%"FF_REG_d"    \n\t" | ||||
|         "mov                      %3, %%"FF_REG_b"    \n\t" | ||||
|         "xor            %%"FF_REG_a", %%"FF_REG_a"    \n\t" // i | ||||
|         PREFETCH"      (%%"FF_REG_c")                 \n\t" | ||||
|         PREFETCH"    32(%%"FF_REG_c")                 \n\t" | ||||
|         PREFETCH"    64(%%"FF_REG_c")                 \n\t" | ||||
|  | ||||
| #if ARCH_X86_64 | ||||
| #define CALL_MMXEXT_FILTER_CODE \ | ||||
|         "movl            (%%"REG_b"), %%esi     \n\t"\ | ||||
|         "call                    *%4            \n\t"\ | ||||
|         "movl (%%"REG_b", %%"REG_a"), %%esi     \n\t"\ | ||||
|         "add               %%"REG_S", %%"REG_c" \n\t"\ | ||||
|         "add               %%"REG_a", %%"REG_D" \n\t"\ | ||||
|         "xor               %%"REG_a", %%"REG_a" \n\t"\ | ||||
|         "movl               (%%"FF_REG_b"), %%esi        \n\t"\ | ||||
|         "call                          *%4               \n\t"\ | ||||
|         "movl (%%"FF_REG_b", %%"FF_REG_a"), %%esi        \n\t"\ | ||||
|         "add                  %%"FF_REG_S", %%"FF_REG_c" \n\t"\ | ||||
|         "add                  %%"FF_REG_a", %%"FF_REG_D" \n\t"\ | ||||
|         "xor                  %%"FF_REG_a", %%"FF_REG_a" \n\t"\ | ||||
|  | ||||
| #else | ||||
| #define CALL_MMXEXT_FILTER_CODE \ | ||||
|         "movl (%%"REG_b"), %%esi        \n\t"\ | ||||
|         "call         *%4                       \n\t"\ | ||||
|         "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\ | ||||
|         "add               %%"REG_a", %%"REG_D" \n\t"\ | ||||
|         "xor               %%"REG_a", %%"REG_a" \n\t"\ | ||||
|         "movl               (%%"FF_REG_b"), %%esi        \n\t"\ | ||||
|         "call                          *%4               \n\t"\ | ||||
|         "addl (%%"FF_REG_b", %%"FF_REG_a"), %%"FF_REG_c" \n\t"\ | ||||
|         "add                  %%"FF_REG_a", %%"FF_REG_D" \n\t"\ | ||||
|         "xor                  %%"FF_REG_a", %%"FF_REG_a" \n\t"\ | ||||
|  | ||||
| #endif /* ARCH_X86_64 */ | ||||
|  | ||||
| @@ -252,11 +252,11 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst, | ||||
|         CALL_MMXEXT_FILTER_CODE | ||||
|  | ||||
| #if ARCH_X86_64 | ||||
|         "mov                      %5, %%"REG_a" \n\t" | ||||
|         "mov               %%"REG_a", -8(%%rsp) \n\t" | ||||
|         "mov                      %5, %%"FF_REG_a" \n\t" | ||||
|         "mov            %%"FF_REG_a", -8(%%rsp)    \n\t" | ||||
| #else | ||||
| #if defined(PIC) | ||||
|         "mov                      %5, %%"REG_b" \n\t" | ||||
|         "mov                      %5, %%"FF_REG_b" \n\t" | ||||
| #endif | ||||
| #endif | ||||
|         :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos), | ||||
| @@ -268,9 +268,9 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst, | ||||
|           ,"m" (ebxsave) | ||||
| #endif | ||||
| #endif | ||||
|         : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D | ||||
|         : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_D | ||||
| #if ARCH_X86_64 || !defined(PIC) | ||||
|          ,"%"REG_b | ||||
|          ,"%"FF_REG_b | ||||
| #endif | ||||
|     ); | ||||
|  | ||||
| @@ -295,33 +295,33 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2, | ||||
| #endif | ||||
|     __asm__ volatile( | ||||
| #if ARCH_X86_64 | ||||
|         "mov          -8(%%rsp), %%"REG_a"  \n\t" | ||||
|         "mov          %%"REG_a", %7         \n\t"  // retsave | ||||
|         "mov          -8(%%rsp), %%"FF_REG_a"    \n\t" | ||||
|         "mov       %%"FF_REG_a", %7              \n\t"  // retsave | ||||
| #else | ||||
| #if defined(PIC) | ||||
|         "mov          %%"REG_b", %7         \n\t"  // ebxsave | ||||
|         "mov       %%"FF_REG_b", %7              \n\t"  // ebxsave | ||||
| #endif | ||||
| #endif | ||||
|         "pxor             %%mm7, %%mm7      \n\t" | ||||
|         "mov                 %0, %%"REG_c"  \n\t" | ||||
|         "mov                 %1, %%"REG_D"  \n\t" | ||||
|         "mov                 %2, %%"REG_d"  \n\t" | ||||
|         "mov                 %3, %%"REG_b"  \n\t" | ||||
|         "xor          %%"REG_a", %%"REG_a"  \n\t" // i | ||||
|         PREFETCH"   (%%"REG_c")             \n\t" | ||||
|         PREFETCH" 32(%%"REG_c")             \n\t" | ||||
|         PREFETCH" 64(%%"REG_c")             \n\t" | ||||
|         "pxor             %%mm7, %%mm7           \n\t" | ||||
|         "mov                 %0, %%"FF_REG_c"    \n\t" | ||||
|         "mov                 %1, %%"FF_REG_D"    \n\t" | ||||
|         "mov                 %2, %%"FF_REG_d"    \n\t" | ||||
|         "mov                 %3, %%"FF_REG_b"    \n\t" | ||||
|         "xor          %%"FF_REG_a", %%"FF_REG_a" \n\t" // i | ||||
|         PREFETCH"   (%%"FF_REG_c")               \n\t" | ||||
|         PREFETCH" 32(%%"FF_REG_c")               \n\t" | ||||
|         PREFETCH" 64(%%"FF_REG_c")               \n\t" | ||||
|  | ||||
|         CALL_MMXEXT_FILTER_CODE | ||||
|         CALL_MMXEXT_FILTER_CODE | ||||
|         CALL_MMXEXT_FILTER_CODE | ||||
|         CALL_MMXEXT_FILTER_CODE | ||||
|         "xor          %%"REG_a", %%"REG_a"  \n\t" // i | ||||
|         "mov                 %5, %%"REG_c"  \n\t" // src2 | ||||
|         "mov                 %6, %%"REG_D"  \n\t" // dst2 | ||||
|         PREFETCH"   (%%"REG_c")             \n\t" | ||||
|         PREFETCH" 32(%%"REG_c")             \n\t" | ||||
|         PREFETCH" 64(%%"REG_c")             \n\t" | ||||
|         "xor          %%"FF_REG_a", %%"FF_REG_a" \n\t" // i | ||||
|         "mov                    %5, %%"FF_REG_c" \n\t" // src2 | ||||
|         "mov                    %6, %%"FF_REG_D" \n\t" // dst2 | ||||
|         PREFETCH"   (%%"FF_REG_c")               \n\t" | ||||
|         PREFETCH" 32(%%"FF_REG_c")               \n\t" | ||||
|         PREFETCH" 64(%%"FF_REG_c")               \n\t" | ||||
|  | ||||
|         CALL_MMXEXT_FILTER_CODE | ||||
|         CALL_MMXEXT_FILTER_CODE | ||||
| @@ -329,11 +329,11 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2, | ||||
|         CALL_MMXEXT_FILTER_CODE | ||||
|  | ||||
| #if ARCH_X86_64 | ||||
|         "mov                 %7, %%"REG_a"  \n\t" | ||||
|         "mov          %%"REG_a", -8(%%rsp)  \n\t" | ||||
|         "mov                    %7, %%"FF_REG_a" \n\t" | ||||
|         "mov          %%"FF_REG_a", -8(%%rsp)    \n\t" | ||||
| #else | ||||
| #if defined(PIC) | ||||
|         "mov %7, %%"REG_b"    \n\t" | ||||
|         "mov %7, %%"FF_REG_b"    \n\t" | ||||
| #endif | ||||
| #endif | ||||
|         :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos), | ||||
| @@ -345,9 +345,9 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2, | ||||
|           ,"m" (ebxsave) | ||||
| #endif | ||||
| #endif | ||||
|         : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D | ||||
|         : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_D | ||||
| #if ARCH_X86_64 || !defined(PIC) | ||||
|          ,"%"REG_b | ||||
|          ,"%"FF_REG_b | ||||
| #endif | ||||
|     ); | ||||
|  | ||||
|   | ||||
| @@ -1101,43 +1101,43 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr | ||||
|     unsigned i; | ||||
|     x86_reg mmx_size= 23 - src_size; | ||||
|     __asm__ volatile ( | ||||
|         "test             %%"REG_a", %%"REG_a"          \n\t" | ||||
|         "test             %%"FF_REG_a", %%"FF_REG_a"    \n\t" | ||||
|         "jns                     2f                     \n\t" | ||||
|         "movq     "MANGLE(mask24r)", %%mm5              \n\t" | ||||
|         "movq     "MANGLE(mask24g)", %%mm6              \n\t" | ||||
|         "movq     "MANGLE(mask24b)", %%mm7              \n\t" | ||||
|         ".p2align                 4                     \n\t" | ||||
|         "1:                                             \n\t" | ||||
|         PREFETCH" 32(%1, %%"REG_a")                     \n\t" | ||||
|         "movq       (%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG | ||||
|         "movq       (%1, %%"REG_a"), %%mm1              \n\t" // BGR BGR BG | ||||
|         "movq      2(%1, %%"REG_a"), %%mm2              \n\t" // R BGR BGR B | ||||
|         PREFETCH" 32(%1, %%"FF_REG_a")                  \n\t" | ||||
|         "movq    (%1, %%"FF_REG_a"), %%mm0              \n\t" // BGR BGR BG | ||||
|         "movq    (%1, %%"FF_REG_a"), %%mm1              \n\t" // BGR BGR BG | ||||
|         "movq   2(%1, %%"FF_REG_a"), %%mm2              \n\t" // R BGR BGR B | ||||
|         "psllq                  $16, %%mm0              \n\t" // 00 BGR BGR | ||||
|         "pand                 %%mm5, %%mm0              \n\t" | ||||
|         "pand                 %%mm6, %%mm1              \n\t" | ||||
|         "pand                 %%mm7, %%mm2              \n\t" | ||||
|         "por                  %%mm0, %%mm1              \n\t" | ||||
|         "por                  %%mm2, %%mm1              \n\t" | ||||
|         "movq      6(%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG | ||||
|         MOVNTQ"               %%mm1,   (%2, %%"REG_a")  \n\t" // RGB RGB RG | ||||
|         "movq      8(%1, %%"REG_a"), %%mm1              \n\t" // R BGR BGR B | ||||
|         "movq     10(%1, %%"REG_a"), %%mm2              \n\t" // GR BGR BGR | ||||
|         "movq   6(%1, %%"FF_REG_a"), %%mm0              \n\t" // BGR BGR BG | ||||
|         MOVNTQ"               %%mm1,(%2, %%"FF_REG_a")  \n\t" // RGB RGB RG | ||||
|         "movq   8(%1, %%"FF_REG_a"), %%mm1              \n\t" // R BGR BGR B | ||||
|         "movq  10(%1, %%"FF_REG_a"), %%mm2              \n\t" // GR BGR BGR | ||||
|         "pand                 %%mm7, %%mm0              \n\t" | ||||
|         "pand                 %%mm5, %%mm1              \n\t" | ||||
|         "pand                 %%mm6, %%mm2              \n\t" | ||||
|         "por                  %%mm0, %%mm1              \n\t" | ||||
|         "por                  %%mm2, %%mm1              \n\t" | ||||
|         "movq     14(%1, %%"REG_a"), %%mm0              \n\t" // R BGR BGR B | ||||
|         MOVNTQ"               %%mm1,  8(%2, %%"REG_a")  \n\t" // B RGB RGB R | ||||
|         "movq     16(%1, %%"REG_a"), %%mm1              \n\t" // GR BGR BGR | ||||
|         "movq     18(%1, %%"REG_a"), %%mm2              \n\t" // BGR BGR BG | ||||
|         "movq  14(%1, %%"FF_REG_a"), %%mm0              \n\t" // R BGR BGR B | ||||
|         MOVNTQ"               %%mm1, 8(%2, %%"FF_REG_a")\n\t" // B RGB RGB R | ||||
|         "movq  16(%1, %%"FF_REG_a"), %%mm1              \n\t" // GR BGR BGR | ||||
|         "movq  18(%1, %%"FF_REG_a"), %%mm2              \n\t" // BGR BGR BG | ||||
|         "pand                 %%mm6, %%mm0              \n\t" | ||||
|         "pand                 %%mm7, %%mm1              \n\t" | ||||
|         "pand                 %%mm5, %%mm2              \n\t" | ||||
|         "por                  %%mm0, %%mm1              \n\t" | ||||
|         "por                  %%mm2, %%mm1              \n\t" | ||||
|         MOVNTQ"               %%mm1, 16(%2, %%"REG_a")  \n\t" | ||||
|         "add                    $24, %%"REG_a"          \n\t" | ||||
|         MOVNTQ"               %%mm1, 16(%2, %%"FF_REG_a") \n\t" | ||||
|         "add                    $24, %%"FF_REG_a"       \n\t" | ||||
|         " js                     1b                     \n\t" | ||||
|         "2:                                             \n\t" | ||||
|         : "+a" (mmx_size) | ||||
| @@ -1173,20 +1173,20 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u | ||||
|     for (y=0; y<height; y++) { | ||||
|         //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) | ||||
|         __asm__ volatile( | ||||
|             "xor                 %%"REG_a", %%"REG_a"   \n\t" | ||||
|             "xor                 %%"FF_REG_a", %%"FF_REG_a" \n\t" | ||||
|             ".p2align                    4              \n\t" | ||||
|             "1:                                         \n\t" | ||||
|             PREFETCH"    32(%1, %%"REG_a", 2)           \n\t" | ||||
|             PREFETCH"    32(%2, %%"REG_a")              \n\t" | ||||
|             PREFETCH"    32(%3, %%"REG_a")              \n\t" | ||||
|             "movq          (%2, %%"REG_a"), %%mm0       \n\t" // U(0) | ||||
|             PREFETCH" 32(%1, %%"FF_REG_a", 2)           \n\t" | ||||
|             PREFETCH" 32(%2, %%"FF_REG_a")              \n\t" | ||||
|             PREFETCH" 32(%3, %%"FF_REG_a")              \n\t" | ||||
|             "movq       (%2, %%"FF_REG_a"), %%mm0       \n\t" // U(0) | ||||
|             "movq                    %%mm0, %%mm2       \n\t" // U(0) | ||||
|             "movq          (%3, %%"REG_a"), %%mm1       \n\t" // V(0) | ||||
|             "movq       (%3, %%"FF_REG_a"), %%mm1       \n\t" // V(0) | ||||
|             "punpcklbw               %%mm1, %%mm0       \n\t" // UVUV UVUV(0) | ||||
|             "punpckhbw               %%mm1, %%mm2       \n\t" // UVUV UVUV(8) | ||||
|  | ||||
|             "movq        (%1, %%"REG_a",2), %%mm3       \n\t" // Y(0) | ||||
|             "movq       8(%1, %%"REG_a",2), %%mm5       \n\t" // Y(8) | ||||
|             "movq     (%1, %%"FF_REG_a",2), %%mm3       \n\t" // Y(0) | ||||
|             "movq    8(%1, %%"FF_REG_a",2), %%mm5       \n\t" // Y(8) | ||||
|             "movq                    %%mm3, %%mm4       \n\t" // Y(0) | ||||
|             "movq                    %%mm5, %%mm6       \n\t" // Y(8) | ||||
|             "punpcklbw               %%mm0, %%mm3       \n\t" // YUYV YUYV(0) | ||||
| @@ -1194,16 +1194,16 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u | ||||
|             "punpcklbw               %%mm2, %%mm5       \n\t" // YUYV YUYV(8) | ||||
|             "punpckhbw               %%mm2, %%mm6       \n\t" // YUYV YUYV(12) | ||||
|  | ||||
|             MOVNTQ"                  %%mm3,   (%0, %%"REG_a", 4)    \n\t" | ||||
|             MOVNTQ"                  %%mm4,  8(%0, %%"REG_a", 4)    \n\t" | ||||
|             MOVNTQ"                  %%mm5, 16(%0, %%"REG_a", 4)    \n\t" | ||||
|             MOVNTQ"                  %%mm6, 24(%0, %%"REG_a", 4)    \n\t" | ||||
|             MOVNTQ"                  %%mm3,   (%0, %%"FF_REG_a", 4)    \n\t" | ||||
|             MOVNTQ"                  %%mm4,  8(%0, %%"FF_REG_a", 4)    \n\t" | ||||
|             MOVNTQ"                  %%mm5, 16(%0, %%"FF_REG_a", 4)    \n\t" | ||||
|             MOVNTQ"                  %%mm6, 24(%0, %%"FF_REG_a", 4)    \n\t" | ||||
|  | ||||
|             "add                        $8, %%"REG_a"   \n\t" | ||||
|             "cmp                        %4, %%"REG_a"   \n\t" | ||||
|             " jb                        1b              \n\t" | ||||
|             "add                        $8, %%"FF_REG_a" \n\t" | ||||
|             "cmp                        %4, %%"FF_REG_a" \n\t" | ||||
|             " jb                        1b               \n\t" | ||||
|             ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) | ||||
|             : "%"REG_a | ||||
|             : "%"FF_REG_a | ||||
|         ); | ||||
|         if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { | ||||
|             usrc += chromStride; | ||||
| @@ -1238,20 +1238,20 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u | ||||
|     for (y=0; y<height; y++) { | ||||
|         //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) | ||||
|         __asm__ volatile( | ||||
|             "xor                %%"REG_a", %%"REG_a"    \n\t" | ||||
|             "xor             %%"FF_REG_a", %%"FF_REG_a" \n\t" | ||||
|             ".p2align                   4               \n\t" | ||||
|             "1:                                         \n\t" | ||||
|             PREFETCH"   32(%1, %%"REG_a", 2)            \n\t" | ||||
|             PREFETCH"   32(%2, %%"REG_a")               \n\t" | ||||
|             PREFETCH"   32(%3, %%"REG_a")               \n\t" | ||||
|             "movq         (%2, %%"REG_a"), %%mm0        \n\t" // U(0) | ||||
|             PREFETCH" 32(%1, %%"FF_REG_a", 2)           \n\t" | ||||
|             PREFETCH" 32(%2, %%"FF_REG_a")              \n\t" | ||||
|             PREFETCH" 32(%3, %%"FF_REG_a")              \n\t" | ||||
|             "movq      (%2, %%"FF_REG_a"), %%mm0        \n\t" // U(0) | ||||
|             "movq                   %%mm0, %%mm2        \n\t" // U(0) | ||||
|             "movq         (%3, %%"REG_a"), %%mm1        \n\t" // V(0) | ||||
|             "movq      (%3, %%"FF_REG_a"), %%mm1        \n\t" // V(0) | ||||
|             "punpcklbw              %%mm1, %%mm0        \n\t" // UVUV UVUV(0) | ||||
|             "punpckhbw              %%mm1, %%mm2        \n\t" // UVUV UVUV(8) | ||||
|  | ||||
|             "movq       (%1, %%"REG_a",2), %%mm3        \n\t" // Y(0) | ||||
|             "movq      8(%1, %%"REG_a",2), %%mm5        \n\t" // Y(8) | ||||
|             "movq    (%1, %%"FF_REG_a",2), %%mm3        \n\t" // Y(0) | ||||
|             "movq   8(%1, %%"FF_REG_a",2), %%mm5        \n\t" // Y(8) | ||||
|             "movq                   %%mm0, %%mm4        \n\t" // Y(0) | ||||
|             "movq                   %%mm2, %%mm6        \n\t" // Y(8) | ||||
|             "punpcklbw              %%mm3, %%mm0        \n\t" // YUYV YUYV(0) | ||||
| @@ -1259,16 +1259,16 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u | ||||
|             "punpcklbw              %%mm5, %%mm2        \n\t" // YUYV YUYV(8) | ||||
|             "punpckhbw              %%mm5, %%mm6        \n\t" // YUYV YUYV(12) | ||||
|  | ||||
|             MOVNTQ"                 %%mm0,   (%0, %%"REG_a", 4)     \n\t" | ||||
|             MOVNTQ"                 %%mm4,  8(%0, %%"REG_a", 4)     \n\t" | ||||
|             MOVNTQ"                 %%mm2, 16(%0, %%"REG_a", 4)     \n\t" | ||||
|             MOVNTQ"                 %%mm6, 24(%0, %%"REG_a", 4)     \n\t" | ||||
|             MOVNTQ"                 %%mm0,   (%0, %%"FF_REG_a", 4)     \n\t" | ||||
|             MOVNTQ"                 %%mm4,  8(%0, %%"FF_REG_a", 4)     \n\t" | ||||
|             MOVNTQ"                 %%mm2, 16(%0, %%"FF_REG_a", 4)     \n\t" | ||||
|             MOVNTQ"                 %%mm6, 24(%0, %%"FF_REG_a", 4)     \n\t" | ||||
|  | ||||
|             "add                       $8, %%"REG_a"    \n\t" | ||||
|             "cmp                       %4, %%"REG_a"    \n\t" | ||||
|             "add                       $8, %%"FF_REG_a" \n\t" | ||||
|             "cmp                       %4, %%"FF_REG_a" \n\t" | ||||
|             " jb                       1b               \n\t" | ||||
|             ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) | ||||
|             : "%"REG_a | ||||
|             : "%"FF_REG_a | ||||
|         ); | ||||
|         if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { | ||||
|             usrc += chromStride; | ||||
| @@ -1326,14 +1326,14 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | ||||
|     const x86_reg chromWidth= width>>1; | ||||
|     for (y=0; y<height; y+=2) { | ||||
|         __asm__ volatile( | ||||
|             "xor                 %%"REG_a", %%"REG_a"   \n\t" | ||||
|             "xor              %%"FF_REG_a", %%"FF_REG_a"\n\t" | ||||
|             "pcmpeqw                 %%mm7, %%mm7       \n\t" | ||||
|             "psrlw                      $8, %%mm7       \n\t" // FF,00,FF,00... | ||||
|             ".p2align                    4              \n\t" | ||||
|             "1:                \n\t" | ||||
|             PREFETCH" 64(%0, %%"REG_a", 4)              \n\t" | ||||
|             "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0) | ||||
|             "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4) | ||||
|             PREFETCH" 64(%0, %%"FF_REG_a", 4)           \n\t" | ||||
|             "movq    (%0, %%"FF_REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0) | ||||
|             "movq   8(%0, %%"FF_REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4) | ||||
|             "movq                    %%mm0, %%mm2       \n\t" // YUYV YUYV(0) | ||||
|             "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(4) | ||||
|             "psrlw                      $8, %%mm0       \n\t" // U0V0 U0V0(0) | ||||
| @@ -1343,10 +1343,10 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | ||||
|             "packuswb                %%mm1, %%mm0       \n\t" // UVUV UVUV(0) | ||||
|             "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(0) | ||||
|  | ||||
|             MOVNTQ"                  %%mm2, (%1, %%"REG_a", 2)  \n\t" | ||||
|             MOVNTQ"                  %%mm2, (%1, %%"FF_REG_a", 2) \n\t" | ||||
|  | ||||
|             "movq     16(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(8) | ||||
|             "movq     24(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(12) | ||||
|             "movq  16(%0, %%"FF_REG_a", 4), %%mm1       \n\t" // YUYV YUYV(8) | ||||
|             "movq  24(%0, %%"FF_REG_a", 4), %%mm2       \n\t" // YUYV YUYV(12) | ||||
|             "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(8) | ||||
|             "movq                    %%mm2, %%mm4       \n\t" // YUYV YUYV(12) | ||||
|             "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(8) | ||||
| @@ -1356,7 +1356,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | ||||
|             "packuswb                %%mm2, %%mm1       \n\t" // UVUV UVUV(8) | ||||
|             "packuswb                %%mm4, %%mm3       \n\t" // YYYY YYYY(8) | ||||
|  | ||||
|             MOVNTQ"                  %%mm3, 8(%1, %%"REG_a", 2) \n\t" | ||||
|             MOVNTQ"                  %%mm3, 8(%1, %%"FF_REG_a", 2) \n\t" | ||||
|  | ||||
|             "movq                    %%mm0, %%mm2       \n\t" // UVUV UVUV(0) | ||||
|             "movq                    %%mm1, %%mm3       \n\t" // UVUV UVUV(8) | ||||
| @@ -1367,28 +1367,28 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | ||||
|             "packuswb                %%mm1, %%mm0       \n\t" // VVVV VVVV(0) | ||||
|             "packuswb                %%mm3, %%mm2       \n\t" // UUUU UUUU(0) | ||||
|  | ||||
|             MOVNTQ"                  %%mm0, (%3, %%"REG_a")     \n\t" | ||||
|             MOVNTQ"                  %%mm2, (%2, %%"REG_a")     \n\t" | ||||
|             MOVNTQ"                  %%mm0, (%3, %%"FF_REG_a")     \n\t" | ||||
|             MOVNTQ"                  %%mm2, (%2, %%"FF_REG_a")     \n\t" | ||||
|  | ||||
|             "add                        $8, %%"REG_a"   \n\t" | ||||
|             "cmp                        %4, %%"REG_a"   \n\t" | ||||
|             " jb                        1b              \n\t" | ||||
|             "add                        $8, %%"FF_REG_a" \n\t" | ||||
|             "cmp                        %4, %%"FF_REG_a" \n\t" | ||||
|             " jb                        1b               \n\t" | ||||
|             ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | ||||
|             : "memory", "%"REG_a | ||||
|             : "memory", "%"FF_REG_a | ||||
|         ); | ||||
|  | ||||
|         ydst += lumStride; | ||||
|         src  += srcStride; | ||||
|  | ||||
|         __asm__ volatile( | ||||
|             "xor                 %%"REG_a", %%"REG_a"   \n\t" | ||||
|             "xor              %%"FF_REG_a", %%"FF_REG_a"\n\t" | ||||
|             ".p2align                    4              \n\t" | ||||
|             "1:                                         \n\t" | ||||
|             PREFETCH" 64(%0, %%"REG_a", 4)              \n\t" | ||||
|             "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0) | ||||
|             "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4) | ||||
|             "movq     16(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(8) | ||||
|             "movq     24(%0, %%"REG_a", 4), %%mm3       \n\t" // YUYV YUYV(12) | ||||
|             PREFETCH" 64(%0, %%"FF_REG_a", 4)           \n\t" | ||||
|             "movq    (%0, %%"FF_REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0) | ||||
|             "movq   8(%0, %%"FF_REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4) | ||||
|             "movq  16(%0, %%"FF_REG_a", 4), %%mm2       \n\t" // YUYV YUYV(8) | ||||
|             "movq  24(%0, %%"FF_REG_a", 4), %%mm3       \n\t" // YUYV YUYV(12) | ||||
|             "pand                    %%mm7, %%mm0       \n\t" // Y0Y0 Y0Y0(0) | ||||
|             "pand                    %%mm7, %%mm1       \n\t" // Y0Y0 Y0Y0(4) | ||||
|             "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(8) | ||||
| @@ -1396,15 +1396,15 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | ||||
|             "packuswb                %%mm1, %%mm0       \n\t" // YYYY YYYY(0) | ||||
|             "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(8) | ||||
|  | ||||
|             MOVNTQ"                  %%mm0,  (%1, %%"REG_a", 2) \n\t" | ||||
|             MOVNTQ"                  %%mm2, 8(%1, %%"REG_a", 2) \n\t" | ||||
|             MOVNTQ"                  %%mm0,  (%1, %%"FF_REG_a", 2) \n\t" | ||||
|             MOVNTQ"                  %%mm2, 8(%1, %%"FF_REG_a", 2) \n\t" | ||||
|  | ||||
|             "add                        $8, %%"REG_a"   \n\t" | ||||
|             "cmp                        %4, %%"REG_a"   \n\t" | ||||
|             "add                        $8, %%"FF_REG_a"\n\t" | ||||
|             "cmp                        %4, %%"FF_REG_a"\n\t" | ||||
|             " jb                        1b              \n\t" | ||||
|  | ||||
|             ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | ||||
|             : "memory", "%"REG_a | ||||
|             : "memory", "%"FF_REG_a | ||||
|         ); | ||||
|         udst += chromStride; | ||||
|         vdst += chromStride; | ||||
| @@ -1438,23 +1438,23 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid | ||||
|  | ||||
|         if (mmxSize) { | ||||
|         __asm__ volatile( | ||||
|             "mov           %4, %%"REG_a"            \n\t" | ||||
|             "mov                       %4, %%"FF_REG_a" \n\t" | ||||
|             "movq        "MANGLE(mmx_ff)", %%mm0    \n\t" | ||||
|             "movq         (%0, %%"REG_a"), %%mm4    \n\t" | ||||
|             "movq      (%0, %%"FF_REG_a"), %%mm4    \n\t" | ||||
|             "movq                   %%mm4, %%mm2    \n\t" | ||||
|             "psllq                     $8, %%mm4    \n\t" | ||||
|             "pand                   %%mm0, %%mm2    \n\t" | ||||
|             "por                    %%mm2, %%mm4    \n\t" | ||||
|             "movq         (%1, %%"REG_a"), %%mm5    \n\t" | ||||
|             "movq      (%1, %%"FF_REG_a"), %%mm5    \n\t" | ||||
|             "movq                   %%mm5, %%mm3    \n\t" | ||||
|             "psllq                     $8, %%mm5    \n\t" | ||||
|             "pand                   %%mm0, %%mm3    \n\t" | ||||
|             "por                    %%mm3, %%mm5    \n\t" | ||||
|             "1:                                     \n\t" | ||||
|             "movq         (%0, %%"REG_a"), %%mm0    \n\t" | ||||
|             "movq         (%1, %%"REG_a"), %%mm1    \n\t" | ||||
|             "movq        1(%0, %%"REG_a"), %%mm2    \n\t" | ||||
|             "movq        1(%1, %%"REG_a"), %%mm3    \n\t" | ||||
|             "movq      (%0, %%"FF_REG_a"), %%mm0    \n\t" | ||||
|             "movq      (%1, %%"FF_REG_a"), %%mm1    \n\t" | ||||
|             "movq     1(%0, %%"FF_REG_a"), %%mm2    \n\t" | ||||
|             "movq     1(%1, %%"FF_REG_a"), %%mm3    \n\t" | ||||
|             PAVGB"                  %%mm0, %%mm5    \n\t" | ||||
|             PAVGB"                  %%mm0, %%mm3    \n\t" | ||||
|             PAVGB"                  %%mm0, %%mm5    \n\t" | ||||
| @@ -1469,19 +1469,19 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid | ||||
|             "punpckhbw              %%mm3, %%mm7    \n\t" | ||||
|             "punpcklbw              %%mm2, %%mm4    \n\t" | ||||
|             "punpckhbw              %%mm2, %%mm6    \n\t" | ||||
|             MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t" | ||||
|             MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t" | ||||
|             MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t" | ||||
|             MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t" | ||||
|             "add                       $8, %%"REG_a"            \n\t" | ||||
|             "movq       -1(%0, %%"REG_a"), %%mm4    \n\t" | ||||
|             "movq       -1(%1, %%"REG_a"), %%mm5    \n\t" | ||||
|             " js                       1b                       \n\t" | ||||
|             MOVNTQ"                 %%mm5,  (%2, %%"FF_REG_a", 2)  \n\t" | ||||
|             MOVNTQ"                 %%mm7, 8(%2, %%"FF_REG_a", 2)  \n\t" | ||||
|             MOVNTQ"                 %%mm4,  (%3, %%"FF_REG_a", 2)  \n\t" | ||||
|             MOVNTQ"                 %%mm6, 8(%3, %%"FF_REG_a", 2)  \n\t" | ||||
|             "add                       $8, %%"FF_REG_a"            \n\t" | ||||
|             "movq    -1(%0, %%"FF_REG_a"), %%mm4    \n\t" | ||||
|             "movq    -1(%1, %%"FF_REG_a"), %%mm5    \n\t" | ||||
|             " js                       1b           \n\t" | ||||
|             :: "r" (src + mmxSize  ), "r" (src + srcStride + mmxSize  ), | ||||
|                "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), | ||||
|                "g" (-mmxSize) | ||||
|                NAMED_CONSTRAINTS_ADD(mmx_ff) | ||||
|             : "%"REG_a | ||||
|             : "%"FF_REG_a | ||||
|         ); | ||||
|         } else { | ||||
|             mmxSize = 1; | ||||
| @@ -1532,14 +1532,14 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | ||||
|     const x86_reg chromWidth= width>>1; | ||||
|     for (y=0; y<height; y+=2) { | ||||
|         __asm__ volatile( | ||||
|             "xor                 %%"REG_a", %%"REG_a"   \n\t" | ||||
|             "xor          %%"FF_REG_a", %%"FF_REG_a" \n\t" | ||||
|             "pcmpeqw             %%mm7, %%mm7   \n\t" | ||||
|             "psrlw                  $8, %%mm7   \n\t" // FF,00,FF,00... | ||||
|             ".p2align                4          \n\t" | ||||
|             "1:                                 \n\t" | ||||
|             PREFETCH" 64(%0, %%"REG_a", 4)          \n\t" | ||||
|             "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0) | ||||
|             "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4) | ||||
|             PREFETCH" 64(%0, %%"FF_REG_a", 4)          \n\t" | ||||
|             "movq       (%0, %%"FF_REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0) | ||||
|             "movq      8(%0, %%"FF_REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4) | ||||
|             "movq                %%mm0, %%mm2   \n\t" // UYVY UYVY(0) | ||||
|             "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(4) | ||||
|             "pand                %%mm7, %%mm0   \n\t" // U0V0 U0V0(0) | ||||
| @@ -1549,10 +1549,10 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | ||||
|             "packuswb            %%mm1, %%mm0   \n\t" // UVUV UVUV(0) | ||||
|             "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(0) | ||||
|  | ||||
|             MOVNTQ"              %%mm2,  (%1, %%"REG_a", 2) \n\t" | ||||
|             MOVNTQ"              %%mm2,  (%1, %%"FF_REG_a", 2) \n\t" | ||||
|  | ||||
|             "movq     16(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(8) | ||||
|             "movq     24(%0, %%"REG_a", 4), %%mm2   \n\t" // UYVY UYVY(12) | ||||
|             "movq     16(%0, %%"FF_REG_a", 4), %%mm1   \n\t" // UYVY UYVY(8) | ||||
|             "movq     24(%0, %%"FF_REG_a", 4), %%mm2   \n\t" // UYVY UYVY(12) | ||||
|             "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(8) | ||||
|             "movq                %%mm2, %%mm4   \n\t" // UYVY UYVY(12) | ||||
|             "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(8) | ||||
| @@ -1562,7 +1562,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | ||||
|             "packuswb            %%mm2, %%mm1   \n\t" // UVUV UVUV(8) | ||||
|             "packuswb            %%mm4, %%mm3   \n\t" // YYYY YYYY(8) | ||||
|  | ||||
|             MOVNTQ"              %%mm3, 8(%1, %%"REG_a", 2) \n\t" | ||||
|             MOVNTQ"              %%mm3, 8(%1, %%"FF_REG_a", 2) \n\t" | ||||
|  | ||||
|             "movq                %%mm0, %%mm2   \n\t" // UVUV UVUV(0) | ||||
|             "movq                %%mm1, %%mm3   \n\t" // UVUV UVUV(8) | ||||
| @@ -1573,28 +1573,28 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | ||||
|             "packuswb            %%mm1, %%mm0   \n\t" // VVVV VVVV(0) | ||||
|             "packuswb            %%mm3, %%mm2   \n\t" // UUUU UUUU(0) | ||||
|  | ||||
|             MOVNTQ"              %%mm0, (%3, %%"REG_a") \n\t" | ||||
|             MOVNTQ"              %%mm2, (%2, %%"REG_a") \n\t" | ||||
|             MOVNTQ"              %%mm0, (%3, %%"FF_REG_a") \n\t" | ||||
|             MOVNTQ"              %%mm2, (%2, %%"FF_REG_a") \n\t" | ||||
|  | ||||
|             "add                    $8, %%"REG_a"   \n\t" | ||||
|             "cmp                    %4, %%"REG_a"   \n\t" | ||||
|             " jb                    1b          \n\t" | ||||
|             "add                    $8, %%"FF_REG_a" \n\t" | ||||
|             "cmp                    %4, %%"FF_REG_a" \n\t" | ||||
|             " jb                    1b               \n\t" | ||||
|             ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | ||||
|             : "memory", "%"REG_a | ||||
|             : "memory", "%"FF_REG_a | ||||
|         ); | ||||
|  | ||||
|         ydst += lumStride; | ||||
|         src  += srcStride; | ||||
|  | ||||
|         __asm__ volatile( | ||||
|             "xor                 %%"REG_a", %%"REG_a"   \n\t" | ||||
|             ".p2align                    4              \n\t" | ||||
|             "1:                                 \n\t" | ||||
|             PREFETCH" 64(%0, %%"REG_a", 4)          \n\t" | ||||
|             "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // YUYV YUYV(0) | ||||
|             "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // YUYV YUYV(4) | ||||
|             "movq     16(%0, %%"REG_a", 4), %%mm2   \n\t" // YUYV YUYV(8) | ||||
|             "movq     24(%0, %%"REG_a", 4), %%mm3   \n\t" // YUYV YUYV(12) | ||||
|             "xor          %%"FF_REG_a", %%"FF_REG_a"  \n\t" | ||||
|             ".p2align                4                \n\t" | ||||
|             "1:                                       \n\t" | ||||
|             PREFETCH" 64(%0, %%"FF_REG_a", 4)         \n\t" | ||||
|             "movq       (%0, %%"FF_REG_a", 4), %%mm0  \n\t" // YUYV YUYV(0) | ||||
|             "movq      8(%0, %%"FF_REG_a", 4), %%mm1  \n\t" // YUYV YUYV(4) | ||||
|             "movq     16(%0, %%"FF_REG_a", 4), %%mm2  \n\t" // YUYV YUYV(8) | ||||
|             "movq     24(%0, %%"FF_REG_a", 4), %%mm3  \n\t" // YUYV YUYV(12) | ||||
|             "psrlw                  $8, %%mm0   \n\t" // Y0Y0 Y0Y0(0) | ||||
|             "psrlw                  $8, %%mm1   \n\t" // Y0Y0 Y0Y0(4) | ||||
|             "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(8) | ||||
| @@ -1602,15 +1602,15 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | ||||
|             "packuswb            %%mm1, %%mm0   \n\t" // YYYY YYYY(0) | ||||
|             "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(8) | ||||
|  | ||||
|             MOVNTQ"              %%mm0,  (%1, %%"REG_a", 2) \n\t" | ||||
|             MOVNTQ"              %%mm2, 8(%1, %%"REG_a", 2) \n\t" | ||||
|             MOVNTQ"              %%mm0,  (%1, %%"FF_REG_a", 2) \n\t" | ||||
|             MOVNTQ"              %%mm2, 8(%1, %%"FF_REG_a", 2) \n\t" | ||||
|  | ||||
|             "add                    $8, %%"REG_a"   \n\t" | ||||
|             "cmp                    %4, %%"REG_a"   \n\t" | ||||
|             " jb                    1b          \n\t" | ||||
|             "add                    $8, %%"FF_REG_a" \n\t" | ||||
|             "cmp                    %4, %%"FF_REG_a" \n\t" | ||||
|             " jb                    1b               \n\t" | ||||
|  | ||||
|             ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | ||||
|             : "memory", "%"REG_a | ||||
|             : "memory", "%"FF_REG_a | ||||
|         ); | ||||
|         udst += chromStride; | ||||
|         vdst += chromStride; | ||||
| @@ -1655,20 +1655,20 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | ||||
|         int i; | ||||
|         for (i=0; i<2; i++) { | ||||
|             __asm__ volatile( | ||||
|                 "mov                        %2, %%"REG_a"   \n\t" | ||||
|                 "mov                        %2, %%"FF_REG_a"\n\t" | ||||
|                 "movq          "BGR2Y_IDX"(%3), %%mm6       \n\t" | ||||
|                 "movq       "MANGLE(ff_w1111)", %%mm5       \n\t" | ||||
|                 "pxor                    %%mm7, %%mm7       \n\t" | ||||
|                 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t" | ||||
|                 "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d" \n\t" | ||||
|                 ".p2align                    4              \n\t" | ||||
|                 "1:                                         \n\t" | ||||
|                 PREFETCH"    64(%0, %%"REG_d")              \n\t" | ||||
|                 "movd          (%0, %%"REG_d"), %%mm0       \n\t" | ||||
|                 "movd         3(%0, %%"REG_d"), %%mm1       \n\t" | ||||
|                 PREFETCH" 64(%0, %%"FF_REG_d")              \n\t" | ||||
|                 "movd       (%0, %%"FF_REG_d"), %%mm0       \n\t" | ||||
|                 "movd      3(%0, %%"FF_REG_d"), %%mm1       \n\t" | ||||
|                 "punpcklbw               %%mm7, %%mm0       \n\t" | ||||
|                 "punpcklbw               %%mm7, %%mm1       \n\t" | ||||
|                 "movd         6(%0, %%"REG_d"), %%mm2       \n\t" | ||||
|                 "movd         9(%0, %%"REG_d"), %%mm3       \n\t" | ||||
|                 "movd      6(%0, %%"FF_REG_d"), %%mm2       \n\t" | ||||
|                 "movd      9(%0, %%"FF_REG_d"), %%mm3       \n\t" | ||||
|                 "punpcklbw               %%mm7, %%mm2       \n\t" | ||||
|                 "punpcklbw               %%mm7, %%mm3       \n\t" | ||||
|                 "pmaddwd                 %%mm6, %%mm0       \n\t" | ||||
| @@ -1686,12 +1686,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | ||||
|                 "packssdw                %%mm2, %%mm0       \n\t" | ||||
|                 "psraw                      $7, %%mm0       \n\t" | ||||
|  | ||||
|                 "movd        12(%0, %%"REG_d"), %%mm4       \n\t" | ||||
|                 "movd        15(%0, %%"REG_d"), %%mm1       \n\t" | ||||
|                 "movd     12(%0, %%"FF_REG_d"), %%mm4       \n\t" | ||||
|                 "movd     15(%0, %%"FF_REG_d"), %%mm1       \n\t" | ||||
|                 "punpcklbw               %%mm7, %%mm4       \n\t" | ||||
|                 "punpcklbw               %%mm7, %%mm1       \n\t" | ||||
|                 "movd        18(%0, %%"REG_d"), %%mm2       \n\t" | ||||
|                 "movd        21(%0, %%"REG_d"), %%mm3       \n\t" | ||||
|                 "movd     18(%0, %%"FF_REG_d"), %%mm2       \n\t" | ||||
|                 "movd     21(%0, %%"FF_REG_d"), %%mm3       \n\t" | ||||
|                 "punpcklbw               %%mm7, %%mm2       \n\t" | ||||
|                 "punpcklbw               %%mm7, %%mm3       \n\t" | ||||
|                 "pmaddwd                 %%mm6, %%mm4       \n\t" | ||||
| @@ -1706,40 +1706,40 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | ||||
|                 "packssdw                %%mm3, %%mm2       \n\t" | ||||
|                 "pmaddwd                 %%mm5, %%mm4       \n\t" | ||||
|                 "pmaddwd                 %%mm5, %%mm2       \n\t" | ||||
|                 "add                       $24, %%"REG_d"   \n\t" | ||||
|                 "add                       $24, %%"FF_REG_d"\n\t" | ||||
|                 "packssdw                %%mm2, %%mm4       \n\t" | ||||
|                 "psraw                      $7, %%mm4       \n\t" | ||||
|  | ||||
|                 "packuswb                %%mm4, %%mm0       \n\t" | ||||
|                 "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0    \n\t" | ||||
|  | ||||
|                 MOVNTQ"                  %%mm0, (%1, %%"REG_a") \n\t" | ||||
|                 "add                        $8,      %%"REG_a"  \n\t" | ||||
|                 " js                        1b                  \n\t" | ||||
|                 MOVNTQ"                  %%mm0, (%1, %%"FF_REG_a") \n\t" | ||||
|                 "add                        $8,      %%"FF_REG_a"  \n\t" | ||||
|                 " js                        1b                     \n\t" | ||||
|                 : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv) | ||||
|                   NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2YOffset) | ||||
|                 : "%"REG_a, "%"REG_d | ||||
|                 : "%"FF_REG_a, "%"FF_REG_d | ||||
|             ); | ||||
|             ydst += lumStride; | ||||
|             src  += srcStride; | ||||
|         } | ||||
|         src -= srcStride*2; | ||||
|         __asm__ volatile( | ||||
|             "mov                        %4, %%"REG_a"   \n\t" | ||||
|             "mov                        %4, %%"FF_REG_a"\n\t" | ||||
|             "movq       "MANGLE(ff_w1111)", %%mm5       \n\t" | ||||
|             "movq          "BGR2U_IDX"(%5), %%mm6       \n\t" | ||||
|             "pxor                    %%mm7, %%mm7       \n\t" | ||||
|             "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t" | ||||
|             "add                 %%"REG_d", %%"REG_d"   \n\t" | ||||
|             "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d" \n\t" | ||||
|             "add              %%"FF_REG_d", %%"FF_REG_d"\n\t" | ||||
|             ".p2align                    4              \n\t" | ||||
|             "1:                                         \n\t" | ||||
|             PREFETCH"    64(%0, %%"REG_d")              \n\t" | ||||
|             PREFETCH"    64(%1, %%"REG_d")              \n\t" | ||||
|             PREFETCH" 64(%0, %%"FF_REG_d")              \n\t" | ||||
|             PREFETCH" 64(%1, %%"FF_REG_d")              \n\t" | ||||
| #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW | ||||
|             "movq          (%0, %%"REG_d"), %%mm0       \n\t" | ||||
|             "movq          (%1, %%"REG_d"), %%mm1       \n\t" | ||||
|             "movq         6(%0, %%"REG_d"), %%mm2       \n\t" | ||||
|             "movq         6(%1, %%"REG_d"), %%mm3       \n\t" | ||||
|             "movq       (%0, %%"FF_REG_d"), %%mm0       \n\t" | ||||
|             "movq       (%1, %%"FF_REG_d"), %%mm1       \n\t" | ||||
|             "movq      6(%0, %%"FF_REG_d"), %%mm2       \n\t" | ||||
|             "movq      6(%1, %%"FF_REG_d"), %%mm3       \n\t" | ||||
|             PAVGB"                   %%mm1, %%mm0       \n\t" | ||||
|             PAVGB"                   %%mm3, %%mm2       \n\t" | ||||
|             "movq                    %%mm0, %%mm1       \n\t" | ||||
| @@ -1751,10 +1751,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | ||||
|             "punpcklbw               %%mm7, %%mm0       \n\t" | ||||
|             "punpcklbw               %%mm7, %%mm2       \n\t" | ||||
| #else | ||||
|             "movd          (%0, %%"REG_d"), %%mm0       \n\t" | ||||
|             "movd          (%1, %%"REG_d"), %%mm1       \n\t" | ||||
|             "movd         3(%0, %%"REG_d"), %%mm2       \n\t" | ||||
|             "movd         3(%1, %%"REG_d"), %%mm3       \n\t" | ||||
|             "movd       (%0, %%"FF_REG_d"), %%mm0       \n\t" | ||||
|             "movd       (%1, %%"FF_REG_d"), %%mm1       \n\t" | ||||
|             "movd      3(%0, %%"FF_REG_d"), %%mm2       \n\t" | ||||
|             "movd      3(%1, %%"FF_REG_d"), %%mm3       \n\t" | ||||
|             "punpcklbw               %%mm7, %%mm0       \n\t" | ||||
|             "punpcklbw               %%mm7, %%mm1       \n\t" | ||||
|             "punpcklbw               %%mm7, %%mm2       \n\t" | ||||
| @@ -1762,10 +1762,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | ||||
|             "paddw                   %%mm1, %%mm0       \n\t" | ||||
|             "paddw                   %%mm3, %%mm2       \n\t" | ||||
|             "paddw                   %%mm2, %%mm0       \n\t" | ||||
|             "movd         6(%0, %%"REG_d"), %%mm4       \n\t" | ||||
|             "movd         6(%1, %%"REG_d"), %%mm1       \n\t" | ||||
|             "movd         9(%0, %%"REG_d"), %%mm2       \n\t" | ||||
|             "movd         9(%1, %%"REG_d"), %%mm3       \n\t" | ||||
|             "movd      6(%0, %%"FF_REG_d"), %%mm4       \n\t" | ||||
|             "movd      6(%1, %%"FF_REG_d"), %%mm1       \n\t" | ||||
|             "movd      9(%0, %%"FF_REG_d"), %%mm2       \n\t" | ||||
|             "movd      9(%1, %%"FF_REG_d"), %%mm3       \n\t" | ||||
|             "punpcklbw               %%mm7, %%mm4       \n\t" | ||||
|             "punpcklbw               %%mm7, %%mm1       \n\t" | ||||
|             "punpcklbw               %%mm7, %%mm2       \n\t" | ||||
| @@ -1795,10 +1795,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | ||||
|             "psraw                      $7, %%mm0       \n\t" | ||||
|  | ||||
| #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW | ||||
|             "movq        12(%0, %%"REG_d"), %%mm4       \n\t" | ||||
|             "movq        12(%1, %%"REG_d"), %%mm1       \n\t" | ||||
|             "movq        18(%0, %%"REG_d"), %%mm2       \n\t" | ||||
|             "movq        18(%1, %%"REG_d"), %%mm3       \n\t" | ||||
|             "movq     12(%0, %%"FF_REG_d"), %%mm4       \n\t" | ||||
|             "movq     12(%1, %%"FF_REG_d"), %%mm1       \n\t" | ||||
|             "movq     18(%0, %%"FF_REG_d"), %%mm2       \n\t" | ||||
|             "movq     18(%1, %%"FF_REG_d"), %%mm3       \n\t" | ||||
|             PAVGB"                   %%mm1, %%mm4       \n\t" | ||||
|             PAVGB"                   %%mm3, %%mm2       \n\t" | ||||
|             "movq                    %%mm4, %%mm1       \n\t" | ||||
| @@ -1810,10 +1810,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | ||||
|             "punpcklbw               %%mm7, %%mm4       \n\t" | ||||
|             "punpcklbw               %%mm7, %%mm2       \n\t" | ||||
| #else | ||||
|             "movd        12(%0, %%"REG_d"), %%mm4       \n\t" | ||||
|             "movd        12(%1, %%"REG_d"), %%mm1       \n\t" | ||||
|             "movd        15(%0, %%"REG_d"), %%mm2       \n\t" | ||||
|             "movd        15(%1, %%"REG_d"), %%mm3       \n\t" | ||||
|             "movd     12(%0, %%"FF_REG_d"), %%mm4       \n\t" | ||||
|             "movd     12(%1, %%"FF_REG_d"), %%mm1       \n\t" | ||||
|             "movd     15(%0, %%"FF_REG_d"), %%mm2       \n\t" | ||||
|             "movd     15(%1, %%"FF_REG_d"), %%mm3       \n\t" | ||||
|             "punpcklbw               %%mm7, %%mm4       \n\t" | ||||
|             "punpcklbw               %%mm7, %%mm1       \n\t" | ||||
|             "punpcklbw               %%mm7, %%mm2       \n\t" | ||||
| @@ -1821,10 +1821,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | ||||
|             "paddw                   %%mm1, %%mm4       \n\t" | ||||
|             "paddw                   %%mm3, %%mm2       \n\t" | ||||
|             "paddw                   %%mm2, %%mm4       \n\t" | ||||
|             "movd        18(%0, %%"REG_d"), %%mm5       \n\t" | ||||
|             "movd        18(%1, %%"REG_d"), %%mm1       \n\t" | ||||
|             "movd        21(%0, %%"REG_d"), %%mm2       \n\t" | ||||
|             "movd        21(%1, %%"REG_d"), %%mm3       \n\t" | ||||
|             "movd     18(%0, %%"FF_REG_d"), %%mm5       \n\t" | ||||
|             "movd     18(%1, %%"FF_REG_d"), %%mm1       \n\t" | ||||
|             "movd     21(%0, %%"FF_REG_d"), %%mm2       \n\t" | ||||
|             "movd     21(%1, %%"FF_REG_d"), %%mm3       \n\t" | ||||
|             "punpcklbw               %%mm7, %%mm5       \n\t" | ||||
|             "punpcklbw               %%mm7, %%mm1       \n\t" | ||||
|             "punpcklbw               %%mm7, %%mm2       \n\t" | ||||
| @@ -1851,7 +1851,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | ||||
|             "packssdw                %%mm3, %%mm1       \n\t" | ||||
|             "pmaddwd                 %%mm5, %%mm4       \n\t" | ||||
|             "pmaddwd                 %%mm5, %%mm1       \n\t" | ||||
|             "add                       $24, %%"REG_d"   \n\t" | ||||
|             "add                       $24, %%"FF_REG_d"\n\t" | ||||
|             "packssdw                %%mm1, %%mm4       \n\t" // V3 V2 U3 U2 | ||||
|             "psraw                      $7, %%mm4       \n\t" | ||||
|  | ||||
| @@ -1860,14 +1860,14 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | ||||
|             "punpckhdq               %%mm4, %%mm1           \n\t" | ||||
|             "packsswb                %%mm1, %%mm0           \n\t" | ||||
|             "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0         \n\t" | ||||
|             "movd                    %%mm0, (%2, %%"REG_a") \n\t" | ||||
|             "punpckhdq               %%mm0, %%mm0           \n\t" | ||||
|             "movd                    %%mm0, (%3, %%"REG_a") \n\t" | ||||
|             "add                        $4, %%"REG_a"       \n\t" | ||||
|             " js                        1b                  \n\t" | ||||
|             "movd                    %%mm0, (%2, %%"FF_REG_a") \n\t" | ||||
|             "punpckhdq               %%mm0, %%mm0              \n\t" | ||||
|             "movd                    %%mm0, (%3, %%"FF_REG_a") \n\t" | ||||
|             "add                        $4, %%"FF_REG_a"       \n\t" | ||||
|             " js                        1b              \n\t" | ||||
|             : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv) | ||||
|               NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2UVOffset) | ||||
|             : "%"REG_a, "%"REG_d | ||||
|             : "%"FF_REG_a, "%"FF_REG_d | ||||
|         ); | ||||
|  | ||||
|         udst += chromStride; | ||||
| @@ -1898,49 +1898,49 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui | ||||
| #if COMPILE_TEMPLATE_SSE2 | ||||
|             if (!((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) { | ||||
|         __asm__( | ||||
|             "xor              %%"REG_a", %%"REG_a"  \n\t" | ||||
|             "xor              %%"FF_REG_a", %%"FF_REG_a"  \n\t" | ||||
|             "1:                                     \n\t" | ||||
|             PREFETCH" 64(%1, %%"REG_a")             \n\t" | ||||
|             PREFETCH" 64(%2, %%"REG_a")             \n\t" | ||||
|             "movdqa     (%1, %%"REG_a"), %%xmm0     \n\t" | ||||
|             "movdqa     (%1, %%"REG_a"), %%xmm1     \n\t" | ||||
|             "movdqa     (%2, %%"REG_a"), %%xmm2     \n\t" | ||||
|             PREFETCH" 64(%1, %%"FF_REG_a")          \n\t" | ||||
|             PREFETCH" 64(%2, %%"FF_REG_a")          \n\t" | ||||
|             "movdqa  (%1, %%"FF_REG_a"), %%xmm0     \n\t" | ||||
|             "movdqa  (%1, %%"FF_REG_a"), %%xmm1     \n\t" | ||||
|             "movdqa  (%2, %%"FF_REG_a"), %%xmm2     \n\t" | ||||
|             "punpcklbw           %%xmm2, %%xmm0     \n\t" | ||||
|             "punpckhbw           %%xmm2, %%xmm1     \n\t" | ||||
|             "movntdq             %%xmm0,   (%0, %%"REG_a", 2)   \n\t" | ||||
|             "movntdq             %%xmm1, 16(%0, %%"REG_a", 2)   \n\t" | ||||
|             "add                    $16, %%"REG_a"  \n\t" | ||||
|             "cmp                     %3, %%"REG_a"  \n\t" | ||||
|             "movntdq             %%xmm0,   (%0, %%"FF_REG_a", 2) \n\t" | ||||
|             "movntdq             %%xmm1, 16(%0, %%"FF_REG_a", 2) \n\t" | ||||
|             "add                    $16, %%"FF_REG_a"            \n\t" | ||||
|             "cmp                     %3, %%"FF_REG_a"            \n\t" | ||||
|             " jb                     1b             \n\t" | ||||
|             ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) | ||||
|             : "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"REG_a | ||||
|             : "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"FF_REG_a | ||||
|         ); | ||||
|             } else | ||||
| #endif | ||||
|         __asm__( | ||||
|             "xor %%"REG_a", %%"REG_a"               \n\t" | ||||
|             "xor %%"FF_REG_a", %%"FF_REG_a"         \n\t" | ||||
|             "1:                                     \n\t" | ||||
|             PREFETCH" 64(%1, %%"REG_a")             \n\t" | ||||
|             PREFETCH" 64(%2, %%"REG_a")             \n\t" | ||||
|             "movq       (%1, %%"REG_a"), %%mm0      \n\t" | ||||
|             "movq      8(%1, %%"REG_a"), %%mm2      \n\t" | ||||
|             PREFETCH" 64(%1, %%"FF_REG_a")          \n\t" | ||||
|             PREFETCH" 64(%2, %%"FF_REG_a")          \n\t" | ||||
|             "movq    (%1, %%"FF_REG_a"), %%mm0      \n\t" | ||||
|             "movq   8(%1, %%"FF_REG_a"), %%mm2      \n\t" | ||||
|             "movq                 %%mm0, %%mm1      \n\t" | ||||
|             "movq                 %%mm2, %%mm3      \n\t" | ||||
|             "movq       (%2, %%"REG_a"), %%mm4      \n\t" | ||||
|             "movq      8(%2, %%"REG_a"), %%mm5      \n\t" | ||||
|             "movq    (%2, %%"FF_REG_a"), %%mm4      \n\t" | ||||
|             "movq   8(%2, %%"FF_REG_a"), %%mm5      \n\t" | ||||
|             "punpcklbw            %%mm4, %%mm0      \n\t" | ||||
|             "punpckhbw            %%mm4, %%mm1      \n\t" | ||||
|             "punpcklbw            %%mm5, %%mm2      \n\t" | ||||
|             "punpckhbw            %%mm5, %%mm3      \n\t" | ||||
|             MOVNTQ"               %%mm0,   (%0, %%"REG_a", 2)   \n\t" | ||||
|             MOVNTQ"               %%mm1,  8(%0, %%"REG_a", 2)   \n\t" | ||||
|             MOVNTQ"               %%mm2, 16(%0, %%"REG_a", 2)   \n\t" | ||||
|             MOVNTQ"               %%mm3, 24(%0, %%"REG_a", 2)   \n\t" | ||||
|             "add                    $16, %%"REG_a"  \n\t" | ||||
|             "cmp                     %3, %%"REG_a"  \n\t" | ||||
|             " jb                     1b             \n\t" | ||||
|             MOVNTQ"               %%mm0,   (%0, %%"FF_REG_a", 2) \n\t" | ||||
|             MOVNTQ"               %%mm1,  8(%0, %%"FF_REG_a", 2) \n\t" | ||||
|             MOVNTQ"               %%mm2, 16(%0, %%"FF_REG_a", 2) \n\t" | ||||
|             MOVNTQ"               %%mm3, 24(%0, %%"FF_REG_a", 2) \n\t" | ||||
|             "add                    $16, %%"FF_REG_a"            \n\t" | ||||
|             "cmp                     %3, %%"FF_REG_a"            \n\t" | ||||
|             " jb                     1b                          \n\t" | ||||
|             ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) | ||||
|             : "memory", "%"REG_a | ||||
|             : "memory", "%"FF_REG_a | ||||
|         ); | ||||
|  | ||||
|         } | ||||
|   | ||||
| @@ -220,16 +220,16 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize, | ||||
|         "movdqa     %%xmm3, %%xmm4 \n\t" \ | ||||
|         "movdqa     %%xmm3, %%xmm7 \n\t" \ | ||||
|         "movl           %3, %%ecx  \n\t" \ | ||||
|         "mov                                 %0, %%"REG_d"  \n\t"\ | ||||
|         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\ | ||||
|         "mov                                 %0, %%"FF_REG_d"        \n\t"\ | ||||
|         "mov                        (%%"FF_REG_d"), %%"FF_REG_S"     \n\t"\ | ||||
|         ".p2align                             4             \n\t" /* FIXME Unroll? */\ | ||||
|         "1:                                                 \n\t"\ | ||||
|         "movddup                  8(%%"REG_d"), %%xmm0      \n\t" /* filterCoeff */\ | ||||
|         "movdqa              (%%"REG_S", %%"REG_c", 2), %%xmm2      \n\t" /* srcData */\ | ||||
|         "movdqa            16(%%"REG_S", %%"REG_c", 2), %%xmm5      \n\t" /* srcData */\ | ||||
|         "add                                $16, %%"REG_d"  \n\t"\ | ||||
|         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\ | ||||
|         "test                         %%"REG_S", %%"REG_S"  \n\t"\ | ||||
|         "movddup                  8(%%"FF_REG_d"), %%xmm0   \n\t" /* filterCoeff */\ | ||||
|         "movdqa              (%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm2 \n\t" /* srcData */\ | ||||
|         "movdqa            16(%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm5 \n\t" /* srcData */\ | ||||
|         "add                                $16, %%"FF_REG_d"        \n\t"\ | ||||
|         "mov                        (%%"FF_REG_d"), %%"FF_REG_S"     \n\t"\ | ||||
|         "test                         %%"FF_REG_S", %%"FF_REG_S"     \n\t"\ | ||||
|         "pmulhw                           %%xmm0, %%xmm2      \n\t"\ | ||||
|         "pmulhw                           %%xmm0, %%xmm5      \n\t"\ | ||||
|         "paddw                            %%xmm2, %%xmm3      \n\t"\ | ||||
| @@ -238,13 +238,13 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize, | ||||
|         "psraw                               $3, %%xmm3      \n\t"\ | ||||
|         "psraw                               $3, %%xmm4      \n\t"\ | ||||
|         "packuswb                         %%xmm4, %%xmm3      \n\t"\ | ||||
|         "movntdq                          %%xmm3, (%1, %%"REG_c")\n\t"\ | ||||
|         "add                         $16, %%"REG_c"         \n\t"\ | ||||
|         "cmp                          %2, %%"REG_c"         \n\t"\ | ||||
|         "movntdq                          %%xmm3, (%1, %%"FF_REG_c") \n\t"\ | ||||
|         "add                         $16, %%"FF_REG_c"        \n\t"\ | ||||
|         "cmp                          %2, %%"FF_REG_c"        \n\t"\ | ||||
|         "movdqa                   %%xmm7, %%xmm3            \n\t" \ | ||||
|         "movdqa                   %%xmm7, %%xmm4            \n\t" \ | ||||
|         "mov                                 %0, %%"REG_d"  \n\t"\ | ||||
|         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\ | ||||
|         "mov                                 %0, %%"FF_REG_d"        \n\t"\ | ||||
|         "mov                        (%%"FF_REG_d"), %%"FF_REG_S"     \n\t"\ | ||||
|         "jb                                  1b             \n\t" | ||||
|  | ||||
|     if (offset) { | ||||
| @@ -259,7 +259,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize, | ||||
|               "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset), | ||||
|               "m"(filterSize), "m"(((uint64_t *) dither)[0]) | ||||
|               : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,) | ||||
|                 "%"REG_d, "%"REG_S, "%"REG_c | ||||
|                 "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c | ||||
|               ); | ||||
|     } else { | ||||
|         __asm__ volatile( | ||||
| @@ -269,7 +269,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize, | ||||
|               "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset), | ||||
|               "m"(filterSize), "m"(((uint64_t *) dither)[0]) | ||||
|               : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,) | ||||
|                 "%"REG_d, "%"REG_S, "%"REG_c | ||||
|                 "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c | ||||
|               ); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -88,16 +88,16 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize, | ||||
|         "movq    %%mm3, %%mm6\n\t" | ||||
|         "movq    %%mm4, %%mm7\n\t" | ||||
|         "movl %3, %%ecx\n\t" | ||||
|         "mov                                 %0, %%"REG_d"  \n\t"\ | ||||
|         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\ | ||||
|         ".p2align                             4             \n\t" /* FIXME Unroll? */\ | ||||
|         "1:                                                 \n\t"\ | ||||
|         "movq                      8(%%"REG_d"), %%mm0      \n\t" /* filterCoeff */\ | ||||
|         "movq                (%%"REG_S", %%"REG_c", 2), %%mm2      \n\t" /* srcData */\ | ||||
|         "movq               8(%%"REG_S", %%"REG_c", 2), %%mm5      \n\t" /* srcData */\ | ||||
|         "add                                $16, %%"REG_d"  \n\t"\ | ||||
|         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\ | ||||
|         "test                         %%"REG_S", %%"REG_S"  \n\t"\ | ||||
|         "mov                                 %0, %%"FF_REG_d"       \n\t"\ | ||||
|         "mov                        (%%"FF_REG_d"), %%"FF_REG_S"    \n\t"\ | ||||
|         ".p2align                             4                     \n\t" /* FIXME Unroll? */\ | ||||
|         "1:                                                         \n\t"\ | ||||
|         "movq                      8(%%"FF_REG_d"), %%mm0           \n\t" /* filterCoeff */\ | ||||
|         "movq                (%%"FF_REG_S", %%"FF_REG_c", 2), %%mm2 \n\t" /* srcData */\ | ||||
|         "movq               8(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm5 \n\t" /* srcData */\ | ||||
|         "add                                $16, %%"FF_REG_d"       \n\t"\ | ||||
|         "mov                        (%%"FF_REG_d"), %%"FF_REG_S"    \n\t"\ | ||||
|         "test                         %%"FF_REG_S", %%"FF_REG_S"    \n\t"\ | ||||
|         "pmulhw                           %%mm0, %%mm2      \n\t"\ | ||||
|         "pmulhw                           %%mm0, %%mm5      \n\t"\ | ||||
|         "paddw                            %%mm2, %%mm3      \n\t"\ | ||||
| @@ -106,62 +106,62 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize, | ||||
|         "psraw                               $3, %%mm3      \n\t"\ | ||||
|         "psraw                               $3, %%mm4      \n\t"\ | ||||
|         "packuswb                         %%mm4, %%mm3      \n\t" | ||||
|         MOVNTQ2 "                         %%mm3, (%1, %%"REG_c")\n\t" | ||||
|         "add                          $8, %%"REG_c"         \n\t"\ | ||||
|         "cmp                          %2, %%"REG_c"         \n\t"\ | ||||
|         MOVNTQ2 "                         %%mm3, (%1, %%"FF_REG_c")\n\t" | ||||
|         "add                          $8, %%"FF_REG_c"      \n\t"\ | ||||
|         "cmp                          %2, %%"FF_REG_c"      \n\t"\ | ||||
|         "movq    %%mm6, %%mm3\n\t" | ||||
|         "movq    %%mm7, %%mm4\n\t" | ||||
|         "mov                                 %0, %%"REG_d"  \n\t"\ | ||||
|         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\ | ||||
|         "jb                                  1b             \n\t"\ | ||||
|         "mov                                 %0, %%"FF_REG_d"     \n\t"\ | ||||
|         "mov                        (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\ | ||||
|         "jb                                  1b                   \n\t"\ | ||||
|         :: "g" (filter), | ||||
|            "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) | ||||
|         : "%"REG_d, "%"REG_S, "%"REG_c | ||||
|         : "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c | ||||
|     ); | ||||
| } | ||||
|  | ||||
| #define YSCALEYUV2PACKEDX_UV \ | ||||
|     __asm__ volatile(\ | ||||
|         "xor                   %%"REG_a", %%"REG_a"     \n\t"\ | ||||
|         "xor                %%"FF_REG_a", %%"FF_REG_a"  \n\t"\ | ||||
|         ".p2align                      4                \n\t"\ | ||||
|         "nop                                            \n\t"\ | ||||
|         "1:                                             \n\t"\ | ||||
|         "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\ | ||||
|         "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\ | ||||
|         "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d"  \n\t"\ | ||||
|         "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\ | ||||
|         "movq      "VROUNDER_OFFSET"(%0), %%mm3         \n\t"\ | ||||
|         "movq                      %%mm3, %%mm4         \n\t"\ | ||||
|         ".p2align                      4                \n\t"\ | ||||
|         "2:                                             \n\t"\ | ||||
|         "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\ | ||||
|         "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* UsrcData */\ | ||||
|         "add                          %6, %%"REG_S"     \n\t" \ | ||||
|         "movq     (%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\ | ||||
|         "add                         $16, %%"REG_d"     \n\t"\ | ||||
|         "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\ | ||||
|         "movq            8(%%"FF_REG_d"), %%mm0         \n\t" /* filterCoeff */\ | ||||
|         "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm2      \n\t" /* UsrcData */\ | ||||
|         "add                          %6, %%"FF_REG_S"  \n\t" \ | ||||
|         "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm5      \n\t" /* VsrcData */\ | ||||
|         "add                         $16, %%"FF_REG_d"  \n\t"\ | ||||
|         "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\ | ||||
|         "pmulhw                    %%mm0, %%mm2         \n\t"\ | ||||
|         "pmulhw                    %%mm0, %%mm5         \n\t"\ | ||||
|         "paddw                     %%mm2, %%mm3         \n\t"\ | ||||
|         "paddw                     %%mm5, %%mm4         \n\t"\ | ||||
|         "test                  %%"REG_S", %%"REG_S"     \n\t"\ | ||||
|         "test               %%"FF_REG_S", %%"FF_REG_S"  \n\t"\ | ||||
|         " jnz                         2b                \n\t"\ | ||||
|  | ||||
| #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \ | ||||
|     "lea                "offset"(%0), %%"REG_d"     \n\t"\ | ||||
|     "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\ | ||||
|     "lea                "offset"(%0), %%"FF_REG_d"  \n\t"\ | ||||
|     "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\ | ||||
|     "movq      "VROUNDER_OFFSET"(%0), "#dst1"       \n\t"\ | ||||
|     "movq                    "#dst1", "#dst2"       \n\t"\ | ||||
|     ".p2align                      4                \n\t"\ | ||||
|     "2:                                             \n\t"\ | ||||
|     "movq               8(%%"REG_d"), "#coeff"      \n\t" /* filterCoeff */\ | ||||
|     "movq  (%%"REG_S", %%"REG_a", 2), "#src1"       \n\t" /* Y1srcData */\ | ||||
|     "movq 8(%%"REG_S", %%"REG_a", 2), "#src2"       \n\t" /* Y2srcData */\ | ||||
|     "add                         $16, %%"REG_d"            \n\t"\ | ||||
|     "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\ | ||||
|     "movq            8(%%"FF_REG_d"), "#coeff"      \n\t" /* filterCoeff */\ | ||||
|     "movq  (%%"FF_REG_S", %%"FF_REG_a", 2), "#src1" \n\t" /* Y1srcData */\ | ||||
|     "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), "#src2" \n\t" /* Y2srcData */\ | ||||
|     "add                         $16, %%"FF_REG_d"  \n\t"\ | ||||
|     "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\ | ||||
|     "pmulhw                 "#coeff", "#src1"       \n\t"\ | ||||
|     "pmulhw                 "#coeff", "#src2"       \n\t"\ | ||||
|     "paddw                   "#src1", "#dst1"       \n\t"\ | ||||
|     "paddw                   "#src2", "#dst2"       \n\t"\ | ||||
|     "test                  %%"REG_S", %%"REG_S"     \n\t"\ | ||||
|     "test               %%"FF_REG_S", %%"FF_REG_S"  \n\t"\ | ||||
|     " jnz                         2b                \n\t"\ | ||||
|  | ||||
| #define YSCALEYUV2PACKEDX \ | ||||
| @@ -173,41 +173,41 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize, | ||||
|             "m" (dummy), "m" (dummy), "m" (dummy),\ | ||||
|             "r" (dest), "m" (dstW_reg), "m"(uv_off) \ | ||||
|             NAMED_CONSTRAINTS_ADD(bF8,bFC) \ | ||||
|         : "%"REG_a, "%"REG_d, "%"REG_S            \ | ||||
|         : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_S            \ | ||||
|     ); | ||||
|  | ||||
| #define YSCALEYUV2PACKEDX_ACCURATE_UV \ | ||||
|     __asm__ volatile(\ | ||||
|         "xor %%"REG_a", %%"REG_a"                       \n\t"\ | ||||
|         "xor %%"FF_REG_a", %%"FF_REG_a"                 \n\t"\ | ||||
|         ".p2align                      4                \n\t"\ | ||||
|         "nop                                            \n\t"\ | ||||
|         "1:                                             \n\t"\ | ||||
|         "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\ | ||||
|         "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\ | ||||
|         "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d"  \n\t"\ | ||||
|         "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\ | ||||
|         "pxor                      %%mm4, %%mm4         \n\t"\ | ||||
|         "pxor                      %%mm5, %%mm5         \n\t"\ | ||||
|         "pxor                      %%mm6, %%mm6         \n\t"\ | ||||
|         "pxor                      %%mm7, %%mm7         \n\t"\ | ||||
|         ".p2align                      4                \n\t"\ | ||||
|         "2:                                             \n\t"\ | ||||
|         "movq     (%%"REG_S", %%"REG_a"), %%mm0         \n\t" /* UsrcData */\ | ||||
|         "add                          %6, %%"REG_S"      \n\t" \ | ||||
|         "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\ | ||||
|         "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\ | ||||
|         "movq     (%%"REG_S", %%"REG_a"), %%mm1         \n\t" /* UsrcData */\ | ||||
|         "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm0      \n\t" /* UsrcData */\ | ||||
|         "add                          %6, %%"FF_REG_S"  \n\t" \ | ||||
|         "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm2      \n\t" /* VsrcData */\ | ||||
|         "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\ | ||||
|         "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm1      \n\t" /* UsrcData */\ | ||||
|         "movq                      %%mm0, %%mm3         \n\t"\ | ||||
|         "punpcklwd                 %%mm1, %%mm0         \n\t"\ | ||||
|         "punpckhwd                 %%mm1, %%mm3         \n\t"\ | ||||
|         "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1         \n\t" /* filterCoeff */\ | ||||
|         "movq "STR(APCK_COEF)"(%%"FF_REG_d"),%%mm1      \n\t" /* filterCoeff */\ | ||||
|         "pmaddwd                   %%mm1, %%mm0         \n\t"\ | ||||
|         "pmaddwd                   %%mm1, %%mm3         \n\t"\ | ||||
|         "paddd                     %%mm0, %%mm4         \n\t"\ | ||||
|         "paddd                     %%mm3, %%mm5         \n\t"\ | ||||
|         "add                          %6, %%"REG_S"      \n\t" \ | ||||
|         "movq     (%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\ | ||||
|         "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\ | ||||
|         "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\ | ||||
|         "test                  %%"REG_S", %%"REG_S"     \n\t"\ | ||||
|         "add                          %6, %%"FF_REG_S"  \n\t" \ | ||||
|         "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm3      \n\t" /* VsrcData */\ | ||||
|         "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\ | ||||
|         "add           $"STR(APCK_SIZE)", %%"FF_REG_d"  \n\t"\ | ||||
|         "test               %%"FF_REG_S", %%"FF_REG_S"  \n\t"\ | ||||
|         "movq                      %%mm2, %%mm0         \n\t"\ | ||||
|         "punpcklwd                 %%mm3, %%mm2         \n\t"\ | ||||
|         "punpckhwd                 %%mm3, %%mm0         \n\t"\ | ||||
| @@ -229,30 +229,30 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize, | ||||
|         "movq                      %%mm6, "V_TEMP"(%0)  \n\t"\ | ||||
|  | ||||
| #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \ | ||||
|     "lea                "offset"(%0), %%"REG_d"     \n\t"\ | ||||
|     "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\ | ||||
|     "lea                "offset"(%0), %%"FF_REG_d"      \n\t"\ | ||||
|     "mov                 (%%"FF_REG_d"), %%"FF_REG_S"   \n\t"\ | ||||
|     "pxor                      %%mm1, %%mm1         \n\t"\ | ||||
|     "pxor                      %%mm5, %%mm5         \n\t"\ | ||||
|     "pxor                      %%mm7, %%mm7         \n\t"\ | ||||
|     "pxor                      %%mm6, %%mm6         \n\t"\ | ||||
|     ".p2align                      4                \n\t"\ | ||||
|     "2:                                             \n\t"\ | ||||
|     "movq  (%%"REG_S", %%"REG_a", 2), %%mm0         \n\t" /* Y1srcData */\ | ||||
|     "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y2srcData */\ | ||||
|     "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\ | ||||
|     "movq  (%%"REG_S", %%"REG_a", 2), %%mm4         \n\t" /* Y1srcData */\ | ||||
|     "movq  (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm0       \n\t" /* Y1srcData */\ | ||||
|     "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm2       \n\t" /* Y2srcData */\ | ||||
|     "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S"   \n\t"\ | ||||
|     "movq  (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm4       \n\t" /* Y1srcData */\ | ||||
|     "movq                      %%mm0, %%mm3         \n\t"\ | ||||
|     "punpcklwd                 %%mm4, %%mm0         \n\t"\ | ||||
|     "punpckhwd                 %%mm4, %%mm3         \n\t"\ | ||||
|     "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4         \n\t" /* filterCoeff */\ | ||||
|     "movq "STR(APCK_COEF)"(%%"FF_REG_d"), %%mm4     \n\t" /* filterCoeff */\ | ||||
|     "pmaddwd                   %%mm4, %%mm0         \n\t"\ | ||||
|     "pmaddwd                   %%mm4, %%mm3         \n\t"\ | ||||
|     "paddd                     %%mm0, %%mm1         \n\t"\ | ||||
|     "paddd                     %%mm3, %%mm5         \n\t"\ | ||||
|     "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3         \n\t" /* Y2srcData */\ | ||||
|     "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\ | ||||
|     "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\ | ||||
|     "test                  %%"REG_S", %%"REG_S"     \n\t"\ | ||||
|     "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm3   \n\t" /* Y2srcData */\ | ||||
|     "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\ | ||||
|     "add           $"STR(APCK_SIZE)", %%"FF_REG_d"  \n\t"\ | ||||
|     "test               %%"FF_REG_S", %%"FF_REG_S"  \n\t"\ | ||||
|     "movq                      %%mm2, %%mm0         \n\t"\ | ||||
|     "punpcklwd                 %%mm3, %%mm2         \n\t"\ | ||||
|     "punpckhwd                 %%mm3, %%mm0         \n\t"\ | ||||
| @@ -359,13 +359,13 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, | ||||
|         "psraw                        $3, %%mm1         \n\t" | ||||
|         "psraw                        $3, %%mm7         \n\t" | ||||
|         "packuswb                  %%mm7, %%mm1         \n\t" | ||||
|         WRITEBGR32(%4, "%5", %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6) | ||||
|         WRITEBGR32(%4, "%5", %%FF_REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6) | ||||
|         YSCALEYUV2PACKEDX_END | ||||
|     } else { | ||||
|         YSCALEYUV2PACKEDX_ACCURATE | ||||
|         YSCALEYUV2RGBX | ||||
|         "pcmpeqd %%mm7, %%mm7 \n\t" | ||||
|         WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|         WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|         YSCALEYUV2PACKEDX_END | ||||
|     } | ||||
| } | ||||
| @@ -388,13 +388,13 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, | ||||
|         "psraw                        $3, %%mm1         \n\t" | ||||
|         "psraw                        $3, %%mm7         \n\t" | ||||
|         "packuswb                  %%mm7, %%mm1         \n\t" | ||||
|         WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) | ||||
|         WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) | ||||
|         YSCALEYUV2PACKEDX_END | ||||
|     } else { | ||||
|         YSCALEYUV2PACKEDX | ||||
|         YSCALEYUV2RGBX | ||||
|         "pcmpeqd %%mm7, %%mm7 \n\t" | ||||
|         WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|         WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|         YSCALEYUV2PACKEDX_END | ||||
|     } | ||||
| } | ||||
| @@ -417,13 +417,13 @@ static void RENAME(yuv2bgr32_X)(SwsContext *c, const int16_t *lumFilter, | ||||
|         "psraw                        $3, %%mm1         \n\t" | ||||
|         "psraw                        $3, %%mm7         \n\t" | ||||
|         "packuswb                  %%mm7, %%mm1         \n\t" | ||||
|         WRITEBGR32(%4, "%5", %%REGa, %%mm5, %%mm4, %%mm2, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) | ||||
|         WRITEBGR32(%4, "%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) | ||||
|         YSCALEYUV2PACKEDX_END | ||||
|     } else { | ||||
|         YSCALEYUV2PACKEDX | ||||
|         YSCALEYUV2RGBX | ||||
|         "pcmpeqd %%mm7, %%mm7 \n\t" | ||||
|         WRITEBGR32(%4, "%5", %%REGa, %%mm5, %%mm4, %%mm2, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|         WRITEBGR32(%4, "%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|         YSCALEYUV2PACKEDX_END | ||||
|     } | ||||
| } | ||||
| @@ -476,7 +476,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, | ||||
|     "paddusb "GREEN_DITHER"(%0), %%mm4\n\t" | ||||
|     "paddusb "RED_DITHER"(%0), %%mm5\n\t" | ||||
| #endif | ||||
|     WRITERGB16(%4, "%5", %%REGa) | ||||
|     WRITERGB16(%4, "%5", %%FF_REGa) | ||||
|     YSCALEYUV2PACKEDX_END | ||||
| } | ||||
|  | ||||
| @@ -500,7 +500,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, | ||||
|     "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t" | ||||
|     "paddusb "RED_DITHER"(%0), %%mm5  \n\t" | ||||
| #endif | ||||
|     WRITERGB16(%4, "%5", %%REGa) | ||||
|     WRITERGB16(%4, "%5", %%FF_REGa) | ||||
|     YSCALEYUV2PACKEDX_END | ||||
| } | ||||
|  | ||||
| @@ -553,7 +553,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, | ||||
|     "paddusb "GREEN_DITHER"(%0), %%mm4\n\t" | ||||
|     "paddusb "RED_DITHER"(%0), %%mm5\n\t" | ||||
| #endif | ||||
|     WRITERGB15(%4, "%5", %%REGa) | ||||
|     WRITERGB15(%4, "%5", %%FF_REGa) | ||||
|     YSCALEYUV2PACKEDX_END | ||||
| } | ||||
|  | ||||
| @@ -577,7 +577,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, | ||||
|     "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t" | ||||
|     "paddusb "RED_DITHER"(%0), %%mm5  \n\t" | ||||
| #endif | ||||
|     WRITERGB15(%4, "%5", %%REGa) | ||||
|     WRITERGB15(%4, "%5", %%FF_REGa) | ||||
|     YSCALEYUV2PACKEDX_END | ||||
| } | ||||
|  | ||||
| @@ -705,14 +705,14 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, | ||||
|     YSCALEYUV2PACKEDX_ACCURATE | ||||
|     YSCALEYUV2RGBX | ||||
|     "pxor %%mm7, %%mm7 \n\t" | ||||
|     "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize | ||||
|     "add %4, %%"REG_c"                        \n\t" | ||||
|     WRITEBGR24(%%REGc, "%5", %%REGa) | ||||
|     "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c"\n\t" //FIXME optimize | ||||
|     "add %4, %%"FF_REG_c"                        \n\t" | ||||
|     WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa) | ||||
|     :: "r" (&c->redDither), | ||||
|        "m" (dummy), "m" (dummy), "m" (dummy), | ||||
|        "r" (dest), "m" (dstW_reg), "m"(uv_off) | ||||
|        NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) | ||||
|     : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S | ||||
|     : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S | ||||
|     ); | ||||
| } | ||||
|  | ||||
| @@ -729,15 +729,15 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, | ||||
|  | ||||
|     YSCALEYUV2PACKEDX | ||||
|     YSCALEYUV2RGBX | ||||
|     "pxor                    %%mm7, %%mm7       \n\t" | ||||
|     "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize | ||||
|     "add                        %4, %%"REG_c"   \n\t" | ||||
|     WRITEBGR24(%%REGc, "%5", %%REGa) | ||||
|     "pxor                    %%mm7, %%mm7              \n\t" | ||||
|     "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c" \n\t" //FIXME optimize | ||||
|     "add                        %4, %%"FF_REG_c"       \n\t" | ||||
|     WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa) | ||||
|     :: "r" (&c->redDither), | ||||
|        "m" (dummy), "m" (dummy), "m" (dummy), | ||||
|        "r" (dest),  "m" (dstW_reg), "m"(uv_off) | ||||
|        NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) | ||||
|     : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S | ||||
|     : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S | ||||
|     ); | ||||
| } | ||||
| #endif /* HAVE_6REGS */ | ||||
| @@ -776,7 +776,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, | ||||
|     "psraw $3, %%mm4    \n\t" | ||||
|     "psraw $3, %%mm1    \n\t" | ||||
|     "psraw $3, %%mm7    \n\t" | ||||
|     WRITEYUY2(%4, "%5", %%REGa) | ||||
|     WRITEYUY2(%4, "%5", %%FF_REGa) | ||||
|     YSCALEYUV2PACKEDX_END | ||||
| } | ||||
|  | ||||
| @@ -797,7 +797,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, | ||||
|     "psraw $3, %%mm4    \n\t" | ||||
|     "psraw $3, %%mm1    \n\t" | ||||
|     "psraw $3, %%mm7    \n\t" | ||||
|     WRITEYUY2(%4, "%5", %%REGa) | ||||
|     WRITEYUY2(%4, "%5", %%FF_REGa) | ||||
|     YSCALEYUV2PACKEDX_END | ||||
| } | ||||
|  | ||||
| @@ -908,37 +908,37 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], | ||||
|         c->u_temp=(intptr_t)abuf0; | ||||
|         c->v_temp=(intptr_t)abuf1; | ||||
|         __asm__ volatile( | ||||
|             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|             "mov        %4, %%"REG_b"               \n\t" | ||||
|             "push %%"REG_BP"                        \n\t" | ||||
|             YSCALEYUV2RGB(%%REGBP, %5) | ||||
|             "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|             "mov        %4, %%"FF_REG_b"            \n\t" | ||||
|             "push %%"FF_REG_BP"                     \n\t" | ||||
|             YSCALEYUV2RGB(%%FF_REGBP, %5) | ||||
|             "push                   %0              \n\t" | ||||
|             "push                   %1              \n\t" | ||||
|             "mov          "U_TEMP"(%5), %0          \n\t" | ||||
|             "mov          "V_TEMP"(%5), %1          \n\t" | ||||
|             YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1) | ||||
|             YSCALEYUV2RGB_YA(%%FF_REGBP, %5, %0, %1) | ||||
|             "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ | ||||
|             "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ | ||||
|             "packuswb            %%mm7, %%mm1       \n\t" | ||||
|             "pop                    %1              \n\t" | ||||
|             "pop                    %0              \n\t" | ||||
|             WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) | ||||
|             "pop %%"REG_BP"                         \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|             WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) | ||||
|             "pop %%"FF_REG_BP"                      \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|                "a" (&c->redDither) | ||||
|         ); | ||||
| #endif | ||||
|     } else { | ||||
|         __asm__ volatile( | ||||
|             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|             "mov        %4, %%"REG_b"               \n\t" | ||||
|             "push %%"REG_BP"                        \n\t" | ||||
|             YSCALEYUV2RGB(%%REGBP, %5) | ||||
|             "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|             "mov        %4, %%"FF_REG_b"            \n\t" | ||||
|             "push %%"FF_REG_BP"                     \n\t" | ||||
|             YSCALEYUV2RGB(%%FF_REGBP, %5) | ||||
|             "pcmpeqd %%mm7, %%mm7                   \n\t" | ||||
|             WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|             "pop %%"REG_BP"                         \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|             WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|             "pop %%"FF_REG_BP"                      \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|                "a" (&c->redDither) | ||||
|         ); | ||||
| @@ -954,14 +954,14 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2], | ||||
|                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; | ||||
|  | ||||
|     __asm__ volatile( | ||||
|         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|         "mov        %4, %%"REG_b"               \n\t" | ||||
|         "push %%"REG_BP"                        \n\t" | ||||
|         YSCALEYUV2RGB(%%REGBP, %5) | ||||
|         "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|         "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|         "push %%"FF_REG_BP"                     \n\t" | ||||
|         YSCALEYUV2RGB(%%FF_REGBP, %5) | ||||
|         "pxor    %%mm7, %%mm7                   \n\t" | ||||
|         WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) | ||||
|         "pop %%"REG_BP"                         \n\t" | ||||
|         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|         WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) | ||||
|         "pop %%"FF_REG_BP"                      \n\t" | ||||
|         "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|            "a" (&c->redDither) | ||||
|            NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) | ||||
| @@ -977,20 +977,20 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2], | ||||
|                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; | ||||
|  | ||||
|     __asm__ volatile( | ||||
|         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|         "mov        %4, %%"REG_b"               \n\t" | ||||
|         "push %%"REG_BP"                        \n\t" | ||||
|         YSCALEYUV2RGB(%%REGBP, %5) | ||||
|         "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|         "mov        %4, %%"FF_REG_b"            \n\t" | ||||
|         "push %%"FF_REG_BP"                     \n\t" | ||||
|         YSCALEYUV2RGB(%%FF_REGBP, %5) | ||||
|         "pxor    %%mm7, %%mm7                   \n\t" | ||||
|         /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | ||||
| #ifdef DITHER1XBPP | ||||
|         "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t" | ||||
|         "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t" | ||||
|         "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t" | ||||
|         "paddusb "RED_DITHER"(%5), %%mm5      \n\t" | ||||
|         "paddusb "RED_DITHER"(%5), %%mm5        \n\t" | ||||
| #endif | ||||
|         WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) | ||||
|         "pop %%"REG_BP"                         \n\t" | ||||
|         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|         WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) | ||||
|         "pop %%"FF_REG_BP"                      \n\t" | ||||
|         "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|            "a" (&c->redDither) | ||||
|            NAMED_CONSTRAINTS_ADD(bF8) | ||||
| @@ -1006,20 +1006,20 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], | ||||
|                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; | ||||
|  | ||||
|     __asm__ volatile( | ||||
|         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|         "mov        %4, %%"REG_b"               \n\t" | ||||
|         "push %%"REG_BP"                        \n\t" | ||||
|         YSCALEYUV2RGB(%%REGBP, %5) | ||||
|         "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|         "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|         "push %%"FF_REG_BP"                     \n\t" | ||||
|         YSCALEYUV2RGB(%%FF_REGBP, %5) | ||||
|         "pxor    %%mm7, %%mm7                   \n\t" | ||||
|         /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | ||||
| #ifdef DITHER1XBPP | ||||
|         "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t" | ||||
|         "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t" | ||||
|         "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t" | ||||
|         "paddusb "RED_DITHER"(%5), %%mm5      \n\t" | ||||
|         "paddusb "RED_DITHER"(%5), %%mm5        \n\t" | ||||
| #endif | ||||
|         WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) | ||||
|         "pop %%"REG_BP"                         \n\t" | ||||
|         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|         WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) | ||||
|         "pop %%"FF_REG_BP"                      \n\t" | ||||
|         "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|            "a" (&c->redDither) | ||||
|            NAMED_CONSTRAINTS_ADD(bF8,bFC) | ||||
| @@ -1075,13 +1075,13 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], | ||||
|                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; | ||||
|  | ||||
|     __asm__ volatile( | ||||
|         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|         "mov %4, %%"REG_b"                        \n\t" | ||||
|         "push %%"REG_BP"                        \n\t" | ||||
|         YSCALEYUV2PACKED(%%REGBP, %5) | ||||
|         WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) | ||||
|         "pop %%"REG_BP"                         \n\t" | ||||
|         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|         "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|         "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|         "push %%"FF_REG_BP"                     \n\t" | ||||
|         YSCALEYUV2PACKED(%%FF_REGBP, %5) | ||||
|         WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) | ||||
|         "pop %%"FF_REG_BP"                      \n\t" | ||||
|         "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|            "a" (&c->redDither) | ||||
|     ); | ||||
| @@ -1217,27 +1217,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, | ||||
|         const int16_t *ubuf1 = ubuf[0]; | ||||
|         if (CONFIG_SWSCALE_ALPHA && c->needAlpha) { | ||||
|             __asm__ volatile( | ||||
|                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|                 "mov        %4, %%"REG_b"               \n\t" | ||||
|                 "push %%"REG_BP"                        \n\t" | ||||
|                 YSCALEYUV2RGB1(%%REGBP, %5) | ||||
|                 YSCALEYUV2RGB1_ALPHA(%%REGBP) | ||||
|                 WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|                 "pop %%"REG_BP"                         \n\t" | ||||
|                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|                 "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|                 "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|                 "push %%"FF_REG_BP"                     \n\t" | ||||
|                 YSCALEYUV2RGB1(%%FF_REGBP, %5) | ||||
|                 YSCALEYUV2RGB1_ALPHA(%%FF_REGBP) | ||||
|                 WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|                 "pop %%"FF_REG_BP"                      \n\t" | ||||
|                 "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|                 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|                    "a" (&c->redDither) | ||||
|             ); | ||||
|         } else { | ||||
|             __asm__ volatile( | ||||
|                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|                 "mov        %4, %%"REG_b"               \n\t" | ||||
|                 "push %%"REG_BP"                        \n\t" | ||||
|                 YSCALEYUV2RGB1(%%REGBP, %5) | ||||
|                 "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|                 "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|                 "push %%"FF_REG_BP"                     \n\t" | ||||
|                 YSCALEYUV2RGB1(%%FF_REGBP, %5) | ||||
|                 "pcmpeqd %%mm7, %%mm7                   \n\t" | ||||
|                 WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|                 "pop %%"REG_BP"                         \n\t" | ||||
|                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|                 WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|                 "pop %%"FF_REG_BP"                      \n\t" | ||||
|                 "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|                 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|                    "a" (&c->redDither) | ||||
|             ); | ||||
| @@ -1246,27 +1246,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, | ||||
|         const int16_t *ubuf1 = ubuf[1]; | ||||
|         if (CONFIG_SWSCALE_ALPHA && c->needAlpha) { | ||||
|             __asm__ volatile( | ||||
|                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|                 "mov        %4, %%"REG_b"               \n\t" | ||||
|                 "push %%"REG_BP"                        \n\t" | ||||
|                 YSCALEYUV2RGB1b(%%REGBP, %5) | ||||
|                 YSCALEYUV2RGB1_ALPHA(%%REGBP) | ||||
|                 WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|                 "pop %%"REG_BP"                         \n\t" | ||||
|                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|                 "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|                 "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|                 "push %%"FF_REG_BP"                     \n\t" | ||||
|                 YSCALEYUV2RGB1b(%%FF_REGBP, %5) | ||||
|                 YSCALEYUV2RGB1_ALPHA(%%FF_REGBP) | ||||
|                 WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|                 "pop %%"FF_REG_BP"                      \n\t" | ||||
|                 "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|                 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|                    "a" (&c->redDither) | ||||
|             ); | ||||
|         } else { | ||||
|             __asm__ volatile( | ||||
|                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|                 "mov        %4, %%"REG_b"               \n\t" | ||||
|                 "push %%"REG_BP"                        \n\t" | ||||
|                 YSCALEYUV2RGB1b(%%REGBP, %5) | ||||
|                 "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|                 "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|                 "push %%"FF_REG_BP"                     \n\t" | ||||
|                 YSCALEYUV2RGB1b(%%FF_REGBP, %5) | ||||
|                 "pcmpeqd %%mm7, %%mm7                   \n\t" | ||||
|                 WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|                 "pop %%"REG_BP"                         \n\t" | ||||
|                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|                 WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) | ||||
|                 "pop %%"FF_REG_BP"                      \n\t" | ||||
|                 "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|                 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|                    "a" (&c->redDither) | ||||
|             ); | ||||
| @@ -1285,14 +1285,14 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, | ||||
|     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster | ||||
|         const int16_t *ubuf1 = ubuf[0]; | ||||
|         __asm__ volatile( | ||||
|             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|             "mov        %4, %%"REG_b"               \n\t" | ||||
|             "push %%"REG_BP"                        \n\t" | ||||
|             YSCALEYUV2RGB1(%%REGBP, %5) | ||||
|             "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|             "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|             "push %%"FF_REG_BP"                     \n\t" | ||||
|             YSCALEYUV2RGB1(%%FF_REGBP, %5) | ||||
|             "pxor    %%mm7, %%mm7                   \n\t" | ||||
|             WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) | ||||
|             "pop %%"REG_BP"                         \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|             WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) | ||||
|             "pop %%"FF_REG_BP"                      \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|                "a" (&c->redDither) | ||||
|                NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) | ||||
| @@ -1300,14 +1300,14 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, | ||||
|     } else { | ||||
|         const int16_t *ubuf1 = ubuf[1]; | ||||
|         __asm__ volatile( | ||||
|             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|             "mov        %4, %%"REG_b"               \n\t" | ||||
|             "push %%"REG_BP"                        \n\t" | ||||
|             YSCALEYUV2RGB1b(%%REGBP, %5) | ||||
|             "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|             "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|             "push %%"FF_REG_BP"                     \n\t" | ||||
|             YSCALEYUV2RGB1b(%%FF_REGBP, %5) | ||||
|             "pxor    %%mm7, %%mm7                   \n\t" | ||||
|             WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) | ||||
|             "pop %%"REG_BP"                         \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|             WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) | ||||
|             "pop %%"FF_REG_BP"                      \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|                "a" (&c->redDither) | ||||
|                NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) | ||||
| @@ -1326,20 +1326,20 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, | ||||
|     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster | ||||
|         const int16_t *ubuf1 = ubuf[0]; | ||||
|         __asm__ volatile( | ||||
|             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|             "mov        %4, %%"REG_b"               \n\t" | ||||
|             "push %%"REG_BP"                        \n\t" | ||||
|             YSCALEYUV2RGB1(%%REGBP, %5) | ||||
|             "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|             "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|             "push %%"FF_REG_BP"                     \n\t" | ||||
|             YSCALEYUV2RGB1(%%FF_REGBP, %5) | ||||
|             "pxor    %%mm7, %%mm7                   \n\t" | ||||
|             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | ||||
| #ifdef DITHER1XBPP | ||||
|             "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t" | ||||
|             "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t" | ||||
|             "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t" | ||||
|             "paddusb "RED_DITHER"(%5), %%mm5      \n\t" | ||||
|             "paddusb "RED_DITHER"(%5), %%mm5        \n\t" | ||||
| #endif | ||||
|             WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) | ||||
|             "pop %%"REG_BP"                         \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|             WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) | ||||
|             "pop %%"FF_REG_BP"                      \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|                "a" (&c->redDither) | ||||
|                NAMED_CONSTRAINTS_ADD(bF8) | ||||
| @@ -1347,20 +1347,20 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, | ||||
|     } else { | ||||
|         const int16_t *ubuf1 = ubuf[1]; | ||||
|         __asm__ volatile( | ||||
|             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|             "mov        %4, %%"REG_b"               \n\t" | ||||
|             "push %%"REG_BP"                        \n\t" | ||||
|             YSCALEYUV2RGB1b(%%REGBP, %5) | ||||
|             "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|             "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|             "push %%"FF_REG_BP"                     \n\t" | ||||
|             YSCALEYUV2RGB1b(%%FF_REGBP, %5) | ||||
|             "pxor    %%mm7, %%mm7                   \n\t" | ||||
|             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | ||||
| #ifdef DITHER1XBPP | ||||
|             "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t" | ||||
|             "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t" | ||||
|             "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t" | ||||
|             "paddusb "RED_DITHER"(%5), %%mm5      \n\t" | ||||
|             "paddusb "RED_DITHER"(%5), %%mm5        \n\t" | ||||
| #endif | ||||
|             WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) | ||||
|             "pop %%"REG_BP"                         \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|             WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) | ||||
|             "pop %%"FF_REG_BP"                      \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|                "a" (&c->redDither) | ||||
|                NAMED_CONSTRAINTS_ADD(bF8) | ||||
| @@ -1379,20 +1379,20 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, | ||||
|     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster | ||||
|         const int16_t *ubuf1 = ubuf[0]; | ||||
|         __asm__ volatile( | ||||
|             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|             "mov        %4, %%"REG_b"               \n\t" | ||||
|             "push %%"REG_BP"                        \n\t" | ||||
|             YSCALEYUV2RGB1(%%REGBP, %5) | ||||
|             "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|             "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|             "push %%"FF_REG_BP"                     \n\t" | ||||
|             YSCALEYUV2RGB1(%%FF_REGBP, %5) | ||||
|             "pxor    %%mm7, %%mm7                   \n\t" | ||||
|             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | ||||
| #ifdef DITHER1XBPP | ||||
|             "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t" | ||||
|             "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t" | ||||
|             "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t" | ||||
|             "paddusb "RED_DITHER"(%5), %%mm5      \n\t" | ||||
|             "paddusb "RED_DITHER"(%5), %%mm5        \n\t" | ||||
| #endif | ||||
|             WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) | ||||
|             "pop %%"REG_BP"                         \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|             WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) | ||||
|             "pop %%"FF_REG_BP"                      \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|                "a" (&c->redDither) | ||||
|                NAMED_CONSTRAINTS_ADD(bF8,bFC) | ||||
| @@ -1400,20 +1400,20 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, | ||||
|     } else { | ||||
|         const int16_t *ubuf1 = ubuf[1]; | ||||
|         __asm__ volatile( | ||||
|             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|             "mov        %4, %%"REG_b"               \n\t" | ||||
|             "push %%"REG_BP"                        \n\t" | ||||
|             YSCALEYUV2RGB1b(%%REGBP, %5) | ||||
|             "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|             "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|             "push %%"FF_REG_BP"                     \n\t" | ||||
|             YSCALEYUV2RGB1b(%%FF_REGBP, %5) | ||||
|             "pxor    %%mm7, %%mm7                   \n\t" | ||||
|             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | ||||
| #ifdef DITHER1XBPP | ||||
|             "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t" | ||||
|             "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t" | ||||
|             "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t" | ||||
|             "paddusb "RED_DITHER"(%5), %%mm5      \n\t" | ||||
|             "paddusb "RED_DITHER"(%5), %%mm5        \n\t" | ||||
| #endif | ||||
|             WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) | ||||
|             "pop %%"REG_BP"                         \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|             WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) | ||||
|             "pop %%"FF_REG_BP"                      \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|                "a" (&c->redDither) | ||||
|                NAMED_CONSTRAINTS_ADD(bF8,bFC) | ||||
| @@ -1469,26 +1469,26 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, | ||||
|     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster | ||||
|         const int16_t *ubuf1 = ubuf[0]; | ||||
|         __asm__ volatile( | ||||
|             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|             "mov        %4, %%"REG_b"               \n\t" | ||||
|             "push %%"REG_BP"                        \n\t" | ||||
|             YSCALEYUV2PACKED1(%%REGBP, %5) | ||||
|             WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) | ||||
|             "pop %%"REG_BP"                         \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|             "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|             "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|             "push %%"FF_REG_BP"                     \n\t" | ||||
|             YSCALEYUV2PACKED1(%%FF_REGBP, %5) | ||||
|             WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) | ||||
|             "pop %%"FF_REG_BP"                      \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|                "a" (&c->redDither) | ||||
|         ); | ||||
|     } else { | ||||
|         const int16_t *ubuf1 = ubuf[1]; | ||||
|         __asm__ volatile( | ||||
|             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" | ||||
|             "mov        %4, %%"REG_b"               \n\t" | ||||
|             "push %%"REG_BP"                        \n\t" | ||||
|             YSCALEYUV2PACKED1b(%%REGBP, %5) | ||||
|             WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) | ||||
|             "pop %%"REG_BP"                         \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" | ||||
|             "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t" | ||||
|             "mov           %4, %%"FF_REG_b"         \n\t" | ||||
|             "push %%"FF_REG_BP"                     \n\t" | ||||
|             YSCALEYUV2PACKED1b(%%FF_REGBP, %5) | ||||
|             WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) | ||||
|             "pop %%"FF_REG_BP"                      \n\t" | ||||
|             "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t" | ||||
|             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), | ||||
|                "a" (&c->redDither) | ||||
|         ); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user