You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-11-06 08:29:25 +02:00
asm: FF_-prefix internal macros used in inline assembly
These macros conflict with system macros on Solaris, producing truckloads of warnings about macro redefinition.
This commit is contained in:
@@ -649,9 +649,9 @@ static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
|
||||
"jmp 9f \n\t"
|
||||
// Begin
|
||||
"0: \n\t"
|
||||
"movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
|
||||
"movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
|
||||
"movd 1(%%"REG_c", %%"REG_S"), %%mm1 \n\t"
|
||||
"movq (%%"FF_REG_d", %%"FF_REG_a"), %%mm3 \n\t"
|
||||
"movd (%%"FF_REG_c", %%"FF_REG_S"), %%mm0 \n\t"
|
||||
"movd 1(%%"FF_REG_c", %%"FF_REG_S"), %%mm1 \n\t"
|
||||
"punpcklbw %%mm7, %%mm1 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"pshufw $0xFF, %%mm1, %%mm1 \n\t"
|
||||
@@ -659,14 +659,14 @@ static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
|
||||
"pshufw $0xFF, %%mm0, %%mm0 \n\t"
|
||||
"2: \n\t"
|
||||
"psubw %%mm1, %%mm0 \n\t"
|
||||
"movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
|
||||
"movl 8(%%"FF_REG_b", %%"FF_REG_a"), %%esi \n\t"
|
||||
"pmullw %%mm3, %%mm0 \n\t"
|
||||
"psllw $7, %%mm1 \n\t"
|
||||
"paddw %%mm1, %%mm0 \n\t"
|
||||
|
||||
"movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"
|
||||
"movq %%mm0, (%%"FF_REG_D", %%"FF_REG_a") \n\t"
|
||||
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
"add $8, %%"FF_REG_a" \n\t"
|
||||
// End
|
||||
"9: \n\t"
|
||||
// "int $3 \n\t"
|
||||
@@ -689,22 +689,22 @@ static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
|
||||
"jmp 9f \n\t"
|
||||
// Begin
|
||||
"0: \n\t"
|
||||
"movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
|
||||
"movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
|
||||
"movq (%%"FF_REG_d", %%"FF_REG_a"), %%mm3 \n\t"
|
||||
"movd (%%"FF_REG_c", %%"FF_REG_S"), %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"pshufw $0xFF, %%mm0, %%mm1 \n\t"
|
||||
"1: \n\t"
|
||||
"pshufw $0xFF, %%mm0, %%mm0 \n\t"
|
||||
"2: \n\t"
|
||||
"psubw %%mm1, %%mm0 \n\t"
|
||||
"movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
|
||||
"movl 8(%%"FF_REG_b", %%"FF_REG_a"), %%esi \n\t"
|
||||
"pmullw %%mm3, %%mm0 \n\t"
|
||||
"psllw $7, %%mm1 \n\t"
|
||||
"paddw %%mm1, %%mm0 \n\t"
|
||||
|
||||
"movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"
|
||||
"movq %%mm0, (%%"FF_REG_D", %%"FF_REG_a") \n\t"
|
||||
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
"add $8, %%"FF_REG_a" \n\t"
|
||||
// End
|
||||
"9: \n\t"
|
||||
// "int $3 \n\t"
|
||||
|
||||
@@ -1109,43 +1109,43 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr
|
||||
unsigned i;
|
||||
x86_reg mmx_size= 23 - src_size;
|
||||
__asm__ volatile (
|
||||
"test %%"REG_a", %%"REG_a" \n\t"
|
||||
"test %%"FF_REG_a", %%"FF_REG_a" \n\t"
|
||||
"jns 2f \n\t"
|
||||
"movq "MANGLE(mask24r)", %%mm5 \n\t"
|
||||
"movq "MANGLE(mask24g)", %%mm6 \n\t"
|
||||
"movq "MANGLE(mask24b)", %%mm7 \n\t"
|
||||
".p2align 4 \n\t"
|
||||
"1: \n\t"
|
||||
PREFETCH" 32(%1, %%"REG_a") \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG
|
||||
"movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG
|
||||
"movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B
|
||||
PREFETCH" 32(%1, %%"FF_REG_a") \n\t"
|
||||
"movq (%1, %%"FF_REG_a"), %%mm0 \n\t" // BGR BGR BG
|
||||
"movq (%1, %%"FF_REG_a"), %%mm1 \n\t" // BGR BGR BG
|
||||
"movq 2(%1, %%"FF_REG_a"), %%mm2 \n\t" // R BGR BGR B
|
||||
"psllq $16, %%mm0 \n\t" // 00 BGR BGR
|
||||
"pand %%mm5, %%mm0 \n\t"
|
||||
"pand %%mm6, %%mm1 \n\t"
|
||||
"pand %%mm7, %%mm2 \n\t"
|
||||
"por %%mm0, %%mm1 \n\t"
|
||||
"por %%mm2, %%mm1 \n\t"
|
||||
"movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG
|
||||
MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t" // RGB RGB RG
|
||||
"movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B
|
||||
"movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR
|
||||
"movq 6(%1, %%"FF_REG_a"), %%mm0 \n\t" // BGR BGR BG
|
||||
MOVNTQ" %%mm1, (%2, %%"FF_REG_a") \n\t" // RGB RGB RG
|
||||
"movq 8(%1, %%"FF_REG_a"), %%mm1 \n\t" // R BGR BGR B
|
||||
"movq 10(%1, %%"FF_REG_a"), %%mm2 \n\t" // GR BGR BGR
|
||||
"pand %%mm7, %%mm0 \n\t"
|
||||
"pand %%mm5, %%mm1 \n\t"
|
||||
"pand %%mm6, %%mm2 \n\t"
|
||||
"por %%mm0, %%mm1 \n\t"
|
||||
"por %%mm2, %%mm1 \n\t"
|
||||
"movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B
|
||||
MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t" // B RGB RGB R
|
||||
"movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR
|
||||
"movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG
|
||||
"movq 14(%1, %%"FF_REG_a"), %%mm0 \n\t" // R BGR BGR B
|
||||
MOVNTQ" %%mm1, 8(%2, %%"FF_REG_a")\n\t" // B RGB RGB R
|
||||
"movq 16(%1, %%"FF_REG_a"), %%mm1 \n\t" // GR BGR BGR
|
||||
"movq 18(%1, %%"FF_REG_a"), %%mm2 \n\t" // BGR BGR BG
|
||||
"pand %%mm6, %%mm0 \n\t"
|
||||
"pand %%mm7, %%mm1 \n\t"
|
||||
"pand %%mm5, %%mm2 \n\t"
|
||||
"por %%mm0, %%mm1 \n\t"
|
||||
"por %%mm2, %%mm1 \n\t"
|
||||
MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t"
|
||||
"add $24, %%"REG_a" \n\t"
|
||||
MOVNTQ" %%mm1, 16(%2, %%"FF_REG_a")\n\t"
|
||||
"add $24, %%"FF_REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
"2: \n\t"
|
||||
: "+a" (mmx_size)
|
||||
@@ -1180,20 +1180,20 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
|
||||
for (y=0; y<height; y++) {
|
||||
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
|
||||
__asm__ volatile(
|
||||
"xor %%"REG_a", %%"REG_a" \n\t"
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a"\n\t"
|
||||
".p2align 4 \n\t"
|
||||
"1: \n\t"
|
||||
PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
|
||||
PREFETCH" 32(%2, %%"REG_a") \n\t"
|
||||
PREFETCH" 32(%3, %%"REG_a") \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0)
|
||||
PREFETCH" 32(%1, %%"FF_REG_a", 2) \n\t"
|
||||
PREFETCH" 32(%2, %%"FF_REG_a") \n\t"
|
||||
PREFETCH" 32(%3, %%"FF_REG_a") \n\t"
|
||||
"movq (%2, %%"FF_REG_a"), %%mm0 \n\t" // U(0)
|
||||
"movq %%mm0, %%mm2 \n\t" // U(0)
|
||||
"movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0)
|
||||
"movq (%3, %%"FF_REG_a"), %%mm1 \n\t" // V(0)
|
||||
"punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
|
||||
"punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
|
||||
|
||||
"movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0)
|
||||
"movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8)
|
||||
"movq (%1, %%"FF_REG_a",2), %%mm3 \n\t" // Y(0)
|
||||
"movq 8(%1, %%"FF_REG_a",2), %%mm5 \n\t" // Y(8)
|
||||
"movq %%mm3, %%mm4 \n\t" // Y(0)
|
||||
"movq %%mm5, %%mm6 \n\t" // Y(8)
|
||||
"punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0)
|
||||
@@ -1201,16 +1201,16 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
|
||||
"punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8)
|
||||
"punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12)
|
||||
|
||||
MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t"
|
||||
MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
|
||||
MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t"
|
||||
MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
|
||||
MOVNTQ" %%mm3, (%0, %%"FF_REG_a", 4) \n\t"
|
||||
MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a", 4) \n\t"
|
||||
MOVNTQ" %%mm5, 16(%0, %%"FF_REG_a", 4) \n\t"
|
||||
MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a", 4) \n\t"
|
||||
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
"cmp %4, %%"REG_a" \n\t"
|
||||
"add $8, %%"FF_REG_a"\n\t"
|
||||
"cmp %4, %%"FF_REG_a"\n\t"
|
||||
" jb 1b \n\t"
|
||||
::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
|
||||
: "%"REG_a
|
||||
: "%"FF_REG_a
|
||||
);
|
||||
if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
|
||||
usrc += chromStride;
|
||||
@@ -1245,20 +1245,20 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
|
||||
for (y=0; y<height; y++) {
|
||||
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
|
||||
__asm__ volatile(
|
||||
"xor %%"REG_a", %%"REG_a" \n\t"
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
|
||||
".p2align 4 \n\t"
|
||||
"1: \n\t"
|
||||
PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
|
||||
PREFETCH" 32(%2, %%"REG_a") \n\t"
|
||||
PREFETCH" 32(%3, %%"REG_a") \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0)
|
||||
PREFETCH" 32(%1, %%"FF_REG_a", 2) \n\t"
|
||||
PREFETCH" 32(%2, %%"FF_REG_a") \n\t"
|
||||
PREFETCH" 32(%3, %%"FF_REG_a") \n\t"
|
||||
"movq (%2, %%"FF_REG_a"), %%mm0 \n\t" // U(0)
|
||||
"movq %%mm0, %%mm2 \n\t" // U(0)
|
||||
"movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0)
|
||||
"movq (%3, %%"FF_REG_a"), %%mm1 \n\t" // V(0)
|
||||
"punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
|
||||
"punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
|
||||
|
||||
"movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0)
|
||||
"movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8)
|
||||
"movq (%1, %%"FF_REG_a",2), %%mm3 \n\t" // Y(0)
|
||||
"movq 8(%1, %%"FF_REG_a",2), %%mm5 \n\t" // Y(8)
|
||||
"movq %%mm0, %%mm4 \n\t" // Y(0)
|
||||
"movq %%mm2, %%mm6 \n\t" // Y(8)
|
||||
"punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0)
|
||||
@@ -1266,16 +1266,16 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
|
||||
"punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8)
|
||||
"punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12)
|
||||
|
||||
MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t"
|
||||
MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
|
||||
MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t"
|
||||
MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
|
||||
MOVNTQ" %%mm0, (%0, %%"FF_REG_a", 4) \n\t"
|
||||
MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a", 4) \n\t"
|
||||
MOVNTQ" %%mm2, 16(%0, %%"FF_REG_a", 4) \n\t"
|
||||
MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a", 4) \n\t"
|
||||
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
"cmp %4, %%"REG_a" \n\t"
|
||||
"add $8, %%"FF_REG_a" \n\t"
|
||||
"cmp %4, %%"FF_REG_a" \n\t"
|
||||
" jb 1b \n\t"
|
||||
::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
|
||||
: "%"REG_a
|
||||
: "%"FF_REG_a
|
||||
);
|
||||
if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
|
||||
usrc += chromStride;
|
||||
@@ -1333,14 +1333,14 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
|
||||
const x86_reg chromWidth= width>>1;
|
||||
for (y=0; y<height; y+=2) {
|
||||
__asm__ volatile(
|
||||
"xor %%"REG_a", %%"REG_a" \n\t"
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a"\n\t"
|
||||
"pcmpeqw %%mm7, %%mm7 \n\t"
|
||||
"psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
|
||||
".p2align 4 \n\t"
|
||||
"1: \n\t"
|
||||
PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
|
||||
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
|
||||
"movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
|
||||
PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
|
||||
"movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
|
||||
"movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
|
||||
"movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
|
||||
"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4)
|
||||
"psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0)
|
||||
@@ -1350,10 +1350,10 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
|
||||
"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
|
||||
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
|
||||
|
||||
MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2)\n\t"
|
||||
|
||||
"movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8)
|
||||
"movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12)
|
||||
"movq 16(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8)
|
||||
"movq 24(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12)
|
||||
"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8)
|
||||
"movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12)
|
||||
"psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8)
|
||||
@@ -1363,7 +1363,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
|
||||
"packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
|
||||
"packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
|
||||
|
||||
MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm3, 8(%1, %%"FF_REG_a", 2) \n\t"
|
||||
|
||||
"movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
|
||||
"movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
|
||||
@@ -1374,28 +1374,28 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
|
||||
"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
|
||||
"packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
|
||||
|
||||
MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
|
||||
MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
|
||||
MOVNTQ" %%mm0, (%3, %%"FF_REG_a") \n\t"
|
||||
MOVNTQ" %%mm2, (%2, %%"FF_REG_a") \n\t"
|
||||
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
"cmp %4, %%"REG_a" \n\t"
|
||||
"add $8, %%"FF_REG_a"\n\t"
|
||||
"cmp %4, %%"FF_REG_a"\n\t"
|
||||
" jb 1b \n\t"
|
||||
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
|
||||
: "memory", "%"REG_a
|
||||
: "memory", "%"FF_REG_a
|
||||
);
|
||||
|
||||
ydst += lumStride;
|
||||
src += srcStride;
|
||||
|
||||
__asm__ volatile(
|
||||
"xor %%"REG_a", %%"REG_a" \n\t"
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a"\n\t"
|
||||
".p2align 4 \n\t"
|
||||
"1: \n\t"
|
||||
PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
|
||||
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
|
||||
"movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
|
||||
"movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8)
|
||||
"movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12)
|
||||
PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
|
||||
"movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
|
||||
"movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
|
||||
"movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8)
|
||||
"movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12)
|
||||
"pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
|
||||
"pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
|
||||
"pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
|
||||
@@ -1403,15 +1403,15 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
|
||||
"packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
|
||||
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
|
||||
|
||||
MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm0, (%1, %%"FF_REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm2, 8(%1, %%"FF_REG_a", 2) \n\t"
|
||||
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
"cmp %4, %%"REG_a" \n\t"
|
||||
"add $8, %%"FF_REG_a"\n\t"
|
||||
"cmp %4, %%"FF_REG_a"\n\t"
|
||||
" jb 1b \n\t"
|
||||
|
||||
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
|
||||
: "memory", "%"REG_a
|
||||
: "memory", "%"FF_REG_a
|
||||
);
|
||||
udst += chromStride;
|
||||
vdst += chromStride;
|
||||
@@ -1443,23 +1443,23 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
|
||||
for (y=1; y<srcHeight; y++) {
|
||||
const x86_reg mmxSize= srcWidth&~15;
|
||||
__asm__ volatile(
|
||||
"mov %4, %%"REG_a" \n\t"
|
||||
"mov %4, %%"FF_REG_a" \n\t"
|
||||
"movq "MANGLE(mmx_ff)", %%mm0 \n\t"
|
||||
"movq (%0, %%"REG_a"), %%mm4 \n\t"
|
||||
"movq (%0, %%"FF_REG_a"), %%mm4 \n\t"
|
||||
"movq %%mm4, %%mm2 \n\t"
|
||||
"psllq $8, %%mm4 \n\t"
|
||||
"pand %%mm0, %%mm2 \n\t"
|
||||
"por %%mm2, %%mm4 \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm5 \n\t"
|
||||
"movq (%1, %%"FF_REG_a"), %%mm5 \n\t"
|
||||
"movq %%mm5, %%mm3 \n\t"
|
||||
"psllq $8, %%mm5 \n\t"
|
||||
"pand %%mm0, %%mm3 \n\t"
|
||||
"por %%mm3, %%mm5 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq 1(%0, %%"REG_a"), %%mm2 \n\t"
|
||||
"movq 1(%1, %%"REG_a"), %%mm3 \n\t"
|
||||
"movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
|
||||
"movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
|
||||
"movq 1(%0, %%"FF_REG_a"), %%mm2 \n\t"
|
||||
"movq 1(%1, %%"FF_REG_a"), %%mm3 \n\t"
|
||||
PAVGB" %%mm0, %%mm5 \n\t"
|
||||
PAVGB" %%mm0, %%mm3 \n\t"
|
||||
PAVGB" %%mm0, %%mm5 \n\t"
|
||||
@@ -1474,18 +1474,18 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
|
||||
"punpckhbw %%mm3, %%mm7 \n\t"
|
||||
"punpcklbw %%mm2, %%mm4 \n\t"
|
||||
"punpckhbw %%mm2, %%mm6 \n\t"
|
||||
MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t"
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
"movq -1(%0, %%"REG_a"), %%mm4 \n\t"
|
||||
"movq -1(%1, %%"REG_a"), %%mm5 \n\t"
|
||||
MOVNTQ" %%mm5, (%2, %%"FF_REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm7, 8(%2, %%"FF_REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm4, (%3, %%"FF_REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm6, 8(%3, %%"FF_REG_a", 2) \n\t"
|
||||
"add $8, %%"FF_REG_a" \n\t"
|
||||
"movq -1(%0, %%"FF_REG_a"), %%mm4 \n\t"
|
||||
"movq -1(%1, %%"FF_REG_a"), %%mm5 \n\t"
|
||||
" js 1b \n\t"
|
||||
:: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
|
||||
"r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
|
||||
"g" (-mmxSize)
|
||||
: "%"REG_a
|
||||
: "%"FF_REG_a
|
||||
);
|
||||
|
||||
for (x=mmxSize-1; x<srcWidth-1; x++) {
|
||||
@@ -1531,14 +1531,14 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
|
||||
const x86_reg chromWidth= width>>1;
|
||||
for (y=0; y<height; y+=2) {
|
||||
__asm__ volatile(
|
||||
"xor %%"REG_a", %%"REG_a" \n\t"
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
|
||||
"pcmpeqw %%mm7, %%mm7 \n\t"
|
||||
"psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
|
||||
".p2align 4 \n\t"
|
||||
"1: \n\t"
|
||||
PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
|
||||
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0)
|
||||
"movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4)
|
||||
PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
|
||||
"movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0)
|
||||
"movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4)
|
||||
"movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0)
|
||||
"movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4)
|
||||
"pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0)
|
||||
@@ -1548,10 +1548,10 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
|
||||
"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
|
||||
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
|
||||
|
||||
MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2) \n\t"
|
||||
|
||||
"movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(8)
|
||||
"movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // UYVY UYVY(12)
|
||||
"movq 16(%0, %%"FF_REG_a", 4), %%mm1\n\t" // UYVY UYVY(8)
|
||||
"movq 24(%0, %%"FF_REG_a", 4), %%mm2\n\t" // UYVY UYVY(12)
|
||||
"movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8)
|
||||
"movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12)
|
||||
"pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8)
|
||||
@@ -1561,7 +1561,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
|
||||
"packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
|
||||
"packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
|
||||
|
||||
MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm3, 8(%1, %%"FF_REG_a", 2) \n\t"
|
||||
|
||||
"movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
|
||||
"movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
|
||||
@@ -1572,28 +1572,28 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
|
||||
"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
|
||||
"packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
|
||||
|
||||
MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
|
||||
MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
|
||||
MOVNTQ" %%mm0, (%3, %%"FF_REG_a") \n\t"
|
||||
MOVNTQ" %%mm2, (%2, %%"FF_REG_a") \n\t"
|
||||
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
"cmp %4, %%"REG_a" \n\t"
|
||||
"add $8, %%"FF_REG_a" \n\t"
|
||||
"cmp %4, %%"FF_REG_a" \n\t"
|
||||
" jb 1b \n\t"
|
||||
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
|
||||
: "memory", "%"REG_a
|
||||
: "memory", "%"FF_REG_a
|
||||
);
|
||||
|
||||
ydst += lumStride;
|
||||
src += srcStride;
|
||||
|
||||
__asm__ volatile(
|
||||
"xor %%"REG_a", %%"REG_a" \n\t"
|
||||
".p2align 4 \n\t"
|
||||
"1: \n\t"
|
||||
PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
|
||||
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
|
||||
"movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
|
||||
"movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8)
|
||||
"movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12)
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
|
||||
".p2align 4 \n\t"
|
||||
"1: \n\t"
|
||||
PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
|
||||
"movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
|
||||
"movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
|
||||
"movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8)
|
||||
"movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12)
|
||||
"psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
|
||||
"psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
|
||||
"psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
|
||||
@@ -1601,15 +1601,15 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
|
||||
"packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
|
||||
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
|
||||
|
||||
MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm0, (%1, %%"FF_REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm2, 8(%1, %%"FF_REG_a", 2) \n\t"
|
||||
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
"cmp %4, %%"REG_a" \n\t"
|
||||
"add $8, %%"FF_REG_a" \n\t"
|
||||
"cmp %4, %%"FF_REG_a" \n\t"
|
||||
" jb 1b \n\t"
|
||||
|
||||
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
|
||||
: "memory", "%"REG_a
|
||||
: "memory", "%"FF_REG_a
|
||||
);
|
||||
udst += chromStride;
|
||||
vdst += chromStride;
|
||||
@@ -1639,20 +1639,20 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
|
||||
int i;
|
||||
for (i=0; i<2; i++) {
|
||||
__asm__ volatile(
|
||||
"mov %2, %%"REG_a" \n\t"
|
||||
"mov %2, %%"FF_REG_a"\n\t"
|
||||
"movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
|
||||
"movq "MANGLE(ff_w1111)", %%mm5 \n\t"
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
|
||||
"lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d"\n\t"
|
||||
".p2align 4 \n\t"
|
||||
"1: \n\t"
|
||||
PREFETCH" 64(%0, %%"REG_d") \n\t"
|
||||
"movd (%0, %%"REG_d"), %%mm0 \n\t"
|
||||
"movd 3(%0, %%"REG_d"), %%mm1 \n\t"
|
||||
PREFETCH" 64(%0, %%"FF_REG_d") \n\t"
|
||||
"movd (%0, %%"FF_REG_d"), %%mm0 \n\t"
|
||||
"movd 3(%0, %%"FF_REG_d"), %%mm1 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm1 \n\t"
|
||||
"movd 6(%0, %%"REG_d"), %%mm2 \n\t"
|
||||
"movd 9(%0, %%"REG_d"), %%mm3 \n\t"
|
||||
"movd 6(%0, %%"FF_REG_d"), %%mm2 \n\t"
|
||||
"movd 9(%0, %%"FF_REG_d"), %%mm3 \n\t"
|
||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||
"punpcklbw %%mm7, %%mm3 \n\t"
|
||||
"pmaddwd %%mm6, %%mm0 \n\t"
|
||||
@@ -1672,12 +1672,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
|
||||
"packssdw %%mm2, %%mm0 \n\t"
|
||||
"psraw $7, %%mm0 \n\t"
|
||||
|
||||
"movd 12(%0, %%"REG_d"), %%mm4 \n\t"
|
||||
"movd 15(%0, %%"REG_d"), %%mm1 \n\t"
|
||||
"movd 12(%0, %%"FF_REG_d"), %%mm4 \n\t"
|
||||
"movd 15(%0, %%"FF_REG_d"), %%mm1 \n\t"
|
||||
"punpcklbw %%mm7, %%mm4 \n\t"
|
||||
"punpcklbw %%mm7, %%mm1 \n\t"
|
||||
"movd 18(%0, %%"REG_d"), %%mm2 \n\t"
|
||||
"movd 21(%0, %%"REG_d"), %%mm3 \n\t"
|
||||
"movd 18(%0, %%"FF_REG_d"), %%mm2 \n\t"
|
||||
"movd 21(%0, %%"FF_REG_d"), %%mm3 \n\t"
|
||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||
"punpcklbw %%mm7, %%mm3 \n\t"
|
||||
"pmaddwd %%mm6, %%mm4 \n\t"
|
||||
@@ -1694,39 +1694,39 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
|
||||
"packssdw %%mm3, %%mm2 \n\t"
|
||||
"pmaddwd %%mm5, %%mm4 \n\t"
|
||||
"pmaddwd %%mm5, %%mm2 \n\t"
|
||||
"add $24, %%"REG_d" \n\t"
|
||||
"add $24, %%"FF_REG_d"\n\t"
|
||||
"packssdw %%mm2, %%mm4 \n\t"
|
||||
"psraw $7, %%mm4 \n\t"
|
||||
|
||||
"packuswb %%mm4, %%mm0 \n\t"
|
||||
"paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t"
|
||||
|
||||
MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t"
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
MOVNTQ" %%mm0, (%1, %%"FF_REG_a") \n\t"
|
||||
"add $8, %%"FF_REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
: : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
|
||||
: "%"REG_a, "%"REG_d
|
||||
: "%"FF_REG_a, "%"FF_REG_d
|
||||
);
|
||||
ydst += lumStride;
|
||||
src += srcStride;
|
||||
}
|
||||
src -= srcStride*2;
|
||||
__asm__ volatile(
|
||||
"mov %4, %%"REG_a" \n\t"
|
||||
"mov %4, %%"FF_REG_a"\n\t"
|
||||
"movq "MANGLE(ff_w1111)", %%mm5 \n\t"
|
||||
"movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t"
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
|
||||
"add %%"REG_d", %%"REG_d" \n\t"
|
||||
"lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d" \n\t"
|
||||
"add %%"FF_REG_d", %%"FF_REG_d"\n\t"
|
||||
".p2align 4 \n\t"
|
||||
"1: \n\t"
|
||||
PREFETCH" 64(%0, %%"REG_d") \n\t"
|
||||
PREFETCH" 64(%1, %%"REG_d") \n\t"
|
||||
PREFETCH" 64(%0, %%"FF_REG_d") \n\t"
|
||||
PREFETCH" 64(%1, %%"FF_REG_d") \n\t"
|
||||
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
|
||||
"movq (%0, %%"REG_d"), %%mm0 \n\t"
|
||||
"movq (%1, %%"REG_d"), %%mm1 \n\t"
|
||||
"movq 6(%0, %%"REG_d"), %%mm2 \n\t"
|
||||
"movq 6(%1, %%"REG_d"), %%mm3 \n\t"
|
||||
"movq (%0, %%"FF_REG_d"), %%mm0 \n\t"
|
||||
"movq (%1, %%"FF_REG_d"), %%mm1 \n\t"
|
||||
"movq 6(%0, %%"FF_REG_d"), %%mm2 \n\t"
|
||||
"movq 6(%1, %%"FF_REG_d"), %%mm3 \n\t"
|
||||
PAVGB" %%mm1, %%mm0 \n\t"
|
||||
PAVGB" %%mm3, %%mm2 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
@@ -1738,10 +1738,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||
#else
|
||||
"movd (%0, %%"REG_d"), %%mm0 \n\t"
|
||||
"movd (%1, %%"REG_d"), %%mm1 \n\t"
|
||||
"movd 3(%0, %%"REG_d"), %%mm2 \n\t"
|
||||
"movd 3(%1, %%"REG_d"), %%mm3 \n\t"
|
||||
"movd (%0, %%"FF_REG_d"), %%mm0 \n\t"
|
||||
"movd (%1, %%"FF_REG_d"), %%mm1 \n\t"
|
||||
"movd 3(%0, %%"FF_REG_d"), %%mm2 \n\t"
|
||||
"movd 3(%1, %%"FF_REG_d"), %%mm3 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm1 \n\t"
|
||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||
@@ -1749,10 +1749,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
|
||||
"paddw %%mm1, %%mm0 \n\t"
|
||||
"paddw %%mm3, %%mm2 \n\t"
|
||||
"paddw %%mm2, %%mm0 \n\t"
|
||||
"movd 6(%0, %%"REG_d"), %%mm4 \n\t"
|
||||
"movd 6(%1, %%"REG_d"), %%mm1 \n\t"
|
||||
"movd 9(%0, %%"REG_d"), %%mm2 \n\t"
|
||||
"movd 9(%1, %%"REG_d"), %%mm3 \n\t"
|
||||
"movd 6(%0, %%"FF_REG_d"), %%mm4 \n\t"
|
||||
"movd 6(%1, %%"FF_REG_d"), %%mm1 \n\t"
|
||||
"movd 9(%0, %%"FF_REG_d"), %%mm2 \n\t"
|
||||
"movd 9(%1, %%"FF_REG_d"), %%mm3 \n\t"
|
||||
"punpcklbw %%mm7, %%mm4 \n\t"
|
||||
"punpcklbw %%mm7, %%mm1 \n\t"
|
||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||
@@ -1784,10 +1784,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
|
||||
"psraw $7, %%mm0 \n\t"
|
||||
|
||||
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
|
||||
"movq 12(%0, %%"REG_d"), %%mm4 \n\t"
|
||||
"movq 12(%1, %%"REG_d"), %%mm1 \n\t"
|
||||
"movq 18(%0, %%"REG_d"), %%mm2 \n\t"
|
||||
"movq 18(%1, %%"REG_d"), %%mm3 \n\t"
|
||||
"movq 12(%0, %%"FF_REG_d"), %%mm4 \n\t"
|
||||
"movq 12(%1, %%"FF_REG_d"), %%mm1 \n\t"
|
||||
"movq 18(%0, %%"FF_REG_d"), %%mm2 \n\t"
|
||||
"movq 18(%1, %%"FF_REG_d"), %%mm3 \n\t"
|
||||
PAVGB" %%mm1, %%mm4 \n\t"
|
||||
PAVGB" %%mm3, %%mm2 \n\t"
|
||||
"movq %%mm4, %%mm1 \n\t"
|
||||
@@ -1799,10 +1799,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
|
||||
"punpcklbw %%mm7, %%mm4 \n\t"
|
||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||
#else
|
||||
"movd 12(%0, %%"REG_d"), %%mm4 \n\t"
|
||||
"movd 12(%1, %%"REG_d"), %%mm1 \n\t"
|
||||
"movd 15(%0, %%"REG_d"), %%mm2 \n\t"
|
||||
"movd 15(%1, %%"REG_d"), %%mm3 \n\t"
|
||||
"movd 12(%0, %%"FF_REG_d"), %%mm4 \n\t"
|
||||
"movd 12(%1, %%"FF_REG_d"), %%mm1 \n\t"
|
||||
"movd 15(%0, %%"FF_REG_d"), %%mm2 \n\t"
|
||||
"movd 15(%1, %%"FF_REG_d"), %%mm3 \n\t"
|
||||
"punpcklbw %%mm7, %%mm4 \n\t"
|
||||
"punpcklbw %%mm7, %%mm1 \n\t"
|
||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||
@@ -1810,10 +1810,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
|
||||
"paddw %%mm1, %%mm4 \n\t"
|
||||
"paddw %%mm3, %%mm2 \n\t"
|
||||
"paddw %%mm2, %%mm4 \n\t"
|
||||
"movd 18(%0, %%"REG_d"), %%mm5 \n\t"
|
||||
"movd 18(%1, %%"REG_d"), %%mm1 \n\t"
|
||||
"movd 21(%0, %%"REG_d"), %%mm2 \n\t"
|
||||
"movd 21(%1, %%"REG_d"), %%mm3 \n\t"
|
||||
"movd 18(%0, %%"FF_REG_d"), %%mm5 \n\t"
|
||||
"movd 18(%1, %%"FF_REG_d"), %%mm1 \n\t"
|
||||
"movd 21(%0, %%"FF_REG_d"), %%mm2 \n\t"
|
||||
"movd 21(%1, %%"FF_REG_d"), %%mm3 \n\t"
|
||||
"punpcklbw %%mm7, %%mm5 \n\t"
|
||||
"punpcklbw %%mm7, %%mm1 \n\t"
|
||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||
@@ -1842,7 +1842,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
|
||||
"packssdw %%mm3, %%mm1 \n\t"
|
||||
"pmaddwd %%mm5, %%mm4 \n\t"
|
||||
"pmaddwd %%mm5, %%mm1 \n\t"
|
||||
"add $24, %%"REG_d" \n\t"
|
||||
"add $24, %%"FF_REG_d"\n\t"
|
||||
"packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2
|
||||
"psraw $7, %%mm4 \n\t"
|
||||
|
||||
@@ -1851,13 +1851,13 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
|
||||
"punpckhdq %%mm4, %%mm1 \n\t"
|
||||
"packsswb %%mm1, %%mm0 \n\t"
|
||||
"paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t"
|
||||
"movd %%mm0, (%2, %%"REG_a") \n\t"
|
||||
"movd %%mm0, (%2, %%"FF_REG_a") \n\t"
|
||||
"punpckhdq %%mm0, %%mm0 \n\t"
|
||||
"movd %%mm0, (%3, %%"REG_a") \n\t"
|
||||
"add $4, %%"REG_a" \n\t"
|
||||
"movd %%mm0, (%3, %%"FF_REG_a") \n\t"
|
||||
"add $4, %%"FF_REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
: : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
|
||||
: "%"REG_a, "%"REG_d
|
||||
: "%"FF_REG_a, "%"FF_REG_d
|
||||
);
|
||||
|
||||
udst += chromStride;
|
||||
@@ -1885,48 +1885,48 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
|
||||
|
||||
#if COMPILE_TEMPLATE_SSE2
|
||||
__asm__(
|
||||
"xor %%"REG_a", %%"REG_a" \n\t"
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
|
||||
"1: \n\t"
|
||||
PREFETCH" 64(%1, %%"REG_a") \n\t"
|
||||
PREFETCH" 64(%2, %%"REG_a") \n\t"
|
||||
"movdqa (%1, %%"REG_a"), %%xmm0 \n\t"
|
||||
"movdqa (%1, %%"REG_a"), %%xmm1 \n\t"
|
||||
"movdqa (%2, %%"REG_a"), %%xmm2 \n\t"
|
||||
PREFETCH" 64(%1, %%"FF_REG_a") \n\t"
|
||||
PREFETCH" 64(%2, %%"FF_REG_a") \n\t"
|
||||
"movdqa (%1, %%"FF_REG_a"), %%xmm0 \n\t"
|
||||
"movdqa (%1, %%"FF_REG_a"), %%xmm1 \n\t"
|
||||
"movdqa (%2, %%"FF_REG_a"), %%xmm2 \n\t"
|
||||
"punpcklbw %%xmm2, %%xmm0 \n\t"
|
||||
"punpckhbw %%xmm2, %%xmm1 \n\t"
|
||||
"movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t"
|
||||
"movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t"
|
||||
"add $16, %%"REG_a" \n\t"
|
||||
"cmp %3, %%"REG_a" \n\t"
|
||||
"movntdq %%xmm0, (%0, %%"FF_REG_a", 2) \n\t"
|
||||
"movntdq %%xmm1, 16(%0, %%"FF_REG_a", 2) \n\t"
|
||||
"add $16, %%"FF_REG_a" \n\t"
|
||||
"cmp %3, %%"FF_REG_a" \n\t"
|
||||
" jb 1b \n\t"
|
||||
::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
|
||||
: "memory", "%"REG_a""
|
||||
: "memory", "%"FF_REG_a""
|
||||
);
|
||||
#else
|
||||
__asm__(
|
||||
"xor %%"REG_a", %%"REG_a" \n\t"
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
|
||||
"1: \n\t"
|
||||
PREFETCH" 64(%1, %%"REG_a") \n\t"
|
||||
PREFETCH" 64(%2, %%"REG_a") \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq 8(%1, %%"REG_a"), %%mm2 \n\t"
|
||||
PREFETCH" 64(%1, %%"FF_REG_a") \n\t"
|
||||
PREFETCH" 64(%2, %%"FF_REG_a") \n\t"
|
||||
"movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
|
||||
"movq 8(%1, %%"FF_REG_a"), %%mm2 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm2, %%mm3 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm4 \n\t"
|
||||
"movq 8(%2, %%"REG_a"), %%mm5 \n\t"
|
||||
"movq (%2, %%"FF_REG_a"), %%mm4 \n\t"
|
||||
"movq 8(%2, %%"FF_REG_a"), %%mm5 \n\t"
|
||||
"punpcklbw %%mm4, %%mm0 \n\t"
|
||||
"punpckhbw %%mm4, %%mm1 \n\t"
|
||||
"punpcklbw %%mm5, %%mm2 \n\t"
|
||||
"punpckhbw %%mm5, %%mm3 \n\t"
|
||||
MOVNTQ" %%mm0, (%0, %%"REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t"
|
||||
"add $16, %%"REG_a" \n\t"
|
||||
"cmp %3, %%"REG_a" \n\t"
|
||||
MOVNTQ" %%mm0, (%0, %%"FF_REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm1, 8(%0, %%"FF_REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm2, 16(%0, %%"FF_REG_a", 2) \n\t"
|
||||
MOVNTQ" %%mm3, 24(%0, %%"FF_REG_a", 2) \n\t"
|
||||
"add $16, %%"FF_REG_a" \n\t"
|
||||
"cmp %3, %%"FF_REG_a" \n\t"
|
||||
" jb 1b \n\t"
|
||||
::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
|
||||
: "memory", "%"REG_a
|
||||
: "memory", "%"FF_REG_a
|
||||
);
|
||||
#endif
|
||||
for (w= (width&(~15)); w < width; w++) {
|
||||
|
||||
@@ -42,46 +42,46 @@
|
||||
|
||||
#define YSCALEYUV2PACKEDX_UV \
|
||||
__asm__ volatile(\
|
||||
"xor %%"REG_a", %%"REG_a" \n\t"\
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
|
||||
".p2align 4 \n\t"\
|
||||
"nop \n\t"\
|
||||
"1: \n\t"\
|
||||
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
|
||||
"mov (%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
|
||||
"mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
|
||||
"movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
|
||||
"movq %%mm3, %%mm4 \n\t"\
|
||||
".p2align 4 \n\t"\
|
||||
"2: \n\t"\
|
||||
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
|
||||
"movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
|
||||
"add %6, %%"REG_S" \n\t" \
|
||||
"movq (%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
|
||||
"add $16, %%"REG_d" \n\t"\
|
||||
"mov (%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"movq 8(%%"FF_REG_d"), %%mm0 \n\t" /* filterCoeff */\
|
||||
"movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2 \n\t" /* UsrcData */\
|
||||
"add %6, %%"FF_REG_S" \n\t" \
|
||||
"movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm5 \n\t" /* VsrcData */\
|
||||
"add $16, %%"FF_REG_d" \n\t"\
|
||||
"mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
|
||||
"pmulhw %%mm0, %%mm2 \n\t"\
|
||||
"pmulhw %%mm0, %%mm5 \n\t"\
|
||||
"paddw %%mm2, %%mm3 \n\t"\
|
||||
"paddw %%mm5, %%mm4 \n\t"\
|
||||
"test %%"REG_S", %%"REG_S" \n\t"\
|
||||
"test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
|
||||
" jnz 2b \n\t"\
|
||||
|
||||
#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
|
||||
"lea "offset"(%0), %%"REG_d" \n\t"\
|
||||
"mov (%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"lea "offset"(%0), %%"FF_REG_d" \n\t"\
|
||||
"mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
|
||||
"movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
|
||||
"movq "#dst1", "#dst2" \n\t"\
|
||||
".p2align 4 \n\t"\
|
||||
"2: \n\t"\
|
||||
"movq 8(%%"REG_d"), "#coeff" \n\t" /* filterCoeff */\
|
||||
"movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" /* Y1srcData */\
|
||||
"movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" /* Y2srcData */\
|
||||
"add $16, %%"REG_d" \n\t"\
|
||||
"mov (%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"movq 8(%%"FF_REG_d"), "#coeff" \n\t" /* filterCoeff */\
|
||||
"movq (%%"FF_REG_S", %%"FF_REG_a", 2), "#src1" \n\t" /* Y1srcData */\
|
||||
"movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), "#src2" \n\t" /* Y2srcData */\
|
||||
"add $16, %%"FF_REG_d" \n\t"\
|
||||
"mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
|
||||
"pmulhw "#coeff", "#src1" \n\t"\
|
||||
"pmulhw "#coeff", "#src2" \n\t"\
|
||||
"paddw "#src1", "#dst1" \n\t"\
|
||||
"paddw "#src2", "#dst2" \n\t"\
|
||||
"test %%"REG_S", %%"REG_S" \n\t"\
|
||||
"test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
|
||||
" jnz 2b \n\t"\
|
||||
|
||||
#define YSCALEYUV2PACKEDX \
|
||||
@@ -92,41 +92,41 @@
|
||||
:: "r" (&c->redDither), \
|
||||
"m" (dummy), "m" (dummy), "m" (dummy),\
|
||||
"r" (dest), "m" (dstW_reg), "m"(uv_off) \
|
||||
: "%"REG_a, "%"REG_d, "%"REG_S \
|
||||
: "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_S \
|
||||
);
|
||||
|
||||
#define YSCALEYUV2PACKEDX_ACCURATE_UV \
|
||||
__asm__ volatile(\
|
||||
"xor %%"REG_a", %%"REG_a" \n\t"\
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
|
||||
".p2align 4 \n\t"\
|
||||
"nop \n\t"\
|
||||
"1: \n\t"\
|
||||
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
|
||||
"mov (%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
|
||||
"mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
|
||||
"pxor %%mm4, %%mm4 \n\t"\
|
||||
"pxor %%mm5, %%mm5 \n\t"\
|
||||
"pxor %%mm6, %%mm6 \n\t"\
|
||||
"pxor %%mm7, %%mm7 \n\t"\
|
||||
".p2align 4 \n\t"\
|
||||
"2: \n\t"\
|
||||
"movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
|
||||
"add %6, %%"REG_S" \n\t" \
|
||||
"movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
|
||||
"mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
|
||||
"movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm0 \n\t" /* UsrcData */\
|
||||
"add %6, %%"FF_REG_S" \n\t" \
|
||||
"movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2 \n\t" /* VsrcData */\
|
||||
"mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
|
||||
"movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm1 \n\t" /* UsrcData */\
|
||||
"movq %%mm0, %%mm3 \n\t"\
|
||||
"punpcklwd %%mm1, %%mm0 \n\t"\
|
||||
"punpckhwd %%mm1, %%mm3 \n\t"\
|
||||
"movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\
|
||||
"movq "STR(APCK_COEF)"(%%"FF_REG_d"),%%mm1 \n\t" /* filterCoeff */\
|
||||
"pmaddwd %%mm1, %%mm0 \n\t"\
|
||||
"pmaddwd %%mm1, %%mm3 \n\t"\
|
||||
"paddd %%mm0, %%mm4 \n\t"\
|
||||
"paddd %%mm3, %%mm5 \n\t"\
|
||||
"add %6, %%"REG_S" \n\t" \
|
||||
"movq (%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
|
||||
"mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
|
||||
"test %%"REG_S", %%"REG_S" \n\t"\
|
||||
"add %6, %%"FF_REG_S" \n\t"\
|
||||
"movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm3 \n\t" /* VsrcData */\
|
||||
"mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
|
||||
"add $"STR(APCK_SIZE)", %%"FF_REG_d" \n\t"\
|
||||
"test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
|
||||
"movq %%mm2, %%mm0 \n\t"\
|
||||
"punpcklwd %%mm3, %%mm2 \n\t"\
|
||||
"punpckhwd %%mm3, %%mm0 \n\t"\
|
||||
@@ -148,30 +148,30 @@
|
||||
"movq %%mm6, "V_TEMP"(%0) \n\t"\
|
||||
|
||||
#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
|
||||
"lea "offset"(%0), %%"REG_d" \n\t"\
|
||||
"mov (%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"lea "offset"(%0), %%"FF_REG_d" \n\t"\
|
||||
"mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
|
||||
"pxor %%mm1, %%mm1 \n\t"\
|
||||
"pxor %%mm5, %%mm5 \n\t"\
|
||||
"pxor %%mm7, %%mm7 \n\t"\
|
||||
"pxor %%mm6, %%mm6 \n\t"\
|
||||
".p2align 4 \n\t"\
|
||||
"2: \n\t"\
|
||||
"movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
|
||||
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
|
||||
"mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
|
||||
"movq (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
|
||||
"movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
|
||||
"mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
|
||||
"movq (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
|
||||
"movq %%mm0, %%mm3 \n\t"\
|
||||
"punpcklwd %%mm4, %%mm0 \n\t"\
|
||||
"punpckhwd %%mm4, %%mm3 \n\t"\
|
||||
"movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
|
||||
"movq "STR(APCK_COEF)"(%%"FF_REG_d"), %%mm4 \n\t" /* filterCoeff */\
|
||||
"pmaddwd %%mm4, %%mm0 \n\t"\
|
||||
"pmaddwd %%mm4, %%mm3 \n\t"\
|
||||
"paddd %%mm0, %%mm1 \n\t"\
|
||||
"paddd %%mm3, %%mm5 \n\t"\
|
||||
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
|
||||
"mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
|
||||
"test %%"REG_S", %%"REG_S" \n\t"\
|
||||
"movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
|
||||
"mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
|
||||
"add $"STR(APCK_SIZE)", %%"FF_REG_d" \n\t"\
|
||||
"test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
|
||||
"movq %%mm2, %%mm0 \n\t"\
|
||||
"punpcklwd %%mm3, %%mm2 \n\t"\
|
||||
"punpckhwd %%mm3, %%mm0 \n\t"\
|
||||
@@ -278,13 +278,13 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
|
||||
"psraw $3, %%mm1 \n\t"
|
||||
"psraw $3, %%mm7 \n\t"
|
||||
"packuswb %%mm7, %%mm1 \n\t"
|
||||
WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
|
||||
WRITEBGR32(%4, %5, %%FF_REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
|
||||
YSCALEYUV2PACKEDX_END
|
||||
} else {
|
||||
YSCALEYUV2PACKEDX_ACCURATE
|
||||
YSCALEYUV2RGBX
|
||||
"pcmpeqd %%mm7, %%mm7 \n\t"
|
||||
WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
|
||||
WRITEBGR32(%4, %5, %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
|
||||
YSCALEYUV2PACKEDX_END
|
||||
}
|
||||
}
|
||||
@@ -307,13 +307,13 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
|
||||
"psraw $3, %%mm1 \n\t"
|
||||
"psraw $3, %%mm7 \n\t"
|
||||
"packuswb %%mm7, %%mm1 \n\t"
|
||||
WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
|
||||
WRITEBGR32(%4, %5, %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
|
||||
YSCALEYUV2PACKEDX_END
|
||||
} else {
|
||||
YSCALEYUV2PACKEDX
|
||||
YSCALEYUV2RGBX
|
||||
"pcmpeqd %%mm7, %%mm7 \n\t"
|
||||
WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
|
||||
WRITEBGR32(%4, %5, %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
|
||||
YSCALEYUV2PACKEDX_END
|
||||
}
|
||||
}
|
||||
@@ -366,7 +366,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
|
||||
"paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
|
||||
"paddusb "RED_DITHER"(%0), %%mm5\n\t"
|
||||
#endif
|
||||
WRITERGB16(%4, %5, %%REGa)
|
||||
WRITERGB16(%4, %5, %%FF_REGa)
|
||||
YSCALEYUV2PACKEDX_END
|
||||
}
|
||||
|
||||
@@ -390,7 +390,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
|
||||
"paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
|
||||
"paddusb "RED_DITHER"(%0), %%mm5 \n\t"
|
||||
#endif
|
||||
WRITERGB16(%4, %5, %%REGa)
|
||||
WRITERGB16(%4, %5, %%FF_REGa)
|
||||
YSCALEYUV2PACKEDX_END
|
||||
}
|
||||
|
||||
@@ -443,7 +443,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
|
||||
"paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
|
||||
"paddusb "RED_DITHER"(%0), %%mm5\n\t"
|
||||
#endif
|
||||
WRITERGB15(%4, %5, %%REGa)
|
||||
WRITERGB15(%4, %5, %%FF_REGa)
|
||||
YSCALEYUV2PACKEDX_END
|
||||
}
|
||||
|
||||
@@ -467,7 +467,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
|
||||
"paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
|
||||
"paddusb "RED_DITHER"(%0), %%mm5 \n\t"
|
||||
#endif
|
||||
WRITERGB15(%4, %5, %%REGa)
|
||||
WRITERGB15(%4, %5, %%FF_REGa)
|
||||
YSCALEYUV2PACKEDX_END
|
||||
}
|
||||
|
||||
@@ -593,14 +593,14 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
|
||||
|
||||
YSCALEYUV2PACKEDX_ACCURATE
|
||||
YSCALEYUV2RGBX
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
|
||||
"add %4, %%"REG_c" \n\t"
|
||||
WRITEBGR24(%%REGc, %5, %%REGa)
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c" \n\t" // FIXME optimize
|
||||
"add %4, %%"FF_REG_c" \n\t"
|
||||
WRITEBGR24(%%FF_REGc, %5, %%FF_REGa)
|
||||
:: "r" (&c->redDither),
|
||||
"m" (dummy), "m" (dummy), "m" (dummy),
|
||||
"r" (dest), "m" (dstW_reg), "m"(uv_off)
|
||||
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
|
||||
: "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
|
||||
);
|
||||
}
|
||||
|
||||
@@ -617,14 +617,14 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
|
||||
|
||||
YSCALEYUV2PACKEDX
|
||||
YSCALEYUV2RGBX
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize
|
||||
"add %4, %%"REG_c" \n\t"
|
||||
WRITEBGR24(%%REGc, %5, %%REGa)
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c" \n\t" // FIXME optimize
|
||||
"add %4, %%"FF_REG_c" \n\t"
|
||||
WRITEBGR24(%%FF_REGc, %5, %%FF_REGa)
|
||||
:: "r" (&c->redDither),
|
||||
"m" (dummy), "m" (dummy), "m" (dummy),
|
||||
"r" (dest), "m" (dstW_reg), "m"(uv_off)
|
||||
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
|
||||
: "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
|
||||
);
|
||||
}
|
||||
|
||||
@@ -662,7 +662,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
|
||||
"psraw $3, %%mm4 \n\t"
|
||||
"psraw $3, %%mm1 \n\t"
|
||||
"psraw $3, %%mm7 \n\t"
|
||||
WRITEYUY2(%4, %5, %%REGa)
|
||||
WRITEYUY2(%4, %5, %%FF_REGa)
|
||||
YSCALEYUV2PACKEDX_END
|
||||
}
|
||||
|
||||
@@ -683,7 +683,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
|
||||
"psraw $3, %%mm4 \n\t"
|
||||
"psraw $3, %%mm1 \n\t"
|
||||
"psraw $3, %%mm7 \n\t"
|
||||
WRITEYUY2(%4, %5, %%REGa)
|
||||
WRITEYUY2(%4, %5, %%FF_REGa)
|
||||
YSCALEYUV2PACKEDX_END
|
||||
}
|
||||
|
||||
@@ -794,37 +794,37 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
|
||||
*(const uint16_t **)(&c->u_temp)=abuf0;
|
||||
*(const uint16_t **)(&c->v_temp)=abuf1;
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB(%%REGBP, %5)
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB(%%FF_REGBP, %5)
|
||||
"push %0 \n\t"
|
||||
"push %1 \n\t"
|
||||
"mov "U_TEMP"(%5), %0 \n\t"
|
||||
"mov "V_TEMP"(%5), %1 \n\t"
|
||||
YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
|
||||
YSCALEYUV2RGB_YA(%%FF_REGBP, %5, %0, %1)
|
||||
"psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
|
||||
"psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
|
||||
"packuswb %%mm7, %%mm1 \n\t"
|
||||
"pop %1 \n\t"
|
||||
"pop %0 \n\t"
|
||||
WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
WRITEBGR32(%%FF_REGb, 8280(%5), %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
#endif
|
||||
} else {
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB(%%REGBP, %5)
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB(%%FF_REGBP, %5)
|
||||
"pcmpeqd %%mm7, %%mm7 \n\t"
|
||||
WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
WRITEBGR32(%%FF_REGb, 8280(%5), %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
@@ -841,14 +841,14 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
|
||||
|
||||
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB(%%REGBP, %5)
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB(%%FF_REGBP, %5)
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
WRITEBGR24(%%FF_REGb, 8280(%5), %%FF_REGBP)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
@@ -864,10 +864,10 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
|
||||
|
||||
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB(%%REGBP, %5)
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB(%%FF_REGBP, %5)
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
|
||||
#ifdef DITHER1XBPP
|
||||
@@ -875,9 +875,9 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
|
||||
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
|
||||
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
|
||||
#endif
|
||||
WRITERGB15(%%REGb, 8280(%5), %%REGBP)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
WRITERGB15(%%FF_REGb, 8280(%5), %%FF_REGBP)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
@@ -893,10 +893,10 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
|
||||
|
||||
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB(%%REGBP, %5)
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB(%%FF_REGBP, %5)
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
|
||||
#ifdef DITHER1XBPP
|
||||
@@ -904,9 +904,9 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
|
||||
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
|
||||
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
|
||||
#endif
|
||||
WRITERGB16(%%REGb, 8280(%5), %%REGBP)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
WRITERGB16(%%FF_REGb, 8280(%5), %%FF_REGBP)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
@@ -962,13 +962,13 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
|
||||
|
||||
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2PACKED(%%REGBP, %5)
|
||||
WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2PACKED(%%FF_REGBP, %5)
|
||||
WRITEYUY2(%%FF_REGb, 8280(%5), %%FF_REGBP)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
@@ -1104,27 +1104,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
|
||||
const int16_t *ubuf1 = ubuf[0];
|
||||
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1(%%REGBP, %5)
|
||||
YSCALEYUV2RGB1_ALPHA(%%REGBP)
|
||||
WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1(%%FF_REGBP, %5)
|
||||
YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
|
||||
WRITEBGR32(%%FF_REGb, 8280(%5), %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
} else {
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1(%%REGBP, %5)
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1(%%FF_REGBP, %5)
|
||||
"pcmpeqd %%mm7, %%mm7 \n\t"
|
||||
WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
WRITEBGR32(%%FF_REGb, 8280(%5), %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
@@ -1133,27 +1133,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
|
||||
const int16_t *ubuf1 = ubuf[1];
|
||||
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1b(%%REGBP, %5)
|
||||
YSCALEYUV2RGB1_ALPHA(%%REGBP)
|
||||
WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1b(%%FF_REGBP, %5)
|
||||
YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
|
||||
WRITEBGR32(%%FF_REGb, 8280(%5), %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
} else {
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1b(%%REGBP, %5)
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1b(%%FF_REGBP, %5)
|
||||
"pcmpeqd %%mm7, %%mm7 \n\t"
|
||||
WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
WRITEBGR32(%%FF_REGb, 8280(%5), %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
@@ -1172,28 +1172,28 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
|
||||
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
|
||||
const int16_t *ubuf1 = ubuf[0];
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1(%%REGBP, %5)
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1(%%FF_REGBP, %5)
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
WRITEBGR24(%%FF_REGb, 8280(%5), %%FF_REGBP)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
} else {
|
||||
const int16_t *ubuf1 = ubuf[1];
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1b(%%REGBP, %5)
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1b(%%FF_REGBP, %5)
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
WRITEBGR24(%%FF_REGb, 8280(%5), %%FF_REGBP)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
@@ -1211,10 +1211,10 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
|
||||
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
|
||||
const int16_t *ubuf1 = ubuf[0];
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1(%%REGBP, %5)
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1(%%FF_REGBP, %5)
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
|
||||
#ifdef DITHER1XBPP
|
||||
@@ -1222,19 +1222,19 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
|
||||
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
|
||||
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
|
||||
#endif
|
||||
WRITERGB15(%%REGb, 8280(%5), %%REGBP)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
WRITERGB15(%%FF_REGb, 8280(%5), %%FF_REGBP)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
} else {
|
||||
const int16_t *ubuf1 = ubuf[1];
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1b(%%REGBP, %5)
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1b(%%FF_REGBP, %5)
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
|
||||
#ifdef DITHER1XBPP
|
||||
@@ -1242,9 +1242,9 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
|
||||
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
|
||||
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
|
||||
#endif
|
||||
WRITERGB15(%%REGb, 8280(%5), %%REGBP)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
WRITERGB15(%%FF_REGb, 8280(%5), %%FF_REGBP)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
@@ -1262,10 +1262,10 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
|
||||
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
|
||||
const int16_t *ubuf1 = ubuf[0];
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1(%%REGBP, %5)
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1(%%FF_REGBP, %5)
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
|
||||
#ifdef DITHER1XBPP
|
||||
@@ -1273,19 +1273,19 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
|
||||
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
|
||||
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
|
||||
#endif
|
||||
WRITERGB16(%%REGb, 8280(%5), %%REGBP)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
WRITERGB16(%%FF_REGb, 8280(%5), %%FF_REGBP)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
} else {
|
||||
const int16_t *ubuf1 = ubuf[1];
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1b(%%REGBP, %5)
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2RGB1b(%%FF_REGBP, %5)
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
|
||||
#ifdef DITHER1XBPP
|
||||
@@ -1293,9 +1293,9 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
|
||||
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
|
||||
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
|
||||
#endif
|
||||
WRITERGB16(%%REGb, 8280(%5), %%REGBP)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
WRITERGB16(%%FF_REGb, 8280(%5), %%FF_REGBP)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
@@ -1350,26 +1350,26 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
|
||||
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
|
||||
const int16_t *ubuf1 = ubuf[0];
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2PACKED1(%%REGBP, %5)
|
||||
WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2PACKED1(%%FF_REGBP, %5)
|
||||
WRITEYUY2(%%FF_REGb, 8280(%5), %%FF_REGBP)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
} else {
|
||||
const int16_t *ubuf1 = ubuf[1];
|
||||
__asm__ volatile(
|
||||
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"REG_b" \n\t"
|
||||
"push %%"REG_BP" \n\t"
|
||||
YSCALEYUV2PACKED1b(%%REGBP, %5)
|
||||
WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
|
||||
"pop %%"REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
|
||||
"mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
|
||||
"mov %4, %%"FF_REG_b" \n\t"
|
||||
"push %%"FF_REG_BP" \n\t"
|
||||
YSCALEYUV2PACKED1b(%%FF_REGBP, %5)
|
||||
WRITEYUY2(%%FF_REGb, 8280(%5), %%FF_REGBP)
|
||||
"pop %%"FF_REG_BP" \n\t"
|
||||
"mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
|
||||
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
|
||||
"a" (&c->redDither)
|
||||
);
|
||||
@@ -1394,43 +1394,43 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
|
||||
|
||||
__asm__ volatile(
|
||||
#if defined(PIC)
|
||||
"mov %%"REG_b", %5 \n\t"
|
||||
"mov %%"FF_REG_b", %5 \n\t"
|
||||
#if ARCH_X86_64
|
||||
"mov -8(%%rsp), %%"REG_a" \n\t"
|
||||
"mov %%"REG_a", %6 \n\t"
|
||||
"mov -8(%%rsp), %%"FF_REG_a" \n\t"
|
||||
"mov %%"FF_REG_a", %6 \n\t"
|
||||
#endif
|
||||
#else
|
||||
#if ARCH_X86_64
|
||||
"mov -8(%%rsp), %%"REG_a" \n\t"
|
||||
"mov %%"REG_a", %5 \n\t"
|
||||
"mov -8(%%rsp), %%"FF_REG_a" \n\t"
|
||||
"mov %%"FF_REG_a", %5 \n\t"
|
||||
#endif
|
||||
#endif
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"mov %0, %%"REG_c" \n\t"
|
||||
"mov %1, %%"REG_D" \n\t"
|
||||
"mov %2, %%"REG_d" \n\t"
|
||||
"mov %3, %%"REG_b" \n\t"
|
||||
"xor %%"REG_a", %%"REG_a" \n\t" // i
|
||||
PREFETCH" (%%"REG_c") \n\t"
|
||||
PREFETCH" 32(%%"REG_c") \n\t"
|
||||
PREFETCH" 64(%%"REG_c") \n\t"
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"mov %0, %%"FF_REG_c" \n\t"
|
||||
"mov %1, %%"FF_REG_D" \n\t"
|
||||
"mov %2, %%"FF_REG_d" \n\t"
|
||||
"mov %3, %%"FF_REG_b" \n\t"
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t" // i
|
||||
PREFETCH" (%%"FF_REG_c") \n\t"
|
||||
PREFETCH" 32(%%"FF_REG_c") \n\t"
|
||||
PREFETCH" 64(%%"FF_REG_c") \n\t"
|
||||
|
||||
#if ARCH_X86_64
|
||||
#define CALL_MMXEXT_FILTER_CODE \
|
||||
"movl (%%"REG_b"), %%esi \n\t"\
|
||||
"call *%4 \n\t"\
|
||||
"movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
|
||||
"add %%"REG_S", %%"REG_c" \n\t"\
|
||||
"add %%"REG_a", %%"REG_D" \n\t"\
|
||||
"xor %%"REG_a", %%"REG_a" \n\t"\
|
||||
"movl (%%"FF_REG_b"), %%esi \n\t"\
|
||||
"call *%4 \n\t"\
|
||||
"movl (%%"FF_REG_b", %%"FF_REG_a"), %%esi \n\t"\
|
||||
"add %%"FF_REG_S", %%"FF_REG_c" \n\t"\
|
||||
"add %%"FF_REG_a", %%"FF_REG_D" \n\t"\
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
|
||||
|
||||
#else
|
||||
#define CALL_MMXEXT_FILTER_CODE \
|
||||
"movl (%%"REG_b"), %%esi \n\t"\
|
||||
"call *%4 \n\t"\
|
||||
"addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
|
||||
"add %%"REG_a", %%"REG_D" \n\t"\
|
||||
"xor %%"REG_a", %%"REG_a" \n\t"\
|
||||
"movl (%%"FF_REG_b"), %%esi \n\t"\
|
||||
"call *%4 \n\t"\
|
||||
"addl (%%"FF_REG_b", %%"FF_REG_a"), %%"FF_REG_c" \n\t"\
|
||||
"add %%"FF_REG_a", %%"FF_REG_D" \n\t"\
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
|
||||
|
||||
#endif /* ARCH_X86_64 */
|
||||
|
||||
@@ -1444,15 +1444,15 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
|
||||
CALL_MMXEXT_FILTER_CODE
|
||||
|
||||
#if defined(PIC)
|
||||
"mov %5, %%"REG_b" \n\t"
|
||||
"mov %5, %%"FF_REG_b" \n\t"
|
||||
#if ARCH_X86_64
|
||||
"mov %6, %%"REG_a" \n\t"
|
||||
"mov %%"REG_a", -8(%%rsp) \n\t"
|
||||
"mov %6, %%"FF_REG_a" \n\t"
|
||||
"mov %%"FF_REG_a", -8(%%rsp) \n\t"
|
||||
#endif
|
||||
#else
|
||||
#if ARCH_X86_64
|
||||
"mov %5, %%"REG_a" \n\t"
|
||||
"mov %%"REG_a", -8(%%rsp) \n\t"
|
||||
"mov %5, %%"FF_REG_a" \n\t"
|
||||
"mov %%"FF_REG_a", -8(%%rsp) \n\t"
|
||||
#endif
|
||||
#endif
|
||||
:: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
|
||||
@@ -1463,9 +1463,9 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
|
||||
#if ARCH_X86_64
|
||||
,"m"(retsave)
|
||||
#endif
|
||||
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
|
||||
: "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_D
|
||||
#if !defined(PIC)
|
||||
,"%"REG_b
|
||||
,"%"FF_REG_b
|
||||
#endif
|
||||
);
|
||||
|
||||
@@ -1490,37 +1490,37 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
|
||||
|
||||
__asm__ volatile(
|
||||
#if defined(PIC)
|
||||
"mov %%"REG_b", %7 \n\t"
|
||||
"mov %%"FF_REG_b", %7 \n\t"
|
||||
#if ARCH_X86_64
|
||||
"mov -8(%%rsp), %%"REG_a" \n\t"
|
||||
"mov %%"REG_a", %8 \n\t"
|
||||
"mov -8(%%rsp), %%"FF_REG_a" \n\t"
|
||||
"mov %%"FF_REG_a", %8 \n\t"
|
||||
#endif
|
||||
#else
|
||||
#if ARCH_X86_64
|
||||
"mov -8(%%rsp), %%"REG_a" \n\t"
|
||||
"mov %%"REG_a", %7 \n\t"
|
||||
"mov -8(%%rsp), %%"FF_REG_a" \n\t"
|
||||
"mov %%"FF_REG_a", %7 \n\t"
|
||||
#endif
|
||||
#endif
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"mov %0, %%"REG_c" \n\t"
|
||||
"mov %1, %%"REG_D" \n\t"
|
||||
"mov %2, %%"REG_d" \n\t"
|
||||
"mov %3, %%"REG_b" \n\t"
|
||||
"xor %%"REG_a", %%"REG_a" \n\t" // i
|
||||
PREFETCH" (%%"REG_c") \n\t"
|
||||
PREFETCH" 32(%%"REG_c") \n\t"
|
||||
PREFETCH" 64(%%"REG_c") \n\t"
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"mov %0, %%"FF_REG_c" \n\t"
|
||||
"mov %1, %%"FF_REG_D" \n\t"
|
||||
"mov %2, %%"FF_REG_d" \n\t"
|
||||
"mov %3, %%"FF_REG_b" \n\t"
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t" // i
|
||||
PREFETCH" (%%"FF_REG_c") \n\t"
|
||||
PREFETCH" 32(%%"FF_REG_c") \n\t"
|
||||
PREFETCH" 64(%%"FF_REG_c") \n\t"
|
||||
|
||||
CALL_MMXEXT_FILTER_CODE
|
||||
CALL_MMXEXT_FILTER_CODE
|
||||
CALL_MMXEXT_FILTER_CODE
|
||||
CALL_MMXEXT_FILTER_CODE
|
||||
"xor %%"REG_a", %%"REG_a" \n\t" // i
|
||||
"mov %5, %%"REG_c" \n\t" // src
|
||||
"mov %6, %%"REG_D" \n\t" // buf2
|
||||
PREFETCH" (%%"REG_c") \n\t"
|
||||
PREFETCH" 32(%%"REG_c") \n\t"
|
||||
PREFETCH" 64(%%"REG_c") \n\t"
|
||||
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t" // i
|
||||
"mov %5, %%"FF_REG_c" \n\t" // src
|
||||
"mov %6, %%"FF_REG_D" \n\t" // buf2
|
||||
PREFETCH" (%%"FF_REG_c") \n\t"
|
||||
PREFETCH" 32(%%"FF_REG_c") \n\t"
|
||||
PREFETCH" 64(%%"FF_REG_c") \n\t"
|
||||
|
||||
CALL_MMXEXT_FILTER_CODE
|
||||
CALL_MMXEXT_FILTER_CODE
|
||||
@@ -1528,15 +1528,15 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
|
||||
CALL_MMXEXT_FILTER_CODE
|
||||
|
||||
#if defined(PIC)
|
||||
"mov %7, %%"REG_b" \n\t"
|
||||
"mov %7, %%"FF_REG_b" \n\t"
|
||||
#if ARCH_X86_64
|
||||
"mov %8, %%"REG_a" \n\t"
|
||||
"mov %%"REG_a", -8(%%rsp) \n\t"
|
||||
"mov %8, %%"FF_REG_a" \n\t"
|
||||
"mov %%"FF_REG_a", -8(%%rsp) \n\t"
|
||||
#endif
|
||||
#else
|
||||
#if ARCH_X86_64
|
||||
"mov %7, %%"REG_a" \n\t"
|
||||
"mov %%"REG_a", -8(%%rsp) \n\t"
|
||||
"mov %7, %%"FF_REG_a" \n\t"
|
||||
"mov %%"FF_REG_a", -8(%%rsp) \n\t"
|
||||
#endif
|
||||
#endif
|
||||
:: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
|
||||
@@ -1547,9 +1547,9 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
|
||||
#if ARCH_X86_64
|
||||
,"m"(retsave)
|
||||
#endif
|
||||
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
|
||||
: "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_D
|
||||
#if !defined(PIC)
|
||||
,"%"REG_b
|
||||
,"%"FF_REG_b
|
||||
#endif
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user