1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

mangle for win32 in postproc

Originally committed as revision 4249 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
This commit is contained in:
Felix Bünemann 2002-01-19 05:14:47 +00:00
parent 0a8d8945fd
commit 9b4644281d
4 changed files with 182 additions and 180 deletions

View File

@ -86,6 +86,7 @@ dont use #ifdef ARCH_X86 for the asm stuff ... cross compilers? (note cpudetect
//#include "../libvo/fastmemcpy.h"
#include "postprocess.h"
#include "../cpudetect.h"
#include "../mangle.h"
/* MIN/MAX: smaller / larger of two values (yields a when the comparison is false, e.g. when both are equal).
 * Function-like macros: the selected argument is evaluated a second time,
 * so do not pass expressions with side effects (e.g. i++). */
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))

View File

@ -60,8 +60,8 @@ asm volatile(
"leal (%%eax, %2, 4), %%ebx \n\t"
// 0 1 2 3 4 5 6 7 8 9
// %1 eax eax+%2 eax+2%2 %1+4%2 ebx ebx+%2 ebx+2%2 %1+8%2 ebx+4%2
"movq mmxDCOffset, %%mm7 \n\t" // mm7 = 0x7F
"movq mmxDCThreshold, %%mm6 \n\t" // mm6 = 0x7D
"movq "MANGLE(mmxDCOffset)", %%mm7 \n\t" // mm7 = 0x7F
"movq "MANGLE(mmxDCThreshold)", %%mm6 \n\t" // mm6 = 0x7D
"movq (%1), %%mm0 \n\t"
"movq (%%eax), %%mm1 \n\t"
"psubb %%mm1, %%mm0 \n\t" // mm0 = differnece
@ -171,12 +171,12 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, int QP)
"psubusb %%mm2, %%mm1 \n\t"
"por %%mm1, %%mm0 \n\t" // ABS Diff
"movq pQPb, %%mm7 \n\t" // QP,..., QP
"movq "MANGLE(pQPb)", %%mm7 \n\t" // QP,..., QP
"paddusb %%mm7, %%mm7 \n\t" // 2QP ... 2QP
"psubusb %%mm7, %%mm0 \n\t" // Diff <= 2QP -> 0
"pcmpeqd b00, %%mm0 \n\t"
"pcmpeqd "MANGLE(b00)", %%mm0 \n\t"
"psrlq $16, %%mm0 \n\t"
"pcmpeqd bFF, %%mm0 \n\t"
"pcmpeqd "MANGLE(bFF)", %%mm0 \n\t"
// "movd %%mm0, (%1, %2, 4)\n\t"
"movd %%mm0, %0 \n\t"
: "=r" (isOk)
@ -219,7 +219,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*3;
asm volatile( //"movv %0 %1 %2\n\t"
"movq pQPb, %%mm0 \n\t" // QP,..., QP
"movq "MANGLE(pQPb)", %%mm0 \n\t" // QP,..., QP
"movq (%0), %%mm6 \n\t"
"movq (%0, %1), %%mm5 \n\t"
@ -229,7 +229,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
"psubusb %%mm1, %%mm2 \n\t"
"por %%mm5, %%mm2 \n\t" // ABS Diff of lines
"psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0
"pcmpeqb b00, %%mm2 \n\t" // diff <= QP -> FF
"pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // diff <= QP -> FF
"pand %%mm2, %%mm6 \n\t"
"pandn %%mm1, %%mm2 \n\t"
@ -247,7 +247,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
"psubusb %%mm1, %%mm2 \n\t"
"por %%mm5, %%mm2 \n\t" // ABS Diff of lines
"psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0
"pcmpeqb b00, %%mm2 \n\t" // diff <= QP -> FF
"pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // diff <= QP -> FF
"pand %%mm2, %%mm7 \n\t"
"pandn %%mm1, %%mm2 \n\t"
@ -403,16 +403,16 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
// FIXME rounding
asm volatile(
"pxor %%mm7, %%mm7 \n\t" // 0
"movq b80, %%mm6 \n\t" // MIN_SIGNED_BYTE
"movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE
"leal (%0, %1), %%eax \n\t"
"leal (%%eax, %1, 4), %%ebx \n\t"
// 0 1 2 3 4 5 6 7 8 9
// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
"movq pQPb, %%mm0 \n\t" // QP,..., QP
"movq "MANGLE(pQPb)", %%mm0 \n\t" // QP,..., QP
"movq %%mm0, %%mm1 \n\t" // QP,..., QP
"paddusb b02, %%mm0 \n\t"
"paddusb "MANGLE(b02)", %%mm0 \n\t"
"psrlw $2, %%mm0 \n\t"
"pand b3F, %%mm0 \n\t" // QP/4,..., QP/4
"pand "MANGLE(b3F)", %%mm0 \n\t" // QP/4,..., QP/4
"paddusb %%mm1, %%mm0 \n\t" // QP*1.25 ...
"movq (%0, %1, 4), %%mm2 \n\t" // line 4
"movq (%%ebx), %%mm3 \n\t" // line 5
@ -441,8 +441,8 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
"paddb %%mm6, %%mm5 \n\t"
"psrlw $2, %%mm5 \n\t"
"pand b3F, %%mm5 \n\t"
"psubb b20, %%mm5 \n\t" // (l5-l4)/8
"pand "MANGLE(b3F)", %%mm5 \n\t"
"psubb "MANGLE(b20)", %%mm5 \n\t" // (l5-l4)/8
"movq (%%eax, %1, 2), %%mm2 \n\t"
"paddb %%mm6, %%mm2 \n\t" // line 3 + 0x80
@ -503,7 +503,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP)
asm volatile(
"pxor %%mm7, %%mm7 \n\t" // 0
// "movq b80, %%mm6 \n\t" // MIN_SIGNED_BYTE
// "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE
"leal (%0, %1), %%eax \n\t"
"leal (%%eax, %1, 4), %%ebx \n\t"
// 0 1 2 3 4 5 6 7 8 9
@ -529,9 +529,9 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP)
"por %%mm5, %%mm4 \n\t" // |l4 - l5|
"psubusb %%mm0, %%mm4 \n\t" //d = MAX(0, |l4-l5| - (|l2-l3| + |l5-l6|)/2)
"movq %%mm4, %%mm3 \n\t" // d
"psubusb pQPb, %%mm4 \n\t"
"psubusb "MANGLE(pQPb)", %%mm4 \n\t"
"pcmpeqb %%mm7, %%mm4 \n\t" // d <= QP ? -1 : 0
"psubusb b01, %%mm3 \n\t"
"psubusb "MANGLE(b01)", %%mm3 \n\t"
"pand %%mm4, %%mm3 \n\t" // d <= QP ? d : 0
PAVGB(%%mm7, %%mm3) // d/2
@ -740,18 +740,18 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
PMINUB(%%mm2, %%mm1, %%mm4) // MIN(|lenergy|,|renergy|)/8
"movq pQPb, %%mm4 \n\t" // QP //FIXME QP+1 ?
"paddusb b01, %%mm4 \n\t"
"movq "MANGLE(pQPb)", %%mm4 \n\t" // QP //FIXME QP+1 ?
"paddusb "MANGLE(b01)", %%mm4 \n\t"
"pcmpgtb %%mm3, %%mm4 \n\t" // |menergy|/8 < QP
"psubusb %%mm1, %%mm3 \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
"pand %%mm4, %%mm3 \n\t"
"movq %%mm3, %%mm1 \n\t"
// "psubusb b01, %%mm3 \n\t"
// "psubusb "MANGLE(b01)", %%mm3 \n\t"
PAVGB(%%mm7, %%mm3)
PAVGB(%%mm7, %%mm3)
"paddusb %%mm1, %%mm3 \n\t"
// "paddusb b01, %%mm3 \n\t"
// "paddusb "MANGLE(b01)", %%mm3 \n\t"
"movq (%%eax, %1, 2), %%mm6 \n\t" //l3
"movq (%0, %1, 4), %%mm5 \n\t" //l4
@ -764,7 +764,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
"pand %%mm0, %%mm3 \n\t"
PMINUB(%%mm5, %%mm3, %%mm0)
"psubusb b01, %%mm3 \n\t"
"psubusb "MANGLE(b01)", %%mm3 \n\t"
PAVGB(%%mm7, %%mm3)
"movq (%%eax, %1, 2), %%mm0 \n\t"
@ -796,7 +796,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
"movq (%%eax, %1), %%mm3 \n\t" // l2
"pxor %%mm6, %%mm2 \n\t" // -l5-1
"movq %%mm2, %%mm5 \n\t" // -l5-1
"movq b80, %%mm4 \n\t" // 128
"movq "MANGLE(b80)", %%mm4 \n\t" // 128
"leal (%%eax, %1, 4), %%ebx \n\t"
PAVGB(%%mm3, %%mm2) // (l2-l5+256)/2
PAVGB(%%mm0, %%mm4) // ~(l4-l3)/4 + 128
@ -808,7 +808,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
"pxor %%mm6, %%mm2 \n\t" // -l1-1
PAVGB(%%mm3, %%mm2) // (l2-l1+256)/2
PAVGB((%0), %%mm1) // (l0-l3+256)/2
"movq b80, %%mm3 \n\t" // 128
"movq "MANGLE(b80)", %%mm3 \n\t" // 128
PAVGB(%%mm2, %%mm3) // ~(l2-l1)/4 + 128
PAVGB(%%mm1, %%mm3) // ~(l0-l3)/4 +(l2-l1)/8 + 128
PAVGB(%%mm2, %%mm3) // ~(l0-l3)/8 +5(l2-l1)/16 + 128
@ -818,14 +818,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
"movq (%%ebx, %1, 2), %%mm1 \n\t" // l7
"pxor %%mm6, %%mm1 \n\t" // -l7-1
PAVGB((%0, %1, 4), %%mm1) // (l4-l7+256)/2
"movq b80, %%mm2 \n\t" // 128
"movq "MANGLE(b80)", %%mm2 \n\t" // 128
PAVGB(%%mm5, %%mm2) // ~(l6-l5)/4 + 128
PAVGB(%%mm1, %%mm2) // ~(l4-l7)/4 +(l6-l5)/8 + 128
PAVGB(%%mm5, %%mm2) // ~(l4-l7)/8 +5(l6-l5)/16 + 128
// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128
"movq b00, %%mm1 \n\t" // 0
"movq b00, %%mm5 \n\t" // 0
"movq "MANGLE(b00)", %%mm1 \n\t" // 0
"movq "MANGLE(b00)", %%mm5 \n\t" // 0
"psubb %%mm2, %%mm1 \n\t" // 128 - renergy/16
"psubb %%mm3, %%mm5 \n\t" // 128 - lenergy/16
PMAXUB(%%mm1, %%mm2) // 128 + |renergy/16|
@ -834,8 +834,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128
"movq b00, %%mm7 \n\t" // 0
"movq pQPb, %%mm2 \n\t" // QP
"movq "MANGLE(b00)", %%mm7 \n\t" // 0
"movq "MANGLE(pQPb)", %%mm2 \n\t" // QP
PAVGB(%%mm6, %%mm2) // 128 + QP/2
"psubb %%mm6, %%mm2 \n\t"
@ -848,13 +848,13 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16
"movq %%mm4, %%mm3 \n\t" // d
"psubusb b01, %%mm4 \n\t"
"psubusb "MANGLE(b01)", %%mm4 \n\t"
PAVGB(%%mm7, %%mm4) // d/32
PAVGB(%%mm7, %%mm4) // (d + 32)/64
"paddb %%mm3, %%mm4 \n\t" // 5d/64
"pand %%mm2, %%mm4 \n\t"
"movq b80, %%mm5 \n\t" // 128
"movq "MANGLE(b80)", %%mm5 \n\t" // 128
"psubb %%mm0, %%mm5 \n\t" // q
"paddsb %%mm6, %%mm5 \n\t" // fix bad rounding
"pcmpgtb %%mm5, %%mm7 \n\t" // SIGN(q)
@ -991,8 +991,8 @@ src-=8;
"psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - H3
"psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
"psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
"movq %%mm0, temp0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
"movq %%mm1, temp1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
"movq %%mm0, "MANGLE(temp0)" \n\t" // 2L0 - 5L1 + 5L2 - 2L3
"movq %%mm1, "MANGLE(temp1)" \n\t" // 2H0 - 5H1 + 5H2 - 2H3
"movq (%0, %1, 4), %%mm0 \n\t"
"movq %%mm0, %%mm1 \n\t"
@ -1001,8 +1001,8 @@ src-=8;
"psubw %%mm0, %%mm2 \n\t" // L3 - L4
"psubw %%mm1, %%mm3 \n\t" // H3 - H4
"movq %%mm2, temp2 \n\t" // L3 - L4
"movq %%mm3, temp3 \n\t" // H3 - H4
"movq %%mm2, "MANGLE(temp2)" \n\t" // L3 - L4
"movq %%mm3, "MANGLE(temp3)" \n\t" // H3 - H4
"paddw %%mm4, %%mm4 \n\t" // 2L2
"paddw %%mm5, %%mm5 \n\t" // 2H2
"psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4
@ -1049,8 +1049,8 @@ src-=8;
"psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7
"psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7
"movq temp0, %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
"movq temp1, %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
"movq "MANGLE(temp0)", %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
"movq "MANGLE(temp1)", %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
#ifdef HAVE_MMX2
"movq %%mm7, %%mm6 \n\t" // 0
@ -1138,8 +1138,8 @@ src-=8;
"pmulhw %%mm2, %%mm5 \n\t" // ld/13
*/
"movq temp2, %%mm0 \n\t" // L3 - L4
"movq temp3, %%mm1 \n\t" // H3 - H4
"movq "MANGLE(temp2)", %%mm0 \n\t" // L3 - L4
"movq "MANGLE(temp3)", %%mm1 \n\t" // H3 - H4
"pxor %%mm2, %%mm2 \n\t"
"pxor %%mm3, %%mm3 \n\t"
@ -1235,9 +1235,9 @@ static inline void RENAME(dering)(uint8_t src[], int stride, int QP)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
asm volatile(
"movq pQPb, %%mm0 \n\t"
"movq "MANGLE(pQPb)", %%mm0 \n\t"
"paddusb %%mm0, %%mm0 \n\t"
"movq %%mm0, pQPb2 \n\t"
"movq %%mm0, "MANGLE(pQPb2)" \n\t"
"leal (%0, %1), %%eax \n\t"
"leal (%%eax, %1, 4), %%ebx \n\t"
@ -1319,13 +1319,13 @@ FIND_MIN_MAX((%0, %1, 8))
"movq %%mm6, %%mm0 \n\t" // max
"psubb %%mm7, %%mm6 \n\t" // max - min
"movd %%mm6, %%ecx \n\t"
"cmpb deringThreshold, %%cl \n\t"
"cmpb "MANGLE(deringThreshold)", %%cl \n\t"
" jb 1f \n\t"
PAVGB(%%mm0, %%mm7) // a=(max + min)/2
"punpcklbw %%mm7, %%mm7 \n\t"
"punpcklbw %%mm7, %%mm7 \n\t"
"punpcklbw %%mm7, %%mm7 \n\t"
"movq %%mm7, temp0 \n\t"
"movq %%mm7, "MANGLE(temp0)" \n\t"
"movq (%0), %%mm0 \n\t" // L10
"movq %%mm0, %%mm1 \n\t" // L10
@ -1344,9 +1344,9 @@ FIND_MIN_MAX((%0, %1, 8))
"psubusb %%mm7, %%mm0 \n\t"
"psubusb %%mm7, %%mm2 \n\t"
"psubusb %%mm7, %%mm3 \n\t"
"pcmpeqb b00, %%mm0 \n\t" // L10 > a ? 0 : -1
"pcmpeqb b00, %%mm2 \n\t" // L20 > a ? 0 : -1
"pcmpeqb b00, %%mm3 \n\t" // L00 > a ? 0 : -1
"pcmpeqb "MANGLE(b00)", %%mm0 \n\t" // L10 > a ? 0 : -1
"pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // L20 > a ? 0 : -1
"pcmpeqb "MANGLE(b00)", %%mm3 \n\t" // L00 > a ? 0 : -1
"paddb %%mm2, %%mm0 \n\t"
"paddb %%mm3, %%mm0 \n\t"
@ -1367,9 +1367,9 @@ FIND_MIN_MAX((%0, %1, 8))
"psubusb %%mm7, %%mm2 \n\t"
"psubusb %%mm7, %%mm4 \n\t"
"psubusb %%mm7, %%mm5 \n\t"
"pcmpeqb b00, %%mm2 \n\t" // L11 > a ? 0 : -1
"pcmpeqb b00, %%mm4 \n\t" // L21 > a ? 0 : -1
"pcmpeqb b00, %%mm5 \n\t" // L01 > a ? 0 : -1
"pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // L11 > a ? 0 : -1
"pcmpeqb "MANGLE(b00)", %%mm4 \n\t" // L21 > a ? 0 : -1
"pcmpeqb "MANGLE(b00)", %%mm5 \n\t" // L01 > a ? 0 : -1
"paddb %%mm4, %%mm2 \n\t"
"paddb %%mm5, %%mm2 \n\t"
// 0, 2, 3, 1
@ -1389,12 +1389,12 @@ FIND_MIN_MAX((%0, %1, 8))
PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\
PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\
PAVGB(lx, pplx) \
"movq " #lx ", temp1 \n\t"\
"movq temp0, " #lx " \n\t"\
"movq " #lx ", "MANGLE(temp1)" \n\t"\
"movq "MANGLE(temp0)", " #lx " \n\t"\
"psubusb " #lx ", " #t1 " \n\t"\
"psubusb " #lx ", " #t0 " \n\t"\
"psubusb " #lx ", " #sx " \n\t"\
"movq b00, " #lx " \n\t"\
"movq "MANGLE(b00)", " #lx " \n\t"\
"pcmpeqb " #lx ", " #t1 " \n\t" /* src[-1] > a ? 0 : -1*/\
"pcmpeqb " #lx ", " #t0 " \n\t" /* src[+1] > a ? 0 : -1*/\
"pcmpeqb " #lx ", " #sx " \n\t" /* src[0] > a ? 0 : -1*/\
@ -1404,20 +1404,20 @@ FIND_MIN_MAX((%0, %1, 8))
PAVGB(plx, pplx) /* filtered */\
"movq " #dst ", " #t0 " \n\t" /* dst */\
"movq " #t0 ", " #t1 " \n\t" /* dst */\
"psubusb pQPb2, " #t0 " \n\t"\
"paddusb pQPb2, " #t1 " \n\t"\
"psubusb "MANGLE(pQPb2)", " #t0 " \n\t"\
"paddusb "MANGLE(pQPb2)", " #t1 " \n\t"\
PMAXUB(t0, pplx)\
PMINUB(t1, pplx, t0)\
"paddb " #sx ", " #ppsx " \n\t"\
"paddb " #psx ", " #ppsx " \n\t"\
"#paddb b02, " #ppsx " \n\t"\
"pand b08, " #ppsx " \n\t"\
"#paddb "MANGLE(b02)", " #ppsx " \n\t"\
"pand "MANGLE(b08)", " #ppsx " \n\t"\
"pcmpeqb " #lx ", " #ppsx " \n\t"\
"pand " #ppsx ", " #pplx " \n\t"\
"pandn " #dst ", " #ppsx " \n\t"\
"por " #pplx ", " #ppsx " \n\t"\
"movq " #ppsx ", " #dst " \n\t"\
"movq temp1, " #lx " \n\t"
"movq "MANGLE(temp1)", " #lx " \n\t"
/*
0000000
@ -2082,7 +2082,7 @@ static void inline RENAME(tempNoiseReducer)(uint8_t *src, int stride,
"paddw %%mm6, %%mm0 \n\t"
#elif defined (FAST_L2_DIFF)
"pcmpeqb %%mm7, %%mm7 \n\t"
"movq b80, %%mm6 \n\t"
"movq "MANGLE(b80)", %%mm6 \n\t"
"pxor %%mm0, %%mm0 \n\t"
#define L2_DIFF_CORE(a, b)\
"movq " #a ", %%mm5 \n\t"\
@ -2152,12 +2152,12 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx))
"movl %%ecx, (%%ebx) \n\t"
"leal (%%eax, %2, 2), %%ebx \n\t" // 5*stride
// "movl %3, %%ecx \n\t"
// "movl %3, %%ecx \n\t"
// "movl %%ecx, test \n\t"
// "jmp 4f \n\t"
"cmpl 4+maxTmpNoise, %%ecx \n\t"
"cmpl 4+"MANGLE(maxTmpNoise)", %%ecx \n\t"
" jb 2f \n\t"
"cmpl 8+maxTmpNoise, %%ecx \n\t"
"cmpl 8+"MANGLE(maxTmpNoise)", %%ecx \n\t"
" jb 1f \n\t"
"leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
@ -2216,7 +2216,7 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx))
"jmp 4f \n\t"
"2: \n\t"
"cmpl maxTmpNoise, %%ecx \n\t"
"cmpl "MANGLE(maxTmpNoise)", %%ecx \n\t"
" jb 3f \n\t"
"leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
@ -2461,8 +2461,8 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, uint8_t src[]
asm volatile(
"leal (%0,%2), %%eax \n\t"
"leal (%1,%3), %%ebx \n\t"
"movq packedYOffset, %%mm2 \n\t"
"movq packedYScale, %%mm3 \n\t"
"movq "MANGLE(packedYOffset)", %%mm2\n\t"
"movq "MANGLE(packedYScale)", %%mm3\n\t"
"pxor %%mm4, %%mm4 \n\t"
#ifdef HAVE_MMX2
#define SCALED_CPY(src1, src2, dst1, dst2) \
@ -2884,7 +2884,7 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
"packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
"packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
"packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
"movq %%mm7, pQPb \n\t"
"movq %%mm7, "MANGLE(pQPb)" \n\t"
: : "r" (QP)
);
#endif

View File

@ -10,6 +10,7 @@
#include <math.h>
#include <stdio.h>
#include "../config.h"
#include "../mangle.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif

View File

@ -135,19 +135,19 @@
"addl $1, %%edx \n\t"\
" jnz 2b \n\t"\
\
"psubw w400, %%mm3 \n\t" /* (U-128)8*/\
"psubw w400, %%mm4 \n\t" /* (V-128)8*/\
"psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\
"psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
"pmulhw ugCoeff, %%mm3 \n\t"\
"pmulhw vgCoeff, %%mm4 \n\t"\
"pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
"pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
"pmulhw ubCoeff, %%mm2 \n\t"\
"pmulhw vrCoeff, %%mm5 \n\t"\
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
"psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\
"pmulhw yCoeff, %%mm1 \n\t"\
"pmulhw yCoeff, %%mm7 \n\t"\
"pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\
"pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\
"psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
"psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\
"pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
"pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
"paddw %%mm3, %%mm4 \n\t"\
"movq %%mm2, %%mm0 \n\t"\
@ -197,23 +197,23 @@
"movq 4096(%3, %%eax,2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
"psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
"psubw w400, %%mm3 \n\t" /* 8(U-128)*/\
"pmulhw yCoeff, %%mm1 \n\t"\
"psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
"psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\
"pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
\
\
"pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
"pmulhw ubCoeff, %%mm3 \n\t"\
"pmulhw "MANGLE(ubCoeff)", %%mm3\n\t"\
"psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
"pmulhw ugCoeff, %%mm2 \n\t"\
"pmulhw "MANGLE(ugCoeff)", %%mm2\n\t"\
"paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
"psubw w400, %%mm0 \n\t" /* (V-128)8*/\
"psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\
\
\
"movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\
"pmulhw vrCoeff, %%mm0 \n\t"\
"pmulhw vgCoeff, %%mm4 \n\t"\
"pmulhw "MANGLE(vrCoeff)", %%mm0\n\t"\
"pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
"paddw %%mm1, %%mm3 \n\t" /* B*/\
"paddw %%mm1, %%mm0 \n\t" /* R*/\
"packuswb %%mm3, %%mm3 \n\t"\
@ -228,11 +228,11 @@
"movd %6, %%mm6 \n\t" /*yalpha1*/\
"punpcklwd %%mm6, %%mm6 \n\t"\
"punpcklwd %%mm6, %%mm6 \n\t"\
"movq %%mm6, asm_yalpha1 \n\t"\
"movq %%mm6, "MANGLE(asm_yalpha1)"\n\t"\
"movd %7, %%mm5 \n\t" /*uvalpha1*/\
"punpcklwd %%mm5, %%mm5 \n\t"\
"punpcklwd %%mm5, %%mm5 \n\t"\
"movq %%mm5, asm_uvalpha1 \n\t"\
"movq %%mm5, "MANGLE(asm_uvalpha1)"\n\t"\
"xorl %%eax, %%eax \n\t"\
".balign 16 \n\t"\
"1: \n\t"\
@ -242,19 +242,19 @@
"movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
"psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
"psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
"movq asm_uvalpha1, %%mm0 \n\t"\
"movq "MANGLE(asm_uvalpha1)", %%mm0\n\t"\
"pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
"pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
"psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
"psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
"psubw w400, %%mm3 \n\t" /* (U-128)8*/\
"psubw w400, %%mm4 \n\t" /* (V-128)8*/\
"psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\
"psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
"pmulhw ugCoeff, %%mm3 \n\t"\
"pmulhw vgCoeff, %%mm4 \n\t"\
"pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
"pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
"movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\
"movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\
@ -262,18 +262,18 @@
"movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\
"psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
"psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
"pmulhw asm_yalpha1, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
"pmulhw asm_yalpha1, %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
"pmulhw "MANGLE(asm_yalpha1)", %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
"pmulhw "MANGLE(asm_yalpha1)", %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
"psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
"psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
"paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
"paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
"pmulhw ubCoeff, %%mm2 \n\t"\
"pmulhw vrCoeff, %%mm5 \n\t"\
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
"psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\
"pmulhw yCoeff, %%mm1 \n\t"\
"pmulhw yCoeff, %%mm7 \n\t"\
"pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\
"pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\
"psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
"psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\
"pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
"pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
"paddw %%mm3, %%mm4 \n\t"\
"movq %%mm2, %%mm0 \n\t"\
@ -305,23 +305,23 @@
"movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
"psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
"psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
"psubw w400, %%mm3 \n\t" /* (U-128)8*/\
"psubw w400, %%mm4 \n\t" /* (V-128)8*/\
"psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\
"psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
"pmulhw ugCoeff, %%mm3 \n\t"\
"pmulhw vgCoeff, %%mm4 \n\t"\
"pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
"pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
"movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\
"movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\
"psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
"psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
"pmulhw ubCoeff, %%mm2 \n\t"\
"pmulhw vrCoeff, %%mm5 \n\t"\
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
"psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\
"pmulhw yCoeff, %%mm1 \n\t"\
"pmulhw yCoeff, %%mm7 \n\t"\
"pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\
"pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\
"psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
"psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\
"pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
"pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
"paddw %%mm3, %%mm4 \n\t"\
"movq %%mm2, %%mm0 \n\t"\
@ -358,23 +358,23 @@
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
"psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
"psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
"psubw w400, %%mm3 \n\t" /* (U-128)8*/\
"psubw w400, %%mm4 \n\t" /* (V-128)8*/\
"psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\
"psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
"pmulhw ugCoeff, %%mm3 \n\t"\
"pmulhw vgCoeff, %%mm4 \n\t"\
"pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
"pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
"movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\
"movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\
"psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
"psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
"pmulhw ubCoeff, %%mm2 \n\t"\
"pmulhw vrCoeff, %%mm5 \n\t"\
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
"psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\
"pmulhw yCoeff, %%mm1 \n\t"\
"pmulhw yCoeff, %%mm7 \n\t"\
"pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\
"pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\
"psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
"psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\
"pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
"pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
"paddw %%mm3, %%mm4 \n\t"\
"movq %%mm2, %%mm0 \n\t"\
@ -423,9 +423,9 @@
" jb 1b \n\t"
#define WRITEBGR16 \
"pand bF8, %%mm2 \n\t" /* B */\
"pand bFC, %%mm4 \n\t" /* G */\
"pand bF8, %%mm5 \n\t" /* R */\
"pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
"pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
"pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
"psrlq $3, %%mm2 \n\t"\
\
"movq %%mm2, %%mm1 \n\t"\
@ -450,9 +450,9 @@
" jb 1b \n\t"
#define WRITEBGR15 \
"pand bF8, %%mm2 \n\t" /* B */\
"pand bF8, %%mm4 \n\t" /* G */\
"pand bF8, %%mm5 \n\t" /* R */\
"pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
"pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
"pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
"psrlq $3, %%mm2 \n\t"\
"psrlq $1, %%mm5 \n\t"\
\
@ -494,8 +494,8 @@
\
"movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
"psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\
"pand bm00000111, %%mm4 \n\t" /* 00000RGB 0 */\
"pand bm11111000, %%mm0 \n\t" /* 00RGB000 0.5 */\
"pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 0 */\
"pand "MANGLE(bm11111000)", %%mm0\n\t" /* 00RGB000 0.5 */\
"por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\
"movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\
"psllq $48, %%mm2 \n\t" /* GB000000 1 */\
@ -505,11 +505,11 @@
"psrld $16, %%mm4 \n\t" /* 000R000R 1 */\
"psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\
"por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\
"pand bm00001111, %%mm2 \n\t" /* 0000RGBR 1 */\
"pand "MANGLE(bm00001111)", %%mm2\n\t" /* 0000RGBR 1 */\
"movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\
"psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\
"pand bm00000111, %%mm4 \n\t" /* 00000RGB 2 */\
"pand bm11111000, %%mm1 \n\t" /* 00RGB000 2.5 */\
"pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 2 */\
"pand "MANGLE(bm11111000)", %%mm1\n\t" /* 00RGB000 2.5 */\
"por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\
"movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\
"psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\
@ -518,8 +518,8 @@
"psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\
"movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\
"psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\
"pand bm00000111, %%mm5 \n\t" /* 00000RGB 3 */\
"pand bm11111000, %%mm3 \n\t" /* 00RGB000 3.5 */\
"pand "MANGLE(bm00000111)", %%mm5\n\t" /* 00000RGB 3 */\
"pand "MANGLE(bm11111000)", %%mm3\n\t" /* 00RGB000 3.5 */\
"por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\
"psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\
"por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\
@ -588,8 +588,8 @@
#define WRITEBGR24MMX2 \
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
"movq M24A, %%mm0 \n\t"\
"movq M24C, %%mm7 \n\t"\
"movq "MANGLE(M24A)", %%mm0 \n\t"\
"movq "MANGLE(M24C)", %%mm7 \n\t"\
"pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
"pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
"pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
@ -608,7 +608,7 @@
"pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
"pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
\
"pand M24B, %%mm1 \n\t" /* B5 B4 B3 */\
"pand "MANGLE(M24B)", %%mm1 \n\t" /* B5 B4 B3 */\
"pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
"pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
\
@ -622,7 +622,7 @@
\
"pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
"pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
"pand M24B, %%mm6 \n\t" /* R7 R6 R5 */\
"pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */\
\
"por %%mm1, %%mm3 \n\t"\
"por %%mm3, %%mm6 \n\t"\
@ -777,9 +777,9 @@ static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lu
YSCALEYUV2RGBX
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t"
"paddusb g5Dither, %%mm4 \n\t"
"paddusb r5Dither, %%mm5 \n\t"
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
WRITEBGR15
@ -797,9 +797,9 @@ static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lu
YSCALEYUV2RGBX
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t"
"paddusb g6Dither, %%mm4 \n\t"
"paddusb r5Dither, %%mm5 \n\t"
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
WRITEBGR16
@ -876,8 +876,8 @@ FULL_YSCALEYUV2RGB
"movq %%mm3, %%mm2 \n\t" // BGR0BGR0
"psrlq $8, %%mm3 \n\t" // GR0BGR00
"pand bm00000111, %%mm2 \n\t" // BGR00000
"pand bm11111000, %%mm3 \n\t" // 000BGR00
"pand "MANGLE(bm00000111)", %%mm2\n\t" // BGR00000
"pand "MANGLE(bm11111000)", %%mm3\n\t" // 000BGR00
"por %%mm2, %%mm3 \n\t" // BGRBGR00
"movq %%mm1, %%mm2 \n\t"
"psllq $48, %%mm1 \n\t" // 000000BG
@ -916,9 +916,9 @@ FULL_YSCALEYUV2RGB
FULL_YSCALEYUV2RGB
#ifdef DITHER1XBPP
"paddusb g5Dither, %%mm1 \n\t"
"paddusb r5Dither, %%mm0 \n\t"
"paddusb b5Dither, %%mm3 \n\t"
"paddusb "MANGLE(g5Dither)", %%mm1\n\t"
"paddusb "MANGLE(r5Dither)", %%mm0\n\t"
"paddusb "MANGLE(b5Dither)", %%mm3\n\t"
#endif
"punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
"punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
@ -927,8 +927,8 @@ FULL_YSCALEYUV2RGB
"psrlw $3, %%mm3 \n\t"
"psllw $2, %%mm1 \n\t"
"psllw $7, %%mm0 \n\t"
"pand g15Mask, %%mm1 \n\t"
"pand r15Mask, %%mm0 \n\t"
"pand "MANGLE(g15Mask)", %%mm1 \n\t"
"pand "MANGLE(r15Mask)", %%mm0 \n\t"
"por %%mm3, %%mm1 \n\t"
"por %%mm1, %%mm0 \n\t"
@ -950,9 +950,9 @@ FULL_YSCALEYUV2RGB
FULL_YSCALEYUV2RGB
#ifdef DITHER1XBPP
"paddusb g6Dither, %%mm1 \n\t"
"paddusb r5Dither, %%mm0 \n\t"
"paddusb b5Dither, %%mm3 \n\t"
"paddusb "MANGLE(g6Dither)", %%mm1\n\t"
"paddusb "MANGLE(r5Dither)", %%mm0\n\t"
"paddusb "MANGLE(b5Dither)", %%mm3\n\t"
#endif
"punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
"punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
@ -961,8 +961,8 @@ FULL_YSCALEYUV2RGB
"psrlw $3, %%mm3 \n\t"
"psllw $3, %%mm1 \n\t"
"psllw $8, %%mm0 \n\t"
"pand g16Mask, %%mm1 \n\t"
"pand r16Mask, %%mm0 \n\t"
"pand "MANGLE(g16Mask)", %%mm1 \n\t"
"pand "MANGLE(r16Mask)", %%mm0 \n\t"
"por %%mm3, %%mm1 \n\t"
"por %%mm1, %%mm0 \n\t"
@ -1057,9 +1057,9 @@ FULL_YSCALEYUV2RGB
YSCALEYUV2RGB
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t"
"paddusb g5Dither, %%mm4 \n\t"
"paddusb r5Dither, %%mm5 \n\t"
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
WRITEBGR15
@ -1075,9 +1075,9 @@ FULL_YSCALEYUV2RGB
YSCALEYUV2RGB
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t"
"paddusb g6Dither, %%mm4 \n\t"
"paddusb r5Dither, %%mm5 \n\t"
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
WRITEBGR16
@ -1234,9 +1234,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
YSCALEYUV2RGB1
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t"
"paddusb g5Dither, %%mm4 \n\t"
"paddusb r5Dither, %%mm5 \n\t"
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
WRITEBGR15
:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
@ -1250,9 +1250,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
YSCALEYUV2RGB1
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t"
"paddusb g6Dither, %%mm4 \n\t"
"paddusb r5Dither, %%mm5 \n\t"
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
WRITEBGR16
@ -1291,9 +1291,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
YSCALEYUV2RGB1b
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t"
"paddusb g5Dither, %%mm4 \n\t"
"paddusb r5Dither, %%mm5 \n\t"
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
WRITEBGR15
:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
@ -1307,9 +1307,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
YSCALEYUV2RGB1b
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t"
"paddusb g6Dither, %%mm4 \n\t"
"paddusb r5Dither, %%mm5 \n\t"
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
WRITEBGR16
@ -1435,7 +1435,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
dst-= counter/2;
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
"movq w02, %%mm6 \n\t"
"movq "MANGLE(w02)", %%mm6 \n\t"
"pushl %%ebp \n\t" // we use 7 regs here ...
"movl %%eax, %%ebp \n\t"
".balign 16 \n\t"
@ -1473,7 +1473,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
dst-= counter/2;
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
"movq w02, %%mm6 \n\t"
"movq "MANGLE(w02)", %%mm6 \n\t"
"pushl %%ebp \n\t" // we use 7 regs here ...
"movl %%eax, %%ebp \n\t"
".balign 16 \n\t"
@ -1523,7 +1523,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
dst-= counter/2;
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
"movq w02, %%mm6 \n\t"
"movq "MANGLE(w02)", %%mm6 \n\t"
".balign 16 \n\t"
"1: \n\t"
"movl %2, %%ecx \n\t"
@ -1614,7 +1614,7 @@ static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, in
"psllq $16, %%mm2 \n\t"
"paddw %%mm6, %%mm2 \n\t"
"psllq $16, %%mm2 \n\t" //0,t,2t,3t t=xInc&0xFF
"movq %%mm2, temp0 \n\t"
"movq %%mm2, "MANGLE(temp0)" \n\t"
"movd %4, %%mm6 \n\t" //(xInc*4)&0xFFFF
"punpcklwd %%mm6, %%mm6 \n\t"
"punpcklwd %%mm6, %%mm6 \n\t"
@ -1630,8 +1630,8 @@ static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, in
PREFETCH" 1024(%%esi) \n\t"\
PREFETCH" 1056(%%esi) \n\t"\
PREFETCH" 1088(%%esi) \n\t"\
"call funnyYCode \n\t"\
"movq temp0, %%mm2 \n\t"\
"call "MANGLE(funnyYCode)" \n\t"\
"movq "MANGLE(temp0)", %%mm2 \n\t"\
"xorl %%ecx, %%ecx \n\t"
FUNNY_Y_CODE
@ -1741,7 +1741,7 @@ inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth,
"psllq $16, %%mm2 \n\t"
"paddw %%mm6, %%mm2 \n\t"
"psllq $16, %%mm2 \n\t" //0,t,2t,3t t=xInc&0xFFFF
"movq %%mm2, temp0 \n\t"
"movq %%mm2, "MANGLE(temp0)" \n\t"
"movd %4, %%mm6 \n\t" //(xInc*4)&0xFFFF
"punpcklwd %%mm6, %%mm6 \n\t"
"punpcklwd %%mm6, %%mm6 \n\t"
@ -1757,8 +1757,8 @@ inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth,
PREFETCH" 1024(%%esi) \n\t"\
PREFETCH" 1056(%%esi) \n\t"\
PREFETCH" 1088(%%esi) \n\t"\
"call funnyUVCode \n\t"\
"movq temp0, %%mm2 \n\t"\
"call "MANGLE(funnyUVCode)" \n\t"\
"movq "MANGLE(temp0)", %%mm2 \n\t"\
"xorl %%ecx, %%ecx \n\t"
FUNNYUVCODE