mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
mangle for win32 in postproc
Originally committed as revision 4249 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
This commit is contained in:
parent
0a8d8945fd
commit
9b4644281d
@ -86,6 +86,7 @@ dont use #ifdef ARCH_X86 for the asm stuff ... cross compilers? (note cpudetect
|
||||
//#include "../libvo/fastmemcpy.h"
|
||||
#include "postprocess.h"
|
||||
#include "../cpudetect.h"
|
||||
#include "../mangle.h"
|
||||
|
||||
// Smaller of a and b (yields a when they compare equal); each argument may be evaluated twice, so avoid side effects.
#define MIN(a,b) ((b) < (a) ? (b) : (a))
|
||||
// Larger of a and b (yields a when they compare equal); each argument may be evaluated twice, so avoid side effects.
#define MAX(a,b) ((b) > (a) ? (b) : (a))
|
||||
|
@ -60,8 +60,8 @@ asm volatile(
|
||||
"leal (%%eax, %2, 4), %%ebx \n\t"
|
||||
// 0 1 2 3 4 5 6 7 8 9
|
||||
// %1 eax eax+%2 eax+2%2 %1+4%2 ebx ebx+%2 ebx+2%2 %1+8%2 ebx+4%2
|
||||
"movq mmxDCOffset, %%mm7 \n\t" // mm7 = 0x7F
|
||||
"movq mmxDCThreshold, %%mm6 \n\t" // mm6 = 0x7D
|
||||
"movq "MANGLE(mmxDCOffset)", %%mm7 \n\t" // mm7 = 0x7F
|
||||
"movq "MANGLE(mmxDCThreshold)", %%mm6 \n\t" // mm6 = 0x7D
|
||||
"movq (%1), %%mm0 \n\t"
|
||||
"movq (%%eax), %%mm1 \n\t"
|
||||
"psubb %%mm1, %%mm0 \n\t" // mm0 = difference
|
||||
@ -171,12 +171,12 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, int QP)
|
||||
"psubusb %%mm2, %%mm1 \n\t"
|
||||
"por %%mm1, %%mm0 \n\t" // ABS Diff
|
||||
|
||||
"movq pQPb, %%mm7 \n\t" // QP,..., QP
|
||||
"movq "MANGLE(pQPb)", %%mm7 \n\t" // QP,..., QP
|
||||
"paddusb %%mm7, %%mm7 \n\t" // 2QP ... 2QP
|
||||
"psubusb %%mm7, %%mm0 \n\t" // Diff <= 2QP -> 0
|
||||
"pcmpeqd b00, %%mm0 \n\t"
|
||||
"pcmpeqd "MANGLE(b00)", %%mm0 \n\t"
|
||||
"psrlq $16, %%mm0 \n\t"
|
||||
"pcmpeqd bFF, %%mm0 \n\t"
|
||||
"pcmpeqd "MANGLE(bFF)", %%mm0 \n\t"
|
||||
// "movd %%mm0, (%1, %2, 4)\n\t"
|
||||
"movd %%mm0, %0 \n\t"
|
||||
: "=r" (isOk)
|
||||
@ -219,7 +219,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
|
||||
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
||||
src+= stride*3;
|
||||
asm volatile( //"movv %0 %1 %2\n\t"
|
||||
"movq pQPb, %%mm0 \n\t" // QP,..., QP
|
||||
"movq "MANGLE(pQPb)", %%mm0 \n\t" // QP,..., QP
|
||||
|
||||
"movq (%0), %%mm6 \n\t"
|
||||
"movq (%0, %1), %%mm5 \n\t"
|
||||
@ -229,7 +229,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
|
||||
"psubusb %%mm1, %%mm2 \n\t"
|
||||
"por %%mm5, %%mm2 \n\t" // ABS Diff of lines
|
||||
"psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0
|
||||
"pcmpeqb b00, %%mm2 \n\t" // diff <= QP -> FF
|
||||
"pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // diff <= QP -> FF
|
||||
|
||||
"pand %%mm2, %%mm6 \n\t"
|
||||
"pandn %%mm1, %%mm2 \n\t"
|
||||
@ -247,7 +247,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
|
||||
"psubusb %%mm1, %%mm2 \n\t"
|
||||
"por %%mm5, %%mm2 \n\t" // ABS Diff of lines
|
||||
"psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0
|
||||
"pcmpeqb b00, %%mm2 \n\t" // diff <= QP -> FF
|
||||
"pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // diff <= QP -> FF
|
||||
|
||||
"pand %%mm2, %%mm7 \n\t"
|
||||
"pandn %%mm1, %%mm2 \n\t"
|
||||
@ -403,16 +403,16 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
|
||||
// FIXME rounding
|
||||
asm volatile(
|
||||
"pxor %%mm7, %%mm7 \n\t" // 0
|
||||
"movq b80, %%mm6 \n\t" // MIN_SIGNED_BYTE
|
||||
"movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE
|
||||
"leal (%0, %1), %%eax \n\t"
|
||||
"leal (%%eax, %1, 4), %%ebx \n\t"
|
||||
// 0 1 2 3 4 5 6 7 8 9
|
||||
// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
|
||||
"movq pQPb, %%mm0 \n\t" // QP,..., QP
|
||||
"movq "MANGLE(pQPb)", %%mm0 \n\t" // QP,..., QP
|
||||
"movq %%mm0, %%mm1 \n\t" // QP,..., QP
|
||||
"paddusb b02, %%mm0 \n\t"
|
||||
"paddusb "MANGLE(b02)", %%mm0 \n\t"
|
||||
"psrlw $2, %%mm0 \n\t"
|
||||
"pand b3F, %%mm0 \n\t" // QP/4,..., QP/4
|
||||
"pand "MANGLE(b3F)", %%mm0 \n\t" // QP/4,..., QP/4
|
||||
"paddusb %%mm1, %%mm0 \n\t" // QP*1.25 ...
|
||||
"movq (%0, %1, 4), %%mm2 \n\t" // line 4
|
||||
"movq (%%ebx), %%mm3 \n\t" // line 5
|
||||
@ -441,8 +441,8 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
|
||||
|
||||
"paddb %%mm6, %%mm5 \n\t"
|
||||
"psrlw $2, %%mm5 \n\t"
|
||||
"pand b3F, %%mm5 \n\t"
|
||||
"psubb b20, %%mm5 \n\t" // (l5-l4)/8
|
||||
"pand "MANGLE(b3F)", %%mm5 \n\t"
|
||||
"psubb "MANGLE(b20)", %%mm5 \n\t" // (l5-l4)/8
|
||||
|
||||
"movq (%%eax, %1, 2), %%mm2 \n\t"
|
||||
"paddb %%mm6, %%mm2 \n\t" // line 3 + 0x80
|
||||
@ -503,7 +503,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP)
|
||||
|
||||
asm volatile(
|
||||
"pxor %%mm7, %%mm7 \n\t" // 0
|
||||
// "movq b80, %%mm6 \n\t" // MIN_SIGNED_BYTE
|
||||
// "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE
|
||||
"leal (%0, %1), %%eax \n\t"
|
||||
"leal (%%eax, %1, 4), %%ebx \n\t"
|
||||
// 0 1 2 3 4 5 6 7 8 9
|
||||
@ -529,9 +529,9 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP)
|
||||
"por %%mm5, %%mm4 \n\t" // |l4 - l5|
|
||||
"psubusb %%mm0, %%mm4 \n\t" //d = MAX(0, |l4-l5| - (|l2-l3| + |l5-l6|)/2)
|
||||
"movq %%mm4, %%mm3 \n\t" // d
|
||||
"psubusb pQPb, %%mm4 \n\t"
|
||||
"psubusb "MANGLE(pQPb)", %%mm4 \n\t"
|
||||
"pcmpeqb %%mm7, %%mm4 \n\t" // d <= QP ? -1 : 0
|
||||
"psubusb b01, %%mm3 \n\t"
|
||||
"psubusb "MANGLE(b01)", %%mm3 \n\t"
|
||||
"pand %%mm4, %%mm3 \n\t" // d <= QP ? d : 0
|
||||
|
||||
PAVGB(%%mm7, %%mm3) // d/2
|
||||
@ -740,18 +740,18 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
|
||||
|
||||
|
||||
PMINUB(%%mm2, %%mm1, %%mm4) // MIN(|lenergy|,|renergy|)/8
|
||||
"movq pQPb, %%mm4 \n\t" // QP //FIXME QP+1 ?
|
||||
"paddusb b01, %%mm4 \n\t"
|
||||
"movq "MANGLE(pQPb)", %%mm4 \n\t" // QP //FIXME QP+1 ?
|
||||
"paddusb "MANGLE(b01)", %%mm4 \n\t"
|
||||
"pcmpgtb %%mm3, %%mm4 \n\t" // |menergy|/8 < QP
|
||||
"psubusb %%mm1, %%mm3 \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
|
||||
"pand %%mm4, %%mm3 \n\t"
|
||||
|
||||
"movq %%mm3, %%mm1 \n\t"
|
||||
// "psubusb b01, %%mm3 \n\t"
|
||||
// "psubusb "MANGLE(b01)", %%mm3 \n\t"
|
||||
PAVGB(%%mm7, %%mm3)
|
||||
PAVGB(%%mm7, %%mm3)
|
||||
"paddusb %%mm1, %%mm3 \n\t"
|
||||
// "paddusb b01, %%mm3 \n\t"
|
||||
// "paddusb "MANGLE(b01)", %%mm3 \n\t"
|
||||
|
||||
"movq (%%eax, %1, 2), %%mm6 \n\t" //l3
|
||||
"movq (%0, %1, 4), %%mm5 \n\t" //l4
|
||||
@ -764,7 +764,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
|
||||
"pand %%mm0, %%mm3 \n\t"
|
||||
PMINUB(%%mm5, %%mm3, %%mm0)
|
||||
|
||||
"psubusb b01, %%mm3 \n\t"
|
||||
"psubusb "MANGLE(b01)", %%mm3 \n\t"
|
||||
PAVGB(%%mm7, %%mm3)
|
||||
|
||||
"movq (%%eax, %1, 2), %%mm0 \n\t"
|
||||
@ -796,7 +796,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
|
||||
"movq (%%eax, %1), %%mm3 \n\t" // l2
|
||||
"pxor %%mm6, %%mm2 \n\t" // -l5-1
|
||||
"movq %%mm2, %%mm5 \n\t" // -l5-1
|
||||
"movq b80, %%mm4 \n\t" // 128
|
||||
"movq "MANGLE(b80)", %%mm4 \n\t" // 128
|
||||
"leal (%%eax, %1, 4), %%ebx \n\t"
|
||||
PAVGB(%%mm3, %%mm2) // (l2-l5+256)/2
|
||||
PAVGB(%%mm0, %%mm4) // ~(l4-l3)/4 + 128
|
||||
@ -808,7 +808,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
|
||||
"pxor %%mm6, %%mm2 \n\t" // -l1-1
|
||||
PAVGB(%%mm3, %%mm2) // (l2-l1+256)/2
|
||||
PAVGB((%0), %%mm1) // (l0-l3+256)/2
|
||||
"movq b80, %%mm3 \n\t" // 128
|
||||
"movq "MANGLE(b80)", %%mm3 \n\t" // 128
|
||||
PAVGB(%%mm2, %%mm3) // ~(l2-l1)/4 + 128
|
||||
PAVGB(%%mm1, %%mm3) // ~(l0-l3)/4 +(l2-l1)/8 + 128
|
||||
PAVGB(%%mm2, %%mm3) // ~(l0-l3)/8 +5(l2-l1)/16 + 128
|
||||
@ -818,14 +818,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
|
||||
"movq (%%ebx, %1, 2), %%mm1 \n\t" // l7
|
||||
"pxor %%mm6, %%mm1 \n\t" // -l7-1
|
||||
PAVGB((%0, %1, 4), %%mm1) // (l4-l7+256)/2
|
||||
"movq b80, %%mm2 \n\t" // 128
|
||||
"movq "MANGLE(b80)", %%mm2 \n\t" // 128
|
||||
PAVGB(%%mm5, %%mm2) // ~(l6-l5)/4 + 128
|
||||
PAVGB(%%mm1, %%mm2) // ~(l4-l7)/4 +(l6-l5)/8 + 128
|
||||
PAVGB(%%mm5, %%mm2) // ~(l4-l7)/8 +5(l6-l5)/16 + 128
|
||||
// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128
|
||||
|
||||
"movq b00, %%mm1 \n\t" // 0
|
||||
"movq b00, %%mm5 \n\t" // 0
|
||||
"movq "MANGLE(b00)", %%mm1 \n\t" // 0
|
||||
"movq "MANGLE(b00)", %%mm5 \n\t" // 0
|
||||
"psubb %%mm2, %%mm1 \n\t" // 128 - renergy/16
|
||||
"psubb %%mm3, %%mm5 \n\t" // 128 - lenergy/16
|
||||
PMAXUB(%%mm1, %%mm2) // 128 + |renergy/16|
|
||||
@ -834,8 +834,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
|
||||
|
||||
// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128
|
||||
|
||||
"movq b00, %%mm7 \n\t" // 0
|
||||
"movq pQPb, %%mm2 \n\t" // QP
|
||||
"movq "MANGLE(b00)", %%mm7 \n\t" // 0
|
||||
"movq "MANGLE(pQPb)", %%mm2 \n\t" // QP
|
||||
PAVGB(%%mm6, %%mm2) // 128 + QP/2
|
||||
"psubb %%mm6, %%mm2 \n\t"
|
||||
|
||||
@ -848,13 +848,13 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
|
||||
// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16
|
||||
|
||||
"movq %%mm4, %%mm3 \n\t" // d
|
||||
"psubusb b01, %%mm4 \n\t"
|
||||
"psubusb "MANGLE(b01)", %%mm4 \n\t"
|
||||
PAVGB(%%mm7, %%mm4) // d/32
|
||||
PAVGB(%%mm7, %%mm4) // (d + 32)/64
|
||||
"paddb %%mm3, %%mm4 \n\t" // 5d/64
|
||||
"pand %%mm2, %%mm4 \n\t"
|
||||
|
||||
"movq b80, %%mm5 \n\t" // 128
|
||||
"movq "MANGLE(b80)", %%mm5 \n\t" // 128
|
||||
"psubb %%mm0, %%mm5 \n\t" // q
|
||||
"paddsb %%mm6, %%mm5 \n\t" // fix bad rounding
|
||||
"pcmpgtb %%mm5, %%mm7 \n\t" // SIGN(q)
|
||||
@ -991,8 +991,8 @@ src-=8;
|
||||
"psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - H3
|
||||
"psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
|
||||
"psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
|
||||
"movq %%mm0, temp0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
|
||||
"movq %%mm1, temp1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
|
||||
"movq %%mm0, "MANGLE(temp0)" \n\t" // 2L0 - 5L1 + 5L2 - 2L3
|
||||
"movq %%mm1, "MANGLE(temp1)" \n\t" // 2H0 - 5H1 + 5H2 - 2H3
|
||||
|
||||
"movq (%0, %1, 4), %%mm0 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
@ -1001,8 +1001,8 @@ src-=8;
|
||||
|
||||
"psubw %%mm0, %%mm2 \n\t" // L3 - L4
|
||||
"psubw %%mm1, %%mm3 \n\t" // H3 - H4
|
||||
"movq %%mm2, temp2 \n\t" // L3 - L4
|
||||
"movq %%mm3, temp3 \n\t" // H3 - H4
|
||||
"movq %%mm2, "MANGLE(temp2)" \n\t" // L3 - L4
|
||||
"movq %%mm3, "MANGLE(temp3)" \n\t" // H3 - H4
|
||||
"paddw %%mm4, %%mm4 \n\t" // 2L2
|
||||
"paddw %%mm5, %%mm5 \n\t" // 2H2
|
||||
"psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4
|
||||
@ -1049,8 +1049,8 @@ src-=8;
|
||||
"psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7
|
||||
"psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7
|
||||
|
||||
"movq temp0, %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
|
||||
"movq temp1, %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
|
||||
"movq "MANGLE(temp0)", %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
|
||||
"movq "MANGLE(temp1)", %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
|
||||
|
||||
#ifdef HAVE_MMX2
|
||||
"movq %%mm7, %%mm6 \n\t" // 0
|
||||
@ -1138,8 +1138,8 @@ src-=8;
|
||||
"pmulhw %%mm2, %%mm5 \n\t" // ld/13
|
||||
*/
|
||||
|
||||
"movq temp2, %%mm0 \n\t" // L3 - L4
|
||||
"movq temp3, %%mm1 \n\t" // H3 - H4
|
||||
"movq "MANGLE(temp2)", %%mm0 \n\t" // L3 - L4
|
||||
"movq "MANGLE(temp3)", %%mm1 \n\t" // H3 - H4
|
||||
|
||||
"pxor %%mm2, %%mm2 \n\t"
|
||||
"pxor %%mm3, %%mm3 \n\t"
|
||||
@ -1235,9 +1235,9 @@ static inline void RENAME(dering)(uint8_t src[], int stride, int QP)
|
||||
{
|
||||
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
||||
asm volatile(
|
||||
"movq pQPb, %%mm0 \n\t"
|
||||
"movq "MANGLE(pQPb)", %%mm0 \n\t"
|
||||
"paddusb %%mm0, %%mm0 \n\t"
|
||||
"movq %%mm0, pQPb2 \n\t"
|
||||
"movq %%mm0, "MANGLE(pQPb2)" \n\t"
|
||||
|
||||
"leal (%0, %1), %%eax \n\t"
|
||||
"leal (%%eax, %1, 4), %%ebx \n\t"
|
||||
@ -1319,13 +1319,13 @@ FIND_MIN_MAX((%0, %1, 8))
|
||||
"movq %%mm6, %%mm0 \n\t" // max
|
||||
"psubb %%mm7, %%mm6 \n\t" // max - min
|
||||
"movd %%mm6, %%ecx \n\t"
|
||||
"cmpb deringThreshold, %%cl \n\t"
|
||||
"cmpb "MANGLE(deringThreshold)", %%cl \n\t"
|
||||
" jb 1f \n\t"
|
||||
PAVGB(%%mm0, %%mm7) // a=(max + min)/2
|
||||
"punpcklbw %%mm7, %%mm7 \n\t"
|
||||
"punpcklbw %%mm7, %%mm7 \n\t"
|
||||
"punpcklbw %%mm7, %%mm7 \n\t"
|
||||
"movq %%mm7, temp0 \n\t"
|
||||
"movq %%mm7, "MANGLE(temp0)" \n\t"
|
||||
|
||||
"movq (%0), %%mm0 \n\t" // L10
|
||||
"movq %%mm0, %%mm1 \n\t" // L10
|
||||
@ -1344,9 +1344,9 @@ FIND_MIN_MAX((%0, %1, 8))
|
||||
"psubusb %%mm7, %%mm0 \n\t"
|
||||
"psubusb %%mm7, %%mm2 \n\t"
|
||||
"psubusb %%mm7, %%mm3 \n\t"
|
||||
"pcmpeqb b00, %%mm0 \n\t" // L10 > a ? 0 : -1
|
||||
"pcmpeqb b00, %%mm2 \n\t" // L20 > a ? 0 : -1
|
||||
"pcmpeqb b00, %%mm3 \n\t" // L00 > a ? 0 : -1
|
||||
"pcmpeqb "MANGLE(b00)", %%mm0 \n\t" // L10 > a ? 0 : -1
|
||||
"pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // L20 > a ? 0 : -1
|
||||
"pcmpeqb "MANGLE(b00)", %%mm3 \n\t" // L00 > a ? 0 : -1
|
||||
"paddb %%mm2, %%mm0 \n\t"
|
||||
"paddb %%mm3, %%mm0 \n\t"
|
||||
|
||||
@ -1367,9 +1367,9 @@ FIND_MIN_MAX((%0, %1, 8))
|
||||
"psubusb %%mm7, %%mm2 \n\t"
|
||||
"psubusb %%mm7, %%mm4 \n\t"
|
||||
"psubusb %%mm7, %%mm5 \n\t"
|
||||
"pcmpeqb b00, %%mm2 \n\t" // L11 > a ? 0 : -1
|
||||
"pcmpeqb b00, %%mm4 \n\t" // L21 > a ? 0 : -1
|
||||
"pcmpeqb b00, %%mm5 \n\t" // L01 > a ? 0 : -1
|
||||
"pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // L11 > a ? 0 : -1
|
||||
"pcmpeqb "MANGLE(b00)", %%mm4 \n\t" // L21 > a ? 0 : -1
|
||||
"pcmpeqb "MANGLE(b00)", %%mm5 \n\t" // L01 > a ? 0 : -1
|
||||
"paddb %%mm4, %%mm2 \n\t"
|
||||
"paddb %%mm5, %%mm2 \n\t"
|
||||
// 0, 2, 3, 1
|
||||
@ -1389,12 +1389,12 @@ FIND_MIN_MAX((%0, %1, 8))
|
||||
PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\
|
||||
PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\
|
||||
PAVGB(lx, pplx) \
|
||||
"movq " #lx ", temp1 \n\t"\
|
||||
"movq temp0, " #lx " \n\t"\
|
||||
"movq " #lx ", "MANGLE(temp1)" \n\t"\
|
||||
"movq "MANGLE(temp0)", " #lx " \n\t"\
|
||||
"psubusb " #lx ", " #t1 " \n\t"\
|
||||
"psubusb " #lx ", " #t0 " \n\t"\
|
||||
"psubusb " #lx ", " #sx " \n\t"\
|
||||
"movq b00, " #lx " \n\t"\
|
||||
"movq "MANGLE(b00)", " #lx " \n\t"\
|
||||
"pcmpeqb " #lx ", " #t1 " \n\t" /* src[-1] > a ? 0 : -1*/\
|
||||
"pcmpeqb " #lx ", " #t0 " \n\t" /* src[+1] > a ? 0 : -1*/\
|
||||
"pcmpeqb " #lx ", " #sx " \n\t" /* src[0] > a ? 0 : -1*/\
|
||||
@ -1404,20 +1404,20 @@ FIND_MIN_MAX((%0, %1, 8))
|
||||
PAVGB(plx, pplx) /* filtered */\
|
||||
"movq " #dst ", " #t0 " \n\t" /* dst */\
|
||||
"movq " #t0 ", " #t1 " \n\t" /* dst */\
|
||||
"psubusb pQPb2, " #t0 " \n\t"\
|
||||
"paddusb pQPb2, " #t1 " \n\t"\
|
||||
"psubusb "MANGLE(pQPb2)", " #t0 " \n\t"\
|
||||
"paddusb "MANGLE(pQPb2)", " #t1 " \n\t"\
|
||||
PMAXUB(t0, pplx)\
|
||||
PMINUB(t1, pplx, t0)\
|
||||
"paddb " #sx ", " #ppsx " \n\t"\
|
||||
"paddb " #psx ", " #ppsx " \n\t"\
|
||||
"#paddb b02, " #ppsx " \n\t"\
|
||||
"pand b08, " #ppsx " \n\t"\
|
||||
"#paddb "MANGLE(b02)", " #ppsx " \n\t"\
|
||||
"pand "MANGLE(b08)", " #ppsx " \n\t"\
|
||||
"pcmpeqb " #lx ", " #ppsx " \n\t"\
|
||||
"pand " #ppsx ", " #pplx " \n\t"\
|
||||
"pandn " #dst ", " #ppsx " \n\t"\
|
||||
"por " #pplx ", " #ppsx " \n\t"\
|
||||
"movq " #ppsx ", " #dst " \n\t"\
|
||||
"movq temp1, " #lx " \n\t"
|
||||
"movq "MANGLE(temp1)", " #lx " \n\t"
|
||||
|
||||
/*
|
||||
0000000
|
||||
@ -2082,7 +2082,7 @@ static void inline RENAME(tempNoiseReducer)(uint8_t *src, int stride,
|
||||
"paddw %%mm6, %%mm0 \n\t"
|
||||
#elif defined (FAST_L2_DIFF)
|
||||
"pcmpeqb %%mm7, %%mm7 \n\t"
|
||||
"movq b80, %%mm6 \n\t"
|
||||
"movq "MANGLE(b80)", %%mm6 \n\t"
|
||||
"pxor %%mm0, %%mm0 \n\t"
|
||||
#define L2_DIFF_CORE(a, b)\
|
||||
"movq " #a ", %%mm5 \n\t"\
|
||||
@ -2152,12 +2152,12 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx))
|
||||
"movl %%ecx, (%%ebx) \n\t"
|
||||
"leal (%%eax, %2, 2), %%ebx \n\t" // 5*stride
|
||||
|
||||
// "movl %3, %%ecx \n\t"
|
||||
// "movl %3, %%ecx \n\t"
|
||||
// "movl %%ecx, test \n\t"
|
||||
// "jmp 4f \n\t"
|
||||
"cmpl 4+maxTmpNoise, %%ecx \n\t"
|
||||
"cmpl 4+"MANGLE(maxTmpNoise)", %%ecx \n\t"
|
||||
" jb 2f \n\t"
|
||||
"cmpl 8+maxTmpNoise, %%ecx \n\t"
|
||||
"cmpl 8+"MANGLE(maxTmpNoise)", %%ecx \n\t"
|
||||
" jb 1f \n\t"
|
||||
|
||||
"leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
|
||||
@ -2216,7 +2216,7 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx))
|
||||
"jmp 4f \n\t"
|
||||
|
||||
"2: \n\t"
|
||||
"cmpl maxTmpNoise, %%ecx \n\t"
|
||||
"cmpl "MANGLE(maxTmpNoise)", %%ecx \n\t"
|
||||
" jb 3f \n\t"
|
||||
|
||||
"leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
|
||||
@ -2461,8 +2461,8 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, uint8_t src[]
|
||||
asm volatile(
|
||||
"leal (%0,%2), %%eax \n\t"
|
||||
"leal (%1,%3), %%ebx \n\t"
|
||||
"movq packedYOffset, %%mm2 \n\t"
|
||||
"movq packedYScale, %%mm3 \n\t"
|
||||
"movq "MANGLE(packedYOffset)", %%mm2\n\t"
|
||||
"movq "MANGLE(packedYScale)", %%mm3\n\t"
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
#ifdef HAVE_MMX2
|
||||
#define SCALED_CPY(src1, src2, dst1, dst2) \
|
||||
@ -2884,7 +2884,7 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
|
||||
"packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
|
||||
"packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
|
||||
"packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
|
||||
"movq %%mm7, pQPb \n\t"
|
||||
"movq %%mm7, "MANGLE(pQPb)" \n\t"
|
||||
: : "r" (QP)
|
||||
);
|
||||
#endif
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include "../config.h"
|
||||
#include "../mangle.h"
|
||||
#ifdef HAVE_MALLOC_H
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
@ -135,19 +135,19 @@
|
||||
"addl $1, %%edx \n\t"\
|
||||
" jnz 2b \n\t"\
|
||||
\
|
||||
"psubw w400, %%mm3 \n\t" /* (U-128)8*/\
|
||||
"psubw w400, %%mm4 \n\t" /* (V-128)8*/\
|
||||
"psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\
|
||||
"psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\
|
||||
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
|
||||
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
|
||||
"pmulhw ugCoeff, %%mm3 \n\t"\
|
||||
"pmulhw vgCoeff, %%mm4 \n\t"\
|
||||
"pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
|
||||
"pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
|
||||
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
|
||||
"pmulhw ubCoeff, %%mm2 \n\t"\
|
||||
"pmulhw vrCoeff, %%mm5 \n\t"\
|
||||
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
|
||||
"psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\
|
||||
"pmulhw yCoeff, %%mm1 \n\t"\
|
||||
"pmulhw yCoeff, %%mm7 \n\t"\
|
||||
"pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\
|
||||
"pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\
|
||||
"psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
|
||||
"psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\
|
||||
"pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
|
||||
"pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\
|
||||
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
|
||||
"paddw %%mm3, %%mm4 \n\t"\
|
||||
"movq %%mm2, %%mm0 \n\t"\
|
||||
@ -197,23 +197,23 @@
|
||||
"movq 4096(%3, %%eax,2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\
|
||||
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
|
||||
"psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
|
||||
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
|
||||
"psubw w400, %%mm3 \n\t" /* 8(U-128)*/\
|
||||
"pmulhw yCoeff, %%mm1 \n\t"\
|
||||
"psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
|
||||
"psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\
|
||||
"pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
|
||||
\
|
||||
\
|
||||
"pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
|
||||
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
|
||||
"pmulhw ubCoeff, %%mm3 \n\t"\
|
||||
"pmulhw "MANGLE(ubCoeff)", %%mm3\n\t"\
|
||||
"psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
|
||||
"pmulhw ugCoeff, %%mm2 \n\t"\
|
||||
"pmulhw "MANGLE(ugCoeff)", %%mm2\n\t"\
|
||||
"paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
|
||||
"psubw w400, %%mm0 \n\t" /* (V-128)8*/\
|
||||
"psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\
|
||||
\
|
||||
\
|
||||
"movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\
|
||||
"pmulhw vrCoeff, %%mm0 \n\t"\
|
||||
"pmulhw vgCoeff, %%mm4 \n\t"\
|
||||
"pmulhw "MANGLE(vrCoeff)", %%mm0\n\t"\
|
||||
"pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
|
||||
"paddw %%mm1, %%mm3 \n\t" /* B*/\
|
||||
"paddw %%mm1, %%mm0 \n\t" /* R*/\
|
||||
"packuswb %%mm3, %%mm3 \n\t"\
|
||||
@ -228,11 +228,11 @@
|
||||
"movd %6, %%mm6 \n\t" /*yalpha1*/\
|
||||
"punpcklwd %%mm6, %%mm6 \n\t"\
|
||||
"punpcklwd %%mm6, %%mm6 \n\t"\
|
||||
"movq %%mm6, asm_yalpha1 \n\t"\
|
||||
"movq %%mm6, "MANGLE(asm_yalpha1)"\n\t"\
|
||||
"movd %7, %%mm5 \n\t" /*uvalpha1*/\
|
||||
"punpcklwd %%mm5, %%mm5 \n\t"\
|
||||
"punpcklwd %%mm5, %%mm5 \n\t"\
|
||||
"movq %%mm5, asm_uvalpha1 \n\t"\
|
||||
"movq %%mm5, "MANGLE(asm_uvalpha1)"\n\t"\
|
||||
"xorl %%eax, %%eax \n\t"\
|
||||
".balign 16 \n\t"\
|
||||
"1: \n\t"\
|
||||
@ -242,19 +242,19 @@
|
||||
"movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
|
||||
"psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
|
||||
"psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
|
||||
"movq asm_uvalpha1, %%mm0 \n\t"\
|
||||
"movq "MANGLE(asm_uvalpha1)", %%mm0\n\t"\
|
||||
"pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
|
||||
"pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
|
||||
"psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
|
||||
"psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
|
||||
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
|
||||
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
|
||||
"psubw w400, %%mm3 \n\t" /* (U-128)8*/\
|
||||
"psubw w400, %%mm4 \n\t" /* (V-128)8*/\
|
||||
"psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\
|
||||
"psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\
|
||||
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
|
||||
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
|
||||
"pmulhw ugCoeff, %%mm3 \n\t"\
|
||||
"pmulhw vgCoeff, %%mm4 \n\t"\
|
||||
"pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
|
||||
"pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
|
||||
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
|
||||
"movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\
|
||||
"movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\
|
||||
@ -262,18 +262,18 @@
|
||||
"movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\
|
||||
"psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
|
||||
"psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
|
||||
"pmulhw asm_yalpha1, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
|
||||
"pmulhw asm_yalpha1, %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
|
||||
"pmulhw "MANGLE(asm_yalpha1)", %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
|
||||
"pmulhw "MANGLE(asm_yalpha1)", %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
|
||||
"psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
|
||||
"psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
|
||||
"paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
|
||||
"paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
|
||||
"pmulhw ubCoeff, %%mm2 \n\t"\
|
||||
"pmulhw vrCoeff, %%mm5 \n\t"\
|
||||
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
|
||||
"psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\
|
||||
"pmulhw yCoeff, %%mm1 \n\t"\
|
||||
"pmulhw yCoeff, %%mm7 \n\t"\
|
||||
"pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\
|
||||
"pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\
|
||||
"psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
|
||||
"psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\
|
||||
"pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
|
||||
"pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\
|
||||
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
|
||||
"paddw %%mm3, %%mm4 \n\t"\
|
||||
"movq %%mm2, %%mm0 \n\t"\
|
||||
@ -305,23 +305,23 @@
|
||||
"movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
|
||||
"psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
|
||||
"psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
|
||||
"psubw w400, %%mm3 \n\t" /* (U-128)8*/\
|
||||
"psubw w400, %%mm4 \n\t" /* (V-128)8*/\
|
||||
"psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\
|
||||
"psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\
|
||||
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
|
||||
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
|
||||
"pmulhw ugCoeff, %%mm3 \n\t"\
|
||||
"pmulhw vgCoeff, %%mm4 \n\t"\
|
||||
"pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
|
||||
"pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
|
||||
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
|
||||
"movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\
|
||||
"movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\
|
||||
"psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
|
||||
"psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
|
||||
"pmulhw ubCoeff, %%mm2 \n\t"\
|
||||
"pmulhw vrCoeff, %%mm5 \n\t"\
|
||||
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
|
||||
"psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\
|
||||
"pmulhw yCoeff, %%mm1 \n\t"\
|
||||
"pmulhw yCoeff, %%mm7 \n\t"\
|
||||
"pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\
|
||||
"pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\
|
||||
"psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
|
||||
"psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\
|
||||
"pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
|
||||
"pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\
|
||||
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
|
||||
"paddw %%mm3, %%mm4 \n\t"\
|
||||
"movq %%mm2, %%mm0 \n\t"\
|
||||
@ -358,23 +358,23 @@
|
||||
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
|
||||
"psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
|
||||
"psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
|
||||
"psubw w400, %%mm3 \n\t" /* (U-128)8*/\
|
||||
"psubw w400, %%mm4 \n\t" /* (V-128)8*/\
|
||||
"psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\
|
||||
"psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\
|
||||
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
|
||||
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
|
||||
"pmulhw ugCoeff, %%mm3 \n\t"\
|
||||
"pmulhw vgCoeff, %%mm4 \n\t"\
|
||||
"pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
|
||||
"pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
|
||||
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
|
||||
"movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\
|
||||
"movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\
|
||||
"psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
|
||||
"psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
|
||||
"pmulhw ubCoeff, %%mm2 \n\t"\
|
||||
"pmulhw vrCoeff, %%mm5 \n\t"\
|
||||
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
|
||||
"psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\
|
||||
"pmulhw yCoeff, %%mm1 \n\t"\
|
||||
"pmulhw yCoeff, %%mm7 \n\t"\
|
||||
"pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\
|
||||
"pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\
|
||||
"psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
|
||||
"psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\
|
||||
"pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
|
||||
"pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\
|
||||
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
|
||||
"paddw %%mm3, %%mm4 \n\t"\
|
||||
"movq %%mm2, %%mm0 \n\t"\
|
||||
@ -423,9 +423,9 @@
|
||||
" jb 1b \n\t"
|
||||
|
||||
#define WRITEBGR16 \
|
||||
"pand bF8, %%mm2 \n\t" /* B */\
|
||||
"pand bFC, %%mm4 \n\t" /* G */\
|
||||
"pand bF8, %%mm5 \n\t" /* R */\
|
||||
"pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
|
||||
"pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
|
||||
"pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
|
||||
"psrlq $3, %%mm2 \n\t"\
|
||||
\
|
||||
"movq %%mm2, %%mm1 \n\t"\
|
||||
@ -450,9 +450,9 @@
|
||||
" jb 1b \n\t"
|
||||
|
||||
#define WRITEBGR15 \
|
||||
"pand bF8, %%mm2 \n\t" /* B */\
|
||||
"pand bF8, %%mm4 \n\t" /* G */\
|
||||
"pand bF8, %%mm5 \n\t" /* R */\
|
||||
"pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
|
||||
"pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
|
||||
"pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
|
||||
"psrlq $3, %%mm2 \n\t"\
|
||||
"psrlq $1, %%mm5 \n\t"\
|
||||
\
|
||||
@ -494,8 +494,8 @@
|
||||
\
|
||||
"movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
|
||||
"psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\
|
||||
"pand bm00000111, %%mm4 \n\t" /* 00000RGB 0 */\
|
||||
"pand bm11111000, %%mm0 \n\t" /* 00RGB000 0.5 */\
|
||||
"pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 0 */\
|
||||
"pand "MANGLE(bm11111000)", %%mm0\n\t" /* 00RGB000 0.5 */\
|
||||
"por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\
|
||||
"movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\
|
||||
"psllq $48, %%mm2 \n\t" /* GB000000 1 */\
|
||||
@ -505,11 +505,11 @@
|
||||
"psrld $16, %%mm4 \n\t" /* 000R000R 1 */\
|
||||
"psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\
|
||||
"por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\
|
||||
"pand bm00001111, %%mm2 \n\t" /* 0000RGBR 1 */\
|
||||
"pand "MANGLE(bm00001111)", %%mm2\n\t" /* 0000RGBR 1 */\
|
||||
"movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\
|
||||
"psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\
|
||||
"pand bm00000111, %%mm4 \n\t" /* 00000RGB 2 */\
|
||||
"pand bm11111000, %%mm1 \n\t" /* 00RGB000 2.5 */\
|
||||
"pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 2 */\
|
||||
"pand "MANGLE(bm11111000)", %%mm1\n\t" /* 00RGB000 2.5 */\
|
||||
"por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\
|
||||
"movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\
|
||||
"psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\
|
||||
@ -518,8 +518,8 @@
|
||||
"psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\
|
||||
"movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\
|
||||
"psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\
|
||||
"pand bm00000111, %%mm5 \n\t" /* 00000RGB 3 */\
|
||||
"pand bm11111000, %%mm3 \n\t" /* 00RGB000 3.5 */\
|
||||
"pand "MANGLE(bm00000111)", %%mm5\n\t" /* 00000RGB 3 */\
|
||||
"pand "MANGLE(bm11111000)", %%mm3\n\t" /* 00RGB000 3.5 */\
|
||||
"por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\
|
||||
"psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\
|
||||
"por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\
|
||||
@ -588,8 +588,8 @@
|
||||
|
||||
#define WRITEBGR24MMX2 \
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
|
||||
"movq M24A, %%mm0 \n\t"\
|
||||
"movq M24C, %%mm7 \n\t"\
|
||||
"movq "MANGLE(M24A)", %%mm0 \n\t"\
|
||||
"movq "MANGLE(M24C)", %%mm7 \n\t"\
|
||||
"pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
|
||||
"pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
|
||||
"pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
|
||||
@ -608,7 +608,7 @@
|
||||
"pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
|
||||
"pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
|
||||
\
|
||||
"pand M24B, %%mm1 \n\t" /* B5 B4 B3 */\
|
||||
"pand "MANGLE(M24B)", %%mm1 \n\t" /* B5 B4 B3 */\
|
||||
"pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
|
||||
"pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
|
||||
\
|
||||
@ -622,7 +622,7 @@
|
||||
\
|
||||
"pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
|
||||
"pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
|
||||
"pand M24B, %%mm6 \n\t" /* R7 R6 R5 */\
|
||||
"pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */\
|
||||
\
|
||||
"por %%mm1, %%mm3 \n\t"\
|
||||
"por %%mm3, %%mm6 \n\t"\
|
||||
@ -777,9 +777,9 @@ static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lu
|
||||
YSCALEYUV2RGBX
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
|
||||
#ifdef DITHER1XBPP
|
||||
"paddusb b5Dither, %%mm2 \n\t"
|
||||
"paddusb g5Dither, %%mm4 \n\t"
|
||||
"paddusb r5Dither, %%mm5 \n\t"
|
||||
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
|
||||
"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
|
||||
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
|
||||
#endif
|
||||
|
||||
WRITEBGR15
|
||||
@ -797,9 +797,9 @@ static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lu
|
||||
YSCALEYUV2RGBX
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
|
||||
#ifdef DITHER1XBPP
|
||||
"paddusb b5Dither, %%mm2 \n\t"
|
||||
"paddusb g6Dither, %%mm4 \n\t"
|
||||
"paddusb r5Dither, %%mm5 \n\t"
|
||||
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
|
||||
"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
|
||||
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
|
||||
#endif
|
||||
|
||||
WRITEBGR16
|
||||
@ -876,8 +876,8 @@ FULL_YSCALEYUV2RGB
|
||||
|
||||
"movq %%mm3, %%mm2 \n\t" // BGR0BGR0
|
||||
"psrlq $8, %%mm3 \n\t" // GR0BGR00
|
||||
"pand bm00000111, %%mm2 \n\t" // BGR00000
|
||||
"pand bm11111000, %%mm3 \n\t" // 000BGR00
|
||||
"pand "MANGLE(bm00000111)", %%mm2\n\t" // BGR00000
|
||||
"pand "MANGLE(bm11111000)", %%mm3\n\t" // 000BGR00
|
||||
"por %%mm2, %%mm3 \n\t" // BGRBGR00
|
||||
"movq %%mm1, %%mm2 \n\t"
|
||||
"psllq $48, %%mm1 \n\t" // 000000BG
|
||||
@ -916,9 +916,9 @@ FULL_YSCALEYUV2RGB
|
||||
|
||||
FULL_YSCALEYUV2RGB
|
||||
#ifdef DITHER1XBPP
|
||||
"paddusb g5Dither, %%mm1 \n\t"
|
||||
"paddusb r5Dither, %%mm0 \n\t"
|
||||
"paddusb b5Dither, %%mm3 \n\t"
|
||||
"paddusb "MANGLE(g5Dither)", %%mm1\n\t"
|
||||
"paddusb "MANGLE(r5Dither)", %%mm0\n\t"
|
||||
"paddusb "MANGLE(b5Dither)", %%mm3\n\t"
|
||||
#endif
|
||||
"punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
|
||||
"punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
|
||||
@ -927,8 +927,8 @@ FULL_YSCALEYUV2RGB
|
||||
"psrlw $3, %%mm3 \n\t"
|
||||
"psllw $2, %%mm1 \n\t"
|
||||
"psllw $7, %%mm0 \n\t"
|
||||
"pand g15Mask, %%mm1 \n\t"
|
||||
"pand r15Mask, %%mm0 \n\t"
|
||||
"pand "MANGLE(g15Mask)", %%mm1 \n\t"
|
||||
"pand "MANGLE(r15Mask)", %%mm0 \n\t"
|
||||
|
||||
"por %%mm3, %%mm1 \n\t"
|
||||
"por %%mm1, %%mm0 \n\t"
|
||||
@ -950,9 +950,9 @@ FULL_YSCALEYUV2RGB
|
||||
|
||||
FULL_YSCALEYUV2RGB
|
||||
#ifdef DITHER1XBPP
|
||||
"paddusb g6Dither, %%mm1 \n\t"
|
||||
"paddusb r5Dither, %%mm0 \n\t"
|
||||
"paddusb b5Dither, %%mm3 \n\t"
|
||||
"paddusb "MANGLE(g6Dither)", %%mm1\n\t"
|
||||
"paddusb "MANGLE(r5Dither)", %%mm0\n\t"
|
||||
"paddusb "MANGLE(b5Dither)", %%mm3\n\t"
|
||||
#endif
|
||||
"punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
|
||||
"punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
|
||||
@ -961,8 +961,8 @@ FULL_YSCALEYUV2RGB
|
||||
"psrlw $3, %%mm3 \n\t"
|
||||
"psllw $3, %%mm1 \n\t"
|
||||
"psllw $8, %%mm0 \n\t"
|
||||
"pand g16Mask, %%mm1 \n\t"
|
||||
"pand r16Mask, %%mm0 \n\t"
|
||||
"pand "MANGLE(g16Mask)", %%mm1 \n\t"
|
||||
"pand "MANGLE(r16Mask)", %%mm0 \n\t"
|
||||
|
||||
"por %%mm3, %%mm1 \n\t"
|
||||
"por %%mm1, %%mm0 \n\t"
|
||||
@ -1057,9 +1057,9 @@ FULL_YSCALEYUV2RGB
|
||||
YSCALEYUV2RGB
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
|
||||
#ifdef DITHER1XBPP
|
||||
"paddusb b5Dither, %%mm2 \n\t"
|
||||
"paddusb g5Dither, %%mm4 \n\t"
|
||||
"paddusb r5Dither, %%mm5 \n\t"
|
||||
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
|
||||
"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
|
||||
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
|
||||
#endif
|
||||
|
||||
WRITEBGR15
|
||||
@ -1075,9 +1075,9 @@ FULL_YSCALEYUV2RGB
|
||||
YSCALEYUV2RGB
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
|
||||
#ifdef DITHER1XBPP
|
||||
"paddusb b5Dither, %%mm2 \n\t"
|
||||
"paddusb g6Dither, %%mm4 \n\t"
|
||||
"paddusb r5Dither, %%mm5 \n\t"
|
||||
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
|
||||
"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
|
||||
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
|
||||
#endif
|
||||
|
||||
WRITEBGR16
|
||||
@ -1234,9 +1234,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
|
||||
YSCALEYUV2RGB1
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
|
||||
#ifdef DITHER1XBPP
|
||||
"paddusb b5Dither, %%mm2 \n\t"
|
||||
"paddusb g5Dither, %%mm4 \n\t"
|
||||
"paddusb r5Dither, %%mm5 \n\t"
|
||||
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
|
||||
"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
|
||||
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
|
||||
#endif
|
||||
WRITEBGR15
|
||||
:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
|
||||
@ -1250,9 +1250,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
|
||||
YSCALEYUV2RGB1
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
|
||||
#ifdef DITHER1XBPP
|
||||
"paddusb b5Dither, %%mm2 \n\t"
|
||||
"paddusb g6Dither, %%mm4 \n\t"
|
||||
"paddusb r5Dither, %%mm5 \n\t"
|
||||
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
|
||||
"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
|
||||
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
|
||||
#endif
|
||||
|
||||
WRITEBGR16
|
||||
@ -1291,9 +1291,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
|
||||
YSCALEYUV2RGB1b
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
|
||||
#ifdef DITHER1XBPP
|
||||
"paddusb b5Dither, %%mm2 \n\t"
|
||||
"paddusb g5Dither, %%mm4 \n\t"
|
||||
"paddusb r5Dither, %%mm5 \n\t"
|
||||
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
|
||||
"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
|
||||
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
|
||||
#endif
|
||||
WRITEBGR15
|
||||
:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
|
||||
@ -1307,9 +1307,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
|
||||
YSCALEYUV2RGB1b
|
||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
|
||||
#ifdef DITHER1XBPP
|
||||
"paddusb b5Dither, %%mm2 \n\t"
|
||||
"paddusb g6Dither, %%mm4 \n\t"
|
||||
"paddusb r5Dither, %%mm5 \n\t"
|
||||
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
|
||||
"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
|
||||
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
|
||||
#endif
|
||||
|
||||
WRITEBGR16
|
||||
@ -1435,7 +1435,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
|
||||
dst-= counter/2;
|
||||
asm volatile(
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"movq w02, %%mm6 \n\t"
|
||||
"movq "MANGLE(w02)", %%mm6 \n\t"
|
||||
"pushl %%ebp \n\t" // we use 7 regs here ...
|
||||
"movl %%eax, %%ebp \n\t"
|
||||
".balign 16 \n\t"
|
||||
@ -1473,7 +1473,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
|
||||
dst-= counter/2;
|
||||
asm volatile(
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"movq w02, %%mm6 \n\t"
|
||||
"movq "MANGLE(w02)", %%mm6 \n\t"
|
||||
"pushl %%ebp \n\t" // we use 7 regs here ...
|
||||
"movl %%eax, %%ebp \n\t"
|
||||
".balign 16 \n\t"
|
||||
@ -1523,7 +1523,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
|
||||
dst-= counter/2;
|
||||
asm volatile(
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"movq w02, %%mm6 \n\t"
|
||||
"movq "MANGLE(w02)", %%mm6 \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movl %2, %%ecx \n\t"
|
||||
@ -1614,7 +1614,7 @@ static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, in
|
||||
"psllq $16, %%mm2 \n\t"
|
||||
"paddw %%mm6, %%mm2 \n\t"
|
||||
"psllq $16, %%mm2 \n\t" //0,t,2t,3t t=xInc&0xFFFF
|
||||
"movq %%mm2, temp0 \n\t"
|
||||
"movq %%mm2, "MANGLE(temp0)" \n\t"
|
||||
"movd %4, %%mm6 \n\t" //(xInc*4)&0xFFFF
|
||||
"punpcklwd %%mm6, %%mm6 \n\t"
|
||||
"punpcklwd %%mm6, %%mm6 \n\t"
|
||||
@ -1630,8 +1630,8 @@ static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, in
|
||||
PREFETCH" 1024(%%esi) \n\t"\
|
||||
PREFETCH" 1056(%%esi) \n\t"\
|
||||
PREFETCH" 1088(%%esi) \n\t"\
|
||||
"call funnyYCode \n\t"\
|
||||
"movq temp0, %%mm2 \n\t"\
|
||||
"call "MANGLE(funnyYCode)" \n\t"\
|
||||
"movq "MANGLE(temp0)", %%mm2 \n\t"\
|
||||
"xorl %%ecx, %%ecx \n\t"
|
||||
|
||||
FUNNY_Y_CODE
|
||||
@ -1741,7 +1741,7 @@ inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth,
|
||||
"psllq $16, %%mm2 \n\t"
|
||||
"paddw %%mm6, %%mm2 \n\t"
|
||||
"psllq $16, %%mm2 \n\t" //0,t,2t,3t t=xInc&0xFFFF
|
||||
"movq %%mm2, temp0 \n\t"
|
||||
"movq %%mm2, "MANGLE(temp0)" \n\t"
|
||||
"movd %4, %%mm6 \n\t" //(xInc*4)&0xFFFF
|
||||
"punpcklwd %%mm6, %%mm6 \n\t"
|
||||
"punpcklwd %%mm6, %%mm6 \n\t"
|
||||
@ -1757,8 +1757,8 @@ inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth,
|
||||
PREFETCH" 1024(%%esi) \n\t"\
|
||||
PREFETCH" 1056(%%esi) \n\t"\
|
||||
PREFETCH" 1088(%%esi) \n\t"\
|
||||
"call funnyUVCode \n\t"\
|
||||
"movq temp0, %%mm2 \n\t"\
|
||||
"call "MANGLE(funnyUVCode)" \n\t"\
|
||||
"movq "MANGLE(temp0)", %%mm2 \n\t"\
|
||||
"xorl %%ecx, %%ecx \n\t"
|
||||
|
||||
FUNNYUVCODE
|
||||
|
Loading…
Reference in New Issue
Block a user