From 2e9054c8ea8ff8fd429a0bf9fcdc5551871dcb33 Mon Sep 17 00:00:00 2001 From: blikblum Date: Wed, 10 Oct 2012 01:07:34 +0000 Subject: [PATCH] * qt: implement alpha blend functions in 64bit git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@2548 8e941d3f-bd1b-0410-a28a-d453659cc2b4 --- .../4.8/include/intf/qt/vtgraphicsi.inc | 430 ++++++++++++++++-- 1 file changed, 389 insertions(+), 41 deletions(-) diff --git a/components/virtualtreeview-new/branches/4.8/include/intf/qt/vtgraphicsi.inc b/components/virtualtreeview-new/branches/4.8/include/intf/qt/vtgraphicsi.inc index 9ffeb2e43..30e828d50 100644 --- a/components/virtualtreeview-new/branches/4.8/include/intf/qt/vtgraphicsi.inc +++ b/components/virtualtreeview-new/branches/4.8/include/intf/qt/vtgraphicsi.inc @@ -1,7 +1,7 @@ uses qt4, qtobjects; -{$ifdef CPU32} +{$ASMMODE INTEL} procedure AlphaBlendLineConstant(Source, Destination: Pointer; Count: Integer; ConstantAlpha, Bias: Integer); @@ -10,13 +10,106 @@ procedure AlphaBlendLineConstant(Source, Destination: Pointer; Count: Integer; C // ConstantAlpha must be in the range 0..255 where 0 means totally transparent (destination pixel only) // and 255 totally opaque (source pixel only). // Bias is an additional value which gets added to every component and must be in the range -128..127 -// + +asm + +{$ifdef CPU64} +//windows +// RCX contains Source +// RDX contains Destination +// R8D contains Count +// R9D contains ConstantAlpha +// Bias is on the stack + +//non windows +// RDI contains Source +// RSI contains Destination +// EDX contains Count +// ECX contains ConstantAlpha +// R8D contains Bias + + //.NOFRAME + + // Load XMM3 with the constant alpha value (replicate it for every component). + // Expand it to word size. + {$ifdef windows} + MOVD XMM3, R9D // ConstantAlpha + {$else} + MOVD XMM3, ECX // ConstantAlpha + {$endif} + PUNPCKLWD XMM3, XMM3 + PUNPCKLDQ XMM3, XMM3 + + // Load XMM5 with the bias value. 
+ {$ifdef windows} + MOVD XMM5, [Bias] + {$else} + MOVD XMM5, R8D //Bias + {$endif} + PUNPCKLWD XMM5, XMM5 + PUNPCKLDQ XMM5, XMM5 + + // Load XMM4 with 128 to allow for saturated biasing. + MOV R10D, 128 + MOVD XMM4, R10D + PUNPCKLWD XMM4, XMM4 + PUNPCKLDQ XMM4, XMM4 + +@1: // The pixel loop calculates an entire pixel in one run. + // Note: The pixel byte values are expanded into the higher bytes of a word due + // to the way unpacking works. We compensate for this with an extra shift. + {$ifdef windows} + MOVD XMM1, DWORD PTR [RCX] // data is unaligned + MOVD XMM2, DWORD PTR [RDX] // data is unaligned + {$else} + MOVD XMM1, DWORD PTR [RDI] // data is unaligned + MOVD XMM2, DWORD PTR [RSI] // data is unaligned + {$endif} + PXOR XMM0, XMM0 // clear source pixel register for unpacking + PUNPCKLBW XMM0, XMM1{[RCX]} // unpack source pixel byte values into words + PSRLW XMM0, 8 // move higher bytes to lower bytes + PXOR XMM1, XMM1 // clear target pixel register for unpacking + PUNPCKLBW XMM1, XMM2{[RDX]} // unpack target pixel byte values into words + MOVQ XMM2, XMM1 // make a copy of the shifted values, we need them again + PSRLW XMM1, 8 // move higher bytes to lower bytes + + // calculation is: target = (alpha * (source - target) + 256 * target) / 256 + PSUBW XMM0, XMM1 // source - target + PMULLW XMM0, XMM3 // alpha * (source - target) + PADDW XMM0, XMM2 // add target (in shifted form) + PSRLW XMM0, 8 // divide by 256 + + // Bias is accounted for by conversion of range 0..255 to -128..127, + // doing a saturated add and convert back to 0..255. 
+ PSUBW XMM0, XMM4 + PADDSW XMM0, XMM5 + PADDW XMM0, XMM4 + PACKUSWB XMM0, XMM0 // convert words to bytes with saturation + {$ifdef windows} + MOVD DWORD PTR [RDX], XMM0 // store the result + {$else} + MOVD DWORD PTR [RSI], XMM0 // store the result + {$endif} +@3: + {$ifdef windows} + ADD RCX, 4 + ADD RDX, 4 + DEC R8D + {$else} + ADD RDI, 4 + ADD RSI, 4 + DEC EDX + {$endif} + JNZ @1 + + +{$else} // EAX contains Source // EDX contains Destination // ECX contains Count // ConstantAlpha and Bias are on the stack -asm + PUSH ESI // save used registers PUSH EDI @@ -73,6 +166,7 @@ asm JNZ @1 POP EDI POP ESI +{$endif} end; //---------------------------------------------------------------------------------------------------------------------- @@ -82,13 +176,100 @@ procedure AlphaBlendLinePerPixel(Source, Destination: Pointer; Count, Bias: Inte // Blends a line of Count pixels from Source to Destination using the alpha value of the source pixels. // The layout of a pixel must be BGRA. // Bias is an additional value which gets added to every component and must be in the range -128..127 -// + +asm + +{$ifdef CPU64} +//windows +// RCX contains Source +// RDX contains Destination +// R8D contains Count +// R9D contains Bias + +//non windows +// RDI contains Source +// RSI contains Destination +// EDX contains Count +// ECX contains Bias + + //.NOFRAME + + // Load XMM5 with the bias value. + {$ifdef windows} + MOVD XMM5, R9D // Bias + {$else} + MOVD XMM5, ECX // Bias + {$endif} + PUNPCKLWD XMM5, XMM5 + PUNPCKLDQ XMM5, XMM5 + + // Load XMM4 with 128 to allow for saturated biasing. + MOV R10D, 128 + MOVD XMM4, R10D + PUNPCKLWD XMM4, XMM4 + PUNPCKLDQ XMM4, XMM4 + +@1: // The pixel loop calculates an entire pixel in one run. + // Note: The pixel byte values are expanded into the higher bytes of a word due + // to the way unpacking works. We compensate for this with an extra shift. 
+ {$ifdef windows} + MOVD XMM1, DWORD PTR [RCX] // data is unaligned + MOVD XMM2, DWORD PTR [RDX] // data is unaligned + {$else} + MOVD XMM1, DWORD PTR [RDI] // data is unaligned + MOVD XMM2, DWORD PTR [RSI] // data is unaligned + {$endif} + PXOR XMM0, XMM0 // clear source pixel register for unpacking + PUNPCKLBW XMM0, XMM1{[RCX]} // unpack source pixel byte values into words + PSRLW XMM0, 8 // move higher bytes to lower bytes + PXOR XMM1, XMM1 // clear target pixel register for unpacking + PUNPCKLBW XMM1, XMM2{[RDX]} // unpack target pixel byte values into words + MOVQ XMM2, XMM1 // make a copy of the shifted values, we need them again + PSRLW XMM1, 8 // move higher bytes to lower bytes + + // Load XMM3 with the source alpha value (replicate it for every component). + // Expand it to word size. + MOVQ XMM3, XMM0 + PUNPCKHWD XMM3, XMM3 + PUNPCKHDQ XMM3, XMM3 + + // calculation is: target = (alpha * (source - target) + 256 * target) / 256 + PSUBW XMM0, XMM1 // source - target + PMULLW XMM0, XMM3 // alpha * (source - target) + PADDW XMM0, XMM2 // add target (in shifted form) + PSRLW XMM0, 8 // divide by 256 + + // Bias is accounted for by conversion of range 0..255 to -128..127, + // doing a saturated add and convert back to 0..255. + PSUBW XMM0, XMM4 + PADDSW XMM0, XMM5 + PADDW XMM0, XMM4 + PACKUSWB XMM0, XMM0 // convert words to bytes with saturation + {$ifdef windows} + MOVD DWORD PTR [RDX], XMM0 // store the result + {$else} + MOVD DWORD PTR [RSI], XMM0 // store the result + {$endif} +@3: + {$ifdef windows} + ADD RCX, 4 + ADD RDX, 4 + DEC R8D + {$else} + ADD RDI, 4 + ADD RSI, 4 + DEC EDX + {$endif} + JNZ @1 + + +{$else} + // EAX contains Source // EDX contains Destination // ECX contains Count // Bias is on the stack -asm PUSH ESI // save used registers PUSH EDI @@ -103,7 +284,7 @@ asm // Load MM4 with 128 to allow for saturated biasing. 
MOV EAX, 128 - DB $0F, $6E, $E0 /// MOVD MM4, EAX + DB $0F, $6E, $E0 /// MOVD MM4, EAX DB $0F, $61, $E4 /// PUNPCKLWD MM4, MM4 DB $0F, $62, $E4 /// PUNPCKLDQ MM4, MM4 @@ -144,6 +325,7 @@ asm JNZ @1 POP EDI POP ESI +{$endif} end; //---------------------------------------------------------------------------------------------------------------------- @@ -154,13 +336,115 @@ procedure AlphaBlendLineMaster(Source, Destination: Pointer; Count: Integer; Con // The layout of a pixel must be BGRA. // ConstantAlpha must be in the range 0..255. // Bias is an additional value which gets added to every component and must be in the range -128..127 -// + +asm + +{$ifdef CPU64} +//windows +// RCX contains Source +// RDX contains Destination +// R8D contains Count +// R9D contains ConstantAlpha +// Bias is on the stack + +//non windows +// RDI contains Source +// RSI contains Destination +// EDX contains Count +// ECX contains ConstantAlpha +// R8D contains Bias + + //.SAVENV XMM6 //todo: find out how to save XMM6 in fpc (it is modified below and is non-volatile in the Win64 ABI) + + // Load XMM3 with the constant alpha value (replicate it for every component). + // Expand it to word size. + {$ifdef windows} + MOVD XMM3, R9D // ConstantAlpha + {$else} + MOVD XMM3, ECX // ConstantAlpha + {$endif} + PUNPCKLWD XMM3, XMM3 + PUNPCKLDQ XMM3, XMM3 + + // Load XMM5 with the bias value. + {$ifdef windows} + MOV R10D, [Bias] + MOVD XMM5, R10D + {$else} + MOVD XMM5, R8D + {$endif} + PUNPCKLWD XMM5, XMM5 + PUNPCKLDQ XMM5, XMM5 + + // Load XMM4 with 128 to allow for saturated biasing. + MOV R10D, 128 + MOVD XMM4, R10D + PUNPCKLWD XMM4, XMM4 + PUNPCKLDQ XMM4, XMM4 + +@1: // The pixel loop calculates an entire pixel in one run. + // Note: The pixel byte values are expanded into the higher bytes of a word due + // to the way unpacking works. We compensate for this with an extra shift.
+ {$ifdef windows} + MOVD XMM1, DWORD PTR [RCX] // data is unaligned + MOVD XMM2, DWORD PTR [RDX] // data is unaligned + {$else} + MOVD XMM1, DWORD PTR [RDI] // data is unaligned + MOVD XMM2, DWORD PTR [RSI] // data is unaligned + {$endif} + PXOR XMM0, XMM0 // clear source pixel register for unpacking + PUNPCKLBW XMM0, XMM1{[RCX]} // unpack source pixel byte values into words + PSRLW XMM0, 8 // move higher bytes to lower bytes + PXOR XMM1, XMM1 // clear target pixel register for unpacking + PUNPCKLBW XMM1, XMM2{[RDX]} // unpack target pixel byte values into words + MOVQ XMM2, XMM1 // make a copy of the shifted values, we need them again + PSRLW XMM1, 8 // move higher bytes to lower bytes + + // Load XMM6 with the source alpha value (replicate it for every component). + // Expand it to word size. + MOVQ XMM6, XMM0 + PUNPCKHWD XMM6, XMM6 + PUNPCKHDQ XMM6, XMM6 + PMULLW XMM6, XMM3 // source alpha * master alpha + PSRLW XMM6, 8 // divide by 256 + + // calculation is: target = (alpha * master alpha * (source - target) + 256 * target) / 256 + PSUBW XMM0, XMM1 // source - target + PMULLW XMM0, XMM6 // alpha * (source - target) + PADDW XMM0, XMM2 // add target (in shifted form) + PSRLW XMM0, 8 // divide by 256 + + // Bias is accounted for by conversion of range 0..255 to -128..127, + // doing a saturated add and convert back to 0..255.
+ PSUBW XMM0, XMM4 + PADDSW XMM0, XMM5 + PADDW XMM0, XMM4 + PACKUSWB XMM0, XMM0 // convert words to bytes with saturation + {$ifdef windows} + MOVD DWORD PTR [RDX], XMM0 // store the result + {$else} + MOVD DWORD PTR [RSI], XMM0 // store the result + {$endif} +@3: + {$ifdef windows} + ADD RCX, 4 + ADD RDX, 4 + DEC R8D + {$else} + ADD RDI, 4 + ADD RSI, 4 + DEC EDX + {$endif} + JNZ @1 + +{$else} + // EAX contains Source // EDX contains Destination // ECX contains Count // ConstantAlpha and Bias are on the stack -asm + PUSH ESI // save used registers PUSH EDI @@ -225,6 +509,7 @@ asm JNZ @1 POP EDI POP ESI +{$endif} end; //---------------------------------------------------------------------------------------------------------------------- @@ -234,13 +519,94 @@ procedure AlphaBlendLineMasterAndColor(Destination: Pointer; Count: Integer; Con // Blends a line of Count pixels in Destination against the given color using a constant alpha value. // The layout of a pixel must be BGRA and Color must be rrggbb00 (as stored by a COLORREF). // ConstantAlpha must be in the range 0..255. -// + +asm + +{$ifdef CPU64} +//windows +// RCX contains Destination +// EDX contains Count +// R8D contains ConstantAlpha +// R9D contains Color + +//non windows +// RDI contains Destination +// ESI contains Count +// EDX contains ConstantAlpha +// ECX contains Color + + //.NOFRAME + + // The used formula is: target = (alpha * color + (256 - alpha) * target) / 256. + // alpha * color (factor 1) and 256 - alpha (factor 2) are constant values which can be calculated in advance. + // The remaining calculation is therefore: target = (F1 + F2 * target) / 256 + + // Load XMM3 with the constant alpha value (replicate it for every component). + // Expand it to word size. (Every calculation here works on word sized operands.) + {$ifdef windows} + MOVD XMM3, R8D // ConstantAlpha + {$else} + MOVD XMM3, EDX // ConstantAlpha + {$endif} + PUNPCKLWD XMM3, XMM3 + PUNPCKLDQ XMM3, XMM3 + + // Calculate factor 2. 
+ MOV R10D, $100 + MOVD XMM2, R10D + PUNPCKLWD XMM2, XMM2 + PUNPCKLDQ XMM2, XMM2 + PSUBW XMM2, XMM3 // XMM2 now contains: 256 - alpha = F2 + + // Now calculate factor 1. Alpha is still in XMM3, but the r and b components of Color must be swapped. + {$ifdef windows} + BSWAP R9D // Color + ROR R9D, 8 + MOVD XMM1, R9D // Load the color and convert to word sized values. + {$else} + BSWAP ECX // Color + ROR ECX, 8 + MOVD XMM1, ECX // Load the color and convert to word sized values. + {$endif} + PXOR XMM4, XMM4 + PUNPCKLBW XMM1, XMM4 + PMULLW XMM1, XMM3 // XMM1 now contains: color * alpha = F1 + +@1: // The pixel loop calculates an entire pixel in one run. + {$ifdef windows} + MOVD XMM0, DWORD PTR [RCX] + {$else} + MOVD XMM0, DWORD PTR [RDI] + {$endif} + PUNPCKLBW XMM0, XMM4 + + PMULLW XMM0, XMM2 // calculate F1 + F2 * target + PADDW XMM0, XMM1 + PSRLW XMM0, 8 // divide by 256 + + PACKUSWB XMM0, XMM0 // convert words to bytes with saturation + {$ifdef windows} + MOVD DWORD PTR [RCX], XMM0 // store the result + + ADD RCX, 4 + DEC EDX + {$else} + MOVD DWORD PTR [RDI], XMM0 // store the result + + ADD RDI, 4 + DEC ESI + {$endif} + JNZ @1 + + +{$else} + // EAX contains Destination // EDX contains Count // ECX contains ConstantAlpha // Color is passed on the stack -asm + // The used formula is: target = (alpha * color + (256 - alpha) * target) / 256. // alpha * color (factor 1) and 256 - alpha (factor 2) are constant values which can be calculated in advance. // The remaining calculation is therefore: target = (F1 + F2 * target) / 256 @@ -281,6 +647,7 @@ asm ADD EAX, 4 DEC EDX JNZ @1 +{$endif} end; //---------------------------------------------------------------------------------------------------------------------- @@ -289,40 +656,16 @@ procedure EMMS; // Reset MMX state to use the FPU for other tasks again.
+{$ifdef CPU64} + inline; + begin + end; + +{$else} + asm DB $0F, $77 /// EMMS end; -{$else} -procedure AlphaBlendLineConstant(Source, Destination: Pointer; Count: Integer; ConstantAlpha, Bias: Integer); - -begin - // -end; - -//---------------------------------------------------------------------------------------------------------------------- - -procedure AlphaBlendLinePerPixel(Source, Destination: Pointer; Count, Bias: Integer); - -begin - // -end; - -//---------------------------------------------------------------------------------------------------------------------- - -procedure AlphaBlendLineMaster(Source, Destination: Pointer; Count: Integer; ConstantAlpha, Bias: Integer); -begin - // -end; - -procedure AlphaBlendLineMasterAndColor(Destination: Pointer; Count: Integer; ConstantAlpha, Color: Integer); -begin - // -end; - -procedure EMMS; -begin - // -end; {$endif} //---------------------------------------------------------------------------------------------------------------------- @@ -420,6 +763,11 @@ var begin if not IsRectEmpty(R) then begin + {$ifdef CPU64} + //avoid MasterAlpha due to incomplete AlphaBlendLineMaster. See comment in procedure + if Mode = bmMasterAlpha then + Mode := bmConstantAlpha; + {$endif} // Note: it is tempting to optimize the special cases for constant alpha 0 and 255 by just ignoring soure // (alpha = 0) or simply do a blit (alpha = 255). But this does not take the bias into account. case Mode of