git-svn-id: https://svn.code.sf.net/p/kolmck/code@54 91bb2d04-0c0c-4d2d-88a5-bbb6f4c1fa07
This commit is contained in:
509
WideStringEdit.pas
Normal file
509
WideStringEdit.pas
Normal file
@ -0,0 +1,509 @@
|
||||
unit WideStringEdit;
|
||||
|
||||
interface
|
||||
|
||||
function WStrLComp(const Str1, Str2: PWideChar; MaxLen: Cardinal): Integer;
|
||||
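{* Compare two wide strings (fast). At most MaxLen characters (16-bit units,
   not bytes) are compared; the comparison also ends at the terminating #0 of
   Str2 when that is found within MaxLen characters. The result is negative,
   zero or positive when the compared part of Str1 is less than, equal to or
   greater than that of Str2. Pass the number of characters to compare
   (typically the minimum of both lengths) as MaxLen. }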
|
||||
function WS2Int( S: PWideChar ): Integer;
|
||||
{* Converts a null-terminated string to Integer. One leading '+' or '-' is
   accepted. Scanning stops when any non-digit character is found. Even an
   empty string, or a string not containing a valid integer number, is
   silently converted to 0. }
|
||||
function UTF8ToUCS2(Dest: PWideChar; MaxDestBytes: Cardinal;
|
||||
Source: PChar; SourceChars: Cardinal): Cardinal;
|
||||
(* Decode string from UTF8 to UCS2 *)
|
||||
function UCS2ToUTF8(Dest: PChar; MaxDestBytes: Cardinal;
|
||||
Source: PWideChar; SourceChars: Cardinal): Cardinal;
|
||||
(* Encode string from UCS2 to UTF8 *)
|
||||
|
||||
implementation
|
||||
|
||||
function WStrLComp(const Str1, Str2: PWideChar; MaxLen: Cardinal): Integer; assembler;
// Compares at most MaxLen wide characters of Str1 and Str2.
//   In:  EAX = Str1, EDX = Str2, ECX = MaxLen (count of 16-bit characters)
//   Out: EAX = 0 when the compared ranges are equal (or MaxLen = 0),
//        otherwise the difference of the first pair of differing characters
//        (negative when Str1 < Str2, positive when Str1 > Str2).
// The compared range ends at the terminating #0 of Str2 if it lies within
// MaxLen characters. Relies on the direction flag being clear on entry.
asm
        PUSH    EDI
        PUSH    ESI
        PUSH    EBX
        MOV     EDI, EDX            // EDI = Str2 (scanned for its terminator)
        MOV     ESI, EAX            // ESI = Str1
        MOV     EBX, ECX            // EBX = MaxLen
        XOR     EAX, EAX            // AX = 0 to scan for; also the result when MaxLen = 0
        TEST    ECX, ECX
        JZ      @@done              // nothing to compare: return 0, not the Str1 pointer
        REPNE   SCASW               // look for #0 in Str2 within MaxLen characters
        SUB     EBX, ECX            // EBX = characters scanned (terminator included)
        MOV     ECX, EBX
        MOV     EDI, EDX            // rewind to the start of Str2
        XOR     EDX, EDX
        REPE    CMPSW               // compare until a mismatch or the count runs out
        MOV     AX, [ESI-2]         // last compared character of Str1
        MOV     DX, [EDI-2]         // last compared character of Str2
        SUB     EAX, EDX            // both zero-extended, so the sign is meaningful
@@done:
        POP     EBX
        POP     ESI
        POP     EDI
end;
|
||||
|
||||
|
||||
|
||||
function WS2Int( S: PWideChar ): Integer;
// Parses a decimal integer from a null-terminated wide string.
//   In:  EAX = S (nil is allowed and yields 0)
//   Out: EAX = value of the leading digits, negated when S starts with '-'
// One leading '+' or '-' is skipped; parsing ends at the first character
// that is not a decimal digit. Overflow is not detected.
asm
        MOV     EDX, EAX                // EDX walks over the string
        XOR     EAX, EAX                // EAX accumulates the value
        TEST    EDX, EDX
        JZ      @@done

        MOVZX   ECX, WORD PTR [EDX]     // first character
        ADD     EDX, 2
        PUSH    ECX                     // kept for the sign decision at the end
        CMP     ECX, '-'
        JE      @@fetch
        CMP     ECX, '+'
        JNE     @@digit                 // no sign: the first character may be a digit

@@fetch:
        MOVZX   ECX, WORD PTR [EDX]
        ADD     EDX, 2
@@digit:
        SUB     ECX, '0'
        CMP     ECX, 9
        JA      @@sign                  // unsigned compare also rejects characters below '0'
        LEA     EAX, [EAX+EAX*4]        // EAX := EAX * 5
        LEA     EAX, [ECX+EAX*2]        // EAX := EAX * 2 + digit
        JMP     @@fetch

@@sign:
        POP     ECX                     // first character again
        CMP     ECX, '-'
        JNE     @@done
        NEG     EAX
@@done:
end;
|
||||
|
||||
function UTF8ToUCS2(Dest: PWideChar; MaxDestBytes: Cardinal;
  Source: PChar; SourceChars: Cardinal): Cardinal;
// Decodes UTF-8 text into null-terminated UCS-2.
//   In:  EAX     = Dest         (output buffer)
//        EDX     = MaxDestBytes (size of Dest in bytes, terminator included)
//        ECX     = Source       (UTF-8 bytes; nil is treated as empty)
//        [stack] = SourceChars  (maximum number of source BYTES to consume)
//   Out: EAX = number of UCS-2 characters stored, terminator not counted
// Decoding stops at a #0 byte, after SourceChars bytes, when Dest has no room
// for another character plus the terminator, or at the first sequence that is
// malformed, truncated, or longer than three bytes (not representable in
// UCS-2). If Dest is nil or MaxDestBytes < 2 nothing is written and 0 is
// returned.
asm
        //backup
        PUSHF
        CLD                             // string instructions move forward
        PUSH    EBX
        PUSH    ESI
        PUSH    EDI

        PUSH    Dest                    // remember the start of Dest for the length
        MOV     EDI, Dest
        TEST    EDI, EDI
        JZ      @Done                   // no buffer: nothing can be stored
        CMP     EDX, 2
        JB      @Done                   // not even room for the terminator
        TEST    Source, Source
        JZ      @Exit                   // nil source -> empty result
        MOV     ESI, Source
        MOV     ECX, SourceChars        // ECX = source bytes left, current one included
        TEST    ECX, ECX
        JZ      @Exit                   // zero-length input (the old LOOP wrapped here)

@NextChar:
        // EDX = free bytes in Dest, including the 2 reserved for the terminator
        CMP     EDX, 4
        JB      @Exit
        XOR     EAX, EAX
        LODSB                           // AL = next source byte
        TEST    AL, AL
        JZ      @Exit                   // end of string
        TEST    AL, $80
        JNZ     @Lead                   // not plain ASCII
        //UTF8: 0xxxxxxx (AH = 0)

@Store:
        STOSW                           // store the UCS-2 character in AX
        SUB     EDX, 2
        DEC     ECX                     // account for the last byte of this sequence
        JNZ     @NextChar
        JMP     @Exit

@Lead:
        TEST    AL, $40
        JZ      @Exit                   // 10xxxxxx cannot start a sequence
        CMP     AL, $F0
        JAE     @Exit                   // 1111xxxx: code point beyond UCS-2
        DEC     ECX                     // lead byte consumed
        JZ      @Exit                   // sequence truncated by SourceChars
        MOV     AH, AL                  // AH = lead byte
        LODSB                           // AL = second byte
        MOV     BL, AL
        AND     BL, $C0
        CMP     BL, $80
        JNE     @Exit                   // must be 10xxxxxx (also rejects #0)
        TEST    AH, $20
        JNZ     @Three                  // 1110xxxx: three-byte sequence

        //UTF8: 110xxxxx 10xxxxxx
        MOV     BL, AH                  // 110xxxxx
        SHR     AH, 2                   // 00110xxx
        AND     AX, $073F               // AH: 00000xxx; AL: 00xxxxxx
        SHL     BL, 6                   // xx000000
        OR      AL, BL                  // AX: 00000xxx:xxxxxxxx
        JMP     @Store

@Three:
        //UTF8: 1110xxxx 10xxxxxx 10xxxxxx
        DEC     ECX                     // second byte consumed
        JZ      @Exit                   // sequence truncated by SourceChars
        MOV     BL, AL                  // BL = second byte
        LODSB                           // AL = third byte
        MOV     BH, AL
        AND     BH, $C0
        CMP     BH, $80
        JNE     @Exit                   // must be 10xxxxxx (also rejects #0)
        AND     BX, $003F               // BH = 0; BL = 00xxxxxx
        ROR     BX, 2                   // BL = 0000xxxx; BH = xx000000
        AND     AL, $3F                 // 00xxxxxx
        OR      AL, BH                  // low byte of the UCS-2 character
        SHL     AH, 4                   // xxxx0000
        OR      AH, BL                  // high byte of the UCS-2 character
        JMP     @Store

@Exit:
        XOR     EAX, EAX
        MOV     [EDI], AX               // terminate Dest
@Done:
        POP     EAX                     // start of Dest
        SUB     EDI, EAX                // bytes stored
        SHR     EDI, 1                  // -> characters
        MOV     EAX, EDI
        //restore
        POP     EDI
        POP     ESI
        POP     EBX
        POPF
end;
|
||||
|
||||
function UCS2ToUTF8(Dest: PChar; MaxDestBytes: Cardinal;
  Source: PWideChar; SourceChars: Cardinal): Cardinal;
// Encodes UCS-2 text as null-terminated UTF-8.
//   In:  EAX     = Dest         (output buffer)
//        EDX     = MaxDestBytes (size of Dest in bytes, terminator included)
//        ECX     = Source       (UCS-2 characters; nil is treated as empty)
//        [stack] = SourceChars  (maximum number of source characters to encode)
//   Out: EAX = number of bytes stored, terminator not counted
// Encoding stops at a #0 character, after SourceChars characters, or when the
// next character plus the terminator no longer fits into Dest. Each 16-bit
// unit is encoded on its own (1 to 3 bytes); surrogate pairs are not combined.
// If Dest is nil or MaxDestBytes = 0 nothing is written and 0 is returned.
asm
        //backup
        PUSHF
        CLD                             // string instructions move forward
        PUSH    EBX
        PUSH    ESI
        PUSH    EDI

        PUSH    Dest                    // remember the start of Dest for the length
        MOV     EDI, Dest
        TEST    EDI, EDI
        JZ      @Done                   // no buffer: nothing can be stored
        TEST    EDX, EDX
        JZ      @Done                   // not even room for the terminator
        DEC     EDX                     // reserve the terminator; EDX = payload bytes free
        TEST    Source, Source
        JZ      @Exit                   // nil source -> empty result
        MOV     ESI, Source
        MOV     ECX, SourceChars        // ECX = characters left to encode
        TEST    ECX, ECX
        JZ      @Exit                   // zero-length input (the old LOOP wrapped here)

@NextChar:
        XOR     EAX, EAX
        LODSW                           // AX = next UCS-2 character
        TEST    EAX, EAX
        JZ      @Exit                   // end of string
        CMP     EAX, $007F
        JA      @TwoOrThree
        //UTF8-char: 0xxxxxxx
        CMP     EDX, 1
        JB      @Exit                   // Dest is full
        DEC     EDX

@StoreByte:
        STOSB                           // store the single / final byte in AL
        DEC     ECX
        JNZ     @NextChar
        JMP     @Exit

@TwoOrThree:
        CMP     EAX, $07FF
        JA      @Three
        //UTF8-char: 110xxxxx 10xxxxxx
        CMP     EDX, 2
        JB      @Exit                   // Dest is full
        SUB     EDX, 2
        //AH = 00000xxx; AL = xxxxxxxx
        ROR     AX, 6                   // AH = xxxxxx00; AL = 000xxxxx
        SHR     AH, 2                   // AH = 00xxxxxx
        OR      AX, $80C0               // AH = 10xxxxxx; AL = 110xxxxx
        STOSW                           // AL (lead byte) is stored first
        DEC     ECX
        JNZ     @NextChar
        JMP     @Exit

@Three:
        //UTF8-char: 1110xxxx 10xxxxxx 10xxxxxx
        CMP     EDX, 3
        JB      @Exit                   // Dest is full
        SUB     EDX, 3
        MOV     BL, AL                  // keep the low byte for the third UTF-8 byte
        ROL     AX, 4                   // AL = ????xxxx; AH = xxxxxx??
        AND     AL, $0F                 // AL = 0000xxxx
        SHR     AH, 2                   // AH = 00xxxxxx
        OR      AX, $80E0               // AH = 10xxxxxx; AL = 1110xxxx
        STOSW                           // first two bytes
        MOV     AL, BL                  // ??xxxxxx
        AND     AL, $3F                 // 00xxxxxx
        OR      AL, $80                 // 10xxxxxx
        JMP     @StoreByte              // third byte; space was already charged

@Exit:
        MOV     BYTE PTR [EDI], $00     // terminate Dest
@Done:
        POP     EAX                     // start of Dest
        SUB     EDI, EAX                // bytes stored
        MOV     EAX, EDI
        //restore
        POP     EDI
        POP     ESI
        POP     EBX
        POPF
end;
|
||||
|
||||
END//Decode string from UTF8 to UCS2
|
||||
function UTF8ToUCS2(Dest: PWideChar; MaxDestBytes: Cardinal;
  Source: PChar; SourceChars: Cardinal): Cardinal;
// Decodes UTF-8 text into null-terminated UCS-2.
//   In:  EAX     = Dest         (output buffer)
//        EDX     = MaxDestBytes (size of Dest in bytes, terminator included)
//        ECX     = Source       (UTF-8 bytes; nil is treated as empty)
//        [stack] = SourceChars  (maximum number of source BYTES to consume)
//   Out: EAX = number of UCS-2 characters stored, terminator not counted
// Decoding stops at a #0 byte, after SourceChars bytes, when Dest has no room
// for another character plus the terminator, or at the first sequence that is
// malformed, truncated, or longer than three bytes (not representable in
// UCS-2). If Dest is nil or MaxDestBytes < 2 nothing is written and 0 is
// returned.
asm
        //backup
        PUSHF
        CLD                             // string instructions move forward
        PUSH    EBX
        PUSH    ESI
        PUSH    EDI

        PUSH    Dest                    // remember the start of Dest for the length
        MOV     EDI, Dest
        TEST    EDI, EDI
        JZ      @Done                   // no buffer: nothing can be stored
        CMP     EDX, 2
        JB      @Done                   // not even room for the terminator
        TEST    Source, Source
        JZ      @Exit                   // nil source -> empty result
        MOV     ESI, Source
        MOV     ECX, SourceChars        // ECX = source bytes left, current one included
        TEST    ECX, ECX
        JZ      @Exit                   // zero-length input (the old LOOP wrapped here)

@NextChar:
        // EDX = free bytes in Dest, including the 2 reserved for the terminator
        CMP     EDX, 4
        JB      @Exit
        XOR     EAX, EAX
        LODSB                           // AL = next source byte
        TEST    AL, AL
        JZ      @Exit                   // end of string
        TEST    AL, $80
        JNZ     @Lead                   // not plain ASCII
        //UTF8: 0xxxxxxx (AH = 0)

@Store:
        STOSW                           // store the UCS-2 character in AX
        SUB     EDX, 2
        DEC     ECX                     // account for the last byte of this sequence
        JNZ     @NextChar
        JMP     @Exit

@Lead:
        TEST    AL, $40
        JZ      @Exit                   // 10xxxxxx cannot start a sequence
        CMP     AL, $F0
        JAE     @Exit                   // 1111xxxx: code point beyond UCS-2
        DEC     ECX                     // lead byte consumed
        JZ      @Exit                   // sequence truncated by SourceChars
        MOV     AH, AL                  // AH = lead byte
        LODSB                           // AL = second byte
        MOV     BL, AL
        AND     BL, $C0
        CMP     BL, $80
        JNE     @Exit                   // must be 10xxxxxx (also rejects #0)
        TEST    AH, $20
        JNZ     @Three                  // 1110xxxx: three-byte sequence

        //UTF8: 110xxxxx 10xxxxxx
        MOV     BL, AH                  // 110xxxxx
        SHR     AH, 2                   // 00110xxx
        AND     AX, $073F               // AH: 00000xxx; AL: 00xxxxxx
        SHL     BL, 6                   // xx000000
        OR      AL, BL                  // AX: 00000xxx:xxxxxxxx
        JMP     @Store

@Three:
        //UTF8: 1110xxxx 10xxxxxx 10xxxxxx
        DEC     ECX                     // second byte consumed
        JZ      @Exit                   // sequence truncated by SourceChars
        MOV     BL, AL                  // BL = second byte
        LODSB                           // AL = third byte
        MOV     BH, AL
        AND     BH, $C0
        CMP     BH, $80
        JNE     @Exit                   // must be 10xxxxxx (also rejects #0)
        AND     BX, $003F               // BH = 0; BL = 00xxxxxx
        ROR     BX, 2                   // BL = 0000xxxx; BH = xx000000
        AND     AL, $3F                 // 00xxxxxx
        OR      AL, BH                  // low byte of the UCS-2 character
        SHL     AH, 4                   // xxxx0000
        OR      AH, BL                  // high byte of the UCS-2 character
        JMP     @Store

@Exit:
        XOR     EAX, EAX
        MOV     [EDI], AX               // terminate Dest
@Done:
        POP     EAX                     // start of Dest
        SUB     EDI, EAX                // bytes stored
        SHR     EDI, 1                  // -> characters
        MOV     EAX, EDI
        //restore
        POP     EDI
        POP     ESI
        POP     EBX
        POPF
end;
|
||||
|
||||
//Encode string from UCS2 to UTF8
|
||||
function UCS2ToUTF8(Dest: PChar; MaxDestBytes: Cardinal;
  Source: PWideChar; SourceChars: Cardinal): Cardinal;
// Encodes UCS-2 text as null-terminated UTF-8.
//   In:  EAX     = Dest         (output buffer)
//        EDX     = MaxDestBytes (size of Dest in bytes, terminator included)
//        ECX     = Source       (UCS-2 characters; nil is treated as empty)
//        [stack] = SourceChars  (maximum number of source characters to encode)
//   Out: EAX = number of bytes stored, terminator not counted
// Encoding stops at a #0 character, after SourceChars characters, or when the
// next character plus the terminator no longer fits into Dest. Each 16-bit
// unit is encoded on its own (1 to 3 bytes); surrogate pairs are not combined.
// If Dest is nil or MaxDestBytes = 0 nothing is written and 0 is returned.
asm
        //backup
        PUSHF
        CLD                             // string instructions move forward
        PUSH    EBX
        PUSH    ESI
        PUSH    EDI

        PUSH    Dest                    // remember the start of Dest for the length
        MOV     EDI, Dest
        TEST    EDI, EDI
        JZ      @Done                   // no buffer: nothing can be stored
        TEST    EDX, EDX
        JZ      @Done                   // not even room for the terminator
        DEC     EDX                     // reserve the terminator; EDX = payload bytes free
        TEST    Source, Source
        JZ      @Exit                   // nil source -> empty result
        MOV     ESI, Source
        MOV     ECX, SourceChars        // ECX = characters left to encode
        TEST    ECX, ECX
        JZ      @Exit                   // zero-length input (the old LOOP wrapped here)

@NextChar:
        XOR     EAX, EAX
        LODSW                           // AX = next UCS-2 character
        TEST    EAX, EAX
        JZ      @Exit                   // end of string
        CMP     EAX, $007F
        JA      @TwoOrThree
        //UTF8-char: 0xxxxxxx
        CMP     EDX, 1
        JB      @Exit                   // Dest is full
        DEC     EDX

@StoreByte:
        STOSB                           // store the single / final byte in AL
        DEC     ECX
        JNZ     @NextChar
        JMP     @Exit

@TwoOrThree:
        CMP     EAX, $07FF
        JA      @Three
        //UTF8-char: 110xxxxx 10xxxxxx
        CMP     EDX, 2
        JB      @Exit                   // Dest is full
        SUB     EDX, 2
        //AH = 00000xxx; AL = xxxxxxxx
        ROR     AX, 6                   // AH = xxxxxx00; AL = 000xxxxx
        SHR     AH, 2                   // AH = 00xxxxxx
        OR      AX, $80C0               // AH = 10xxxxxx; AL = 110xxxxx
        STOSW                           // AL (lead byte) is stored first
        DEC     ECX
        JNZ     @NextChar
        JMP     @Exit

@Three:
        //UTF8-char: 1110xxxx 10xxxxxx 10xxxxxx
        CMP     EDX, 3
        JB      @Exit                   // Dest is full
        SUB     EDX, 3
        MOV     BL, AL                  // keep the low byte for the third UTF-8 byte
        ROL     AX, 4                   // AL = ????xxxx; AH = xxxxxx??
        AND     AL, $0F                 // AL = 0000xxxx
        SHR     AH, 2                   // AH = 00xxxxxx
        OR      AX, $80E0               // AH = 10xxxxxx; AL = 1110xxxx
        STOSW                           // first two bytes
        MOV     AL, BL                  // ??xxxxxx
        AND     AL, $3F                 // 00xxxxxx
        OR      AL, $80                 // 10xxxxxx
        JMP     @StoreByte              // third byte; space was already charged

@Exit:
        MOV     BYTE PTR [EDI], $00     // terminate Dest
@Done:
        POP     EAX                     // start of Dest
        SUB     EDI, EAX                // bytes stored
        MOV     EAX, EDI
        //restore
        POP     EDI
        POP     ESI
        POP     EBX
        POPF
end;
|
||||
|
||||
end.
|
Reference in New Issue
Block a user