Files
lazarus-ccr/components/tvplanit/source/vpxchrflt.pas

634 lines
18 KiB
ObjectPascal
Raw Normal View History

{*********************************************************}
{* VPXCHRFLT.PAS 1.03 *}
{*********************************************************}
{* ***** BEGIN LICENSE BLOCK ***** *}
{* Version: MPL 1.1 *}
{* *}
{* The contents of this file are subject to the Mozilla Public License *}
{* Version 1.1 (the "License"); you may not use this file except in *}
{* compliance with the License. You may obtain a copy of the License at *}
{* http://www.mozilla.org/MPL/ *}
{* *}
{* Software distributed under the License is distributed on an "AS IS" basis, *}
{* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License *}
{* for the specific language governing rights and limitations under the *}
{* License. *}
{* *}
{* The Original Code is TurboPower Visual PlanIt *}
{* *}
{* The Initial Developer of the Original Code is TurboPower Software *}
{* *}
{* Portions created by TurboPower Software Inc. are Copyright (C) 2002 *}
{* TurboPower Software Inc. All Rights Reserved. *}
{* *}
{* Contributor(s): *}
{* *}
{* ***** END LICENSE BLOCK ***** *}
{$I Vp.INC}
unit VpXChrFlt;
interface
uses
SysUtils, Classes, VpSR,
VpBase, VpXBase;
const
VpEndOfStream = #1;
VpEndOfReplaceText = #2;
VpNullChar = #3;
type
TVpStreamFormat = {character formats of stream...}
(sfUTF8, {..UTF8 -- the default}
sfUTF16LE, {..UTF16, little endian (eg, Intel)}
sfUTF16BE, {..UTF16, big endian}
sfISO88591); {..ISO-8859-1, or Latin 1}
TVpBaseCharFilter = class(TObject)
protected
FBufSize : Longint;
FBuffer : PAnsiChar;
FBufPos : Longint;
FFormat : TVpStreamFormat; {The format of the incoming stream}
FFreeStream : Boolean;
FStream : TStream;
FStreamPos : Longint;
FStreamSize : Longint;
protected
function csGetSize : Longint; virtual;
procedure csSetFormat(const aValue : TVpStreamFormat); virtual; abstract;
public
constructor Create(aStream : TStream; const aBufSize : Longint); virtual;
destructor Destroy; override;
property BufSize : Longint
read FBufSize;
property FreeStream : Boolean
read FFreeStream write FFreeStream;
property Stream : TStream
read FStream;
end;
TVpInCharFilter = class(TVpBaseCharFilter)
private
FBufEnd : Longint;
FUCS4Char : TVpUcs4Char;
FLine : Longint;
FLinePos : Longint;
FLastChar : DOMChar;
FEOF : Boolean;
FBufDMZ : Longint;
FInTryRead : Boolean;
protected
procedure csAdvanceLine;
procedure csAdvanceLinePos;
procedure csGetCharPrim(out aCh: TVpUcs4Char); //; var aIsLiteral: Boolean);
function csGetNextBuffer: Boolean;
function csGetTwoAnsiChars(var Buffer): Boolean;
function csGetUtf8Char: TVpUcs4Char;
procedure csIdentifyFormat;
procedure csPushCharPrim(aCh: TVpUcs4Char);
procedure csSetFormat(const aValue: TVpStreamFormat); override;
procedure csGetChar(out aCh: TVpUcs4Char); //; var aIsLiteral: Boolean);
public
constructor Create(aStream: TStream; const aBufSize: Longint); override;
property Format: TVpStreamFormat read FFormat write csSetFormat;
property EOF: Boolean read FEOF;
public
procedure SkipChar;
function TryRead(const S: array of Longint) : Boolean;
function ReadChar: DOMChar;
function ReadAndSkipChar: DOMChar;
property Line : LongInt read FLine;
property LinePos : LongInt read FLinePos;
end;
TVpOutCharFilter = class(TVpBaseCharFilter)
protected
FFormat: TVpStreamFormat;
FSetUTF8Sig: Boolean;
protected
function csGetSize: LongInt; override;
procedure csPutUtf8Char(const aCh: TVpUcs4Char);
procedure csSetFormat(const aValue: TVpStreamFormat); override;
procedure csWriteBuffer;
public
constructor Create(aStream: TStream; const aBufSize: Longint); override;
destructor Destroy; override;
procedure PutUCS4Char(aCh: TVpUcs4Char);
function PutChar(aCh1, aCh2: DOMChar; out aBothUsed: Boolean): Boolean;
function PutString(const aText: DOMString): Boolean;
function Position : integer;
property Format: TVpStreamFormat read FFormat write csSetFormat;
property WriteUTF8Signature: Boolean read FSetUTF8Sig write FSetUTF8Sig;
property Size: LongInt read csGetSize;
end;
implementation
const
CR = 13; {Carriage return}
LF = 10; {Line feed}
{====================================================================}
constructor TVpBaseCharFilter.Create(aStream : TStream;
const aBufSize : Longint);
begin
inherited Create;
Assert(Assigned(aStream));
FBufSize := aBufSize;
FBufPos := 0;
FFormat := sfUTF8;
FFreeStream := False;
FStream := aStream;
FStreamPos := aStream.Position;
FStreamSize := aStream.Size;
GetMem(FBuffer, FBufSize);
end;
{--------}
destructor TVpBaseCharFilter.Destroy;
begin
if Assigned(FBuffer) then begin
FreeMem(FBuffer, FBufSize);
FBuffer := nil;
end;
if FFreeStream then
FStream.Free;
inherited Destroy;
end;
{--------}
function TVpBaseCharFilter.csGetSize : LongInt;
begin
Result := FStreamSize;
end;
{====================================================================}
constructor TVpInCharFilter.Create(aStream : TStream;
const aBufSize : Longint);
begin
inherited Create(aStream, aBufSize);
if FStreamSize <= aBufSize then
FBufDMZ := 0
else
FBufDMZ := 64;
FBufEnd := 0;
FLine := 1;
FLinePos := 1;
csIdentifyFormat;
if aStream.Size > 0 then
FEOF := False
else
FEOF := True;
FUCS4Char := TVpUCS4Char(VpNullChar);
FInTryRead := False;
end;
{--------}
procedure TVpInCharFilter.csAdvanceLine;
begin
Inc(FLine);
FLinePos := 1;
end;
{--------}
procedure TVpInCharFilter.csAdvanceLinePos;
begin
Inc(FLinePos);
end;
{--------}
procedure TVpInCharFilter.csGetCharPrim(out aCh: TVpUcs4Char);
// var aIsLiteral: Boolean);
begin
{Note: as described in the XML spec (2.11) all end-of-lines are
passed as LF characters no matter what the original document
had. This routine converts a CR/LF pair to a single LF, a
single CR to an LF, and passes LFs as they are.}
{get the first (test) character}
{first check the UCS4Char buffer to see if we have a character there;
if so get it}
if (FUCS4Char <> TVpUCS4Char(VpNullChar)) then begin
aCh := FUCS4Char;
FUCS4Char := TVpUCS4Char(VpNullChar);
end
{otherwise get a character from the buffer; this depends on the
format of the stream of course}
else begin
case Format of
sfUTF8: aCh := csGetUtf8Char;
else
{it is next to impossible that this else clause is reached; if
it is we're in deep doggy doo-doo, so pretending that it's the
end of the stream is the least of our worries}
aCh := TVpUCS4Char(VpEndOfStream);
end;
end;
{if we got a CR, then we need to see what the next character is; if
it is an LF, return LF; otherwise put the second character back
and still return an LF}
if (aCh = CR) then begin
if (FUCS4Char <> TVpUCS4Char(VpNullChar)) then begin
aCh := FUCS4Char;
FUCS4Char := TVpUCS4Char(VpNullChar);
end
else begin
case Format of
sfUTF8: aCh := csGetUtf8Char;
else
aCh := TVpUCS4Char(VpEndOfStream);
end;
end;
if (aCh <> LF) then
csPushCharPrim(aCh);
aCh := LF;
end;
{check to see that the character is valid according to XML}
if (aCh <> TVpUCS4Char(VpEndOfStream)) and (not VpIsChar(aCh)) then
raise EVpFilterError.CreateError(FStream.Position, Line, LinePos, sInvalidXMLChar);
end;
{--------}
function TVpInCharFilter.csGetNextBuffer : Boolean;
begin
if FStream.Position > FBufDMZ then
{Account for necessary buffer overlap}
FStream.Position := FStream.Position - (Int64(FBufEnd) - FBufPos);
FBufEnd := FStream.Read(FBuffer^, FBufSize);
FStreamPos := FStream.Position;
FBufPos := 0;
Result := FBufEnd <> 0;
end;
{--------}
function TVpInCharFilter.csGetTwoAnsiChars(var Buffer) : Boolean;
type
TTwoChars = array [0..1] of AnsiChar;
var
i : integer;
begin
{get two byte characters from the stream}
for i := 0 to 1 do begin
{if the buffer is empty, fill it}
if (FBufPos >= FBufEnd - FBufDMZ) and
(not FInTryRead) then begin
{if we exhaust the stream, we couldn't satisfy the request}
if not csGetNextBuffer then begin
Result := false;
Exit;
end;
end;
{get the first byte character from the buffer}
TTwoChars(Buffer)[i] := FBuffer[FBufPos];
inc(FBufPos);
end;
Result := true;
end;
{--------}
function TVpInCharFilter.csGetUtf8Char : TVpUcs4Char;
var
Utf8Char : TVpUtf8Char;
{Ch : AnsiChar;}
Len : Integer;
i : Integer;
begin
{if the buffer is empty, fill it}
if (not FInTryRead) and
(FBufPos >= FBufEnd - FBufDMZ) then begin
{if we exhaust the stream, there are no more characters}
if not csGetNextBuffer then begin
Result := TVpUCS4Char(VpEndOfStream);
Exit;
end;
end;
{get the first byte character from the buffer}
Utf8Char[1] := FBuffer[FBufPos];
FBufPos := FBufPos + 1;
{determine the length of the Utf8 character from this}
Len := VpGetLengthUtf8(Utf8Char[1]);
if (Len < 1) then
raise EVpFilterError.CreateError (FStream.Position,
Line,
LinePos,
sBadUTF8Char);
Move(Len, Utf8Char[0], 1);
{get the remaining characters from the stream}
for i := 2 to Len do begin
{if the buffer is empty, fill it}
if (FBufPos >= FBufEnd - FBufDMZ) and
(not FInTryRead) then begin
{if we exhaust the stream now, it's a badly formed UTF8
character--true--but we'll just pretend that the last character
does not exist}
if not csGetNextBuffer then begin
Result := TVpUCS4Char(VpEndOfStream);
Exit;
end;
end;
{get the next byte character from the buffer}
Utf8Char[i] := FBuffer[FBufPos];
FBufPos := FBufPos + 1;
end;
{convert the UTF8 character into a UCS4 character}
if (not VpUtf8ToUcs4(Utf8Char, Len, Result)) then
raise EVpFilterError.CreateError (FStream.Position,
Line,
LinePos,
sBadUTF8Char);
end;
{--------}
procedure TVpInCharFilter.csIdentifyFormat;
begin
{Note: a stream in either of the UTF16 formats will start with a
byte-order-mark (BOM). This is the unicode value $FEFF. Hence
if the first two bytes of the stream are read as ($FE, $FF),
we have a UTF16BE stream. If they are read as ($FF, $FE), we
have a UTF16LE stream. Otherwise we assume a UTF8 stream (at
least for now, it can be changed later).}
csGetNextBuffer;
if FBufSize > 2 then
if (FBuffer[0] = #$FE) and (FBuffer[1] = #$FF) then begin
FFormat := sfUTF16BE;
FBufPos := 2;
end else
if (FBuffer[0] = #$FF) and (FBuffer[1] = #$FE) then begin
FFormat := sfUTF16LE;
FBufPos := 2;
end else
if (FBuffer[0] = #$EF) and (FBuffer[1] = #$BB) and (FBuffer[2] = #$BF) then
begin
FFormat := sfUTF8;
FBufPos := 3;
end else
FFormat := sfUTF8
else
FFormat := sfUTF8;
end;
{--------}
procedure TVpInCharFilter.csPushCharPrim(aCh: TVpUcs4Char);
begin
Assert(FUCS4Char = TVpUCS4Char(VpNullChar));
{put the char into the buffer}
FUCS4Char := aCh;
end;
{--------}
procedure TVpInCharFilter.csSetFormat(const aValue: TVpStreamFormat);
begin
{we do not allow the UTF16 formats to be changed since they were
well defined by the BOM at the start of the stream but all other
changes are allowed (caveat user); this means that an input stream
that defaulted to UTF8 can be changed at a later stage to
ISO-8859-1 or whatever if required}
if (Format <> sfUTF16LE) and (Format <> sfUTF16BE) then
FFormat := aValue;
end;
{--------}
procedure TVpInCharFilter.csGetChar(out aCh: TVpUcs4Char); //var aIsLiteral: Boolean);
begin
{get the next character; for an EOF raise an exception}
csGetCharPrim(aCh); //, aIsLiteral);
if (aCh = TVpUCS4Char(VpEndOfStream)) then
FEOF := True
else
{maintain the line/character counts}
if (aCh = LF) then
csAdvanceLine
else
csAdvanceLinePos;
end;
{--------}
function TVpInCharFilter.TryRead(const S: array of Longint): Boolean;
var
Idx : Longint;
Ch : TVpUcs4Char;
// IL : Boolean;
OldBufPos : Longint;
OldChar : DOMChar;
OldUCS4Char : TVpUcs4Char;
OldLinePos : Longint;
OldLine : Longint;
begin
OldBufPos := FBufPos;
OldChar := FLastChar;
OldUCS4Char := FUCS4Char;
OldLinePos := LinePos;
OldLine := Line;
Result := True;
FInTryRead := True;
try
for Idx := Low(s) to High(S) do begin
csGetChar(Ch); //, IL);
if Ch <> TVpUcs4Char(S[Idx]) then begin
Result := False;
Break;
end;
end;
finally
if not Result then begin
FBufPos := OldBufPos;
FLastChar := OldChar;
FUCS4Char := OldUCS4Char;
FLinePos := OldLinePos;
FLine := OldLine;
end else begin
FLastChar := #0;
FUCS4Char := TVpUCS4Char(VpNullChar);
if (FStreamSize = FStreamPos) and
(FBufPos = FBufEnd) then
FEOF := True;
end;
FInTryRead := False;
end;
end;
{--------}
procedure TVpInCharFilter.SkipChar;
begin
FLastChar := #0;
FUCS4Char := TVpUCS4Char(VpNullChar);
Inc(FLinePos);
end;
{--------}
function TVpInCharFilter.ReadandSkipChar : DOMChar;
var
Ch: TVpUCS4Char;
// IL: Boolean;
begin
if FLastChar = '' then begin
csGetChar(Ch); //, IL);
VpUcs4ToWideChar(Ch, Result);
end else begin
Result := FLastChar;
Inc(FLinePos);
end;
FLastChar := #0;
FUCS4Char := TVpUCS4Char(VpNullChar);
if (FStreamSize = FStreamPos) and (FBufPos = FBufEnd) then
FEOF := True;
end;
{--------}
function TVpInCharFilter.ReadChar: DOMChar;
var
Ch: TVpUCS4Char = 0; // to silence the compiler
// IL: Boolean; // dto.
begin
if (FLastChar = '') or (FLastChar = #0) then begin // wp: added #0
csGetChar(Ch); //, IL);
VpUcs4ToWideChar(Ch, Result);
Dec(FLinePos);
FLastChar := Result;
if (FUCS4Char <> TVpUCS4Char(VpNullChar)) then
if (Format = sfUTF16LE) or (Format = sfUTF16BE) then
Dec(FBufPos, 2)
else if FBufPos > 0 then
Dec(FBufPos, 1);
FUCS4Char := Ch;
end else
Result := FLastChar;
end;
{===TVpOutCharFilter=================================================}
constructor TVpOutCharFilter.Create(aStream : TStream; const aBufSize : Longint);
begin
inherited Create(aStream, aBufSize);
FSetUTF8Sig := True;
end;
{--------}
destructor TVpOutCharFilter.Destroy;
begin
if Assigned(FBuffer) then
if (FBufPos > 0) then
csWriteBuffer;
inherited Destroy;
end;
{--------}
function TVpOutCharFilter.csGetSize: LongInt;
begin
Result := FStream.Size + FBufPos;
end;
{--------}
procedure TVpOutCharFilter.csPutUtf8Char(const aCh: TVpUcs4Char);
var
UTF8: TVpUtf8Char;
i: integer;
begin
if not VpUcs4ToUtf8(aCh, UTF8) then
raise EVpStreamError.CreateError(FStream.Position, sUCS_U8ConverErr);
for i := 1 to length(UTF8) do begin
if (FBufPos = FBufSize) then
csWriteBuffer;
FBuffer[FBufPos] := UTF8[i];
inc(FBufPos);
end;
end;
{--------}
procedure TVpOutCharFilter.csSetFormat(const aValue: TVpStreamFormat);
var
TooLate: Boolean;
begin
case Format of
sfUTF8:
TooLate := (FSetUTF8Sig and (Position > 3)) or ((not FSetUTF8Sig) and (Position > 0));
sfUTF16LE:
TooLate := (Position > 2);
sfUTF16BE:
TooLate := (Position > 2);
sfISO88591:
TooLate := (Position > 0);
else
TooLate := true;
end;
if not TooLate then begin
FBufPos := 0;
FFormat := aValue;
case Format of
sfUTF8:
if FSetUTF8Sig then begin
FBuffer[0] := #$EF;
FBuffer[1] := #$BB;
FBuffer[2] := #$BF;
FBufPos := 3;
end;
sfUTF16LE :
begin
FBuffer[0] := #$FF;
FBuffer[1] := #$FE;
FBufPos := 2;
end;
sfUTF16BE :
begin
FBuffer[0] := #$FE;
FBuffer[1] := #$FF;
FBufPos := 2;
end;
else
FBufPos := 0;
end;
end;
end;
{--------}
procedure TVpOutCharFilter.csWriteBuffer;
begin
FStream.WriteBuffer(FBuffer^, FBufPos);
FBufPos := 0;
end;
{--------}
procedure TVpOutCharFilter.PutUCS4Char(aCh: TVpUcs4Char);
begin
case Format of
sfUTF8: csPutUTF8Char(aCh);
end;
end;
{--------}
function TVpOutCharFilter.PutChar(aCh1, aCh2: DOMChar;
out aBothUsed: Boolean): Boolean;
var
OutCh: TVpUCS4Char;
begin
Result := VpUTF16toUCS4(aCh1, aCh2, OutCh, aBothUsed);
if Result then
PutUCS4Char(OutCh);
end;
{--------}
function TVpOutCharFilter.PutString(const aText: DOMString): Boolean;
var
aBothUsed: Boolean;
aLen, aPos: Integer;
begin
aLen := Length(aText);
aPos := 1;
Result := True;
while Result and (aPos <= aLen) do begin
if aPos = aLen then
Result := PutChar(aText[aPos], aText[aPos], aBothUsed)
else
Result := PutChar(aText[aPos], aText[aPos + 1], aBothUsed);
if Result then
if aBothUsed then
inc(aPos, 2)
else
inc(aPos, 1);
end;
end;
{--------}
function TVpOutCharFilter.Position: Integer;
begin
Result := FStreamPos + FBufPos;
end;
end.