fpspreadsheet: Use '?' as common broken-utf8-replacement character.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@8578 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2022-10-21 14:20:06 +00:00
parent 506dbe52b7
commit 68ec591390
4 changed files with 17 additions and 15 deletions

View File

@@ -168,7 +168,7 @@ const
STR_ERR_ARG_ERROR = '#N/A'; STR_ERR_ARG_ERROR = '#N/A';
// No Excel errors // No Excel errors
STR_ERR_FORMULA_NOT_SUPPORTED= '<FMLA?>'; STR_ERR_FORMULA_NOT_SUPPORTED= '<FMLA?>';
STR_ERR_UNKNOWN = '#UNKNWON!'; STR_ERR_UNKNOWN = '#UNKNOWN!';
{@@ Maximum count of worksheet columns} {@@ Maximum count of worksheet columns}
MAX_COL_COUNT = 65535; MAX_COL_COUNT = 65535;
@@ -193,6 +193,10 @@ const
unique value simplifies many things... } unique value simplifies many things... }
FPS_LINE_ENDING = #10; FPS_LINE_ENDING = #10;
{@@ Replacement character used in broken UTF8 strings. Alternate replacement
chars could be: #$E2#$8E#$95 = box, or #$EF#$BF#$BD = "unicode replacement character" }
FPS_REPLACEMENT_CHAR = '?';
var var
CSVParams: TsCSVParams = ( CSVParams: TsCSVParams = (
SheetIndex: 0; SheetIndex: 0;

View File

@@ -241,8 +241,8 @@ function GetFontAsString(AFont: TsFont): String;
function ISO8601StrToDateTime(s: String): TDateTime; function ISO8601StrToDateTime(s: String): TDateTime;
function UTF8CodePoints(AText: string; out sa: TStringArray; AReplacement: String = #$E2#$8E#$95): Boolean; function UTF8CodePoints(AText: string; out sa: TStringArray): Boolean;
function ValidUTF8Text(var AText: String; AReplacement: String = #$E2#$8E#$95): Boolean; function ValidUTF8Text(var AText: String): Boolean;
procedure AppendToStream(AStream: TStream; const AString: String); inline; overload; procedure AppendToStream(AStream: TStream; const AString: String); inline; overload;
procedure AppendToStream(AStream: TStream; const AString1, AString2: String); inline; overload; procedure AppendToStream(AStream: TStream; const AString1, AString2: String); inline; overload;
@@ -3222,8 +3222,7 @@ end;
returns them as an array of string (sa). The result of the function is returns them as an array of string (sa). The result of the function is
false if at least one codepoint is broken UTF8. false if at least one codepoint is broken UTF8.
-------------------------------------------------------------------------------} -------------------------------------------------------------------------------}
function UTF8CodePoints(AText: String; out sa: TStringArray; function UTF8CodePoints(AText: String; out sa: TStringArray): Boolean;
AReplacement: string = #$E2#$8E#$95): Boolean;
var var
n: Integer; n: Integer;
ch: String; ch: String;
@@ -3245,7 +3244,7 @@ begin
chLen := UTF8CodePointSize(P); chLen := UTF8CodePointSize(P);
if (chLen = 1) and (P^ > #127) then if (chLen = 1) and (P^ > #127) then
begin begin
ch := AReplacement; ch := FPS_REPLACEMENT_CHAR;
Result := false; Result := false;
end else end else
begin begin
@@ -3259,7 +3258,7 @@ begin
SetLength(sa, n); SetLength(sa, n);
end; end;
function ValidUTF8Text(var AText: string; AReplacement: String = #$E2#$8E#$95): Boolean; function ValidUTF8Text(var AText: string): Boolean;
var var
i: Integer; i: Integer;
P: PChar; P: PChar;
@@ -3271,7 +3270,7 @@ begin
if i >= 0 then if i >= 0 then
begin begin
Delete(AText, i+1, 1); Delete(AText, i+1, 1);
Insert(AReplacement, AText, i+1); Insert(FPS_REPLACEMENT_CHAR, AText, i+1);
Result := false; Result := false;
end; end;
until (i < 0); until (i < 0);

View File

@@ -47,7 +47,7 @@ function GetNodeValue(ANode: TDOMNode): String;
function LineEndingToBR(const AText: String): String; function LineEndingToBR(const AText: String): String;
function UTF8TextToXMLText(AText: string; ProcessLineEndings: Boolean = false): string; function UTF8TextToXMLText(AText: string; ProcessLineEndings: Boolean = false): string;
function ValidXMLText(var AText: string; ReplaceSpecialChars: Boolean = true; function ValidXMLText(var AText: string; ReplaceSpecialChars: Boolean = true;
ProcessLineEndings: Boolean = false; InvalidUTF8Replacement: String = #$E2#$8E#$95): Boolean; ProcessLineEndings: Boolean = false): Boolean;
function XMLQuote(AText: String): String; function XMLQuote(AText: String): String;
procedure UnzipFile(AZipFileName, AZippedFile, ADestFolder: String); procedure UnzipFile(AZipFileName, AZippedFile, ADestFolder: String);
@@ -202,25 +202,24 @@ end;
codes (e.g. '>' --> '&gt;') codes (e.g. '>' --> '&gt;')
@param ProcessLineEndings If TRUE line ending characters are replaced by @param ProcessLineEndings If TRUE line ending characters are replaced by
their HTML entities. their HTML entities.
@param InvalidUTF8Replacement UTF8-character inserted for a malformed UTF8 codepoint.
@return FALSE if characters were replaced, TRUE otherwise. @return FALSE if characters were replaced, TRUE otherwise.
-------------------------------------------------------------------------------} -------------------------------------------------------------------------------}
function ValidXMLText(var AText: string; function ValidXMLText(var AText: string;
ReplaceSpecialChars: Boolean = true; ReplaceSpecialChars: Boolean = true;
ProcessLineEndings: Boolean = false; ProcessLineEndings: Boolean = false): Boolean;
InvalidUTF8Replacement: String = #$E2#$8E#$95): Boolean; // Alternate replacement chars: #$E2#$8E#$95 = box, or #$EF#$BF#$BD = "replacement character"
var var
i: Integer; i: Integer;
begin begin
// Replace broken UTF8 codepoints // Replace broken UTF8 codepoints
Result := ValidUTF8Text(AText, InvalidUTF8Replacement); Result := ValidUTF8Text(AText);
// Replace ASCII characters which are not allowed in XML. // Replace ASCII characters which are not allowed in XML.
for i := Length(AText) downto 1 do for i := Length(AText) downto 1 do
if (AText[i] < #32) and not (AText[i] in [#9, #10, #13]) then begin if (AText[i] < #32) and not (AText[i] in [#9, #10, #13]) then begin
// Replace invalid character by the replacement character // Replace invalid character by the replacement character
Delete(AText, i, 1); Delete(AText, i, 1);
Insert(InvalidUTF8Replacement, AText, i); Insert(FPS_REPLACEMENT_CHAR, AText, i);
Result := false; Result := false;
end; end;

View File

@@ -1876,7 +1876,7 @@ begin
exit; exit;
lValue := FixLineEnding(AValue); // This does not change indices for rtf. lValue := FixLineEnding(AValue); // This does not change indices for rtf.
if not ValidUTF8Text(lValue, '?') then if not ValidUTF8Text(lValue) then
Workbook.AddErrorMsg( Workbook.AddErrorMsg(
rsInvalidCharacterInCell, [ rsInvalidCharacterInCell, [
GetCellString(ARow, ACol) GetCellString(ARow, ACol)