fpspreadsheet: Use '?' as common broken-utf8-replacement character.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@8578 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2022-10-21 14:20:06 +00:00
parent 506dbe52b7
commit 68ec591390
4 changed files with 17 additions and 15 deletions

View File

@ -168,7 +168,7 @@ const
STR_ERR_ARG_ERROR = '#N/A';
// No Excel errors
STR_ERR_FORMULA_NOT_SUPPORTED= '<FMLA?>';
STR_ERR_UNKNOWN = '#UNKNWON!';
STR_ERR_UNKNOWN = '#UNKNOWN!';
{@@ Maximum count of worksheet columns}
MAX_COL_COUNT = 65535;
@ -193,6 +193,10 @@ const
unique value simplifies many things... }
FPS_LINE_ENDING = #10;
{@@ Replacement character used in broken UTF8 strings. Alternate replacement
chars could be: #$E2#$8E#$95 = box, or #$EF#$BF#$BD = "unicode replacement character" }
FPS_REPLACEMENT_CHAR = '?';
var
CSVParams: TsCSVParams = (
SheetIndex: 0;

View File

@ -241,8 +241,8 @@ function GetFontAsString(AFont: TsFont): String;
function ISO8601StrToDateTime(s: String): TDateTime;
function UTF8CodePoints(AText: string; out sa: TStringArray; AReplacement: String = #$E2#$8E#$95): Boolean;
function ValidUTF8Text(var AText: String; AReplacement: String = #$E2#$8E#$95): Boolean;
function UTF8CodePoints(AText: string; out sa: TStringArray): Boolean;
function ValidUTF8Text(var AText: String): Boolean;
procedure AppendToStream(AStream: TStream; const AString: String); inline; overload;
procedure AppendToStream(AStream: TStream; const AString1, AString2: String); inline; overload;
@ -3222,8 +3222,7 @@ end;
returns them as an array of string (sa). The result of the function is
false if at least one codepoint is broken UTF8.
-------------------------------------------------------------------------------}
function UTF8CodePoints(AText: String; out sa: TStringArray;
AReplacement: string = #$E2#$8E#$95): Boolean;
function UTF8CodePoints(AText: String; out sa: TStringArray): Boolean;
var
n: Integer;
ch: String;
@ -3245,7 +3244,7 @@ begin
chLen := UTF8CodePointSize(P);
if (chLen = 1) and (P^ > #127) then
begin
ch := AReplacement;
ch := FPS_REPLACEMENT_CHAR;
Result := false;
end else
begin
@ -3259,7 +3258,7 @@ begin
SetLength(sa, n);
end;
function ValidUTF8Text(var AText: string; AReplacement: String = #$E2#$8E#$95): Boolean;
function ValidUTF8Text(var AText: string): Boolean;
var
i: Integer;
P: PChar;
@ -3271,7 +3270,7 @@ begin
if i >= 0 then
begin
Delete(AText, i+1, 1);
Insert(AReplacement, AText, i+1);
Insert(FPS_REPLACEMENT_CHAR, AText, i+1);
Result := false;
end;
until (i < 0);

View File

@ -47,7 +47,7 @@ function GetNodeValue(ANode: TDOMNode): String;
function LineEndingToBR(const AText: String): String;
function UTF8TextToXMLText(AText: string; ProcessLineEndings: Boolean = false): string;
function ValidXMLText(var AText: string; ReplaceSpecialChars: Boolean = true;
ProcessLineEndings: Boolean = false; InvalidUTF8Replacement: String = #$E2#$8E#$95): Boolean;
ProcessLineEndings: Boolean = false): Boolean;
function XMLQuote(AText: String): String;
procedure UnzipFile(AZipFileName, AZippedFile, ADestFolder: String);
@ -202,25 +202,24 @@ end;
codes (e.g. '>' --> '&gt;')
@param ProcessLineEndings If TRUE line ending characters are replaced by
their HTML entities.
@param InvalidUTF8Replacement UTF8-character inserted for a malformed UTF8 codepoint.
@return FALSE if characters were replaced, TRUE otherwise.
-------------------------------------------------------------------------------}
function ValidXMLText(var AText: string;
ReplaceSpecialChars: Boolean = true;
ProcessLineEndings: Boolean = false;
InvalidUTF8Replacement: String = #$E2#$8E#$95): Boolean;
ProcessLineEndings: Boolean = false): Boolean;
// Alternate replacement chars: #$E2#$8E#$95 = box, or #$EF#$BF#$BD = "replacement character"
var
i: Integer;
begin
// Replace broken UTF8 codepoints
Result := ValidUTF8Text(AText, InvalidUTF8Replacement);
Result := ValidUTF8Text(AText);
// Replace ASCII characters which are not allowed in XML.
for i := Length(AText) downto 1 do
if (AText[i] < #32) and not (AText[i] in [#9, #10, #13]) then begin
// Replace invalid character by FPS_REPLACEMENT_CHAR ('?')
Delete(AText, i, 1);
Insert(InvalidUTF8Replacement, AText, i);
Insert(FPS_REPLACEMENT_CHAR, AText, i);
Result := false;
end;

View File

@ -1876,7 +1876,7 @@ begin
exit;
lValue := FixLineEnding(AValue); // This does not change indices for rtf.
if not ValidUTF8Text(lValue, '?') then
if not ValidUTF8Text(lValue) then
Workbook.AddErrorMsg(
rsInvalidCharacterInCell, [
GetCellString(ARow, ACol)