fpspreadsheet: Use '?' as common broken-utf8-replacement character.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@8578 8e941d3f-bd1b-0410-a28a-d453659cc2b4
2022-10-21 14:20:06 +00:00
parent 506dbe52b7
commit 68ec591390
4 changed files with 17 additions and 15 deletions
--- a/components/fpspreadsheet/source/common/fpstypes.pas
+++ b/components/fpspreadsheet/source/common/fpstypes.pas
@@ -168,7 +168,7 @@ const
  STR_ERR_ARG_ERROR = '#N/A';
  // No Excel errors
  STR_ERR_FORMULA_NOT_SUPPORTED= '<FMLA?>';
-  STR_ERR_UNKNOWN = '#UNKNWON!';
+  STR_ERR_UNKNOWN = '#UNKNOWN!';
  {@@ Maximum count of worksheet columns}
  MAX_COL_COUNT = 65535;
@@ -193,6 +193,10 @@ const
      unique value simplifies many things... }
  FPS_LINE_ENDING = #10;
+  {@@ Replacement character used in broken UTF8 strings. Alternate replacement
+      chars could be: #$E2#$8E#$95 = box, or #$EF#$BF#$BD = "unicode replacement character" }
+  FPS_REPLACEMENT_CHAR = '?';
 var
  CSVParams: TsCSVParams = (
    SheetIndex: 0;
--- a/components/fpspreadsheet/source/common/fpsutils.pas
+++ b/components/fpspreadsheet/source/common/fpsutils.pas
@@ -241,8 +241,8 @@ function GetFontAsString(AFont: TsFont): String;
 function ISO8601StrToDateTime(s: String): TDateTime;
-function UTF8CodePoints(AText: string; out sa: TStringArray; AReplacement: String = #$E2#$8E#$95): Boolean;
+function UTF8CodePoints(AText: string; out sa: TStringArray): Boolean;
-function ValidUTF8Text(var AText: String; AReplacement: String = #$E2#$8E#$95): Boolean;
+function ValidUTF8Text(var AText: String): Boolean;
 procedure AppendToStream(AStream: TStream; const AString: String); inline; overload;
 procedure AppendToStream(AStream: TStream; const AString1, AString2: String); inline; overload;
@@ -3222,8 +3222,7 @@ end;
  returns them as an array of string (sa). The result of the function is
  false if at least one codepoint is broken UTF8.
 -------------------------------------------------------------------------------}
-function UTF8CodePoints(AText: String; out sa: TStringArray;
+function UTF8CodePoints(AText: String; out sa: TStringArray): Boolean;
-  AReplacement: string = #$E2#$8E#$95): Boolean;
 var
  n: Integer;
  ch: String;
@@ -3245,7 +3244,7 @@ begin
    chLen := UTF8CodePointSize(P);
    if (chLen = 1) and (P^ > #127) then
    begin
-      ch := AReplacement;
+      ch := FPS_REPLACEMENT_CHAR;
      Result := false;
    end else
    begin
@@ -3259,7 +3258,7 @@ begin
  SetLength(sa, n);
 end;
-function ValidUTF8Text(var AText: string; AReplacement: String = #$E2#$8E#$95): Boolean;
+function ValidUTF8Text(var AText: string): Boolean;
 var
  i: Integer;
  P: PChar;
@@ -3271,7 +3270,7 @@ begin
    if i >= 0 then
    begin
      Delete(AText, i+1, 1);
-      Insert(AReplacement, AText, i+1);
+      Insert(FPS_REPLACEMENT_CHAR, AText, i+1);
      Result := false;
    end;
  until (i < 0);
--- a/components/fpspreadsheet/source/common/fpsxmlcommon.pas
+++ b/components/fpspreadsheet/source/common/fpsxmlcommon.pas
@@ -47,7 +47,7 @@ function GetNodeValue(ANode: TDOMNode): String;
 function LineEndingToBR(const AText: String): String;
 function UTF8TextToXMLText(AText: string; ProcessLineEndings: Boolean = false): string;
 function ValidXMLText(var AText: string; ReplaceSpecialChars: Boolean = true;
-  ProcessLineEndings: Boolean = false; InvalidUTF8Replacement: String = #$E2#$8E#$95): Boolean;
+  ProcessLineEndings: Boolean = false): Boolean;
 function XMLQuote(AText: String): String;
 procedure UnzipFile(AZipFileName, AZippedFile, ADestFolder: String);
@@ -202,25 +202,24 @@ end;
                               codes (e.g. '>' --> '&gt;')
  @param  ProcessLineEndings   If TRUE line ending characters are replaced by
                               their HTML entities.
-  @param  InvalidUTF8Replacement  UTF8-character inserted for a malformed UTF8 codepoint.
  @return FALSE if characters were replaced, TRUE otherwise.
 -------------------------------------------------------------------------------}
 function ValidXMLText(var AText: string;
  ReplaceSpecialChars: Boolean = true;
-  ProcessLineEndings: Boolean = false;
+  ProcessLineEndings: Boolean = false): Boolean;
-  InvalidUTF8Replacement: String = #$E2#$8E#$95): Boolean;
+// Alternate replacement chars: #$E2#$8E#$95 = box, or #$EF#$BF#$BD = "replacement character"
 var
  i: Integer;
 begin
  // Replace broken UTF8 codepoints
-  Result := ValidUTF8Text(AText, InvalidUTF8Replacement);
+  Result := ValidUTF8Text(AText);
  // Replace ASCII characters which are not allowed in XML.
  for i := Length(AText) downto 1 do
    if (AText[i] < #32) and not (AText[i] in [#9, #10, #13]) then begin
      // Replace invalid character by the replacement character
      Delete(AText, i, 1);
-      Insert(InvalidUTF8Replacement, AText, i);
+      Insert(FPS_REPLACEMENT_CHAR, AText, i);
      Result := false;
    end;
--- a/components/fpspreadsheet/source/common/xlsbiff5.pas
+++ b/components/fpspreadsheet/source/common/xlsbiff5.pas
@@ -1876,7 +1876,7 @@ begin
    exit;
  lValue := FixLineEnding(AValue);     // This does not change indices for rtf.
-  if not ValidUTF8Text(lValue, '?') then
+  if not ValidUTF8Text(lValue) then
    Workbook.AddErrorMsg(
      rsInvalidCharacterInCell, [
      GetCellString(ARow, ACol)