diff --git a/components/fpspreadsheet/source/common/fpstypes.pas b/components/fpspreadsheet/source/common/fpstypes.pas index 4798accd5..559145391 100644 --- a/components/fpspreadsheet/source/common/fpstypes.pas +++ b/components/fpspreadsheet/source/common/fpstypes.pas @@ -168,7 +168,7 @@ const STR_ERR_ARG_ERROR = '#N/A'; // No Excel errors STR_ERR_FORMULA_NOT_SUPPORTED= ''; - STR_ERR_UNKNOWN = '#UNKNWON!'; + STR_ERR_UNKNOWN = '#UNKNOWN!'; {@@ Maximum count of worksheet columns} MAX_COL_COUNT = 65535; @@ -193,6 +193,10 @@ const unique value simplifies many things... } FPS_LINE_ENDING = #10; + {@@ Replacement character used in broken UTF8 strings. Alternate replacement + chars could be: #$E2#$8E#$95 = box, or #$EF#$BF#$BD = "unicode replacement character" } + FPS_REPLACEMENT_CHAR = '?'; + var CSVParams: TsCSVParams = ( SheetIndex: 0; diff --git a/components/fpspreadsheet/source/common/fpsutils.pas b/components/fpspreadsheet/source/common/fpsutils.pas index a4da2ad6a..073c26d84 100644 --- a/components/fpspreadsheet/source/common/fpsutils.pas +++ b/components/fpspreadsheet/source/common/fpsutils.pas @@ -241,8 +241,8 @@ function GetFontAsString(AFont: TsFont): String; function ISO8601StrToDateTime(s: String): TDateTime; -function UTF8CodePoints(AText: string; out sa: TStringArray; AReplacement: String = #$E2#$8E#$95): Boolean; -function ValidUTF8Text(var AText: String; AReplacement: String = #$E2#$8E#$95): Boolean; +function UTF8CodePoints(AText: string; out sa: TStringArray): Boolean; +function ValidUTF8Text(var AText: String): Boolean; procedure AppendToStream(AStream: TStream; const AString: String); inline; overload; procedure AppendToStream(AStream: TStream; const AString1, AString2: String); inline; overload; @@ -3222,8 +3222,7 @@ end; returns them as an array of string (sa). The result of the function is false if at least one codepoint is broken UTF8. 
-------------------------------------------------------------------------------} -function UTF8CodePoints(AText: String; out sa: TStringArray; - AReplacement: string = #$E2#$8E#$95): Boolean; +function UTF8CodePoints(AText: String; out sa: TStringArray): Boolean; var n: Integer; ch: String; @@ -3245,7 +3244,7 @@ begin chLen := UTF8CodePointSize(P); if (chLen = 1) and (P^ > #127) then begin - ch := AReplacement; + ch := FPS_REPLACEMENT_CHAR; Result := false; end else begin @@ -3259,7 +3258,7 @@ begin SetLength(sa, n); end; -function ValidUTF8Text(var AText: string; AReplacement: String = #$E2#$8E#$95): Boolean; +function ValidUTF8Text(var AText: string): Boolean; var i: Integer; P: PChar; @@ -3271,7 +3270,7 @@ begin if i >= 0 then begin Delete(AText, i+1, 1); - Insert(AReplacement, AText, i+1); + Insert(FPS_REPLACEMENT_CHAR, AText, i+1); Result := false; end; until (i < 0); diff --git a/components/fpspreadsheet/source/common/fpsxmlcommon.pas b/components/fpspreadsheet/source/common/fpsxmlcommon.pas index 42df28f21..92840120e 100644 --- a/components/fpspreadsheet/source/common/fpsxmlcommon.pas +++ b/components/fpspreadsheet/source/common/fpsxmlcommon.pas @@ -47,7 +47,7 @@ function GetNodeValue(ANode: TDOMNode): String; function LineEndingToBR(const AText: String): String; function UTF8TextToXMLText(AText: string; ProcessLineEndings: Boolean = false): string; function ValidXMLText(var AText: string; ReplaceSpecialChars: Boolean = true; - ProcessLineEndings: Boolean = false; InvalidUTF8Replacement: String = #$E2#$8E#$95): Boolean; + ProcessLineEndings: Boolean = false): Boolean; function XMLQuote(AText: String): String; procedure UnzipFile(AZipFileName, AZippedFile, ADestFolder: String); @@ -202,25 +202,24 @@ end; codes (e.g. '>' --> '&gt;') @param ProcessLineEndings If TRUE line ending characters are replaced by their HTML entities. - @param InvalidUTF8Replacement UTF8-character inserted for a malformed UTF8 codepoint. 
@return FALSE if characters were replaced, TRUE otherwise. -------------------------------------------------------------------------------} function ValidXMLText(var AText: string; ReplaceSpecialChars: Boolean = true; - ProcessLineEndings: Boolean = false; - InvalidUTF8Replacement: String = #$E2#$8E#$95): Boolean; + ProcessLineEndings: Boolean = false): Boolean; +// Alternate replacement chars: #$E2#$8E#$95 = box, or #$EF#$BF#$BD = "replacement character" var i: Integer; begin // Replace broken UTF8 codepoints - Result := ValidUTF8Text(AText, InvalidUTF8Replacement); + Result := ValidUTF8Text(AText); // Replace ASCII characters which are not allowed in XML. for i := Length(AText) downto 1 do if (AText[i] < #32) and not (AText[i] in [#9, #10, #13]) then begin // Replace invalid character by box symbol Delete(AText, i, 1); - Insert(InvalidUTF8Replacement, AText, i); + Insert(FPS_REPLACEMENT_CHAR, AText, i); Result := false; end; diff --git a/components/fpspreadsheet/source/common/xlsbiff5.pas b/components/fpspreadsheet/source/common/xlsbiff5.pas index f9d38e060..186327016 100644 --- a/components/fpspreadsheet/source/common/xlsbiff5.pas +++ b/components/fpspreadsheet/source/common/xlsbiff5.pas @@ -1876,7 +1876,7 @@ begin exit; lValue := FixLineEnding(AValue); // This does not change indices for rtf. - if not ValidUTF8Text(lValue, '?') then + if not ValidUTF8Text(lValue) then Workbook.AddErrorMsg( rsInvalidCharacterInCell, [ GetCellString(ARow, ACol)