You've already forked lazarus-ccr
fpspreadsheet: Use '?' as common broken-utf8-replacement character.
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@8578 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
@ -168,7 +168,7 @@ const
|
||||
STR_ERR_ARG_ERROR = '#N/A';
|
||||
// No Excel errors
|
||||
STR_ERR_FORMULA_NOT_SUPPORTED= '<FMLA?>';
|
||||
STR_ERR_UNKNOWN = '#UNKNWON!';
|
||||
STR_ERR_UNKNOWN = '#UNKNOWN!';
|
||||
|
||||
{@@ Maximum count of worksheet columns}
|
||||
MAX_COL_COUNT = 65535;
|
||||
@ -193,6 +193,10 @@ const
|
||||
unique value simplifies many things... }
|
||||
FPS_LINE_ENDING = #10;
|
||||
|
||||
{@@ Replacement character used in broken UTF8 strings. Alternate replacement
|
||||
chars could be: #$E2#$8E#$95 = box, or #$EF#$BF#$BD = "unicode replacement character" }
|
||||
FPS_REPLACEMENT_CHAR = '?';
|
||||
|
||||
var
|
||||
CSVParams: TsCSVParams = (
|
||||
SheetIndex: 0;
|
||||
|
@ -241,8 +241,8 @@ function GetFontAsString(AFont: TsFont): String;
|
||||
|
||||
function ISO8601StrToDateTime(s: String): TDateTime;
|
||||
|
||||
function UTF8CodePoints(AText: string; out sa: TStringArray; AReplacement: String = #$E2#$8E#$95): Boolean;
|
||||
function ValidUTF8Text(var AText: String; AReplacement: String = #$E2#$8E#$95): Boolean;
|
||||
function UTF8CodePoints(AText: string; out sa: TStringArray): Boolean;
|
||||
function ValidUTF8Text(var AText: String): Boolean;
|
||||
|
||||
procedure AppendToStream(AStream: TStream; const AString: String); inline; overload;
|
||||
procedure AppendToStream(AStream: TStream; const AString1, AString2: String); inline; overload;
|
||||
@ -3222,8 +3222,7 @@ end;
|
||||
returns them as an array of string (sa). The result of the function is
|
||||
false if at least one codepoint is broken UTF8.
|
||||
-------------------------------------------------------------------------------}
|
||||
function UTF8CodePoints(AText: String; out sa: TStringArray;
|
||||
AReplacement: string = #$E2#$8E#$95): Boolean;
|
||||
function UTF8CodePoints(AText: String; out sa: TStringArray): Boolean;
|
||||
var
|
||||
n: Integer;
|
||||
ch: String;
|
||||
@ -3245,7 +3244,7 @@ begin
|
||||
chLen := UTF8CodePointSize(P);
|
||||
if (chLen = 1) and (P^ > #127) then
|
||||
begin
|
||||
ch := AReplacement;
|
||||
ch := FPS_REPLACEMENT_CHAR;
|
||||
Result := false;
|
||||
end else
|
||||
begin
|
||||
@ -3259,7 +3258,7 @@ begin
|
||||
SetLength(sa, n);
|
||||
end;
|
||||
|
||||
function ValidUTF8Text(var AText: string; AReplacement: String = #$E2#$8E#$95): Boolean;
|
||||
function ValidUTF8Text(var AText: string): Boolean;
|
||||
var
|
||||
i: Integer;
|
||||
P: PChar;
|
||||
@ -3271,7 +3270,7 @@ begin
|
||||
if i >= 0 then
|
||||
begin
|
||||
Delete(AText, i+1, 1);
|
||||
Insert(AReplacement, AText, i+1);
|
||||
Insert(FPS_REPLACEMENT_CHAR, AText, i+1);
|
||||
Result := false;
|
||||
end;
|
||||
until (i < 0);
|
||||
|
@ -47,7 +47,7 @@ function GetNodeValue(ANode: TDOMNode): String;
|
||||
function LineEndingToBR(const AText: String): String;
|
||||
function UTF8TextToXMLText(AText: string; ProcessLineEndings: Boolean = false): string;
|
||||
function ValidXMLText(var AText: string; ReplaceSpecialChars: Boolean = true;
|
||||
ProcessLineEndings: Boolean = false; InvalidUTF8Replacement: String = #$E2#$8E#$95): Boolean;
|
||||
ProcessLineEndings: Boolean = false): Boolean;
|
||||
function XMLQuote(AText: String): String;
|
||||
|
||||
procedure UnzipFile(AZipFileName, AZippedFile, ADestFolder: String);
|
||||
@ -202,25 +202,24 @@ end;
|
||||
codes (e.g. '>' --> '&gt;')
|
||||
@param ProcessLineEndings If TRUE line ending characters are replaced by
|
||||
their HTML entities.
|
||||
@param InvalidUTF8Replacement UTF8-character inserted for a malformed UTF8 codepoint.
|
||||
@return FALSE if characters were replaced, TRUE otherwise.
|
||||
-------------------------------------------------------------------------------}
|
||||
function ValidXMLText(var AText: string;
|
||||
ReplaceSpecialChars: Boolean = true;
|
||||
ProcessLineEndings: Boolean = false;
|
||||
InvalidUTF8Replacement: String = #$E2#$8E#$95): Boolean;
|
||||
ProcessLineEndings: Boolean = false): Boolean;
|
||||
// Alternate replacement chars: #$E2#$8E#$95 = box, or #$EF#$BF#$BD = "replacement character"
|
||||
var
|
||||
i: Integer;
|
||||
begin
|
||||
// Replace broken UTF8 codepoints
|
||||
Result := ValidUTF8Text(AText, InvalidUTF8Replacement);
|
||||
Result := ValidUTF8Text(AText);
|
||||
|
||||
// Replace ASCII characters which are not allowed in XML.
|
||||
for i := Length(AText) downto 1 do
|
||||
if (AText[i] < #32) and not (AText[i] in [#9, #10, #13]) then begin
|
||||
// Replace invalid character by box symbol
|
||||
Delete(AText, i, 1);
|
||||
Insert(InvalidUTF8Replacement, AText, i);
|
||||
Insert(FPS_REPLACEMENT_CHAR, AText, i);
|
||||
Result := false;
|
||||
end;
|
||||
|
||||
|
@ -1876,7 +1876,7 @@ begin
|
||||
exit;
|
||||
|
||||
lValue := FixLineEnding(AValue); // This does not change indices for rtf.
|
||||
if not ValidUTF8Text(lValue, '?') then
|
||||
if not ValidUTF8Text(lValue) then
|
||||
Workbook.AddErrorMsg(
|
||||
rsInvalidCharacterInCell, [
|
||||
GetCellString(ARow, ACol)
|
||||
|
Reference in New Issue
Block a user