You've already forked lazarus-ccr
fpspreadsheet: Use '?' as common broken-utf8-replacement character.
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@8578 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
@@ -168,7 +168,7 @@ const
|
|||||||
STR_ERR_ARG_ERROR = '#N/A';
|
STR_ERR_ARG_ERROR = '#N/A';
|
||||||
// No Excel errors
|
// No Excel errors
|
||||||
STR_ERR_FORMULA_NOT_SUPPORTED= '<FMLA?>';
|
STR_ERR_FORMULA_NOT_SUPPORTED= '<FMLA?>';
|
||||||
STR_ERR_UNKNOWN = '#UNKNWON!';
|
STR_ERR_UNKNOWN = '#UNKNOWN!';
|
||||||
|
|
||||||
{@@ Maximum count of worksheet columns}
|
{@@ Maximum count of worksheet columns}
|
||||||
MAX_COL_COUNT = 65535;
|
MAX_COL_COUNT = 65535;
|
||||||
@@ -193,6 +193,10 @@ const
|
|||||||
unique value simplifies many things... }
|
unique value simplifies many things... }
|
||||||
FPS_LINE_ENDING = #10;
|
FPS_LINE_ENDING = #10;
|
||||||
|
|
||||||
|
{@@ Replacement character used in broken UTF8 strings. Alternate replacement
|
||||||
|
chars could be: #$E2#$8E#$95 = box, or #$EF#$BF#$BD = "unicode replacement character" }
|
||||||
|
FPS_REPLACEMENT_CHAR = '?';
|
||||||
|
|
||||||
var
|
var
|
||||||
CSVParams: TsCSVParams = (
|
CSVParams: TsCSVParams = (
|
||||||
SheetIndex: 0;
|
SheetIndex: 0;
|
||||||
|
@@ -241,8 +241,8 @@ function GetFontAsString(AFont: TsFont): String;
|
|||||||
|
|
||||||
function ISO8601StrToDateTime(s: String): TDateTime;
|
function ISO8601StrToDateTime(s: String): TDateTime;
|
||||||
|
|
||||||
function UTF8CodePoints(AText: string; out sa: TStringArray; AReplacement: String = #$E2#$8E#$95): Boolean;
|
function UTF8CodePoints(AText: string; out sa: TStringArray): Boolean;
|
||||||
function ValidUTF8Text(var AText: String; AReplacement: String = #$E2#$8E#$95): Boolean;
|
function ValidUTF8Text(var AText: String): Boolean;
|
||||||
|
|
||||||
procedure AppendToStream(AStream: TStream; const AString: String); inline; overload;
|
procedure AppendToStream(AStream: TStream; const AString: String); inline; overload;
|
||||||
procedure AppendToStream(AStream: TStream; const AString1, AString2: String); inline; overload;
|
procedure AppendToStream(AStream: TStream; const AString1, AString2: String); inline; overload;
|
||||||
@@ -3222,8 +3222,7 @@ end;
|
|||||||
returns them as an array of string (sa). The result of the function is
|
returns them as an array of string (sa). The result of the function is
|
||||||
false if at least one codepoint is broken UTF8.
|
false if at least one codepoint is broken UTF8.
|
||||||
-------------------------------------------------------------------------------}
|
-------------------------------------------------------------------------------}
|
||||||
function UTF8CodePoints(AText: String; out sa: TStringArray;
|
function UTF8CodePoints(AText: String; out sa: TStringArray): Boolean;
|
||||||
AReplacement: string = #$E2#$8E#$95): Boolean;
|
|
||||||
var
|
var
|
||||||
n: Integer;
|
n: Integer;
|
||||||
ch: String;
|
ch: String;
|
||||||
@@ -3245,7 +3244,7 @@ begin
|
|||||||
chLen := UTF8CodePointSize(P);
|
chLen := UTF8CodePointSize(P);
|
||||||
if (chLen = 1) and (P^ > #127) then
|
if (chLen = 1) and (P^ > #127) then
|
||||||
begin
|
begin
|
||||||
ch := AReplacement;
|
ch := FPS_REPLACEMENT_CHAR;
|
||||||
Result := false;
|
Result := false;
|
||||||
end else
|
end else
|
||||||
begin
|
begin
|
||||||
@@ -3259,7 +3258,7 @@ begin
|
|||||||
SetLength(sa, n);
|
SetLength(sa, n);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
function ValidUTF8Text(var AText: string; AReplacement: String = #$E2#$8E#$95): Boolean;
|
function ValidUTF8Text(var AText: string): Boolean;
|
||||||
var
|
var
|
||||||
i: Integer;
|
i: Integer;
|
||||||
P: PChar;
|
P: PChar;
|
||||||
@@ -3271,7 +3270,7 @@ begin
|
|||||||
if i >= 0 then
|
if i >= 0 then
|
||||||
begin
|
begin
|
||||||
Delete(AText, i+1, 1);
|
Delete(AText, i+1, 1);
|
||||||
Insert(AReplacement, AText, i+1);
|
Insert(FPS_REPLACEMENT_CHAR, AText, i+1);
|
||||||
Result := false;
|
Result := false;
|
||||||
end;
|
end;
|
||||||
until (i < 0);
|
until (i < 0);
|
||||||
|
@@ -47,7 +47,7 @@ function GetNodeValue(ANode: TDOMNode): String;
|
|||||||
function LineEndingToBR(const AText: String): String;
|
function LineEndingToBR(const AText: String): String;
|
||||||
function UTF8TextToXMLText(AText: string; ProcessLineEndings: Boolean = false): string;
|
function UTF8TextToXMLText(AText: string; ProcessLineEndings: Boolean = false): string;
|
||||||
function ValidXMLText(var AText: string; ReplaceSpecialChars: Boolean = true;
|
function ValidXMLText(var AText: string; ReplaceSpecialChars: Boolean = true;
|
||||||
ProcessLineEndings: Boolean = false; InvalidUTF8Replacement: String = #$E2#$8E#$95): Boolean;
|
ProcessLineEndings: Boolean = false): Boolean;
|
||||||
function XMLQuote(AText: String): String;
|
function XMLQuote(AText: String): String;
|
||||||
|
|
||||||
procedure UnzipFile(AZipFileName, AZippedFile, ADestFolder: String);
|
procedure UnzipFile(AZipFileName, AZippedFile, ADestFolder: String);
|
||||||
@@ -202,25 +202,24 @@ end;
|
|||||||
codes (e.g. '>' --> '&gt;')
|
codes (e.g. '>' --> '&gt;')
|
||||||
@param ProcessLineEndings If TRUE line ending characters are replaced by
|
@param ProcessLineEndings If TRUE line ending characters are replaced by
|
||||||
their HTML entities.
|
their HTML entities.
|
||||||
@param InvalidUTF8Replacement UTF8-character inserted for a malformed UTF8 codepoint.
|
|
||||||
@return FALSE if characters were replaced, TRUE otherwise.
|
@return FALSE if characters were replaced, TRUE otherwise.
|
||||||
-------------------------------------------------------------------------------}
|
-------------------------------------------------------------------------------}
|
||||||
function ValidXMLText(var AText: string;
|
function ValidXMLText(var AText: string;
|
||||||
ReplaceSpecialChars: Boolean = true;
|
ReplaceSpecialChars: Boolean = true;
|
||||||
ProcessLineEndings: Boolean = false;
|
ProcessLineEndings: Boolean = false): Boolean;
|
||||||
InvalidUTF8Replacement: String = #$E2#$8E#$95): Boolean;
|
// Alternate replacement chars: #$E2#$8E#$95 = box, or #$EF#$BF#$BD = "replacement character"
|
||||||
var
|
var
|
||||||
i: Integer;
|
i: Integer;
|
||||||
begin
|
begin
|
||||||
// Replace broken UTF8 codepoints
|
// Replace broken UTF8 codepoints
|
||||||
Result := ValidUTF8Text(AText, InvalidUTF8Replacement);
|
Result := ValidUTF8Text(AText);
|
||||||
|
|
||||||
// Replace ASCII characters which are not allowed in XML.
|
// Replace ASCII characters which are not allowed in XML.
|
||||||
for i := Length(AText) downto 1 do
|
for i := Length(AText) downto 1 do
|
||||||
if (AText[i] < #32) and not (AText[i] in [#9, #10, #13]) then begin
|
if (AText[i] < #32) and not (AText[i] in [#9, #10, #13]) then begin
|
||||||
// Replace invalid character by box symbol
|
// Replace invalid character by box symbol
|
||||||
Delete(AText, i, 1);
|
Delete(AText, i, 1);
|
||||||
Insert(InvalidUTF8Replacement, AText, i);
|
Insert(FPS_REPLACEMENT_CHAR, AText, i);
|
||||||
Result := false;
|
Result := false;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
@@ -1876,7 +1876,7 @@ begin
|
|||||||
exit;
|
exit;
|
||||||
|
|
||||||
lValue := FixLineEnding(AValue); // This does not change indices for rtf.
|
lValue := FixLineEnding(AValue); // This does not change indices for rtf.
|
||||||
if not ValidUTF8Text(lValue, '?') then
|
if not ValidUTF8Text(lValue) then
|
||||||
Workbook.AddErrorMsg(
|
Workbook.AddErrorMsg(
|
||||||
rsInvalidCharacterInCell, [
|
rsInvalidCharacterInCell, [
|
||||||
GetCellString(ARow, ACol)
|
GetCellString(ARow, ACol)
|
||||||
|
Reference in New Issue
Block a user