fpspreadsheet: Fix writing malformed UTF8 text to Excel5 xls format (display as '?').

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@8572 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2022-10-19 15:24:39 +00:00
parent 2ba059e4f9
commit 9d63e15606
3 changed files with 44 additions and 16 deletions

View File

@ -241,7 +241,8 @@ function GetFontAsString(AFont: TsFont): String;
function ISO8601StrToDateTime(s: String): TDateTime;
function UTF8CodePoints(s: string; out sa: TStringArray): Boolean;
function UTF8CodePoints(AText: string; out sa: TStringArray; AReplacement: String = #$E2#$8E#$95): Boolean;
function ValidUTF8Text(var AText: String; AReplacement: String = #$E2#$8E#$95): Boolean;
procedure AppendToStream(AStream: TStream; const AString: String); inline; overload;
procedure AppendToStream(AStream: TStream; const AString1, AString2: String); inline; overload;
@ -3222,9 +3223,8 @@ end;
returns them as an array of string (sa). The result of the function is
false if at least one codepoint is broken UTF8.
-------------------------------------------------------------------------------}
function UTF8CodePoints(s: String; out sa: TStringArray): Boolean;
const
BROKEN_UTF8_REPLACEMENT = #$E2#$8E#$95; // Box character. Could also be a '?'.
function UTF8CodePoints(AText: String; out sa: TStringArray;
AReplacement: string = #$E2#$8E#$95): Boolean;
var
n: Integer;
ch: String;
@ -3232,21 +3232,21 @@ var
chLen: Integer;
begin
Result := true;
if s = '' then
if AText = '' then
begin
sa := nil;
exit;
end;
n := 0;
SetLength(sa, Length(s));
P := PChar(s);
PEnd := P + Length(s);
SetLength(sa, Length(AText));
P := PChar(AText);
PEnd := P + Length(AText);
while P < PEnd do
begin
chLen := UTF8CodePointSize(P);
if (chLen = 1) and (P^ > #127) then
begin
ch := BROKEN_UTF8_REPLACEMENT;
ch := AReplacement;
Result := false;
end else
begin
@ -3260,6 +3260,24 @@ begin
SetLength(sa, n);
end;
function ValidUTF8Text(var AText: string; AReplacement: String = #$E2#$8E#$95): Boolean;
var
i: Integer;
P: PChar;
begin
Result := true;
repeat
P := PChar(AText);
i := FindInvalidUTF8CodePoint(P, Length(AText), true);
if i >= 0 then
begin
Delete(AText, i+1, 1);
Insert(AReplacement, AText, i+1);
Result := false;
end;
until (i < 0);
end;
{$PUSH}{$HINTS OFF}
{@@ Silence warnings due to an unused parameter }
procedure Unused(const A1);

View File

@ -1865,7 +1865,8 @@ const
MAXBYTES = 255; // Limit for this BIFF5
var
L: Word;
AnsiValue: ansistring;
lValue: String;
ansiValue: ansistring;
rec: TBIFF5_LabelRecord;
i, nRuns: Integer;
rtfRuns: TBIFF5_RichTextFormattingRuns = nil;
@ -1874,8 +1875,14 @@ begin
if (ARow >= FLimitations.MaxRowCount) or (ACol >= FLimitations.MaxColCount) then
exit;
ansiValue := ConvertEncoding(FixLineEnding(AValue), encodingUTF8, FCodePage);
if AnsiValue = '' then begin
lValue := FixLineEnding(AValue); // This does not change indices for rtf.
if not ValidUTF8Text(lValue, '?') then
Workbook.AddErrorMsg(
rsInvalidCharacterInCell, [
GetCellString(ARow, ACol)
]);
ansiValue := ConvertEncoding(lValue, encodingUTF8, FCodePage);
if ansiValue = '' then begin
// Bad formatted UTF8String (maybe ANSI?)
if Length(AValue) <> 0 then begin
//It was an ANSI string written as UTF8 quite sure, so raise exception.
@ -1886,15 +1893,15 @@ begin
Exit;
end;
if Length(AnsiValue) > MAXBYTES then begin
if Length(ansiValue) > MAXBYTES then begin
// Rather than lose data when reading it, let the application programmer deal
// with the problem or purposefully ignore it.
AnsiValue := Copy(AnsiValue, 1, MAXBYTES);
ansiValue := Copy(ansiValue, 1, MAXBYTES);
Workbook.AddErrorMsg(rsTruncateTooLongCellText, [
MAXBYTES, GetCellString(ARow, ACol)
]);
end;
L := Length(AnsiValue);
L := Length(ansiValue);
nRuns := Length(ACell^.RichTextParams);
{ BIFF record header }
@ -1917,7 +1924,7 @@ begin
{ Copy the text characters into a buffer immediately after rec }
SetLength(buf, SizeOf(rec) + L);
Move(rec, buf[0], SizeOf(rec));
Move(AnsiValue[1], buf[SizeOf(rec)], L);
Move(ansiValue[1], buf[SizeOf(rec)], L);
{ Write out buffer }
AStream.WriteBuffer(buf[0], SizeOf(Rec) + L);

View File

@ -3779,6 +3779,9 @@ end;
The line separator for multi-line text in label cells is accepted by xls
to be either CRLF or LF, CR does not work.
This procedure replaces accidentally used single CR characters by LF.
IMPORTANT NOTE FOR RICH-TEXT FORMATTING: This function does not change the
character indices at which formatting changes.
-------------------------------------------------------------------------------}
function TsSpreadBIFFWriter.FixLineEnding(const AText: String): String;
var