You've already forked lazarus-ccr
fpspreadsheet: Fix writing malformed UTF8 text to Excel5 xls format (display as '?').
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@8572 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
@ -241,7 +241,8 @@ function GetFontAsString(AFont: TsFont): String;
|
||||
|
||||
function ISO8601StrToDateTime(s: String): TDateTime;
|
||||
|
||||
function UTF8CodePoints(s: string; out sa: TStringArray): Boolean;
|
||||
function UTF8CodePoints(AText: string; out sa: TStringArray; AReplacement: String = #$E2#$8E#$95): Boolean;
|
||||
function ValidUTF8Text(var AText: String; AReplacement: String = #$E2#$8E#$95): Boolean;
|
||||
|
||||
procedure AppendToStream(AStream: TStream; const AString: String); inline; overload;
|
||||
procedure AppendToStream(AStream: TStream; const AString1, AString2: String); inline; overload;
|
||||
@ -3222,9 +3223,8 @@ end;
|
||||
returns them as an array of string (sa). The result of the function is
|
||||
false if at least one codepoint is broken UTF8.
|
||||
-------------------------------------------------------------------------------}
|
||||
function UTF8CodePoints(s: String; out sa: TStringArray): Boolean;
|
||||
const
|
||||
BROKEN_UTF8_REPLACEMENT = #$E2#$8E#$95; // Box character. Could also be a '?'.
|
||||
function UTF8CodePoints(AText: String; out sa: TStringArray;
|
||||
AReplacement: string = #$E2#$8E#$95): Boolean;
|
||||
var
|
||||
n: Integer;
|
||||
ch: String;
|
||||
@ -3232,21 +3232,21 @@ var
|
||||
chLen: Integer;
|
||||
begin
|
||||
Result := true;
|
||||
if s = '' then
|
||||
if AText = '' then
|
||||
begin
|
||||
sa := nil;
|
||||
exit;
|
||||
end;
|
||||
n := 0;
|
||||
SetLength(sa, Length(s));
|
||||
P := PChar(s);
|
||||
PEnd := P + Length(s);
|
||||
SetLength(sa, Length(AText));
|
||||
P := PChar(AText);
|
||||
PEnd := P + Length(AText);
|
||||
while P < PEnd do
|
||||
begin
|
||||
chLen := UTF8CodePointSize(P);
|
||||
if (chLen = 1) and (P^ > #127) then
|
||||
begin
|
||||
ch := BROKEN_UTF8_REPLACEMENT;
|
||||
ch := AReplacement;
|
||||
Result := false;
|
||||
end else
|
||||
begin
|
||||
@ -3260,6 +3260,24 @@ begin
|
||||
SetLength(sa, n);
|
||||
end;
|
||||
|
||||
function ValidUTF8Text(var AText: string; AReplacement: String = #$E2#$8E#$95): Boolean;
|
||||
var
|
||||
i: Integer;
|
||||
P: PChar;
|
||||
begin
|
||||
Result := true;
|
||||
repeat
|
||||
P := PChar(AText);
|
||||
i := FindInvalidUTF8CodePoint(P, Length(AText), true);
|
||||
if i >= 0 then
|
||||
begin
|
||||
Delete(AText, i+1, 1);
|
||||
Insert(AReplacement, AText, i+1);
|
||||
Result := false;
|
||||
end;
|
||||
until (i < 0);
|
||||
end;
|
||||
|
||||
{$PUSH}{$HINTS OFF}
|
||||
{@@ Silence warnings due to an unused parameter }
|
||||
procedure Unused(const A1);
|
||||
|
@ -1865,7 +1865,8 @@ const
|
||||
MAXBYTES = 255; // Limit for this BIFF5
|
||||
var
|
||||
L: Word;
|
||||
AnsiValue: ansistring;
|
||||
lValue: String;
|
||||
ansiValue: ansistring;
|
||||
rec: TBIFF5_LabelRecord;
|
||||
i, nRuns: Integer;
|
||||
rtfRuns: TBIFF5_RichTextFormattingRuns = nil;
|
||||
@ -1874,8 +1875,14 @@ begin
|
||||
if (ARow >= FLimitations.MaxRowCount) or (ACol >= FLimitations.MaxColCount) then
|
||||
exit;
|
||||
|
||||
ansiValue := ConvertEncoding(FixLineEnding(AValue), encodingUTF8, FCodePage);
|
||||
if AnsiValue = '' then begin
|
||||
lValue := FixLineEnding(AValue); // This does not change indices for rtf.
|
||||
if not ValidUTF8Text(lValue, '?') then
|
||||
Workbook.AddErrorMsg(
|
||||
rsInvalidCharacterInCell, [
|
||||
GetCellString(ARow, ACol)
|
||||
]);
|
||||
ansiValue := ConvertEncoding(lValue, encodingUTF8, FCodePage);
|
||||
if ansiValue = '' then begin
|
||||
// Bad formatted UTF8String (maybe ANSI?)
|
||||
if Length(AValue) <> 0 then begin
|
||||
//It was an ANSI string written as UTF8 quite sure, so raise exception.
|
||||
@ -1886,15 +1893,15 @@ begin
|
||||
Exit;
|
||||
end;
|
||||
|
||||
if Length(AnsiValue) > MAXBYTES then begin
|
||||
if Length(ansiValue) > MAXBYTES then begin
|
||||
// Rather than lose data when reading it, let the application programmer deal
|
||||
// with the problem or purposefully ignore it.
|
||||
AnsiValue := Copy(AnsiValue, 1, MAXBYTES);
|
||||
ansiValue := Copy(ansiValue, 1, MAXBYTES);
|
||||
Workbook.AddErrorMsg(rsTruncateTooLongCellText, [
|
||||
MAXBYTES, GetCellString(ARow, ACol)
|
||||
]);
|
||||
end;
|
||||
L := Length(AnsiValue);
|
||||
L := Length(ansiValue);
|
||||
nRuns := Length(ACell^.RichTextParams);
|
||||
|
||||
{ BIFF record header }
|
||||
@ -1917,7 +1924,7 @@ begin
|
||||
{ Copy the text characters into a buffer immediately after rec }
|
||||
SetLength(buf, SizeOf(rec) + L);
|
||||
Move(rec, buf[0], SizeOf(rec));
|
||||
Move(AnsiValue[1], buf[SizeOf(rec)], L);
|
||||
Move(ansiValue[1], buf[SizeOf(rec)], L);
|
||||
|
||||
{ Write out buffer }
|
||||
AStream.WriteBuffer(buf[0], SizeOf(Rec) + L);
|
||||
|
@ -3779,6 +3779,9 @@ end;
|
||||
The line separator for multi-line text in label cells is accepted by xls
|
||||
to be either CRLF or LF, CR does not work.
|
||||
This procedure replaces accidentally used single CR characters by LF.
|
||||
|
||||
IMPORTANT NOTE FOR RICH-TEXT FORMATTING: This function does not change the
|
||||
character indices at which formatting changes.
|
||||
-------------------------------------------------------------------------------}
|
||||
function TsSpreadBIFFWriter.FixLineEnding(const AText: String): String;
|
||||
var
|
||||
|
Reference in New Issue
Block a user