fpspreadsheet: Simplified solution for multi-line text issue #39045 fixing also malformed UTF8 strings for ods.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@8568 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2022-10-19 10:09:16 +00:00
parent 6036db7d88
commit df2c182ee2
3 changed files with 69 additions and 101 deletions

View File

@ -9207,58 +9207,31 @@ var
i, idx, endidx, fntidx, len: Integer; i, idx, endidx, fntidx, len: Integer;
rtParam: TsRichTextParam; rtParam: TsRichTextParam;
sheet: TsWorksheet; sheet: TsWorksheet;
{$IFDEF FPS_NO_LAZUNICODE}
ch: WideChar;
wideStr, txt: WideString;
{$ELSE}
ch: String;
chArray: array of string = nil;
txt: String; txt: String;
{$ENDIF} chArray: array of string = nil;
function IsNewLine(var idx: integer): Boolean; function IsNewLine(var idx: integer): Boolean;
begin begin
{$IFDEF FPS_NO_LAZUNICODE}
if (wideStr[idx] = #13) then
begin
Result := true;
if (idx < len) and (widestr[idx+1] = #10) then inc(idx);
end else
if (wideStr[idx] = #10) then
begin
Result := true;
if (idx < len) and (widestr[idx+1] = #13) then inc(idx);
end else
Result := false;
{$ELSE}
Result := true; Result := true;
case chArray[idx] of case chArray[idx] of
#13: if (chArray[idx+1] = #10) then inc(idx); // Cannot overrun because we had appended a #0 to string #13: if (idx < len-1) and (chArray[idx+1] = #10) then inc(idx);
#10: if (chArray[idx+1] = #13) then inc(idx); // Cannot overrun because we had appended a #0 to string #10: if (idx < len-1) and (chArray[idx+1] = #13) then inc(idx);
else Result := false; else Result := false;
end; end;
{$ENDIF}
end; end;
procedure AppendTxt(NewLine: Boolean; FntStyle: String); procedure AppendTxt(NewLine: Boolean; FntStyle: String);
var
s: String;
begin begin
{$IFDEF FPS_NO_LAZUNICODE} ValidXMLText(txt);
s := UTF8Encode(txt); if NewLine and (txt = '') then
{$ELSE}
s := txt;
{$ENDIF}
ValidXMLText(s);
if NewLine and (s = '') then
totaltxt := totaltxt + '</text:p><text:p>' totaltxt := totaltxt + '</text:p><text:p>'
else else
begin begin
if FntStyle = '' then if FntStyle = '' then
totaltxt := totaltxt + s totaltxt := totaltxt + txt
else else
totaltxt := totaltxt + totaltxt := totaltxt +
'<text:span text:style-name="' + FntStyle + '">' + s + '</text:span>'; '<text:span text:style-name="' + FntStyle + '">' + txt + '</text:span>';
if NewLine then if NewLine then
totaltxt := totaltxt + '</text:p><text:p>'; totaltxt := totaltxt + '</text:p><text:p>';
end; end;
@ -9322,6 +9295,10 @@ begin
end end
else else
begin begin
// Split AValue into separate code points.
chArray := UTF8CodePoints(AValue);
len := Length(chArray);
// No hyperlink, normal text only // No hyperlink, normal text only
if Length(ACell^.RichTextParams) = 0 then if Length(ACell^.RichTextParams) = 0 then
begin begin
@ -9334,73 +9311,33 @@ begin
(until LO gets fixed). (until LO gets fixed).
} }
totaltxt := '<text:p>'; totaltxt := '<text:p>';
len := Length(AValue); idx := 0;
idx := 1; txt := '';
while (idx <= len) do while (idx < len) do
begin begin
case AValue[idx] of if IsNewLine(idx) then
#13: begin AppendTxt(true, '')
totaltxt := totaltxt + '</text:p><text:p>'; else
if (idx < len) and (AValue[idx+1] = #10) then inc(idx); totaltxt := totaltxt + chArray[idx];
end;
#10: begin
totaltxt := totaltxt + '</text:p><text:p>';
if (idx < len) and (AValue[idx+1] = #13) then inc(idx);
end;
else totaltxt := totaltxt + AValue[idx];
end;
inc(idx); inc(idx);
end; end;
totaltxt := totaltxt + '</text:p>'; totaltxt := totaltxt + '</text:p>';
end else end else
begin begin
// "Rich-text" formatting // "Rich-text" formatting
txt := ''; // Before the first formatted section, chars are having the cell's format
{$IFDEF FPS_NO_LAZUNICODE}
wideStr := UTF8Decode(AValue); // Convert to unicode
len := Length(wideStr);
{$ELSE}
// Split utf8-encoded string into the individual code points and store them
// in an array of strings.
// Index 0 of this array is ignored so that indices map to a normal string.
// An auxiliary #0 is appended so that the case of #13#10 at the end of
// AValue can be handled without complicated checks.
SetLength(chArray, Length(AValue));
i := 0;
for ch in AValue do
begin
chArray[i] := ch;
inc(i);
end;
len := i;
SetLength(chArray, len+2);
i := length(chArray)-1;
chArray[i] := #0;
dec(i);
while i > 0 do
begin
chArray[i] := chArray[i-1];
dec(i);
end;
chArray[0] := #0;
{$ENDIF}
// Before the first formatted section having the cell's format
totaltxt := '<text:p>'; totaltxt := '<text:p>';
rtParam := ACell^.RichTextParams[0]; rtParam := ACell^.RichTextParams[0];
idx := 1; idx := 0;
txt := '';
if rtParam.FirstIndex > 1 then if rtParam.FirstIndex > 1 then
begin begin
while (idx <= len) and (idx < rtParam.FirstIndex) do while (idx < len) and (idx + 1 < rtParam.FirstIndex) do
begin begin
{$IFDEF FPS_NO_LAZUNICODE}
ch := wideStr[idx];
{$ELSE}
ch := chArray[idx];
{$ENDIF}
if IsNewLine(idx) then if IsNewLine(idx) then
AppendTxt(true, '') AppendTxt(true, '')
else else
txt := txt + ch; txt := txt + chArray[idx];
inc(idx); inc(idx);
end; end;
if txt <> '' then if txt <> '' then
@ -9409,7 +9346,7 @@ begin
txt := ''; txt := '';
for i := 0 to High(ACell^.RichTextParams) do for i := 0 to High(ACell^.RichTextParams) do
begin begin
// Formatted parts of the string according the RichTextParams // Formatted parts of the string according to the RichTextParams
rtParam := ACell^.RichTextParams[i]; rtParam := ACell^.RichTextParams[i];
fnt := (FWorkbook as TsWorkbook).GetFont(rtParam.FontIndex); fnt := (FWorkbook as TsWorkbook).GetFont(rtParam.FontIndex);
fntidx := FRichTextFontList.IndexOfObject(fnt); fntidx := FRichTextFontList.IndexOfObject(fnt);
@ -9418,17 +9355,12 @@ begin
endidx := ACell^.RichTextParams[i+1].FirstIndex-1 endidx := ACell^.RichTextParams[i+1].FirstIndex-1
else else
endidx := len; endidx := len;
while (idx <= len) and (idx <= endidx) do while (idx < len) and (idx < endidx) do
begin begin
{$IFDEF FPS_NO_LAZUNICODE}
ch := wideStr[idx];
{$ELSE}
ch := chArray[idx];
{$ENDIF}
if IsNewLine(idx) then if IsNewLine(idx) then
AppendTxt(true, fntName) AppendTxt(true, fntName)
else else
txt := txt + ch; txt := txt + chArray[idx];
inc(idx); inc(idx);
end; end;
if txt <> '' then if txt <> '' then

View File

@ -12,7 +12,6 @@ unit fpsUtils;
// to do: Remove the declaration UTF8FormatSettings and InitUTF8FormatSettings // to do: Remove the declaration UTF8FormatSettings and InitUTF8FormatSettings
// when this same modification is in LazUtils of Laz stable // when this same modification is in LazUtils of Laz stable
{$mode objfpc}{$H+} {$mode objfpc}{$H+}
interface interface
@ -242,6 +241,8 @@ function GetFontAsString(AFont: TsFont): String;
function ISO8601StrToDateTime(s: String): TDateTime; function ISO8601StrToDateTime(s: String): TDateTime;
function UTF8CodePoints(s: string): TStringArray;
procedure AppendToStream(AStream: TStream; const AString: String); inline; overload; procedure AppendToStream(AStream: TStream; const AString: String); inline; overload;
procedure AppendToStream(AStream: TStream; const AString1, AString2: String); inline; overload; procedure AppendToStream(AStream: TStream; const AString1, AString2: String); inline; overload;
procedure AppendToStream(AStream: TStream; const AString1, AString2, AString3: String); inline; overload; procedure AppendToStream(AStream: TStream; const AString1, AString2, AString3: String); inline; overload;
@ -260,7 +261,9 @@ var
implementation implementation
uses uses
Math, lazutf8, lazfileutils, fpsStrings, fpsReaderWriter; Math, lazutf8, lazfileutils,
{$IFNDEF FPS_NO_LAZUNICODE}LazUnicode,{$ENDIF}
fpsStrings, fpsReaderWriter;
const const
INT_NUM_LETTERS = 26; INT_NUM_LETTERS = 26;
@ -3214,6 +3217,44 @@ begin
Result := Result - GetLocalTimeOffset / MinsPerDay; Result := Result - GetLocalTimeOffset / MinsPerDay;
end; end;
{@@ ----------------------------------------------------------------------------
Splits the UTF8-encoded string into its code-points ("characters") and
returns them as an array of string.
-------------------------------------------------------------------------------}
function UTF8CodePoints(s: String): TStringArray;
const
BROKEN_UTF8_REPLACEMENT = #$E2#$8E#$95; // Box character. Could also be a '?'.
var
n: Integer;
ch: String;
P, PEnd: PChar;
chLen: Integer;
begin
if s = '' then
begin
Result := nil;
exit;
end;
n := 0;
SetLength(Result, Length(s));
P := PChar(s);
PEnd := P + Length(s);
while P < PEnd do
begin
chLen := UTF8CodePointSize(P);
if (chLen = 1) and (P^ > #127) then
ch := BROKEN_UTF8_REPLACEMENT
else
begin
SetLength(ch, chLen);
Move(P^, ch[1], chLen);
end;
Result[n] := ch;
inc(P, chLen);
inc(n);
end;
SetLength(Result, n);
end;
{$PUSH}{$HINTS OFF} {$PUSH}{$HINTS OFF}
{@@ Silence warnings due to an unused parameter } {@@ Silence warnings due to an unused parameter }

View File

@ -31,11 +31,6 @@
FPS_LAZUTF8. Keep undefined for the current Lazarus version. } FPS_LAZUTF8. Keep undefined for the current Lazarus version. }
{.$DEFINE FPS_LAZUTF8} {.$DEFINE FPS_LAZUTF8}
{ fpspreadsheet requires some functions from unit LazUnicode which was
added to Lazarus in v1.8. If an older Lazarus version is used define
FPS_NO_LAZUNICODE. Keep undefined for the current Lazarus version. }
{.$DEFINE FPS_NO_LAZUNICODE}
{ In order to allow selection of multiple ranges in the WorksheetGrid a { In order to allow selection of multiple ranges in the WorksheetGrid a
sufficiently new version of the basic TCustomGrid is required. The required sufficiently new version of the basic TCustomGrid is required. The required
property "RangeSelect" was introduced in Lazarus 1.4. In order to compile property "RangeSelect" was introduced in Lazarus 1.4. In order to compile