fpspreadsheet: Simplified solution for multi-line text issue #39045 fixing also malformed UTF8 strings for ods.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@8568 8e941d3f-bd1b-0410-a28a-d453659cc2b4
2022-10-19 10:09:16 +00:00
parent 6036db7d88
commit df2c182ee2
3 changed files with 69 additions and 101 deletions
--- a/components/fpspreadsheet/source/common/fpsopendocument.pas
+++ b/components/fpspreadsheet/source/common/fpsopendocument.pas
@ -9207,58 +9207,31 @@ var
  i, idx, endidx, fntidx, len: Integer;
  rtParam: TsRichTextParam;
  sheet: TsWorksheet;
  {$IFDEF FPS_NO_LAZUNICODE}
  ch: WideChar;
  wideStr, txt: WideString;
  {$ELSE}
  ch: String;
  chArray: array of string = nil;
  txt: String;
-  {$ENDIF}
+  chArray: array of string = nil;
  function IsNewLine(var idx: integer): Boolean;
  begin
   {$IFDEF FPS_NO_LAZUNICODE}
    if (wideStr[idx] = #13) then
    begin
      Result := true;
      if (idx < len) and (widestr[idx+1] = #10) then inc(idx);
    end else
    if (wideStr[idx] = #10) then
    begin
      Result := true;
      if (idx < len) and (widestr[idx+1] = #13) then inc(idx);
    end else
      Result := false;
   {$ELSE}
    Result := true;
    case chArray[idx] of
-      #13: if (chArray[idx+1] = #10) then inc(idx);  // Cannot overrun because we had appended a #0 to string
+      #13: if (idx < len-1) and (chArray[idx+1] = #10) then inc(idx);
-      #10: if (chArray[idx+1] = #13) then inc(idx);  // Cannot overrun because we had appended a #0 to string
+      #10: if (idx < len-1) and (chArray[idx+1] = #13) then inc(idx);
      else Result := false;
    end;
   {$ENDIF}
  end;
  procedure AppendTxt(NewLine: Boolean; FntStyle: String);
  var
    s: String;
  begin
-    {$IFDEF FPS_NO_LAZUNICODE}
+    ValidXMLText(txt);
-    s := UTF8Encode(txt);
+    if NewLine and (txt = '') then
    {$ELSE}
    s := txt;
    {$ENDIF}
    ValidXMLText(s);
    if NewLine and (s = '') then
      totaltxt := totaltxt + '</text:p><text:p>'
    else
    begin
      if FntStyle = '' then
-        totaltxt := totaltxt + s
+        totaltxt := totaltxt + txt
      else
        totaltxt := totaltxt +
-          '<text:span text:style-name="' + FntStyle + '">' + s + '</text:span>';
+          '<text:span text:style-name="' + FntStyle + '">' + txt + '</text:span>';
      if NewLine then
        totaltxt := totaltxt + '</text:p><text:p>';
    end;
@ -9322,6 +9295,10 @@ begin
  end
  else
  begin
    // Split AValue into separate code points.
    chArray := UTF8CodePoints(AValue);
    len := Length(chArray);
    // No hyperlink, normal text only
    if Length(ACell^.RichTextParams) = 0 then
    begin
@ -9334,73 +9311,33 @@ begin
        (until LO gets fixed).
      }
      totaltxt := '<text:p>';
-      len := Length(AValue);
+      idx := 0;
-      idx := 1;
+      txt := '';
-      while (idx <= len) do
+      while (idx < len) do
      begin
-        case AValue[idx] of
+        if IsNewLine(idx) then
-          #13: begin
+          AppendTxt(true, '')
-                 totaltxt := totaltxt + '</text:p><text:p>';
+        else
-                 if (idx < len) and (AValue[idx+1] = #10) then inc(idx);
+          totaltxt := totaltxt + chArray[idx];
               end;
          #10: begin
                 totaltxt := totaltxt + '</text:p><text:p>';
                 if (idx < len) and (AValue[idx+1] = #13) then inc(idx);
               end;
          else totaltxt := totaltxt + AValue[idx];
        end;
        inc(idx);
      end;
      totaltxt := totaltxt + '</text:p>';
    end else
    begin
      // "Rich-text" formatting
-      txt := '';
+      // Before the first formatted section, chars are having the cell's format
      {$IFDEF FPS_NO_LAZUNICODE}
      wideStr := UTF8Decode(AValue);  // Convert to unicode
      len := Length(wideStr);
      {$ELSE}
      // Split utf8-encoded string into the individual code points and store them
      // in an array of strings.
      // Index 0 of this array is ignored so that indices map to a normal string.
      // An auxiliary #0 is appended so that the case of #13#10 at the end of
      // AValue can be handled without complicated checks.
      SetLength(chArray, Length(AValue));
      i := 0;
      for ch in AValue do
      begin
        chArray[i] := ch;
        inc(i);
      end;
      len := i;
      SetLength(chArray, len+2);
      i := length(chArray)-1;
      chArray[i] := #0;
      dec(i);
      while i > 0 do
      begin
        chArray[i] := chArray[i-1];
        dec(i);
      end;
      chArray[0] := #0;
      {$ENDIF}
      // Before the first formatted section having the cell's format
      totaltxt := '<text:p>';
      rtParam := ACell^.RichTextParams[0];
-      idx := 1;
+      idx := 0;
      txt := '';
      if rtParam.FirstIndex > 1 then
      begin
-        while (idx <= len) and (idx < rtParam.FirstIndex) do
+        while (idx < len) and (idx + 1 < rtParam.FirstIndex) do
        begin
          {$IFDEF FPS_NO_LAZUNICODE}
          ch := wideStr[idx];
          {$ELSE}
          ch := chArray[idx];
          {$ENDIF}
          if IsNewLine(idx) then
            AppendTxt(true, '')
          else
-            txt := txt + ch;
+            txt := txt + chArray[idx];
          inc(idx);
        end;
        if txt <> '' then
@ -9409,7 +9346,7 @@ begin
      txt := '';
      for i := 0 to High(ACell^.RichTextParams) do
      begin
-        // Formatted parts of the string according the RichTextParams
+        // Formatted parts of the string according to the RichTextParams
        rtParam := ACell^.RichTextParams[i];
        fnt := (FWorkbook as TsWorkbook).GetFont(rtParam.FontIndex);
        fntidx := FRichTextFontList.IndexOfObject(fnt);
@ -9418,17 +9355,12 @@ begin
          endidx := ACell^.RichTextParams[i+1].FirstIndex-1
        else
          endidx := len;
-        while (idx <= len) and (idx <= endidx) do
+        while (idx < len) and (idx < endidx) do
        begin
          {$IFDEF FPS_NO_LAZUNICODE}
          ch := wideStr[idx];
          {$ELSE}
          ch := chArray[idx];
          {$ENDIF}
          if IsNewLine(idx) then
            AppendTxt(true, fntName)
          else
-            txt := txt + ch;
+            txt := txt + chArray[idx];
          inc(idx);
        end;
        if txt <> '' then
--- a/components/fpspreadsheet/source/common/fpsutils.pas
+++ b/components/fpspreadsheet/source/common/fpsutils.pas
@ -12,7 +12,6 @@ unit fpsUtils;
 // to do: Remove the declaration UTF8FormatSettings and InitUTF8FormatSettings
 //        when this same modification is in LazUtils of Laz stable
 {$mode objfpc}{$H+}
 interface
@ -242,6 +241,8 @@ function GetFontAsString(AFont: TsFont): String;
 function ISO8601StrToDateTime(s: String): TDateTime;
 function UTF8CodePoints(s: string): TStringArray;
 procedure AppendToStream(AStream: TStream; const AString: String); inline; overload;
 procedure AppendToStream(AStream: TStream; const AString1, AString2: String); inline; overload;
 procedure AppendToStream(AStream: TStream; const AString1, AString2, AString3: String); inline; overload;
@ -260,7 +261,9 @@ var
 implementation
 uses
-  Math, lazutf8, lazfileutils, fpsStrings, fpsReaderWriter;
+  Math, lazutf8, lazfileutils,
  {$IFNDEF FPS_NO_LAZUNICODE}LazUnicode,{$ENDIF}
  fpsStrings, fpsReaderWriter;
 const
  INT_NUM_LETTERS = 26;
@ -3214,6 +3217,44 @@ begin
    Result := Result - GetLocalTimeOffset / MinsPerDay;
 end;
 {@@ ----------------------------------------------------------------------------
  Splits the UTF8-encoded string into its code-points ("characters") and
  returns them as an array of string.
 -------------------------------------------------------------------------------}
 function UTF8CodePoints(s: String): TStringArray;
 const
  BROKEN_UTF8_REPLACEMENT = #$E2#$8E#$95; // Box character. Could also be a '?'.
 var
  n: Integer;
  ch: String;
  P, PEnd: PChar;
  chLen: Integer;
 begin
  if s = '' then
  begin
    Result := nil;
    exit;
  end;
  n := 0;
  SetLength(Result, Length(s));
  P := PChar(s);
  PEnd := P + Length(s);
  while P < PEnd do
  begin
    chLen := UTF8CodePointSize(P);
    if (chLen = 1) and (P^ > #127) then
      ch := BROKEN_UTF8_REPLACEMENT
    else
    begin
      SetLength(ch, chLen);
      Move(P^, ch[1], chLen);
    end;
    Result[n] := ch;
    inc(P, chLen);
    inc(n);
  end;
  SetLength(Result, n);
 end;
 {$PUSH}{$HINTS OFF}
 {@@ Silence warnings due to an unused parameter }
--- a/components/fpspreadsheet/source/fps.inc
+++ b/components/fpspreadsheet/source/fps.inc
@ -31,11 +31,6 @@
  FPS_LAZUTF8. Keep undefined for the current Lazarus version. }
 {.$DEFINE FPS_LAZUTF8}
 { fpspreadsheet requires some functions from unit LazUnicode which was
  added to Lazarus in v1.8. If an older Lazarus version is used define
  FPS_NO_LAZUNICODE. Keep undefined for the current Lazarus version. }
 {.$DEFINE FPS_NO_LAZUNICODE}
 { In order to allow selection of multiple ranges in the WorksheetGrid a
  sufficiently new version of the basic TCustomGrid is required. The required
  property "RangeSelect" was introduced in Lazarus 1.4. In order to compile