From a6900014cf90ddea19ea2ba6e0a2ee4e7fb2df1f Mon Sep 17 00:00:00 2001 From: wp_xxyyzz Date: Thu, 31 May 2018 16:22:41 +0000 Subject: [PATCH] fpspreadsheet: Use a StringHashTable for the SST needed when writing BIFF8 (significant speed-up of files with many strings, but still considerably slower than BIFF5 and BIFF2). git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@6447 8e941d3f-bd1b-0410-a28a-d453659cc2b4 --- .../source/common/fpspreadsheet.pas | 4 +- .../fpspreadsheet/source/common/fpsutils.pas | 55 ++++ .../fpspreadsheet/source/common/xlsbiff8.pas | 265 ++---------------- 3 files changed, 85 insertions(+), 239 deletions(-) diff --git a/components/fpspreadsheet/source/common/fpspreadsheet.pas b/components/fpspreadsheet/source/common/fpspreadsheet.pas index 42afbb075..f1d621992 100644 --- a/components/fpspreadsheet/source/common/fpspreadsheet.pas +++ b/components/fpspreadsheet/source/common/fpspreadsheet.pas @@ -7892,9 +7892,9 @@ begin end; -{------------------------------------------------------------------------------} +{==============================================================================} { TsWorkbook } -{------------------------------------------------------------------------------} +{==============================================================================} {@@ ---------------------------------------------------------------------------- Helper method called before reading the workbook. Clears the error log. diff --git a/components/fpspreadsheet/source/common/fpsutils.pas b/components/fpspreadsheet/source/common/fpsutils.pas index db1b71c2f..d9dbf5a89 100644 --- a/components/fpspreadsheet/source/common/fpsutils.pas +++ b/components/fpspreadsheet/source/common/fpsutils.pas @@ -179,6 +179,10 @@ function AnalyzeCompareStr(AString: String; out ACompareOp: TsCompareOperation): procedure FixLineEndings(var AText: String; var ARichTextParams: TsRichTextParams); function RandomString(ALen: Integer): String; function SameRichTextParams(ARtp1, ARtp2: TsRichTextparams): Boolean; +function CombineTextAndRichTextParams(AText: String; + ARichText: TsRichTextParams): String; +procedure SplitTextAndRichTextParams(AValue: String; + out AText: String; out ARichText: TsRichTextParams); function SplitStr(const AText: String; ADelimiter: Char): TStringArray; function UnquoteStr(AString: String): String; @@ -2195,6 +2199,57 @@ begin Result := true; end; +{@@ ---------------------------------------------------------------------------- + Append the rich-text parameters to the bare text. Needed for StringHashList. +-------------------------------------------------------------------------------} +function CombineTextAndRichTextParams(AText: String; + ARichText: TsRichTextParams): String; +var + i: Integer; +begin + Result := AText; + if Length(ARichText) > 0 then begin + Result := Format('%s|@|%d,%d,%d', [ + Result, ARichText[0].FirstIndex, ARichText[0].FontIndex, ARichText[0].HyperlinkIndex + ]); + for i:=1 to High(ARichText) do + Result := Format('%s;%d,%d,%d', [ + Result, ARichText[i].FirstIndex, ARichText[i].FontIndex, ARichText[i].HyperlinkIndex + ]); + end; +end; + +{@@ ---------------------------------------------------------------------------- + Split text and rich-text parameters from the combined string needed for + StringHashList +-------------------------------------------------------------------------------} +procedure SplitTextAndRichTextParams(AValue: String; out AText: String; + out ARichText: TsRichTextParams); +const + SEPARATOR = '|@|'; +var + p: Integer; + arr1, arr2: TStringArray; + i: Integer; +begin + p := pos(SEPARATOR, AValue); + if p = 0 then begin + AText := AValue; + SetLength(ARichText, 0); + end else + begin + AText := Copy(AValue, 1, p-1); + arr1 := SplitStr(Copy(AValue, p+Length(SEPARATOR), MaxInt), ';'); + SetLength(ARichText, Length(arr1)); + for i := 0 to Length(arr1)-1 do begin + arr2 := SplitStr(arr1[i], ','); + ARichText[i].FirstIndex := StrToInt(arr2[0]); + ARichText[i].FontIndex := StrToInt(arr2[1]); + ARichText[i].HyperlinkIndex := StrToInt(arr2[2]); + end; + end; +end; + {@@ ---------------------------------------------------------------------------- Splits a string at the specified delimiters into individual strings and passes them in an array. diff --git a/components/fpspreadsheet/source/common/xlsbiff8.pas b/components/fpspreadsheet/source/common/xlsbiff8.pas index 2ca742d03..a213f9662 100644 --- a/components/fpspreadsheet/source/common/xlsbiff8.pas +++ b/components/fpspreadsheet/source/common/xlsbiff8.pas @@ -54,7 +54,7 @@ unit xlsbiff8; interface uses - Classes, SysUtils, fpcanvas, DateUtils, contnrs, lazutf8, + Classes, SysUtils, fpcanvas, DateUtils, contnrs, lazutf8, stringhashlist, fpstypes, xlscommon, {$ifdef USE_NEW_OLE} fpolebasic, @@ -185,7 +185,7 @@ type TsSpreadBIFF8Writer = class(TsSpreadBIFFWriter) private - FSharedStringTable: TStringList; + FSharedStringTable: TStringHashList; FNumStrings: DWord; FBiff8ExternBooks: TsBIFF8ExternbookList; FBiff8ExternSheets: TsBIFF8ExternSheetList; @@ -786,12 +786,14 @@ begin { Destroy shared string table } if Assigned(FSharedStringTable) then + { begin for j := FSharedStringTable.Count-1 downto 0 do if FSharedStringTable.Objects[j] <> nil then FSharedStringTable.Objects[j].Free; + } FSharedStringTable.Free; - end; + //end; if Assigned(FCommentList) then FCommentList.Free; @@ -799,7 +801,6 @@ begin inherited; end; - {@@ ---------------------------------------------------------------------------- Populates the reader's default palette using the BIFF8 default colors. -------------------------------------------------------------------------------} @@ -1109,7 +1110,8 @@ begin then FCommentList := TObjectList.Create else FCommentList.Clear; - if Assigned(FSharedStringTable) then FreeAndNil(FSharedStringTable); + if Assigned(FSharedStringTable) then + FreeAndNil(FSharedStringTable); while (not SectionEOF) do begin { Read the record header } @@ -1691,6 +1693,8 @@ var LString: String; ContinueIndicator: WORD; rtParams: TsRichTextParams; + p: Pointer; + n: Integer; ms: TMemoryStream; begin //Reads the shared string table, only compatible with BIFF8 @@ -1705,7 +1709,7 @@ begin Items := DWordLEtoN(AStream.ReadDWord); Dec(PendingRecordSize, 8); end else begin - //A second record must not happend. Garbage so skip. + //A second record must not happen. Garbage so skip. Exit; end; @@ -1806,6 +1810,7 @@ begin end else cell := (FWorksheet as TsWorksheet).AddCell(ARow, ACol); + { Read text from shared string table entry } (FWorksheet as TsWorksheet).WriteText(cell, FSharedStringTable.Strings[SSTIndex]); { Add attributes } @@ -2558,7 +2563,6 @@ function DoCollectSheetsWith3dRefs(ANode: TsExprNode; AData: Pointer): Boolean; var sheetlist: TsBIFF8ExternSheetList; sheetIdx, sheetIdx1, sheetIdx2: Integer; - workbook: TsWorkbook; begin sheetlist := TsBIFF8ExternSheetList(AData); if (ANode is TsCellExprNode) and TsCellExprNode(ANode).Has3DLink then @@ -2568,7 +2572,6 @@ begin end else if (ANode is TsCellRangeExprNode) and TsCellRangeExprNode(ANode).Has3DLink then begin - workbook := TsCellRangeExprNode(ANode).Workbook as TsWorkbook; sheetIdx1 := TsCellRangeExprNode(ANode).GetSheetIndex(1); sheetIdx2 := TsCellRangeExprNode(ANode).GetSheetIndex(2); for sheetIdx := sheetIdx1 to sheetIdx2 do @@ -2591,40 +2594,7 @@ procedure TsSpreadBIFF8Writer.CollectExternData; for formula in ASheet.Formulas do formula^.Parser.IterateNodes(@DoCollectSheetsWith3dRefs, FBiff8ExternSheets); end; -{ - procedure DoCollectForSheet(ASheet: TsWorksheet); - var - cell: PCell; - parser: TsExpressionParser; - rpn: TsRPNFormula; - fe: TsFormulaElement; - j: Integer; - begin - for cell in ASheet.Cells do - begin - if not HasFormula(cell) then - Continue; - if (cell^.Flags * [cf3dFormula, cfCalculated] = [cfCalculated]) then - Continue; - parser := TsSpreadsheetParser.Create(ASheet); - try - parser.Expression := cell^.FormulaValue; - rpn := parser.RPNFormula; - for j:=0 to High(rpn) do - begin - fe := rpn[j]; - if fe.ElementKind in [fekCell3d, fekCellRef3d, fekCellRange3d] then - FBiff8ExternSheets.AddSheets('', nil, fe.Sheet, fe.Sheet2); - // FIXME: '' --> supporting only internal 3d links so far - end; - finally - parser.Free; - rpn := nil; - end; - end; - end; - } var book: TsWorkbook; sheet: TsWorksheet; @@ -2697,17 +2667,13 @@ function TsSpreadBIFF8Writer.IndexOfSharedString(const AText: String; const ARichTextParams: TsRichTextParams): Integer; var s: String; - obj: TObject; begin if FSharedStringTable <> nil then - for Result := 0 to FSharedStringTable.Count-1 do begin - s := FSharedStringTable.Strings[Result]; - obj := FSharedStringTable.Objects[Result]; -// if (s = AText) and (TsRichTextParams(obj) = ARichTextParams) - if (s = AText) and SameRichTextParams(TsRichTextParams(obj), ARichTextParams) - then exit; - end; - Result := -1; + begin + s := CombineTextAndRichTextParams(AText, ARichTextParams); + Result := FSharedStringTable.Find(s); + end else + Result := -1; end; {@@ ---------------------------------------------------------------------------- @@ -2852,9 +2818,10 @@ var cell: PCell; sheet: TsWorksheet; book: TsWorkbook absolute AWorkbook; + s: String; begin FNumStrings := 0; - FSharedStringTable := TStringList.Create; + FSharedStringTable := TStringHashList.Create(true); for i:=0 to book.GetWorksheetCount-1 do begin @@ -2868,7 +2835,8 @@ begin inc(FNumStrings); if idx > -1 then Continue; - FSharedStringTable.AddObject(cell^.UTF8StringValue, TObject(cell^.RichTextParams)); + s := CombineTextAndRichTextParams(cell^.UTF8StringValue, cell^.RichTextParams); + FSharedStringTable.Add(s); end; end; end; @@ -3794,12 +3762,6 @@ begin { BIFF record header } rec.RecordID := WordToLE(IfThen(nRuns > 0, INT_EXCEL_ID_RSTRING, INT_EXCEL_ID_LABEL)); - (* - recSize := SizeOf(TBiff8_LabelRecord) - SizeOf(TsBiffHeader) + L*SizeOf(WideChar); - if nRuns > 0 then - inc(recSize, SizeOf(Word) + nRunms * SizeOf(TBiff8_RichTextFormattingRun); - if n - *) rec.RecordSize := SizeOf(TBiff8_LabelRecord) - SizeOf(TsBiffHeader) + L *SizeOf(WideChar); if nRuns > 0 then inc(rec.RecordSize, SizeOf(Word) + nRuns * SizeOf(TBiff8_RichTextFormattingRun)); @@ -4116,19 +4078,6 @@ begin AStream.WriteWord(WordToLE(c)); Result := 4; end; - (* -function TsSpreadBIFF8Writer.WriteRPNCellAddress3D(AStream: TStream; - ASheet, ARow, ACol: Cardinal; AFlags: TsRelFlags): Word; -begin - // Next line is a simplification: We should write the index of the sheet - // in the REF record here, but these are arranged in the same order as the - // sheets. --> MUST BE RE-DONE ONCE SHEET RANGES ARE ALLOWED. - AStream.WriteWord(WordToLE(ASheet)); - - // Write row/column address - Result := 2 + WriteRPNCellAddress(AStream, ARow, ACol, AFlags); -end; - *) {@@ ---------------------------------------------------------------------------- Writes row and column offset needed in RPN formulas (unsigned integers!) @@ -4315,7 +4264,7 @@ procedure TsSpreadBIFF8Writer.WriteSST(AStream: TStream); var sizePos: Int64; bytesWritten, totalBytesWritten: Integer; - i, j: Integer; + i, j, n: Integer; rtParams: TsRichTextParams; bytesAvail: Integer; isASCII: Boolean; @@ -4347,7 +4296,9 @@ begin totalBytesWritten := 8; for i:=0 to FSharedStringTable.Count-1 do begin - s := FixLineEnding(FSharedStringTable.Strings[i]); + SplitTextAndRichTextParams(FSharedStringTable.List[i]^.Key, s, rtParams); + + s := FixLineEnding(s); isASCII := Is8BitString(s); if isASCII then begin @@ -4369,12 +4320,9 @@ begin end; end; - SetLength(rtParams, Length(TsRichTextParams(FSharedStringTable.Objects[i]))); - for j := 0 to High(rtParams) do begin - rtParams[j] := TsRichTextParams(FSharedStringTable.Objects[i])[j]; - // Index of new font. Be aware of font #4 missing in BIFF! + for j := 0 to High(rtParams) do + // Be aware of font #4 missing in BIFF! if rtParams[j].FontIndex >= 4 then inc(rtParams[j].FontIndex); - end; textIndex := 1; rtIndex := 0; @@ -4404,163 +4352,6 @@ begin FixRecordSize(AStream, sizePos, totalBytesWritten); end; - -(* -procedure TsSpreadBIFF8Writer.WriteSST(AStream: TStream); -type - TBiff8RichTextParam = packed record - FirstIndex: Word; - FontIndex: Word; - end; - TBiff8RichTextParams = array of TBiff8RichTextParam; -var - i, j: Integer; - pSize: Int64; - s: string; - ws: WideString; - rtParams: TsRichTextParams; - biffRtParams: TBiff8RichTextParams; - bytesAvail, bytesToWrite, bytesWritten, totalBytesWritten: Integer; - hasRtp: Boolean; - hdrSize: Integer; - flags: Byte; - startIndex: Integer; - needCONTINUE: Boolean; - - procedure EndRecord; - var - p: Int64; - begin - p := AStream.Position; - AStream.Position := pSize; - AStream.WriteWord(WordToLE(totalBytesWritten)); - AStream.Position := p; - end; - - procedure BeginCONTINUERecord; - begin - AStream.WriteWord(WordToLE(INT_EXCEL_ID_CONTINUE)); - pSize := AStream.Position; - AStream.WriteWord(0); - end; - -begin - if FSharedStringTable.Count = 0 then - exit; - - { Write BIFF header } - AStream.WriteWord(WordToLE(INT_EXCEL_ID_SST)); - pSize := AStream.Position; - AStream.WriteWord(0); // Size of record - will be written later - - { Number of strings in workbook } - AStream.WriteDWord(DWordToLE(FNumStrings)); - - { Number of strings in SST } - AStream.WriteDWord(DWordToLE(FSharedStringTable.Count)); - - { Here the strings plus rich-text parameters are following. This is a bit - complicated because usually there are many strings, but each record can - hold only 8224 bytes (MAX_BYTES_IN_RECORD) which requires additional - CONTINUE records. } - - totalBytesWritten := 8; - - for i:=0 to FSharedStringTable.Count-1 do - begin - // Assemble the string to be written in a buffer stream - s := FixLineEnding(FSharedStringTable.Strings[i]); - ws := WideStringToLE(UTF8Decode(s)); - rtParams := TsRichTextParams(FSharedStringTable.Objects[i]); - SetLength(biffRtParams, Length(rtParams)); - for j := 0 to High(biffRtParams) do begin - biffRtParams[j].FirstIndex := WordToLE(rtParams[j].FirstIndex) - 1; - // character index is 0-based in file, but 1-based in fps. - biffRtParams[j].FontIndex := WordToLE(rtParams[j].FontIndex); - end; - hasRtp := Length(rtParams) > 0; - hdrsize := IfThen(hasRtp, 3+2, 3); - - bytesAvail := MAX_BYTES_IN_RECORD - totalBytesWritten; - - // (1) String header - // String header plus 1st character do not fit into current record - // ---> move everything to a CONTINUE record - if bytesAvail < hdrsize + SizeOf(WideChar) then begin - EndRecord; - BeginCONTINUERecord; // Begins a CONTINUE record - end else begin - { Write string length } - AStream.WriteWord(WordToLE(Length(ws))); - { Write string flags byte } - flags := 1; // 1 = uncompressed data (= wide chars) - if hasRtp then inc(flags, 8); // 8 = has rich-text formatting runs - inc(totalbytesWritten, 3); - AStream.Writebyte(flags); - { Write number of rich-text formatting runs } - if hasRtp then begin - AStream.WriteWord(WordToLE(Length(rtParams))); - inc(totalBytesWritten, 2); - end; - end; - - // (2) String characters - bytesAvail := MAX_BYTES_IN_RECORD - totalBytesWritten; - if odd(bytesAvail) then dec(bytesAvail); // Split between widechars - bytesToWrite := Length(ws) * SizeOf(WideChar); - needCONTINUE := bytesToWrite > bytesAvail; - startIndex := 1; - while needCONTINUE do begin - // Fill remainder of current record - bytesWritten := AStream.Write(ws[startIndex], bytesAvail); - inc(totalBytesWritten, bytesWritten); - EndRecord; - BeginCONTINUERecord; - // Write flag byte because string is split - AStream.WriteByte(1); - totalBytesWritten := 1; - startIndex := StartIndex + bytesWritten div 2; - bytesAvail := MAX_BYTES_IN_RECORD - totalBytesWritten; - if odd(bytesAvail) then dec(bytesAvail); - bytesToWrite := (Length(ws) - startIndex + 1) * SizeOf(WideChar); - needCONTINUE := bytesToWrite > bytesAvail; - end; - if bytesToWrite > 0 then begin - bytesWritten := AStream.Write(ws[startIndex], bytesToWrite); - inc(totalBytesWritten, bytesWritten); - end; - - // (3) Rich-text formatting runs - bytesAvail := MAX_BYTES_IN_RECORD - totalBytesWritten; - // Make sure to split between runs - bytesAvail := (bytesAvail div 4) * 4; - bytesToWrite := Length(biffRtParams) * 4; // 4 = size of formatting run - needCONTINUE := bytesToWrite > bytesAvail; - startIndex := 0; - while needCONTINUE do begin - // Fill remainder of current record - bytesWritten := AStream.Write(biffRtParams[startIndex], bytesAvail); - inc(totalBytesWritten, bytesWritten); - EndRecord; - BeginCONTINUERecord; - totalBytesWritten := 0; - startIndex := startIndex + bytesWritten div 4; - bytesAvail := MAX_BYTES_IN_RECORD - totalBytesWritten; - bytesAvail := (bytesAvail div 4) * 4; - bytesToWrite := (Length(biffRtParams) - startIndex) * 4; - needCONTINUE := bytesToWrite > bytesAvail; - end; - if bytesToWrite > 0 then begin - bytesWritten := AStream.Write(biffRtParams[startIndex], bytesToWrite); - inc(totalBytesWritten, bytesWritten); - end; - end; - - // Write size word of the current record - EndRecord; -end; -*) - {@@ ---------------------------------------------------------------------------- Helper function for writing a string with 8-bit length. Overridden version for BIFF8. Called for writing rpn formula string tokens. @@ -5081,9 +4872,9 @@ begin end; -{------------------------------------------------------------------------------} +{==============================================================================} { Global utilities } -{------------------------------------------------------------------------------} +{==============================================================================} procedure InitBIFF8Limitations(out ALimitations: TsSpreadsheetFormatLimitations); begin InitBiffLimitations(ALimitations);