From fbd714c976f91829f853d394597fa2581a1cddc3 Mon Sep 17 00:00:00 2001 From: wp_xxyyzz Date: Tue, 5 Jul 2022 22:56:39 +0000 Subject: [PATCH] fpspreadsheet: Refactor xlsx reader for comments and hyperlinks. git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@8339 8e941d3f-bd1b-0410-a28a-d453659cc2b4 --- .../fpspreadsheet/source/common/xlsxooxml.pas | 213 ++++++++---------- 1 file changed, 94 insertions(+), 119 deletions(-) diff --git a/components/fpspreadsheet/source/common/xlsxooxml.pas b/components/fpspreadsheet/source/common/xlsxooxml.pas index e2e4380c1..8759c8b07 100644 --- a/components/fpspreadsheet/source/common/xlsxooxml.pas +++ b/components/fpspreadsheet/source/common/xlsxooxml.pas @@ -68,8 +68,6 @@ type procedure ApplyCellFormatting(ACell: PCell; XfIndex: Integer); procedure ApplyHyperlinks(AWorksheet: TsBasicWorksheet); function CreateXMLStream: TStream; - function FindCommentsFileName(ANode: TDOMNode): String; - function MakeXLPath(AFileName: String): String; protected procedure ReadActiveSheet(ANode: TDOMNode; out ActiveSheetIndex: Integer); procedure ReadBorders(ANode: TDOMNode); @@ -424,6 +422,7 @@ type TRelationship = class RelID: String; Target: String; + Schema: String; end; TRelationshipList = class(TFPList) @@ -432,7 +431,7 @@ type procedure SetItem(AIndex: Integer; AValue: TRelationship); public destructor Destroy; override; - function Add(ARelID, ATarget: String): TRelationship; + function Add(ARelID, ASchema, ATarget: String): TRelationship; procedure Clear; procedure Delete(AIndex: Integer); function FindTarget(ARelID: String): String; @@ -448,6 +447,8 @@ type DrawingRels: TRelationshipList; VmlDrawing_File: String; VmlDrawingRels: TRelationshipList; + HyperlinkRels: TRelationshipList; + Comments_File: String; constructor Create; destructor Destroy; override; end; @@ -556,6 +557,24 @@ begin Result[2] := 'F'; // --> "FFrrggbb" end; +{ The rels files store relative file paths (e.g. ../media/image1.png). + This function makes sure that the file path begins with 'xl'. This filename + can be used by the unzipper to extract the file from the xlsx (zip) archive. } +function MakeXLPath(AFileName: String): String; +begin + Result := AFileName; + if Length(AFileName) <= 3 then + exit; + + if (Result[1] = '.') and (Result[2] = '.') then + begin + Result[1] := 'x'; + Result[2] := 'l'; + end else + if not ((Result[1] ='x') and (Result[2] = 'l') and (Result[3] = '/')) then + Result := 'xl/' + AFileName; +end; + function RelsFileFor(AFileName: String): String; var path: String; @@ -691,19 +710,22 @@ begin end; +{ TRelationshipList } + destructor TRelationshipList.Destroy; begin Clear; inherited; end; -function TRelationshipList.Add(ARelID, ATarget: String): TRelationship; +function TRelationshipList.Add(ARelID, ASchema, ATarget: String): TRelationship; var rel: TRelationship; idx: Integer; begin rel := TRelationship.Create; rel.RelID := ARelID; + rel.Schema := ASchema; rel.Target := ATarget; idx := inherited Add(rel); Result := Items[idx]; @@ -751,12 +773,15 @@ begin end; +{ TSheetData } + constructor TSheetData.Create; begin inherited; SheetRels := TRelationshipList.Create; DrawingRels := TRelationshipList.Create; VmlDrawingRels := TRelationshipList.Create; + HyperlinkRels := TRelationshipList.Create; end; destructor TSheetData.Destroy; @@ -764,6 +789,7 @@ begin SheetRels.Free; DrawingRels.Free; VmlDrawingRels.Free; + HyperlinkRels.Free; inherited; end; @@ -917,41 +943,6 @@ begin Result := TMemoryStream.Create; end; -function TsSpreadOOXMLReader.FindCommentsFileName(ANode: TDOMNode): String; -var - s: String; -begin - while ANode <> nil do - begin - s := GetAttrValue(ANode, 'Type'); - if s = SCHEMAS_COMMENTS then - begin - Result := ExtractFileName(GetAttrValue(ANode, 'Target')); // ??? ExtractFileName here ???? - exit; - end; - ANode := ANode.NextSibling; - end; - Result := ''; -end; - -{ The rels files store relative file paths (e.g. ../media/image1.png). - This function makes sure that the file path begins with 'xl'. This filename - can be used by the unzipper to extract the file from the xlsx (zip) archive. } -function TsSpreadOOXMLReader.MakeXLPath(AFileName: String): String; -begin - Result := AFileName; - if Length(AFileName) <= 3 then - exit; - - if (Result[1] = '.') and (Result[2] = '.') then - begin - Result[1] := 'x'; - Result[2] := 'l'; - end else - if not ((Result[1] ='x') and (Result[2] = 'l') and (Result[3] = '/')) then - Result := 'xl/' + AFileName; -end; - procedure TsSpreadOOXMLReader.ReadActiveSheet(ANode: TDOMNode; out ActiveSheetIndex: Integer); var @@ -3124,6 +3115,7 @@ procedure TsSpreadOOXMLReader.ReadHyperlinks(ANode: TDOMNode; var node: TDOMNode; nodeName: String; + sheetData: TSheetData; hyperlinkData: THyperlinkListData; s: String; @@ -3140,6 +3132,8 @@ var end; begin + sheetData := TSheetData(FSheetList[TsWorksheet(AWorksheet).Index]); + if Assigned(ANode) then begin nodename := ANode.NodeName; if (nodename = 'hyperlinks') or (nodename = 'x:hyperlinks') then @@ -3152,7 +3146,8 @@ begin hyperlinkData := THyperlinkListData.Create; hyperlinkData.CellRef := GetAttrValue(node, 'ref'); hyperlinkData.ID := GetAttrValue(node, 'r:id'); - hyperlinkData.Target := ''; + if hyperlinkData.ID <> '' then + hyperlinkData.Target := sheetData.HyperlinkRels.FindTarget(hyperlinkData.ID); hyperlinkData.TextMark := GetAttrValue(node, 'location'); hyperlinkData.Display := GetAttrValue(node, 'display'); hyperlinkData.Tooltip := GetAttrValue(node, 'tooltip'); @@ -3161,39 +3156,10 @@ begin FHyperlinkList.Add(hyperlinkData); node := node.NextSibling; end; - end else - if (nodename = 'Relationship') or (nodename = 'x:Relationship') then - begin - node := ANode; - while Assigned(node) do - begin - nodename := node.NodeName; - if (nodename = 'Relationship') or (nodename = 'x:Relationship') then - begin - s := GetAttrValue(node, 'Type'); - if s = SCHEMAS_HYPERLINK then - begin - s := GetAttrValue(node, 'Id'); - if s <> '' then - begin - hyperlinkData := FindHyperlinkID(s); - if hyperlinkData <> nil then begin - s := GetAttrValue(node, 'Target'); - if s <> '' then hyperlinkData.Target := s; - s := GetAttrValue(node, 'TargetMode'); - if s <> 'External' then // Only "External" accepted! - begin - hyperlinkData.Target := ''; - hyperlinkData.TextMark := ''; - end; - end; - end; - end; - end; - node := node.NextSibling; - end; end; end; + + ApplyHyperlinks(AWorksheet); end; procedure TsSpreadOOXMLReader.ReadMedia(AStream: TStream); @@ -3531,7 +3497,7 @@ var XMLStream: TStream; doc: TXMLDocument; node: TDOMNode; - relID, relTarget: String; + relID, relSchema, relTarget: String; nodeName: String; begin // Avoid adding items again and again to the same list if this function is @@ -3552,9 +3518,10 @@ begin if nodeName = 'Relationship' then begin relID := GetAttrValue(node, 'Id'); + relSchema := GetAttrValue(node, 'Type'); relTarget := GetAttrValue(node, 'Target'); if (relID <> '') and (relTarget <> '') then - (ARelsList as TRelationshipList).Add(relID, relTarget); + (ARelsList as TRelationshipList).Add(relID, relSchema, relTarget); end; node := node.NextSibling; end; @@ -3952,7 +3919,7 @@ var relID: String; sheetRelsFile: String; rels: TRelationshipList; - j: Integer; + i: Integer; begin sheetIndex := TsWorksheet(AWorksheet).Index; sheetData := TSheetData(FSheetList[sheetIndex]); @@ -3964,6 +3931,8 @@ begin nodeName := ANode.NodeName; if nodeName = 'drawing' then begin + // This node points to the drawing.xml file with parameters for embedded + // images. rels := TRelationshipList.Create; try ReadRels(AStream, sheetRelsFile, rels); @@ -3976,6 +3945,7 @@ begin end else if nodeName = 'legacyDrawingHF' then begin + // This is the node pointer to parameters for heater/footer images. rels := TRelationshipList.Create; try ReadRels(AStream, sheetRelsFile, rels); @@ -3985,6 +3955,40 @@ begin finally rels.Free; end; + end else + if nodeName = 'legacyDrawing' then + begin + // This node is for comment size & position. We do not support this. + // But it indicates the presence of comments and extract the name of the + // comments.xml file from the sheet.xml.rels file + rels := TRelationshipList.Create; + try + ReadRels(AStream, sheetRelsFile, rels); + for i := 0 to rels.Count-1 do + if rels[i].Schema = SCHEMAS_COMMENTS then + begin + sheetData.Comments_File := rels[i].Target; + break; + end; + finally + rels.Free; + end; + end else + if nodeName = 'hyperlinks' then + begin + // This node contains the hyperlink data and will be handled separately. + // It contains also the relationship ids to external files listed in + // the sheet.xml.rels file. We read these relationships here and store + // them in the HyperlinkRels of the sheetdata. + rels := TRelationshipList.Create; + try + ReadRels(AStream, sheetRelsfile, rels); + for i := 0 to rels.Count-1 do + if rels[i].Schema = SCHEMAS_HYPERLINK then + sheetData.HyperlinkRels.Add(rels[i].RelID, rels[i].Schema, rels[i].Target); + finally + rels.Free; + end; end; ANode := ANode.NextSibling; end; @@ -4340,12 +4344,11 @@ procedure TsSpreadOOXMLReader.ReadFromStream(AStream: TStream; APassword: String = ''; AParams: TsStreamParams = []); var Doc : TXMLDocument; - RelsNode: TDOMNode; i, j: Integer; fn: String; - fn_comments: String; XMLStream: TStream; actSheetIndex: Integer; + sheetData: TSheetData; function Doc_FindNode(ANodeName: String): TDOMNode; begin @@ -4381,7 +4384,7 @@ begin ReadFileVersion(Doc_FindNode('fileVersion')); ReadDateMode(Doc_FindNode('workbookPr')); ReadWorkbookProtection(Doc_FindNode('workbookProtection')); - ReadSheetList(Doc_FindNode('sheets')); + ReadSheetList(Doc_FindNode('sheets')); // This creates the worksheets! ReadSheetRels(AStream); //ReadDefinedNames(Doc.DocumentElement.FindNode('definedNames')); -- don't read here because sheets do not yet exist ReadActiveSheet(Doc_FindNode('bookViews'), actSheetIndex); @@ -4424,15 +4427,13 @@ begin XMLStream.Free; end; - // read worksheets + // read worksheet contents for i:=0 to FSheetList.Count-1 do begin - { - // Create worksheet - FWorksheet := (FWorkbook as TsWorkbook).AddWorksheet(TSheetData(FSheetList[i]).Name, true); - } + sheetData := TSheetData(FSheetList[i]); + // Worksheets are already created... - FWorksheet := (FWorkbook as TsWorkbook).GetWorksheetByName(TSheetData(FSheetList[i]).Name); - if TSheetData(FSheetList[i]).Hidden then + FWorksheet := (FWorkbook as TsWorkbook).GetWorksheetByName(sheetData.Name); + if sheetData.Hidden then FWorksheet.Options := FWorksheet.Options + [soHidden]; // unzip sheet file @@ -4452,6 +4453,7 @@ begin FSharedFormulaBaseList.Clear; // Sheet data, formats, etc. + ReadSheetRels(AStream, Doc.DocumentElement, FWorksheet); ReadSheetPr(Doc_FindNode('sheetPr'), FWorksheet); ReadDimension(Doc_FindNode('dimension'), FWorksheet); ReadSheetViews(Doc_FindNode('sheetViews'), FWorksheet); @@ -4468,44 +4470,20 @@ begin ReadColRowBreaks(Doc_FindNode('rowBreaks'), FWorksheet); ReadColRowBreaks(Doc_FindNode('colBreaks'), FWorksheet); ReadHeaderFooter(Doc_FindNode('headerFooter'), FWorksheet); - ReadSheetRels(AStream, Doc.DocumentElement, FWorksheet); FreeAndNil(Doc); { Comments: The comments are stored in separate "comments.xml" files (n = 1, 2, ...) - The relationship which comment belongs to which sheet file must be - retrieved from the "sheet.xml.rels" file (n = 1, 2, ...). + for each worksheet. + The relationship which comment belongs to which sheet file is contained + in the "sheet.xml.rels" file (n = 1, 2, ...) which already has been + read and stored in the sheetData. The rels file contains also the second part of the hyperlink data. } - fn := OOXML_PATH_XL_WORKSHEETS_RELS + Format('sheet%d.xml.rels', [i+1]); - XMLStream := CreateXMLStream; - try - if UnzipToStream(AStream, fn, XMLStream) then - begin - // Find exact name of comments.xml file - ReadXMLStream(Doc, XMLStream); - RelsNode := Doc_FindNode('Relationship'); - fn_comments := FindCommentsFileName(RelsNode); - // Get hyperlink data - ReadHyperlinks(RelsNode, FWorksheet); - FreeAndNil(Doc); - end else - if (FSheetList.Count = 1) then - // If the workbook has only one sheet then the sheet.xml.rels file - // is missing - fn_comments := 'comments1.xml' - else - // This sheet does not have any cell comments at all - fn_comments := ''; - //continue; - finally - XMLStream.Free; - end; - - // Extract texts from the comments file found and apply to worksheet. - if fn_comments <> '' then + if sheetData.Comments_File <> '' then begin - fn := OOXML_PATH_XL + fn_comments; + // Extract texts from the comments file found and apply to worksheet. + fn := MakeXLPath(sheetData.Comments_File); XMLStream := CreateXMLStream; try if UnzipToStream(AStream, fn, XMLStream) then @@ -4519,9 +4497,6 @@ begin end; end; - // Add hyperlinks to cells - ApplyHyperlinks(FWorksheet); - // Active worksheet if i = actSheetIndex then (FWorkbook as TsWorkbook).SelectWorksheet(FWorksheet as TsWorksheet); @@ -4550,7 +4525,7 @@ begin begin ReadXMLStream(Doc, XMLStream); ReadMetaData(Doc.DocumentElement); - FreeandNil(Doc); + FreeAndNil(Doc); end; finally XMLStream.Free;