From 02281cd15f73bdaae4e125375df6952897a2ce94 Mon Sep 17 00:00:00 2001 From: wp_xxyyzz Date: Tue, 18 Dec 2018 00:07:16 +0000 Subject: [PATCH] fpspreadsheet: Improved auto-format detection (check extension first, check file header when extension test fails). Fix ods reader crashing when extension has been renamed. git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@6760 8e941d3f-bd1b-0410-a28a-d453659cc2b4 --- .../source/common/fpsopendocument.pas | 153 +++++++++--------- .../source/common/fpspreadsheet.pas | 29 ++++ .../fpspreadsheet/source/common/fpsutils.pas | 2 +- 3 files changed, 108 insertions(+), 76 deletions(-) diff --git a/components/fpspreadsheet/source/common/fpsopendocument.pas b/components/fpspreadsheet/source/common/fpsopendocument.pas index 79bc86262..159a07fa6 100644 --- a/components/fpspreadsheet/source/common/fpsopendocument.pas +++ b/components/fpspreadsheet/source/common/fpsopendocument.pas @@ -2613,97 +2613,103 @@ begin XMLStream.Free; end; - ReadFontFaces(Doc.DocumentElement.FindNode('office:font-face-decls')); + if Assigned(Doc) then begin + ReadFontFaces(Doc.DocumentElement.FindNode('office:font-face-decls')); - StylesNode := Doc.DocumentElement.FindNode('office:styles'); - ReadNumFormats(StylesNode); - ReadStyles(StylesNode); - ReadAutomaticStyles(Doc.DocumentElement.FindNode('office:automatic-styles')); - ReadMasterStyles(Doc.DocumentElement.FindNode('office:master-styles')); - FreeAndNil(Doc); + StylesNode := Doc.DocumentElement.FindNode('office:styles'); + ReadNumFormats(StylesNode); + ReadStyles(StylesNode); + ReadAutomaticStyles(Doc.DocumentElement.FindNode('office:automatic-styles')); + ReadMasterStyles(Doc.DocumentElement.FindNode('office:master-styles')); + FreeAndNil(Doc); + end; //process the content.xml file XMLStream := CreateXMLStream; try if UnzipToStream(AStream, 'content.xml', XMLStream) then - ReadXMLStream(Doc, XMLStream); + ReadXMLStream(Doc, XMLStream) + else + raise EFPSpreadsheetReader.CreateFmt(rsDefectiveInternalFileStructure, ['ods']); finally XMLStream.Free; end; - ReadFontFaces(Doc.DocumentElement.FindNode('office:font-face-decls')); - StylesNode := Doc.DocumentElement.FindNode('office:automatic-styles'); - ReadNumFormats(StylesNode); - ReadStyles(StylesNode); + if Assigned(Doc) then begin + ReadFontFaces(Doc.DocumentElement.FindNode('office:font-face-decls')); + StylesNode := Doc.DocumentElement.FindNode('office:automatic-styles'); + ReadNumFormats(StylesNode); + ReadStyles(StylesNode); - BodyNode := Doc.DocumentElement.FindNode('office:body'); - if not Assigned(BodyNode) then - raise EFPSpreadsheet.Create('[TsSpreadOpenDocReader.ReadFromStream] Node "office:body" not found.'); + BodyNode := Doc.DocumentElement.FindNode('office:body'); + if not Assigned(BodyNode) then + raise EFPSpreadsheet.Create('[TsSpreadOpenDocReader.ReadFromStream] Node "office:body" not found.'); - SpreadSheetNode := BodyNode.FindNode('office:spreadsheet'); - if not Assigned(SpreadSheetNode) then - raise EFPSpreadsheet.Create('[TsSpreadOpenDocReader.ReadFromStream] Node "office:spreadsheet" not found.'); + SpreadSheetNode := BodyNode.FindNode('office:spreadsheet'); + if not Assigned(SpreadSheetNode) then + raise EFPSpreadsheet.Create('[TsSpreadOpenDocReader.ReadFromStream] Node "office:spreadsheet" not found.'); - ReadSheets(SpreadsheetNode); - ReadDocumentProtection(SpreadsheetNode); - ReadDateMode(SpreadSheetNode); + ReadSheets(SpreadsheetNode); + ReadDocumentProtection(SpreadsheetNode); + ReadDateMode(SpreadSheetNode); - //process each table (sheet) - TableNode := SpreadSheetNode.FindNode('table:table'); - while Assigned(TableNode) do - begin - nodename := TableNode.Nodename; - // These nodes occur due to leading spaces which are not skipped - // automatically any more due to PreserveWhiteSpace option applied - // to ReadXMLFile - if nodeName <> 'table:table' then + //process each table (sheet) + TableNode := SpreadSheetNode.FindNode('table:table'); + while Assigned(TableNode) do begin - TableNode := TableNode.NextSibling; - continue; - end; + nodename := TableNode.Nodename; + // These nodes occur due to leading spaces which are not skipped + // automatically any more due to PreserveWhiteSpace option applied + // to ReadXMLFile + if nodeName <> 'table:table' then + begin + TableNode := TableNode.NextSibling; + continue; + end; - // Tables with external references contain a copy of the external table - // having the filename as sheet name - which is not valid for fps. - // Since external references are not supported ATM we skip this table. - if TableNode.FindNode('table:table-source') <> nil then begin - TableNode := TableNode.NextSibling; - Continue; - end; + // Tables with external references contain a copy of the external table + // having the filename as sheet name - which is not valid for fps. + // Since external references are not supported ATM we skip this table. + if TableNode.FindNode('table:table-source') <> nil then begin + TableNode := TableNode.NextSibling; + Continue; + end; - sheetName := GetAttrValue(TableNode, 'table:name'); - FWorksheet := TsWorkbook(FWorkbook).GetWorksheetByName(sheetName); + sheetName := GetAttrValue(TableNode, 'table:name'); + FWorksheet := TsWorkbook(FWorkbook).GetWorksheetByName(sheetName); // FWorkSheet := TsWorkbook(FWorkbook).AddWorksheet(sheetName, true); - tablestyleName := GetAttrValue(TableNode, 'table:style-name'); - // Read protection - ReadSheetProtection(TableNode, FWorksheet); - // Collect embedded images - ReadShapes(TableNode); - // Collect column styles used - ReadColumns(TableNode); - // Process each row inside the sheet and process each cell of the row - ReadRowsAndCells(TableNode); - // Read page layout - ReadPageLayout(StylesNode, GetAttrValue(TableNode, 'table:style-name'), - (FWorksheet as TsWorksheet).PageLayout); - // Repeated cols/rows already have been determined. - (FWorksheet as TsWorksheet).PageLayout.SetRepeatedRows( - FRepeatedRows.FirstIndex, FRepeatedRows.LastIndex); - (FWorksheet as TsWorksheet).PageLayout.SetRepeatedCols( - FRepeatedCols.FirstIndex, FRepeatedCols.LastIndex); - // Read print ranges - ReadPrintRanges(TableNode, FWorksheet); - // Apply table style - ApplyTableStyle(FWorksheet, tablestylename); - // Handle columns - ApplyColWidths; - // Page layout - FixCols(FWorksheet); - FixRows(FWorksheet); - // Continue with next table - TableNode := TableNode.NextSibling; - end; //while Assigned(TableNode) + tablestyleName := GetAttrValue(TableNode, 'table:style-name'); + // Read protection + ReadSheetProtection(TableNode, FWorksheet); + // Collect embedded images + ReadShapes(TableNode); + // Collect column styles used + ReadColumns(TableNode); + // Process each row inside the sheet and process each cell of the row + ReadRowsAndCells(TableNode); + // Read page layout + ReadPageLayout(StylesNode, GetAttrValue(TableNode, 'table:style-name'), + (FWorksheet as TsWorksheet).PageLayout); + // Repeated cols/rows already have been determined. + (FWorksheet as TsWorksheet).PageLayout.SetRepeatedRows( + FRepeatedRows.FirstIndex, FRepeatedRows.LastIndex); + (FWorksheet as TsWorksheet).PageLayout.SetRepeatedCols( + FRepeatedCols.FirstIndex, FRepeatedCols.LastIndex); + // Read print ranges + ReadPrintRanges(TableNode, FWorksheet); + // Apply table style + ApplyTableStyle(FWorksheet, tablestylename); + // Handle columns + ApplyColWidths; + // Page layout + FixCols(FWorksheet); + FixRows(FWorksheet); + // Continue with next table + TableNode := TableNode.NextSibling; + end; //while Assigned(TableNode) - FreeAndNil(Doc); + FreeAndNil(Doc); + end; // process the settings.xml file (Note: it does not always exist!) XMLStream := CreateXMLStream; @@ -2718,9 +2724,6 @@ begin XMLStream.Free; end; - // Convert formulas from OpenDocument to ExcelA1 dialect -// FixFormulas; - // Active sheet if FActiveSheet <> '' then sheet := (FWorkbook as TsWorkbook).GetWorksheetByName(FActiveSheet) else diff --git a/components/fpspreadsheet/source/common/fpspreadsheet.pas b/components/fpspreadsheet/source/common/fpspreadsheet.pas index dc0903799..53891bbd2 100644 --- a/components/fpspreadsheet/source/common/fpspreadsheet.pas +++ b/components/fpspreadsheet/source/common/fpspreadsheet.pas @@ -8691,18 +8691,47 @@ procedure TsWorkbook.ReadFromFile(AFileName: string; APassword: String = ''; AParams: TsStreamParams = []); var success: Boolean; + fmtID: TsSpreadFormatID; fileFormats: TsSpreadFormatIDArray; i: Integer; + found: Boolean; begin if not FileExists(AFileName) then raise EFPSpreadsheetReader.CreateFmt(rsFileNotFound, [AFileName]); + // First try to determine file format from the extension + if GetFormatFromFileName(AFilename, fmtID) then begin + try + ReadFromFile(AFileName, fmtID, APassword, AParams); + exit; + except + // format does not match. We must continue with rest of procedure + end; + end else + fmtID := MaxInt; + // Try to get file format from file header GetFormatFromFileHeader(AFileName, fileformats); if Length(fileformats) = 0 then // If not successful use formats defined by extension fileFormats := GetSpreadFormatsFromFileName(faRead, AFileName); + // Remove already tested format + found := false; + i := 0; + while (i <= High(fileFormats)) do begin + if fileFormats[i] = fmtID then begin + found := true; + inc(i); + while (i <= High(fileFormats)) do begin + fileFormats[i-1] := fileFormats[i]; + inc(i); + end; + end else + inc(i); + end; + if found then SetLength(fileFormats, Length(fileFormats)-1); + // No file format found for this file --> error if Length(fileformats) = 0 then raise EFPSpreadsheetReader.CreateFmt(rsReaderNotFound, [AFileName]); diff --git a/components/fpspreadsheet/source/common/fpsutils.pas b/components/fpspreadsheet/source/common/fpsutils.pas index f045b75d8..f1c31accd 100644 --- a/components/fpspreadsheet/source/common/fpsutils.pas +++ b/components/fpspreadsheet/source/common/fpsutils.pas @@ -1370,7 +1370,7 @@ var fileformats: TsSpreadFormatIDArray; begin fileFormats := GetSpreadFormatsFromFileName(faRead, AFileName, ord(sfExcel8)); - Result := (Length(fileFormats) > 0) and (fileFormats[0] <= sfidUnknown); + Result := (Length(fileFormats) > 0); // and (fileFormats[0] <= sfidUnknown); // wp - removed for new format detection if Result then AFormatID := fileFormats[0]; end;