From 495fcb92eb39355e71239fc6dcd13eca33c6f582 Mon Sep 17 00:00:00 2001 From: wp_xxyyzz Date: Tue, 18 Dec 2018 13:08:58 +0000 Subject: [PATCH] fpspreadsheet: Redo automatic file format detection: Add method CheckFileFormat to each reader for overriding (check file header or whatever). git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@6764 8e941d3f-bd1b-0410-a28a-d453659cc2b4 --- .../source/common/fpsopendocument.pas | 8 ++ .../source/common/fpspreadsheet.pas | 123 ++++-------------- .../source/common/fpsreaderwriter.pas | 26 +++- .../source/common/fpsxmlcommon.pas | 23 ++++ .../fpspreadsheet/source/common/xlsbiff2.pas | 31 +++++ .../fpspreadsheet/source/common/xlsbiff5.pas | 21 ++- .../fpspreadsheet/source/common/xlsbiff8.pas | 23 +++- .../fpspreadsheet/source/common/xlscommon.pas | 31 +++++ .../fpspreadsheet/source/common/xlsxooxml.pas | 6 + 9 files changed, 190 insertions(+), 102 deletions(-) diff --git a/components/fpspreadsheet/source/common/fpsopendocument.pas b/components/fpspreadsheet/source/common/fpsopendocument.pas index e1e5db9a0..596d6647b 100644 --- a/components/fpspreadsheet/source/common/fpsopendocument.pas +++ b/components/fpspreadsheet/source/common/fpsopendocument.pas @@ -171,6 +171,9 @@ type constructor Create(AWorkbook: TsBasicWorkbook); override; destructor Destroy; override; + { File format detection } + class function CheckFileFormat(AStream: TStream): Boolean; override; + { General reading methods } procedure ReadFromStream(AStream: TStream; APassword: String = ''; AParams: TsStreamParams = []); override; @@ -1159,6 +1162,11 @@ begin end; end; +class function TsSpreadOpenDocReader.CheckFileFormat(AStream: TStream): Boolean; +begin + Result := HasZipHeader(AStream); +end; + function TsSpreadOpenDocReader.ExtractFormatIndexFromStyle(ACellStyleName: String; ACol: Integer): Integer; var diff --git a/components/fpspreadsheet/source/common/fpspreadsheet.pas b/components/fpspreadsheet/source/common/fpspreadsheet.pas index d84a6dbcf..0bb8d58ac 
100644 --- a/components/fpspreadsheet/source/common/fpspreadsheet.pas +++ b/components/fpspreadsheet/source/common/fpspreadsheet.pas @@ -8488,8 +8488,7 @@ end; {@@ ---------------------------------------------------------------------------- Helper method for determining the spreadsheet type. Read the first few bytes of a file and determines the spreadsheet type from the characteristic - signature. Only implemented for xls files where several file types have the - same extension + signature. -------------------------------------------------------------------------------} class procedure TsWorkbook.GetFormatFromFileHeader(const AFileName: TFileName; out AFormatIDs: TsSpreadFormatIDArray); @@ -8505,85 +8504,32 @@ begin end; {@@ ---------------------------------------------------------------------------- - Helper method for determining the spreadsheet format. Read the first few bytes - of a stream and determines the spreadsheet type from the characteristic - signature. + Helper method for determining the spreadsheet format. Reads the first + few bytes of a stream and determines the spreadsheet type from the + characteristic signature. 
-------------------------------------------------------------------------------} class procedure TsWorkbook.GetFormatFromFileHeader(AStream: TStream; out AFormatIDs: TsSpreadFormatIDArray); overload; -const - BIFF2_HEADER: array[0..3] of byte = ( - $09,$00, $04,$00); // they are common to all BIFF2 files that I've seen - BIFF58_HEADER: array[0..7] of byte = ( - $D0,$CF, $11,$E0, $A1,$B1, $1A,$E1); - ZIP_HEADER: array[0..1] of byte = ( - byte('P'), byte('K')); - - function ValidOLEStream(AStream: TStream; AName: String): Boolean; - var - fsOLE: TVirtualLayer_OLE; - begin - AStream.Position := 0; - fsOLE := TVirtualLayer_OLE.Create(AStream); - try - fsOLE.Initialize; - Result := fsOLE.FileExists('/'+AName); - finally - fsOLE.Free; - end; - end; - var - buf: packed array[0..7] of byte = (0,0,0,0,0,0,0,0); - i: Integer; + reader: TsSpreadReaderClass; + fmtIDs: TsSpreadformatIDArray; + i, j: Integer; begin SetLength(AFormatIDs, 0); - if AStream = nil then exit; - // Read first 8 bytes - i := AStream.Read(buf, Length(buf)); - if i < Length(buf) then - exit; - - // Check for zip header of xlsx and ods - if (buf[0] = ZIP_HEADER[0]) and (buf[1] = ZIP_HEADER[1]) then begin - SetLength(AFormatIDs, 2); - AFormatIDs[0] := ord(sfOOXML); - AFormatIDs[1] := ord(sfOpenDocument); - exit; - end; - - // Check for Excel 2 - for i:=0 to High(BIFF2_HEADER) do - if buf[i] = BIFF2_HEADER[i] then - begin - SetLength(AFormatIDs, 1); - AFormatIDs[0] := ord(sfExcel2); - exit; + fmtIDs := GetSpreadFormats(faRead, [ord(sfExcel8)]); + SetLength(AFormatIDs, Length(fmtIDs)); + j := 0; + for i:=0 to High(fmtIDs) do begin + reader := GetSpreadReaderClass(fmtIDs[i]); + if Assigned(reader) and reader.CheckFileFormat(AStream) then begin + AFormatIDs[j] := fmtIDs[i]; + inc(j); end; - - // Check for Excel 5 or 8 - for i:=0 to High(BIFF58_HEADER) do - if buf[i] <> BIFF58_HEADER[i] then - exit; - - // Now we know that the file is a Microsoft compound document. 
- - // We check for Excel 5 in which the stream is named "Book" - if ValidOLEStream(AStream, 'Book') then begin - SetLength(AFormatIDs, 1); - AFormatIDs[0] := ord(sfExcel5); - exit; - end; - - // Now we check for Excel 8 which names the stream "Workbook" - if ValidOLEStream(AStream, 'Workbook') then begin - SetLength(AFormatIDs, 1); - AFormatIDs[0] := ord(sfExcel8); - exit; end; + SetLength(AFormatIDs, j); end; {@@ ---------------------------------------------------------------------------- @@ -8694,22 +8640,11 @@ var fmtID: TsSpreadFormatID; fileFormats: TsSpreadFormatIDArray; i: Integer; - found: Boolean; + ext: String; begin if not FileExists(AFileName) then raise EFPSpreadsheetReader.CreateFmt(rsFileNotFound, [AFileName]); - // First try to determine file format from the extension - if GetFormatFromFileName(AFilename, fmtID) then begin - try - ReadFromFile(AFileName, fmtID, APassword, AParams); - exit; - except - // format does not match. We must continue with rest of procedure - end; - end else - fmtID := MaxInt; - // Try to get file format from file header GetFormatFromFileHeader(AFileName, fileformats); if Length(fileformats) = 0 then @@ -8719,21 +8654,14 @@ begin if Length(fileformats) = 0 then fileformats := GetSpreadFormats(faRead, [ord(sfExcel8)]); - // Remove already tested format - found := false; - i := 0; - while (i <= High(fileFormats)) do begin - if fileFormats[i] = fmtID then begin - found := true; - inc(i); - while (i <= High(fileFormats)) do begin - fileFormats[i-1] := fileFormats[i]; - inc(i); - end; - end else - inc(i); - end; - if found then SetLength(fileFormats, Length(fileFormats)-1); + // Move file format corresponding to file extension to the top to load it first. 
+ ext := Lowercase(ExtractFileExt(AFileName)); + for i := 0 to High(fileformats) do + if ext = GetSpreadFormatExt(fileformats[i]) then begin + fmtID := fileformats[0]; + fileFormats[0] := fileformats[i]; + fileFormats[i] := fmtID; + end; // No file format found for this file --> error if Length(fileformats) = 0 then @@ -8747,7 +8675,6 @@ begin success := true; break; // Exit the loop if we reach this point successfully. except - //success := false; end; end; diff --git a/components/fpspreadsheet/source/common/fpsreaderwriter.pas b/components/fpspreadsheet/source/common/fpsreaderwriter.pas index 17fb9952b..a749595d1 100644 --- a/components/fpspreadsheet/source/common/fpsreaderwriter.pas +++ b/components/fpspreadsheet/source/common/fpsreaderwriter.pas @@ -46,6 +46,8 @@ type { TsBasicSpreadReader } TsBasicSpreadReader = class(TsBasicSpreadReaderWriter) public + { File format detection } + class function CheckFileFormat(AStream: TStream): boolean; virtual; abstract; { General writing methods } procedure ReadFromFile(AFileName: string; APassword: String = ''; AParams: TsStreamParams = []); virtual; abstract; @@ -92,6 +94,8 @@ type { Helper methods } procedure AddBuiltinNumFormats; virtual; + {@@ More detailed check for file format } + class function CheckFileFormatDetails(AStream: TStream): Boolean; virtual; {@@ Removes column records if all of them have the same column width } procedure FixCols(AWorksheet: TsBasicWorksheet); {@@ Removes row records if all of them have the same row height } @@ -113,7 +117,9 @@ type constructor Create(AWorkbook: TsBasicWorkbook); override; destructor Destroy; override; - { General writing methods } + { File format detection } + class function CheckFileFormat(AStream: TStream): Boolean; override; + { General reading methods } procedure ReadFromFile(AFileName: string; APassword: String = ''; AParams: TsStreamParams = []); override; procedure ReadFromStream(AStream: TStream; APassword: String = ''; @@ -350,6 +356,24 @@ begin // to be 
overridden by descendants end; +{@@ ---------------------------------------------------------------------------- + Must be overridden to check the file header for the signature of the file. + Returns true by default which means that the file is qualified for trying to + be loaded. +-------------------------------------------------------------------------------} +class function TsCustomSpreadReader.CheckFileFormat(AStream: TStream): boolean; +begin + Result := true; +end; + +{@@ ---------------------------------------------------------------------------- + More detailed check for file format +-------------------------------------------------------------------------------} +class function TsCustomSpreadReader.CheckFileFormatDetails(AStream: TStream): Boolean; +begin + Result := true; +end; + {@@ ---------------------------------------------------------------------------- Deletes unnecessary column records as they are written by some Office applications when they convert a file to another format. diff --git a/components/fpspreadsheet/source/common/fpsxmlcommon.pas b/components/fpspreadsheet/source/common/fpsxmlcommon.pas index eac2d01a6..a771fff26 100644 --- a/components/fpspreadsheet/source/common/fpsxmlcommon.pas +++ b/components/fpspreadsheet/source/common/fpsxmlcommon.pas @@ -57,6 +57,8 @@ function CreateTempStream(AWorkbook: TsBasicWorkbook; AFileNameBase: String): TStream; procedure DestroyTempStream(AStream: TStream); +function HasZipHeader(AStream: TStream): Boolean; + implementation @@ -410,5 +412,26 @@ begin end; +{ Returns true if the file begins with a ZIP header 'PK'. Needed for + file format detection. 
} +function HasZipHeader(AStream: TStream): Boolean; +const + ZIP_HEADER: packed array[0..1] of char = ('P', 'K'); +var + P: Int64; + buf: packed array[0..1] of char; +begin + Result := false; + P := AStream.Position; + try + AStream.Position := 0; + if AStream.Read(buf, 2) < 2 then + exit; + Result := (buf[0] = ZIP_HEADER[0]) and (buf[1] = ZIP_HEADER[1]); + finally + AStream.Position := P; + end; +end; + end. diff --git a/components/fpspreadsheet/source/common/xlsbiff2.pas b/components/fpspreadsheet/source/common/xlsbiff2.pas index 3e9970272..a4d425fd1 100644 --- a/components/fpspreadsheet/source/common/xlsbiff2.pas +++ b/components/fpspreadsheet/source/common/xlsbiff2.pas @@ -79,6 +79,8 @@ type { General reading methods } procedure ReadFromStream(AStream: TStream; APassword: String = ''; AParams: TsStreamParams = []); override; + { File format detection } + class function CheckfileFormat(AStream: TStream): Boolean; override; end; @@ -331,6 +333,35 @@ begin FFirstNumFormatIndexInFile := 0; end; +{@@ ---------------------------------------------------------------------------- + Checks the header of the stream for the signature of BIFF2 files +-------------------------------------------------------------------------------} +class function TsSpreadBIFF2Reader.CheckFileFormat(AStream: TStream): Boolean; +const + BIFF2_HEADER: packed array[0..3] of byte = ( + $09,$00, $04,$00); // they are common to all BIFF2 files that I've seen +var + P: Int64; + buf: packed array[0..3] of byte; + n: Integer; +begin + Result := false; + P := AStream.Position; + try + AStream.Position := 0; + n := AStream.Read(buf, SizeOf(buf)); + if n < Length(BIFF2_HEADER) then + exit; + for n:=0 to High(buf) do + if buf[n] <> BIFF2_HEADER[n] then + exit; + Result := true; + finally + AStream.Position := P; + end; +end; + + procedure TsSpreadBIFF2Reader.ReadBlank(AStream: TStream); var ARow, ACol: Cardinal; diff --git a/components/fpspreadsheet/source/common/xlsbiff5.pas 
b/components/fpspreadsheet/source/common/xlsbiff5.pas index 7abf1c4d5..56cb9c963 100644 --- a/components/fpspreadsheet/source/common/xlsbiff5.pas +++ b/components/fpspreadsheet/source/common/xlsbiff5.pas @@ -72,6 +72,7 @@ type TsSpreadBIFF5Reader = class(TsSpreadBIFFReader) protected + class function CheckFileFormatDetails(AStream: TStream): Boolean; override; procedure PopulatePalette; override; { Record writing methods } procedure ReadBOUNDSHEET(AStream: TStream); @@ -229,7 +230,7 @@ uses LazLogger, {$ENDIF} Math, - fpsStrings, fpspreadsheet, fpsReaderWriter, fpsStreams, + uvirtuallayer_ole, fpsStrings, fpspreadsheet, fpsReaderWriter, fpsStreams, fpsPalette, fpsNumFormat, xlsconst; const @@ -363,6 +364,24 @@ type { TsSpreadBIFF5Reader } {------------------------------------------------------------------------------} +{@@ ---------------------------------------------------------------------------- + Checks, for automatic file format detection, whether the OLE stream is named + 'Book' - this is typical of BIFF5 files. +-------------------------------------------------------------------------------} +class function TsSpreadBIFF5Reader.CheckFileFormatDetails(AStream: TStream): Boolean; +var + fsOLE: TVirtualLayer_OLE; +begin + AStream.Position := 0; + fsOLE := TVirtualLayer_OLE.Create(AStream); + try + fsOLE.Initialize; + Result := fsOLE.FileExists('/Book'); + finally + fsOLE.Free; + end; +end; + {@@ ---------------------------------------------------------------------------- Populates the reader's default palette using the BIFF5 default colors. 
-------------------------------------------------------------------------------} diff --git a/components/fpspreadsheet/source/common/xlsbiff8.pas b/components/fpspreadsheet/source/common/xlsbiff8.pas index e4ee90846..dfdcd6ec7 100644 --- a/components/fpspreadsheet/source/common/xlsbiff8.pas +++ b/components/fpspreadsheet/source/common/xlsbiff8.pas @@ -133,6 +133,7 @@ type function ReadWideString(const AStream: TStream; const AUse8BitLength: Boolean): WideString; overload; protected + class function CheckFileFormatDetails(AStream: TStream): Boolean; override; procedure PopulatePalette; override; procedure ReadBOUNDSHEET(AStream: TStream); procedure ReadCONTINUE(const AStream: TStream); @@ -366,8 +367,8 @@ uses LazLogger, {$ENDIF} Math, lconvencoding, LazFileUtils, URIParser, - fpsStrings, {%H-}fpsPatches, fpsStreams, fpsReaderWriter, fpsPalette, - fpspreadsheet, fpsNumFormat, fpsExprParser, xlsEscher; + uvirtuallayer_ole, fpsStrings, {%H-}fpsPatches, fpsStreams, fpsReaderWriter, + fpsPalette, fpspreadsheet, fpsNumFormat, fpsExprParser, xlsEscher; const { Excel record IDs } @@ -794,6 +795,24 @@ begin inherited; end; +{@@ ---------------------------------------------------------------------------- + Checks, for automatic file format detection, whether the OLE stream is named + 'Workbook' - this is typical of BIFF8 files. +-------------------------------------------------------------------------------} +class function TsSpreadBIFF8Reader.CheckFileFormatDetails(AStream: TStream): Boolean; +var + fsOLE: TVirtualLayer_OLE; +begin + AStream.Position := 0; + fsOLE := TVirtualLayer_OLE.Create(AStream); + try + fsOLE.Initialize; + Result := fsOLE.FileExists('/Workbook'); + finally + fsOLE.Free; + end; +end; + {@@ ---------------------------------------------------------------------------- Populates the reader's default palette using the BIFF8 default colors. 
-------------------------------------------------------------------------------} diff --git a/components/fpspreadsheet/source/common/xlscommon.pas b/components/fpspreadsheet/source/common/xlscommon.pas index e2ef29be3..a1a0be352 100644 --- a/components/fpspreadsheet/source/common/xlscommon.pas +++ b/components/fpspreadsheet/source/common/xlscommon.pas @@ -562,6 +562,8 @@ type public constructor Create(AWorkbook: TsBasicWorkbook); override; destructor Destroy; override; + { File format detection } + class function CheckFileFormat(AStream: TStream): Boolean; override; end; @@ -1180,6 +1182,35 @@ begin ); end; +{@@ ---------------------------------------------------------------------------- + Checks the stream header to verify that the file is a BIFF5 or BIFF8 file. +-------------------------------------------------------------------------------} +class function TsSpreadBIFFReader.CheckFileFormat(AStream: TStream): Boolean; +const + BIFF58_HEADER: packed array[0..7] of byte = ( + $D0,$CF, $11,$E0, $A1,$B1, $1A,$E1); +var + P: Int64; + buf: packed array[0..7] of byte; + n: Integer; +begin + Result := false; + P := AStream.Position; + try + AStream.Position := 0; + n := AStream.Read(buf, Length(buf)); + if n <> Length(BIFF58_HEADER) then + exit; + for n:=0 to High(BIFF58_HEADER) do + if buf[n] <> BIFF58_HEADER[n] then + exit; + Result := CheckFileFormatDetails(AStream); + finally + AStream.Position := P; + end; +end; + + {@@ ---------------------------------------------------------------------------- Applies the XF formatting referred to by XFIndex to the specified cell -------------------------------------------------------------------------------} diff --git a/components/fpspreadsheet/source/common/xlsxooxml.pas b/components/fpspreadsheet/source/common/xlsxooxml.pas index bbcce5360..8044df2d1 100644 --- a/components/fpspreadsheet/source/common/xlsxooxml.pas +++ b/components/fpspreadsheet/source/common/xlsxooxml.pas @@ -103,6 +103,7 @@ type public constructor 
Create(AWorkbook: TsBasicWorkbook); override; destructor Destroy; override; + class function CheckFileFormat(AStream: TStream): Boolean; override; procedure ReadFromStream(AStream: TStream; APassword: String = ''; AParams: TsStreamParams = []); override; end; @@ -490,6 +491,11 @@ begin end; end; +class function TsSpreadOOXMLReader.CheckFileFormat(AStream: TStream): Boolean; +begin + Result := HasZipHeader(AStream); +end; + function TsSpreadOOXMLReader.FindCommentsFileName(ANode: TDOMNode): String; var s: String;