From df88d029524e0f10156e6319b2ca899e5139d736 Mon Sep 17 00:00:00 2001 From: wp_xxyyzz Date: Tue, 10 Jul 2018 08:32:50 +0000 Subject: [PATCH] fpspreadsheet: Improved autodetection of file format (e.g. xls files renamed to xlsx - see https://forum.lazarus.freepascal.org/index.php/topic,41830.msg291032.html). Add corresponding test cases. git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@6554 8e941d3f-bd1b-0410-a28a-d453659cc2b4 --- .../source/common/fpspreadsheet.pas | 78 ++++++---- .../fpspreadsheet/tests/fileformattests.pas | 142 ++++++++++++++++++ .../fpspreadsheet/tests/spreadtestgui.lpi | 11 +- .../fpspreadsheet/tests/spreadtestgui.lpr | 4 +- 4 files changed, 200 insertions(+), 35 deletions(-) create mode 100644 components/fpspreadsheet/tests/fileformattests.pas diff --git a/components/fpspreadsheet/source/common/fpspreadsheet.pas b/components/fpspreadsheet/source/common/fpspreadsheet.pas index bf490cb32..16db5dffa 100644 --- a/components/fpspreadsheet/source/common/fpspreadsheet.pas +++ b/components/fpspreadsheet/source/common/fpspreadsheet.pas @@ -687,10 +687,10 @@ type FEmbeddedObjList: TFPList; { Internal methods } - class function GetFormatFromFileHeader(const AFileName: TFileName; - out AFormatID: TsSpreadFormatID): Boolean; overload; - class function GetFormatFromFileHeader(AStream: TStream; - out AFormatID: TsSpreadFormatID): Boolean; overload; + class procedure GetFormatFromFileHeader(const AFileName: TFileName; + out AFormatIDs: TsSpreadFormatIDArray); overload; + class procedure GetFormatFromFileHeader(AStream: TStream; + out AFormatIDs: TsSpreadFormatIDArray); overload; procedure PrepareBeforeReading; procedure PrepareBeforeSaving; @@ -8279,14 +8279,14 @@ end; signature. Only implemented for xls files where several file types have the same extension -------------------------------------------------------------------------------} -class function TsWorkbook.GetFormatFromFileHeader(const AFileName: TFileName; - out AFormatID: TsSpreadFormatID): Boolean; +class procedure TsWorkbook.GetFormatFromFileHeader(const AFileName: TFileName; + out AFormatIDs: TsSpreadFormatIDArray); var stream: TStream; begin stream := TFileStream.Create(AFileName, fmOpenRead + fmShareDenyNone); try - Result := GetFormatFromFileHeader(stream, AFormatID) + GetFormatFromFileHeader(stream, AFormatIDs) finally stream.Free; end; @@ -8295,16 +8295,17 @@ end; {@@ ---------------------------------------------------------------------------- Helper method for determining the spreadsheet format. Read the first few bytes of a stream and determines the spreadsheet type from the characteristic - signature. Only implemented for xls where several file types have the same - extension. + signature. -------------------------------------------------------------------------------} -class function TsWorkbook.GetFormatFromFileHeader(AStream: TStream; - out AFormatID: TsSpreadFormatID): Boolean; overload; +class procedure TsWorkbook.GetFormatFromFileHeader(AStream: TStream; + out AFormatIDs: TsSpreadFormatIDArray); overload; const BIFF2_HEADER: array[0..3] of byte = ( $09,$00, $04,$00); // they are common to all BIFF2 files that I've seen BIFF58_HEADER: array[0..7] of byte = ( $D0,$CF, $11,$E0, $A1,$B1, $1A,$E1); + ZIP_HEADER: array[0..1] of byte = ( + byte('P'), byte('K')); function ValidOLEStream(AStream: TStream; AName: String): Boolean; var @@ -8325,44 +8326,53 @@ var i: Integer; ok: Boolean; begin - Result := false; + SetLength(AFormatIDs, 0); + if AStream = nil then exit; // Read first 8 bytes - AStream.ReadBuffer(buf, 8); + i := AStream.Read(buf, Length(buf)); + if i < Length(buf) then + exit; + + // Check for zip header of xlsx and ods + if (buf[0] = ZIP_HEADER[0]) and (buf[1] = ZIP_HEADER[1]) then begin + SetLength(AFormatIDs, 2); + AFormatIDs[0] := ord(sfOOXML); + AFormatIDs[1] := ord(sfOpenDocument); + exit; + end; // Check for Excel 2 ok := true; for i:=0 to High(BIFF2_HEADER) do - if buf[i] <> BIFF2_HEADER[i] then + if buf[i] = BIFF2_HEADER[i] then begin - ok := false; - break; + SetLength(AFormatIDs, 1); + AFormatIDs[0] := ord(sfExcel2); + exit; end; - if ok then - begin - AFormatID := ord(sfExcel2); - exit(true); - end; // Check for Excel 5 or 8 for i:=0 to High(BIFF58_HEADER) do if buf[i] <> BIFF58_HEADER[i] then - exit(false); + exit; // Now we know that the file is a Microsoft compound document. // We check for Excel 5 in which the stream is named "Book" if ValidOLEStream(AStream, 'Book') then begin - AFormatID := ord(sfExcel5); - exit(true); + SetLength(AFormatIDs, 1); + AFormatIDs[0] := ord(sfExcel5); + exit; end; // Now we check for Excel 8 which names the stream "Workbook" if ValidOLEStream(AStream, 'Workbook') then begin - AFormatID := ord(sfExcel8); - exit(true); + SetLength(AFormatIDs, 1); + AFormatIDs[0] := ord(sfExcel8); + exit; end; end; @@ -8470,8 +8480,8 @@ end; procedure TsWorkbook.ReadFromFile(AFileName: string; APassword: String = ''; AParams: TsStreamParams = []); var - formatID: TsSpreadFormatID; - canLoad, success: Boolean; + formatIDs: TsSpreadFormatIDArray; + success: Boolean; fileFormats: TsSpreadFormatIDArray; ext: String; i: Integer; @@ -8481,12 +8491,18 @@ begin ext := LowerCase(ExtractFileExt(AFileName)); + // Try to get file format from file header + GetFormatFromFileHeader(AFileName, fileformats); + if Length(fileformats) = 0 then + // If not successful use formats defined by extension + fileFormats := GetSpreadFormatsFromFileName(faRead, AFileName); + (* // Collect all formats which have the same extension fileFormats := GetSpreadFormatsFromFileName(faRead, AFileName); - if (Length(fileFormats) > 1) and (ext = STR_EXCEL_EXTENSION) then + if (Length(fileFormats) > 1) {and (ext = STR_EXCEL_EXTENSION)} then begin // In case of xls files we try to determine the format from the header - canLoad := GetFormatFromFileHeader(AFilename, formatID); + canLoad := GetFormatFromFileHeader(AFilename, formatIDs); if canLoad then begin // Analysis of the file header was successful --> we know the file // format and shorten the list of fileformats to just one item. @@ -8498,7 +8514,7 @@ begin // We begin with BIFF8 which is the most common xls format now. // The next command re-reads the format list with BIFF8 at the first place. fileFormats := GetSpreadFormatsFromFileName(faRead, AFileName, ord(sfExcel8)); - end; + end; *) // No file format found for this file --> error if Length(fileformats) = 0 then diff --git a/components/fpspreadsheet/tests/fileformattests.pas b/components/fpspreadsheet/tests/fileformattests.pas new file mode 100644 index 000000000..04819c865 --- /dev/null +++ b/components/fpspreadsheet/tests/fileformattests.pas @@ -0,0 +1,142 @@ +unit fileformattests; + +{$mode objfpc}{$H+} + +interface +{ Cell type tests +This unit tests writing the various cell data types out to and reading them +back from files. +} + +uses + // Not using Lazarus package as the user may be working with multiple versions + // Instead, add .. to unit search path + Classes, SysUtils, fpcunit, testregistry, + fpstypes, fpspreadsheet, + xlsbiff2, xlsbiff5, xlsbiff8, fpsOpenDocument, + testsutility; + +type + { TSpreadFileFormatTests } + // Write cell types to xls/xml file and read back + TSpreadFileFormatTests = class(TTestCase) + private + + protected + // Set up expected values: + procedure SetUp; override; + procedure TearDown; override; + procedure TestAutoDetect(AFormat: TsSpreadsheetFormat); + + published + procedure TestAutoDetect_BIFF2; + procedure TestAutoDetect_BIFF5; + procedure TestAutoDetect_BIFF8; + procedure TestAutoDetect_OOXML; + procedure TestAutoDetect_ODS; + end; + +implementation + +uses + fpsReaderWriter; + +const + SheetName = 'FileFormat'; + + +{ TSpreadFileFormatTests } + +procedure TSpreadFileFormatTests.SetUp; +begin + inherited SetUp; +end; + +procedure TSpreadFileFormatTests.TearDown; +begin + inherited TearDown; +end; + +procedure TSpreadFileFormatTests.TestAutoDetect(AFormat: TsSpreadsheetFormat); +const + EXPECTED_TEXT = 'abcefg'; +var + MyWorksheet: TsWorksheet; + MyWorkbook: TsWorkbook; + row, col: Integer; + MyCell: PCell; + value: Boolean; + TempFile: string; //write xls/xml to this file and read back from it + actualText: String; +begin + MyWorkbook := TsWorkbook.Create; + try + MyWorkSheet:= MyWorkBook.AddWorksheet(SheetName); + + // write any content to the file + MyWorksheet.WriteText(0, 0, EXPECTED_TEXT); + + // Write workbook to file using format specified, but with wrong extension + TempFile := ChangeFileExt(NewTempFile, '.abc'); + MyWorkBook.WriteToFile(TempFile, AFormat, true); + finally + MyWorkbook.Free; + end; + + // Open the spreadsheet + MyWorkbook := TsWorkbook.Create; + try + // Try to read file and detect format automatically + try + MyWorkbook.ReadFromFile(TempFile); + // If the tests gets here the format was detected correctly. + // Quickly check the cell content + MyWorksheet := MyWorkbook.GetFirstWorksheet; + actualText := MyWorksheet.ReadAsUTF8Text(0, 0); + CheckEquals(EXPECTED_TEXT, actualText, 'Cell mismatch in A1'); + except + fail('Cannot read file with format ' + GetSpreadFormatName(ord(AFormat))); + end; + finally + MyWorkbook.Free; + DeleteFile(TempFile); + end; +end; + + +{ BIFF2 } +procedure TSpreadFileFormatTests.TestAutoDetect_BIFF2; +begin + TestAutoDetect(sfExcel2); +end; + +{ BIFF5 } +procedure TSpreadFileFormatTests.TestAutoDetect_BIFF5; +begin + TestAutoDetect(sfExcel5); +end; + +{ BIFF8 } +procedure TSpreadFileFormatTests.TestAutoDetect_BIFF8; +begin + TestAutoDetect(sfExcel8); +end; + +{ OOXML } +procedure TSpreadFileFormatTests.TestAutoDetect_OOXML; +begin + TestAutoDetect(sfOOXML); +end; + +{ ODS } +procedure TSpreadFileFormatTests.TestAutoDetect_ODS; +begin + TestAutoDetect(sfOpenDocument); +end; + + +initialization + RegisterTest(TSpreadFileFormatTests); + +end. + diff --git a/components/fpspreadsheet/tests/spreadtestgui.lpi b/components/fpspreadsheet/tests/spreadtestgui.lpi index 6aae442fb..15b943958 100644 --- a/components/fpspreadsheet/tests/spreadtestgui.lpi +++ b/components/fpspreadsheet/tests/spreadtestgui.lpi @@ -38,7 +38,7 @@ - + @@ -161,6 +161,10 @@ + + + + @@ -186,7 +190,7 @@ - + @@ -209,6 +213,9 @@ + + + diff --git a/components/fpspreadsheet/tests/spreadtestgui.lpr b/components/fpspreadsheet/tests/spreadtestgui.lpr index 73d618a8b..297c2c405 100644 --- a/components/fpspreadsheet/tests/spreadtestgui.lpr +++ b/components/fpspreadsheet/tests/spreadtestgui.lpr @@ -11,8 +11,8 @@ uses {$ENDIF} Interfaces, Forms, GuiTestRunner, testsutility, datetests, stringtests, numberstests, manualtests, internaltests, - formattests, colortests, fonttests, optiontests, numformatparsertests, - formulatests, rpnFormulaUnit, singleformulatests, + fileformattests, formattests, colortests, fonttests, optiontests, + numformatparsertests, formulatests, rpnFormulaUnit, singleformulatests, exceltests, emptycelltests, errortests, virtualmodetests, insertdeletetests, ssttests, celltypetests, sortingtests, copytests, enumeratortests, commenttests, hyperlinktests, pagelayouttests, protectiontests;