fpspreadsheet: Improved autodetection of file format (e.g. xls files renamed to xlsx - see https://forum.lazarus.freepascal.org/index.php/topic,41830.msg291032.html). Add corresponding test cases.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@6554 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2018-07-10 08:32:50 +00:00
parent 25b83a1948
commit df88d02952
4 changed files with 200 additions and 35 deletions

View File

@ -687,10 +687,10 @@ type
FEmbeddedObjList: TFPList;
{ Internal methods }
class function GetFormatFromFileHeader(const AFileName: TFileName;
out AFormatID: TsSpreadFormatID): Boolean; overload;
class function GetFormatFromFileHeader(AStream: TStream;
out AFormatID: TsSpreadFormatID): Boolean; overload;
class procedure GetFormatFromFileHeader(const AFileName: TFileName;
out AFormatIDs: TsSpreadFormatIDArray); overload;
class procedure GetFormatFromFileHeader(AStream: TStream;
out AFormatIDs: TsSpreadFormatIDArray); overload;
procedure PrepareBeforeReading;
procedure PrepareBeforeSaving;
@ -8279,14 +8279,14 @@ end;
signature. Only implemented for xls files where several file types have the
same extension
-------------------------------------------------------------------------------}
class function TsWorkbook.GetFormatFromFileHeader(const AFileName: TFileName;
out AFormatID: TsSpreadFormatID): Boolean;
class procedure TsWorkbook.GetFormatFromFileHeader(const AFileName: TFileName;
out AFormatIDs: TsSpreadFormatIDArray);
var
stream: TStream;
begin
stream := TFileStream.Create(AFileName, fmOpenRead + fmShareDenyNone);
try
Result := GetFormatFromFileHeader(stream, AFormatID)
GetFormatFromFileHeader(stream, AFormatIDs)
finally
stream.Free;
end;
@ -8295,16 +8295,17 @@ end;
{@@ ----------------------------------------------------------------------------
Helper method for determining the spreadsheet format. Read the first few bytes
of a stream and determines the spreadsheet type from the characteristic
signature. Only implemented for xls where several file types have the same
extension.
signature.
-------------------------------------------------------------------------------}
class function TsWorkbook.GetFormatFromFileHeader(AStream: TStream;
out AFormatID: TsSpreadFormatID): Boolean; overload;
class procedure TsWorkbook.GetFormatFromFileHeader(AStream: TStream;
out AFormatIDs: TsSpreadFormatIDArray); overload;
const
BIFF2_HEADER: array[0..3] of byte = (
$09,$00, $04,$00); // they are common to all BIFF2 files that I've seen
BIFF58_HEADER: array[0..7] of byte = (
$D0,$CF, $11,$E0, $A1,$B1, $1A,$E1);
ZIP_HEADER: array[0..1] of byte = (
byte('P'), byte('K'));
function ValidOLEStream(AStream: TStream; AName: String): Boolean;
var
@ -8325,44 +8326,53 @@ var
i: Integer;
ok: Boolean;
begin
Result := false;
SetLength(AFormatIDs, 0);
if AStream = nil then
exit;
// Read first 8 bytes
AStream.ReadBuffer(buf, 8);
i := AStream.Read(buf, Length(buf));
if i < Length(buf) then
exit;
// Check for zip header of xlsx and ods
if (buf[0] = ZIP_HEADER[0]) and (buf[1] = ZIP_HEADER[1]) then begin
SetLength(AFormatIDs, 2);
AFormatIDs[0] := ord(sfOOXML);
AFormatIDs[1] := ord(sfOpenDocument);
exit;
end;
// Check for Excel 2
ok := true;
for i:=0 to High(BIFF2_HEADER) do
if buf[i] <> BIFF2_HEADER[i] then
if buf[i] = BIFF2_HEADER[i] then
begin
ok := false;
break;
SetLength(AFormatIDs, 1);
AFormatIDs[0] := ord(sfExcel2);
exit;
end;
if ok then
begin
AFormatID := ord(sfExcel2);
exit(true);
end;
// Check for Excel 5 or 8
for i:=0 to High(BIFF58_HEADER) do
if buf[i] <> BIFF58_HEADER[i] then
exit(false);
exit;
// Now we know that the file is a Microsoft compound document.
// We check for Excel 5 in which the stream is named "Book"
if ValidOLEStream(AStream, 'Book') then begin
AFormatID := ord(sfExcel5);
exit(true);
SetLength(AFormatIDs, 1);
AFormatIDs[0] := ord(sfExcel5);
exit;
end;
// Now we check for Excel 8 which names the stream "Workbook"
if ValidOLEStream(AStream, 'Workbook') then begin
AFormatID := ord(sfExcel8);
exit(true);
SetLength(AFormatIDs, 1);
AFormatIDs[0] := ord(sfExcel8);
exit;
end;
end;
@ -8470,8 +8480,8 @@ end;
procedure TsWorkbook.ReadFromFile(AFileName: string; APassword: String = '';
AParams: TsStreamParams = []);
var
formatID: TsSpreadFormatID;
canLoad, success: Boolean;
formatIDs: TsSpreadFormatIDArray;
success: Boolean;
fileFormats: TsSpreadFormatIDArray;
ext: String;
i: Integer;
@ -8481,12 +8491,18 @@ begin
ext := LowerCase(ExtractFileExt(AFileName));
// Try to get file format from file header
GetFormatFromFileHeader(AFileName, fileformats);
if Length(fileformats) = 0 then
// If not successful use formats defined by extension
fileFormats := GetSpreadFormatsFromFileName(faRead, AFileName);
(*
// Collect all formats which have the same extension
fileFormats := GetSpreadFormatsFromFileName(faRead, AFileName);
if (Length(fileFormats) > 1) and (ext = STR_EXCEL_EXTENSION) then
if (Length(fileFormats) > 1) {and (ext = STR_EXCEL_EXTENSION)} then
begin
// In case of xls files we try to determine the format from the header
canLoad := GetFormatFromFileHeader(AFilename, formatID);
canLoad := GetFormatFromFileHeader(AFilename, formatIDs);
if canLoad then begin
// Analysis of the file header was successful --> we know the file
// format and shorten the list of fileformats to just one item.
@ -8498,7 +8514,7 @@ begin
// We begin with BIFF8 which is the most common xls format now.
// The next command re-reads the format list with BIFF8 at the first place.
fileFormats := GetSpreadFormatsFromFileName(faRead, AFileName, ord(sfExcel8));
end;
end; *)
// No file format found for this file --> error
if Length(fileformats) = 0 then

View File

@ -0,0 +1,142 @@
unit fileformattests;
{$mode objfpc}{$H+}
interface
{ Cell type tests
This unit tests writing the various cell data types out to and reading them
back from files.
}
uses
// Not using Lazarus package as the user may be working with multiple versions
// Instead, add .. to unit search path
Classes, SysUtils, fpcunit, testregistry,
fpstypes, fpspreadsheet,
xlsbiff2, xlsbiff5, xlsbiff8, fpsOpenDocument,
testsutility;
type
{ TSpreadFileFormatTests }
// Write cell types to xls/xml file and read back
TSpreadFileFormatTests = class(TTestCase)
private
protected
// Set up expected values:
procedure SetUp; override;
procedure TearDown; override;
procedure TestAutoDetect(AFormat: TsSpreadsheetFormat);
published
procedure TestAutoDetect_BIFF2;
procedure TestAutoDetect_BIFF5;
procedure TestAutoDetect_BIFF8;
procedure TestAutoDetect_OOXML;
procedure TestAutoDetect_ODS;
end;
implementation
uses
fpsReaderWriter;
const
SheetName = 'FileFormat';
{ TSpreadFileFormatTests }
procedure TSpreadFileFormatTests.SetUp;
begin
inherited SetUp;
end;
procedure TSpreadFileFormatTests.TearDown;
begin
inherited TearDown;
end;
procedure TSpreadFileFormatTests.TestAutoDetect(AFormat: TsSpreadsheetFormat);
const
EXPECTED_TEXT = 'abcefg';
var
MyWorksheet: TsWorksheet;
MyWorkbook: TsWorkbook;
row, col: Integer;
MyCell: PCell;
value: Boolean;
TempFile: string; //write xls/xml to this file and read back from it
actualText: String;
begin
MyWorkbook := TsWorkbook.Create;
try
MyWorkSheet:= MyWorkBook.AddWorksheet(SheetName);
// write any content to the file
MyWorksheet.WriteText(0, 0, EXPECTED_TEXT);
// Write workbook to file using format specified, but with wrong extension
TempFile := ChangeFileExt(NewTempFile, '.abc');
MyWorkBook.WriteToFile(TempFile, AFormat, true);
finally
MyWorkbook.Free;
end;
// Open the spreadsheet
MyWorkbook := TsWorkbook.Create;
try
// Try to read file and detect format automatically
try
MyWorkbook.ReadFromFile(TempFile);
// If the tests gets here the format was detected correctly.
// Quickly check the cell content
MyWorksheet := MyWorkbook.GetFirstWorksheet;
actualText := MyWorksheet.ReadAsUTF8Text(0, 0);
CheckEquals(EXPECTED_TEXT, actualText, 'Cell mismatch in A1');
except
fail('Cannot read file with format ' + GetSpreadFormatName(ord(AFormat)));
end;
finally
MyWorkbook.Free;
DeleteFile(TempFile);
end;
end;
{ BIFF2 }
procedure TSpreadFileFormatTests.TestAutoDetect_BIFF2;
begin
TestAutoDetect(sfExcel2);
end;
{ BIFF5 }
procedure TSpreadFileFormatTests.TestAutoDetect_BIFF5;
begin
TestAutoDetect(sfExcel5);
end;
{ BIFF8 }
procedure TSpreadFileFormatTests.TestAutoDetect_BIFF8;
begin
TestAutoDetect(sfExcel8);
end;
{ OOXML }
procedure TSpreadFileFormatTests.TestAutoDetect_OOXML;
begin
TestAutoDetect(sfOOXML);
end;
{ ODS }
procedure TSpreadFileFormatTests.TestAutoDetect_ODS;
begin
TestAutoDetect(sfOpenDocument);
end;
initialization
RegisterTest(TSpreadFileFormatTests);
end.

View File

@ -38,7 +38,7 @@
<PackageName Value="FCL"/>
</Item4>
</RequiredPackages>
<Units Count="30">
<Units Count="31">
<Unit0>
<Filename Value="spreadtestgui.lpr"/>
<IsPartOfProject Value="True"/>
@ -161,6 +161,10 @@
<IsPartOfProject Value="True"/>
<UnitName Value="SingleFormulaTests"/>
</Unit29>
<Unit30>
<Filename Value="fileformattests.pas"/>
<IsPartOfProject Value="True"/>
</Unit30>
</Units>
</ProjectOptions>
<CompilerOptions>
@ -186,7 +190,7 @@
</Other>
</CompilerOptions>
<Debugging>
<Exceptions Count="6">
<Exceptions Count="7">
<Item1>
<Name Value="EAbort"/>
<Enabled Value="False"/>
@ -209,6 +213,9 @@
<Name Value="EConvertError"/>
<Enabled Value="False"/>
</Item6>
<Item7>
<Name Value="EFPSpreadsheetReader"/>
</Item7>
</Exceptions>
</Debugging>
</CONFIG>

View File

@ -11,8 +11,8 @@ uses
{$ENDIF}
Interfaces, Forms, GuiTestRunner, testsutility,
datetests, stringtests, numberstests, manualtests, internaltests,
formattests, colortests, fonttests, optiontests, numformatparsertests,
formulatests, rpnFormulaUnit, singleformulatests,
fileformattests, formattests, colortests, fonttests, optiontests,
numformatparsertests, formulatests, rpnFormulaUnit, singleformulatests,
exceltests, emptycelltests, errortests, virtualmodetests,
insertdeletetests, ssttests, celltypetests, sortingtests, copytests,
enumeratortests, commenttests, hyperlinktests, pagelayouttests, protectiontests;