You've already forked lazarus-ccr
fpspreadsheet: Improved autodetection of file format (e.g. xls files renamed to xlsx - see https://forum.lazarus.freepascal.org/index.php/topic,41830.msg291032.html). Add corresponding test cases.
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@6554 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
@ -687,10 +687,10 @@ type
|
|||||||
FEmbeddedObjList: TFPList;
|
FEmbeddedObjList: TFPList;
|
||||||
|
|
||||||
{ Internal methods }
|
{ Internal methods }
|
||||||
class function GetFormatFromFileHeader(const AFileName: TFileName;
|
class procedure GetFormatFromFileHeader(const AFileName: TFileName;
|
||||||
out AFormatID: TsSpreadFormatID): Boolean; overload;
|
out AFormatIDs: TsSpreadFormatIDArray); overload;
|
||||||
class function GetFormatFromFileHeader(AStream: TStream;
|
class procedure GetFormatFromFileHeader(AStream: TStream;
|
||||||
out AFormatID: TsSpreadFormatID): Boolean; overload;
|
out AFormatIDs: TsSpreadFormatIDArray); overload;
|
||||||
|
|
||||||
procedure PrepareBeforeReading;
|
procedure PrepareBeforeReading;
|
||||||
procedure PrepareBeforeSaving;
|
procedure PrepareBeforeSaving;
|
||||||
@ -8279,14 +8279,14 @@ end;
|
|||||||
signature. Only implemented for xls files where several file types have the
|
signature. Only implemented for xls files where several file types have the
|
||||||
same extension
|
same extension
|
||||||
-------------------------------------------------------------------------------}
|
-------------------------------------------------------------------------------}
|
||||||
class function TsWorkbook.GetFormatFromFileHeader(const AFileName: TFileName;
|
class procedure TsWorkbook.GetFormatFromFileHeader(const AFileName: TFileName;
|
||||||
out AFormatID: TsSpreadFormatID): Boolean;
|
out AFormatIDs: TsSpreadFormatIDArray);
|
||||||
var
|
var
|
||||||
stream: TStream;
|
stream: TStream;
|
||||||
begin
|
begin
|
||||||
stream := TFileStream.Create(AFileName, fmOpenRead + fmShareDenyNone);
|
stream := TFileStream.Create(AFileName, fmOpenRead + fmShareDenyNone);
|
||||||
try
|
try
|
||||||
Result := GetFormatFromFileHeader(stream, AFormatID)
|
GetFormatFromFileHeader(stream, AFormatIDs)
|
||||||
finally
|
finally
|
||||||
stream.Free;
|
stream.Free;
|
||||||
end;
|
end;
|
||||||
@ -8295,16 +8295,17 @@ end;
|
|||||||
{@@ ----------------------------------------------------------------------------
|
{@@ ----------------------------------------------------------------------------
|
||||||
Helper method for determining the spreadsheet format. Read the first few bytes
|
Helper method for determining the spreadsheet format. Read the first few bytes
|
||||||
of a stream and determines the spreadsheet type from the characteristic
|
of a stream and determines the spreadsheet type from the characteristic
|
||||||
signature. Only implemented for xls where several file types have the same
|
signature.
|
||||||
extension.
|
|
||||||
-------------------------------------------------------------------------------}
|
-------------------------------------------------------------------------------}
|
||||||
class function TsWorkbook.GetFormatFromFileHeader(AStream: TStream;
|
class procedure TsWorkbook.GetFormatFromFileHeader(AStream: TStream;
|
||||||
out AFormatID: TsSpreadFormatID): Boolean; overload;
|
out AFormatIDs: TsSpreadFormatIDArray); overload;
|
||||||
const
|
const
|
||||||
BIFF2_HEADER: array[0..3] of byte = (
|
BIFF2_HEADER: array[0..3] of byte = (
|
||||||
$09,$00, $04,$00); // they are common to all BIFF2 files that I've seen
|
$09,$00, $04,$00); // they are common to all BIFF2 files that I've seen
|
||||||
BIFF58_HEADER: array[0..7] of byte = (
|
BIFF58_HEADER: array[0..7] of byte = (
|
||||||
$D0,$CF, $11,$E0, $A1,$B1, $1A,$E1);
|
$D0,$CF, $11,$E0, $A1,$B1, $1A,$E1);
|
||||||
|
ZIP_HEADER: array[0..1] of byte = (
|
||||||
|
byte('P'), byte('K'));
|
||||||
|
|
||||||
function ValidOLEStream(AStream: TStream; AName: String): Boolean;
|
function ValidOLEStream(AStream: TStream; AName: String): Boolean;
|
||||||
var
|
var
|
||||||
@ -8325,44 +8326,53 @@ var
|
|||||||
i: Integer;
|
i: Integer;
|
||||||
ok: Boolean;
|
ok: Boolean;
|
||||||
begin
|
begin
|
||||||
Result := false;
|
SetLength(AFormatIDs, 0);
|
||||||
|
|
||||||
if AStream = nil then
|
if AStream = nil then
|
||||||
exit;
|
exit;
|
||||||
|
|
||||||
// Read first 8 bytes
|
// Read first 8 bytes
|
||||||
AStream.ReadBuffer(buf, 8);
|
i := AStream.Read(buf, Length(buf));
|
||||||
|
if i < Length(buf) then
|
||||||
|
exit;
|
||||||
|
|
||||||
|
// Check for zip header of xlsx and ods
|
||||||
|
if (buf[0] = ZIP_HEADER[0]) and (buf[1] = ZIP_HEADER[1]) then begin
|
||||||
|
SetLength(AFormatIDs, 2);
|
||||||
|
AFormatIDs[0] := ord(sfOOXML);
|
||||||
|
AFormatIDs[1] := ord(sfOpenDocument);
|
||||||
|
exit;
|
||||||
|
end;
|
||||||
|
|
||||||
// Check for Excel 2
|
// Check for Excel 2
|
||||||
ok := true;
|
ok := true;
|
||||||
for i:=0 to High(BIFF2_HEADER) do
|
for i:=0 to High(BIFF2_HEADER) do
|
||||||
if buf[i] <> BIFF2_HEADER[i] then
|
if buf[i] = BIFF2_HEADER[i] then
|
||||||
begin
|
begin
|
||||||
ok := false;
|
SetLength(AFormatIDs, 1);
|
||||||
break;
|
AFormatIDs[0] := ord(sfExcel2);
|
||||||
|
exit;
|
||||||
end;
|
end;
|
||||||
if ok then
|
|
||||||
begin
|
|
||||||
AFormatID := ord(sfExcel2);
|
|
||||||
exit(true);
|
|
||||||
end;
|
|
||||||
|
|
||||||
// Check for Excel 5 or 8
|
// Check for Excel 5 or 8
|
||||||
for i:=0 to High(BIFF58_HEADER) do
|
for i:=0 to High(BIFF58_HEADER) do
|
||||||
if buf[i] <> BIFF58_HEADER[i] then
|
if buf[i] <> BIFF58_HEADER[i] then
|
||||||
exit(false);
|
exit;
|
||||||
|
|
||||||
// Now we know that the file is a Microsoft compound document.
|
// Now we know that the file is a Microsoft compound document.
|
||||||
|
|
||||||
// We check for Excel 5 in which the stream is named "Book"
|
// We check for Excel 5 in which the stream is named "Book"
|
||||||
if ValidOLEStream(AStream, 'Book') then begin
|
if ValidOLEStream(AStream, 'Book') then begin
|
||||||
AFormatID := ord(sfExcel5);
|
SetLength(AFormatIDs, 1);
|
||||||
exit(true);
|
AFormatIDs[0] := ord(sfExcel5);
|
||||||
|
exit;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
// Now we check for Excel 8 which names the stream "Workbook"
|
// Now we check for Excel 8 which names the stream "Workbook"
|
||||||
if ValidOLEStream(AStream, 'Workbook') then begin
|
if ValidOLEStream(AStream, 'Workbook') then begin
|
||||||
AFormatID := ord(sfExcel8);
|
SetLength(AFormatIDs, 1);
|
||||||
exit(true);
|
AFormatIDs[0] := ord(sfExcel8);
|
||||||
|
exit;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -8470,8 +8480,8 @@ end;
|
|||||||
procedure TsWorkbook.ReadFromFile(AFileName: string; APassword: String = '';
|
procedure TsWorkbook.ReadFromFile(AFileName: string; APassword: String = '';
|
||||||
AParams: TsStreamParams = []);
|
AParams: TsStreamParams = []);
|
||||||
var
|
var
|
||||||
formatID: TsSpreadFormatID;
|
formatIDs: TsSpreadFormatIDArray;
|
||||||
canLoad, success: Boolean;
|
success: Boolean;
|
||||||
fileFormats: TsSpreadFormatIDArray;
|
fileFormats: TsSpreadFormatIDArray;
|
||||||
ext: String;
|
ext: String;
|
||||||
i: Integer;
|
i: Integer;
|
||||||
@ -8481,12 +8491,18 @@ begin
|
|||||||
|
|
||||||
ext := LowerCase(ExtractFileExt(AFileName));
|
ext := LowerCase(ExtractFileExt(AFileName));
|
||||||
|
|
||||||
|
// Try to get file format from file header
|
||||||
|
GetFormatFromFileHeader(AFileName, fileformats);
|
||||||
|
if Length(fileformats) = 0 then
|
||||||
|
// If not successful use formats defined by extension
|
||||||
|
fileFormats := GetSpreadFormatsFromFileName(faRead, AFileName);
|
||||||
|
(*
|
||||||
// Collect all formats which have the same extension
|
// Collect all formats which have the same extension
|
||||||
fileFormats := GetSpreadFormatsFromFileName(faRead, AFileName);
|
fileFormats := GetSpreadFormatsFromFileName(faRead, AFileName);
|
||||||
if (Length(fileFormats) > 1) and (ext = STR_EXCEL_EXTENSION) then
|
if (Length(fileFormats) > 1) {and (ext = STR_EXCEL_EXTENSION)} then
|
||||||
begin
|
begin
|
||||||
// In case of xls files we try to determine the format from the header
|
// In case of xls files we try to determine the format from the header
|
||||||
canLoad := GetFormatFromFileHeader(AFilename, formatID);
|
canLoad := GetFormatFromFileHeader(AFilename, formatIDs);
|
||||||
if canLoad then begin
|
if canLoad then begin
|
||||||
// Analysis of the file header was successful --> we know the file
|
// Analysis of the file header was successful --> we know the file
|
||||||
// format and shorten the list of fileformats to just one item.
|
// format and shorten the list of fileformats to just one item.
|
||||||
@ -8498,7 +8514,7 @@ begin
|
|||||||
// We begin with BIFF8 which is the most common xls format now.
|
// We begin with BIFF8 which is the most common xls format now.
|
||||||
// The next command re-reads the format list with BIFF8 at the first place.
|
// The next command re-reads the format list with BIFF8 at the first place.
|
||||||
fileFormats := GetSpreadFormatsFromFileName(faRead, AFileName, ord(sfExcel8));
|
fileFormats := GetSpreadFormatsFromFileName(faRead, AFileName, ord(sfExcel8));
|
||||||
end;
|
end; *)
|
||||||
|
|
||||||
// No file format found for this file --> error
|
// No file format found for this file --> error
|
||||||
if Length(fileformats) = 0 then
|
if Length(fileformats) = 0 then
|
||||||
|
142
components/fpspreadsheet/tests/fileformattests.pas
Normal file
142
components/fpspreadsheet/tests/fileformattests.pas
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
unit fileformattests;
|
||||||
|
|
||||||
|
{$mode objfpc}{$H+}
|
||||||
|
|
||||||
|
interface
|
||||||
|
{ Cell type tests
|
||||||
|
This unit tests writing the various cell data types out to and reading them
|
||||||
|
back from files.
|
||||||
|
}
|
||||||
|
|
||||||
|
uses
|
||||||
|
// Not using Lazarus package as the user may be working with multiple versions
|
||||||
|
// Instead, add .. to unit search path
|
||||||
|
Classes, SysUtils, fpcunit, testregistry,
|
||||||
|
fpstypes, fpspreadsheet,
|
||||||
|
xlsbiff2, xlsbiff5, xlsbiff8, fpsOpenDocument,
|
||||||
|
testsutility;
|
||||||
|
|
||||||
|
type
|
||||||
|
{ TSpreadFileFormatTests }
|
||||||
|
// Write cell types to xls/xml file and read back
|
||||||
|
TSpreadFileFormatTests = class(TTestCase)
|
||||||
|
private
|
||||||
|
|
||||||
|
protected
|
||||||
|
// Set up expected values:
|
||||||
|
procedure SetUp; override;
|
||||||
|
procedure TearDown; override;
|
||||||
|
procedure TestAutoDetect(AFormat: TsSpreadsheetFormat);
|
||||||
|
|
||||||
|
published
|
||||||
|
procedure TestAutoDetect_BIFF2;
|
||||||
|
procedure TestAutoDetect_BIFF5;
|
||||||
|
procedure TestAutoDetect_BIFF8;
|
||||||
|
procedure TestAutoDetect_OOXML;
|
||||||
|
procedure TestAutoDetect_ODS;
|
||||||
|
end;
|
||||||
|
|
||||||
|
implementation
|
||||||
|
|
||||||
|
uses
|
||||||
|
fpsReaderWriter;
|
||||||
|
|
||||||
|
const
|
||||||
|
SheetName = 'FileFormat';
|
||||||
|
|
||||||
|
|
||||||
|
{ TSpreadFileFormatTests }
|
||||||
|
|
||||||
|
procedure TSpreadFileFormatTests.SetUp;
|
||||||
|
begin
|
||||||
|
inherited SetUp;
|
||||||
|
end;
|
||||||
|
|
||||||
|
procedure TSpreadFileFormatTests.TearDown;
|
||||||
|
begin
|
||||||
|
inherited TearDown;
|
||||||
|
end;
|
||||||
|
|
||||||
|
procedure TSpreadFileFormatTests.TestAutoDetect(AFormat: TsSpreadsheetFormat);
|
||||||
|
const
|
||||||
|
EXPECTED_TEXT = 'abcefg';
|
||||||
|
var
|
||||||
|
MyWorksheet: TsWorksheet;
|
||||||
|
MyWorkbook: TsWorkbook;
|
||||||
|
row, col: Integer;
|
||||||
|
MyCell: PCell;
|
||||||
|
value: Boolean;
|
||||||
|
TempFile: string; //write xls/xml to this file and read back from it
|
||||||
|
actualText: String;
|
||||||
|
begin
|
||||||
|
MyWorkbook := TsWorkbook.Create;
|
||||||
|
try
|
||||||
|
MyWorkSheet:= MyWorkBook.AddWorksheet(SheetName);
|
||||||
|
|
||||||
|
// write any content to the file
|
||||||
|
MyWorksheet.WriteText(0, 0, EXPECTED_TEXT);
|
||||||
|
|
||||||
|
// Write workbook to file using format specified, but with wrong extension
|
||||||
|
TempFile := ChangeFileExt(NewTempFile, '.abc');
|
||||||
|
MyWorkBook.WriteToFile(TempFile, AFormat, true);
|
||||||
|
finally
|
||||||
|
MyWorkbook.Free;
|
||||||
|
end;
|
||||||
|
|
||||||
|
// Open the spreadsheet
|
||||||
|
MyWorkbook := TsWorkbook.Create;
|
||||||
|
try
|
||||||
|
// Try to read file and detect format automatically
|
||||||
|
try
|
||||||
|
MyWorkbook.ReadFromFile(TempFile);
|
||||||
|
// If the tests gets here the format was detected correctly.
|
||||||
|
// Quickly check the cell content
|
||||||
|
MyWorksheet := MyWorkbook.GetFirstWorksheet;
|
||||||
|
actualText := MyWorksheet.ReadAsUTF8Text(0, 0);
|
||||||
|
CheckEquals(EXPECTED_TEXT, actualText, 'Cell mismatch in A1');
|
||||||
|
except
|
||||||
|
fail('Cannot read file with format ' + GetSpreadFormatName(ord(AFormat)));
|
||||||
|
end;
|
||||||
|
finally
|
||||||
|
MyWorkbook.Free;
|
||||||
|
DeleteFile(TempFile);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
{ BIFF2 }
|
||||||
|
procedure TSpreadFileFormatTests.TestAutoDetect_BIFF2;
|
||||||
|
begin
|
||||||
|
TestAutoDetect(sfExcel2);
|
||||||
|
end;
|
||||||
|
|
||||||
|
{ BIFF5 }
|
||||||
|
procedure TSpreadFileFormatTests.TestAutoDetect_BIFF5;
|
||||||
|
begin
|
||||||
|
TestAutoDetect(sfExcel5);
|
||||||
|
end;
|
||||||
|
|
||||||
|
{ BIFF8 }
|
||||||
|
procedure TSpreadFileFormatTests.TestAutoDetect_BIFF8;
|
||||||
|
begin
|
||||||
|
TestAutoDetect(sfExcel8);
|
||||||
|
end;
|
||||||
|
|
||||||
|
{ OOXML }
|
||||||
|
procedure TSpreadFileFormatTests.TestAutoDetect_OOXML;
|
||||||
|
begin
|
||||||
|
TestAutoDetect(sfOOXML);
|
||||||
|
end;
|
||||||
|
|
||||||
|
{ ODS }
|
||||||
|
procedure TSpreadFileFormatTests.TestAutoDetect_ODS;
|
||||||
|
begin
|
||||||
|
TestAutoDetect(sfOpenDocument);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
initialization
|
||||||
|
RegisterTest(TSpreadFileFormatTests);
|
||||||
|
|
||||||
|
end.
|
||||||
|
|
@ -38,7 +38,7 @@
|
|||||||
<PackageName Value="FCL"/>
|
<PackageName Value="FCL"/>
|
||||||
</Item4>
|
</Item4>
|
||||||
</RequiredPackages>
|
</RequiredPackages>
|
||||||
<Units Count="30">
|
<Units Count="31">
|
||||||
<Unit0>
|
<Unit0>
|
||||||
<Filename Value="spreadtestgui.lpr"/>
|
<Filename Value="spreadtestgui.lpr"/>
|
||||||
<IsPartOfProject Value="True"/>
|
<IsPartOfProject Value="True"/>
|
||||||
@ -161,6 +161,10 @@
|
|||||||
<IsPartOfProject Value="True"/>
|
<IsPartOfProject Value="True"/>
|
||||||
<UnitName Value="SingleFormulaTests"/>
|
<UnitName Value="SingleFormulaTests"/>
|
||||||
</Unit29>
|
</Unit29>
|
||||||
|
<Unit30>
|
||||||
|
<Filename Value="fileformattests.pas"/>
|
||||||
|
<IsPartOfProject Value="True"/>
|
||||||
|
</Unit30>
|
||||||
</Units>
|
</Units>
|
||||||
</ProjectOptions>
|
</ProjectOptions>
|
||||||
<CompilerOptions>
|
<CompilerOptions>
|
||||||
@ -186,7 +190,7 @@
|
|||||||
</Other>
|
</Other>
|
||||||
</CompilerOptions>
|
</CompilerOptions>
|
||||||
<Debugging>
|
<Debugging>
|
||||||
<Exceptions Count="6">
|
<Exceptions Count="7">
|
||||||
<Item1>
|
<Item1>
|
||||||
<Name Value="EAbort"/>
|
<Name Value="EAbort"/>
|
||||||
<Enabled Value="False"/>
|
<Enabled Value="False"/>
|
||||||
@ -209,6 +213,9 @@
|
|||||||
<Name Value="EConvertError"/>
|
<Name Value="EConvertError"/>
|
||||||
<Enabled Value="False"/>
|
<Enabled Value="False"/>
|
||||||
</Item6>
|
</Item6>
|
||||||
|
<Item7>
|
||||||
|
<Name Value="EFPSpreadsheetReader"/>
|
||||||
|
</Item7>
|
||||||
</Exceptions>
|
</Exceptions>
|
||||||
</Debugging>
|
</Debugging>
|
||||||
</CONFIG>
|
</CONFIG>
|
||||||
|
@ -11,8 +11,8 @@ uses
|
|||||||
{$ENDIF}
|
{$ENDIF}
|
||||||
Interfaces, Forms, GuiTestRunner, testsutility,
|
Interfaces, Forms, GuiTestRunner, testsutility,
|
||||||
datetests, stringtests, numberstests, manualtests, internaltests,
|
datetests, stringtests, numberstests, manualtests, internaltests,
|
||||||
formattests, colortests, fonttests, optiontests, numformatparsertests,
|
fileformattests, formattests, colortests, fonttests, optiontests,
|
||||||
formulatests, rpnFormulaUnit, singleformulatests,
|
numformatparsertests, formulatests, rpnFormulaUnit, singleformulatests,
|
||||||
exceltests, emptycelltests, errortests, virtualmodetests,
|
exceltests, emptycelltests, errortests, virtualmodetests,
|
||||||
insertdeletetests, ssttests, celltypetests, sortingtests, copytests,
|
insertdeletetests, ssttests, celltypetests, sortingtests, copytests,
|
||||||
enumeratortests, commenttests, hyperlinktests, pagelayouttests, protectiontests;
|
enumeratortests, commenttests, hyperlinktests, pagelayouttests, protectiontests;
|
||||||
|
Reference in New Issue
Block a user