fpspreadsheet: Redo automatic file format detection: Add method CheckFileFormat to each reader for overriding (check file header or whatever).

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@6764 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2018-12-18 13:08:58 +00:00
parent 27d77fe114
commit 495fcb92eb
9 changed files with 190 additions and 102 deletions

View File

@ -171,6 +171,9 @@ type
constructor Create(AWorkbook: TsBasicWorkbook); override;
destructor Destroy; override;
{ File format detection }
class function CheckFileFormat(AStream: TStream): Boolean; override;
{ General reading methods }
procedure ReadFromStream(AStream: TStream;
APassword: String = ''; AParams: TsStreamParams = []); override;
@ -1159,6 +1162,11 @@ begin
end;
end;
{@@ ----------------------------------------------------------------------------
  File format detection for the OpenDocument reader: reports whether the
  stream starts with the ZIP signature, as determined by HasZipHeader.

  @param  AStream  Stream to be inspected
  @return The result of HasZipHeader for this stream
-------------------------------------------------------------------------------}
class function TsSpreadOpenDocReader.CheckFileFormat(AStream: TStream): Boolean;
begin
  // The whole decision is delegated to the shared ZIP-signature helper.
  Exit(HasZipHeader(AStream));
end;
function TsSpreadOpenDocReader.ExtractFormatIndexFromStyle(ACellStyleName: String;
ACol: Integer): Integer;
var

View File

@ -8488,8 +8488,7 @@ end;
{@@ ----------------------------------------------------------------------------
Helper method for determining the spreadsheet type. Read the first few bytes
of a file and determines the spreadsheet type from the characteristic
signature. Only implemented for xls files where several file types have the
same extension
signature.
-------------------------------------------------------------------------------}
class procedure TsWorkbook.GetFormatFromFileHeader(const AFileName: TFileName;
out AFormatIDs: TsSpreadFormatIDArray);
@ -8505,85 +8504,32 @@ begin
end;
{@@ ----------------------------------------------------------------------------
Helper method for determining the spreadsheet format. Read the first few bytes
of a stream and determines the spreadsheet type from the characteristic
signature.
Helper method for determining the spreadsheet format. Reads the first
few bytes of a stream and determines the spreadsheet type from the
characteristic signature.
-------------------------------------------------------------------------------}
class procedure TsWorkbook.GetFormatFromFileHeader(AStream: TStream;
out AFormatIDs: TsSpreadFormatIDArray); overload;
const
BIFF2_HEADER: array[0..3] of byte = (
$09,$00, $04,$00); // they are common to all BIFF2 files that I've seen
BIFF58_HEADER: array[0..7] of byte = (
$D0,$CF, $11,$E0, $A1,$B1, $1A,$E1);
ZIP_HEADER: array[0..1] of byte = (
byte('P'), byte('K'));
function ValidOLEStream(AStream: TStream; AName: String): Boolean;
var
fsOLE: TVirtualLayer_OLE;
begin
AStream.Position := 0;
fsOLE := TVirtualLayer_OLE.Create(AStream);
try
fsOLE.Initialize;
Result := fsOLE.FileExists('/'+AName);
finally
fsOLE.Free;
end;
end;
var
buf: packed array[0..7] of byte = (0,0,0,0,0,0,0,0);
i: Integer;
reader: TsSpreadReaderClass;
fmtIDs: TsSpreadformatIDArray;
i, j: Integer;
begin
SetLength(AFormatIDs, 0);
if AStream = nil then
exit;
// Read first 8 bytes
i := AStream.Read(buf, Length(buf));
if i < Length(buf) then
exit;
// Check for zip header of xlsx and ods
if (buf[0] = ZIP_HEADER[0]) and (buf[1] = ZIP_HEADER[1]) then begin
SetLength(AFormatIDs, 2);
AFormatIDs[0] := ord(sfOOXML);
AFormatIDs[1] := ord(sfOpenDocument);
exit;
end;
// Check for Excel 2
for i:=0 to High(BIFF2_HEADER) do
if buf[i] = BIFF2_HEADER[i] then
begin
SetLength(AFormatIDs, 1);
AFormatIDs[0] := ord(sfExcel2);
exit;
fmtIDs := GetSpreadFormats(faRead, [ord(sfExcel8)]);
SetLength(AFormatIDs, Length(fmtIDs));
j := 0;
for i:=0 to High(fmtIDs) do begin
reader := GetSpreadReaderClass(fmtIDs[i]);
if Assigned(reader) and reader.CheckFileFormat(AStream) then begin
AFormatIDs[j] := fmtIDs[i];
inc(j);
end;
// Check for Excel 5 or 8
for i:=0 to High(BIFF58_HEADER) do
if buf[i] <> BIFF58_HEADER[i] then
exit;
// Now we know that the file is a Microsoft compound document.
// We check for Excel 5 in which the stream is named "Book"
if ValidOLEStream(AStream, 'Book') then begin
SetLength(AFormatIDs, 1);
AFormatIDs[0] := ord(sfExcel5);
exit;
end;
// Now we check for Excel 8 which names the stream "Workbook"
if ValidOLEStream(AStream, 'Workbook') then begin
SetLength(AFormatIDs, 1);
AFormatIDs[0] := ord(sfExcel8);
exit;
end;
SetLength(AFormatIDs, j);
end;
{@@ ----------------------------------------------------------------------------
@ -8694,22 +8640,11 @@ var
fmtID: TsSpreadFormatID;
fileFormats: TsSpreadFormatIDArray;
i: Integer;
found: Boolean;
ext: String;
begin
if not FileExists(AFileName) then
raise EFPSpreadsheetReader.CreateFmt(rsFileNotFound, [AFileName]);
// First try to determine file format from the extension
if GetFormatFromFileName(AFilename, fmtID) then begin
try
ReadFromFile(AFileName, fmtID, APassword, AParams);
exit;
except
// format does not match. We must continue with rest of procedure
end;
end else
fmtID := MaxInt;
// Try to get file format from file header
GetFormatFromFileHeader(AFileName, fileformats);
if Length(fileformats) = 0 then
@ -8719,21 +8654,14 @@ begin
if Length(fileformats) = 0 then
fileformats := GetSpreadFormats(faRead, [ord(sfExcel8)]);
// Remove already tested format
found := false;
i := 0;
while (i <= High(fileFormats)) do begin
if fileFormats[i] = fmtID then begin
found := true;
inc(i);
while (i <= High(fileFormats)) do begin
fileFormats[i-1] := fileFormats[i];
inc(i);
end;
end else
inc(i);
end;
if found then SetLength(fileFormats, Length(fileFormats)-1);
// Move file format corresponding to file extension to the top to load it first.
ext := Lowercase(ExtractFileExt(AFileName));
for i := 0 to High(fileformats) do
if ext = GetSpreadFormatExt(fileformats[i]) then begin
fmtID := fileformats[0];
fileFormats[0] := fileformats[i];
fileFormats[i] := fmtID;
end;
// No file format found for this file --> error
if Length(fileformats) = 0 then
@ -8747,7 +8675,6 @@ begin
success := true;
break; // Exit the loop if we reach this point successfully.
except
//success := false;
end;
end;

View File

@ -46,6 +46,8 @@ type
{ TsBasicSpreadReader }
TsBasicSpreadReader = class(TsBasicSpreadReaderWriter)
public
{ File format detection }
class function CheckFileFormat(AStream: TStream): boolean; virtual; abstract;
{ General reading methods }
procedure ReadFromFile(AFileName: string; APassword: String = '';
AParams: TsStreamParams = []); virtual; abstract;
@ -92,6 +94,8 @@ type
{ Helper methods }
procedure AddBuiltinNumFormats; virtual;
{@@ More detailed check for file format }
class function CheckFileFormatDetails(AStream: TStream): Boolean; virtual;
{@@ Removes column records if all of them have the same column width }
procedure FixCols(AWorksheet: TsBasicWorksheet);
{@@ Removes row records if all of them have the same row height }
@ -113,7 +117,9 @@ type
constructor Create(AWorkbook: TsBasicWorkbook); override;
destructor Destroy; override;
{ General writing methods }
{ File format detection }
class function CheckFileFormat(AStream: TStream): Boolean; override;
{ General reading methods }
procedure ReadFromFile(AFileName: string; APassword: String = '';
AParams: TsStreamParams = []); override;
procedure ReadFromStream(AStream: TStream; APassword: String = '';
@ -350,6 +356,24 @@ begin
// to be overridden by descendants
end;
{@@ ----------------------------------------------------------------------------
  Default implementation of the file format check. Descendants are expected
  to override it and inspect the stream header for the signature of their
  format.

  This default does not look at the stream at all and always returns true,
  i.e. the file stays a candidate for a load attempt with this reader.

  @param  AStream  Stream to be inspected (ignored here)
  @return Always true
-------------------------------------------------------------------------------}
class function TsCustomSpreadReader.CheckFileFormat(AStream: TStream): boolean;
begin
  Exit(true);
end;
{@@ ----------------------------------------------------------------------------
  More detailed check of the file format, meant to be overridden by readers
  which need to look beyond the plain header signature.

  This default does not look at the stream and accepts every file.

  @param  AStream  Stream to be inspected (ignored here)
  @return Always true
-------------------------------------------------------------------------------}
class function TsCustomSpreadReader.CheckFileFormatDetails(AStream: TStream): Boolean;
begin
  Exit(true);
end;
{@@ ----------------------------------------------------------------------------
Deletes unnecessary column records as they are written by some
Office applications when they convert a file to another format.

View File

@ -57,6 +57,8 @@ function CreateTempStream(AWorkbook: TsBasicWorkbook;
AFileNameBase: String): TStream;
procedure DestroyTempStream(AStream: TStream);
function HasZipHeader(AStream: TStream): Boolean;
implementation
@ -410,5 +412,26 @@ begin
end;
{ Returns true if the stream begins with the ZIP signature 'PK'. Needed for
  file format detection.

  The stream is read from position 0; the position it had on entry is
  restored before returning, also when reading raises an exception.
  A nil stream, or a stream shorter than two bytes, yields false. }
function HasZipHeader(AStream: TStream): Boolean;
const
  // AnsiChar (not char) so that each element is exactly one byte of the file,
  // independent of the compiler's string/char mode.
  ZIP_HEADER: packed array[0..1] of AnsiChar = ('P', 'K');
var
  savedPos: Int64;
  buf: packed array[0..1] of AnsiChar;
begin
  Result := false;
  // Nothing to inspect - report "no ZIP header" instead of crashing.
  if AStream = nil then
    exit;
  savedPos := AStream.Position;
  try
    AStream.Position := 0;
    // A short read means the stream cannot contain the full signature.
    if AStream.Read(buf, SizeOf(buf)) < SizeOf(buf) then
      exit;
    Result := (buf[0] = ZIP_HEADER[0]) and (buf[1] = ZIP_HEADER[1]);
  finally
    // Leave the stream as it was found so the caller can continue reading.
    AStream.Position := savedPos;
  end;
end;
end.

View File

@ -79,6 +79,8 @@ type
{ General reading methods }
procedure ReadFromStream(AStream: TStream; APassword: String = '';
AParams: TsStreamParams = []); override;
{ File format detection }
class function CheckfileFormat(AStream: TStream): Boolean; override;
end;
@ -331,6 +333,35 @@ begin
FFirstNumFormatIndexInFile := 0;
end;
{@@ ----------------------------------------------------------------------------
  File format detection: compares the first four bytes of the stream with the
  BIFF2 signature.

  The stream is read from position 0. The position it had on entry is
  restored before the function returns.

  @param  AStream  Stream to be inspected
  @return True if at least four bytes could be read and all of them match
          the BIFF2 signature, false otherwise
-------------------------------------------------------------------------------}
class function TsSpreadBIFF2Reader.CheckFileFormat(AStream: TStream): Boolean;
const
  // Leading bytes found in BIFF2 files
  BIFF2_HEADER: packed array[0..3] of byte = ($09, $00, $04, $00);
var
  savedPos: Int64;
  signature: packed array[0..3] of byte;
  bytesRead: Integer;
  idx: Integer;
begin
  Result := false;
  savedPos := AStream.Position;
  try
    AStream.Position := 0;
    bytesRead := AStream.Read(signature, SizeOf(signature));
    // A stream too short for the signature cannot be a BIFF2 file.
    Result := bytesRead >= Length(BIFF2_HEADER);
    // Compare byte by byte and stop at the first mismatch.
    idx := 0;
    while Result and (idx <= High(signature)) do
    begin
      Result := signature[idx] = BIFF2_HEADER[idx];
      inc(idx);
    end;
  finally
    AStream.Position := savedPos;
  end;
end;
procedure TsSpreadBIFF2Reader.ReadBlank(AStream: TStream);
var
ARow, ACol: Cardinal;

View File

@ -72,6 +72,7 @@ type
TsSpreadBIFF5Reader = class(TsSpreadBIFFReader)
protected
class function CheckFileFormatDetails(AStream: TStream): Boolean; override;
procedure PopulatePalette; override;
{ Record writing methods }
procedure ReadBOUNDSHEET(AStream: TStream);
@ -229,7 +230,7 @@ uses
LazLogger,
{$ENDIF}
Math,
fpsStrings, fpspreadsheet, fpsReaderWriter, fpsStreams,
uvirtuallayer_ole, fpsStrings, fpspreadsheet, fpsReaderWriter, fpsStreams,
fpsPalette, fpsNumFormat, xlsconst;
const
@ -363,6 +364,24 @@ type
{ TsSpreadBIFF5Reader }
{------------------------------------------------------------------------------}
{@@ ----------------------------------------------------------------------------
  Checks, for automatic file format detection, whether the OLE document
  contains a stream named 'Book' - this is typical of BIFF5 files.

  The stream position is set to 0 before the OLE layer is created; it is not
  restored by this method.

  @param  AStream  Stream holding the OLE document
  @return True if the OLE layer reports that '/Book' exists
-------------------------------------------------------------------------------}
class function TsSpreadBIFF5Reader.CheckFileFormatDetails(AStream: TStream): Boolean;
var
  oleLayer: TVirtualLayer_OLE;
begin
  AStream.Position := 0;
  oleLayer := TVirtualLayer_OLE.Create(AStream);
  try
    oleLayer.Initialize;
    Result := oleLayer.FileExists('/Book');
  finally
    // The layer object is only needed for this lookup.
    oleLayer.Free;
  end;
end;
{@@ ----------------------------------------------------------------------------
Populates the reader's default palette using the BIFF5 default colors.
-------------------------------------------------------------------------------}

View File

@ -133,6 +133,7 @@ type
function ReadWideString(const AStream: TStream;
const AUse8BitLength: Boolean): WideString; overload;
protected
class function CheckFileFormatDetails(AStream: TStream): Boolean; override;
procedure PopulatePalette; override;
procedure ReadBOUNDSHEET(AStream: TStream);
procedure ReadCONTINUE(const AStream: TStream);
@ -366,8 +367,8 @@ uses
LazLogger,
{$ENDIF}
Math, lconvencoding, LazFileUtils, URIParser,
fpsStrings, {%H-}fpsPatches, fpsStreams, fpsReaderWriter, fpsPalette,
fpspreadsheet, fpsNumFormat, fpsExprParser, xlsEscher;
uvirtuallayer_ole, fpsStrings, {%H-}fpsPatches, fpsStreams, fpsReaderWriter,
fpsPalette, fpspreadsheet, fpsNumFormat, fpsExprParser, xlsEscher;
const
{ Excel record IDs }
@ -794,6 +795,24 @@ begin
inherited;
end;
{@@ ----------------------------------------------------------------------------
  Checks, for automatic file format detection, whether the OLE document
  contains a stream named 'Workbook' - this is typical of BIFF8 files.

  The stream position is set to 0 before the OLE layer is created; it is not
  restored by this method.

  @param  AStream  Stream holding the OLE document
  @return True if the OLE layer reports that '/Workbook' exists
-------------------------------------------------------------------------------}
class function TsSpreadBIFF8Reader.CheckFileFormatDetails(AStream: TStream): Boolean;
var
  oleLayer: TVirtualLayer_OLE;
begin
  AStream.Position := 0;
  oleLayer := TVirtualLayer_OLE.Create(AStream);
  try
    oleLayer.Initialize;
    Result := oleLayer.FileExists('/Workbook');
  finally
    // The layer object is only needed for this lookup.
    oleLayer.Free;
  end;
end;
{@@ ----------------------------------------------------------------------------
Populates the reader's default palette using the BIFF8 default colors.
-------------------------------------------------------------------------------}

View File

@ -562,6 +562,8 @@ type
public
constructor Create(AWorkbook: TsBasicWorkbook); override;
destructor Destroy; override;
{ File format detection }
class function CheckFileFormat(AStream: TStream): Boolean; override;
end;
@ -1180,6 +1182,35 @@ begin
);
end;
{@@ ----------------------------------------------------------------------------
  File format detection for the BIFF5/BIFF8 readers: compares the first eight
  bytes of the stream with the signature shared by BIFF5 and BIFF8 files and,
  if they match, hands the final decision to CheckFileFormatDetails.

  The stream is read from position 0. The position it had on entry is
  restored before the function returns, also after the detail check.

  @param  AStream  Stream to be inspected
  @return True if exactly eight bytes could be read, all of them match the
          signature, and CheckFileFormatDetails accepts the stream
-------------------------------------------------------------------------------}
class function TsSpreadBIFFReader.CheckFileFormat(AStream: TStream): Boolean;
const
  BIFF58_HEADER: packed array[0..7] of byte = (
    $D0, $CF, $11, $E0, $A1, $B1, $1A, $E1);
var
  savedPos: Int64;
  signature: packed array[0..7] of byte;
  headerMatches: Boolean;
  idx: Integer;
begin
  Result := false;
  savedPos := AStream.Position;
  try
    AStream.Position := 0;
    // The complete signature must be available.
    headerMatches := AStream.Read(signature, Length(signature)) = Length(BIFF58_HEADER);
    // Compare byte by byte and stop at the first mismatch.
    idx := 0;
    while headerMatches and (idx <= High(BIFF58_HEADER)) do
    begin
      headerMatches := signature[idx] = BIFF58_HEADER[idx];
      inc(idx);
    end;
    // The signature alone is shared by several formats; let the concrete
    // reader class decide whether the content is really its own.
    if headerMatches then
      Result := CheckFileFormatDetails(AStream);
  finally
    AStream.Position := savedPos;
  end;
end;
{@@ ----------------------------------------------------------------------------
Applies the XF formatting referred to by XFIndex to the specified cell
-------------------------------------------------------------------------------}

View File

@ -103,6 +103,7 @@ type
public
constructor Create(AWorkbook: TsBasicWorkbook); override;
destructor Destroy; override;
class function CheckFileFormat(AStream: TStream): Boolean; override;
procedure ReadFromStream(AStream: TStream; APassword: String = '';
AParams: TsStreamParams = []); override;
end;
@ -490,6 +491,11 @@ begin
end;
end;
{@@ ----------------------------------------------------------------------------
  File format detection for the OOXML reader: reports whether the stream
  starts with the ZIP signature, as determined by HasZipHeader.

  @param  AStream  Stream to be inspected
  @return The result of HasZipHeader for this stream
-------------------------------------------------------------------------------}
class function TsSpreadOOXMLReader.CheckFileFormat(AStream: TStream): Boolean;
begin
  // The whole decision is delegated to the shared ZIP-signature helper.
  Exit(HasZipHeader(AStream));
end;
function TsSpreadOOXMLReader.FindCommentsFileName(ANode: TDOMNode): String;
var
s: String;