From 7c8e0a8b3d004e95a59f70bda30f56b7061ff98b Mon Sep 17 00:00:00 2001 From: wp_xxyyzz Date: Sat, 1 Aug 2015 22:11:44 +0000 Subject: [PATCH] fpspreadsheet: Initial support for reading html files (data only, no formats, no nested tables). git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@4236 8e941d3f-bd1b-0410-a28a-d453659cc2b4 --- .../examples/read_write/csvdemo/csvread.lpr | 2 +- .../examples/read_write/htmldemo/htmlread.lpi | 66 +++ .../examples/read_write/htmldemo/htmlread.lpr | 70 +++ .../examples/read_write/htmldemo/readme.txt | 2 + .../examples/visual/fpsctrls/main.lfm | 4 +- .../examples/visual/fpsctrls/main.pas | 1 + components/fpspreadsheet/fpolestorage.pas | 0 components/fpspreadsheet/fpscsv.pas | 248 +------- components/fpspreadsheet/fpshtml.pas | 550 +++++++++++++++++- components/fpspreadsheet/fpshtmlutils.pas | 400 +++++++++++++ components/fpspreadsheet/fpsnumformat.pas | 274 ++++++++- .../fpspreadsheet/fpsnumformatparser.pas | 1 - .../fpspreadsheet/laz_fpspreadsheet.lpk | 6 +- .../fpspreadsheet/laz_fpspreadsheet.pas | 3 +- .../fpspreadsheet/tests/spreadtestgui.lpi | 3 + components/fpspreadsheet/xlsbiff5.pas | 4 +- components/fpspreadsheet/xlsbiff8.pas | 61 +- components/fpspreadsheet/xlscommon.pas | 21 +- 18 files changed, 1383 insertions(+), 333 deletions(-) create mode 100644 components/fpspreadsheet/examples/read_write/htmldemo/htmlread.lpi create mode 100644 components/fpspreadsheet/examples/read_write/htmldemo/htmlread.lpr mode change 100755 => 100644 components/fpspreadsheet/fpolestorage.pas create mode 100644 components/fpspreadsheet/fpshtmlutils.pas diff --git a/components/fpspreadsheet/examples/read_write/csvdemo/csvread.lpr b/components/fpspreadsheet/examples/read_write/csvdemo/csvread.lpr index 2079d1b8a..950e5cda4 100644 --- a/components/fpspreadsheet/examples/read_write/csvdemo/csvread.lpr +++ b/components/fpspreadsheet/examples/read_write/csvdemo/csvread.lpr @@ -23,7 +23,7 @@ begin MyDir := ExtractFilePath(ParamStr(0)); InputFileName := MyDir + 'test' + STR_COMMA_SEPARATED_EXTENSION; if not FileExists(InputFileName) then begin - WriteLn('Input file ', InputFileName, ' does not exist. Please run excel2write first.'); + WriteLn('Input file ', InputFileName, ' does not exist. Please run csvwrite first.'); Halt; end; diff --git a/components/fpspreadsheet/examples/read_write/htmldemo/htmlread.lpi b/components/fpspreadsheet/examples/read_write/htmldemo/htmlread.lpi new file mode 100644 index 000000000..cf636a9ab --- /dev/null +++ b/components/fpspreadsheet/examples/read_write/htmldemo/htmlread.lpi @@ -0,0 +1,66 @@ + + + + + + + + + + + + + <UseAppBundle Value="False"/> + </General> + <VersionInfo> + <StringTable ProductVersion=""/> + </VersionInfo> + <BuildModes Count="1"> + <Item1 Name="default" Default="True"/> + </BuildModes> + <PublishOptions> + <Version Value="2"/> + <IgnoreBinaries Value="False"/> + <IncludeFileFilter Value="*.(pas|pp|inc|lfm|lpr|lrs|lpi|lpk|sh|xml)"/> + <ExcludeFileFilter Value="*.(bak|ppu|ppw|o|so);*~;backup"/> + </PublishOptions> + <RunParams> + <local> + <FormatVersion Value="1"/> + <LaunchingApplication PathPlusParams="\usr\X11R6\bin\xterm -T 'Lazarus Run Output' -e $(LazarusDir)\tools\runwait.sh $(TargetCmdLine)"/> + </local> + </RunParams> + <RequiredPackages Count="1"> + <Item1> + <PackageName Value="LazUtils"/> + </Item1> + </RequiredPackages> + <Units Count="1"> + <Unit0> + <Filename Value="htmlread.lpr"/> + <IsPartOfProject Value="True"/> + </Unit0> + </Units> + </ProjectOptions> + <CompilerOptions> + <Version Value="11"/> + <PathDelim Value="\"/> + <Target> + <Filename Value="htmlread"/> + </Target> + <SearchPaths> + <OtherUnitFiles Value="..\..\.."/> + <UnitOutputDirectory Value="..\..\lib\$(TargetCPU)-$(TargetOS)"/> + </SearchPaths> + <Parsing> + <SyntaxOptions> + <UseAnsiStrings Value="False"/> + </SyntaxOptions> + </Parsing> + <Linking> + <Debugging> + <DebugInfoType Value="dsDwarf2Set"/> + </Debugging> + </Linking> + </CompilerOptions> +</CONFIG> diff --git a/components/fpspreadsheet/examples/read_write/htmldemo/htmlread.lpr b/components/fpspreadsheet/examples/read_write/htmldemo/htmlread.lpr new file mode 100644 index 000000000..4387d8e3e --- /dev/null +++ b/components/fpspreadsheet/examples/read_write/htmldemo/htmlread.lpr @@ -0,0 +1,70 @@ +{ +htmlread.dpr + +Demonstrates how to read a html file using the fpspreadsheet library. +IMPORTANT: Requires the output file of the htmlwrite demo. +} + +program htmlread; + +{$mode delphi}{$H+} + +uses + Classes, SysUtils, LazUTF8, fpstypes, fpsutils, fpspreadsheet, fpshtml; + +var + MyWorkbook: TsWorkbook; + MyWorksheet: TsWorksheet; + InputFilename: string; + MyDir: string; + i: Integer; + CurCell: PCell; + +begin + // Open the input file + MyDir := ExtractFilePath(ParamStr(0)); + InputFileName := MyDir + 'test' + STR_HTML_EXTENSION; + if not FileExists(InputFileName) then begin + WriteLn('Input file ', InputFileName, ' does not exist. Please run htmlwrite first.'); + Halt; + end; + + WriteLn('Opening input file ', InputFilename); + + // Parameters + HTMLParams.TableIndex := 0; + + // Create the spreadsheet + MyWorkbook := TsWorkbook.Create; + try + MyWorkbook.Options := MyWorkbook.Options + [boReadFormulas]; + MyWorkbook.ReadFromFile(InputFilename, sfHTML); + + MyWorksheet := MyWorkbook.GetFirstWorksheet; + + // Write all cells with contents to the console + WriteLn(''); + WriteLn('Contents of the first worksheet of the file:'); + WriteLn(''); + + for CurCell in MyWorksheet.Cells do + begin + WriteLn( + 'Row: ', CurCell^.Row, + ' Col: ', CurCell^.Col, + ' Value: ', UTF8ToConsole(MyWorkSheet.ReadAsUTF8Text(CurCell^.Row, CurCell^.Col)) + ); + end; + + finally + // Finalization + MyWorkbook.Free; + end; + + {$IFDEF MSWINDOWS} + WriteLn; + WriteLn('Press ENTER to exit.'); + ReadLn; + {$ENDIF} +end. + diff --git a/components/fpspreadsheet/examples/read_write/htmldemo/readme.txt b/components/fpspreadsheet/examples/read_write/htmldemo/readme.txt index e04258916..3c87de8e8 100644 --- a/components/fpspreadsheet/examples/read_write/htmldemo/readme.txt +++ b/components/fpspreadsheet/examples/read_write/htmldemo/readme.txt @@ -1 +1,3 @@ This demo demonstrates how to use fpspreadsheet to read and write html files which can be opened by the browser. + +Please run the write demo before the read demo in order to create the required spreadsheet file. diff --git a/components/fpspreadsheet/examples/visual/fpsctrls/main.lfm b/components/fpspreadsheet/examples/visual/fpsctrls/main.lfm index 80a618109..7d09f3dd5 100644 --- a/components/fpspreadsheet/examples/visual/fpsctrls/main.lfm +++ b/components/fpspreadsheet/examples/visual/fpsctrls/main.lfm @@ -641,7 +641,7 @@ object MainForm: TMainForm end object OpenDialog: TOpenDialog DefaultExt = '.xls' - Filter = 'All spreadsheet files|*.xls;*.xlsx;*.ods;*.csv|All Excel files (*.xls, *.xlsx)|*.xls;*.xlsx|Excel XML spreadsheet (*.xlsx)|*.xlsx|Excel 97-2003 spreadsheets (*.xls)|*.xls|Excel 5 spreadsheet (*.xls)|*.xls|Excel 2.1 spreadsheets (*.xls)|*.xls|LibreOffice/OpenOffice spreadsheet (*.ods)|*.ods|Comma-delimited files (*.csv)|*.csv' + Filter = 'All spreadsheet files|*.xls;*.xlsx;*.ods;*.csv|All Excel files (*.xls, *.xlsx)|*.xls;*.xlsx|Excel XML spreadsheet (*.xlsx)|*.xlsx|Excel 97-2003 spreadsheets (*.xls)|*.xls|Excel 5 spreadsheet (*.xls)|*.xls|Excel 2.1 spreadsheets (*.xls)|*.xls|LibreOffice/OpenOffice spreadsheet (*.ods)|*.ods|HTML files (*.html; *.htm)|*.html;*.htm|Comma-delimited files (*.csv)|*.csv' Options = [ofExtensionDifferent, ofEnableSizing, ofViewDetail] left = 312 top = 160 @@ -943,7 +943,7 @@ object MainForm: TMainForm object AcFileOpen: TFileOpen Category = 'File' Caption = '&Open ...' - Dialog.Filter = 'All spreadsheet files|*.xls;*.xlsx;*.ods;*.csv|All Excel files (*.xls, *.xlsx)|*.xls;*.xlsx|Excel XML spreadsheet (*.xlsx)|*.xlsx|Excel 97-2003 spreadsheets (*.xls)|*.xls|Excel 5 spreadsheet (*.xls)|*.xls|Excel 2.1 spreadsheets (*.xls)|*.xls|LibreOffice/OpenOffice spreadsheet (*.ods)|*.ods|Comma-delimited files (*.csv)|*.csv' + Dialog.Filter = 'All supported spreadsheet files|*.xls;*.xlsx;*.ods;*.csv;*.html;*.htm|All Excel files (*.xls, *.xlsx)|*.xls;*.xlsx|Excel XML spreadsheet (*.xlsx)|*.xlsx|Excel 97-2003 spreadsheets (*.xls)|*.xls|Excel 5 spreadsheet (*.xls)|*.xls|Excel 2.1 spreadsheets (*.xls)|*.xls|LibreOffice/OpenOffice spreadsheet (*.ods)|*.ods|Comma-delimited files (*.csv)|*.csv|HTML files (*.html; *.htm)|*.html;*.htm' Dialog.Options = [ofExtensionDifferent, ofFileMustExist, ofEnableSizing, ofViewDetail] Hint = 'Open spreadsheet file' ImageIndex = 44 diff --git a/components/fpspreadsheet/examples/visual/fpsctrls/main.pas b/components/fpspreadsheet/examples/visual/fpsctrls/main.pas index 275d060f9..31d947c13 100644 --- a/components/fpspreadsheet/examples/visual/fpsctrls/main.pas +++ b/components/fpspreadsheet/examples/visual/fpsctrls/main.pas @@ -460,6 +460,7 @@ var F: TNumFormatForm; sample: Double; begin + Unused(AWorkbook); F := TNumFormatForm.Create(nil); try F.Position := poMainFormCenter; diff --git a/components/fpspreadsheet/fpolestorage.pas b/components/fpspreadsheet/fpolestorage.pas old mode 100755 new mode 100644 diff --git a/components/fpspreadsheet/fpscsv.pas b/components/fpspreadsheet/fpscsv.pas index 699c4a6d4..791e9aa2f 100644 --- a/components/fpspreadsheet/fpscsv.pas +++ b/components/fpspreadsheet/fpscsv.pas @@ -13,11 +13,6 @@ type private FWorksheetName: String; FFormatSettings: TFormatSettings; - function IsBool(AText: String; out AValue: Boolean): Boolean; - function IsDateTime(AText: String; out ADateTime: TDateTime; - out ANumFormat: TsNumberFormat): Boolean; - function IsNumber(AText: String; out ANumber: Double; out ANumFormat: TsNumberFormat; - out ADecimals: Integer; out ACurrencySymbol, AWarning: String): Boolean; function IsQuotedText(var AText: String): Boolean; procedure ReadCellValue(ARow, ACol: Cardinal; AText: String); protected @@ -96,93 +91,7 @@ implementation uses //StrUtils, DateUtils, LConvEncoding, Math, - fpsUtils, fpsCurrency, fpsNumFormat; - -{ Initializes the FormatSettings of the CSVParams to default values which - can be replaced by the FormatSettings of the workbook's FormatSettings } -procedure InitCSVFormatSettings; -var - i: Integer; -begin - with CSVParams.FormatSettings do - begin - CurrencyFormat := Byte(-1); - NegCurrFormat := Byte(-1); - ThousandSeparator := #0; - DecimalSeparator := #0; - CurrencyDecimals := Byte(-1); - DateSeparator := #0; - TimeSeparator := #0; - ListSeparator := #0; - CurrencyString := ''; - ShortDateFormat := ''; - LongDateFormat := ''; - TimeAMString := ''; - TimePMString := ''; - ShortTimeFormat := ''; - LongTimeFormat := ''; - for i:=1 to 12 do - begin - ShortMonthNames[i] := ''; - LongMonthNames[i] := ''; - end; - for i:=1 to 7 do - begin - ShortDayNames[i] := ''; - LongDayNames[i] := ''; - end; - TwoDigitYearCenturyWindow := Word(-1); - end; -end; - -procedure ReplaceFormatSettings(var AFormatSettings: TFormatSettings; - const ADefaultFormats: TFormatSettings); -var - i: Integer; -begin - if AFormatSettings.CurrencyFormat = Byte(-1) then - AFormatSettings.CurrencyFormat := ADefaultFormats.CurrencyFormat; - if AFormatSettings.NegCurrFormat = Byte(-1) then - AFormatSettings.NegCurrFormat := ADefaultFormats.NegCurrFormat; - if AFormatSettings.ThousandSeparator = #0 then - AFormatSettings.ThousandSeparator := ADefaultFormats.ThousandSeparator; - if AFormatSettings.DecimalSeparator = #0 then - AFormatSettings.DecimalSeparator := ADefaultFormats.DecimalSeparator; - if AFormatSettings.CurrencyDecimals = Byte(-1) then - AFormatSettings.CurrencyDecimals := ADefaultFormats.CurrencyDecimals; - if AFormatSettings.DateSeparator = #0 then - AFormatSettings.DateSeparator := ADefaultFormats.DateSeparator; - if AFormatSettings.TimeSeparator = #0 then - AFormatSettings.TimeSeparator := ADefaultFormats.TimeSeparator; - if AFormatSettings.ListSeparator = #0 then - AFormatSettings.ListSeparator := ADefaultFormats.ListSeparator; - if AFormatSettings.CurrencyString = '' then - AFormatSettings.CurrencyString := ADefaultFormats.CurrencyString; - if AFormatSettings.ShortDateFormat = '' then - AFormatSettings.ShortDateFormat := ADefaultFormats.ShortDateFormat; - if AFormatSettings.LongDateFormat = '' then - AFormatSettings.LongDateFormat := ADefaultFormats.LongDateFormat; - if AFormatSettings.ShortTimeFormat = '' then - AFormatSettings.ShortTimeFormat := ADefaultFormats.ShortTimeFormat; - if AFormatSettings.LongTimeFormat = '' then - AFormatSettings.LongTimeFormat := ADefaultFormats.LongTimeFormat; - for i:=1 to 12 do - begin - if AFormatSettings.ShortMonthNames[i] = '' then - AFormatSettings.ShortMonthNames[i] := ADefaultFormats.ShortMonthNames[i]; - if AFormatSettings.LongMonthNames[i] = '' then - AFormatSettings.LongMonthNames[i] := ADefaultFormats.LongMonthNames[i]; - end; - for i:=1 to 7 do - begin - if AFormatSettings.ShortDayNames[i] = '' then - AFormatSettings.ShortDayNames[i] := ADefaultFormats.ShortDayNames[i]; - if AFormatSettings.LongDayNames[i] = '' then - AFormatSettings.LongDayNames[i] := ADefaultFormats.LongDayNames[i]; - end; - if AFormatSettings.TwoDigitYearCenturyWindow = Word(-1) then - AFormatSettings.TwoDigitYearCenturyWindow := ADefaultFormats.TwoDigitYearCenturyWindow; -end; + fpsUtils, fpsNumFormat; function LineEndingAsString(ALineEnding: TsCSVLineEnding): String; begin @@ -207,152 +116,6 @@ begin ReplaceFormatSettings(FFormatSettings, FWorkbook.FormatSettings); end; -function TsCSVReader.IsBool(AText: String; out AValue: Boolean): Boolean; -begin - if SameText(AText, CSVParams.TrueText) then - begin - AValue := true; - Result := true; - end else - if SameText(AText, CSVParams.FalseText) then - begin - AValue := false; - Result := true; - end else - Result := false; -end; - -function TsCSVReader.IsDateTime(AText: String; out ADateTime: TDateTime; - out ANumFormat: TsNumberFormat): Boolean; - - { Test whether the text is formatted according to a built-in date/time format. - Converts the obtained date/time value back to a string and compares. } - function TestFormat(lNumFmt: TsNumberFormat): Boolean; - var - fmt: string; - begin - fmt := BuildDateTimeFormatString(lNumFmt, FFormatSettings); - Result := FormatDateTime(fmt, ADateTime, FFormatSettings) = AText; - if Result then ANumFormat := lNumFmt; - end; - -begin - Result := TryStrToDateTime(AText, ADateTime, FFormatSettings); - if Result then - begin - ANumFormat := nfCustom; - if abs(ADateTime) > 1 then // this is most probably a date - begin - if TestFormat(nfShortDateTime) then - exit; - if TestFormat(nfLongDate) then - exit; - if TestFormat(nfShortDate) then - exit; - end else - begin // this case is time-only - if TestFormat(nfLongTimeAM) then - exit; - if TestFormat(nfLongTime) then - exit; - if TestFormat(nfShortTimeAM) then - exit; - if TestFormat(nfShortTime) then - exit; - end; - end; -end; - -function TsCSVReader.IsNumber(AText: String; out ANumber: Double; - out ANumFormat: TsNumberFormat; out ADecimals: Integer; - out ACurrencySymbol, AWarning: String): Boolean; -var - p: Integer; - DecSep, ThousSep: Char; -begin - Result := false; - AWarning := ''; - - // To detect whether the text is a currency value we look for the currency - // string. If we find it, we delete it and convert the remaining string to - // a number. - ACurrencySymbol := FFormatSettings.CurrencyString; - if RemoveCurrencySymbol(ACurrencySymbol, AText) then - begin - if IsNegative(AText) then - begin - if AText = '' then - exit; - AText := '-' + AText; - end; - end else - ACurrencySymbol := ''; - - if CSVParams.AutoDetectNumberFormat then - Result := TryStrToFloatAuto(AText, ANumber, DecSep, ThousSep, AWarning) - else begin - Result := TryStrToFloat(AText, ANumber, FFormatSettings); - if Result then - begin - if pos(FFormatSettings.DecimalSeparator, AText) = 0 - then DecSep := #0 - else DecSep := FFormatSettings.DecimalSeparator; - if pos(CSVParams.FormatSettings.ThousandSeparator, AText) = 0 - then ThousSep := #0 - else ThousSep := FFormatSettings.ThousandSeparator; - end; - end; - - // Try to determine the number format - if Result then - begin - if ThousSep <> #0 then - ANumFormat := nfFixedTh - else - ANumFormat := nfGeneral; - // count number of decimal places and try to catch special formats - ADecimals := 0; - if DecSep <> #0 then - begin - // Go to the decimal separator and search towards the end of the string - p := pos(DecSep, AText) + 1; - while (p <= Length(AText)) do begin - // exponential format - if AText[p] in ['+', '-', 'E', 'e'] then - begin - ANumFormat := nfExp; - break; - end else - // percent format - if AText[p] = '%' then - begin - ANumFormat := nfPercentage; - break; - end else - begin - inc(p); - inc(ADecimals); - end; - end; - if (ADecimals > 0) and (ADecimals < 9) and (ANumFormat = nfGeneral) then - // "no formatting" assumed if there are "many" decimals - ANumFormat := nfFixed; - end else - begin - p := Length(AText); - while (p > 0) do begin - case AText[p] of - '%' : ANumFormat := nfPercentage; - 'e', 'E': ANumFormat := nfExp; - else dec(p); - end; - break; - end; - end; - end else - ACurrencySymbol := ''; -end; - { Checks if text is quoted; strips any starting and ending quotes } function TsCSVReader.IsQuotedText(var AText: String): Boolean; begin @@ -399,7 +162,8 @@ begin end; // Check for a NUMBER or CURRENCY cell - if IsNumber(AText, dblValue, nf, decs, currSym, warning) then + if IsNumberValue(AText, CSVParams.AutoDetectNumberFormat, FFormatSettings, + dblValue, nf, decs, currSym, warning) then begin if currSym <> '' then FWorksheet.WriteCurrency(cell, dblValue, nfCurrency, decs, currSym) @@ -412,14 +176,14 @@ begin // Check for a DATE/TIME cell // No idea how to apply the date/time formatsettings here... - if IsDateTime(AText, dtValue, nf) then + if IsDateTimeValue(AText, FFormatSettings, dtValue, nf) then begin FWorksheet.WriteDateTime(cell, dtValue, nf); exit; end; // Check for a BOOLEAN cell - if IsBool(AText, boolValue) then + if IsBoolValue(AText, CSVParams.TrueText, CSVParams.FalseText, boolValue) then begin FWorksheet.WriteBoolValue(cell, boolValue); exit; @@ -656,7 +420,7 @@ end; initialization - InitCSVFormatSettings; + InitFormatSettings(CSVParams.FormatSettings); RegisterSpreadFormat(TsCSVReader, TsCSVWriter, sfCSV); end. diff --git a/components/fpspreadsheet/fpshtml.pas b/components/fpspreadsheet/fpshtml.pas index ec78630c9..4c2a88a20 100644 --- a/components/fpspreadsheet/fpshtml.pas +++ b/components/fpspreadsheet/fpshtml.pas @@ -8,30 +8,38 @@ uses Classes, SysUtils, fasthtmlparser, fpstypes, fpspreadsheet, fpsReaderWriter; -type (* +type + TsHTMLTokenKind = (htkTABLE, htkTR, htkTH, htkTD, htkDIV, htkSPAN, htkP); + { + TsHTMLToken = class + Kind: TsHTMLTokenKind; + Parent: TsHTMLToken; + Children +} TsHTMLReader = class(TsCustomSpreadReader) private - FWorksheetName: String; FFormatSettings: TFormatSettings; - function IsBool(AText: String; out AValue: Boolean): Boolean; - function IsDateTime(AText: String; out ADateTime: TDateTime; - out ANumFormat: TsNumberFormat): Boolean; - function IsNumber(AText: String; out ANumber: Double; out ANumFormat: TsNumberFormat; - out ADecimals: Integer; out ACurrencySymbol, AWarning: String): Boolean; - function IsQuotedText(var AText: String): Boolean; - procedure ReadCellValue(ARow, ACol: Cardinal; AText: String); + parser: THTMLParser; + FInTable: Boolean; + FInSubTable: Boolean; + FInCell: Boolean; + FInSpan: Boolean; + FInA: Boolean; + FInHeader: Boolean; + FTableCounter: Integer; + FCurrRow, FCurrCol: LongInt; + FCelLText: String; + procedure TagFoundHandler(NoCaseTag, ActualTag: string); + procedure TextFoundHandler(AText: String); protected - procedure ReadBlank(AStream: TStream); override; - procedure ReadFormula(AStream: TStream); override; - procedure ReadLabel(AStream: TStream); override; - procedure ReadNumber(AStream: TStream); override; + procedure ProcessCellValue(ARow, ACol: LongInt; AText: String); public constructor Create(AWorkbook: TsWorkbook); override; - procedure ReadFromFile(AFileName: String); override; + destructor Destroy; override; procedure ReadFromStream(AStream: TStream); override; procedure ReadFromStrings(AStrings: TStrings); override; end; - *) + TsHTMLWriter = class(TsCustomSpreadWriter) private FPointSeparatorSettings: TFormatSettings; @@ -78,26 +86,523 @@ type (* end; TsHTMLParams = record + TableIndex: Integer; // R: Index of the table in the HTML file SheetIndex: Integer; // W: Index of the sheet to be written ShowRowColHeaders: Boolean; // RW: Show row/column headers + DetectContentType: Boolean; // R: try to convert strings to content types + NumberFormat: String; // W: if empty write numbers like in sheet, otherwise use this format + AutoDetectNumberFormat: Boolean; // R: automatically detects decimal/thousand separator used in numbers TrueText: String; // RW: String for boolean TRUE FalseText: String; // RW: String for boolean FALSE + FormatSettings: TFormatSettings; // RW: add'l parameters for conversion end; var HTMLParams: TsHTMLParams = ( + TableIndex: -1; // -1 = all tables SheetIndex: -1; // -1 = active sheet, MaxInt = all sheets ShowRowColHeaders: false; + DetectContentType: true; + NumberFormat: ''; + AutoDetectNumberFormat: true; TrueText: 'TRUE'; FalseText: 'FALSE'; - ); + {%H-}); implementation uses - LazUTF8, URIParser, Math, StrUtils, - fpsUtils; + LazUTF8, URIParser, StrUtils, + fpsUtils, fpsHTMLUtils, fpsNumFormat; + (* +type + THTMLEntity = record + E: String; + Ch: String; + end; +const + HTMLEntities: array[0..251] of THTMLEntity = ( + // A + (E: 'Acirc'; Ch: 'Â'), // 0 + (E: 'acirc'; Ch: 'â'), + (E: 'acute'; Ch: '´'), + (E: 'AElig'; Ch: 'Æ'), + (E: 'aelig'; Ch: 'æ'), + (E: 'Agrave'; Ch: 'À'), + (E: 'agrave'; Ch: 'à'), + (E: 'alefsym';Ch: 'ℵ'), + (E: 'Alpha'; Ch: 'Α'), + (E: 'alpha'; Ch: 'α'), + (E: 'amp'; Ch: '&'), // 10 + (E: 'and'; Ch: '∧'), + (E: 'ang'; Ch: '∠'), + (E: 'apos'; Ch: ''''), + (E: 'Aring'; Ch: 'Å'), + (E: 'aring'; Ch: 'å'), + (E: 'asymp'; Ch: '≈'), + (E: 'Atilde'; Ch: 'Ã'), + (E: 'atilde'; Ch: 'ã'), + (E: 'Auml'; Ch: 'Ä'), + (E: 'auml'; Ch: 'ä'), // 20 + // B + (E: 'bdquo'; Ch: '„'), // 21 + (E: 'Beta'; Ch: 'Β'), + (E: 'beta'; Ch: 'β'), + (E: 'brvbar'; Ch: '¦'), + (E: 'bull'; Ch: '•'), + // C + (E: 'cap'; Ch: '∩'), // 26 + (E: 'Ccedil'; Ch: 'Ç'), + (E: 'ccedil'; Ch: 'ç'), + (E: 'cedil'; Ch: '¸'), + (E: 'cent'; Ch: '¢'), // 39 + (E: 'Chi'; Ch: 'Χ'), + (E: 'chi'; Ch: 'χ'), + (E: 'circ'; Ch: 'ˆ'), + (E: 'clubs'; Ch: '♣'), + (E: 'cong'; Ch: '≅'), // approximately equal + (E: 'copy'; Ch: '©'), + (E: 'crarr'; Ch: '↵'), // carriage return + (E: 'cup'; Ch: '∪'), + (E: 'curren'; Ch: '¤'), + // D + (E: 'Dagger'; Ch: '‡'), // 40 + (E: 'dagger'; Ch: '†'), + (E: 'dArr'; Ch: '⇓'), // wide down-arrow + (E: 'darr'; Ch: '↓'), // narrow down-arrow + (E: 'deg'; Ch: '°'), + (E: 'Delta'; Ch: 'Δ'), + (E: 'delta'; Ch: 'δ'), + (E: 'diams'; Ch: '♦'), + (E: 'divide'; Ch: '÷'), + // E + (E: 'Eacute'; Ch: 'É'), + (E: 'eacute'; Ch: 'é'), + (E: 'Ecirc'; Ch: 'Ê'), + (E: 'ecirc'; Ch: 'ê'), + (E: 'Egrave'; Ch: 'È'), + (E: 'egrave'; Ch: 'è'), + (E: 'empty'; Ch: '∅'), + (E: 'emsp'; Ch: ' '), // Space character width of "m" + (E: 'ensp'; Ch: ' '), // Space character width of "n" + (E: 'Epsilon';Ch: 'Ε'), // capital epsilon + (E: 'epsilon';Ch: 'ε'), + (E: 'equiv'; Ch: '≡'), + (E: 'Eta'; Ch: 'Η'), + (E: 'eta'; Ch: 'η'), + (E: 'ETH'; Ch: 'Ð'), + (E: 'eth'; Ch: 'ð'), + (E: 'Euml'; Ch: 'Ë'), + (E: 'euml'; Ch: 'ë'), + (E: 'euro'; Ch: '€'), + (E: 'exist'; Ch: '∃'), + // F + (E: 'fnof'; Ch: 'ƒ'), + (E: 'forall'; Ch: '∀'), + (E: 'frac12'; Ch: '½'), + (E: 'frac14'; Ch: '¼'), + (E: 'frac34'; Ch: '¾'), + (E: 'frasl'; Ch: '⁄'), + // G + (E: 'Gamma'; Ch: 'Γ'), + (E: 'gamma'; Ch: 'γ'), + (E: 'ge'; Ch: '≥'), + (E: 'gt'; Ch: '>'), + // H + (E: 'hArr'; Ch: '⇔'), // wide horizontal double arrow + (E: 'harr'; Ch: '↔'), // narrow horizontal double arrow + (E: 'hearts'; Ch: '♥'), + (E: 'hellip'; Ch: '…'), + // I + (E: 'Iacute'; Ch: 'Í'), + (E: 'iacute'; Ch: 'í'), + (E: 'Icirc'; Ch: 'Î'), + (E: 'icirc'; Ch: 'î'), + (E: 'iexcl'; Ch: '¡'), + (E: 'Igrave'; Ch: 'Ì'), + (E: 'igrave'; Ch: 'ì'), + (E: 'image'; Ch: 'ℑ'), // + (E: 'infin'; Ch: '∞'), + (E: 'int'; Ch: '∫'), + (E: 'Iota'; Ch: 'Ι'), + (E: 'iota'; Ch: 'ι'), + (E: 'iquest'; Ch: '¿'), + (E: 'isin'; Ch: '∈'), + (E: 'Iuml'; Ch: 'Ï'), + (E: 'iuml'; Ch: 'ï'), + // K + (E: 'Kappa'; Ch: 'Κ'), + (E: 'kappa'; Ch: 'κ'), + // L + (E: 'Lambda'; Ch: 'Λ'), + (E: 'lambda'; Ch: 'λ'), + (E: 'lang'; Ch: '⟨'), // Left-pointing angle bracket + (E: 'laquo'; Ch: '«'), + (E: 'lArr'; Ch: '⇐'), // Left-pointing wide arrow + (E: 'larr'; Ch: '←'), + (E: 'lceil'; Ch: '⌈'), // Left ceiling + (E: 'ldquo'; Ch: '“'), + (E: 'le'; Ch: '≤'), + (E: 'lfloor'; Ch: '⌊'), // Left floor + (E: 'lowast'; Ch: '∗'), // Low asterisk + (E: 'loz'; Ch: '◊'), + (E: 'lrm'; Ch: '‎'), // Left-to-right mark + (E: 'lsaquo'; Ch: '‹'), + (E: 'lsquo'; Ch: '‘'), + (E: 'lt'; Ch: '<'), + // M + (E: 'macr'; Ch: '¯'), + (E: 'mdash'; Ch: '—'), + (E: 'micro'; Ch: 'µ'), + (E: 'middot'; Ch: '·'), + (E: 'minus'; Ch: '−'), + (E: 'Mu'; Ch: 'Μ'), + (E: 'mu'; Ch: 'μ'), + // N + (E: 'nabla'; Ch: '∇'), + (E: 'nbsp'; Ch: ' '), + (E: 'ndash'; Ch: '–'), + (E: 'ne'; Ch: '≠'), + (E: 'ni'; Ch: '∋'), + (E: 'not'; Ch: '¬'), + (E: 'notin'; Ch: '∉'), // math: "not in" + (E: 'nsub'; Ch: '⊄'), // math: "not a subset of" + (E: 'Ntilde'; Ch: 'Ñ'), + (E: 'ntilde'; Ch: 'ñ'), + (E: 'Nu'; Ch: 'Ν'), + (E: 'nu'; Ch: 'ν'), + // O + (E: 'Oacute'; Ch: 'Ó'), + (E: 'oacute'; Ch: 'ó'), + (E: 'Ocirc'; Ch: 'Ô'), + (E: 'ocirc'; Ch: 'ô'), + (E: 'OElig'; Ch: 'Œ'), + (E: 'oelig'; Ch: 'œ'), + (E: 'Ograve'; Ch: 'Ò'), + (E: 'ograve'; Ch: 'ò'), + (E: 'oline'; Ch: '‾'), + (E: 'Omega'; Ch: 'Ω'), + (E: 'omega'; Ch: 'ω'), + (E: 'Omicron';Ch: 'Ο'), + (E: 'omicron';Ch: 'ο'), + (E: 'oplus'; Ch: '⊕'), // Circled plus + (E: 'or'; Ch: '∨'), + (E: 'ordf'; Ch: 'ª'), + (E: 'ordm'; Ch: 'º'), + (E: 'Oslash'; Ch: 'Ø'), + (E: 'oslash'; Ch: 'ø'), + (E: 'Otilde'; Ch: 'Õ'), + (E: 'otilde'; Ch: 'õ'), + (E: 'otimes'; Ch: '⊗'), // Circled times + (E: 'Ouml'; Ch: 'Ö'), + (E: 'ouml'; Ch: 'ö'), + // P + (E: 'para'; Ch: '¶'), + (E: 'part'; Ch: '∂'), + (E: 'permil'; Ch: '‰'), + (E: 'perp'; Ch: '⊥'), + (E: 'Phi'; Ch: 'Φ'), + (E: 'phi'; Ch: 'φ'), + (E: 'Pi'; Ch: 'Π'), + (E: 'pi'; Ch: 'π'), // lower-case pi + (E: 'piv'; Ch: 'ϖ'), + (E: 'plusmn'; Ch: '±'), + (E: 'pound'; Ch: '£'), + (E: 'Prime'; Ch: '″'), + (E: 'prime'; Ch: '′'), + (E: 'prod'; Ch: '∏'), + (E: 'prop'; Ch: '∝'), + (E: 'Psi'; Ch: 'Ψ'), + (E: 'psi'; Ch: 'ψ'), + // Q + (E: 'quot'; Ch: '"'), + // R + (E: 'radic'; Ch: '√'), + (E: 'rang'; Ch: '⟩'), // right-pointing angle bracket + (E: 'raquo'; Ch: '»'), + (E: 'rArr'; Ch: '⇒'), + (E: 'rarr'; Ch: '→'), + (E: 'rceil'; Ch: '⌉'), // right ceiling + (E: 'rdquo'; Ch: '”'), + (E: 'real'; Ch: 'ℜ'), // R in factura + (E: 'reg'; Ch: '®'), + (E: 'rfloor'; Ch: '⌋'), // Right floor + (E: 'Rho'; Ch: 'Ρ'), + (E: 'rho'; Ch: 'ρ'), + (E: 'rlm'; Ch: ''), // right-to-left mark + (E: 'rsaquo'; Ch: '›'), + (E: 'rsquo'; Ch: '’'), + + // S + (E: 'sbquo'; Ch: '‚'), + (E: 'Scaron'; Ch: 'Š'), + (E: 'scaron'; Ch: 'š'), + (E: 'sdot'; Ch: '⋅'), // math: dot operator + (E: 'sect'; Ch: '§'), + (E: 'shy'; Ch: ''), // conditional hyphen + (E: 'Sigma'; Ch: 'Σ'), + (E: 'sigma'; Ch: 'σ'), + (E: 'sigmaf'; Ch: 'ς'), + (E: 'sim'; Ch: '∼'), // similar + (E: 'spades'; Ch: '♠'), + (E: 'sub'; Ch: '⊂'), + (E: 'sube'; Ch: '⊆'), + (E: 'sum'; Ch: '∑'), + (E: 'sup'; Ch: '⊃'), + (E: 'sup1'; Ch: '¹'), + (E: 'sup2'; Ch: '²'), + (E: 'sup3'; Ch: '³'), + (E: 'supe'; Ch: '⊇'), + (E: 'szlig'; Ch: 'ß'), + //T + (E: 'Tau'; Ch: 'Τ'), + (E: 'tau'; Ch: 'τ'), + (E: 'there4'; Ch: '∴'), + (E: 'Theta'; Ch: 'Θ'), + (E: 'theta'; Ch: 'θ'), + (E: 'thetasym';Ch: 'ϑ'), + (E: 'thinsp'; Ch: ' '), // thin space + (E: 'THORN'; Ch: 'Þ'), + (E: 'thorn'; Ch: 'þ'), + (E: 'tilde'; Ch: '˜'), + (E: 'times'; Ch: '×'), + (E: 'trade'; Ch: '™'), + // U + (E: 'Uacute'; Ch: 'Ú'), + (E: 'uacute'; Ch: 'ú'), + (E: 'uArr'; Ch: '⇑'), // wide up-arrow + (E: 'uarr'; Ch: '↑'), + (E: 'Ucirc'; Ch: 'Û'), + (E: 'ucirc'; Ch: 'û'), + (E: 'Ugrave'; Ch: 'Ù'), + (E: 'ugrave'; Ch: 'ù'), + (E: 'uml'; Ch: '¨'), + (E: 'upsih'; Ch: 'ϒ'), + (E: 'Upsilon';Ch: 'Υ'), + (E: 'upsilon';Ch: 'υ'), + (E: 'Uuml'; Ch: 'Ü'), + (E: 'uuml'; Ch: 'ü'), + // W + (E: 'weierp'; Ch: '℘'), // Script Capital P; Weierstrass Elliptic Function + // X + (E: 'Xi'; Ch: 'Ξ'), + (E: 'xi'; Ch: 'ξ'), + // Y + (E: 'Yacute'; Ch: 'Ý'), + (E: 'yacute'; Ch: 'ý'), + (E: 'yen'; Ch: '¥'), + (E: 'Yuml'; Ch: 'Ÿ'), + (E: 'yuml'; Ch: 'ÿ'), + // Z + (E: 'Zeta'; Ch: 'Ζ'), + (E: 'zeta'; Ch: 'ζ'), + (E: 'zwj'; Ch: ''), // Zero-width joiner + (E: 'zwnj'; Ch: ''), // Zero-width non-joiner + + (E: '#160'; Ch: ' ') // numerical value of " " + ); + *) +{==============================================================================} +{ TsHTMLReader } +{==============================================================================} + +constructor TsHTMLReader.Create(AWorkbook: TsWorkbook); +begin + inherited Create(AWorkbook); + FFormatSettings := HTMLParams.FormatSettings; + ReplaceFormatSettings(FFormatSettings, FWorkbook.FormatSettings); + FTableCounter := -1; +end; + +destructor TsHTMLReader.Destroy; +begin + FreeAndNil(parser); + inherited Destroy; +end; + +procedure TsHTMLReader.ReadFromStream(AStream: TStream); +var + list: TStringList; +begin + list := TStringList.Create; + try + list.LoadFromStream(AStream); + ReadFromStrings(list); + if FWorkbook.GetWorksheetCount = 0 then + begin + FWorkbook.AddErrorMsg('Requested table not found, or no tables in html file'); + FWorkbook.AddWorksheet('Dummy'); + end; + finally + list.Free; + end; +end; + +procedure TsHTMLReader.ReadFromStrings(AStrings: TStrings); +begin + // Create html parser + FreeAndNil(parser); + parser := THTMLParser.Create(AStrings.Text); + parser.OnFoundTag := @TagFoundHandler; + parser.OnFoundText := @TextFoundHandler; + // Execute the html parser + parser.Exec; +end; + +procedure TsHTMLReader.ProcessCellValue(ARow, ACol: LongInt; AText: String); +var + cell: PCell; + dblValue: Double; + dtValue: TDateTime; + boolValue: Boolean; + nf: TsNumberFormat; + decs: Integer; + currSym: String; + warning: String; +begin + // Empty strings are blank cells -- nothing to do + if (AText = '') then + exit; + + cell := FWorksheet.AddCell(ARow, ACol); + + // Do not try to interpret the strings. --> everything is a LABEL cell. + if not HTMLParams.DetectContentType then + begin + FWorksheet.WriteUTF8Text(cell, AText); + exit; + end; + + // Check for a NUMBER or CURRENCY cell + if IsNumberValue(AText, HTMLParams.AutoDetectNumberFormat, FFormatSettings, + dblValue, nf, decs, currSym, warning) then + begin + if currSym <> '' then + FWorksheet.WriteCurrency(cell, dblValue, nfCurrency, decs, currSym) + else + FWorksheet.WriteNumber(cell, dblValue, nf, decs); + if warning <> '' then + FWorkbook.AddErrorMsg('Cell %s: %s', [GetCellString(ARow, ACol), warning]); + exit; + end; + + // Check for a DATE/TIME cell + // No idea how to apply the date/time formatsettings here... + if IsDateTimevalue(AText, FFormatSettings, dtValue, nf) then + begin + FWorksheet.WriteDateTime(cell, dtValue, nf); + exit; + end; + + // Check for a BOOLEAN cell + if IsBoolValue(AText, HTMLParams.TrueText, HTMLParams.FalseText, boolValue) then + begin + FWorksheet.WriteBoolValue(cell, boolValue); + exit; + end; + + // What is left is handled as a TEXT cell + FWorksheet.WriteUTF8Text(cell, AText); +end; + + +procedure TsHTMLReader.TagFoundHandler(NoCaseTag, ActualTag: string); +begin + if pos('<TABLE', NoCaseTag) = 1 then + begin + inc(FTableCounter); + if HTMLParams.TableIndex < 0 then // all tables + begin + FWorksheet := FWorkbook.AddWorksheet(Format('Table #%d', [FTableCounter+1])); + FInTable := true; + FCurrRow := -1; + FCurrCol := -1; + end else + if FTableCounter = HTMLParams.TableIndex then + begin + FWorksheet := FWorkbook.AddWorksheet(Format('Table #%d', [FTableCounter+1])); + FInTable := true; + FCurrRow := -1; + FCurrCol := -1; + end; + end else + if ((NoCaseTag = '<TR>') or (pos('<TR ', NoCaseTag) = 1)) and FInTable then + begin + inc(FCurrRow); + FCurrCol := -1; + end else + if ((NoCaseTag = '<TD>') or (pos('<TD ', NoCaseTag) = 1)) and FInTable then + begin + FInCell := true; + inc(FCurrCol); + FCellText := ''; + end else + if ((NoCaseTag = '<TH>') or (pos('<TH ', NoCaseTag) = 1)) and FInTable then + begin + FInCell := true; + FCellText := ''; + end else + if pos('<SPAN', NoCaseTag) = 1 then + begin + if FInCell then + FInSpan := true; + end else + if pos('<A', NoCaseTag) = 1 then + begin + if FInCell then + FInA := true + end else + if (pos('<H', NoCaseTag) = 1) and (NoCaseTag[3] in ['1', '2', '3', '4', '5', '6']) then + begin + if FInCell then + FInHeader := true; + end else + if ((NoCaseTag = '<BR>') or (pos('<BR ', NoCaseTag) = 1)) and FInCell then + FCellText := FCellText + LineEnding + else + case NoCaseTag of + '</TABLE>': + if FInTable then FInTable := false; + '</TD>', '</TH>': + if FInCell then + begin + ProcessCellValue(FCurrRow, FCurrCol, FCellText); + FInCell := false; + end; + '</A>': + if FInCell then FInA := false; + '</SPAN>': + if FInCell then FInSpan := false; + '<H1/>', '<H2/>', '<H3/>', '<H4/>', '<H5/>', '<H6/>': + if FinCell then FInHeader := false; + '<TR/>', '<TR />': + if FInTable then inc(FCurrRow); + '<TD/>', '<TD />': + if FInCell then inc(FCurrCol); + '<TH/>', '<TH />': + if FInCell then inc(FCurrCol); + end; +end; + +procedure TsHTMLReader.TextFoundHandler(AText: String); +begin + if FInCell then + begin + AText := CleanHTMLString(AText); + if AText <> '' then + begin + if FCellText = '' then + FCellText := AText + else + FCellText := FCellText + ' ' + AText; + end; + end; +end; + +{==============================================================================} +{ TsHTMLWriter } +{==============================================================================} constructor TsHTMLWriter.Create(AWorkbook: TsWorkbook); begin inherited Create(AWorkbook); @@ -493,7 +998,7 @@ begin Unused(AStream); Unused(ARow, ACol, ACell); AppendToStream(AStream, - '<div>' + IfThen(AValue, HTMLParams.TrueText, HTMLParams.FalseText) + '</div>'); + '<div>' + StrUtils.IfThen(AValue, HTMLParams.TrueText, HTMLParams.FalseText) + '</div>'); end; { Write date/time values in the same way they are displayed in the sheet } @@ -502,6 +1007,7 @@ procedure TsHTMLWriter.WriteDateTime(AStream: TStream; const ARow, ACol: Cardina var s: String; begin + Unused(AValue); s := FWorksheet.ReadAsUTF8Text(ACell); AppendToStream(AStream, '<div>' + s + '</div>'); @@ -512,6 +1018,7 @@ procedure TsHTMLWriter.WriteError(AStream: TStream; var s: String; begin + Unused(AValue); s := FWOrksheet.ReadAsUTF8Text(ACell); AppendToStream(AStream, '<div>' + s + '</div>'); @@ -663,7 +1170,7 @@ procedure TsHTMLWriter.WriteNumber(AStream: TStream; const ARow, ACol: Cardinal; var s: String; begin - Unused(ARow, ACol); + Unused(ARow, ACol, AValue); s := FWorksheet.ReadAsUTF8Text(ACell, FWorkbook.FormatSettings); AppendToStream(AStream, '<div>' + s + '</div>'); @@ -873,7 +1380,8 @@ begin end; initialization - RegisterSpreadFormat(nil, TsHTMLWriter, sfHTML); + InitFormatSettings(HTMLParams.FormatSettings); + RegisterSpreadFormat(TsHTMLReader, TsHTMLWriter, sfHTML); end. diff --git a/components/fpspreadsheet/fpshtmlutils.pas b/components/fpspreadsheet/fpshtmlutils.pas new file mode 100644 index 000000000..1fef4655f --- /dev/null +++ b/components/fpspreadsheet/fpshtmlutils.pas @@ -0,0 +1,400 @@ +unit fpsHTMLUtils; + +{$mode objfpc}{$H+} + +interface + +uses + Classes, SysUtils; + +type + THTMLEntity = record + E: String; + Ch: String; + N: Word; + end; + + +function IsHTMLEntity(AText: PChar; out AEntity: THTMLEntity): Boolean; +function CleanHTMLString(AText: String): String; + + +implementation + +uses + Strings; + +const + // http://unicode.e-workers.de/entities.php + HTMLEntities: array[0..250] of THTMLEntity = ( + // A + (E: 'Acirc'; Ch: 'Â'; N: 194), // 0 + (E: 'acirc'; Ch: 'â'; N: 226), + (E: 'acute'; Ch: '´'; N: 180), + (E: 'AElig'; Ch: 'Æ'; N: 198), + (E: 'aelig'; Ch: 'æ'; N: 230), + (E: 'Agrave'; Ch: 'À'; N: 192), + (E: 'agrave'; Ch: 'à'; N: 224), + (E: 'alefsym';Ch: 'ℵ'; N: 8501), + (E: 'Alpha'; Ch: 'Α'; N: 913), + (E: 'alpha'; Ch: 'α'; N: 945), + (E: 'amp'; Ch: '&'; N: 38), // 10 + (E: 'and'; Ch: '∧'; N: 8743), + (E: 'ang'; Ch: '∠'; N: 8736), + (E: 'apos'; Ch: ''''; N: 39), + (E: 'Aring'; Ch: 'Å'; N: 197), + (E: 'aring'; Ch: 'å'; N: 229), + (E: 'asymp'; Ch: '≈'; N: 2248), + (E: 'Atilde'; Ch: 'Ã'; N: 195), + (E: 'atilde'; Ch: 'ã'; N: 227), + (E: 'Auml'; Ch: 'Ä'; N: 196), + (E: 'auml'; Ch: 'ä'; N: 228), // 20 + // B + (E: 'bdquo'; Ch: '„'; N: 8222), // 21 + (E: 'Beta'; Ch: 'Β'; N: 914), + (E: 'beta'; Ch: 'β'; N: 946), + (E: 'brvbar'; Ch: '¦'; N: 166), + (E: 'bull'; Ch: '•'; N: 8226), + // C + (E: 'cap'; Ch: '∩'; N: 8745), // 26 + (E: 'Ccedil'; Ch: 'Ç'; N: 199), + (E: 'ccedil'; Ch: 'ç'; N: 231), + (E: 'cedil'; Ch: '¸'; N: 184), + (E: 'cent'; Ch: '¢'; N: 162), // 30 + (E: 'Chi'; Ch: 'Χ'; N: 935), + (E: 'chi'; Ch: 'χ'; N: 967), + (E: 'circ'; Ch: 'ˆ'; N: 710), + (E: 'clubs'; Ch: '♣'; N: 9827), + (E: 'cong'; Ch: '≅'; N: 8773), // approximately equal + (E: 'copy'; Ch: '©'; N: 169), + (E: 'crarr'; Ch: '↵'; N: 8629), // carriage return + (E: 'cup'; Ch: '∪'; N: 8746), + (E: 'curren'; Ch: '¤'; N: 164), + // D + (E: 'Dagger'; Ch: '‡'; N: 8225), // 40 + (E: 'dagger'; Ch: '†'; N: 8224), + (E: 'dArr'; Ch: '⇓'; N: 8659), // wide down-arrow + (E: 'darr'; Ch: '↓'; N: 8595), // narrow down-arrow + (E: 'deg'; Ch: '°'; N: 176), + (E: 'Delta'; Ch: 'Δ'; N: 916), + (E: 'delta'; Ch: 'δ'; N: 948), + (E: 'diams'; Ch: '♦'; N: 9830), + (E: 'divide'; Ch: '÷'; N: 247), + // E + (E: 'Eacute'; Ch: 'É'; N: 201), // 49 + (E: 'eacute'; Ch: 'é'; N: 233), // 50 + (E: 'Ecirc'; Ch: 'Ê'; N: 202), + (E: 'ecirc'; Ch: 'ê'; N: 234), + (E: 'Egrave'; Ch: 'È'; N: 200), + (E: 'egrave'; Ch: 'è'; N: 232), + (E: 'empty'; Ch: '∅'; N: 8709), + (E: 'emsp'; Ch: ' '; N: 8195), // Space character width of "m" + (E: 'ensp'; Ch: ' '; N: 8194), // Space character width of "n" + (E: 'Epsilon';Ch: 'Ε'; N: 917), // capital epsilon + (E: 'epsilon';Ch: 'ε'; N: 949), + (E: 'equiv'; Ch: '≡'; N: 8801), // 60 + (E: 'Eta'; Ch: 'Η'; N: 919), + (E: 'eta'; Ch: 'η'; N: 951), + (E: 'ETH'; Ch: 'Ð'; N: 208), + (E: 'eth'; Ch: 'ð'; N: 240), + (E: 'Euml'; Ch: 'Ë'; N: 203), + (E: 'euml'; Ch: 'ë'; N: 235), + (E: 'euro'; Ch: '€'; N: 8364), + (E: 'exist'; Ch: '∃'; N: 8707), + // F + (E: 'fnof'; Ch: 'ƒ'; N: 402), // 70 + (E: 'forall'; Ch: '∀'; N: 8704), + (E: 'frac12'; Ch: '½'; N: 189), + (E: 'frac14'; Ch: '¼'; N: 188), + (E: 'frac34'; Ch: '¾'; N: 190), + (E: 'frasl'; Ch: '⁄'; N: 8260), + // G + (E: 'Gamma'; Ch: 'Γ'; N: 915), + (E: 'gamma'; Ch: 'γ'; N: 947), + (E: 'ge'; Ch: '≥'; N: 8805), + (E: 'gt'; Ch: '>'; N: 62), + // H + (E: 'hArr'; Ch: '⇔'; N: 8660), // 80, wide horizontal double arrow + (E: 'harr'; Ch: '↔'; N: 8596), // narrow horizontal double arrow + (E: 'hearts'; Ch: '♥'; N: 9829), + (E: 'hellip'; Ch: '…'; N: 8230), + // I + (E: 'Iacute'; Ch: 'Í'; N: 205), + (E: 'iacute'; Ch: 'í'; N: 237), + (E: 'Icirc'; Ch: 'Î'; N: 206), + (E: 'icirc'; Ch: 'î'; N: 238), + (E: 'iexcl'; Ch: '¡'; N: 161), + (E: 'Igrave'; Ch: 'Ì'; N: 204), + (E: 'igrave'; Ch: 'ì'; N: 236), // 90 + (E: 'image'; Ch: 'ℑ'; N: 2465), // I in factura + (E: 'infin'; Ch: '∞'; N: 8734), + (E: 'int'; Ch: '∫'; N: 8747), + (E: 'Iota'; Ch: 'Ι'; N: 921), + (E: 'iota'; Ch: 'ι'; N: 953), + (E: 'iquest'; Ch: '¿'; N: 191), + (E: 'isin'; Ch: '∈'; N: 8712), + (E: 'Iuml'; Ch: 'Ï'; N: 207), + (E: 'iuml'; Ch: 'ï'; N: 239), + // K + (E: 'Kappa'; Ch: 'Κ'; N: 922), // 100 + (E: 'kappa'; Ch: 'κ'; N: 254), + // L + (E: 'Lambda'; Ch: 'Λ'; N: 923), + (E: 'lambda'; Ch: 'λ'; N: 955), + (E: 'lang'; Ch: '⟨'; N: 9001), // Left-pointing angle bracket + (E: 'laquo'; Ch: '«'; N: 171), + (E: 'lArr'; Ch: '⇐'; N: 8656), // Left-pointing wide arrow + (E: 'larr'; Ch: '←'; N: 8592), + (E: 'lceil'; Ch: '⌈'; N: 8968), // Left ceiling + (E: 'ldquo'; Ch: '“'; N: 8220), + (E: 'le'; Ch: '≤'; N: 8804), // 110 + (E: 'lfloor'; Ch: '⌊'; N: 8970), // Left floor + (E: 'lowast'; Ch: '∗'; N: 8727), // Low asterisk + (E: 'loz'; Ch: '◊'; N: 9674), + (E: 'lrm'; Ch: '‎'; N: 8206), // Left-to-right mark + (E: 'lsaquo'; Ch: '‹'; N: 8249), + (E: 'lsquo'; Ch: '‘'; N: 8216), + (E: 'lt'; Ch: '<'; N: 60), + // M + (E: 'macr'; Ch: '¯'; N: 175), + (E: 'mdash'; Ch: '—'; N: 8212), + (E: 'micro'; Ch: 'µ'; N: 181), // 120 + (E: 'middot'; Ch: '·'; N: 183), + (E: 'minus'; Ch: '−'; N: 8722), + (E: 'Mu'; Ch: 'Μ'; N: 924), + (E: 'mu'; Ch: 'μ'; N: 956), + // N + (E: 'nabla'; Ch: '∇'; N: 8711), + (E: 'nbsp'; Ch: ' '; N: 160), // 126 + (E: 'ndash'; Ch: '–'; N: 8211), + (E: 'ne'; Ch: '≠'; N: 8800), + (E: 'ni'; Ch: '∋'; N: 8715), + (E: 'not'; Ch: '¬'; N: 172), // 130 + (E: 'notin'; Ch: '∉'; N: 8713), // math: "not in" + (E: 'nsub'; Ch: '⊄'; N: 8836), // math: "not a subset of" + (E: 'Ntilde'; Ch: 'Ñ'; N: 209), + (E: 'ntilde'; Ch: 'ñ'; N: 241), + (E: 'Nu'; Ch: 'Ν'; N: 925), + (E: 'nu'; Ch: 'ν'; N: 957), + // O + (E: 'Oacute'; Ch: 'Ó'; N: 211), + (E: 'oacute'; Ch: 'ó'; N: 243), + (E: 'Ocirc'; Ch: 'Ô'; N: 212), + (E: 'ocirc'; Ch: 'ô'; N: 244), + (E: 'OElig'; Ch: 'Œ'; N: 338), + (E: 'oelig'; Ch: 'œ'; N: 339), + (E: 'Ograve'; Ch: 'Ò'; N: 210), + (E: 'ograve'; Ch: 'ò'; N: 242), + (E: 'oline'; Ch: '‾'; N: 8254), + (E: 'Omega'; Ch: 'Ω'; N: 937), + (E: 'omega'; Ch: 'ω'; N: 969), + (E: 'Omicron';Ch: 'Ο'; N: 927), + (E: 'omicron';Ch: 'ο'; N: 959), + (E: 'oplus'; Ch: '⊕'; N: 8853), // Circled plus + (E: 'or'; Ch: '∨'; N: 8744), + (E: 'ordf'; Ch: 'ª'; N: 170), + (E: 'ordm'; Ch: 'º'; N: 186), + (E: 'Oslash'; Ch: 'Ø'; N: 216), + (E: 'oslash'; Ch: 'ø'; N: 248), + (E: 'Otilde'; Ch: 'Õ'; N: 213), + (E: 'otilde'; Ch: 'õ'; N: 245), + (E: 'otimes'; Ch: '⊗'; N: 8855), // Circled times + (E: 'Ouml'; Ch: 'Ö'; N: 214), + (E: 'ouml'; Ch: 'ö'; N: 246), + // P + (E: 'para'; Ch: '¶'; N: 182), + (E: 'part'; Ch: '∂'; N: 8706), + (E: 'permil'; Ch: '‰'; N: 8240), + (E: 'perp'; Ch: '⊥'; N: 8869), + (E: 'Phi'; Ch: 'Φ'; N: 934), + (E: 'phi'; Ch: 'φ'; N: 966), + (E: 'Pi'; Ch: 'Π'; N: 928), + (E: 'pi'; Ch: 'π'; N: 960), // lower-case pi + (E: 'piv'; Ch: 'ϖ'; N: 982), + (E: 'plusmn'; Ch: '±'; N: 177), + (E: 'pound'; Ch: '£'; N: 163), + (E: 'Prime'; Ch: '″'; N: 8243), + (E: 'prime'; Ch: '′'; N: 8242), + (E: 'prod'; Ch: '∏'; N: 8719), + (E: 'prop'; Ch: '∝'; N: 8733), + (E: 'Psi'; Ch: 'Ψ'; N: 936), + (E: 'psi'; Ch: 'ψ'; N: 968), + // Q + (E: 'quot'; Ch: '"'; N: 34), + // R + (E: 'radic'; Ch: '√'; N: 8730), + (E: 'rang'; Ch: '⟩'; N: 9002), // right-pointing angle bracket + (E: 'raquo'; Ch: '»'; N: 187), + (E: 'rArr'; Ch: '⇒'; N: 8658), + (E: 'rarr'; Ch: '→'; N: 8594), + (E: 'rceil'; Ch: '⌉'; N: 8969), // right ceiling + (E: 'rdquo'; Ch: '”'; N: 8221), + (E: 'real'; Ch: 'ℜ'; N: 8476), // R in factura + (E: 'reg'; Ch: '®'; N: 174), + (E: 'rfloor'; Ch: '⌋'; N: 8971), // Right floor + (E: 'Rho'; Ch: 'Ρ'; N: 929), + (E: 'rho'; Ch: 'ρ'; N: 961), + (E: 'rlm'; Ch: ''; N: 8207), // right-to-left mark + (E: 'rsaquo'; Ch: '›'; N: 8250), + (E: 'rsquo'; Ch: '’'; N: 8217), + + // S + (E: 'sbquo'; Ch: '‚'; N: 8218), + (E: 'Scaron'; Ch: 'Š'; N: 352), + (E: 'scaron'; Ch: 'š'; N: 353), + (E: 'sdot'; Ch: '⋅'; N: 8901), // math: dot operator + (E: 'sect'; Ch: '§'; N: 167), + (E: 'shy'; Ch: ''; N: 173), // conditional hyphen + (E: 'Sigma'; Ch: 'Σ'; N: 931), + (E: 'sigma'; Ch: 'σ'; N: 963), + (E: 'sigmaf'; Ch: 'ς'; N: 962), + (E: 'sim'; Ch: '∼'; N: 8764), // similar + (E: 'spades'; Ch: '♠'; N: 9824), + (E: 'sub'; Ch: '⊂'; N: 8834), + (E: 'sube'; Ch: '⊆'; N: 8838), + (E: 'sum'; Ch: '∑'; N: 8721), + (E: 'sup'; Ch: '⊃'; N: 8835), + (E: 'sup1'; Ch: '¹'; N: 185), + (E: 'sup2'; Ch: '²'; N: 178), + (E: 'sup3'; Ch: '³'; N: 179), + (E: 'supe'; Ch: '⊇'; N: 8839), + (E: 'szlig'; Ch: 'ß'; N: 223), + //T + (E: 'Tau'; Ch: 'Τ'; N: 932), + (E: 'tau'; Ch: 'τ'; N: 964), + (E: 'there4'; Ch: '∴'; N: 8756), + (E: 'Theta'; Ch: 'Θ'; N: 920), + (E: 'theta'; Ch: 'θ'; N: 952), + (E: 'thetasym';Ch: 'ϑ'; N: 977), + (E: 'thinsp'; Ch: ' '; N: 8201), // thin space + (E: 'THORN'; Ch: 'Þ'; N: 222), + (E: 'thorn'; Ch: 'þ'; N: 254), + (E: 'tilde'; Ch: '˜'; N: 732), + (E: 'times'; Ch: '×'; N: 215), + (E: 'trade'; Ch: '™'; N: 8482), + // U + (E: 'Uacute'; Ch: 'Ú'; N: 218), + (E: 'uacute'; Ch: 'ú'; N: 250), + (E: 'uArr'; Ch: '⇑'; N: 8657), // wide up-arrow + (E: 'uarr'; Ch: '↑'; N: 8593), + (E: 'Ucirc'; Ch: 'Û'; N: 219), + (E: 'ucirc'; Ch: 'û'; N: 251), + (E: 'Ugrave'; Ch: 'Ù'; N: 217), + (E: 'ugrave'; Ch: 'ù'; N: 249), + (E: 'uml'; Ch: '¨'; N: 168), + (E: 'upsih'; Ch: 'ϒ'; N: 978), + (E: 'Upsilon';Ch: 'Υ'; N: 933), + (E: 'upsilon';Ch: 'υ'; N: 965), + (E: 'Uuml'; Ch: 'Ü'; N: 220), + (E: 'uuml'; Ch: 'ü'; N: 252), + // W + (E: 'weierp'; Ch: '℘'; N: 8472), // Script Capital P; Weierstrass Elliptic Function + // X + (E: 'Xi'; Ch: 'Ξ'; N: 926), + (E: 'xi'; Ch: 'ξ'; N: 958), + // Y + (E: 'Yacute'; Ch: 'Ý'; N: 221), + (E: 'yacute'; Ch: 'ý'; N: 253), + (E: 'yen'; Ch: '¥'; N: 165), + (E: 'Yuml'; Ch: 'Ÿ'; N: 376), + (E: 'yuml'; Ch: 'ÿ'; N: 255), + // Z + (E: 'Zeta'; Ch: 'Ζ'; N: 918), + (E: 'zeta'; Ch: 'ζ'; N: 950), + (E: 'zwj'; Ch: ''; N: 8205), // Zero-width joiner + (E: 'zwnj'; Ch: ''; N: 8204) // Zero-width non-joiner + ); + +function IsHTMLEntity(AText: PChar; out AEntity: THTMLEntity): Boolean; + + function Compare(s: String): Boolean; + var + j: Integer; + begin + Result := false; + for j:=1 to Length(s) do + if s[j] <> PChar(AText)[j-1] then + exit; + if PChar(AText)[Length(s)] <> ';' then + exit; + Result := true; + end; + +var + j, k: Integer; + equ: Boolean; + ch1, ch2: Char; + P: PChar; + +begin + Result := false; + for k:=0 to High(HTMLEntities) do + begin + equ := Compare(HTMLEntities[k].E); + if not equ then + begin + P := AText; + ch1 := P^; + if ch1 = '#' then + begin + inc(P); + ch2 := P^; + if ch1 = 'x' then + equ := Compare(Format('#x%x', [HTMLEntities[k].N])) + else + equ := Compare(Format('#%d', [HTMLEntities[k].N])); + end; + end; + if equ then + begin + AEntity := HTMLEntities[k]; + Result := true; + exit; + end; + end; +end; + +function CleanHTMLString(AText: String): String; +var + len: Integer; + ent: THTMLEntity; + P: PChar; + ch: Char; +begin + Result := ''; + + // Remove leading and trailing spaces and line endings coming from formatted + // source lines + while (Length(AText) > 0) and (AText[1] in [#9, #10, #13, ' ']) do + Delete(AText, 1,1); + while (Length(AText) > 0) and (AText[Length(AText)] in [#9, #10, #13, ' ']) do + Delete(AText, Length(AText), 1); + if AText = '' then + exit; + + // Replace HTML entities by their counter part UTF8 characters + len := Length(AText); + P := @AText[1]; + while (P^ <> #0) do begin + ch := P^; + case ch of + '&': begin + inc(P); + if (P <> nil) and IsHTMLEntity(P, ent) then + begin + Result := Result + ent.Ch; + inc(P, Length(ent.E)); + end else + begin + Result := Result + '&'; + Continue; + end; + end; + else Result := Result + ch; + end; + inc(P); + end; +end; + +end. + diff --git a/components/fpspreadsheet/fpsnumformat.pas b/components/fpspreadsheet/fpsnumformat.pas index 39b3c5d5f..cbc29d184 100644 --- a/components/fpspreadsheet/fpsnumformat.pas +++ b/components/fpspreadsheet/fpsnumformat.pas @@ -212,6 +212,9 @@ function ConvertFloatToStr(AValue: Double; AParams: TsNumFormatParams; AFormatSettings: TFormatSettings): String; function CountDecs(AFormatString: String; ADecChars: TsDecsChars = ['0']): Byte; +function IsBoolValue(const AText, ATrueText, AFalseText: String; + out AValue: Boolean): Boolean; + function IsCurrencyFormat(AFormat: TsNumberFormat): Boolean; overload; function IsCurrencyFormat(ANumFormat: TsNumFormatParams): Boolean; overload; @@ -219,6 +222,9 @@ function IsDateTimeFormat(AFormat: TsNumberFormat): Boolean; overload; function IsDateTimeFormat(AFormatStr: String): Boolean; overload; function IsDateTimeFormat(ANumFormat: TsNumFormatParams): Boolean; overload; +function IsDateTimeValue(AText: String; const AFormatSettings: TFormatSettings; + out ADateTime: TDateTime; out ANumFormat: TsNumberFormat): Boolean; + function IsDateFormat(ANumFormat: TsNumFormatParams): Boolean; function IsTimeFormat(AFormat: TsNumberFormat): Boolean; overload; @@ -226,6 +232,11 @@ function IsTimeFormat(AFormatStr: String): Boolean; overload; function IsTimeFormat(ANumFormat: TsNumFormatParams): Boolean; overload; function IsLongTimeFormat(AFormatStr: String; ATimeSeparator: char): Boolean; overload; +function IsNumberValue(AText: String; AutoDetectNumberFormat: Boolean; + const AFormatSettings: TFormatSettings; out ANumber: Double; + out ANumFormat: TsNumberFormat; out ADecimals: Integer; + out ACurrencySymbol, AWarning: String): Boolean; + function IsTimeIntervalFormat(ANumFormat: TsNumFormatParams): Boolean; function MakeLongDateFormat(ADateFormat: String): String; @@ -233,12 +244,15 @@ function MakeShortDateFormat(ADateFormat: String): String; procedure MakeTimeIntervalMask(Src: String; var Dest: String); function StripAMPM(const ATimeFormatString: String): String; +procedure InitFormatSettings(out AFormatSettings: TFormatSettings); +procedure ReplaceFormatSettings(var AFormatSettings: TFormatSettings; + const ADefaultFormats: TFormatSettings); implementation uses StrUtils, Math, - fpsUtils, fpsNumFormatParser; + fpsUtils, fpsNumFormatParser, fpsCurrency; const {@@ Array of format strings identifying the order of number and @@ -1408,6 +1422,26 @@ begin end; end; +{@@ ---------------------------------------------------------------------------- + Checks whether the specified text corresponds to a boolean value. For this, + it must match the specified TRUE and FALSE text phrases. +-------------------------------------------------------------------------------} +function IsBoolValue(const AText, ATrueText, AFalseText: String; + out AValue: Boolean): Boolean; +begin + if SameText(AText, ATrueText) then + begin + AValue := true; + Result := true; + end else + if SameText(AText, AFalseText) then + begin + AValue := false; + Result := true; + end else + Result := false; +end; + {@@ ---------------------------------------------------------------------------- Checks whether the given number format code is for currency, i.e. requires a currency symbol. @@ -1479,6 +1513,55 @@ begin (ANumFormat.Sections[0].Kind * [nfkDate, nfkTime] <> []); end; +{@@ ---------------------------------------------------------------------------- + Checks whether the specified text corresponds to a date/time value and returns + true, its numerical value and its built-in numberformat if it is. +-------------------------------------------------------------------------------} +function IsDateTimeValue(AText: String; const AFormatSettings: TFormatSettings; + out ADateTime: TDateTime; out ANumFormat: TsNumberFormat): Boolean; + + { Test whether the text is formatted according to a built-in date/time format. + Converts the obtained date/time value back to a string and compares. } + function TestFormat(lNumFmt: TsNumberFormat): Boolean; + var + fmt: string; + begin + fmt := BuildDateTimeFormatString(lNumFmt, AFormatSettings); + Result := FormatDateTime(fmt, ADateTime, AFormatSettings) = AText; + if Result then ANumFormat := lNumFmt; + end; + +begin + Result := TryStrToDateTime(AText, ADateTime, AFormatSettings); + if Result then + begin + ANumFormat := nfCustom; + if abs(ADateTime) > 1 then // this is most probably a date + begin + if TestFormat(nfShortDateTime) then + exit; + if TestFormat(nfLongDate) then + exit; + if TestFormat(nfShortDate) then + exit; + if TestFormat(nfMonthYear) then + exit; + if TestFormat(nfDayMonth) then + exit; + end else + begin // this case is time-only + if TestFormat(nfLongTimeAM) then + exit; + if TestFormat(nfLongTime) then + exit; + if TestFormat(nfShortTimeAM) then + exit; + if TestFormat(nfShortTime) then + exit; + end; + end; +end; + {@@ ---------------------------------------------------------------------------- Checks whether the specified number format parameters apply to a date value. @@ -1549,6 +1632,102 @@ begin Result := (n=2); end; +{@@ ---------------------------------------------------------------------------- + Checks whether the specified text corresponds to a numerical value. If it is + then the function result is TRUE, and the number value and its formatting + parameters are returned. +-------------------------------------------------------------------------------} +function IsNumberValue(AText: String; AutoDetectNumberFormat: Boolean; + const AFormatSettings: TFormatSettings; + out ANumber: Double; out ANumFormat: TsNumberFormat; out ADecimals: Integer; + out ACurrencySymbol, AWarning: String): Boolean; +var + p: Integer; + DecSep, ThousSep: Char; +begin + Result := false; + AWarning := ''; + + // To detect whether the text is a currency value we look for the currency + // string. If we find it, we delete it and convert the remaining string to + // a number. + ACurrencySymbol := AFormatSettings.CurrencyString; + if RemoveCurrencySymbol(ACurrencySymbol, AText) then + begin + if IsNegative(AText) then + begin + if AText = '' then + exit; + AText := '-' + AText; + end; + end else + ACurrencySymbol := ''; + + if AutoDetectNumberFormat then + Result := TryStrToFloatAuto(AText, ANumber, DecSep, ThousSep, AWarning) + else begin + Result := TryStrToFloat(AText, ANumber, AFormatSettings); + if Result then + begin + if pos(AFormatSettings.DecimalSeparator, AText) = 0 + then DecSep := #0 + else DecSep := AFormatSettings.DecimalSeparator; + if pos(AFormatSettings.ThousandSeparator, AText) = 0 + then ThousSep := #0 + else ThousSep := AFormatSettings.ThousandSeparator; + end; + end; + + // Try to determine the number format + if Result then + begin + if ThousSep <> #0 then + ANumFormat := nfFixedTh + else + ANumFormat := nfGeneral; + // count number of decimal places and try to catch special formats + ADecimals := 0; + if DecSep <> #0 then + begin + // Go to the decimal separator and search towards the end of the string + p := pos(DecSep, AText) + 1; + while (p <= Length(AText)) do begin + // exponential format + if AText[p] in ['+', '-', 'E', 'e'] then + begin + ANumFormat := nfExp; + break; + end else + // percent format + if AText[p] = '%' then + begin + ANumFormat := nfPercentage; + break; + end else + begin + inc(p); + inc(ADecimals); + end; + end; + if (ADecimals > 0) and (ADecimals < 9) and (ANumFormat = nfGeneral) then + // "no formatting" assumed if there are "many" decimals + ANumFormat := nfFixed; + end else + begin + p := Length(AText); + while (p > 0) do begin + case AText[p] of + '%' : ANumFormat := nfPercentage; + 'e', 'E': ANumFormat := nfExp; + else dec(p); + end; + break; + end; + end; + end else + ACurrencySymbol := ''; +end; + {@@ ---------------------------------------------------------------------------- Checks whether the specified number format parameters is a time interval format. @@ -1684,6 +1863,99 @@ begin end; end; +{@@ ---------------------------------------------------------------------------- + Initializes the FormatSettings of file a import/export parameters record to + default values which can be replaced by the FormatSettings of the + workbook's FormatSettings +-------------------------------------------------------------------------------} +procedure InitFormatSettings(out AFormatSettings: TFormatSettings); +var + i: Integer; +begin + with AFormatSettings do + begin + CurrencyFormat := Byte(-1); + NegCurrFormat := Byte(-1); + ThousandSeparator := #0; + DecimalSeparator := #0; + CurrencyDecimals := Byte(-1); + DateSeparator := #0; + TimeSeparator := #0; + ListSeparator := #0; + CurrencyString := ''; + ShortDateFormat := ''; + LongDateFormat := ''; + TimeAMString := ''; + TimePMString := ''; + ShortTimeFormat := ''; + LongTimeFormat := ''; + for i:=1 to 12 do + begin + ShortMonthNames[i] := ''; + LongMonthNames[i] := ''; + end; + for i:=1 to 7 do + begin + ShortDayNames[i] := ''; + LongDayNames[i] := ''; + end; + TwoDigitYearCenturyWindow := Word(-1); + end; +end; + +{@@ ---------------------------------------------------------------------------- + Replaces in AFormatSettings all members marked as having default values (#0, + -1, '') by the corresponding values of the ADefaultFormats record +-------------------------------------------------------------------------------} +procedure ReplaceFormatSettings(var AFormatSettings: TFormatSettings; + const ADefaultFormats: TFormatSettings); +var + i: Integer; +begin + if AFormatSettings.CurrencyFormat = Byte(-1) then + AFormatSettings.CurrencyFormat := ADefaultFormats.CurrencyFormat; + if AFormatSettings.NegCurrFormat = Byte(-1) then + AFormatSettings.NegCurrFormat := ADefaultFormats.NegCurrFormat; + if AFormatSettings.ThousandSeparator = #0 then + AFormatSettings.ThousandSeparator := ADefaultFormats.ThousandSeparator; + if AFormatSettings.DecimalSeparator = #0 then + AFormatSettings.DecimalSeparator := ADefaultFormats.DecimalSeparator; + if AFormatSettings.CurrencyDecimals = Byte(-1) then + AFormatSettings.CurrencyDecimals := ADefaultFormats.CurrencyDecimals; + if AFormatSettings.DateSeparator = #0 then + AFormatSettings.DateSeparator := ADefaultFormats.DateSeparator; + if AFormatSettings.TimeSeparator = #0 then + AFormatSettings.TimeSeparator := ADefaultFormats.TimeSeparator; + if AFormatSettings.ListSeparator = #0 then + AFormatSettings.ListSeparator := ADefaultFormats.ListSeparator; + if AFormatSettings.CurrencyString = '' then + AFormatSettings.CurrencyString := ADefaultFormats.CurrencyString; + if AFormatSettings.ShortDateFormat = '' then + AFormatSettings.ShortDateFormat := ADefaultFormats.ShortDateFormat; + if AFormatSettings.LongDateFormat = '' then + AFormatSettings.LongDateFormat := ADefaultFormats.LongDateFormat; + if AFormatSettings.ShortTimeFormat = '' then + AFormatSettings.ShortTimeFormat := ADefaultFormats.ShortTimeFormat; + if AFormatSettings.LongTimeFormat = '' then + AFormatSettings.LongTimeFormat := ADefaultFormats.LongTimeFormat; + for i:=1 to 12 do + begin + if AFormatSettings.ShortMonthNames[i] = '' then + AFormatSettings.ShortMonthNames[i] := ADefaultFormats.ShortMonthNames[i]; + if AFormatSettings.LongMonthNames[i] = '' then + AFormatSettings.LongMonthNames[i] := ADefaultFormats.LongMonthNames[i]; + end; + for i:=1 to 7 do + begin + if AFormatSettings.ShortDayNames[i] = '' then + AFormatSettings.ShortDayNames[i] := ADefaultFormats.ShortDayNames[i]; + if AFormatSettings.LongDayNames[i] = '' then + AFormatSettings.LongDayNames[i] := ADefaultFormats.LongDayNames[i]; + end; + if AFormatSettings.TwoDigitYearCenturyWindow = Word(-1) then + AFormatSettings.TwoDigitYearCenturyWindow := ADefaultFormats.TwoDigitYearCenturyWindow; +end; + {==============================================================================} { TsNumFormatParams } diff --git a/components/fpspreadsheet/fpsnumformatparser.pas b/components/fpspreadsheet/fpsnumformatparser.pas index 9db9e715c..a0d976fbd 100644 --- a/components/fpspreadsheet/fpsnumformatparser.pas +++ b/components/fpspreadsheet/fpsnumformatparser.pas @@ -1253,7 +1253,6 @@ end; procedure TsNumFormatParser.ScanFormat; var done: Boolean; - s: String; n: Integer; uch: Cardinal; begin diff --git a/components/fpspreadsheet/laz_fpspreadsheet.lpk b/components/fpspreadsheet/laz_fpspreadsheet.lpk index 2df0cae7c..55e72177a 100644 --- a/components/fpspreadsheet/laz_fpspreadsheet.lpk +++ b/components/fpspreadsheet/laz_fpspreadsheet.lpk @@ -29,7 +29,7 @@ This package is all you need if you don't want graphical components (like grids and charts)."/> <License Value="LGPL with static linking exception. This is the same license as is used in the LCL (Lazarus Component Library)."/> <Version Major="1" Minor="7"/> - <Files Count="36"> + <Files Count="37"> <Item1> <Filename Value="fpolestorage.pas"/> <UnitName Value="fpolestorage"/> @@ -174,6 +174,10 @@ This package is all you need if you don't want graphical components (like grids <Filename Value="fpshtml.pas"/> <UnitName Value="fpsHTML"/> </Item36> + <Item37> + <Filename Value="fpshtmlutils.pas"/> + <UnitName Value="fpshtmlutils"/> + </Item37> </Files> <RequiredPkgs Count="2"> <Item1> diff --git a/components/fpspreadsheet/laz_fpspreadsheet.pas b/components/fpspreadsheet/laz_fpspreadsheet.pas index debf32f1a..1221268fd 100644 --- a/components/fpspreadsheet/laz_fpspreadsheet.pas +++ b/components/fpspreadsheet/laz_fpspreadsheet.pas @@ -13,7 +13,8 @@ uses uvirtuallayer_ole_helpers, uvirtuallayer_ole_types, uvirtuallayer_stream, fpolebasic, wikitable, fpsNumFormatParser, fpsfunc, fpsRPN, fpsStrings, fpscsv, fpsCsvDocument, fpspatches, fpsTypes, xlsEscher, fpsReaderWriter, - fpsNumFormat, fpsclasses, fpsHeaderFooterParser, fpsPalette, fpsHTML; + fpsNumFormat, fpsclasses, fpsHeaderFooterParser, fpsPalette, fpsHTML, + fpshtmlutils; implementation diff --git a/components/fpspreadsheet/tests/spreadtestgui.lpi b/components/fpspreadsheet/tests/spreadtestgui.lpi index 96a83295e..7ef67a513 100644 --- a/components/fpspreadsheet/tests/spreadtestgui.lpi +++ b/components/fpspreadsheet/tests/spreadtestgui.lpi @@ -155,6 +155,9 @@ <OtherUnitFiles Value=".."/> <UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/> </SearchPaths> + <Other> + <CustomOptions Value="-d-MDelphi"/> + </Other> </CompilerOptions> <Debugging> <Exceptions Count="6"> diff --git a/components/fpspreadsheet/xlsbiff5.pas b/components/fpspreadsheet/xlsbiff5.pas index 1a481e5da..63abb497f 100755 --- a/components/fpspreadsheet/xlsbiff5.pas +++ b/components/fpspreadsheet/xlsbiff5.pas @@ -519,7 +519,7 @@ end; procedure TsSpreadBIFF5Reader.ReadRSTRING(AStream: TStream); var L: Word; - B, F: Byte; + B: Byte; ARow, ACol: Cardinal; XF: Word; ansistr: ansistring; @@ -629,7 +629,7 @@ procedure TsSpreadBIFF5Reader.ReadXF(AStream: TStream); var rec: TBIFF5_XFRecord; fmt: TsCellFormat; - i, cidx: Integer; + cidx: Integer; nfparams: TsNumFormatParams; nfs: String; b: Byte; diff --git a/components/fpspreadsheet/xlsbiff8.pas b/components/fpspreadsheet/xlsbiff8.pas index db323f680..b0f4886e4 100755 --- a/components/fpspreadsheet/xlsbiff8.pas +++ b/components/fpspreadsheet/xlsbiff8.pas @@ -557,31 +557,14 @@ function TsSpreadBIFF8Reader.ReadUnformattedWideString(const AStream: TStream; var flags: Byte; DecomprStrValue: WideString; - AnsiStrValue: ansistring; - //RunsCounter: Word; - //AsianPhoneticBytes: DWord; i: Integer; - j: SizeUInt; len: SizeInt; recType: Word; - recSize: Word; + {%H-}recSize: Word; C: WideChar; begin flags := AStream.ReadByte; dec(PendingRecordSize); - { - if StringFlags and 4 = 4 then begin - //Asian phonetics - //Read Asian phonetics Length (not used) - AsianPhoneticBytes := DWordLEtoN(AStream.ReadDWord); - dec(PendingRecordSize,4); - end; - if StringFlags and 8 = 8 then begin - //Rich string - RunsCounter := WordLEtoN(AStream.ReadWord); - dec(PendingRecordSize,2); - end; - } if flags and 1 = 1 Then begin //String is WideStringLE if (ALength * SizeOf(WideChar)) > PendingRecordSize then begin @@ -619,34 +602,6 @@ begin end; Result := DecomprStrValue; end; - { - if StringFlags and 8 = 8 then begin - // Rich string (This only occurs in BIFF8) - SetLength(ARichTextRuns, RunsCounter); - for j := 0 to RunsCounter - 1 do begin - if (PendingRecordSize <= 0) then begin - // A CONTINUE may happened here - RecordType := WordLEToN(AStream.ReadWord); - RecordSize := WordLEToN(AStream.ReadWord); - if RecordType <> INT_EXCEL_ID_CONTINUE then begin - Raise Exception.Create('[TsSpreadBIFF8Reader.ReadWideString] Expected CONTINUE record not found.'); - end else begin - PendingRecordSize := RecordSize; - end; - end; - ARichTextRuns[j].FirstIndex := WordLEToN(AStream.ReadWord); - ARichTextRuns[j].FontIndex := WordLEToN(AStream.ReadWord); - dec(PendingRecordSize, 2*2); - end; - end; - if StringFlags and 4 = 4 then begin - //Asian phonetics - //Read Asian phonetics, discarded as not used. - SetLength(AnsiStrValue, AsianPhoneticBytes); - AStream.ReadBuffer(AnsiStrValue[1], AsianPhoneticBytes); - dec(PendingRecordSize, AsianPhoneticBytes); - end; - } end; function TsSpreadBIFF8Reader.ReadWideString(const AStream: TStream; @@ -1369,7 +1324,7 @@ var cell: PCell; ms: TMemoryStream; rtfRuns: TsRichTextFormattingRuns; - j, n: Integer; + n: Integer; begin rec.Row := 0; // to silence the compiler... @@ -1450,16 +1405,6 @@ begin end; procedure TsSpreadBIFF8Reader.ReadXF(const AStream: TStream); - (* - function FixLineStyle(dw: DWord): TsLineStyle; - { Not all line styles defined in BIFF8 are supported by fpspreadsheet. } - begin - case dw of - $01..$07: result := TsLineStyle(dw-1); - else Result := lsDashed; - end; - end; - *) var rec: TBIFF8_XFRecord; fmt: TsCellFormat; @@ -1469,9 +1414,7 @@ var fs: TsFillStyle; nfs: String; nfParams: TsNumFormatParams; - i: Integer; iclr: Integer; - fnt: TsFont; begin InitFormatRecord(fmt); fmt.ID := FCellFormatList.Count; diff --git a/components/fpspreadsheet/xlscommon.pas b/components/fpspreadsheet/xlscommon.pas index beaef7caf..4ce4ec9d5 100644 --- a/components/fpspreadsheet/xlscommon.pas +++ b/components/fpspreadsheet/xlscommon.pas @@ -325,6 +325,7 @@ type // Adjusts Excel float (date, date/time, time) with the file's base date to get a TDateTime function ConvertExcelDateTimeToDateTime (const AExcelDateNum: Double; ADateMode: TDateMode): TDateTime; + // Adjusts TDateTime with the file's base date to get // an Excel float value representing a time/date/datetime function ConvertDateTimeToExcelDateTime @@ -332,6 +333,7 @@ type // Converts the error byte read from cells or formulas to fps error value function ConvertFromExcelError(AValue: Byte): TsErrorValue; + // Converts an fps error value to the byte code needed in xls files function ConvertToExcelError(AValue: TsErrorValue): byte; @@ -665,6 +667,16 @@ begin case ADateMode of dm1900: begin + { + Result := AExcelDateNum + DATEMODE_1900_BASE - 1.0; + // Excel and Lotus 1-2-3 incorrectly assume that 1900 was a leap year + // Therefore all dates before March 01 are off by 1. + // The old fps implementation corrected only Feb 29, but all days are + // wrong! + if AExcelDateNum < 61 then + Result := Result + 1.0; + } + // Check for Lotus 1-2-3 bug with 1900 leap year if AExcelDateNum=61.0 then // 29 feb does not exist, change to 28 @@ -693,9 +705,12 @@ begin begin case ADateMode of dm1900: - result:=ADateTime-DATEMODE_1900_BASE+1.0; + begin + Result := ADateTime - DATEMODE_1900_BASE + 1.0; + // if Result < 61 then Result := Result - 1.0; + end; dm1904: - result:=ADateTime-DATEMODE_1904_BASE; + Result := ADateTime - DATEMODE_1904_BASE; else raise Exception.CreateFmt('ConvertDateTimeToExcelDateTime: unknown datemode %d. Please correct fpspreadsheet source code. ', [ADateMode]); end; @@ -2271,11 +2286,13 @@ end; procedure TsSpreadBIFFReader.ReadWorkbookGlobals(AStream: TStream); begin // To be overridden by BIFF5 and BIFF8 + Unused(AStream); end; procedure TsSpreadBIFFReader.ReadWorksheet(AStream: TStream); begin // To be overridden by BIFF5 and BIFF8 + Unused(AStream); end; {@@ ----------------------------------------------------------------------------