From 4473d3b4a5c713f7f9e418f3ee3080d311396dcd Mon Sep 17 00:00:00 2001 From: wp_xxyyzz Date: Mon, 20 Oct 2014 21:04:20 +0000 Subject: [PATCH] fpspreadsheet: More improvements for reading numbers in csv files and automatic format detection. git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@3672 8e941d3f-bd1b-0410-a28a-d453659cc2b4 --- .../examples/spready/spready.lpi | 3 - components/fpspreadsheet/fpscsv.pas | 76 +++++++-- components/fpspreadsheet/fpsutils.pas | 151 +++++++++++------- 3 files changed, 155 insertions(+), 75 deletions(-) diff --git a/components/fpspreadsheet/examples/spready/spready.lpi b/components/fpspreadsheet/examples/spready/spready.lpi index e16146a05..47be25695 100644 --- a/components/fpspreadsheet/examples/spready/spready.lpi +++ b/components/fpspreadsheet/examples/spready/spready.lpi @@ -111,12 +111,10 @@ - - @@ -124,7 +122,6 @@ - diff --git a/components/fpspreadsheet/fpscsv.pas b/components/fpspreadsheet/fpscsv.pas index 88225cd5d..e541675c1 100644 --- a/components/fpspreadsheet/fpscsv.pas +++ b/components/fpspreadsheet/fpscsv.pas @@ -14,8 +14,8 @@ type FWorksheetName: String; function IsBool(AText: String; out AValue: Boolean): Boolean; function IsDateTime(AText: String; out ADateTime: TDateTime): Boolean; - function IsNumber(AText: String; out ANumber: Double; - out ACurrencySymbol, AWarning: String): Boolean; + function IsNumber(AText: String; out ANumber: Double; out ANumFormat: TsNumberFormat; + out ADecimals: Integer; out ACurrencySymbol, AWarning: String): Boolean; function IsQuotedText(var AText: String): Boolean; procedure ReadCellValue(ARow, ACol: Cardinal; AText: String); protected @@ -207,10 +207,13 @@ begin end; function TsCSVReader.IsNumber(AText: String; out ANumber: Double; + out ANumFormat: TsNumberFormat; out ADecimals: Integer; out ACurrencySymbol, AWarning: String): Boolean; var p: Integer; + decsep, thsep: Char; begin + Result := false; AWarning := ''; // To detect whether the text is a currency value we look for the currency @@ -223,11 +226,8 @@ begin if p > 0 then begin Delete(AText, p, Length(ACurrencySymbol)); AText := Trim(AText); - if AText = '' then begin - Result := false; - ACurrencySymbol := ''; + if AText = '' then exit; - end; // Negative financial values are often enclosed by parenthesis if ((AText[1] = '(') and (AText[Length(AText)] = ')')) then AText := '-' + Trim(Copy(AText, 2, Length(AText)-2)); @@ -235,11 +235,57 @@ begin ACurrencySymbol := ''; if CSVParams.AutoDetectNumberFormat then - Result := TryStrToFloatAuto(AText, ANumber, AWarning) - else + Result := TryStrToFloatAuto(AText, ANumber, decsep, thsep, AWarning) + else begin Result := TryStrToFloat(AText, ANumber, CSVParams.FormatSettings); + if Result then + begin + if pos(CSVParams.FormatSettings.DecimalSeparator, AText) = 0 + then decsep := #0 + else decsep := CSVParams.FormatSettings.DecimalSeparator; + if pos(CSVParams.FormatSettings.ThousandSeparator, AText) = 0 + then thsep := #0 + else thsep := CSVParams.FormatSettings.ThousandSeparator; + end; + end; - if not Result then ACurrencySymbol := ''; + // Try to determine the number format + if Result then + begin + if thsep <> #0 then + ANumFormat := nfFixedTh + else + ANumFormat := nfGeneral; + // count number of decimal places and try to catch special formats + ADecimals := 0; + if decsep <> #0 then + begin + // Go to the decimal separator and search towards the end of the string + p := pos(decsep, AText) + 1; + while (p <= Length(AText)) do begin + // exponential format + if AText[p] in ['+', '-', 'E', 'e'] then + begin + ANumFormat := nfExp; + break; + end else + // percent format + if AText[p] = '%' then + begin + ANumFormat := nfPercentage; + break; + end else + begin + inc(p); + inc(ADecimals); + end; + end; + if (ADecimals > 0) and (ADecimals < 9) and (ANumFormat = nfGeneral) then + // "no formatting" assumed if there are "many" decimals + ANumFormat := nfFixed; + end; + end else + ACurrencySymbol := ''; end; function TsCSVReader.IsQuotedText(var AText: String): Boolean; @@ -268,6 +314,7 @@ var currSym: string; warning: String; nf: TsNumberFormat; + decs: Integer; begin // Empty strings are blank cells -- nothing to do if AText = '' then @@ -282,7 +329,10 @@ begin // Remove quotes if (AText[1] = CSVParams.QuoteChar) and (AText[Length(AText)] = CSVParams.QuoteChar) then - Delete(AText, 2, Length(AText)-2); + begin + Delete(AText, Length(AText), 1); + Delete(AText, 1, 1); + end; { // Quoted text is a TEXT cell @@ -294,12 +344,12 @@ begin } // Check for a NUMBER or CURRENCY cell - if IsNumber(AText, dblValue, currSym, warning) then + if IsNumber(AText, dblValue, nf, decs, currSym, warning) then begin if currSym <> '' then - FWorksheet.WriteCurrency(ARow, ACol, dblValue, nfCurrency, 2, currSym) + FWorksheet.WriteCurrency(ARow, ACol, dblValue, nfCurrency, decs, currSym) else - FWorksheet.WriteNumber(ARow, ACol, dblValue); + FWorksheet.WriteNumber(ARow, ACol, dblValue, nf, decs); if warning <> '' then FWorkbook.AddErrorMsg('Cell %s: %s', [GetCellString(ARow, ACol), warning]); exit; diff --git a/components/fpspreadsheet/fpsutils.pas b/components/fpspreadsheet/fpsutils.pas index 9daf791a1..299d61a39 100644 --- a/components/fpspreadsheet/fpsutils.pas +++ b/components/fpspreadsheet/fpsutils.pas @@ -130,7 +130,7 @@ function FormatDateTime(const FormatStr: string; DateTime: TDateTime; const FormatSettings: TFormatSettings; Options : TFormatDateTimeOptions = []): string; function TryStrToFloatAuto(AText: String; out ANumber: Double; - out AWarning: String): Boolean; + out ADecimalSeparator, AThousandSeparator: Char; out AWarning: String): Boolean; function TwipsToPts(AValue: Integer): Single; function PtsToTwips(AValue: Single): Integer; @@ -1481,17 +1481,21 @@ end; Is needed for reading CSV files. -------------------------------------------------------------------------------} function TryStrToFloatAuto(AText: String; out ANumber: Double; - out AWarning: String): Boolean; + out ADecimalSeparator, AThousandSeparator: Char; out AWarning: String): Boolean; var i: Integer; testSep: Char; testSepPos: Integer; + lastDigitPos: Integer; isPercent: Boolean; + isExp: Boolean; fs: TFormatSettings; done: Boolean; begin Result := false; AWarning := ''; + ADecimalSeparator := #0; + AThousandSeparator := #0; if AText = '' then exit; @@ -1506,65 +1510,89 @@ begin // but no decimal separator misinterprets the thousand separator as a // decimal separator. - done := false; // Indicates that both decimal and thousand separators are found - testSep := #0; // Separator candidate to be tested - testSepPos := 0; // Position of this separator chandidate in the string + done := false; // Indicates that both decimal and thousand separators are found + testSep := #0; // Separator candidate to be tested + testSepPos := 0; // Position of this separator candidate in the string + lastDigitPos := 0; // Position of the last numerical digit + isExp := false; // Flag for exponential format + isPercent := false; // Flag for percentage format + i := Length(AText); // Start at end... while i >= 1 do // ...and search towards start begin - if AText[i] in ['.', ','] then - begin - if testSep = #0 then begin - testSep := AText[i]; - testSepPos := i; - end; - // This is the right-most separator candidate in the text - // It can be a decimal or a thousand separator. - dec(i); - while i >= 1 do - begin - if not (AText[i] in ['0'..'9']) then begin - Result := false; - exit; - end; - // If we find the testSep character again it must be a thousand separator. - if (AText[i] = testSep) then + case AText[i] of + '0'..'9': + if (lastDigitPos = 0) and (AText[i] in ['0'..'9']) then + lastDigitPos := i; + + 'e', 'E': + isExp := true; + + '%': + isPercent := true; + + '+', '-': + ; + + '.', ',': begin - // ... but only if there are 3 numerical digits in between - if (testSepPos - i = 4) then - begin - fs.ThousandSeparator := testSep; - // The decimal separator is the "other" character. - if testSep = '.' then - fs.DecimalSeparator := ',' - else - fs.DecimalSeparator := '.'; - done := true; - i := 0; - end else - begin - Result := false; - exit; + if testSep = #0 then begin + testSep := AText[i]; + testSepPos := i; + end; + // This is the right-most separator candidate in the text + // It can be a decimal or a thousand separator. + // Therefore, we continue searching from here. + dec(i); + while i >= 1 do + begin + if not (AText[i] in ['0'..'9', '+', '-']) then + exit; + + // If we find the testSep character again it must be a thousand separator. + if (AText[i] = testSep) then + begin + // ... but only if there are 3 numerical digits in between + if (testSepPos - i = 4) then + begin + fs.ThousandSeparator := testSep; + // The decimal separator is the "other" character. + if testSep = '.' then + fs.DecimalSeparator := ',' + else + fs.DecimalSeparator := '.'; + ADecimalSeparator := fs.DecimalSeparator; + AThousandSeparator := fs.ThousandSeparator; + done := true; + i := 0; + end else + begin + Result := false; + exit; + end; + end + else + // If we find the "other" separator character, then testSep was a + // decimal separator and the current character is a thousand separator. + // But there must be 3 digits in between. + if AText[i] in ['.', ','] then + begin + if testSepPos - i <> 4 then // no 3 digits in between --> no number, maybe a date. + exit; + fs.DecimalSeparator := testSep; + fs.ThousandSeparator := AText[i]; + ADecimalSeparator := fs.DecimalSeparator; + AThousandSeparator := fs.ThousandSeparator; + done := true; + i := 0; + end; + dec(i); end; - end - else - // If we find the "other" separator character, then testSep was a - // decimal separator and the current character is a thousand separator. - if AText[i] in ['.',','] then - begin - fs.DecimalSeparator := testSep; - fs.ThousandSeparator := AText[i]; - done := true; - i := 0; end; - dec(i); - end; - end else - if not (AText[i] in ['0'..'9', '+', '-', 'e', 'E', '%']) then - begin - Result := false; - AWarning := ''; - exit; + + else + exit; // Non-numeric character found, no need to continue + end; dec(i); end; @@ -1576,9 +1604,10 @@ begin // type is found and it is at the third position from the string's end it // might by a thousand separator or a decimal separator. We assume the // latter case, but create a warning. - if Length(AText) - testSepPos = 3 then + if (lastDigitPos - testSepPos = 3) and not isPercent then AWarning := Format(rsAmbiguousDecThouSeparator, [AText]); fs.DecimalSeparator := testSep; + ADecimalSeparator := fs.DecimalSeparator; // Make sure that the thousand separator is different from the decimal sep. if testSep = '.' then fs.ThousandSeparator := ',' else fs.ThousandSeparator := '.'; end; @@ -1595,9 +1624,13 @@ begin // Try string-to-number conversion Result := TryStrToFloat(AText, ANumber, fs); - // If successful take care of the percentage sign - if Result and isPercent then - ANumber := ANumber * 0.01; + // If successful ... + if Result then + begin + // ... take care of the percentage sign + if isPercent then + ANumber := ANumber * 0.01; + end; end;