You've already forked lazarus-ccr
fpspreadsheet: HTMLReader detects background color (from "bgcolor" or "style:background-color" tags of "td" node).
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@4252 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
@ -28,12 +28,17 @@ type
|
||||
FInHeader: Boolean;
|
||||
FTableCounter: Integer;
|
||||
FCurrRow, FCurrCol: LongInt;
|
||||
FCelLText: String;
|
||||
FCurrCellFormat: TsCellFormat;
|
||||
FCellFont: TsFont;
|
||||
FCellText: String;
|
||||
FAttrList: TsHTMLAttrList;
|
||||
FColSpan, FRowSpan: Integer;
|
||||
FHRef: String;
|
||||
procedure ExtractBackgroundColor;
|
||||
procedure ExtractHRef;
|
||||
procedure ExtractMergedRange;
|
||||
procedure InitFont(AFont: TsFont);
|
||||
procedure InitCellFormat;
|
||||
procedure TagFoundHandler(NoCaseTag, ActualTag: string);
|
||||
procedure TextFoundHandler(AText: String);
|
||||
protected
|
||||
@ -419,10 +424,12 @@ begin
|
||||
ReplaceFormatSettings(FFormatSettings, FWorkbook.FormatSettings);
|
||||
FTableCounter := -1;
|
||||
FAttrList := TsHTMLAttrList.Create;
|
||||
FCellFont := TsFont.Create;
|
||||
end;
|
||||
|
||||
destructor TsHTMLReader.Destroy;
|
||||
begin
|
||||
FreeAndNil(FCellFont);
|
||||
FreeAndNil(FAttrList);
|
||||
FreeAndNil(parser);
|
||||
inherited Destroy;
|
||||
@ -438,13 +445,24 @@ var
|
||||
decs: Integer;
|
||||
currSym: String;
|
||||
warning: String;
|
||||
fntIndex: Integer;
|
||||
begin
|
||||
// Empty strings are blank cells -- nothing to do
|
||||
if (AText = '') then
|
||||
exit;
|
||||
|
||||
// Create cell
|
||||
cell := FWorksheet.AddCell(ARow, ACol);
|
||||
|
||||
// Format
|
||||
fntIndex := FWorkbook.FindFont(FCellFont.FontName, FCellFont.Size,
|
||||
FCellFont.Style, FCellFont.Color, FCellFont.Position);
|
||||
if fntIndex = -1 then
|
||||
fntIndex := FWorkbook.AddFont(FCellFont.FontName, FCellFont.Size,
|
||||
FCellFont.Style, FCellFont.Color, FCellFont.Position);
|
||||
FCurrCellFormat.FontIndex := fntIndex;
|
||||
cell^.FormatIndex := FWorkbook.AddCellFormat(FCurrCellFormat);
|
||||
|
||||
// Merged cells
|
||||
if (FColSpan > 0) or (FRowSpan > 0) then begin
|
||||
FWorksheet.MergeCells(ARow, ACol, ARow + FRowSpan, ACol + FColSpan);
|
||||
@ -497,6 +515,22 @@ begin
|
||||
FWorksheet.WriteUTF8Text(cell, AText);
|
||||
end;
|
||||
|
||||
procedure TsHTMLReader.ExtractBackgroundColor;
|
||||
var
|
||||
idx: Integer;
|
||||
begin
|
||||
idx := FAttrList.IndexOfName('bgcolor'); // html tag
|
||||
if idx = -1 then
|
||||
idx := FAttrList.IndexOfName('background-color'); // value taken from "style"
|
||||
if idx > -1 then
|
||||
begin
|
||||
FCurrCellFormat.Background.BgColor := HTMLColorStrToColor(FAttrList[idx].Value);
|
||||
FCurrCellFormat.Background.FgColor := FCurrCellFormat.Background.BgColor;
|
||||
FCurrCellFormat.Background.Style := fsSolidFill; // No other fill styles in html
|
||||
Include(FCurrCellFormat.UsedFormattingFields, uffBackground);
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure TsHTMLReader.ExtractHRef;
|
||||
var
|
||||
idx: Integer;
|
||||
@ -522,6 +556,27 @@ begin
|
||||
// -1 to compensate for correct determination of the range end cell
|
||||
end;
|
||||
|
||||
procedure TsHTMLReader.InitFont(AFont: TsFont);
|
||||
var
|
||||
fnt: TsFont;
|
||||
begin
|
||||
fnt := FWorkbook.GetDefaultFont;
|
||||
AFont.FontName := fnt.FontName;
|
||||
AFont.Size := fnt.Size;
|
||||
AFont.Style := fnt.Style;
|
||||
AFont.Color := fnt.Color;
|
||||
AFont.Position := fnt.Position;
|
||||
end;
|
||||
|
||||
procedure TsHTMLReader.InitCellFormat;
|
||||
begin
|
||||
InitFormatRecord(FCurrCellFormat);
|
||||
InitFont(FCellFont);
|
||||
|
||||
// HTML tables, by default, have word-wrapped cell texts.
|
||||
Include(FCurrCellFormat.UsedFormattingFields, uffWordwrap);
|
||||
end;
|
||||
|
||||
procedure TsHTMLReader.ReadFromStream(AStream: TStream);
|
||||
var
|
||||
list: TStringList;
|
||||
@ -581,8 +636,10 @@ begin
|
||||
FInCell := true;
|
||||
inc(FCurrCol);
|
||||
FCellText := '';
|
||||
InitCellFormat;
|
||||
FAttrList.Parse(ActualTag);
|
||||
ExtractMergedRange;
|
||||
ExtractBackgroundColor;
|
||||
end else
|
||||
if ((NoCaseTag = '<TH>') or (pos('<TH ', NoCaseTag) = 1)) and FInTable then
|
||||
begin
|
||||
|
@ -28,6 +28,8 @@ type
|
||||
private
|
||||
function GetItem(AIndex: Integer): TsHTMLAttr;
|
||||
procedure SetItem(AIndex: Integer; AValue: TsHTMLAttr);
|
||||
protected
|
||||
procedure ParseStyle(AStyle: String);
|
||||
public
|
||||
function IndexOfName(AName: String): Integer;
|
||||
procedure Parse(AHTML: String);
|
||||
@ -42,6 +44,7 @@ uses
|
||||
|
||||
const
|
||||
// http://unicode.e-workers.de/entities.php
|
||||
// http://www.utf8-zeichentabelle.de/unicode-utf8-table.pl
|
||||
HTMLEntities: array[0..250] of TsHTMLEntity = (
|
||||
// A
|
||||
(E: 'Acirc'; Ch: 'Â'; N: 194), // 0
|
||||
@ -51,7 +54,7 @@ const
|
||||
(E: 'aelig'; Ch: 'æ'; N: 230),
|
||||
(E: 'Agrave'; Ch: 'À'; N: 192),
|
||||
(E: 'agrave'; Ch: 'à'; N: 224),
|
||||
(E: 'alefsym';Ch: 'ℵ'; N: 8501),
|
||||
(E: 'alefsym';Ch: #$E2#$84#$B5; N: 8501),
|
||||
(E: 'Alpha'; Ch: 'Α'; N: 913),
|
||||
(E: 'alpha'; Ch: 'α'; N: 945),
|
||||
(E: 'amp'; Ch: '&'; N: 38), // 10
|
||||
@ -81,15 +84,15 @@ const
|
||||
(E: 'chi'; Ch: 'χ'; N: 967),
|
||||
(E: 'circ'; Ch: 'ˆ'; N: 710),
|
||||
(E: 'clubs'; Ch: '♣'; N: 9827),
|
||||
(E: 'cong'; Ch: '≅'; N: 8773), // approximately equal
|
||||
(E: 'cong'; Ch: #$E2#$89#$85; N: 8773), // approximately equal
|
||||
(E: 'copy'; Ch: '©'; N: 169),
|
||||
(E: 'crarr'; Ch: '↵'; N: 8629), // carriage return
|
||||
(E: 'crarr'; Ch: #$E2#$86#$B5; N: 8629), // carriage return
|
||||
(E: 'cup'; Ch: '∪'; N: 8746),
|
||||
(E: 'curren'; Ch: '¤'; N: 164),
|
||||
// D
|
||||
(E: 'Dagger'; Ch: '‡'; N: 8225), // 40
|
||||
(E: 'dagger'; Ch: '†'; N: 8224),
|
||||
(E: 'dArr'; Ch: '⇓'; N: 8659), // wide down-arrow
|
||||
(E: 'dArr'; Ch: #$E2#$87#$93; N: 8659), // wide down-arrow
|
||||
(E: 'darr'; Ch: '↓'; N: 8595), // narrow down-arrow
|
||||
(E: 'deg'; Ch: '°'; N: 176),
|
||||
(E: 'Delta'; Ch: 'Δ'; N: 916),
|
||||
@ -103,9 +106,9 @@ const
|
||||
(E: 'ecirc'; Ch: 'ê'; N: 234),
|
||||
(E: 'Egrave'; Ch: 'È'; N: 200),
|
||||
(E: 'egrave'; Ch: 'è'; N: 232),
|
||||
(E: 'empty'; Ch: '∅'; N: 8709),
|
||||
(E: 'emsp'; Ch: ' '; N: 8195), // Space character width of "m"
|
||||
(E: 'ensp'; Ch: ' '; N: 8194), // Space character width of "n"
|
||||
(E: 'empty'; Ch: #$e2#$88#$85; N: 8709),
|
||||
(E: 'emsp'; Ch: #$E2#$80#$83; N: 8195), // Space character width of "m"
|
||||
(E: 'ensp'; Ch: #$E2#$80#$82; N: 8194), // Space character width of "n"
|
||||
(E: 'Epsilon';Ch: 'Ε'; N: 917), // capital epsilon
|
||||
(E: 'epsilon';Ch: 'ε'; N: 949),
|
||||
(E: 'equiv'; Ch: '≡'; N: 8801), // 60
|
||||
@ -142,7 +145,7 @@ const
|
||||
(E: 'iexcl'; Ch: '¡'; N: 161),
|
||||
(E: 'Igrave'; Ch: 'Ì'; N: 204),
|
||||
(E: 'igrave'; Ch: 'ì'; N: 236), // 90
|
||||
(E: 'image'; Ch: 'ℑ'; N: 2465), // I in factura
|
||||
(E: 'image'; Ch: #$E2#$84#$91; N: 2465), // I in factura
|
||||
(E: 'infin'; Ch: '∞'; N: 8734),
|
||||
(E: 'int'; Ch: '∫'; N: 8747),
|
||||
(E: 'Iota'; Ch: 'Ι'; N: 921),
|
||||
@ -159,13 +162,13 @@ const
|
||||
(E: 'lambda'; Ch: 'λ'; N: 955),
|
||||
(E: 'lang'; Ch: '⟨'; N: 9001), // Left-pointing angle bracket
|
||||
(E: 'laquo'; Ch: '«'; N: 171),
|
||||
(E: 'lArr'; Ch: '⇐'; N: 8656), // Left-pointing wide arrow
|
||||
(E: 'lArr'; Ch: #$E2#$87#$90; N: 8656), // Left-pointing wide arrow
|
||||
(E: 'larr'; Ch: '←'; N: 8592),
|
||||
(E: 'lceil'; Ch: '⌈'; N: 8968), // Left ceiling
|
||||
(E: 'ldquo'; Ch: '“'; N: 8220),
|
||||
(E: 'le'; Ch: '≤'; N: 8804), // 110
|
||||
(E: 'lfloor'; Ch: '⌊'; N: 8970), // Left floor
|
||||
(E: 'lowast'; Ch: '∗'; N: 8727), // Low asterisk
|
||||
(E: 'lowast'; Ch: #$e2#$88#$97; N: 8727), // Low asterisk
|
||||
(E: 'loz'; Ch: '◊'; N: 9674),
|
||||
(E: 'lrm'; Ch: ''; N: 8206), // Left-to-right mark
|
||||
(E: 'lsaquo'; Ch: '‹'; N: 8249),
|
||||
@ -186,8 +189,8 @@ const
|
||||
(E: 'ne'; Ch: '≠'; N: 8800),
|
||||
(E: 'ni'; Ch: '∋'; N: 8715),
|
||||
(E: 'not'; Ch: '¬'; N: 172), // 130
|
||||
(E: 'notin'; Ch: '∉'; N: 8713), // math: "not in"
|
||||
(E: 'nsub'; Ch: '⊄'; N: 8836), // math: "not a subset of"
|
||||
(E: 'notin'; Ch: #$e2#$88#$89; N: 8713), // math: "not in"
|
||||
(E: 'nsub'; Ch: #$e2#$8a#$84; N: 8836), // math: "not a subset of"
|
||||
(E: 'Ntilde'; Ch: 'Ñ'; N: 209),
|
||||
(E: 'ntilde'; Ch: 'ñ'; N: 241),
|
||||
(E: 'Nu'; Ch: 'Ν'; N: 925),
|
||||
@ -206,7 +209,7 @@ const
|
||||
(E: 'omega'; Ch: 'ω'; N: 969),
|
||||
(E: 'Omicron';Ch: 'Ο'; N: 927),
|
||||
(E: 'omicron';Ch: 'ο'; N: 959),
|
||||
(E: 'oplus'; Ch: '⊕'; N: 8853), // Circled plus
|
||||
(E: 'oplus'; Ch: #$e2#$8a#$95; N: 8853), // Circled plus
|
||||
(E: 'or'; Ch: '∨'; N: 8744),
|
||||
(E: 'ordf'; Ch: 'ª'; N: 170),
|
||||
(E: 'ordm'; Ch: 'º'; N: 186),
|
||||
@ -214,7 +217,7 @@ const
|
||||
(E: 'oslash'; Ch: 'ø'; N: 248),
|
||||
(E: 'Otilde'; Ch: 'Õ'; N: 213),
|
||||
(E: 'otilde'; Ch: 'õ'; N: 245),
|
||||
(E: 'otimes'; Ch: '⊗'; N: 8855), // Circled times
|
||||
(E: 'otimes'; Ch: #$E2#$8A#$97; N: 8855), // Circled times
|
||||
(E: 'Ouml'; Ch: 'Ö'; N: 214),
|
||||
(E: 'ouml'; Ch: 'ö'; N: 246),
|
||||
// P
|
||||
@ -245,7 +248,7 @@ const
|
||||
(E: 'rarr'; Ch: '→'; N: 8594),
|
||||
(E: 'rceil'; Ch: '⌉'; N: 8969), // right ceiling
|
||||
(E: 'rdquo'; Ch: '”'; N: 8221),
|
||||
(E: 'real'; Ch: 'ℜ'; N: 8476), // R in factura
|
||||
(E: 'real'; Ch: #$E2#$84#$9C; N: 8476), // R in factura
|
||||
(E: 'reg'; Ch: '®'; N: 174),
|
||||
(E: 'rfloor'; Ch: '⌋'; N: 8971), // Right floor
|
||||
(E: 'Rho'; Ch: 'Ρ'; N: 929),
|
||||
@ -258,13 +261,13 @@ const
|
||||
(E: 'sbquo'; Ch: '‚'; N: 8218),
|
||||
(E: 'Scaron'; Ch: 'Š'; N: 352),
|
||||
(E: 'scaron'; Ch: 'š'; N: 353),
|
||||
(E: 'sdot'; Ch: '⋅'; N: 8901), // math: dot operator
|
||||
(E: 'sdot'; Ch: #$E2#$8B#$85; N: 8901), // math: dot operator
|
||||
(E: 'sect'; Ch: '§'; N: 167),
|
||||
(E: 'shy'; Ch: ''; N: 173), // conditional hyphen
|
||||
(E: 'shy'; Ch: #$C2#$AD; N: 173), // conditional hyphen
|
||||
(E: 'Sigma'; Ch: 'Σ'; N: 931),
|
||||
(E: 'sigma'; Ch: 'σ'; N: 963),
|
||||
(E: 'sigmaf'; Ch: 'ς'; N: 962),
|
||||
(E: 'sim'; Ch: '∼'; N: 8764), // similar
|
||||
(E: 'sim'; Ch: #$E2#$88#$BC; N: 8764), // similar
|
||||
(E: 'spades'; Ch: '♠'; N: 9824),
|
||||
(E: 'sub'; Ch: '⊂'; N: 8834),
|
||||
(E: 'sube'; Ch: '⊆'; N: 8838),
|
||||
@ -282,7 +285,7 @@ const
|
||||
(E: 'Theta'; Ch: 'Θ'; N: 920),
|
||||
(E: 'theta'; Ch: 'θ'; N: 952),
|
||||
(E: 'thetasym';Ch: 'ϑ'; N: 977),
|
||||
(E: 'thinsp'; Ch: ' '; N: 8201), // thin space
|
||||
(E: 'thinsp'; Ch: #$E2#$80#$89; N: 8201), // thin space
|
||||
(E: 'THORN'; Ch: 'Þ'; N: 222),
|
||||
(E: 'thorn'; Ch: 'þ'; N: 254),
|
||||
(E: 'tilde'; Ch: '˜'; N: 732),
|
||||
@ -291,7 +294,7 @@ const
|
||||
// U
|
||||
(E: 'Uacute'; Ch: 'Ú'; N: 218),
|
||||
(E: 'uacute'; Ch: 'ú'; N: 250),
|
||||
(E: 'uArr'; Ch: '⇑'; N: 8657), // wide up-arrow
|
||||
(E: 'uArr'; Ch: #$E2#$87#$91; N: 8657), // wide up-arrow
|
||||
(E: 'uarr'; Ch: '↑'; N: 8593),
|
||||
(E: 'Ucirc'; Ch: 'Û'; N: 219),
|
||||
(E: 'ucirc'; Ch: 'û'; N: 251),
|
||||
@ -304,7 +307,7 @@ const
|
||||
(E: 'Uuml'; Ch: 'Ü'; N: 220),
|
||||
(E: 'uuml'; Ch: 'ü'; N: 252),
|
||||
// W
|
||||
(E: 'weierp'; Ch: '℘'; N: 8472), // Script Capital P; Weierstrass Elliptic Function
|
||||
(E: 'weierp'; Ch: #$E2#$84#$98; N: 8472), // Script Capital P; Weierstrass Elliptic Function
|
||||
// X
|
||||
(E: 'Xi'; Ch: 'Ξ'; N: 926),
|
||||
(E: 'xi'; Ch: 'ξ'; N: 958),
|
||||
@ -486,6 +489,44 @@ begin
|
||||
end;
|
||||
inc(i);
|
||||
end;
|
||||
|
||||
i := IndexOfName('style');
|
||||
if i > -1 then ParseStyle(Items[i].Value);
|
||||
end;
|
||||
|
||||
{ AStyle is the value part of a 'style="...."' HTML string. Splits the into
|
||||
individual records at the semicolons (;) and into name-value pairs at the
|
||||
colon (:). Adds the name-value pairs to the list. }
|
||||
procedure TsHTMLAttrList.ParseStyle(AStyle: String);
|
||||
var
|
||||
i, len: Integer;
|
||||
value, nam: String;
|
||||
begin
|
||||
i := 1;
|
||||
len := Length(AStyle);
|
||||
|
||||
// skip white space
|
||||
while (i <= len) and (AStyle[i] = ' ') do inc(i);
|
||||
|
||||
// iterate through string
|
||||
nam := '';
|
||||
while (i <= len) do
|
||||
begin
|
||||
case AStyle[i] of
|
||||
':': begin // name-value separator
|
||||
while (i <= len) and (AStyle[i] = ' ') do inc(i); // skip white space
|
||||
value := '';
|
||||
while (i <= len) and (AStyle[i] <> ';') do
|
||||
value := value + AStyle[i];
|
||||
inc(i); // skip final ';'
|
||||
Add(TsHTMLAttr.Create(lowercase(nam), value));
|
||||
nam := '';
|
||||
end;
|
||||
' ': ;
|
||||
else nam := nam + AStyle[i];
|
||||
end;
|
||||
inc(i);
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure TsHTMLAttrList.SetItem(AIndex: Integer; AValue: TsHTMLAttr);
|
||||
|
@ -1474,6 +1474,10 @@ begin
|
||||
if AValue[1] = '#' then begin
|
||||
AValue[1] := '$';
|
||||
Result := LongRGBToExcelPhysical(DWord(StrToInt(AValue)));
|
||||
end else
|
||||
if AValue[1] in ['0'..'9','A'..'F', 'a'..'f'] then begin
|
||||
AValue := '$' + AValue;
|
||||
Result := LongRGBToExcelPhysical(DWord(StrToInt(AValue)));
|
||||
end else begin
|
||||
AValue := lowercase(AValue);
|
||||
if AValue = 'red' then
|
||||
|
Reference in New Issue
Block a user