You've already forked lazarus-ccr
aarre
applications
bindings
components
ZVDateTimeCtrls
aboutcomponent
acs
beepfp
callite
chelper
cmdline
cmdlinecfg
colorpalette
cryptini
csvdocument
epiktimer
flashfiler
fpsound
fpspreadsheet
docs
examples
images
languages
reference
source
common
fpolebasic.pas
fpolestorage.pas
fpsallformats.pas
fpscell.pas
fpsclasses.pas
fpscsv.pas
fpscsvdocument.pas
fpscurrency.pas
fpsexprparser.pas
fpsfunc.pas
fpsheaderfooterparser.pas
fpshtml.pas
fpshtmlutils.pas
fpsimages.pas
fpsnumformat.pas
fpsopendocument.pas
fpspagelayout.pas
fpspalette.pas
fpspatches.pas
fpspreadsheet.pas
fpsreaderwriter.pas
fpsrpn.pas
fpssearch.pas
fpsstreams.pas
fpsstrings.pas
fpstypes.pas
fpsutils.pas
fpsxmlcommon.pas
fpszipper.pp
uvirtuallayer.pas
uvirtuallayer_ole.pas
uvirtuallayer_ole_helpers.pas
uvirtuallayer_ole_types.pas
uvirtuallayer_stream.pas
uvirtuallayer_types.pas
wikitable.pas
xlsbiff2.pas
xlsbiff5.pas
xlsbiff8.pas
xlscommon.pas
xlsconst.pas
xlsescher.pas
xlsxml.pas
xlsxooxml.pas
export
visual
fps.inc
install.txt
laz_fpspreadsheet.lpk
laz_fpspreadsheet_visual.lpk
laz_fpspreadsheetexport_visual.lpk
tests
README.txt
fractions
freetypepascal
geckoport
gradcontrols
industrialstuff
iosdesigner
iphonelazext
jujiboutils
jvcllaz
kcontrols
lazautoupdate
lazbarcodes
lclextensions
longtimer
manualdock
mbColorLib
mplayer
multithreadprocs
nvidia-widgets
onguard
orpheus
playsoundpackage
poweredby
powerpdf
rgbgraphics
richmemo
richview
rtfview
rx
scrolltext
smnetgradient
spktoolbar
svn
tdi
thtmlport
tparadoxdataset
tvplanit
virtualtreeview
virtualtreeview-new
xdev_toolkit
zlibar
examples
lclbindings
wst
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@5319 8e941d3f-bd1b-0410-a28a-d453659cc2b4
1192 lines
38 KiB
ObjectPascal
1192 lines
38 KiB
ObjectPascal
unit fpsHTMLUtils;
|
|
|
|
{$mode objfpc}{$H+}
|
|
|
|
interface
|
|
|
|
uses
|
|
Classes, SysUtils, contnrs, fpstypes, fpspreadsheet;
|
|
|
|
type
|
|
TsHTMLEntity = record
|
|
E: String;
|
|
Ch: String;
|
|
N: Word;
|
|
end;
|
|
|
|
function CleanHTMLString(AText: String): String;
|
|
function IsHTMLEntity(AText: PChar; out AEntity: TsHTMLEntity): Boolean;
|
|
function RemoveHTMLEntities(const AText: String): String;
|
|
|
|
type
|
|
TsHTMLAttr = class
|
|
Name: String;
|
|
Value: String;
|
|
constructor Create(AName, AValue: String);
|
|
end;
|
|
|
|
TsHTMLAttrList = class(TObjectList)
|
|
private
|
|
function GetItem(AIndex: Integer): TsHTMLAttr;
|
|
procedure SetItem(AIndex: Integer; AValue: TsHTMLAttr);
|
|
protected
|
|
procedure ParseStyle(AStyle: String);
|
|
public
|
|
function IndexOfName(AName: String): Integer;
|
|
procedure Parse(AHTML: String);
|
|
property Items[AIndex: Integer]: TsHTMLAttr read GetItem write SetItem; default;
|
|
end;
|
|
|
|
TsTagCase = (tcLowercase, tcUppercase, tcProperCase);
|
|
|
|
procedure HTMLToRichText(AWorkbook: TsWorkbook; AFont: TsFont;
|
|
const AHTMLText: String; out APlainText: String;
|
|
out ARichTextParams: TsRichTextParams);
|
|
|
|
procedure RichTextToHTML(AWorkbook: TsWorkbook; AFont: TsFont;
|
|
const APlainText: String; const ARichTextParams: TsRichTextParams;
|
|
out AHTMLText: String; APrefix:String = ''; ATagCase: TsTagCase = tcLowercase);
|
|
|
|
|
|
implementation
|
|
|
|
uses
|
|
math, lazUtf8, fasthtmlparser,
|
|
fpsUtils, fpsClasses;
|
|
|
|
const
|
|
// http://unicode.e-workers.de/entities.php
|
|
// http://www.utf8-zeichentabelle.de/unicode-utf8-table.pl
|
|
HTMLEntities: array[0..250] of TsHTMLEntity = (
|
|
// A
|
|
(E: 'Acirc'; Ch: 'Â'; N: 194), // 0
|
|
(E: 'acirc'; Ch: 'â'; N: 226),
|
|
(E: 'acute'; Ch: '´'; N: 180),
|
|
(E: 'AElig'; Ch: 'Æ'; N: 198),
|
|
(E: 'aelig'; Ch: 'æ'; N: 230),
|
|
(E: 'Agrave'; Ch: 'À'; N: 192),
|
|
(E: 'agrave'; Ch: 'à'; N: 224),
|
|
(E: 'alefsym';Ch: #$E2#$84#$B5; N: 8501),
|
|
(E: 'Alpha'; Ch: 'Α'; N: 913),
|
|
(E: 'alpha'; Ch: 'α'; N: 945),
|
|
(E: 'amp'; Ch: '&'; N: 38), // 10
|
|
(E: 'and'; Ch: '∧'; N: 8743),
|
|
(E: 'ang'; Ch: '∠'; N: 8736),
|
|
(E: 'apos'; Ch: ''''; N: 39),
|
|
(E: 'Aring'; Ch: 'Å'; N: 197),
|
|
(E: 'aring'; Ch: 'å'; N: 229),
|
|
(E: 'asymp'; Ch: '≈'; N: 2248),
|
|
(E: 'Atilde'; Ch: 'Ã'; N: 195),
|
|
(E: 'atilde'; Ch: 'ã'; N: 227),
|
|
(E: 'Auml'; Ch: 'Ä'; N: 196),
|
|
(E: 'auml'; Ch: 'ä'; N: 228), // 20
|
|
// B
|
|
(E: 'bdquo'; Ch: '„'; N: 8222), // 21
|
|
(E: 'Beta'; Ch: 'Β'; N: 914),
|
|
(E: 'beta'; Ch: 'β'; N: 946),
|
|
(E: 'brvbar'; Ch: '¦'; N: 166),
|
|
(E: 'bull'; Ch: '•'; N: 8226),
|
|
// C
|
|
(E: 'cap'; Ch: '∩'; N: 8745), // 26
|
|
(E: 'Ccedil'; Ch: 'Ç'; N: 199),
|
|
(E: 'ccedil'; Ch: 'ç'; N: 231),
|
|
(E: 'cedil'; Ch: '¸'; N: 184),
|
|
(E: 'cent'; Ch: '¢'; N: 162), // 30
|
|
(E: 'Chi'; Ch: 'Χ'; N: 935),
|
|
(E: 'chi'; Ch: 'χ'; N: 967),
|
|
(E: 'circ'; Ch: 'ˆ'; N: 710),
|
|
(E: 'clubs'; Ch: '♣'; N: 9827),
|
|
(E: 'cong'; Ch: #$E2#$89#$85; N: 8773), // approximately equal
|
|
(E: 'copy'; Ch: '©'; N: 169),
|
|
(E: 'crarr'; Ch: #$E2#$86#$B5; N: 8629), // carriage return
|
|
(E: 'cup'; Ch: '∪'; N: 8746),
|
|
(E: 'curren'; Ch: '¤'; N: 164),
|
|
// D
|
|
(E: 'Dagger'; Ch: '‡'; N: 8225), // 40
|
|
(E: 'dagger'; Ch: '†'; N: 8224),
|
|
(E: 'dArr'; Ch: #$E2#$87#$93; N: 8659), // wide down-arrow
|
|
(E: 'darr'; Ch: '↓'; N: 8595), // narrow down-arrow
|
|
(E: 'deg'; Ch: '°'; N: 176),
|
|
(E: 'Delta'; Ch: 'Δ'; N: 916),
|
|
(E: 'delta'; Ch: 'δ'; N: 948),
|
|
(E: 'diams'; Ch: '♦'; N: 9830),
|
|
(E: 'divide'; Ch: '÷'; N: 247),
|
|
// E
|
|
(E: 'Eacute'; Ch: 'É'; N: 201), // 49
|
|
(E: 'eacute'; Ch: 'é'; N: 233), // 50
|
|
(E: 'Ecirc'; Ch: 'Ê'; N: 202),
|
|
(E: 'ecirc'; Ch: 'ê'; N: 234),
|
|
(E: 'Egrave'; Ch: 'È'; N: 200),
|
|
(E: 'egrave'; Ch: 'è'; N: 232),
|
|
(E: 'empty'; Ch: #$e2#$88#$85; N: 8709),
|
|
(E: 'emsp'; Ch: #$E2#$80#$83; N: 8195), // Space character width of "m"
|
|
(E: 'ensp'; Ch: #$E2#$80#$82; N: 8194), // Space character width of "n"
|
|
(E: 'Epsilon';Ch: 'Ε'; N: 917), // capital epsilon
|
|
(E: 'epsilon';Ch: 'ε'; N: 949),
|
|
(E: 'equiv'; Ch: '≡'; N: 8801), // 60
|
|
(E: 'Eta'; Ch: 'Η'; N: 919),
|
|
(E: 'eta'; Ch: 'η'; N: 951),
|
|
(E: 'ETH'; Ch: 'Ð'; N: 208),
|
|
(E: 'eth'; Ch: 'ð'; N: 240),
|
|
(E: 'Euml'; Ch: 'Ë'; N: 203),
|
|
(E: 'euml'; Ch: 'ë'; N: 235),
|
|
(E: 'euro'; Ch: '€'; N: 8364),
|
|
(E: 'exist'; Ch: '∃'; N: 8707),
|
|
// F
|
|
(E: 'fnof'; Ch: 'ƒ'; N: 402), // 70
|
|
(E: 'forall'; Ch: '∀'; N: 8704),
|
|
(E: 'frac12'; Ch: '½'; N: 189),
|
|
(E: 'frac14'; Ch: '¼'; N: 188),
|
|
(E: 'frac34'; Ch: '¾'; N: 190),
|
|
(E: 'frasl'; Ch: '⁄'; N: 8260),
|
|
// G
|
|
(E: 'Gamma'; Ch: 'Γ'; N: 915),
|
|
(E: 'gamma'; Ch: 'γ'; N: 947),
|
|
(E: 'ge'; Ch: '≥'; N: 8805),
|
|
(E: 'gt'; Ch: '>'; N: 62),
|
|
// H
|
|
(E: 'hArr'; Ch: '⇔'; N: 8660), // 80, wide horizontal double arrow
|
|
(E: 'harr'; Ch: '↔'; N: 8596), // narrow horizontal double arrow
|
|
(E: 'hearts'; Ch: '♥'; N: 9829),
|
|
(E: 'hellip'; Ch: '…'; N: 8230),
|
|
// I
|
|
(E: 'Iacute'; Ch: 'Í'; N: 205),
|
|
(E: 'iacute'; Ch: 'í'; N: 237),
|
|
(E: 'Icirc'; Ch: 'Î'; N: 206),
|
|
(E: 'icirc'; Ch: 'î'; N: 238),
|
|
(E: 'iexcl'; Ch: '¡'; N: 161),
|
|
(E: 'Igrave'; Ch: 'Ì'; N: 204),
|
|
(E: 'igrave'; Ch: 'ì'; N: 236), // 90
|
|
(E: 'image'; Ch: #$E2#$84#$91; N: 2465), // I in factura
|
|
(E: 'infin'; Ch: '∞'; N: 8734),
|
|
(E: 'int'; Ch: '∫'; N: 8747),
|
|
(E: 'Iota'; Ch: 'Ι'; N: 921),
|
|
(E: 'iota'; Ch: 'ι'; N: 953),
|
|
(E: 'iquest'; Ch: '¿'; N: 191),
|
|
(E: 'isin'; Ch: '∈'; N: 8712),
|
|
(E: 'Iuml'; Ch: 'Ï'; N: 207),
|
|
(E: 'iuml'; Ch: 'ï'; N: 239),
|
|
// K
|
|
(E: 'Kappa'; Ch: 'Κ'; N: 922), // 100
|
|
(E: 'kappa'; Ch: 'κ'; N: 254),
|
|
// L
|
|
(E: 'Lambda'; Ch: 'Λ'; N: 923),
|
|
(E: 'lambda'; Ch: 'λ'; N: 955),
|
|
(E: 'lang'; Ch: '⟨'; N: 9001), // Left-pointing angle bracket
|
|
(E: 'laquo'; Ch: '«'; N: 171),
|
|
(E: 'lArr'; Ch: #$E2#$87#$90; N: 8656), // Left-pointing wide arrow
|
|
(E: 'larr'; Ch: '←'; N: 8592),
|
|
(E: 'lceil'; Ch: '⌈'; N: 8968), // Left ceiling
|
|
(E: 'ldquo'; Ch: '“'; N: 8220),
|
|
(E: 'le'; Ch: '≤'; N: 8804), // 110
|
|
(E: 'lfloor'; Ch: '⌊'; N: 8970), // Left floor
|
|
(E: 'lowast'; Ch: #$e2#$88#$97; N: 8727), // Low asterisk
|
|
(E: 'loz'; Ch: '◊'; N: 9674),
|
|
(E: 'lrm'; Ch: ''; N: 8206), // Left-to-right mark
|
|
(E: 'lsaquo'; Ch: '‹'; N: 8249),
|
|
(E: 'lsquo'; Ch: '‘'; N: 8216),
|
|
(E: 'lt'; Ch: '<'; N: 60),
|
|
// M
|
|
(E: 'macr'; Ch: '¯'; N: 175),
|
|
(E: 'mdash'; Ch: '—'; N: 8212),
|
|
(E: 'micro'; Ch: 'µ'; N: 181), // 120
|
|
(E: 'middot'; Ch: '·'; N: 183),
|
|
(E: 'minus'; Ch: '−'; N: 8722),
|
|
(E: 'Mu'; Ch: 'Μ'; N: 924),
|
|
(E: 'mu'; Ch: 'μ'; N: 956),
|
|
// N
|
|
(E: 'nabla'; Ch: '∇'; N: 8711),
|
|
(E: 'nbsp'; Ch: ' '; N: 160), // 126
|
|
(E: 'ndash'; Ch: '–'; N: 8211),
|
|
(E: 'ne'; Ch: '≠'; N: 8800),
|
|
(E: 'ni'; Ch: '∋'; N: 8715),
|
|
(E: 'not'; Ch: '¬'; N: 172), // 130
|
|
(E: 'notin'; Ch: #$e2#$88#$89; N: 8713), // math: "not in"
|
|
(E: 'nsub'; Ch: #$e2#$8a#$84; N: 8836), // math: "not a subset of"
|
|
(E: 'Ntilde'; Ch: 'Ñ'; N: 209),
|
|
(E: 'ntilde'; Ch: 'ñ'; N: 241),
|
|
(E: 'Nu'; Ch: 'Ν'; N: 925),
|
|
(E: 'nu'; Ch: 'ν'; N: 957),
|
|
// O
|
|
(E: 'Oacute'; Ch: 'Ó'; N: 211),
|
|
(E: 'oacute'; Ch: 'ó'; N: 243),
|
|
(E: 'Ocirc'; Ch: 'Ô'; N: 212),
|
|
(E: 'ocirc'; Ch: 'ô'; N: 244),
|
|
(E: 'OElig'; Ch: 'Œ'; N: 338),
|
|
(E: 'oelig'; Ch: 'œ'; N: 339),
|
|
(E: 'Ograve'; Ch: 'Ò'; N: 210),
|
|
(E: 'ograve'; Ch: 'ò'; N: 242),
|
|
(E: 'oline'; Ch: '‾'; N: 8254),
|
|
(E: 'Omega'; Ch: 'Ω'; N: 937),
|
|
(E: 'omega'; Ch: 'ω'; N: 969),
|
|
(E: 'Omicron';Ch: 'Ο'; N: 927),
|
|
(E: 'omicron';Ch: 'ο'; N: 959),
|
|
(E: 'oplus'; Ch: #$e2#$8a#$95; N: 8853), // Circled plus
|
|
(E: 'or'; Ch: '∨'; N: 8744),
|
|
(E: 'ordf'; Ch: 'ª'; N: 170),
|
|
(E: 'ordm'; Ch: 'º'; N: 186),
|
|
(E: 'Oslash'; Ch: 'Ø'; N: 216),
|
|
(E: 'oslash'; Ch: 'ø'; N: 248),
|
|
(E: 'Otilde'; Ch: 'Õ'; N: 213),
|
|
(E: 'otilde'; Ch: 'õ'; N: 245),
|
|
(E: 'otimes'; Ch: #$E2#$8A#$97; N: 8855), // Circled times
|
|
(E: 'Ouml'; Ch: 'Ö'; N: 214),
|
|
(E: 'ouml'; Ch: 'ö'; N: 246),
|
|
// P
|
|
(E: 'para'; Ch: '¶'; N: 182),
|
|
(E: 'part'; Ch: '∂'; N: 8706),
|
|
(E: 'permil'; Ch: '‰'; N: 8240),
|
|
(E: 'perp'; Ch: '⊥'; N: 8869),
|
|
(E: 'Phi'; Ch: 'Φ'; N: 934),
|
|
(E: 'phi'; Ch: 'φ'; N: 966),
|
|
(E: 'Pi'; Ch: 'Π'; N: 928),
|
|
(E: 'pi'; Ch: 'π'; N: 960), // lower-case pi
|
|
(E: 'piv'; Ch: 'ϖ'; N: 982),
|
|
(E: 'plusmn'; Ch: '±'; N: 177),
|
|
(E: 'pound'; Ch: '£'; N: 163),
|
|
(E: 'Prime'; Ch: '″'; N: 8243),
|
|
(E: 'prime'; Ch: '′'; N: 8242),
|
|
(E: 'prod'; Ch: '∏'; N: 8719),
|
|
(E: 'prop'; Ch: '∝'; N: 8733),
|
|
(E: 'Psi'; Ch: 'Ψ'; N: 936),
|
|
(E: 'psi'; Ch: 'ψ'; N: 968),
|
|
// Q
|
|
(E: 'quot'; Ch: '"'; N: 34),
|
|
// R
|
|
(E: 'radic'; Ch: '√'; N: 8730),
|
|
(E: 'rang'; Ch: '⟩'; N: 9002), // right-pointing angle bracket
|
|
(E: 'raquo'; Ch: '»'; N: 187),
|
|
(E: 'rArr'; Ch: '⇒'; N: 8658),
|
|
(E: 'rarr'; Ch: '→'; N: 8594),
|
|
(E: 'rceil'; Ch: '⌉'; N: 8969), // right ceiling
|
|
(E: 'rdquo'; Ch: '”'; N: 8221),
|
|
(E: 'real'; Ch: #$E2#$84#$9C; N: 8476), // R in factura
|
|
(E: 'reg'; Ch: '®'; N: 174),
|
|
(E: 'rfloor'; Ch: '⌋'; N: 8971), // Right floor
|
|
(E: 'Rho'; Ch: 'Ρ'; N: 929),
|
|
(E: 'rho'; Ch: 'ρ'; N: 961),
|
|
(E: 'rlm'; Ch: ''; N: 8207), // right-to-left mark
|
|
(E: 'rsaquo'; Ch: '›'; N: 8250),
|
|
(E: 'rsquo'; Ch: '’'; N: 8217),
|
|
|
|
// S
|
|
(E: 'sbquo'; Ch: '‚'; N: 8218),
|
|
(E: 'Scaron'; Ch: 'Š'; N: 352),
|
|
(E: 'scaron'; Ch: 'š'; N: 353),
|
|
(E: 'sdot'; Ch: #$E2#$8B#$85; N: 8901), // math: dot operator
|
|
(E: 'sect'; Ch: '§'; N: 167),
|
|
(E: 'shy'; Ch: #$C2#$AD; N: 173), // conditional hyphen
|
|
(E: 'Sigma'; Ch: 'Σ'; N: 931),
|
|
(E: 'sigma'; Ch: 'σ'; N: 963),
|
|
(E: 'sigmaf'; Ch: 'ς'; N: 962),
|
|
(E: 'sim'; Ch: #$E2#$88#$BC; N: 8764), // similar
|
|
(E: 'spades'; Ch: '♠'; N: 9824),
|
|
(E: 'sub'; Ch: '⊂'; N: 8834),
|
|
(E: 'sube'; Ch: '⊆'; N: 8838),
|
|
(E: 'sum'; Ch: '∑'; N: 8721),
|
|
(E: 'sup'; Ch: '⊃'; N: 8835),
|
|
(E: 'sup1'; Ch: '¹'; N: 185),
|
|
(E: 'sup2'; Ch: '²'; N: 178),
|
|
(E: 'sup3'; Ch: '³'; N: 179),
|
|
(E: 'supe'; Ch: '⊇'; N: 8839),
|
|
(E: 'szlig'; Ch: 'ß'; N: 223),
|
|
//T
|
|
(E: 'Tau'; Ch: 'Τ'; N: 932),
|
|
(E: 'tau'; Ch: 'τ'; N: 964),
|
|
(E: 'there4'; Ch: '∴'; N: 8756),
|
|
(E: 'Theta'; Ch: 'Θ'; N: 920),
|
|
(E: 'theta'; Ch: 'θ'; N: 952),
|
|
(E: 'thetasym';Ch: 'ϑ'; N: 977),
|
|
(E: 'thinsp'; Ch: #$E2#$80#$89; N: 8201), // thin space
|
|
(E: 'THORN'; Ch: 'Þ'; N: 222),
|
|
(E: 'thorn'; Ch: 'þ'; N: 254),
|
|
(E: 'tilde'; Ch: '˜'; N: 732),
|
|
(E: 'times'; Ch: '×'; N: 215),
|
|
(E: 'trade'; Ch: '™'; N: 8482),
|
|
// U
|
|
(E: 'Uacute'; Ch: 'Ú'; N: 218),
|
|
(E: 'uacute'; Ch: 'ú'; N: 250),
|
|
(E: 'uArr'; Ch: #$E2#$87#$91; N: 8657), // wide up-arrow
|
|
(E: 'uarr'; Ch: '↑'; N: 8593),
|
|
(E: 'Ucirc'; Ch: 'Û'; N: 219),
|
|
(E: 'ucirc'; Ch: 'û'; N: 251),
|
|
(E: 'Ugrave'; Ch: 'Ù'; N: 217),
|
|
(E: 'ugrave'; Ch: 'ù'; N: 249),
|
|
(E: 'uml'; Ch: '¨'; N: 168),
|
|
(E: 'upsih'; Ch: 'ϒ'; N: 978),
|
|
(E: 'Upsilon';Ch: 'Υ'; N: 933),
|
|
(E: 'upsilon';Ch: 'υ'; N: 965),
|
|
(E: 'Uuml'; Ch: 'Ü'; N: 220),
|
|
(E: 'uuml'; Ch: 'ü'; N: 252),
|
|
// W
|
|
(E: 'weierp'; Ch: #$E2#$84#$98; N: 8472), // Script Capital P; Weierstrass Elliptic Function
|
|
// X
|
|
(E: 'Xi'; Ch: 'Ξ'; N: 926),
|
|
(E: 'xi'; Ch: 'ξ'; N: 958),
|
|
// Y
|
|
(E: 'Yacute'; Ch: 'Ý'; N: 221),
|
|
(E: 'yacute'; Ch: 'ý'; N: 253),
|
|
(E: 'yen'; Ch: '¥'; N: 165),
|
|
(E: 'Yuml'; Ch: 'Ÿ'; N: 376),
|
|
(E: 'yuml'; Ch: 'ÿ'; N: 255),
|
|
// Z
|
|
(E: 'Zeta'; Ch: 'Ζ'; N: 918),
|
|
(E: 'zeta'; Ch: 'ζ'; N: 950),
|
|
(E: 'zwj'; Ch: ''; N: 8205), // Zero-width joiner
|
|
(E: 'zwnj'; Ch: ''; N: 8204) // Zero-width non-joiner
|
|
);
|
|
|
|
function IsHTMLEntity(AText: PChar; out AEntity: TsHTMLEntity): Boolean;
|
|
|
|
function Compare(s: String): Boolean;
|
|
var
|
|
j: Integer;
|
|
begin
|
|
Result := false;
|
|
for j:=1 to Length(s) do
|
|
if s[j] <> PChar(AText)[j-1] then
|
|
exit;
|
|
if PChar(AText)[Length(s)] <> ';' then
|
|
exit;
|
|
Result := true;
|
|
end;
|
|
|
|
var
|
|
k: Integer;
|
|
equ: Boolean;
|
|
ch1: Char;
|
|
P: PChar;
|
|
|
|
begin
|
|
Result := false;
|
|
for k:=0 to High(HTMLEntities) do
|
|
begin
|
|
equ := Compare(HTMLEntities[k].E);
|
|
if not equ then
|
|
begin
|
|
P := AText;
|
|
ch1 := P^;
|
|
if ch1 = '#' then
|
|
begin
|
|
inc(P);
|
|
if ch1 = 'x' then
|
|
equ := Compare(Format('#x%x', [HTMLEntities[k].N]))
|
|
else
|
|
equ := Compare(Format('#%d', [HTMLEntities[k].N]));
|
|
end;
|
|
end;
|
|
if equ then
|
|
begin
|
|
AEntity := HTMLEntities[k];
|
|
Result := true;
|
|
exit;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function CleanHTMLString(AText: String): String;
|
|
var
|
|
ent: TsHTMLEntity;
|
|
P: PChar;
|
|
ch: Char;
|
|
hasStartSpace, hasEndSpace: Boolean;
|
|
begin
|
|
Result := '';
|
|
|
|
// Remove leading and trailing spaces and line endings coming from formatted
|
|
// source lines. Retain 1 single space, at the end even without spaces found.
|
|
// No idea if this is 100% correct - at least, looks good.
|
|
hasStartSpace := false;
|
|
while (Length(AText) > 0) and (AText[1] in [#9, #13, #10, ' ']) do
|
|
begin
|
|
if AText[1] = ' ' then hasStartSpace := true; // A leading space will be added later
|
|
Delete(AText, 1, 1);
|
|
end;
|
|
|
|
hasEndSpace := false;
|
|
while (Length(AText) > 0) and (AText[Length(AText)] in [#9, #10, #13, ' ']) do
|
|
begin
|
|
hasEndSpace := true; // A trailing space will be added later
|
|
Delete(AText, Length(AText), 1);
|
|
end;
|
|
|
|
if AText = '' then
|
|
begin
|
|
if hasStartSpace or hasEndSpace then Result := ' ';
|
|
exit;
|
|
end;
|
|
|
|
// Replace HTML entities by their counter part UTF8 characters
|
|
P := @AText[1];
|
|
while (P^ <> #0) do begin
|
|
ch := P^;
|
|
case ch of
|
|
' ': begin
|
|
// collapse multiple spaces to a single space (HTML spec)
|
|
// http://stackoverflow.com/questions/24615355/browser-white-space-rendering
|
|
Result := Result + ' ';
|
|
inc(P);
|
|
while (P^ = ' ') do inc(P);
|
|
dec(P);
|
|
end;
|
|
'&': begin
|
|
inc(P);
|
|
if (P <> nil) and IsHTMLEntity(P, ent) then
|
|
begin
|
|
Result := Result + ent.Ch;
|
|
inc(P, Length(ent.E));
|
|
end else
|
|
begin
|
|
Result := Result + '&';
|
|
Continue;
|
|
end;
|
|
end;
|
|
else
|
|
Result := Result + ch;
|
|
end;
|
|
inc(P);
|
|
end;
|
|
|
|
// Add leading and trailing spaces from above.
|
|
if hasStartSpace then Result := ' ' + Result;
|
|
if hasEndSpace then Result := Result + ' ';
|
|
end;
|
|
|
|
function RemoveHTMLEntities(const AText: String): String;
|
|
var
|
|
ent: TsHTMLEntity;
|
|
P: PChar;
|
|
ch: AnsiChar;
|
|
begin
|
|
Result := '';
|
|
P := @AText[1];
|
|
while (P^ <> #0) do begin
|
|
ch := P^;
|
|
case ch of
|
|
'&': begin
|
|
inc(P);
|
|
if (P <> nil) and IsHTMLEntity(P, ent) then
|
|
begin
|
|
Result := Result + ent.Ch;
|
|
inc(P, Length(ent.E));
|
|
end else
|
|
begin
|
|
Result := Result + '&';
|
|
Continue;
|
|
end;
|
|
end;
|
|
else Result := Result + ch;
|
|
end;
|
|
inc(P);
|
|
end;
|
|
end;
|
|
|
|
{==============================================================================}
|
|
{ TsHTMLAttr }
|
|
{==============================================================================}
|
|
|
|
constructor TsHTMLAttr.Create(AName, AValue: String);
|
|
begin
|
|
Name := AName;
|
|
Value := AValue;
|
|
end;
|
|
|
|
|
|
{==============================================================================}
|
|
{ TsHTMLAttrList }
|
|
{==============================================================================}
|
|
|
|
function TsHTMLAttrList.GetItem(AIndex: Integer): TsHTMLAttr;
|
|
begin
|
|
Result := TsHTMLAttr(inherited GetItem(AIndex));
|
|
end;
|
|
|
|
function TsHTMLAttrList.IndexOfName(AName: String): Integer;
|
|
begin
|
|
AName := Lowercase(AName);
|
|
for Result := 0 to Count-1 do
|
|
if GetItem(Result).Name = AName then
|
|
exit;
|
|
Result := -1;
|
|
end;
|
|
|
|
{ AHTML is a HTML string beginning with a < tag. Seeks the first space to split
|
|
off the HTML tag. Then seeks for = and " characters to extract the attributes
|
|
which are split into name/value pairs at the = character. The value part is
|
|
unquoted. }
|
|
procedure TsHTMLAttrList.Parse(AHTML: String);
|
|
var
|
|
i: Integer;
|
|
len: Integer;
|
|
value, nam: String;
|
|
begin
|
|
Clear;
|
|
if (AHTML[1] <> '<') then // just for simplification
|
|
raise Exception.Create('[THTMLAttrList.Parse] HTML tags expected.');
|
|
|
|
// Find first space
|
|
i := 1;
|
|
len := Length(AHTML);
|
|
while (i <= len) and (AHTML[i] <> ' ') do inc(i);
|
|
|
|
// Parse attribute string
|
|
nam := '';
|
|
while (i <= len) do
|
|
begin
|
|
case AHTML[i] of
|
|
'=': begin
|
|
inc(i);
|
|
value := '';
|
|
if AHTML[i] = '"' then
|
|
begin
|
|
inc(i); // skip the initial '"'
|
|
while AHTML[i] <> '"' do
|
|
begin
|
|
value := value + AHTML[i];
|
|
inc(i);
|
|
end;
|
|
inc(i); // skip the final '"'
|
|
end else
|
|
while not (AHTML[i] in [' ', '>', '/']) do
|
|
begin
|
|
value := value + AHTML[i];
|
|
inc(i);
|
|
end;
|
|
Add(TsHTMLAttr.Create(lowercase(trim(nam)), trim(value)));
|
|
nam := '';
|
|
end;
|
|
' ', '/', '>': ;
|
|
else nam := nam + AHTML[i];
|
|
end;
|
|
inc(i);
|
|
end;
|
|
|
|
i := IndexOfName('style');
|
|
if i > -1 then ParseStyle(Items[i].Value);
|
|
end;
|
|
|
|
{ AStyle is the value part of a 'style="...."' HTML string. Splits the into
|
|
individual records at the semicolons (;) and into name-value pairs at the
|
|
colon (:). Adds the name-value pairs to the list. }
|
|
procedure TsHTMLAttrList.ParseStyle(AStyle: String);
|
|
var
|
|
i, len: Integer;
|
|
value, nam: String;
|
|
begin
|
|
i := 1;
|
|
len := Length(AStyle);
|
|
|
|
// skip white space
|
|
while (i <= len) and (AStyle[i] = ' ') do inc(i);
|
|
|
|
// iterate through string
|
|
nam := '';
|
|
while (i <= len) do
|
|
begin
|
|
case AStyle[i] of
|
|
':': begin // name-value separator
|
|
inc(i); // skip ':' ...
|
|
while (i <= len) and (AStyle[i] = ' ') do inc(i); // ... and white space
|
|
value := '';
|
|
while (i <= len) and (AStyle[i] <> ';') do
|
|
begin
|
|
value := value + AStyle[i];
|
|
inc(i);
|
|
end;
|
|
Add(TsHTMLAttr.Create(lowercase(trim(nam)), UnquoteStr(trim(value))));
|
|
nam := '';
|
|
end;
|
|
' ': ; // skip white space
|
|
else nam := nam + AStyle[i];
|
|
end;
|
|
inc(i);
|
|
end;
|
|
end;
|
|
|
|
procedure TsHTMLAttrList.SetItem(AIndex: Integer; AValue: TsHTMLAttr);
|
|
begin
|
|
inherited SetItem(AIndex, AValue);
|
|
end;
|
|
|
|
|
|
{==============================================================================}
|
|
{ HTML-to-Rich-text conversion }
|
|
{==============================================================================}
|
|
type
|
|
TsHTMLAnalyzer = class(THTMLParser)
|
|
private
|
|
FWorkbook: TsWorkbook;
|
|
FPlainText: String;
|
|
FRichTextParams: TsRichTextParams;
|
|
FAttrList: TsHTMLAttrList;
|
|
FFontStack: TsIntegerStack;
|
|
FCurrFont: TsFont;
|
|
FPointSeparatorSettings: TFormatSettings;
|
|
FPreserveSpaces: Boolean;
|
|
function AddFont(AFont: TsFont): Integer;
|
|
procedure AddRichTextParam(AFont: TsFont; AHyperlinkIndex: Integer = -1);
|
|
procedure ProcessFontRestore;
|
|
procedure ReadFont(AFont: TsFont);
|
|
procedure TagFoundHandler(NoCaseTag, ActualTag: string);
|
|
procedure TextFoundHandler(AText: string);
|
|
public
|
|
constructor Create(AWorkbook: TsWorkbook; AFont: TsFont; AText: String);
|
|
destructor Destroy; override;
|
|
property PlainText: String read FPlainText;
|
|
property RichTextParams: TsRichTextParams read FRichTextParams;
|
|
property PreserveSpaces: Boolean read FPreserveSpaces write FPreserveSpaces;
|
|
end;
|
|
|
|
constructor TsHTMLAnalyzer.Create(AWorkbook: TsWorkbook; AFont: TsFont;
|
|
AText: String);
|
|
begin
|
|
if AWorkbook = nil then
|
|
raise Exception.Create('[TsHTMLAnalyzer.Create] Workbook required.');
|
|
if AFont = nil then
|
|
raise Exception.Create('[TsHTMLAnalyzer.Create] Font required.');
|
|
|
|
inherited Create(AText);
|
|
FWorkbook := AWorkbook;
|
|
|
|
OnFoundTag := @TagFoundHandler;
|
|
OnFoundText := @TextFoundHandler;
|
|
|
|
FPlainText := '';
|
|
SetLength(FRichTextParams, 0);
|
|
|
|
FAttrList := TsHTMLAttrList.Create;
|
|
FCurrFont := TsFont.Create;
|
|
FCurrFont.CopyOf(AFont);
|
|
|
|
FFontStack := TsIntegerStack.Create;
|
|
|
|
FPointSeparatorSettings := DefaultFormatSettings;
|
|
FPointSeparatorSettings.DecimalSeparator := '.';
|
|
end;
|
|
|
|
destructor TsHTMLAnalyzer.Destroy;
|
|
begin
|
|
FreeAndNil(FFontStack);
|
|
FreeAndNil(FCurrFont);
|
|
FreeAndNil(FAttrList);
|
|
inherited Destroy;
|
|
end;
|
|
|
|
{ Stores a font in the workbook's font list. Does not allow duplicates. }
|
|
function TsHTMLAnalyzer.AddFont(AFont: TsFont): Integer;
|
|
var
|
|
fnt: TsFont;
|
|
begin
|
|
// Is the font already stored in the workbook's font list?
|
|
Result := FWorkbook.FindFont(AFont.FontName, AFont.Size, AFont.Style, AFont.Color, AFont.Position);
|
|
if Result = -1 then
|
|
begin
|
|
// No. Create a new font, add it to the list, and return the new index.
|
|
fnt := TsFont.Create;
|
|
fnt.CopyOf(AFont);
|
|
Result := FWorkbook.AddFont(fnt);
|
|
end;
|
|
end;
|
|
|
|
procedure TsHTMLAnalyzer.AddRichTextParam(AFont: TsFont;
|
|
AHyperlinkIndex: Integer = -1);
|
|
var
|
|
len: Integer;
|
|
fntIndex: Integer;
|
|
n: Integer;
|
|
begin
|
|
n := Length(FRichTextParams);
|
|
len := UTF8Length(FPlainText);
|
|
fntIndex := AddFont(AFont);
|
|
if (n > 0) and (FRichTextparams[n-1].FirstIndex = len+1) then
|
|
begin
|
|
// Avoid adding another rich-text parameter for the same text location:
|
|
// Update the previous one
|
|
FRichTextParams[n-1].FontIndex := fntIndex;
|
|
FRichTextParams[n-1].HyperlinkIndex := AHyperlinkIndex;
|
|
end else
|
|
begin
|
|
// Add a new rich-text parameter
|
|
SetLength(FRichTextParams, n+1);
|
|
FRichTextParams[n].FirstIndex := len + 1;
|
|
FRichTextParams[n].FontIndex := fntIndex;
|
|
FRichTextParams[n].HyperlinkIndex := AHyperlinkIndex;
|
|
end;
|
|
end;
|
|
|
|
procedure TsHTMLAnalyzer.ProcessFontRestore;
|
|
var
|
|
fntIndex: Integer;
|
|
begin
|
|
fntIndex := FFontStack.Pop;
|
|
if fntIndex > -1 then
|
|
begin
|
|
FCurrFont.CopyOf(FWorkbook.GetFont(fntIndex));
|
|
AddRichTextParam(FCurrFont);
|
|
end;
|
|
end;
|
|
|
|
procedure TsHTMLAnalyzer.ReadFont(AFont: TsFont);
|
|
const
|
|
FACTOR = 1.2;
|
|
MIN_FONTSIZE = 6;
|
|
var
|
|
idx: Integer;
|
|
L: TStringList;
|
|
i, ip, im: Integer;
|
|
s: String;
|
|
f: Double;
|
|
defFntSize: Single;
|
|
begin
|
|
idx := FAttrList.IndexOfName('font-family'); // style tag
|
|
if idx = -1 then
|
|
idx := FAttrList.IndexOfName('face'); // html tag
|
|
if idx > -1 then begin
|
|
L := TStringList.Create;
|
|
try
|
|
L.StrictDelimiter := true;
|
|
L.DelimitedText := FAttrList[idx].Value;
|
|
AFont.FontName := L[0];
|
|
finally
|
|
L.Free;
|
|
end;
|
|
end;
|
|
|
|
idx := FAttrList.IndexOfName('font-size');
|
|
if idx = -1 then
|
|
idx := FAttrList.IndexOfName('size');
|
|
if idx > -1 then begin
|
|
defFntSize := FWorkbook.GetDefaultFont.Size;
|
|
s := FAttrList[idx].Value;
|
|
case s of
|
|
'medium', '3' : AFont.Size := defFntSize;
|
|
'large', '4' : AFont.Size := defFntSize*FACTOR;
|
|
'x-large', '5' : AFont.Size := defFntSize*FACTOR*FACTOR;
|
|
'xx-large', '6' : AFont.Size := defFntSize*FACTOR*FACTOR*FACTOR;
|
|
'small', '2' : AFont.Size := Max(MIN_FONTSIZE, defFntSize/FACTOR);
|
|
'x-small' : AFont.Size := Max(MIN_FONTSIZE, defFntSize/FACTOR/FACTOR);
|
|
'xx-small', '1' : AFont.Size := Max(MIN_FONTSIZE, defFntSize/FACTOR/FACTOR/FACTOR);
|
|
'larger' : AFont.Size := AFont.Size * FACTOR;
|
|
'smaller' : AFont.Size := Max(MIN_FONTSIZE, AFont.Size / FACTOR);
|
|
else
|
|
if s[1] in ['+', '-'] then
|
|
begin
|
|
TryStrToInt(s, i);
|
|
AFont.Size := defFntSize * IntPower(FACTOR, i);
|
|
end else
|
|
begin
|
|
i := 0;
|
|
im := 0;
|
|
ip := pos('%', s);
|
|
if ip = 0 then begin
|
|
im := pos('rem', s);
|
|
if im = 0 then
|
|
im := pos('em', s);
|
|
end;
|
|
if (ip > 0) then i := ip else
|
|
if (im > 0) then i := im;
|
|
if i > 0 then
|
|
begin
|
|
s := copy(s, 1, i-1);
|
|
if TryStrToFloat(s, f, FPointSeparatorSettings) then
|
|
begin
|
|
if ip > 0 then f := f * 0.01;
|
|
AFont.Size := Max(MIN_FONTSIZE, abs(f) * defFntSize);
|
|
end;
|
|
end else
|
|
AFont.Size := Max(MIN_FONTSIZE, HTMLLengthStrToPts(s));
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
idx := FAttrList.IndexOfName('font-style');
|
|
if idx > -1 then
|
|
case FAttrList[idx].Value of
|
|
'normal' : Exclude(AFont.Style, fssItalic);
|
|
'italic' : Include(AFont.Style, fssItalic);
|
|
'oblique' : Include(AFont.Style, fssItalic);
|
|
end;
|
|
|
|
idx := FAttrList.IndexOfName('font-weight');
|
|
if idx > -1 then
|
|
begin
|
|
s := FAttrList[idx].Value;
|
|
if TryStrToInt(s, i) and (i >= 700) then Include(AFont.Style, fssBold);
|
|
end;
|
|
|
|
idx := FAttrList.IndexOfName('text-decoration');
|
|
if idx > -1 then
|
|
begin
|
|
s := FAttrList[idx].Value;
|
|
if pos('underline', s) <> 0 then Include(AFont.Style, fssUnderline);
|
|
if pos('line-through', s) <> 0 then Include(AFont.Style, fssStrikeout);
|
|
end;
|
|
|
|
idx := FAttrList.IndexOfName('color');
|
|
if idx > -1 then
|
|
AFont.Color := HTMLColorStrToColor(FAttrList[idx].Value);
|
|
end;
|
|
|
|
procedure TsHTMLAnalyzer.TagFoundHandler(NoCaseTag, ActualTag: String);
|
|
begin
|
|
case NoCaseTag[2] of
|
|
'B': case NoCaseTag of
|
|
'<B>' : begin
|
|
FFontStack.Push(AddFont(FCurrFont));
|
|
Include(FCurrFont.Style, fssBold);
|
|
AddRichTextParam(FCurrFont);
|
|
end;
|
|
'<BR>',
|
|
'<BR/>': FPlainText := FPlainText + FPS_LINE_ENDING;
|
|
else if (pos('<BR ', NoCaseTag) = 1) then
|
|
FPlainText := FPlainText + FPS_LINE_ENDING;
|
|
end;
|
|
'D': if (NoCaseTag = '<DEL>') then
|
|
begin
|
|
FFontStack.Push(AddFont(FCurrFont));
|
|
Include(FCurrFont.Style, fssStrikeout);
|
|
AddRichTextParam(FCurrFont);
|
|
end;
|
|
'E': if (NoCaseTag = '<EM>') then
|
|
begin
|
|
FFontStack.Push(AddFont(FCurrFont));
|
|
Include(FCurrFont.Style, fssItalic);
|
|
AddRichTextParam(FCurrFont);
|
|
end;
|
|
'F': if (pos('<FONT ', NoCaseTag) = 1) then
|
|
begin
|
|
FFontStack.Push(AddFont(FCurrFont));
|
|
FAttrList.Parse(ActualTag);
|
|
ReadFont(FCurrFont);
|
|
AddRichTextparam(FCurrFont);
|
|
end;
|
|
'I': case NoCaseTag of
|
|
'<I>' : begin
|
|
FFontStack.Push(AddFont(FCurrFont));
|
|
Include(FCurrFont.Style, fssItalic);
|
|
AddRichTextParam(FCurrFont);
|
|
end;
|
|
'<INS>': begin
|
|
FFontStack.Push(AddFont(FCurrFont));
|
|
Include(FCurrFont.Style, fssUnderline);
|
|
AddRichTextParam(FCurrFont);
|
|
end;
|
|
end;
|
|
'S': case NoCaseTag of
|
|
'<S>' : begin
|
|
FFontStack.Push(AddFont(FCurrFont));
|
|
Include(FCurrFont.Style, fssStrikeout);
|
|
AddRichTextParam(FCurrFont);
|
|
end;
|
|
'<STRONG>':begin
|
|
FFontStack.Push(AddFont(FCurrFont));
|
|
Include(FCurrFont.Style, fssBold);
|
|
AddRichTextParam(FCurrFont);
|
|
end;
|
|
'<SUB>': begin
|
|
FFontStack.Push(AddFont(FCurrFont));
|
|
FCurrFont.Position := fpSubscript;
|
|
AddRichTextParam(FCurrFont);
|
|
end;
|
|
'<SUP>': begin
|
|
FFontStack.Push(AddFont(FCurrFont));
|
|
FCurrFont.Position := fpSuperscript;
|
|
AddRichTextParam(FCurrFont);
|
|
end;
|
|
end;
|
|
'U': if (NoCaseTag = '<U>') then
|
|
begin
|
|
FFontStack.Push(AddFont(FCurrFont));
|
|
Include(FCurrFont.Style, fssUnderline);
|
|
AddRichTextParam(FCurrFont);
|
|
end;
|
|
'/': case NoCaseTag[3] of
|
|
'B': if (NoCaseTag) = '</B>' then ProcessFontRestore;
|
|
'D': if (NoCaseTag) = '</DEL>' then ProcessFontRestore;
|
|
'E': if (NoCaseTag) = '</EM>' then ProcessFontRestore;
|
|
'F': if (NoCaseTag) = '</FONT>' then ProcessFontRestore;
|
|
'I': if (NoCaseTag = '</I>') or (NoCaseTag = '</INS>') then ProcessFontRestore;
|
|
'S': if (NoCaseTag = '</S>') or (NoCaseTag = '</STRONG>') or
|
|
(NoCaseTag = '</SUB>') or (NoCaseTag = '</SUP>') then ProcessFontRestore;
|
|
'U': if (NoCaseTag = '</U>') then ProcessFontRestore;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure TsHTMLAnalyzer.TextFoundHandler(AText: String);
|
|
begin
|
|
if not FPreserveSpaces then
|
|
AText := CleanHTMLString(AText) else
|
|
AText := RemoveHTMLEntities(AText);
|
|
if AText <> '' then
|
|
begin
|
|
if FPlainText = '' then
|
|
FPlainText := AText
|
|
else
|
|
FPlainText := FPlainText + AText;
|
|
end;
|
|
end;
|
|
|
|
|
|
{@@ ----------------------------------------------------------------------------
|
|
Extracts rich-text parameters out of an html-formatted string and returns the
|
|
plain text
|
|
-------------------------------------------------------------------------------}
|
|
procedure HTMLToRichText(AWorkbook: TsWorkbook; AFont: TsFont;
|
|
const AHTMLText: String; out APlainText: String;
|
|
out ARichTextParams: TsRichTextParams);
|
|
var
|
|
analyzer: TsHTMLAnalyzer;
|
|
j: Integer;
|
|
begin
|
|
analyzer := TsHTMLAnalyzer.Create(AWorkbook, AFont, AHTMLText + '<end>');
|
|
try
|
|
analyzer.PreserveSpaces := true;
|
|
analyzer.Exec;
|
|
APlainText := analyzer.PlainText;
|
|
SetLength(ARichTextParams, Length(analyzer.RichTextParams));
|
|
for j:=0 to High(ARichTextParams) do
|
|
ARichTextParams[j] := analyzer.RichTextParams[j];
|
|
finally
|
|
analyzer.Free;
|
|
end;
|
|
end;
|
|
|
|
|
|
{==============================================================================}
|
|
{ Rich-text-to-HTML conversion }
|
|
{==============================================================================}
|
|
|
|
type
|
|
TsHTMLComposer = class
|
|
private
|
|
FPointSeparatorSettings: TFormatSettings;
|
|
FWorkbook: TsWorkbook;
|
|
FBaseFont: TsFont;
|
|
FPlainText: String;
|
|
FRichTextParams: TsRichTextParams;
|
|
FPrefix: String;
|
|
FTagCase: TsTagCase;
|
|
procedure GetFontsFromWorkbook(out AFonts: TsFontArray);
|
|
function GetTextOfRichTextParam(AIndex: Integer): String;
|
|
protected
|
|
function FixTagCase(ATag: String): String;
|
|
public
|
|
constructor Create(AWorkbook: TsWorkbook; AFont: TsFont; APrefix: String = '';
|
|
ATagCase: TsTagCase = tcLowercase);
|
|
function Exec(const APlainText: String; const ARichTextParams: TsRichTextParams): String;
|
|
end;
|
|
|
|
constructor TsHTMLComposer.Create(AWorkbook: TsWorkbook; AFont: TsFont;
|
|
APrefix: String = ''; ATagCase: TsTagCase = tcLowercase);
|
|
begin
|
|
FPointSeparatorSettings := DefaultFormatSettings;
|
|
FPointSeparatorSettings.DecimalSeparator := '.';
|
|
FWorkbook := AWorkbook;
|
|
FBaseFont := AFont;
|
|
FPrefix := APrefix;
|
|
FTagCase := ATagCase;
|
|
end;
|
|
|
|
function TsHTMLComposer.Exec(const APlainText: String;
|
|
const ARichTextParams: TsRichTextParams): String;
|
|
type
|
|
TChangeFlag = (cfFontName, cfFontSize, cfFontColor);
|
|
const
|
|
EPS = 1E-3;
|
|
var
|
|
i: Integer;
|
|
prevFnt, currFnt: TsFont;
|
|
chgFlags: set of TChangeFlag;
|
|
openingTag, closingTag: String;
|
|
fonts: TsFontArray;
|
|
tag: String;
|
|
begin
|
|
if Length(ARichTextParams) = 0 then
|
|
begin
|
|
Result := FPlainText;
|
|
exit;
|
|
end;
|
|
|
|
FRichTextParams := ARichTextParams;
|
|
FPlainText := APlainText;
|
|
|
|
prevFnt := TsFont.Create;
|
|
prevFnt.CopyOf(FBaseFont);
|
|
|
|
if FRichTextParams[0].FirstIndex > 1 then
|
|
Result := GetTextOfRichTextParam(-1) else
|
|
Result := '';
|
|
|
|
GetFontsFromWorkbook(fonts);
|
|
for i:=0 to High(FRichTextParams) do
|
|
begin
|
|
currFnt := fonts[i];
|
|
openingTag := '';
|
|
closingTag := '';
|
|
if not SameFont(currFnt, prevFnt) then
|
|
begin
|
|
chgFlags := [];
|
|
if not SameText(prevFnt.FontName, currFnt.FontName) then
|
|
Include(chgFlags, cfFontName);
|
|
if not SameValue(currFnt.Size, prevFnt.Size, EPS) then
|
|
Include(chgFlags, cfFontSize);
|
|
if currFnt.Color <> prevFnt.Color then
|
|
Include(chgFlags, cfFontColor);
|
|
|
|
if [cfFontName, cfFontSize, cfFontColor] * chgFlags <> [] then
|
|
begin
|
|
tag := FixTagCase('font');
|
|
openingTag := '<' + tag;
|
|
if cfFontName in chgFlags then
|
|
begin
|
|
openingTag := openingTag + ' ' + FPrefix + FixTagCase('face') +
|
|
'="' + UnquoteStr(currFnt.FontName) + '"';
|
|
prevFnt.FontName := currFnt.FontName;
|
|
end;
|
|
if cfFontSize in chgFlags then
|
|
begin
|
|
openingTag := openingTag + ' ' + FPrefix + FixTagCase('size') +
|
|
'="' + Format('%.gpt', [currFnt.Size], FPointSeparatorSettings) + '"';
|
|
prevFnt.Size := currFnt.Size;
|
|
end;
|
|
if cfFontColor in chgFlags then
|
|
begin
|
|
openingTag := openingTag + ' ' + FPrefix + FixTagCase('color') +
|
|
'="' + ColorToHTMLColorStr(currFnt.Color) + '"';
|
|
prevFnt.Size := currFnt.Color;
|
|
end;
|
|
openingTag := openingTag + '>';
|
|
closingTag :='</' + tag + '>' + closingTag;
|
|
end;
|
|
|
|
if (fssBold in currFnt.Style) then
|
|
begin
|
|
tag := FixTagCase('b');
|
|
openingTag := openingTag + '<' + tag + '>';
|
|
closingTag := '</' + tag + '>' + closingTag;
|
|
prevFnt.Style := prevFnt.Style + [fssBold];
|
|
end else
|
|
prevFnt.Style := prevFnt.Style - [fssBold];;
|
|
|
|
if (fssItalic in currFnt.Style) then
|
|
begin
|
|
tag := FixTagCase('i');
|
|
openingTag := openingTag + '<' + tag + '>';
|
|
closingTag := '</' + tag + '>' + closingTag;
|
|
prevFnt.Style := prevFnt.Style + [fssItalic];
|
|
end else
|
|
prevFnt.Style := prevFnt.Style - [fssItalic];
|
|
|
|
if (fssUnderline in currFnt.Style) then
|
|
begin
|
|
tag := FixTagCase('u');
|
|
openingTag := openingTag + '<' + tag + '>';
|
|
closingTag := '</' + tag + '>' + closingTag;
|
|
prevFnt.Style := prevFnt.Style + [fssUnderline];
|
|
end else
|
|
prevFnt.Style := prevFnt.Style - [fssUnderline];
|
|
|
|
if (fssStrikeout in currFnt.Style) then
|
|
begin
|
|
tag := FixTagCase('s');
|
|
openingTag := openingTag + '<' + tag + '>';
|
|
closingTag := '</' + tag + '>' + closingTag;
|
|
prevFnt.Style := prevFnt.Style + [fssStrikeout];
|
|
end else
|
|
prevFnt.Style := prevFnt.Style - [fssStrikeout];
|
|
|
|
if currFnt.Position <> prevFnt.Position then
|
|
begin
|
|
if currFnt.Position = fpSuperscript then
|
|
begin
|
|
tag := FixTagCase('sup');
|
|
openingTag := openingTag + '<' + tag + '>';
|
|
closingTag := '</' + tag + '>' + closingTag;
|
|
currFnt.Position := fpSuperscript;
|
|
end else
|
|
if currFnt.Position = fpSubscript then
|
|
begin
|
|
tag := FixTagCase('sub');
|
|
openingTag := openingTag + '<' + tag + '>';
|
|
closingTag := '</' + tag + '>' + closingTag;
|
|
currFnt.Position := fpSubscript;
|
|
end else
|
|
currFnt.Position := fpNormal;
|
|
end;
|
|
end;
|
|
|
|
// Add the node text with opening and closing tags (reverse order as opening!)
|
|
Result := Result + openingTag + GetTextOfRichTextParam(i) + closingTag;
|
|
end; // for
|
|
end;
|
|
|
|
function TsHTMLComposer.FixTagCase(ATag: String): String;
|
|
begin
|
|
case FTagCase of
|
|
tcLowercase:
|
|
Result := Lowercase(ATag);
|
|
tcUppercase:
|
|
Result := Uppercase(ATag);
|
|
tcProperCase:
|
|
begin
|
|
Result := Lowercase(ATag);
|
|
Result[1] := UpCase(Result[1]);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure TsHTMLComposer.GetFontsFromWorkbook(out AFonts: TsFontArray);
|
|
var
|
|
i: Integer;
|
|
begin
|
|
SetLength(AFonts, Length(FRichTextParams));
|
|
for i:=0 to High(AFonts) do
|
|
AFonts[i] := FWorkbook.GetFont(FRichTextParams[i].FontIndex);
|
|
end;
|
|
|
|
function TsHTMLComposer.GetTextOfRichTextParam(AIndex: Integer): String;
|
|
var
|
|
p1, p2: Integer;
|
|
begin
|
|
if AIndex = -1 then
|
|
Result := UTF8Copy(FPlainText, 1, FRichTextParams[0].FirstIndex-1)
|
|
else
|
|
if AIndex <= High(FRichTextParams) then
|
|
begin
|
|
p1 := FRichTextParams[AIndex].FirstIndex;
|
|
if AIndex < High(FRichTextparams) then
|
|
p2 := FRichTextParams[AIndex+1].FirstIndex else
|
|
p2 := UTF8Length(FPlainText) + 1;
|
|
Result := UTF8Copy(FPlaiNText, p1, p2-p1);
|
|
end else
|
|
Result := '';
|
|
end;
|
|
|
|
|
|
{@@ ----------------------------------------------------------------------------
|
|
Constructs a html-coded string from a plain text string and
|
|
rich-text parameters
|
|
-------------------------------------------------------------------------------}
|
|
procedure RichTextToHTML(AWorkbook: TsWorkbook; AFont: TsFont;
|
|
const APlainText: String; const ARichTextParams: TsRichTextParams;
|
|
out AHTMLText: String; APrefix: String = ''; ATagCase: TsTagCase = tcLowercase);
|
|
var
|
|
composer: TsHTMLComposer;
|
|
begin
|
|
if Length(ARichTextParams) = 0 then
|
|
AHTMLText := APlainText
|
|
else
|
|
begin
|
|
composer := TsHTMLComposer.Create(AWorkbook, AFont, APrefix, ATagCase);
|
|
try
|
|
AHTMLText := composer.Exec(APlainText, ARichTextParams);
|
|
finally
|
|
composer.Free;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
end.
|
|
|