{-----------------------------------------------------------------------------
The contents of this file are subject to the Mozilla Public License
Version 1.1 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/MPL-1.1.html

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either expressed or implied. See the License for
the specific language governing rights and limitations under the License.

The Original Code is: JvStrToHtml.PAS, released on 2001-02-28.

The Initial Developer of the Original Code is Sébastien Buysse [sbuysse att buypin dott com]
Portions created by Sébastien Buysse are Copyright (C) 2001 Sébastien Buysse.
All Rights Reserved.

Contributor(s):
  Michael Beck [mbeck att bigfoot dott com].
  Andreas Hausladen [Andreas dott Hausladen att gmx dott de]

You may retrieve the latest version of this file at the Project JEDI's JVCL home page,
located at http://jvcl.delphi-jedi.org

Known Issues:
-----------------------------------------------------------------------------}
// $Id$

unit JvStrToHtml;

{$mode objfpc}{$H+}

interface

uses
  SysUtils, Classes;

type
  { Component wrapper around StringToHtml / HtmlToString.
    Text holds the plain (UTF-8) string and Html holds its HTML-escaped form;
    assigning either property recomputes the other one. }
  TJvStrToHtml = class(TComponent) // TJvComponent)
  private
    FHtml: string;
    FValue: string;
    procedure SetHtml(const Value: string);
    procedure SetValue(const Value: string);
  public
    constructor Create(AOwner: TComponent); override;
    // Returns Text with HTML escaping applied (see StringToHtml).
    function TextToHtml(const Text: string): string;
    // Returns Text with HTML character references resolved (see HtmlToString).
    function HtmlToText(const Text: string): string;
  published
    property Text: string read FValue write SetValue;
    property Html: string read FHtml write SetHtml;
  end;

{ Escapes a UTF-8 string for HTML. Characters in ['a'..'z', 'A'..'Z', '0'..'9',
  '_', ' '] are copied unchanged. Every other code point is written as the named
  entity from the Conversions table, or as a decimal reference (&#N;) when the
  table has no entry for it. }
function StringToHtml(const Value: string): string;

{ Resolves HTML character references in Value and returns the result as UTF-8.
  Handles the named entities of the Conversions table as well as decimal (&#N;)
  and hexadecimal (&#xN; / &#XN;) references. Anything that is not a complete,
  resolvable reference - a lone '&', an unknown name, a missing ';' - is copied
  to the output unchanged instead of raising an exception. }
function HtmlToString(const Value: string): string;
//function CharToHtml(Ch: Char): string;

implementation

uses
  LCLVersion, LazUtf8;

type
  // One table row: a Unicode code point and the HTML entity that represents it.
  TJvHtmlCodeRec = record
    Ch: Cardinal;
    Html: string;
  end;

const
  // Largest valid Unicode code point; numeric references above it are rejected.
  MaxUnicodeCodePoint = $10FFFF;

  { References:
    http://www.w3.org/TR/REC-html40/charset.html#h-5.3
    http://www.w3.org/TR/REC-html40/sgml/entities.html#h-24.2.1
    http://www.w3.org/TR/REC-html40/sgml/entities.html#h-24.4.1 }
  Conversions: array [0..240] of TJvHtmlCodeRec = (
    (Ch: 34; Html: '&quot;'), (Ch: 38; Html: '&amp;'), (Ch: 39; Html: '&apos;'),
    (Ch: 60; Html: '&lt;'), (Ch: 62; Html: '&gt;'),
    (Ch: 160; Html: '&nbsp;'), (Ch: 161; Html: '&iexcl;'), (Ch: 162; Html: '&cent;'),
    (Ch: 163; Html: '&pound;'), (Ch: 164; Html: '&curren;'), (Ch: 165; Html: '&yen;'),
    (Ch: 166; Html: '&brvbar;'), (Ch: 167; Html: '&sect;'), (Ch: 168; Html: '&uml;'),
    (Ch: 169; Html: '&copy;'), (Ch: 170; Html: '&ordf;'), (Ch: 171; Html: '&laquo;'),
    (Ch: 172; Html: '&not;'), (Ch: 173; Html: '&shy;'), (Ch: 174; Html: '&reg;'),
    (Ch: 175; Html: '&macr;'), (Ch: 176; Html: '&deg;'), (Ch: 177; Html: '&plusmn;'),
    (Ch: 178; Html: '&sup2;'), (Ch: 179; Html: '&sup3;'), (Ch: 180; Html: '&acute;'),
    (Ch: 181; Html: '&micro;'), (Ch: 182; Html: '&para;'), (Ch: 183; Html: '&middot;'),
    (Ch: 184; Html: '&cedil;'), (Ch: 185; Html: '&sup1;'), (Ch: 186; Html: '&ordm;'),
    (Ch: 187; Html: '&raquo;'), (Ch: 188; Html: '&frac14;'), (Ch: 189; Html: '&frac12;'),
    (Ch: 190; Html: '&frac34;'), (Ch: 191; Html: '&iquest;'), (Ch: 192; Html: '&Agrave;'),
    (Ch: 193; Html: '&Aacute;'), (Ch: 194; Html: '&Acirc;'), (Ch: 195; Html: '&Atilde;'),
    (Ch: 196; Html: '&Auml;'), (Ch: 197; Html: '&Aring;'), (Ch: 198; Html: '&AElig;'),
    (Ch: 199; Html: '&Ccedil;'), (Ch: 200; Html: '&Egrave;'), (Ch: 201; Html: '&Eacute;'),
    (Ch: 202; Html: '&Ecirc;'), (Ch: 203; Html: '&Euml;'), (Ch: 204; Html: '&Igrave;'),
    (Ch: 205; Html: '&Iacute;'), (Ch: 206; Html: '&Icirc;'), (Ch: 207; Html: '&Iuml;'),
    (Ch: 208; Html: '&ETH;'), (Ch: 209; Html: '&Ntilde;'), (Ch: 210; Html: '&Ograve;'),
    (Ch: 211; Html: '&Oacute;'), (Ch: 212; Html: '&Ocirc;'), (Ch: 213; Html: '&Otilde;'),
    (Ch: 214; Html: '&Ouml;'), (Ch: 215; Html: '&times;'), (Ch: 216; Html: '&Oslash;'),
    (Ch: 217; Html: '&Ugrave;'), (Ch: 218; Html: '&Uacute;'), (Ch: 219; Html: '&Ucirc;'),
    (Ch: 220; Html: '&Uuml;'), (Ch: 221; Html: '&Yacute;'), (Ch: 222; Html: '&THORN;'),
    (Ch: 223; Html: '&szlig;'), (Ch: 224; Html: '&agrave;'), (Ch: 225; Html: '&aacute;'),
    (Ch: 226; Html: '&acirc;'), (Ch: 227; Html: '&atilde;'), (Ch: 228; Html: '&auml;'),
    (Ch: 229; Html: '&aring;'), (Ch: 230; Html: '&aelig;'), (Ch: 231; Html: '&ccedil;'),
    (Ch: 232; Html: '&egrave;'), (Ch: 233; Html: '&eacute;'), (Ch: 234; Html: '&ecirc;'),
    (Ch: 235; Html: '&euml;'), (Ch: 236; Html: '&igrave;'), (Ch: 237; Html: '&iacute;'),
    (Ch: 238; Html: '&icirc;'), (Ch: 239; Html: '&iuml;'), (Ch: 240; Html: '&eth;'),
    (Ch: 241; Html: '&ntilde;'), (Ch: 242; Html: '&ograve;'), (Ch: 243; Html: '&oacute;'),
    (Ch: 244; Html: '&ocirc;'), (Ch: 245; Html: '&otilde;'), (Ch: 246; Html: '&ouml;'),
    (Ch: 247; Html: '&divide;'), (Ch: 248; Html: '&oslash;'), (Ch: 249; Html: '&ugrave;'),
    (Ch: 250; Html: '&uacute;'), (Ch: 251; Html: '&ucirc;'), (Ch: 252; Html: '&uuml;'),
    (Ch: 253; Html: '&yacute;'), (Ch: 254; Html: '&thorn;'), (Ch: 255; Html: '&yuml;'),
    (Ch: 338; Html: '&OElig;'), (Ch: 339; Html: '&oelig;'), (Ch: 352; Html: '&Scaron;'),
    (Ch: 353; Html: '&scaron;'), (Ch: 376; Html: '&Yuml;'), (Ch: 402; Html: '&fnof;'),
    (Ch: 710; Html: '&circ;'), (Ch: 732; Html: '&tilde;'),
    (Ch: 913; Html: '&Alpha;'), (Ch: 914; Html: '&Beta;'), (Ch: 915; Html: '&Gamma;'),
    (Ch: 916; Html: '&Delta;'), (Ch: 917; Html: '&Epsilon;'), (Ch: 918; Html: '&Zeta;'),
    (Ch: 919; Html: '&Eta;'), (Ch: 920; Html: '&Theta;'), (Ch: 921; Html: '&Iota;'),
    (Ch: 922; Html: '&Kappa;'), (Ch: 923; Html: '&Lambda;'), (Ch: 924; Html: '&Mu;'),
    (Ch: 925; Html: '&Nu;'), (Ch: 926; Html: '&Xi;'), (Ch: 927; Html: '&Omicron;'),
    (Ch: 928; Html: '&Pi;'), (Ch: 929; Html: '&Rho;'), (Ch: 931; Html: '&Sigma;'),
    (Ch: 932; Html: '&Tau;'), (Ch: 933; Html: '&Upsilon;'), (Ch: 934; Html: '&Phi;'),
    (Ch: 935; Html: '&Chi;'), (Ch: 936; Html: '&Psi;'), (Ch: 937; Html: '&Omega;'),
    (Ch: 945; Html: '&alpha;'), (Ch: 946; Html: '&beta;'), (Ch: 947; Html: '&gamma;'),
    (Ch: 948; Html: '&delta;'), (Ch: 949; Html: '&epsilon;'), (Ch: 950; Html: '&zeta;'),
    (Ch: 951; Html: '&eta;'), (Ch: 952; Html: '&theta;'), (Ch: 953; Html: '&iota;'),
    (Ch: 954; Html: '&kappa;'), (Ch: 955; Html: '&lambda;'), (Ch: 956; Html: '&mu;'),
    (Ch: 957; Html: '&nu;'), (Ch: 958; Html: '&xi;'), (Ch: 959; Html: '&omicron;'),
    (Ch: 960; Html: '&pi;'), (Ch: 961; Html: '&rho;'), (Ch: 962; Html: '&sigmaf;'),
    (Ch: 963; Html: '&sigma;'), (Ch: 964; Html: '&tau;'), (Ch: 965; Html: '&upsilon;'),
    (Ch: 966; Html: '&phi;'), (Ch: 967; Html: '&chi;'), (Ch: 968; Html: '&psi;'),
    (Ch: 969; Html: '&omega;'), (Ch: 977; Html: '&thetasym;'), (Ch: 978; Html: '&upsih;'),
    (Ch: 982; Html: '&piv;'),
    (Ch: 8194; Html: '&ensp;'), (Ch: 8195; Html: '&emsp;'), (Ch: 8201; Html: '&thinsp;'),
    (Ch: 8204; Html: '&zwnj;'), (Ch: 8205; Html: '&zwj;'), (Ch: 8206; Html: '&lrm;'),
    (Ch: 8207; Html: '&rlm;'), (Ch: 8211; Html: '&ndash;'), (Ch: 8212; Html: '&mdash;'),
    (Ch: 8216; Html: '&lsquo;'), (Ch: 8217; Html: '&rsquo;'), (Ch: 8218; Html: '&sbquo;'),
    (Ch: 8220; Html: '&ldquo;'), (Ch: 8221; Html: '&rdquo;'), (Ch: 8222; Html: '&bdquo;'),
    (Ch: 8224; Html: '&dagger;'), (Ch: 8225; Html: '&Dagger;'), (Ch: 8226; Html: '&bull;'),
    (Ch: 8230; Html: '&hellip;'), (Ch: 8240; Html: '&permil;'), (Ch: 8242; Html: '&prime;'),
    (Ch: 8243; Html: '&Prime;'), (Ch: 8249; Html: '&lsaquo;'), (Ch: 8250; Html: '&rsaquo;'),
    (Ch: 8254; Html: '&oline;'), (Ch: 8364; Html: '&euro;'), (Ch: 8482; Html: '&trade;'),
    (Ch: 8592; Html: '&larr;'), (Ch: 8593; Html: '&uarr;'), (Ch: 8594; Html: '&rarr;'),
    (Ch: 8595; Html: '&darr;'), (Ch: 8596; Html: '&harr;'), (Ch: 8629; Html: '&crarr;'),
    (Ch: 8704; Html: '&forall;'), (Ch: 8706; Html: '&part;'), (Ch: 8707; Html: '&exist;'),
    (Ch: 8709; Html: '&empty;'), (Ch: 8711; Html: '&nabla;'), (Ch: 8712; Html: '&isin;'),
    (Ch: 8713; Html: '&notin;'), (Ch: 8715; Html: '&ni;'), (Ch: 8719; Html: '&prod;'),
    (Ch: 8721; Html: '&sum;'), (Ch: 8722; Html: '&minus;'), (Ch: 8727; Html: '&lowast;'),
    (Ch: 8730; Html: '&radic;'), (Ch: 8733; Html: '&prop;'), (Ch: 8734; Html: '&infin;'),
    (Ch: 8736; Html: '&ang;'), (Ch: 8743; Html: '&and;'), (Ch: 8744; Html: '&or;'),
    (Ch: 8745; Html: '&cap;'), (Ch: 8746; Html: '&cup;'), (Ch: 8747; Html: '&int;'),
    (Ch: 8756; Html: '&there4;'), (Ch: 8764; Html: '&sim;'), (Ch: 8773; Html: '&cong;'),
    (Ch: 8776; Html: '&asymp;'), (Ch: 8800; Html: '&ne;'), (Ch: 8801; Html: '&equiv;'),
    (Ch: 8804; Html: '&le;'), (Ch: 8805; Html: '&ge;'), (Ch: 8834; Html: '&sub;'),
    (Ch: 8835; Html: '&sup;'), (Ch: 8836; Html: '&nsub;'), (Ch: 8838; Html: '&sube;'),
    (Ch: 8839; Html: '&supe;'), (Ch: 8853; Html: '&oplus;'), (Ch: 8855; Html: '&otimes;'),
    (Ch: 8869; Html: '&perp;'), (Ch: 8901; Html: '&sdot;'), (Ch: 8968; Html: '&lceil;'),
    (Ch: 8969; Html: '&rceil;'), (Ch: 8970; Html: '&lfloor;'), (Ch: 8971; Html: '&rfloor;'),
    (Ch: 9674; Html: '&loz;'), (Ch: 9824; Html: '&spades;'), (Ch: 9827; Html: '&clubs;'),
    (Ch: 9829; Html: '&hearts;'), (Ch: 9830; Html: '&diams;')
  );

var
  // NOTE(review): filled only by InitConversionsHash, which nothing in this unit calls.
  ConversionsHash: array of Word;

{ Internal helpers }

// Copies S into Buf starting at the 1-based position WritePos, enlarging Buf
// first when S does not fit, and advances WritePos past the copied bytes.
procedure AppendToBuffer(var Buf: string; var WritePos: Integer; const S: string);
var
  n: Integer;
begin
  n := Length(S);
  if n = 0 then
    exit; // nothing to copy; also avoids touching S[1] of an empty string
  if WritePos + n - 1 > Length(Buf) then
    // Grow geometrically so that repeated appends stay linear overall.
    SetLength(Buf, 2 * Length(Buf) + n);
  Move(S[1], Buf[WritePos], n);
  inc(WritePos, n);
end;

// Returns the index of the Conversions row for code point Ch, or -1 if none.
function FindConversionByChar(Ch: Cardinal): Integer;
var
  J: Integer;
begin
  Result := -1;
  for J := Low(Conversions) to High(Conversions) do
    if Conversions[J].Ch = Ch then
    begin
      Result := J;
      exit;
    end;
end;

// Returns the index of the Conversions row whose entity text equals Html
// (case-sensitive, including '&' and ';'), or -1 if none.
function FindConversionByHtml(const Html: string): Integer;
var
  J: Integer;
begin
  Result := -1;
  for J := Low(Conversions) to High(Conversions) do
    if Conversions[J].Html = Html then
    begin
      Result := J;
      exit;
    end;
end;

// Resolves one complete character reference. Ref must include the leading '&'
// and the trailing ';'. Returns True and the code point for a known named
// entity or a well-formed numeric reference not above MaxUnicodeCodePoint;
// returns False (CodePoint = 0) otherwise. Never raises.
function TryDecodeHtmlRef(const Ref: string; out CodePoint: Cardinal): Boolean;
var
  I, First, Last, Idx: Integer;
  Base, Digit, Acc: Cardinal;
begin
  Result := False;
  CodePoint := 0;
  // The shortest possible references ('&lt;', '&#9;') are four characters long.
  if (Length(Ref) < 4) or (Ref[1] <> '&') or (Ref[Length(Ref)] <> ';') then
    exit;

  if Ref[2] <> '#' then
  begin
    // Named entity.
    Idx := FindConversionByHtml(Ref);
    if Idx >= 0 then
    begin
      CodePoint := Conversions[Idx].Ch;
      Result := True;
    end;
    exit;
  end;

  // Numeric reference: '&#' digits ';' or '&#x' hex-digits ';'.
  if Ref[3] in ['x', 'X'] then
  begin
    Base := 16;
    First := 4;
  end
  else
  begin
    Base := 10;
    First := 3;
  end;
  Last := Length(Ref) - 1;
  if First > Last then
    exit; // '&#x;' - no digits at all

  Acc := 0;
  for I := First to Last do
  begin
    case Ref[I] of
      '0'..'9': Digit := Ord(Ref[I]) - Ord('0');
      'a'..'f': Digit := Ord(Ref[I]) - Ord('a') + 10;
      'A'..'F': Digit := Ord(Ref[I]) - Ord('A') + 10;
    else
      exit;
    end;
    if Digit >= Base then
      exit; // hex letter inside a decimal reference
    Acc := Acc * Base + Digit;
    // Checking after every digit also keeps Acc from overflowing.
    if Acc > MaxUnicodeCodePoint then
      exit;
  end;
  CodePoint := Acc;
  Result := True;
end;

{ TJvStrToHtml }

constructor TJvStrToHtml.Create(AOwner: TComponent);
begin
  inherited Create(AOwner);
  FValue := '';
  FHtml := '';
end;

function TJvStrToHtml.HtmlToText(const Text: string): string;
begin
  Result := HtmlToString(Text);
end;

// Stores the assigned HTML and refreshes Text from it. The original only
// updated FValue, so reading Html back returned a stale value.
procedure TJvStrToHtml.SetHtml(const Value: string);
begin
  FHtml := Value;
  FValue := HtmlToText(Value);
end;

// Stores the assigned plain text and refreshes Html from it. The original only
// updated FHtml, so reading Text back returned a stale value.
procedure TJvStrToHtml.SetValue(const Value: string);
begin
  FValue := Value;
  FHtml := TextToHtml(Value);
end;

function TJvStrToHtml.TextToHtml(const Text: string): string;
begin
  Result := StringToHtml(Text);
end;

// Computes a 16-bit hash of S: starts from Length(S) and adds every character
// code shifted left by (index mod 4), truncating to Word after each step.
// NOTE(review): only used by InitConversionsHash, which is itself unused.
function GetHtmlHash(const S: string): Word;
var
  I: Integer;
begin
  Result := Length(S);
  for I := 1 to Length(S) do
    Result := Word(Result + Ord(S[I]) shl (I mod 4));
end;

// Fills ConversionsHash with the GetHtmlHash value of every entity name.
// NOTE(review): never called from this unit; kept for a possible hashed lookup.
procedure InitConversionsHash;
var
  I: Integer;
begin
  SetLength(ConversionsHash, Length(Conversions));
  for I := 0 to High(ConversionsHash) do
    ConversionsHash[I] := GetHtmlHash(Conversions[I].Html);
end;

function StringToHtml(const Value: string): string;
var
  CurrPos: Integer;   // next 1-based write position in Result
  P, PEnd: PChar;
  n, Idx: Integer;
  ch: Cardinal;
begin
  Result := '';
  if Value = '' then
    exit;

  // Start with room for an unescaped copy; AppendToBuffer enlarges on demand.
  SetLength(Result, Length(Value));
  CurrPos := 1;
  P := PChar(Value);
  PEnd := P + Length(Value) - 1;
  while P <= PEnd do
  begin
    n := 1;
    if P^ in ['a'..'z', 'A'..'Z', '0'..'9', '_', ' '] then
      AppendToBuffer(Result, CurrPos, P^)
    else
    begin
      // Decode one UTF-8 code point; n receives its length in bytes.
      {$IF LCL_FullVersion >= 2000000}
      ch := UTF8CodePointToUniCode(P, n);
      {$ELSE}
      ch := UTF8CharacterToUniCode(P, n);
      {$IFEND}
      if n < 1 then
        n := 1; // always make progress, whatever the decoder reports
      Idx := FindConversionByChar(ch);
      if Idx >= 0 then
        AppendToBuffer(Result, CurrPos, Conversions[Idx].Html)
      else
        AppendToBuffer(Result, CurrPos, Format('&#%d;', [ch]));
    end;
    inc(P, n);
  end;
  // Trim the unused tail of the buffer.
  SetLength(Result, CurrPos - 1);
end;

function HtmlToString(const Value: string): string;
var
  CurrPos: Integer;   // next 1-based write position in Result
  I, J, Len: Integer;
  CodePoint: Cardinal;
begin
  Result := '';
  if Value = '' then
    exit;

  Len := Length(Value);
  SetLength(Result, Len);
  CurrPos := 1;
  I := 1;
  while I <= Len do
  begin
    if Value[I] = '&' then
    begin
      // Collect the characters that may legally form a reference body, then
      // require the terminating ';'. Stopping at the first foreign character
      // keeps a stray '&' from swallowing the rest of the input.
      J := I + 1;
      while (J <= Len) and (Value[J] in ['A'..'Z', 'a'..'z', '0'..'9', '#']) do
        inc(J);
      if (J <= Len) and (Value[J] = ';') and
         TryDecodeHtmlRef(Copy(Value, I, J - I + 1), CodePoint) then
      begin
        AppendToBuffer(Result, CurrPos, UnicodeToUTF8(CodePoint));
        I := J + 1;
        continue;
      end;
      // Not a resolvable reference: fall through and copy the '&' literally.
    end;
    AppendToBuffer(Result, CurrPos, Value[I]);
    inc(I);
  end;
  // Trim the unused tail of the buffer.
  SetLength(Result, CurrPos - 1);
end;

end.