{ Source file: lazarus-ccr/components/jvcllaz/run/JvNet/jvstrtohtml.pas
  (ObjectPascal, 493 lines, 13 KiB) }

{-----------------------------------------------------------------------------
The contents of this file are subject to the Mozilla Public License
Version 1.1 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/MPL-1.1.html
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either expressed or implied. See the License for
the specific language governing rights and limitations under the License.
The Original Code is: JvStrToHtml.PAS, released on 2001-02-28.
The Initial Developer of the Original Code is Sébastien Buysse [sbuysse att buypin dott com]
Portions created by Sébastien Buysse are Copyright (C) 2001 Sébastien Buysse.
All Rights Reserved.
Contributor(s): Michael Beck [mbeck att bigfoot dott com].
Andreas Hausladen [Andreas dott Hausladen att gmx dott de]
You may retrieve the latest version of this file at the Project JEDI's JVCL home page,
located at http://jvcl.delphi-jedi.org
Known Issues:
-----------------------------------------------------------------------------}
// $Id$
unit JvStrToHtml;
{$mode objfpc}{$H+}
interface
uses
SysUtils, Classes;
type
// Non-visual component that wraps the unit-level StringToHtml / HtmlToString
// routines and exposes them as methods and as two published properties.
TJvStrToHtml = class(TComponent) // TJvComponent)
private
// Backing field read by the Html property.
FHtml: string;
// Backing field read by the Text property.
FValue: string;
// Write accessor of Html; runs the assigned value through HtmlToText.
procedure SetHtml(const Value: string);
// Write accessor of Text; runs the assigned value through TextToHtml.
procedure SetValue(const Value: string);
public
// Initialises both backing fields to empty strings.
constructor Create(AOwner: TComponent); override;
// Returns StringToHtml(Text).
function TextToHtml(const Text: string): string;
// Returns HtmlToString(Text).
function HtmlToText(const Text: string): string;
published
// Plain-text side of the conversion (reads FValue, writes via SetValue).
property Text: string read FValue write SetValue;
// HTML side of the conversion (reads FHtml, writes via SetHtml).
property Html: string read FHtml write SetHtml;
end;
// Encodes Value as HTML: characters outside a small ASCII whitelist are
// replaced by an entry of the Conversions table or by a decimal "&#N;" reference.
function StringToHtml(const Value: string): string;
// Decodes "&...;" references in Value back into UTF-8 text.
function HtmlToString(const Value: string): string;
//function CharToHtml(Ch: Char): string;
implementation
uses
LCLVersion, LazUtf8;
type
// One row of the Conversions lookup table.
TJvHtmlCodeRec = record
// Code point; compared against the value decoded from UTF-8 in StringToHtml
// and passed to UnicodeToUTF8 in HtmlToString.
Ch: Cardinal;
// HTML text emitted for Ch by StringToHtml and matched by HtmlToString.
Html: string;
end;
const
  { References:
    http://www.w3.org/TR/REC-html40/charset.html#h-5.3
    http://www.w3.org/TR/REC-html40/sgml/entities.html#h-24.2.1
    http://www.w3.org/TR/REC-html40/sgml/entities.html#h-24.4.1
  }
  { Lookup table between Unicode code points (Ch) and named HTML character
    entity references (Html).

    Every Html value must be the complete reference, including the leading
    '&' and the trailing ';': StringToHtml writes it to the output verbatim,
    and HtmlToString compares the whole '&...;' token it has scanned against
    this field (case-sensitively, e.g. '&dagger;' vs. '&Dagger;').

    Entries are listed in ascending code point order; both routines use a
    linear scan, so the order is a convenience and not a requirement. }
  Conversions: array [0..240] of TJvHtmlCodeRec = (
    (Ch: 34; Html: '&quot;'),
    (Ch: 38; Html: '&amp;'),
    (Ch: 39; Html: '&apos;'),
    (Ch: 60; Html: '&lt;'),
    (Ch: 62; Html: '&gt;'),
    (Ch: 160; Html: '&nbsp;'),
    (Ch: 161; Html: '&iexcl;'),
    (Ch: 162; Html: '&cent;'),
    (Ch: 163; Html: '&pound;'),
    (Ch: 164; Html: '&curren;'),
    (Ch: 165; Html: '&yen;'),
    (Ch: 166; Html: '&brvbar;'),
    (Ch: 167; Html: '&sect;'),
    (Ch: 168; Html: '&uml;'),
    (Ch: 169; Html: '&copy;'),
    (Ch: 170; Html: '&ordf;'),
    (Ch: 171; Html: '&laquo;'),
    (Ch: 172; Html: '&not;'),
    (Ch: 173; Html: '&shy;'),
    (Ch: 174; Html: '&reg;'),
    (Ch: 175; Html: '&macr;'),
    (Ch: 176; Html: '&deg;'),
    (Ch: 177; Html: '&plusmn;'),
    (Ch: 178; Html: '&sup2;'),
    (Ch: 179; Html: '&sup3;'),
    (Ch: 180; Html: '&acute;'),
    (Ch: 181; Html: '&micro;'),
    (Ch: 182; Html: '&para;'),
    (Ch: 183; Html: '&middot;'),
    (Ch: 184; Html: '&cedil;'),
    (Ch: 185; Html: '&sup1;'),
    (Ch: 186; Html: '&ordm;'),
    (Ch: 187; Html: '&raquo;'),
    (Ch: 188; Html: '&frac14;'),
    (Ch: 189; Html: '&frac12;'),
    (Ch: 190; Html: '&frac34;'),
    (Ch: 191; Html: '&iquest;'),
    (Ch: 192; Html: '&Agrave;'),
    (Ch: 193; Html: '&Aacute;'),
    (Ch: 194; Html: '&Acirc;'),
    (Ch: 195; Html: '&Atilde;'),
    (Ch: 196; Html: '&Auml;'),
    (Ch: 197; Html: '&Aring;'),
    (Ch: 198; Html: '&AElig;'),
    (Ch: 199; Html: '&Ccedil;'),
    (Ch: 200; Html: '&Egrave;'),
    (Ch: 201; Html: '&Eacute;'),
    (Ch: 202; Html: '&Ecirc;'),
    (Ch: 203; Html: '&Euml;'),
    (Ch: 204; Html: '&Igrave;'),
    (Ch: 205; Html: '&Iacute;'),
    (Ch: 206; Html: '&Icirc;'),
    (Ch: 207; Html: '&Iuml;'),
    (Ch: 208; Html: '&ETH;'),
    (Ch: 209; Html: '&Ntilde;'),
    (Ch: 210; Html: '&Ograve;'),
    (Ch: 211; Html: '&Oacute;'),
    (Ch: 212; Html: '&Ocirc;'),
    (Ch: 213; Html: '&Otilde;'),
    (Ch: 214; Html: '&Ouml;'),
    (Ch: 215; Html: '&times;'),
    (Ch: 216; Html: '&Oslash;'),
    (Ch: 217; Html: '&Ugrave;'),
    (Ch: 218; Html: '&Uacute;'),
    (Ch: 219; Html: '&Ucirc;'),
    (Ch: 220; Html: '&Uuml;'),
    (Ch: 221; Html: '&Yacute;'),
    (Ch: 222; Html: '&THORN;'),
    (Ch: 223; Html: '&szlig;'),
    (Ch: 224; Html: '&agrave;'),
    (Ch: 225; Html: '&aacute;'),
    (Ch: 226; Html: '&acirc;'),
    (Ch: 227; Html: '&atilde;'),
    (Ch: 228; Html: '&auml;'),
    (Ch: 229; Html: '&aring;'),
    (Ch: 230; Html: '&aelig;'),
    (Ch: 231; Html: '&ccedil;'),
    (Ch: 232; Html: '&egrave;'),
    (Ch: 233; Html: '&eacute;'),
    (Ch: 234; Html: '&ecirc;'),
    (Ch: 235; Html: '&euml;'),
    (Ch: 236; Html: '&igrave;'),
    (Ch: 237; Html: '&iacute;'),
    (Ch: 238; Html: '&icirc;'),
    (Ch: 239; Html: '&iuml;'),
    (Ch: 240; Html: '&eth;'),
    (Ch: 241; Html: '&ntilde;'),
    (Ch: 242; Html: '&ograve;'),
    (Ch: 243; Html: '&oacute;'),
    (Ch: 244; Html: '&ocirc;'),
    (Ch: 245; Html: '&otilde;'),
    (Ch: 246; Html: '&ouml;'),
    (Ch: 247; Html: '&divide;'),
    (Ch: 248; Html: '&oslash;'),
    (Ch: 249; Html: '&ugrave;'),
    (Ch: 250; Html: '&uacute;'),
    (Ch: 251; Html: '&ucirc;'),
    (Ch: 252; Html: '&uuml;'),
    (Ch: 253; Html: '&yacute;'),
    (Ch: 254; Html: '&thorn;'),
    (Ch: 255; Html: '&yuml;'),
    (Ch: 338; Html: '&OElig;'),
    (Ch: 339; Html: '&oelig;'),
    (Ch: 352; Html: '&Scaron;'),
    (Ch: 353; Html: '&scaron;'),
    (Ch: 376; Html: '&Yuml;'),
    (Ch: 402; Html: '&fnof;'),
    (Ch: 710; Html: '&circ;'),
    (Ch: 732; Html: '&tilde;'),
    (Ch: 913; Html: '&Alpha;'),
    (Ch: 914; Html: '&Beta;'),
    (Ch: 915; Html: '&Gamma;'),
    (Ch: 916; Html: '&Delta;'),
    (Ch: 917; Html: '&Epsilon;'),
    (Ch: 918; Html: '&Zeta;'),
    (Ch: 919; Html: '&Eta;'),
    (Ch: 920; Html: '&Theta;'),
    (Ch: 921; Html: '&Iota;'),
    (Ch: 922; Html: '&Kappa;'),
    (Ch: 923; Html: '&Lambda;'),
    (Ch: 924; Html: '&Mu;'),
    (Ch: 925; Html: '&Nu;'),
    (Ch: 926; Html: '&Xi;'),
    (Ch: 927; Html: '&Omicron;'),
    (Ch: 928; Html: '&Pi;'),
    (Ch: 929; Html: '&Rho;'),
    (Ch: 931; Html: '&Sigma;'),
    (Ch: 932; Html: '&Tau;'),
    (Ch: 933; Html: '&Upsilon;'),
    (Ch: 934; Html: '&Phi;'),
    (Ch: 935; Html: '&Chi;'),
    (Ch: 936; Html: '&Psi;'),
    (Ch: 937; Html: '&Omega;'),
    (Ch: 945; Html: '&alpha;'),
    (Ch: 946; Html: '&beta;'),
    (Ch: 947; Html: '&gamma;'),
    (Ch: 948; Html: '&delta;'),
    (Ch: 949; Html: '&epsilon;'),
    (Ch: 950; Html: '&zeta;'),
    (Ch: 951; Html: '&eta;'),
    (Ch: 952; Html: '&theta;'),
    (Ch: 953; Html: '&iota;'),
    (Ch: 954; Html: '&kappa;'),
    (Ch: 955; Html: '&lambda;'),
    (Ch: 956; Html: '&mu;'),
    (Ch: 957; Html: '&nu;'),
    (Ch: 958; Html: '&xi;'),
    (Ch: 959; Html: '&omicron;'),
    (Ch: 960; Html: '&pi;'),
    (Ch: 961; Html: '&rho;'),
    (Ch: 962; Html: '&sigmaf;'),
    (Ch: 963; Html: '&sigma;'),
    (Ch: 964; Html: '&tau;'),
    (Ch: 965; Html: '&upsilon;'),
    (Ch: 966; Html: '&phi;'),
    (Ch: 967; Html: '&chi;'),
    (Ch: 968; Html: '&psi;'),
    (Ch: 969; Html: '&omega;'),
    (Ch: 977; Html: '&thetasym;'),
    (Ch: 978; Html: '&upsih;'),
    (Ch: 982; Html: '&piv;'),
    (Ch: 8194; Html: '&ensp;'),
    (Ch: 8195; Html: '&emsp;'),
    (Ch: 8201; Html: '&thinsp;'),
    (Ch: 8204; Html: '&zwnj;'),
    (Ch: 8205; Html: '&zwj;'),
    (Ch: 8206; Html: '&lrm;'),
    (Ch: 8207; Html: '&rlm;'),
    (Ch: 8211; Html: '&ndash;'),
    (Ch: 8212; Html: '&mdash;'),
    (Ch: 8216; Html: '&lsquo;'),
    (Ch: 8217; Html: '&rsquo;'),
    (Ch: 8218; Html: '&sbquo;'),
    (Ch: 8220; Html: '&ldquo;'),
    (Ch: 8221; Html: '&rdquo;'),
    (Ch: 8222; Html: '&bdquo;'),
    (Ch: 8224; Html: '&dagger;'),
    (Ch: 8225; Html: '&Dagger;'),
    (Ch: 8226; Html: '&bull;'),
    (Ch: 8230; Html: '&hellip;'),
    (Ch: 8240; Html: '&permil;'),
    (Ch: 8242; Html: '&prime;'),
    (Ch: 8243; Html: '&Prime;'),
    (Ch: 8249; Html: '&lsaquo;'),
    (Ch: 8250; Html: '&rsaquo;'),
    (Ch: 8254; Html: '&oline;'),
    (Ch: 8364; Html: '&euro;'),
    (Ch: 8482; Html: '&trade;'),
    (Ch: 8592; Html: '&larr;'),
    (Ch: 8593; Html: '&uarr;'),
    (Ch: 8594; Html: '&rarr;'),
    (Ch: 8595; Html: '&darr;'),
    (Ch: 8596; Html: '&harr;'),
    (Ch: 8629; Html: '&crarr;'),
    (Ch: 8704; Html: '&forall;'),
    (Ch: 8706; Html: '&part;'),
    (Ch: 8707; Html: '&exist;'),
    (Ch: 8709; Html: '&empty;'),
    (Ch: 8711; Html: '&nabla;'),
    (Ch: 8712; Html: '&isin;'),
    (Ch: 8713; Html: '&notin;'),
    (Ch: 8715; Html: '&ni;'),
    (Ch: 8719; Html: '&prod;'),
    (Ch: 8721; Html: '&sum;'),
    (Ch: 8722; Html: '&minus;'),
    (Ch: 8727; Html: '&lowast;'),
    (Ch: 8730; Html: '&radic;'),
    (Ch: 8733; Html: '&prop;'),
    (Ch: 8734; Html: '&infin;'),
    (Ch: 8736; Html: '&ang;'),
    (Ch: 8743; Html: '&and;'),
    (Ch: 8744; Html: '&or;'),
    (Ch: 8745; Html: '&cap;'),
    (Ch: 8746; Html: '&cup;'),
    (Ch: 8747; Html: '&int;'),
    (Ch: 8756; Html: '&there4;'),
    (Ch: 8764; Html: '&sim;'),
    (Ch: 8773; Html: '&cong;'),
    (Ch: 8776; Html: '&asymp;'),
    (Ch: 8800; Html: '&ne;'),
    (Ch: 8801; Html: '&equiv;'),
    (Ch: 8804; Html: '&le;'),
    (Ch: 8805; Html: '&ge;'),
    (Ch: 8834; Html: '&sub;'),
    (Ch: 8835; Html: '&sup;'),
    (Ch: 8836; Html: '&nsub;'),
    (Ch: 8838; Html: '&sube;'),
    (Ch: 8839; Html: '&supe;'),
    (Ch: 8853; Html: '&oplus;'),
    (Ch: 8855; Html: '&otimes;'),
    (Ch: 8869; Html: '&perp;'),
    (Ch: 8901; Html: '&sdot;'),
    (Ch: 8968; Html: '&lceil;'),
    (Ch: 8969; Html: '&rceil;'),
    (Ch: 8970; Html: '&lfloor;'),
    (Ch: 8971; Html: '&rfloor;'),
    (Ch: 9674; Html: '&loz;'),
    (Ch: 9824; Html: '&spades;'),
    (Ch: 9827; Html: '&clubs;'),
    (Ch: 9829; Html: '&hearts;'),
    (Ch: 9830; Html: '&diams;')
  );
var
// Parallel array to Conversions: InitConversionsHash stores
// GetHtmlHash(Conversions[I].Html) at index I.
// NOTE(review): no call to InitConversionsHash and no reader of this array is
// visible in this unit as shown - confirm whether it is still needed.
ConversionsHash: array of Word;
{ TJvStrToHtml }
{ Constructs the component through the inherited constructor and leaves both
  backing fields (Html and Text) as empty strings. }
constructor TJvStrToHtml.Create(AOwner: TComponent);
begin
  inherited Create(AOwner);
  // The two resets are independent of each other.
  FHtml := '';
  FValue := '';
end;
{ Method form of the unit-level HtmlToString: returns Text with its HTML
  character references decoded. }
function TJvStrToHtml.HtmlToText(const Text: string): string;
begin
  Exit(HtmlToString(Text));
end;
{ Write accessor of the Html property.

  Stores the decoded form of Value in FValue (read by Text) and Value itself
  in FHtml (read by Html), so that both properties read back consistently
  after an assignment to Html. }
procedure TJvStrToHtml.SetHtml(const Value: string);
begin
  // Convert first: if decoding raises, neither field has been modified.
  FValue := HtmlToText(Value);
  // Keep the property that was assigned in sync with what was assigned;
  // without this, reading Html after "Html := X" would not return X.
  FHtml := Value;
end;
{ Write accessor of the Text property.

  Stores the encoded form of Value in FHtml (read by Html) and Value itself
  in FValue (read by Text), so that both properties read back consistently
  after an assignment to Text. }
procedure TJvStrToHtml.SetValue(const Value: string);
begin
  // Convert first: if encoding raises, neither field has been modified.
  FHtml := TextToHtml(Value);
  // Keep the property that was assigned in sync with what was assigned;
  // without this, reading Text after "Text := X" would not return X.
  FValue := Value;
end;
{ Method form of the unit-level StringToHtml: returns Text encoded as HTML. }
function TJvStrToHtml.TextToHtml(const Text: string): string;
begin
  Exit(StringToHtml(Text));
end;
{ Computes a 16-bit checksum of S.

  The checksum starts at Length(S); for every character its ordinal value,
  shifted left by (1-based position mod 4) bits, is added and the running
  total is truncated to Word after each step. }
function GetHtmlHash(const S: string): Word;
var
  Position, Count: Integer;
  Shifted: Integer;
begin
  Count := Length(S);
  Result := Count;
  Position := 1;
  while Position <= Count do
  begin
    // At most 255 shl 3, so the intermediate always fits an Integer.
    Shifted := Ord(S[Position]) shl (Position mod 4);
    Result := Word(Result + Shifted);
    Inc(Position);
  end;
end;
{ Sizes ConversionsHash to match Conversions and fills slot N with
  GetHtmlHash of the Html text of Conversions[N]. }
procedure InitConversionsHash;
var
  Slot: Integer;
begin
  SetLength(ConversionsHash, Length(Conversions));
  Slot := 0;
  while Slot <= High(ConversionsHash) do
  begin
    ConversionsHash[Slot] := GetHtmlHash(Conversions[Slot].Html);
    Inc(Slot);
  end;
end;
{ Encodes the UTF-8 string Value as HTML.

  Bytes in ['a'..'z', 'A'..'Z', '0'..'9', '_', ' '] are copied unchanged.
  Anything else is decoded as one UTF-8 code point and written either as the
  matching Html text from the Conversions table or, when the table has no
  entry for it, as a decimal numeric reference "&#N;".
  An empty Value yields an empty result. }
function StringToHtml(const Value: String): String;
var
  Capacity: Integer;     // currently allocated length of Result
  WritePos: Integer;     // 1-based index of the next byte to write in Result
  Src, SrcLast: PChar;   // read cursor and address of the last input byte

  { Copies Piece into Result at WritePos. When the piece would not fit, the
    buffer is enlarged by 100 bytes; the pieces passed in here are a single
    character, a table entry or a formatted numeric reference. }
  procedure Emit(Piece: String);
  var
    PieceLen: Integer;
  begin
    PieceLen := Length(Piece);
    if WritePos + PieceLen > Capacity then
    begin
      Inc(Capacity, 100);
      SetLength(Result, Capacity);
    end;
    Move(Piece[1], Result[WritePos], PieceLen);
    Inc(WritePos, PieceLen);
  end;

  { Returns the index of the first Conversions entry whose Ch equals
    CodePoint, or -1 when there is none. }
  function IndexOfCodePoint(CodePoint: Cardinal): Integer;
  var
    K: Integer;
  begin
    for K := Low(Conversions) to High(Conversions) do
      if Conversions[K].Ch = CodePoint then
        Exit(K);
    Result := -1;
  end;

var
  ByteCount, EntryIdx: Integer;
  CodePoint: Cardinal;
begin
  if Value = '' then
    Exit('');

  // Start with a buffer as long as the input; Emit grows it on demand.
  Capacity := Length(Value);
  SetLength(Result, Capacity);
  Src := @Value[1];
  SrcLast := @Value[Capacity];
  WritePos := 1;

  while Src <= SrcLast do
  begin
    ByteCount := 1;
    if Src^ in ['a'..'z', 'A'..'Z', '0'..'9', '_', ' '] then
      Emit(Src^)
    else
    begin
      // The decoder reports how many input bytes the code point occupies.
      {$IF LCL_FullVersion >= 2000000}
      CodePoint := UTF8CodePointToUniCode(Src, ByteCount);
      {$ELSE}
      CodePoint := UTF8CharacterToUniCode(Src, ByteCount);
      {$IFEND}
      EntryIdx := IndexOfCodePoint(CodePoint);
      if EntryIdx >= 0 then
        Emit(Conversions[EntryIdx].Html)
      else
        Emit(Format('&#%d;', [CodePoint]));
    end;
    Inc(Src, ByteCount);
  end;

  // Trim the unused tail of the buffer.
  SetLength(Result, WritePos - 1);
end;
{ Decodes HTML character references in Value and returns the result as UTF-8.

  Recognised forms, all of which must end with ';':
    - a named reference listed in the Conversions table (exact, case-sensitive
      match of the whole "&name;" token);
    - a decimal reference "&#N;";
    - a hexadecimal reference "&#xH;" or "&#XH;".
  For compatibility with earlier behaviour the '#' may be missing
  ("&65;", "&x41;").

  Anything that is not a complete, recognised reference - a lone '&', an
  unknown name, a missing ';', a value outside 1..$10FFFF - is copied to the
  output unchanged instead of raising a conversion error. The output is never
  longer than the input. An empty Value yields an empty result. }
function HtmlToString(const Value: String): String;
const
  // Characters that may appear between '&' and ';' in a reference.
  RefChars = ['A'..'Z', 'a'..'z', '0'..'9', '#'];
var
  P, PEnd, Q: PChar;
  CurrPos: Integer;       // 1-based index of the next byte to write in Result
  Ref, Decoded: String;

  { Copies s into Result at CurrPos. Result was allocated with the length of
    Value and decoding never expands the text, so no growth is needed. }
  procedure Append(const s: String);
  var
    n: Integer;
  begin
    n := Length(s);
    if n = 0 then
      Exit; // s[1] must not be touched for an empty string
    Move(s[1], Result[CurrPos], n);
    Inc(CurrPos, n);
  end;

  { Tries to decode ARef, a complete token "&...;". Returns True and the
    UTF-8 text in AText on success, False when the token is not recognised. }
  function DecodeReference(const ARef: String; out AText: String): Boolean;
  var
    J, Code: Integer;
    Num: String;
  begin
    Result := False;
    AText := '';

    // Named reference: whole-token comparison against the table.
    for J := Low(Conversions) to High(Conversions) do
      if ARef = Conversions[J].Html then
      begin
        AText := UnicodeToUTF8(Conversions[J].Ch);
        Result := True;
        Exit;
      end;

    // Numeric reference: strip '&' and ';', then an optional '#'.
    Num := Copy(ARef, 2, Length(ARef) - 2);
    if (Num <> '') and (Num[1] = '#') then
      Delete(Num, 1, 1);
    // A leading x/X marks hexadecimal; '$' is the prefix the RTL parser expects.
    if (Num <> '') and (Num[1] in ['x', 'X']) then
      Num[1] := '$';
    if TryStrToInt(Num, Code) and (Code >= 1) and (Code <= $10FFFF) then
    begin
      AText := UnicodeToUTF8(Cardinal(Code));
      Result := AText <> '';
    end;
  end;

begin
  if Value = '' then
  begin
    Result := '';
    Exit;
  end;

  SetLength(Result, Length(Value));
  P := @Value[1];
  PEnd := @Value[Length(Value)];
  CurrPos := 1;

  while P <= PEnd do
  begin
    if P^ = '&' then
    begin
      // Look ahead for the terminating ';'. Stopping at the first character
      // that cannot be part of a reference keeps a stray '&' from swallowing
      // text (and real references) that follow it.
      Q := P + 1;
      while (Q <= PEnd) and (Q^ in RefChars) do
        Inc(Q);
      if (Q <= PEnd) and (Q^ = ';') then
      begin
        SetString(Ref, P, Q - P + 1);
        if DecodeReference(Ref, Decoded) then
        begin
          Append(Decoded);
          P := Q + 1;
          Continue;
        end;
      end;
      // Not a recognised reference: fall through and copy the '&' literally.
    end;
    Append(P^);
    Inc(P);
  end;

  // Trim the unused tail of the buffer.
  SetLength(Result, CurrPos - 1);
end;
end.