Files
lazarus-ccr/components/jvcllaz/run/JvNet/jvhtmlparser.pas
2018-09-26 22:57:28 +00:00

484 lines
13 KiB
ObjectPascal

{-----------------------------------------------------------------------------
The contents of this file are subject to the Mozilla Public License
Version 1.1 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/MPL-1.1.html
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either expressed or implied. See the License for
the specific language governing rights and limitations under the License.
The Original Code is: JvHTMLParser.PAS, released on 2001-02-28.
The Initial Developer of the Original Code is Sébastien Buysse [sbuysse att buypin dott com]
Portions created by Sébastien Buysse are Copyright (C) 2001 Sébastien Buysse.
All Rights Reserved.
Contributor(s): Michael Beck [mbeck att bigfoot dott com].
Alexander Samusenko[sandx att chat dott ru].
CarlEfird.
You may retrieve the latest version of this file at the Project JEDI's JVCL home page,
located at http://jvcl.delphi-jedi.org
Known Issues:
-----------------------------------------------------------------------------}
// $Id$
unit JvHtmlParser;
{$mode objfpc}{$H+}
interface
uses
SysUtils, Classes;
///JclStrings,
// JvComponentBase;
type
PTagInfo = ^TTagInfo;
TTagInfo = record
BeginPos: Integer;
EndPos: Integer;
BeginContext: Integer;
EndContext: Integer;
Key: Integer;
end;
// (rom) definitely needs improvement
TJvParserInfo = class(TObject)
public
StartTag: string;
EndTag: string;
MustBe: Integer;
TakeText: Integer;
end;
TTagInfoList = class(TList)
public
procedure AddValue(const Value: TTagInfo);
procedure Clear; override;
end;
TJvKeyFoundEvent = procedure(Sender: TObject; Key, Results, OriginalLine: string) of object;
TJvKeyFoundExEvent = procedure(Sender: TObject; Key, Results, OriginalLine: string;
TagInfo: TTagInfo; Attributes: TStrings) of object;
TJvHTMLParser = class(TComponent) //TJvComponent)
private
FParser: TStringList;
FKeys: TStringList;
FFileName: TFileName;
FTagList: TTagInfoList;
FContent: string;
FOnKeyFound: TJvKeyFoundEvent;
FOnKeyFoundEx: TJvKeyFoundExEvent;
function GetParser: TStrings;
procedure SetParser(Value: TStrings);
procedure SetFileName(Value: TFileName);
procedure SetTagList(const Value: TTagInfoList);
function GetConditionsCount: Integer;
protected
procedure Loaded; override;
property TagList: TTagInfoList read FTagList write SetTagList;
public
procedure AnalyseString(const Str: string);
procedure AnalyseFile;
procedure AddCondition(const Keyword: string; const StartTag: string = '<';
const EndTag: string = '>'; TextSelection: Integer = 0);
procedure RemoveCondition(Index: Integer);
constructor Create(AOwner: TComponent); override;
destructor Destroy; override;
property ConditionsCount: Integer read GetConditionsCount;
procedure ClearConditions;
procedure GetCondition(Index: Integer; var Keyword, StartTag, EndTag: string); overload;
procedure GetCondition(Index: Integer; var Keyword, StartTag, EndTag: string;
var TextSelection: Integer); overload;
property Content: string read FContent;
published
property FileName: TFileName read FFileName write SetFileName;
property Parser: TStrings read GetParser write SetParser;
property OnKeyFound: TJvKeyFoundEvent read FOnKeyFound write FOnKeyFound;
property OnKeyFoundEx: TJvKeyFoundExEvent read FOnKeyFoundEx write FOnKeyFoundEx;
end;
implementation
uses
StrUtils,
{$IFNDEF COMPILER12_UP}
JvJCLUtils,
{$ENDIF ~COMPILER12_UP}
JvConsts;
{Comparison function. Used internally for observance of the sequences tags}
function CompareTags(Item1, Item2: Pointer): Integer;
begin
Result := (PTagInfo(Item1)^.BeginPos - PTagInfo(Item2)^.BeginPos);
end;
{ Utilities similar to corresponding JCL routines }
function StrFind(const Substr, S: string; const Index: SizeInt = 1): SizeInt;
begin
Result := PosEx(SubStr, S, Index);
end;
//=== { TJvHTMLParser } ======================================================
constructor TJvHTMLParser.Create(AOwner: TComponent);
begin
inherited Create(AOwner);
FParser := TStringList.Create;
FKeys := TStringList.Create;
FTagList := TTagInfoList.Create;
end;
destructor TJvHTMLParser.Destroy;
var
Index: Integer;
begin
FParser.Free;
for Index := FKeys.Count - 1 downto 0 do
FKeys.Objects[Index].Free;
FKeys.Free;
FTagList.Free;
inherited Destroy;
end;
procedure TJvHTMLParser.Loaded;
begin
inherited Loaded;
SetParser(FParser);
end;
procedure TJvHTMLParser.SetFileName(Value: TFileName);
begin
if FFileName <> Value then
begin
FFileName := Value;
if not (csDesigning in ComponentState) then
AnalyseFile;
end;
end;
function TJvHTMLParser.GetParser: TStrings;
begin
Result := FParser;
end;
procedure TJvHTMLParser.SetParser(Value: TStrings);
var
I: Integer;
Obj: TJvParserInfo;
Cap: string;
begin
if FParser <> Value then // make sure we don't assign to ourselves (that will clear the list)
FParser.Assign(Value);
for I := FKeys.Count - 1 downto 0 do
FKeys.Objects[I].Free;
FKeys.Clear;
I := 0;
while I < FParser.Count do
begin
Obj := TJvParserInfo.Create;
try
Cap := FParser[I];
Inc(I);
Obj.StartTag := FParser[I];
Inc(I);
Obj.EndTag := FParser[I];
Inc(I);
Obj.MustBe := StrToInt(FParser[I]);
Inc(I);
Obj.TakeText := StrToInt(FParser[I]);
Inc(I);
finally
FKeys.AddObject(Cap, Obj);
end;
end;
end;
// (rom) reimplemented with a TStringList
procedure TJvHTMLParser.AnalyseFile;
var
List: TStringList;
begin
List := TStringList.Create;
try
if FileExists(FileName) then
begin
List.LoadFromFile(FileName);
AnalyseString(List.Text);
end;
finally
List.Free;
end;
end;
procedure TJvHTMLParser.AnalyseString(const Str: string);
var
Str2, Str3: string;
StartTag1, StartTag2: string;
I, J, K, Index: Integer;
TagInfo: TTagInfo;
AttributesList: TStringList;
procedure InnerParseAttributes(Content: PChar; Strings: TStrings);
var
Head, Tail: PChar;
EOS, InQuote, LeadQuote: Boolean;
QuoteChar: Char;
begin
if (Content = nil) or (Content^ = #0) then
Exit;
Tail := Content;
QuoteChar := #0;
repeat
while CharInSet(Tail^, [Cr, Lf, ' ']) do
Inc(Tail);
Head := Tail;
InQuote := False;
LeadQuote := False;
while True do
begin
while (InQuote and not CharInSet(Tail^, [#0, '"'])) or
not CharInSet(Tail^, [#0, Cr, Lf, ' ', '"']) do
Inc(Tail);
if Tail^ = '"' then
begin
if (QuoteChar <> #0) and (QuoteChar = Tail^) then
QuoteChar := #0
else
begin
LeadQuote := Head = Tail;
QuoteChar := Tail^;
if LeadQuote then
Inc(Head);
end;
InQuote := QuoteChar <> #0;
if InQuote then
Inc(Tail)
else
Break;
end
else
Break;
end;
if not LeadQuote and (Tail^ <> #0) and (Tail^ = '"') then
Inc(Tail);
EOS := Tail^ = #0;
Tail^ := #0;
if Head^ <> #0 then
Strings.Add(Head);
Inc(Tail);
until EOS;
end;
procedure ParseAttributes(Strings: TStrings; const Value: string);
var
P: PChar;
Tmp: string;
begin
Strings.Clear;
Tmp := Value;
UniqueString(Tmp);
P := PChar(Tmp);
// if P^ in [#0, '<', '>'] then
if CharInSet(P^, [#0, '>']) then
Exit;
// skip first word (the tag) and any whitespace
while (P^ <> #0) and (P <> nil) do
begin
if P^ = ' ' then
begin
Inc(P);
Break;
end;
Inc(P);
end;
InnerParseAttributes(P, Strings);
end;
begin
if (FKeys.Count = 0) and (FParser.Count <> 0) then
SetParser(FParser);
FContent := Str;
AttributesList := TStringList.Create;
try
if FKeys.Count > 0 then
begin
FTagList.Clear;
for I := 0 to FKeys.Count - 1 do
begin
StartTag1 := TJvParserInfo(FKeys.Objects[I]).StartTag;
Starttag2 := '';
if (Length(StartTag1) > 2) and (StartTag1[Length(StartTag1)] = '>') then
begin
// split the tag so tags with attributes can be found
Delete(StartTag1, Length(StartTag1), 1);
StartTag2 := '>';
end;
J := 1;
while J <> 0 do
begin
// changed from StrSearch(case sensitive) to StrFind (case insensitive)
J := StrFind(StartTag1, Str, J);
if J > 0 then
begin
// changed from StrSearch(case sensitive) to StrFind (case insensitive)
K := StrFind(TJvParserInfo(FKeys.Objects[I]).EndTag, Str, J);
TagInfo.BeginPos := J;
TagInfo.EndPos := K + Length(TJvParserInfo(FKeys.Objects[I]).EndTag);
TagInfo.Key := I;
case TJvParserInfo(FKeys.Objects[I]).TakeText of
0: //Between limits
begin
if StartTag2 = '' then
TagInfo.BeginContext := J + Length(TJvParserInfo(FKeys.Objects[I]).StartTag)
else
// changed from StrSearch(case sensitive) to StrFind (case insensitive)
TagInfo.BeginContext := StrFind(StartTag2, Str, j) + 1;
TagInfo.EndContext := K;
end;
1: //All before start tag
begin
TagInfo.BeginContext := 1;
TagInfo.EndContext := J;
end;
2: //All after start tag
begin
if StartTag2 = '' then
TagInfo.BeginContext := J + Length(TJvParserInfo(FKeys.Objects[I]).StartTag)
else
// changed from StrSearch(case sensitive) to StrFind (case insensitive)
TagInfo.BeginContext := StrFind(StartTag2, Str, j) + 1;
TagInfo.EndContext := Length(Str);
end;
3: //The whole line if containing start tag
begin
TagInfo.BeginContext := J;
// changed from StrSearch(case sensitive) to StrFind (case insensitive)
TagInfo.EndContext := StrFind(Lf, Str, J);
end;
end;
FTagList.AddValue(TagInfo);
J := J + Length(TJvParserInfo(FKeys.Objects[I]).StartTag);
end;
end;
end;
end;
FTagList.Sort(@CompareTags);
with FTagList do
begin
for Index := 0 to Count - 1 do
begin
// Str2 now contains eveything between the start and end tags
Str2 := Copy(Str, PTagInfo(Items[Index])^.BeginContext,
PTagInfo(Items[Index])^.EndContext - PTagInfo(Items[Index])^.BeginContext);
if StartTag2 = '' then
Str3 := ''
else
//Str3 contains the start tag as found, may include attributes or other tags
Str3 := Copy(Str, PTagInfo(Items[Index])^.BeginPos,
PTagInfo(Items[Index])^.BeginContext - PTagInfo(Items[Index])^.BeginPos - 1);
if Assigned(FOnKeyFound) then
FOnKeyFound(Self, FKeys[PTagInfo(Items[Index])^.Key], Str2, Str);
if Assigned(FOnKeyFoundEx) then
begin
if Str3 <> '' then
ParseAttributes(AttributesList, Str3)
else
ParseAttributes(AttributesList, Str2);
FOnKeyFoundEx(Self, FKeys[PTagInfo(Items[Index])^.Key], Str2, Str,
PTagInfo(Items[Index])^, AttributesList);
end;
end;
end;
finally
AttributesList.Free;
end;
end;
procedure TJvHTMLParser.AddCondition(const Keyword: string;
const StartTag: string; const EndTag: string; TextSelection: Integer);
var
Obj: TJvParserInfo;
begin
Obj := TJvParserInfo.Create;
Obj.StartTag := StartTag;
Obj.EndTag := EndTag;
Obj.TakeText := TextSelection;
FKeys.AddObject(Keyword, TObject(Obj));
end;
procedure TJvHTMLParser.RemoveCondition(Index: Integer);
begin
FKeys.Objects[Index].Free;
FKeys.Delete(Index);
end;
procedure TJvHTMLParser.ClearConditions;
var
Index: Integer;
begin
FParser.Clear;
for Index := FKeys.Count - 1 downto 0 do
FKeys.Objects[Index].Free;
FKeys.Clear;
end;
procedure TJvHTMLParser.GetCondition(Index: Integer; var Keyword, StartTag, EndTag: string);
begin
Keyword := FKeys[Index];
StartTag := TJvParserInfo(FKeys.Objects[Index]).StartTag;
EndTag := TJvParserInfo(FKeys.Objects[Index]).EndTag;
end;
procedure TJvHTMLParser.GetCondition(Index: Integer; var Keyword, StartTag, EndTag: string;
var TextSelection: Integer);
begin
GetCondition(Index, Keyword, StartTag, EndTag);
TextSelection := TJvParserInfo(FKeys.Objects[Index]).TakeText;
end;
function TJvHTMLParser.GetConditionsCount: Integer;
begin
Result := FKeys.Count;
end;
procedure TJvHTMLParser.SetTagList(const Value: TTagInfoList);
begin
FTagList := Value;
end;
//=== { TTagInfoList } =======================================================
procedure TTagInfoList.AddValue(const Value: TTagInfo);
var
P: PTagInfo;
begin
GetMem(P, SizeOf(TTagInfo));
if P <> nil then
begin
P^ := Value;
Add(P);
end;
end;
procedure TTagInfoList.Clear;
var
I: Integer;
begin
for I := 0 to Count - 1 do
FreeMem(Items[I], SizeOf(TTagInfo));
inherited Clear;
end;
end.