1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-03-17 20:58:08 +02:00

added: ObjectPascal lexer (#883)

Initial version (proposal) of an ObjectPascal lexer

- the assembler state is not in use yet...
- the Intrinsic Routines rule is commented out and probably not needed...
This commit is contained in:
codiacdev 2023-11-17 00:13:06 +01:00 committed by GitHub
parent 96a12e377e
commit 07a90a93dd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 2742 additions and 0 deletions

View File

@ -0,0 +1,137 @@
<lexer>
<config>
<name>ObjectPascal</name>
<alias>objectpascal</alias>
<filename>*.pas</filename>
<filename>*.pp</filename>
<filename>*.inc</filename>
<filename>*.dpr</filename>
<filename>*.dpk</filename>
<filename>*.lpr</filename>
<filename>*.lpk</filename>
<mime_type>text/x-pascal</mime_type>
</config>
<rules>
<state name="root">
<!-- TextWhitespace -->
<rule pattern="[^\S\n]+">
<token type="TextWhitespace" />
</rule>
<rule pattern="\n">
<token type="Text"/>
</rule>
<rule pattern="\s+">
<token type="Text"/>
</rule>
<!-- Magic Number (BOM) -->
<rule pattern="[^\u0000-\u007F]+">
<token type="Text"/>
</rule>
<!-- Compiler Directive -->
<rule pattern="\{[$].*?\}">
<token type="CommentPreproc" />
</rule>
<!-- Comment Single -->
<rule pattern="(//.*?)(\n)">
<bygroups>
<token type="CommentSingle" />
<token type="TextWhitespace" />
</bygroups>
</rule>
<!-- Comment Multiline Block -->
<rule pattern="\([*](.|\n)*?[*]\)">
<token type="CommentMultiline"/>
</rule>
<!-- Comment Multiline Source Documentation -->
<rule pattern="[{](.|\n)*?[}]">
<token type="CommentMultiline"/>
</rule>
<!-- Range Indicator -->
<rule pattern="(?i:(\.\.))">
<token type="Operator" />
</rule>
<!-- Numbers -->
<rule pattern="[\$][0-9a-fA-F]*[xX][0-9a-fA-F]*|[\$][0-9a-fA-F]*|[0-9]+[xX][0-9a-fA-F]*|([0-9]+[0-9a-fA-F]+(?=[hH]))">
<token type="LiteralNumberHex" />
</rule>
<rule pattern="([0-9][0-9_]*\.([0-9][0-9_]*)?|\.[0-9][0-9_]*)([eE][+\-]?[0-9][0-9_]*)?[fFdD]?|[0-9][eE][+\-]?[0-9][0-9_]*[fFdD]?|[0-9]([eE][+\-]?[0-9][0-9_]*)?[fFdD]|0[xX]([0-9a-fA-F][0-9a-fA-F_]*\.?|([0-9a-fA-F][0-9a-fA-F_]*)?\.[0-9a-fA-F][0-9a-fA-F_]*)[pP][+\-]?[0-9][0-9_]*[fFdD]?">
<token type="LiteralNumberFloat" />
</rule>
<rule pattern="0|[1-9][0-9_]*?">
<token type="LiteralNumberInteger" />
</rule>
<!-- Multiline string Literal -->
<rule pattern="(&#39;&#39;&#39;\s*\n)(.|\n)*?(&#39;&#39;&#39;)(?=\s*&#59;)">
<token type="LiteralString" />
</rule>
<!-- string -->
<rule pattern="(?i:(\')).*?(?i:(\'))">
<token type="LiteralString" />
</rule>
<!-- string (Special case for Delphi Assembler)-->
<rule pattern="(?i:(&#34;)).*?(?i:(&#34;))">
<token type="LiteralString" />
</rule>
<!-- Simple Types -->
<rule pattern="\b(?!=\.)(?i:(NativeInt|NativeUInt|LongInt|LongWord|Integer|Int64|Cardinal|UInt64|ShortInt|SmallInt|FixedInt|Byte|Word|FixedUInt|Int8|Int16|Int32|UInt8|UInt16|UInt32|Real48|Single|Double|Real|Extended|Comp|Currency|Char|AnsiChar|WideChar|UCS2Char|UCS4Char|string|ShortString|AnsiString|UnicodeString|WideString|RawByteString|UTF8String|File|TextFile|Text|Boolean|ByteBool|WordBool|LongBool|Pointer|Variant|OleVariant))\b(?![&#60;\/(])">
<token type="KeywordType" />
</rule>
<!-- T Types -->
<rule pattern="\b(?!=\.)(?i:(TSingleRec|TDoubleRec|TExtended80Rec|TByteArray|TTextBuf|TVarRec|TWordArray))\b(?![&#60;\/(])">
<token type="KeywordType" />
</rule>
<!-- Pointer Types -->
<rule pattern="\b(?!=\.)(?i:(PChar|PAnsiChar|PWideChar|PRawByteString|PUnicodeString|PString|PAnsiString|PShortString|PTextBuf|PWideString|PByte|PShortInt|PWord|PSmallInt|PCardinal|PLongWord|PFixedUInt|PLongint|PFixedInt|PUInt64|PInt64|PNativeUInt|PNativeInt|PByteArray|PCurrency|PDouble|PExtended|PSingle|PInteger|POleVariant|PVarRec|PVariant|PWordArray|PBoolean|PWordBool|PLongBool|PPointer))\b(?![&#60;\/(])">
<token type="KeywordType" />
</rule>
<!-- Result -->
<rule pattern="\b(?!=\.)(?i:(Result))\b(?![&#60;\/(])">
<token type="GenericEmph" />
</rule>
<!-- Result Constants -->
<rule pattern="\b(?!=\.)(?i:(True|False))\b(?![&#60;\/(])">
<token type="NameConstant" />
</rule>
<!-- Operator (Assign) -->
<rule pattern="[(\:\=)]">
<token type="Operator" />
</rule>
<!-- Operators (Arithmetic, Unary Arithmetic, String, Pointer, Set, Relational, Address) -->
<rule pattern="[\+\-\*\/\^&#60;&#62;\=\@]">
<token type="Operator" />
</rule>
<!-- Operators (Arithmetic, Boolean, Logical (Bitwise), Set) -->
<rule pattern="\b(?i:([div][mod][not][and][or][xor][shl][shr][in]))\b">
<token type="OperatorWord" />
</rule>
<!-- Special Symbols (Escape, Literal Chr, Hex Value, Binary Numeral Expression Indicator) -->
<rule pattern="[&#38;\#\$\%]">
<token type="Operator" />
</rule>
<!-- Special Symbols (Punctuation) -->
<rule pattern="[\(\)\,\.\:\;\[\]]">
<token type="Punctuation" />
</rule>
<!-- Reserved Words -->
<rule pattern="\b(?!=\.)(?i:(and|end|interface|record|var|array|except|is|repeat|while|as|exports|label|resourcestring|with|asm|file|library|set|xor|begin|finalization|mod|shl|case|finally|nil|shr|class|for|not|string|const|function|object|then|constructor|goto|of|threadvar|destructor|if|or|to|dispinterface|implementation|packed|try|div|in|procedure|type|do|inherited|program|unit|downto|initialization|property|until|else|inline|raise|uses))\b(?![&#60;\/(])">
<token type="KeywordReserved" />
</rule>
<!-- Directives -->
<rule pattern="\b(?!=\.)(?i:(absolute|export|name|public|stdcall|abstract|external|published|strict|assembler|nodefault|read|stored|automated|final|operator|readonly|unsafe|cdecl|forward|out|reference|varargs|contains|helper|overload|register|virtual|default|implements|override|reintroduce|winapi|delayed|index|package|requires|write|deprecated|inline|pascal|writeonly|dispid|library|platform|safecall|dynamic|local|private|sealed|experimental|message|protected|static))\b(?![&#60;\/(])">
<token type="Keyword" />
</rule>
<!-- Directives obsolete -->
<rule pattern="\b(?!=\.)(?i:(near|far|resident))\b(?![&#60;\/(])">
<token type="Keyword" />
</rule>
<!-- Constant Expressions -->
<rule pattern="\b(?!=\.)(?i:(Abs|High|Low|Pred|Succ|Chr|Length|Odd|Round|Swap|Hi|Lo|Ord|SizeOf|Trunc))\b(?![&#60;\/(])">
<token type="KeywordConstant" />
</rule>
<!-- everything else -->
<rule pattern="([^\W\d]|\$)[\w$]*">
<token type="Text" />
</rule>
</state>
</rules>
</lexer>

363
lexers/testdata/objectpascal.actual vendored Normal file
View File

@ -0,0 +1,363 @@
{*******************************************************}
{ a totally senseless test unit }
{*******************************************************}
unit UnitLexerTest;
interface
uses
System.Classes,
System.SysUtils;
{$SCOPEDENUMS ON}
resourcestring
sEIsRunning = 'Cannot perform this operation when test is running.';
sENotRunning = 'Test not running.';
const
CUNICODE = '��';
CBUFFERSIZE = 1024;
CLITERALNUMBERFLOAT = 0.123;
CLITERALNUMBERHEX = $1F2A3C platform deprecated;
CInteger = 123;
CLo = $0A7640000;
CHi = $00DE0B6B3;
CEXTENDED: Extended = 1E18;
const
EmptyString: string = '';
NullString: PString = @EmptyString;
(*
This is
a multiline
comment block
*)
type
TState = (undefined = -1, hidden, showing);
TDataBuffer = array [0 .. CBUFFERSIZE - 1] of AnsiChar;
TFlag = (Default, Unicode);
TFlags = set of TFlag;
TIntSet = set of 0 .. SizeOf(Integer) * 8 - 1;
{ This is
another multiline
comment block }
WRec = packed record
case Integer of
0: (Lo, Hi: Byte);
1: (Bytes: array [0 .. 1] of Byte);
end;
{ Comment }
// Comment Single
const
cMultiLineString1 = '''
some text
and now '''
some more text
''';
const
cMultiLineString2 = '''''
some text
and now '''
some more text
''''';
var
LNativeInt: NativeInt;
LNativeUInt: NativeUInt;
LLongInt: LongInt;
LLongWord: LongWord;
LInteger: Integer;
LInt64: Int64;
LCardinal: Cardinal;
LUInt64: UInt64;
LShortInt: ShortInt;
LSmallInt: SmallInt;
LFixedInt: FixedInt;
LByte: Byte;
LWord: Word;
LFixedUInt: FixedUInt;
LInt8: Int8;
LInt16: Int16;
LInt32: Int32;
LUInt8: UInt8;
LUInt16: UInt16;
LUInt32: UInt32;
LReal48: Real48;
LSingle: Single;
LDouble: Double;
LReal: Real;
LExtended: Extended;
LComp: Comp;
LCurrency: Currency;
LChar: Char;
LAnsiChar: AnsiChar;
LWideChar: WideChar;
LUCS2Char: UCS2Char;
LUCS4Char: UCS4Char;
Lstring: string;
LShortString: ShortString;
LAnsiString: AnsiString;
LUnicodeString: UnicodeString;
LWideString: WideString;
LRawByteString: RawByteString;
LUTF8String: UTF8String;
LFile: File;
LTextFile: TextFile;
LText: Text;
LBoolean: Boolean;
LByteBool: ByteBool;
LWordBool: WordBool;
LLongBool: LongBool;
LPointer: Pointer;
LVariant: Variant;
LOleVariant: OleVariant;
var
LTSingleRec: TSingleRec; // deprecated
LTDoubleRec: TDoubleRec; // deprecated
LTExtended80Rec: TExtended80Rec;
LTByteArray: TByteArray;
LTTextBuf: TTextBuf;
LTVarRec: TVarRec;
LTWordArray: TWordArray;
var
LPChar: PChar;
LPAnsiChar: PAnsiChar;
LPWideChar: PWideChar;
LPRawByteString: PRawByteString;
LPUnicodeString: PUnicodeString;
LPString: PString;
LPAnsiString: PAnsiString;
LPShortString: PShortString;
LPTextBuf: PTextBuf;
LPWideString: PWideString;
LPByte: PByte;
LPShortInt: PShortInt;
LPWord: PWord;
LPSmallInt: PSmallInt;
LPCardinal: PCardinal;
LPLongWord: PLongWord;
LPFixedUInt: PFixedUInt;
LPLongint: PLongint;
LPFixedInt: PFixedInt;
LPUInt64: PUInt64;
LPInt64: PInt64;
LPNativeUInt: PNativeUInt;
LPNativeInt: PNativeInt;
LPByteArray: PByteArray;
LPCurrency: PCurrency;
LPDouble: PDouble;
LPExtended: PExtended;
LPSingle: PSingle;
LPInteger: PInteger;
LPOleVariant: POleVariant;
LPVarRec: PVarRec;
LPVariant: PVariant;
LPWordArray: PWordArray;
LPBoolean: PBoolean;
LPWordBool: PWordBool;
LPLongBool: PLongBool;
LPPointer: PPointer;
type
TLexerPath = class
strict private
const
CLexerFolder = '\LEXER';
{$IFDEF MSWINDOWS}
class function GetSystemDrivePath: string; static;
class function GetProgramFilesPath: string; static;
{$ENDIF}
class function GetTempPath: string; static;
public
{$IFDEF MSWINDOWS}
class property SystemDrivePath: string read GetSystemDrivePath;
class property ProgramFilesPath: string read GetProgramFilesPath;
{$ENDIF}
class property TempPath: string read GetTempPath;
end;
type
ILexerTest = interface
['{F2A3AC58-4CBD-4AFB-8ACC-5AA0DCB6E23E}']
function GetLexerHandle: THandle;
end;
[ComponentPlatformsAttribute(pfidWindows)]
TLexerTest = class(TComponent, ILexerTest)
private
FLexerHandle: THandle;
FTag: NativeInt;
procedure SetTag(const Value: NativeInt);
protected
function GetLexerHandle: THandle;
property LexerHandle: THandle read GetLexerHandle;
public
constructor Create(AOwner: TComponent); override;
destructor Destroy; override;
procedure Run;
published
property Tag: NativeInt read FTag write SetTag default 0;
end;
implementation
uses
System.IOUtils;
function AllocateLexer: THandle;
begin
Result := 1; // ???
end;
function GetResult(const AInputString: string): Boolean; // this is hurting
var
LResult: Boolean;
LInt: Integer;
LIntPtr: PInteger;
begin
Result := False;
var
Lstring: string := 'string';
var
LString2: string := '''a '' string''';
if string(Lstring + LString2).Equals(AInputString) then Exit(True)
else
begin
var
LRes: Integer := 1;
LResult := (AInputString <> ('some input' + LRes.ToString));
end;
if LResult then
begin
LInt := 66;
LIntPtr := @LInt;
Result := ((Round(1 + 2 - 0.45 * 7 / 10) > Ord(#10).ToExtended)) = True;
if Result then Result := (LIntPtr^ + PInteger(LInt)^) <> 13;
end;
Beep;
try
if @Result <> nil then
begin
Result := 1 = 2;
end;
finally
Beep;
end;
var
s: string := '<rule pattern="\b(?i:([div][mod][not][and][or][xor][shl][shr][in]))\b">';
s := s + ' ... this will just work, I''m sure';
for var res in [1, 2] do
begin
s := res.ToString + ' ' + s;
end;
var
start: Integer := 0;
var
&end: Integer := 9;
var
LChar: Char;
Beep;
asm
test %eax,%eax // fpc
mov %fs:(0x2c),%edx // fpc
mov $0x1000000,%eax // fpc assembler
DB 'a string...',0DH,0AH
DW 0FFFFH
DD 0FFFFFFFFH
CMP AL,"'"
JE @@fd3
@@fd1: CALL @@fd3
SUB EAX,7FFFH
@@fd3: MOV AL,[EBX]
MOV EAX, [LARGE $42]
MOV&LChar, 1
end;
end;
{ TLexerPath }
{$IFDEF MSWINDOWS}
class function TLexerPath.GetProgramFilesPath: string;
{$IFDEF WIN32}
const
CBACKUPPATH = '\Program Files (x86)';
{$ENDIF}
{$IFDEF WIN64}
const
CBACKUPPATH = '\Program Files';
{$ENDIF}
begin
{$IFDEF WIN32}
Result := GetEnvironmentVariable('ProgramFiles(x86)') + CLexerFolder;
if Result.IsEmpty then Result := GetSystemDrivePath + CBACKUPPATH + CLexerFolder;
{$ENDIF}
{$IFDEF WIN64}
Result := GetEnvironmentVariable('ProgramW6432') + CLexerFolder;
if Result.IsEmpty then Result := GetSystemDrivePath + CBACKUPPATH + CLexerFolder;
{$ENDIF}
end;
class function TLexerPath.GetSystemDrivePath: string;
const
CBACKUPPATH = 'C:';
begin
Result := GetEnvironmentVariable('SystemDrive');
if Result.IsEmpty then Result := CBACKUPPATH;
end;
{$ENDIF}
class function TLexerPath.GetTempPath: string;
begin
Result := System.IOUtils.TPath.GetTempPath + CLexerFolder;
end;
{ TLexerTest }
constructor TLexerTest.Create(AOwner: TComponent);
begin
inherited Create(nil);
FTag := 10;
end;
destructor TLexerTest.Destroy;
begin
inherited Destroy;
end;
function TLexerTest.GetLexerHandle: THandle;
begin
if FLexerHandle = 0 then FLexerHandle := AllocateLexer;
Result := FLexerHandle;
end;
procedure TLexerTest.Run;
begin
// DoIt
end;
procedure TLexerTest.SetTag(const Value: NativeInt);
begin
if FTag <> Value then FTag := Value;
end;
initialization
// Unit initialization code...
finalization
// Unit finalization code...
end.

2242
lexers/testdata/objectpascal.expected vendored Normal file

File diff suppressed because it is too large Load Diff