1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-03-17 20:58:08 +02:00

Split PHP into two lexers - PHP and PHTML.

The former is pure PHP code while the latter is PHP code in <? ?> tags,
within HTML.

Fixes #210.
This commit is contained in:
Alec Thomas 2020-06-30 20:56:49 +10:00
parent 11501493c9
commit 2b9ea60d89
10 changed files with 229 additions and 118 deletions

View File

@ -20,6 +20,11 @@ linters:
- wsl
- gomnd
- gocognit
- goerr113
- nolintlint
- testpackage
- godot
- nestif
linters-settings:
govet:

View File

@ -4,7 +4,7 @@ go:
- "1.13.x"
script:
- go test -v ./...
- curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | bash -s v1.22.2
- curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | bash -s 1.26.0
- ./bin/golangci-lint run
- git clean -fdx .
after_success:

View File

@ -1,15 +1,12 @@
package circular
import (
"strings"
. "github.com/alecthomas/chroma" // nolint
"github.com/alecthomas/chroma/lexers/h"
"github.com/alecthomas/chroma/lexers/internal"
)
// PHP lexer.
var PHP = internal.Register(DelegatingLexer(h.HTML, MustNewLexer(
// PHP lexer for pure PHP code (not embedded in HTML).
var PHP = internal.Register(MustNewLexer(
&Config{
Name: "PHP",
Aliases: []string{"php", "php3", "php4", "php5"},
@ -19,73 +16,65 @@ var PHP = internal.Register(DelegatingLexer(h.HTML, MustNewLexer(
CaseInsensitive: true,
EnsureNL: true,
},
Rules{
"root": {
{`<\?(php)?`, CommentPreproc, Push("php")},
{`[^<]+`, Other, nil},
{`<`, Other, nil},
},
"php": {
{`\?>`, CommentPreproc, Pop(1)},
{`(<<<)([\'"]?)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)(\2\n.*?\n\s*)(\3)(;?)(\n)`, ByGroups(LiteralString, LiteralString, LiteralStringDelimiter, LiteralString, LiteralStringDelimiter, Punctuation, Text), nil},
{`\s+`, Text, nil},
{`#.*?\n`, CommentSingle, nil},
{`//.*?\n`, CommentSingle, nil},
{`/\*\*/`, CommentMultiline, nil},
{`/\*\*.*?\*/`, LiteralStringDoc, nil},
{`/\*.*?\*/`, CommentMultiline, nil},
{`(->|::)(\s*)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Operator, Text, NameAttribute), nil},
{`[~!%^&*+=|:.<>/@-]+`, Operator, nil},
{`\?`, Operator, nil},
{`[\[\]{}();,]+`, Punctuation, nil},
{`(class)(\s+)`, ByGroups(Keyword, Text), Push("classname")},
{`(function)(\s*)(?=\()`, ByGroups(Keyword, Text), nil},
{`(function)(\s+)(&?)(\s*)`, ByGroups(Keyword, Text, Operator, Text), Push("functionname")},
{`(const)(\s+)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Keyword, Text, NameConstant), nil},
{`(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|FALSE|print|for|require|continue|foreach|require_once|declare|return|default|static|do|switch|die|stdClass|echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|virtual|endfor|include_once|while|endforeach|global|endif|list|endswitch|new|endwhile|not|array|E_ALL|NULL|final|php_user_filter|interface|implements|public|private|protected|abstract|clone|try|catch|throw|this|use|namespace|trait|yield|finally)\b`, Keyword, nil},
{`(true|false|null)\b`, KeywordConstant, nil},
Include("magicconstants"),
{`\$\{\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*\}`, NameVariable, nil},
{`\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameVariable, nil},
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameOther, nil},
{`(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?`, LiteralNumberFloat, nil},
{`\d+e[+-]?[0-9]+`, LiteralNumberFloat, nil},
{`0[0-7]+`, LiteralNumberOct, nil},
{`0x[a-f0-9]+`, LiteralNumberHex, nil},
{`\d+`, LiteralNumberInteger, nil},
{`0b[01]+`, LiteralNumberBin, nil},
{`'([^'\\]*(?:\\.[^'\\]*)*)'`, LiteralStringSingle, nil},
{"`([^`\\\\]*(?:\\\\.[^`\\\\]*)*)`", LiteralStringBacktick, nil},
{`"`, LiteralStringDouble, Push("string")},
},
"magicfuncs": {
{Words(``, `\b`, `__construct`, `__destruct`, `__call`, `__callStatic`, `__get`, `__set`, `__isset`, `__unset`, `__sleep`, `__wakeup`, `__toString`, `__invoke`, `__set_state`, `__clone`, `__debugInfo`), NameFunctionMagic, nil},
},
"magicconstants": {
{Words(``, `\b`, `__LINE__`, `__FILE__`, `__DIR__`, `__FUNCTION__`, `__CLASS__`, `__TRAIT__`, `__METHOD__`, `__NAMESPACE__`), NameConstant, nil},
},
"classname": {
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameClass, Pop(1)},
},
"functionname": {
Include("magicfuncs"),
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameFunction, Pop(1)},
Default(Pop(1)),
},
"string": {
{`"`, LiteralStringDouble, Pop(1)},
{`[^{$"\\]+`, LiteralStringDouble, nil},
{`\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})`, LiteralStringEscape, nil},
{`\$(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*(\[\S+?\]|->(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)?`, LiteralStringInterpol, nil},
{`(\{\$\{)(.*?)(\}\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil},
{`(\{)(\$.*?)(\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil},
{`(\$\{)(\S+)(\})`, ByGroups(LiteralStringInterpol, NameVariable, LiteralStringInterpol), nil},
{`[${\\]`, LiteralStringDouble, nil},
},
phpCommonRules.Rename("php", "root"),
))
var phpCommonRules = Rules{
"php": {
{`\?>`, CommentPreproc, Pop(1)},
{`(<<<)([\'"]?)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)(\2\n.*?\n\s*)(\3)(;?)(\n)`, ByGroups(LiteralString, LiteralString, LiteralStringDelimiter, LiteralString, LiteralStringDelimiter, Punctuation, Text), nil},
{`\s+`, Text, nil},
{`#.*?\n`, CommentSingle, nil},
{`//.*?\n`, CommentSingle, nil},
{`/\*\*/`, CommentMultiline, nil},
{`/\*\*.*?\*/`, LiteralStringDoc, nil},
{`/\*.*?\*/`, CommentMultiline, nil},
{`(->|::)(\s*)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Operator, Text, NameAttribute), nil},
{`[~!%^&*+=|:.<>/@-]+`, Operator, nil},
{`\?`, Operator, nil},
{`[\[\]{}();,]+`, Punctuation, nil},
{`(class)(\s+)`, ByGroups(Keyword, Text), Push("classname")},
{`(function)(\s*)(?=\()`, ByGroups(Keyword, Text), nil},
{`(function)(\s+)(&?)(\s*)`, ByGroups(Keyword, Text, Operator, Text), Push("functionname")},
{`(const)(\s+)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Keyword, Text, NameConstant), nil},
{`(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|FALSE|print|for|require|continue|foreach|require_once|declare|return|default|static|do|switch|die|stdClass|echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|virtual|endfor|include_once|while|endforeach|global|endif|list|endswitch|new|endwhile|not|array|E_ALL|NULL|final|php_user_filter|interface|implements|public|private|protected|abstract|clone|try|catch|throw|this|use|namespace|trait|yield|finally)\b`, Keyword, nil},
{`(true|false|null)\b`, KeywordConstant, nil},
Include("magicconstants"),
{`\$\{\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*\}`, NameVariable, nil},
{`\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameVariable, nil},
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameOther, nil},
{`(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?`, LiteralNumberFloat, nil},
{`\d+e[+-]?[0-9]+`, LiteralNumberFloat, nil},
{`0[0-7]+`, LiteralNumberOct, nil},
{`0x[a-f0-9]+`, LiteralNumberHex, nil},
{`\d+`, LiteralNumberInteger, nil},
{`0b[01]+`, LiteralNumberBin, nil},
{`'([^'\\]*(?:\\.[^'\\]*)*)'`, LiteralStringSingle, nil},
{"`([^`\\\\]*(?:\\\\.[^`\\\\]*)*)`", LiteralStringBacktick, nil},
{`"`, LiteralStringDouble, Push("string")},
},
).SetAnalyser(func(text string) float32 {
if strings.Contains(text, "<?php") {
return 0.5
}
return 0.0
})))
"magicfuncs": {
{Words(``, `\b`, `__construct`, `__destruct`, `__call`, `__callStatic`, `__get`, `__set`, `__isset`, `__unset`, `__sleep`, `__wakeup`, `__toString`, `__invoke`, `__set_state`, `__clone`, `__debugInfo`), NameFunctionMagic, nil},
},
"magicconstants": {
{Words(``, `\b`, `__LINE__`, `__FILE__`, `__DIR__`, `__FUNCTION__`, `__CLASS__`, `__TRAIT__`, `__METHOD__`, `__NAMESPACE__`), NameConstant, nil},
},
"classname": {
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameClass, Pop(1)},
},
"functionname": {
Include("magicfuncs"),
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameFunction, Pop(1)},
Default(Pop(1)),
},
"string": {
{`"`, LiteralStringDouble, Pop(1)},
{`[^{$"\\]+`, LiteralStringDouble, nil},
{`\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})`, LiteralStringEscape, nil},
{`\$(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*(\[\S+?\]|->(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)?`, LiteralStringInterpol, nil},
{`(\{\$\{)(.*?)(\}\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil},
{`(\{)(\$.*?)(\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil},
{`(\$\{)(\S+)(\})`, ByGroups(LiteralStringInterpol, NameVariable, LiteralStringInterpol), nil},
{`[${\\]`, LiteralStringDouble, nil},
},
}

34
lexers/circular/phtml.go Normal file
View File

@ -0,0 +1,34 @@
package circular
import (
"strings"
. "github.com/alecthomas/chroma" // nolint
"github.com/alecthomas/chroma/lexers/h"
"github.com/alecthomas/chroma/lexers/internal"
)
// PHTML lexer is PHP in HTML.
var PHTML = internal.Register(DelegatingLexer(h.HTML, MustNewLexer(
&Config{
Name: "PHTML",
Aliases: []string{"phtml"},
Filenames: []string{"*.phtml"},
MimeTypes: []string{"application/x-php", "application/x-httpd-php", "application/x-httpd-php3", "application/x-httpd-php4", "application/x-httpd-php5"},
DotAll: true,
CaseInsensitive: true,
EnsureNL: true,
},
Rules{
"root": {
{`<\?(php)?`, CommentPreproc, Push("php")},
{`[^<]+`, Other, nil},
{`<`, Other, nil},
},
}.Merge(phpCommonRules),
).SetAnalyser(func(text string) float32 {
if strings.Contains(text, "<?php") {
return 0.5
}
return 0.0
})))

View File

@ -73,7 +73,7 @@ func TestLexers(t *testing.T) {
if os.Getenv("RECORD") == "true" {
// Update the expected file with the generated output of this lexer
f, err := os.Create(expectedFilename)
defer f.Close()
defer f.Close() // nolint: gosec
assert.NoError(t, err)
assert.NoError(t, formatters.JSON.Format(f, nil, chroma.Literator(actual...)))
} else {

View File

@ -1,10 +1,3 @@
<!DOCTYPE html>
<html>
<body>
<h1>My first PHP page</h1>
<?php
$docs = $modx->getIterator('modResource', ["parent" => 84]);
foreach($docs as $doc){
@ -12,8 +5,5 @@ foreach($docs as $doc){
$doc->set("content", preg_replace("/Some value/i", "Replacement", $q));
print_r($doc->content);
// $doc->save();
}
}
// some comment
?>
</body>
</html>

View File

@ -1,24 +1,4 @@
[
{"type":"CommentPreproc","value":"\u003c!DOCTYPE html\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"html"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"body"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"h1"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"My first PHP page"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"h1"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"CommentPreproc","value":"\u003c?php"},
{"type":"Text","value":"\n\n"},
{"type":"NameVariable","value":"$docs"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
@ -81,16 +61,6 @@
{"type":"Text","value":"\n "},
{"type":"CommentSingle","value":"// $doc-\u003esave();\n"},
{"type":"Punctuation","value":"}"},
{"type":"Text","value":" \n"},
{"type":"CommentSingle","value":"// some comment\n"},
{"type":"CommentPreproc","value":"?\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"body"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"html"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"}
{"type":"CommentSingle","value":"// some comment\n"}
]

19
lexers/testdata/phtml.actual vendored Normal file
View File

@ -0,0 +1,19 @@
<!DOCTYPE html>
<html>
<body>
<h1>My first PHP page</h1>
<?php
$docs = $modx->getIterator('modResource', ["parent" => 84]);
foreach($docs as $doc){
$q=$doc->content;
$doc->set("content", preg_replace("/Some value/i", "Replacement", $q));
print_r($doc->content);
// $doc->save();
}
// some comment
?>
</body>
</html>

96
lexers/testdata/phtml.expected vendored Normal file
View File

@ -0,0 +1,96 @@
[
{"type":"CommentPreproc","value":"\u003c!DOCTYPE html\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"html"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"body"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"h1"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"My first PHP page"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"h1"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"CommentPreproc","value":"\u003c?php"},
{"type":"Text","value":"\n\n"},
{"type":"NameVariable","value":"$docs"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
{"type":"Text","value":" "},
{"type":"NameVariable","value":"$modx"},
{"type":"Operator","value":"-\u003e"},
{"type":"NameAttribute","value":"getIterator"},
{"type":"Punctuation","value":"("},
{"type":"LiteralStringSingle","value":"'modResource'"},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"Punctuation","value":"["},
{"type":"LiteralStringDouble","value":"\"parent\""},
{"type":"Text","value":" "},
{"type":"Operator","value":"=\u003e"},
{"type":"Text","value":" "},
{"type":"LiteralNumberInteger","value":"84"},
{"type":"Punctuation","value":"]);"},
{"type":"Text","value":"\n\n"},
{"type":"Keyword","value":"foreach"},
{"type":"Punctuation","value":"("},
{"type":"NameVariable","value":"$docs"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"as"},
{"type":"Text","value":" "},
{"type":"NameVariable","value":"$doc"},
{"type":"Punctuation","value":"){"},
{"type":"Text","value":"\n "},
{"type":"NameVariable","value":"$q"},
{"type":"Operator","value":"="},
{"type":"NameVariable","value":"$doc"},
{"type":"Operator","value":"-\u003e"},
{"type":"NameAttribute","value":"content"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n "},
{"type":"NameVariable","value":"$doc"},
{"type":"Operator","value":"-\u003e"},
{"type":"NameAttribute","value":"set"},
{"type":"Punctuation","value":"("},
{"type":"LiteralStringDouble","value":"\"content\""},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"NameOther","value":"preg_replace"},
{"type":"Punctuation","value":"("},
{"type":"LiteralStringDouble","value":"\"/Some value/i\""},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"LiteralStringDouble","value":"\"Replacement\""},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"NameVariable","value":"$q"},
{"type":"Punctuation","value":"));"},
{"type":"Text","value":"\n "},
{"type":"NameOther","value":"print_r"},
{"type":"Punctuation","value":"("},
{"type":"NameVariable","value":"$doc"},
{"type":"Operator","value":"-\u003e"},
{"type":"NameAttribute","value":"content"},
{"type":"Punctuation","value":");"},
{"type":"Text","value":"\n "},
{"type":"CommentSingle","value":"// $doc-\u003esave();\n"},
{"type":"Punctuation","value":"}"},
{"type":"Text","value":" \n"},
{"type":"CommentSingle","value":"// some comment\n"},
{"type":"CommentPreproc","value":"?\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"body"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"html"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"}
]

View File

@ -160,6 +160,14 @@ func Tokenise(lexer Lexer, options *TokeniseOptions, text string) ([]Token, erro
// Rules maps from state to a sequence of Rules.
type Rules map[string][]Rule
// Rename clones rules then a rule.
func (r Rules) Rename(old, new string) Rules {
r = r.Clone()
r[new] = r[old]
delete(r, old)
return r
}
// Clone returns a clone of the Rules.
func (r Rules) Clone() Rules {
out := map[string][]Rule{}