1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-11-25 22:32:32 +02:00

Split PHP into two lexers - PHP and PHTML.

The former lexes pure PHP code, while the latter lexes PHP code embedded in
HTML inside <? ?> tags.

Fixes #210.
This commit is contained in:
Alec Thomas
2020-06-30 20:56:49 +10:00
parent 11501493c9
commit 2b9ea60d89
10 changed files with 229 additions and 118 deletions

View File

@@ -20,6 +20,11 @@ linters:
- wsl - wsl
- gomnd - gomnd
- gocognit - gocognit
- goerr113
- nolintlint
- testpackage
- godot
- nestif
linters-settings: linters-settings:
govet: govet:

View File

@@ -4,7 +4,7 @@ go:
- "1.13.x" - "1.13.x"
script: script:
- go test -v ./... - go test -v ./...
- curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | bash -s v1.22.2 - curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | bash -s 1.26.0
- ./bin/golangci-lint run - ./bin/golangci-lint run
- git clean -fdx . - git clean -fdx .
after_success: after_success:

View File

@@ -1,15 +1,12 @@
package circular package circular
import ( import (
"strings"
. "github.com/alecthomas/chroma" // nolint . "github.com/alecthomas/chroma" // nolint
"github.com/alecthomas/chroma/lexers/h"
"github.com/alecthomas/chroma/lexers/internal" "github.com/alecthomas/chroma/lexers/internal"
) )
// PHP lexer. // PHP lexer for pure PHP code (not embedded in HTML).
var PHP = internal.Register(DelegatingLexer(h.HTML, MustNewLexer( var PHP = internal.Register(MustNewLexer(
&Config{ &Config{
Name: "PHP", Name: "PHP",
Aliases: []string{"php", "php3", "php4", "php5"}, Aliases: []string{"php", "php3", "php4", "php5"},
@@ -19,73 +16,65 @@ var PHP = internal.Register(DelegatingLexer(h.HTML, MustNewLexer(
CaseInsensitive: true, CaseInsensitive: true,
EnsureNL: true, EnsureNL: true,
}, },
Rules{ phpCommonRules.Rename("php", "root"),
"root": { ))
{`<\?(php)?`, CommentPreproc, Push("php")},
{`[^<]+`, Other, nil}, var phpCommonRules = Rules{
{`<`, Other, nil}, "php": {
}, {`\?>`, CommentPreproc, Pop(1)},
"php": { {`(<<<)([\'"]?)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)(\2\n.*?\n\s*)(\3)(;?)(\n)`, ByGroups(LiteralString, LiteralString, LiteralStringDelimiter, LiteralString, LiteralStringDelimiter, Punctuation, Text), nil},
{`\?>`, CommentPreproc, Pop(1)}, {`\s+`, Text, nil},
{`(<<<)([\'"]?)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)(\2\n.*?\n\s*)(\3)(;?)(\n)`, ByGroups(LiteralString, LiteralString, LiteralStringDelimiter, LiteralString, LiteralStringDelimiter, Punctuation, Text), nil}, {`#.*?\n`, CommentSingle, nil},
{`\s+`, Text, nil}, {`//.*?\n`, CommentSingle, nil},
{`#.*?\n`, CommentSingle, nil}, {`/\*\*/`, CommentMultiline, nil},
{`//.*?\n`, CommentSingle, nil}, {`/\*\*.*?\*/`, LiteralStringDoc, nil},
{`/\*\*/`, CommentMultiline, nil}, {`/\*.*?\*/`, CommentMultiline, nil},
{`/\*\*.*?\*/`, LiteralStringDoc, nil}, {`(->|::)(\s*)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Operator, Text, NameAttribute), nil},
{`/\*.*?\*/`, CommentMultiline, nil}, {`[~!%^&*+=|:.<>/@-]+`, Operator, nil},
{`(->|::)(\s*)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Operator, Text, NameAttribute), nil}, {`\?`, Operator, nil},
{`[~!%^&*+=|:.<>/@-]+`, Operator, nil}, {`[\[\]{}();,]+`, Punctuation, nil},
{`\?`, Operator, nil}, {`(class)(\s+)`, ByGroups(Keyword, Text), Push("classname")},
{`[\[\]{}();,]+`, Punctuation, nil}, {`(function)(\s*)(?=\()`, ByGroups(Keyword, Text), nil},
{`(class)(\s+)`, ByGroups(Keyword, Text), Push("classname")}, {`(function)(\s+)(&?)(\s*)`, ByGroups(Keyword, Text, Operator, Text), Push("functionname")},
{`(function)(\s*)(?=\()`, ByGroups(Keyword, Text), nil}, {`(const)(\s+)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Keyword, Text, NameConstant), nil},
{`(function)(\s+)(&?)(\s*)`, ByGroups(Keyword, Text, Operator, Text), Push("functionname")}, {`(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|FALSE|print|for|require|continue|foreach|require_once|declare|return|default|static|do|switch|die|stdClass|echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|virtual|endfor|include_once|while|endforeach|global|endif|list|endswitch|new|endwhile|not|array|E_ALL|NULL|final|php_user_filter|interface|implements|public|private|protected|abstract|clone|try|catch|throw|this|use|namespace|trait|yield|finally)\b`, Keyword, nil},
{`(const)(\s+)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Keyword, Text, NameConstant), nil}, {`(true|false|null)\b`, KeywordConstant, nil},
{`(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|FALSE|print|for|require|continue|foreach|require_once|declare|return|default|static|do|switch|die|stdClass|echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|virtual|endfor|include_once|while|endforeach|global|endif|list|endswitch|new|endwhile|not|array|E_ALL|NULL|final|php_user_filter|interface|implements|public|private|protected|abstract|clone|try|catch|throw|this|use|namespace|trait|yield|finally)\b`, Keyword, nil}, Include("magicconstants"),
{`(true|false|null)\b`, KeywordConstant, nil}, {`\$\{\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*\}`, NameVariable, nil},
Include("magicconstants"), {`\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameVariable, nil},
{`\$\{\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*\}`, NameVariable, nil}, {`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameOther, nil},
{`\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameVariable, nil}, {`(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?`, LiteralNumberFloat, nil},
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameOther, nil}, {`\d+e[+-]?[0-9]+`, LiteralNumberFloat, nil},
{`(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?`, LiteralNumberFloat, nil}, {`0[0-7]+`, LiteralNumberOct, nil},
{`\d+e[+-]?[0-9]+`, LiteralNumberFloat, nil}, {`0x[a-f0-9]+`, LiteralNumberHex, nil},
{`0[0-7]+`, LiteralNumberOct, nil}, {`\d+`, LiteralNumberInteger, nil},
{`0x[a-f0-9]+`, LiteralNumberHex, nil}, {`0b[01]+`, LiteralNumberBin, nil},
{`\d+`, LiteralNumberInteger, nil}, {`'([^'\\]*(?:\\.[^'\\]*)*)'`, LiteralStringSingle, nil},
{`0b[01]+`, LiteralNumberBin, nil}, {"`([^`\\\\]*(?:\\\\.[^`\\\\]*)*)`", LiteralStringBacktick, nil},
{`'([^'\\]*(?:\\.[^'\\]*)*)'`, LiteralStringSingle, nil}, {`"`, LiteralStringDouble, Push("string")},
{"`([^`\\\\]*(?:\\\\.[^`\\\\]*)*)`", LiteralStringBacktick, nil},
{`"`, LiteralStringDouble, Push("string")},
},
"magicfuncs": {
{Words(``, `\b`, `__construct`, `__destruct`, `__call`, `__callStatic`, `__get`, `__set`, `__isset`, `__unset`, `__sleep`, `__wakeup`, `__toString`, `__invoke`, `__set_state`, `__clone`, `__debugInfo`), NameFunctionMagic, nil},
},
"magicconstants": {
{Words(``, `\b`, `__LINE__`, `__FILE__`, `__DIR__`, `__FUNCTION__`, `__CLASS__`, `__TRAIT__`, `__METHOD__`, `__NAMESPACE__`), NameConstant, nil},
},
"classname": {
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameClass, Pop(1)},
},
"functionname": {
Include("magicfuncs"),
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameFunction, Pop(1)},
Default(Pop(1)),
},
"string": {
{`"`, LiteralStringDouble, Pop(1)},
{`[^{$"\\]+`, LiteralStringDouble, nil},
{`\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})`, LiteralStringEscape, nil},
{`\$(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*(\[\S+?\]|->(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)?`, LiteralStringInterpol, nil},
{`(\{\$\{)(.*?)(\}\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil},
{`(\{)(\$.*?)(\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil},
{`(\$\{)(\S+)(\})`, ByGroups(LiteralStringInterpol, NameVariable, LiteralStringInterpol), nil},
{`[${\\]`, LiteralStringDouble, nil},
},
}, },
).SetAnalyser(func(text string) float32 { "magicfuncs": {
if strings.Contains(text, "<?php") { {Words(``, `\b`, `__construct`, `__destruct`, `__call`, `__callStatic`, `__get`, `__set`, `__isset`, `__unset`, `__sleep`, `__wakeup`, `__toString`, `__invoke`, `__set_state`, `__clone`, `__debugInfo`), NameFunctionMagic, nil},
return 0.5 },
} "magicconstants": {
return 0.0 {Words(``, `\b`, `__LINE__`, `__FILE__`, `__DIR__`, `__FUNCTION__`, `__CLASS__`, `__TRAIT__`, `__METHOD__`, `__NAMESPACE__`), NameConstant, nil},
}))) },
"classname": {
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameClass, Pop(1)},
},
"functionname": {
Include("magicfuncs"),
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameFunction, Pop(1)},
Default(Pop(1)),
},
"string": {
{`"`, LiteralStringDouble, Pop(1)},
{`[^{$"\\]+`, LiteralStringDouble, nil},
{`\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})`, LiteralStringEscape, nil},
{`\$(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*(\[\S+?\]|->(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)?`, LiteralStringInterpol, nil},
{`(\{\$\{)(.*?)(\}\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil},
{`(\{)(\$.*?)(\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil},
{`(\$\{)(\S+)(\})`, ByGroups(LiteralStringInterpol, NameVariable, LiteralStringInterpol), nil},
{`[${\\]`, LiteralStringDouble, nil},
},
}

34
lexers/circular/phtml.go Normal file
View File

@@ -0,0 +1,34 @@
package circular
import (
"strings"
. "github.com/alecthomas/chroma" // nolint
"github.com/alecthomas/chroma/lexers/h"
"github.com/alecthomas/chroma/lexers/internal"
)
// PHTML is the lexer for PHP embedded in HTML. The PHP-specific lexer built
// here is wrapped in a DelegatingLexer together with h.HTML.
var PHTML = internal.Register(DelegatingLexer(h.HTML, MustNewLexer(
	&Config{
		Name:      "PHTML",
		Aliases:   []string{"phtml"},
		Filenames: []string{"*.phtml"},
		MimeTypes: []string{
			"application/x-php",
			"application/x-httpd-php",
			"application/x-httpd-php3",
			"application/x-httpd-php4",
			"application/x-httpd-php5",
		},
		DotAll:          true,
		CaseInsensitive: true,
		EnsureNL:        true,
	},
	Rules{
		"root": {
			// An opening "<?" or "<?php" tag switches to the shared "php" state.
			{`<\?(php)?`, CommentPreproc, Push("php")},
			// Everything else is emitted as Other, in runs up to the next "<"
			// (presumably so the delegating lexer can hand it to h.HTML; confirm
			// against DelegatingLexer).
			{`[^<]+`, Other, nil},
			{`<`, Other, nil},
		},
	}.Merge(phpCommonRules),
).SetAnalyser(phtmlAnalyser)))

// phtmlAnalyser scores text for the PHTML lexer: 0.5 when the text contains a
// literal "<?php" opening tag, and 0.0 otherwise.
func phtmlAnalyser(text string) float32 {
	if !strings.Contains(text, "<?php") {
		return 0.0
	}
	return 0.5
}

View File

@@ -73,7 +73,7 @@ func TestLexers(t *testing.T) {
if os.Getenv("RECORD") == "true" { if os.Getenv("RECORD") == "true" {
// Update the expected file with the generated output of this lexer // Update the expected file with the generated output of this lexer
f, err := os.Create(expectedFilename) f, err := os.Create(expectedFilename)
defer f.Close() defer f.Close() // nolint: gosec
assert.NoError(t, err) assert.NoError(t, err)
assert.NoError(t, formatters.JSON.Format(f, nil, chroma.Literator(actual...))) assert.NoError(t, formatters.JSON.Format(f, nil, chroma.Literator(actual...)))
} else { } else {

View File

@@ -1,10 +1,3 @@
<!DOCTYPE html>
<html>
<body>
<h1>My first PHP page</h1>
<?php
$docs = $modx->getIterator('modResource', ["parent" => 84]); $docs = $modx->getIterator('modResource', ["parent" => 84]);
foreach($docs as $doc){ foreach($docs as $doc){
@@ -12,8 +5,5 @@ foreach($docs as $doc){
$doc->set("content", preg_replace("/Some value/i", "Replacement", $q)); $doc->set("content", preg_replace("/Some value/i", "Replacement", $q));
print_r($doc->content); print_r($doc->content);
// $doc->save(); // $doc->save();
} }
// some comment // some comment
?>
</body>
</html>

View File

@@ -1,24 +1,4 @@
[ [
{"type":"CommentPreproc","value":"\u003c!DOCTYPE html\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"html"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"body"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"h1"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"My first PHP page"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"h1"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"CommentPreproc","value":"\u003c?php"},
{"type":"Text","value":"\n\n"},
{"type":"NameVariable","value":"$docs"}, {"type":"NameVariable","value":"$docs"},
{"type":"Text","value":" "}, {"type":"Text","value":" "},
{"type":"Operator","value":"="}, {"type":"Operator","value":"="},
@@ -81,16 +61,6 @@
{"type":"Text","value":"\n "}, {"type":"Text","value":"\n "},
{"type":"CommentSingle","value":"// $doc-\u003esave();\n"}, {"type":"CommentSingle","value":"// $doc-\u003esave();\n"},
{"type":"Punctuation","value":"}"}, {"type":"Punctuation","value":"}"},
{"type":"Text","value":" \n"},
{"type":"CommentSingle","value":"// some comment\n"},
{"type":"CommentPreproc","value":"?\u003e"},
{"type":"Text","value":"\n"}, {"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c/"}, {"type":"CommentSingle","value":"// some comment\n"}
{"type":"NameTag","value":"body"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"html"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"}
] ]

19
lexers/testdata/phtml.actual vendored Normal file
View File

@@ -0,0 +1,19 @@
<!DOCTYPE html>
<html>
<body>
<h1>My first PHP page</h1>
<?php
$docs = $modx->getIterator('modResource', ["parent" => 84]);
foreach($docs as $doc){
$q=$doc->content;
$doc->set("content", preg_replace("/Some value/i", "Replacement", $q));
print_r($doc->content);
// $doc->save();
}
// some comment
?>
</body>
</html>

96
lexers/testdata/phtml.expected vendored Normal file
View File

@@ -0,0 +1,96 @@
[
{"type":"CommentPreproc","value":"\u003c!DOCTYPE html\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"html"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"body"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"h1"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"My first PHP page"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"h1"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"CommentPreproc","value":"\u003c?php"},
{"type":"Text","value":"\n\n"},
{"type":"NameVariable","value":"$docs"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
{"type":"Text","value":" "},
{"type":"NameVariable","value":"$modx"},
{"type":"Operator","value":"-\u003e"},
{"type":"NameAttribute","value":"getIterator"},
{"type":"Punctuation","value":"("},
{"type":"LiteralStringSingle","value":"'modResource'"},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"Punctuation","value":"["},
{"type":"LiteralStringDouble","value":"\"parent\""},
{"type":"Text","value":" "},
{"type":"Operator","value":"=\u003e"},
{"type":"Text","value":" "},
{"type":"LiteralNumberInteger","value":"84"},
{"type":"Punctuation","value":"]);"},
{"type":"Text","value":"\n\n"},
{"type":"Keyword","value":"foreach"},
{"type":"Punctuation","value":"("},
{"type":"NameVariable","value":"$docs"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"as"},
{"type":"Text","value":" "},
{"type":"NameVariable","value":"$doc"},
{"type":"Punctuation","value":"){"},
{"type":"Text","value":"\n "},
{"type":"NameVariable","value":"$q"},
{"type":"Operator","value":"="},
{"type":"NameVariable","value":"$doc"},
{"type":"Operator","value":"-\u003e"},
{"type":"NameAttribute","value":"content"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n "},
{"type":"NameVariable","value":"$doc"},
{"type":"Operator","value":"-\u003e"},
{"type":"NameAttribute","value":"set"},
{"type":"Punctuation","value":"("},
{"type":"LiteralStringDouble","value":"\"content\""},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"NameOther","value":"preg_replace"},
{"type":"Punctuation","value":"("},
{"type":"LiteralStringDouble","value":"\"/Some value/i\""},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"LiteralStringDouble","value":"\"Replacement\""},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"NameVariable","value":"$q"},
{"type":"Punctuation","value":"));"},
{"type":"Text","value":"\n "},
{"type":"NameOther","value":"print_r"},
{"type":"Punctuation","value":"("},
{"type":"NameVariable","value":"$doc"},
{"type":"Operator","value":"-\u003e"},
{"type":"NameAttribute","value":"content"},
{"type":"Punctuation","value":");"},
{"type":"Text","value":"\n "},
{"type":"CommentSingle","value":"// $doc-\u003esave();\n"},
{"type":"Punctuation","value":"}"},
{"type":"Text","value":" \n"},
{"type":"CommentSingle","value":"// some comment\n"},
{"type":"CommentPreproc","value":"?\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"body"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"html"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"}
]

View File

@@ -160,6 +160,14 @@ func Tokenise(lexer Lexer, options *TokeniseOptions, text string) ([]Token, erro
// Rules maps from state to a sequence of Rules. // Rules maps from state to a sequence of Rules.
type Rules map[string][]Rule type Rules map[string][]Rule
// Rename clones the rules, then renames the state oldRule to newRule in the
// clone and returns it. All changes are made to the value returned by Clone,
// not to the receiver.
//
// The clone is returned unchanged when oldRule does not exist or when both
// names are equal. Without that guard, renaming a state to its own name would
// delete it, and renaming a missing state would replace any existing newRule
// state with a nil rule list.
//
// If newRule already names a different state, that state is replaced by the
// rules of oldRule.
func (r Rules) Rename(oldRule, newRule string) Rules {
	r = r.Clone()
	state, ok := r[oldRule]
	if !ok || oldRule == newRule {
		return r
	}
	r[newRule] = state
	delete(r, oldRule)
	return r
}
// Clone returns a clone of the Rules. // Clone returns a clone of the Rules.
func (r Rules) Clone() Rules { func (r Rules) Clone() Rules {
out := map[string][]Rule{} out := map[string][]Rule{}