From 2b9ea60d89edafe3fa89fd5443cb818f4b3be222 Mon Sep 17 00:00:00 2001 From: Alec Thomas Date: Tue, 30 Jun 2020 20:56:49 +1000 Subject: [PATCH] Split PHP into two lexers - PHP and PHTML. The former is pure PHP code while the latter is PHP code in tags, within HTML. Fixes #210. --- .golangci.yml | 5 ++ .travis.yml | 2 +- lexers/circular/php.go | 137 +++++++++++++++------------------ lexers/circular/phtml.go | 34 ++++++++ lexers/lexers_test.go | 2 +- lexers/testdata/php.actual | 12 +-- lexers/testdata/php.expected | 32 +------- lexers/testdata/phtml.actual | 19 +++++ lexers/testdata/phtml.expected | 96 +++++++++++++++++++++++ regexp.go | 8 ++ 10 files changed, 229 insertions(+), 118 deletions(-) create mode 100644 lexers/circular/phtml.go create mode 100644 lexers/testdata/phtml.actual create mode 100644 lexers/testdata/phtml.expected diff --git a/.golangci.yml b/.golangci.yml index b1e51f3..9a73537 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -20,6 +20,11 @@ linters: - wsl - gomnd - gocognit + - goerr113 + - nolintlint + - testpackage + - godot + - nestif linters-settings: govet: diff --git a/.travis.yml b/.travis.yml index 9216ec9..701c623 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,7 @@ go: - "1.13.x" script: - go test -v ./... - - curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | bash -s v1.22.2 + - curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | bash -s 1.26.0 - ./bin/golangci-lint run - git clean -fdx . after_success: diff --git a/lexers/circular/php.go b/lexers/circular/php.go index d7f8940..2107cb7 100644 --- a/lexers/circular/php.go +++ b/lexers/circular/php.go @@ -1,15 +1,12 @@ package circular import ( - "strings" - . "github.com/alecthomas/chroma" // nolint - "github.com/alecthomas/chroma/lexers/h" "github.com/alecthomas/chroma/lexers/internal" ) -// PHP lexer. 
-var PHP = internal.Register(DelegatingLexer(h.HTML, MustNewLexer( +// PHP lexer for pure PHP code (not embedded in HTML). +var PHP = internal.Register(MustNewLexer( &Config{ Name: "PHP", Aliases: []string{"php", "php3", "php4", "php5"}, @@ -19,73 +16,65 @@ var PHP = internal.Register(DelegatingLexer(h.HTML, MustNewLexer( CaseInsensitive: true, EnsureNL: true, }, - Rules{ - "root": { - {`<\?(php)?`, CommentPreproc, Push("php")}, - {`[^<]+`, Other, nil}, - {`<`, Other, nil}, - }, - "php": { - {`\?>`, CommentPreproc, Pop(1)}, - {`(<<<)([\'"]?)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)(\2\n.*?\n\s*)(\3)(;?)(\n)`, ByGroups(LiteralString, LiteralString, LiteralStringDelimiter, LiteralString, LiteralStringDelimiter, Punctuation, Text), nil}, - {`\s+`, Text, nil}, - {`#.*?\n`, CommentSingle, nil}, - {`//.*?\n`, CommentSingle, nil}, - {`/\*\*/`, CommentMultiline, nil}, - {`/\*\*.*?\*/`, LiteralStringDoc, nil}, - {`/\*.*?\*/`, CommentMultiline, nil}, - {`(->|::)(\s*)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Operator, Text, NameAttribute), nil}, - {`[~!%^&*+=|:.<>/@-]+`, Operator, nil}, - {`\?`, Operator, nil}, - {`[\[\]{}();,]+`, Punctuation, nil}, - {`(class)(\s+)`, ByGroups(Keyword, Text), Push("classname")}, - {`(function)(\s*)(?=\()`, ByGroups(Keyword, Text), nil}, - {`(function)(\s+)(&?)(\s*)`, ByGroups(Keyword, Text, Operator, Text), Push("functionname")}, - {`(const)(\s+)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Keyword, Text, NameConstant), nil}, - 
{`(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|FALSE|print|for|require|continue|foreach|require_once|declare|return|default|static|do|switch|die|stdClass|echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|virtual|endfor|include_once|while|endforeach|global|endif|list|endswitch|new|endwhile|not|array|E_ALL|NULL|final|php_user_filter|interface|implements|public|private|protected|abstract|clone|try|catch|throw|this|use|namespace|trait|yield|finally)\b`, Keyword, nil}, - {`(true|false|null)\b`, KeywordConstant, nil}, - Include("magicconstants"), - {`\$\{\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*\}`, NameVariable, nil}, - {`\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameVariable, nil}, - {`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameOther, nil}, - {`(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?`, LiteralNumberFloat, nil}, - {`\d+e[+-]?[0-9]+`, LiteralNumberFloat, nil}, - {`0[0-7]+`, LiteralNumberOct, nil}, - {`0x[a-f0-9]+`, LiteralNumberHex, nil}, - {`\d+`, LiteralNumberInteger, nil}, - {`0b[01]+`, LiteralNumberBin, nil}, - {`'([^'\\]*(?:\\.[^'\\]*)*)'`, LiteralStringSingle, nil}, - {"`([^`\\\\]*(?:\\\\.[^`\\\\]*)*)`", LiteralStringBacktick, nil}, - {`"`, LiteralStringDouble, Push("string")}, - }, - "magicfuncs": { - {Words(``, `\b`, `__construct`, `__destruct`, `__call`, `__callStatic`, `__get`, `__set`, `__isset`, `__unset`, `__sleep`, `__wakeup`, `__toString`, `__invoke`, `__set_state`, `__clone`, `__debugInfo`), NameFunctionMagic, nil}, - }, - "magicconstants": { - {Words(``, `\b`, `__LINE__`, `__FILE__`, `__DIR__`, `__FUNCTION__`, `__CLASS__`, `__TRAIT__`, `__METHOD__`, `__NAMESPACE__`), NameConstant, nil}, - }, - "classname": { - {`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameClass, Pop(1)}, - }, - "functionname": { - Include("magicfuncs"), - {`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameFunction, Pop(1)}, - Default(Pop(1)), - }, - "string": { 
- {`"`, LiteralStringDouble, Pop(1)}, - {`[^{$"\\]+`, LiteralStringDouble, nil}, - {`\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})`, LiteralStringEscape, nil}, - {`\$(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*(\[\S+?\]|->(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)?`, LiteralStringInterpol, nil}, - {`(\{\$\{)(.*?)(\}\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil}, - {`(\{)(\$.*?)(\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil}, - {`(\$\{)(\S+)(\})`, ByGroups(LiteralStringInterpol, NameVariable, LiteralStringInterpol), nil}, - {`[${\\]`, LiteralStringDouble, nil}, - }, + phpCommonRules.Rename("php", "root"), +)) + +var phpCommonRules = Rules{ + "php": { + {`\?>`, CommentPreproc, Pop(1)}, + {`(<<<)([\'"]?)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)(\2\n.*?\n\s*)(\3)(;?)(\n)`, ByGroups(LiteralString, LiteralString, LiteralStringDelimiter, LiteralString, LiteralStringDelimiter, Punctuation, Text), nil}, + {`\s+`, Text, nil}, + {`#.*?\n`, CommentSingle, nil}, + {`//.*?\n`, CommentSingle, nil}, + {`/\*\*/`, CommentMultiline, nil}, + {`/\*\*.*?\*/`, LiteralStringDoc, nil}, + {`/\*.*?\*/`, CommentMultiline, nil}, + {`(->|::)(\s*)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Operator, Text, NameAttribute), nil}, + {`[~!%^&*+=|:.<>/@-]+`, Operator, nil}, + {`\?`, Operator, nil}, + {`[\[\]{}();,]+`, Punctuation, nil}, + {`(class)(\s+)`, ByGroups(Keyword, Text), Push("classname")}, + {`(function)(\s*)(?=\()`, ByGroups(Keyword, Text), nil}, + {`(function)(\s+)(&?)(\s*)`, ByGroups(Keyword, Text, Operator, Text), Push("functionname")}, + {`(const)(\s+)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Keyword, Text, NameConstant), nil}, + 
{`(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|FALSE|print|for|require|continue|foreach|require_once|declare|return|default|static|do|switch|die|stdClass|echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|virtual|endfor|include_once|while|endforeach|global|endif|list|endswitch|new|endwhile|not|array|E_ALL|NULL|final|php_user_filter|interface|implements|public|private|protected|abstract|clone|try|catch|throw|this|use|namespace|trait|yield|finally)\b`, Keyword, nil}, + {`(true|false|null)\b`, KeywordConstant, nil}, + Include("magicconstants"), + {`\$\{\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*\}`, NameVariable, nil}, + {`\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameVariable, nil}, + {`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameOther, nil}, + {`(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?`, LiteralNumberFloat, nil}, + {`\d+e[+-]?[0-9]+`, LiteralNumberFloat, nil}, + {`0[0-7]+`, LiteralNumberOct, nil}, + {`0x[a-f0-9]+`, LiteralNumberHex, nil}, + {`\d+`, LiteralNumberInteger, nil}, + {`0b[01]+`, LiteralNumberBin, nil}, + {`'([^'\\]*(?:\\.[^'\\]*)*)'`, LiteralStringSingle, nil}, + {"`([^`\\\\]*(?:\\\\.[^`\\\\]*)*)`", LiteralStringBacktick, nil}, + {`"`, LiteralStringDouble, Push("string")}, }, -).SetAnalyser(func(text string) float32 { - if strings.Contains(text, "(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)?`, LiteralStringInterpol, nil}, + {`(\{\$\{)(.*?)(\}\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil}, + {`(\{)(\$.*?)(\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil}, + {`(\$\{)(\S+)(\})`, ByGroups(LiteralStringInterpol, NameVariable, LiteralStringInterpol), nil}, + {`[${\\]`, LiteralStringDouble, nil}, + }, +} diff --git a/lexers/circular/phtml.go b/lexers/circular/phtml.go new file mode 100644 index 0000000..b9bffd3 --- /dev/null +++ b/lexers/circular/phtml.go @@ -0,0 +1,34 @@ 
+package circular + +import ( + "strings" + + . "github.com/alecthomas/chroma" // nolint + "github.com/alecthomas/chroma/lexers/h" + "github.com/alecthomas/chroma/lexers/internal" +) + +// PHTML lexer is PHP in HTML. +var PHTML = internal.Register(DelegatingLexer(h.HTML, MustNewLexer( + &Config{ + Name: "PHTML", + Aliases: []string{"phtml"}, + Filenames: []string{"*.phtml"}, + MimeTypes: []string{"application/x-php", "application/x-httpd-php", "application/x-httpd-php3", "application/x-httpd-php4", "application/x-httpd-php5"}, + DotAll: true, + CaseInsensitive: true, + EnsureNL: true, + }, + Rules{ + "root": { + {`<\?(php)?`, CommentPreproc, Push("php")}, + {`[^<]+`, Other, nil}, + {`<`, Other, nil}, + }, + }.Merge(phpCommonRules), +).SetAnalyser(func(text string) float32 { + if strings.Contains(text, " - - - -

My first PHP page

-getIterator('modResource', ["parent" => 84]); foreach($docs as $doc){ @@ -12,8 +5,5 @@ foreach($docs as $doc){ $doc->set("content", preg_replace("/Some value/i", "Replacement", $q)); print_r($doc->content); // $doc->save(); -} +} // some comment -?> - - diff --git a/lexers/testdata/php.expected b/lexers/testdata/php.expected index 108a23b..0e0ed98 100644 --- a/lexers/testdata/php.expected +++ b/lexers/testdata/php.expected @@ -1,24 +1,4 @@ [ - {"type":"CommentPreproc","value":"\u003c!DOCTYPE html\u003e"}, - {"type":"Text","value":"\n"}, - {"type":"Punctuation","value":"\u003c"}, - {"type":"NameTag","value":"html"}, - {"type":"Punctuation","value":"\u003e"}, - {"type":"Text","value":"\n"}, - {"type":"Punctuation","value":"\u003c"}, - {"type":"NameTag","value":"body"}, - {"type":"Punctuation","value":"\u003e"}, - {"type":"Text","value":"\n\n"}, - {"type":"Punctuation","value":"\u003c"}, - {"type":"NameTag","value":"h1"}, - {"type":"Punctuation","value":"\u003e"}, - {"type":"Text","value":"My first PHP page"}, - {"type":"Punctuation","value":"\u003c/"}, - {"type":"NameTag","value":"h1"}, - {"type":"Punctuation","value":"\u003e"}, - {"type":"Text","value":"\n"}, - {"type":"CommentPreproc","value":"\u003c?php"}, - {"type":"Text","value":"\n\n"}, {"type":"NameVariable","value":"$docs"}, {"type":"Text","value":" "}, {"type":"Operator","value":"="}, @@ -81,16 +61,6 @@ {"type":"Text","value":"\n "}, {"type":"CommentSingle","value":"// $doc-\u003esave();\n"}, {"type":"Punctuation","value":"}"}, - {"type":"Text","value":" \n"}, - {"type":"CommentSingle","value":"// some comment\n"}, - {"type":"CommentPreproc","value":"?\u003e"}, {"type":"Text","value":"\n"}, - {"type":"Punctuation","value":"\u003c/"}, - {"type":"NameTag","value":"body"}, - {"type":"Punctuation","value":"\u003e"}, - {"type":"Text","value":"\n"}, - {"type":"Punctuation","value":"\u003c/"}, - {"type":"NameTag","value":"html"}, - {"type":"Punctuation","value":"\u003e"}, - {"type":"Text","value":"\n"} + 
{"type":"CommentSingle","value":"// some comment\n"} ] diff --git a/lexers/testdata/phtml.actual b/lexers/testdata/phtml.actual new file mode 100644 index 0000000..764de58 --- /dev/null +++ b/lexers/testdata/phtml.actual @@ -0,0 +1,19 @@ + + + + +

My first PHP page

+getIterator('modResource', ["parent" => 84]); + +foreach($docs as $doc){ + $q=$doc->content; + $doc->set("content", preg_replace("/Some value/i", "Replacement", $q)); + print_r($doc->content); + // $doc->save(); +} +// some comment +?> + + diff --git a/lexers/testdata/phtml.expected b/lexers/testdata/phtml.expected new file mode 100644 index 0000000..108a23b --- /dev/null +++ b/lexers/testdata/phtml.expected @@ -0,0 +1,96 @@ +[ + {"type":"CommentPreproc","value":"\u003c!DOCTYPE html\u003e"}, + {"type":"Text","value":"\n"}, + {"type":"Punctuation","value":"\u003c"}, + {"type":"NameTag","value":"html"}, + {"type":"Punctuation","value":"\u003e"}, + {"type":"Text","value":"\n"}, + {"type":"Punctuation","value":"\u003c"}, + {"type":"NameTag","value":"body"}, + {"type":"Punctuation","value":"\u003e"}, + {"type":"Text","value":"\n\n"}, + {"type":"Punctuation","value":"\u003c"}, + {"type":"NameTag","value":"h1"}, + {"type":"Punctuation","value":"\u003e"}, + {"type":"Text","value":"My first PHP page"}, + {"type":"Punctuation","value":"\u003c/"}, + {"type":"NameTag","value":"h1"}, + {"type":"Punctuation","value":"\u003e"}, + {"type":"Text","value":"\n"}, + {"type":"CommentPreproc","value":"\u003c?php"}, + {"type":"Text","value":"\n\n"}, + {"type":"NameVariable","value":"$docs"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"="}, + {"type":"Text","value":" "}, + {"type":"NameVariable","value":"$modx"}, + {"type":"Operator","value":"-\u003e"}, + {"type":"NameAttribute","value":"getIterator"}, + {"type":"Punctuation","value":"("}, + {"type":"LiteralStringSingle","value":"'modResource'"}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":" "}, + {"type":"Punctuation","value":"["}, + {"type":"LiteralStringDouble","value":"\"parent\""}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"=\u003e"}, + {"type":"Text","value":" "}, + {"type":"LiteralNumberInteger","value":"84"}, + {"type":"Punctuation","value":"]);"}, + 
{"type":"Text","value":"\n\n"}, + {"type":"Keyword","value":"foreach"}, + {"type":"Punctuation","value":"("}, + {"type":"NameVariable","value":"$docs"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"as"}, + {"type":"Text","value":" "}, + {"type":"NameVariable","value":"$doc"}, + {"type":"Punctuation","value":"){"}, + {"type":"Text","value":"\n "}, + {"type":"NameVariable","value":"$q"}, + {"type":"Operator","value":"="}, + {"type":"NameVariable","value":"$doc"}, + {"type":"Operator","value":"-\u003e"}, + {"type":"NameAttribute","value":"content"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n "}, + {"type":"NameVariable","value":"$doc"}, + {"type":"Operator","value":"-\u003e"}, + {"type":"NameAttribute","value":"set"}, + {"type":"Punctuation","value":"("}, + {"type":"LiteralStringDouble","value":"\"content\""}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":" "}, + {"type":"NameOther","value":"preg_replace"}, + {"type":"Punctuation","value":"("}, + {"type":"LiteralStringDouble","value":"\"/Some value/i\""}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":" "}, + {"type":"LiteralStringDouble","value":"\"Replacement\""}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":" "}, + {"type":"NameVariable","value":"$q"}, + {"type":"Punctuation","value":"));"}, + {"type":"Text","value":"\n "}, + {"type":"NameOther","value":"print_r"}, + {"type":"Punctuation","value":"("}, + {"type":"NameVariable","value":"$doc"}, + {"type":"Operator","value":"-\u003e"}, + {"type":"NameAttribute","value":"content"}, + {"type":"Punctuation","value":");"}, + {"type":"Text","value":"\n "}, + {"type":"CommentSingle","value":"// $doc-\u003esave();\n"}, + {"type":"Punctuation","value":"}"}, + {"type":"Text","value":" \n"}, + {"type":"CommentSingle","value":"// some comment\n"}, + {"type":"CommentPreproc","value":"?\u003e"}, + {"type":"Text","value":"\n"}, + {"type":"Punctuation","value":"\u003c/"}, + 
{"type":"NameTag","value":"body"}, + {"type":"Punctuation","value":"\u003e"}, + {"type":"Text","value":"\n"}, + {"type":"Punctuation","value":"\u003c/"}, + {"type":"NameTag","value":"html"}, + {"type":"Punctuation","value":"\u003e"}, + {"type":"Text","value":"\n"} +] diff --git a/regexp.go b/regexp.go index 96bcce6..590bcab 100644 --- a/regexp.go +++ b/regexp.go @@ -160,6 +160,14 @@ func Tokenise(lexer Lexer, options *TokeniseOptions, text string) ([]Token, erro // Rules maps from state to a sequence of Rules. type Rules map[string][]Rule +// Rename clones the rules, then renames the state old to new in the clone. +func (r Rules) Rename(old, new string) Rules { + r = r.Clone() + r[new] = r[old] + delete(r, old) + return r +} + // Clone returns a clone of the Rules. func (r Rules) Clone() Rules { out := map[string][]Rule{}