1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-01-12 01:22:30 +02:00

Fix bug with nested newlines.

Fixes #124.

Also reinstitute lexer tests that disappeared during package split.
This commit is contained in:
Alec Thomas 2018-03-03 10:16:21 +11:00
parent aaa96c6984
commit 3020e2ea8c
18 changed files with 99 additions and 73 deletions

View File

@ -85,7 +85,7 @@ def resolve_emitter(emitter):
name = args.__name__
if name.endswith('Lexer'):
name = name[:-5]
emitter = 'Using(%s, nil)' % name
emitter = 'Using(%s)' % name
else:
raise ValueError('only support "using" with lexer classes, not %r' % args)
else:

View File

@ -84,7 +84,7 @@ func TestClassPrefix(t *testing.T) {
for st := range chroma.StandardTypes {
if noPrefix.class(st) == "" {
if got := withPrefix.class(st); got != "" {
t.Errorf("Formatter.class(%v): prefix shouldn't be added to empty classes")
t.Errorf("Formatter.class(%v): prefix shouldn't be added to empty classes", st)
}
} else if got := withPrefix.class(st); !strings.HasPrefix(got, wantPrefix) {
t.Errorf("Formatter.class(%v): %q should have a class prefix", st, got)

View File

@ -75,6 +75,8 @@ func (t *Token) Clone() *Token {
// TokeniseOptions contains options passed to Lexer.Tokenise.
type TokeniseOptions struct {
// State to start tokenisation in. Defaults to "root".
State string
// Nested tokenisation.
// Set when a lexer is invoked as a sub-lexer (e.g. via Using/UsingSelf);
// suppresses top-level-only behaviour such as the EnsureNL trailing-newline fixup.
Nested bool
}
// A Lexer for tokenising source code.

View File

@ -20,9 +20,9 @@ var Cheetah = internal.Register(MustNewLexer(
{`#[*](.|\n)*?[*]#`, Comment, nil},
{`#end[^#\n]*(?:#|$)`, CommentPreproc, nil},
{`#slurp$`, CommentPreproc, nil},
{`(#[a-zA-Z]+)([^#\n]*)(#|$)`, ByGroups(CommentPreproc, Using(Python, nil), CommentPreproc), nil},
{`(\$)([a-zA-Z_][\w.]*\w)`, ByGroups(CommentPreproc, Using(Python, nil)), nil},
{`(\$\{!?)(.*?)(\})(?s)`, ByGroups(CommentPreproc, Using(Python, nil), CommentPreproc), nil},
{`(#[a-zA-Z]+)([^#\n]*)(#|$)`, ByGroups(CommentPreproc, Using(Python), CommentPreproc), nil},
{`(\$)([a-zA-Z_][\w.]*\w)`, ByGroups(CommentPreproc, Using(Python)), nil},
{`(\$\{!?)(.*?)(\})(?s)`, ByGroups(CommentPreproc, Using(Python), CommentPreproc), nil},
{`(?sx)
(.+?) # anything, followed by:
(?:

View File

@ -21,7 +21,7 @@ var Docker = internal.Register(MustNewLexer(
{`^((?:FROM|MAINTAINER|CMD|EXPOSE|ENV|ADD|ENTRYPOINT|VOLUME|WORKDIR))\b(.*)`, ByGroups(Keyword, LiteralString), nil},
{`#.*`, Comment, nil},
{`RUN`, Keyword, nil},
{`(.*\\\n)*.+`, Using(Bash, nil), nil},
{`(.*\\\n)*.+`, Using(Bash), nil},
},
},
))

View File

@ -24,13 +24,13 @@ var GenshiText = internal.Register(MustNewLexer(
},
"directive": {
{`\n`, Text, Pop(1)},
{`(?:def|for|if)\s+.*`, Using(Python, nil), Pop(1)},
{`(choose|when|with)([^\S\n]+)(.*)`, ByGroups(Keyword, Text, Using(Python, nil)), Pop(1)},
{`(?:def|for|if)\s+.*`, Using(Python), Pop(1)},
{`(choose|when|with)([^\S\n]+)(.*)`, ByGroups(Keyword, Text, Using(Python)), Pop(1)},
{`(choose|otherwise)\b`, Keyword, Pop(1)},
{`(end\w*)([^\S\n]*)(.*)`, ByGroups(Keyword, Text, Comment), Pop(1)},
},
"variable": {
{`(?<!\$)(\$\{)(.+?)(\})`, ByGroups(CommentPreproc, Using(Python, nil), CommentPreproc), nil},
{`(?<!\$)(\$\{)(.+?)(\})`, ByGroups(CommentPreproc, Using(Python), CommentPreproc), nil},
{`(?<!\$)(\$)([a-zA-Z_][\w.]*)`, NameVariable, nil},
},
},
@ -65,7 +65,7 @@ var Genshi = internal.Register(MustNewLexer(
var genshiMarkupRules = Rules{
"root": {
{`[^<$]+`, Other, nil},
{`(<\?python)(.*?)(\?>)`, ByGroups(CommentPreproc, Using(Python, nil), CommentPreproc), nil},
{`(<\?python)(.*?)(\?>)`, ByGroups(CommentPreproc, Using(Python), CommentPreproc), nil},
{`<\s*(script|style)\s*.*?>.*?<\s*/\1\s*>`, Other, nil},
{`<\s*py:[a-zA-Z0-9]+`, NameTag, Push("pytag")},
{`<\s*[a-zA-Z0-9:.]+`, NameTag, Push("tag")},
@ -78,8 +78,8 @@ var genshiMarkupRules = Rules{
{`/?\s*>`, NameTag, Pop(1)},
},
"pyattr": {
{`(")(.*?)(")`, ByGroups(LiteralString, Using(Python, nil), LiteralString), Pop(1)},
{`(')(.*?)(')`, ByGroups(LiteralString, Using(Python, nil), LiteralString), Pop(1)},
{`(")(.*?)(")`, ByGroups(LiteralString, Using(Python), LiteralString), Pop(1)},
{`(')(.*?)(')`, ByGroups(LiteralString, Using(Python), LiteralString), Pop(1)},
{`[^\s>]+`, LiteralString, Pop(1)},
},
"tag": {
@ -108,7 +108,7 @@ var genshiMarkupRules = Rules{
Include("variable"),
},
"variable": {
{`(?<!\$)(\$\{)(.+?)(\})`, ByGroups(CommentPreproc, Using(Python, nil), CommentPreproc), nil},
{`(?<!\$)(\$\{)(.+?)(\})`, ByGroups(CommentPreproc, Using(Python), CommentPreproc), nil},
{`(?<!\$)(\$)([a-zA-Z_][\w\.]*)`, NameVariable, nil},
},
}

View File

@ -44,11 +44,11 @@ var HTML = internal.Register(MustNewLexer(
},
"script-content": {
{`(<)(\s*)(/)(\s*)(script)(\s*)(>)`, ByGroups(Punctuation, Text, Punctuation, Text, NameTag, Text, Punctuation), Pop(1)},
{`.+?(?=<\s*/\s*script\s*>)`, Using(Javascript, nil), nil},
{`.+?(?=<\s*/\s*script\s*>)`, Using(Javascript), nil},
},
"style-content": {
{`(<)(\s*)(/)(\s*)(style)(\s*)(>)`, ByGroups(Punctuation, Text, Punctuation, Text, NameTag, Text, Punctuation), Pop(1)},
{`.+?(?=<\s*/\s*style\s*>)`, Using(CSS, nil), nil},
{`.+?(?=<\s*/\s*style\s*>)`, Using(CSS), nil},
},
"attr": {
{`".*?"`, LiteralString, Pop(1)},

View File

@ -1,10 +1,17 @@
package lexers_test
import (
"encoding/json"
"io/ioutil"
"os"
"path/filepath"
"strings"
"testing"
"github.com/stretchr/testify/require"
"github.com/alecthomas/assert"
"github.com/alecthomas/chroma"
"github.com/alecthomas/chroma/formatters"
"github.com/alecthomas/chroma/lexers"
@ -33,3 +40,41 @@ func TestGet(t *testing.T) {
assert.Equal(t, lexers.Get("svg"), x.XML)
})
}
// TestLexers tokenises each testdata/<name>.actual file with the lexer
// registered under <name> and compares the resulting token stream against
// the JSON-encoded tokens in the matching <name>.expected file.
//
// Test source files are in the form <key>.actual and validation data is in
// the form <key>.expected.
func TestLexers(t *testing.T) {
	files, err := ioutil.ReadDir("testdata")
	require.NoError(t, err)
	for _, file := range files {
		// Keep the leading dot so files without any extension (Ext == "")
		// are skipped safely instead of panicking on a [1:] slice.
		ext := filepath.Ext(file.Name())
		if ext != ".actual" {
			continue
		}
		lexer := lexers.Get(strings.TrimSuffix(file.Name(), ext))
		assert.NotNil(t, lexer)
		filename := filepath.Join("testdata", file.Name())
		expectedFilename := strings.TrimSuffix(filename, ext) + ".expected"
		lexer = chroma.Coalesce(lexer)
		t.Run(lexer.Config().Name, func(t *testing.T) {
			// Read and tokenise source text.
			actualText, err := ioutil.ReadFile(filename)
			assert.NoError(t, err)
			actual, err := chroma.Tokenise(lexer, nil, string(actualText))
			assert.NoError(t, err)
			// Read expected JSON into token slice.
			expected := []*chroma.Token{}
			r, err := os.Open(expectedFilename)
			assert.NoError(t, err)
			// Close the handle even if decoding fails; the original leaked it.
			defer r.Close()
			err = json.NewDecoder(r).Decode(&expected)
			assert.NoError(t, err)
			// Equal?
			assert.Equal(t, expected, actual)
		})
	}
}

View File

@ -17,13 +17,13 @@ var Makefile = internal.Register(MustNewLexer(
},
Rules{
"root": {
{`^(?:[\t ]+.*\n|\n)+`, Using(Bash, nil), nil},
{`^(?:[\t ]+.*\n|\n)+`, Using(Bash), nil},
{`\$[<@$+%?|*]`, Keyword, nil},
{`\s+`, Text, nil},
{`#.*?\n`, Comment, nil},
{`(export)(\s+)(?=[\w${}\t -]+\n)`, ByGroups(Keyword, Text), Push("export")},
{`export\s+`, Keyword, nil},
{`([\w${}().-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)`, ByGroups(NameVariable, Text, Operator, Text, Using(Bash, nil)), nil},
{`([\w${}().-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)`, ByGroups(NameVariable, Text, Operator, Text, Using(Bash)), nil},
{`(?s)"(\\\\|\\.|[^"\\])*"`, LiteralStringDouble, nil},
{`(?s)'(\\\\|\\.|[^'\\])*'`, LiteralStringSingle, nil},
{`([^\n:]+)(:+)([ \t]*)`, ByGroups(NameFunction, Operator, Text), Push("block-header")},

View File

@ -17,14 +17,14 @@ var Mako = internal.Register(MustNewLexer(
Rules{
"root": {
{`(\s*)(%)(\s*end(?:\w+))(\n|\Z)`, ByGroups(Text, CommentPreproc, Keyword, Other), nil},
{`(\s*)(%)([^\n]*)(\n|\Z)`, ByGroups(Text, CommentPreproc, Using(Python, nil), Other), nil},
{`(\s*)(%)([^\n]*)(\n|\Z)`, ByGroups(Text, CommentPreproc, Using(Python), Other), nil},
{`(\s*)(##[^\n]*)(\n|\Z)`, ByGroups(Text, CommentPreproc, Other), nil},
{`(?s)<%doc>.*?</%doc>`, CommentPreproc, nil},
{`(<%)([\w.:]+)`, ByGroups(CommentPreproc, NameBuiltin), Push("tag")},
{`(</%)([\w.:]+)(>)`, ByGroups(CommentPreproc, NameBuiltin, CommentPreproc), nil},
{`<%(?=([\w.:]+))`, CommentPreproc, Push("ondeftags")},
{`(<%(?:!?))(.*?)(%>)(?s)`, ByGroups(CommentPreproc, Using(Python, nil), CommentPreproc), nil},
{`(\$\{)(.*?)(\})`, ByGroups(CommentPreproc, Using(Python, nil), CommentPreproc), nil},
{`(<%(?:!?))(.*?)(%>)(?s)`, ByGroups(CommentPreproc, Using(Python), CommentPreproc), nil},
{`(\$\{)(.*?)(\})`, ByGroups(CommentPreproc, Using(Python), CommentPreproc), nil},
{`(?sx)
(.+?) # anything, followed by:
(?:

View File

@ -21,13 +21,13 @@ var Mason = internal.Register(MustNewLexer(
{`\s+`, Text, nil},
{`(<%doc>)(.*?)(</%doc>)(?s)`, ByGroups(NameTag, CommentMultiline, NameTag), nil},
{`(<%(?:def|method))(\s*)(.*?)(>)(.*?)(</%\2\s*>)(?s)`, ByGroups(NameTag, Text, NameFunction, NameTag, UsingSelf("root"), NameTag), nil},
{`(<%\w+)(.*?)(>)(.*?)(</%\2\s*>)(?s)`, ByGroups(NameTag, NameFunction, NameTag, Using(Perl, nil), NameTag), nil},
{`(<&[^|])(.*?)(,.*?)?(&>)(?s)`, ByGroups(NameTag, NameFunction, Using(Perl, nil), NameTag), nil},
{`(<&\|)(.*?)(,.*?)?(&>)(?s)`, ByGroups(NameTag, NameFunction, Using(Perl, nil), NameTag), nil},
{`(<%\w+)(.*?)(>)(.*?)(</%\2\s*>)(?s)`, ByGroups(NameTag, NameFunction, NameTag, Using(Perl), NameTag), nil},
{`(<&[^|])(.*?)(,.*?)?(&>)(?s)`, ByGroups(NameTag, NameFunction, Using(Perl), NameTag), nil},
{`(<&\|)(.*?)(,.*?)?(&>)(?s)`, ByGroups(NameTag, NameFunction, Using(Perl), NameTag), nil},
{`</&>`, NameTag, nil},
{`(<%!?)(.*?)(%>)(?s)`, ByGroups(NameTag, Using(Perl, nil), NameTag), nil},
{`(<%!?)(.*?)(%>)(?s)`, ByGroups(NameTag, Using(Perl), NameTag), nil},
{`(?<=^)#[^\n]*(\n|\Z)`, Comment, nil},
{`(?<=^)(%)([^\n]*)(\n|\Z)`, ByGroups(NameTag, Using(Perl, nil), Other), nil},
{`(?<=^)(%)([^\n]*)(\n|\Z)`, ByGroups(NameTag, Using(Perl), Other), nil},
{`(?sx)
(.+?) # anything, followed by:
(?:
@ -37,7 +37,7 @@ var Mason = internal.Register(MustNewLexer(
# - don't consume
(\\\n) | # an escaped newline
\Z # end of string
)`, ByGroups(Using(HTML, nil), Operator), nil},
)`, ByGroups(Using(HTML), Operator), nil},
},
},
))

View File

@ -18,13 +18,13 @@ var Myghty = internal.Register(MustNewLexer(
"root": {
{`\s+`, Text, nil},
{`(<%(?:def|method))(\s*)(.*?)(>)(.*?)(</%\2\s*>)(?s)`, ByGroups(NameTag, Text, NameFunction, NameTag, UsingSelf("root"), NameTag), nil},
{`(<%\w+)(.*?)(>)(.*?)(</%\2\s*>)(?s)`, ByGroups(NameTag, NameFunction, NameTag, Using(Python, nil), NameTag), nil},
{`(<&[^|])(.*?)(,.*?)?(&>)`, ByGroups(NameTag, NameFunction, Using(Python, nil), NameTag), nil},
{`(<&\|)(.*?)(,.*?)?(&>)(?s)`, ByGroups(NameTag, NameFunction, Using(Python, nil), NameTag), nil},
{`(<%\w+)(.*?)(>)(.*?)(</%\2\s*>)(?s)`, ByGroups(NameTag, NameFunction, NameTag, Using(Python), NameTag), nil},
{`(<&[^|])(.*?)(,.*?)?(&>)`, ByGroups(NameTag, NameFunction, Using(Python), NameTag), nil},
{`(<&\|)(.*?)(,.*?)?(&>)(?s)`, ByGroups(NameTag, NameFunction, Using(Python), NameTag), nil},
{`</&>`, NameTag, nil},
{`(<%!?)(.*?)(%>)(?s)`, ByGroups(NameTag, Using(Python, nil), NameTag), nil},
{`(<%!?)(.*?)(%>)(?s)`, ByGroups(NameTag, Using(Python), NameTag), nil},
{`(?<=^)#[^\n]*(\n|\Z)`, Comment, nil},
{`(?<=^)(%)([^\n]*)(\n|\Z)`, ByGroups(NameTag, Using(Python, nil), Other), nil},
{`(?<=^)(%)([^\n]*)(\n|\Z)`, ByGroups(NameTag, Using(Python), Other), nil},
{`(?sx)
(.+?) # anything, followed by:
(?:

View File

@ -19,7 +19,7 @@ var Smarty = internal.Register(MustNewLexer(
"root": {
{`[^{]+`, Other, nil},
{`(\{)(\*.*?\*)(\})`, ByGroups(CommentPreproc, Comment, CommentPreproc), nil},
{`(\{php\})(.*?)(\{/php\})`, ByGroups(CommentPreproc, Using(PHP, nil), CommentPreproc), nil},
{`(\{php\})(.*?)(\{/php\})`, ByGroups(CommentPreproc, Using(PHP), CommentPreproc), nil},
{`(\{)(/?[a-zA-Z_]\w*)(\s*)`, ByGroups(CommentPreproc, NameFunction, Text), Push("smarty")},
{`\{`, CommentPreproc, Push("smarty")},
},

View File

@ -42,9 +42,9 @@ var Typoscript = internal.Register(MustNewLexer(
{`\s+`, Text, nil},
},
"html": {
{`<\S[^\n>]*>`, Using(TypoScriptHTMLData, nil), nil},
{`<\S[^\n>]*>`, Using(TypoScriptHTMLData), nil},
{`&[^;\n]*;`, LiteralString, nil},
{`(_CSS_DEFAULT_STYLE)(\s*)(\()(?s)(.*(?=\n\)))`, ByGroups(NameClass, Text, LiteralStringSymbol, Using(TypoScriptCSSData, nil)), nil},
{`(_CSS_DEFAULT_STYLE)(\s*)(\()(?s)(.*(?=\n\)))`, ByGroups(NameClass, Text, LiteralStringSymbol, Using(TypoScriptCSSData)), nil},
},
"literal": {
{`0x[0-9A-Fa-f]+t?`, LiteralNumberHex, nil},

View File

@ -1,3 +1,3 @@
template <typename T>
void func(const std::string &s, const T &t) {
} // Do interesting things.
int main() {
return 0;
}

View File

@ -1,37 +1,16 @@
[
{"type":"Keyword","value":"template"},
{"type":"KeywordType","value":"int"},
{"type":"Text","value":" "},
{"type":"Operator","value":"\u003c"},
{"type":"Keyword","value":"typename"},
{"type":"Text","value":" "},
{"type":"Name","value":"T"},
{"type":"Operator","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"KeywordType","value":"void"},
{"type":"Text","value":" "},
{"type":"Name","value":"func"},
{"type":"Punctuation","value":"("},
{"type":"Keyword","value":"const"},
{"type":"Text","value":" "},
{"type":"Name","value":"std"},
{"type":"Operator","value":"::"},
{"type":"Name","value":"string"},
{"type":"Text","value":" "},
{"type":"Operator","value":"\u0026"},
{"type":"Name","value":"s"},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"Keyword","value":"const"},
{"type":"Text","value":" "},
{"type":"Name","value":"T"},
{"type":"Text","value":" "},
{"type":"Operator","value":"\u0026"},
{"type":"Name","value":"t"},
{"type":"Punctuation","value":")"},
{"type":"NameFunction","value":"main"},
{"type":"Punctuation","value":"()"},
{"type":"Text","value":" "},
{"type":"Punctuation","value":"{"},
{"type":"Text","value":"\n "},
{"type":"Keyword","value":"return"},
{"type":"Text","value":" "},
{"type":"LiteralNumberInteger","value":"0"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"}"},
{"type":"Text","value":" "},
{"type":"CommentSingle","value":"// Do interesting things.\n"}
{"type":"Text","value":"\n"}
]

View File

@ -16,8 +16,8 @@ var Viml = internal.Register(MustNewLexer(
},
Rules{
"root": {
{`^([ \t:]*)(py(?:t(?:h(?:o(?:n)?)?)?)?)([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)`, ByGroups(UsingSelf("root"), Keyword, Text, Operator, Text, Text, Using(Python, nil), Text), nil},
{`^([ \t:]*)(py(?:t(?:h(?:o(?:n)?)?)?)?)([ \t])(.*)`, ByGroups(UsingSelf("root"), Keyword, Text, Using(Python, nil)), nil},
{`^([ \t:]*)(py(?:t(?:h(?:o(?:n)?)?)?)?)([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)`, ByGroups(UsingSelf("root"), Keyword, Text, Operator, Text, Text, Using(Python), Text), nil},
{`^([ \t:]*)(py(?:t(?:h(?:o(?:n)?)?)?)?)([ \t])(.*)`, ByGroups(UsingSelf("root"), Keyword, Text, Using(Python)), nil},
{`^\s*".*`, Comment, nil},
{`[ \t]+`, Text, nil},
{`/(\\\\|\\/|[^\n/])*/`, LiteralStringRegex, nil},

View File

@ -42,9 +42,9 @@ func ByGroups(emitters ...Emitter) Emitter {
}
// Using returns an Emitter that uses a given Lexer for parsing and emitting.
func Using(lexer Lexer, options *TokeniseOptions) Emitter {
func Using(lexer Lexer) Emitter {
return EmitterFunc(func(groups []string, _ Lexer) Iterator {
it, err := lexer.Tokenise(options, groups[0])
it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
if err != nil {
panic(err)
}
@ -55,7 +55,7 @@ func Using(lexer Lexer, options *TokeniseOptions) Emitter {
// UsingSelf is like Using, but uses the current Lexer.
func UsingSelf(state string) Emitter {
return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
it, err := lexer.Tokenise(&TokeniseOptions{State: state}, groups[0])
it, err := lexer.Tokenise(&TokeniseOptions{State: state, Nested: true}, groups[0])
if err != nil {
panic(err)
}
@ -309,7 +309,7 @@ func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string) (Iterator,
if options == nil {
options = defaultOptions
}
if r.config.EnsureNL && !strings.HasSuffix(text, "\n") {
if !options.Nested && r.config.EnsureNL && !strings.HasSuffix(text, "\n") {
text += "\n"
}
state := &LexerState{