diff --git a/_tools/pygments2chroma.py b/_tools/pygments2chroma.py index d84d998..76efbae 100644 --- a/_tools/pygments2chroma.py +++ b/_tools/pygments2chroma.py @@ -85,7 +85,7 @@ def resolve_emitter(emitter): name = args.__name__ if name.endswith('Lexer'): name = name[:-5] - emitter = 'Using(%s, nil)' % name + emitter = 'Using(%s)' % name else: raise ValueError('only support "using" with lexer classes, not %r' % args) else: diff --git a/formatters/html/html_test.go b/formatters/html/html_test.go index 6e30b76..4e7760f 100644 --- a/formatters/html/html_test.go +++ b/formatters/html/html_test.go @@ -84,7 +84,7 @@ func TestClassPrefix(t *testing.T) { for st := range chroma.StandardTypes { if noPrefix.class(st) == "" { if got := withPrefix.class(st); got != "" { - t.Errorf("Formatter.class(%v): prefix shouldn't be added to empty classes") + t.Errorf("Formatter.class(%v): prefix shouldn't be added to empty classes", st) } } else if got := withPrefix.class(st); !strings.HasPrefix(got, wantPrefix) { t.Errorf("Formatter.class(%v): %q should have a class prefix", st, got) diff --git a/lexer.go b/lexer.go index fbf4233..e881762 100644 --- a/lexer.go +++ b/lexer.go @@ -75,6 +75,8 @@ func (t *Token) Clone() *Token { type TokeniseOptions struct { // State to start tokenisation in. Defaults to "root". State string + // Nested tokenisation. + Nested bool } // A Lexer for tokenising source code. diff --git a/lexers/c/cheetah.go b/lexers/c/cheetah.go index 385a859..b2cb9c4 100644 --- a/lexers/c/cheetah.go +++ b/lexers/c/cheetah.go @@ -20,9 +20,9 @@ var Cheetah = internal.Register(MustNewLexer( {`#[*](.|\n)*?[*]#`, Comment, nil}, {`#end[^#\n]*(?:#|$)`, CommentPreproc, nil}, {`#slurp$`, CommentPreproc, nil}, - {`(#[a-zA-Z]+)([^#\n]*)(#|$)`, ByGroups(CommentPreproc, Using(Python, nil), CommentPreproc), nil}, - {`(\$)([a-zA-Z_][\w.]*\w)`, ByGroups(CommentPreproc, Using(Python, nil)), nil}, - {`(\$\{!?)(.*?)(\})(?s)`, ByGroups(CommentPreproc, Using(Python, nil), CommentPreproc), nil}, + {`(#[a-zA-Z]+)([^#\n]*)(#|$)`, ByGroups(CommentPreproc, Using(Python), CommentPreproc), nil}, + {`(\$)([a-zA-Z_][\w.]*\w)`, ByGroups(CommentPreproc, Using(Python)), nil}, + {`(\$\{!?)(.*?)(\})(?s)`, ByGroups(CommentPreproc, Using(Python), CommentPreproc), nil}, {`(?sx) (.+?) # anything, followed by: (?: diff --git a/lexers/d/docker.go b/lexers/d/docker.go index 48640c0..412646b 100644 --- a/lexers/d/docker.go +++ b/lexers/d/docker.go @@ -21,7 +21,7 @@ var Docker = internal.Register(MustNewLexer( {`^((?:FROM|MAINTAINER|CMD|EXPOSE|ENV|ADD|ENTRYPOINT|VOLUME|WORKDIR))\b(.*)`, ByGroups(Keyword, LiteralString), nil}, {`#.*`, Comment, nil}, {`RUN`, Keyword, nil}, - {`(.*\\\n)*.+`, Using(Bash, nil), nil}, + {`(.*\\\n)*.+`, Using(Bash), nil}, }, }, )) diff --git a/lexers/g/genshi.go b/lexers/g/genshi.go index 3f810da..0d3663a 100644 --- a/lexers/g/genshi.go +++ b/lexers/g/genshi.go @@ -24,13 +24,13 @@ var GenshiText = internal.Register(MustNewLexer( }, "directive": { {`\n`, Text, Pop(1)}, - {`(?:def|for|if)\s+.*`, Using(Python, nil), Pop(1)}, - {`(choose|when|with)([^\S\n]+)(.*)`, ByGroups(Keyword, Text, Using(Python, nil)), Pop(1)}, + {`(?:def|for|if)\s+.*`, Using(Python), Pop(1)}, + {`(choose|when|with)([^\S\n]+)(.*)`, ByGroups(Keyword, Text, Using(Python)), Pop(1)}, {`(choose|otherwise)\b`, Keyword, Pop(1)}, {`(end\w*)([^\S\n]*)(.*)`, ByGroups(Keyword, Text, Comment), Pop(1)}, }, "variable": { - {`(?)`, ByGroups(CommentPreproc, Using(Python, nil), CommentPreproc), nil}, + {`(<\?python)(.*?)(\?>)`, ByGroups(CommentPreproc, Using(Python), CommentPreproc), nil}, {`<\s*(script|style)\s*.*?>.*?<\s*/\1\s*>`, Other, nil}, {`<\s*py:[a-zA-Z0-9]+`, NameTag, Push("pytag")}, {`<\s*[a-zA-Z0-9:.]+`, NameTag, Push("tag")}, @@ -78,8 +78,8 @@ var genshiMarkupRules = Rules{ {`/?\s*>`, NameTag, Pop(1)}, }, "pyattr": { - {`(")(.*?)(")`, ByGroups(LiteralString, Using(Python, nil), LiteralString), Pop(1)}, - {`(')(.*?)(')`, ByGroups(LiteralString, Using(Python, nil), LiteralString), Pop(1)}, + {`(")(.*?)(")`, ByGroups(LiteralString, Using(Python), LiteralString), Pop(1)}, + {`(')(.*?)(')`, ByGroups(LiteralString, Using(Python), LiteralString), Pop(1)}, {`[^\s>]+`, LiteralString, Pop(1)}, }, "tag": { @@ -108,7 +108,7 @@ var genshiMarkupRules = Rules{ Include("variable"), }, "variable": { - {`(?)`, ByGroups(Punctuation, Text, Punctuation, Text, NameTag, Text, Punctuation), Pop(1)}, - {`.+?(?=<\s*/\s*script\s*>)`, Using(Javascript, nil), nil}, + {`.+?(?=<\s*/\s*script\s*>)`, Using(Javascript), nil}, }, "style-content": { {`(<)(\s*)(/)(\s*)(style)(\s*)(>)`, ByGroups(Punctuation, Text, Punctuation, Text, NameTag, Text, Punctuation), Pop(1)}, - {`.+?(?=<\s*/\s*style\s*>)`, Using(CSS, nil), nil}, + {`.+?(?=<\s*/\s*style\s*>)`, Using(CSS), nil}, }, "attr": { {`".*?"`, LiteralString, Pop(1)}, diff --git a/lexers/lexers_test.go b/lexers/lexers_test.go index e87a640..e2743c7 100644 --- a/lexers/lexers_test.go +++ b/lexers/lexers_test.go @@ -1,10 +1,17 @@ package lexers_test import ( + "encoding/json" "io/ioutil" + "os" + "path/filepath" + "strings" "testing" + "github.com/stretchr/testify/require" + "github.com/alecthomas/assert" + "github.com/alecthomas/chroma" "github.com/alecthomas/chroma/formatters" "github.com/alecthomas/chroma/lexers" @@ -33,3 +40,41 @@ func TestGet(t *testing.T) { assert.Equal(t, lexers.Get("svg"), x.XML) }) } + +// Test source files are in the form . and validation data is in the form ..expected. +func TestLexers(t *testing.T) { + files, err := ioutil.ReadDir("testdata") + require.NoError(t, err) + + for _, file := range files { + ext := filepath.Ext(file.Name())[1:] + if ext != "actual" { + continue + } + + lexer := lexers.Get(strings.TrimSuffix(file.Name(), filepath.Ext(file.Name()))) + assert.NotNil(t, lexer) + + filename := filepath.Join("testdata", file.Name()) + expectedFilename := strings.TrimSuffix(filename, filepath.Ext(filename)) + ".expected" + + lexer = chroma.Coalesce(lexer) + t.Run(lexer.Config().Name, func(t *testing.T) { + // Read and tokenise source text. + actualText, err := ioutil.ReadFile(filename) + assert.NoError(t, err) + actual, err := chroma.Tokenise(lexer, nil, string(actualText)) + assert.NoError(t, err) + + // Read expected JSON into token slice. + expected := []*chroma.Token{} + r, err := os.Open(expectedFilename) + assert.NoError(t, err) + err = json.NewDecoder(r).Decode(&expected) + assert.NoError(t, err) + + // Equal? + assert.Equal(t, expected, actual) + }) + } +} diff --git a/lexers/m/make.go b/lexers/m/make.go index 0429f3f..eb9d9e6 100644 --- a/lexers/m/make.go +++ b/lexers/m/make.go @@ -17,13 +17,13 @@ var Makefile = internal.Register(MustNewLexer( }, Rules{ "root": { - {`^(?:[\t ]+.*\n|\n)+`, Using(Bash, nil), nil}, + {`^(?:[\t ]+.*\n|\n)+`, Using(Bash), nil}, {`\$[<@$+%?|*]`, Keyword, nil}, {`\s+`, Text, nil}, {`#.*?\n`, Comment, nil}, {`(export)(\s+)(?=[\w${}\t -]+\n)`, ByGroups(Keyword, Text), Push("export")}, {`export\s+`, Keyword, nil}, - {`([\w${}().-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)`, ByGroups(NameVariable, Text, Operator, Text, Using(Bash, nil)), nil}, + {`([\w${}().-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)`, ByGroups(NameVariable, Text, Operator, Text, Using(Bash)), nil}, {`(?s)"(\\\\|\\.|[^"\\])*"`, LiteralStringDouble, nil}, {`(?s)'(\\\\|\\.|[^'\\])*'`, LiteralStringSingle, nil}, {`([^\n:]+)(:+)([ \t]*)`, ByGroups(NameFunction, Operator, Text), Push("block-header")}, diff --git a/lexers/m/mako.go b/lexers/m/mako.go index 5b89f44..f7c140d 100644 --- a/lexers/m/mako.go +++ b/lexers/m/mako.go @@ -17,14 +17,14 @@ var Mako = internal.Register(MustNewLexer( Rules{ "root": { {`(\s*)(%)(\s*end(?:\w+))(\n|\Z)`, ByGroups(Text, CommentPreproc, Keyword, Other), nil}, - {`(\s*)(%)([^\n]*)(\n|\Z)`, ByGroups(Text, CommentPreproc, Using(Python, nil), Other), nil}, + {`(\s*)(%)([^\n]*)(\n|\Z)`, ByGroups(Text, CommentPreproc, Using(Python), Other), nil}, {`(\s*)(##[^\n]*)(\n|\Z)`, ByGroups(Text, CommentPreproc, Other), nil}, {`(?s)<%doc>.*?`, CommentPreproc, nil}, {`(<%)([\w.:]+)`, ByGroups(CommentPreproc, NameBuiltin), Push("tag")}, {`()`, ByGroups(CommentPreproc, NameBuiltin, CommentPreproc), nil}, {`<%(?=([\w.:]+))`, CommentPreproc, Push("ondeftags")}, - {`(<%(?:!?))(.*?)(%>)(?s)`, ByGroups(CommentPreproc, Using(Python, nil), CommentPreproc), nil}, - {`(\$\{)(.*?)(\})`, ByGroups(CommentPreproc, Using(Python, nil), CommentPreproc), nil}, + {`(<%(?:!?))(.*?)(%>)(?s)`, ByGroups(CommentPreproc, Using(Python), CommentPreproc), nil}, + {`(\$\{)(.*?)(\})`, ByGroups(CommentPreproc, Using(Python), CommentPreproc), nil}, {`(?sx) (.+?) # anything, followed by: (?: diff --git a/lexers/m/mason.go b/lexers/m/mason.go index 24cbd10..5c70ab0 100644 --- a/lexers/m/mason.go +++ b/lexers/m/mason.go @@ -21,13 +21,13 @@ var Mason = internal.Register(MustNewLexer( {`\s+`, Text, nil}, {`(<%doc>)(.*?)()(?s)`, ByGroups(NameTag, CommentMultiline, NameTag), nil}, {`(<%(?:def|method))(\s*)(.*?)(>)(.*?)()(?s)`, ByGroups(NameTag, Text, NameFunction, NameTag, UsingSelf("root"), NameTag), nil}, - {`(<%\w+)(.*?)(>)(.*?)()(?s)`, ByGroups(NameTag, NameFunction, NameTag, Using(Perl, nil), NameTag), nil}, - {`(<&[^|])(.*?)(,.*?)?(&>)(?s)`, ByGroups(NameTag, NameFunction, Using(Perl, nil), NameTag), nil}, - {`(<&\|)(.*?)(,.*?)?(&>)(?s)`, ByGroups(NameTag, NameFunction, Using(Perl, nil), NameTag), nil}, + {`(<%\w+)(.*?)(>)(.*?)()(?s)`, ByGroups(NameTag, NameFunction, NameTag, Using(Perl), NameTag), nil}, + {`(<&[^|])(.*?)(,.*?)?(&>)(?s)`, ByGroups(NameTag, NameFunction, Using(Perl), NameTag), nil}, + {`(<&\|)(.*?)(,.*?)?(&>)(?s)`, ByGroups(NameTag, NameFunction, Using(Perl), NameTag), nil}, {``, NameTag, nil}, - {`(<%!?)(.*?)(%>)(?s)`, ByGroups(NameTag, Using(Perl, nil), NameTag), nil}, + {`(<%!?)(.*?)(%>)(?s)`, ByGroups(NameTag, Using(Perl), NameTag), nil}, {`(?<=^)#[^\n]*(\n|\Z)`, Comment, nil}, - {`(?<=^)(%)([^\n]*)(\n|\Z)`, ByGroups(NameTag, Using(Perl, nil), Other), nil}, + {`(?<=^)(%)([^\n]*)(\n|\Z)`, ByGroups(NameTag, Using(Perl), Other), nil}, {`(?sx) (.+?) # anything, followed by: (?: @@ -37,7 +37,7 @@ var Mason = internal.Register(MustNewLexer( # - don't consume (\\\n) | # an escaped newline \Z # end of string - )`, ByGroups(Using(HTML, nil), Operator), nil}, + )`, ByGroups(Using(HTML), Operator), nil}, }, }, )) diff --git a/lexers/m/myghty.go b/lexers/m/myghty.go index 09c289f..02a20ea 100644 --- a/lexers/m/myghty.go +++ b/lexers/m/myghty.go @@ -18,13 +18,13 @@ var Myghty = internal.Register(MustNewLexer( "root": { {`\s+`, Text, nil}, {`(<%(?:def|method))(\s*)(.*?)(>)(.*?)()(?s)`, ByGroups(NameTag, Text, NameFunction, NameTag, UsingSelf("root"), NameTag), nil}, - {`(<%\w+)(.*?)(>)(.*?)()(?s)`, ByGroups(NameTag, NameFunction, NameTag, Using(Python, nil), NameTag), nil}, - {`(<&[^|])(.*?)(,.*?)?(&>)`, ByGroups(NameTag, NameFunction, Using(Python, nil), NameTag), nil}, - {`(<&\|)(.*?)(,.*?)?(&>)(?s)`, ByGroups(NameTag, NameFunction, Using(Python, nil), NameTag), nil}, + {`(<%\w+)(.*?)(>)(.*?)()(?s)`, ByGroups(NameTag, NameFunction, NameTag, Using(Python), NameTag), nil}, + {`(<&[^|])(.*?)(,.*?)?(&>)`, ByGroups(NameTag, NameFunction, Using(Python), NameTag), nil}, + {`(<&\|)(.*?)(,.*?)?(&>)(?s)`, ByGroups(NameTag, NameFunction, Using(Python), NameTag), nil}, {``, NameTag, nil}, - {`(<%!?)(.*?)(%>)(?s)`, ByGroups(NameTag, Using(Python, nil), NameTag), nil}, + {`(<%!?)(.*?)(%>)(?s)`, ByGroups(NameTag, Using(Python), NameTag), nil}, {`(?<=^)#[^\n]*(\n|\Z)`, Comment, nil}, - {`(?<=^)(%)([^\n]*)(\n|\Z)`, ByGroups(NameTag, Using(Python, nil), Other), nil}, + {`(?<=^)(%)([^\n]*)(\n|\Z)`, ByGroups(NameTag, Using(Python), Other), nil}, {`(?sx) (.+?) # anything, followed by: (?: diff --git a/lexers/s/smarty.go b/lexers/s/smarty.go index 72490c1..a3592e2 100644 --- a/lexers/s/smarty.go +++ b/lexers/s/smarty.go @@ -19,7 +19,7 @@ var Smarty = internal.Register(MustNewLexer( "root": { {`[^{]+`, Other, nil}, {`(\{)(\*.*?\*)(\})`, ByGroups(CommentPreproc, Comment, CommentPreproc), nil}, - {`(\{php\})(.*?)(\{/php\})`, ByGroups(CommentPreproc, Using(PHP, nil), CommentPreproc), nil}, + {`(\{php\})(.*?)(\{/php\})`, ByGroups(CommentPreproc, Using(PHP), CommentPreproc), nil}, {`(\{)(/?[a-zA-Z_]\w*)(\s*)`, ByGroups(CommentPreproc, NameFunction, Text), Push("smarty")}, {`\{`, CommentPreproc, Push("smarty")}, }, diff --git a/lexers/t/typoscript.go b/lexers/t/typoscript.go index 03a56db..c759590 100644 --- a/lexers/t/typoscript.go +++ b/lexers/t/typoscript.go @@ -42,9 +42,9 @@ var Typoscript = internal.Register(MustNewLexer( {`\s+`, Text, nil}, }, "html": { - {`<\S[^\n>]*>`, Using(TypoScriptHTMLData, nil), nil}, + {`<\S[^\n>]*>`, Using(TypoScriptHTMLData), nil}, {`&[^;\n]*;`, LiteralString, nil}, - {`(_CSS_DEFAULT_STYLE)(\s*)(\()(?s)(.*(?=\n\)))`, ByGroups(NameClass, Text, LiteralStringSymbol, Using(TypoScriptCSSData, nil)), nil}, + {`(_CSS_DEFAULT_STYLE)(\s*)(\()(?s)(.*(?=\n\)))`, ByGroups(NameClass, Text, LiteralStringSymbol, Using(TypoScriptCSSData)), nil}, }, "literal": { {`0x[0-9A-Fa-f]+t?`, LiteralNumberHex, nil}, diff --git a/lexers/testdata/cpp.actual b/lexers/testdata/cpp.actual index f037957..33c14ce 100644 --- a/lexers/testdata/cpp.actual +++ b/lexers/testdata/cpp.actual @@ -1,3 +1,3 @@ -template -void func(const std::string &s, const T &t) { -} // Do interesting things. \ No newline at end of file +int main() { + return 0; +} diff --git a/lexers/testdata/cpp.expected b/lexers/testdata/cpp.expected index 81724d1..8498a49 100644 --- a/lexers/testdata/cpp.expected +++ b/lexers/testdata/cpp.expected @@ -1,37 +1,16 @@ [ - {"type":"Keyword","value":"template"}, + {"type":"KeywordType","value":"int"}, {"type":"Text","value":" "}, - {"type":"Operator","value":"\u003c"}, - {"type":"Keyword","value":"typename"}, - {"type":"Text","value":" "}, - {"type":"Name","value":"T"}, - {"type":"Operator","value":"\u003e"}, - {"type":"Text","value":"\n"}, - {"type":"KeywordType","value":"void"}, - {"type":"Text","value":" "}, - {"type":"Name","value":"func"}, - {"type":"Punctuation","value":"("}, - {"type":"Keyword","value":"const"}, - {"type":"Text","value":" "}, - {"type":"Name","value":"std"}, - {"type":"Operator","value":"::"}, - {"type":"Name","value":"string"}, - {"type":"Text","value":" "}, - {"type":"Operator","value":"\u0026"}, - {"type":"Name","value":"s"}, - {"type":"Punctuation","value":","}, - {"type":"Text","value":" "}, - {"type":"Keyword","value":"const"}, - {"type":"Text","value":" "}, - {"type":"Name","value":"T"}, - {"type":"Text","value":" "}, - {"type":"Operator","value":"\u0026"}, - {"type":"Name","value":"t"}, - {"type":"Punctuation","value":")"}, + {"type":"NameFunction","value":"main"}, + {"type":"Punctuation","value":"()"}, {"type":"Text","value":" "}, {"type":"Punctuation","value":"{"}, + {"type":"Text","value":"\n "}, + {"type":"Keyword","value":"return"}, + {"type":"Text","value":" "}, + {"type":"LiteralNumberInteger","value":"0"}, + {"type":"Punctuation","value":";"}, {"type":"Text","value":"\n"}, {"type":"Punctuation","value":"}"}, - {"type":"Text","value":" "}, - {"type":"CommentSingle","value":"// Do interesting things.\n"} + {"type":"Text","value":"\n"} ] diff --git a/lexers/v/vim.go b/lexers/v/vim.go index fd03ae7..7e1a131 100644 --- a/lexers/v/vim.go +++ b/lexers/v/vim.go @@ -16,8 +16,8 @@ var Viml = internal.Register(MustNewLexer( }, Rules{ "root": { - {`^([ \t:]*)(py(?:t(?:h(?:o(?:n)?)?)?)?)([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)`, ByGroups(UsingSelf("root"), Keyword, Text, Operator, Text, Text, Using(Python, nil), Text), nil}, - {`^([ \t:]*)(py(?:t(?:h(?:o(?:n)?)?)?)?)([ \t])(.*)`, ByGroups(UsingSelf("root"), Keyword, Text, Using(Python, nil)), nil}, + {`^([ \t:]*)(py(?:t(?:h(?:o(?:n)?)?)?)?)([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)`, ByGroups(UsingSelf("root"), Keyword, Text, Operator, Text, Text, Using(Python), Text), nil}, + {`^([ \t:]*)(py(?:t(?:h(?:o(?:n)?)?)?)?)([ \t])(.*)`, ByGroups(UsingSelf("root"), Keyword, Text, Using(Python)), nil}, {`^\s*".*`, Comment, nil}, {`[ \t]+`, Text, nil}, {`/(\\\\|\\/|[^\n/])*/`, LiteralStringRegex, nil}, diff --git a/regexp.go b/regexp.go index 329f182..4ad561a 100644 --- a/regexp.go +++ b/regexp.go @@ -42,9 +42,9 @@ func ByGroups(emitters ...Emitter) Emitter { } // Using returns an Emitter that uses a given Lexer for parsing and emitting. -func Using(lexer Lexer, options *TokeniseOptions) Emitter { +func Using(lexer Lexer) Emitter { return EmitterFunc(func(groups []string, _ Lexer) Iterator { - it, err := lexer.Tokenise(options, groups[0]) + it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0]) if err != nil { panic(err) } @@ -55,7 +55,7 @@ func Using(lexer Lexer, options *TokeniseOptions) Emitter { // UsingSelf is like Using, but uses the current Lexer. func UsingSelf(state string) Emitter { return EmitterFunc(func(groups []string, lexer Lexer) Iterator { - it, err := lexer.Tokenise(&TokeniseOptions{State: state}, groups[0]) + it, err := lexer.Tokenise(&TokeniseOptions{State: state, Nested: true}, groups[0]) if err != nil { panic(err) } @@ -309,7 +309,7 @@ func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, if options == nil { options = defaultOptions } - if r.config.EnsureNL && !strings.HasSuffix(text, "\n") { + if !options.Nested && r.config.EnsureNL && !strings.HasSuffix(text, "\n") { text += "\n" } state := &LexerState{