
Switch to an Iterator interface.

This solves an issue where the writers returned by a Formatter were often
stateful, but that was not obvious to API consumers and failed in
interesting ways.
Alec Thomas
2017-09-20 22:19:36 +10:00
parent 36ead7258a
commit cc0e4a59ab
20 changed files with 215 additions and 129 deletions
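
The shape of the change, as a rough sketch (hypothetical helper, in the style of the README fragments below; assumes `io` and `github.com/alecthomas/chroma` are imported and that a lexer, formatter, and style are already in hand): the lexer now hands back an `Iterator` that the formatter consumes, instead of the formatter handing back a writer callback for the lexer to push into.

```go
// highlight sketches the new pull-based flow. Previously one would call
// formatter.Format(w, style) to obtain a stateful writer callback and then
// pass that callback to lexer.Tokenise.
func highlight(w io.Writer, source string, lexer chroma.Lexer, formatter chroma.Formatter, style *chroma.Style) error {
	it, err := lexer.Tokenise(nil, source)
	if err != nil {
		return err
	}
	return formatter.Format(w, style, it)
}
```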

View File

@@ -1,5 +1,7 @@
# Chroma - A general purpose syntax highlighter in pure Go [![](https://godoc.org/github.com/alecthomas/chroma?status.svg)](http://godoc.org/github.com/alecthomas/chroma) [![Build Status](https://travis-ci.org/alecthomas/chroma.png)](https://travis-ci.org/alecthomas/chroma) [![Gitter chat](https://badges.gitter.im/alecthomas.png)](https://gitter.im/alecthomas/Lobby)
+> **NOTE:** As Chroma has just been released, its API is still in flux. That said, the high-level interface should not change significantly.
Chroma takes source code and other structured text and converts it into syntax
highlighted HTML, ANSI-coloured text, etc.
@@ -115,17 +117,17 @@ if formatter == nil {
}
```
-Then obtain a formatting function from the formatter:
+Then obtain an iterator over the tokens:
-```go
-writer, err := formatter.Format(w, style)
-```
-And finally, lex the source code and write the output:
```go
contents, err := ioutil.ReadAll(r)
-err := lexer.Tokenise(nil, string(contents), writer)
+iterator, err := lexer.Tokenise(nil, string(contents))
+```
+And finally, format the tokens from the iterator:
+```go
+err := formatter.Format(w, style, iterator)
```
### The HTML formatter ### The HTML formatter
@@ -139,6 +141,9 @@ following constructor options:
- `Standalone()` - generate standalone HTML with embedded CSS.
- `WithClasses()` - use classes rather than inlined style attributes.
- `ClassPrefix(prefix)` - prefix each generated CSS class.
+- `TabWidth(width)` - Set the rendered tab width, in characters.
+- `WithLineNumbers()` - Render line numbers (style with `LineNumbers`).
+- `HighlightLines(ranges)` - Highlight lines in these ranges (style with `LineHighlight`).
If `WithClasses()` is used, the corresponding CSS can be obtained from the formatter with:
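
Putting the README pieces together with the new options, an end-to-end sketch might look like the following (option values are illustrative; assumes the `formatters/html`, `lexers`, and `styles` subpackages):

```go
package main

import (
	"os"

	"github.com/alecthomas/chroma/formatters/html"
	"github.com/alecthomas/chroma/lexers"
	"github.com/alecthomas/chroma/styles"
)

func main() {
	// Illustrative options; any subset of the constructor options works.
	formatter := html.New(
		html.TabWidth(4),
		html.WithLineNumbers(),
	)
	it, err := lexers.Go.Tokenise(nil, "package main\n\nfunc main() {\n\tprintln(\"hello\")\n}\n")
	if err != nil {
		panic(err)
	}
	if err := formatter.Format(os.Stdout, styles.Fallback, it); err != nil {
		panic(err)
	}
}
```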

View File

@@ -146,16 +146,15 @@ command, for Go.
}
formatters.Register("html", html.New(options...))
}
-writer := getWriter(w, style)
if len(*filesArgs) == 0 {
contents, err := ioutil.ReadAll(os.Stdin)
kingpin.FatalIfError(err, "")
-lex("", string(contents), writer)
+format(os.Stdout, style, lex("", string(contents)))
} else {
for _, filename := range *filesArgs {
contents, err := ioutil.ReadFile(filename)
kingpin.FatalIfError(err, "")
-lex(filename, string(contents), writer)
+format(os.Stdout, style, lex(filename, string(contents)))
}
}
}
@@ -192,14 +191,15 @@ func listAll() {
fmt.Println()
}
-func lex(path string, contents string, writer func(*chroma.Token)) {
+func lex(path string, contents string) chroma.Iterator {
lexer := selexer(path, contents)
if lexer == nil {
lexer = lexers.Fallback
}
lexer = chroma.Coalesce(lexer)
-err := lexer.Tokenise(nil, string(contents), writer)
+it, err := lexer.Tokenise(nil, string(contents))
kingpin.FatalIfError(err, "")
+return it
}
func selexer(path, contents string) (lexer chroma.Lexer) {
@@ -215,10 +215,8 @@ func selexer(path, contents string) (lexer chroma.Lexer) {
return lexers.Analyse(contents)
}
-func getWriter(w io.Writer, style *chroma.Style) func(*chroma.Token) {
+func format(w io.Writer, style *chroma.Style, it chroma.Iterator) {
formatter := formatters.Get(*formatterFlag)
-// formatter := formatters.TTY8
-writer, err := formatter.Format(w, style)
+err := formatter.Format(w, style, it)
kingpin.FatalIfError(err, "")
-return writer
}

View File

@@ -9,21 +9,24 @@ type coalescer struct {
Lexer
}
-func (d *coalescer) Tokenise(options *TokeniseOptions, text string, out func(*Token)) error {
+func (d *coalescer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
var prev *Token
-return d.Lexer.Tokenise(options, text, func(token *Token) {
-if prev == nil {
-prev = token
-} else {
-if prev.Type == token.Type && len(prev.Value) < 8192 {
-prev.Value += token.Value
-} else {
-out(prev)
+it, err := d.Lexer.Tokenise(options, text)
+if err != nil {
+return nil, err
+}
+return func() *Token {
+for token := it(); token != nil; token = it() {
+if prev == nil {
prev = token
+} else {
+if prev.Type == token.Type && len(prev.Value) < 8192 {
+prev.Value += token.Value
+}
}
}
-if token.Type == EOF {
-out(token)
-}
-})
+out := prev
+prev = nil
+return out
+}, nil
}
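
For callers, `Coalesce` still wraps a `Lexer`; only `Tokenise` changes shape. A rough usage sketch (assumes the `lexers` subpackage):

```go
package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
	"github.com/alecthomas/chroma/lexers"
)

func main() {
	// Coalesce merges adjacent tokens of the same type as the iterator is drained.
	lexer := chroma.Coalesce(lexers.Go)
	it, err := lexer.Tokenise(nil, "package main")
	if err != nil {
		panic(err)
	}
	for t := it(); t != nil; t = it() {
		fmt.Println(t.GoString())
	}
}
```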

View File

@@ -14,9 +14,6 @@ func TestCoalesce(t *testing.T) {
}))
actual, err := Tokenise(lexer, nil, "!@#$")
require.NoError(t, err)
-expected := []*Token{
-&Token{Punctuation, "!@#$"},
-&Token{EOF, ""},
-}
+expected := []*Token{{Punctuation, "!@#$"}}
require.Equal(t, expected, actual)
}

View File

@@ -7,12 +7,10 @@ import (
// A Formatter for Chroma lexers.
type Formatter interface {
// Format returns a formatting function for tokens.
-Format(w io.Writer, style *Style) (func(*Token), error)
+Format(w io.Writer, style *Style, iterator Iterator) error
}
// A FormatterFunc is a Formatter implemented as a function.
-type FormatterFunc func(io.Writer, *Style) (func(*Token), error)
+type FormatterFunc func(w io.Writer, style *Style, iterator Iterator) error
-func (f FormatterFunc) Format(w io.Writer, s *Style) (func(*Token), error) {
-return f(w, s)
-}
+func (f FormatterFunc) Format(w io.Writer, s *Style, it Iterator) error { return f(w, s, it) }
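
With the new signature a Formatter is simply a pull-style consumer of the iterator. A minimal custom formatter might look like this (illustrative sketch, not part of the change; assumes `io` and the chroma package are imported):

```go
// plain writes each token's value verbatim and ignores the style --
// essentially what the registered "noop" formatter in the next file does.
var plain = chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, it chroma.Iterator) error {
	for t := it(); t != nil; t = it() {
		if _, err := io.WriteString(w, t.Value); err != nil {
			return err
		}
	}
	return nil
})
```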

View File

@@ -10,8 +10,13 @@ import (
var (
// NoOp formatter.
-NoOp = Register("noop", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style) (func(*chroma.Token), error) {
-return func(t *chroma.Token) { io.WriteString(w, t.Value) }, nil
+NoOp = Register("noop", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, iterator chroma.Iterator) error {
+for t := iterator(); t != nil; t = iterator() {
+if _, err := io.WriteString(w, t.Value); err != nil {
+return err
+}
+}
+return nil
}))
// Default HTML formatter outputs self-contained HTML.
htmlFull = Register("html", html.New(html.Standalone(), html.WithClasses()))

View File

@@ -67,15 +67,8 @@ func (h highlightRanges) Len() int { return len(h) }
func (h highlightRanges) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h highlightRanges) Less(i, j int) bool { return h[i][0] < h[j][0] }
-func (f *Formatter) Format(w io.Writer, style *chroma.Style) (func(*chroma.Token), error) {
-tokens := []*chroma.Token{}
-return func(token *chroma.Token) {
-tokens = append(tokens, token)
-if token.Type == chroma.EOF {
-f.writeHTML(w, style, tokens)
-return
-}
-}, nil
+func (f *Formatter) Format(w io.Writer, style *chroma.Style, iterator chroma.Iterator) error {
+return f.writeHTML(w, style, chroma.Flatten(iterator))
}
func (f *Formatter) writeHTML(w io.Writer, style *chroma.Style, tokens []*chroma.Token) error { // nolint: gocyclo

View File

@@ -20,11 +20,11 @@ func TestCompressStyle(t *testing.T) {
func BenchmarkHTMLFormatter(b *testing.B) {
formatter := New()
-writer, err := formatter.Format(ioutil.Discard, styles.Fallback)
-assert.NoError(b, err)
b.ResetTimer()
for i := 0; i < b.N; i++ {
-err = lexers.Go.Tokenise(nil, "package main\nfunc main()\n{\nprintln(`hello world`)\n}\n", writer)
+it, err := lexers.Go.Tokenise(nil, "package main\nfunc main()\n{\nprintln(`hello world`)\n}\n")
+assert.NoError(b, err)
+err = formatter.Format(ioutil.Discard, styles.Fallback, it)
assert.NoError(b, err)
}
}
@@ -33,7 +33,6 @@ func TestSplitTokensIntoLines(t *testing.T) {
in := []*chroma.Token{
{Value: "hello", Type: chroma.NameKeyword},
{Value: " world\nwhat?\n", Type: chroma.NameKeyword},
-{Type: chroma.EOF},
}
expected := [][]*chroma.Token{
[]*chroma.Token{
@@ -45,7 +44,6 @@ func TestSplitTokensIntoLines(t *testing.T) {
},
[]*chroma.Token{
{Type: chroma.NameKeyword},
-{Type: chroma.EOF},
},
}
actual := splitTokensIntoLines(in)

View File

@@ -8,8 +8,11 @@ import (
)
// Tokens formatter outputs the raw token structures.
-var Tokens = Register("tokens", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style) (func(*chroma.Token), error) {
-return func(token *chroma.Token) {
-fmt.Fprintln(w, token.GoString())
-}, nil
+var Tokens = Register("tokens", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, it chroma.Iterator) error {
+for t := it(); t != nil; t = it() {
+if _, err := fmt.Fprintln(w, t.GoString()); err != nil {
+return err
+}
+}
+return nil
}))

View File

@@ -234,9 +234,9 @@ type indexedTTYFormatter struct {
table *ttyTable
}
-func (c *indexedTTYFormatter) Format(w io.Writer, style *chroma.Style) (func(*chroma.Token), error) {
+func (c *indexedTTYFormatter) Format(w io.Writer, style *chroma.Style, it chroma.Iterator) error {
theme := styleToEscapeSequence(c.table, style)
-return func(token *chroma.Token) {
+for token := it(); token != nil; token = it() {
// TODO: Cache token lookups?
clr, ok := theme[token.Type]
if !ok {
@@ -255,7 +255,8 @@ func (c *indexedTTYFormatter) Format(w io.Writer, style *chroma.Style) (func(*ch
if clr != "" {
fmt.Fprintf(w, "\033[0m")
}
-}, nil
+}
+return nil
}
// TTY8 is an 8-colour terminal formatter.

View File

@@ -10,8 +10,8 @@ import (
// TTY16m is a true-colour terminal formatter.
var TTY16m = Register("terminal16m", chroma.FormatterFunc(trueColourFormatter))
-func trueColourFormatter(w io.Writer, style *chroma.Style) (func(*chroma.Token), error) {
-return func(token *chroma.Token) {
+func trueColourFormatter(w io.Writer, style *chroma.Style, it chroma.Iterator) error {
+for token := it(); token != nil; token = it() {
entry := style.Get(token.Type)
if !entry.IsZero() {
out := ""
@@ -33,5 +33,6 @@ func trueColourFormatter(w io.Writer, style *chroma.Style) (func(*chroma.Token),
if !entry.IsZero() {
fmt.Fprint(w, "\033[0m")
}
-}, nil
+}
+return nil
}

iterator.go (new file, 41 lines)
View File

@@ -0,0 +1,41 @@
package chroma
// An Iterator across tokens.
//
// nil will be returned at the end of the Token stream.
type Iterator func() *Token
// Concaterator concatenates tokens from a series of iterators.
func Concaterator(iterators ...Iterator) Iterator {
return func() *Token {
for len(iterators) > 0 {
t := iterators[0]()
if t != nil {
return t
}
iterators = iterators[1:]
}
return nil
}
}
// Literator converts a sequence of literal Tokens into an Iterator.
func Literator(tokens ...*Token) Iterator {
return func() (out *Token) {
if len(tokens) == 0 {
return nil
}
token := tokens[0]
tokens = tokens[1:]
return token
}
}
// Flatten an Iterator into its tokens.
func Flatten(iterator Iterator) []*Token {
out := []*Token{}
for t := iterator(); t != nil; t = iterator() {
out = append(out, t)
}
return out
}
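
A small sketch of how these helpers compose from outside the package (token values are made up for illustration):

```go
package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	// Literator wraps a fixed set of tokens in an Iterator.
	a := chroma.Literator(&chroma.Token{Type: chroma.Keyword, Value: "func"})
	b := chroma.Literator(
		&chroma.Token{Type: chroma.Whitespace, Value: " "},
		&chroma.Token{Type: chroma.Name, Value: "main"},
	)
	// Concaterator chains iterators; Flatten drains one into a slice.
	for _, t := range chroma.Flatten(chroma.Concaterator(a, b)) {
		fmt.Println(t.GoString())
	}
}
```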

View File

@@ -76,10 +76,8 @@ type TokeniseOptions struct {
type Lexer interface {
// Config describing the features of the Lexer.
Config() *Config
-// Tokenise text and call out for each generated token.
-//
-// A token of type EOF will be passed to out() to signify the end of the stream.
-Tokenise(options *TokeniseOptions, text string, out func(*Token)) error
+// Tokenise returns an Iterator over tokens in text.
+Tokenise(options *TokeniseOptions, text string) (Iterator, error)
}
type Lexers []Lexer

View File

@@ -47,7 +47,6 @@ func TestSimpleLexer(t *testing.T) {
{Whitespace, " "},
{LiteralString, "10"},
{Whitespace, "\n"},
-{EOF, ""},
}
require.Equal(t, expected, actual)
}

View File

@@ -12,10 +12,10 @@ import (
)
func TestCompileAllRegexes(t *testing.T) {
-writer, err := formatters.NoOp.Format(ioutil.Discard, styles.SwapOff)
-assert.NoError(t, err)
for _, lexer := range lexers.Registry.Lexers {
-err = lexer.Tokenise(nil, "", writer)
+it, err := lexer.Tokenise(nil, "")
+assert.NoError(t, err, "%s failed", lexer.Config().Name)
+err = formatters.NoOp.Format(ioutil.Discard, styles.SwapOff, it)
assert.NoError(t, err, "%s failed", lexer.Config().Name)
}
}

View File

@@ -3,7 +3,7 @@ package lexers
import (
"testing"
-"github.com/alecthomas/chroma"
+"github.com/stretchr/testify/assert"
)
const lexerBenchSource = `package chroma
@@ -29,6 +29,9 @@ func (f FormatterFunc) Format(w io.Writer, s *Style) (func(*Token), error) {
func Benchmark(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
-Go.Tokenise(nil, lexerBenchSource, func(t *chroma.Token) {})
+it, err := Go.Tokenise(nil, lexerBenchSource)
+assert.NoError(b, err)
+for t := it(); t != nil; t = it() {
+}
}
}

View File

@@ -38,16 +38,25 @@ var Markdown = Register(MustNewLexer(
},
))
-func handleCodeblock(groups []string, lexer Lexer, out func(*Token)) {
-out(&Token{String, groups[1]})
-out(&Token{String, groups[2]})
-out(&Token{Text, groups[3]})
+func handleCodeblock(groups []string, lexer Lexer) Iterator {
+iterators := []Iterator{}
+tokens := []*Token{
+&Token{String, groups[1]},
+&Token{String, groups[2]},
+&Token{Text, groups[3]},
+}
code := groups[4]
lexer = Get(groups[2])
if lexer == nil {
-out(&Token{String, code})
+tokens = append(tokens, &Token{String, code})
+iterators = append(iterators, Literator(tokens...))
} else {
-lexer.Tokenise(nil, code, out)
+sub, err := lexer.Tokenise(nil, code)
+if err != nil {
+panic(err)
+}
+iterators = append(iterators, sub)
}
-out(&Token{String, groups[5]})
+iterators = append(iterators, Literator(&Token{String, groups[5]}))
+return Concaterator(iterators...)
}

View File

@@ -35,10 +35,9 @@ func Highlight(w io.Writer, source, lexer, formatter, style string) error {
s = styles.Fallback
}
-writer, err := f.Format(w, s)
+it, err := l.Tokenise(nil, source)
if err != nil {
return err
}
-return l.Tokenise(nil, source, writer)
+return f.Format(w, s, it)
}
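
`quick.Highlight` keeps its signature; only the internals switch to the iterator flow. Typical usage (a sketch; the lexer, formatter, and style names are illustrative and must correspond to registered entries):

```go
package main

import (
	"log"
	"os"

	"github.com/alecthomas/chroma/quick"
)

func main() {
	source := "package main\n\nfunc main() { println(\"hello\") }\n"
	// Arguments: writer, source text, lexer name, formatter name, style name.
	if err := quick.Highlight(os.Stdout, source, "go", "terminal16m", "monokai"); err != nil {
		log.Fatal(err)
	}
}
```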

regexp.go (115 changed lines)
View File

@@ -19,42 +19,47 @@ type Rule struct {
// An Emitter takes group matches and returns tokens.
type Emitter interface {
// Emit tokens for the given regex groups.
-Emit(groups []string, lexer Lexer, out func(*Token))
+Emit(groups []string, lexer Lexer) Iterator
}
// EmitterFunc is a function that is an Emitter.
-type EmitterFunc func(groups []string, lexer Lexer, out func(*Token))
+type EmitterFunc func(groups []string, lexer Lexer) Iterator
// Emit tokens for groups.
-func (e EmitterFunc) Emit(groups []string, lexer Lexer, out func(*Token)) { e(groups, lexer, out) }
+func (e EmitterFunc) Emit(groups []string, lexer Lexer) Iterator { return e(groups, lexer) }
// ByGroups emits a token for each matching group in the rule's regex.
func ByGroups(emitters ...Emitter) Emitter {
-return EmitterFunc(func(groups []string, lexer Lexer, out func(*Token)) {
-// NOTE: If this line panics, there is a mismatch with groups. Uncomment the following line to debug.
+return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
+iterators := make([]Iterator, 0, len(groups)-1)
+// NOTE: If this panics, there is a mismatch with groups. Uncomment the following line to debug.
// fmt.Printf("%s %#v\n", emitters, groups[1:])
for i, group := range groups[1:] {
-emitters[i].Emit([]string{group}, lexer, out)
+iterators = append(iterators, emitters[i].Emit([]string{group}, lexer))
}
-return
+return Concaterator(iterators...)
})
}
// Using returns an Emitter that uses a given Lexer for parsing and emitting.
func Using(lexer Lexer, options *TokeniseOptions) Emitter {
-return EmitterFunc(func(groups []string, _ Lexer, out func(*Token)) {
-if err := lexer.Tokenise(options, groups[0], out); err != nil {
+return EmitterFunc(func(groups []string, _ Lexer) Iterator {
+it, err := lexer.Tokenise(options, groups[0])
+if err != nil {
panic(err)
}
+return it
})
}
// UsingSelf is like Using, but uses the current Lexer.
func UsingSelf(state string) Emitter {
-return EmitterFunc(func(groups []string, lexer Lexer, out func(*Token)) {
-if err := lexer.Tokenise(&TokeniseOptions{State: state}, groups[0], out); err != nil {
+return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
+it, err := lexer.Tokenise(&TokeniseOptions{State: state}, groups[0])
+if err != nil {
panic(err)
}
+return it
})
}
@@ -69,7 +74,14 @@ func Words(prefix, suffix string, words ...string) string {
// Tokenise text using lexer, returning tokens as a slice.
func Tokenise(lexer Lexer, options *TokeniseOptions, text string) ([]*Token, error) {
out := []*Token{}
-return out, lexer.Tokenise(options, text, func(token *Token) { out = append(out, token) })
+it, err := lexer.Tokenise(options, text)
+if err != nil {
+return nil, err
+}
+for t := it(); t != nil; t = it() {
+out = append(out, t)
+}
+return out, nil
}
// Rules maps from state to a sequence of Rules.
@@ -129,6 +141,7 @@ type CompiledRule struct {
type CompiledRules map[string][]CompiledRule
type LexerState struct {
+Lexer *RegexLexer
Text []rune
Pos int
Rules map[string][]CompiledRule
@@ -149,6 +162,55 @@ func (l *LexerState) Get(key interface{}) interface{} {
return l.MutatorContext[key]
}
func (l *LexerState) Iterator() Iterator {
iteratorStack := []Iterator{}
return func() *Token {
for l.Pos < len(l.Text) && len(l.Stack) > 0 {
// Exhaust the IteratorStack, if any.
for len(iteratorStack) > 0 {
n := len(iteratorStack) - 1
t := iteratorStack[n]()
if t == nil {
iteratorStack = iteratorStack[:n]
continue
}
return t
}
l.State = l.Stack[len(l.Stack)-1]
ruleIndex, rule, groups := matchRules(l.Text[l.Pos:], l.Rules[l.State])
// No match.
if groups == nil {
l.Pos++
return &Token{Error, string(l.Text[l.Pos-1 : l.Pos])}
}
l.Rule = ruleIndex
l.Groups = groups
l.Pos += utf8.RuneCountInString(groups[0])
if rule.Mutator != nil {
if err := rule.Mutator.Mutate(l); err != nil {
panic(err)
}
}
if rule.Type != nil {
iteratorStack = append(iteratorStack, rule.Type.Emit(l.Groups, l.Lexer))
}
}
// Exhaust the IteratorStack, if any.
// Duplicate code, but eh.
for len(iteratorStack) > 0 {
n := len(iteratorStack) - 1
t := iteratorStack[n]()
if t == nil {
iteratorStack = iteratorStack[:n]
continue
}
return t
}
return nil
}
}
type RegexLexer struct {
config *Config
analyser func(text string) float32
@@ -197,9 +259,9 @@ func (r *RegexLexer) maybeCompile() (err error) {
return nil
}
-func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string, out func(*Token)) error {
+func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
if err := r.maybeCompile(); err != nil {
-return err
+return nil, err
}
if options == nil {
options = defaultOptions
@@ -210,30 +272,7 @@ func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string, out func(*T
Rules: r.rules,
MutatorContext: map[interface{}]interface{}{},
}
-for state.Pos < len(state.Text) && len(state.Stack) > 0 {
-state.State = state.Stack[len(state.Stack)-1]
-ruleIndex, rule, groups := matchRules(state.Text[state.Pos:], state.Rules[state.State])
-// No match.
-if groups == nil {
-out(&Token{Error, string(state.Text[state.Pos : state.Pos+1])})
-state.Pos++
-continue
-}
-state.Rule = ruleIndex
-state.Groups = groups
-state.Pos += utf8.RuneCountInString(groups[0])
-if rule.Mutator != nil {
-if err := rule.Mutator.Mutate(state); err != nil {
-return err
-}
-}
-if rule.Type != nil {
-rule.Type.Emit(state.Groups, r, out)
-}
-}
-out(&Token{Type: EOF})
-return nil
+return state.Iterator(), nil
}
func matchRules(text []rune, rules []CompiledRule) (int, CompiledRule, []string) {
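
For rule authors, the practical difference is that an `Emitter` now returns an `Iterator` rather than pushing tokens into an `out` callback; `Literator` covers the common case of a fixed token. A sketch of a custom emitter (hypothetical name and behaviour; assumes `strings` and the chroma package are imported):

```go
// shoutKeyword is a made-up Emitter that emits the matched text as a
// Keyword token, upper-cased. Returning Literator(...) replaces the old
// out(&Token{...}) call.
var shoutKeyword = chroma.EmitterFunc(func(groups []string, lexer chroma.Lexer) chroma.Iterator {
	return chroma.Literator(&chroma.Token{Type: chroma.Keyword, Value: strings.ToUpper(groups[0])})
})
```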

View File

@@ -21,16 +21,12 @@ const (
LineNumbers
// Line highlight style.
LineHighlight
-// Character highlight style.
-Highlight
// Input that could not be tokenised.
Error
// Other is used by the Delegate lexer to indicate which tokens should be handled by the delegate.
Other
// No highlighting.
None
-// Final token.
-EOF
)
// Keywords.
@@ -208,6 +204,6 @@ func (t TokenType) InSubCategory(other TokenType) bool {
return t/100 == other/100
}
-func (t TokenType) Emit(groups []string, lexer Lexer, out func(*Token)) {
-out(&Token{Type: t, Value: groups[0]})
+func (t TokenType) Emit(groups []string, lexer Lexer) Iterator {
+return Literator(&Token{Type: t, Value: groups[0]})
}
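
Since a `TokenType` is itself an `Emitter`, emitting now simply yields a one-token iterator. A tiny sketch (assumes `fmt` and the chroma package are imported):

```go
it := chroma.Keyword.Emit([]string{"func"}, nil)
fmt.Println(it().GoString()) // prints the single Keyword token; the next call returns nil
```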