Mirror of https://github.com/alecthomas/chroma.git (synced 2025-01-26)
Switch to an Iterator interface.
This solves an issue where writers returned by the Formatter were often stateful; this was not obvious to API consumers, and misuse failed in interesting ways.
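In API terms, the change swaps the stateful writer callback for an iterator that is handed to the formatter. A minimal before/after sketch based on the hunks below (`lexer`, `formatter`, `w`, `style`, and `source` stand in for values from the surrounding program):

```go
// Old API: the formatter returned a stateful token callback that the lexer drove.
writer, err := formatter.Format(w, style)
if err == nil {
	err = lexer.Tokenise(nil, source, writer)
}

// New API: the lexer returns an Iterator, which the formatter then drains.
iterator, err := lexer.Tokenise(nil, source)
if err == nil {
	err = formatter.Format(w, style, iterator)
}
```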
parent 36ead7258a
commit cc0e4a59ab
README.md (21 lines changed)
@@ -1,5 +1,7 @@
 # Chroma - A general purpose syntax highlighter in pure Go [![](https://godoc.org/github.com/alecthomas/chroma?status.svg)](http://godoc.org/github.com/alecthomas/chroma) [![Build Status](https://travis-ci.org/alecthomas/chroma.png)](https://travis-ci.org/alecthomas/chroma) [![Gitter chat](https://badges.gitter.im/alecthomas.png)](https://gitter.im/alecthomas/Lobby)
 
+> **NOTE:** As Chroma has just been released, its API is still in flux. That said, the high-level interface should not change significantly.
+
 Chroma takes source code and other structured text and converts it into syntax
 highlighted HTML, ANSI-coloured text, etc.
@@ -115,17 +117,17 @@ if formatter == nil {
 }
 ```
 
-Then obtain a formatting function from the formatter:
-
-```go
-writer, err := formatter.Format(w, style)
-```
-
-And finally, lex the source code and write the output:
+Then obtain an iterator over the tokens:
 
 ```go
 contents, err := ioutil.ReadAll(r)
-err := lexer.Tokenise(nil, string(contents), writer)
+iterator, err := lexer.Tokenise(nil, string(contents))
 ```
 
+And finally, format the tokens from the iterator:
+
+```go
+err := formatter.Format(w, style, iterator)
+```
+
 ### The HTML formatter
@@ -139,6 +141,9 @@ following constructor options:
 - `Standalone()` - generate standalone HTML with embedded CSS.
 - `WithClasses()` - use classes rather than inlined style attributes.
 - `ClassPrefix(prefix)` - prefix each generated CSS class.
+- `TabWidth(width)` - Set the rendered tab width, in characters.
+- `WithLineNumbers()` - Render line numbers (style with `LineNumbers`).
+- `HighlightLines(ranges)` - Highlight lines in these ranges (style with `LineHighlight`).
 
 If `WithClasses()` is used, the corresponding CSS can be obtained from the formatter with:
 
@@ -146,16 +146,15 @@ command, for Go.
 		}
 		formatters.Register("html", html.New(options...))
 	}
-	writer := getWriter(w, style)
 	if len(*filesArgs) == 0 {
 		contents, err := ioutil.ReadAll(os.Stdin)
 		kingpin.FatalIfError(err, "")
-		lex("", string(contents), writer)
+		format(os.Stdout, style, lex("", string(contents)))
 	} else {
 		for _, filename := range *filesArgs {
 			contents, err := ioutil.ReadFile(filename)
 			kingpin.FatalIfError(err, "")
-			lex(filename, string(contents), writer)
+			format(os.Stdout, style, lex(filename, string(contents)))
 		}
 	}
 }
@@ -192,14 +191,15 @@ func listAll() {
 	fmt.Println()
 }
 
-func lex(path string, contents string, writer func(*chroma.Token)) {
+func lex(path string, contents string) chroma.Iterator {
 	lexer := selexer(path, contents)
 	if lexer == nil {
 		lexer = lexers.Fallback
 	}
 	lexer = chroma.Coalesce(lexer)
-	err := lexer.Tokenise(nil, string(contents), writer)
+	it, err := lexer.Tokenise(nil, string(contents))
 	kingpin.FatalIfError(err, "")
+	return it
 }
 
 func selexer(path, contents string) (lexer chroma.Lexer) {
@@ -215,10 +215,8 @@ func selexer(path, contents string) (lexer chroma.Lexer) {
 	return lexers.Analyse(contents)
 }
 
-func getWriter(w io.Writer, style *chroma.Style) func(*chroma.Token) {
+func format(w io.Writer, style *chroma.Style, it chroma.Iterator) {
 	formatter := formatters.Get(*formatterFlag)
-	// formatter := formatters.TTY8
-	writer, err := formatter.Format(w, style)
+	err := formatter.Format(w, style, it)
 	kingpin.FatalIfError(err, "")
-	return writer
 }
coalesce.go (29 lines changed)
@@ -9,21 +9,24 @@ type coalescer struct {
 	Lexer
 }
 
-func (d *coalescer) Tokenise(options *TokeniseOptions, text string, out func(*Token)) error {
+func (d *coalescer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
 	var prev *Token
-	return d.Lexer.Tokenise(options, text, func(token *Token) {
-		if prev == nil {
-			prev = token
-		} else {
-			if prev.Type == token.Type && len(prev.Value) < 8192 {
-				prev.Value += token.Value
-			} else {
-				out(prev)
-				prev = token
-			}
-		}
-		if token.Type == EOF {
-			out(token)
-		}
-	})
+	it, err := d.Lexer.Tokenise(options, text)
+	if err != nil {
+		return nil, err
+	}
+	return func() *Token {
+		for token := it(); token != nil; token = it() {
+			if prev == nil {
+				prev = token
+			} else {
+				if prev.Type == token.Type && len(prev.Value) < 8192 {
+					prev.Value += token.Value
+				}
+			}
+		}
+		out := prev
+		prev = nil
+		return out
+	}, nil
 }
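A coalescing lexer is consumed like any other; a quick sketch, assuming `lexers.Go` and the iterator-draining loop used throughout this diff:

```go
lexer := chroma.Coalesce(lexers.Go) // merges adjacent tokens of the same type
it, err := lexer.Tokenise(nil, "package main")
if err != nil {
	panic(err)
}
for t := it(); t != nil; t = it() {
	fmt.Println(t.GoString())
}
```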
@@ -14,9 +14,6 @@ func TestCoalesce(t *testing.T) {
 	}))
 	actual, err := Tokenise(lexer, nil, "!@#$")
 	require.NoError(t, err)
-	expected := []*Token{
-		&Token{Punctuation, "!@#$"},
-		&Token{EOF, ""},
-	}
+	expected := []*Token{{Punctuation, "!@#$"}}
 	require.Equal(t, expected, actual)
 }
@@ -7,12 +7,10 @@ import (
 // A Formatter for Chroma lexers.
 type Formatter interface {
 	// Format returns a formatting function for tokens.
-	Format(w io.Writer, style *Style) (func(*Token), error)
+	Format(w io.Writer, style *Style, iterator Iterator) error
 }
 
 // A FormatterFunc is a Formatter implemented as a function.
-type FormatterFunc func(io.Writer, *Style) (func(*Token), error)
+type FormatterFunc func(w io.Writer, style *Style, iterator Iterator) error
 
-func (f FormatterFunc) Format(w io.Writer, s *Style) (func(*Token), error) {
-	return f(w, s)
-}
+func (f FormatterFunc) Format(w io.Writer, s *Style, it Iterator) error { return f(w, s, it) }
@@ -10,8 +10,13 @@ import (
 
 var (
 	// NoOp formatter.
-	NoOp = Register("noop", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style) (func(*chroma.Token), error) {
-		return func(t *chroma.Token) { io.WriteString(w, t.Value) }, nil
+	NoOp = Register("noop", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, iterator chroma.Iterator) error {
+		for t := iterator(); t != nil; t = iterator() {
+			if _, err := io.WriteString(w, t.Value); err != nil {
+				return err
+			}
+		}
+		return nil
 	}))
 	// Default HTML formatter outputs self-contained HTML.
 	htmlFull = Register("html", html.New(html.Standalone(), html.WithClasses()))
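The new NoOp body doubles as a template for user-defined formatters: a `FormatterFunc` simply drains the iterator. For example, a hypothetical formatter (not part of this commit) that prints one token type per line:

```go
var typesOnly = chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, it chroma.Iterator) error {
	// Drain the iterator; nil signals the end of the token stream.
	for t := it(); t != nil; t = it() {
		if _, err := fmt.Fprintf(w, "%s\n", t.Type); err != nil {
			return err
		}
	}
	return nil
})
```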
@@ -67,15 +67,8 @@ func (h highlightRanges) Len() int { return len(h) }
 func (h highlightRanges) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
 func (h highlightRanges) Less(i, j int) bool { return h[i][0] < h[j][0] }
 
-func (f *Formatter) Format(w io.Writer, style *chroma.Style) (func(*chroma.Token), error) {
-	tokens := []*chroma.Token{}
-	return func(token *chroma.Token) {
-		tokens = append(tokens, token)
-		if token.Type == chroma.EOF {
-			f.writeHTML(w, style, tokens)
-			return
-		}
-	}, nil
+func (f *Formatter) Format(w io.Writer, style *chroma.Style, iterator chroma.Iterator) error {
+	return f.writeHTML(w, style, chroma.Flatten(iterator))
 }
 
 func (f *Formatter) writeHTML(w io.Writer, style *chroma.Style, tokens []*chroma.Token) error { // nolint: gocyclo
@@ -20,11 +20,11 @@ func TestCompressStyle(t *testing.T) {
 
 func BenchmarkHTMLFormatter(b *testing.B) {
 	formatter := New()
-	writer, err := formatter.Format(ioutil.Discard, styles.Fallback)
-	assert.NoError(b, err)
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		err = lexers.Go.Tokenise(nil, "package main\nfunc main()\n{\nprintln(`hello world`)\n}\n", writer)
+		it, err := lexers.Go.Tokenise(nil, "package main\nfunc main()\n{\nprintln(`hello world`)\n}\n")
+		assert.NoError(b, err)
+		err = formatter.Format(ioutil.Discard, styles.Fallback, it)
 		assert.NoError(b, err)
 	}
 }
@@ -33,7 +33,6 @@ func TestSplitTokensIntoLines(t *testing.T) {
 	in := []*chroma.Token{
 		{Value: "hello", Type: chroma.NameKeyword},
 		{Value: " world\nwhat?\n", Type: chroma.NameKeyword},
-		{Type: chroma.EOF},
 	}
 	expected := [][]*chroma.Token{
 		[]*chroma.Token{
@@ -45,7 +44,6 @@ func TestSplitTokensIntoLines(t *testing.T) {
 		},
 		[]*chroma.Token{
 			{Type: chroma.NameKeyword},
-			{Type: chroma.EOF},
 		},
 	}
 	actual := splitTokensIntoLines(in)
@@ -8,8 +8,11 @@ import (
 )
 
 // Tokens formatter outputs the raw token structures.
-var Tokens = Register("tokens", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style) (func(*chroma.Token), error) {
-	return func(token *chroma.Token) {
-		fmt.Fprintln(w, token.GoString())
-	}, nil
+var Tokens = Register("tokens", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, it chroma.Iterator) error {
+	for t := it(); t != nil; t = it() {
+		if _, err := fmt.Fprintln(w, t.GoString()); err != nil {
+			return err
+		}
+	}
+	return nil
 }))
@@ -234,9 +234,9 @@ type indexedTTYFormatter struct {
 	table *ttyTable
 }
 
-func (c *indexedTTYFormatter) Format(w io.Writer, style *chroma.Style) (func(*chroma.Token), error) {
+func (c *indexedTTYFormatter) Format(w io.Writer, style *chroma.Style, it chroma.Iterator) error {
 	theme := styleToEscapeSequence(c.table, style)
-	return func(token *chroma.Token) {
+	for token := it(); token != nil; token = it() {
 		// TODO: Cache token lookups?
 		clr, ok := theme[token.Type]
 		if !ok {
@@ -255,7 +255,8 @@ func (c *indexedTTYFormatter) Format(w io.Writer, style *chroma.Style) (func(*chroma.Token), error) {
 		if clr != "" {
 			fmt.Fprintf(w, "\033[0m")
 		}
-	}, nil
+	}
+	return nil
 }
 
 // TTY8 is an 8-colour terminal formatter.
@@ -10,8 +10,8 @@ import (
 // TTY16m is a true-colour terminal formatter.
 var TTY16m = Register("terminal16m", chroma.FormatterFunc(trueColourFormatter))
 
-func trueColourFormatter(w io.Writer, style *chroma.Style) (func(*chroma.Token), error) {
-	return func(token *chroma.Token) {
+func trueColourFormatter(w io.Writer, style *chroma.Style, it chroma.Iterator) error {
+	for token := it(); token != nil; token = it() {
 		entry := style.Get(token.Type)
 		if !entry.IsZero() {
 			out := ""
@@ -33,5 +33,6 @@ func trueColourFormatter(w io.Writer, style *chroma.Style) (func(*chroma.Token), error) {
 		if !entry.IsZero() {
 			fmt.Fprint(w, "\033[0m")
 		}
-	}, nil
+	}
+	return nil
 }
iterator.go (new file, 41 lines)
@@ -0,0 +1,41 @@
+package chroma
+
+// An Iterator across tokens.
+//
+// nil will be returned at the end of the Token stream.
+type Iterator func() *Token
+
+// Concaterator concatenates tokens from a series of iterators.
+func Concaterator(iterators ...Iterator) Iterator {
+	return func() *Token {
+		for len(iterators) > 0 {
+			t := iterators[0]()
+			if t != nil {
+				return t
+			}
+			iterators = iterators[1:]
+		}
+		return nil
+	}
+}
+
+// Literator converts a sequence of literal Tokens into an Iterator.
+func Literator(tokens ...*Token) Iterator {
+	return func() (out *Token) {
+		if len(tokens) == 0 {
+			return nil
+		}
+		token := tokens[0]
+		tokens = tokens[1:]
+		return token
+	}
+}
+
+// Flatten an Iterator into its tokens.
+func Flatten(iterator Iterator) []*Token {
+	out := []*Token{}
+	for t := iterator(); t != nil; t = iterator() {
+		out = append(out, t)
+	}
+	return out
+}
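The three helpers compose naturally; an illustrative sketch (the token values are made up):

```go
hello := chroma.Literator(&chroma.Token{chroma.String, "hello "})
world := chroma.Literator(&chroma.Token{chroma.String, "world"})
// Concaterator chains the two streams; Flatten collects them into a slice.
for _, t := range chroma.Flatten(chroma.Concaterator(hello, world)) {
	fmt.Println(t.GoString())
}
```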
lexer.go (6 lines changed)
@@ -76,10 +76,8 @@ type TokeniseOptions struct {
 type Lexer interface {
 	// Config describing the features of the Lexer.
 	Config() *Config
-	// Tokenise text and call out for each generated token.
-	//
-	// A token of type EOF will be passed to out() to signify the end of the stream.
-	Tokenise(options *TokeniseOptions, text string, out func(*Token)) error
+	// Tokenise returns an Iterator over tokens in text.
+	Tokenise(options *TokeniseOptions, text string) (Iterator, error)
 }
 
 type Lexers []Lexer
@@ -47,7 +47,6 @@ func TestSimpleLexer(t *testing.T) {
 		{Whitespace, " "},
 		{LiteralString, "10"},
 		{Whitespace, "\n"},
-		{EOF, ""},
 	}
 	require.Equal(t, expected, actual)
 }
@@ -12,10 +12,10 @@ import (
 )
 
 func TestCompileAllRegexes(t *testing.T) {
-	writer, err := formatters.NoOp.Format(ioutil.Discard, styles.SwapOff)
-	assert.NoError(t, err)
 	for _, lexer := range lexers.Registry.Lexers {
-		err = lexer.Tokenise(nil, "", writer)
+		it, err := lexer.Tokenise(nil, "")
+		assert.NoError(t, err, "%s failed", lexer.Config().Name)
+		err = formatters.NoOp.Format(ioutil.Discard, styles.SwapOff, it)
 		assert.NoError(t, err, "%s failed", lexer.Config().Name)
 	}
 }
@@ -3,7 +3,7 @@ package lexers
 import (
 	"testing"
 
-	"github.com/alecthomas/chroma"
+	"github.com/stretchr/testify/assert"
 )
 
 const lexerBenchSource = `package chroma
@@ -29,6 +29,9 @@ func (f FormatterFunc) Format(w io.Writer, s *Style) (func(*Token), error) {
 func Benchmark(b *testing.B) {
 	b.ReportAllocs()
 	for i := 0; i < b.N; i++ {
-		Go.Tokenise(nil, lexerBenchSource, func(t *chroma.Token) {})
+		it, err := Go.Tokenise(nil, lexerBenchSource)
+		assert.NoError(b, err)
+		for t := it(); t != nil; t = it() {
+		}
 	}
 }
|
@ -38,16 +38,25 @@ var Markdown = Register(MustNewLexer(
|
||||
},
|
||||
))
|
||||
|
||||
func handleCodeblock(groups []string, lexer Lexer, out func(*Token)) {
|
||||
out(&Token{String, groups[1]})
|
||||
out(&Token{String, groups[2]})
|
||||
out(&Token{Text, groups[3]})
|
||||
func handleCodeblock(groups []string, lexer Lexer) Iterator {
|
||||
iterators := []Iterator{}
|
||||
tokens := []*Token{
|
||||
&Token{String, groups[1]},
|
||||
&Token{String, groups[2]},
|
||||
&Token{Text, groups[3]},
|
||||
}
|
||||
code := groups[4]
|
||||
lexer = Get(groups[2])
|
||||
if lexer == nil {
|
||||
out(&Token{String, code})
|
||||
tokens = append(tokens, &Token{String, code})
|
||||
iterators = append(iterators, Literator(tokens...))
|
||||
} else {
|
||||
lexer.Tokenise(nil, code, out)
|
||||
sub, err := lexer.Tokenise(nil, code)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
iterators = append(iterators, sub)
|
||||
}
|
||||
out(&Token{String, groups[5]})
|
||||
iterators = append(iterators, Literator(&Token{String, groups[5]}))
|
||||
return Concaterator(iterators...)
|
||||
}
|
||||
|
@@ -35,10 +35,9 @@ func Highlight(w io.Writer, source, lexer, formatter, style string) error {
 		s = styles.Fallback
 	}
 
-	writer, err := f.Format(w, s)
+	it, err := l.Tokenise(nil, source)
 	if err != nil {
 		return err
 	}
 
-	return l.Tokenise(nil, source, writer)
+	return f.Format(w, s, it)
 }
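So callers of the quick package see no API change; usage remains a one-liner (the lexer, formatter, and style names here are illustrative):

```go
// Highlight some Go source to the terminal in one call.
if err := quick.Highlight(os.Stdout, "package main", "go", "terminal", "monokai"); err != nil {
	panic(err)
}
```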
regexp.go (115 lines changed)
@@ -19,42 +19,47 @@ type Rule struct {
 // An Emitter takes group matches and returns tokens.
 type Emitter interface {
 	// Emit tokens for the given regex groups.
-	Emit(groups []string, lexer Lexer, out func(*Token))
+	Emit(groups []string, lexer Lexer) Iterator
 }
 
 // EmitterFunc is a function that is an Emitter.
-type EmitterFunc func(groups []string, lexer Lexer, out func(*Token))
+type EmitterFunc func(groups []string, lexer Lexer) Iterator
 
 // Emit tokens for groups.
-func (e EmitterFunc) Emit(groups []string, lexer Lexer, out func(*Token)) { e(groups, lexer, out) }
+func (e EmitterFunc) Emit(groups []string, lexer Lexer) Iterator { return e(groups, lexer) }
 
 // ByGroups emits a token for each matching group in the rule's regex.
 func ByGroups(emitters ...Emitter) Emitter {
-	return EmitterFunc(func(groups []string, lexer Lexer, out func(*Token)) {
-		// NOTE: If this line panics, there is a mismatch with groups. Uncomment the following line to debug.
+	return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
+		iterators := make([]Iterator, 0, len(groups)-1)
+		// NOTE: If this panics, there is a mismatch with groups. Uncomment the following line to debug.
 		// fmt.Printf("%s %#v\n", emitters, groups[1:])
 		for i, group := range groups[1:] {
-			emitters[i].Emit([]string{group}, lexer, out)
+			iterators = append(iterators, emitters[i].Emit([]string{group}, lexer))
 		}
-		return
+		return Concaterator(iterators...)
 	})
 }
 
 // Using returns an Emitter that uses a given Lexer for parsing and emitting.
 func Using(lexer Lexer, options *TokeniseOptions) Emitter {
-	return EmitterFunc(func(groups []string, _ Lexer, out func(*Token)) {
-		if err := lexer.Tokenise(options, groups[0], out); err != nil {
+	return EmitterFunc(func(groups []string, _ Lexer) Iterator {
+		it, err := lexer.Tokenise(options, groups[0])
+		if err != nil {
 			panic(err)
 		}
+		return it
 	})
 }
 
 // UsingSelf is like Using, but uses the current Lexer.
 func UsingSelf(state string) Emitter {
-	return EmitterFunc(func(groups []string, lexer Lexer, out func(*Token)) {
-		if err := lexer.Tokenise(&TokeniseOptions{State: state}, groups[0], out); err != nil {
+	return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
+		it, err := lexer.Tokenise(&TokeniseOptions{State: state}, groups[0])
+		if err != nil {
 			panic(err)
 		}
+		return it
 	})
 }
@@ -69,7 +74,14 @@ func Words(prefix, suffix string, words ...string) string {
 // Tokenise text using lexer, returning tokens as a slice.
 func Tokenise(lexer Lexer, options *TokeniseOptions, text string) ([]*Token, error) {
 	out := []*Token{}
-	return out, lexer.Tokenise(options, text, func(token *Token) { out = append(out, token) })
+	it, err := lexer.Tokenise(options, text)
+	if err != nil {
+		return nil, err
+	}
+	for t := it(); t != nil; t = it() {
+		out = append(out, t)
+	}
+	return out, nil
 }
 
 // Rules maps from state to a sequence of Rules.
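The package-level `Tokenise` helper keeps its slice-returning signature, so call sites such as the coalesce test above are unchanged; roughly:

```go
tokens, err := chroma.Tokenise(lexers.Go, nil, "package main")
if err != nil {
	panic(err)
}
fmt.Printf("lexed %d tokens\n", len(tokens))
```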
@@ -129,6 +141,7 @@ type CompiledRule struct {
 type CompiledRules map[string][]CompiledRule
 
 type LexerState struct {
+	Lexer *RegexLexer
 	Text  []rune
 	Pos   int
 	Rules map[string][]CompiledRule
@@ -149,6 +162,55 @@ func (l *LexerState) Get(key interface{}) interface{} {
 	return l.MutatorContext[key]
 }
 
+func (l *LexerState) Iterator() Iterator {
+	iteratorStack := []Iterator{}
+	return func() *Token {
+		for l.Pos < len(l.Text) && len(l.Stack) > 0 {
+			// Exhaust the IteratorStack, if any.
+			for len(iteratorStack) > 0 {
+				n := len(iteratorStack) - 1
+				t := iteratorStack[n]()
+				if t == nil {
+					iteratorStack = iteratorStack[:n]
+					continue
+				}
+				return t
+			}
+
+			l.State = l.Stack[len(l.Stack)-1]
+			ruleIndex, rule, groups := matchRules(l.Text[l.Pos:], l.Rules[l.State])
+			// No match.
+			if groups == nil {
+				l.Pos++
+				return &Token{Error, string(l.Text[l.Pos-1 : l.Pos])}
+			}
+			l.Rule = ruleIndex
+			l.Groups = groups
+			l.Pos += utf8.RuneCountInString(groups[0])
+			if rule.Mutator != nil {
+				if err := rule.Mutator.Mutate(l); err != nil {
+					panic(err)
+				}
+			}
+			if rule.Type != nil {
+				iteratorStack = append(iteratorStack, rule.Type.Emit(l.Groups, l.Lexer))
+			}
+		}
+		// Exhaust the IteratorStack, if any.
+		// Duplicate code, but eh.
+		for len(iteratorStack) > 0 {
+			n := len(iteratorStack) - 1
+			t := iteratorStack[n]()
+			if t == nil {
+				iteratorStack = iteratorStack[:n]
+				continue
+			}
+			return t
+		}
+		return nil
+	}
+}
+
 type RegexLexer struct {
 	config   *Config
 	analyser func(text string) float32
@@ -197,9 +259,9 @@ func (r *RegexLexer) maybeCompile() (err error) {
 	return nil
 }
 
-func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string, out func(*Token)) error {
+func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
 	if err := r.maybeCompile(); err != nil {
-		return err
+		return nil, err
 	}
 	if options == nil {
 		options = defaultOptions
@@ -210,30 +272,7 @@ func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string, out func(*Token)) error {
 		Rules: r.rules,
 		MutatorContext: map[interface{}]interface{}{},
 	}
-	for state.Pos < len(state.Text) && len(state.Stack) > 0 {
-		state.State = state.Stack[len(state.Stack)-1]
-		ruleIndex, rule, groups := matchRules(state.Text[state.Pos:], state.Rules[state.State])
-		// No match.
-		if groups == nil {
-			out(&Token{Error, string(state.Text[state.Pos : state.Pos+1])})
-			state.Pos++
-			continue
-		}
-		state.Rule = ruleIndex
-
-		state.Groups = groups
-		state.Pos += utf8.RuneCountInString(groups[0])
-		if rule.Mutator != nil {
-			if err := rule.Mutator.Mutate(state); err != nil {
-				return err
-			}
-		}
-		if rule.Type != nil {
-			rule.Type.Emit(state.Groups, r, out)
-		}
-	}
-	out(&Token{Type: EOF})
-	return nil
+	return state.Iterator(), nil
 }
 
 func matchRules(text []rune, rules []CompiledRule) (int, CompiledRule, []string) {
types.go (8 lines changed)
@@ -21,16 +21,12 @@ const (
 	LineNumbers
 	// Line highlight style.
 	LineHighlight
-	// Character highlight style.
-	Highlight
 	// Input that could not be tokenised.
 	Error
 	// Other is used by the Delegate lexer to indicate which tokens should be handled by the delegate.
 	Other
 	// No highlighting.
 	None
-	// Final token.
-	EOF
 )
 
 // Keywords.
@@ -208,6 +204,6 @@ func (t TokenType) InSubCategory(other TokenType) bool {
 	return t/100 == other/100
 }
 
-func (t TokenType) Emit(groups []string, lexer Lexer, out func(*Token)) {
-	out(&Token{Type: t, Value: groups[0]})
+func (t TokenType) Emit(groups []string, lexer Lexer) Iterator {
+	return Literator(&Token{Type: t, Value: groups[0]})
 }