Mirror of https://github.com/alecthomas/chroma.git, synced 2025-07-17 01:22:22 +02:00
Switch to an Iterator interface.
This solves an issue where the writers returned by a Formatter were often stateful, but that fact was not obvious to API consumers, so code using them failed in interesting ways.
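The new shape is a pull-based iterator: `type Iterator func() *Token`, returning nil at the end of the stream, so all lexing state lives inside a closure rather than in a writer the caller must know is stateful. A minimal self-contained sketch of the pattern, using simplified stand-ins for chroma's types (this is not the library's API):

```go
package main

import "fmt"

// Token is a simplified stand-in for chroma's Token.
type Token struct{ Value string }

// An Iterator returns the next Token, or nil at the end of the stream.
type Iterator func() *Token

// literator mimics the Literator helper introduced in this commit: it
// wraps a fixed slice of tokens in an Iterator closure.
func literator(tokens ...*Token) Iterator {
	return func() *Token {
		if len(tokens) == 0 {
			return nil
		}
		t := tokens[0]
		tokens = tokens[1:]
		return t
	}
}

func main() {
	it := literator(&Token{"hello"}, &Token{" "}, &Token{"world"})
	// The pull loop used throughout this commit: call until nil.
	for t := it(); t != nil; t = it() {
		fmt.Print(t.Value)
	}
	fmt.Println()
}
```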
README.md · 21 lines changed
@@ -1,5 +1,7 @@
 # Chroma - A general purpose syntax highlighter in pure Go [](http://godoc.org/github.com/alecthomas/chroma) [](https://travis-ci.org/alecthomas/chroma) [](https://gitter.im/alecthomas/Lobby)
 
+> **NOTE:** As Chroma has just been released, its API is still in flux. That said, the high-level interface should not change significantly.
+
 Chroma takes source code and other structured text and converts it into syntax
 highlighted HTML, ANSI-coloured text, etc.
 
@@ -115,17 +117,17 @@ if formatter == nil {
 }
 ```
 
-Then obtain a formatting function from the formatter:
+Then obtain an iterator over the tokens:
 
 ```go
-writer, err := formatter.Format(w, style)
-```
-
-And finally, lex the source code and write the output:
-
-```go
 contents, err := ioutil.ReadAll(r)
-err := lexer.Tokenise(nil, string(contents), writer)
+iterator, err := lexer.Tokenise(nil, string(contents))
+```
+
+And finally, format the tokens from the iterator:
+
+```go
+err := formatter.Format(w, style, iterator)
 ```
 
 ### The HTML formatter
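Putting the README steps together, here is a hedged end-to-end sketch of the new API. It assumes the `lexers.Get`/`formatters.Get` registry helpers and `styles.Fallback`, which appear elsewhere in this commit or in the README; error handling is reduced to panics:

```go
package main

import (
	"os"

	"github.com/alecthomas/chroma"
	"github.com/alecthomas/chroma/formatters"
	"github.com/alecthomas/chroma/lexers"
	"github.com/alecthomas/chroma/styles"
)

func main() {
	source := "package main\n\nfunc main() { println(\"hello\") }\n"

	lexer := lexers.Get("go")
	if lexer == nil {
		lexer = lexers.Fallback
	}
	lexer = chroma.Coalesce(lexer)

	// Tokenise now returns an Iterator instead of writing to a callback.
	iterator, err := lexer.Tokenise(nil, source)
	if err != nil {
		panic(err)
	}

	// The formatter consumes the iterator; no stateful writer changes hands.
	formatter := formatters.Get("tokens")
	if err := formatter.Format(os.Stdout, styles.Fallback, iterator); err != nil {
		panic(err)
	}
}
```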
@@ -139,6 +141,9 @@ following constructor options:
 - `Standalone()` - generate standalone HTML with embedded CSS.
 - `WithClasses()` - use classes rather than inlined style attributes.
 - `ClassPrefix(prefix)` - prefix each generated CSS class.
+- `TabWidth(width)` - Set the rendered tab width, in characters.
+- `WithLineNumbers()` - Render line numbers (style with `LineNumbers`).
+- `HighlightLines(ranges)` - Highlight lines in these ranges (style with `LineHighlight`).
 
 If `WithClasses()` is used, the corresponding CSS can be obtained from the formatter with:
 
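The options above compose at construction time. A sketch, assuming the option names exactly as listed (the `[][2]int` shape for `HighlightLines` is an inference from the `highlightRanges` sort code in this commit):

```go
package main

import (
	"os"

	"github.com/alecthomas/chroma/formatters/html"
	"github.com/alecthomas/chroma/lexers"
	"github.com/alecthomas/chroma/styles"
)

func main() {
	formatter := html.New(
		html.WithClasses(),
		html.ClassPrefix("chroma-"),
		html.TabWidth(4),
		html.WithLineNumbers(),
		// Inferred shape: highlightRanges sorts ranges by h[i][0].
		html.HighlightLines([][2]int{{3, 5}}),
	)
	it, err := lexers.Fallback.Tokenise(nil, "one\ntwo\nthree\nfour\nfive\n")
	if err != nil {
		panic(err)
	}
	if err := formatter.Format(os.Stdout, styles.Fallback, it); err != nil {
		panic(err)
	}
}
```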
@@ -146,16 +146,15 @@ command, for Go.
 		}
 		formatters.Register("html", html.New(options...))
 	}
-	writer := getWriter(w, style)
 	if len(*filesArgs) == 0 {
 		contents, err := ioutil.ReadAll(os.Stdin)
 		kingpin.FatalIfError(err, "")
-		lex("", string(contents), writer)
+		format(os.Stdout, style, lex("", string(contents)))
 	} else {
 		for _, filename := range *filesArgs {
 			contents, err := ioutil.ReadFile(filename)
 			kingpin.FatalIfError(err, "")
-			lex(filename, string(contents), writer)
+			format(os.Stdout, style, lex(filename, string(contents)))
 		}
 	}
 }
@@ -192,14 +191,15 @@ func listAll() {
 	fmt.Println()
 }
 
-func lex(path string, contents string, writer func(*chroma.Token)) {
+func lex(path string, contents string) chroma.Iterator {
 	lexer := selexer(path, contents)
 	if lexer == nil {
 		lexer = lexers.Fallback
 	}
 	lexer = chroma.Coalesce(lexer)
-	err := lexer.Tokenise(nil, string(contents), writer)
+	it, err := lexer.Tokenise(nil, string(contents))
 	kingpin.FatalIfError(err, "")
+	return it
 }
 
 func selexer(path, contents string) (lexer chroma.Lexer) {
@@ -215,10 +215,8 @@ func selexer(path, contents string) (lexer chroma.Lexer) {
 	return lexers.Analyse(contents)
 }
 
-func getWriter(w io.Writer, style *chroma.Style) func(*chroma.Token) {
+func format(w io.Writer, style *chroma.Style, it chroma.Iterator) {
 	formatter := formatters.Get(*formatterFlag)
-	// formatter := formatters.TTY8
-	writer, err := formatter.Format(w, style)
+	err := formatter.Format(w, style, it)
 	kingpin.FatalIfError(err, "")
-	return writer
 }
coalesce.go · 29 lines changed
@@ -9,21 +9,24 @@ type coalescer struct {
 	Lexer
 }
 
-func (d *coalescer) Tokenise(options *TokeniseOptions, text string, out func(*Token)) error {
+func (d *coalescer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
 	var prev *Token
-	return d.Lexer.Tokenise(options, text, func(token *Token) {
-		if prev == nil {
-			prev = token
-		} else {
-			if prev.Type == token.Type && len(prev.Value) < 8192 {
-				prev.Value += token.Value
-			} else {
-				out(prev)
+	it, err := d.Lexer.Tokenise(options, text)
+	if err != nil {
+		return nil, err
+	}
+	return func() *Token {
+		for token := it(); token != nil; token = it() {
+			if prev == nil {
 				prev = token
+			} else {
+				if prev.Type == token.Type && len(prev.Value) < 8192 {
+					prev.Value += token.Value
+				}
 			}
 		}
-		if token.Type == EOF {
-			out(token)
-		}
-	})
+		out := prev
+		prev = nil
+		return out
+	}, nil
 }
@@ -14,9 +14,6 @@ func TestCoalesce(t *testing.T) {
 	}))
 	actual, err := Tokenise(lexer, nil, "!@#$")
 	require.NoError(t, err)
-	expected := []*Token{
-		&Token{Punctuation, "!@#$"},
-		&Token{EOF, ""},
-	}
+	expected := []*Token{{Punctuation, "!@#$"}}
 	require.Equal(t, expected, actual)
 }
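The test expects four adjacent `Punctuation` tokens to come back as one. A self-contained toy (deliberately not chroma's API) showing the same merge rule:

```go
package main

import "fmt"

// Token is a simplified stand-in for chroma's Token.
type Token struct {
	Type  string
	Value string
}

// coalesce merges runs of same-typed tokens, the behaviour the
// coalescer above provides (its 8192-byte cap is omitted here).
func coalesce(tokens []Token) []Token {
	out := []Token{}
	for _, t := range tokens {
		if n := len(out) - 1; n >= 0 && out[n].Type == t.Type {
			out[n].Value += t.Value
			continue
		}
		out = append(out, t)
	}
	return out
}

func main() {
	in := []Token{{"Punctuation", "!"}, {"Punctuation", "@"}, {"Punctuation", "#"}, {"Punctuation", "$"}}
	fmt.Println(coalesce(in)) // [{Punctuation !@#$}]
}
```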
@@ -7,12 +7,10 @@ import (
 // A Formatter for Chroma lexers.
 type Formatter interface {
 	// Format returns a formatting function for tokens.
-	Format(w io.Writer, style *Style) (func(*Token), error)
+	Format(w io.Writer, style *Style, iterator Iterator) error
 }
 
 // A FormatterFunc is a Formatter implemented as a function.
-type FormatterFunc func(io.Writer, *Style) (func(*Token), error)
+type FormatterFunc func(w io.Writer, style *Style, iterator Iterator) error
 
-func (f FormatterFunc) Format(w io.Writer, s *Style) (func(*Token), error) {
-	return f(w, s)
-}
+func (f FormatterFunc) Format(w io.Writer, s *Style, it Iterator) error { return f(w, s, it) }
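With the new interface, a custom formatter is just a function that drains the iterator and reports the first write error. A hedged sketch; the upper-casing behaviour is invented for illustration:

```go
package main

import (
	"io"
	"os"
	"strings"

	"github.com/alecthomas/chroma"
)

// upper is a hypothetical formatter: it writes every token's value
// upper-cased. It matches the new FormatterFunc shape above.
func upper(w io.Writer, s *chroma.Style, it chroma.Iterator) error {
	for t := it(); t != nil; t = it() {
		if _, err := io.WriteString(w, strings.ToUpper(t.Value)); err != nil {
			return err
		}
	}
	return nil
}

func main() {
	it := chroma.Literator(&chroma.Token{Value: "hello"})
	// FormatterFunc adapts the function to the Formatter interface.
	var f chroma.Formatter = chroma.FormatterFunc(upper)
	_ = f.Format(os.Stdout, nil, it) // style unused by this formatter
}
```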
@@ -10,8 +10,13 @@ import (
 
 var (
 	// NoOp formatter.
-	NoOp = Register("noop", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style) (func(*chroma.Token), error) {
-		return func(t *chroma.Token) { io.WriteString(w, t.Value) }, nil
+	NoOp = Register("noop", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, iterator chroma.Iterator) error {
+		for t := iterator(); t != nil; t = iterator() {
+			if _, err := io.WriteString(w, t.Value); err != nil {
+				return err
+			}
+		}
+		return nil
 	}))
 	// Default HTML formatter outputs self-contained HTML.
 	htmlFull = Register("html", html.New(html.Standalone(), html.WithClasses()))
@@ -67,15 +67,8 @@ func (h highlightRanges) Len() int { return len(h) }
 func (h highlightRanges) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
 func (h highlightRanges) Less(i, j int) bool { return h[i][0] < h[j][0] }
 
-func (f *Formatter) Format(w io.Writer, style *chroma.Style) (func(*chroma.Token), error) {
-	tokens := []*chroma.Token{}
-	return func(token *chroma.Token) {
-		tokens = append(tokens, token)
-		if token.Type == chroma.EOF {
-			f.writeHTML(w, style, tokens)
-			return
-		}
-	}, nil
+func (f *Formatter) Format(w io.Writer, style *chroma.Style, iterator chroma.Iterator) error {
+	return f.writeHTML(w, style, chroma.Flatten(iterator))
 }
 
 func (f *Formatter) writeHTML(w io.Writer, style *chroma.Style, tokens []*chroma.Token) error { // nolint: gocyclo
@@ -20,11 +20,11 @@ func TestCompressStyle(t *testing.T) {
 
 func BenchmarkHTMLFormatter(b *testing.B) {
 	formatter := New()
-	writer, err := formatter.Format(ioutil.Discard, styles.Fallback)
-	assert.NoError(b, err)
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		err = lexers.Go.Tokenise(nil, "package main\nfunc main()\n{\nprintln(`hello world`)\n}\n", writer)
+		it, err := lexers.Go.Tokenise(nil, "package main\nfunc main()\n{\nprintln(`hello world`)\n}\n")
+		assert.NoError(b, err)
+		err = formatter.Format(ioutil.Discard, styles.Fallback, it)
 		assert.NoError(b, err)
 	}
 }
@@ -33,7 +33,6 @@ func TestSplitTokensIntoLines(t *testing.T) {
 	in := []*chroma.Token{
 		{Value: "hello", Type: chroma.NameKeyword},
 		{Value: " world\nwhat?\n", Type: chroma.NameKeyword},
-		{Type: chroma.EOF},
 	}
 	expected := [][]*chroma.Token{
 		[]*chroma.Token{
@@ -45,7 +44,6 @@ func TestSplitTokensIntoLines(t *testing.T) {
 		},
 		[]*chroma.Token{
 			{Type: chroma.NameKeyword},
-			{Type: chroma.EOF},
 		},
 	}
 	actual := splitTokensIntoLines(in)
@@ -8,8 +8,11 @@ import (
 )
 
 // Tokens formatter outputs the raw token structures.
-var Tokens = Register("tokens", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style) (func(*chroma.Token), error) {
-	return func(token *chroma.Token) {
-		fmt.Fprintln(w, token.GoString())
-	}, nil
+var Tokens = Register("tokens", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, it chroma.Iterator) error {
+	for t := it(); t != nil; t = it() {
+		if _, err := fmt.Fprintln(w, t.GoString()); err != nil {
+			return err
+		}
+	}
+	return nil
 }))
@@ -234,9 +234,9 @@ type indexedTTYFormatter struct {
 	table *ttyTable
 }
 
-func (c *indexedTTYFormatter) Format(w io.Writer, style *chroma.Style) (func(*chroma.Token), error) {
+func (c *indexedTTYFormatter) Format(w io.Writer, style *chroma.Style, it chroma.Iterator) error {
 	theme := styleToEscapeSequence(c.table, style)
-	return func(token *chroma.Token) {
+	for token := it(); token != nil; token = it() {
 		// TODO: Cache token lookups?
 		clr, ok := theme[token.Type]
 		if !ok {
@@ -255,7 +255,8 @@ func (c *indexedTTYFormatter) Format(w io.Writer, style *chroma.Style) (func(*chroma.Token), error) {
 		if clr != "" {
 			fmt.Fprintf(w, "\033[0m")
 		}
-	}, nil
+	}
+	return nil
 }
 
 // TTY8 is an 8-colour terminal formatter.
@@ -10,8 +10,8 @@ import (
 // TTY16m is a true-colour terminal formatter.
 var TTY16m = Register("terminal16m", chroma.FormatterFunc(trueColourFormatter))
 
-func trueColourFormatter(w io.Writer, style *chroma.Style) (func(*chroma.Token), error) {
-	return func(token *chroma.Token) {
+func trueColourFormatter(w io.Writer, style *chroma.Style, it chroma.Iterator) error {
+	for token := it(); token != nil; token = it() {
 		entry := style.Get(token.Type)
 		if !entry.IsZero() {
 			out := ""
@@ -33,5 +33,6 @@ func trueColourFormatter(w io.Writer, style *chroma.Style) (func(*chroma.Token), error) {
 		if !entry.IsZero() {
 			fmt.Fprint(w, "\033[0m")
 		}
-	}, nil
+	}
+	return nil
 }
iterator.go · 41 lines (new file)
@@ -0,0 +1,41 @@
+package chroma
+
+// An Iterator across tokens.
+//
+// nil will be returned at the end of the Token stream.
+type Iterator func() *Token
+
+// Concaterator concatenates tokens from a series of iterators.
+func Concaterator(iterators ...Iterator) Iterator {
+	return func() *Token {
+		for len(iterators) > 0 {
+			t := iterators[0]()
+			if t != nil {
+				return t
+			}
+			iterators = iterators[1:]
+		}
+		return nil
+	}
+}
+
+// Literator converts a sequence of literal Tokens into an Iterator.
+func Literator(tokens ...*Token) Iterator {
+	return func() (out *Token) {
+		if len(tokens) == 0 {
+			return nil
+		}
+		token := tokens[0]
+		tokens = tokens[1:]
+		return token
+	}
+}
+
+// Flatten an Iterator into its tokens.
+func Flatten(iterator Iterator) []*Token {
+	out := []*Token{}
+	for t := iterator(); t != nil; t = iterator() {
+		out = append(out, t)
+	}
+	return out
+}
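A short sketch exercising the three helpers this file adds, from outside the package:

```go
package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	// Literator wraps fixed tokens; Concaterator chains iterators.
	a := chroma.Literator(&chroma.Token{Value: "one "})
	b := chroma.Literator(&chroma.Token{Value: "two"})
	it := chroma.Concaterator(a, b)
	// Flatten drains an iterator back into a slice.
	for _, t := range chroma.Flatten(it) {
		fmt.Print(t.Value)
	}
	fmt.Println() // prints "one two"
}
```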
lexer.go · 6 lines changed
@@ -76,10 +76,8 @@ type TokeniseOptions struct {
 type Lexer interface {
 	// Config describing the features of the Lexer.
 	Config() *Config
-	// Tokenise text and call out for each generated token.
-	//
-	// A token of type EOF will be passed to out() to signify the end of the stream.
-	Tokenise(options *TokeniseOptions, text string, out func(*Token)) error
+	// Tokenise returns an Iterator over tokens in text.
+	Tokenise(options *TokeniseOptions, text string) (Iterator, error)
 }
 
 type Lexers []Lexer
@@ -47,7 +47,6 @@ func TestSimpleLexer(t *testing.T) {
 		{Whitespace, " "},
 		{LiteralString, "10"},
 		{Whitespace, "\n"},
-		{EOF, ""},
 	}
 	require.Equal(t, expected, actual)
 }
@@ -12,10 +12,10 @@ import (
 )
 
 func TestCompileAllRegexes(t *testing.T) {
-	writer, err := formatters.NoOp.Format(ioutil.Discard, styles.SwapOff)
-	assert.NoError(t, err)
 	for _, lexer := range lexers.Registry.Lexers {
-		err = lexer.Tokenise(nil, "", writer)
+		it, err := lexer.Tokenise(nil, "")
+		assert.NoError(t, err, "%s failed", lexer.Config().Name)
+		err = formatters.NoOp.Format(ioutil.Discard, styles.SwapOff, it)
 		assert.NoError(t, err, "%s failed", lexer.Config().Name)
 	}
 }
@@ -3,7 +3,7 @@ package lexers
 import (
 	"testing"
 
-	"github.com/alecthomas/chroma"
+	"github.com/stretchr/testify/assert"
 )
 
 const lexerBenchSource = `package chroma
@@ -29,6 +29,9 @@ func (f FormatterFunc) Format(w io.Writer, s *Style) (func(*Token), error) {
 func Benchmark(b *testing.B) {
 	b.ReportAllocs()
 	for i := 0; i < b.N; i++ {
-		Go.Tokenise(nil, lexerBenchSource, func(t *chroma.Token) {})
+		it, err := Go.Tokenise(nil, lexerBenchSource)
+		assert.NoError(b, err)
+		for t := it(); t != nil; t = it() {
+		}
 	}
 }
@@ -38,16 +38,25 @@ var Markdown = Register(MustNewLexer(
 	},
 ))
 
-func handleCodeblock(groups []string, lexer Lexer, out func(*Token)) {
-	out(&Token{String, groups[1]})
-	out(&Token{String, groups[2]})
-	out(&Token{Text, groups[3]})
+func handleCodeblock(groups []string, lexer Lexer) Iterator {
+	iterators := []Iterator{}
+	tokens := []*Token{
+		&Token{String, groups[1]},
+		&Token{String, groups[2]},
+		&Token{Text, groups[3]},
+	}
 	code := groups[4]
 	lexer = Get(groups[2])
 	if lexer == nil {
-		out(&Token{String, code})
+		tokens = append(tokens, &Token{String, code})
+		iterators = append(iterators, Literator(tokens...))
 	} else {
-		lexer.Tokenise(nil, code, out)
+		sub, err := lexer.Tokenise(nil, code)
+		if err != nil {
+			panic(err)
+		}
+		iterators = append(iterators, sub)
 	}
-	out(&Token{String, groups[5]})
+	iterators = append(iterators, Literator(&Token{String, groups[5]}))
+	return Concaterator(iterators...)
 }
@@ -35,10 +35,9 @@ func Highlight(w io.Writer, source, lexer, formatter, style string) error {
 		s = styles.Fallback
 	}
 
-	writer, err := f.Format(w, s)
+	it, err := l.Tokenise(nil, source)
 	if err != nil {
 		return err
 	}
-	return l.Tokenise(nil, source, writer)
+	return f.Format(w, s, it)
 }
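For completeness, the convenience wrapper touched here in use. A hedged sketch: the `quick` import path and the `"go"`/`"html"`/`"monokai"` registry names are assumptions about chroma's layout, not shown in this diff:

```go
package main

import (
	"log"
	"os"

	"github.com/alecthomas/chroma/quick"
)

func main() {
	// Highlight now tokenises first, then formats the resulting
	// iterator, matching the reworked function above.
	err := quick.Highlight(os.Stdout, "package main", "go", "html", "monokai")
	if err != nil {
		log.Fatal(err)
	}
}
```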
regexp.go · 115 lines changed
@@ -19,42 +19,47 @@ type Rule struct {
 // An Emitter takes group matches and returns tokens.
 type Emitter interface {
 	// Emit tokens for the given regex groups.
-	Emit(groups []string, lexer Lexer, out func(*Token))
+	Emit(groups []string, lexer Lexer) Iterator
 }
 
 // EmitterFunc is a function that is an Emitter.
-type EmitterFunc func(groups []string, lexer Lexer, out func(*Token))
+type EmitterFunc func(groups []string, lexer Lexer) Iterator
 
 // Emit tokens for groups.
-func (e EmitterFunc) Emit(groups []string, lexer Lexer, out func(*Token)) { e(groups, lexer, out) }
+func (e EmitterFunc) Emit(groups []string, lexer Lexer) Iterator { return e(groups, lexer) }
 
 // ByGroups emits a token for each matching group in the rule's regex.
 func ByGroups(emitters ...Emitter) Emitter {
-	return EmitterFunc(func(groups []string, lexer Lexer, out func(*Token)) {
-		// NOTE: If this line panics, there is a mismatch with groups. Uncomment the following line to debug.
+	return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
+		iterators := make([]Iterator, 0, len(groups)-1)
+		// NOTE: If this panics, there is a mismatch with groups. Uncomment the following line to debug.
 		// fmt.Printf("%s %#v\n", emitters, groups[1:])
 		for i, group := range groups[1:] {
-			emitters[i].Emit([]string{group}, lexer, out)
+			iterators = append(iterators, emitters[i].Emit([]string{group}, lexer))
 		}
-		return
+		return Concaterator(iterators...)
 	})
 }
 
 // Using returns an Emitter that uses a given Lexer for parsing and emitting.
 func Using(lexer Lexer, options *TokeniseOptions) Emitter {
-	return EmitterFunc(func(groups []string, _ Lexer, out func(*Token)) {
-		if err := lexer.Tokenise(options, groups[0], out); err != nil {
+	return EmitterFunc(func(groups []string, _ Lexer) Iterator {
+		it, err := lexer.Tokenise(options, groups[0])
+		if err != nil {
 			panic(err)
 		}
+		return it
 	})
 }
 
 // UsingSelf is like Using, but uses the current Lexer.
 func UsingSelf(state string) Emitter {
-	return EmitterFunc(func(groups []string, lexer Lexer, out func(*Token)) {
-		if err := lexer.Tokenise(&TokeniseOptions{State: state}, groups[0], out); err != nil {
+	return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
+		it, err := lexer.Tokenise(&TokeniseOptions{State: state}, groups[0])
+		if err != nil {
 			panic(err)
 		}
+		return it
 	})
 }
 
@@ -69,7 +74,14 @@ func Words(prefix, suffix string, words ...string) string {
 // Tokenise text using lexer, returning tokens as a slice.
 func Tokenise(lexer Lexer, options *TokeniseOptions, text string) ([]*Token, error) {
 	out := []*Token{}
-	return out, lexer.Tokenise(options, text, func(token *Token) { out = append(out, token) })
+	it, err := lexer.Tokenise(options, text)
+	if err != nil {
+		return nil, err
+	}
+	for t := it(); t != nil; t = it() {
+		out = append(out, t)
+	}
+	return out, nil
 }
 
 // Rules maps from state to a sequence of Rules.
@@ -129,6 +141,7 @@ type CompiledRule struct {
 type CompiledRules map[string][]CompiledRule
 
 type LexerState struct {
+	Lexer *RegexLexer
 	Text []rune
 	Pos int
 	Rules map[string][]CompiledRule
@@ -149,6 +162,55 @@ func (l *LexerState) Get(key interface{}) interface{} {
 	return l.MutatorContext[key]
 }
 
+func (l *LexerState) Iterator() Iterator {
+	iteratorStack := []Iterator{}
+	return func() *Token {
+		for l.Pos < len(l.Text) && len(l.Stack) > 0 {
+			// Exhaust the IteratorStack, if any.
+			for len(iteratorStack) > 0 {
+				n := len(iteratorStack) - 1
+				t := iteratorStack[n]()
+				if t == nil {
+					iteratorStack = iteratorStack[:n]
+					continue
+				}
+				return t
+			}
+
+			l.State = l.Stack[len(l.Stack)-1]
+			ruleIndex, rule, groups := matchRules(l.Text[l.Pos:], l.Rules[l.State])
+			// No match.
+			if groups == nil {
+				l.Pos++
+				return &Token{Error, string(l.Text[l.Pos-1 : l.Pos])}
+			}
+			l.Rule = ruleIndex
+			l.Groups = groups
+			l.Pos += utf8.RuneCountInString(groups[0])
+			if rule.Mutator != nil {
+				if err := rule.Mutator.Mutate(l); err != nil {
+					panic(err)
+				}
+			}
+			if rule.Type != nil {
+				iteratorStack = append(iteratorStack, rule.Type.Emit(l.Groups, l.Lexer))
+			}
+		}
+		// Exhaust the IteratorStack, if any.
+		// Duplicate code, but eh.
+		for len(iteratorStack) > 0 {
+			n := len(iteratorStack) - 1
+			t := iteratorStack[n]()
+			if t == nil {
+				iteratorStack = iteratorStack[:n]
+				continue
+			}
+			return t
+		}
+		return nil
+	}
+}
+
 type RegexLexer struct {
 	config *Config
 	analyser func(text string) float32
@@ -197,9 +259,9 @@ func (r *RegexLexer) maybeCompile() (err error) {
 	return nil
 }
 
-func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string, out func(*Token)) error {
+func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
 	if err := r.maybeCompile(); err != nil {
-		return err
+		return nil, err
 	}
 	if options == nil {
 		options = defaultOptions
@@ -210,30 +272,7 @@ func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string, out func(*Token)) error {
 		Rules: r.rules,
 		MutatorContext: map[interface{}]interface{}{},
 	}
-	for state.Pos < len(state.Text) && len(state.Stack) > 0 {
-		state.State = state.Stack[len(state.Stack)-1]
-		ruleIndex, rule, groups := matchRules(state.Text[state.Pos:], state.Rules[state.State])
-		// No match.
-		if groups == nil {
-			out(&Token{Error, string(state.Text[state.Pos : state.Pos+1])})
-			state.Pos++
-			continue
-		}
-		state.Rule = ruleIndex
-
-		state.Groups = groups
-		state.Pos += utf8.RuneCountInString(groups[0])
-		if rule.Mutator != nil {
-			if err := rule.Mutator.Mutate(state); err != nil {
-				return err
-			}
-		}
-		if rule.Type != nil {
-			rule.Type.Emit(state.Groups, r, out)
-		}
-	}
-	out(&Token{Type: EOF})
-	return nil
+	return state.Iterator(), nil
 }
 
 func matchRules(text []rune, rules []CompiledRule) (int, CompiledRule, []string) {
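The reworked `Tokenise` helper above gives the slice-returning form of the API. A hedged usage sketch (`lexers.Fallback` appears elsewhere in this diff):

```go
package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
	"github.com/alecthomas/chroma/lexers"
)

func main() {
	// chroma.Tokenise drains the lexer's iterator into a slice, as in
	// the reworked helper in regexp.go above.
	tokens, err := chroma.Tokenise(lexers.Fallback, nil, "hello world")
	if err != nil {
		panic(err)
	}
	for _, t := range tokens {
		fmt.Printf("%v %q\n", t.Type, t.Value)
	}
}
```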
types.go · 8 lines changed
@@ -21,16 +21,12 @@ const (
 	LineNumbers
 	// Line highlight style.
 	LineHighlight
-	// Character highlight style.
-	Highlight
 	// Input that could not be tokenised.
 	Error
 	// Other is used by the Delegate lexer to indicate which tokens should be handled by the delegate.
 	Other
 	// No highlighting.
 	None
-	// Final token.
-	EOF
 )
 
 // Keywords.
@@ -208,6 +204,6 @@ func (t TokenType) InSubCategory(other TokenType) bool {
 	return t/100 == other/100
 }
 
-func (t TokenType) Emit(groups []string, lexer Lexer, out func(*Token)) {
-	out(&Token{Type: t, Value: groups[0]})
+func (t TokenType) Emit(groups []string, lexer Lexer) Iterator {
+	return Literator(&Token{Type: t, Value: groups[0]})
 }