
Use a callback to emit tokens.

This is a) faster and b) supports streaming output.
Alec Thomas 2017-06-02 15:15:15 +10:00
parent 6dd81b044b
commit b30de35ff1
7 changed files with 71 additions and 64 deletions
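In practice the change threads a callback through the whole pipeline: the formatter hands back a func(chroma.Token), and the lexer pushes tokens into it as they are produced instead of accumulating a []Token slice. Below is a minimal caller-side sketch of the new flow, using only the signatures introduced in this diff; the subpackage import paths, the sample filename, and the log-based error handling are assumptions for illustration.

package main

import (
    "log"
    "os"

    "github.com/alecthomas/chroma"
    "github.com/alecthomas/chroma/formatters"
    "github.com/alecthomas/chroma/lexers"
)

func main() {
    // Pick a lexer for the file and coalesce adjacent tokens of the same type.
    // (Registry.Match and Coalesce are used exactly as in main.go below.)
    lexer := chroma.Coalesce(lexers.Registry.Match("example.go")[0])

    // The formatter now returns a callback rather than consuming a []Token slice.
    writer, err := formatters.Console(formatters.DefaultConsoleTheme).Format(os.Stdout)
    if err != nil {
        log.Fatal(err)
    }

    // Tokens are streamed straight into the callback; nothing is buffered.
    if err := lexer.Tokenise("package main\n", writer); err != nil {
        log.Fatal(err)
    }
}

The changes below apply the same pattern to main.go, the coalescer, the console formatter, the regex lexer, and the Markdown code-block handler.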

View File

@@ -3,6 +3,7 @@ package main
 import (
     "bufio"
     "fmt"
+    "io"
     "io/ioutil"
     "os"
     "runtime/pprof"
@@ -30,21 +31,27 @@ func main() {
     }
     w := bufio.NewWriterSize(os.Stdout, 16384)
     defer w.Flush()
-    formatter := formatters.Console(formatters.DefaultConsoleTheme)
+    writer := getWriter(w)
     for _, filename := range *filesArgs {
         lexers := lexers.Registry.Match(filename)
         lexer := lexers[0]
         lexer = chroma.Coalesce(lexer)
         contents, err := ioutil.ReadFile(filename)
         kingpin.FatalIfError(err, "")
-        tokens, err := lexer.Tokenise(string(contents))
+        err = lexer.Tokenise(string(contents), writer)
         kingpin.FatalIfError(err, "")
-        if *tokensFlag {
-            for _, token := range tokens {
-                fmt.Println(token)
-            }
-        } else {
-            formatter.Format(w, tokens)
-        }
     }
 }
+
+func getWriter(w io.Writer) func(chroma.Token) {
+    if *tokensFlag {
+        return func(token chroma.Token) {
+            fmt.Println(token)
+        }
+    } else {
+        formatter := formatters.Console(formatters.DefaultConsoleTheme)
+        writer, err := formatter.Format(w)
+        kingpin.FatalIfError(err, "")
+        return writer
+    }
+}

View File

@@ -9,23 +9,23 @@ type coalescer struct {
     Lexer
 }
 
-func (d *coalescer) Tokenise(text string) ([]Token, error) {
-    in, err := d.Lexer.Tokenise(text)
-    if err != nil {
-        return in, err
-    }
-    out := []Token{}
-    for _, token := range in {
-        if len(out) == 0 {
-            out = append(out, token)
-            continue
-        }
-        last := &out[len(out)-1]
-        if last.Type == token.Type {
-            last.Value += token.Value
-        } else {
-            out = append(out, token)
-        }
-    }
-    return out, err
+func (d *coalescer) Tokenise(text string, out func(Token)) error {
+    var last *Token
+    defer func() {
+        if last != nil {
+            out(*last)
+        }
+    }()
+    return d.Lexer.Tokenise(text, func(token Token) {
+        if last == nil {
+            last = &token
+        } else {
+            if last.Type == token.Type {
+                last.Value += token.Value
+            } else {
+                out(*last)
+                last = &token
+            }
+        }
+    })
 }

View File

@@ -6,7 +6,7 @@ import (
     "github.com/alecthomas/chroma"
 )
 
-// Formatter takes a token stream and formats it.
+// Formatter returns a formatting function for tokens.
 type Formatter interface {
-    Format(w io.Writer, tokens []chroma.Token) error
+    Format(w io.Writer) (func(chroma.Token), error)
 }

View File

@@ -27,8 +27,8 @@ type consoleFormatter struct {
     theme map[TokenType]string
 }
 
-func (c *consoleFormatter) Format(w io.Writer, tokens []Token) error {
-    for _, token := range tokens {
+func (c *consoleFormatter) Format(w io.Writer) (func(Token), error) {
+    return func(token Token) {
         clr, ok := c.theme[token.Type]
         if !ok {
             clr, ok = c.theme[token.Type.SubCategory()]
@@ -41,6 +41,5 @@ func (c *consoleFormatter) Format(w io.Writer, tokens []Token) error {
         }
         fmt.Fprint(w, clr)
         fmt.Fprint(w, token.Value)
-    }
-    return nil
+    }, nil
 }

View File

@@ -55,7 +55,7 @@ func (t Token) GoString() string { return t.String() }
 
 type Lexer interface {
     Config() *Config
-    Tokenise(text string) ([]Token, error)
+    Tokenise(text string, out func(Token)) error
 }
 
 // Analyser determines if this lexer is appropriate for the given text.
@@ -72,23 +72,36 @@ type Rule struct {
 // An Emitter takes group matches and returns tokens.
 type Emitter interface {
     // Emit tokens for the given regex groups.
-    Emit(groups []string) []Token
+    Emit(groups []string, out func(Token))
 }
 
-type EmitterFunc func(groups []string) []Token
+// EmitterFunc is a function that is an Emitter.
+type EmitterFunc func(groups []string, out func(Token))
 
-func (e EmitterFunc) Emit(groups []string) []Token { return e(groups) }
+// Emit tokens for groups.
+func (e EmitterFunc) Emit(groups []string, out func(Token)) { e(groups, out) }
 
 // ByGroups emits a token for each matching group in the rule's regex.
 func ByGroups(emitters ...Emitter) Emitter {
-    return EmitterFunc(func(groups []string) (out []Token) {
+    return EmitterFunc(func(groups []string, out func(Token)) {
         for i, group := range groups[1:] {
-            out = append(out, emitters[i].Emit([]string{group})...)
+            emitters[i].Emit([]string{group}, out)
         }
-        return
     })
 }
 
+// Using uses a given Lexer for parsing and emitting.
+func Using(lexer Lexer) Emitter {
+    return EmitterFunc(func(groups []string, out func(Token)) {
+        if err := lexer.Tokenise(groups[0], out); err != nil {
+            // TODO: Emitters should return an error, though it's not clear what one would do with
+            // it.
+            panic(err)
+        }
+    })
+}
+
 // Words creates a regex that matches any of the given literal words.
 func Words(words ...string) string {
     for i, word := range words {
@@ -168,7 +181,7 @@ type LexerState struct {
     State string
 }
 
-func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
+func (r *regexLexer) Tokenise(text string, out func(Token)) error {
     state := &LexerState{
         Text:  text,
         Stack: []string{"root"},
@@ -179,7 +192,7 @@ func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
         rule, index := matchRules(state.Text[state.Pos:], state.Rules[state.State])
         // No match.
         if index == nil {
-            out = append(out, Token{Error, state.Text[state.Pos : state.Pos+1]})
+            out(Token{Error, state.Text[state.Pos : state.Pos+1]})
             state.Pos++
             continue
         }
@@ -190,14 +203,14 @@ func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
         }
         state.Pos += index[1]
         if rule.Modifier != nil {
-            if err = rule.Modifier.Mutate(state); err != nil {
-                return
+            if err := rule.Modifier.Mutate(state); err != nil {
+                return err
             }
         } else {
-            out = append(out, rule.Type.Emit(groups)...)
+            rule.Type.Emit(groups, out)
         }
     }
-    return
+    return nil
 }
 
 func matchRules(text string, rules []CompiledRule) (CompiledRule, []int) {

View File

@@ -19,22 +19,19 @@ var Markdown = Register(NewLexer(
             {`^(#{2,6})(.+\n)`, ByGroups(GenericSubheading, Text), nil},
             // task list
             {`^(\s*)([*-] )(\[[ xX]\])( .+\n)`,
-                // ByGroups(Text, Keyword, Keyword, using(this, state='inline')), nil},
                 ByGroups(Text, Keyword, Keyword, Text), nil},
             // bulleted lists
             {`^(\s*)([*-])(\s)(.+\n)`,
-                // ByGroups(Text, Keyword, Text, using(this, state='inline')), nil},
                 ByGroups(Text, Keyword, Text, Text), nil},
             // numbered lists
             {`^(\s*)([0-9]+\.)( .+\n)`,
-                // ByGroups(Text, Keyword, using(this, state='inline')), nil},
                 ByGroups(Text, Keyword, Text), nil},
            // quote
            {`^(\s*>\s)(.+\n)`, ByGroups(Keyword, GenericEmph), nil},
            // text block
            {"^(```\n)([\\w\\W]*?)(^```$)", ByGroups(String, Text, String), nil},
            // code block with language
-            {"^(```)(\\w+)(\n)([\\w\\W]*?)(^```$)", EmitterFunc(HandleCodeblock), nil},
+            {"^(```)(\\w+)(\n)([\\w\\W]*?)(^```$)", EmitterFunc(handleCodeblock), nil},
             Include(`inline`),
         },
         `inline`: []Rule{
@@ -61,21 +58,12 @@ var Markdown = Register(NewLexer(
     },
 ))
 
-func HandleCodeblock(groups []string) []Token {
-    out := []Token{
-        {String, groups[1]},
-        {String, groups[2]},
-        {Text, groups[3]},
-    }
+func handleCodeblock(groups []string, out func(Token)) {
+    out(Token{String, groups[1]})
+    out(Token{String, groups[2]})
+    out(Token{Text, groups[3]})
     code := groups[4]
     lexer := Registry.Get(groups[2])
-    tokens, err := lexer.Tokenise(code)
-    if err == nil {
-        out = append(out, tokens...)
-    } else {
-        out = append(out, Token{Error, code})
-    }
-    out = append(out, Token{String, groups[5]})
-    return out
+    lexer.Tokenise(code, out)
+    out(Token{String, groups[5]})
 }

View File

@@ -176,6 +176,6 @@ func (t TokenType) InSubCategory(other TokenType) bool {
     return t/100 == other/100
 }
 
-func (t TokenType) Emit(groups []string) []Token {
-    return []Token{Token{Type: t, Value: groups[0]}}
+func (t TokenType) Emit(groups []string, out func(Token)) {
+    out(Token{Type: t, Value: groups[0]})
 }