Mirror of https://github.com/alecthomas/chroma.git

Use a callback to emit tokens.

This is a) faster and b) supports streaming output.
Alec Thomas 2017-06-02 15:15:15 +10:00
parent 6dd81b044b
commit b30de35ff1
7 changed files with 71 additions and 64 deletions
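Before the per-file diffs, here is a rough sketch of how a caller drives the new callback-based API, pieced together from the signatures introduced below. The import paths, file name, and input snippet are illustrative assumptions, not part of the commit:

package main

import (
	"log"
	"os"

	"github.com/alecthomas/chroma"
	"github.com/alecthomas/chroma/formatters"
	"github.com/alecthomas/chroma/lexers"
)

func main() {
	// Pick a lexer by file name and merge adjacent tokens of the same type.
	lexer := chroma.Coalesce(lexers.Registry.Match("example.go")[0])

	// The formatter now hands back a token callback bound to the output writer.
	writer, err := formatters.Console(formatters.DefaultConsoleTheme).Format(os.Stdout)
	if err != nil {
		log.Fatal(err)
	}

	// Tokens stream straight to the formatter as they are produced,
	// instead of being collected into a []Token first.
	if err := lexer.Tokenise("package main\n", writer); err != nil {
		log.Fatal(err)
	}
}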

View File

@@ -3,6 +3,7 @@ package main
 import (
 	"bufio"
 	"fmt"
+	"io"
 	"io/ioutil"
 	"os"
 	"runtime/pprof"
@@ -30,21 +31,27 @@ func main() {
 	}
 	w := bufio.NewWriterSize(os.Stdout, 16384)
 	defer w.Flush()
-	formatter := formatters.Console(formatters.DefaultConsoleTheme)
+	writer := getWriter(w)
 	for _, filename := range *filesArgs {
 		lexers := lexers.Registry.Match(filename)
 		lexer := lexers[0]
 		lexer = chroma.Coalesce(lexer)
 		contents, err := ioutil.ReadFile(filename)
 		kingpin.FatalIfError(err, "")
-		tokens, err := lexer.Tokenise(string(contents))
+		err = lexer.Tokenise(string(contents), writer)
 		kingpin.FatalIfError(err, "")
-		if *tokensFlag {
-			for _, token := range tokens {
-				fmt.Println(token)
-			}
-		} else {
-			formatter.Format(w, tokens)
-		}
 	}
 }
+
+func getWriter(w io.Writer) func(chroma.Token) {
+	if *tokensFlag {
+		return func(token chroma.Token) {
+			fmt.Println(token)
+		}
+	} else {
+		formatter := formatters.Console(formatters.DefaultConsoleTheme)
+		writer, err := formatter.Format(w)
+		kingpin.FatalIfError(err, "")
+		return writer
+	}
+}

View File

@@ -9,23 +9,23 @@ type coalescer struct {
 	Lexer
 }
 
-func (d *coalescer) Tokenise(text string) ([]Token, error) {
-	in, err := d.Lexer.Tokenise(text)
-	if err != nil {
-		return in, err
-	}
-	out := []Token{}
-	for _, token := range in {
-		if len(out) == 0 {
-			out = append(out, token)
-			continue
-		}
-		last := &out[len(out)-1]
-		if last.Type == token.Type {
-			last.Value += token.Value
-		} else {
-			out = append(out, token)
-		}
-	}
-	return out, err
+func (d *coalescer) Tokenise(text string, out func(Token)) error {
+	var last *Token
+	defer func() {
+		if last != nil {
+			out(*last)
+		}
+	}()
+	return d.Lexer.Tokenise(text, func(token Token) {
+		if last == nil {
+			last = &token
+		} else {
+			if last.Type == token.Type {
+				last.Value += token.Value
+			} else {
+				out(*last)
+				last = &token
+			}
+		}
+	})
 }
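With this change the coalescer buffers only the most recent token: a run of same-typed tokens is merged into it, and it is flushed through the callback when the token type changes or, via the deferred function, when the wrapped lexer finishes. A hypothetical illustration, written as if inside package chroma and assuming the String and Text token types used elsewhere in this commit:

// stubLexer is a made-up lexer that simply replays a fixed token slice.
type stubLexer struct{ tokens []Token }

func (s stubLexer) Config() *Config { return nil }

func (s stubLexer) Tokenise(text string, out func(Token)) error {
	for _, t := range s.tokens {
		out(t)
	}
	return nil
}

// Coalesce(stubLexer{tokens: []Token{{String, "foo"}, {String, "bar"}, {Text, " "}}})
// emits {String "foobar"} and then {Text " "} to the callback passed to Tokenise.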

View File

@@ -6,7 +6,7 @@ import (
 	"github.com/alecthomas/chroma"
 )
 
-// Formatter takes a token stream and formats it.
+// Formatter returns a formatting function for tokens.
 type Formatter interface {
-	Format(w io.Writer, tokens []chroma.Token) error
+	Format(w io.Writer) (func(chroma.Token), error)
 }

View File

@@ -27,8 +27,8 @@ type consoleFormatter struct {
 	theme map[TokenType]string
 }
 
-func (c *consoleFormatter) Format(w io.Writer, tokens []Token) error {
-	for _, token := range tokens {
+func (c *consoleFormatter) Format(w io.Writer) (func(Token), error) {
+	return func(token Token) {
 		clr, ok := c.theme[token.Type]
 		if !ok {
 			clr, ok = c.theme[token.Type.SubCategory()]
@@ -41,6 +41,5 @@ func (c *consoleFormatter) Format(w io.Writer, tokens []Token) error {
 		}
 		fmt.Fprint(w, clr)
 		fmt.Fprint(w, token.Value)
-	}
-	return nil
+	}, nil
 }

View File

@@ -55,7 +55,7 @@ func (t Token) GoString() string { return t.String() }
 
 type Lexer interface {
 	Config() *Config
-	Tokenise(text string) ([]Token, error)
+	Tokenise(text string, out func(Token)) error
 }
 
 // Analyser determines if this lexer is appropriate for the given text.
@@ -72,23 +72,36 @@ type Rule struct {
 // An Emitter takes group matches and returns tokens.
 type Emitter interface {
 	// Emit tokens for the given regex groups.
-	Emit(groups []string) []Token
+	Emit(groups []string, out func(Token))
 }
 
-type EmitterFunc func(groups []string) []Token
+// EmitterFunc is a function that is an Emitter.
+type EmitterFunc func(groups []string, out func(Token))
 
-func (e EmitterFunc) Emit(groups []string) []Token { return e(groups) }
+// Emit tokens for groups.
+func (e EmitterFunc) Emit(groups []string, out func(Token)) { e(groups, out) }
 
 // ByGroups emits a token for each matching group in the rule's regex.
 func ByGroups(emitters ...Emitter) Emitter {
-	return EmitterFunc(func(groups []string) (out []Token) {
+	return EmitterFunc(func(groups []string, out func(Token)) {
 		for i, group := range groups[1:] {
-			out = append(out, emitters[i].Emit([]string{group})...)
+			emitters[i].Emit([]string{group}, out)
 		}
 		return
 	})
 }
 
+// Using uses a given Lexer for parsing and emitting.
+func Using(lexer Lexer) Emitter {
+	return EmitterFunc(func(groups []string, out func(Token)) {
+		if err := lexer.Tokenise(groups[0], out); err != nil {
+			// TODO: Emitters should return an error, though it's not clear what one would do with
+			// it.
+			panic(err)
+		}
+	})
+}
+
 // Words creates a regex that matches any of the given literal words.
 func Words(words ...string) string {
 	for i, word := range words {
@@ -168,7 +181,7 @@ type LexerState struct {
 	State string
 }
 
-func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
+func (r *regexLexer) Tokenise(text string, out func(Token)) error {
 	state := &LexerState{
 		Text: text,
 		Stack: []string{"root"},
@@ -179,7 +192,7 @@ func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
 		rule, index := matchRules(state.Text[state.Pos:], state.Rules[state.State])
 		// No match.
 		if index == nil {
-			out = append(out, Token{Error, state.Text[state.Pos : state.Pos+1]})
+			out(Token{Error, state.Text[state.Pos : state.Pos+1]})
 			state.Pos++
 			continue
 		}
@@ -190,14 +203,14 @@ func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
 		}
 		state.Pos += index[1]
 		if rule.Modifier != nil {
-			if err = rule.Modifier.Mutate(state); err != nil {
-				return
+			if err := rule.Modifier.Mutate(state); err != nil {
+				return err
 			}
 		} else {
-			out = append(out, rule.Type.Emit(groups)...)
+			rule.Type.Emit(groups, out)
 		}
 	}
-	return
+	return nil
 }
 
 func matchRules(text string, rules []CompiledRule) (CompiledRule, []int) {

View File

@@ -19,22 +19,19 @@ var Markdown = Register(NewLexer(
 			{`^(#{2,6})(.+\n)`, ByGroups(GenericSubheading, Text), nil},
 			// task list
 			{`^(\s*)([*-] )(\[[ xX]\])( .+\n)`,
-				// ByGroups(Text, Keyword, Keyword, using(this, state='inline')), nil},
 				ByGroups(Text, Keyword, Keyword, Text), nil},
 			// bulleted lists
 			{`^(\s*)([*-])(\s)(.+\n)`,
-				// ByGroups(Text, Keyword, Text, using(this, state='inline')), nil},
 				ByGroups(Text, Keyword, Text, Text), nil},
 			// numbered lists
 			{`^(\s*)([0-9]+\.)( .+\n)`,
-				// ByGroups(Text, Keyword, using(this, state='inline')), nil},
 				ByGroups(Text, Keyword, Text), nil},
 			// quote
 			{`^(\s*>\s)(.+\n)`, ByGroups(Keyword, GenericEmph), nil},
 			// text block
 			{"^(```\n)([\\w\\W]*?)(^```$)", ByGroups(String, Text, String), nil},
 			// code block with language
-			{"^(```)(\\w+)(\n)([\\w\\W]*?)(^```$)", EmitterFunc(HandleCodeblock), nil},
+			{"^(```)(\\w+)(\n)([\\w\\W]*?)(^```$)", EmitterFunc(handleCodeblock), nil},
 			Include(`inline`),
 		},
 		`inline`: []Rule{
@@ -61,21 +58,12 @@ var Markdown = Register(NewLexer(
 	},
 ))
 
-func HandleCodeblock(groups []string) []Token {
-	out := []Token{
-		{String, groups[1]},
-		{String, groups[2]},
-		{Text, groups[3]},
-	}
+func handleCodeblock(groups []string, out func(Token)) {
+	out(Token{String, groups[1]})
+	out(Token{String, groups[2]})
+	out(Token{Text, groups[3]})
 	code := groups[4]
 	lexer := Registry.Get(groups[2])
-	tokens, err := lexer.Tokenise(code)
-	if err == nil {
-		out = append(out, tokens...)
-	} else {
-		out = append(out, Token{Error, code})
-	}
-	out = append(out, Token{String, groups[5]})
-	return out
+	lexer.Tokenise(code, out)
+	out(Token{String, groups[5]})
 }

View File

@@ -176,6 +176,6 @@ func (t TokenType) InSubCategory(other TokenType) bool {
 	return t/100 == other/100
 }
 
-func (t TokenType) Emit(groups []string) []Token {
-	return []Token{Token{Type: t, Value: groups[0]}}
+func (t TokenType) Emit(groups []string, out func(Token)) {
+	out(Token{Type: t, Value: groups[0]})
 }