Mirror of https://github.com/alecthomas/chroma.git (synced 2025-01-26 03:20:10 +02:00)
Use a callback to emit tokens.
This is (a) faster and (b) supports streaming output.
Commit: b30de35ff1
Parent: 6dd81b044b
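In short: `Tokenise` no longer returns a `[]Token`; it pushes each token into an `out func(Token)` callback as soon as it is produced. A toy illustration of why that enables streaming, using a simplified `Token` type and a made-up word "lexer" (not chroma's API):

```go
package main

import "fmt"

// Token is a simplified stand-in for chroma.Token.
type Token struct {
	Type  string
	Value string
}

// emitWords is a toy "lexer" in the new style: it streams one token per
// word into the callback, without ever building a []Token.
func emitWords(text string, out func(Token)) error {
	word := ""
	for _, r := range text {
		if r == ' ' {
			if word != "" {
				out(Token{"Word", word})
				word = ""
			}
			continue
		}
		word += string(r)
	}
	if word != "" {
		out(Token{"Word", word})
	}
	return nil
}

func main() {
	// The callback can write straight to the terminal, so output starts
	// before lexing finishes — the streaming benefit the commit mentions.
	emitWords("hello streaming world", func(t Token) { fmt.Println(t) })
}
```

Nothing is buffered: the first token can reach the consumer while the rest of the input is still being lexed.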
The command-line driver (package main):

```diff
@@ -3,6 +3,7 @@ package main
 import (
 	"bufio"
 	"fmt"
+	"io"
 	"io/ioutil"
 	"os"
 	"runtime/pprof"
@@ -30,21 +31,27 @@ func main() {
 	}
 	w := bufio.NewWriterSize(os.Stdout, 16384)
 	defer w.Flush()
-	formatter := formatters.Console(formatters.DefaultConsoleTheme)
+	writer := getWriter(w)
 	for _, filename := range *filesArgs {
 		lexers := lexers.Registry.Match(filename)
 		lexer := lexers[0]
 		lexer = chroma.Coalesce(lexer)
 		contents, err := ioutil.ReadFile(filename)
 		kingpin.FatalIfError(err, "")
-		tokens, err := lexer.Tokenise(string(contents))
+		err = lexer.Tokenise(string(contents), writer)
 		kingpin.FatalIfError(err, "")
-		if *tokensFlag {
-			for _, token := range tokens {
-				fmt.Println(token)
-			}
-		} else {
-			formatter.Format(w, tokens)
-		}
 	}
 }
+
+func getWriter(w io.Writer) func(chroma.Token) {
+	if *tokensFlag {
+		return func(token chroma.Token) {
+			fmt.Println(token)
+		}
+	} else {
+		formatter := formatters.Console(formatters.DefaultConsoleTheme)
+		writer, err := formatter.Format(w)
+		kingpin.FatalIfError(err, "")
+		return writer
+	}
+}
```
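`getWriter` collapses the per-file branching into a single token callback chosen once, up front. A self-contained sketch of the same pattern; `tokenSink` and its `debug` flag are illustrative names, not part of chroma:

```go
package main

import (
	"bufio"
	"fmt"
	"io"
	"os"
)

type Token struct{ Type, Value string }

// tokenSink mirrors the getWriter pattern from the diff: pick a token
// callback once instead of branching inside the per-file loop.
func tokenSink(w io.Writer, debug bool) func(Token) {
	if debug {
		// Equivalent of the --tokens flag: dump raw tokens.
		return func(t Token) { fmt.Fprintln(w, t) }
	}
	// Equivalent of formatter.Format(w): render token values.
	return func(t Token) { fmt.Fprint(w, t.Value) }
}

func main() {
	w := bufio.NewWriterSize(os.Stdout, 16384)
	defer w.Flush()
	sink := tokenSink(w, false)
	// A lexer in the new style would then be driven as:
	//   err := lexer.Tokenise(string(contents), sink)
	sink(Token{"Text", "example\n"})
}
```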
coalesce.go (32 lines changed):
```diff
@@ -9,23 +9,23 @@ type coalescer struct {
 	Lexer
 }
 
-func (d *coalescer) Tokenise(text string) ([]Token, error) {
-	in, err := d.Lexer.Tokenise(text)
-	if err != nil {
-		return in, err
-	}
-	out := []Token{}
-	for _, token := range in {
-		if len(out) == 0 {
-			out = append(out, token)
-			continue
-		}
-		last := &out[len(out)-1]
-		if last.Type == token.Type {
-			last.Value += token.Value
-		} else {
-			out = append(out, token)
-		}
-	}
-	return out, err
+func (d *coalescer) Tokenise(text string, out func(Token)) error {
+	var last *Token
+	defer func() {
+		if last != nil {
+			out(*last)
+		}
+	}()
+	return d.Lexer.Tokenise(text, func(token Token) {
+		if last == nil {
+			last = &token
+		} else {
+			if last.Type == token.Type {
+				last.Value += token.Value
+			} else {
+				out(*last)
+				last = &token
+			}
+		}
+	})
 }
```
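Because the streaming coalescer never sees a finished slice, it holds exactly one pending token: the next token is merged into it if the types match, otherwise the pending token is flushed and the newcomer held; the deferred flush emits whatever is still pending at end of input. A standalone sketch of that hold-one-back strategy, with a simplified `Token` and a hypothetical `coalesce` helper:

```go
package main

import "fmt"

type Token struct{ Type, Value string }

// coalesce wraps a token sink, merging runs of same-typed tokens.
// flush must be called once input ends, mirroring the defer in the diff.
func coalesce(out func(Token)) (sink func(Token), flush func()) {
	var last *Token // the one token held back, not yet emitted
	sink = func(t Token) {
		switch {
		case last == nil:
			last = &t
		case last.Type == t.Type:
			last.Value += t.Value // merge into the pending token
		default:
			out(*last) // type changed: flush pending, hold the new one
			last = &t
		}
	}
	flush = func() {
		if last != nil {
			out(*last)
		}
	}
	return sink, flush
}

func main() {
	sink, flush := coalesce(func(t Token) { fmt.Println(t) })
	for _, t := range []Token{{"A", "x"}, {"A", "y"}, {"B", "z"}} {
		sink(t)
	}
	flush() // prints {A xy} then {B z}
}
```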
The Formatter interface changes from consuming a token slice to returning a token callback:

```diff
@@ -6,7 +6,7 @@ import (
 	"github.com/alecthomas/chroma"
 )
 
-// Formatter takes a token stream and formats it.
+// Formatter returns a formatting function for tokens.
 type Formatter interface {
-	Format(w io.Writer, tokens []chroma.Token) error
+	Format(w io.Writer) (func(chroma.Token), error)
 }
```
The console formatter:

```diff
@@ -27,8 +27,8 @@ type consoleFormatter struct {
 	theme map[TokenType]string
 }
 
-func (c *consoleFormatter) Format(w io.Writer, tokens []Token) error {
-	for _, token := range tokens {
+func (c *consoleFormatter) Format(w io.Writer) (func(Token), error) {
+	return func(token Token) {
 		clr, ok := c.theme[token.Type]
 		if !ok {
 			clr, ok = c.theme[token.Type.SubCategory()]
@@ -41,6 +41,5 @@ func (c *consoleFormatter) Format(w io.Writer, tokens []Token) error {
 		}
 		fmt.Fprint(w, clr)
 		fmt.Fprint(w, token.Value)
-	}
-	return nil
+	}, nil
 }
```
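A formatter now returns a closure bound to its writer rather than iterating a slice, and setup errors are reported once, at construction. A minimal `Formatter` in the new style; `plainFormatter` is invented for illustration, whereas the real console formatter also looks up a colour per token type:

```go
package main

import (
	"fmt"
	"io"
	"os"
)

type Token struct{ Type, Value string }

// Formatter mirrors the new interface from the diff (Token simplified).
type Formatter interface {
	Format(w io.Writer) (func(Token), error)
}

// plainFormatter writes token values verbatim.
type plainFormatter struct{}

func (plainFormatter) Format(w io.Writer) (func(Token), error) {
	// The returned closure captures w, so each token is written the
	// moment it arrives — there is no buffered []Token pass.
	return func(t Token) {
		fmt.Fprint(w, t.Value)
	}, nil
}

func main() {
	write, err := plainFormatter{}.Format(os.Stdout)
	if err != nil {
		panic(err)
	}
	write(Token{"Text", "hello\n"})
}
```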
lexer.go (37 lines changed):
```diff
@@ -55,7 +55,7 @@ func (t Token) GoString() string { return t.String() }
 
 type Lexer interface {
 	Config() *Config
-	Tokenise(text string) ([]Token, error)
+	Tokenise(text string, out func(Token)) error
 }
 
 // Analyser determines if this lexer is appropriate for the given text.
@@ -72,23 +72,36 @@ type Rule struct {
 // An Emitter takes group matches and returns tokens.
 type Emitter interface {
 	// Emit tokens for the given regex groups.
-	Emit(groups []string) []Token
+	Emit(groups []string, out func(Token))
 }
 
-type EmitterFunc func(groups []string) []Token
+// EmitterFunc is a function that is an Emitter.
+type EmitterFunc func(groups []string, out func(Token))
 
-func (e EmitterFunc) Emit(groups []string) []Token { return e(groups) }
+// Emit tokens for groups.
+func (e EmitterFunc) Emit(groups []string, out func(Token)) { e(groups, out) }
 
 // ByGroups emits a token for each matching group in the rule's regex.
 func ByGroups(emitters ...Emitter) Emitter {
-	return EmitterFunc(func(groups []string) (out []Token) {
+	return EmitterFunc(func(groups []string, out func(Token)) {
 		for i, group := range groups[1:] {
-			out = append(out, emitters[i].Emit([]string{group})...)
+			emitters[i].Emit([]string{group}, out)
 		}
-		return
 	})
 }
 
+// Using uses a given Lexer for parsing and emitting.
+func Using(lexer Lexer) Emitter {
+	return EmitterFunc(func(groups []string, out func(Token)) {
+		if err := lexer.Tokenise(groups[0], out); err != nil {
+			// TODO: Emitters should return an error, though it's not clear what one would do with
+			// it.
+			panic(err)
+		}
+	})
+}
+
 // Words creates a regex that matches any of the given literal words.
 func Words(words ...string) string {
 	for i, word := range words {
@@ -168,7 +181,7 @@ type LexerState struct {
 	State string
 }
 
-func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
+func (r *regexLexer) Tokenise(text string, out func(Token)) error {
 	state := &LexerState{
 		Text:  text,
 		Stack: []string{"root"},
@@ -179,7 +192,7 @@ func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
 		rule, index := matchRules(state.Text[state.Pos:], state.Rules[state.State])
 		// No match.
 		if index == nil {
-			out = append(out, Token{Error, state.Text[state.Pos : state.Pos+1]})
+			out(Token{Error, state.Text[state.Pos : state.Pos+1]})
 			state.Pos++
 			continue
 		}
@@ -190,14 +203,14 @@ func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
 		}
 		state.Pos += index[1]
 		if rule.Modifier != nil {
-			if err = rule.Modifier.Mutate(state); err != nil {
-				return
+			if err := rule.Modifier.Mutate(state); err != nil {
+				return err
 			}
 		} else {
-			out = append(out, rule.Type.Emit(groups)...)
+			rule.Type.Emit(groups, out)
 		}
 	}
-	return
+	return nil
 }
 
 func matchRules(text string, rules []CompiledRule) (CompiledRule, []int) {
```
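With every emitter pushing into a shared callback, composition becomes plain function calls: `ByGroups` fans capture groups out to sub-emitters, and the new `Using` hands a whole group to another lexer. A reduced sketch of the `EmitterFunc` pattern; `byGroups` here is a simplified stand-in for chroma's `ByGroups`, with string token types instead of real `TokenType` values:

```go
package main

import "fmt"

type Token struct{ Type, Value string }

// Emitter and EmitterFunc as reshaped by the diff (Token simplified).
type Emitter interface {
	Emit(groups []string, out func(Token))
}

type EmitterFunc func(groups []string, out func(Token))

func (e EmitterFunc) Emit(groups []string, out func(Token)) { e(groups, out) }

// byGroups emits one token per capture group, like ByGroups in the diff.
func byGroups(types ...string) Emitter {
	return EmitterFunc(func(groups []string, out func(Token)) {
		// groups[0] is the whole match; groups[1:] are the captures.
		for i, group := range groups[1:] {
			out(Token{types[i], group})
		}
	})
}

func main() {
	e := byGroups("Keyword", "Text")
	e.Emit([]string{"if x", "if", " x"}, func(t Token) { fmt.Println(t) })
	// Output: {Keyword if} then {Text  x}
}
```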
The Markdown lexer:

```diff
@@ -19,22 +19,19 @@ var Markdown = Register(NewLexer(
 		{`^(#{2,6})(.+\n)`, ByGroups(GenericSubheading, Text), nil},
 		// task list
 		{`^(\s*)([*-] )(\[[ xX]\])( .+\n)`,
-			// ByGroups(Text, Keyword, Keyword, using(this, state='inline')), nil},
 			ByGroups(Text, Keyword, Keyword, Text), nil},
 		// bulleted lists
 		{`^(\s*)([*-])(\s)(.+\n)`,
-			// ByGroups(Text, Keyword, Text, using(this, state='inline')), nil},
 			ByGroups(Text, Keyword, Text, Text), nil},
 		// numbered lists
 		{`^(\s*)([0-9]+\.)( .+\n)`,
-			// ByGroups(Text, Keyword, using(this, state='inline')), nil},
 			ByGroups(Text, Keyword, Text), nil},
 		// quote
 		{`^(\s*>\s)(.+\n)`, ByGroups(Keyword, GenericEmph), nil},
 		// text block
 		{"^(```\n)([\\w\\W]*?)(^```$)", ByGroups(String, Text, String), nil},
 		// code block with language
-		{"^(```)(\\w+)(\n)([\\w\\W]*?)(^```$)", EmitterFunc(HandleCodeblock), nil},
+		{"^(```)(\\w+)(\n)([\\w\\W]*?)(^```$)", EmitterFunc(handleCodeblock), nil},
 		Include(`inline`),
 	},
 	`inline`: []Rule{
@@ -61,21 +58,12 @@ var Markdown = Register(NewLexer(
 	},
 ))
 
-func HandleCodeblock(groups []string) []Token {
-	out := []Token{
-		{String, groups[1]},
-		{String, groups[2]},
-		{Text, groups[3]},
-	}
+func handleCodeblock(groups []string, out func(Token)) {
+	out(Token{String, groups[1]})
+	out(Token{String, groups[2]})
+	out(Token{Text, groups[3]})
 	code := groups[4]
 	lexer := Registry.Get(groups[2])
-	tokens, err := lexer.Tokenise(code)
-	if err == nil {
-		out = append(out, tokens...)
-	} else {
-		out = append(out, Token{Error, code})
-	}
-	out = append(out, Token{String, groups[5]})
-	return out
+	lexer.Tokenise(code, out)
+	out(Token{String, groups[5]})
 }
```
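`handleCodeblock` shows the payoff of the shared callback: the fenced block's body is delegated to the embedded language's lexer simply by passing it the same `out` function, so the sub-lexer's tokens stream inline with the rest. A toy sketch of that delegation; `lookup` stands in for `Registry.Get`, and the sub-lexer's error is ignored as in the diff:

```go
package main

import (
	"fmt"
	"strings"
)

type Token struct{ Type, Value string }

// tokeniseFunc stands in for a registered sub-lexer in the new API.
type tokeniseFunc func(text string, out func(Token)) error

// lookup is a toy stand-in for Registry.Get(language).
func lookup(lang string) tokeniseFunc {
	return func(text string, out func(Token)) error {
		for _, line := range strings.SplitAfter(text, "\n") {
			if line != "" {
				out(Token{"Code(" + lang + ")", line})
			}
		}
		return nil
	}
}

// emitCodeblock mirrors handleCodeblock: the frame tokens are emitted
// directly, and the body streams through the same callback.
func emitCodeblock(lang, body string, out func(Token)) {
	// "<fence>" stands in for the markdown code-fence delimiter token.
	out(Token{"String", "<fence>" + lang + "\n"})
	lookup(lang)(body, out) // sub-lexer shares the sink; error ignored as in the diff
	out(Token{"String", "<fence>"})
}

func main() {
	emitCodeblock("go", "x := 1\n", func(t Token) { fmt.Println(t) })
}
```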
types.go (4 lines changed):
```diff
@@ -176,6 +176,6 @@ func (t TokenType) InSubCategory(other TokenType) bool {
 	return t/100 == other/100
 }
 
-func (t TokenType) Emit(groups []string) []Token {
-	return []Token{Token{Type: t, Value: groups[0]}}
+func (t TokenType) Emit(groups []string, out func(Token)) {
+	out(Token{Type: t, Value: groups[0]})
 }
```