Mirror of https://github.com/alecthomas/chroma.git
Use a callback to emit tokens.
This is (a) faster and (b) it supports streaming output.
commit b30de35ff1
parent 6dd81b044b
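For context, a minimal sketch of how a caller drives the callback-based Tokenise introduced by this commit; the import paths and the sample file name are assumptions, but the signatures match the diff below:

package main

import (
	"os"

	"github.com/alecthomas/chroma"            // assumed import path
	"github.com/alecthomas/chroma/formatters" // assumed import path
	"github.com/alecthomas/chroma/lexers"     // assumed import path
)

func main() {
	// Pick a lexer by filename and coalesce runs of same-typed tokens.
	lexer := chroma.Coalesce(lexers.Registry.Match("example.go")[0])

	// Streaming: Format now returns a callback, so each token is written
	// to the console as soon as the lexer emits it.
	writer, err := formatters.Console(formatters.DefaultConsoleTheme).Format(os.Stdout)
	if err != nil {
		panic(err)
	}
	if err := lexer.Tokenise("package main\n", writer); err != nil {
		panic(err)
	}

	// Buffering is still possible: collect tokens with a closure instead.
	var tokens []chroma.Token
	if err := lexer.Tokenise("package main\n", func(t chroma.Token) {
		tokens = append(tokens, t)
	}); err != nil {
		panic(err)
	}
	_ = tokens // e.g. hand the slice to non-streaming code
}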
@@ -3,6 +3,7 @@ package main
 import (
 	"bufio"
 	"fmt"
+	"io"
 	"io/ioutil"
 	"os"
 	"runtime/pprof"
@@ -30,21 +31,27 @@ func main() {
 	}
 	w := bufio.NewWriterSize(os.Stdout, 16384)
 	defer w.Flush()
-	formatter := formatters.Console(formatters.DefaultConsoleTheme)
+	writer := getWriter(w)
 	for _, filename := range *filesArgs {
 		lexers := lexers.Registry.Match(filename)
 		lexer := lexers[0]
 		lexer = chroma.Coalesce(lexer)
 		contents, err := ioutil.ReadFile(filename)
 		kingpin.FatalIfError(err, "")
-		tokens, err := lexer.Tokenise(string(contents))
+		err = lexer.Tokenise(string(contents), writer)
 		kingpin.FatalIfError(err, "")
+	}
+}
+
+func getWriter(w io.Writer) func(chroma.Token) {
 	if *tokensFlag {
-		for _, token := range tokens {
+		return func(token chroma.Token) {
 			fmt.Println(token)
 		}
 	} else {
-		formatter.Format(w, tokens)
+		formatter := formatters.Console(formatters.DefaultConsoleTheme)
+		writer, err := formatter.Format(w)
+		kingpin.FatalIfError(err, "")
+		return writer
 	}
 }
-}
coalesce.go:

@@ -9,23 +9,23 @@ type coalescer struct {
 	Lexer
 }
 
-func (d *coalescer) Tokenise(text string) ([]Token, error) {
-	in, err := d.Lexer.Tokenise(text)
-	if err != nil {
-		return in, err
-	}
-	out := []Token{}
-	for _, token := range in {
-		if len(out) == 0 {
-			out = append(out, token)
-			continue
-		}
-		last := &out[len(out)-1]
-		if last.Type == token.Type {
-			last.Value += token.Value
-		} else {
-			out = append(out, token)
-		}
-	}
-	return out, err
+func (d *coalescer) Tokenise(text string, out func(Token)) error {
+	var last *Token
+	defer func() {
+		if last != nil {
+			out(*last)
+		}
+	}()
+	return d.Lexer.Tokenise(text, func(token Token) {
+		if last == nil {
+			last = &token
+		} else {
+			if last.Type == token.Type {
+				last.Value += token.Value
+			} else {
+				out(*last)
+				last = &token
+			}
+		}
+	})
 }
@@ -6,7 +6,7 @@ import (
 	"github.com/alecthomas/chroma"
 )
 
-// Formatter takes a token stream and formats it.
+// Formatter returns a formatting function for tokens.
 type Formatter interface {
-	Format(w io.Writer, tokens []chroma.Token) error
+	Format(w io.Writer) (func(chroma.Token), error)
 }
@@ -27,8 +27,8 @@ type consoleFormatter struct {
 	theme map[TokenType]string
 }
 
-func (c *consoleFormatter) Format(w io.Writer, tokens []Token) error {
-	for _, token := range tokens {
+func (c *consoleFormatter) Format(w io.Writer) (func(Token), error) {
+	return func(token Token) {
 		clr, ok := c.theme[token.Type]
 		if !ok {
 			clr, ok = c.theme[token.Type.SubCategory()]
@@ -41,6 +41,5 @@ func (c *consoleFormatter) Format(w io.Writer, tokens []Token) error {
 		}
 		fmt.Fprint(w, clr)
 		fmt.Fprint(w, token.Value)
-	}
-	return nil
+	}, nil
 }
lexer.go:

@@ -55,7 +55,7 @@ func (t Token) GoString() string { return t.String() }
 
 type Lexer interface {
 	Config() *Config
-	Tokenise(text string) ([]Token, error)
+	Tokenise(text string, out func(Token)) error
 }
 
 // Analyser determines if this lexer is appropriate for the given text.
@@ -72,23 +72,36 @@ type Rule struct {
 // An Emitter takes group matches and returns tokens.
 type Emitter interface {
 	// Emit tokens for the given regex groups.
-	Emit(groups []string) []Token
+	Emit(groups []string, out func(Token))
 }
 
-type EmitterFunc func(groups []string) []Token
+// EmitterFunc is a function that is an Emitter.
+type EmitterFunc func(groups []string, out func(Token))
 
-func (e EmitterFunc) Emit(groups []string) []Token { return e(groups) }
+// Emit tokens for groups.
+func (e EmitterFunc) Emit(groups []string, out func(Token)) { e(groups, out) }
 
 // ByGroups emits a token for each matching group in the rule's regex.
 func ByGroups(emitters ...Emitter) Emitter {
-	return EmitterFunc(func(groups []string) (out []Token) {
+	return EmitterFunc(func(groups []string, out func(Token)) {
 		for i, group := range groups[1:] {
-			out = append(out, emitters[i].Emit([]string{group})...)
+			emitters[i].Emit([]string{group}, out)
 		}
 		return
 	})
 }
 
+// Using uses a given Lexer for parsing and emitting.
+func Using(lexer Lexer) Emitter {
+	return EmitterFunc(func(groups []string, out func(Token)) {
+		if err := lexer.Tokenise(groups[0], out); err != nil {
+			// TODO: Emitters should return an error, though it's not clear what one would do with
+			// it.
+			panic(err)
+		}
+	})
+}
+
 // Words creates a regex that matches any of the given literal words.
 func Words(words ...string) string {
 	for i, word := range words {
@@ -168,7 +181,7 @@ type LexerState struct {
 	State string
 }
 
-func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
+func (r *regexLexer) Tokenise(text string, out func(Token)) error {
 	state := &LexerState{
 		Text: text,
 		Stack: []string{"root"},
@@ -179,7 +192,7 @@ func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
 		rule, index := matchRules(state.Text[state.Pos:], state.Rules[state.State])
 		// No match.
 		if index == nil {
-			out = append(out, Token{Error, state.Text[state.Pos : state.Pos+1]})
+			out(Token{Error, state.Text[state.Pos : state.Pos+1]})
 			state.Pos++
 			continue
 		}
@@ -190,14 +203,14 @@ func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
 		}
 		state.Pos += index[1]
 		if rule.Modifier != nil {
-			if err = rule.Modifier.Mutate(state); err != nil {
-				return
+			if err := rule.Modifier.Mutate(state); err != nil {
+				return err
 			}
 		} else {
-			out = append(out, rule.Type.Emit(groups)...)
+			rule.Type.Emit(groups, out)
 		}
 	}
-	return
+	return nil
 }
 
 func matchRules(text string, rules []CompiledRule) (CompiledRule, []int) {
@@ -19,22 +19,19 @@ var Markdown = Register(NewLexer(
 			{`^(#{2,6})(.+\n)`, ByGroups(GenericSubheading, Text), nil},
 			// task list
 			{`^(\s*)([*-] )(\[[ xX]\])( .+\n)`,
-				// ByGroups(Text, Keyword, Keyword, using(this, state='inline')), nil},
 				ByGroups(Text, Keyword, Keyword, Text), nil},
 			// bulleted lists
 			{`^(\s*)([*-])(\s)(.+\n)`,
-				// ByGroups(Text, Keyword, Text, using(this, state='inline')), nil},
 				ByGroups(Text, Keyword, Text, Text), nil},
 			// numbered lists
 			{`^(\s*)([0-9]+\.)( .+\n)`,
-				// ByGroups(Text, Keyword, using(this, state='inline')), nil},
 				ByGroups(Text, Keyword, Text), nil},
 			// quote
 			{`^(\s*>\s)(.+\n)`, ByGroups(Keyword, GenericEmph), nil},
 			// text block
 			{"^(```\n)([\\w\\W]*?)(^```$)", ByGroups(String, Text, String), nil},
 			// code block with language
-			{"^(```)(\\w+)(\n)([\\w\\W]*?)(^```$)", EmitterFunc(HandleCodeblock), nil},
+			{"^(```)(\\w+)(\n)([\\w\\W]*?)(^```$)", EmitterFunc(handleCodeblock), nil},
 			Include(`inline`),
 		},
 		`inline`: []Rule{
@@ -61,21 +58,12 @@ var Markdown = Register(NewLexer(
 	},
 ))
 
-func HandleCodeblock(groups []string) []Token {
-	out := []Token{
-		{String, groups[1]},
-		{String, groups[2]},
-		{Text, groups[3]},
-	}
+func handleCodeblock(groups []string, out func(Token)) {
+	out(Token{String, groups[1]})
+	out(Token{String, groups[2]})
+	out(Token{Text, groups[3]})
 	code := groups[4]
 	lexer := Registry.Get(groups[2])
-	tokens, err := lexer.Tokenise(code)
-	if err == nil {
-		out = append(out, tokens...)
-	} else {
-		out = append(out, Token{Error, code})
-	}
-	out = append(out, Token{String, groups[5]})
-	return out
+	lexer.Tokenise(code, out)
+	out(Token{String, groups[5]})
 }
types.go:

@@ -176,6 +176,6 @@ func (t TokenType) InSubCategory(other TokenType) bool {
 	return t/100 == other/100
 }
 
-func (t TokenType) Emit(groups []string) []Token {
-	return []Token{Token{Type: t, Value: groups[0]}}
+func (t TokenType) Emit(groups []string, out func(Token)) {
+	out(Token{Type: t, Value: groups[0]})
 }