Initial commit! Working!
commit b2fb8edf77 (parent 3de978543f)

Makefile (new file, 2 lines)
@@ -0,0 +1,2 @@
tokentype_string.go: types.go
	stringer -type TokenType

cmd/chroma/main.go (new file, 47 lines)
@@ -0,0 +1,47 @@
package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"runtime/pprof"

	"gopkg.in/alecthomas/kingpin.v3-unstable"

	"github.com/alecthomas/chroma"
	"github.com/alecthomas/chroma/formatters"
	"github.com/alecthomas/chroma/lexers"
)

var (
	profileFlag = kingpin.Flag("profile", "Enable profiling to file.").String()
	tokensFlag  = kingpin.Flag("tokens", "Dump raw tokens.").Bool()
	filesArgs   = kingpin.Arg("files", "Files to highlight.").Required().ExistingFiles()
)

func main() {
	kingpin.Parse()
	if *profileFlag != "" {
		f, err := os.Create(*profileFlag)
		kingpin.FatalIfError(err, "")
		pprof.StartCPUProfile(f)
		defer pprof.StopCPUProfile()
	}
	formatter := formatters.Console(formatters.DefaultConsoleTheme)
	for _, filename := range *filesArgs {
		lexers := lexers.Registry.Match(filename)
		lexer := lexers[0]
		lexer = chroma.Coalesce(lexer)
		contents, err := ioutil.ReadFile(filename)
		kingpin.FatalIfError(err, "")
		tokens, err := lexer.Tokenise(string(contents))
		kingpin.FatalIfError(err, "")
		if *tokensFlag {
			for _, token := range tokens {
				fmt.Println(token)
			}
		} else {
			formatter.Format(os.Stdout, tokens)
		}
	}
}
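
As wired up above, the command highlights each file argument to stdout with the console formatter; passing --tokens dumps the raw token stream instead, and --profile <file> writes a CPU profile for the run.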

coalesce.go (new file, 31 lines)
@@ -0,0 +1,31 @@
package chroma

// Coalesce is a Lexer interceptor that collapses runs of common types into a single token.
func Coalesce(lexer Lexer) Lexer {
	return &coalescer{lexer}
}

type coalescer struct {
	Lexer
}

func (d *coalescer) Tokenise(text string) ([]Token, error) {
	in, err := d.Lexer.Tokenise(text)
	if err != nil {
		return in, err
	}
	out := []Token{}
	for _, token := range in {
		if len(out) == 0 {
			out = append(out, token)
			continue
		}
		last := &out[len(out)-1]
		if last.Type == token.Type {
			last.Value += token.Value
		} else {
			out = append(out, token)
		}
	}
	return out, err
}
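
A minimal usage sketch (a fragment, assuming fmt is imported; the lexer is built with the Rules type from lexer.go): without Coalesce each punctuation character would come back as its own Punctuation token; wrapped in Coalesce, the run is merged into one.

lexer := Coalesce(MustNewLexer(nil, Rules{
	"root": []Rule{
		{`[[:punct:]]`, Punctuation, nil},
	},
}))
tokens, _ := lexer.Tokenise("!@#")
fmt.Println(tokens) // [Token{Punctuation, "!@#"}]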

coalesce_test.go (new file, 22 lines)
@@ -0,0 +1,22 @@
package chroma

import (
	"testing"

	"github.com/stretchr/testify/require"
)

func TestCoalesce(t *testing.T) {
	lexer := Coalesce(MustNewLexer(nil, Rules{
		"root": []Rule{
			{`[[:punct:]]`, Punctuation, nil},
		},
	}))
	actual, err := lexer.Tokenise("!@#$%")
	require.NoError(t, err)
	expected := []Token{
		Token{Punctuation, "!@#$%"},
	}
	require.Equal(t, expected, actual)
}

formatters/api.go (new file, 12 lines)
@@ -0,0 +1,12 @@
package formatters

import (
	"io"

	"github.com/alecthomas/chroma"
)

// Formatter takes a token stream and formats it.
type Formatter interface {
	Format(w io.Writer, tokens []chroma.Token) error
}
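
To illustrate the interface, here is a hypothetical pass-through formatter, a sketch rather than part of the commit: it satisfies Formatter by writing each token's value verbatim to the writer.

type plainFormatter struct{}

func (plainFormatter) Format(w io.Writer, tokens []chroma.Token) error {
	for _, token := range tokens {
		if _, err := io.WriteString(w, token.Value); err != nil {
			return err
		}
	}
	return nil
}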

formatters/console.go (new file, 47 lines)
@@ -0,0 +1,47 @@
package formatters

import (
	"bufio"
	"io"

	. "github.com/alecthomas/chroma" // nolint
	"github.com/alecthomas/colour"
)

var DefaultConsoleTheme = map[TokenType]string{
	Number:  "^B^3",
	Comment: "^5",
	String:  "^B^5",
	Keyword: "^B^7",
}

// Console formatter.
//
//     formatter := Console(DefaultConsoleTheme)
func Console(theme map[TokenType]string) Formatter {
	return &consoleFormatter{theme}
}

type consoleFormatter struct {
	theme map[TokenType]string
}

func (c *consoleFormatter) Format(w io.Writer, tokens []Token) error {
	bw := bufio.NewWriterSize(w, 1024)
	printer := colour.Colour(bw)
	for _, token := range tokens {
		clr, ok := c.theme[token.Type]
		if !ok {
			clr, ok = c.theme[token.Type.SubCategory()]
			if !ok {
				clr, ok = c.theme[token.Type.Category()]
				if !ok {
					clr = "^R"
				}
			}
		}
		printer.Printf(clr+"%s", token.Value)
	}
	bw.Flush()
	return nil
}
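
Theme lookup in Format degrades gracefully: an exact TokenType entry wins, then the type's sub-category, then its category, and finally the "^R" reset code. A sketch of wiring it up (lexer and source are assumed to come from elsewhere, for example the lexers registry):

formatter := formatters.Console(formatters.DefaultConsoleTheme)
tokens, err := lexer.Tokenise(source)
if err == nil {
	formatter.Format(os.Stdout, tokens)
}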

lexer.go (new file, 210 lines)
@@ -0,0 +1,210 @@
package chroma

import (
	"fmt"
	"regexp"
	"strings"
)

// Config for a lexer.
type Config struct {
	// Name of the lexer.
	Name string

	// Shortcuts for the lexer.
	Aliases []string

	// File name globs.
	Filenames []string

	// Secondary file name globs.
	AliasFilenames []string

	// MIME types.
	MimeTypes []string

	// Priority, should multiple lexers match and no content is provided.
	Priority int

	// Don't strip leading and trailing newlines from the input.
	DontStripNL bool

	// Strip all leading and trailing whitespace from the input.
	StripAll bool

	// Don't ensure that the input ends with a newline. A trailing newline
	// is required by some lexers that consume input linewise.
	DontEnsureNL bool

	// If given and greater than 0, expand tabs in the input.
	TabSize int

	// If given, must be an encoding name. This encoding will be used to
	// convert the input string to Unicode, if it is not already a Unicode
	// string.
	Encoding string
}

type Token struct {
	Type  TokenType
	Value string
}

func (t Token) String() string   { return fmt.Sprintf("Token{%s, %q}", t.Type, t.Value) }
func (t Token) GoString() string { return t.String() }

type Lexer interface {
	Config() *Config
	Tokenise(text string) ([]Token, error)
}

// Analyser determines if this lexer is appropriate for the given text.
type Analyser interface {
	AnalyseText(text string) float32
}

type Rule struct {
	Pattern  string
	Type     Emitter
	Modifier Modifier
}

// An Emitter takes group matches and returns tokens.
type Emitter interface {
	// Emit tokens for the given regex groups.
	Emit(groups []string) []Token
}

type EmitterFunc func(groups []string) []Token

func (e EmitterFunc) Emit(groups []string) []Token { return e(groups) }

// ByGroups emits a token for each matching group in the rule's regex.
func ByGroups(types ...TokenType) Emitter {
	return EmitterFunc(func(groups []string) (out []Token) {
		for i, group := range groups[1:] {
			out = append(out, Token{types[i], group})
		}
		return
	})
}

// Words creates a regex that matches any of the given literal words.
func Words(words ...string) string {
	for i, word := range words {
		words[i] = regexp.QuoteMeta(word)
	}
	return "\\b(?:" + strings.Join(words, "|") + ")\\b"
}

type Rules map[string][]Rule

// MustNewLexer creates a new Lexer or panics.
func MustNewLexer(config *Config, rules Rules) Lexer {
	lexer, err := NewLexer(config, rules)
	if err != nil {
		panic(err)
	}
	return lexer
}

// NewLexer creates a new regex-based Lexer.
//
// "rules" is a state machine transition map. Each key is a state. Values are sets of rules
// that match input, optionally modify lexer state, and output tokens.
func NewLexer(config *Config, rules Rules) (Lexer, error) {
	if _, ok := rules["root"]; !ok {
		return nil, fmt.Errorf("no \"root\" state")
	}
	compiledRules := map[string][]CompiledRule{}
	for state, rules := range rules {
		for _, rule := range rules {
			crule := CompiledRule{Rule: rule}
			re, err := regexp.Compile("^(?m)" + rule.Pattern)
			if err != nil {
				return nil, fmt.Errorf("invalid regex %q for state %q: %s", rule.Pattern, state, err)
			}
			crule.Regexp = re
			compiledRules[state] = append(compiledRules[state], crule)
		}
	}
	// Apply any pre-processor modifiers.
	for state, rules := range compiledRules {
		for index, rule := range rules {
			if rule.Modifier != nil {
				err := rule.Modifier.Preprocess(compiledRules, state, index)
				if err != nil {
					return nil, err
				}
			}
		}
	}
	return &regexLexer{
		config: config,
		rules:  compiledRules,
	}, nil
}

// A CompiledRule is a Rule with a pre-compiled regex.
type CompiledRule struct {
	Rule
	Regexp *regexp.Regexp
}

type regexLexer struct {
	config *Config
	rules  map[string][]CompiledRule
}

func (r *regexLexer) Config() *Config {
	return r.config
}

type LexerState struct {
	Text  string
	Pos   int
	Stack []string
	Rules map[string][]CompiledRule
	State string
}

func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
	state := &LexerState{
		Text:  text,
		Stack: []string{"root"},
		Rules: r.rules,
	}
	for state.Pos < len(text) && len(state.Stack) > 0 {
		state.State = state.Stack[len(state.Stack)-1]
		rule, index := matchRules(state.Text[state.Pos:], state.Rules[state.State])
		// No match.
		if index == nil {
			out = append(out, Token{Error, state.Text[state.Pos : state.Pos+1]})
			state.Pos++
			continue
		}

		groups := make([]string, len(index)/2)
		for i := 0; i < len(index); i += 2 {
			groups[i/2] = text[state.Pos+index[i] : state.Pos+index[i+1]]
		}
		state.Pos += index[1]
		if rule.Modifier != nil {
			if err = rule.Modifier.Mutate(state); err != nil {
				return
			}
		} else {
			out = append(out, rule.Type.Emit(groups)...)
		}
	}
	return
}

func matchRules(text string, rules []CompiledRule) (CompiledRule, []int) {
	for _, rule := range rules {
		if index := rule.Regexp.FindStringSubmatchIndex(text); index != nil {
			return rule, index
		}
	}
	return CompiledRule{}, nil
}
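
A short sketch of the helpers above (a fragment, assuming fmt and regexp are imported as in the file): Words builds an alternation of escaped literal words, and NewLexer anchors every pattern with "^(?m)". Rules within a state are tried in order, and any input matching no rule is emitted one byte at a time as Error tokens.

pattern := Words("break", "case", "default")
// pattern == `\b(?:break|case|default)\b`
re := regexp.MustCompile("^(?m)" + pattern) // the same anchoring NewLexer applies
fmt.Println(re.MatchString("break now"))    // true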

lexer_test.go (new file, 52 lines)
@@ -0,0 +1,52 @@
package chroma

import (
	"testing"

	"github.com/stretchr/testify/require"
)

func TestTokenTypeClassifiers(t *testing.T) {
	require.True(t, GenericDeleted.InCategory(Generic))
	require.True(t, LiteralStringBacktick.InSubCategory(String))
	require.Equal(t, LiteralStringBacktick.String(), "LiteralStringBacktick")
}

func TestSimpleLexer(t *testing.T) {
	lexer, err := NewLexer(
		&Config{
			Name:      "INI",
			Aliases:   []string{"ini", "cfg"},
			Filenames: []string{"*.ini", "*.cfg"},
		},
		map[string][]Rule{
			"root": []Rule{
				{`\s+`, Whitespace, nil},
				{`;.*?$`, Comment, nil},
				{`\[.*?\]$`, Keyword, nil},
				{`(.*?)(\s*)(=)(\s*)(.*?)$`, ByGroups(Name, Whitespace, Operator, Whitespace, String), nil},
			},
		},
	)
	require.NoError(t, err)
	actual, err := lexer.Tokenise(`
	; this is a comment
	[section]
	a = 10
`)
	require.NoError(t, err)
	expected := []Token{
		Token{Whitespace, "\n\t"},
		Token{Comment, "; this is a comment"},
		Token{Whitespace, "\n\t"},
		Token{Keyword, "[section]"},
		Token{Whitespace, "\n\t"},
		Token{Name, "a"},
		Token{Whitespace, " "},
		Token{Operator, "="},
		Token{Whitespace, " "},
		Token{LiteralString, "10"},
		Token{Whitespace, "\n"},
	}
	require.Equal(t, expected, actual)
}

lexers/default.go (new file, 17 lines)
@@ -0,0 +1,17 @@
package lexers

import (
	. "github.com/alecthomas/chroma" // nolint
)

// Default lexer if no other is found.
var Default = Register(NewLexer(&Config{
	Name:      "default",
	Filenames: []string{"*"},
	Priority:  99,
}, Rules{
	"root": []Rule{
		{`.+`, Text, nil},
		{`\n`, Text, nil},
	},
}))

lexers/go.go (new file, 67 lines)
@@ -0,0 +1,67 @@
package lexers

import (
	. "github.com/alecthomas/chroma" // nolint: golint
)

// Go lexer.
var Go = Register(NewLexer(
	&Config{
		Name:      "Go",
		Filenames: []string{"*.go"},
		Aliases:   []string{"go", "golang"},
		MimeTypes: []string{"text/x-gosrc"},
	},
	Rules{
		`root`: []Rule{
			{`\n`, Text, nil},
			{`\s+`, Text, nil},
			{`\\\n`, Text, nil}, // line continuations
			{`//(.*?)\n`, CommentSingle, nil},
			{`/(\\\n)?[*](.|\n)*?[*](\\\n)?/`, CommentMultiline, nil},
			{`(import|package)\b`, KeywordNamespace, nil},
			{`(var|func|struct|map|chan|type|interface|const)\b`,
				KeywordDeclaration, nil},
			{Words(`break`, `default`, `select`, `case`, `defer`, `go`,
				`else`, `goto`, `switch`, `fallthrough`, `if`, `range`,
				`continue`, `for`, `return`), Keyword, nil},
			{`(true|false|iota|nil)\b`, KeywordConstant, nil},
			{Words(`uint`, `uint8`, `uint16`, `uint32`, `uint64`,
				`int`, `int8`, `int16`, `int32`, `int64`,
				`float`, `float32`, `float64`,
				`complex64`, `complex128`, `byte`, `rune`,
				`string`, `bool`, `error`, `uintptr`,
				`print`, `println`, `panic`, `recover`, `close`, `complex`,
				`real`, `imag`, `len`, `cap`, `append`, `copy`, `delete`,
				`new`, `make`),
				KeywordType, nil},
			// imaginary_lit
			{`\d+i`, LiteralNumber, nil},
			{`\d+\.\d*([Ee][-+]\d+)?i`, LiteralNumber, nil},
			{`\.\d+([Ee][-+]\d+)?i`, LiteralNumber, nil},
			{`\d+[Ee][-+]\d+i`, LiteralNumber, nil},
			// float_lit
			{`\d+(\.\d+[eE][+\-]?\d+|\.\d*|[eE][+\-]?\d+)`, LiteralNumberFloat, nil},
			{`\.\d+([eE][+\-]?\d+)?`, LiteralNumberFloat, nil},
			// int_lit
			// -- octal_lit
			{`0[0-7]+`, LiteralNumberOct, nil},
			// -- hex_lit
			{`0[xX][0-9a-fA-F]+`, LiteralNumberHex, nil},
			// -- decimal_lit
			{`(0|[1-9][0-9]*)`, LiteralNumberInteger, nil},
			// char_lit
			{`'(\\['"\\abfnrtv]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|[^\\])'`, LiteralStringChar, nil},
			// StringLiteral
			// -- raw_string_lit
			{"`[^`]*`", String, nil},
			// -- interpreted_string_lit
			{`"(\\\\|\\"|[^"])*"`, String, nil},
			// Tokens
			{`(<<=|>>=|<<|>>|<=|>=|&\^=|&\^|\+=|-=|\*=|/=|%=|&=|\|=|&&|\|\||<-|\+\+|--|==|!=|:=|\.\.\.|[+\-*/%&])`, Operator, nil},
			{`[|^<>=!()\[\]{}.,;:]`, Punctuation, nil},
			// identifier
			{`[^\W\d]\w*`, NameOther, nil},
		},
	},
))

lexers/ini.go (new file, 24 lines)
@@ -0,0 +1,24 @@
package lexers

import (
	. "github.com/alecthomas/chroma" // nolint
)

// INI lexer.
var INI = Register(NewLexer(
	&Config{
		Name:      "INI",
		Aliases:   []string{"ini", "cfg", "dosini"},
		Filenames: []string{"*.ini", "*.cfg", "*.inf"},
		MimeTypes: []string{"text/x-ini", "text/inf"},
	},
	map[string][]Rule{
		"root": []Rule{
			{`\s+`, Whitespace, nil},
			{`;.*?$`, Comment, nil},
			{`\[.*?\]$`, Keyword, nil},
			{`(.*?)(\s*)(=)(\s*)(.*?)$`, ByGroups(Name, Whitespace, Operator, Whitespace, String), nil},
			// standalone option, supported by some INI parsers
			{`(.+?)$`, NameAttribute, nil},
		},
	},
))

lexers/registry.go (new file, 71 lines)
@@ -0,0 +1,71 @@
package lexers

import (
	"sort"

	"github.com/danwakefield/fnmatch"

	"github.com/alecthomas/chroma"
)

type prioritisedLexers []chroma.Lexer

func (p prioritisedLexers) Len() int           { return len(p) }
func (p prioritisedLexers) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
func (p prioritisedLexers) Less(i, j int) bool { return p[i].Config().Priority < p[j].Config().Priority }

// Registry is the global Lexer registry.
var Registry = registry{byName: map[string]chroma.Lexer{}}

type registry struct {
	Lexers []chroma.Lexer
	byName map[string]chroma.Lexer
}

// Names of all lexers, optionally including aliases.
func (r *registry) Names(withAliases bool) []string {
	out := []string{}
	for _, lexer := range r.Lexers {
		config := lexer.Config()
		out = append(out, config.Name)
		if withAliases {
			out = append(out, config.Aliases...)
		}
	}
	return out
}

// Get a Lexer by name.
func (r *registry) Get(name string) chroma.Lexer {
	return r.byName[name]
}

// Match returns all lexers matching filename.
func (r *registry) Match(filename string) []chroma.Lexer {
	lexers := prioritisedLexers{}
	for _, lexer := range r.Lexers {
		config := lexer.Config()
		for _, glob := range config.Filenames {
			if fnmatch.Match(glob, filename, 0) {
				lexers = append(lexers, lexer)
				break
			}
		}
	}
	sort.Sort(lexers)
	return lexers
}

// Register a Lexer with the global registry.
func Register(lexer chroma.Lexer, err error) chroma.Lexer {
	if err != nil {
		panic(err)
	}
	config := lexer.Config()
	Registry.byName[config.Name] = lexer
	for _, alias := range config.Aliases {
		Registry.byName[alias] = lexer
	}
	Registry.Lexers = append(Registry.Lexers, lexer)
	return lexer
}
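
A usage sketch of the registry, grounded in the lexers registered above: Get looks up by registered name or alias, while Match globs the filename against each Config.Filenames entry and returns candidates sorted by ascending Priority, so the catch-all default lexer (Priority 99) sorts after more specific matches such as the Go lexer.

goLexer := lexers.Registry.Get("go")           // found via the "go" alias
candidates := lexers.Registry.Match("main.go") // e.g. [Go lexer, default lexer]
best := candidates[0]                          // what cmd/chroma picks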

modifiers.go (new file, 79 lines)
@@ -0,0 +1,79 @@
package chroma

import "fmt"

// A Modifier modifies the behaviour of the lexer.
type Modifier interface {
	// Preprocess the lexer rules.
	//
	// "self" and "rule" are the state name and rule index this Modifier is associated with.
	Preprocess(rules map[string][]CompiledRule, self string, rule int) error
	// Mutate the lexer state machine as it is processing.
	Mutate(state *LexerState) error
}

// A MutatorFunc is a Modifier that mutates the lexer state machine as it is processing.
type MutatorFunc func(state *LexerState) error

func (m MutatorFunc) Preprocess(rules map[string][]CompiledRule, self string, rule int) error {
	return nil
}

func (m MutatorFunc) Mutate(state *LexerState) error {
	return m(state)
}

// A PreprocessorFunc is a Modifier that pre-processes the lexer rules.
type PreprocessorFunc func(rules map[string][]CompiledRule, self string, rule int) error

func (p PreprocessorFunc) Preprocess(rules map[string][]CompiledRule, self string, rule int) error {
	return p(rules, self, rule)
}

func (p PreprocessorFunc) Mutate(state *LexerState) error {
	return nil
}

// Modifiers applies a set of Modifiers in order.
func Modifiers(modifiers ...Modifier) MutatorFunc {
	return func(state *LexerState) error {
		for _, modifier := range modifiers {
			if err := modifier.Mutate(state); err != nil {
				return err
			}
		}
		return nil
	}
}

// Include the given state.
func Include(state string) Rule {
	return Rule{
		Modifier: PreprocessorFunc(func(rules map[string][]CompiledRule, self string, rule int) error {
			includedRules, ok := rules[state]
			if !ok {
				return fmt.Errorf("invalid include state %q", state)
			}
			stateRules := rules[self]
			stateRules = append(stateRules[:rule], append(includedRules, stateRules[rule+1:]...)...)
			rules[self] = stateRules
			return nil
		}),
	}
}

// Push states onto the stack.
func Push(states ...string) MutatorFunc {
	return func(s *LexerState) error {
		s.Stack = append(s.Stack, states...)
		return nil
	}
}

// Pop state from the stack when rule matches.
func Pop(n int) MutatorFunc {
	return func(state *LexerState) error {
		state.Stack = state.Stack[:len(state.Stack)-n]
		return nil
	}
}
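
A sketch of these helpers in a hypothetical rules map (not part of the commit): Include splices the shared "whitespace" rules into "root" at compile time, Push enters the "string" state on an opening quote, and Pop returns to "root" on the closing quote. Note that in this version a rule whose Modifier is set mutates state instead of emitting a token for the match.

rules := Rules{
	"root": []Rule{
		Include("whitespace"),
		{`"`, nil, Push("string")}, // enter the string state (the quote itself is not emitted)
		{`\w+`, Name, nil},
	},
	"string": []Rule{
		{`[^"]+`, String, nil},
		{`"`, nil, Pop(1)}, // leave the string state
	},
	"whitespace": []Rule{
		{`\s+`, Whitespace, nil},
	},
}
lexer := MustNewLexer(nil, rules)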

modifiers_test.go (new file, 6 lines)
@@ -0,0 +1,6 @@
package chroma

import "testing"

func TestPop(t *testing.T) {
}

tokentype_string.go (new file, 94 lines)
@@ -0,0 +1,94 @@
// Code generated by "stringer -type TokenType"; DO NOT EDIT

package chroma

import "fmt"

const _TokenType_name = "EscapeErrorOtherKeywordKeywordConstantKeywordDeclarationKeywordNamespaceKeywordPseudoKeywordReservedKeywordTypeNameNameAttributeNameBuiltinNameBuiltinPseudoNameClassNameConstantNameDecoratorNameEntityNameExceptionNameFunctionNameFunctionMagicNamePropertyNameLabelNameNamespaceNameOtherNameTagNameVariableNameVariableClassNameVariableGlobalNameVariableInstanceNameVariableMagicLiteralLiteralDateLiteralStringLiteralStringAffixLiteralStringBacktickLiteralStringCharLiteralStringDelimiterLiteralStringDocLiteralStringDoubleLiteralStringEscapeLiteralStringHeredocLiteralStringInterpolLiteralStringOtherLiteralStringRegexLiteralStringSingleLiteralStringSymbolLiteralNumberLiteralNumberBinLiteralNumberFloatLiteralNumberHexLiteralNumberIntegerLiteralNumberIntegerLongLiteralNumberOctOperatorOperatorWordPunctuationCommentCommentHashbangCommentMultilineCommentPreprocCommentPreprocFileCommentSingleCommentSpecialGenericGenericDeletedGenericEmphGenericErrorGenericHeadingGenericInsertedGenericOutputGenericPromptGenericStrongGenericSubheadingGenericTracebackTextTextWhitespace"

var _TokenType_map = map[TokenType]string{
	0:    _TokenType_name[0:6],
	1:    _TokenType_name[6:11],
	2:    _TokenType_name[11:16],
	1000: _TokenType_name[16:23],
	1001: _TokenType_name[23:38],
	1002: _TokenType_name[38:56],
	1003: _TokenType_name[56:72],
	1004: _TokenType_name[72:85],
	1005: _TokenType_name[85:100],
	1006: _TokenType_name[100:111],
	2000: _TokenType_name[111:115],
	2001: _TokenType_name[115:128],
	2002: _TokenType_name[128:139],
	2003: _TokenType_name[139:156],
	2004: _TokenType_name[156:165],
	2005: _TokenType_name[165:177],
	2006: _TokenType_name[177:190],
	2007: _TokenType_name[190:200],
	2008: _TokenType_name[200:213],
	2009: _TokenType_name[213:225],
	2010: _TokenType_name[225:242],
	2011: _TokenType_name[242:254],
	2012: _TokenType_name[254:263],
	2013: _TokenType_name[263:276],
	2014: _TokenType_name[276:285],
	2015: _TokenType_name[285:292],
	2016: _TokenType_name[292:304],
	2017: _TokenType_name[304:321],
	2018: _TokenType_name[321:339],
	2019: _TokenType_name[339:359],
	2020: _TokenType_name[359:376],
	3000: _TokenType_name[376:383],
	3001: _TokenType_name[383:394],
	3100: _TokenType_name[394:407],
	3101: _TokenType_name[407:425],
	3102: _TokenType_name[425:446],
	3103: _TokenType_name[446:463],
	3104: _TokenType_name[463:485],
	3105: _TokenType_name[485:501],
	3106: _TokenType_name[501:520],
	3107: _TokenType_name[520:539],
	3108: _TokenType_name[539:559],
	3109: _TokenType_name[559:580],
	3110: _TokenType_name[580:598],
	3111: _TokenType_name[598:616],
	3112: _TokenType_name[616:635],
	3113: _TokenType_name[635:654],
	3200: _TokenType_name[654:667],
	3201: _TokenType_name[667:683],
	3202: _TokenType_name[683:701],
	3203: _TokenType_name[701:717],
	3204: _TokenType_name[717:737],
	3205: _TokenType_name[737:761],
	3206: _TokenType_name[761:777],
	4000: _TokenType_name[777:785],
	4001: _TokenType_name[785:797],
	5000: _TokenType_name[797:808],
	6000: _TokenType_name[808:815],
	6001: _TokenType_name[815:830],
	6002: _TokenType_name[830:846],
	6003: _TokenType_name[846:860],
	6004: _TokenType_name[860:878],
	6005: _TokenType_name[878:891],
	6006: _TokenType_name[891:905],
	7000: _TokenType_name[905:912],
	7001: _TokenType_name[912:926],
	7002: _TokenType_name[926:937],
	7003: _TokenType_name[937:949],
	7004: _TokenType_name[949:963],
	7005: _TokenType_name[963:978],
	7006: _TokenType_name[978:991],
	7007: _TokenType_name[991:1004],
	7008: _TokenType_name[1004:1017],
	7009: _TokenType_name[1017:1034],
	7010: _TokenType_name[1034:1050],
	8000: _TokenType_name[1050:1054],
	8001: _TokenType_name[1054:1068],
}

func (i TokenType) String() string {
	if str, ok := _TokenType_map[i]; ok {
		return str
	}
	return fmt.Sprintf("TokenType(%d)", i)
}

types.go (new file, 181 lines)
@@ -0,0 +1,181 @@
package chroma

// TokenType is the type of token to highlight.
//
// It is also an Emitter, emitting a single token of itself.
type TokenType int

// Set of TokenTypes.
//
// Categories of types are grouped in ranges of 1000, while sub-categories are in ranges of 100. For
// example, the literal category is in the range 3000-3999. The sub-category for literal strings is
// in the range 3100-3199.
const (
	Escape TokenType = iota
	Error
	Other
)

// Keywords.
const (
	Keyword TokenType = 1000 + iota
	KeywordConstant
	KeywordDeclaration
	KeywordNamespace
	KeywordPseudo
	KeywordReserved
	KeywordType
)

// Names.
const (
	Name TokenType = 2000 + iota
	NameAttribute
	NameBuiltin
	NameBuiltinPseudo
	NameClass
	NameConstant
	NameDecorator
	NameEntity
	NameException
	NameFunction
	NameFunctionMagic
	NameProperty
	NameLabel
	NameNamespace
	NameOther
	NameTag
	NameVariable
	NameVariableClass
	NameVariableGlobal
	NameVariableInstance
	NameVariableMagic
)

// Literals.
const (
	Literal TokenType = 3000 + iota
	LiteralDate
)

// Strings.
const (
	LiteralString TokenType = 3100 + iota
	LiteralStringAffix
	LiteralStringBacktick
	LiteralStringChar
	LiteralStringDelimiter
	LiteralStringDoc
	LiteralStringDouble
	LiteralStringEscape
	LiteralStringHeredoc
	LiteralStringInterpol
	LiteralStringOther
	LiteralStringRegex
	LiteralStringSingle
	LiteralStringSymbol
)

// Numbers.
const (
	LiteralNumber TokenType = 3200 + iota
	LiteralNumberBin
	LiteralNumberFloat
	LiteralNumberHex
	LiteralNumberInteger
	LiteralNumberIntegerLong
	LiteralNumberOct
)

// Operators.
const (
	Operator TokenType = 4000 + iota
	OperatorWord
)

// Punctuation.
const (
	Punctuation TokenType = 5000 + iota
)

// Comments.
const (
	Comment TokenType = 6000 + iota
	CommentHashbang
	CommentMultiline
	CommentPreproc
	CommentPreprocFile
	CommentSingle
	CommentSpecial
)

// Generic tokens.
const (
	Generic TokenType = 7000 + iota
	GenericDeleted
	GenericEmph
	GenericError
	GenericHeading
	GenericInserted
	GenericOutput
	GenericPrompt
	GenericStrong
	GenericSubheading
	GenericTraceback
)

// Text.
const (
	Text TokenType = 8000 + iota
	TextWhitespace
)

// Aliases.
const (
	Whitespace = TextWhitespace

	Date = LiteralDate

	String          = LiteralString
	StringAffix     = LiteralStringAffix
	StringBacktick  = LiteralStringBacktick
	StringChar      = LiteralStringChar
	StringDelimiter = LiteralStringDelimiter
	StringDoc       = LiteralStringDoc
	StringDouble    = LiteralStringDouble
	StringEscape    = LiteralStringEscape
	StringHeredoc   = LiteralStringHeredoc
	StringInterpol  = LiteralStringInterpol
	StringOther     = LiteralStringOther
	StringRegex     = LiteralStringRegex
	StringSingle    = LiteralStringSingle
	StringSymbol    = LiteralStringSymbol

	Number            = LiteralNumber
	NumberBin         = LiteralNumberBin
	NumberFloat       = LiteralNumberFloat
	NumberHex         = LiteralNumberHex
	NumberInteger     = LiteralNumberInteger
	NumberIntegerLong = LiteralNumberIntegerLong
	NumberOct         = LiteralNumberOct
)

func (t TokenType) Category() TokenType {
	return t / 1000 * 1000
}

func (t TokenType) SubCategory() TokenType {
	return t / 100 * 100
}

func (t TokenType) InCategory(other TokenType) bool {
	return t/1000 == other/1000
}

func (t TokenType) InSubCategory(other TokenType) bool {
	return t/100 == other/100
}

func (t TokenType) Emit(groups []string) []Token {
	return []Token{Token{Type: t, Value: groups[0]}}
}
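
A worked example of the category arithmetic above (a fragment, assuming fmt is imported): LiteralStringBacktick is 3102, so SubCategory() truncates it to 3100 (LiteralString) and Category() to 3000 (Literal).

fmt.Println(LiteralStringBacktick.Category())             // Literal
fmt.Println(LiteralStringBacktick.SubCategory())          // LiteralString
fmt.Println(LiteralStringBacktick.InCategory(Literal))    // true
fmt.Println(LiteralStringBacktick.InSubCategory(String))  // true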