Mirror of https://github.com/alecthomas/chroma.git, synced 2025-06-14 23:55:08 +02:00
Initial commit! Working!
Makefile (new file)
@@ -0,0 +1,2 @@
tokentype_string.go: types.go
	stringer -type TokenType
cmd/chroma/main.go (new file)
@@ -0,0 +1,47 @@
package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"runtime/pprof"

	"gopkg.in/alecthomas/kingpin.v3-unstable"

	"github.com/alecthomas/chroma"
	"github.com/alecthomas/chroma/formatters"
	"github.com/alecthomas/chroma/lexers"
)

var (
	profileFlag = kingpin.Flag("profile", "Enable profiling to file.").String()
	tokensFlag  = kingpin.Flag("tokens", "Dump raw tokens.").Bool()
	filesArgs   = kingpin.Arg("files", "Files to highlight.").Required().ExistingFiles()
)

func main() {
	kingpin.Parse()
	if *profileFlag != "" {
		f, err := os.Create(*profileFlag)
		kingpin.FatalIfError(err, "")
		pprof.StartCPUProfile(f)
		defer pprof.StopCPUProfile()
	}
	formatter := formatters.Console(formatters.DefaultConsoleTheme)
	for _, filename := range *filesArgs {
		lexers := lexers.Registry.Match(filename)
		lexer := lexers[0]
		lexer = chroma.Coalesce(lexer)
		contents, err := ioutil.ReadFile(filename)
		kingpin.FatalIfError(err, "")
		tokens, err := lexer.Tokenise(string(contents))
		kingpin.FatalIfError(err, "")
		if *tokensFlag {
			for _, token := range tokens {
				fmt.Println(token)
			}
		} else {
			formatter.Format(os.Stdout, tokens)
		}
	}
}
coalesce.go (new file)
@@ -0,0 +1,31 @@
package chroma

// Coalesce is a Lexer interceptor that collapses runs of common types into a single token.
func Coalesce(lexer Lexer) Lexer {
	return &coalescer{lexer}
}

type coalescer struct {
	Lexer
}

func (d *coalescer) Tokenise(text string) ([]Token, error) {
	in, err := d.Lexer.Tokenise(text)
	if err != nil {
		return in, err
	}
	out := []Token{}
	for _, token := range in {
		if len(out) == 0 {
			out = append(out, token)
			continue
		}
		last := &out[len(out)-1]
		if last.Type == token.Type {
			last.Value += token.Value
		} else {
			out = append(out, token)
		}
	}
	return out, err
}
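Not part of this commit: a minimal usage sketch of Coalesce, written against the API added in this diff, showing a run of same-typed tokens being merged into one.

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	// Each punctuation character matches separately; Coalesce merges the run into a single token.
	lexer := chroma.Coalesce(chroma.MustNewLexer(nil, chroma.Rules{
		"root": []chroma.Rule{
			{`[[:punct:]]`, chroma.Punctuation, nil},
		},
	}))
	tokens, err := lexer.Tokenise("!@#")
	if err != nil {
		panic(err)
	}
	fmt.Println(tokens) // expected: [Token{Punctuation, "!@#"}]
}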
coalesce_test.go (new file)
@@ -0,0 +1,22 @@
package chroma

import (
	"testing"

	"github.com/stretchr/testify/require"
)

func TestCoalesce(t *testing.T) {
	// Coalesce returns a single Lexer value, so there is no error to check here.
	lexer := Coalesce(MustNewLexer(nil, Rules{
		"root": []Rule{
			Rule{`[[:punct:]]`, Punctuation, nil},
		},
	}))
	actual, err := lexer.Tokenise("!@#$%")
	require.NoError(t, err)
	expected := []Token{
		Token{Punctuation, "!@#$%"},
	}
	require.Equal(t, expected, actual)
}
formatters/api.go (new file)
@@ -0,0 +1,12 @@
package formatters

import (
	"io"

	"github.com/alecthomas/chroma"
)

// Formatter takes a token stream and formats it.
type Formatter interface {
	Format(w io.Writer, tokens []chroma.Token) error
}
formatters/console.go (new file)
@@ -0,0 +1,47 @@
package formatters

import (
	"bufio"
	"io"

	. "github.com/alecthomas/chroma" // nolint
	"github.com/alecthomas/colour"
)

// DefaultConsoleTheme is a default colour theme for the Console formatter.
var DefaultConsoleTheme = map[TokenType]string{
	Number:  "^B^3",
	Comment: "^5",
	String:  "^B^5",
	Keyword: "^B^7",
}

// Console formatter.
//
//	formatter := Console(DefaultConsoleTheme)
func Console(theme map[TokenType]string) Formatter {
	return &consoleFormatter{theme}
}

type consoleFormatter struct {
	theme map[TokenType]string
}

func (c *consoleFormatter) Format(w io.Writer, tokens []Token) error {
	bw := bufio.NewWriterSize(w, 1024)
	printer := colour.Colour(bw)
	for _, token := range tokens {
		// Fall back from the exact type to its sub-category, then its category, then reset.
		clr, ok := c.theme[token.Type]
		if !ok {
			clr, ok = c.theme[token.Type.SubCategory()]
			if !ok {
				clr, ok = c.theme[token.Type.Category()]
				if !ok {
					clr = "^R"
				}
			}
		}
		printer.Printf(clr+"%s", token.Value)
	}
	return bw.Flush()
}
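Not part of this commit: a short sketch of feeding the Console formatter a custom theme; the colour codes are assumed to follow the same style as DefaultConsoleTheme above, and token types without an entry fall back to their category or to "^R".

package main

import (
	"os"

	"github.com/alecthomas/chroma"
	"github.com/alecthomas/chroma/formatters"
)

func main() {
	// Hypothetical theme values, in the same code style as DefaultConsoleTheme.
	theme := map[chroma.TokenType]string{
		chroma.Keyword: "^B^6",
		chroma.String:  "^2",
	}
	tokens := []chroma.Token{
		{chroma.Keyword, "package"},
		{chroma.Whitespace, " "},
		{chroma.Name, "main"},
	}
	formatter := formatters.Console(theme)
	formatter.Format(os.Stdout, tokens)
}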
lexer.go (new file)
@@ -0,0 +1,210 @@
package chroma

import (
	"fmt"
	"regexp"
	"strings"
)

// Config for a lexer.
type Config struct {
	// Name of the lexer.
	Name string

	// Shortcuts for the lexer.
	Aliases []string

	// File name globs.
	Filenames []string

	// Secondary file name globs.
	AliasFilenames []string

	// MIME types.
	MimeTypes []string

	// Priority, should multiple lexers match and no content is provided.
	Priority int

	// Don't strip leading and trailing newlines from the input.
	DontStripNL bool

	// Strip all leading and trailing whitespace from the input.
	StripAll bool

	// If set, don't ensure that the input ends with a newline. A trailing
	// newline is required by some lexers that consume input linewise.
	DontEnsureNL bool

	// If given and greater than 0, expand tabs in the input.
	TabSize int

	// If given, must be an encoding name. This encoding will be used to
	// convert the input string to Unicode, if it is not already a Unicode
	// string.
	Encoding string
}

// A Token is a single lexeme and its type.
type Token struct {
	Type  TokenType
	Value string
}

func (t Token) String() string   { return fmt.Sprintf("Token{%s, %q}", t.Type, t.Value) }
func (t Token) GoString() string { return t.String() }

// A Lexer turns source text into a stream of tokens.
type Lexer interface {
	Config() *Config
	Tokenise(text string) ([]Token, error)
}

// Analyser determines if this lexer is appropriate for the given text.
type Analyser interface {
	AnalyseText(text string) float32
}

// A Rule matches input, emits tokens via its Emitter, and may modify lexer state via its Modifier.
type Rule struct {
	Pattern  string
	Type     Emitter
	Modifier Modifier
}

// An Emitter takes group matches and returns tokens.
type Emitter interface {
	// Emit tokens for the given regex groups.
	Emit(groups []string) []Token
}

// EmitterFunc is a function that implements Emitter.
type EmitterFunc func(groups []string) []Token

func (e EmitterFunc) Emit(groups []string) []Token { return e(groups) }

// ByGroups emits a token for each matching group in the rule's regex.
func ByGroups(types ...TokenType) Emitter {
	return EmitterFunc(func(groups []string) (out []Token) {
		for i, group := range groups[1:] {
			out = append(out, Token{types[i], group})
		}
		return
	})
}

// Words creates a regex that matches any of the given literal words.
func Words(words ...string) string {
	for i, word := range words {
		words[i] = regexp.QuoteMeta(word)
	}
	return "\\b(?:" + strings.Join(words, "|") + ")\\b"
}

// Rules maps state names to the rules for each state.
type Rules map[string][]Rule

// MustNewLexer creates a new Lexer or panics.
func MustNewLexer(config *Config, rules Rules) Lexer {
	lexer, err := NewLexer(config, rules)
	if err != nil {
		panic(err)
	}
	return lexer
}

// NewLexer creates a new regex-based Lexer.
//
// "rules" is a state machine transition map. Each key is a state. Values are sets of rules
// that match input, optionally modify lexer state, and output tokens.
func NewLexer(config *Config, rules Rules) (Lexer, error) {
	if _, ok := rules["root"]; !ok {
		return nil, fmt.Errorf("no \"root\" state")
	}
	compiledRules := map[string][]CompiledRule{}
	for state, rules := range rules {
		for _, rule := range rules {
			crule := CompiledRule{Rule: rule}
			re, err := regexp.Compile("^(?m)" + rule.Pattern)
			if err != nil {
				return nil, fmt.Errorf("invalid regex %q for state %q: %s", rule.Pattern, state, err)
			}
			crule.Regexp = re
			compiledRules[state] = append(compiledRules[state], crule)
		}
	}
	// Apply any pre-processor modifiers.
	for state, rules := range compiledRules {
		for index, rule := range rules {
			if rule.Modifier != nil {
				err := rule.Modifier.Preprocess(compiledRules, state, index)
				if err != nil {
					return nil, err
				}
			}
		}
	}
	return &regexLexer{
		config: config,
		rules:  compiledRules,
	}, nil
}

// A CompiledRule is a Rule with a pre-compiled regex.
type CompiledRule struct {
	Rule
	Regexp *regexp.Regexp
}

type regexLexer struct {
	config *Config
	rules  map[string][]CompiledRule
}

func (r *regexLexer) Config() *Config {
	return r.config
}

// LexerState is the mutable state threaded through Modifiers while tokenising.
type LexerState struct {
	Text  string
	Pos   int
	Stack []string
	Rules map[string][]CompiledRule
	State string
}

func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
	state := &LexerState{
		Text:  text,
		Stack: []string{"root"},
		Rules: r.rules,
	}
	for state.Pos < len(text) && len(state.Stack) > 0 {
		state.State = state.Stack[len(state.Stack)-1]
		rule, index := matchRules(state.Text[state.Pos:], state.Rules[state.State])
		// No match: emit a single-character Error token and advance.
		if index == nil {
			out = append(out, Token{Error, state.Text[state.Pos : state.Pos+1]})
			state.Pos++
			continue
		}

		groups := make([]string, len(index)/2)
		for i := 0; i < len(index); i += 2 {
			groups[i/2] = text[state.Pos+index[i] : state.Pos+index[i+1]]
		}
		state.Pos += index[1]
		if rule.Modifier != nil {
			if err = rule.Modifier.Mutate(state); err != nil {
				return
			}
		} else {
			out = append(out, rule.Type.Emit(groups)...)
		}
	}
	return
}

func matchRules(text string, rules []CompiledRule) (CompiledRule, []int) {
	for _, rule := range rules {
		if index := rule.Regexp.FindStringSubmatchIndex(text); index != nil {
			return rule, index
		}
	}
	return CompiledRule{}, nil
}
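Not part of this commit: a tiny sketch of the regex Words builds, using the chroma import path from this diff.

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	// Each word is regexp-quoted and joined into a single word-boundary-anchored alternation.
	fmt.Println(chroma.Words("if", "else", "for"))
	// prints: \b(?:if|else|for)\b
}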
lexer_test.go (new file)
@@ -0,0 +1,52 @@
package chroma

import (
	"testing"

	"github.com/stretchr/testify/require"
)

func TestTokenTypeClassifiers(t *testing.T) {
	require.True(t, GenericDeleted.InCategory(Generic))
	require.True(t, LiteralStringBacktick.InSubCategory(String))
	require.Equal(t, LiteralStringBacktick.String(), "LiteralStringBacktick")
}

func TestSimpleLexer(t *testing.T) {
	lexer, err := NewLexer(
		&Config{
			Name:      "INI",
			Aliases:   []string{"ini", "cfg"},
			Filenames: []string{"*.ini", "*.cfg"},
		},
		map[string][]Rule{
			"root": []Rule{
				{`\s+`, Whitespace, nil},
				{`;.*?$`, Comment, nil},
				{`\[.*?\]$`, Keyword, nil},
				{`(.*?)(\s*)(=)(\s*)(.*?)$`, ByGroups(Name, Whitespace, Operator, Whitespace, String), nil},
			},
		},
	)
	require.NoError(t, err)
	actual, err := lexer.Tokenise(`
	; this is a comment
	[section]
	a = 10
`)
	require.NoError(t, err)
	expected := []Token{
		Token{Whitespace, "\n\t"},
		Token{Comment, "; this is a comment"},
		Token{Whitespace, "\n\t"},
		Token{Keyword, "[section]"},
		Token{Whitespace, "\n\t"},
		Token{Name, "a"},
		Token{Whitespace, " "},
		Token{Operator, "="},
		Token{Whitespace, " "},
		Token{LiteralString, "10"},
		Token{Whitespace, "\n"},
	}
	require.Equal(t, expected, actual)
}
lexers/default.go (new file)
@@ -0,0 +1,17 @@
package lexers

import (
	. "github.com/alecthomas/chroma" // nolint
)

// Default lexer if no other is found.
var Default = Register(NewLexer(&Config{
	Name:      "default",
	Filenames: []string{"*"},
	Priority:  99,
}, Rules{
	"root": []Rule{
		{`.+`, Text, nil},
		{`\n`, Text, nil},
	},
}))
lexers/go.go (new file)
@@ -0,0 +1,67 @@
package lexers

import (
	. "github.com/alecthomas/chroma" // nolint: golint
)

// Go lexer.
var Go = Register(NewLexer(
	&Config{
		Name:      "Go",
		Filenames: []string{"*.go"},
		Aliases:   []string{"go", "golang"},
		MimeTypes: []string{"text/x-gosrc"},
	},
	Rules{
		`root`: []Rule{
			{`\n`, Text, nil},
			{`\s+`, Text, nil},
			{`\\\n`, Text, nil}, // line continuations
			{`//(.*?)\n`, CommentSingle, nil},
			{`/(\\\n)?[*](.|\n)*?[*](\\\n)?/`, CommentMultiline, nil},
			{`(import|package)\b`, KeywordNamespace, nil},
			{`(var|func|struct|map|chan|type|interface|const)\b`,
				KeywordDeclaration, nil},
			{Words(`break`, `default`, `select`, `case`, `defer`, `go`,
				`else`, `goto`, `switch`, `fallthrough`, `if`, `range`,
				`continue`, `for`, `return`), Keyword, nil},
			{`(true|false|iota|nil)\b`, KeywordConstant, nil},
			{Words(`uint`, `uint8`, `uint16`, `uint32`, `uint64`,
				`int`, `int8`, `int16`, `int32`, `int64`,
				`float`, `float32`, `float64`,
				`complex64`, `complex128`, `byte`, `rune`,
				`string`, `bool`, `error`, `uintptr`,
				`print`, `println`, `panic`, `recover`, `close`, `complex`,
				`real`, `imag`, `len`, `cap`, `append`, `copy`, `delete`,
				`new`, `make`),
				KeywordType, nil},
			// imaginary_lit
			{`\d+i`, LiteralNumber, nil},
			{`\d+\.\d*([Ee][-+]\d+)?i`, LiteralNumber, nil},
			{`\.\d+([Ee][-+]\d+)?i`, LiteralNumber, nil},
			{`\d+[Ee][-+]\d+i`, LiteralNumber, nil},
			// float_lit
			{`\d+(\.\d+[eE][+\-]?\d+|\.\d*|[eE][+\-]?\d+)`, LiteralNumberFloat, nil},
			{`\.\d+([eE][+\-]?\d+)?`, LiteralNumberFloat, nil},
			// int_lit
			// -- octal_lit
			{`0[0-7]+`, LiteralNumberOct, nil},
			// -- hex_lit
			{`0[xX][0-9a-fA-F]+`, LiteralNumberHex, nil},
			// -- decimal_lit
			{`(0|[1-9][0-9]*)`, LiteralNumberInteger, nil},
			// char_lit
			{`'(\\['"\\abfnrtv]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|[^\\])'`, LiteralStringChar, nil},
			// StringLiteral
			// -- raw_string_lit
			{"`[^`]*`", String, nil},
			// -- interpreted_string_lit
			{`"(\\\\|\\"|[^"])*"`, String, nil},
			// Tokens
			{`(<<=|>>=|<<|>>|<=|>=|&\^=|&\^|\+=|-=|\*=|/=|%=|&=|\|=|&&|\|\||<-|\+\+|--|==|!=|:=|\.\.\.|[+\-*/%&])`, Operator, nil},
			{`[|^<>=!()\[\]{}.,;:]`, Punctuation, nil},
			// identifier
			{`[^\W\d]\w*`, NameOther, nil},
		},
	},
))
lexers/ini.go (new file)
@@ -0,0 +1,24 @@
package lexers

import (
	. "github.com/alecthomas/chroma" // nolint
)

var INI = Register(NewLexer(
	&Config{
		Name:      "INI",
		Aliases:   []string{"ini", "cfg", "dosini"},
		Filenames: []string{"*.ini", "*.cfg", "*.inf"},
		MimeTypes: []string{"text/x-ini", "text/inf"},
	},
	map[string][]Rule{
		"root": []Rule{
			{`\s+`, Whitespace, nil},
			{`;.*?$`, Comment, nil},
			{`\[.*?\]$`, Keyword, nil},
			{`(.*?)(\s*)(=)(\s*)(.*?)$`, ByGroups(Name, Whitespace, Operator, Whitespace, String), nil},
			// standalone option, supported by some INI parsers
			{`(.+?)$`, NameAttribute, nil},
		},
	},
))
lexers/registry.go (new file)
@@ -0,0 +1,71 @@
package lexers

import (
	"sort"

	"github.com/danwakefield/fnmatch"

	"github.com/alecthomas/chroma"
)

type prioritisedLexers []chroma.Lexer

func (p prioritisedLexers) Len() int           { return len(p) }
func (p prioritisedLexers) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
func (p prioritisedLexers) Less(i, j int) bool { return p[i].Config().Priority < p[j].Config().Priority }

// Registry is the global Lexer registry.
var Registry = registry{byName: map[string]chroma.Lexer{}}

type registry struct {
	Lexers []chroma.Lexer
	byName map[string]chroma.Lexer
}

// Names of all lexers, optionally including aliases.
func (r *registry) Names(withAliases bool) []string {
	out := []string{}
	for _, lexer := range r.Lexers {
		config := lexer.Config()
		out = append(out, config.Name)
		if withAliases {
			out = append(out, config.Aliases...)
		}
	}
	return out
}

// Get a Lexer by name.
func (r *registry) Get(name string) chroma.Lexer {
	return r.byName[name]
}

// Match returns all lexers matching filename.
func (r *registry) Match(filename string) []chroma.Lexer {
	lexers := prioritisedLexers{}
	for _, lexer := range r.Lexers {
		config := lexer.Config()
		for _, glob := range config.Filenames {
			if fnmatch.Match(glob, filename, 0) {
				lexers = append(lexers, lexer)
				break
			}
		}
	}
	sort.Sort(lexers)
	return lexers
}

// Register a Lexer with the global registry.
func Register(lexer chroma.Lexer, err error) chroma.Lexer {
	if err != nil {
		panic(err)
	}
	config := lexer.Config()
	Registry.byName[config.Name] = lexer
	for _, alias := range config.Aliases {
		Registry.byName[alias] = lexer
	}
	Registry.Lexers = append(Registry.Lexers, lexer)
	return lexer
}
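Not part of this commit: a sketch of looking lexers up through the registry, assuming only the three lexers added in this diff (default, Go, INI) are registered.

package main

import (
	"fmt"

	"github.com/alecthomas/chroma/lexers"
)

func main() {
	// Lexers register themselves via Register() when their package-level vars
	// are initialised, so importing the lexers package populates Registry.
	fmt.Println(lexers.Registry.Names(true)) // names plus aliases, e.g. "go", "golang", "ini", ...
	goLexer := lexers.Registry.Get("golang") // lookup by alias
	fmt.Println(goLexer.Config().Name)       // Go
	// Match sorts by Config().Priority, so the catch-all default lexer sorts after INI.
	for _, lexer := range lexers.Registry.Match("config.ini") {
		fmt.Println(lexer.Config().Name) // INI, then default
	}
}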
modifiers.go (new file)
@@ -0,0 +1,79 @@
package chroma

import "fmt"

// A Modifier modifies the behaviour of the lexer.
type Modifier interface {
	// Preprocess the lexer rules.
	//
	// "self" and "rule" are the state name and rule index this Modifier is associated with.
	Preprocess(rules map[string][]CompiledRule, self string, rule int) error
	// Mutate the lexer state machine as it is processing.
	Mutate(state *LexerState) error
}

// A MutatorFunc is a Modifier that mutates the lexer state machine as it is processing.
type MutatorFunc func(state *LexerState) error

func (m MutatorFunc) Preprocess(rules map[string][]CompiledRule, self string, rule int) error {
	return nil
}

func (m MutatorFunc) Mutate(state *LexerState) error {
	return m(state)
}

// A PreprocessorFunc is a Modifier that pre-processes the lexer rules.
type PreprocessorFunc func(rules map[string][]CompiledRule, self string, rule int) error

func (p PreprocessorFunc) Preprocess(rules map[string][]CompiledRule, self string, rule int) error {
	return p(rules, self, rule)
}

func (p PreprocessorFunc) Mutate(state *LexerState) error {
	return nil
}

// Modifiers applies a set of Modifiers in order.
func Modifiers(modifiers ...Modifier) MutatorFunc {
	return func(state *LexerState) error {
		for _, modifier := range modifiers {
			if err := modifier.Mutate(state); err != nil {
				return err
			}
		}
		return nil
	}
}

// Include the given state, splicing its rules in place of this rule at preprocessing time.
func Include(state string) Rule {
	return Rule{
		Modifier: PreprocessorFunc(func(rules map[string][]CompiledRule, self string, rule int) error {
			includedRules, ok := rules[state]
			if !ok {
				return fmt.Errorf("invalid include state %q", state)
			}
			stateRules := rules[self]
			stateRules = append(stateRules[:rule], append(includedRules, stateRules[rule+1:]...)...)
			rules[self] = stateRules
			return nil
		}),
	}
}

// Push states onto the stack.
func Push(states ...string) MutatorFunc {
	return func(s *LexerState) error {
		s.Stack = append(s.Stack, states...)
		return nil
	}
}

// Pop n states from the stack when the rule matches.
func Pop(n int) MutatorFunc {
	return func(state *LexerState) error {
		state.Stack = state.Stack[:len(state.Stack)-n]
		return nil
	}
}
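Not part of this commit: a sketch of a two-state lexer built with Push and Pop, assuming the behaviour defined above, where a rule with a Modifier mutates lexer state instead of emitting a token.

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	// The opening quote pushes the "string" state; the closing quote pops it.
	// With this commit's Tokenise, the quotes themselves emit no tokens.
	lexer := chroma.MustNewLexer(nil, chroma.Rules{
		"root": []chroma.Rule{
			{`"`, nil, chroma.Push("string")},
			{`\w+`, chroma.Name, nil},
			{`\s+`, chroma.Whitespace, nil},
		},
		"string": []chroma.Rule{
			{`[^"]+`, chroma.String, nil},
			{`"`, nil, chroma.Pop(1)},
		},
	})
	tokens, err := lexer.Tokenise(`say "hello world"`)
	if err != nil {
		panic(err)
	}
	fmt.Println(tokens) // expected: Name "say", Whitespace " ", String "hello world"
}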
modifiers_test.go (new file)
@@ -0,0 +1,6 @@
package chroma

import "testing"

func TestPop(t *testing.T) {
}
tokentype_string.go (new file)
@@ -0,0 +1,94 @@
// Code generated by "stringer -type TokenType"; DO NOT EDIT

package chroma

import "fmt"

const _TokenType_name = "EscapeErrorOtherKeywordKeywordConstantKeywordDeclarationKeywordNamespaceKeywordPseudoKeywordReservedKeywordTypeNameNameAttributeNameBuiltinNameBuiltinPseudoNameClassNameConstantNameDecoratorNameEntityNameExceptionNameFunctionNameFunctionMagicNamePropertyNameLabelNameNamespaceNameOtherNameTagNameVariableNameVariableClassNameVariableGlobalNameVariableInstanceNameVariableMagicLiteralLiteralDateLiteralStringLiteralStringAffixLiteralStringBacktickLiteralStringCharLiteralStringDelimiterLiteralStringDocLiteralStringDoubleLiteralStringEscapeLiteralStringHeredocLiteralStringInterpolLiteralStringOtherLiteralStringRegexLiteralStringSingleLiteralStringSymbolLiteralNumberLiteralNumberBinLiteralNumberFloatLiteralNumberHexLiteralNumberIntegerLiteralNumberIntegerLongLiteralNumberOctOperatorOperatorWordPunctuationCommentCommentHashbangCommentMultilineCommentPreprocCommentPreprocFileCommentSingleCommentSpecialGenericGenericDeletedGenericEmphGenericErrorGenericHeadingGenericInsertedGenericOutputGenericPromptGenericStrongGenericSubheadingGenericTracebackTextTextWhitespace"

var _TokenType_map = map[TokenType]string{
	0:    _TokenType_name[0:6],
	1:    _TokenType_name[6:11],
	2:    _TokenType_name[11:16],
	1000: _TokenType_name[16:23],
	1001: _TokenType_name[23:38],
	1002: _TokenType_name[38:56],
	1003: _TokenType_name[56:72],
	1004: _TokenType_name[72:85],
	1005: _TokenType_name[85:100],
	1006: _TokenType_name[100:111],
	2000: _TokenType_name[111:115],
	2001: _TokenType_name[115:128],
	2002: _TokenType_name[128:139],
	2003: _TokenType_name[139:156],
	2004: _TokenType_name[156:165],
	2005: _TokenType_name[165:177],
	2006: _TokenType_name[177:190],
	2007: _TokenType_name[190:200],
	2008: _TokenType_name[200:213],
	2009: _TokenType_name[213:225],
	2010: _TokenType_name[225:242],
	2011: _TokenType_name[242:254],
	2012: _TokenType_name[254:263],
	2013: _TokenType_name[263:276],
	2014: _TokenType_name[276:285],
	2015: _TokenType_name[285:292],
	2016: _TokenType_name[292:304],
	2017: _TokenType_name[304:321],
	2018: _TokenType_name[321:339],
	2019: _TokenType_name[339:359],
	2020: _TokenType_name[359:376],
	3000: _TokenType_name[376:383],
	3001: _TokenType_name[383:394],
	3100: _TokenType_name[394:407],
	3101: _TokenType_name[407:425],
	3102: _TokenType_name[425:446],
	3103: _TokenType_name[446:463],
	3104: _TokenType_name[463:485],
	3105: _TokenType_name[485:501],
	3106: _TokenType_name[501:520],
	3107: _TokenType_name[520:539],
	3108: _TokenType_name[539:559],
	3109: _TokenType_name[559:580],
	3110: _TokenType_name[580:598],
	3111: _TokenType_name[598:616],
	3112: _TokenType_name[616:635],
	3113: _TokenType_name[635:654],
	3200: _TokenType_name[654:667],
	3201: _TokenType_name[667:683],
	3202: _TokenType_name[683:701],
	3203: _TokenType_name[701:717],
	3204: _TokenType_name[717:737],
	3205: _TokenType_name[737:761],
	3206: _TokenType_name[761:777],
	4000: _TokenType_name[777:785],
	4001: _TokenType_name[785:797],
	5000: _TokenType_name[797:808],
	6000: _TokenType_name[808:815],
	6001: _TokenType_name[815:830],
	6002: _TokenType_name[830:846],
	6003: _TokenType_name[846:860],
	6004: _TokenType_name[860:878],
	6005: _TokenType_name[878:891],
	6006: _TokenType_name[891:905],
	7000: _TokenType_name[905:912],
	7001: _TokenType_name[912:926],
	7002: _TokenType_name[926:937],
	7003: _TokenType_name[937:949],
	7004: _TokenType_name[949:963],
	7005: _TokenType_name[963:978],
	7006: _TokenType_name[978:991],
	7007: _TokenType_name[991:1004],
	7008: _TokenType_name[1004:1017],
	7009: _TokenType_name[1017:1034],
	7010: _TokenType_name[1034:1050],
	8000: _TokenType_name[1050:1054],
	8001: _TokenType_name[1054:1068],
}

func (i TokenType) String() string {
	if str, ok := _TokenType_map[i]; ok {
		return str
	}
	return fmt.Sprintf("TokenType(%d)", i)
}
types.go (new file)
@@ -0,0 +1,181 @@
package chroma

// TokenType is the type of token to highlight.
//
// It is also an Emitter, emitting a single token of itself.
type TokenType int

// Set of TokenTypes.
//
// Categories of types are grouped in ranges of 1000, while sub-categories are in ranges of 100. For
// example, the literal category is in the range 3000-3999. The sub-category for literal strings is
// in the range 3100-3199.
const (
	Escape TokenType = iota
	Error
	Other
)

// Keywords.
const (
	Keyword TokenType = 1000 + iota
	KeywordConstant
	KeywordDeclaration
	KeywordNamespace
	KeywordPseudo
	KeywordReserved
	KeywordType
)

// Names.
const (
	Name TokenType = 2000 + iota
	NameAttribute
	NameBuiltin
	NameBuiltinPseudo
	NameClass
	NameConstant
	NameDecorator
	NameEntity
	NameException
	NameFunction
	NameFunctionMagic
	NameProperty
	NameLabel
	NameNamespace
	NameOther
	NameTag
	NameVariable
	NameVariableClass
	NameVariableGlobal
	NameVariableInstance
	NameVariableMagic
)

// Literals.
const (
	Literal TokenType = 3000 + iota
	LiteralDate
)

// Strings.
const (
	LiteralString TokenType = 3100 + iota
	LiteralStringAffix
	LiteralStringBacktick
	LiteralStringChar
	LiteralStringDelimiter
	LiteralStringDoc
	LiteralStringDouble
	LiteralStringEscape
	LiteralStringHeredoc
	LiteralStringInterpol
	LiteralStringOther
	LiteralStringRegex
	LiteralStringSingle
	LiteralStringSymbol
)

// Numbers.
const (
	LiteralNumber TokenType = 3200 + iota
	LiteralNumberBin
	LiteralNumberFloat
	LiteralNumberHex
	LiteralNumberInteger
	LiteralNumberIntegerLong
	LiteralNumberOct
)

// Operators.
const (
	Operator TokenType = 4000 + iota
	OperatorWord
)

// Punctuation.
const (
	Punctuation TokenType = 5000 + iota
)

// Comments.
const (
	Comment TokenType = 6000 + iota
	CommentHashbang
	CommentMultiline
	CommentPreproc
	CommentPreprocFile
	CommentSingle
	CommentSpecial
)

// Generic tokens.
const (
	Generic TokenType = 7000 + iota
	GenericDeleted
	GenericEmph
	GenericError
	GenericHeading
	GenericInserted
	GenericOutput
	GenericPrompt
	GenericStrong
	GenericSubheading
	GenericTraceback
)

// Text.
const (
	Text TokenType = 8000 + iota
	TextWhitespace
)

// Aliases.
const (
	Whitespace = TextWhitespace

	Date = LiteralDate

	String          = LiteralString
	StringAffix     = LiteralStringAffix
	StringBacktick  = LiteralStringBacktick
	StringChar      = LiteralStringChar
	StringDelimiter = LiteralStringDelimiter
	StringDoc       = LiteralStringDoc
	StringDouble    = LiteralStringDouble
	StringEscape    = LiteralStringEscape
	StringHeredoc   = LiteralStringHeredoc
	StringInterpol  = LiteralStringInterpol
	StringOther     = LiteralStringOther
	StringRegex     = LiteralStringRegex
	StringSingle    = LiteralStringSingle
	StringSymbol    = LiteralStringSymbol

	Number            = LiteralNumber
	NumberBin         = LiteralNumberBin
	NumberFloat       = LiteralNumberFloat
	NumberHex         = LiteralNumberHex
	NumberInteger     = LiteralNumberInteger
	NumberIntegerLong = LiteralNumberIntegerLong
	NumberOct         = LiteralNumberOct
)

// Category returns the coarse category (e.g. Literal) of the token type.
func (t TokenType) Category() TokenType {
	return t / 1000 * 1000
}

// SubCategory returns the sub-category (e.g. LiteralString) of the token type.
func (t TokenType) SubCategory() TokenType {
	return t / 100 * 100
}

func (t TokenType) InCategory(other TokenType) bool {
	return t/1000 == other/1000
}

func (t TokenType) InSubCategory(other TokenType) bool {
	return t/100 == other/100
}

// Emit makes a TokenType usable directly as an Emitter, emitting the whole match as a single token.
func (t TokenType) Emit(groups []string) []Token {
	return []Token{Token{Type: t, Value: groups[0]}}
}
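Not part of this commit: a worked example of the category arithmetic above. LiteralStringBacktick is 3102, so its category is 3102/1000*1000 = 3000 (Literal) and its sub-category is 3102/100*100 = 3100 (LiteralString).

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	t := chroma.LiteralStringBacktick           // 3102
	fmt.Println(t.Category())                   // Literal (3000)
	fmt.Println(t.SubCategory())                // LiteralString (3100)
	fmt.Println(t.InCategory(chroma.Literal))   // true
	fmt.Println(t.InSubCategory(chroma.String)) // true
}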