
Initial commit! Working!

Alec Thomas 2017-06-02 00:17:21 +10:00
parent 3de978543f
commit b2fb8edf77
16 changed files with 962 additions and 0 deletions

2
Makefile Normal file

@@ -0,0 +1,2 @@
tokentype_string.go: types.go
stringer -type TokenType

47
cmd/chroma/main.go Normal file

@@ -0,0 +1,47 @@
package main
import (
"fmt"
"io/ioutil"
"os"
"runtime/pprof"
"gopkg.in/alecthomas/kingpin.v3-unstable"
"github.com/alecthomas/chroma"
"github.com/alecthomas/chroma/formatters"
"github.com/alecthomas/chroma/lexers"
)
var (
profileFlag = kingpin.Flag("profile", "Enable profiling to file.").String()
tokensFlag = kingpin.Flag("tokens", "Dump raw tokens.").Bool()
filesArgs = kingpin.Arg("files", "Files to highlight.").Required().ExistingFiles()
)
func main() {
kingpin.Parse()
if *profileFlag != "" {
f, err := os.Create(*profileFlag)
kingpin.FatalIfError(err, "")
pprof.StartCPUProfile(f)
defer pprof.StopCPUProfile()
}
formatter := formatters.Console(formatters.DefaultConsoleTheme)
for _, filename := range *filesArgs {
// Match always includes the catch-all "default" lexer, so the result is
// never empty. Use a distinct name to avoid shadowing the lexers package.
matched := lexers.Registry.Match(filename)
lexer := chroma.Coalesce(matched[0])
contents, err := ioutil.ReadFile(filename)
kingpin.FatalIfError(err, "")
tokens, err := lexer.Tokenise(string(contents))
kingpin.FatalIfError(err, "")
if *tokensFlag {
for _, token := range tokens {
fmt.Println(token)
}
} else {
formatter.Format(os.Stdout, tokens)
}
}
}

31
coalesce.go Normal file

@@ -0,0 +1,31 @@
package chroma
// Coalesce is a Lexer interceptor that collapses runs of tokens of the same type into a single token.
func Coalesce(lexer Lexer) Lexer {
return &coalescer{lexer}
}
type coalescer struct {
Lexer
}
func (d *coalescer) Tokenise(text string) ([]Token, error) {
in, err := d.Lexer.Tokenise(text)
if err != nil {
return in, err
}
out := []Token{}
for _, token := range in {
if len(out) == 0 {
out = append(out, token)
continue
}
last := &out[len(out)-1]
if last.Type == token.Type {
last.Value += token.Value
} else {
out = append(out, token)
}
}
return out, err
}

22
coalesce_test.go Normal file

@@ -0,0 +1,22 @@
package chroma
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestCoalesce(t *testing.T) {
lexer := Coalesce(MustNewLexer(nil, Rules{
"root": []Rule{
{`[[:punct:]]`, Punctuation, nil},
},
}))
actual, err := lexer.Tokenise("!@#$%")
require.NoError(t, err)
expected := []Token{
{Punctuation, "!@#$%"},
}
require.Equal(t, expected, actual)
}

12
formatters/api.go Normal file

@@ -0,0 +1,12 @@
package formatters
import (
"io"
"github.com/alecthomas/chroma"
)
// Formatter takes a token stream and formats it.
type Formatter interface {
Format(w io.Writer, tokens []chroma.Token) error
}
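
The Formatter interface above is the only contract a renderer has to satisfy. As a minimal sketch of an alternative implementation (hypothetical, not part of this commit), a formatter that drops all styling and writes token values verbatim:

package formatters

import (
	"io"

	"github.com/alecthomas/chroma"
)

// plainFormatter writes token values verbatim, with no styling.
type plainFormatter struct{}

func (plainFormatter) Format(w io.Writer, tokens []chroma.Token) error {
	for _, token := range tokens {
		if _, err := io.WriteString(w, token.Value); err != nil {
			return err
		}
	}
	return nil
}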

47
formatters/console.go Normal file

@@ -0,0 +1,47 @@
package formatters
import (
"bufio"
"io"
. "github.com/alecthomas/chroma" // nolint
"github.com/alecthomas/colour"
)
// DefaultConsoleTheme maps token types to colour directives for the colour package.
var DefaultConsoleTheme = map[TokenType]string{
Number: "^B^3",
Comment: "^5",
String: "^B^5",
Keyword: "^B^7",
}
// Console formatter.
//
// formatter := Console(DefaultConsoleTheme)
func Console(theme map[TokenType]string) Formatter {
return &consoleFormatter{theme}
}
type consoleFormatter struct {
theme map[TokenType]string
}
func (c *consoleFormatter) Format(w io.Writer, tokens []Token) error {
bw := bufio.NewWriterSize(w, 1024)
printer := colour.Colour(bw)
for _, token := range tokens {
clr, ok := c.theme[token.Type]
if !ok {
clr, ok = c.theme[token.Type.SubCategory()]
if !ok {
clr, ok = c.theme[token.Type.Category()]
if !ok {
clr = "^R"
}
}
}
printer.Printf(clr+"%s", token.Value)
}
return bw.Flush()
}
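
Format resolves a colour by walking from the concrete token type up through its sub-category and category, defaulting to "^R", so a theme only needs entries for broad groups. An illustrative lookup (a sketch, not part of this commit):

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
	"github.com/alecthomas/chroma/formatters"
)

func main() {
	// LiteralStringBacktick (3102) has no theme entry, but its
	// sub-category is LiteralString (3100), aliased as String, which does.
	tt := chroma.LiteralStringBacktick
	fmt.Printf("%s -> %q\n", tt, formatters.DefaultConsoleTheme[tt.SubCategory()])
	// Output: LiteralStringBacktick -> "^B^5"
}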

210
lexer.go Normal file

@@ -0,0 +1,210 @@
package chroma
import (
"fmt"
"regexp"
"strings"
)
// Config for a lexer.
type Config struct {
// Name of the lexer.
Name string
// Shortcuts for the lexer
Aliases []string
// File name globs
Filenames []string
// Secondary file name globs
AliasFilenames []string
// MIME types
MimeTypes []string
// Priority of the lexer, used to break ties when multiple lexers match and no content is provided
Priority int
// Don't strip leading and trailing newlines from the input.
DontStripNL bool
// Strip all leading and trailing whitespace from the input
StripAll bool
// Don't ensure that the input ends with a newline. Some lexers
// that consume input linewise require a trailing newline.
DontEnsureNL bool
// If given and greater than 0, expand tabs in the input.
TabSize int
// If given, must be an encoding name. This encoding will be used to
// convert the input string to Unicode, if it is not already a Unicode
// string.
Encoding string
}
type Token struct {
Type TokenType
Value string
}
func (t Token) String() string { return fmt.Sprintf("Token{%s, %q}", t.Type, t.Value) }
func (t Token) GoString() string { return t.String() }
type Lexer interface {
Config() *Config
Tokenise(text string) ([]Token, error)
}
// Analyser determines if this lexer is appropriate for the given text.
type Analyser interface {
AnalyseText(text string) float32
}
type Rule struct {
Pattern string
Type Emitter
Modifier Modifier
}
// An Emitter takes group matches and returns tokens.
type Emitter interface {
// Emit tokens for the given regex groups.
Emit(groups []string) []Token
}
type EmitterFunc func(groups []string) []Token
func (e EmitterFunc) Emit(groups []string) []Token { return e(groups) }
// ByGroups emits a token for each matching group in the rule's regex.
func ByGroups(types ...TokenType) Emitter {
return EmitterFunc(func(groups []string) (out []Token) {
for i, group := range groups[1:] {
out = append(out, Token{types[i], group})
}
return
})
}
// Words creates a regex that matches any of the given literal words.
func Words(words ...string) string {
for i, word := range words {
words[i] = regexp.QuoteMeta(word)
}
return "\\b(?:" + strings.Join(words, "|") + ")\\b"
}
type Rules map[string][]Rule
// MustNewLexer creates a new Lexer or panics.
func MustNewLexer(config *Config, rules Rules) Lexer {
lexer, err := NewLexer(config, rules)
if err != nil {
panic(err)
}
return lexer
}
// NewLexer creates a new regex-based Lexer.
//
// "rules" is a state machine transitition map. Each key is a state. Values are sets of rules
// that match input, optionally modify lexer state, and output tokens.
func NewLexer(config *Config, rules Rules) (Lexer, error) {
if _, ok := rules["root"]; !ok {
return nil, fmt.Errorf("no \"root\" state")
}
compiledRules := map[string][]CompiledRule{}
for state, rules := range rules {
for _, rule := range rules {
crule := CompiledRule{Rule: rule}
re, err := regexp.Compile("^(?m)" + rule.Pattern)
if err != nil {
return nil, fmt.Errorf("invalid regex %q for state %q: %s", rule.Pattern, state, err)
}
crule.Regexp = re
compiledRules[state] = append(compiledRules[state], crule)
}
}
// Apply any pre-processor modifiers.
for state, rules := range compiledRules {
for index, rule := range rules {
if rule.Modifier != nil {
err := rule.Modifier.Preprocess(compiledRules, state, index)
if err != nil {
return nil, err
}
}
}
}
return &regexLexer{
config: config,
rules: compiledRules,
}, nil
}
// A CompiledRule is a Rule with a pre-compiled regex.
type CompiledRule struct {
Rule
Regexp *regexp.Regexp
}
type regexLexer struct {
config *Config
rules map[string][]CompiledRule
}
func (r *regexLexer) Config() *Config {
return r.config
}
type LexerState struct {
Text string
Pos int
Stack []string
Rules map[string][]CompiledRule
State string
}
func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
state := &LexerState{
Text: text,
Stack: []string{"root"},
Rules: r.rules,
}
for state.Pos < len(text) && len(state.Stack) > 0 {
state.State = state.Stack[len(state.Stack)-1]
rule, index := matchRules(state.Text[state.Pos:], state.Rules[state.State])
// No match.
if index == nil {
out = append(out, Token{Error, state.Text[state.Pos : state.Pos+1]})
state.Pos++
continue
}
groups := make([]string, len(index)/2)
for i := 0; i < len(index); i += 2 {
// Groups that did not participate in the match have index -1.
if index[i] == -1 {
continue
}
groups[i/2] = text[state.Pos+index[i] : state.Pos+index[i+1]]
}
state.Pos += index[1]
if rule.Modifier != nil {
if err = rule.Modifier.Mutate(state); err != nil {
return
}
} else {
out = append(out, rule.Type.Emit(groups)...)
}
}
return
}
func matchRules(text string, rules []CompiledRule) (CompiledRule, []int) {
for _, rule := range rules {
if index := rule.Regexp.FindStringSubmatchIndex(text); index != nil {
return rule, index
}
}
return CompiledRule{}, nil
}
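
To make the state-stack mechanics concrete, here is an illustrative two-state lexer (a sketch, not part of this commit) that enters a "string" state on an opening quote and pops back to "root" on the closing one:

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	lexer := chroma.MustNewLexer(nil, chroma.Rules{
		"root": {
			// A rule with a Modifier mutates the stack instead of
			// emitting a token, so the quotes produce no output.
			{`"`, chroma.Punctuation, chroma.Push("string")},
			{`\s+`, chroma.Whitespace, nil},
			{`\w+`, chroma.Name, nil},
		},
		"string": {
			{`"`, chroma.Punctuation, chroma.Pop(1)},
			{`[^"]+`, chroma.LiteralString, nil},
		},
	})
	tokens, err := lexer.Tokenise(`hello "world"`)
	if err != nil {
		panic(err)
	}
	fmt.Println(tokens) // [Token{Name, "hello"} Token{TextWhitespace, " "} Token{LiteralString, "world"}]
}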

52
lexer_test.go Normal file

@@ -0,0 +1,52 @@
package chroma
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestTokenTypeClassifiers(t *testing.T) {
require.True(t, GenericDeleted.InCategory(Generic))
require.True(t, LiteralStringBacktick.InSubCategory(String))
require.Equal(t, LiteralStringBacktick.String(), "LiteralStringBacktick")
}
func TestSimpleLexer(t *testing.T) {
lexer, err := NewLexer(
&Config{
Name: "INI",
Aliases: []string{"ini", "cfg"},
Filenames: []string{"*.ini", "*.cfg"},
},
map[string][]Rule{
"root": []Rule{
{`\s+`, Whitespace, nil},
{`;.*?$`, Comment, nil},
{`\[.*?\]$`, Keyword, nil},
{`(.*?)(\s*)(=)(\s*)(.*?)$`, ByGroups(Name, Whitespace, Operator, Whitespace, String), nil},
},
},
)
require.NoError(t, err)
actual, err := lexer.Tokenise(`
	; this is a comment
	[section]
	a = 10
`)
require.NoError(t, err)
expected := []Token{
{Whitespace, "\n\t"},
{Comment, "; this is a comment"},
{Whitespace, "\n\t"},
{Keyword, "[section]"},
{Whitespace, "\n\t"},
{Name, "a"},
{Whitespace, " "},
{Operator, "="},
{Whitespace, " "},
{LiteralString, "10"},
{Whitespace, "\n"},
}
require.Equal(t, expected, actual)
}

17
lexers/default.go Normal file

@@ -0,0 +1,17 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// Default lexer if no other is found.
var Default = Register(NewLexer(&Config{
Name: "default",
Filenames: []string{"*"},
Priority: 99,
}, Rules{
"root": []Rule{
{`.+`, Text, nil},
{`\n`, Text, nil},
},
}))

67
lexers/go.go Normal file

@@ -0,0 +1,67 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint: golint
)
// Go lexer.
var Go = Register(NewLexer(
&Config{
Name: "Go",
Filenames: []string{"*.go"},
Aliases: []string{"go", "golang"},
MimeTypes: []string{"text/x-gosrc"},
},
Rules{
`root`: []Rule{
{`\n`, Text, nil},
{`\s+`, Text, nil},
{`\\\n`, Text, nil}, // line continuations
{`//(.*?)\n`, CommentSingle, nil},
{`/(\\\n)?[*](.|\n)*?[*](\\\n)?/`, CommentMultiline, nil},
{`(import|package)\b`, KeywordNamespace, nil},
{`(var|func|struct|map|chan|type|interface|const)\b`,
KeywordDeclaration, nil},
{Words(`break`, `default`, `select`, `case`, `defer`, `go`,
`else`, `goto`, `switch`, `fallthrough`, `if`, `range`,
`continue`, `for`, `return`), Keyword, nil},
{`(true|false|iota|nil)\b`, KeywordConstant, nil},
{Words(`uint`, `uint8`, `uint16`, `uint32`, `uint64`,
`int`, `int8`, `int16`, `int32`, `int64`,
`float`, `float32`, `float64`,
`complex64`, `complex128`, `byte`, `rune`,
`string`, `bool`, `error`, `uintptr`,
`print`, `println`, `panic`, `recover`, `close`, `complex`,
`real`, `imag`, `len`, `cap`, `append`, `copy`, `delete`,
`new`, `make`),
KeywordType, nil},
// imaginary_lit
{`\d+i`, LiteralNumber, nil},
{`\d+\.\d*([Ee][-+]\d+)?i`, LiteralNumber, nil},
{`\.\d+([Ee][-+]\d+)?i`, LiteralNumber, nil},
{`\d+[Ee][-+]\d+i`, LiteralNumber, nil},
// float_lit
{`\d+(\.\d+[eE][+\-]?\d+|\.\d*|[eE][+\-]?\d+)`, LiteralNumberFloat, nil},
{`\.\d+([eE][+\-]?\d+)?`, LiteralNumberFloat, nil},
// int_lit
// -- octal_lit
{`0[0-7]+`, LiteralNumberOct, nil},
// -- hex_lit
{`0[xX][0-9a-fA-F]+`, LiteralNumberHex, nil},
// -- decimal_lit
{`(0|[1-9][0-9]*)`, LiteralNumberInteger, nil},
// char_lit
{`'(\\['"\\abfnrtv]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|[^\\])'`, LiteralStringChar, nil},
// StringLiteral
// -- raw_string_lit
{"`[^`]*`", String, nil},
// -- interpreted_string_lit
{`"(\\\\|\\"|[^"])*"`, String, nil},
// Tokens
{`(<<=|>>=|<<|>>|<=|>=|&\^=|&\^|\+=|-=|\*=|/=|%=|&=|\|=|&&|\|\||<-|\+\+|--|==|!=|:=|\.\.\.|[+\-*/%&])`, Operator, nil},
{`[|^<>=!()\[\]{}.,;:]`, Punctuation, nil},
// identifier
{`[^\W\d]\w*`, NameOther, nil},
},
},
))
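
The keyword groups above are built with Words from lexer.go, which escapes each literal and joins them into a single alternation. A quick illustration (a sketch, not part of this commit):

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	fmt.Println(chroma.Words("break", "case", "go"))
	// Output: \b(?:break|case|go)\b
}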

24
lexers/ini.go Normal file

@@ -0,0 +1,24 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// INI lexer.
var INI = Register(NewLexer(
&Config{
Name: "INI",
Aliases: []string{"ini", "cfg", "dosini"},
Filenames: []string{"*.ini", "*.cfg", "*.inf"},
MimeTypes: []string{"text/x-ini", "text/inf"},
},
map[string][]Rule{
"root": []Rule{
{`\s+`, Whitespace, nil},
{`;.*?$`, Comment, nil},
{`\[.*?\]$`, Keyword, nil},
{`(.*?)(\s*)(=)(\s*)(.*?)$`, ByGroups(Name, Whitespace, Operator, Whitespace, String), nil},
// standalone option, supported by some INI parsers
{`(.+?)$`, NameAttribute, nil},
},
},
))

71
lexers/registry.go Normal file

@@ -0,0 +1,71 @@
package lexers
import (
"sort"
"github.com/danwakefield/fnmatch"
"github.com/alecthomas/chroma"
)
type prioritisedLexers []chroma.Lexer
func (p prioritisedLexers) Len() int { return len(p) }
func (p prioritisedLexers) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func (p prioritisedLexers) Less(i, j int) bool { return p[i].Config().Priority < p[j].Config().Priority }
// Registry is the global Lexer registry.
var Registry = registry{byName: map[string]chroma.Lexer{}}
type registry struct {
Lexers []chroma.Lexer
byName map[string]chroma.Lexer
}
// Names of all lexers, optionally including aliases.
func (r *registry) Names(withAliases bool) []string {
out := []string{}
for _, lexer := range r.Lexers {
config := lexer.Config()
out = append(out, config.Name)
if withAliases {
out = append(out, config.Aliases...)
}
}
return out
}
// Get a Lexer by name.
func (r *registry) Get(name string) chroma.Lexer {
return r.byName[name]
}
// Match returns all lexers whose filename globs match filename, sorted by ascending Priority.
func (r *registry) Match(filename string) []chroma.Lexer {
lexers := prioritisedLexers{}
for _, lexer := range r.Lexers {
config := lexer.Config()
for _, glob := range config.Filenames {
if fnmatch.Match(glob, filename, 0) {
lexers = append(lexers, lexer)
break
}
}
}
sort.Sort(lexers)
return lexers
}
// Register a Lexer with the global registry.
func Register(lexer chroma.Lexer, err error) chroma.Lexer {
if err != nil {
panic(err)
}
config := lexer.Config()
Registry.byName[config.Name] = lexer
for _, alias := range config.Aliases {
Registry.byName[alias] = lexer
}
Registry.Lexers = append(Registry.Lexers, lexer)
return lexer
}
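
Putting the registry together: Register wires each lexer into both the by-name map and the list that Match scans. A usage sketch (not part of this commit):

package main

import (
	"fmt"

	"github.com/alecthomas/chroma/lexers"
)

func main() {
	// Lookup by name or alias.
	fmt.Println(lexers.Registry.Get("golang").Config().Name) // Go

	// Match by filename glob. The catch-all "default" lexer (Priority 99)
	// matches "*", so there is always at least one result; lower Priority
	// sorts first.
	for _, lexer := range lexers.Registry.Match("main.go") {
		fmt.Println(lexer.Config().Name) // Go, then default
	}
}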

79
modifiers.go Normal file

@@ -0,0 +1,79 @@
package chroma
import "fmt"
// A Modifier modifies the behaviour of the lexer.
type Modifier interface {
// Preprocess the lexer rules.
//
// "self" and "rule" are the rule name and index this Modifier is associated with.
Preprocess(rules map[string][]CompiledRule, self string, rule int) error
// Mutate the lexer state machine as it is processing.
Mutate(state *LexerState) error
}
// A MutatorFunc is a Modifier that mutates the lexer state machine as it is processing.
type MutatorFunc func(state *LexerState) error
func (m MutatorFunc) Preprocess(rules map[string][]CompiledRule, self string, rule int) error {
return nil
}
func (m MutatorFunc) Mutate(state *LexerState) error {
return m(state)
}
// A PreprocessorFunc is a Modifier that pre-processes the lexer rules.
type PreprocessorFunc func(rules map[string][]CompiledRule, self string, rule int) error
func (p PreprocessorFunc) Preprocess(rules map[string][]CompiledRule, self string, rule int) error {
return p(rules, self, rule)
}
func (p PreprocessorFunc) Mutate(state *LexerState) error {
return nil
}
// Modifiers applies a set of Modifiers in order.
func Modifiers(modifiers ...Modifier) MutatorFunc {
return func(state *LexerState) error {
for _, modifier := range modifiers {
if err := modifier.Mutate(state); err != nil {
return err
}
}
return nil
}
}
// Include inlines the rules of the given state in place of this rule.
func Include(state string) Rule {
return Rule{
Modifier: PreprocessorFunc(func(rules map[string][]CompiledRule, self string, rule int) error {
includedRules, ok := rules[state]
if !ok {
return fmt.Errorf("invalid include state %q", state)
}
stateRules := rules[self]
stateRules = append(stateRules[:rule], append(includedRules, stateRules[rule+1:]...)...)
rules[self] = stateRules
return nil
}),
}
}
// Push states onto the stack.
func Push(states ...string) MutatorFunc {
return func(s *LexerState) error {
s.Stack = append(s.Stack, states...)
return nil
}
}
// Pop n states from the stack when the rule matches.
func Pop(n int) MutatorFunc {
return func(state *LexerState) error {
state.Stack = state.Stack[:len(state.Stack)-n]
return nil
}
}
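
Include does its work in the preprocessing pass of NewLexer, splicing the included state's compiled rules in place of the Include rule before any input is tokenised. A sketch (not part of this commit):

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	// The "comments" rules are inlined into "root" at compile time,
	// replacing the Include rule itself.
	lexer := chroma.MustNewLexer(nil, chroma.Rules{
		"root": {
			chroma.Include("comments"),
			{`\w+`, chroma.Name, nil},
			{`\s+`, chroma.Whitespace, nil},
		},
		"comments": {
			{`//[^\n]*`, chroma.CommentSingle, nil},
		},
	})
	tokens, _ := lexer.Tokenise("x // hi\n")
	fmt.Println(tokens)
	// [Token{Name, "x"} Token{TextWhitespace, " "} Token{CommentSingle, "// hi"} Token{TextWhitespace, "\n"}]
}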

6
modifiers_test.go Normal file

@@ -0,0 +1,6 @@
package chroma
import "testing"
func TestPop(t *testing.T) {
}

94
tokentype_string.go Normal file

@@ -0,0 +1,94 @@
// Code generated by "stringer -type TokenType"; DO NOT EDIT
package chroma
import "fmt"
const _TokenType_name = "EscapeErrorOtherKeywordKeywordConstantKeywordDeclarationKeywordNamespaceKeywordPseudoKeywordReservedKeywordTypeNameNameAttributeNameBuiltinNameBuiltinPseudoNameClassNameConstantNameDecoratorNameEntityNameExceptionNameFunctionNameFunctionMagicNamePropertyNameLabelNameNamespaceNameOtherNameTagNameVariableNameVariableClassNameVariableGlobalNameVariableInstanceNameVariableMagicLiteralLiteralDateLiteralStringLiteralStringAffixLiteralStringBacktickLiteralStringCharLiteralStringDelimiterLiteralStringDocLiteralStringDoubleLiteralStringEscapeLiteralStringHeredocLiteralStringInterpolLiteralStringOtherLiteralStringRegexLiteralStringSingleLiteralStringSymbolLiteralNumberLiteralNumberBinLiteralNumberFloatLiteralNumberHexLiteralNumberIntegerLiteralNumberIntegerLongLiteralNumberOctOperatorOperatorWordPunctuationCommentCommentHashbangCommentMultilineCommentPreprocCommentPreprocFileCommentSingleCommentSpecialGenericGenericDeletedGenericEmphGenericErrorGenericHeadingGenericInsertedGenericOutputGenericPromptGenericStrongGenericSubheadingGenericTracebackTextTextWhitespace"
var _TokenType_map = map[TokenType]string{
0: _TokenType_name[0:6],
1: _TokenType_name[6:11],
2: _TokenType_name[11:16],
1000: _TokenType_name[16:23],
1001: _TokenType_name[23:38],
1002: _TokenType_name[38:56],
1003: _TokenType_name[56:72],
1004: _TokenType_name[72:85],
1005: _TokenType_name[85:100],
1006: _TokenType_name[100:111],
2000: _TokenType_name[111:115],
2001: _TokenType_name[115:128],
2002: _TokenType_name[128:139],
2003: _TokenType_name[139:156],
2004: _TokenType_name[156:165],
2005: _TokenType_name[165:177],
2006: _TokenType_name[177:190],
2007: _TokenType_name[190:200],
2008: _TokenType_name[200:213],
2009: _TokenType_name[213:225],
2010: _TokenType_name[225:242],
2011: _TokenType_name[242:254],
2012: _TokenType_name[254:263],
2013: _TokenType_name[263:276],
2014: _TokenType_name[276:285],
2015: _TokenType_name[285:292],
2016: _TokenType_name[292:304],
2017: _TokenType_name[304:321],
2018: _TokenType_name[321:339],
2019: _TokenType_name[339:359],
2020: _TokenType_name[359:376],
3000: _TokenType_name[376:383],
3001: _TokenType_name[383:394],
3100: _TokenType_name[394:407],
3101: _TokenType_name[407:425],
3102: _TokenType_name[425:446],
3103: _TokenType_name[446:463],
3104: _TokenType_name[463:485],
3105: _TokenType_name[485:501],
3106: _TokenType_name[501:520],
3107: _TokenType_name[520:539],
3108: _TokenType_name[539:559],
3109: _TokenType_name[559:580],
3110: _TokenType_name[580:598],
3111: _TokenType_name[598:616],
3112: _TokenType_name[616:635],
3113: _TokenType_name[635:654],
3200: _TokenType_name[654:667],
3201: _TokenType_name[667:683],
3202: _TokenType_name[683:701],
3203: _TokenType_name[701:717],
3204: _TokenType_name[717:737],
3205: _TokenType_name[737:761],
3206: _TokenType_name[761:777],
4000: _TokenType_name[777:785],
4001: _TokenType_name[785:797],
5000: _TokenType_name[797:808],
6000: _TokenType_name[808:815],
6001: _TokenType_name[815:830],
6002: _TokenType_name[830:846],
6003: _TokenType_name[846:860],
6004: _TokenType_name[860:878],
6005: _TokenType_name[878:891],
6006: _TokenType_name[891:905],
7000: _TokenType_name[905:912],
7001: _TokenType_name[912:926],
7002: _TokenType_name[926:937],
7003: _TokenType_name[937:949],
7004: _TokenType_name[949:963],
7005: _TokenType_name[963:978],
7006: _TokenType_name[978:991],
7007: _TokenType_name[991:1004],
7008: _TokenType_name[1004:1017],
7009: _TokenType_name[1017:1034],
7010: _TokenType_name[1034:1050],
8000: _TokenType_name[1050:1054],
8001: _TokenType_name[1054:1068],
}
func (i TokenType) String() string {
if str, ok := _TokenType_map[i]; ok {
return str
}
return fmt.Sprintf("TokenType(%d)", i)
}

181
types.go Normal file

@@ -0,0 +1,181 @@
package chroma
// TokenType is the type of token to highlight.
//
// It is also an Emitter, emitting a single token of itself.
type TokenType int
// Set of TokenTypes.
//
// Categories of types are grouped in ranges of 1000, while sub-categories are in ranges of 100. For
// example, the literal category is in the range 3000-3999. The sub-category for literal strings is
// in the range 3100-3199.
const (
Escape TokenType = iota
Error
Other
)
// Keywords.
const (
Keyword TokenType = 1000 + iota
KeywordConstant
KeywordDeclaration
KeywordNamespace
KeywordPseudo
KeywordReserved
KeywordType
)
// Names.
const (
Name TokenType = 2000 + iota
NameAttribute
NameBuiltin
NameBuiltinPseudo
NameClass
NameConstant
NameDecorator
NameEntity
NameException
NameFunction
NameFunctionMagic
NameProperty
NameLabel
NameNamespace
NameOther
NameTag
NameVariable
NameVariableClass
NameVariableGlobal
NameVariableInstance
NameVariableMagic
)
// Literals.
const (
Literal TokenType = 3000 + iota
LiteralDate
)
// Strings.
const (
LiteralString TokenType = 3100 + iota
LiteralStringAffix
LiteralStringBacktick
LiteralStringChar
LiteralStringDelimiter
LiteralStringDoc
LiteralStringDouble
LiteralStringEscape
LiteralStringHeredoc
LiteralStringInterpol
LiteralStringOther
LiteralStringRegex
LiteralStringSingle
LiteralStringSymbol
)
// Numbers.
const (
LiteralNumber TokenType = 3200 + iota
LiteralNumberBin
LiteralNumberFloat
LiteralNumberHex
LiteralNumberInteger
LiteralNumberIntegerLong
LiteralNumberOct
)
// Operators.
const (
Operator TokenType = 4000 + iota
OperatorWord
)
// Punctuation.
const (
Punctuation TokenType = 5000 + iota
)
// Comments.
const (
Comment TokenType = 6000 + iota
CommentHashbang
CommentMultiline
CommentPreproc
CommentPreprocFile
CommentSingle
CommentSpecial
)
// Generic tokens.
const (
Generic TokenType = 7000 + iota
GenericDeleted
GenericEmph
GenericError
GenericHeading
GenericInserted
GenericOutput
GenericPrompt
GenericStrong
GenericSubheading
GenericTraceback
)
// Text.
const (
Text TokenType = 8000 + iota
TextWhitespace
)
// Aliases.
const (
Whitespace = TextWhitespace
Date = LiteralDate
String = LiteralString
StringAffix = LiteralStringAffix
StringBacktick = LiteralStringBacktick
StringChar = LiteralStringChar
StringDelimiter = LiteralStringDelimiter
StringDoc = LiteralStringDoc
StringDouble = LiteralStringDouble
StringEscape = LiteralStringEscape
StringHeredoc = LiteralStringHeredoc
StringInterpol = LiteralStringInterpol
StringOther = LiteralStringOther
StringRegex = LiteralStringRegex
StringSingle = LiteralStringSingle
StringSymbol = LiteralStringSymbol
Number = LiteralNumber
NumberBin = LiteralNumberBin
NumberFloat = LiteralNumberFloat
NumberHex = LiteralNumberHex
NumberInteger = LiteralNumberInteger
NumberIntegerLong = LiteralNumberIntegerLong
NumberOct = LiteralNumberOct
)
func (t TokenType) Category() TokenType {
return t / 1000 * 1000
}
func (t TokenType) SubCategory() TokenType {
return t / 100 * 100
}
func (t TokenType) InCategory(other TokenType) bool {
return t/1000 == other/1000
}
func (t TokenType) InSubCategory(other TokenType) bool {
return t/100 == other/100
}
func (t TokenType) Emit(groups []string) []Token {
return []Token{{Type: t, Value: groups[0]}}
}
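
The integer encoding makes the category helpers above pure arithmetic. A quick demonstration (a sketch, not part of this commit):

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	tt := chroma.LiteralStringBacktick           // 3102
	fmt.Println(tt.Category())                   // Literal (3000)
	fmt.Println(tt.SubCategory())                // LiteralString (3100)
	fmt.Println(tt.InSubCategory(chroma.String)) // true
}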