
Initial commit! Working!

Alec Thomas 2017-06-02 00:17:21 +10:00
parent 3de978543f
commit b2fb8edf77
16 changed files with 962 additions and 0 deletions

2
Makefile Normal file

@@ -0,0 +1,2 @@
tokentype_string.go: types.go
stringer -type TokenType

47
cmd/chroma/main.go Normal file

@@ -0,0 +1,47 @@
package main
import (
"fmt"
"io/ioutil"
"os"
"runtime/pprof"
"gopkg.in/alecthomas/kingpin.v3-unstable"
"github.com/alecthomas/chroma"
"github.com/alecthomas/chroma/formatters"
"github.com/alecthomas/chroma/lexers"
)
var (
profileFlag = kingpin.Flag("profile", "Enable profiling to file.").String()
tokensFlag = kingpin.Flag("tokens", "Dump raw tokens.").Bool()
filesArgs = kingpin.Arg("files", "Files to highlight.").Required().ExistingFiles()
)
func main() {
kingpin.Parse()
if *profileFlag != "" {
f, err := os.Create(*profileFlag)
kingpin.FatalIfError(err, "")
pprof.StartCPUProfile(f)
defer pprof.StopCPUProfile()
}
formatter := formatters.Console(formatters.DefaultConsoleTheme)
for _, filename := range *filesArgs {
// Match always includes the catch-all "default" lexer, so the result is
// never empty. Use a distinct name to avoid shadowing the lexers package.
matched := lexers.Registry.Match(filename)
lexer := chroma.Coalesce(matched[0])
contents, err := ioutil.ReadFile(filename)
kingpin.FatalIfError(err, "")
tokens, err := lexer.Tokenise(string(contents))
kingpin.FatalIfError(err, "")
if *tokensFlag {
for _, token := range tokens {
fmt.Println(token)
}
} else {
formatter.Format(os.Stdout, tokens)
}
}
}

31
coalesce.go Normal file

@@ -0,0 +1,31 @@
package chroma
// Coalesce is a Lexer interceptor that collapses runs of tokens of the same type into a single token.
func Coalesce(lexer Lexer) Lexer {
return &coalescer{lexer}
}
type coalescer struct {
Lexer
}
func (d *coalescer) Tokenise(text string) ([]Token, error) {
in, err := d.Lexer.Tokenise(text)
if err != nil {
return in, err
}
out := []Token{}
for _, token := range in {
if len(out) == 0 {
out = append(out, token)
continue
}
last := &out[len(out)-1]
if last.Type == token.Type {
last.Value += token.Value
} else {
out = append(out, token)
}
}
return out, err
}

22
coalesce_test.go Normal file

@@ -0,0 +1,22 @@
package chroma
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestCoalesce(t *testing.T) {
lexer := Coalesce(MustNewLexer(nil, Rules{
"root": []Rule{
{`[[:punct:]]`, Punctuation, nil},
},
}))
actual, err := lexer.Tokenise("!@#$%")
require.NoError(t, err)
expected := []Token{
{Punctuation, "!@#$%"},
}
require.Equal(t, expected, actual)
}

12
formatters/api.go Normal file

@@ -0,0 +1,12 @@
package formatters
import (
"io"
"github.com/alecthomas/chroma"
)
// Formatter takes a token stream and formats it.
type Formatter interface {
Format(w io.Writer, tokens []chroma.Token) error
}
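
The Formatter interface above is the only contract a renderer has to satisfy. As a minimal sketch of an alternative implementation (hypothetical, not part of this commit), a formatter that drops all styling and writes token values verbatim:

package formatters

import (
	"io"

	"github.com/alecthomas/chroma"
)

// plainFormatter writes token values verbatim, with no styling.
type plainFormatter struct{}

func (plainFormatter) Format(w io.Writer, tokens []chroma.Token) error {
	for _, token := range tokens {
		if _, err := io.WriteString(w, token.Value); err != nil {
			return err
		}
	}
	return nil
}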

47
formatters/console.go Normal file

@@ -0,0 +1,47 @@
package formatters
import (
"bufio"
"io"
. "github.com/alecthomas/chroma" // nolint
"github.com/alecthomas/colour"
)
// DefaultConsoleTheme maps token types to colour directives for the colour package.
var DefaultConsoleTheme = map[TokenType]string{
Number: "^B^3",
Comment: "^5",
String: "^B^5",
Keyword: "^B^7",
}
// Console formatter.
//
// formatter := Console(DefaultConsoleTheme)
func Console(theme map[TokenType]string) Formatter {
return &consoleFormatter{theme}
}
type consoleFormatter struct {
theme map[TokenType]string
}
func (c *consoleFormatter) Format(w io.Writer, tokens []Token) error {
bw := bufio.NewWriterSize(w, 1024)
printer := colour.Colour(bw)
for _, token := range tokens {
clr, ok := c.theme[token.Type]
if !ok {
clr, ok = c.theme[token.Type.SubCategory()]
if !ok {
clr, ok = c.theme[token.Type.Category()]
if !ok {
clr = "^R"
}
}
}
printer.Printf(clr+"%s", token.Value)
}
return bw.Flush()
}
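
Format resolves a colour by walking from the concrete token type up through its sub-category and category, defaulting to "^R", so a theme only needs entries for broad groups. An illustrative lookup (a sketch, not part of this commit):

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
	"github.com/alecthomas/chroma/formatters"
)

func main() {
	// LiteralStringBacktick (3102) has no theme entry, but its
	// sub-category is LiteralString (3100), aliased as String, which does.
	tt := chroma.LiteralStringBacktick
	fmt.Printf("%s -> %q\n", tt, formatters.DefaultConsoleTheme[tt.SubCategory()])
	// Output: LiteralStringBacktick -> "^B^5"
}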

210
lexer.go Normal file

@@ -0,0 +1,210 @@
package chroma
import (
"fmt"
"regexp"
"strings"
)
// Config for a lexer.
type Config struct {
// Name of the lexer.
Name string
// Shortcuts for the lexer
Aliases []string
// File name globs
Filenames []string
// Secondary file name globs
AliasFilenames []string
// MIME types
MimeTypes []string
// Priority of the lexer, used to break ties when multiple lexers match and no content is provided
Priority int
// Don't strip leading and trailing newlines from the input.
DontStripNL bool
// Strip all leading and trailing whitespace from the input
StripAll bool
// Don't ensure that the input ends with a newline. Some lexers
// that consume input linewise require a trailing newline.
DontEnsureNL bool
// If given and greater than 0, expand tabs in the input.
TabSize int
// If given, must be an encoding name. This encoding will be used to
// convert the input string to Unicode, if it is not already a Unicode
// string.
Encoding string
}
type Token struct {
Type TokenType
Value string
}
func (t Token) String() string { return fmt.Sprintf("Token{%s, %q}", t.Type, t.Value) }
func (t Token) GoString() string { return t.String() }
type Lexer interface {
Config() *Config
Tokenise(text string) ([]Token, error)
}
// Analyser determines if this lexer is appropriate for the given text.
type Analyser interface {
AnalyseText(text string) float32
}
type Rule struct {
Pattern string
Type Emitter
Modifier Modifier
}
// An Emitter takes group matches and returns tokens.
type Emitter interface {
// Emit tokens for the given regex groups.
Emit(groups []string) []Token
}
type EmitterFunc func(groups []string) []Token
func (e EmitterFunc) Emit(groups []string) []Token { return e(groups) }
// ByGroups emits a token for each matching group in the rule's regex.
func ByGroups(types ...TokenType) Emitter {
return EmitterFunc(func(groups []string) (out []Token) {
for i, group := range groups[1:] {
out = append(out, Token{types[i], group})
}
return
})
}
// Words creates a regex that matches any of the given literal words.
func Words(words ...string) string {
for i, word := range words {
words[i] = regexp.QuoteMeta(word)
}
return "\\b(?:" + strings.Join(words, "|") + ")\\b"
}
type Rules map[string][]Rule
// MustNewLexer creates a new Lexer or panics.
func MustNewLexer(config *Config, rules Rules) Lexer {
lexer, err := NewLexer(config, rules)
if err != nil {
panic(err)
}
return lexer
}
// NewLexer creates a new regex-based Lexer.
//
// "rules" is a state machine transitition map. Each key is a state. Values are sets of rules
// that match input, optionally modify lexer state, and output tokens.
func NewLexer(config *Config, rules Rules) (Lexer, error) {
if _, ok := rules["root"]; !ok {
return nil, fmt.Errorf("no \"root\" state")
}
compiledRules := map[string][]CompiledRule{}
for state, rules := range rules {
for _, rule := range rules {
crule := CompiledRule{Rule: rule}
re, err := regexp.Compile("^(?m)" + rule.Pattern)
if err != nil {
return nil, fmt.Errorf("invalid regex %q for state %q: %s", rule.Pattern, state, err)
}
crule.Regexp = re
compiledRules[state] = append(compiledRules[state], crule)
}
}
// Apply any pre-processor modifiers.
for state, rules := range compiledRules {
for index, rule := range rules {
if rule.Modifier != nil {
err := rule.Modifier.Preprocess(compiledRules, state, index)
if err != nil {
return nil, err
}
}
}
}
return &regexLexer{
config: config,
rules: compiledRules,
}, nil
}
// A CompiledRule is a Rule with a pre-compiled regex.
type CompiledRule struct {
Rule
Regexp *regexp.Regexp
}
type regexLexer struct {
config *Config
rules map[string][]CompiledRule
}
func (r *regexLexer) Config() *Config {
return r.config
}
type LexerState struct {
Text string
Pos int
Stack []string
Rules map[string][]CompiledRule
State string
}
func (r *regexLexer) Tokenise(text string) (out []Token, err error) {
state := &LexerState{
Text: text,
Stack: []string{"root"},
Rules: r.rules,
}
for state.Pos < len(text) && len(state.Stack) > 0 {
state.State = state.Stack[len(state.Stack)-1]
rule, index := matchRules(state.Text[state.Pos:], state.Rules[state.State])
// No match.
if index == nil {
out = append(out, Token{Error, state.Text[state.Pos : state.Pos+1]})
state.Pos++
continue
}
groups := make([]string, len(index)/2)
for i := 0; i < len(index); i += 2 {
// Groups that did not participate in the match have index -1.
if index[i] == -1 {
continue
}
groups[i/2] = text[state.Pos+index[i] : state.Pos+index[i+1]]
}
state.Pos += index[1]
if rule.Modifier != nil {
if err = rule.Modifier.Mutate(state); err != nil {
return
}
} else {
out = append(out, rule.Type.Emit(groups)...)
}
}
return
}
func matchRules(text string, rules []CompiledRule) (CompiledRule, []int) {
for _, rule := range rules {
if index := rule.Regexp.FindStringSubmatchIndex(text); index != nil {
return rule, index
}
}
return CompiledRule{}, nil
}
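
To make the state-stack mechanics concrete, here is an illustrative two-state lexer (a sketch, not part of this commit) that enters a "string" state on an opening quote and pops back to "root" on the closing one:

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	lexer := chroma.MustNewLexer(nil, chroma.Rules{
		"root": {
			// A rule with a Modifier mutates the stack instead of
			// emitting a token, so the quotes produce no output.
			{`"`, chroma.Punctuation, chroma.Push("string")},
			{`\s+`, chroma.Whitespace, nil},
			{`\w+`, chroma.Name, nil},
		},
		"string": {
			{`"`, chroma.Punctuation, chroma.Pop(1)},
			{`[^"]+`, chroma.LiteralString, nil},
		},
	})
	tokens, err := lexer.Tokenise(`hello "world"`)
	if err != nil {
		panic(err)
	}
	fmt.Println(tokens) // [Token{Name, "hello"} Token{TextWhitespace, " "} Token{LiteralString, "world"}]
}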

52
lexer_test.go Normal file

@@ -0,0 +1,52 @@
package chroma
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestTokenTypeClassifiers(t *testing.T) {
require.True(t, GenericDeleted.InCategory(Generic))
require.True(t, LiteralStringBacktick.InSubCategory(String))
require.Equal(t, LiteralStringBacktick.String(), "LiteralStringBacktick")
}
func TestSimpleLexer(t *testing.T) {
lexer, err := NewLexer(
&Config{
Name: "INI",
Aliases: []string{"ini", "cfg"},
Filenames: []string{"*.ini", "*.cfg"},
},
map[string][]Rule{
"root": []Rule{
{`\s+`, Whitespace, nil},
{`;.*?$`, Comment, nil},
{`\[.*?\]$`, Keyword, nil},
{`(.*?)(\s*)(=)(\s*)(.*?)$`, ByGroups(Name, Whitespace, Operator, Whitespace, String), nil},
},
},
)
require.NoError(t, err)
actual, err := lexer.Tokenise(`
	; this is a comment
	[section]
	a = 10
`)
require.NoError(t, err)
expected := []Token{
{Whitespace, "\n\t"},
{Comment, "; this is a comment"},
{Whitespace, "\n\t"},
{Keyword, "[section]"},
{Whitespace, "\n\t"},
{Name, "a"},
{Whitespace, " "},
{Operator, "="},
{Whitespace, " "},
{LiteralString, "10"},
{Whitespace, "\n"},
}
require.Equal(t, expected, actual)
}

17
lexers/default.go Normal file

@@ -0,0 +1,17 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// Default lexer if no other is found.
var Default = Register(NewLexer(&Config{
Name: "default",
Filenames: []string{"*"},
Priority: 99,
}, Rules{
"root": []Rule{
{`.+`, Text, nil},
{`\n`, Text, nil},
},
}))

67
lexers/go.go Normal file

@@ -0,0 +1,67 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint: golint
)
// Go lexer.
var Go = Register(NewLexer(
&Config{
Name: "Go",
Filenames: []string{"*.go"},
Aliases: []string{"go", "golang"},
MimeTypes: []string{"text/x-gosrc"},
},
Rules{
`root`: []Rule{
{`\n`, Text, nil},
{`\s+`, Text, nil},
{`\\\n`, Text, nil}, // line continuations
{`//(.*?)\n`, CommentSingle, nil},
{`/(\\\n)?[*](.|\n)*?[*](\\\n)?/`, CommentMultiline, nil},
{`(import|package)\b`, KeywordNamespace, nil},
{`(var|func|struct|map|chan|type|interface|const)\b`,
KeywordDeclaration, nil},
{Words(`break`, `default`, `select`, `case`, `defer`, `go`,
`else`, `goto`, `switch`, `fallthrough`, `if`, `range`,
`continue`, `for`, `return`), Keyword, nil},
{`(true|false|iota|nil)\b`, KeywordConstant, nil},
{Words(`uint`, `uint8`, `uint16`, `uint32`, `uint64`,
`int`, `int8`, `int16`, `int32`, `int64`,
`float`, `float32`, `float64`,
`complex64`, `complex128`, `byte`, `rune`,
`string`, `bool`, `error`, `uintptr`,
`print`, `println`, `panic`, `recover`, `close`, `complex`,
`real`, `imag`, `len`, `cap`, `append`, `copy`, `delete`,
`new`, `make`),
KeywordType, nil},
// imaginary_lit
{`\d+i`, LiteralNumber, nil},
{`\d+\.\d*([Ee][-+]\d+)?i`, LiteralNumber, nil},
{`\.\d+([Ee][-+]\d+)?i`, LiteralNumber, nil},
{`\d+[Ee][-+]\d+i`, LiteralNumber, nil},
// float_lit
{`\d+(\.\d+[eE][+\-]?\d+|\.\d*|[eE][+\-]?\d+)`, LiteralNumberFloat, nil},
{`\.\d+([eE][+\-]?\d+)?`, LiteralNumberFloat, nil},
// int_lit
// -- octal_lit
{`0[0-7]+`, LiteralNumberOct, nil},
// -- hex_lit
{`0[xX][0-9a-fA-F]+`, LiteralNumberHex, nil},
// -- decimal_lit
{`(0|[1-9][0-9]*)`, LiteralNumberInteger, nil},
// char_lit
{`'(\\['"\\abfnrtv]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|[^\\])'`, LiteralStringChar, nil},
// StringLiteral
// -- raw_string_lit
{"`[^`]*`", String, nil},
// -- interpreted_string_lit
{`"(\\\\|\\"|[^"])*"`, String, nil},
// Tokens
{`(<<=|>>=|<<|>>|<=|>=|&\^=|&\^|\+=|-=|\*=|/=|%=|&=|\|=|&&|\|\||<-|\+\+|--|==|!=|:=|\.\.\.|[+\-*/%&])`, Operator, nil},
{`[|^<>=!()\[\]{}.,;:]`, Punctuation, nil},
// identifier
{`[^\W\d]\w*`, NameOther, nil},
},
},
))
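
The keyword groups above are built with Words from lexer.go, which escapes each literal and joins them into a single alternation. A quick illustration (a sketch, not part of this commit):

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	fmt.Println(chroma.Words("break", "case", "go"))
	// Output: \b(?:break|case|go)\b
}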

24
lexers/ini.go Normal file

@@ -0,0 +1,24 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// INI lexer.
var INI = Register(NewLexer(
&Config{
Name: "INI",
Aliases: []string{"ini", "cfg", "dosini"},
Filenames: []string{"*.ini", "*.cfg", "*.inf"},
MimeTypes: []string{"text/x-ini", "text/inf"},
},
map[string][]Rule{
"root": []Rule{
{`\s+`, Whitespace, nil},
{`;.*?$`, Comment, nil},
{`\[.*?\]$`, Keyword, nil},
{`(.*?)(\s*)(=)(\s*)(.*?)$`, ByGroups(Name, Whitespace, Operator, Whitespace, String), nil},
// standalone option, supported by some INI parsers
{`(.+?)$`, NameAttribute, nil},
},
},
))

71
lexers/registry.go Normal file

@@ -0,0 +1,71 @@
package lexers
import (
"sort"
"github.com/danwakefield/fnmatch"
"github.com/alecthomas/chroma"
)
type prioritisedLexers []chroma.Lexer
func (p prioritisedLexers) Len() int { return len(p) }
func (p prioritisedLexers) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func (p prioritisedLexers) Less(i, j int) bool { return p[i].Config().Priority < p[j].Config().Priority }
// Registry is the global Lexer registry.
var Registry = registry{byName: map[string]chroma.Lexer{}}
type registry struct {
Lexers []chroma.Lexer
byName map[string]chroma.Lexer
}
// Names of all lexers, optionally including aliases.
func (r *registry) Names(withAliases bool) []string {
out := []string{}
for _, lexer := range r.Lexers {
config := lexer.Config()
out = append(out, config.Name)
if withAliases {
out = append(out, config.Aliases...)
}
}
return out
}
// Get a Lexer by name.
func (r *registry) Get(name string) chroma.Lexer {
return r.byName[name]
}
// Match returns all lexers whose filename globs match filename, sorted by ascending Priority.
func (r *registry) Match(filename string) []chroma.Lexer {
lexers := prioritisedLexers{}
for _, lexer := range r.Lexers {
config := lexer.Config()
for _, glob := range config.Filenames {
if fnmatch.Match(glob, filename, 0) {
lexers = append(lexers, lexer)
break
}
}
}
sort.Sort(lexers)
return lexers
}
// Register a Lexer with the global registry.
func Register(lexer chroma.Lexer, err error) chroma.Lexer {
if err != nil {
panic(err)
}
config := lexer.Config()
Registry.byName[config.Name] = lexer
for _, alias := range config.Aliases {
Registry.byName[alias] = lexer
}
Registry.Lexers = append(Registry.Lexers, lexer)
return lexer
}
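
Putting the registry together: Register wires each lexer into both the by-name map and the list that Match scans. A usage sketch (not part of this commit):

package main

import (
	"fmt"

	"github.com/alecthomas/chroma/lexers"
)

func main() {
	// Lookup by name or alias.
	fmt.Println(lexers.Registry.Get("golang").Config().Name) // Go

	// Match by filename glob. The catch-all "default" lexer (Priority 99)
	// matches "*", so there is always at least one result; lower Priority
	// sorts first.
	for _, lexer := range lexers.Registry.Match("main.go") {
		fmt.Println(lexer.Config().Name) // Go, then default
	}
}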

79
modifiers.go Normal file

@@ -0,0 +1,79 @@
package chroma
import "fmt"
// A Modifier modifies the behaviour of the lexer.
type Modifier interface {
// Preprocess the lexer rules.
//
// "self" and "rule" are the rule name and index this Modifier is associated with.
Preprocess(rules map[string][]CompiledRule, self string, rule int) error
// Mutate the lexer state machine as it is processing.
Mutate(state *LexerState) error
}
// A MutatorFunc is a Modifier that mutates the lexer state machine as it is processing.
type MutatorFunc func(state *LexerState) error
func (m MutatorFunc) Preprocess(rules map[string][]CompiledRule, self string, rule int) error {
return nil
}
func (m MutatorFunc) Mutate(state *LexerState) error {
return m(state)
}
// A PreprocessorFunc is a Modifier that pre-processes the lexer rules.
type PreprocessorFunc func(rules map[string][]CompiledRule, self string, rule int) error
func (p PreprocessorFunc) Preprocess(rules map[string][]CompiledRule, self string, rule int) error {
return p(rules, self, rule)
}
func (p PreprocessorFunc) Mutate(state *LexerState) error {
return nil
}
// Modifiers applies a set of Modifiers in order.
func Modifiers(modifiers ...Modifier) MutatorFunc {
return func(state *LexerState) error {
for _, modifier := range modifiers {
if err := modifier.Mutate(state); err != nil {
return err
}
}
return nil
}
}
// Include inlines the rules of the given state in place of this rule.
func Include(state string) Rule {
return Rule{
Modifier: PreprocessorFunc(func(rules map[string][]CompiledRule, self string, rule int) error {
includedRules, ok := rules[state]
if !ok {
return fmt.Errorf("invalid include state %q", state)
}
stateRules := rules[self]
stateRules = append(stateRules[:rule], append(includedRules, stateRules[rule+1:]...)...)
rules[self] = stateRules
return nil
}),
}
}
// Push states onto the stack.
func Push(states ...string) MutatorFunc {
return func(s *LexerState) error {
s.Stack = append(s.Stack, states...)
return nil
}
}
// Pop n states from the stack when the rule matches.
func Pop(n int) MutatorFunc {
return func(state *LexerState) error {
state.Stack = state.Stack[:len(state.Stack)-n]
return nil
}
}
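
Include does its work in the preprocessing pass of NewLexer, splicing the included state's compiled rules in place of the Include rule before any input is tokenised. A sketch (not part of this commit):

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	// The "comments" rules are inlined into "root" at compile time,
	// replacing the Include rule itself.
	lexer := chroma.MustNewLexer(nil, chroma.Rules{
		"root": {
			chroma.Include("comments"),
			{`\w+`, chroma.Name, nil},
			{`\s+`, chroma.Whitespace, nil},
		},
		"comments": {
			{`//[^\n]*`, chroma.CommentSingle, nil},
		},
	})
	tokens, _ := lexer.Tokenise("x // hi\n")
	fmt.Println(tokens)
	// [Token{Name, "x"} Token{TextWhitespace, " "} Token{CommentSingle, "// hi"} Token{TextWhitespace, "\n"}]
}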

6
modifiers_test.go Normal file

@@ -0,0 +1,6 @@
package chroma
import "testing"
func TestPop(t *testing.T) {
}

94
tokentype_string.go Normal file

@@ -0,0 +1,94 @@
// Code generated by "stringer -type TokenType"; DO NOT EDIT
package chroma
import "fmt"
const _TokenType_name = "EscapeErrorOtherKeywordKeywordConstantKeywordDeclarationKeywordNamespaceKeywordPseudoKeywordReservedKeywordTypeNameNameAttributeNameBuiltinNameBuiltinPseudoNameClassNameConstantNameDecoratorNameEntityNameExceptionNameFunctionNameFunctionMagicNamePropertyNameLabelNameNamespaceNameOtherNameTagNameVariableNameVariableClassNameVariableGlobalNameVariableInstanceNameVariableMagicLiteralLiteralDateLiteralStringLiteralStringAffixLiteralStringBacktickLiteralStringCharLiteralStringDelimiterLiteralStringDocLiteralStringDoubleLiteralStringEscapeLiteralStringHeredocLiteralStringInterpolLiteralStringOtherLiteralStringRegexLiteralStringSingleLiteralStringSymbolLiteralNumberLiteralNumberBinLiteralNumberFloatLiteralNumberHexLiteralNumberIntegerLiteralNumberIntegerLongLiteralNumberOctOperatorOperatorWordPunctuationCommentCommentHashbangCommentMultilineCommentPreprocCommentPreprocFileCommentSingleCommentSpecialGenericGenericDeletedGenericEmphGenericErrorGenericHeadingGenericInsertedGenericOutputGenericPromptGenericStrongGenericSubheadingGenericTracebackTextTextWhitespace"
var _TokenType_map = map[TokenType]string{
0: _TokenType_name[0:6],
1: _TokenType_name[6:11],
2: _TokenType_name[11:16],
1000: _TokenType_name[16:23],
1001: _TokenType_name[23:38],
1002: _TokenType_name[38:56],
1003: _TokenType_name[56:72],
1004: _TokenType_name[72:85],
1005: _TokenType_name[85:100],
1006: _TokenType_name[100:111],
2000: _TokenType_name[111:115],
2001: _TokenType_name[115:128],
2002: _TokenType_name[128:139],
2003: _TokenType_name[139:156],
2004: _TokenType_name[156:165],
2005: _TokenType_name[165:177],
2006: _TokenType_name[177:190],
2007: _TokenType_name[190:200],
2008: _TokenType_name[200:213],
2009: _TokenType_name[213:225],
2010: _TokenType_name[225:242],
2011: _TokenType_name[242:254],
2012: _TokenType_name[254:263],
2013: _TokenType_name[263:276],
2014: _TokenType_name[276:285],
2015: _TokenType_name[285:292],
2016: _TokenType_name[292:304],
2017: _TokenType_name[304:321],
2018: _TokenType_name[321:339],
2019: _TokenType_name[339:359],
2020: _TokenType_name[359:376],
3000: _TokenType_name[376:383],
3001: _TokenType_name[383:394],
3100: _TokenType_name[394:407],
3101: _TokenType_name[407:425],
3102: _TokenType_name[425:446],
3103: _TokenType_name[446:463],
3104: _TokenType_name[463:485],
3105: _TokenType_name[485:501],
3106: _TokenType_name[501:520],
3107: _TokenType_name[520:539],
3108: _TokenType_name[539:559],
3109: _TokenType_name[559:580],
3110: _TokenType_name[580:598],
3111: _TokenType_name[598:616],
3112: _TokenType_name[616:635],
3113: _TokenType_name[635:654],
3200: _TokenType_name[654:667],
3201: _TokenType_name[667:683],
3202: _TokenType_name[683:701],
3203: _TokenType_name[701:717],
3204: _TokenType_name[717:737],
3205: _TokenType_name[737:761],
3206: _TokenType_name[761:777],
4000: _TokenType_name[777:785],
4001: _TokenType_name[785:797],
5000: _TokenType_name[797:808],
6000: _TokenType_name[808:815],
6001: _TokenType_name[815:830],
6002: _TokenType_name[830:846],
6003: _TokenType_name[846:860],
6004: _TokenType_name[860:878],
6005: _TokenType_name[878:891],
6006: _TokenType_name[891:905],
7000: _TokenType_name[905:912],
7001: _TokenType_name[912:926],
7002: _TokenType_name[926:937],
7003: _TokenType_name[937:949],
7004: _TokenType_name[949:963],
7005: _TokenType_name[963:978],
7006: _TokenType_name[978:991],
7007: _TokenType_name[991:1004],
7008: _TokenType_name[1004:1017],
7009: _TokenType_name[1017:1034],
7010: _TokenType_name[1034:1050],
8000: _TokenType_name[1050:1054],
8001: _TokenType_name[1054:1068],
}
func (i TokenType) String() string {
if str, ok := _TokenType_map[i]; ok {
return str
}
return fmt.Sprintf("TokenType(%d)", i)
}

181
types.go Normal file

@@ -0,0 +1,181 @@
package chroma
// TokenType is the type of token to highlight.
//
// It is also an Emitter, emitting a single token of itself.
type TokenType int
// Set of TokenTypes.
//
// Categories of types are grouped in ranges of 1000, while sub-categories are in ranges of 100. For
// example, the literal category is in the range 3000-3999. The sub-category for literal strings is
// in the range 3100-3199.
const (
Escape TokenType = iota
Error
Other
)
// Keywords.
const (
Keyword TokenType = 1000 + iota
KeywordConstant
KeywordDeclaration
KeywordNamespace
KeywordPseudo
KeywordReserved
KeywordType
)
// Names.
const (
Name TokenType = 2000 + iota
NameAttribute
NameBuiltin
NameBuiltinPseudo
NameClass
NameConstant
NameDecorator
NameEntity
NameException
NameFunction
NameFunctionMagic
NameProperty
NameLabel
NameNamespace
NameOther
NameTag
NameVariable
NameVariableClass
NameVariableGlobal
NameVariableInstance
NameVariableMagic
)
// Literals.
const (
Literal TokenType = 3000 + iota
LiteralDate
)
// Strings.
const (
LiteralString TokenType = 3100 + iota
LiteralStringAffix
LiteralStringBacktick
LiteralStringChar
LiteralStringDelimiter
LiteralStringDoc
LiteralStringDouble
LiteralStringEscape
LiteralStringHeredoc
LiteralStringInterpol
LiteralStringOther
LiteralStringRegex
LiteralStringSingle
LiteralStringSymbol
)
// Numbers.
const (
LiteralNumber TokenType = 3200 + iota
LiteralNumberBin
LiteralNumberFloat
LiteralNumberHex
LiteralNumberInteger
LiteralNumberIntegerLong
LiteralNumberOct
)
// Operators.
const (
Operator TokenType = 4000 + iota
OperatorWord
)
// Punctuation.
const (
Punctuation TokenType = 5000 + iota
)
// Comments.
const (
Comment TokenType = 6000 + iota
CommentHashbang
CommentMultiline
CommentPreproc
CommentPreprocFile
CommentSingle
CommentSpecial
)
// Generic tokens.
const (
Generic TokenType = 7000 + iota
GenericDeleted
GenericEmph
GenericError
GenericHeading
GenericInserted
GenericOutput
GenericPrompt
GenericStrong
GenericSubheading
GenericTraceback
)
// Text.
const (
Text TokenType = 8000 + iota
TextWhitespace
)
// Aliases.
const (
Whitespace = TextWhitespace
Date = LiteralDate
String = LiteralString
StringAffix = LiteralStringAffix
StringBacktick = LiteralStringBacktick
StringChar = LiteralStringChar
StringDelimiter = LiteralStringDelimiter
StringDoc = LiteralStringDoc
StringDouble = LiteralStringDouble
StringEscape = LiteralStringEscape
StringHeredoc = LiteralStringHeredoc
StringInterpol = LiteralStringInterpol
StringOther = LiteralStringOther
StringRegex = LiteralStringRegex
StringSingle = LiteralStringSingle
StringSymbol = LiteralStringSymbol
Number = LiteralNumber
NumberBin = LiteralNumberBin
NumberFloat = LiteralNumberFloat
NumberHex = LiteralNumberHex
NumberInteger = LiteralNumberInteger
NumberIntegerLong = LiteralNumberIntegerLong
NumberOct = LiteralNumberOct
)
func (t TokenType) Category() TokenType {
return t / 1000 * 1000
}
func (t TokenType) SubCategory() TokenType {
return t / 100 * 100
}
func (t TokenType) InCategory(other TokenType) bool {
return t/1000 == other/1000
}
func (t TokenType) InSubCategory(other TokenType) bool {
return t/100 == other/100
}
func (t TokenType) Emit(groups []string) []Token {
return []Token{{Type: t, Value: groups[0]}}
}
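
The integer encoding makes the category helpers above pure arithmetic. A quick demonstration (a sketch, not part of this commit):

package main

import (
	"fmt"

	"github.com/alecthomas/chroma"
)

func main() {
	tt := chroma.LiteralStringBacktick           // 3102
	fmt.Println(tt.Category())                   // Literal (3000)
	fmt.Println(tt.SubCategory())                // LiteralString (3100)
	fmt.Println(tt.InSubCategory(chroma.String)) // true
}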