
Add a bunch of automatically translated lexers.

Alec Thomas 2017-06-04 22:18:35 +10:00
parent b30de35ff1
commit 5dedc6e45b
20 changed files with 749 additions and 214 deletions

View File

@@ -16,9 +16,10 @@ import (
)
var (
profileFlag = kingpin.Flag("profile", "Enable profiling to file.").String()
profileFlag = kingpin.Flag("profile", "Enable profiling to file.").PlaceHolder("FILE").String()
tokensFlag = kingpin.Flag("tokens", "Dump raw tokens.").Bool()
filesArgs = kingpin.Arg("files", "Files to highlight.").Required().ExistingFiles()
lexerFlag = kingpin.Flag("lexer", "Lexer to use when formatting (default is to autodetect).").Short('l').String()
filesArgs = kingpin.Arg("files", "Files to highlight.").ExistingFiles()
)
func main() {
@@ -32,14 +33,22 @@ func main() {
w := bufio.NewWriterSize(os.Stdout, 16384)
defer w.Flush()
writer := getWriter(w)
for _, filename := range *filesArgs {
lexers := lexers.Registry.Match(filename)
lexer := lexers[0]
lexer = chroma.Coalesce(lexer)
contents, err := ioutil.ReadFile(filename)
if len(*filesArgs) == 0 {
lexer := lexers.Registry.Get(*lexerFlag)
contents, err := ioutil.ReadAll(os.Stdin)
kingpin.FatalIfError(err, "")
err = lexer.Tokenise(string(contents), writer)
err = lexer.Tokenise(nil, string(contents), writer)
kingpin.FatalIfError(err, "")
} else {
for _, filename := range *filesArgs {
lexers := lexers.Registry.Match(filename)
lexer := lexers[0]
lexer = chroma.Coalesce(lexer)
contents, err := ioutil.ReadFile(filename)
kingpin.FatalIfError(err, "")
err = lexer.Tokenise(nil, string(contents), writer)
kingpin.FatalIfError(err, "")
}
}
}
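The command now forks on whether file arguments were given: with none, input is read from stdin and the lexer is chosen by name through the new --lexer/-l flag; otherwise each file is matched by filename as before. A minimal sketch of the two lookup paths, indexing the first match as the loop above does (error handling elided):

// Stdin path: look the lexer up by name via the new flag.
lexer := lexers.Registry.Get("bash")

// Per-file path: match on filename, then merge runs of identical tokens.
lexer = chroma.Coalesce(lexers.Registry.Match("setup.py")[0])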

View File

@@ -9,14 +9,14 @@ type coalescer struct {
Lexer
}
func (d *coalescer) Tokenise(text string, out func(Token)) error {
func (d *coalescer) Tokenise(options *TokeniseOptions, text string, out func(Token)) error {
var last *Token
defer func() {
if last != nil {
out(*last)
}
}()
return d.Lexer.Tokenise(text, func(token Token) {
return d.Lexer.Tokenise(options, text, func(token Token) {
if last == nil {
last = &token
} else {
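Coalesce still does one job, merging consecutive tokens of the same type; it only gains the TokeniseOptions pass-through here, and the updated test below pins the behaviour down. Its observable effect, sketched with the new package-level Tokenise helper:

lexer := chroma.Coalesce(chroma.MustNewLexer(nil, chroma.Rules{
	"root": {{`[[:punct:]]`, chroma.Punctuation, nil}},
}))
// Five single-character punctuation matches come out as one merged token.
tokens, _ := chroma.Tokenise(lexer, nil, "!@#$%")
// tokens == []chroma.Token{{Type: chroma.Punctuation, Value: "!@#$%"}}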

View File

@@ -7,13 +7,12 @@ import (
)
func TestCoalesce(t *testing.T) {
lexer, err := Coalesce(MustNewLexer(nil, Rules{
lexer := Coalesce(MustNewLexer(nil, Rules{
"root": []Rule{
Rule{`[[:punct:]]`, Punctuation, nil},
},
}))
require.NoError(t, err)
actual, err := lexer.Tokenise("!@#$%")
actual, err := Tokenise(lexer, nil, "!@#$%")
require.NoError(t, err)
expected := []Token{
Token{Punctuation, "!@#$%"},

View File

@@ -10,16 +10,23 @@ import (
var DefaultConsoleTheme = map[TokenType]string{
Number: "\033[1m\033[33m",
Comment: "\033[36m",
CommentPreproc: "\033[1m\033[32m",
String: "\033[1m\033[36m",
Keyword: "\033[1m\033[37m",
GenericHeading: "\033[1m",
GenericSubheading: "\033[1m",
GenericStrong: "\033[1m",
GenericUnderline: "\033[4m",
GenericDeleted: "\033[9m",
}
// Console formatter.
//
// formatter := Console(DefaultConsoleTheme)
// formatter := Console(nil)
func Console(theme map[TokenType]string) Formatter {
if theme == nil {
theme = DefaultConsoleTheme
}
return &consoleFormatter{theme}
}
@@ -35,11 +42,12 @@ func (c *consoleFormatter) Format(w io.Writer) (func(Token), error) {
if !ok {
clr, ok = c.theme[token.Type.Category()]
if !ok {
clr = "\033[0m"
clr = ""
}
}
}
fmt.Fprint(w, clr)
fmt.Fprint(w, token.Value)
fmt.Fprintf(w, "\033[0m")
}, nil
}
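Per the updated doc comment, Console(nil) now selects DefaultConsoleTheme, and token types absent from the theme are printed uncoloured; the reset escape is instead emitted once after every token. A usage sketch, assuming the formatter sits in package chroma alongside TokenType and that lexer and source are in scope:

out, err := chroma.Console(nil).Format(os.Stdout) // nil theme selects DefaultConsoleTheme
if err != nil {
	log.Fatal(err)
}
// Each token is written as: colour escape, token value, reset.
err = lexer.Tokenise(nil, source, out)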

144
lexer.go
View File

@@ -6,6 +6,12 @@ import (
"strings"
)
var (
defaultOptions = &TokeniseOptions{
State: "root",
}
)
// Config for a lexer.
type Config struct {
// Name of the lexer.
@@ -26,23 +32,21 @@ type Config struct {
// Priority, should multiple lexers match and no content is provided
Priority int
// Regex matching is case-insensitive.
CaseInsensitive bool
// Don't strip leading and trailing newlines from the input.
DontStripNL bool
// DontStripNL bool
// Strip all leading and trailing whitespace from the input
StripAll bool
// StripAll bool
// Make sure that the input does not end with a newline. This
// is required for some lexers that consume input linewise.
DontEnsureNL bool
// DontEnsureNL bool
// If given and greater than 0, expand tabs in the input.
TabSize int
// If given, must be an encoding name. This encoding will be used to
// convert the input string to Unicode, if it is not already a Unicode
// string.
Encoding string
// TabSize int
}
type Token struct {
@@ -53,9 +57,14 @@ type Token struct {
func (t Token) String() string { return fmt.Sprintf("Token{%s, %q}", t.Type, t.Value) }
func (t Token) GoString() string { return t.String() }
type TokeniseOptions struct {
// State to start tokenisation in. Defaults to "root".
State string
}
type Lexer interface {
Config() *Config
Tokenise(text string, out func(Token)) error
Tokenise(options *TokeniseOptions, text string, out func(Token)) error
}
// Analyser determines if this lexer is appropriate for the given text.
@@ -64,39 +73,46 @@ }
}
type Rule struct {
Pattern string
Type Emitter
Modifier Modifier
Pattern string
Type Emitter
Mutator Mutator
}
// An Emitter takes group matches and returns tokens.
type Emitter interface {
// Emit tokens for the given regex groups.
Emit(groups []string, out func(Token))
Emit(groups []string, lexer Lexer, out func(Token))
}
// EmitterFunc is a function that is an Emitter.
type EmitterFunc func(groups []string, out func(Token))
type EmitterFunc func(groups []string, lexer Lexer, out func(Token))
// Emit tokens for groups.
func (e EmitterFunc) Emit(groups []string, out func(Token)) { e(groups, out) }
func (e EmitterFunc) Emit(groups []string, lexer Lexer, out func(Token)) { e(groups, lexer, out) }
// ByGroups emits a token for each matching group in the rule's regex.
func ByGroups(emitters ...Emitter) Emitter {
return EmitterFunc(func(groups []string, out func(Token)) {
return EmitterFunc(func(groups []string, lexer Lexer, out func(Token)) {
for i, group := range groups[1:] {
emitters[i].Emit([]string{group}, out)
emitters[i].Emit([]string{group}, lexer, out)
}
return
})
}
// Using uses a given Lexer for parsing and emitting.
func Using(lexer Lexer) Emitter {
return EmitterFunc(func(groups []string, out func(Token)) {
if err := lexer.Tokenise(groups[0], out); err != nil {
// TODO: Emitters should return an error, though it's not clear what one would do with
// it.
// Using returns an Emitter that uses a given Lexer for parsing and emitting.
func Using(lexer Lexer, options *TokeniseOptions) Emitter {
return EmitterFunc(func(groups []string, _ Lexer, out func(Token)) {
if err := lexer.Tokenise(options, groups[0], out); err != nil {
panic(err)
}
})
}
// UsingSelf is like Using, but uses the current Lexer.
func UsingSelf(state string) Emitter {
return EmitterFunc(func(groups []string, lexer Lexer, out func(Token)) {
if err := lexer.Tokenise(&TokeniseOptions{State: state}, groups[0], out); err != nil {
panic(err)
}
})
@@ -107,9 +123,10 @@ func Words(words ...string) string {
for i, word := range words {
words[i] = regexp.QuoteMeta(word)
}
return "\\b(?:" + strings.Join(words, "|") + ")\\b"
return `\b(?:` + strings.Join(words, `|`) + `)\b`
}
// Rules maps from state to a sequence of Rules.
type Rules map[string][]Rule
// MustNewLexer creates a new Lexer or panics.
@@ -133,7 +150,11 @@ func NewLexer(config *Config, rules Rules) (Lexer, error) {
for state, rules := range rules {
for _, rule := range rules {
crule := CompiledRule{Rule: rule}
re, err := regexp.Compile("^(?m)" + rule.Pattern)
flags := "m"
if config.CaseInsensitive {
flags += "i"
}
re, err := regexp.Compile("^(?" + flags + ")(?:" + rule.Pattern + ")")
if err != nil {
return nil, fmt.Errorf("invalid regex %q for state %q: %s", rule.Pattern, state, err)
}
@@ -141,17 +162,6 @@ func NewLexer(config *Config, rules Rules) (Lexer, error) {
compiledRules[state] = append(compiledRules[state], crule)
}
}
// Apply any pre-processor modifiers.
for state, rules := range compiledRules {
for index, rule := range rules {
if rule.Modifier != nil {
err := rule.Modifier.Preprocess(compiledRules, state, index)
if err != nil {
return nil, err
}
}
}
}
return &regexLexer{
config: config,
rules: compiledRules,
@@ -164,6 +174,17 @@ type CompiledRule struct {
Regexp *regexp.Regexp
}
type CompiledRules map[string][]CompiledRule
type LexerState struct {
Text string
Pos int
Rules map[string][]CompiledRule
Stack []string
State string
Rule int
}
type regexLexer struct {
config *Config
rules map[string][]CompiledRule
@@ -173,51 +194,60 @@ func (r *regexLexer) Config() *Config {
return r.config
}
type LexerState struct {
Text string
Pos int
Stack []string
Rules map[string][]CompiledRule
State string
}
func (r *regexLexer) Tokenise(text string, out func(Token)) error {
func (r *regexLexer) Tokenise(options *TokeniseOptions, text string, out func(Token)) error {
if options == nil {
options = defaultOptions
}
state := &LexerState{
Text: text,
Stack: []string{"root"},
Stack: []string{options.State},
Rules: r.rules,
}
for state.Pos < len(text) && len(state.Stack) > 0 {
state.State = state.Stack[len(state.Stack)-1]
rule, index := matchRules(state.Text[state.Pos:], state.Rules[state.State])
ruleIndex, rule, index := matchRules(state.Text[state.Pos:], state.Rules[state.State])
// fmt.Println(text[state.Pos:state.Pos+1], rule, state.Text[state.Pos:state.Pos+1])
// No match.
if index == nil {
out(Token{Error, state.Text[state.Pos : state.Pos+1]})
state.Pos++
continue
}
state.Rule = ruleIndex
groups := make([]string, len(index)/2)
for i := 0; i < len(index); i += 2 {
groups[i/2] = text[state.Pos+index[i] : state.Pos+index[i+1]]
start := state.Pos + index[i]
end := state.Pos + index[i+1]
if start == -1 || end == -1 {
continue
}
groups[i/2] = text[start:end]
}
state.Pos += index[1]
if rule.Modifier != nil {
if err := rule.Modifier.Mutate(state); err != nil {
if rule.Type != nil {
rule.Type.Emit(groups, r, out)
}
if rule.Mutator != nil {
if err := rule.Mutator.Mutate(state); err != nil {
return err
}
} else {
rule.Type.Emit(groups, out)
}
}
return nil
}
func matchRules(text string, rules []CompiledRule) (CompiledRule, []int) {
for _, rule := range rules {
// Tokenise text using lexer, returning tokens as a slice.
func Tokenise(lexer Lexer, options *TokeniseOptions, text string) ([]Token, error) {
out := []Token{}
return out, lexer.Tokenise(options, text, func(token Token) { out = append(out, token) })
}
func matchRules(text string, rules []CompiledRule) (int, CompiledRule, []int) {
for i, rule := range rules {
if index := rule.Regexp.FindStringSubmatchIndex(text); index != nil {
return rule, index
return i, rule, index
}
}
return CompiledRule{}, nil
return 0, CompiledRule{}, nil
}
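Two additions here matter downstream: TokeniseOptions.State lets tokenisation begin in any state (the hook UsingSelf builds on), and the package-level Tokenise helper collects tokens into a slice, which the updated tests switch to. A sketch of both, with a hypothetical state name:

// Collect all tokens at once; nil options default to state "root".
tokens, err := chroma.Tokenise(lexer, nil, source)

// Or stream tokens, starting in a specific state.
err = lexer.Tokenise(&chroma.TokeniseOptions{State: "inline"}, source,
	func(t chroma.Token) { fmt.Println(t) })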

View File

@@ -20,7 +20,7 @@ func TestSimpleLexer(t *testing.T) {
Filenames: []string{"*.ini", "*.cfg"},
},
map[string][]Rule{
"root": []Rule{
"root": {
{`\s+`, Whitespace, nil},
{`;.*?$`, Comment, nil},
{`\[.*?\]$`, Keyword, nil},
@@ -29,24 +29,24 @@ },
},
)
require.NoError(t, err)
actual, err := lexer.Tokenise(`
actual, err := Tokenise(lexer, nil, `
; this is a comment
[section]
a = 10
`)
require.NoError(t, err)
expected := []Token{
Token{Whitespace, "\n\t"},
Token{Comment, "; this is a comment"},
Token{Whitespace, "\n\t"},
Token{Keyword, "[section]"},
Token{Whitespace, "\n\t"},
Token{Name, "a"},
Token{Whitespace, " "},
Token{Operator, "="},
Token{Whitespace, " "},
Token{LiteralString, "10"},
Token{Whitespace, "\n"},
{Whitespace, "\n\t"},
{Comment, "; this is a comment"},
{Whitespace, "\n\t"},
{Keyword, "[section]"},
{Whitespace, "\n\t"},
{Name, "a"},
{Whitespace, " "},
{Operator, "="},
{Whitespace, " "},
{LiteralString, "10"},
{Whitespace, "\n"},
}
require.Equal(t, expected, actual)
}

85
lexers/bash.go Normal file
View File

@@ -0,0 +1,85 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// Bash lexer.
var Bash = Register(NewLexer(
&Config{
Name: "Bash",
Aliases: []string{"bash", "sh", "ksh", "zsh", "shell"},
Filenames: []string{"*.sh", "*.ksh", "*.bash", "*.ebuild", "*.eclass", "*.exheres-0", "*.exlib", "*.zsh", ".bashrc", "bashrc", ".bash_*", "bash_*", "zshrc", ".zshrc", "PKGBUILD"},
MimeTypes: []string{"application/x-sh", "application/x-shellscript"},
},
Rules{
"root": {
Include("basic"),
{"`", LiteralStringBacktick, Push("backticks")},
Include("data"),
Include("interp"),
},
"interp": {
{`\$\(\(`, Keyword, Push("math")},
{`\$\(`, Keyword, Push("paren")},
{`\$\{#?`, LiteralStringInterpol, Push("curly")},
{`\$[a-zA-Z_]\w*`, NameVariable, nil},
{`\$(?:\d+|[#$?!_*@-])`, NameVariable, nil},
{`\$`, Text, nil},
},
"basic": {
{`\b(if|fi|else|while|do|done|for|then|return|function|case|select|continue|until|esac|elif)(\s*)\b`, ByGroups(Keyword, Text), nil},
{`\b(alias|bg|bind|break|builtin|caller|cd|command|compgen|complete|declare|dirs|disown|echo|enable|eval|exec|exit|export|false|fc|fg|getopts|hash|help|history|jobs|kill|let|local|logout|popd|printf|pushd|pwd|read|readonly|set|shift|shopt|source|suspend|test|time|times|trap|true|type|typeset|ulimit|umask|unalias|unset|wait)(\s*)\b`, NameBuiltin, nil},
{`\A#!.+\n`, CommentHashbang, nil},
{`#.*\n`, CommentSingle, nil},
{`\\[\w\W]`, LiteralStringEscape, nil},
{`(\b\w+)(\s*)(\+?=)`, ByGroups(NameVariable, Text, Operator), nil},
{`[\[\]{}()=]`, Operator, nil},
{`<<<`, Operator, nil},
// {`<<-?\s*(\'?)\\?(\w+)[\w\W]+?\2`, LiteralString, nil},
{`&&|\|\|`, Operator, nil},
},
"data": {
{`(?s)\$?"(\\\\|\\[0-7]+|\\.|[^"\\$])*"`, LiteralStringDouble, nil},
{`"`, LiteralStringDouble, Push("string")},
{`(?s)\$'(\\\\|\\[0-7]+|\\.|[^'\\])*'`, LiteralStringSingle, nil},
{`(?s)'.*?'`, LiteralStringSingle, nil},
{`;`, Punctuation, nil},
{`&`, Punctuation, nil},
{`\|`, Punctuation, nil},
{`\s+`, Text, nil},
{`\d+\b`, LiteralNumber, nil},
{"[^=\\s\\[\\]{}()$\"\\'`\\\\<&|;]+", Text, nil},
{`<`, Text, nil},
},
"string": {
{`"`, LiteralStringDouble, Pop(1)},
{`(?s)(\\\\|\\[0-7]+|\\.|[^"\\$])+`, LiteralStringDouble, nil},
Include("interp"),
},
"curly": {
{`\}`, LiteralStringInterpol, Pop(1)},
{`:-`, Keyword, nil},
{`\w+`, NameVariable, nil},
{"[^}:\"\\'`$\\\\]+", Punctuation, nil},
{`:`, Punctuation, nil},
Include("root"),
},
"paren": {
{`\)`, Keyword, Pop(1)},
Include("root"),
},
"math": {
{`\)\)`, Keyword, Pop(1)},
{`[-+*/%^|&]|\*\*|\|\|`, Operator, nil},
{`\d+#\d+`, LiteralNumber, nil},
{`\d+#`, LiteralNumber, nil},
{`\d+`, LiteralNumber, nil},
Include("root"),
},
"backticks": {
{"`", LiteralStringBacktick, Pop(1)},
Include("root"),
},
},
))
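Like the other translated lexers, Bash registers itself through Register at package init, so it is reachable both as the exported lexers.Bash and via the registry under any alias. For example (using the Tokenise helper added above):

// Direct use of the exported lexer.
tokens, err := chroma.Tokenise(lexers.Bash, nil, "echo \"$HOME\" | wc -c\n")

// Or by alias; "sh", "zsh", "shell", etc. resolve to the same lexer.
lexer := lexers.Registry.Get("shell")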

90
lexers/c.go Normal file
View File

@@ -0,0 +1,90 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// C lexer.
var C = Register(NewLexer(
&Config{
Name: "C",
Aliases: []string{"c"},
Filenames: []string{"*.c", "*.h", "*.idc"},
MimeTypes: []string{"text/x-chdr", "text/x-csrc"},
},
Rules{
"whitespace": {
{`^#if\s+0`, CommentPreproc, Push("if0")},
{`^#`, CommentPreproc, Push("macro")},
{`^(\s*(?:/[*].*?[*]/\s*)?)(#if\s+0)`, ByGroups(UsingSelf("root"), CommentPreproc), Push("if0")},
{`^(\s*(?:/[*].*?[*]/\s*)?)(#)`, ByGroups(UsingSelf("root"), CommentPreproc), Push("macro")},
{`\n`, Text, nil},
{`\s+`, Text, nil},
{`\\\n`, Text, nil},
{`//(\n|[\w\W]*?[^\\]\n)`, CommentSingle, nil},
{`/(\\\n)?[*][\w\W]*?[*](\\\n)?/`, CommentMultiline, nil},
{`/(\\\n)?[*][\w\W]*`, CommentMultiline, nil},
},
"statements": {
{`(L?)(")`, ByGroups(LiteralStringAffix, LiteralString), Push("string")},
{`(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')`, ByGroups(LiteralStringAffix, LiteralStringChar, LiteralStringChar, LiteralStringChar), nil},
{`(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*`, LiteralNumberFloat, nil},
{`(\d+\.\d*|\.\d+|\d+[fF])[fF]?`, LiteralNumberFloat, nil},
{`0x[0-9a-fA-F]+[LlUu]*`, LiteralNumberHex, nil},
{`0[0-7]+[LlUu]*`, LiteralNumberOct, nil},
{`\d+[LlUu]*`, LiteralNumberInteger, nil},
{`\*/`, Error, nil},
{`[~!%^&*+=|?:<>/-]`, Operator, nil},
{`[()\[\],.]`, Punctuation, nil},
{`(?:asm|auto|break|case|const|continue|default|do|else|enum|extern|for|goto|if|register|restricted|return|sizeof|static|struct|switch|typedef|union|volatile|while)\b`, Keyword, nil},
{`(bool|int|long|float|short|double|char|unsigned|signed|void)\b`, KeywordType, nil},
{`(?:inline|_inline|__inline|naked|restrict|thread|typename)\b`, KeywordReserved, nil},
{`(__m(128i|128d|128|64))\b`, KeywordReserved, nil},
{`__(?:asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|declspec|finally|int64|try|leave|wchar_t|w64|unaligned|raise|noop|identifier|forceinline|assume)\b`, KeywordReserved, nil},
{`(true|false|NULL)\b`, NameBuiltin, nil},
{`([a-zA-Z_]\w*)(\s*)(:)`, ByGroups(NameLabel, Text, Punctuation), nil},
{`[a-zA-Z_]\w*`, Name, nil},
},
"root": {
Include("whitespace"),
{`((?:[\w*\s])+?(?:\s|[*]))([a-zA-Z_]\w*)(\s*\([^;]*?\))([^;{]*)(\{)`, ByGroups(UsingSelf("root"), NameFunction, UsingSelf("root"), UsingSelf("root"), Punctuation), Push("function")},
{`((?:[\w*\s])+?(?:\s|[*]))([a-zA-Z_]\w*)(\s*\([^;]*?\))([^;]*)(;)`, ByGroups(UsingSelf("root"), NameFunction, UsingSelf("root"), UsingSelf("root"), Punctuation), nil},
Default(Push("statement")),
},
"statement": {
Include("whitespace"),
Include("statements"),
{`[{}]`, Punctuation, nil},
{`;`, Punctuation, Pop(1)},
},
"function": {
Include("whitespace"),
Include("statements"),
{`;`, Punctuation, nil},
{`\{`, Punctuation, Push()},
{`\}`, Punctuation, Pop(1)},
},
"string": {
{`"`, LiteralString, Pop(1)},
{`\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})`, LiteralStringEscape, nil},
{`[^\\"\n]+`, LiteralString, nil},
{`\\\n`, LiteralString, nil},
{`\\`, LiteralString, nil},
},
"macro": {
{`(include)(\s*(?:/[*].*?[*]/\s*)?)([^\n]+)`, ByGroups(CommentPreproc, Text, CommentPreprocFile), nil},
{`[^/\n]+`, CommentPreproc, nil},
{`/[*](.|\n)*?[*]/`, CommentMultiline, nil},
{`//.*?\n`, CommentSingle, Pop(1)},
{`/`, CommentPreproc, nil},
// {`(?<=\\)\n`, CommentPreproc, nil},
{`\n`, CommentPreproc, Pop(1)},
},
"if0": {
{`^\s*#if.*?\n`, CommentPreproc, Push()},
{`^\s*#el(?:se|if).*\n`, CommentPreproc, Pop(1)},
{`^\s*#endif.*?\n`, CommentPreproc, Pop(1)},
{`.*?\n`, Comment, nil},
},
},
))

View File

@@ -4,9 +4,9 @@ import (
. "github.com/alecthomas/chroma" // nolint
)
// Default lexer if no other is found.
var Default = Register(NewLexer(&Config{
Name: "default",
// Fallback lexer if no other is found.
var Fallback = Register(NewLexer(&Config{
Name: "fallback",
Filenames: []string{"*"},
Priority: 99,
}, Rules{

51
lexers/makefile.go Normal file
View File

@@ -0,0 +1,51 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// Makefile lexer.
var Makefile = Register(NewLexer(
&Config{
Name: "Makefile",
Aliases: []string{"make", "makefile", "mf", "bsdmake"},
Filenames: []string{"*.mak", "*.mk", "Makefile", "makefile", "Makefile.*", "GNUmakefile"},
MimeTypes: []string{"text/x-makefile"},
},
Rules{
"root": {
{`^(?:[\t ]+.*\n|\n)+`, Using(Bash, nil), nil},
{`\$[<@$+%?|*]`, Keyword, nil},
{`\s+`, Text, nil},
{`#.*?\n`, Comment, nil},
{`(export)(\s+)`, ByGroups(Keyword, Text), Push("export")},
{`export\s+`, Keyword, nil},
{`([\w${}().-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)`, ByGroups(NameVariable, Text, Operator, Text, Using(Bash, nil)), nil},
{`(?s)"(\\\\|\\.|[^"\\])*"`, LiteralStringDouble, nil},
{`(?s)'(\\\\|\\.|[^'\\])*'`, LiteralStringSingle, nil},
{`([^\n:]+)(:+)([ \t]*)`, ByGroups(NameFunction, Operator, Text), Push("block-header")},
{`\$\(`, Keyword, Push("expansion")},
},
"expansion": {
{`[^$a-zA-Z_()]+`, Text, nil},
{`[a-zA-Z_]+`, NameVariable, nil},
{`\$`, Keyword, nil},
{`\(`, Keyword, Push()},
{`\)`, Keyword, Pop(1)},
},
"export": {
{`[\w${}-]+`, NameVariable, nil},
{`\n`, Text, Pop(1)},
{`\s+`, Text, nil},
},
"block-header": {
{`[,|]`, Punctuation, nil},
{`#.*?\n`, Comment, Pop(1)},
{`\\\n`, Text, nil},
{`\$\(`, Keyword, Push("expansion")},
{`[a-zA-Z_]+`, Name, nil},
{`\n`, Text, Pop(1)},
{`.`, Text, nil},
},
},
))
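This rule set is the first consumer of the two-argument Using: recipe bodies and assignment values are delegated wholesale to the Bash lexer, with nil options meaning it starts in its "root" state. Reduced to its essentials, a fragment as it would appear inside the dot-importing lexers package:

// Any run of tab-indented recipe lines is re-tokenised as shell.
var recipeRule = Rule{
	Pattern: `^(?:[\t ]+.*\n|\n)+`,
	Type:    Using(Bash, nil), // nil *TokeniseOptions: Bash starts in "root"
}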

View File

@@ -19,13 +19,13 @@ var Markdown = Register(NewLexer(
{`^(#{2,6})(.+\n)`, ByGroups(GenericSubheading, Text), nil},
// task list
{`^(\s*)([*-] )(\[[ xX]\])( .+\n)`,
ByGroups(Text, Keyword, Keyword, Text), nil},
ByGroups(Text, Keyword, Keyword, UsingSelf("inline")), nil},
// bulleted lists
{`^(\s*)([*-])(\s)(.+\n)`,
ByGroups(Text, Keyword, Text, Text), nil},
ByGroups(Text, Keyword, Text, UsingSelf("inline")), nil},
// numbered lists
{`^(\s*)([0-9]+\.)( .+\n)`,
ByGroups(Text, Keyword, Text), nil},
ByGroups(Text, Keyword, UsingSelf("inline")), nil},
// quote
{`^(\s*>\s)(.+\n)`, ByGroups(Keyword, GenericEmph), nil},
// text block
@@ -39,6 +39,8 @@ var Markdown = Register(NewLexer(
{`\\.`, Text, nil},
// italics
{`(\s)([*_][^*_]+[*_])(\W|\n)`, ByGroups(Text, GenericEmph, Text), nil},
// underline
{`(\s)(__.*?__)`, ByGroups(Whitespace, GenericUnderline), nil},
// bold
// warning: the following rule eats internal tags. eg. **foo _bar_ baz** bar is not italics
{`(\s)(\*\*.*\*\*)`, ByGroups(Text, GenericStrong), nil},
@@ -58,12 +60,12 @@ },
},
))
func handleCodeblock(groups []string, out func(Token)) {
func handleCodeblock(groups []string, lexer Lexer, out func(Token)) {
out(Token{String, groups[1]})
out(Token{String, groups[2]})
out(Token{Text, groups[3]})
code := groups[4]
lexer := Registry.Get(groups[2])
lexer.Tokenise(code, out)
lexer = Registry.Get(groups[2])
lexer.Tokenise(nil, code, out)
out(Token{String, groups[5]})
}

57
lexers/postgres.go Normal file
View File

@@ -0,0 +1,57 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// Postgresql Sql Dialect lexer.
var PostgresqlSqlDialect = Register(NewLexer(
&Config{
Name: "PostgreSQL SQL dialect",
Aliases: []string{"postgresql", "postgres"},
Filenames: []string{"*.sql"},
MimeTypes: []string{"text/x-postgresql"},
CaseInsensitive: true,
},
Rules{
"root": {
{`\s+`, Text, nil},
{`--.*\n?`, CommentSingle, nil},
{`/\*`, CommentMultiline, Push("multiline-comments")},
{`(bigint|bigserial|bit|bit\s+varying|bool|boolean|box|bytea|char|character|character\s+varying|cidr|circle|date|decimal|double\s+precision|float4|float8|inet|int|int2|int4|int8|integer|interval|json|jsonb|line|lseg|macaddr|money|numeric|path|pg_lsn|point|polygon|real|serial|serial2|serial4|serial8|smallint|smallserial|text|time|timestamp|timestamptz|timetz|tsquery|tsvector|txid_snapshot|uuid|varbit|varchar|with\s+time\s+zone|without\s+time\s+zone|xml|anyarray|anyelement|anyenum|anynonarray|anyrange|cstring|fdw_handler|internal|language_handler|opaque|record|void)\b`, NameBuiltin, nil},
{`(?:ABORT|ABSOLUTE|ACCESS|ACTION|ADD|ADMIN|AFTER|AGGREGATE|ALL|ALSO|ALTER|ALWAYS|ANALYSE|ANALYZE|AND|ANY|ARRAY|AS|ASC|ASSERTION|ASSIGNMENT|ASYMMETRIC|AT|ATTRIBUTE|AUTHORIZATION|BACKWARD|BEFORE|BEGIN|BETWEEN|BIGINT|BINARY|BIT|BOOLEAN|BOTH|BY|CACHE|CALLED|CASCADE|CASCADED|CASE|CAST|CATALOG|CHAIN|CHAR|CHARACTER|CHARACTERISTICS|CHECK|CHECKPOINT|CLASS|CLOSE|CLUSTER|COALESCE|COLLATE|COLLATION|COLUMN|COMMENT|COMMENTS|COMMIT|COMMITTED|CONCURRENTLY|CONFIGURATION|CONNECTION|CONSTRAINT|CONSTRAINTS|CONTENT|CONTINUE|CONVERSION|COPY|COST|CREATE|CROSS|CSV|CURRENT|CURRENT_CATALOG|CURRENT_DATE|CURRENT_ROLE|CURRENT_SCHEMA|CURRENT_TIME|CURRENT_TIMESTAMP|CURRENT_USER|CURSOR|CYCLE|DATA|DATABASE|DAY|DEALLOCATE|DEC|DECIMAL|DECLARE|DEFAULT|DEFAULTS|DEFERRABLE|DEFERRED|DEFINER|DELETE|DELIMITER|DELIMITERS|DESC|DICTIONARY|DISABLE|DISCARD|DISTINCT|DO|DOCUMENT|DOMAIN|DOUBLE|DROP|EACH|ELSE|ENABLE|ENCODING|ENCRYPTED|END|ENUM|ESCAPE|EVENT|EXCEPT|EXCLUDE|EXCLUDING|EXCLUSIVE|EXECUTE|EXISTS|EXPLAIN|EXTENSION|EXTERNAL|EXTRACT|FALSE|FAMILY|FETCH|FILTER|FIRST|FLOAT|FOLLOWING|FOR|FORCE|FOREIGN|FORWARD|FREEZE|FROM|FULL|FUNCTION|FUNCTIONS|GLOBAL|GRANT|GRANTED|GREATEST|GROUP|HANDLER|HAVING|HEADER|HOLD|HOUR|IDENTITY|IF|ILIKE|IMMEDIATE|IMMUTABLE|IMPLICIT|IN|INCLUDING|INCREMENT|INDEX|INDEXES|INHERIT|INHERITS|INITIALLY|INLINE|INNER|INOUT|INPUT|INSENSITIVE|INSERT|INSTEAD|INT|INTEGER|INTERSECT|INTERVAL|INTO|INVOKER|IS|ISNULL|ISOLATION|JOIN|KEY|LABEL|LANGUAGE|LARGE|LAST|LATERAL|LC_COLLATE|LC_CTYPE|LEADING|LEAKPROOF|LEAST|LEFT|LEVEL|LIKE|LIMIT|LISTEN|LOAD|LOCAL|LOCALTIME|LOCALTIMESTAMP|LOCATION|LOCK|MAPPING|MATCH|MATERIALIZED|MAXVALUE|MINUTE|MINVALUE|MODE|MONTH|MOVE|NAME|NAMES|NATIONAL|NATURAL|NCHAR|NEXT|NO|NONE|NOT|NOTHING|NOTIFY|NOTNULL|NOWAIT|NULL|NULLIF|NULLS|NUMERIC|OBJECT|OF|OFF|OFFSET|OIDS|ON|ONLY|OPERATOR|OPTION|OPTIONS|OR|ORDER|ORDINALITY|OUT|OUTER|OVER|OVERLAPS|OVERLAY|OWNED|OWNER|PARSER|PARTIAL|PARTITION|PASSING|PASSWORD|PLACING|PLANS|POLICY|POSITION|PRECEDING|PRECISION|PREPARE|PREPARED|PRESERVE|PRIMARY|PRIOR|PRIVILEGES|PROCEDURAL|PROCEDURE|PROGRAM|QUOTE|RANGE|READ|REAL|REASSIGN|RECHECK|RECURSIVE|REF|REFERENCES|REFRESH|REINDEX|RELATIVE|RELEASE|RENAME|REPEATABLE|REPLACE|REPLICA|RESET|RESTART|RESTRICT|RETURNING|RETURNS|REVOKE|RIGHT|ROLE|ROLLBACK|ROW|ROWS|RULE|SAVEPOINT|SCHEMA|SCROLL|SEARCH|SECOND|SECURITY|SELECT|SEQUENCE|SEQUENCES|SERIALIZABLE|SERVER|SESSION|SESSION_USER|SET|SETOF|SHARE|SHOW|SIMILAR|SIMPLE|SMALLINT|SNAPSHOT|SOME|STABLE|STANDALONE|START|STATEMENT|STATISTICS|STDIN|STDOUT|STORAGE|STRICT|STRIP|SUBSTRING|SYMMETRIC|SYSID|SYSTEM|TABLE|TABLES|TABLESPACE|TEMP|TEMPLATE|TEMPORARY|TEXT|THEN|TIME|TIMESTAMP|TO|TRAILING|TRANSACTION|TREAT|TRIGGER|TRIM|TRUE|TRUNCATE|TRUSTED|TYPE|TYPES|UNBOUNDED|UNCOMMITTED|UNENCRYPTED|UNION|UNIQUE|UNKNOWN|UNLISTEN|UNLOGGED|UNTIL|UPDATE|USER|USING|VACUUM|VALID|VALIDATE|VALIDATOR|VALUE|VALUES|VARCHAR|VARIADIC|VARYING|VERBOSE|VERSION|VIEW|VIEWS|VOLATILE|WHEN|WHERE|WHITESPACE|WINDOW|WITH|WITHIN|WITHOUT|WORK|WRAPPER|WRITE|XML|XMLATTRIBUTES|XMLCONCAT|XMLELEMENT|XMLEXISTS|XMLFOREST|XMLPARSE|XMLPI|XMLROOT|XMLSERIALIZE|YEAR|YES|ZONE)\b`, Keyword, nil},
{"[+*/<>=~!@#%^&|?-]+", Operator, nil},
{`::`, Operator, nil},
{`\$\d+`, NameVariable, nil},
{`([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?`, LiteralNumberFloat, nil},
{`[0-9]+`, LiteralNumberInteger, nil},
{`((?:E|U&)?)(')`, ByGroups(LiteralStringAffix, LiteralStringSingle), Push("string")},
{`((?:U&)?)(")`, ByGroups(LiteralStringAffix, LiteralStringName), Push("quoted-ident")},
// PL/SQL, etc.
// TODO: Make this work.
// {`(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)`, EmitterFunc(detectLanguage), nil},
{`[a-z_]\w*`, Name, nil},
{"`[^`]*`", LiteralStringName, nil},
{`:'[a-z]\w*\b'`, NameVariable, nil},
{`:"[a-z]\w*\b"`, NameVariable, nil},
{`:[a-z]\w*\b`, NameVariable, nil},
{`[;:()\[\]{},.]`, Punctuation, nil},
},
"multiline-comments": {
{`/\*`, CommentMultiline, Push("multiline-comments")},
{`\*/`, CommentMultiline, Pop(1)},
{`[^/*]+`, CommentMultiline, nil},
{`[/*]`, CommentMultiline, nil},
},
"string": {
{`[^']+`, LiteralStringSingle, nil},
{`''`, LiteralStringSingle, nil},
{`'`, LiteralStringSingle, Pop(1)},
},
"quoted-ident": {
{`[^"]+`, LiteralStringName, nil},
{`""`, LiteralStringName, nil},
{`"`, LiteralStringName, Pop(1)},
},
},
))
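This is the first lexer to set Config.CaseInsensitive, which NewLexer (above) now folds into every compiled pattern as the regex i flag, so the upper-case keyword alternation also matches lower- and mixed-case SQL:

// Keywords are spelled upper-case in the rules but match any casing.
tokens, err := chroma.Tokenise(lexers.PostgresqlSqlDialect, nil, "select 1 from t;")
// "select" and "from" are both emitted as Keyword tokens.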

136
lexers/python.go Normal file
View File

@@ -0,0 +1,136 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// Python lexer.
var Python = Register(NewLexer(
&Config{
Name: "Python",
Aliases: []string{"python", "py", "sage"},
Filenames: []string{"*.py", "*.pyw", "*.sc", "SConstruct", "SConscript", "*.tac", "*.sage"},
MimeTypes: []string{"text/x-python", "application/x-python"},
},
Rules{
"root": {
{`\n`, Text, nil},
{`^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")`, ByGroups(Text, LiteralStringAffix, LiteralStringDoc), nil},
{`^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')`, ByGroups(Text, LiteralStringAffix, LiteralStringDoc), nil},
{`[^\S\n]+`, Text, nil},
{`\A#!.+$`, CommentHashbang, nil},
{`#.*$`, CommentSingle, nil},
{`[]{}:(),;[]`, Punctuation, nil},
{`\\\n`, Text, nil},
{`\\`, Text, nil},
{`(in|is|and|or|not)\b`, OperatorWord, nil},
{`!=|==|<<|>>|[-~+/*%=<>&^|.]`, Operator, nil},
Include("keywords"),
{`(def)((?:\s|\\\s)+)`, ByGroups(Keyword, Text), Push("funcname")},
{`(class)((?:\s|\\\s)+)`, ByGroups(Keyword, Text), Push("classname")},
{`(from)((?:\s|\\\s)+)`, ByGroups(KeywordNamespace, Text), Push("fromimport")},
{`(import)((?:\s|\\\s)+)`, ByGroups(KeywordNamespace, Text), Push("import")},
Include("builtins"),
Include("magicfuncs"),
Include("magicvars"),
Include("backtick"),
{`([rR]|[uUbB][rR]|[rR][uUbB])(""")`, ByGroups(LiteralStringAffix, LiteralStringDouble), Push("tdqs")},
{`([rR]|[uUbB][rR]|[rR][uUbB])(''')`, ByGroups(LiteralStringAffix, LiteralStringSingle), Push("tsqs")},
{`([rR]|[uUbB][rR]|[rR][uUbB])(")`, ByGroups(LiteralStringAffix, LiteralStringDouble), Push("dqs")},
{`([rR]|[uUbB][rR]|[rR][uUbB])(')`, ByGroups(LiteralStringAffix, LiteralStringSingle), Push("sqs")},
{`([uUbB]?)(""")`, ByGroups(LiteralStringAffix, LiteralStringDouble), Combined("stringescape", "tdqs")},
{`([uUbB]?)(''')`, ByGroups(LiteralStringAffix, LiteralStringSingle), Combined("stringescape", "tsqs")},
{`([uUbB]?)(")`, ByGroups(LiteralStringAffix, LiteralStringDouble), Combined("stringescape", "dqs")},
{`([uUbB]?)(')`, ByGroups(LiteralStringAffix, LiteralStringSingle), Combined("stringescape", "sqs")},
Include("name"),
Include("numbers"),
},
"keywords": {
{`(?:assert|break|continue|del|elif|else|except|exec|finally|for|global|if|lambda|pass|print|raise|return|try|while|yield|yield from|as|with)\b`, Keyword, nil},
},
"builtins": {
{`(?:__import__|abs|all|any|apply|basestring|bin|bool|buffer|bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|complex|delattr|dict|dir|divmod|enumerate|eval|execfile|exit|file|filter|float|frozenset|getattr|globals|hasattr|hash|hex|id|input|int|intern|isinstance|issubclass|iter|len|list|locals|long|map|max|min|next|object|oct|open|ord|pow|property|range|raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|vars|xrange|zip)\b`, NameBuiltin, nil},
{`(self|None|Ellipsis|NotImplemented|False|True|cls)\b`, NameBuiltinPseudo, nil},
{`(?:ArithmeticError|AssertionError|AttributeError|BaseException|DeprecationWarning|EOFError|EnvironmentError|Exception|FloatingPointError|FutureWarning|GeneratorExit|IOError|ImportError|ImportWarning|IndentationError|IndexError|KeyError|KeyboardInterrupt|LookupError|MemoryError|NameError|NotImplemented|NotImplementedError|OSError|OverflowError|OverflowWarning|PendingDeprecationWarning|ReferenceError|RuntimeError|RuntimeWarning|StandardError|StopIteration|SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|TypeError|UnboundLocalError|UnicodeDecodeError|UnicodeEncodeError|UnicodeError|UnicodeTranslateError|UnicodeWarning|UserWarning|ValueError|VMSError|Warning|WindowsError|ZeroDivisionError)\b`, NameException, nil},
},
"magicfuncs": {
{`(?:__abs__|__add__|__and__|__call__|__cmp__|__coerce__|__complex__|__contains__|__del__|__delattr__|__delete__|__delitem__|__delslice__|__div__|__divmod__|__enter__|__eq__|__exit__|__float__|__floordiv__|__ge__|__get__|__getattr__|__getattribute__|__getitem__|__getslice__|__gt__|__hash__|__hex__|__iadd__|__iand__|__idiv__|__ifloordiv__|__ilshift__|__imod__|__imul__|__index__|__init__|__instancecheck__|__int__|__invert__|__iop__|__ior__|__ipow__|__irshift__|__isub__|__iter__|__itruediv__|__ixor__|__le__|__len__|__long__|__lshift__|__lt__|__missing__|__mod__|__mul__|__ne__|__neg__|__new__|__nonzero__|__oct__|__op__|__or__|__pos__|__pow__|__radd__|__rand__|__rcmp__|__rdiv__|__rdivmod__|__repr__|__reversed__|__rfloordiv__|__rlshift__|__rmod__|__rmul__|__rop__|__ror__|__rpow__|__rrshift__|__rshift__|__rsub__|__rtruediv__|__rxor__|__set__|__setattr__|__setitem__|__setslice__|__str__|__sub__|__subclasscheck__|__truediv__|__unicode__|__xor__)\b`, NameFunctionMagic, nil},
},
"magicvars": {
{`(?:__bases__|__class__|__closure__|__code__|__defaults__|__dict__|__doc__|__file__|__func__|__globals__|__metaclass__|__module__|__mro__|__name__|__self__|__slots__|__weakref__)\b`, NameVariableMagic, nil},
},
"numbers": {
{`(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?`, LiteralNumberFloat, nil},
{`\d+[eE][+-]?[0-9]+j?`, LiteralNumberFloat, nil},
{`0[0-7]+j?`, LiteralNumberOct, nil},
{`0[bB][01]+`, LiteralNumberBin, nil},
{`0[xX][a-fA-F0-9]+`, LiteralNumberHex, nil},
{`\d+L`, LiteralNumberIntegerLong, nil},
{`\d+j?`, LiteralNumberInteger, nil},
},
"backtick": {
{"`.*?`", LiteralStringBacktick, nil},
},
"name": {
{`@[\w.]+`, NameDecorator, nil},
{`[a-zA-Z_]\w*`, Name, nil},
},
"funcname": {
Include("magicfuncs"),
{`[a-zA-Z_]\w*`, NameFunction, Pop(1)},
Default(Pop(1)),
},
"classname": {
{`[a-zA-Z_]\w*`, NameClass, Pop(1)},
},
"import": {
{`(?:[ \t]|\\\n)+`, Text, nil},
{`as\b`, KeywordNamespace, nil},
{`,`, Operator, nil},
{`[a-zA-Z_][\w.]*`, NameNamespace, nil},
Default(Pop(1)),
},
"fromimport": {
{`(?:[ \t]|\\\n)+`, Text, nil},
{`import\b`, KeywordNamespace, Pop(1)},
{`None\b`, NameBuiltinPseudo, Pop(1)},
{`[a-zA-Z_.][\w.]*`, NameNamespace, nil},
Default(Pop(1)),
},
"stringescape": {
{`\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})`, LiteralStringEscape, nil},
},
"strings-single": {
{`%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]`, LiteralStringInterpol, nil},
{`[^\\\'"%\n]+`, LiteralStringSingle, nil},
{`[\'"\\]`, LiteralStringSingle, nil},
{`%`, LiteralStringSingle, nil},
},
"strings-double": {
{`%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]`, LiteralStringInterpol, nil},
{`[^\\\'"%\n]+`, LiteralStringDouble, nil},
{`[\'"\\]`, LiteralStringDouble, nil},
{`%`, LiteralStringDouble, nil},
},
"dqs": {
{`"`, LiteralStringDouble, Pop(1)},
{`\\\\|\\"|\\\n`, LiteralStringEscape, nil},
Include("strings-double"),
},
"sqs": {
{`'`, LiteralStringSingle, Pop(1)},
{`\\\\|\\'|\\\n`, LiteralStringEscape, nil},
Include("strings-single"),
},
"tdqs": {
{`"""`, LiteralStringDouble, Pop(1)},
Include("strings-double"),
{`\n`, LiteralStringDouble, nil},
},
"tsqs": {
{`'''`, LiteralStringSingle, Pop(1)},
Include("strings-single"),
{`\n`, LiteralStringSingle, nil},
},
},
))

View File

@@ -41,7 +41,7 @@ func (r *registry) Get(name string) chroma.Lexer {
if ok {
return lexer
}
return Default
return Fallback
}
// Match returns all lexers matching filename.
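With the rename, Get never returns nil: an unknown name falls through to the Fallback lexer, so call sites such as the Markdown code-block handler can tokenise unconditionally. For instance (source and out assumed in scope):

lexer := lexers.Registry.Get("not-a-language") // == lexers.Fallback
err := lexer.Tokenise(nil, source, out)        // always safe to call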

View File

@@ -1,79 +0,0 @@
package chroma
import "fmt"
// A Modifier modifies the behaviour of the lexer.
type Modifier interface {
// Preprocess the lexer rules.
//
// "self" and "rule" are the rule name and index this Modifier is associated with.
Preprocess(rules map[string][]CompiledRule, self string, rule int) error
// Mutate the lexer state machine as it is processing.
Mutate(state *LexerState) error
}
// A MutatorFunc is a Modifier that mutates the lexer state machine as it is processing.
type MutatorFunc func(state *LexerState) error
func (m MutatorFunc) Preprocess(rules map[string][]CompiledRule, self string, rule int) error {
return nil
}
func (m MutatorFunc) Mutate(state *LexerState) error {
return m(state)
}
// A PreprocessorFunc is a Modifier that pre-processes the lexer rules.
type PreprocessorFunc func(rules map[string][]CompiledRule, self string, rule int) error
func (p PreprocessorFunc) Preprocess(rules map[string][]CompiledRule, self string, rule int) error {
return p(rules, self, rule)
}
func (p PreprocessorFunc) Mutate(state *LexerState) error {
return nil
}
// Modifiers applies a set of Modifiers in order.
func Modifiers(modifiers ...Modifier) MutatorFunc {
return func(state *LexerState) error {
for _, modifier := range modifiers {
if err := modifier.Mutate(state); err != nil {
return err
}
}
return nil
}
}
// Include the given state.
func Include(state string) Rule {
return Rule{
Modifier: PreprocessorFunc(func(rules map[string][]CompiledRule, self string, rule int) error {
includedRules, ok := rules[state]
if !ok {
return fmt.Errorf("invalid include state %q", state)
}
stateRules := rules[self]
stateRules = append(stateRules[:rule], append(includedRules, stateRules[rule+1:]...)...)
rules[self] = stateRules
return nil
}),
}
}
// Push states onto the stack.
func Push(states ...string) MutatorFunc {
return func(s *LexerState) error {
s.Stack = append(s.Stack, states...)
return nil
}
}
// Pop state from the stack when rule matches.
func Pop(n int) MutatorFunc {
return func(state *LexerState) error {
state.Stack = state.Stack[:len(state.Stack)-n]
return nil
}
}

View File

@@ -1,6 +0,0 @@
package chroma
import "testing"
func TestPop(t *testing.T) {
}

90
mutators.go Normal file
View File

@@ -0,0 +1,90 @@
package chroma
import (
"fmt"
"strings"
)
// A Mutator modifies the behaviour of the lexer.
type Mutator interface {
// Mutate the lexer state machine as it is processing.
Mutate(state *LexerState) error
}
// A MutatorFunc is a Mutator that mutates the lexer state machine as it is processing.
type MutatorFunc func(state *LexerState) error
func (m MutatorFunc) Mutate(state *LexerState) error { return m(state) }
// Mutators applies a set of Mutators in order.
func Mutators(modifiers ...Mutator) MutatorFunc {
return func(state *LexerState) error {
for _, modifier := range modifiers {
if err := modifier.Mutate(state); err != nil {
return err
}
}
return nil
}
}
// Include the given state.
func Include(state string) Rule {
return Rule{
Mutator: MutatorFunc(func(ls *LexerState) error {
includedRules, ok := ls.Rules[state]
if !ok {
return fmt.Errorf("invalid include state %q", state)
}
stateRules := ls.Rules[ls.State]
stateRules = append(stateRules[:ls.Rule], append(includedRules, stateRules[ls.Rule+1:]...)...)
ls.Rules[ls.State] = stateRules
return nil
}),
}
}
// Combined creates a new anonymous state from the given states, and pushes that state.
func Combined(states ...string) MutatorFunc {
return func(s *LexerState) error {
name := "__combined_" + strings.Join(states, "__")
if _, ok := s.Rules[name]; !ok {
combined := []CompiledRule{}
for _, state := range states {
rules, ok := s.Rules[state]
if !ok {
return fmt.Errorf("invalid combine state %q", state)
}
combined = append(combined, rules...)
}
s.Rules[name] = combined
}
s.Rules[s.State][s.Rule].Mutator = Push(name)
s.Stack = append(s.Stack, name)
return nil
}
}
// Push states onto the stack.
func Push(states ...string) MutatorFunc {
return func(s *LexerState) error {
if len(states) == 0 {
s.Stack = append(s.Stack, s.State)
} else {
s.Stack = append(s.Stack, states...)
}
return nil
}
}
// Pop state from the stack when rule matches.
func Pop(n int) MutatorFunc {
return func(state *LexerState) error {
state.Stack = state.Stack[:len(state.Stack)-n]
return nil
}
}
func Default(mutator Mutator) Rule {
return Rule{Mutator: mutator}
}
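The net effect of replacing Modifiers with Mutators: nothing is rewritten at compile time any more. Include is now itself a rule whose Mutator splices the target state's rules into the active state the first time it fires (the test below pins this down); Push() with no arguments re-pushes the current state, which is what the brace- and paren-nesting rules in the C and Makefile lexers rely on; Combined builds and pushes an anonymous merged state, then rewrites the triggering rule to push it directly on later matches. A hypothetical state exercising all three (state names illustrative, written inside the dot-importing lexers package):

Rules{
	"root": {
		Include("comment"),          // splice "comment"'s rules in at this position
		{`\(`, Punctuation, Push()}, // no args: re-push the current state to nest
		{`\)`, Punctuation, Pop(1)}, // unwind one level of the stack
		{`'`, LiteralStringSingle, Combined("escape", "sqs")}, // push a merged state
	},
}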

55
mutators_test.go Normal file
View File

@@ -0,0 +1,55 @@
package chroma
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestInclude(t *testing.T) {
include := Include("other")
actual := CompiledRules{
"root": {
CompiledRule{Rule: include},
},
"other": {
CompiledRule{Rule: Rule{
Pattern: "//.+",
Type: Comment,
}},
CompiledRule{Rule: Rule{
Pattern: `"[^"]*"`,
Type: String,
}},
},
}
state := &LexerState{
State: "root",
Rules: actual,
}
err := include.Mutator.Mutate(state)
require.NoError(t, err)
expected := CompiledRules{
"root": {
CompiledRule{Rule: Rule{
Pattern: "//.+",
Type: Comment,
}},
CompiledRule{Rule: Rule{
Pattern: `"[^"]*"`,
Type: String,
}},
},
"other": {
CompiledRule{Rule: Rule{
Pattern: "//.+",
Type: Comment,
}},
CompiledRule{Rule: Rule{
Pattern: `"[^"]*"`,
Type: String,
}},
},
}
require.Equal(t, expected, actual)
}

View File

@@ -4,7 +4,7 @@ package chroma
import "fmt"
const _TokenType_name = "EscapeErrorOtherKeywordKeywordConstantKeywordDeclarationKeywordNamespaceKeywordPseudoKeywordReservedKeywordTypeNameNameAttributeNameBuiltinNameBuiltinPseudoNameClassNameConstantNameDecoratorNameEntityNameExceptionNameFunctionNameFunctionMagicNamePropertyNameLabelNameNamespaceNameOtherNameTagNameVariableNameVariableClassNameVariableGlobalNameVariableInstanceNameVariableMagicLiteralLiteralDateLiteralStringLiteralStringAffixLiteralStringBacktickLiteralStringCharLiteralStringDelimiterLiteralStringDocLiteralStringDoubleLiteralStringEscapeLiteralStringHeredocLiteralStringInterpolLiteralStringOtherLiteralStringRegexLiteralStringSingleLiteralStringSymbolLiteralNumberLiteralNumberBinLiteralNumberFloatLiteralNumberHexLiteralNumberIntegerLiteralNumberIntegerLongLiteralNumberOctOperatorOperatorWordPunctuationCommentCommentHashbangCommentMultilineCommentPreprocCommentPreprocFileCommentSingleCommentSpecialGenericGenericDeletedGenericEmphGenericErrorGenericHeadingGenericInsertedGenericOutputGenericPromptGenericStrongGenericSubheadingGenericTracebackTextTextWhitespace"
const _TokenType_name = "EscapeErrorOtherKeywordKeywordConstantKeywordDeclarationKeywordNamespaceKeywordPseudoKeywordReservedKeywordTypeNameNameAttributeNameBuiltinNameBuiltinPseudoNameClassNameConstantNameDecoratorNameEntityNameExceptionNameFunctionNameFunctionMagicNamePropertyNameLabelNameNamespaceNameOtherNameTagNameVariableNameVariableClassNameVariableGlobalNameVariableInstanceNameVariableMagicLiteralLiteralDateLiteralStringLiteralStringAffixLiteralStringBacktickLiteralStringCharLiteralStringDelimiterLiteralStringDocLiteralStringDoubleLiteralStringEscapeLiteralStringHeredocLiteralStringInterpolLiteralStringOtherLiteralStringRegexLiteralStringSingleLiteralStringSymbolLiteralStringNameLiteralNumberLiteralNumberBinLiteralNumberFloatLiteralNumberHexLiteralNumberIntegerLiteralNumberIntegerLongLiteralNumberOctOperatorOperatorWordPunctuationCommentCommentHashbangCommentMultilineCommentSingleCommentSpecialCommentPreprocCommentPreprocFileGenericGenericDeletedGenericEmphGenericErrorGenericHeadingGenericInsertedGenericOutputGenericPromptGenericStrongGenericSubheadingGenericTracebackGenericUnderlineTextTextWhitespace"
var _TokenType_map = map[TokenType]string{
0: _TokenType_name[0:6],
@@ -54,36 +54,38 @@ var _TokenType_map = map[TokenType]string{
3111: _TokenType_name[598:616],
3112: _TokenType_name[616:635],
3113: _TokenType_name[635:654],
3200: _TokenType_name[654:667],
3201: _TokenType_name[667:683],
3202: _TokenType_name[683:701],
3203: _TokenType_name[701:717],
3204: _TokenType_name[717:737],
3205: _TokenType_name[737:761],
3206: _TokenType_name[761:777],
4000: _TokenType_name[777:785],
4001: _TokenType_name[785:797],
5000: _TokenType_name[797:808],
6000: _TokenType_name[808:815],
6001: _TokenType_name[815:830],
6002: _TokenType_name[830:846],
6003: _TokenType_name[846:860],
6004: _TokenType_name[860:878],
6005: _TokenType_name[878:891],
6006: _TokenType_name[891:905],
7000: _TokenType_name[905:912],
7001: _TokenType_name[912:926],
7002: _TokenType_name[926:937],
7003: _TokenType_name[937:949],
7004: _TokenType_name[949:963],
7005: _TokenType_name[963:978],
7006: _TokenType_name[978:991],
7007: _TokenType_name[991:1004],
7008: _TokenType_name[1004:1017],
7009: _TokenType_name[1017:1034],
7010: _TokenType_name[1034:1050],
8000: _TokenType_name[1050:1054],
8001: _TokenType_name[1054:1068],
3114: _TokenType_name[654:671],
3200: _TokenType_name[671:684],
3201: _TokenType_name[684:700],
3202: _TokenType_name[700:718],
3203: _TokenType_name[718:734],
3204: _TokenType_name[734:754],
3205: _TokenType_name[754:778],
3206: _TokenType_name[778:794],
4000: _TokenType_name[794:802],
4001: _TokenType_name[802:814],
5000: _TokenType_name[814:825],
6000: _TokenType_name[825:832],
6001: _TokenType_name[832:847],
6002: _TokenType_name[847:863],
6003: _TokenType_name[863:876],
6004: _TokenType_name[876:890],
6100: _TokenType_name[890:904],
6101: _TokenType_name[904:922],
7000: _TokenType_name[922:929],
7001: _TokenType_name[929:943],
7002: _TokenType_name[943:954],
7003: _TokenType_name[954:966],
7004: _TokenType_name[966:980],
7005: _TokenType_name[980:995],
7006: _TokenType_name[995:1008],
7007: _TokenType_name[1008:1021],
7008: _TokenType_name[1021:1034],
7009: _TokenType_name[1034:1051],
7010: _TokenType_name[1051:1067],
7011: _TokenType_name[1067:1083],
8000: _TokenType_name[1083:1087],
8001: _TokenType_name[1087:1101],
}
func (i TokenType) String() string {

View File

@@ -74,6 +74,7 @@ const (
LiteralStringRegex
LiteralStringSingle
LiteralStringSymbol
LiteralStringName
)
// Literals.
@@ -103,12 +104,16 @@ const (
Comment TokenType = 6000 + iota
CommentHashbang
CommentMultiline
CommentPreproc
CommentPreprocFile
CommentSingle
CommentSpecial
)
// Preprocessor "comments".
const (
CommentPreproc TokenType = 6100 + iota
CommentPreprocFile
)
// Generic tokens.
const (
Generic TokenType = 7000 + iota
@@ -122,6 +127,7 @@ const (
GenericStrong
GenericSubheading
GenericTraceback
GenericUnderline
)
// Text.
@@ -176,6 +182,6 @@ func (t TokenType) InSubCategory(other TokenType) bool {
return t/100 == other/100
}
func (t TokenType) Emit(groups []string, out func(Token)) {
func (t TokenType) Emit(groups []string, lexer Lexer, out func(Token)) {
out(Token{Type: t, Value: groups[0]})
}
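Because TokenType implements Emit, a bare token type still works in a rule's Type slot; the added Lexer parameter is what lets emitters such as UsingSelf call back into the lexer. A hypothetical custom emitter under the new signature (strings import assumed; the lexer argument may go unused):

// Emits the whole match as a single upper-cased Keyword token.
var shoutKeyword = chroma.EmitterFunc(func(groups []string, lexer chroma.Lexer, out func(chroma.Token)) {
	out(chroma.Token{Type: chroma.Keyword, Value: strings.ToUpper(groups[0])})
})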