1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-01-26 03:20:10 +02:00

Add a bunch of automatically translated lexers.

This commit is contained in:
Alec Thomas 2017-06-04 22:18:35 +10:00
parent b30de35ff1
commit 5dedc6e45b
20 changed files with 749 additions and 214 deletions

View File

@ -16,9 +16,10 @@ import (
)
var (
profileFlag = kingpin.Flag("profile", "Enable profiling to file.").String()
profileFlag = kingpin.Flag("profile", "Enable profiling to file.").PlaceHolder("FILE").String()
tokensFlag = kingpin.Flag("tokens", "Dump raw tokens.").Bool()
filesArgs = kingpin.Arg("files", "Files to highlight.").Required().ExistingFiles()
lexerFlag = kingpin.Flag("lexer", "Lexer to use when formatting (default is to autodetect).").Short('l').String()
filesArgs = kingpin.Arg("files", "Files to highlight.").ExistingFiles()
)
func main() {
@ -32,14 +33,22 @@ func main() {
w := bufio.NewWriterSize(os.Stdout, 16384)
defer w.Flush()
writer := getWriter(w)
for _, filename := range *filesArgs {
lexers := lexers.Registry.Match(filename)
lexer := lexers[0]
lexer = chroma.Coalesce(lexer)
contents, err := ioutil.ReadFile(filename)
if len(*filesArgs) == 0 {
lexer := lexers.Registry.Get(*lexerFlag)
contents, err := ioutil.ReadAll(os.Stdin)
kingpin.FatalIfError(err, "")
err = lexer.Tokenise(string(contents), writer)
err = lexer.Tokenise(nil, string(contents), writer)
kingpin.FatalIfError(err, "")
} else {
for _, filename := range *filesArgs {
lexers := lexers.Registry.Match(filename)
lexer := lexers[0]
lexer = chroma.Coalesce(lexer)
contents, err := ioutil.ReadFile(filename)
kingpin.FatalIfError(err, "")
err = lexer.Tokenise(nil, string(contents), writer)
kingpin.FatalIfError(err, "")
}
}
}

View File

@ -9,14 +9,14 @@ type coalescer struct {
Lexer
}
func (d *coalescer) Tokenise(text string, out func(Token)) error {
func (d *coalescer) Tokenise(options *TokeniseOptions, text string, out func(Token)) error {
var last *Token
defer func() {
if last != nil {
out(*last)
}
}()
return d.Lexer.Tokenise(text, func(token Token) {
return d.Lexer.Tokenise(options, text, func(token Token) {
if last == nil {
last = &token
} else {

View File

@ -7,13 +7,12 @@ import (
)
func TestCoalesce(t *testing.T) {
lexer, err := Coalesce(MustNewLexer(nil, Rules{
lexer := Coalesce(MustNewLexer(nil, Rules{
"root": []Rule{
Rule{`[[:punct:]]`, Punctuation, nil},
},
}))
require.NoError(t, err)
actual, err := lexer.Tokenise("!@#$%")
actual, err := Tokenise(lexer, nil, "!@#$%")
require.NoError(t, err)
expected := []Token{
Token{Punctuation, "!@#$%"},

View File

@ -10,16 +10,23 @@ import (
var DefaultConsoleTheme = map[TokenType]string{
// Each value is one or more ANSI SGR escape sequences ("\033[<n>m").
// SGR 1 = bold, 4 = underline, 9 = crossed-out; 32/33/36/37 select the
// green/yellow/cyan/white foreground colours.
// Bold yellow.
Number: "\033[1m\033[33m",
// Cyan.
Comment: "\033[36m",
// Bold green.
CommentPreproc: "\033[1m\033[32m",
// Bold cyan.
String: "\033[1m\033[36m",
// Bold white.
Keyword: "\033[1m\033[37m",
// Bold only, no colour change.
GenericHeading: "\033[1m",
GenericSubheading: "\033[1m",
GenericStrong: "\033[1m",
// Underlined.
GenericUnderline: "\033[4m",
// Crossed-out (strikethrough).
GenericDeleted: "\033[9m",
}
// Console returns a Formatter that colours tokens using the escape sequences
// in theme.
//
// Passing a nil theme selects DefaultConsoleTheme:
//
//	formatter := Console(nil)
func Console(theme map[TokenType]string) Formatter {
	selected := DefaultConsoleTheme
	if theme != nil {
		selected = theme
	}
	return &consoleFormatter{selected}
}
@ -35,11 +42,12 @@ func (c *consoleFormatter) Format(w io.Writer) (func(Token), error) {
if !ok {
clr, ok = c.theme[token.Type.Category()]
if !ok {
clr = "\033[0m"
clr = ""
}
}
}
fmt.Fprint(w, clr)
fmt.Fprint(w, token.Value)
fmt.Fprintf(w, "\033[0m")
}, nil
}

144
lexer.go
View File

@ -6,6 +6,12 @@ import (
"strings"
)
// defaultOptions is what the regex lexer uses when Tokenise is called with nil
// options: tokenisation begins in the "root" state.
var defaultOptions = &TokeniseOptions{State: "root"}
// Config for a lexer.
type Config struct {
// Name of the lexer.
@ -26,23 +32,21 @@ type Config struct {
// Priority, should multiple lexers match and no content is provided
Priority int
// Regex matching is case-insensitive.
CaseInsensitive bool
// Don't strip leading and trailing newlines from the input.
DontStripNL bool
// DontStripNL bool
// Strip all leading and trailing whitespace from the input
StripAll bool
// StripAll bool
// Make sure that the input does not end with a newline. This
// is required for some lexers that consume input linewise.
DontEnsureNL bool
// DontEnsureNL bool
// If given and greater than 0, expand tabs in the input.
TabSize int
// If given, must be an encoding name. This encoding will be used to
// convert the input string to Unicode, if it is not already a Unicode
// string.
Encoding string
// TabSize int
}
type Token struct {
@ -53,9 +57,14 @@ type Token struct {
// String formats the token as Token{<type>, "<value>"}, with the value quoted
// so that whitespace and control characters stay visible.
func (t Token) String() string {
	kind, text := t.Type, t.Value
	return fmt.Sprintf("Token{%s, %q}", kind, text)
}

// GoString returns the same text as String, so %#v prints the compact form.
func (t Token) GoString() string {
	return t.String()
}
// TokeniseOptions carries the per-call options handed to Lexer.Tokenise.
type TokeniseOptions struct {
// State is the name of the state tokenisation starts in. The regex lexer
// substitutes "root" only when the whole options pointer is nil.
// NOTE(review): a non-nil options value with an empty State appears to be
// used as-is rather than defaulted — confirm this is intended.
State string
}
type Lexer interface {
Config() *Config
Tokenise(text string, out func(Token)) error
Tokenise(options *TokeniseOptions, text string, out func(Token)) error
}
// Analyser determines if this lexer is appropriate for the given text.
@ -64,39 +73,46 @@ type Analyser interface {
}
type Rule struct {
Pattern string
Type Emitter
Modifier Modifier
Pattern string
Type Emitter
Mutator Mutator
}
// An Emitter takes group matches and returns tokens.
type Emitter interface {
// Emit tokens for the given regex groups.
Emit(groups []string, out func(Token))
Emit(groups []string, lexer Lexer, out func(Token))
}
// EmitterFunc is a function that is an Emitter.
type EmitterFunc func(groups []string, out func(Token))
type EmitterFunc func(groups []string, lexer Lexer, out func(Token))
// Emit tokens for groups.
func (e EmitterFunc) Emit(groups []string, out func(Token)) { e(groups, out) }
func (e EmitterFunc) Emit(groups []string, lexer Lexer, out func(Token)) { e(groups, lexer, out) }
// ByGroups emits a token for each matching group in the rule's regex.
func ByGroups(emitters ...Emitter) Emitter {
return EmitterFunc(func(groups []string, out func(Token)) {
return EmitterFunc(func(groups []string, lexer Lexer, out func(Token)) {
for i, group := range groups[1:] {
emitters[i].Emit([]string{group}, out)
emitters[i].Emit([]string{group}, lexer, out)
}
return
})
}
// Using uses a given Lexer for parsing and emitting.
func Using(lexer Lexer) Emitter {
return EmitterFunc(func(groups []string, out func(Token)) {
if err := lexer.Tokenise(groups[0], out); err != nil {
// TODO: Emitters should return an error, though it's not clear what one would do with
// it.
// Using returns an Emitter that uses a given Lexer for parsing and emitting.
func Using(lexer Lexer, options *TokeniseOptions) Emitter {
return EmitterFunc(func(groups []string, _ Lexer, out func(Token)) {
if err := lexer.Tokenise(options, groups[0], out); err != nil {
panic(err)
}
})
}
// UsingSelf is like Using, but uses the current Lexer.
func UsingSelf(state string) Emitter {
return EmitterFunc(func(groups []string, lexer Lexer, out func(Token)) {
if err := lexer.Tokenise(&TokeniseOptions{State: state}, groups[0], out); err != nil {
panic(err)
}
})
@ -107,9 +123,10 @@ func Words(words ...string) string {
for i, word := range words {
words[i] = regexp.QuoteMeta(word)
}
return "\\b(?:" + strings.Join(words, "|") + ")\\b"
return `\b(?:` + strings.Join(words, `|`) + `)\b`
}
// Rules maps a state name to the rules for that state. Order matters: the
// regex lexer tries a state's rules in sequence and uses the first that matches.
type Rules map[string][]Rule
// MustNewLexer creates a new Lexer or panics.
@ -133,7 +150,11 @@ func NewLexer(config *Config, rules Rules) (Lexer, error) {
for state, rules := range rules {
for _, rule := range rules {
crule := CompiledRule{Rule: rule}
re, err := regexp.Compile("^(?m)" + rule.Pattern)
flags := "m"
if config.CaseInsensitive {
flags += "i"
}
re, err := regexp.Compile("^(?" + flags + ")(?:" + rule.Pattern + ")")
if err != nil {
return nil, fmt.Errorf("invalid regex %q for state %q: %s", rule.Pattern, state, err)
}
@ -141,17 +162,6 @@ func NewLexer(config *Config, rules Rules) (Lexer, error) {
compiledRules[state] = append(compiledRules[state], crule)
}
}
// Apply any pre-processor modifiers.
for state, rules := range compiledRules {
for index, rule := range rules {
if rule.Modifier != nil {
err := rule.Modifier.Preprocess(compiledRules, state, index)
if err != nil {
return nil, err
}
}
}
}
return &regexLexer{
config: config,
rules: compiledRules,
@ -164,6 +174,17 @@ type CompiledRule struct {
Regexp *regexp.Regexp
}
// CompiledRules maps a state name to that state's rules in compiled form
// (each CompiledRule carries its Regexp).
type CompiledRules map[string][]CompiledRule
// LexerState is the mutable state of one regex-lexer Tokenise run. It is also
// the value passed to a rule's Mutator through Mutate.
type LexerState struct {
// Text is the complete input being tokenised.
Text string
// Pos is the byte offset into Text at which the next match is attempted.
Pos int
// Rules holds the lexer's compiled rules, keyed by state name.
Rules map[string][]CompiledRule
// Stack is the stack of state names; its last element is the active state.
Stack []string
// State is the name of the active state, refreshed from the top of Stack at
// the start of every loop iteration.
State string
// Rule is the index, within the active state's rule list, of the rule that
// matched most recently.
Rule int
}
type regexLexer struct {
config *Config
rules map[string][]CompiledRule
@ -173,51 +194,60 @@ func (r *regexLexer) Config() *Config {
return r.config
}
type LexerState struct {
Text string
Pos int
Stack []string
Rules map[string][]CompiledRule
State string
}
func (r *regexLexer) Tokenise(text string, out func(Token)) error {
func (r *regexLexer) Tokenise(options *TokeniseOptions, text string, out func(Token)) error {
if options == nil {
options = defaultOptions
}
state := &LexerState{
Text: text,
Stack: []string{"root"},
Stack: []string{options.State},
Rules: r.rules,
}
for state.Pos < len(text) && len(state.Stack) > 0 {
state.State = state.Stack[len(state.Stack)-1]
rule, index := matchRules(state.Text[state.Pos:], state.Rules[state.State])
ruleIndex, rule, index := matchRules(state.Text[state.Pos:], state.Rules[state.State])
// fmt.Println(text[state.Pos:state.Pos+1], rule, state.Text[state.Pos:state.Pos+1])
// No match.
if index == nil {
out(Token{Error, state.Text[state.Pos : state.Pos+1]})
state.Pos++
continue
}
state.Rule = ruleIndex
groups := make([]string, len(index)/2)
for i := 0; i < len(index); i += 2 {
groups[i/2] = text[state.Pos+index[i] : state.Pos+index[i+1]]
start := state.Pos + index[i]
end := state.Pos + index[i+1]
if start == -1 || end == -1 {
continue
}
groups[i/2] = text[start:end]
}
state.Pos += index[1]
if rule.Modifier != nil {
if err := rule.Modifier.Mutate(state); err != nil {
if rule.Type != nil {
rule.Type.Emit(groups, r, out)
}
if rule.Mutator != nil {
if err := rule.Mutator.Mutate(state); err != nil {
return err
}
} else {
rule.Type.Emit(groups, out)
}
}
return nil
}
func matchRules(text string, rules []CompiledRule) (CompiledRule, []int) {
for _, rule := range rules {
// Tokenise runs lexer over text and collects every emitted token into a slice.
//
// options is handed to lexer.Tokenise unchanged; nil is allowed and left for
// the lexer to interpret. The returned slice is never nil. If the lexer returns
// an error, the slice holds the tokens emitted before the failure and the error
// is returned alongside it.
func Tokenise(lexer Lexer, options *TokeniseOptions, text string) ([]Token, error) {
	out := []Token{}
	// Finish lexing before out is read for the return statement. Go does not
	// specify the order between reading a variable and calling a function that
	// modifies it when both appear in the same expression list, so
	// "return out, lexer.Tokenise(...)" could legally return the empty slice.
	err := lexer.Tokenise(options, text, func(token Token) { out = append(out, token) })
	return out, err
}
func matchRules(text string, rules []CompiledRule) (int, CompiledRule, []int) {
for i, rule := range rules {
if index := rule.Regexp.FindStringSubmatchIndex(text); index != nil {
return rule, index
return i, rule, index
}
}
return CompiledRule{}, nil
return 0, CompiledRule{}, nil
}

View File

@ -20,7 +20,7 @@ func TestSimpleLexer(t *testing.T) {
Filenames: []string{"*.ini", "*.cfg"},
},
map[string][]Rule{
"root": []Rule{
"root": {
{`\s+`, Whitespace, nil},
{`;.*?$`, Comment, nil},
{`\[.*?\]$`, Keyword, nil},
@ -29,24 +29,24 @@ func TestSimpleLexer(t *testing.T) {
},
)
require.NoError(t, err)
actual, err := lexer.Tokenise(`
actual, err := Tokenise(lexer, nil, `
; this is a comment
[section]
a = 10
`)
require.NoError(t, err)
expected := []Token{
Token{Whitespace, "\n\t"},
Token{Comment, "; this is a comment"},
Token{Whitespace, "\n\t"},
Token{Keyword, "[section]"},
Token{Whitespace, "\n\t"},
Token{Name, "a"},
Token{Whitespace, " "},
Token{Operator, "="},
Token{Whitespace, " "},
Token{LiteralString, "10"},
Token{Whitespace, "\n"},
{Whitespace, "\n\t"},
{Comment, "; this is a comment"},
{Whitespace, "\n\t"},
{Keyword, "[section]"},
{Whitespace, "\n\t"},
{Name, "a"},
{Whitespace, " "},
{Operator, "="},
{Whitespace, " "},
{LiteralString, "10"},
{Whitespace, "\n"},
}
require.Equal(t, expected, actual)
}

85
lexers/bash.go Normal file
View File

@ -0,0 +1,85 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// Bash is the lexer for shell scripts (bash, sh, ksh, zsh).
//
// The rule table is organised as named states. Within a state the rules are
// tried in the order listed and the first match wins, so ordering is
// significant. Push, Pop and Include are defined elsewhere in the chroma
// package; the comments below assume Push/Pop operate on the lexer's state
// stack and Include splices in another state's rules — TODO confirm.
var Bash = Register(NewLexer(
&Config{
Name: "Bash",
Aliases: []string{"bash", "sh", "ksh", "zsh", "shell"},
Filenames: []string{"*.sh", "*.ksh", "*.bash", "*.ebuild", "*.eclass", "*.exheres-0", "*.exlib", "*.zsh", ".bashrc", "bashrc", ".bash_*", "bash_*", "zshrc", ".zshrc", "PKGBUILD"},
MimeTypes: []string{"application/x-sh", "application/x-shellscript"},
},
Rules{
// Entry state: keywords/operators first, then backtick command
// substitution, then literals, then "$" expansions.
"root": {
Include("basic"),
{"`", LiteralStringBacktick, Push("backticks")},
Include("data"),
Include("interp"),
},
// Everything introduced by "$".
"interp": {
// "$((" opens arithmetic, "$(" a command substitution, "${" (optionally
// followed by "#") a parameter expansion.
{`\$\(\(`, Keyword, Push("math")},
{`\$\(`, Keyword, Push("paren")},
{`\$\{#?`, LiteralStringInterpol, Push("curly")},
// Named variables, then positional/special parameters.
{`\$[a-zA-Z_]\w*`, NameVariable, nil},
{`\$(?:\d+|[#$?!_*@-])`, NameVariable, nil},
// A "$" that starts none of the above is plain text.
{`\$`, Text, nil},
},
// Keywords, builtins, comments, escapes, assignments and operators.
"basic": {
// Reserved words; trailing whitespace is captured as a separate group.
{`\b(if|fi|else|while|do|done|for|then|return|function|case|select|continue|until|esac|elif)(\s*)\b`, ByGroups(Keyword, Text), nil},
// Builtin commands. The whole match, including any trailing whitespace,
// is given the single NameBuiltin type.
{`\b(alias|bg|bind|break|builtin|caller|cd|command|compgen|complete|declare|dirs|disown|echo|enable|eval|exec|exit|export|false|fc|fg|getopts|hash|help|history|jobs|kill|let|local|logout|popd|printf|pushd|pwd|read|readonly|set|shift|shopt|source|suspend|test|time|times|trap|true|type|typeset|ulimit|umask|unalias|unset|wait)(\s*)\b`, NameBuiltin, nil},
// "#!" line anchored at the start of the text being matched, then
// ordinary "#" comments; both consume the newline.
{`\A#!.+\n`, CommentHashbang, nil},
{`#.*\n`, CommentSingle, nil},
// Backslash followed by any single character, newline included.
{`\\[\w\W]`, LiteralStringEscape, nil},
// Assignment: name, optional whitespace, then "=" or "+=".
{`(\b\w+)(\s*)(\+?=)`, ByGroups(NameVariable, Text, Operator), nil},
{`[\[\]{}()=]`, Operator, nil},
// Here-string operator.
{`<<<`, Operator, nil},
// Here-document rule, disabled: it relies on the backreference \2, which
// Go's regexp package (RE2 syntax) does not support.
// {`<<-?\s*(\'?)\\?(\w+)[\w\W]+?\2`, LiteralString, nil},
{`&&|\|\|`, Operator, nil},
},
// String literals, separators, whitespace, numbers and bare words.
"data": {
// A double-quoted string containing no "$" is matched in one go;
// otherwise a lone quote enters the "string" state.
{`(?s)\$?"(\\\\|\\[0-7]+|\\.|[^"\\$])*"`, LiteralStringDouble, nil},
{`"`, LiteralStringDouble, Push("string")},
// $'...' strings with escapes, then plain single-quoted strings.
{`(?s)\$'(\\\\|\\[0-7]+|\\.|[^'\\])*'`, LiteralStringSingle, nil},
{`(?s)'.*?'`, LiteralStringSingle, nil},
{`;`, Punctuation, nil},
{`&`, Punctuation, nil},
{`\|`, Punctuation, nil},
{`\s+`, Text, nil},
{`\d+\b`, LiteralNumber, nil},
// A run of characters that cannot start any other construct.
{"[^=\\s\\[\\]{}()$\"\\'`\\\\<&|;]+", Text, nil},
{`<`, Text, nil},
},
// Inside a double-quoted string that contains "$" expansions.
"string": {
{`"`, LiteralStringDouble, Pop(1)},
{`(?s)(\\\\|\\[0-7]+|\\.|[^"\\$])+`, LiteralStringDouble, nil},
Include("interp"),
},
// Inside "${ ... }".
"curly": {
{`\}`, LiteralStringInterpol, Pop(1)},
{`:-`, Keyword, nil},
{`\w+`, NameVariable, nil},
{"[^}:\"\\'`$\\\\]+", Punctuation, nil},
{`:`, Punctuation, nil},
Include("root"),
},
// Inside "$( ... )".
"paren": {
{`\)`, Keyword, Pop(1)},
Include("root"),
},
// Inside "$(( ... ))".
"math": {
{`\)\)`, Keyword, Pop(1)},
{`[-+*/%^|&]|\*\*|\|\|`, Operator, nil},
// Digits followed by "#", with or without further digits.
{`\d+#\d+`, LiteralNumber, nil},
{`\d+#`, LiteralNumber, nil},
{`\d+`, LiteralNumber, nil},
Include("root"),
},
// Inside a backtick command substitution.
"backticks": {
{"`", LiteralStringBacktick, Pop(1)},
Include("root"),
},
},
))

90
lexers/c.go Normal file
View File

@ -0,0 +1,90 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// C is the lexer for C source and header files.
//
// The rule table is organised as named states. Within a state the rules are
// tried in the order listed and the first match wins. Push, Pop, Include and
// Default are defined elsewhere in the chroma package; the comments below
// assume Push/Pop operate on the lexer's state stack (Push() with no argument
// presumably re-enters the current state), Include splices in another state's
// rules, and Default applies when nothing else matches — TODO confirm.
var C = Register(NewLexer(
&Config{
Name: "C",
Aliases: []string{"c"},
Filenames: []string{"*.c", "*.h", "*.idc"},
MimeTypes: []string{"text/x-chdr", "text/x-csrc"},
},
Rules{
// Whitespace, comments and the start of preprocessor directives.
"whitespace": {
// "#if 0" opens a disabled region; any other "#" at line start opens a
// macro line.
{`^#if\s+0`, CommentPreproc, Push("if0")},
{`^#`, CommentPreproc, Push("macro")},
// The same two cases with leading whitespace and/or a /* */ comment
// before the "#"; the leading part is re-lexed with the "root" state.
{`^(\s*(?:/[*].*?[*]/\s*)?)(#if\s+0)`, ByGroups(UsingSelf("root"), CommentPreproc), Push("if0")},
{`^(\s*(?:/[*].*?[*]/\s*)?)(#)`, ByGroups(UsingSelf("root"), CommentPreproc), Push("macro")},
{`\n`, Text, nil},
{`\s+`, Text, nil},
// Backslash-newline line continuation.
{`\\\n`, Text, nil},
// "//" comment, continuing over lines that end in a backslash.
{`//(\n|[\w\W]*?[^\\]\n)`, CommentSingle, nil},
// "/* ... */" comment; the delimiters may be split by a line continuation.
{`/(\\\n)?[*][\w\W]*?[*](\\\n)?/`, CommentMultiline, nil},
// Unterminated "/*": everything to the end of the input is a comment.
{`/(\\\n)?[*][\w\W]*`, CommentMultiline, nil},
},
// Literals, operators, keywords and identifiers.
"statements": {
// String literal opener with optional L prefix.
{`(L?)(")`, ByGroups(LiteralStringAffix, LiteralString), Push("string")},
// Character literal with optional L prefix.
{`(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')`, ByGroups(LiteralStringAffix, LiteralStringChar, LiteralStringChar, LiteralStringChar), nil},
// Floats with an exponent, then floats with a decimal point or f suffix.
{`(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*`, LiteralNumberFloat, nil},
{`(\d+\.\d*|\.\d+|\d+[fF])[fF]?`, LiteralNumberFloat, nil},
{`0x[0-9a-fA-F]+[LlUu]*`, LiteralNumberHex, nil},
{`0[0-7]+[LlUu]*`, LiteralNumberOct, nil},
{`\d+[LlUu]*`, LiteralNumberInteger, nil},
// A "*/" with no opening "/*" is flagged as an error.
{`\*/`, Error, nil},
{`[~!%^&*+=|?:<>/-]`, Operator, nil},
{`[()\[\],.]`, Punctuation, nil},
{`(?:asm|auto|break|case|const|continue|default|do|else|enum|extern|for|goto|if|register|restricted|return|sizeof|static|struct|switch|typedef|union|volatile|while)\b`, Keyword, nil},
{`(bool|int|long|float|short|double|char|unsigned|signed|void)\b`, KeywordType, nil},
{`(?:inline|_inline|__inline|naked|restrict|thread|typename)\b`, KeywordReserved, nil},
{`(__m(128i|128d|128|64))\b`, KeywordReserved, nil},
// Double-underscore-prefixed reserved words.
{`__(?:asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|declspec|finally|int64|try|leave|wchar_t|w64|unaligned|raise|noop|identifier|forceinline|assume)\b`, KeywordReserved, nil},
{`(true|false|NULL)\b`, NameBuiltin, nil},
// Identifier followed by ":" is a label.
{`([a-zA-Z_]\w*)(\s*)(:)`, ByGroups(NameLabel, Text, Punctuation), nil},
{`[a-zA-Z_]\w*`, Name, nil},
},
// Top level: function definitions, function declarations, then statements.
"root": {
Include("whitespace"),
// Function definition: return type, name, parameter list, anything up to
// the opening "{".
{`((?:[\w*\s])+?(?:\s|[*]))([a-zA-Z_]\w*)(\s*\([^;]*?\))([^;{]*)(\{)`, ByGroups(UsingSelf("root"), NameFunction, UsingSelf("root"), UsingSelf("root"), Punctuation), Push("function")},
// Function declaration: same shape, terminated by ";".
{`((?:[\w*\s])+?(?:\s|[*]))([a-zA-Z_]\w*)(\s*\([^;]*?\))([^;]*)(;)`, ByGroups(UsingSelf("root"), NameFunction, UsingSelf("root"), UsingSelf("root"), Punctuation), nil},
Default(Push("statement")),
},
// A single statement; left again at the terminating ";".
"statement": {
Include("whitespace"),
Include("statements"),
{`[{}]`, Punctuation, nil},
{`;`, Punctuation, Pop(1)},
},
// A function body; "{" and "}" push and pop so nested blocks balance.
"function": {
Include("whitespace"),
Include("statements"),
{`;`, Punctuation, nil},
{`\{`, Punctuation, Push()},
{`\}`, Punctuation, Pop(1)},
},
// Inside a double-quoted string literal.
"string": {
{`"`, LiteralString, Pop(1)},
{`\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})`, LiteralStringEscape, nil},
{`[^\\"\n]+`, LiteralString, nil},
// Line continuation inside a string, then any other stray backslash.
{`\\\n`, LiteralString, nil},
{`\\`, LiteralString, nil},
},
// The remainder of a preprocessor line after "#".
"macro": {
// "include", an optional comment, then the include target.
{`(include)(\s*(?:/[*].*?[*]/\s*)?)([^\n]+)`, ByGroups(CommentPreproc, Text, CommentPreprocFile), nil},
{`[^/\n]+`, CommentPreproc, nil},
{`/[*](.|\n)*?[*]/`, CommentMultiline, nil},
{`//.*?\n`, CommentSingle, Pop(1)},
{`/`, CommentPreproc, nil},
// Continuation rule disabled: it needs the lookbehind (?<=...), which Go's
// regexp package (RE2 syntax) does not support.
// {`(?<=\\)\n`, CommentPreproc, nil},
{`\n`, CommentPreproc, Pop(1)},
},
// Inside "#if 0": nested "#if" pushes; "#else", "#elif" and "#endif" pop;
// every other line is a comment.
"if0": {
{`^\s*#if.*?\n`, CommentPreproc, Push()},
{`^\s*#el(?:se|if).*\n`, CommentPreproc, Pop(1)},
{`^\s*#endif.*?\n`, CommentPreproc, Pop(1)},
{`.*?\n`, Comment, nil},
},
},
))

View File

@ -4,9 +4,9 @@ import (
. "github.com/alecthomas/chroma" // nolint
)
// Default lexer if no other is found.
var Default = Register(NewLexer(&Config{
Name: "default",
// Fallback lexer if no other is found.
var Fallback = Register(NewLexer(&Config{
Name: "fallback",
Filenames: []string{"*"},
Priority: 99,
}, Rules{

51
lexers/makefile.go Normal file
View File

@ -0,0 +1,51 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// Makefile is the lexer for make build files.
//
// The rule table is organised as named states. Within a state the rules are
// tried in the order listed and the first match wins. Push and Pop are defined
// elsewhere in the chroma package; the comments below assume they operate on
// the lexer's state stack (Push() with no argument presumably re-enters the
// current state) — TODO confirm. Using(Bash, nil) hands the matched text to the
// Bash lexer and panics if that lexer returns an error.
var Makefile = Register(NewLexer(
&Config{
Name: "Makefile",
Aliases: []string{"make", "makefile", "mf", "bsdmake"},
Filenames: []string{"*.mak", "*.mk", "Makefile", "makefile", "Makefile.*", "GNUmakefile"},
MimeTypes: []string{"text/x-makefile"},
},
Rules{
"root": {
// One or more lines that are blank or start with a tab/space (recipe
// lines) are lexed as shell by the Bash lexer.
{`^(?:[\t ]+.*\n|\n)+`, Using(Bash, nil), nil},
// "$" followed by one of < @ $ + % ? | *.
{`\$[<@$+%?|*]`, Keyword, nil},
{`\s+`, Text, nil},
{`#.*?\n`, Comment, nil},
// "export NAME ...": the names are read in the "export" state.
{`(export)(\s+)`, ByGroups(Keyword, Text), Push("export")},
// NOTE(review): this matches exactly the text the previous rule matches,
// so it looks unreachable given first-match ordering — confirm.
{`export\s+`, Keyword, nil},
// Assignment (=, !=, ?=, :=, +=); the value, including backslash-continued
// lines, is lexed by the Bash lexer.
{`([\w${}().-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)`, ByGroups(NameVariable, Text, Operator, Text, Using(Bash, nil)), nil},
{`(?s)"(\\\\|\\.|[^"\\])*"`, LiteralStringDouble, nil},
{`(?s)'(\\\\|\\.|[^'\\])*'`, LiteralStringSingle, nil},
// Rule header: text up to one or more ":"; the rest of the line is read
// in "block-header".
{`([^\n:]+)(:+)([ \t]*)`, ByGroups(NameFunction, Operator, Text), Push("block-header")},
{`\$\(`, Keyword, Push("expansion")},
},
// Inside "$( ... )"; nested parentheses push and pop so they balance.
"expansion": {
{`[^$a-zA-Z_()]+`, Text, nil},
{`[a-zA-Z_]+`, NameVariable, nil},
{`\$`, Keyword, nil},
{`\(`, Keyword, Push()},
{`\)`, Keyword, Pop(1)},
},
// Names following "export", up to the end of the line.
"export": {
{`[\w${}-]+`, NameVariable, nil},
{`\n`, Text, Pop(1)},
{`\s+`, Text, nil},
},
// The part of a rule header after the ":"; left at a comment or at the end
// of the line, while a backslash-newline continues the header.
"block-header": {
{`[,|]`, Punctuation, nil},
{`#.*?\n`, Comment, Pop(1)},
{`\\\n`, Text, nil},
{`\$\(`, Keyword, Push("expansion")},
{`[a-zA-Z_]+`, Name, nil},
{`\n`, Text, Pop(1)},
{`.`, Text, nil},
},
},
))

View File

@ -19,13 +19,13 @@ var Markdown = Register(NewLexer(
{`^(#{2,6})(.+\n)`, ByGroups(GenericSubheading, Text), nil},
// task list
{`^(\s*)([*-] )(\[[ xX]\])( .+\n)`,
ByGroups(Text, Keyword, Keyword, Text), nil},
ByGroups(Text, Keyword, Keyword, UsingSelf("inline")), nil},
// bulleted lists
{`^(\s*)([*-])(\s)(.+\n)`,
ByGroups(Text, Keyword, Text, Text), nil},
ByGroups(Text, Keyword, Text, UsingSelf("inline")), nil},
// numbered lists
{`^(\s*)([0-9]+\.)( .+\n)`,
ByGroups(Text, Keyword, Text), nil},
ByGroups(Text, Keyword, UsingSelf("inline")), nil},
// quote
{`^(\s*>\s)(.+\n)`, ByGroups(Keyword, GenericEmph), nil},
// text block
@ -39,6 +39,8 @@ var Markdown = Register(NewLexer(
{`\\.`, Text, nil},
// italics
{`(\s)([*_][^*_]+[*_])(\W|\n)`, ByGroups(Text, GenericEmph, Text), nil},
// underline
{`(\s)(__.*?__)`, ByGroups(Whitespace, GenericUnderline), nil},
// bold
// warning: the following rule eats internal tags. eg. **foo _bar_ baz** bar is not italics
{`(\s)(\*\*.*\*\*)`, ByGroups(Text, GenericStrong), nil},
@ -58,12 +60,12 @@ var Markdown = Register(NewLexer(
},
))
func handleCodeblock(groups []string, out func(Token)) {
func handleCodeblock(groups []string, lexer Lexer, out func(Token)) {
out(Token{String, groups[1]})
out(Token{String, groups[2]})
out(Token{Text, groups[3]})
code := groups[4]
lexer := Registry.Get(groups[2])
lexer.Tokenise(code, out)
lexer = Registry.Get(groups[2])
lexer.Tokenise(nil, code, out)
out(Token{String, groups[5]})
}

57
lexers/postgres.go Normal file
View File

@ -0,0 +1,57 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// PostgresqlSqlDialect is the lexer for the PostgreSQL SQL dialect.
//
// CaseInsensitive is set, so every pattern below is compiled with the "i" flag
// and the upper-case keyword list also matches lower- and mixed-case input.
// Within a state the rules are tried in the order listed and the first match
// wins. Push and Pop are defined elsewhere in the chroma package and are
// assumed to operate on the lexer's state stack — TODO confirm.
var PostgresqlSqlDialect = Register(NewLexer(
	&Config{
		Name:            "PostgreSQL SQL dialect",
		Aliases:         []string{"postgresql", "postgres"},
		Filenames:       []string{"*.sql"},
		MimeTypes:       []string{"text/x-postgresql"},
		CaseInsensitive: true,
	},
	Rules{
		"root": {
			{`\s+`, Text, nil},
			// "--" comment to end of line.
			{`--.*\n?`, CommentSingle, nil},
			// "/*" opens a (nestable) block comment.
			{`/\*`, CommentMultiline, Push("multiline-comments")},
			// Built-in and pseudo type names.
			{`(bigint|bigserial|bit|bit\s+varying|bool|boolean|box|bytea|char|character|character\s+varying|cidr|circle|date|decimal|double\s+precision|float4|float8|inet|int|int2|int4|int8|integer|interval|json|jsonb|line|lseg|macaddr|money|numeric|path|pg_lsn|point|polygon|real|serial|serial2|serial4|serial8|smallint|smallserial|text|time|timestamp|timestamptz|timetz|tsquery|tsvector|txid_snapshot|uuid|varbit|varchar|with\s+time\s+zone|without\s+time\s+zone|xml|anyarray|anyelement|anyenum|anynonarray|anyrange|cstring|fdw_handler|internal|language_handler|opaque|record|void)\b`, NameBuiltin, nil},
			// Keywords. The pattern is assembled from raw-string pieces so that
			// no source line break can end up inside the regular expression: a
			// newline inside a raw string literal is part of the pattern and
			// would turn an alternative such as PRIMARY into "PRIMARY\n".
			{`(?:ABORT|ABSOLUTE|ACCESS|ACTION|ADD|ADMIN|AFTER|AGGREGATE|ALL|ALSO|ALTER|ALWAYS|ANALYSE|ANALYZE|AND|ANY|ARRAY|AS|ASC|ASSERTION|ASSIGNMENT|ASYMMETRIC|AT|ATTRIBUTE|AUTHORIZATION` +
				`|BACKWARD|BEFORE|BEGIN|BETWEEN|BIGINT|BINARY|BIT|BOOLEAN|BOTH|BY` +
				`|CACHE|CALLED|CASCADE|CASCADED|CASE|CAST|CATALOG|CHAIN|CHAR|CHARACTER|CHARACTERISTICS|CHECK|CHECKPOINT|CLASS|CLOSE|CLUSTER|COALESCE|COLLATE|COLLATION|COLUMN|COMMENT|COMMENTS|COMMIT|COMMITTED|CONCURRENTLY|CONFIGURATION|CONNECTION|CONSTRAINT|CONSTRAINTS|CONTENT|CONTINUE|CONVERSION|COPY|COST|CREATE|CROSS|CSV|CURRENT|CURRENT_CATALOG|CURRENT_DATE|CURRENT_ROLE|CURRENT_SCHEMA|CURRENT_TIME|CURRENT_TIMESTAMP|CURRENT_USER|CURSOR|CYCLE` +
				`|DATA|DATABASE|DAY|DEALLOCATE|DEC|DECIMAL|DECLARE|DEFAULT|DEFAULTS|DEFERRABLE|DEFERRED|DEFINER|DELETE|DELIMITER|DELIMITERS|DESC|DICTIONARY|DISABLE|DISCARD|DISTINCT|DO|DOCUMENT|DOMAIN|DOUBLE|DROP` +
				`|EACH|ELSE|ENABLE|ENCODING|ENCRYPTED|END|ENUM|ESCAPE|EVENT|EXCEPT|EXCLUDE|EXCLUDING|EXCLUSIVE|EXECUTE|EXISTS|EXPLAIN|EXTENSION|EXTERNAL|EXTRACT` +
				`|FALSE|FAMILY|FETCH|FILTER|FIRST|FLOAT|FOLLOWING|FOR|FORCE|FOREIGN|FORWARD|FREEZE|FROM|FULL|FUNCTION|FUNCTIONS` +
				`|GLOBAL|GRANT|GRANTED|GREATEST|GROUP` +
				`|HANDLER|HAVING|HEADER|HOLD|HOUR` +
				`|IDENTITY|IF|ILIKE|IMMEDIATE|IMMUTABLE|IMPLICIT|IN|INCLUDING|INCREMENT|INDEX|INDEXES|INHERIT|INHERITS|INITIALLY|INLINE|INNER|INOUT|INPUT|INSENSITIVE|INSERT|INSTEAD|INT|INTEGER|INTERSECT|INTERVAL|INTO|INVOKER|IS|ISNULL|ISOLATION` +
				`|JOIN|KEY` +
				`|LABEL|LANGUAGE|LARGE|LAST|LATERAL|LC_COLLATE|LC_CTYPE|LEADING|LEAKPROOF|LEAST|LEFT|LEVEL|LIKE|LIMIT|LISTEN|LOAD|LOCAL|LOCALTIME|LOCALTIMESTAMP|LOCATION|LOCK` +
				`|MAPPING|MATCH|MATERIALIZED|MAXVALUE|MINUTE|MINVALUE|MODE|MONTH|MOVE` +
				`|NAME|NAMES|NATIONAL|NATURAL|NCHAR|NEXT|NO|NONE|NOT|NOTHING|NOTIFY|NOTNULL|NOWAIT|NULL|NULLIF|NULLS|NUMERIC` +
				`|OBJECT|OF|OFF|OFFSET|OIDS|ON|ONLY|OPERATOR|OPTION|OPTIONS|OR|ORDER|ORDINALITY|OUT|OUTER|OVER|OVERLAPS|OVERLAY|OWNED|OWNER` +
				`|PARSER|PARTIAL|PARTITION|PASSING|PASSWORD|PLACING|PLANS|POLICY|POSITION|PRECEDING|PRECISION|PREPARE|PREPARED|PRESERVE|PRIMARY` +
				`|PRIOR|PRIVILEGES|PROCEDURAL|PROCEDURE|PROGRAM` +
				`|QUOTE` +
				`|RANGE|READ|REAL|REASSIGN|RECHECK|RECURSIVE|REF|REFERENCES|REFRESH|REINDEX|RELATIVE|RELEASE|RENAME|REPEATABLE|REPLACE|REPLICA|RESET|RESTART|RESTRICT|RETURNING|RETURNS|REVOKE|RIGHT|ROLE|ROLLBACK|ROW|ROWS|RULE` +
				`|SAVEPOINT|SCHEMA|SCROLL|SEARCH|SECOND|SECURITY|SELECT|SEQUENCE|SEQUENCES|SERIALIZABLE|SERVER|SESSION|SESSION_USER|SET|SETOF|SHARE|SHOW|SIMILAR|SIMPLE|SMALLINT|SNAPSHOT|SOME|STABLE|STANDALONE|START|STATEMENT|STATISTICS|STDIN|STDOUT|STORAGE|STRICT|STRIP|SUBSTRING|SYMMETRIC|SYSID|SYSTEM` +
				`|TABLE|TABLES|TABLESPACE|TEMP|TEMPLATE|TEMPORARY|TEXT|THEN|TIME|TIMESTAMP|TO|TRAILING|TRANSACTION|TREAT|TRIGGER|TRIM|TRUE|TRUNCATE|TRUSTED|TYPE|TYPES` +
				`|UNBOUNDED|UNCOMMITTED|UNENCRYPTED|UNION|UNIQUE|UNKNOWN|UNLISTEN|UNLOGGED|UNTIL|UPDATE|USER|USING` +
				`|VACUUM|VALID|VALIDATE|VALIDATOR|VALUE|VALUES|VARCHAR|VARIADIC|VARYING|VERBOSE|VERSION|VIEW|VIEWS|VOLATILE` +
				`|WHEN|WHERE|WHITESPACE|WINDOW|WITH|WITHIN|WITHOUT|WORK|WRAPPER|WRITE` +
				`|XML|XMLATTRIBUTES|XMLCONCAT|XMLELEMENT|XMLEXISTS|XMLFOREST|XMLPARSE|XMLPI|XMLROOT|XMLSERIALIZE` +
				`|YEAR|YES|ZONE)\b`, Keyword, nil},
			{"[+*/<>=~!@#%^&|?-]+", Operator, nil},
			// "::" type cast.
			{`::`, Operator, nil},
			// Positional parameter such as $1.
			{`\$\d+`, NameVariable, nil},
			// NOTE(review): this pattern also matches plain integers and a bare
			// ".", so the integer rule below and the "." in the punctuation rule
			// look unreachable given first-match ordering — confirm.
			{`([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?`, LiteralNumberFloat, nil},
			{`[0-9]+`, LiteralNumberInteger, nil},
			// String literal opener with optional E or U& prefix.
			{`((?:E|U&)?)(')`, ByGroups(LiteralStringAffix, LiteralStringSingle), Push("string")},
			// Quoted identifier opener with optional U& prefix.
			{`((?:U&)?)(")`, ByGroups(LiteralStringAffix, LiteralStringName), Push("quoted-ident")},
			// PL/SQL, etc.
			// TODO: Make this work.
			// {`(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)`, EmitterFunc(detectLanguage), nil},
			{`[a-z_]\w*`, Name, nil},
			// Backtick-quoted name.
			{"`[^`]*`", LiteralStringName, nil},
			// ":"-prefixed variables: single-quoted, double-quoted and bare.
			{`:'[a-z]\w*\b'`, NameVariable, nil},
			{`:"[a-z]\w*\b"`, NameVariable, nil},
			{`:[a-z]\w*\b`, NameVariable, nil},
			{`[;:()\[\]{},.]`, Punctuation, nil},
		},
		// Inside "/* ... */"; a nested "/*" pushes this state again.
		"multiline-comments": {
			{`/\*`, CommentMultiline, Push("multiline-comments")},
			{`\*/`, CommentMultiline, Pop(1)},
			{`[^/*]+`, CommentMultiline, nil},
			{`[/*]`, CommentMultiline, nil},
		},
		// Inside a single-quoted string; '' is an escaped quote.
		"string": {
			{`[^']+`, LiteralStringSingle, nil},
			{`''`, LiteralStringSingle, nil},
			{`'`, LiteralStringSingle, Pop(1)},
		},
		// Inside a double-quoted identifier; "" is an escaped quote.
		"quoted-ident": {
			{`[^"]+`, LiteralStringName, nil},
			{`""`, LiteralStringName, nil},
			{`"`, LiteralStringName, Pop(1)},
		},
	},
))

136
lexers/python.go Normal file
View File

@ -0,0 +1,136 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// Python is the lexer for Python source code.
//
// It is registered under the aliases "python", "py" and "sage" and is
// selected for the filename patterns and MIME types listed in its Config.
var Python = Register(NewLexer(
	&Config{
		Name:      "Python",
		Aliases:   []string{"python", "py", "sage"},
		Filenames: []string{"*.py", "*.pyw", "*.sc", "SConstruct", "SConscript", "*.tac", "*.sage"},
		MimeTypes: []string{"text/x-python", "application/x-python"},
	},
	Rules{
		"root": {
			{`\n`, Text, nil},
			// Docstrings with an optional string prefix of up to two characters.
			// The bound is spelled {0,2}: Go's regexp syntax has no {,n} form,
			// so "{,2}" would be matched as literal text and never fire.
			{`^(\s*)([rRuUbB]{0,2})("""(?:.|\n)*?""")`, ByGroups(Text, LiteralStringAffix, LiteralStringDoc), nil},
			{`^(\s*)([rRuUbB]{0,2})('''(?:.|\n)*?''')`, ByGroups(Text, LiteralStringAffix, LiteralStringDoc), nil},
			{`[^\S\n]+`, Text, nil},
			{`\A#!.+$`, CommentHashbang, nil},
			{`#.*$`, CommentSingle, nil},
			{`[]{}:(),;[]`, Punctuation, nil},
			{`\\\n`, Text, nil},
			{`\\`, Text, nil},
			{`(in|is|and|or|not)\b`, OperatorWord, nil},
			{`!=|==|<<|>>|[-~+/*%=<>&^|.]`, Operator, nil},
			Include("keywords"),
			{`(def)((?:\s|\\\s)+)`, ByGroups(Keyword, Text), Push("funcname")},
			{`(class)((?:\s|\\\s)+)`, ByGroups(Keyword, Text), Push("classname")},
			{`(from)((?:\s|\\\s)+)`, ByGroups(KeywordNamespace, Text), Push("fromimport")},
			{`(import)((?:\s|\\\s)+)`, ByGroups(KeywordNamespace, Text), Push("import")},
			Include("builtins"),
			Include("magicfuncs"),
			Include("magicvars"),
			Include("backtick"),
			// Raw strings: no escape handling, so "stringescape" is not combined in.
			{`([rR]|[uUbB][rR]|[rR][uUbB])(""")`, ByGroups(LiteralStringAffix, LiteralStringDouble), Push("tdqs")},
			{`([rR]|[uUbB][rR]|[rR][uUbB])(''')`, ByGroups(LiteralStringAffix, LiteralStringSingle), Push("tsqs")},
			{`([rR]|[uUbB][rR]|[rR][uUbB])(")`, ByGroups(LiteralStringAffix, LiteralStringDouble), Push("dqs")},
			{`([rR]|[uUbB][rR]|[rR][uUbB])(')`, ByGroups(LiteralStringAffix, LiteralStringSingle), Push("sqs")},
			{`([uUbB]?)(""")`, ByGroups(LiteralStringAffix, LiteralStringDouble), Combined("stringescape", "tdqs")},
			{`([uUbB]?)(''')`, ByGroups(LiteralStringAffix, LiteralStringSingle), Combined("stringescape", "tsqs")},
			{`([uUbB]?)(")`, ByGroups(LiteralStringAffix, LiteralStringDouble), Combined("stringescape", "dqs")},
			{`([uUbB]?)(')`, ByGroups(LiteralStringAffix, LiteralStringSingle), Combined("stringescape", "sqs")},
			Include("name"),
			Include("numbers"),
		},
		"keywords": {
			// "yield from" is listed before "yield": the first alternative that
			// lets the whole pattern succeed wins, and `yield\b` already succeeds
			// on "yield from", which would leave "from" to the import rule.
			{`(?:assert|break|continue|del|elif|else|except|exec|finally|for|global|if|lambda|pass|print|raise|return|try|while|yield from|yield|as|with)\b`, Keyword, nil},
		},
		"builtins": {
			{`(?:__import__|abs|all|any|apply|basestring|bin|bool|buffer|bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|complex|delattr|dict|dir|divmod|enumerate|eval|execfile|exit|file|filter|float|frozenset|getattr|globals|hasattr|hash|hex|id|input|int|intern|isinstance|issubclass|iter|len|list|locals|long|map|max|min|next|object|oct|open|ord|pow|property|range|raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|vars|xrange|zip)\b`, NameBuiltin, nil},
			{`(self|None|Ellipsis|NotImplemented|False|True|cls)\b`, NameBuiltinPseudo, nil},
			{`(?:ArithmeticError|AssertionError|AttributeError|BaseException|DeprecationWarning|EOFError|EnvironmentError|Exception|FloatingPointError|FutureWarning|GeneratorExit|IOError|ImportError|ImportWarning|IndentationError|IndexError|KeyError|KeyboardInterrupt|LookupError|MemoryError|NameError|NotImplemented|NotImplementedError|OSError|OverflowError|OverflowWarning|PendingDeprecationWarning|ReferenceError|RuntimeError|RuntimeWarning|StandardError|StopIteration|SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|TypeError|UnboundLocalError|UnicodeDecodeError|UnicodeEncodeError|UnicodeError|UnicodeTranslateError|UnicodeWarning|UserWarning|ValueError|VMSError|Warning|WindowsError|ZeroDivisionError)\b`, NameException, nil},
		},
		"magicfuncs": {
			{`(?:__abs__|__add__|__and__|__call__|__cmp__|__coerce__|__complex__|__contains__|__del__|__delattr__|__delete__|__delitem__|__delslice__|__div__|__divmod__|__enter__|__eq__|__exit__|__float__|__floordiv__|__ge__|__get__|__getattr__|__getattribute__|__getitem__|__getslice__|__gt__|__hash__|__hex__|__iadd__|__iand__|__idiv__|__ifloordiv__|__ilshift__|__imod__|__imul__|__index__|__init__|__instancecheck__|__int__|__invert__|__iop__|__ior__|__ipow__|__irshift__|__isub__|__iter__|__itruediv__|__ixor__|__le__|__len__|__long__|__lshift__|__lt__|__missing__|__mod__|__mul__|__ne__|__neg__|__new__|__nonzero__|__oct__|__op__|__or__|__pos__|__pow__|__radd__|__rand__|__rcmp__|__rdiv__|__rdivmod__|__repr__|__reversed__|__rfloordiv__|__rlshift__|__rmod__|__rmul__|__rop__|__ror__|__rpow__|__rrshift__|__rshift__|__rsub__|__rtruediv__|__rxor__|__set__|__setattr__|__setitem__|__setslice__|__str__|__sub__|__subclasscheck__|__truediv__|__unicode__|__xor__)\b`, NameFunctionMagic, nil},
		},
		"magicvars": {
			{`(?:__bases__|__class__|__closure__|__code__|__defaults__|__dict__|__doc__|__file__|__func__|__globals__|__metaclass__|__module__|__mro__|__name__|__self__|__slots__|__weakref__)\b`, NameVariableMagic, nil},
		},
		"numbers": {
			{`(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?`, LiteralNumberFloat, nil},
			{`\d+[eE][+-]?[0-9]+j?`, LiteralNumberFloat, nil},
			{`0[0-7]+j?`, LiteralNumberOct, nil},
			{`0[bB][01]+`, LiteralNumberBin, nil},
			{`0[xX][a-fA-F0-9]+`, LiteralNumberHex, nil},
			{`\d+L`, LiteralNumberIntegerLong, nil},
			{`\d+j?`, LiteralNumberInteger, nil},
		},
		"backtick": {
			{"`.*?`", LiteralStringBacktick, nil},
		},
		"name": {
			{`@[\w.]+`, NameDecorator, nil},
			{`[a-zA-Z_]\w*`, Name, nil},
		},
		"funcname": {
			Include("magicfuncs"),
			{`[a-zA-Z_]\w*`, NameFunction, Pop(1)},
			Default(Pop(1)),
		},
		"classname": {
			{`[a-zA-Z_]\w*`, NameClass, Pop(1)},
		},
		"import": {
			{`(?:[ \t]|\\\n)+`, Text, nil},
			{`as\b`, KeywordNamespace, nil},
			{`,`, Operator, nil},
			{`[a-zA-Z_][\w.]*`, NameNamespace, nil},
			Default(Pop(1)),
		},
		"fromimport": {
			{`(?:[ \t]|\\\n)+`, Text, nil},
			{`import\b`, KeywordNamespace, Pop(1)},
			{`None\b`, NameBuiltinPseudo, Pop(1)},
			{`[a-zA-Z_.][\w.]*`, NameNamespace, nil},
			Default(Pop(1)),
		},
		"stringescape": {
			{`\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})`, LiteralStringEscape, nil},
		},
		"strings-single": {
			{`%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]`, LiteralStringInterpol, nil},
			{`[^\\\'"%\n]+`, LiteralStringSingle, nil},
			{`[\'"\\]`, LiteralStringSingle, nil},
			{`%`, LiteralStringSingle, nil},
		},
		"strings-double": {
			{`%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]`, LiteralStringInterpol, nil},
			{`[^\\\'"%\n]+`, LiteralStringDouble, nil},
			{`[\'"\\]`, LiteralStringDouble, nil},
			{`%`, LiteralStringDouble, nil},
		},
		"dqs": {
			{`"`, LiteralStringDouble, Pop(1)},
			{`\\\\|\\"|\\\n`, LiteralStringEscape, nil},
			Include("strings-double"),
		},
		"sqs": {
			{`'`, LiteralStringSingle, Pop(1)},
			{`\\\\|\\'|\\\n`, LiteralStringEscape, nil},
			Include("strings-single"),
		},
		"tdqs": {
			{`"""`, LiteralStringDouble, Pop(1)},
			Include("strings-double"),
			{`\n`, LiteralStringDouble, nil},
		},
		"tsqs": {
			{`'''`, LiteralStringSingle, Pop(1)},
			Include("strings-single"),
			{`\n`, LiteralStringSingle, nil},
		},
	},
))

View File

@ -41,7 +41,7 @@ func (r *registry) Get(name string) chroma.Lexer {
if ok {
return lexer
}
return Default
return Fallback
}
// Match returns all lexers matching filename.

View File

@ -1,79 +0,0 @@
package chroma
import "fmt"
// A Modifier modifies the behaviour of the lexer.
//
// It exposes two hooks: Preprocess, which is handed the rule table, and
// Mutate, which is handed the lexer state.
type Modifier interface {
	// Preprocess the lexer rules.
	//
	// "self" and "rule" are the rule name and index this Modifier is associated with.
	// "rules" is keyed by state name; implementations may replace entries in it.
	Preprocess(rules map[string][]CompiledRule, self string, rule int) error
	// Mutate the lexer state machine as it is processing.
	//
	// A non-nil error reports that the mutation could not be applied.
	Mutate(state *LexerState) error
}
// MutatorFunc adapts a plain function to the Modifier interface: the function
// itself serves as the Mutate hook, while Preprocess is a no-op.
type MutatorFunc func(state *LexerState) error

// Mutate invokes the wrapped function with state and returns its result.
func (m MutatorFunc) Mutate(state *LexerState) error {
	err := m(state)
	return err
}

// Preprocess does nothing and always returns nil.
func (MutatorFunc) Preprocess(_ map[string][]CompiledRule, _ string, _ int) error {
	return nil
}
// PreprocessorFunc adapts a plain function to the Modifier interface: the
// function itself serves as the Preprocess hook, while Mutate is a no-op.
type PreprocessorFunc func(rules map[string][]CompiledRule, self string, rule int) error

// Mutate does nothing and always returns nil.
func (PreprocessorFunc) Mutate(_ *LexerState) error {
	return nil
}

// Preprocess invokes the wrapped function with the rule table, the name of
// the owning state and the index of the rule, and returns its result.
func (p PreprocessorFunc) Preprocess(rules map[string][]CompiledRule, self string, rule int) error {
	err := p(rules, self, rule)
	return err
}
// Modifiers bundles several Modifiers into a single MutatorFunc.
//
// When the returned function runs it calls Mutate on each modifier in the
// order given, stopping at (and returning) the first error. It returns nil
// when every modifier succeeds or when no modifiers were supplied.
func Modifiers(modifiers ...Modifier) MutatorFunc {
	chain := func(state *LexerState) error {
		for i := 0; i < len(modifiers); i++ {
			err := modifiers[i].Mutate(state)
			if err != nil {
				return err
			}
		}
		return nil
	}
	return chain
}
// Include returns a placeholder Rule that, when preprocessed, is replaced in
// its owning state by all the rules of the named state.
//
// The returned Rule carries only a Modifier. Its Preprocess hook looks up
// state in the rule table and rewrites rules[self] so that the entry at index
// rule is substituted by the included rules, keeping the rules before and
// after it in place. It returns an error if state is not in the table or if
// rule is not a valid index into rules[self].
func Include(state string) Rule {
	return Rule{
		Modifier: PreprocessorFunc(func(rules map[string][]CompiledRule, self string, rule int) error {
			included, ok := rules[state]
			if !ok {
				return fmt.Errorf("invalid include state %q", state)
			}
			current := rules[self]
			if rule < 0 || rule >= len(current) {
				return fmt.Errorf("include of %q: rule index %d out of range for state %q", state, rule, self)
			}
			// Build the expansion in a fresh slice. Appending directly onto
			// the included state's slice would write into its spare capacity,
			// which is shared with whoever else holds that backing array.
			expanded := make([]CompiledRule, 0, len(current)-1+len(included))
			expanded = append(expanded, current[:rule]...)
			expanded = append(expanded, included...)
			expanded = append(expanded, current[rule+1:]...)
			rules[self] = expanded
			return nil
		}),
	}
}
// Push returns a MutatorFunc that appends the given state names, in order,
// to the end of the lexer's state stack. It never returns an error.
func Push(states ...string) MutatorFunc {
	push := func(ls *LexerState) error {
		for _, name := range states {
			ls.Stack = append(ls.Stack, name)
		}
		return nil
	}
	return MutatorFunc(push)
}
// Pop returns a MutatorFunc that removes the last n entries from the lexer's
// state stack when its rule matches.
//
// The returned function reports an error, and leaves the stack untouched, if
// n is negative or larger than the current stack depth; without that check
// the slice expression would panic or re-expose stale entries.
func Pop(n int) MutatorFunc {
	return func(state *LexerState) error {
		depth := len(state.Stack)
		if n < 0 || n > depth {
			return fmt.Errorf("cannot pop %d states from a stack of depth %d", n, depth)
		}
		state.Stack = state.Stack[:depth-n]
		return nil
	}
}

View File

@ -1,6 +0,0 @@
package chroma
import "testing"
// TestPop is an empty placeholder: it currently makes no assertions.
func TestPop(t *testing.T) {
}

90
mutators.go Normal file
View File

@ -0,0 +1,90 @@
package chroma
import (
"fmt"
"strings"
)
// A Mutator modifies the behaviour of the lexer.
//
// The implementations in this file act on the LexerState they are handed:
// Push and Pop change its state stack, Include rewrites its rule table, and
// Combined does both.
type Mutator interface {
	// Mutate the lexer state machine as it is processing.
	//
	// A non-nil error reports that the mutation could not be applied.
	Mutate(state *LexerState) error
}
// MutatorFunc adapts a plain function to the Mutator interface, so a function
// literal can be used wherever a Mutator is expected.
type MutatorFunc func(state *LexerState) error

// Mutate invokes the wrapped function with state and returns its result.
func (m MutatorFunc) Mutate(state *LexerState) error {
	err := m(state)
	return err
}
// Mutators bundles several Mutators into a single MutatorFunc.
//
// When the returned function runs it calls Mutate on each mutator in the
// order given, stopping at (and returning) the first error. It returns nil
// when every mutator succeeds or when none were supplied.
func Mutators(modifiers ...Mutator) MutatorFunc {
	chain := func(state *LexerState) error {
		for i := 0; i < len(modifiers); i++ {
			err := modifiers[i].Mutate(state)
			if err != nil {
				return err
			}
		}
		return nil
	}
	return chain
}
// Include returns a placeholder Rule that, when mutated, is replaced in the
// current state by all the rules of the named state.
//
// The returned Rule carries only a Mutator. When run, it looks up state in
// ls.Rules and rewrites ls.Rules[ls.State] so that the entry at index ls.Rule
// is substituted by the included rules, keeping the rules before and after it
// in place. It returns an error if state is not in the table or if ls.Rule is
// not a valid index into the current state's rules.
func Include(state string) Rule {
	return Rule{
		Mutator: MutatorFunc(func(ls *LexerState) error {
			included, ok := ls.Rules[state]
			if !ok {
				return fmt.Errorf("invalid include state %q", state)
			}
			current := ls.Rules[ls.State]
			if ls.Rule < 0 || ls.Rule >= len(current) {
				return fmt.Errorf("include of %q: rule index %d out of range for state %q", state, ls.Rule, ls.State)
			}
			// Build the expansion in a fresh slice. Appending directly onto
			// the included state's slice would write into its spare capacity,
			// which is shared with whoever else holds that backing array.
			expanded := make([]CompiledRule, 0, len(current)-1+len(included))
			expanded = append(expanded, current[:ls.Rule]...)
			expanded = append(expanded, included...)
			expanded = append(expanded, current[ls.Rule+1:]...)
			ls.Rules[ls.State] = expanded
			return nil
		}),
	}
}
// Combined returns a MutatorFunc that pushes an anonymous state made by
// concatenating the rules of the given states, in order.
//
// The merged rule list is stored in s.Rules under "__combined_" followed by
// the state names joined with "__", and is reused if that key already exists.
// The rule that triggered the mutator then has its Mutator replaced with
// Push(name), so later matches of that rule push the cached state directly.
// An error is returned, and nothing is stored or pushed, if any named state
// is missing from s.Rules.
func Combined(states ...string) MutatorFunc {
	return func(s *LexerState) error {
		key := "__combined_" + strings.Join(states, "__")
		_, cached := s.Rules[key]
		if !cached {
			merged := make([]CompiledRule, 0)
			for i := range states {
				part, found := s.Rules[states[i]]
				if !found {
					return fmt.Errorf("invalid combine state %q", states[i])
				}
				merged = append(merged, part...)
			}
			s.Rules[key] = merged
		}
		// The slice shares its backing array with the map entry, so this
		// updates the stored rule in place.
		current := s.Rules[s.State]
		current[s.Rule].Mutator = Push(key)
		s.Stack = append(s.Stack, key)
		return nil
	}
}
// Push returns a MutatorFunc that appends the given state names, in order,
// to the end of the lexer's state stack. When called with no names it pushes
// the lexer's current state (s.State) instead. It never returns an error.
func Push(states ...string) MutatorFunc {
	return func(s *LexerState) error {
		targets := states
		if len(targets) == 0 {
			targets = []string{s.State}
		}
		s.Stack = append(s.Stack, targets...)
		return nil
	}
}
// Pop returns a MutatorFunc that removes the last n entries from the lexer's
// state stack when its rule matches.
//
// The returned function reports an error, and leaves the stack untouched, if
// n is negative or larger than the current stack depth; without that check
// the slice expression would panic or re-expose stale entries.
func Pop(n int) MutatorFunc {
	return func(state *LexerState) error {
		depth := len(state.Stack)
		if n < 0 || n > depth {
			return fmt.Errorf("cannot pop %d states from a stack of depth %d", n, depth)
		}
		state.Stack = state.Stack[:depth-n]
		return nil
	}
}
// Default returns a Rule that carries only the given Mutator; every other
// field of the Rule is left at its zero value.
//
// NOTE(review): presumably the lexer applies such a pattern-less rule when no
// earlier rule in the state matches — confirm against the lexer loop.
func Default(mutator Mutator) Rule {
	var rule Rule
	rule.Mutator = mutator
	return rule
}

55
mutators_test.go Normal file
View File

@ -0,0 +1,55 @@
package chroma
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestInclude(t *testing.T) {
include := Include("other")
actual := CompiledRules{
"root": {
CompiledRule{Rule: include},
},
"other": {
CompiledRule{Rule: Rule{
Pattern: "//.+",
Type: Comment,
}},
CompiledRule{Rule: Rule{
Pattern: `"[^"]*"`,
Type: String,
}},
},
}
state := &LexerState{
State: "root",
Rules: actual,
}
err := include.Mutator.Mutate(state)
require.NoError(t, err)
expected := CompiledRules{
"root": {
CompiledRule{Rule: Rule{
Pattern: "//.+",
Type: Comment,
}},
CompiledRule{Rule: Rule{
Pattern: `"[^"]*"`,
Type: String,
}},
},
"other": {
CompiledRule{Rule: Rule{
Pattern: "//.+",
Type: Comment,
}},
CompiledRule{Rule: Rule{
Pattern: `"[^"]*"`,
Type: String,
}},
},
}
require.Equal(t, expected, actual)
}

View File

@ -4,7 +4,7 @@ package chroma
import "fmt"
const _TokenType_name = "EscapeErrorOtherKeywordKeywordConstantKeywordDeclarationKeywordNamespaceKeywordPseudoKeywordReservedKeywordTypeNameNameAttributeNameBuiltinNameBuiltinPseudoNameClassNameConstantNameDecoratorNameEntityNameExceptionNameFunctionNameFunctionMagicNamePropertyNameLabelNameNamespaceNameOtherNameTagNameVariableNameVariableClassNameVariableGlobalNameVariableInstanceNameVariableMagicLiteralLiteralDateLiteralStringLiteralStringAffixLiteralStringBacktickLiteralStringCharLiteralStringDelimiterLiteralStringDocLiteralStringDoubleLiteralStringEscapeLiteralStringHeredocLiteralStringInterpolLiteralStringOtherLiteralStringRegexLiteralStringSingleLiteralStringSymbolLiteralNumberLiteralNumberBinLiteralNumberFloatLiteralNumberHexLiteralNumberIntegerLiteralNumberIntegerLongLiteralNumberOctOperatorOperatorWordPunctuationCommentCommentHashbangCommentMultilineCommentPreprocCommentPreprocFileCommentSingleCommentSpecialGenericGenericDeletedGenericEmphGenericErrorGenericHeadingGenericInsertedGenericOutputGenericPromptGenericStrongGenericSubheadingGenericTracebackTextTextWhitespace"
const _TokenType_name = "EscapeErrorOtherKeywordKeywordConstantKeywordDeclarationKeywordNamespaceKeywordPseudoKeywordReservedKeywordTypeNameNameAttributeNameBuiltinNameBuiltinPseudoNameClassNameConstantNameDecoratorNameEntityNameExceptionNameFunctionNameFunctionMagicNamePropertyNameLabelNameNamespaceNameOtherNameTagNameVariableNameVariableClassNameVariableGlobalNameVariableInstanceNameVariableMagicLiteralLiteralDateLiteralStringLiteralStringAffixLiteralStringBacktickLiteralStringCharLiteralStringDelimiterLiteralStringDocLiteralStringDoubleLiteralStringEscapeLiteralStringHeredocLiteralStringInterpolLiteralStringOtherLiteralStringRegexLiteralStringSingleLiteralStringSymbolLiteralStringNameLiteralNumberLiteralNumberBinLiteralNumberFloatLiteralNumberHexLiteralNumberIntegerLiteralNumberIntegerLongLiteralNumberOctOperatorOperatorWordPunctuationCommentCommentHashbangCommentMultilineCommentSingleCommentSpecialCommentPreprocCommentPreprocFileGenericGenericDeletedGenericEmphGenericErrorGenericHeadingGenericInsertedGenericOutputGenericPromptGenericStrongGenericSubheadingGenericTracebackGenericUnderlineTextTextWhitespace"
var _TokenType_map = map[TokenType]string{
0: _TokenType_name[0:6],
@ -54,36 +54,38 @@ var _TokenType_map = map[TokenType]string{
3111: _TokenType_name[598:616],
3112: _TokenType_name[616:635],
3113: _TokenType_name[635:654],
3200: _TokenType_name[654:667],
3201: _TokenType_name[667:683],
3202: _TokenType_name[683:701],
3203: _TokenType_name[701:717],
3204: _TokenType_name[717:737],
3205: _TokenType_name[737:761],
3206: _TokenType_name[761:777],
4000: _TokenType_name[777:785],
4001: _TokenType_name[785:797],
5000: _TokenType_name[797:808],
6000: _TokenType_name[808:815],
6001: _TokenType_name[815:830],
6002: _TokenType_name[830:846],
6003: _TokenType_name[846:860],
6004: _TokenType_name[860:878],
6005: _TokenType_name[878:891],
6006: _TokenType_name[891:905],
7000: _TokenType_name[905:912],
7001: _TokenType_name[912:926],
7002: _TokenType_name[926:937],
7003: _TokenType_name[937:949],
7004: _TokenType_name[949:963],
7005: _TokenType_name[963:978],
7006: _TokenType_name[978:991],
7007: _TokenType_name[991:1004],
7008: _TokenType_name[1004:1017],
7009: _TokenType_name[1017:1034],
7010: _TokenType_name[1034:1050],
8000: _TokenType_name[1050:1054],
8001: _TokenType_name[1054:1068],
3114: _TokenType_name[654:671],
3200: _TokenType_name[671:684],
3201: _TokenType_name[684:700],
3202: _TokenType_name[700:718],
3203: _TokenType_name[718:734],
3204: _TokenType_name[734:754],
3205: _TokenType_name[754:778],
3206: _TokenType_name[778:794],
4000: _TokenType_name[794:802],
4001: _TokenType_name[802:814],
5000: _TokenType_name[814:825],
6000: _TokenType_name[825:832],
6001: _TokenType_name[832:847],
6002: _TokenType_name[847:863],
6003: _TokenType_name[863:876],
6004: _TokenType_name[876:890],
6100: _TokenType_name[890:904],
6101: _TokenType_name[904:922],
7000: _TokenType_name[922:929],
7001: _TokenType_name[929:943],
7002: _TokenType_name[943:954],
7003: _TokenType_name[954:966],
7004: _TokenType_name[966:980],
7005: _TokenType_name[980:995],
7006: _TokenType_name[995:1008],
7007: _TokenType_name[1008:1021],
7008: _TokenType_name[1021:1034],
7009: _TokenType_name[1034:1051],
7010: _TokenType_name[1051:1067],
7011: _TokenType_name[1067:1083],
8000: _TokenType_name[1083:1087],
8001: _TokenType_name[1087:1101],
}
func (i TokenType) String() string {

View File

@ -74,6 +74,7 @@ const (
LiteralStringRegex
LiteralStringSingle
LiteralStringSymbol
LiteralStringName
)
// Literals.
@ -103,12 +104,16 @@ const (
Comment TokenType = 6000 + iota
CommentHashbang
CommentMultiline
CommentPreproc
CommentPreprocFile
CommentSingle
CommentSpecial
)
// Preprocessor "comments".
//
// These are numbered from 6100 rather than continuing the Comment block that
// starts at 6000, so InSubCategory (which compares values divided by 100)
// places them in a different group from Comment.
const (
	CommentPreproc TokenType = 6100 + iota
	CommentPreprocFile
)
// Generic tokens.
const (
Generic TokenType = 7000 + iota
@ -122,6 +127,7 @@ const (
GenericStrong
GenericSubheading
GenericTraceback
GenericUnderline
)
// Text.
@ -176,6 +182,6 @@ func (t TokenType) InSubCategory(other TokenType) bool {
return t/100 == other/100
}
func (t TokenType) Emit(groups []string, out func(Token)) {
func (t TokenType) Emit(groups []string, lexer Lexer, out func(Token)) {
out(Token{Type: t, Value: groups[0]})
}