feat: a custom grammar for lexers

While I do prefer XML over YAML, it's also super verbose.

```xml
<lexer>
  <config>
    <name>INI</name>
    <alias>ini</alias>
    <alias>cfg</alias>
    <alias>dosini</alias>
    <filename>*.ini</filename>
    <filename>*.cfg</filename>
    <filename>*.inf</filename>
    <filename>*.service</filename>
    <filename>*.socket</filename>
    <filename>.gitconfig</filename>
    <filename>.editorconfig</filename>
    <filename>pylintrc</filename>
    <filename>.pylintrc</filename>
    <mime_type>text/x-ini</mime_type>
    <mime_type>text/inf</mime_type>
    <priority>0.1</priority>
  </config>
  <rules>
    <state name="root">
      <rule pattern="\s+">
        <token type="Text"/>
      </rule>
      <rule pattern="[;#].*">
        <token type="CommentSingle"/>
      </rule>
      <rule pattern="\[.*?\]$">
        <token type="Keyword"/>
      </rule>
      <rule pattern="(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)">
        <bygroups>
          <token type="NameAttribute"/>
          <token type="Text"/>
          <token type="Operator"/>
          <token type="Text"/>
          <token type="LiteralString"/>
        </bygroups>
      </rule>
      <rule pattern="(.+?)$">
        <token type="NameAttribute"/>
      </rule>
    </state>
  </rules>
</lexer>
```

Custom grammar (the syntax added by this commit):

Pros:
- Very succinct
- No need to escape regexes

Cons:
- Bespoke syntax that people will have to learn
- No syntax highlighting in editors, no validation beyond parser

```
config {
  name "INI"
  aliases "ini", "cfg"
  filenames "*.ini", "*.cfg", "*.inf", "*.service", "*.socket", ".gitconfig", ".editorconfig", "pylintrc", ".pylintrc"
  mime-types "text/x-ini", "text/inf"
  priority 0.1
}

state root {
  /\s+/ text
  /[;#].*/ commentsingle
  /\[.*?\]$/ keyword
  /(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)/ by groups nameattribute, text, operator, text, literalstring
  /(.+?)$/ nameattribute
}
```

YAML, for comparison:

Pros:
- More succinct than XML
- Can define a schema and have editors use it to validate.

Cons:
- Fucking YAML
- Indentation is awful
- Less succinct than bespoke syntax
- Will need some way to discriminate between "emitters" and "mutators" when parsing, e.g. `type: Keyword` vs. `type: {bygroups: [...]}`

```yaml
config:
  name: "INI"
  aliases: ["ini", "cfg"]
  filenames: ["*.ini", "*.cfg", "*.inf", "*.service", "*.socket", ".gitconfig", ".editorconfig", "pylintrc", ".pylintrc"]
  mime-types: ["text/x-ini", "text/inf"]
  priority: 0.1
state:
  root:
    rule:
      - pattern: "\\s+"
        type: Text
      - pattern: "[;#].*"
        type: CommentSingle
      - pattern: "\\[.*?\\]"
        type: Keyword
      - pattern: "(.*?)([ \\t]*)(=)([ \\t]*)(.*(?:\\n[ \\t].+)*)"
        type:
          bygroups: [NameAttribute, Text, Operator, Text, LiteralString]
      - pattern: "(.+?)$"
        type: NameAttribute
```
2025-10-30 23:57:49 +02:00 · 2025-03-23 09:08:14 +11:00
parent e0c774731c
commit ee1172e04f
11 changed files with 184 additions and 61 deletions
--- a/cmd/chroma/go.mod
+++ b/cmd/chroma/go.mod
@@ -14,6 +14,7 @@ require (
 )

 require (
-	github.com/dlclark/regexp2 v1.11.4 // indirect
+	github.com/alecthomas/participle/v2 v2.1.3 // indirect
+	github.com/dlclark/regexp2 v1.11.5 // indirect
 	golang.org/x/sys v0.29.0 // indirect
 )
--- a/cmd/chroma/go.sum
+++ b/cmd/chroma/go.sum
@@ -1,41 +1,19 @@
-github.com/alecthomas/assert/v2 v2.2.1 h1:XivOgYcduV98QCahG8T5XTezV5bylXe+lBxLG2K2ink=
-github.com/alecthomas/kong v0.8.0 h1:ryDCzutfIqJPnNn0omnrgHLbAggDQM2VWHikE1xqK7s=
-github.com/alecthomas/kong v0.8.0/go.mod h1:n1iCIO2xS46oE8ZfYCNDqdR0b0wZNrXAIAqro/2132U=
-github.com/alecthomas/kong v0.8.1 h1:acZdn3m4lLRobeh3Zi2S2EpnXTd1mOL6U7xVml+vfkY=
-github.com/alecthomas/kong v0.8.1/go.mod h1:n1iCIO2xS46oE8ZfYCNDqdR0b0wZNrXAIAqro/2132U=
-github.com/alecthomas/kong v0.9.0 h1:G5diXxc85KvoV2f0ZRVuMsi45IrBgx9zDNGNj165aPA=
-github.com/alecthomas/kong v0.9.0/go.mod h1:Y47y5gKfHp1hDc7CH7OeXgLIpp+Q2m1Ni0L5s3bI8Os=
-github.com/alecthomas/kong v1.2.1 h1:E8jH4Tsgv6wCRX2nGrdPyHDUCSG83WH2qE4XLACD33Q=
-github.com/alecthomas/kong v1.2.1/go.mod h1:rKTSFhbdp3Ryefn8x5MOEprnRFQ7nlmMC01GKhehhBM=
-github.com/alecthomas/kong v1.4.0 h1:UL7tzGMnnY0YRMMvJyITIRX1EpO6RbBRZDNcCevy3HA=
-github.com/alecthomas/kong v1.4.0/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU=
-github.com/alecthomas/kong v1.5.1 h1:9quB93P2aNGXf5C1kWNei85vjBgITNJQA4dSwJQGCOY=
-github.com/alecthomas/kong v1.5.1/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU=
-github.com/alecthomas/kong v1.6.1 h1:/7bVimARU3uxPD0hbryPE8qWrS3Oz3kPQoxA/H2NKG8=
-github.com/alecthomas/kong v1.6.1/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU=
-github.com/alecthomas/kong v1.7.0 h1:MnT8+5JxFDCvISeI6vgd/mFbAJwueJ/pqQNzZMsiqZE=
-github.com/alecthomas/kong v1.7.0/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU=
+github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0=
+github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
 github.com/alecthomas/kong v1.8.1 h1:6aamvWBE/REnR/BCq10EcozmcpUPc5aGI1lPAWdB0EE=
 github.com/alecthomas/kong v1.8.1/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU=
-github.com/alecthomas/repr v0.2.0 h1:HAzS41CIzNW5syS8Mf9UwXhNH1J9aix/BvDRf1Ml2Yk=
-github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
-github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
-github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI=
-github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
-github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
-github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/alecthomas/participle/v2 v2.1.3 h1:Vjvj97FD35IYkhLKCoiHEigpDfNZZ5de3wcDSm5vaG8=
+github.com/alecthomas/participle/v2 v2.1.3/go.mod h1:8tqVbpTX20Ru4NfYQgZf4mP18eXPTBViyMWiArNEgGI=
+github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
+github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
+github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
+github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
-github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
-github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
+github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
 github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
 github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
-github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
-github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
-github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
-golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
 golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
--- a/emitters.go
+++ b/emitters.go
@@ -27,7 +27,7 @@ func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator {
 type Emitters []Emitter

 type byGroupsEmitter struct {
-	Emitters
+	Emitters `parser:"'by' 'groups' @@ (',' @@)*"`
 }

 // ByGroups emits a token for each matching group in the rule's regex.
@@ -171,7 +171,7 @@ func UsingLexer(lexer Lexer) Emitter {
 }

 type usingEmitter struct {
-	Lexer string `xml:"lexer,attr"`
+	Lexer string `parser:"'using' @String" xml:"lexer,attr"`
 }

 func (u *usingEmitter) EmitterKind() string { return "using" }
@@ -199,7 +199,7 @@ func Using(lexer string) Emitter {
 }

 type usingSelfEmitter struct {
-	State string `xml:"state,attr"`
+	State string `parser:"'using' 'self' @String" xml:"state,attr"`
 }

 func (u *usingSelfEmitter) EmitterKind() string { return "usingself" }
--- a/go.mod
+++ b/go.mod
@@ -8,4 +8,7 @@ require (
 	github.com/dlclark/regexp2 v1.11.5
 )

-require github.com/hexops/gotextdiff v1.0.3 // indirect
+require (
+	github.com/alecthomas/participle/v2 v2.1.3
+	github.com/hexops/gotextdiff v1.0.3 // indirect
+)
--- a/go.sum
+++ b/go.sum
@@ -1,9 +1,9 @@
 github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0=
 github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
+github.com/alecthomas/participle/v2 v2.1.3 h1:Vjvj97FD35IYkhLKCoiHEigpDfNZZ5de3wcDSm5vaG8=
+github.com/alecthomas/participle/v2 v2.1.3/go.mod h1:8tqVbpTX20Ru4NfYQgZf4mP18eXPTBViyMWiArNEgGI=
 github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
 github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
-github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
-github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
 github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
--- a/lexer.go
+++ b/lexer.go
@@ -15,30 +15,30 @@ var (
 // Config for a lexer.
 type Config struct {
 	// Name of the lexer.
-	Name string `xml:"name,omitempty"`
+	Name string `parser:"('name' @String" xml:"name,omitempty"`

 	// Shortcuts for the lexer
-	Aliases []string `xml:"alias,omitempty"`
+	Aliases []string `parser:"| 'aliases' @String (',' @String)*" xml:"alias,omitempty"`

 	// File name globs
-	Filenames []string `xml:"filename,omitempty"`
+	Filenames []string `parser:"| 'filenames' @String (',' @String)*" xml:"filename,omitempty"`

 	// Secondary file name globs
-	AliasFilenames []string `xml:"alias_filename,omitempty"`
+	AliasFilenames []string `parser:"| 'alias-filenames' @String (',' @String)*" xml:"alias_filename,omitempty"`

 	// MIME types
-	MimeTypes []string `xml:"mime_type,omitempty"`
+	MimeTypes []string `parser:"| 'mime-types' @String (',' @String)*" xml:"mime_type,omitempty"`

 	// Regex matching is case-insensitive.
-	CaseInsensitive bool `xml:"case_insensitive,omitempty"`
+	CaseInsensitive bool `parser:"| @'case-insensitive'" xml:"case_insensitive,omitempty"`

 	// Regex matches all characters.
-	DotAll bool `xml:"dot_all,omitempty"`
+	DotAll bool `parser:"| @'dot-all'" xml:"dot_all,omitempty"`

 	// Regex does not match across lines ($ matches EOL).
 	//
 	// Defaults to multiline.
-	NotMultiline bool `xml:"not_multiline,omitempty"`
+	NotMultiline bool `parser:"| @'not-multiline'" xml:"not_multiline,omitempty"`

 	// Don't strip leading and trailing newlines from the input.
 	// DontStripNL bool
@@ -48,7 +48,7 @@ type Config struct {

 	// Make sure that the input ends with a newline. This
 	// is required for some lexers that consume input linewise.
-	EnsureNL bool `xml:"ensure_nl,omitempty"`
+	EnsureNL bool `parser:"| @'ensure-nl'" xml:"ensure_nl,omitempty"`

 	// If given and greater than 0, expand tabs in the input.
 	// TabSize int
@@ -56,27 +56,31 @@ type Config struct {
 	// Priority of lexer.
 	//
 	// If this is 0 it will be treated as a default of 1.
-	Priority float32 `xml:"priority,omitempty"`
+	Priority float32 `parser:"| 'priority' @Float" xml:"priority,omitempty"`

 	// Analyse is a list of regexes to match against the input.
 	//
 	// If a match is found, the score is returned if single attribute is set to true,
 	// otherwise the sum of all the score of matching patterns will be
 	// used as the final score.
-	Analyse *AnalyseConfig `xml:"analyse,omitempty"`
+	Analyse *AnalyseConfig `parser:"| @@ )*" xml:"analyse,omitempty"`
 }

 // AnalyseConfig defines the list of regexes analysers.
+//
+// Syntax:
+//
+//	analyse /foo/=1.0 /bar/=2.0 [first]
 type AnalyseConfig struct {
-	Regexes []RegexConfig `xml:"regex,omitempty"`
+	Regexes []RegexConfig `parser:"'analyse' @@*" xml:"regex,omitempty"`
 	// If true, the first matching score is returned.
-	First bool `xml:"first,attr"`
+	First bool `parser:"@'first'?" xml:"first,attr"`
 }

 // RegexConfig defines a single regex pattern and its score in case of match.
 type RegexConfig struct {
-	Pattern string  `xml:"pattern,attr"`
-	Score   float32 `xml:"score,attr"`
+	Pattern string  `parser:"(@String | @Regex)" xml:"pattern,attr"`
+	Score   float32 `parser:"'=' @Float"         xml:"score,attr"`
 }

 // Token output to formatter.
--- a/mutators.go
+++ b/mutators.go
@@ -33,7 +33,7 @@ type MutatorFunc func(state *LexerState) error
 func (m MutatorFunc) Mutate(state *LexerState) error { return m(state) } // nolint

 type multiMutator struct {
-	Mutators []Mutator `xml:"mutator"`
+	Mutators []Mutator `parser:"(@@ (',' @@)*)?" xml:"mutator"`
 }

 func (m *multiMutator) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
@@ -86,7 +86,7 @@ func Mutators(modifiers ...Mutator) Mutator {
 }

 type includeMutator struct {
-	State string `xml:"state,attr"`
+	State string `parser:"'include' @String" xml:"state,attr"`
 }

 // Include the given state.
@@ -110,7 +110,7 @@ func (i *includeMutator) MutateLexer(rules CompiledRules, state string, rule int
 }

 type combinedMutator struct {
-	States []string `xml:"state,attr"`
+	States []string `parser:"'combined' @Ident (',' @Ident)*" xml:"state,attr"`
 }

 func (c *combinedMutator) MutatorKind() string { return "combined" }
@@ -142,7 +142,7 @@ func (c *combinedMutator) MutateLexer(rules CompiledRules, state string, rule in
 }

 type pushMutator struct {
-	States []string `xml:"state,attr"`
+	States []string `parser:"'push' (@Ident (',' @Ident)*)?" xml:"state,attr"`
 }

 func (p *pushMutator) MutatorKind() string { return "push" }
@@ -168,7 +168,7 @@ func Push(states ...string) Mutator {
 }

 type popMutator struct {
-	Depth int `xml:"depth,attr"`
+	Depth int `parser:"'pop' @Int?" xml:"depth,attr"`
 }

 func (p *popMutator) MutatorKind() string { return "pop" }
--- a/parser.go
+++ b/parser.go
@@ -0,0 +1,50 @@
+package chroma
+
+import (
+	"github.com/alecthomas/participle/v2"
+	"github.com/alecthomas/participle/v2/lexer"
+)
+
+type AST struct {
+	Config *Config    `parser:"( 'config' '{' @@ '}'"`
+	States []stateAST `parser:"@@* )*"`
+}
+
+type stateAST struct {
+	Name  string `parser:"'state' @Ident '{'"`
+	Rules []Rule `parser:"@@* '}'"`
+}
+
+type tokenTypeAST struct {
+	Type TokenType `parser:"@Ident"`
+}
+
+func (t tokenTypeAST) Emit(groups []string, state *LexerState) Iterator {
+	return t.Type.Emit(groups, state)
+}
+
+var (
+	lex = lexer.MustSimple([]lexer.SimpleRule{
+		{"Punct", `[][={}(),:;]`},
+		{"Whitespace", `\s+`},
+		{"Comment", `//.*`},
+		{"Boolean", `\b(true|false)\b`},
+		{"Ident", `[a-zA-Z-][a-zA-Z0-9-]*`},
+		{"String", `"(\\.|[^"])*"`},
+		{"Float", `[-+]?\d*\.\d+([eE][-+]?\d+)?`},
+		{"Int", `[-+]?\d+`},
+		{"Regex", `/(\\.|[^/])+/`},
+	})
+	parser = participle.MustBuild[AST](
+		participle.Lexer(lex),
+		participle.Unquote("String"),
+		participle.Map(func(token lexer.Token) (lexer.Token, error) {
+			token.Value = token.Value[1 : len(token.Value)-1]
+			return token, nil
+		}, "Regex"),
+		participle.Elide("Whitespace", "Comment"),
+		participle.UseLookahead(1),
+		participle.Union[Emitter](&byGroupsEmitter{}, &usingEmitter{}, &usingSelfEmitter{}, &tokenTypeAST{}),
+		participle.Union[Mutator](&includeMutator{}, &combinedMutator{}, &pushMutator{}, &popMutator{}),
+	)
+)
--- a/parser_test.go
+++ b/parser_test.go
@@ -0,0 +1,72 @@
+package chroma
+
+import (
+	"testing"
+
+	assert "github.com/alecthomas/assert/v2"
+)
+
+func TestParser(t *testing.T) {
+	ast, err := parser.ParseString("", `
+config {
+	name "INI"
+	aliases "ini", "cfg"
+	filenames "*.ini", "*.cfg", "*.inf", "*.service", "*.socket", ".gitconfig",
+			  ".editorconfig", "pylintrc", ".pylintrc"
+	mime-types "text/x-ini", "text/inf"
+	priority 0.1
+}
+
+state root {
+	/\s+/ text
+	/[;#].*/ commentsingle
+	/\[.*?\]$/ keyword
+	/(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)/ by groups
+		nameattribute, text, operator, text, literalstring
+	/(.+?)$/ nameattribute
+}
+`)
+	assert.NoError(t, err)
+	assert.Equal(t, &AST{
+		Config: &Config{
+			Name: "INI",
+			Aliases: []string{
+				"ini",
+				"cfg",
+			},
+			Filenames: []string{
+				"*.ini",
+				"*.cfg",
+				"*.inf",
+				"*.service",
+				"*.socket",
+				".gitconfig",
+				".editorconfig",
+				"pylintrc",
+				".pylintrc",
+			},
+			MimeTypes: []string{
+				"text/x-ini",
+				"text/inf",
+			},
+			Priority: 0.1,
+		},
+		States: []stateAST{
+			{Name: "root",
+				Rules: []Rule{
+					{Pattern: `\s+`, Type: &tokenTypeAST{Text}},
+					{Pattern: `[;#].*`, Type: &tokenTypeAST{CommentSingle}},
+					{Pattern: `\[.*?\]$`, Type: &tokenTypeAST{Keyword}},
+					{Pattern: `(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)`, Type: &byGroupsEmitter{Emitters{
+						&tokenTypeAST{NameAttribute},
+						&tokenTypeAST{Text},
+						&tokenTypeAST{Operator},
+						&tokenTypeAST{Text},
+						&tokenTypeAST{LiteralString},
+					}}},
+					{Pattern: `(.+?)$`, Type: &tokenTypeAST{NameAttribute}},
+				},
+			},
+		},
+	}, ast)
+}
--- a/regexp.go
+++ b/regexp.go
@@ -16,9 +16,9 @@ import (

 // A Rule is the fundamental matching unit of the Regex lexer state machine.
 type Rule struct {
-	Pattern string
-	Type    Emitter
-	Mutator Mutator
+	Pattern string  `parser:"@Regex"`
+	Type    Emitter `parser:"@@"`
+	Mutator Mutator `parser:"@@?"`
 }

 // Words creates a regex that matches any of the given literal words.
--- a/types.go
+++ b/types.go
@@ -1,5 +1,10 @@
 package chroma

+import (
+	"github.com/alecthomas/participle/v2"
+	"github.com/alecthomas/participle/v2/lexer"
+)
+
 //go:generate enumer -text -type TokenType

 // TokenType is the type of token to highlight.
@@ -7,6 +12,16 @@ package chroma
 // It is also an Emitter, emitting a single token of itself
 type TokenType int

+// Parse implements participle.Parseable.
+func (t *TokenType) Parse(lex *lexer.PeekingLexer) error {
+	tok := lex.Peek()
+	if err := t.UnmarshalText([]byte(tok.Value)); err != nil {
+		return participle.NextMatch
+	}
+	lex.Next()
+	return nil
+}
+
 // Set of TokenTypes.
 //
 // Categories of types are grouped in ranges of 1000, while sub-categories are in ranges of 100. For