mirror of
				https://github.com/alecthomas/chroma.git
				synced 2025-10-30 23:57:49 +02:00 
			
		
		
		
	feat: a custom grammar for lexers
While I do prefer XML over YAML, it's also super verbose.
```xml
<lexer>
  <config>
    <name>INI</name>
    <alias>ini</alias>
    <alias>cfg</alias>
    <alias>dosini</alias>
    <filename>*.ini</filename>
    <filename>*.cfg</filename>
    <filename>*.inf</filename>
    <filename>*.service</filename>
    <filename>*.socket</filename>
    <filename>.gitconfig</filename>
    <filename>.editorconfig</filename>
    <filename>pylintrc</filename>
    <filename>.pylintrc</filename>
    <mime_type>text/x-ini</mime_type>
    <mime_type>text/inf</mime_type>
    <priority>0.1</priority> <!-- higher priority than Inform 6 -->
  </config>
  <rules>
    <state name="root">
      <rule pattern="\s+">
        <token type="Text"/>
      </rule>
      <rule pattern="[;#].*">
        <token type="CommentSingle"/>
      </rule>
      <rule pattern="\[.*?\]$">
        <token type="Keyword"/>
      </rule>
      <rule pattern="(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)">
        <bygroups>
          <token type="NameAttribute"/>
          <token type="Text"/>
          <token type="Operator"/>
          <token type="Text"/>
          <token type="LiteralString"/>
        </bygroups>
      </rule>
      <rule pattern="(.+?)$">
        <token type="NameAttribute"/>
      </rule>
    </state>
  </rules>
</lexer>
```
Pros:
- Very succinct
- No need to escape regexes
Cons:
- Bespoke syntax that people will have to learn
- No syntax highlighting in editors, no validation beyond parser
```
config {
  name "INI"
  aliases "ini", "cfg"
  filenames "*.ini", "*.cfg", "*.inf", "*.service", "*.socket", ".gitconfig",
            ".editorconfig", "pylintrc", ".pylintrc"
  mime-types "text/x-ini", "text/inf"
  priority 0.1
}
state root {
  /\s+/ text
  /[;#].*/ commentsingle
  /\[.*?\]$/ keyword
  /(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)/ by groups
    nameattribute, text, operator, text, literalstring
  /(.+?)$/ nameattribute
}
```
Pros:
- More succinct than XML
- Can define a schema and have editors use it to validate.
Cons:
- Fucking YAML
  - Indentation is awful
- Less succinct than bespoke syntax
- Will need some way to discriminate between "emitters" and "mutators" when parsing, e.g. `type: Keyword` vs. `type: {bygroups: [...]}`
```yaml
config:
  name: "INI"
  aliases: ["ini", "cfg"]
  filenames: ["*.ini", "*.cfg", "*.inf", "*.service", "*.socket", ".gitconfig",
              ".editorconfig", "pylintrc", ".pylintrc"]
  mime-types: ["text/x-ini", "text/inf"]
  priority: 0.1
state:
  root:
    rule:
      - pattern: "\\s+"
        type: Text
      - pattern: "[;#].*"
        type: CommentSingle
      - pattern: "\\[.*?\\]$"
        type: Keyword
      - pattern: "(.*?)([ \\t]*)(=)([ \\t]*)(.*(?:\\n[ \\t].+)*)"
        type:
          bygroups: [NameAttribute, Text, Operator, Text, LiteralString]
      - pattern: "(.+?)$"
        type: NameAttribute
```
			
			
This commit is contained in:
		| @@ -14,6 +14,7 @@ require ( | ||||
| ) | ||||
|  | ||||
| require ( | ||||
| 	github.com/dlclark/regexp2 v1.11.4 // indirect | ||||
| 	github.com/alecthomas/participle/v2 v2.1.3 // indirect | ||||
| 	github.com/dlclark/regexp2 v1.11.5 // indirect | ||||
| 	golang.org/x/sys v0.29.0 // indirect | ||||
| ) | ||||
|   | ||||
| @@ -1,41 +1,19 @@ | ||||
| github.com/alecthomas/assert/v2 v2.2.1 h1:XivOgYcduV98QCahG8T5XTezV5bylXe+lBxLG2K2ink= | ||||
| github.com/alecthomas/kong v0.8.0 h1:ryDCzutfIqJPnNn0omnrgHLbAggDQM2VWHikE1xqK7s= | ||||
| github.com/alecthomas/kong v0.8.0/go.mod h1:n1iCIO2xS46oE8ZfYCNDqdR0b0wZNrXAIAqro/2132U= | ||||
| github.com/alecthomas/kong v0.8.1 h1:acZdn3m4lLRobeh3Zi2S2EpnXTd1mOL6U7xVml+vfkY= | ||||
| github.com/alecthomas/kong v0.8.1/go.mod h1:n1iCIO2xS46oE8ZfYCNDqdR0b0wZNrXAIAqro/2132U= | ||||
| github.com/alecthomas/kong v0.9.0 h1:G5diXxc85KvoV2f0ZRVuMsi45IrBgx9zDNGNj165aPA= | ||||
| github.com/alecthomas/kong v0.9.0/go.mod h1:Y47y5gKfHp1hDc7CH7OeXgLIpp+Q2m1Ni0L5s3bI8Os= | ||||
| github.com/alecthomas/kong v1.2.1 h1:E8jH4Tsgv6wCRX2nGrdPyHDUCSG83WH2qE4XLACD33Q= | ||||
| github.com/alecthomas/kong v1.2.1/go.mod h1:rKTSFhbdp3Ryefn8x5MOEprnRFQ7nlmMC01GKhehhBM= | ||||
| github.com/alecthomas/kong v1.4.0 h1:UL7tzGMnnY0YRMMvJyITIRX1EpO6RbBRZDNcCevy3HA= | ||||
| github.com/alecthomas/kong v1.4.0/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU= | ||||
| github.com/alecthomas/kong v1.5.1 h1:9quB93P2aNGXf5C1kWNei85vjBgITNJQA4dSwJQGCOY= | ||||
| github.com/alecthomas/kong v1.5.1/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU= | ||||
| github.com/alecthomas/kong v1.6.1 h1:/7bVimARU3uxPD0hbryPE8qWrS3Oz3kPQoxA/H2NKG8= | ||||
| github.com/alecthomas/kong v1.6.1/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU= | ||||
| github.com/alecthomas/kong v1.7.0 h1:MnT8+5JxFDCvISeI6vgd/mFbAJwueJ/pqQNzZMsiqZE= | ||||
| github.com/alecthomas/kong v1.7.0/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU= | ||||
| github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0= | ||||
| github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= | ||||
| github.com/alecthomas/kong v1.8.1 h1:6aamvWBE/REnR/BCq10EcozmcpUPc5aGI1lPAWdB0EE= | ||||
| github.com/alecthomas/kong v1.8.1/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU= | ||||
| github.com/alecthomas/repr v0.2.0 h1:HAzS41CIzNW5syS8Mf9UwXhNH1J9aix/BvDRf1Ml2Yk= | ||||
| github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0= | ||||
| github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= | ||||
| github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= | ||||
| github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= | ||||
| github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo= | ||||
| github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= | ||||
| github.com/alecthomas/participle/v2 v2.1.3 h1:Vjvj97FD35IYkhLKCoiHEigpDfNZZ5de3wcDSm5vaG8= | ||||
| github.com/alecthomas/participle/v2 v2.1.3/go.mod h1:8tqVbpTX20Ru4NfYQgZf4mP18eXPTBViyMWiArNEgGI= | ||||
| github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc= | ||||
| github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= | ||||
| github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ= | ||||
| github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= | ||||
| github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= | ||||
| github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= | ||||
| github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= | ||||
| github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= | ||||
| github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= | ||||
| github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= | ||||
| github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= | ||||
| github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= | ||||
| github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= | ||||
| github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= | ||||
| github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= | ||||
| golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||||
| golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= | ||||
| golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||||
| golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= | ||||
| golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= | ||||
|   | ||||
| @@ -27,7 +27,7 @@ func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator { | ||||
| type Emitters []Emitter | ||||
|  | ||||
| type byGroupsEmitter struct { | ||||
| 	Emitters | ||||
| 	Emitters `parser:"'by' 'groups' @@ (',' @@)*"` | ||||
| } | ||||
|  | ||||
| // ByGroups emits a token for each matching group in the rule's regex. | ||||
| @@ -171,7 +171,7 @@ func UsingLexer(lexer Lexer) Emitter { | ||||
| } | ||||
|  | ||||
| type usingEmitter struct { | ||||
| 	Lexer string `xml:"lexer,attr"` | ||||
| 	Lexer string `parser:"'using' @String" xml:"lexer,attr"` | ||||
| } | ||||
|  | ||||
| func (u *usingEmitter) EmitterKind() string { return "using" } | ||||
| @@ -199,7 +199,7 @@ func Using(lexer string) Emitter { | ||||
| } | ||||
|  | ||||
| type usingSelfEmitter struct { | ||||
| 	State string `xml:"state,attr"` | ||||
| 	State string `parser:"'using' 'self' @String" xml:"state,attr"` | ||||
| } | ||||
|  | ||||
| func (u *usingSelfEmitter) EmitterKind() string { return "usingself" } | ||||
|   | ||||
							
								
								
									
										5
									
								
								go.mod
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								go.mod
									
									
									
									
									
								
							| @@ -8,4 +8,7 @@ require ( | ||||
| 	github.com/dlclark/regexp2 v1.11.5 | ||||
| ) | ||||
|  | ||||
| require github.com/hexops/gotextdiff v1.0.3 // indirect | ||||
| require ( | ||||
| 	github.com/alecthomas/participle/v2 v2.1.3 | ||||
| 	github.com/hexops/gotextdiff v1.0.3 // indirect | ||||
| ) | ||||
|   | ||||
							
								
								
									
										4
									
								
								go.sum
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								go.sum
									
									
									
									
									
								
							| @@ -1,9 +1,9 @@ | ||||
| github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0= | ||||
| github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= | ||||
| github.com/alecthomas/participle/v2 v2.1.3 h1:Vjvj97FD35IYkhLKCoiHEigpDfNZZ5de3wcDSm5vaG8= | ||||
| github.com/alecthomas/participle/v2 v2.1.3/go.mod h1:8tqVbpTX20Ru4NfYQgZf4mP18eXPTBViyMWiArNEgGI= | ||||
| github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc= | ||||
| github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= | ||||
| github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo= | ||||
| github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= | ||||
| github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ= | ||||
| github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= | ||||
| github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= | ||||
|   | ||||
							
								
								
									
										34
									
								
								lexer.go
									
									
									
									
									
								
							
							
						
						
									
										34
									
								
								lexer.go
									
									
									
									
									
								
							| @@ -15,30 +15,30 @@ var ( | ||||
| // Config for a lexer. | ||||
| type Config struct { | ||||
| 	// Name of the lexer. | ||||
| 	Name string `xml:"name,omitempty"` | ||||
| 	Name string `parser:"('name' @String" xml:"name,omitempty"` | ||||
|  | ||||
| 	// Shortcuts for the lexer | ||||
| 	Aliases []string `xml:"alias,omitempty"` | ||||
| 	Aliases []string `parser:"| 'aliases' @String (',' @String)*" xml:"alias,omitempty"` | ||||
|  | ||||
| 	// File name globs | ||||
| 	Filenames []string `xml:"filename,omitempty"` | ||||
| 	Filenames []string `parser:"| 'filenames' @String (',' @String)*" xml:"filename,omitempty"` | ||||
|  | ||||
| 	// Secondary file name globs | ||||
| 	AliasFilenames []string `xml:"alias_filename,omitempty"` | ||||
| 	AliasFilenames []string `parser:"| 'alias-filenames' @String (',' @String)*" xml:"alias_filename,omitempty"` | ||||
|  | ||||
| 	// MIME types | ||||
| 	MimeTypes []string `xml:"mime_type,omitempty"` | ||||
| 	MimeTypes []string `parser:"| 'mime-types' @String (',' @String)*" xml:"mime_type,omitempty"` | ||||
|  | ||||
| 	// Regex matching is case-insensitive. | ||||
| 	CaseInsensitive bool `xml:"case_insensitive,omitempty"` | ||||
| 	CaseInsensitive bool `parser:"| @'case_insensitive'" xml:"case_insensitive,omitempty"` | ||||
|  | ||||
| 	// Regex matches all characters. | ||||
| 	DotAll bool `xml:"dot_all,omitempty"` | ||||
| 	DotAll bool `parser:"| @'dot-all'" xml:"dot_all,omitempty"` | ||||
|  | ||||
| 	// Regex does not match across lines ($ matches EOL). | ||||
| 	// | ||||
| 	// Defaults to multiline. | ||||
| 	NotMultiline bool `xml:"not_multiline,omitempty"` | ||||
| 	NotMultiline bool `parser:"| @'not-multiline'" xml:"not_multiline,omitempty"` | ||||
|  | ||||
| 	// Don't strip leading and trailing newlines from the input. | ||||
| 	// DontStripNL bool | ||||
| @@ -48,7 +48,7 @@ type Config struct { | ||||
|  | ||||
| 	// Make sure that the input ends with a newline. This | ||||
| 	// is required for some lexers that consume input linewise. | ||||
| 	EnsureNL bool `xml:"ensure_nl,omitempty"` | ||||
| 	EnsureNL bool `parser:"| @'ensure-nl'" xml:"ensure_nl,omitempty"` | ||||
|  | ||||
| 	// If given and greater than 0, expand tabs in the input. | ||||
| 	// TabSize int | ||||
| @@ -56,27 +56,31 @@ type Config struct { | ||||
| 	// Priority of lexer. | ||||
| 	// | ||||
| 	// If this is 0 it will be treated as a default of 1. | ||||
| 	Priority float32 `xml:"priority,omitempty"` | ||||
| 	Priority float32 `parser:"| 'priority' @Float" xml:"priority,omitempty"` | ||||
|  | ||||
| 	// Analyse is a list of regexes to match against the input. | ||||
| 	// | ||||
| 	// If a match is found, the score is returned if single attribute is set to true, | ||||
| 	// otherwise the sum of all the score of matching patterns will be | ||||
| 	// used as the final score. | ||||
| 	Analyse *AnalyseConfig `xml:"analyse,omitempty"` | ||||
| 	Analyse *AnalyseConfig `parser:"| @@ )*" xml:"analyse,omitempty"` | ||||
| } | ||||
|  | ||||
| // AnalyseConfig defines the list of regexes analysers. | ||||
| // | ||||
| // Syntax: | ||||
| // | ||||
| //	analyse /foo/=1.0 /bar/=2.0 first | ||||
| type AnalyseConfig struct { | ||||
| 	Regexes []RegexConfig `xml:"regex,omitempty"` | ||||
| 	Regexes []RegexConfig `parser:"'analyse' @@*" xml:"regex,omitempty"` | ||||
| 	// If true, the first matching score is returned. | ||||
| 	First bool `xml:"first,attr"` | ||||
| 	First bool `parser:"@'first'" xml:"first,attr"` | ||||
| } | ||||
|  | ||||
| // RegexConfig defines a single regex pattern and its score in case of match. | ||||
| type RegexConfig struct { | ||||
| 	Pattern string  `xml:"pattern,attr"` | ||||
| 	Score   float32 `xml:"score,attr"` | ||||
| 	Pattern string  `parser:"(@String | @Regex)" xml:"pattern,attr"` | ||||
| 	Score   float32 `parser:"@Float"             xml:"score,attr"` | ||||
| } | ||||
|  | ||||
| // Token output to formatter. | ||||
|   | ||||
							
								
								
									
										10
									
								
								mutators.go
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								mutators.go
									
									
									
									
									
								
							| @@ -33,7 +33,7 @@ type MutatorFunc func(state *LexerState) error | ||||
| func (m MutatorFunc) Mutate(state *LexerState) error { return m(state) } // nolint | ||||
|  | ||||
| type multiMutator struct { | ||||
| 	Mutators []Mutator `xml:"mutator"` | ||||
| 	Mutators []Mutator `parser:"(@@ (',' @@)*)?" xml:"mutator"` | ||||
| } | ||||
|  | ||||
| func (m *multiMutator) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | ||||
| @@ -86,7 +86,7 @@ func Mutators(modifiers ...Mutator) Mutator { | ||||
| } | ||||
|  | ||||
| type includeMutator struct { | ||||
| 	State string `xml:"state,attr"` | ||||
| 	State string `parser:"'include' @String" xml:"state,attr"` | ||||
| } | ||||
|  | ||||
| // Include the given state. | ||||
| @@ -110,7 +110,7 @@ func (i *includeMutator) MutateLexer(rules CompiledRules, state string, rule int | ||||
| } | ||||
|  | ||||
| type combinedMutator struct { | ||||
| 	States []string `xml:"state,attr"` | ||||
| 	States []string `parser:"'combined' @Ident (',' @Ident)*" xml:"state,attr"` | ||||
| } | ||||
|  | ||||
| func (c *combinedMutator) MutatorKind() string { return "combined" } | ||||
| @@ -142,7 +142,7 @@ func (c *combinedMutator) MutateLexer(rules CompiledRules, state string, rule in | ||||
| } | ||||
|  | ||||
| type pushMutator struct { | ||||
| 	States []string `xml:"state,attr"` | ||||
| 	States []string `parser:"'push' (@Ident (',' @Ident)*)?" xml:"state,attr"` | ||||
| } | ||||
|  | ||||
| func (p *pushMutator) MutatorKind() string { return "push" } | ||||
| @@ -168,7 +168,7 @@ func Push(states ...string) Mutator { | ||||
| } | ||||
|  | ||||
| type popMutator struct { | ||||
| 	Depth int `xml:"depth,attr"` | ||||
| 	Depth int `parser:"'pop' @Int?" xml:"depth,attr"` | ||||
| } | ||||
|  | ||||
| func (p *popMutator) MutatorKind() string { return "pop" } | ||||
|   | ||||
							
								
								
									
										50
									
								
								parser.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								parser.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
| package chroma | ||||
|  | ||||
| import ( | ||||
| 	"github.com/alecthomas/participle/v2" | ||||
| 	"github.com/alecthomas/participle/v2/lexer" | ||||
| ) | ||||
|  | ||||
| type AST struct { | ||||
| 	Config *Config    `parser:"( 'config' '{' @@ '}'"` | ||||
| 	States []stateAST `parser:"@@* )*"` | ||||
| } | ||||
|  | ||||
| type stateAST struct { | ||||
| 	Name  string `parser:"'state' @Ident '{'"` | ||||
| 	Rules []Rule `parser:"@@* '}'"` | ||||
| } | ||||
|  | ||||
| type tokenTypeAST struct { | ||||
| 	Type TokenType `parser:"@Ident"` | ||||
| } | ||||
|  | ||||
| func (t tokenTypeAST) Emit(groups []string, state *LexerState) Iterator { | ||||
| 	return t.Type.Emit(groups, state) | ||||
| } | ||||
|  | ||||
| var ( | ||||
| 	lex = lexer.MustSimple([]lexer.SimpleRule{ | ||||
| 		{"Punct", `[][={}(),:;]`}, | ||||
| 		{"Whitespace", `\s+`}, | ||||
| 		{"Comment", `//.*`}, | ||||
| 		{"Boolean", `\b(true|false)\b`}, | ||||
| 		{"Ident", `[a-zA-Z-][a-zA-Z0-9-]*`}, | ||||
| 		{"String", `"(\\.|[^"])*"`}, | ||||
| 		{"Float", `[-+]?\d*\.\d+([eE][-+]?\d+)?`}, | ||||
| 		{"Int", `[-+]?\d+`}, | ||||
| 		{"Regex", `/(\\.|[^/])+/`}, | ||||
| 	}) | ||||
| 	parser = participle.MustBuild[AST]( | ||||
| 		participle.Lexer(lex), | ||||
| 		participle.Unquote("String"), | ||||
| 		participle.Map(func(token lexer.Token) (lexer.Token, error) { | ||||
| 			token.Value = token.Value[1 : len(token.Value)-1] | ||||
| 			return token, nil | ||||
| 		}, "Regex"), | ||||
| 		participle.Elide("Whitespace", "Comment"), | ||||
| 		participle.UseLookahead(1), | ||||
| 		participle.Union[Emitter](&byGroupsEmitter{}, &usingEmitter{}, &usingSelfEmitter{}, &tokenTypeAST{}), | ||||
| 		participle.Union[Mutator](&includeMutator{}, &combinedMutator{}, &pushMutator{}, &popMutator{}), | ||||
| 	) | ||||
| ) | ||||
							
								
								
									
										72
									
								
								parser_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								parser_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,72 @@ | ||||
| package chroma | ||||
|  | ||||
| import ( | ||||
| 	"testing" | ||||
|  | ||||
| 	assert "github.com/alecthomas/assert/v2" | ||||
| ) | ||||
|  | ||||
| func TestParser(t *testing.T) { | ||||
| 	ast, err := parser.ParseString("", ` | ||||
| config { | ||||
| 	name "INI" | ||||
| 	aliases "ini", "cfg" | ||||
| 	filenames "*.ini", "*.cfg", "*.inf", "*.service", "*.socket", ".gitconfig", | ||||
| 			  ".editorconfig", "pylintrc", ".pylintrc" | ||||
| 	mime-types "text/x-ini", "text/inf" | ||||
| 	priority 0.1 | ||||
| } | ||||
|  | ||||
| state root { | ||||
| 	/\s+/ text | ||||
| 	/[;#].*/ commentsingle | ||||
| 	/\[.*?\]$/ keyword | ||||
| 	/(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)/ by groups | ||||
| 		nameattribute, text, operator, text, literalstring | ||||
| 	/(.+?)$/ nameattribute | ||||
| } | ||||
| `) | ||||
| 	assert.NoError(t, err) | ||||
| 	assert.Equal(t, &AST{ | ||||
| 		Config: &Config{ | ||||
| 			Name: "INI", | ||||
| 			Aliases: []string{ | ||||
| 				"ini", | ||||
| 				"cfg", | ||||
| 			}, | ||||
| 			Filenames: []string{ | ||||
| 				"*.ini", | ||||
| 				"*.cfg", | ||||
| 				"*.inf", | ||||
| 				"*.service", | ||||
| 				"*.socket", | ||||
| 				".gitconfig", | ||||
| 				".editorconfig", | ||||
| 				"pylintrc", | ||||
| 				".pylintrc", | ||||
| 			}, | ||||
| 			MimeTypes: []string{ | ||||
| 				"text/x-ini", | ||||
| 				"text/inf", | ||||
| 			}, | ||||
| 			Priority: 0.1, | ||||
| 		}, | ||||
| 		States: []stateAST{ | ||||
| 			{Name: "root", | ||||
| 				Rules: []Rule{ | ||||
| 					{Pattern: `\s+`, Type: &tokenTypeAST{Text}}, | ||||
| 					{Pattern: `[;#].*`, Type: &tokenTypeAST{CommentSingle}}, | ||||
| 					{Pattern: `\[.*?\]$`, Type: &tokenTypeAST{Keyword}}, | ||||
| 					{Pattern: `(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)`, Type: &byGroupsEmitter{Emitters{ | ||||
| 						&tokenTypeAST{NameAttribute}, | ||||
| 						&tokenTypeAST{Text}, | ||||
| 						&tokenTypeAST{Operator}, | ||||
| 						&tokenTypeAST{Text}, | ||||
| 						&tokenTypeAST{LiteralString}, | ||||
| 					}}}, | ||||
| 					{Pattern: `(.+?)$`, Type: &tokenTypeAST{NameAttribute}}, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	}, ast) | ||||
| } | ||||
| @@ -16,9 +16,9 @@ import ( | ||||
|  | ||||
| // A Rule is the fundamental matching unit of the Regex lexer state machine. | ||||
| type Rule struct { | ||||
| 	Pattern string | ||||
| 	Type    Emitter | ||||
| 	Mutator Mutator | ||||
| 	Pattern string  `parser:"@Regex"` | ||||
| 	Type    Emitter `parser:"@@"` | ||||
| 	Mutator Mutator `parser:"@@?"` | ||||
| } | ||||
|  | ||||
| // Words creates a regex that matches any of the given literal words. | ||||
|   | ||||
							
								
								
									
										15
									
								
								types.go
									
									
									
									
									
								
							
							
						
						
									
										15
									
								
								types.go
									
									
									
									
									
								
							| @@ -1,5 +1,10 @@ | ||||
| package chroma | ||||
|  | ||||
| import ( | ||||
| 	"github.com/alecthomas/participle/v2" | ||||
| 	"github.com/alecthomas/participle/v2/lexer" | ||||
| ) | ||||
|  | ||||
| //go:generate enumer -text -type TokenType | ||||
|  | ||||
| // TokenType is the type of token to highlight. | ||||
| @@ -7,6 +12,16 @@ package chroma | ||||
| // It is also an Emitter, emitting a single token of itself | ||||
| type TokenType int | ||||
|  | ||||
| // Parse implements participle.Parseable. | ||||
| func (t *TokenType) Parse(lex *lexer.PeekingLexer) error { | ||||
| 	tok := lex.Peek() | ||||
| 	if err := t.UnmarshalText([]byte(tok.Value)); err != nil { | ||||
| 		return participle.NextMatch | ||||
| 	} | ||||
| 	lex.Next() | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Set of TokenTypes. | ||||
| // | ||||
| // Categories of types are grouped in ranges of 1000, while sub-categories are in ranges of 100. For | ||||
|   | ||||
		Reference in New Issue
	
	Block a user