mirror of
https://github.com/alecthomas/chroma.git
synced 2025-12-24 00:01:39 +02:00
While I do prefer XML over YAML, it's also super verbose.
```xml
<lexer>
<config>
<name>INI</name>
<alias>ini</alias>
<alias>cfg</alias>
<alias>dosini</alias>
<filename>*.ini</filename>
<filename>*.cfg</filename>
<filename>*.inf</filename>
<filename>*.service</filename>
<filename>*.socket</filename>
<filename>.gitconfig</filename>
<filename>.editorconfig</filename>
<filename>pylintrc</filename>
<filename>.pylintrc</filename>
<mime_type>text/x-ini</mime_type>
<mime_type>text/inf</mime_type>
<priority>0.1</priority> <!-- higher priority than Inform 6 -->
</config>
<rules>
<state name="root">
<rule pattern="\s+">
<token type="Text"/>
</rule>
<rule pattern="[;#].*">
<token type="CommentSingle"/>
</rule>
<rule pattern="\[.*?\]$">
<token type="Keyword"/>
</rule>
<rule pattern="(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)">
<bygroups>
<token type="NameAttribute"/>
<token type="Text"/>
<token type="Operator"/>
<token type="Text"/>
<token type="LiteralString"/>
</bygroups>
</rule>
<rule pattern="(.+?)$">
<token type="NameAttribute"/>
</rule>
</state>
</rules>
</lexer>
```
Pros:
- Very succinct
- No need to escape regexes
Cons:
- Bespoke syntax that people will have to learn
- No syntax highlighting in editors, no validation beyond parser
```
config {
name "INI"
aliases "ini", "cfg"
filenames "*.ini", "*.cfg", "*.inf", "*.service", "*.socket", ".gitconfig",
".editorconfig", "pylintrc", ".pylintrc"
mime-types "text/x-ini", "text/inf"
priority 0.1
}
state root {
/\s+/ text
/[;#].*/ commentsingle
/\[.*?\]$/ keyword
/(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)/ by groups
nameattribute, text, operator, text, literalstring
/(.+?)$/ nameattribute
}
```
Pros:
- More succinct than XML
- Can define a schema and have editors use it to validate.
Cons:
- Fucking YAML
- Indentation is awful
- Less succinct than bespoke syntax
- Will need some way to discriminate between "emitters" and "mutators" when parsing, eg. `type: Keyword` vs. `type: {bygroups: [...]}`
```yaml
config:
name: "INI"
aliases: ["ini", "cfg"]
filenames: ["*.ini", "*.cfg", "*.inf", "*.service", "*.socket", ".gitconfig",
".editorconfig", "pylintrc", ".pylintrc"]
mime-types: ["text/x-ini", "text/inf"]
priority: 0.1
state:
root:
rule:
- pattern: "\\s+"
type: Text
- pattern: "[;#].*"
type: CommentSingle
- pattern: "\\[.*?\\]$"
type: Keyword
- pattern: "(.*?)([ \\t]*)(=)([ \\t]*)(.*(?:\\n[ \\t].+)*)"
type:
bygroups: [NameAttribute, Text, Operator, Text, LiteralString]
- pattern: "(.+?)$"
type: NameAttribute
```
73 lines
1.5 KiB
Go
73 lines
1.5 KiB
Go
package chroma
|
|
|
|
import (
|
|
"testing"
|
|
|
|
assert "github.com/alecthomas/assert/v2"
|
|
)
|
|
|
|
func TestParser(t *testing.T) {
|
|
ast, err := parser.ParseString("", `
|
|
config {
|
|
name "INI"
|
|
aliases "ini", "cfg"
|
|
filenames "*.ini", "*.cfg", "*.inf", "*.service", "*.socket", ".gitconfig",
|
|
".editorconfig", "pylintrc", ".pylintrc"
|
|
mime-types "text/x-ini", "text/inf"
|
|
priority 0.1
|
|
}
|
|
|
|
state root {
|
|
/\s+/ text
|
|
/[;#].*/ commentsingle
|
|
/\[.*?\]$/ keyword
|
|
/(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)/ by groups
|
|
nameattribute, text, operator, text, literalstring
|
|
/(.+?)$/ nameattribute
|
|
}
|
|
`)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, &AST{
|
|
Config: &Config{
|
|
Name: "INI",
|
|
Aliases: []string{
|
|
"ini",
|
|
"cfg",
|
|
},
|
|
Filenames: []string{
|
|
"*.ini",
|
|
"*.cfg",
|
|
"*.inf",
|
|
"*.service",
|
|
"*.socket",
|
|
".gitconfig",
|
|
".editorconfig",
|
|
"pylintrc",
|
|
".pylintrc",
|
|
},
|
|
MimeTypes: []string{
|
|
"text/x-ini",
|
|
"text/inf",
|
|
},
|
|
Priority: 0.1,
|
|
},
|
|
States: []stateAST{
|
|
{Name: "root",
|
|
Rules: []Rule{
|
|
{Pattern: `\s+`, Type: &tokenTypeAST{Text}},
|
|
{Pattern: `[;#].*`, Type: &tokenTypeAST{CommentSingle}},
|
|
{Pattern: `\[.*?\]$`, Type: &tokenTypeAST{Keyword}},
|
|
{Pattern: `(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)`, Type: &byGroupsEmitter{Emitters{
|
|
&tokenTypeAST{NameAttribute},
|
|
&tokenTypeAST{Text},
|
|
&tokenTypeAST{Operator},
|
|
&tokenTypeAST{Text},
|
|
&tokenTypeAST{LiteralString},
|
|
}}},
|
|
{Pattern: `(.+?)$`, Type: &tokenTypeAST{NameAttribute}},
|
|
},
|
|
},
|
|
},
|
|
}, ast)
|
|
}
|