mirror of
https://github.com/alecthomas/chroma.git
synced 2025-03-29 21:56:56 +02:00
This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. 
And binary size difference: $ du -h lexers.test* $ du -sh chroma* 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
216 lines
6.6 KiB
Go
216 lines
6.6 KiB
Go
package lexers
|
|
|
|
import (
|
|
. "github.com/alecthomas/chroma/v2" // nolint
|
|
)
|
|
|
|
// caddyfileCommonRules returns the lexer states shared by the full Caddyfile
// lexer and the directives-only variant; both Merge these into their own rules.
func caddyfileCommonRules() Rules {
	return Rules{
		// Body of a site block; also used as "root" by the
		// directives-only lexer.
		"site_block_common": {
			// Import keyword
			{`(import)(\s+)([^\s]+)`, ByGroups(Keyword, Text, NameVariableMagic), nil},
			// Matcher definition
			{`@[^\s]+(?=\s)`, NameDecorator, Push("matcher")},
			// Matcher token stub for docs
			{`\[\<matcher\>\]`, NameDecorator, Push("matcher")},
			// These cannot have matchers but may have things that look like
			// matchers in their arguments, so we just parse as a subdirective.
			{`try_files`, Keyword, Push("subdirective")},
			// These are special, they can nest more directives
			{`handle_errors|handle|route|handle_path|not`, Keyword, Push("nested_directive")},
			// Any other directive
			{`[^\s#]+`, Keyword, Push("directive")},
			Include("base"),
		},
		// After a named-matcher declaration (@name ...).
		"matcher": {
			{`\{`, Punctuation, Push("block")},
			// Not can be one-liner
			{`not`, Keyword, Push("deep_not_matcher")},
			// Any other same-line matcher
			{`[^\s#]+`, Keyword, Push("arguments")},
			// Terminators
			{`\n`, Text, Pop(1)},
			{`\}`, Punctuation, Pop(1)},
			Include("base"),
		},
		// Brace-delimited block. Pop(2) on `}` leaves both this state
		// and the state that pushed it.
		"block": {
			{`\}`, Punctuation, Pop(2)},
			// Not can be one-liner
			{`not`, Keyword, Push("not_matcher")},
			// Any other subdirective
			{`[^\s#]+`, Keyword, Push("subdirective")},
			Include("base"),
		},
		// Like "block", but entries may themselves nest more directives.
		"nested_block": {
			{`\}`, Punctuation, Pop(2)},
			// Matcher definition
			{`@[^\s]+(?=\s)`, NameDecorator, Push("matcher")},
			// Something that starts with literally < is probably a docs stub
			{`\<[^#]+\>`, Keyword, Push("nested_directive")},
			// Any other directive
			{`[^\s#]+`, Keyword, Push("nested_directive")},
			Include("base"),
		},
		"not_matcher": {
			{`\}`, Punctuation, Pop(2)},
			{`\{(?=\s)`, Punctuation, Push("block")},
			{`[^\s#]+`, Keyword, Push("arguments")},
			{`\s+`, Text, nil},
		},
		"deep_not_matcher": {
			{`\}`, Punctuation, Pop(2)},
			{`\{(?=\s)`, Punctuation, Push("block")},
			{`[^\s#]+`, Keyword, Push("deep_subdirective")},
			{`\s+`, Text, nil},
		},
		// A directive and its arguments, terminated by newline or a block.
		"directive": {
			{`\{(?=\s)`, Punctuation, Push("block")},
			Include("matcher_token"),
			Include("comments_pop_1"),
			{`\n`, Text, Pop(1)},
			Include("base"),
		},
		// Same as "directive" but opens "nested_block" instead of "block".
		"nested_directive": {
			{`\{(?=\s)`, Punctuation, Push("nested_block")},
			Include("matcher_token"),
			Include("comments_pop_1"),
			{`\n`, Text, Pop(1)},
			Include("base"),
		},
		"subdirective": {
			{`\{(?=\s)`, Punctuation, Push("block")},
			Include("comments_pop_1"),
			{`\n`, Text, Pop(1)},
			Include("base"),
		},
		"arguments": {
			{`\{(?=\s)`, Punctuation, Push("block")},
			Include("comments_pop_2"),
			{`\\\n`, Text, nil}, // Skip escaped newlines
			{`\n`, Text, Pop(2)},
			Include("base"),
		},
		"deep_subdirective": {
			{`\{(?=\s)`, Punctuation, Push("block")},
			Include("comments_pop_3"),
			{`\n`, Text, Pop(3)},
			Include("base"),
		},
		// Matchers that may follow a directive name on the same line.
		"matcher_token": {
			{`@[^\s]+`, NameDecorator, Push("arguments")},         // Named matcher
			{`/[^\s]+`, NameDecorator, Push("arguments")},         // Path matcher
			{`\*`, NameDecorator, Push("arguments")},              // Wildcard path matcher
			{`\[\<matcher\>\]`, NameDecorator, Push("arguments")}, // Matcher token stub for docs
		},
		"comments": {
			{`^#.*\n`, CommentSingle, nil},   // Comment at start of line
			{`\s+#.*\n`, CommentSingle, nil}, // Comment preceded by whitespace
		},
		// Comment variants that also pop N states: the comment's regex
		// consumes the \n that would otherwise have terminated the
		// enclosing state(s), so the pop happens here instead.
		"comments_pop_1": {
			{`^#.*\n`, CommentSingle, Pop(1)},   // Comment at start of line
			{`\s+#.*\n`, CommentSingle, Pop(1)}, // Comment preceded by whitespace
		},
		"comments_pop_2": {
			{`^#.*\n`, CommentSingle, Pop(2)},   // Comment at start of line
			{`\s+#.*\n`, CommentSingle, Pop(2)}, // Comment preceded by whitespace
		},
		"comments_pop_3": {
			{`^#.*\n`, CommentSingle, Pop(3)},   // Comment at start of line
			{`\s+#.*\n`, CommentSingle, Pop(3)}, // Comment preceded by whitespace
		},
		// Common tokens, included by most other states.
		"base": {
			Include("comments"),
			{`(on|off|first|last|before|after|internal|strip_prefix|strip_suffix|replace)\b`, NameConstant, nil},
			{`(https?://)?([a-z0-9.-]+)(:)([0-9]+)`, ByGroups(Name, Name, Punctuation, LiteralNumberInteger), nil},
			{`[a-z-]+/[a-z-+]+`, LiteralString, nil},
			{`[0-9]+[km]?\b`, LiteralNumberInteger, nil},
			{`\{[\w+.\$-]+\}`, LiteralStringEscape, nil}, // Placeholder
			{`\[(?=[^#{}$]+\])`, Punctuation, nil},
			{`\]|\|`, Punctuation, nil},
			{`[^\s#{}$\]]+`, LiteralString, nil},
			{`/[^\s#]*`, Name, nil},
			{`\s+`, Text, nil},
		},
	}
}
|
|
|
|
// Caddyfile lexer.
var Caddyfile = Register(MustNewLexer(
	&Config{
		Name:      "Caddyfile",
		Aliases:   []string{"caddyfile", "caddy"},
		Filenames: []string{"Caddyfile*"},
		MimeTypes: []string{},
	},
	caddyfileRules,
))
|
|
|
|
// caddyfileRules returns the rules for the full Caddyfile lexer, merged
// with the states from caddyfileCommonRules.
func caddyfileRules() Rules {
	return Rules{
		"root": {
			Include("comments"),
			// Global options block
			{`^\s*(\{)\s*$`, ByGroups(Punctuation), Push("globals")},
			// Snippets
			{`(\([^\s#]+\))(\s*)(\{)`, ByGroups(NameVariableAnonymous, Text, Punctuation), Push("snippet")},
			// Site label
			{`[^#{(\s,]+`, GenericHeading, Push("label")},
			// Site label with placeholder
			{`\{[\w+.\$-]+\}`, LiteralStringEscape, Push("label")},
			{`\s+`, Text, nil},
		},
		// Inside the global options block.
		"globals": {
			{`\}`, Punctuation, Pop(1)},
			{`[^\s#]+`, Keyword, Push("directive")},
			Include("base"),
		},
		// Inside a (name) { ... } snippet definition.
		"snippet": {
			{`\}`, Punctuation, Pop(1)},
			// Matcher definition
			{`@[^\s]+(?=\s)`, NameDecorator, Push("matcher")},
			// Any directive
			{`[^\s#]+`, Keyword, Push("directive")},
			Include("base"),
		},
		"label": {
			// Allow multiple labels, comma separated, newlines after
			// a comma means another label is coming
			{`,\s*\n?`, Text, nil},
			{` `, Text, nil},
			// Site label with placeholder
			{`\{[\w+.\$-]+\}`, LiteralStringEscape, nil},
			// Site label
			{`[^#{(\s,]+`, GenericHeading, nil},
			// Comment after non-block label (hack because comments end in \n)
			{`#.*\n`, CommentSingle, Push("site_block")},
			// Note: if \n, we'll never pop out of the site_block, it's valid
			{`\{(?=\s)|\n`, Punctuation, Push("site_block")},
		},
		// A site's { ... } body; contents come from "site_block_common".
		"site_block": {
			{`\}`, Punctuation, Pop(2)},
			Include("site_block_common"),
		},
	}.Merge(caddyfileCommonRules())
}
|
|
|
|
// CaddyfileDirectives is the Caddyfile directive-only lexer, for highlighting
// snippets that contain directives without surrounding site labels.
var CaddyfileDirectives = Register(MustNewLexer(
	&Config{
		Name:      "Caddyfile Directives",
		Aliases:   []string{"caddyfile-directives", "caddyfile-d", "caddy-d"},
		Filenames: []string{},
		MimeTypes: []string{},
	},
	caddyfileDirectivesRules,
))
|
|
|
|
// caddyfileDirectivesRules returns the rules for the directive-only lexer:
// the shared site-block rules become the root state.
func caddyfileDirectivesRules() Rules {
	return Rules{
		// Same as "site_block" in Caddyfile
		"root": {
			Include("site_block_common"),
		},
	}.Merge(caddyfileCommonRules())
}
|