mirror of
https://github.com/alecthomas/chroma.git
synced 2025-02-05 13:05:18 +02:00
cc2dd5b8ad
This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
188 lines
4.2 KiB
Go
188 lines
4.2 KiB
Go
package chroma
|
|
|
|
import (
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
)
|
|
|
|
var (
|
|
ignoredSuffixes = [...]string{
|
|
// Editor backups
|
|
"~", ".bak", ".old", ".orig",
|
|
// Debian and derivatives apt/dpkg/ucf backups
|
|
".dpkg-dist", ".dpkg-old", ".ucf-dist", ".ucf-new", ".ucf-old",
|
|
// Red Hat and derivatives rpm backups
|
|
".rpmnew", ".rpmorig", ".rpmsave",
|
|
// Build system input/template files
|
|
".in",
|
|
}
|
|
)
|
|
|
|
// LexerRegistry is a registry of Lexers.
|
|
type LexerRegistry struct {
|
|
Lexers Lexers
|
|
byName map[string]Lexer
|
|
byAlias map[string]Lexer
|
|
}
|
|
|
|
// NewLexerRegistry creates a new LexerRegistry of Lexers.
|
|
func NewLexerRegistry() *LexerRegistry {
|
|
return &LexerRegistry{
|
|
byName: map[string]Lexer{},
|
|
byAlias: map[string]Lexer{},
|
|
}
|
|
}
|
|
|
|
// Names of all lexers, optionally including aliases.
|
|
func (l *LexerRegistry) Names(withAliases bool) []string {
|
|
out := []string{}
|
|
for _, lexer := range l.Lexers {
|
|
config := lexer.Config()
|
|
out = append(out, config.Name)
|
|
if withAliases {
|
|
out = append(out, config.Aliases...)
|
|
}
|
|
}
|
|
sort.Strings(out)
|
|
return out
|
|
}
|
|
|
|
// Get a Lexer by name, alias or file extension.
|
|
func (l *LexerRegistry) Get(name string) Lexer {
|
|
if lexer := l.byName[name]; lexer != nil {
|
|
return lexer
|
|
}
|
|
if lexer := l.byAlias[name]; lexer != nil {
|
|
return lexer
|
|
}
|
|
if lexer := l.byName[strings.ToLower(name)]; lexer != nil {
|
|
return lexer
|
|
}
|
|
if lexer := l.byAlias[strings.ToLower(name)]; lexer != nil {
|
|
return lexer
|
|
}
|
|
|
|
candidates := PrioritisedLexers{}
|
|
// Try file extension.
|
|
if lexer := l.Match("filename." + name); lexer != nil {
|
|
candidates = append(candidates, lexer)
|
|
}
|
|
// Try exact filename.
|
|
if lexer := l.Match(name); lexer != nil {
|
|
candidates = append(candidates, lexer)
|
|
}
|
|
if len(candidates) == 0 {
|
|
return nil
|
|
}
|
|
sort.Sort(candidates)
|
|
return candidates[0]
|
|
}
|
|
|
|
// MatchMimeType attempts to find a lexer for the given MIME type.
|
|
func (l *LexerRegistry) MatchMimeType(mimeType string) Lexer {
|
|
matched := PrioritisedLexers{}
|
|
for _, l := range l.Lexers {
|
|
for _, lmt := range l.Config().MimeTypes {
|
|
if mimeType == lmt {
|
|
matched = append(matched, l)
|
|
}
|
|
}
|
|
}
|
|
if len(matched) != 0 {
|
|
sort.Sort(matched)
|
|
return matched[0]
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Match returns the first lexer matching filename.
|
|
func (l *LexerRegistry) Match(filename string) Lexer {
|
|
filename = filepath.Base(filename)
|
|
matched := PrioritisedLexers{}
|
|
// First, try primary filename matches.
|
|
for _, lexer := range l.Lexers {
|
|
config := lexer.Config()
|
|
for _, glob := range config.Filenames {
|
|
ok, err := filepath.Match(glob, filename)
|
|
if err != nil { // nolint
|
|
panic(err)
|
|
} else if ok {
|
|
matched = append(matched, lexer)
|
|
} else {
|
|
for _, suf := range &ignoredSuffixes {
|
|
ok, err := filepath.Match(glob+suf, filename)
|
|
if err != nil {
|
|
panic(err)
|
|
} else if ok {
|
|
matched = append(matched, lexer)
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if len(matched) > 0 {
|
|
sort.Sort(matched)
|
|
return matched[0]
|
|
}
|
|
matched = nil
|
|
// Next, try filename aliases.
|
|
for _, lexer := range l.Lexers {
|
|
config := lexer.Config()
|
|
for _, glob := range config.AliasFilenames {
|
|
ok, err := filepath.Match(glob, filename)
|
|
if err != nil { // nolint
|
|
panic(err)
|
|
} else if ok {
|
|
matched = append(matched, lexer)
|
|
} else {
|
|
for _, suf := range &ignoredSuffixes {
|
|
ok, err := filepath.Match(glob+suf, filename)
|
|
if err != nil {
|
|
panic(err)
|
|
} else if ok {
|
|
matched = append(matched, lexer)
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if len(matched) > 0 {
|
|
sort.Sort(matched)
|
|
return matched[0]
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Analyse text content and return the "best" lexer..
|
|
func (l *LexerRegistry) Analyse(text string) Lexer {
|
|
var picked Lexer
|
|
highest := float32(0.0)
|
|
for _, lexer := range l.Lexers {
|
|
if analyser, ok := lexer.(Analyser); ok {
|
|
weight := analyser.AnalyseText(text)
|
|
if weight > highest {
|
|
picked = lexer
|
|
highest = weight
|
|
}
|
|
}
|
|
}
|
|
return picked
|
|
}
|
|
|
|
// Register a Lexer with the LexerRegistry.
|
|
func (l *LexerRegistry) Register(lexer Lexer) Lexer {
|
|
lexer.SetRegistry(l)
|
|
config := lexer.Config()
|
|
l.byName[config.Name] = lexer
|
|
l.byName[strings.ToLower(config.Name)] = lexer
|
|
for _, alias := range config.Aliases {
|
|
l.byAlias[alias] = lexer
|
|
l.byAlias[strings.ToLower(alias)] = lexer
|
|
}
|
|
l.Lexers = append(l.Lexers, lexer)
|
|
return lexer
|
|
}
|