1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-01-30 03:38:02 +02:00
chroma/mutators.go

202 lines
4.9 KiB
Go
Raw Normal View History

package chroma
import (
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
"encoding/xml"
"fmt"
"strings"
)
// A Mutator modifies the behaviour of the lexer.
type Mutator interface {
// Mutate the lexer state machine as it is processing.
Mutate(state *LexerState) error
}
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
// SerialisableMutator is a Mutator that can be serialised and deserialised.
type SerialisableMutator interface {
Mutator
MutatorKind() string
}
// A LexerMutator is an additional interface that a Mutator can implement
// to modify the lexer when it is compiled.
type LexerMutator interface {
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
// MutateLexer can be implemented to mutate the lexer itself.
//
// Rules are the lexer rules, state is the state key for the rule the mutator is associated with.
MutateLexer(rules CompiledRules, state string, rule int) error
}
// A MutatorFunc is a Mutator that mutates the lexer state machine as it is processing.
type MutatorFunc func(state *LexerState) error
func (m MutatorFunc) Mutate(state *LexerState) error { return m(state) } // nolint
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
type multiMutator struct {
Mutators []Mutator `xml:"mutator"`
}
func (m *multiMutator) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
for {
token, err := d.Token()
if err != nil {
return err
}
switch token := token.(type) {
case xml.StartElement:
mutator, err := unmarshalMutator(d, token)
if err != nil {
return err
}
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
m.Mutators = append(m.Mutators, mutator)
case xml.EndElement:
return nil
}
}
}
func (m *multiMutator) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
name := xml.Name{Local: "mutators"}
if err := e.EncodeToken(xml.StartElement{Name: name}); err != nil {
return err
}
for _, m := range m.Mutators {
if err := marshalMutator(e, m); err != nil {
return err
}
}
return e.EncodeToken(xml.EndElement{Name: name})
}
func (m *multiMutator) MutatorKind() string { return "multiple" }
func (m *multiMutator) Mutate(state *LexerState) error {
for _, modifier := range m.Mutators {
if err := modifier.Mutate(state); err != nil {
return err
}
}
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
return nil
}
// Mutators applies a set of Mutators in order.
func Mutators(modifiers ...Mutator) Mutator {
return &multiMutator{modifiers}
}
type includeMutator struct {
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
State string `xml:"state,attr"`
}
// Include the given state.
func Include(state string) Rule {
return Rule{Mutator: &includeMutator{state}}
}
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
func (i *includeMutator) MutatorKind() string { return "include" }
func (i *includeMutator) Mutate(s *LexerState) error {
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
return fmt.Errorf("should never reach here Include(%q)", i.State)
}
func (i *includeMutator) MutateLexer(rules CompiledRules, state string, rule int) error {
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
includedRules, ok := rules[i.State]
if !ok {
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
return fmt.Errorf("invalid include state %q", i.State)
}
rules[state] = append(rules[state][:rule], append(includedRules, rules[state][rule+1:]...)...)
return nil
}
type combinedMutator struct {
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
States []string `xml:"state,attr"`
}
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
func (c *combinedMutator) MutatorKind() string { return "combined" }
// Combined creates a new anonymous state from the given states, and pushes that state.
func Combined(states ...string) Mutator {
return &combinedMutator{states}
}
func (c *combinedMutator) Mutate(s *LexerState) error {
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
return fmt.Errorf("should never reach here Combined(%v)", c.States)
}
func (c *combinedMutator) MutateLexer(rules CompiledRules, state string, rule int) error {
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
name := "__combined_" + strings.Join(c.States, "__")
if _, ok := rules[name]; !ok {
combined := []*CompiledRule{}
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
for _, state := range c.States {
rules, ok := rules[state]
if !ok {
return fmt.Errorf("invalid combine state %q", state)
}
combined = append(combined, rules...)
}
rules[name] = combined
}
rules[state][rule].Mutator = Push(name)
return nil
}
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
type pushMutator struct {
States []string `xml:"state,attr"`
}
func (p *pushMutator) MutatorKind() string { return "push" }
func (p *pushMutator) Mutate(s *LexerState) error {
if len(p.States) == 0 {
s.Stack = append(s.Stack, s.State)
} else {
for _, state := range p.States {
if state == "#pop" {
s.Stack = s.Stack[:len(s.Stack)-1]
} else {
s.Stack = append(s.Stack, state)
}
}
}
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
return nil
}
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
// Push states onto the stack.
func Push(states ...string) Mutator {
return &pushMutator{states}
}
type popMutator struct {
Depth int `xml:"depth,attr"`
}
func (p *popMutator) MutatorKind() string { return "pop" }
func (p *popMutator) Mutate(state *LexerState) error {
if len(state.Stack) == 0 {
return fmt.Errorf("nothing to pop")
}
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
state.Stack = state.Stack[:len(state.Stack)-p.Depth]
return nil
}
// Pop state from the stack when rule matches.
func Pop(n int) Mutator {
return &popMutator{n}
}
// Default returns a Rule that applies a set of Mutators.
func Default(mutators ...Mutator) Rule {
return Rule{Mutator: Mutators(mutators...)}
}
2018-03-19 10:32:36 +11:00
// Stringify returns the raw string for a set of tokens.
func Stringify(tokens ...Token) string {
2018-03-19 10:32:36 +11:00
out := []string{}
for _, t := range tokens {
out = append(out, t.Value)
}
return strings.Join(out, "")
}