2017-06-02 00:17:21 +10:00
|
|
|
package chroma
|
|
|
|
|
2022-11-02 15:23:14 +11:00
|
|
|
//go:generate enumer -text -type TokenType
|
2017-07-19 23:51:16 -07:00
|
|
|
|
2017-06-02 00:17:21 +10:00
|
|
|
// TokenType is the type of token to highlight.
//
// Token types are plain integers arranged hierarchically: categories occupy
// ranges of 1000 and sub-categories ranges of 100, so the category and
// sub-category of a type can be recovered with integer arithmetic.
//
// It is also an Emitter, emitting a single token of itself.
type TokenType int
|
|
|
|
|
|
|
|
// Set of TokenTypes.
|
|
|
|
//
|
|
|
|
// Categories of types are grouped in ranges of 1000, while sub-categories are in ranges of 100. For
|
|
|
|
// example, the literal category is in the range 3000-3999. The sub-category for literal strings is
|
|
|
|
// in the range 3100-3199.
|
2017-07-19 23:51:16 -07:00
|
|
|
|
|
|
|
// Meta token types.
|
2017-06-02 00:17:21 +10:00
|
|
|
const (
|
2017-09-20 14:15:06 +10:00
|
|
|
// Default background style.
|
2017-07-19 23:51:16 -07:00
|
|
|
Background TokenType = -1 - iota
|
2021-11-07 10:08:48 +03:30
|
|
|
// PreWrapper style.
|
|
|
|
PreWrapper
|
|
|
|
// Line style.
|
|
|
|
Line
|
2017-09-20 14:15:06 +10:00
|
|
|
// Line numbers in output.
|
2017-09-20 13:30:46 +10:00
|
|
|
LineNumbers
|
2017-10-13 01:49:20 +02:00
|
|
|
// Line numbers in output when in table.
|
|
|
|
LineNumbersTable
|
2017-09-20 14:15:06 +10:00
|
|
|
// Line higlight style.
|
|
|
|
LineHighlight
|
2017-10-13 01:49:20 +02:00
|
|
|
// Line numbers table wrapper style.
|
|
|
|
LineTable
|
|
|
|
// Line numbers table TD wrapper style.
|
|
|
|
LineTableTD
|
2022-11-01 17:28:30 +11:00
|
|
|
// Line number links.
|
|
|
|
LineLink
|
2021-11-07 10:08:48 +03:30
|
|
|
// Code line wrapper style.
|
|
|
|
CodeLine
|
2017-09-20 14:15:06 +10:00
|
|
|
// Input that could not be tokenised.
|
2017-06-02 00:17:21 +10:00
|
|
|
Error
|
2017-09-20 14:15:06 +10:00
|
|
|
// Other is used by the Delegate lexer to indicate which tokens should be handled by the delegate.
|
2017-06-02 00:17:21 +10:00
|
|
|
Other
|
2017-09-20 14:15:06 +10:00
|
|
|
// No highlighting.
|
2017-09-18 11:16:44 +10:00
|
|
|
None
|
2018-11-03 16:22:51 -07:00
|
|
|
// Used as an EOF marker / nil token
|
|
|
|
EOFType TokenType = 0
|
2017-06-02 00:17:21 +10:00
|
|
|
)
|
|
|
|
|
|
|
|
// Keywords.
|
|
|
|
const (
|
|
|
|
Keyword TokenType = 1000 + iota
|
|
|
|
KeywordConstant
|
|
|
|
KeywordDeclaration
|
|
|
|
KeywordNamespace
|
|
|
|
KeywordPseudo
|
|
|
|
KeywordReserved
|
|
|
|
KeywordType
|
|
|
|
)
|
|
|
|
|
|
|
|
// Names.
|
|
|
|
const (
|
|
|
|
Name TokenType = 2000 + iota
|
|
|
|
NameAttribute
|
|
|
|
NameBuiltin
|
|
|
|
NameBuiltinPseudo
|
|
|
|
NameClass
|
|
|
|
NameConstant
|
|
|
|
NameDecorator
|
|
|
|
NameEntity
|
|
|
|
NameException
|
|
|
|
NameFunction
|
|
|
|
NameFunctionMagic
|
2017-09-18 11:16:44 +10:00
|
|
|
NameKeyword
|
2017-06-02 00:17:21 +10:00
|
|
|
NameLabel
|
|
|
|
NameNamespace
|
2017-07-19 23:51:16 -07:00
|
|
|
NameOperator
|
2017-09-18 11:16:44 +10:00
|
|
|
NameOther
|
|
|
|
NamePseudo
|
|
|
|
NameProperty
|
2017-06-02 00:17:21 +10:00
|
|
|
NameTag
|
|
|
|
NameVariable
|
2017-09-18 11:16:44 +10:00
|
|
|
NameVariableAnonymous
|
2017-06-02 00:17:21 +10:00
|
|
|
NameVariableClass
|
|
|
|
NameVariableGlobal
|
|
|
|
NameVariableInstance
|
|
|
|
NameVariableMagic
|
|
|
|
)
|
|
|
|
|
|
|
|
// Literals.
|
|
|
|
const (
|
|
|
|
Literal TokenType = 3000 + iota
|
|
|
|
LiteralDate
|
2017-09-18 11:16:44 +10:00
|
|
|
LiteralOther
|
2017-06-02 00:17:21 +10:00
|
|
|
)
|
|
|
|
|
|
|
|
// Strings.
|
|
|
|
const (
|
|
|
|
LiteralString TokenType = 3100 + iota
|
|
|
|
LiteralStringAffix
|
2017-09-18 11:16:44 +10:00
|
|
|
LiteralStringAtom
|
2017-06-02 00:17:21 +10:00
|
|
|
LiteralStringBacktick
|
2017-09-18 11:16:44 +10:00
|
|
|
LiteralStringBoolean
|
2017-06-02 00:17:21 +10:00
|
|
|
LiteralStringChar
|
|
|
|
LiteralStringDelimiter
|
|
|
|
LiteralStringDoc
|
|
|
|
LiteralStringDouble
|
|
|
|
LiteralStringEscape
|
|
|
|
LiteralStringHeredoc
|
|
|
|
LiteralStringInterpol
|
2017-09-18 11:16:44 +10:00
|
|
|
LiteralStringName
|
2017-06-02 00:17:21 +10:00
|
|
|
LiteralStringOther
|
|
|
|
LiteralStringRegex
|
|
|
|
LiteralStringSingle
|
|
|
|
LiteralStringSymbol
|
|
|
|
)
|
|
|
|
|
|
|
|
// Literals.
|
|
|
|
const (
|
|
|
|
LiteralNumber TokenType = 3200 + iota
|
|
|
|
LiteralNumberBin
|
|
|
|
LiteralNumberFloat
|
|
|
|
LiteralNumberHex
|
|
|
|
LiteralNumberInteger
|
|
|
|
LiteralNumberIntegerLong
|
|
|
|
LiteralNumberOct
|
|
|
|
)
|
|
|
|
|
|
|
|
// Operators.
|
|
|
|
const (
|
|
|
|
Operator TokenType = 4000 + iota
|
|
|
|
OperatorWord
|
|
|
|
)
|
|
|
|
|
|
|
|
// Punctuation.
|
|
|
|
const (
|
|
|
|
Punctuation TokenType = 5000 + iota
|
|
|
|
)
|
|
|
|
|
|
|
|
// Comments.
|
|
|
|
const (
|
|
|
|
Comment TokenType = 6000 + iota
|
|
|
|
CommentHashbang
|
|
|
|
CommentMultiline
|
|
|
|
CommentSingle
|
|
|
|
CommentSpecial
|
|
|
|
)
|
|
|
|
|
2017-06-04 22:18:35 +10:00
|
|
|
// Preprocessor "comments".
|
|
|
|
const (
|
|
|
|
CommentPreproc TokenType = 6100 + iota
|
|
|
|
CommentPreprocFile
|
|
|
|
)
|
|
|
|
|
2017-06-02 00:17:21 +10:00
|
|
|
// Generic tokens.
|
|
|
|
const (
|
|
|
|
Generic TokenType = 7000 + iota
|
|
|
|
GenericDeleted
|
|
|
|
GenericEmph
|
|
|
|
GenericError
|
|
|
|
GenericHeading
|
|
|
|
GenericInserted
|
|
|
|
GenericOutput
|
|
|
|
GenericPrompt
|
|
|
|
GenericStrong
|
|
|
|
GenericSubheading
|
|
|
|
GenericTraceback
|
2017-06-04 22:18:35 +10:00
|
|
|
GenericUnderline
|
2017-06-02 00:17:21 +10:00
|
|
|
)
|
|
|
|
|
|
|
|
// Text.
|
|
|
|
const (
|
|
|
|
Text TokenType = 8000 + iota
|
|
|
|
TextWhitespace
|
2017-09-18 11:16:44 +10:00
|
|
|
TextSymbol
|
|
|
|
TextPunctuation
|
2017-06-02 00:17:21 +10:00
|
|
|
)
|
|
|
|
|
|
|
|
// Aliases.
|
|
|
|
const (
|
|
|
|
Whitespace = TextWhitespace
|
|
|
|
|
|
|
|
Date = LiteralDate
|
|
|
|
|
|
|
|
String = LiteralString
|
|
|
|
StringAffix = LiteralStringAffix
|
|
|
|
StringBacktick = LiteralStringBacktick
|
|
|
|
StringChar = LiteralStringChar
|
|
|
|
StringDelimiter = LiteralStringDelimiter
|
|
|
|
StringDoc = LiteralStringDoc
|
|
|
|
StringDouble = LiteralStringDouble
|
|
|
|
StringEscape = LiteralStringEscape
|
|
|
|
StringHeredoc = LiteralStringHeredoc
|
|
|
|
StringInterpol = LiteralStringInterpol
|
|
|
|
StringOther = LiteralStringOther
|
|
|
|
StringRegex = LiteralStringRegex
|
|
|
|
StringSingle = LiteralStringSingle
|
|
|
|
StringSymbol = LiteralStringSymbol
|
|
|
|
|
|
|
|
Number = LiteralNumber
|
|
|
|
NumberBin = LiteralNumberBin
|
|
|
|
NumberFloat = LiteralNumberFloat
|
|
|
|
NumberHex = LiteralNumberHex
|
|
|
|
NumberInteger = LiteralNumberInteger
|
|
|
|
NumberIntegerLong = LiteralNumberIntegerLong
|
|
|
|
NumberOct = LiteralNumberOct
|
|
|
|
)
|
|
|
|
|
2017-09-25 21:46:25 +10:00
|
|
|
var (
|
|
|
|
StandardTypes = map[TokenType]string{
|
2021-11-07 10:08:48 +03:30
|
|
|
Background: "bg",
|
|
|
|
PreWrapper: "chroma",
|
|
|
|
Line: "line",
|
2017-10-13 01:49:20 +02:00
|
|
|
LineNumbers: "ln",
|
|
|
|
LineNumbersTable: "lnt",
|
|
|
|
LineHighlight: "hl",
|
|
|
|
LineTable: "lntable",
|
|
|
|
LineTableTD: "lntd",
|
2022-11-01 17:28:30 +11:00
|
|
|
LineLink: "lnlinks",
|
2021-11-07 10:08:48 +03:30
|
|
|
CodeLine: "cl",
|
2017-10-13 01:49:20 +02:00
|
|
|
Text: "",
|
|
|
|
Whitespace: "w",
|
|
|
|
Error: "err",
|
|
|
|
Other: "x",
|
2017-09-25 21:46:25 +10:00
|
|
|
// I have no idea what this is used for...
|
|
|
|
// Escape: "esc",
|
|
|
|
|
|
|
|
Keyword: "k",
|
|
|
|
KeywordConstant: "kc",
|
|
|
|
KeywordDeclaration: "kd",
|
|
|
|
KeywordNamespace: "kn",
|
|
|
|
KeywordPseudo: "kp",
|
|
|
|
KeywordReserved: "kr",
|
|
|
|
KeywordType: "kt",
|
|
|
|
|
|
|
|
Name: "n",
|
|
|
|
NameAttribute: "na",
|
|
|
|
NameBuiltin: "nb",
|
|
|
|
NameBuiltinPseudo: "bp",
|
|
|
|
NameClass: "nc",
|
|
|
|
NameConstant: "no",
|
|
|
|
NameDecorator: "nd",
|
|
|
|
NameEntity: "ni",
|
|
|
|
NameException: "ne",
|
|
|
|
NameFunction: "nf",
|
|
|
|
NameFunctionMagic: "fm",
|
|
|
|
NameProperty: "py",
|
|
|
|
NameLabel: "nl",
|
|
|
|
NameNamespace: "nn",
|
|
|
|
NameOther: "nx",
|
|
|
|
NameTag: "nt",
|
|
|
|
NameVariable: "nv",
|
|
|
|
NameVariableClass: "vc",
|
|
|
|
NameVariableGlobal: "vg",
|
|
|
|
NameVariableInstance: "vi",
|
|
|
|
NameVariableMagic: "vm",
|
|
|
|
|
|
|
|
Literal: "l",
|
|
|
|
LiteralDate: "ld",
|
|
|
|
|
|
|
|
String: "s",
|
|
|
|
StringAffix: "sa",
|
|
|
|
StringBacktick: "sb",
|
|
|
|
StringChar: "sc",
|
|
|
|
StringDelimiter: "dl",
|
|
|
|
StringDoc: "sd",
|
|
|
|
StringDouble: "s2",
|
|
|
|
StringEscape: "se",
|
|
|
|
StringHeredoc: "sh",
|
|
|
|
StringInterpol: "si",
|
|
|
|
StringOther: "sx",
|
|
|
|
StringRegex: "sr",
|
|
|
|
StringSingle: "s1",
|
|
|
|
StringSymbol: "ss",
|
|
|
|
|
|
|
|
Number: "m",
|
|
|
|
NumberBin: "mb",
|
|
|
|
NumberFloat: "mf",
|
|
|
|
NumberHex: "mh",
|
|
|
|
NumberInteger: "mi",
|
|
|
|
NumberIntegerLong: "il",
|
|
|
|
NumberOct: "mo",
|
|
|
|
|
|
|
|
Operator: "o",
|
|
|
|
OperatorWord: "ow",
|
|
|
|
|
|
|
|
Punctuation: "p",
|
|
|
|
|
|
|
|
Comment: "c",
|
|
|
|
CommentHashbang: "ch",
|
|
|
|
CommentMultiline: "cm",
|
|
|
|
CommentPreproc: "cp",
|
|
|
|
CommentPreprocFile: "cpf",
|
|
|
|
CommentSingle: "c1",
|
|
|
|
CommentSpecial: "cs",
|
|
|
|
|
|
|
|
Generic: "g",
|
|
|
|
GenericDeleted: "gd",
|
|
|
|
GenericEmph: "ge",
|
|
|
|
GenericError: "gr",
|
|
|
|
GenericHeading: "gh",
|
|
|
|
GenericInserted: "gi",
|
|
|
|
GenericOutput: "go",
|
|
|
|
GenericPrompt: "gp",
|
|
|
|
GenericStrong: "gs",
|
|
|
|
GenericSubheading: "gu",
|
|
|
|
GenericTraceback: "gt",
|
2018-08-01 16:50:37 -04:00
|
|
|
GenericUnderline: "gl",
|
2017-09-25 21:46:25 +10:00
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
func (t TokenType) Parent() TokenType {
|
|
|
|
if t%100 != 0 {
|
|
|
|
return t / 100 * 100
|
|
|
|
}
|
|
|
|
if t%1000 != 0 {
|
|
|
|
return t / 1000 * 1000
|
|
|
|
}
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
|
2017-06-02 00:17:21 +10:00
|
|
|
func (t TokenType) Category() TokenType {
|
|
|
|
return t / 1000 * 1000
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t TokenType) SubCategory() TokenType {
|
|
|
|
return t / 100 * 100
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t TokenType) InCategory(other TokenType) bool {
|
|
|
|
return t/1000 == other/1000
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t TokenType) InSubCategory(other TokenType) bool {
|
|
|
|
return t/100 == other/100
|
|
|
|
}
|
|
|
|
|
2021-05-06 14:37:30 +04:30
|
|
|
func (t TokenType) Emit(groups []string, _ *LexerState) Iterator {
|
2018-11-03 16:22:51 -07:00
|
|
|
return Literator(Token{Type: t, Value: groups[0]})
|
2017-06-02 00:17:21 +10:00
|
|
|
}
|
Version 2 of Chroma
This cleans up the API in general, removing a bunch of deprecated stuff,
cleaning up circular imports, etc.
But the biggest change is switching to an optional XML format for the
regex lexer.
Having lexers defined only in Go is not ideal for a couple of reasons.
Firstly, it impedes a significant portion of contributors who use Chroma
in Hugo, but don't know Go. Secondly, it bloats the binary size of any
project that imports Chroma.
Why XML? YAML is an abomination and JSON is not human editable. XML
also compresses very well (eg. Go template lexer XML compresses from
3239 bytes to 718).
Why a new syntax format? All major existing formats rely on the
Oniguruma regex engine, which is extremely complex and for which there
is no Go port.
Why not earlier? Prior to the existence of fs.FS this was not a viable
option.
Benchmarks:
$ hyperfine --warmup 3 \
'./chroma.master --version' \
'./chroma.xml-pre-opt --version' \
'./chroma.xml --version'
Benchmark 1: ./chroma.master --version
Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms]
Range (min … max): 4.2 ms … 6.6 ms 233 runs
Benchmark 2: ./chroma.xml-pre-opt --version
Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms]
Range (min … max): 49.2 ms … 51.5 ms 51 runs
Benchmark 3: ./chroma.xml --version
Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms]
Range (min … max): 5.7 ms … 19.9 ms 196 runs
Summary
'./chroma.master --version' ran
1.30 ± 0.23 times faster than './chroma.xml --version'
9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version'
A slight increase in init time, but I think this is okay given the
increase in flexibility.
And binary size difference:
$ du -h lexers.test*
$ du -sh chroma*
8.8M chroma.master
7.8M chroma.xml
7.8M chroma.xml-pre-opt
Benchmarks:
$ hyperfine --warmup 3 \
'./chroma.master --version' \
'./chroma.xml-pre-opt --version' \
'./chroma.xml --version'
Benchmark 1: ./chroma.master --version
Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms]
Range (min … max): 4.2 ms … 6.6 ms 233 runs
Benchmark 2: ./chroma.xml-pre-opt --version
Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms]
Range (min … max): 49.2 ms … 51.5 ms 51 runs
Benchmark 3: ./chroma.xml --version
Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms]
Range (min … max): 5.7 ms … 19.9 ms 196 runs
Summary
'./chroma.master --version' ran
1.30 ± 0.23 times faster than './chroma.xml --version'
9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version'
Incompatible changes:
- (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer
- (*TokenType).UnmarshalJSON: removed
- Lexer.AnalyseText: added
- Lexer.SetAnalyser: added
- Lexer.SetRegistry: added
- MustNewLazyLexer: removed
- MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer
- Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator
- NewLazyLexer: removed
- NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error)
- Pop: changed from func(int) MutatorFunc to func(int) Mutator
- Push: changed from func(...string) MutatorFunc to func(...string) Mutator
- TokenType.MarshalJSON: removed
- Using: changed from func(Lexer) Emitter to func(string) Emitter
- UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
|
|
|
|
2022-11-02 15:23:14 +11:00
|
|
|
// EmitterKind returns the fixed kind string "token" for every TokenType.
func (t TokenType) EmitterKind() string {
	const kind = "token"
	return kind
}
|