1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-01-28 03:29:41 +02:00
chroma/types.go

341 lines
6.9 KiB
Go
Raw Normal View History

2017-06-02 00:17:21 +10:00
package chroma
//go:generate enumer -text -type TokenType
2017-06-02 00:17:21 +10:00
// TokenType is the type of token to highlight.
//
// It is also an Emitter, emitting a single token of itself.
//
// The integer value encodes where the type sits in the hierarchy: categories
// occupy ranges of 1000 and sub-categories occupy ranges of 100 within them.
type TokenType int
// Set of TokenTypes.
//
// Categories of types are grouped in ranges of 1000, while sub-categories are in ranges of 100. For
// example, the literal category is in the range 3000-3999. The sub-category for literal strings is
// in the range 3100-3199.
// Meta token types.
2017-06-02 00:17:21 +10:00
const (
// Default background style.
Background TokenType = -1 - iota
// PreWrapper style.
PreWrapper
// Line style.
Line
// Line numbers in output.
2017-09-20 13:30:46 +10:00
LineNumbers
// Line numbers in output when in table.
LineNumbersTable
// Line higlight style.
LineHighlight
// Line numbers table wrapper style.
LineTable
// Line numbers table TD wrapper style.
LineTableTD
// Line number links.
LineLink
// Code line wrapper style.
CodeLine
// Input that could not be tokenised.
2017-06-02 00:17:21 +10:00
Error
// Other is used by the Delegate lexer to indicate which tokens should be handled by the delegate.
2017-06-02 00:17:21 +10:00
Other
// No highlighting.
None
// Used as an EOF marker / nil token
EOFType TokenType = 0
2017-06-02 00:17:21 +10:00
)
// Keywords.
//
// Keyword (1000) is the root of the category; the remaining constants are
// numbered sequentially from it via iota, so their order here fixes their values.
const (
Keyword TokenType = 1000 + iota
KeywordConstant
KeywordDeclaration
KeywordNamespace
KeywordPseudo
KeywordReserved
KeywordType
)
// Names.
const (
Name TokenType = 2000 + iota
NameAttribute
NameBuiltin
NameBuiltinPseudo
NameClass
NameConstant
NameDecorator
NameEntity
NameException
NameFunction
NameFunctionMagic
NameKeyword
2017-06-02 00:17:21 +10:00
NameLabel
NameNamespace
NameOperator
NameOther
NamePseudo
NameProperty
2017-06-02 00:17:21 +10:00
NameTag
NameVariable
NameVariableAnonymous
2017-06-02 00:17:21 +10:00
NameVariableClass
NameVariableGlobal
NameVariableInstance
NameVariableMagic
)
// Literals.
const (
Literal TokenType = 3000 + iota
LiteralDate
LiteralOther
2017-06-02 00:17:21 +10:00
)
// Strings.
const (
LiteralString TokenType = 3100 + iota
LiteralStringAffix
LiteralStringAtom
2017-06-02 00:17:21 +10:00
LiteralStringBacktick
LiteralStringBoolean
2017-06-02 00:17:21 +10:00
LiteralStringChar
LiteralStringDelimiter
LiteralStringDoc
LiteralStringDouble
LiteralStringEscape
LiteralStringHeredoc
LiteralStringInterpol
LiteralStringName
2017-06-02 00:17:21 +10:00
LiteralStringOther
LiteralStringRegex
LiteralStringSingle
LiteralStringSymbol
)
// Numbers.
//
// LiteralNumber (3200) is the root of the number sub-category of literals; the
// remaining constants are numbered sequentially from it via iota.
const (
LiteralNumber TokenType = 3200 + iota
LiteralNumberBin
LiteralNumberFloat
LiteralNumberHex
LiteralNumberInteger
LiteralNumberIntegerLong
LiteralNumberOct
)
// Operators.
//
// Operator (4000) is the root of the category; OperatorWord follows it via iota.
const (
Operator TokenType = 4000 + iota
OperatorWord
)
// Punctuation.
//
// Punctuation (5000) is currently the only member of its category.
const (
Punctuation TokenType = 5000 + iota
)
// Comments.
//
// Comment (6000) is the root of the category; the remaining constants are
// numbered sequentially from it via iota.
const (
Comment TokenType = 6000 + iota
CommentHashbang
CommentMultiline
CommentSingle
CommentSpecial
)
// Preprocessor "comments".
//
// CommentPreproc (6100) is the root of a sub-category within the comment
// category; CommentPreprocFile follows it via iota.
const (
CommentPreproc TokenType = 6100 + iota
CommentPreprocFile
)
2017-06-02 00:17:21 +10:00
// Generic tokens.
const (
Generic TokenType = 7000 + iota
GenericDeleted
GenericEmph
GenericError
GenericHeading
GenericInserted
GenericOutput
GenericPrompt
GenericStrong
GenericSubheading
GenericTraceback
GenericUnderline
2017-06-02 00:17:21 +10:00
)
// Text.
const (
Text TokenType = 8000 + iota
TextWhitespace
TextSymbol
TextPunctuation
2017-06-02 00:17:21 +10:00
)
// Aliases.
//
// Shorter names for some of the Text* and Literal* token types. Each alias is a
// constant with the same TokenType value as the constant on its right-hand side,
// so the two names are interchangeable (including as map keys).
const (
Whitespace = TextWhitespace
Date = LiteralDate
String = LiteralString
StringAffix = LiteralStringAffix
StringBacktick = LiteralStringBacktick
StringChar = LiteralStringChar
StringDelimiter = LiteralStringDelimiter
StringDoc = LiteralStringDoc
StringDouble = LiteralStringDouble
StringEscape = LiteralStringEscape
StringHeredoc = LiteralStringHeredoc
StringInterpol = LiteralStringInterpol
StringOther = LiteralStringOther
StringRegex = LiteralStringRegex
StringSingle = LiteralStringSingle
StringSymbol = LiteralStringSymbol
Number = LiteralNumber
NumberBin = LiteralNumberBin
NumberFloat = LiteralNumberFloat
NumberHex = LiteralNumberHex
NumberInteger = LiteralNumberInteger
NumberIntegerLong = LiteralNumberIntegerLong
NumberOct = LiteralNumberOct
)
var (
// StandardTypes maps a TokenType to a short string identifier, for example
// Keyword is "k" and NameFunction is "nf". Text maps to the empty string.
//
// NOTE(review): these look like the short class names used when rendering
// highlighted output — confirm against the code that reads this map.
//
// Not every TokenType has an entry (NameKeyword and None, for instance, are
// absent), so a lookup may miss.
StandardTypes = map[TokenType]string{
Background: "bg",
PreWrapper: "chroma",
Line: "line",
LineNumbers: "ln",
LineNumbersTable: "lnt",
LineHighlight: "hl",
LineTable: "lntable",
LineTableTD: "lntd",
LineLink: "lnlinks",
CodeLine: "cl",
Text: "",
Whitespace: "w",
Error: "err",
Other: "x",
// I have no idea what this is used for...
// Escape: "esc",
Keyword: "k",
KeywordConstant: "kc",
KeywordDeclaration: "kd",
KeywordNamespace: "kn",
KeywordPseudo: "kp",
KeywordReserved: "kr",
KeywordType: "kt",
Name: "n",
NameAttribute: "na",
NameBuiltin: "nb",
NameBuiltinPseudo: "bp",
NameClass: "nc",
NameConstant: "no",
NameDecorator: "nd",
NameEntity: "ni",
NameException: "ne",
NameFunction: "nf",
NameFunctionMagic: "fm",
NameProperty: "py",
NameLabel: "nl",
NameNamespace: "nn",
NameOther: "nx",
NameTag: "nt",
NameVariable: "nv",
NameVariableClass: "vc",
NameVariableGlobal: "vg",
NameVariableInstance: "vi",
NameVariableMagic: "vm",
Literal: "l",
LiteralDate: "ld",
String: "s",
StringAffix: "sa",
StringBacktick: "sb",
StringChar: "sc",
StringDelimiter: "dl",
StringDoc: "sd",
StringDouble: "s2",
StringEscape: "se",
StringHeredoc: "sh",
StringInterpol: "si",
StringOther: "sx",
StringRegex: "sr",
StringSingle: "s1",
StringSymbol: "ss",
Number: "m",
NumberBin: "mb",
NumberFloat: "mf",
NumberHex: "mh",
NumberInteger: "mi",
NumberIntegerLong: "il",
NumberOct: "mo",
Operator: "o",
OperatorWord: "ow",
Punctuation: "p",
Comment: "c",
CommentHashbang: "ch",
CommentMultiline: "cm",
CommentPreproc: "cp",
CommentPreprocFile: "cpf",
CommentSingle: "c1",
CommentSpecial: "cs",
Generic: "g",
GenericDeleted: "gd",
GenericEmph: "ge",
GenericError: "gr",
GenericHeading: "gh",
GenericInserted: "gi",
GenericOutput: "go",
GenericPrompt: "gp",
GenericStrong: "gs",
GenericSubheading: "gu",
GenericTraceback: "gt",
GenericUnderline: "gl",
}
)
// Parent returns the token type one level up the hierarchy from t.
//
// A type that is not a multiple of 100 resolves to the root of its sub-category;
// a sub-category root that is not a multiple of 1000 resolves to the root of its
// category; a category root (or anything else divisible by 1000) resolves to 0.
func (t TokenType) Parent() TokenType {
	switch {
	case t%100 != 0:
		// Drop the position within the sub-category.
		return t - t%100
	case t%1000 != 0:
		// Already a sub-category root: drop down to the category root.
		return t - t%1000
	default:
		return 0
	}
}
2017-06-02 00:17:21 +10:00
// Category returns the root of the category containing t, i.e. t truncated
// towards zero to a multiple of 1000.
func (t TokenType) Category() TokenType {
	offset := t % 1000
	return t - offset
}
// SubCategory returns the root of the sub-category containing t, i.e. t
// truncated towards zero to a multiple of 100.
func (t TokenType) SubCategory() TokenType {
	offset := t % 100
	return t - offset
}
// InCategory reports whether t and other fall in the same range of 1000, i.e.
// whether they belong to the same category.
func (t TokenType) InCategory(other TokenType) bool {
	const span = 1000
	mine, theirs := t/span, other/span
	return mine == theirs
}
// InSubCategory reports whether t and other fall in the same range of 100, i.e.
// whether they belong to the same sub-category.
func (t TokenType) InSubCategory(other TokenType) bool {
	const span = 100
	mine, theirs := t/span, other/span
	return mine == theirs
}
func (t TokenType) Emit(groups []string, _ *LexerState) Iterator {
return Literator(Token{Type: t, Value: groups[0]})
2017-06-02 00:17:21 +10:00
}
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. 
And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
// EmitterKind returns the kind name for a TokenType used as an emitter, which is
// always the string "token" regardless of the receiver's value.
func (t TokenType) EmitterKind() string {
	const kind = "token"
	return kind
}