1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-01-28 03:29:41 +02:00
chroma/lexer.go
Alec Thomas 40542a6255 refactor: migrate a bunch more Go-based lexers to XML
Also rename some existing XML lexers to their canonical XML name.
2023-09-09 12:29:23 +10:00

163 lines
4.4 KiB
Go

package chroma
import (
"fmt"
"strings"
)
var (
defaultOptions = &TokeniseOptions{
State: "root",
EnsureLF: true,
}
)
// Config for a lexer.
type Config struct {
// Name of the lexer.
Name string `xml:"name,omitempty"`
// Shortcuts for the lexer
Aliases []string `xml:"alias,omitempty"`
// File name globs
Filenames []string `xml:"filename,omitempty"`
// Secondary file name globs
AliasFilenames []string `xml:"alias_filename,omitempty"`
// MIME types
MimeTypes []string `xml:"mime_type,omitempty"`
// Regex matching is case-insensitive.
CaseInsensitive bool `xml:"case_insensitive,omitempty"`
// Regex matches all characters.
DotAll bool `xml:"dot_all,omitempty"`
// Regex does not match across lines ($ matches EOL).
//
// Defaults to multiline.
NotMultiline bool `xml:"not_multiline,omitempty"`
// Don't strip leading and trailing newlines from the input.
// DontStripNL bool
// Strip all leading and trailing whitespace from the input
// StripAll bool
// Make sure that the input ends with a newline. This
// is required for some lexers that consume input linewise.
EnsureNL bool `xml:"ensure_nl,omitempty"`
// If given and greater than 0, expand tabs in the input.
// TabSize int
// Priority of lexer.
//
// If this is 0 it will be treated as a default of 1.
Priority float32 `xml:"priority,omitempty"`
// Analyse is a list of regexes to match against the input.
//
// If a match is found, the score is returned if single attribute is set to true,
// otherwise the sum of all the score of matching patterns will be
// used as the final score.
Analyse *AnalyseConfig `xml:"analyse,omitempty"`
}
// AnalyseConfig defines the list of regexes analysers.
type AnalyseConfig struct {
Regexes []RegexConfig `xml:"regex,omitempty"`
// If true, the first matching score is returned.
First bool `xml:"first,attr"`
}
// RegexConfig defines a single regex pattern and its score in case of match.
type RegexConfig struct {
Pattern string `xml:"pattern,attr"`
Score float32 `xml:"score,attr"`
}
// Token output to formatter.
type Token struct {
Type TokenType `json:"type"`
Value string `json:"value"`
}
func (t *Token) String() string { return t.Value }
func (t *Token) GoString() string { return fmt.Sprintf("&Token{%s, %q}", t.Type, t.Value) }
// Clone returns a clone of the Token.
func (t *Token) Clone() Token {
return *t
}
// EOF is returned by lexers at the end of input.
var EOF Token
// TokeniseOptions contains options for tokenisers.
type TokeniseOptions struct {
// State to start tokenisation in. Defaults to "root".
State string
// Nested tokenisation.
Nested bool
// If true, all EOLs are converted into LF
// by replacing CRLF and CR
EnsureLF bool
}
// A Lexer for tokenising source code.
type Lexer interface {
// Config describing the features of the Lexer.
Config() *Config
// Tokenise returns an Iterator over tokens in text.
Tokenise(options *TokeniseOptions, text string) (Iterator, error)
// SetRegistry sets the registry this Lexer is associated with.
//
// The registry should be used by the Lexer if it needs to look up other
// lexers.
SetRegistry(registry *LexerRegistry) Lexer
// SetAnalyser sets a function the Lexer should use for scoring how
// likely a fragment of text is to match this lexer, between 0.0 and 1.0.
// A value of 1 indicates high confidence.
//
// Lexers may ignore this if they implement their own analysers.
SetAnalyser(analyser func(text string) float32) Lexer
// AnalyseText scores how likely a fragment of text is to match
// this lexer, between 0.0 and 1.0. A value of 1 indicates high confidence.
AnalyseText(text string) float32
}
// Lexers is a slice of lexers sortable by name.
type Lexers []Lexer
func (l Lexers) Len() int { return len(l) }
func (l Lexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l Lexers) Less(i, j int) bool {
return strings.ToLower(l[i].Config().Name) < strings.ToLower(l[j].Config().Name)
}
// PrioritisedLexers is a slice of lexers sortable by priority.
type PrioritisedLexers []Lexer
func (l PrioritisedLexers) Len() int { return len(l) }
func (l PrioritisedLexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l PrioritisedLexers) Less(i, j int) bool {
ip := l[i].Config().Priority
if ip == 0 {
ip = 1
}
jp := l[j].Config().Priority
if jp == 0 {
jp = 1
}
return ip > jp
}
// Analyser determines how appropriate this lexer is for the given text.
type Analyser interface {
AnalyseText(text string) float32
}