package chroma

import (
	"fmt"
)

var (
	defaultOptions = &TokeniseOptions{
		State: "root",
	}
)

// Config for a lexer.
type Config struct {
	// Name of the lexer.
	Name string

	// Shortcuts for the lexer.
	Aliases []string

	// File name globs.
	Filenames []string

	// Secondary file name globs.
	AliasFilenames []string

	// MIME types.
	MimeTypes []string

	// Regex matching is case-insensitive.
	CaseInsensitive bool

	// Regex "." matches all characters, including newlines.
	DotAll bool

	// Regex does not match across lines ("$" matches at end of line).
	//
	// Defaults to multiline.
	NotMultiline bool

	// Don't strip leading and trailing newlines from the input.
	// DontStripNL bool

	// Strip all leading and trailing whitespace from the input.
	// StripAll bool

	// Don't ensure that the input ends with a newline. A trailing
	// newline is required for some lexers that consume input linewise.
	// DontEnsureNL bool

	// If given and greater than 0, expand tabs in the input.
	// TabSize int
}

// Token output to formatter.
type Token struct {
	Type  TokenType
	Value string
}
func (t *Token) String() string { return t.Value }
func (t *Token) GoString() string { return fmt.Sprintf("Token{%s, %q}", t.Type, t.Value) }

// Clone returns a copy of the Token.
func (t *Token) Clone() *Token {
	clone := &Token{}
	*clone = *t
	return clone
}
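
// As an illustrative sketch (not part of the original file): because Token
// implements both fmt.Stringer and fmt.GoStringer, the %s and %v verbs print
// just the token value, while %#v produces the debug form from GoString.
// The token below is hypothetical:
//
//	t := &Token{Value: "hello"}
//	fmt.Printf("%s\n", t)  // hello
//	fmt.Printf("%#v\n", t) // Token{<type>, "hello"} (the type's name depends on TokenType's String method)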

// TokeniseOptions contains options for tokenising a body of text.
type TokeniseOptions struct {
	// State to start tokenisation in. Defaults to "root".
	State string
}

// A Lexer for tokenising source code.
type Lexer interface {
	// Config describing the features of the Lexer.
	Config() *Config

	// Tokenise text and call out() for each generated token.
	//
	// A token of type EOF will be passed to out() to signify the end of the stream.
	Tokenise(options *TokeniseOptions, text string, out func(*Token)) error
}
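
// A minimal sketch of driving a Lexer, collecting tokens into a slice. This
// example is not part of the original file, and "lexer" is assumed to be any
// value satisfying the interface:
//
//	var tokens []*Token
//	err := lexer.Tokenise(&TokeniseOptions{State: "root"}, "package main", func(t *Token) {
//		tokens = append(tokens, t)
//	})
//	if err != nil {
//		// handle tokenisation failure
//	}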

// Lexers is a slice of Lexer implementations.
type Lexers []Lexer

// Pick attempts to pick the best Lexer for a piece of source code. May return nil.
func (l Lexers) Pick(text string) Lexer {
	if len(l) == 0 {
		return nil
	}
	var picked Lexer
	highest := float32(-1)
	for _, lexer := range l {
		if analyser, ok := lexer.(Analyser); ok {
			score := analyser.AnalyseText(text)
			if score > highest {
				highest = score
				picked = lexer
			}
		}
	}
	return picked
}
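
// A hedged usage sketch (goLexer and pythonLexer are hypothetical,
// pre-constructed Lexer implementations; not part of the original file):
//
//	lexers := Lexers{goLexer, pythonLexer}
//	best := lexers.Pick("def main(): pass")
//	if best == nil {
//		// the slice was empty, or no lexer implemented Analyser
//	}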

// Len, Swap and Less implement sort.Interface, ordering Lexers by name.
func (l Lexers) Len() int { return len(l) }
func (l Lexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l Lexers) Less(i, j int) bool { return l[i].Config().Name < l[j].Config().Name }
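
// Since Lexers satisfies sort.Interface, a caller can order a collection by
// lexer name (an illustrative sketch, not part of the original file; it
// requires importing "sort"):
//
//	sort.Sort(lexers) // lexers now ordered by Config().Name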

// Analyser determines how appropriate this lexer is for the given text.
type Analyser interface {
	AnalyseText(text string) float32
}
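
// A hedged sketch of implementing Analyser by wrapping an existing Lexer.
// The type name, the score value and the shebang heuristic are all
// assumptions for illustration (it requires importing "strings"):
//
//	type shellAnalyser struct {
//		Lexer // embed an existing Lexer to satisfy Config and Tokenise
//	}
//
//	func (a shellAnalyser) AnalyseText(text string) float32 {
//		if strings.HasPrefix(text, "#!/bin/sh") {
//			return 1.0 // near-certain match
//		}
//		return 0
//	}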