2017-06-02 00:17:21 +10:00
|
|
|
package chroma
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
)
|
|
|
|
|
2017-06-04 22:18:35 +10:00
|
|
|
var (
|
|
|
|
defaultOptions = &TokeniseOptions{
|
|
|
|
State: "root",
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
2017-06-02 00:17:21 +10:00
|
|
|
// Config holds the static description of a lexer: the names, file globs and
// MIME types it is associated with, plus flags controlling regex matching.
type Config struct {
	// Name of the lexer.
	Name string

	// Aliases are shortcut names for the lexer.
	Aliases []string

	// Filenames are file name globs.
	Filenames []string

	// AliasFilenames are secondary file name globs.
	AliasFilenames []string

	// MimeTypes are the MIME types for the lexer.
	MimeTypes []string

	// CaseInsensitive makes regex matching case-insensitive.
	CaseInsensitive bool

	// DotAll makes the regex match all characters.
	DotAll bool

	// NotMultiline means the regex does not match across lines ($ matches EOL).
	//
	// Defaults to multiline.
	NotMultiline bool

	// The remaining options are commented out and are therefore not fields
	// of Config; only their descriptions are kept.

	// Don't strip leading and trailing newlines from the input.
	// DontStripNL bool

	// Strip all leading and trailing whitespace from the input
	// StripAll bool

	// Make sure that the input does not end with a newline. This
	// is required for some lexers that consume input linewise.
	// DontEnsureNL bool

	// If given and greater than 0, expand tabs in the input.
	// TabSize int
}
|
|
|
|
|
2017-06-05 10:29:50 +10:00
|
|
|
// Token output to formatter.
|
2017-06-02 00:17:21 +10:00
|
|
|
type Token struct {
|
|
|
|
Type TokenType
|
|
|
|
Value string
|
|
|
|
}
|
|
|
|
|
2017-06-05 10:29:50 +10:00
|
|
|
// String returns the token's Value, so a *Token satisfies fmt.Stringer.
func (t *Token) String() string {
	return t.Value
}
|
|
|
|
// GoString renders the token as `Token{<type>, "<value>"}`, with the type
// formatted via %s and the value quoted via %q. It makes *Token satisfy
// fmt.GoStringer.
func (t *Token) GoString() string {
	return fmt.Sprintf("Token{%s, %q}", t.Type, t.Value)
}
|
2017-06-02 00:17:21 +10:00
|
|
|
|
2017-09-20 13:30:46 +10:00
|
|
|
func (t *Token) Clone() *Token {
|
|
|
|
clone := &Token{}
|
|
|
|
*clone = *t
|
|
|
|
return clone
|
|
|
|
}
|
|
|
|
|
2017-06-04 22:18:35 +10:00
|
|
|
// TokeniseOptions are the options passed to Lexer.Tokenise.
type TokeniseOptions struct {
	// State to start tokenisation in. Defaults to "root".
	State string
}
|
|
|
|
|
2017-07-19 23:51:16 -07:00
|
|
|
// A Lexer for tokenising source code.
|
2017-06-02 00:17:21 +10:00
|
|
|
type Lexer interface {
|
2017-07-19 23:51:16 -07:00
|
|
|
// Config describing the features of the Lexer.
|
2017-06-02 00:17:21 +10:00
|
|
|
Config() *Config
|
2017-07-19 23:51:16 -07:00
|
|
|
// Tokenise text and call out for each generated token.
|
2017-09-18 13:15:07 +10:00
|
|
|
//
|
|
|
|
// A token of type EOF will be passed to out() to signify the end of the stream.
|
2017-06-05 10:29:50 +10:00
|
|
|
Tokenise(options *TokeniseOptions, text string, out func(*Token)) error
|
2017-06-02 00:17:21 +10:00
|
|
|
}
|
|
|
|
|
2017-06-07 10:27:10 +10:00
|
|
|
// Lexers is a slice of Lexer. Its Len, Swap and Less methods order it by
// Config().Name, and Pick selects a lexer for a piece of text.
type Lexers []Lexer
|
|
|
|
|
|
|
|
// Pick attempts to pick the best Lexer for a piece of source code. May return nil.
|
|
|
|
func (l Lexers) Pick(text string) Lexer {
|
|
|
|
if len(l) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
var picked Lexer
|
|
|
|
highest := float32(-1)
|
|
|
|
for _, lexer := range l {
|
|
|
|
if analyser, ok := lexer.(Analyser); ok {
|
|
|
|
score := analyser.AnalyseText(text)
|
|
|
|
if score > highest {
|
|
|
|
highest = score
|
|
|
|
picked = lexer
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return picked
|
|
|
|
}
|
|
|
|
|
2017-09-19 10:47:22 +10:00
|
|
|
// Len returns the number of lexers in l.
func (l Lexers) Len() int {
	return len(l)
}
|
|
|
|
// Swap exchanges the lexers at indices i and j in place.
func (l Lexers) Swap(i, j int) {
	atJ, atI := l[j], l[i]
	l[i], l[j] = atJ, atI
}
|
|
|
|
// Less reports whether the Config().Name of the lexer at index i sorts
// before that of the lexer at index j, using string comparison.
func (l Lexers) Less(i, j int) bool {
	left, right := l[i].Config().Name, l[j].Config().Name
	return left < right
}
|
|
|
|
|
2017-07-19 23:51:16 -07:00
|
|
|
// Analyser determines how appropriate this lexer is for the given text.
type Analyser interface {
	// AnalyseText returns a score for text. Lexers.Pick selects the lexer
	// with the highest score.
	AnalyseText(text string) float32
}
|