1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-01-26 03:20:10 +02:00

Wire up content sniffing.

This commit is contained in:
Alec Thomas 2017-06-07 10:27:10 +10:00
parent 7b6a07b9bb
commit 7ae55eb265
20 changed files with 191 additions and 131 deletions

View File

@ -2,7 +2,6 @@
Chroma is inspired by [Pygments](http://pygments.org/).
## Unsupported Pygments features
- Autodetection from content.

View File

@ -67,7 +67,6 @@ func listAll() {
fmt.Printf(" aliases: %s\n", strings.Join(config.Aliases, " "))
fmt.Printf(" filenames: %s\n", strings.Join(filenames, " "))
fmt.Printf(" mimetypes: %s\n", strings.Join(config.MimeTypes, " "))
fmt.Printf(" priority: %d\n", config.Priority)
}
fmt.Println()
fmt.Printf("styles:")
@ -83,16 +82,26 @@ func listAll() {
}
func lex(path string, contents string, writer func(*chroma.Token)) {
lexer := chroma.Coalesce(selexer(path))
lexer := selexer(path, contents)
if lexer == nil {
lexer = lexers.Fallback
}
lexer = chroma.Coalesce(lexer)
err := lexer.Tokenise(nil, string(contents), writer)
kingpin.FatalIfError(err, "")
}
func selexer(path string) chroma.Lexer {
func selexer(path, contents string) (lexer chroma.Lexer) {
if *lexerFlag != "autodetect" {
return lexers.Get(*lexerFlag)
}
return lexers.Match(path)[0]
if path != "" {
lexer := lexers.Match(path)
if lexer != nil {
return lexer
}
}
return lexers.Analyse(contents)
}
func getWriter(w io.Writer) func(*chroma.Token) {

View File

@ -29,9 +29,6 @@ type Config struct {
// MIME types
MimeTypes []string
// Priority, should multiple lexers match and no content is provided
Priority int
// Regex matching is case-insensitive.
CaseInsensitive bool
@ -76,6 +73,28 @@ type Lexer interface {
Tokenise(options *TokeniseOptions, text string, out func(*Token)) error
}
type Lexers []Lexer
// Pick attempts to pick the best Lexer for a piece of source code by asking
// each Analyser-capable lexer to score the text. May return nil if no lexer
// implements Analyser (an empty receiver also yields nil).
func (l Lexers) Pick(text string) Lexer {
	var picked Lexer
	// Start below any legal score so even a 0.0 analyser result can win.
	highest := float32(-1)
	for _, lexer := range l {
		analyser, ok := lexer.(Analyser)
		if !ok {
			// Lexers without content analysis cannot compete.
			continue
		}
		// Strictly-greater comparison keeps the earliest lexer on ties.
		if score := analyser.AnalyseText(text); score > highest {
			highest = score
			picked = lexer
		}
	}
	return picked
}
// Analyser determines if this lexer is appropriate for the given text.
type Analyser interface {
AnalyseText(text string) float32
@ -139,7 +158,7 @@ func Words(words ...string) string {
type Rules map[string][]Rule
// MustNewLexer creates a new Lexer or panics.
func MustNewLexer(config *Config, rules Rules) Lexer {
func MustNewLexer(config *Config, rules Rules) *RegexLexer {
lexer, err := NewLexer(config, rules)
if err != nil {
panic(err)
@ -151,7 +170,7 @@ func MustNewLexer(config *Config, rules Rules) Lexer {
//
// "rules" is a state machine transition map. Each key is a state. Values are sets of rules
// that match input, optionally modify lexer state, and output tokens.
func NewLexer(config *Config, rules Rules) (Lexer, error) {
func NewLexer(config *Config, rules Rules) (*RegexLexer, error) {
if config == nil {
config = &Config{}
}
@ -180,7 +199,7 @@ func NewLexer(config *Config, rules Rules) (Lexer, error) {
compiledRules[state] = append(compiledRules[state], crule)
}
}
return &regexLexer{
return &RegexLexer{
config: config,
rules: compiledRules,
}, nil
@ -205,16 +224,30 @@ type LexerState struct {
Groups []string
}
type regexLexer struct {
config *Config
rules map[string][]CompiledRule
// RegexLexer is a lexer driven by a compiled regex state machine, with an
// optional content-analysis hook for lexer autodetection.
type RegexLexer struct {
// config holds the lexer's static metadata (name, aliases, filenames, ...).
config *Config
// rules maps each state name to its compiled matching rules.
rules map[string][]CompiledRule
// analyser optionally scores how well some text matches this lexer;
// installed via SetAnalyser. Nil means "no opinion" (AnalyseText returns 0).
analyser func(text string) float32
}
func (r *regexLexer) Config() *Config {
// SetAnalyser sets the analyser function used to perform content inspection.
// It returns the receiver so construction, registration and analyser
// installation can be chained in a single expression.
func (r *RegexLexer) SetAnalyser(analyser func(text string) float32) *RegexLexer {
r.analyser = analyser
return r
}
// AnalyseText reports how strongly this lexer matches the given text,
// delegating to the analyser installed via SetAnalyser. Without an
// installed analyser the lexer expresses no preference and scores 0.0.
func (r *RegexLexer) AnalyseText(text string) float32 {
	if r.analyser == nil {
		return 0.0
	}
	return r.analyser(text)
}
// Config returns this lexer's static configuration.
func (r *RegexLexer) Config() *Config {
return r.config
}
func (r *regexLexer) Tokenise(options *TokeniseOptions, text string, out func(*Token)) error {
func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string, out func(*Token)) error {
if options == nil {
options = defaultOptions
}

View File

@ -2,19 +2,12 @@ package lexers
import (
"path/filepath"
"sort"
"github.com/danwakefield/fnmatch"
"github.com/alecthomas/chroma"
)
type prioritisedLexers []chroma.Lexer
func (p prioritisedLexers) Len() int { return len(p) }
func (p prioritisedLexers) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func (p prioritisedLexers) Less(i, j int) bool { return p[i].Config().Priority < p[j].Config().Priority }
// Registry of Lexers.
var Registry = struct {
Lexers []chroma.Lexer
@ -42,31 +35,41 @@ func Get(name string) chroma.Lexer {
if ok {
return lexer
}
return Fallback
return nil
}
// Match returns all lexers matching filename.
func Match(filename string) []chroma.Lexer {
func Match(filename string) chroma.Lexer {
filename = filepath.Base(filename)
lexers := prioritisedLexers{}
for _, lexer := range Registry.Lexers {
config := lexer.Config()
for _, glob := range config.Filenames {
if fnmatch.Match(glob, filename, 0) {
lexers = append(lexers, lexer)
break
return lexer
}
}
}
sort.Sort(lexers)
return lexers
return nil
}
// Analyse text content and return the "best" lexer. Every registered lexer
// that implements chroma.Analyser scores the text; the highest strictly
// positive score wins. Returns nil when no lexer claims the content.
func Analyse(text string) chroma.Lexer {
	var best chroma.Lexer
	bestScore := float32(0.0)
	for _, candidate := range Registry.Lexers {
		analyser, ok := candidate.(chroma.Analyser)
		if !ok {
			continue
		}
		if score := analyser.AnalyseText(text); score > bestScore {
			bestScore = score
			best = candidate
		}
	}
	return best
}
// Register a Lexer with the global registry.
func Register(lexer chroma.Lexer, err error) chroma.Lexer {
if err != nil {
panic(err)
}
func Register(lexer chroma.Lexer) chroma.Lexer {
config := lexer.Config()
Registry.byName[config.Name] = lexer
for _, alias := range config.Aliases {

View File

@ -1,11 +1,13 @@
package lexers
import (
"strings"
. "github.com/alecthomas/chroma" // nolint
)
// Bash lexer.
var Bash = Register(NewLexer(
var Bash = Register(MustNewLexer(
&Config{
Name: "Bash",
Aliases: []string{"bash", "sh", "ksh", "zsh", "shell"},
@ -85,4 +87,11 @@ var Bash = Register(NewLexer(
Include("root"),
},
},
))
).SetAnalyser(func(content string) float32 {
if strings.HasPrefix(content, "#!/bin/sh") ||
strings.HasPrefix(content, "#!/bin/bash") ||
strings.HasPrefix(content, "#!/bin/zsh") {
return 1.0
}
return 0.0
}))

View File

@ -5,7 +5,7 @@ import (
)
// C lexer.
var C = Register(NewLexer(
var C = Register(MustNewLexer(
&Config{
Name: "C",
Aliases: []string{"c"},

View File

@ -5,7 +5,7 @@ import (
)
// CPP is a C++ lexer.
var CPP = Register(NewLexer(
var CPP = Register(MustNewLexer(
&Config{
Name: "C++",
Aliases: []string{"cpp", "c++"},

View File

@ -5,7 +5,7 @@ import (
)
// CSS lexer.
var CSS = Register(NewLexer(
var CSS = Register(MustNewLexer(
&Config{
Name: "CSS",
Aliases: []string{"css"},

View File

@ -1,17 +1,16 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
"github.com/alecthomas/chroma"
)
// Fallback lexer if no other is found.
var Fallback = Register(NewLexer(&Config{
var Fallback chroma.Lexer = chroma.MustNewLexer(&chroma.Config{
Name: "fallback",
Filenames: []string{"*"},
Priority: 99,
}, Rules{
"root": []Rule{
{`.+`, Text, nil},
{`\n`, Text, nil},
}, chroma.Rules{
"root": []chroma.Rule{
{`.+`, chroma.Text, nil},
{`\n`, chroma.Text, nil},
},
}))
})

View File

@ -1,11 +1,13 @@
package lexers
import (
"strings"
. "github.com/alecthomas/chroma" // nolint: golint
)
// Go lexer.
var Go = Register(NewLexer(
var Go = Register(MustNewLexer(
&Config{
Name: "Go",
Filenames: []string{"*.go"},
@ -65,4 +67,12 @@ var Go = Register(NewLexer(
{`[^\W\d]\w*`, NameOther, nil},
},
},
))
).SetAnalyser(func(text string) float32 {
if strings.Contains(text, "fmt.") {
return 0.5
}
if strings.Contains(text, "package ") {
return 0.1
}
return 0.0
}))

View File

@ -5,7 +5,7 @@ import (
)
// HTML lexer.
var HTML = Register(NewLexer(
var HTML = Register(MustNewLexer(
&Config{
Name: "HTML",
Aliases: []string{"html"},

View File

@ -4,7 +4,7 @@ import (
. "github.com/alecthomas/chroma" // nolint
)
var INI = Register(NewLexer(
var INI = Register(MustNewLexer(
&Config{
Name: "INI",
Aliases: []string{"ini", "cfg", "dosini"},

View File

@ -1,4 +1,3 @@
package lexers
import (
@ -6,47 +5,46 @@ import (
)
// Java lexer.
var Java = Register(NewLexer(
var Java = Register(MustNewLexer(
&Config{
Name: "Java",
Aliases: []string{ "java", },
Filenames: []string{ "*.java", },
MimeTypes: []string{ "text/x-java", },
DotAll: true,
Aliases: []string{"java"},
Filenames: []string{"*.java"},
MimeTypes: []string{"text/x-java"},
DotAll: true,
},
Rules{
"root": {
{ `[^\S\n]+`, Text, nil },
{ `//.*?\n`, CommentSingle, nil },
{ `/\*.*?\*/`, CommentMultiline, nil },
{ `(assert|break|case|catch|continue|default|do|else|finally|for|if|goto|instanceof|new|return|switch|this|throw|try|while)\b`, Keyword, nil },
{ `((?:(?:[^\W\d]|\$)[\w.\[\]$<>]*\s+)+?)((?:[^\W\d]|\$)[\w$]*)(\s*)(\()`, ByGroups(UsingSelf("root"), NameFunction, Text, Operator), nil },
{ `@[^\W\d][\w.]*`, NameDecorator, nil },
{ `(abstract|const|enum|extends|final|implements|native|private|protected|public|static|strictfp|super|synchronized|throws|transient|volatile)\b`, KeywordDeclaration, nil },
{ `(boolean|byte|char|double|float|int|long|short|void)\b`, KeywordType, nil },
{ `(package)(\s+)`, ByGroups(KeywordNamespace, Text), Push("import") },
{ `(true|false|null)\b`, KeywordConstant, nil },
{ `(class|interface)(\s+)`, ByGroups(KeywordDeclaration, Text), Push("class") },
{ `(import(?:\s+static)?)(\s+)`, ByGroups(KeywordNamespace, Text), Push("import") },
{ `"(\\\\|\\"|[^"])*"`, LiteralString, nil },
{ `'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'`, LiteralStringChar, nil },
{ `(\.)((?:[^\W\d]|\$)[\w$]*)`, ByGroups(Operator, NameAttribute), nil },
{ `^\s*([^\W\d]|\$)[\w$]*:`, NameLabel, nil },
{ `([^\W\d]|\$)[\w$]*`, Name, nil },
{ `([0-9][0-9_]*\.([0-9][0-9_]*)?|\.[0-9][0-9_]*)([eE][+\-]?[0-9][0-9_]*)?[fFdD]?|[0-9][eE][+\-]?[0-9][0-9_]*[fFdD]?|[0-9]([eE][+\-]?[0-9][0-9_]*)?[fFdD]|0[xX]([0-9a-fA-F][0-9a-fA-F_]*\.?|([0-9a-fA-F][0-9a-fA-F_]*)?\.[0-9a-fA-F][0-9a-fA-F_]*)[pP][+\-]?[0-9][0-9_]*[fFdD]?`, LiteralNumberFloat, nil },
{ `0[xX][0-9a-fA-F][0-9a-fA-F_]*[lL]?`, LiteralNumberHex, nil },
{ `0[bB][01][01_]*[lL]?`, LiteralNumberBin, nil },
{ `0[0-7_]+[lL]?`, LiteralNumberOct, nil },
{ `0|[1-9][0-9_]*[lL]?`, LiteralNumberInteger, nil },
{ `[~^*!%&\[\](){}<>|+=:;,./?-]`, Operator, nil },
{ `\n`, Text, nil },
{`[^\S\n]+`, Text, nil},
{`//.*?\n`, CommentSingle, nil},
{`/\*.*?\*/`, CommentMultiline, nil},
{`(assert|break|case|catch|continue|default|do|else|finally|for|if|goto|instanceof|new|return|switch|this|throw|try|while)\b`, Keyword, nil},
{`((?:(?:[^\W\d]|\$)[\w.\[\]$<>]*\s+)+?)((?:[^\W\d]|\$)[\w$]*)(\s*)(\()`, ByGroups(UsingSelf("root"), NameFunction, Text, Operator), nil},
{`@[^\W\d][\w.]*`, NameDecorator, nil},
{`(abstract|const|enum|extends|final|implements|native|private|protected|public|static|strictfp|super|synchronized|throws|transient|volatile)\b`, KeywordDeclaration, nil},
{`(boolean|byte|char|double|float|int|long|short|void)\b`, KeywordType, nil},
{`(package)(\s+)`, ByGroups(KeywordNamespace, Text), Push("import")},
{`(true|false|null)\b`, KeywordConstant, nil},
{`(class|interface)(\s+)`, ByGroups(KeywordDeclaration, Text), Push("class")},
{`(import(?:\s+static)?)(\s+)`, ByGroups(KeywordNamespace, Text), Push("import")},
{`"(\\\\|\\"|[^"])*"`, LiteralString, nil},
{`'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'`, LiteralStringChar, nil},
{`(\.)((?:[^\W\d]|\$)[\w$]*)`, ByGroups(Operator, NameAttribute), nil},
{`^\s*([^\W\d]|\$)[\w$]*:`, NameLabel, nil},
{`([^\W\d]|\$)[\w$]*`, Name, nil},
{`([0-9][0-9_]*\.([0-9][0-9_]*)?|\.[0-9][0-9_]*)([eE][+\-]?[0-9][0-9_]*)?[fFdD]?|[0-9][eE][+\-]?[0-9][0-9_]*[fFdD]?|[0-9]([eE][+\-]?[0-9][0-9_]*)?[fFdD]|0[xX]([0-9a-fA-F][0-9a-fA-F_]*\.?|([0-9a-fA-F][0-9a-fA-F_]*)?\.[0-9a-fA-F][0-9a-fA-F_]*)[pP][+\-]?[0-9][0-9_]*[fFdD]?`, LiteralNumberFloat, nil},
{`0[xX][0-9a-fA-F][0-9a-fA-F_]*[lL]?`, LiteralNumberHex, nil},
{`0[bB][01][01_]*[lL]?`, LiteralNumberBin, nil},
{`0[0-7_]+[lL]?`, LiteralNumberOct, nil},
{`0|[1-9][0-9_]*[lL]?`, LiteralNumberInteger, nil},
{`[~^*!%&\[\](){}<>|+=:;,./?-]`, Operator, nil},
{`\n`, Text, nil},
},
"class": {
{ `([^\W\d]|\$)[\w$]*`, NameClass, Pop(1) },
{`([^\W\d]|\$)[\w$]*`, NameClass, Pop(1)},
},
"import": {
{ `[\w.]+\*?`, NameNamespace, Pop(1) },
{`[\w.]+\*?`, NameNamespace, Pop(1)},
},
},
))

View File

@ -5,7 +5,7 @@ import (
)
// JavaScript lexer.
var JavaScript = Register(NewLexer(
var JavaScript = Register(MustNewLexer(
&Config{
Name: "JavaScript",
Aliases: []string{"js", "javascript"},

View File

@ -5,7 +5,7 @@ import (
)
// Makefile lexer.
var Makefile = Register(NewLexer(
var Makefile = Register(MustNewLexer(
&Config{
Name: "Makefile",
Aliases: []string{"make", "makefile", "mf", "bsdmake"},

View File

@ -5,7 +5,7 @@ import (
)
// Markdown lexer.
var Markdown = Register(NewLexer(
var Markdown = Register(MustNewLexer(
&Config{
Name: "markdown",
Aliases: []string{"md"},

View File

@ -5,7 +5,7 @@ import (
)
// Php lexer.
var Php = Register(NewLexer(
var Php = Register(MustNewLexer(
&Config{
Name: "PHP",
Aliases: []string{"php", "php3", "php4", "php5"},

View File

@ -5,7 +5,7 @@ import (
)
// Python lexer.
var Python = Register(NewLexer(
var Python = Register(MustNewLexer(
&Config{
Name: "Python",
Aliases: []string{"python", "py", "sage"},

View File

@ -5,7 +5,7 @@ import (
)
// SQL lexer.
var SQL = Register(NewLexer(
var SQL = Register(MustNewLexer(
&Config{
Name: "SQL",
Aliases: []string{"sql", "mysql", "postgresql", "postgres", "mssql"},

View File

@ -4,51 +4,51 @@ import (
"github.com/alecthomas/chroma"
)
// Pygments theme.
// Pygments default theme.
var Pygments = Register(chroma.NewStyle("pygments", map[chroma.TokenType]string{
chroma.Whitespace: "#bbbbbb",
chroma.Comment: "italic #408080",
chroma.CommentPreproc: "noitalic #BC7A00",
chroma.Whitespace: "#bbbbbb",
chroma.Comment: "italic #408080",
chroma.CommentPreproc: "noitalic #BC7A00",
chroma.Keyword: "bold #008000",
chroma.KeywordPseudo: "nobold",
chroma.KeywordType: "nobold #B00040",
chroma.Keyword: "bold #008000",
chroma.KeywordPseudo: "nobold",
chroma.KeywordType: "nobold #B00040",
chroma.Operator: "#666666",
chroma.OperatorWord: "bold #AA22FF",
chroma.Operator: "#666666",
chroma.OperatorWord: "bold #AA22FF",
chroma.NameBuiltin: "#008000",
chroma.NameFunction: "#0000FF",
chroma.NameClass: "bold #0000FF",
chroma.NameNamespace: "bold #0000FF",
chroma.NameException: "bold #D2413A",
chroma.NameVariable: "#19177C",
chroma.NameConstant: "#880000",
chroma.NameLabel: "#A0A000",
chroma.NameEntity: "bold #999999",
chroma.NameAttribute: "#7D9029",
chroma.NameTag: "bold #008000",
chroma.NameDecorator: "#AA22FF",
chroma.NameBuiltin: "#008000",
chroma.NameFunction: "#0000FF",
chroma.NameClass: "bold #0000FF",
chroma.NameNamespace: "bold #0000FF",
chroma.NameException: "bold #D2413A",
chroma.NameVariable: "#19177C",
chroma.NameConstant: "#880000",
chroma.NameLabel: "#A0A000",
chroma.NameEntity: "bold #999999",
chroma.NameAttribute: "#7D9029",
chroma.NameTag: "bold #008000",
chroma.NameDecorator: "#AA22FF",
chroma.String: "#BA2121",
chroma.StringDoc: "italic",
chroma.StringInterpol: "bold #BB6688",
chroma.StringEscape: "bold #BB6622",
chroma.StringRegex: "#BB6688",
chroma.StringSymbol: "#19177C",
chroma.StringOther: "#008000",
chroma.Number: "#666666",
chroma.String: "#BA2121",
chroma.StringDoc: "italic",
chroma.StringInterpol: "bold #BB6688",
chroma.StringEscape: "bold #BB6622",
chroma.StringRegex: "#BB6688",
chroma.StringSymbol: "#19177C",
chroma.StringOther: "#008000",
chroma.Number: "#666666",
chroma.GenericHeading: "bold #000080",
chroma.GenericSubheading: "bold #800080",
chroma.GenericDeleted: "#A00000",
chroma.GenericInserted: "#00A000",
chroma.GenericError: "#FF0000",
chroma.GenericEmph: "italic",
chroma.GenericStrong: "bold",
chroma.GenericPrompt: "bold #000080",
chroma.GenericOutput: "#888",
chroma.GenericTraceback: "#04D",
chroma.GenericHeading: "bold #000080",
chroma.GenericSubheading: "bold #800080",
chroma.GenericDeleted: "#A00000",
chroma.GenericInserted: "#00A000",
chroma.GenericError: "#FF0000",
chroma.GenericEmph: "italic",
chroma.GenericStrong: "bold",
chroma.GenericPrompt: "bold #000080",
chroma.GenericOutput: "#888",
chroma.GenericTraceback: "#04D",
chroma.Error: "border:#FF0000"
}))
chroma.Error: "border:#FF0000",
}))