
Add Markdown processor. A bunch of performance improvements.

This commit is contained in:
Alec Thomas 2017-06-02 11:42:52 +10:00
parent b2fb8edf77
commit 6dd81b044b
6 changed files with 103 additions and 15 deletions
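Taken together, the changes below wire the new Markdown lexer into the existing pipeline: match a lexer by filename, tokenise, and write through a buffered console formatter. A rough end-to-end sketch, using only identifiers that appear in the diffs; the import paths are assumed and error handling is simplified:

package main

import (
	"bufio"
	"log"
	"os"

	"github.com/alecthomas/chroma/formatters" // assumed import path
	"github.com/alecthomas/chroma/lexers"     // assumed import path
)

func main() {
	// Buffer stdout once, mirroring the performance change in the command below.
	w := bufio.NewWriterSize(os.Stdout, 16384)
	defer w.Flush()

	// *.md now resolves to the new Markdown lexer.
	matched := lexers.Registry.Match("README.md")
	if len(matched) == 0 {
		log.Fatal("no lexer matched")
	}
	tokens, err := matched[0].Tokenise("# Heading\n\nSome *emphasis* and `code`.\n")
	if err != nil {
		log.Fatal(err)
	}
	formatter := formatters.Console(formatters.DefaultConsoleTheme)
	if err := formatter.Format(w, tokens); err != nil {
		log.Fatal(err)
	}
}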

View File

@@ -1,6 +1,7 @@
package main
import (
"bufio"
"fmt"
"io/ioutil"
"os"
@@ -27,6 +28,8 @@ func main() {
pprof.StartCPUProfile(f)
defer pprof.StopCPUProfile()
}
w := bufio.NewWriterSize(os.Stdout, 16384)
defer w.Flush()
formatter := formatters.Console(formatters.DefaultConsoleTheme)
for _, filename := range *filesArgs {
lexers := lexers.Registry.Match(filename)
@@ -41,7 +44,7 @@ func main() {
fmt.Println(token)
}
} else {
formatter.Format(os.Stdout, tokens)
formatter.Format(w, tokens)
}
}
}

View File

@@ -1,18 +1,19 @@
package formatters
import (
"bufio"
"fmt"
"io"
. "github.com/alecthomas/chroma" // nolint
"github.com/alecthomas/colour"
)
var DefaultConsoleTheme = map[TokenType]string{
Number: "^B^3",
Comment: "^5",
String: "^B^5",
Keyword: "^B^7",
Number: "\033[1m\033[33m",
Comment: "\033[36m",
String: "\033[1m\033[36m",
Keyword: "\033[1m\033[37m",
GenericHeading: "\033[1m",
GenericSubheading: "\033[1m",
}
// Console formatter.
@@ -27,8 +28,6 @@ type consoleFormatter struct {
}
func (c *consoleFormatter) Format(w io.Writer, tokens []Token) error {
bw := bufio.NewWriterSize(w, 1024)
printer := colour.Colour(bw)
for _, token := range tokens {
clr, ok := c.theme[token.Type]
if !ok {
@@ -36,12 +35,12 @@ func (c *consoleFormatter) Format(w io.Writer, tokens []Token) error {
if !ok {
clr, ok = c.theme[token.Type.Category()]
if !ok {
clr = "^R"
clr = "\033[0m"
}
}
}
printer.Printf(clr+"%s", token.Value)
fmt.Fprint(w, clr)
fmt.Fprint(w, token.Value)
}
bw.Flush()
return nil
}
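For context on the theme change above: the new values are raw ANSI SGR escape sequences written straight to the writer (1 is bold, 33 yellow, 36 cyan, 37 white, 0 resets attributes), which is what lets the formatter drop the colour dependency and the per-call buffered writer. A throwaway sketch of what those escapes do on a terminal:

package main

import "fmt"

func main() {
	// Entries mirror values from the new DefaultConsoleTheme.
	samples := map[string]string{
		"Number (bold yellow)": "\033[1m\033[33m",
		"Comment (cyan)":       "\033[36m",
		"Keyword (bold white)": "\033[1m\033[37m",
	}
	for label, esc := range samples {
		fmt.Println(esc + label + "\033[0m") // reset after each sample
	}
}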

View File

@@ -80,10 +80,10 @@ type EmitterFunc func(groups []string) []Token
func (e EmitterFunc) Emit(groups []string) []Token { return e(groups) }
// ByGroups emits a token for each matching group in the rule's regex.
func ByGroups(types ...TokenType) Emitter {
func ByGroups(emitters ...Emitter) Emitter {
return EmitterFunc(func(groups []string) (out []Token) {
for i, group := range groups[1:] {
out = append(out, Token{types[i], group})
out = append(out, emitters[i].Emit([]string{group})...)
}
return
})
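With the signature change above, ByGroups accepts any Emitter per group rather than a bare TokenType, so a group can be post-processed by a custom EmitterFunc. A minimal sketch, assuming TokenType itself satisfies Emitter (the Markdown lexer in this commit passes token types straight into ByGroups, so it presumably does); the words and headingRule names are made up for illustration:

package lexers

import (
	"strings"

	. "github.com/alecthomas/chroma" // nolint
)

// ByGroups hands each sub-emitter a one-element slice holding only its own
// group, so groups[0] below is the text of the second capture group.
var words = EmitterFunc(func(groups []string) []Token {
	out := []Token{}
	for _, w := range strings.Fields(groups[0]) {
		out = append(out, Token{Text, w})
	}
	return out
})

// First group emitted as a plain token type, second re-processed by words.
var headingRule = Rule{`^(#)([^#].+\n)`, ByGroups(GenericHeading, words), nil}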

View File

@@ -12,6 +12,7 @@ var Go = Register(NewLexer(
Aliases: []string{"go", "golang"},
MimeTypes: []string{"text/x-gosrc"},
},
// TODO: Convert this Lexer to use text/scanner
Rules{
`root`: []Rule{
{`\n`, Text, nil},

lexers/markdown.go (new file, 81 additions)
View File

@@ -0,0 +1,81 @@
package lexers
import (
. "github.com/alecthomas/chroma" // nolint
)
// Markdown lexer.
var Markdown = Register(NewLexer(
&Config{
Name: "markdown",
Aliases: []string{"md"},
Filenames: []string{"*.md"},
MimeTypes: []string{"text/x-markdown"},
},
map[string][]Rule{
"root": []Rule{
// heading with pound prefix
{`^(#)([^#].+\n)`, ByGroups(GenericHeading, Text), nil},
{`^(#{2,6})(.+\n)`, ByGroups(GenericSubheading, Text), nil},
// task list
{`^(\s*)([*-] )(\[[ xX]\])( .+\n)`,
// ByGroups(Text, Keyword, Keyword, using(this, state='inline')), nil},
ByGroups(Text, Keyword, Keyword, Text), nil},
// bulleted lists
{`^(\s*)([*-])(\s)(.+\n)`,
// ByGroups(Text, Keyword, Text, using(this, state='inline')), nil},
ByGroups(Text, Keyword, Text, Text), nil},
// numbered lists
{`^(\s*)([0-9]+\.)( .+\n)`,
// ByGroups(Text, Keyword, using(this, state='inline')), nil},
ByGroups(Text, Keyword, Text), nil},
// quote
{`^(\s*>\s)(.+\n)`, ByGroups(Keyword, GenericEmph), nil},
// text block
{"^(```\n)([\\w\\W]*?)(^```$)", ByGroups(String, Text, String), nil},
// code block with language
{"^(```)(\\w+)(\n)([\\w\\W]*?)(^```$)", EmitterFunc(HandleCodeblock), nil},
Include(`inline`),
},
`inline`: []Rule{
// escape
{`\\.`, Text, nil},
// italics
{`(\s)([*_][^*_]+[*_])(\W|\n)`, ByGroups(Text, GenericEmph, Text), nil},
// bold
// warning: the following rule swallows nested markup, e.g. in **foo _bar_ baz** the _bar_ is not emitted as italics
{`(\s)(\*\*.*\*\*)`, ByGroups(Text, GenericStrong), nil},
// strikethrough
{`(\s)(~~[^~]+~~)`, ByGroups(Text, GenericDeleted), nil},
// inline code
{"`[^`]+`", StringBacktick, nil},
// mentions and topics (twitter and github stuff)
{`[@#][\w/:]+`, NameEntity, nil},
// image and regular links, e.g. ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
{`(!?\[)([^]]+)(\])(\()([^)]+)(\))`, ByGroups(Text, NameTag, Text, Text, NameAttribute, Text), nil},
// general text, must come last!
{`[^\\\s]+`, Text, nil},
{`.`, Text, nil},
},
},
))
func HandleCodeblock(groups []string) []Token {
out := []Token{
{String, groups[1]},
{String, groups[2]},
{Text, groups[3]},
}
code := groups[4]
lexer := Registry.Get(groups[2])
tokens, err := lexer.Tokenise(code)
if err == nil {
out = append(out, tokens...)
} else {
out = append(out, Token{Error, code})
}
out = append(out, Token{String, groups[5]})
return out
}
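A usage sketch for the new lexer, showing the delegation performed by HandleCodeblock: the language tag after a fence is looked up in the registry and the block body is re-tokenised by that lexer. The exact token stream depends on the regex flags in play, so treat this as illustrative rather than exact; the import path is assumed:

package main

import (
	"fmt"
	"log"

	"github.com/alecthomas/chroma/lexers" // assumed import path
)

func main() {
	// A fenced block with a language tag; its body is re-lexed by the "go" lexer.
	src := "# Title\n\n```go\npackage main\n```\n"
	tokens, err := lexers.Markdown.Tokenise(src)
	if err != nil {
		log.Fatal(err)
	}
	for _, t := range tokens {
		fmt.Printf("%-20v %q\n", t.Type, t.Value)
	}
}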

View File

@@ -37,7 +37,11 @@ func (r *registry) Names(withAliases bool) []string {
// Get a Lexer by name.
func (r *registry) Get(name string) chroma.Lexer {
return r.byName[name]
lexer, ok := r.byName[name]
if ok {
return lexer
}
return Default
}
// Match returns all lexers matching filename.
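The fallback above matters directly for the new Markdown lexer: the language tag after a code fence is arbitrary input, and HandleCodeblock passes it to Registry.Get unchecked, so returning Default instead of nil for unknown names avoids a nil-pointer panic during Tokenise. A rough sketch, assuming Default is a registered catch-all lexer and the import path shown:

package main

import (
	"fmt"
	"log"

	"github.com/alecthomas/chroma/lexers" // assumed import path
)

func main() {
	// An unknown name now resolves to the Default lexer rather than nil.
	lexer := lexers.Registry.Get("no-such-language")
	tokens, err := lexer.Tokenise("some plain text")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(len(tokens), "tokens from the fallback lexer")
}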