mirror of https://github.com/alecthomas/chroma.git

Implement compile-time lexer mutators.

This should fix #15.
Alec Thomas 2017-09-21 19:59:10 +10:00
parent 60797cc03f
commit 1724aab879
5 changed files with 155 additions and 127 deletions
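In outline, the mechanism this commit adds (a condensed sketch of the code in the diff below, not additional API):

// A Mutator mutates the lexer state machine as it is processing.
type Mutator interface {
	Mutate(state *LexerState) error
}

// New: a Mutator that also implements LexerMutator is given a chance to
// rewrite the compiled rules once, when the lexer is compiled, instead of
// doing that work during every Tokenise call.
type LexerMutator interface {
	MutateLexer(lexer *RegexLexer, rule *CompiledRule) error
}

// RegexLexer.maybeCompile dispatches to the new interface, roughly:
//
//	if compile, ok := rule.Mutator.(LexerMutator); ok {
//		if err := compile.MutateLexer(r, rule); err != nil {
//			return err
//		}
//	}

Combined is reimplemented on top of this hook: its synthetic "__combined_..." state is now built at compile time rather than lazily while tokenising.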

View File

@@ -208,8 +208,7 @@ func (f *Formatter) styleToCSS(style *chroma.Style) map[chroma.TokenType]string
bg := style.Get(chroma.Background)
classes := map[chroma.TokenType]string{}
// Convert the style.
for t := range style.Entries {
e := style.Entries[t]
for t, e := range style.Entries {
if t != chroma.Background {
e = e.Sub(bg)
}

View File

@@ -11,6 +11,12 @@ type Mutator interface {
Mutate(state *LexerState) error
}
// A LexerMutator is an additional interface that a Mutator can implement
// to modify the lexer when it is compiled.
type LexerMutator interface {
MutateLexer(lexer *RegexLexer, rule *CompiledRule) error
}
// A MutatorFunc is a Mutator that mutates the lexer state machine as it is processing.
type MutatorFunc func(state *LexerState) error
@@ -44,25 +50,32 @@ func Include(state string) Rule {
}
}
// Combined creates a new anonymous state from the given states, and pushes that state.
func Combined(states ...string) MutatorFunc {
return func(s *LexerState) error {
name := "__combined_" + strings.Join(states, "__")
if _, ok := s.Rules[name]; !ok {
combined := []CompiledRule{}
for _, state := range states {
rules, ok := s.Rules[state]
if !ok {
return fmt.Errorf("invalid combine state %q", state)
}
combined = append(combined, rules...)
type combinedMutator struct {
states []string
}
func (c *combinedMutator) Mutate(s *LexerState) error { return nil }
func (c *combinedMutator) MutateLexer(lexer *RegexLexer, rule *CompiledRule) error {
name := "__combined_" + strings.Join(c.states, "__")
if _, ok := lexer.rules[name]; !ok {
combined := []*CompiledRule{}
for _, state := range c.states {
rules, ok := lexer.rules[state]
if !ok {
return fmt.Errorf("invalid combine state %q", state)
}
s.Rules[name] = combined
combined = append(combined, rules...)
}
s.Rules[s.State][s.Rule].Mutator = Push(name)
s.Stack = append(s.Stack, name)
return nil
lexer.rules[name] = combined
}
rule.Mutator = Push(name)
return nil
}
// Combined creates a new anonymous state from the given states, and pushes that state.
func Combined(states ...string) Mutator {
return &combinedMutator{states}
}
// Push states onto the stack.
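With this change, Combined does all of its work up front. For example (mirroring the new TestCombine added below), a lexer defined as

	l := MustNewLexer(nil, Rules{
		"root":  {{`hello`, String, Combined("world", "bye", "space")}},
		"world": {{`world`, Name, nil}},
		"bye":   {{`bye`, Name, nil}},
		"space": {{`\s+`, Whitespace, nil}},
	})

has its "__combined_world__bye__space" state synthesised once, by combinedMutator.MutateLexer during compilation, and the root rule's Mutator is rewritten to Push that state; nothing is rebuilt while tokenising.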

View File

@@ -3,6 +3,7 @@ package chroma
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@@ -10,14 +11,14 @@ func TestInclude(t *testing.T) {
include := Include("other")
actual := CompiledRules{
"root": {
CompiledRule{Rule: include},
{Rule: include},
},
"other": {
CompiledRule{Rule: Rule{
{Rule: Rule{
Pattern: "//.+",
Type: Comment,
}},
CompiledRule{Rule: Rule{
{Rule: Rule{
Pattern: `"[^"]*"`,
Type: String,
}},
@@ -31,21 +32,21 @@ func TestInclude(t *testing.T) {
require.NoError(t, err)
expected := CompiledRules{
"root": {
CompiledRule{Rule: Rule{
{Rule: Rule{
Pattern: "//.+",
Type: Comment,
}},
CompiledRule{Rule: Rule{
{Rule: Rule{
Pattern: `"[^"]*"`,
Type: String,
}},
},
"other": {
CompiledRule{Rule: Rule{
{Rule: Rule{
Pattern: "//.+",
Type: Comment,
}},
CompiledRule{Rule: Rule{
{Rule: Rule{
Pattern: `"[^"]*"`,
Type: String,
}},
@@ -53,3 +54,16 @@ func TestInclude(t *testing.T) {
}
require.Equal(t, expected, actual)
}
func TestCombine(t *testing.T) {
l := MustNewLexer(nil, Rules{
"root": {{`hello`, String, Combined("world", "bye", "space")}},
"world": {{`world`, Name, nil}},
"bye": {{`bye`, Name, nil}},
"space": {{`\s+`, Whitespace, nil}},
})
it, err := l.Tokenise(nil, "hello world")
require.NoError(t, err)
expected := []*Token{{String, `hello`}, {Whitespace, ` `}, {Name, `world`}}
assert.Equal(t, expected, it.Tokens())
}
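The same two-interface pattern is available to other mutators inside the chroma package. A hypothetical example (illustrative only, not part of this commit; it assumes it lives in package chroma, where lexer.rules and fmt are accessible):

// aliasMutator registers an existing state's compiled rules under a second
// name at compile time, then pushes that alias when its rule matches.
type aliasMutator struct {
	from, alias string
}

// Mutate is a no-op at tokenise time; the work happens at compile time.
func (a *aliasMutator) Mutate(s *LexerState) error { return nil }

// MutateLexer runs once, while RegexLexer.maybeCompile is compiling the rules.
func (a *aliasMutator) MutateLexer(lexer *RegexLexer, rule *CompiledRule) error {
	rules, ok := lexer.rules[a.from]
	if !ok {
		return fmt.Errorf("unknown state %q", a.from)
	}
	lexer.rules[a.alias] = rules
	rule.Mutator = Push(a.alias)
	return nil
}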

View File

@@ -108,7 +108,7 @@ func NewLexer(config *Config, rules Rules) (*RegexLexer, error) {
if _, ok := rules["root"]; !ok {
return nil, fmt.Errorf("no \"root\" state")
}
compiledRules := map[string][]CompiledRule{}
compiledRules := map[string][]*CompiledRule{}
for state, rules := range rules {
for _, rule := range rules {
flags := ""
@@ -121,7 +121,7 @@ func NewLexer(config *Config, rules Rules) (*RegexLexer, error) {
if config.DotAll {
flags += "s"
}
compiledRules[state] = append(compiledRules[state], CompiledRule{Rule: rule, flags: flags})
compiledRules[state] = append(compiledRules[state], &CompiledRule{Rule: rule, flags: flags})
}
}
return &RegexLexer{
@@ -144,13 +144,13 @@ type CompiledRule struct {
flags string
}
type CompiledRules map[string][]CompiledRule
type CompiledRules map[string][]*CompiledRule
type LexerState struct {
Lexer *RegexLexer
Text []rune
Pos int
Rules map[string][]CompiledRule
Rules CompiledRules
Stack []string
State string
Rule int
@@ -234,7 +234,7 @@ type RegexLexer struct {
mu sync.Mutex
compiled bool
rules map[string][]CompiledRule
rules map[string][]*CompiledRule
}
// SetAnalyser sets the analyser function used to perform content inspection.
@@ -269,7 +269,11 @@ func (r *RegexLexer) maybeCompile() (err error) {
return fmt.Errorf("failed to compile rule %s.%d: %s", state, i, err)
}
}
rules[i] = rule
if compile, ok := rule.Mutator.(LexerMutator); ok {
if err := compile.MutateLexer(r, rule); err != nil {
return err
}
}
}
}
r.compiled = true
@@ -293,7 +297,7 @@ func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string) (Iterator,
return state.Iterator(), nil
}
func matchRules(text []rune, rules []CompiledRule) (int, CompiledRule, []string) {
func matchRules(text []rune, rules []*CompiledRule) (int, *CompiledRule, []string) {
for i, rule := range rules {
match, err := rule.Regexp.FindRunesMatch(text)
if match != nil && err == nil {
@@ -304,5 +308,5 @@ func matchRules(text []rune, rules []CompiledRule) (int, CompiledRule, []string)
return i, rule, groups
}
}
return 0, CompiledRule{}, nil
return 0, &CompiledRule{}, nil
}
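Two details in this file make the compile-time hook work: compiled rules are now stored and passed around as *CompiledRule, so the rule a LexerMutator receives is the same object matchRules later consults (which is why combinedMutator can simply set rule.Mutator = Push(name)); and maybeCompile no longer needs the old rules[i] = rule write-back, since changes made through the pointer are already visible in lexer.rules.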

View File

@@ -4,102 +4,100 @@ package chroma
import "fmt"
const _TokenType_name = "EOFNoneOtherErrorHighlightLineHighlightLineNumbersBackgroundKeywordKeywordConstantKeywordDeclarationKeywordNamespaceKeywordPseudoKeywordReservedKeywordTypeNameNameAttributeNameBuiltinNameBuiltinPseudoNameClassNameConstantNameDecoratorNameEntityNameExceptionNameFunctionNameFunctionMagicNameKeywordNameLabelNameNamespaceNameOperatorNameOtherNamePseudoNamePropertyNameTagNameVariableNameVariableAnonymousNameVariableClassNameVariableGlobalNameVariableInstanceNameVariableMagicLiteralLiteralDateLiteralOtherLiteralStringLiteralStringAffixLiteralStringAtomLiteralStringBacktickLiteralStringBooleanLiteralStringCharLiteralStringDelimiterLiteralStringDocLiteralStringDoubleLiteralStringEscapeLiteralStringHeredocLiteralStringInterpolLiteralStringNameLiteralStringOtherLiteralStringRegexLiteralStringSingleLiteralStringSymbolLiteralNumberLiteralNumberBinLiteralNumberFloatLiteralNumberHexLiteralNumberIntegerLiteralNumberIntegerLongLiteralNumberOctOperatorOperatorWordPunctuationCommentCommentHashbangCommentMultilineCommentSingleCommentSpecialCommentPreprocCommentPreprocFileGenericGenericDeletedGenericEmphGenericErrorGenericHeadingGenericInsertedGenericOutputGenericPromptGenericStrongGenericSubheadingGenericTracebackGenericUnderlineTextTextWhitespaceTextSymbolTextPunctuation"
const _TokenType_name = "NoneOtherErrorLineHighlightLineNumbersBackgroundKeywordKeywordConstantKeywordDeclarationKeywordNamespaceKeywordPseudoKeywordReservedKeywordTypeNameNameAttributeNameBuiltinNameBuiltinPseudoNameClassNameConstantNameDecoratorNameEntityNameExceptionNameFunctionNameFunctionMagicNameKeywordNameLabelNameNamespaceNameOperatorNameOtherNamePseudoNamePropertyNameTagNameVariableNameVariableAnonymousNameVariableClassNameVariableGlobalNameVariableInstanceNameVariableMagicLiteralLiteralDateLiteralOtherLiteralStringLiteralStringAffixLiteralStringAtomLiteralStringBacktickLiteralStringBooleanLiteralStringCharLiteralStringDelimiterLiteralStringDocLiteralStringDoubleLiteralStringEscapeLiteralStringHeredocLiteralStringInterpolLiteralStringNameLiteralStringOtherLiteralStringRegexLiteralStringSingleLiteralStringSymbolLiteralNumberLiteralNumberBinLiteralNumberFloatLiteralNumberHexLiteralNumberIntegerLiteralNumberIntegerLongLiteralNumberOctOperatorOperatorWordPunctuationCommentCommentHashbangCommentMultilineCommentSingleCommentSpecialCommentPreprocCommentPreprocFileGenericGenericDeletedGenericEmphGenericErrorGenericHeadingGenericInsertedGenericOutputGenericPromptGenericStrongGenericSubheadingGenericTracebackGenericUnderlineTextTextWhitespaceTextSymbolTextPunctuation"
var _TokenType_map = map[TokenType]string{
-8: _TokenType_name[0:3],
-7: _TokenType_name[3:7],
-6: _TokenType_name[7:12],
-5: _TokenType_name[12:17],
-4: _TokenType_name[17:26],
-3: _TokenType_name[26:39],
-2: _TokenType_name[39:50],
-1: _TokenType_name[50:60],
1000: _TokenType_name[60:67],
1001: _TokenType_name[67:82],
1002: _TokenType_name[82:100],
1003: _TokenType_name[100:116],
1004: _TokenType_name[116:129],
1005: _TokenType_name[129:144],
1006: _TokenType_name[144:155],
2000: _TokenType_name[155:159],
2001: _TokenType_name[159:172],
2002: _TokenType_name[172:183],
2003: _TokenType_name[183:200],
2004: _TokenType_name[200:209],
2005: _TokenType_name[209:221],
2006: _TokenType_name[221:234],
2007: _TokenType_name[234:244],
2008: _TokenType_name[244:257],
2009: _TokenType_name[257:269],
2010: _TokenType_name[269:286],
2011: _TokenType_name[286:297],
2012: _TokenType_name[297:306],
2013: _TokenType_name[306:319],
2014: _TokenType_name[319:331],
2015: _TokenType_name[331:340],
2016: _TokenType_name[340:350],
2017: _TokenType_name[350:362],
2018: _TokenType_name[362:369],
2019: _TokenType_name[369:381],
2020: _TokenType_name[381:402],
2021: _TokenType_name[402:419],
2022: _TokenType_name[419:437],
2023: _TokenType_name[437:457],
2024: _TokenType_name[457:474],
3000: _TokenType_name[474:481],
3001: _TokenType_name[481:492],
3002: _TokenType_name[492:504],
3100: _TokenType_name[504:517],
3101: _TokenType_name[517:535],
3102: _TokenType_name[535:552],
3103: _TokenType_name[552:573],
3104: _TokenType_name[573:593],
3105: _TokenType_name[593:610],
3106: _TokenType_name[610:632],
3107: _TokenType_name[632:648],
3108: _TokenType_name[648:667],
3109: _TokenType_name[667:686],
3110: _TokenType_name[686:706],
3111: _TokenType_name[706:727],
3112: _TokenType_name[727:744],
3113: _TokenType_name[744:762],
3114: _TokenType_name[762:780],
3115: _TokenType_name[780:799],
3116: _TokenType_name[799:818],
3200: _TokenType_name[818:831],
3201: _TokenType_name[831:847],
3202: _TokenType_name[847:865],
3203: _TokenType_name[865:881],
3204: _TokenType_name[881:901],
3205: _TokenType_name[901:925],
3206: _TokenType_name[925:941],
4000: _TokenType_name[941:949],
4001: _TokenType_name[949:961],
5000: _TokenType_name[961:972],
6000: _TokenType_name[972:979],
6001: _TokenType_name[979:994],
6002: _TokenType_name[994:1010],
6003: _TokenType_name[1010:1023],
6004: _TokenType_name[1023:1037],
6100: _TokenType_name[1037:1051],
6101: _TokenType_name[1051:1069],
7000: _TokenType_name[1069:1076],
7001: _TokenType_name[1076:1090],
7002: _TokenType_name[1090:1101],
7003: _TokenType_name[1101:1113],
7004: _TokenType_name[1113:1127],
7005: _TokenType_name[1127:1142],
7006: _TokenType_name[1142:1155],
7007: _TokenType_name[1155:1168],
7008: _TokenType_name[1168:1181],
7009: _TokenType_name[1181:1198],
7010: _TokenType_name[1198:1214],
7011: _TokenType_name[1214:1230],
8000: _TokenType_name[1230:1234],
8001: _TokenType_name[1234:1248],
8002: _TokenType_name[1248:1258],
8003: _TokenType_name[1258:1273],
-6: _TokenType_name[0:4],
-5: _TokenType_name[4:9],
-4: _TokenType_name[9:14],
-3: _TokenType_name[14:27],
-2: _TokenType_name[27:38],
-1: _TokenType_name[38:48],
1000: _TokenType_name[48:55],
1001: _TokenType_name[55:70],
1002: _TokenType_name[70:88],
1003: _TokenType_name[88:104],
1004: _TokenType_name[104:117],
1005: _TokenType_name[117:132],
1006: _TokenType_name[132:143],
2000: _TokenType_name[143:147],
2001: _TokenType_name[147:160],
2002: _TokenType_name[160:171],
2003: _TokenType_name[171:188],
2004: _TokenType_name[188:197],
2005: _TokenType_name[197:209],
2006: _TokenType_name[209:222],
2007: _TokenType_name[222:232],
2008: _TokenType_name[232:245],
2009: _TokenType_name[245:257],
2010: _TokenType_name[257:274],
2011: _TokenType_name[274:285],
2012: _TokenType_name[285:294],
2013: _TokenType_name[294:307],
2014: _TokenType_name[307:319],
2015: _TokenType_name[319:328],
2016: _TokenType_name[328:338],
2017: _TokenType_name[338:350],
2018: _TokenType_name[350:357],
2019: _TokenType_name[357:369],
2020: _TokenType_name[369:390],
2021: _TokenType_name[390:407],
2022: _TokenType_name[407:425],
2023: _TokenType_name[425:445],
2024: _TokenType_name[445:462],
3000: _TokenType_name[462:469],
3001: _TokenType_name[469:480],
3002: _TokenType_name[480:492],
3100: _TokenType_name[492:505],
3101: _TokenType_name[505:523],
3102: _TokenType_name[523:540],
3103: _TokenType_name[540:561],
3104: _TokenType_name[561:581],
3105: _TokenType_name[581:598],
3106: _TokenType_name[598:620],
3107: _TokenType_name[620:636],
3108: _TokenType_name[636:655],
3109: _TokenType_name[655:674],
3110: _TokenType_name[674:694],
3111: _TokenType_name[694:715],
3112: _TokenType_name[715:732],
3113: _TokenType_name[732:750],
3114: _TokenType_name[750:768],
3115: _TokenType_name[768:787],
3116: _TokenType_name[787:806],
3200: _TokenType_name[806:819],
3201: _TokenType_name[819:835],
3202: _TokenType_name[835:853],
3203: _TokenType_name[853:869],
3204: _TokenType_name[869:889],
3205: _TokenType_name[889:913],
3206: _TokenType_name[913:929],
4000: _TokenType_name[929:937],
4001: _TokenType_name[937:949],
5000: _TokenType_name[949:960],
6000: _TokenType_name[960:967],
6001: _TokenType_name[967:982],
6002: _TokenType_name[982:998],
6003: _TokenType_name[998:1011],
6004: _TokenType_name[1011:1025],
6100: _TokenType_name[1025:1039],
6101: _TokenType_name[1039:1057],
7000: _TokenType_name[1057:1064],
7001: _TokenType_name[1064:1078],
7002: _TokenType_name[1078:1089],
7003: _TokenType_name[1089:1101],
7004: _TokenType_name[1101:1115],
7005: _TokenType_name[1115:1130],
7006: _TokenType_name[1130:1143],
7007: _TokenType_name[1143:1156],
7008: _TokenType_name[1156:1169],
7009: _TokenType_name[1169:1186],
7010: _TokenType_name[1186:1202],
7011: _TokenType_name[1202:1218],
8000: _TokenType_name[1218:1222],
8001: _TokenType_name[1222:1236],
8002: _TokenType_name[1236:1246],
8003: _TokenType_name[1246:1261],
}
func (i TokenType) String() string {