mirror of https://github.com/alecthomas/chroma.git synced 2025-11-27 22:38:42 +02:00

Implement compile-time lexer mutators.

This should fix #15.
Alec Thomas
2017-09-21 19:59:10 +10:00
parent 60797cc03f
commit 1724aab879
5 changed files with 155 additions and 127 deletions
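
In short: work the Combined mutator used to do on every tokenise call now happens once, when the lexer is compiled. A Mutator may additionally implement the new LexerMutator interface, and maybeCompile calls MutateLexer for each compiled rule, letting the mutator rewrite the rule table up front. A minimal sketch of a mutator using both hooks (the ruleTagger name is illustrative and not part of this commit; the two interfaces and Push are the ones shown in the diffs below, and the sketch assumes it lives in the chroma package, as Combined does):

// ruleTagger is a hypothetical Mutator that also implements the new LexerMutator.
type ruleTagger struct{}

// Mutate satisfies Mutator and runs while tokenising; here it does nothing.
func (ruleTagger) Mutate(state *LexerState) error { return nil }

// MutateLexer satisfies LexerMutator and runs once, in maybeCompile, when the
// lexer is compiled; it may rewrite the rule it is attached to, e.g. swap in a
// different runtime mutator the way Combined swaps in Push.
func (ruleTagger) MutateLexer(lexer *RegexLexer, rule *CompiledRule) error {
    rule.Mutator = Push("root")
    return nil
}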

View File

@@ -208,8 +208,7 @@ func (f *Formatter) styleToCSS(style *chroma.Style) map[chroma.TokenType]string
 	bg := style.Get(chroma.Background)
 	classes := map[chroma.TokenType]string{}
 	// Convert the style.
-	for t := range style.Entries {
-		e := style.Entries[t]
+	for t, e := range style.Entries {
 		if t != chroma.Background {
 			e = e.Sub(bg)
 		}

View File

@@ -11,6 +11,12 @@ type Mutator interface {
 	Mutate(state *LexerState) error
 }
 
+// A LexerMutator is an additional interface that a Mutator can implement
+// to modify the lexer when it is compiled.
+type LexerMutator interface {
+	MutateLexer(lexer *RegexLexer, rule *CompiledRule) error
+}
+
 // A MutatorFunc is a Mutator that mutates the lexer state machine as it is processing.
 type MutatorFunc func(state *LexerState) error
@@ -44,25 +50,32 @@ func Include(state string) Rule {
 	}
 }
 
-// Combined creates a new anonymous state from the given states, and pushes that state.
-func Combined(states ...string) MutatorFunc {
-	return func(s *LexerState) error {
-		name := "__combined_" + strings.Join(states, "__")
-		if _, ok := s.Rules[name]; !ok {
-			combined := []CompiledRule{}
-			for _, state := range states {
-				rules, ok := s.Rules[state]
-				if !ok {
-					return fmt.Errorf("invalid combine state %q", state)
-				}
-				combined = append(combined, rules...)
-			}
-			s.Rules[name] = combined
-		}
-		s.Rules[s.State][s.Rule].Mutator = Push(name)
-		s.Stack = append(s.Stack, name)
-		return nil
-	}
+type combinedMutator struct {
+	states []string
+}
+
+func (c *combinedMutator) Mutate(s *LexerState) error { return nil }
+
+func (c *combinedMutator) MutateLexer(lexer *RegexLexer, rule *CompiledRule) error {
+	name := "__combined_" + strings.Join(c.states, "__")
+	if _, ok := lexer.rules[name]; !ok {
+		combined := []*CompiledRule{}
+		for _, state := range c.states {
+			rules, ok := lexer.rules[state]
+			if !ok {
+				return fmt.Errorf("invalid combine state %q", state)
+			}
+			combined = append(combined, rules...)
+		}
+		lexer.rules[name] = combined
+	}
+	rule.Mutator = Push(name)
+	return nil
+}
+
+// Combined creates a new anonymous state from the given states, and pushes that state.
+func Combined(states ...string) Mutator {
+	return &combinedMutator{states}
 }
 
 // Push states onto the stack.
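
For context, a self-contained usage sketch of the reworked Combined, mirroring the new TestCombine further down; it assumes only the public API touched by this commit (MustNewLexer, Rules, Tokenise, Tokens) and simply prints the tokens it produces:

package main

import (
    "fmt"

    "github.com/alecthomas/chroma"
)

func main() {
    // "root" matches `hello` and pushes a state combining "world" and "space";
    // with this commit the combining happens at compile time, not per tokenise.
    lexer := chroma.MustNewLexer(nil, chroma.Rules{
        "root":  {{`hello`, chroma.String, chroma.Combined("world", "space")}},
        "world": {{`world`, chroma.Name, nil}},
        "space": {{`\s+`, chroma.Whitespace, nil}},
    })
    it, err := lexer.Tokenise(nil, "hello world")
    if err != nil {
        panic(err)
    }
    for _, tok := range it.Tokens() {
        fmt.Printf("%-15s %q\n", tok.Type, tok.Value)
    }
}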

View File

@@ -3,6 +3,7 @@ package chroma
 import (
 	"testing"
 
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@@ -10,14 +11,14 @@ func TestInclude(t *testing.T) {
 	include := Include("other")
 	actual := CompiledRules{
 		"root": {
-			CompiledRule{Rule: include},
+			{Rule: include},
 		},
 		"other": {
-			CompiledRule{Rule: Rule{
+			{Rule: Rule{
 				Pattern: "//.+",
 				Type:    Comment,
 			}},
-			CompiledRule{Rule: Rule{
+			{Rule: Rule{
 				Pattern: `"[^"]*"`,
 				Type:    String,
 			}},
@@ -31,21 +32,21 @@ func TestInclude(t *testing.T) {
 	require.NoError(t, err)
 	expected := CompiledRules{
 		"root": {
-			CompiledRule{Rule: Rule{
+			{Rule: Rule{
 				Pattern: "//.+",
 				Type:    Comment,
 			}},
-			CompiledRule{Rule: Rule{
+			{Rule: Rule{
 				Pattern: `"[^"]*"`,
 				Type:    String,
 			}},
 		},
 		"other": {
-			CompiledRule{Rule: Rule{
+			{Rule: Rule{
 				Pattern: "//.+",
 				Type:    Comment,
 			}},
-			CompiledRule{Rule: Rule{
+			{Rule: Rule{
 				Pattern: `"[^"]*"`,
 				Type:    String,
 			}},
@@ -53,3 +54,16 @@ func TestInclude(t *testing.T) {
 	}
 	require.Equal(t, expected, actual)
 }
+
+func TestCombine(t *testing.T) {
+	l := MustNewLexer(nil, Rules{
+		"root":  {{`hello`, String, Combined("world", "bye", "space")}},
+		"world": {{`world`, Name, nil}},
+		"bye":   {{`bye`, Name, nil}},
+		"space": {{`\s+`, Whitespace, nil}},
+	})
+	it, err := l.Tokenise(nil, "hello world")
+	require.NoError(t, err)
+	expected := []*Token{{String, `hello`}, {Whitespace, ` `}, {Name, `world`}}
+	assert.Equal(t, expected, it.Tokens())
+}
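
The expected tokens show the combined state doing the matching after `hello`: Push enters the synthetic state, whose rules then consume the space and `world` (the "bye" rules are present but never match). Purely as an illustration, and eliding the flags and compiled Regexp that a real CompiledRule carries, the anonymous state combinedMutator builds for this test looks roughly like:

// Illustrative only (assumes the chroma package): the state added by
// combinedMutator.MutateLexer for Combined("world", "bye", "space"), keyed by
// the generated "__combined_..." name and holding the three states' rules in order.
var combinedState = CompiledRules{
    "__combined_world__bye__space": {
        {Rule: Rule{Pattern: `world`, Type: Name}},
        {Rule: Rule{Pattern: `bye`, Type: Name}},
        {Rule: Rule{Pattern: `\s+`, Type: Whitespace}},
    },
}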

View File

@@ -108,7 +108,7 @@ func NewLexer(config *Config, rules Rules) (*RegexLexer, error) {
 	if _, ok := rules["root"]; !ok {
 		return nil, fmt.Errorf("no \"root\" state")
 	}
-	compiledRules := map[string][]CompiledRule{}
+	compiledRules := map[string][]*CompiledRule{}
 	for state, rules := range rules {
 		for _, rule := range rules {
 			flags := ""
@@ -121,7 +121,7 @@ func NewLexer(config *Config, rules Rules) (*RegexLexer, error) {
 			if config.DotAll {
 				flags += "s"
 			}
-			compiledRules[state] = append(compiledRules[state], CompiledRule{Rule: rule, flags: flags})
+			compiledRules[state] = append(compiledRules[state], &CompiledRule{Rule: rule, flags: flags})
 		}
 	}
 	return &RegexLexer{
@@ -144,13 +144,13 @@ type CompiledRule struct {
 	flags string
 }
 
-type CompiledRules map[string][]CompiledRule
+type CompiledRules map[string][]*CompiledRule
 
 type LexerState struct {
 	Lexer *RegexLexer
 	Text  []rune
 	Pos   int
-	Rules map[string][]CompiledRule
+	Rules CompiledRules
 	Stack []string
 	State string
 	Rule  int
@@ -234,7 +234,7 @@ type RegexLexer struct {
 	mu       sync.Mutex
 	compiled bool
-	rules    map[string][]CompiledRule
+	rules    map[string][]*CompiledRule
 }
 
 // SetAnalyser sets the analyser function used to perform content inspection.
@@ -269,7 +269,11 @@ func (r *RegexLexer) maybeCompile() (err error) {
 				return fmt.Errorf("failed to compile rule %s.%d: %s", state, i, err)
 			}
 		}
-			rules[i] = rule
+			if compile, ok := rule.Mutator.(LexerMutator); ok {
+				if err := compile.MutateLexer(r, rule); err != nil {
+					return err
+				}
+			}
 		}
 	}
 	r.compiled = true
@@ -293,7 +297,7 @@ func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string) (Iterator,
 	return state.Iterator(), nil
 }
 
-func matchRules(text []rune, rules []CompiledRule) (int, CompiledRule, []string) {
+func matchRules(text []rune, rules []*CompiledRule) (int, *CompiledRule, []string) {
 	for i, rule := range rules {
 		match, err := rule.Regexp.FindRunesMatch(text)
 		if match != nil && err == nil {
@@ -304,5 +308,5 @@ func matchRules(text []rune, rules []CompiledRule) (int, CompiledRule, []string)
 			return i, rule, groups
 		}
 	}
-	return 0, CompiledRule{}, nil
+	return 0, &CompiledRule{}, nil
 }
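
The switch from []CompiledRule to []*CompiledRule throughout is what lets MutateLexer (and maybeCompile's own loop) modify a rule in place; the old code had to copy the value back with rules[i] = rule. A standalone illustration of that difference, using a throwaway rule type rather than chroma's:

package main

import "fmt"

type rule struct{ mutator string }

func main() {
    // With a slice of values, the loop variable is a copy: the write is lost
    // unless it is stored back, as the removed `rules[i] = rule` did.
    byValue := []rule{{"old"}}
    for _, r := range byValue {
        r.mutator = "new"
    }

    // With a slice of pointers, the same write mutates the stored rule,
    // which is why rule.Mutator = Push(name) in MutateLexer now sticks.
    byPointer := []*rule{{"old"}}
    for _, r := range byPointer {
        r.mutator = "new"
    }

    fmt.Println(byValue[0].mutator, byPointer[0].mutator) // old new
}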

View File

@@ -4,102 +4,100 @@ package chroma
import "fmt" import "fmt"
const _TokenType_name = "EOFNoneOtherErrorHighlightLineHighlightLineNumbersBackgroundKeywordKeywordConstantKeywordDeclarationKeywordNamespaceKeywordPseudoKeywordReservedKeywordTypeNameNameAttributeNameBuiltinNameBuiltinPseudoNameClassNameConstantNameDecoratorNameEntityNameExceptionNameFunctionNameFunctionMagicNameKeywordNameLabelNameNamespaceNameOperatorNameOtherNamePseudoNamePropertyNameTagNameVariableNameVariableAnonymousNameVariableClassNameVariableGlobalNameVariableInstanceNameVariableMagicLiteralLiteralDateLiteralOtherLiteralStringLiteralStringAffixLiteralStringAtomLiteralStringBacktickLiteralStringBooleanLiteralStringCharLiteralStringDelimiterLiteralStringDocLiteralStringDoubleLiteralStringEscapeLiteralStringHeredocLiteralStringInterpolLiteralStringNameLiteralStringOtherLiteralStringRegexLiteralStringSingleLiteralStringSymbolLiteralNumberLiteralNumberBinLiteralNumberFloatLiteralNumberHexLiteralNumberIntegerLiteralNumberIntegerLongLiteralNumberOctOperatorOperatorWordPunctuationCommentCommentHashbangCommentMultilineCommentSingleCommentSpecialCommentPreprocCommentPreprocFileGenericGenericDeletedGenericEmphGenericErrorGenericHeadingGenericInsertedGenericOutputGenericPromptGenericStrongGenericSubheadingGenericTracebackGenericUnderlineTextTextWhitespaceTextSymbolTextPunctuation" const _TokenType_name = "NoneOtherErrorLineHighlightLineNumbersBackgroundKeywordKeywordConstantKeywordDeclarationKeywordNamespaceKeywordPseudoKeywordReservedKeywordTypeNameNameAttributeNameBuiltinNameBuiltinPseudoNameClassNameConstantNameDecoratorNameEntityNameExceptionNameFunctionNameFunctionMagicNameKeywordNameLabelNameNamespaceNameOperatorNameOtherNamePseudoNamePropertyNameTagNameVariableNameVariableAnonymousNameVariableClassNameVariableGlobalNameVariableInstanceNameVariableMagicLiteralLiteralDateLiteralOtherLiteralStringLiteralStringAffixLiteralStringAtomLiteralStringBacktickLiteralStringBooleanLiteralStringCharLiteralStringDelimiterLiteralStringDocLiteralStringDoubleLiteralStringEscapeLiteralStringHeredocLiteralStringInterpolLiteralStringNameLiteralStringOtherLiteralStringRegexLiteralStringSingleLiteralStringSymbolLiteralNumberLiteralNumberBinLiteralNumberFloatLiteralNumberHexLiteralNumberIntegerLiteralNumberIntegerLongLiteralNumberOctOperatorOperatorWordPunctuationCommentCommentHashbangCommentMultilineCommentSingleCommentSpecialCommentPreprocCommentPreprocFileGenericGenericDeletedGenericEmphGenericErrorGenericHeadingGenericInsertedGenericOutputGenericPromptGenericStrongGenericSubheadingGenericTracebackGenericUnderlineTextTextWhitespaceTextSymbolTextPunctuation"
var _TokenType_map = map[TokenType]string{ var _TokenType_map = map[TokenType]string{
-	-8:   _TokenType_name[0:3],
-	-7:   _TokenType_name[3:7],
-	-6:   _TokenType_name[7:12],
-	-5:   _TokenType_name[12:17],
-	-4:   _TokenType_name[17:26],
-	-3:   _TokenType_name[26:39],
-	-2:   _TokenType_name[39:50],
-	-1:   _TokenType_name[50:60],
-	1000: _TokenType_name[60:67],
-	1001: _TokenType_name[67:82],
-	1002: _TokenType_name[82:100],
-	1003: _TokenType_name[100:116],
-	1004: _TokenType_name[116:129],
-	1005: _TokenType_name[129:144],
-	1006: _TokenType_name[144:155],
-	2000: _TokenType_name[155:159],
-	2001: _TokenType_name[159:172],
-	2002: _TokenType_name[172:183],
-	2003: _TokenType_name[183:200],
-	2004: _TokenType_name[200:209],
-	2005: _TokenType_name[209:221],
-	2006: _TokenType_name[221:234],
-	2007: _TokenType_name[234:244],
-	2008: _TokenType_name[244:257],
-	2009: _TokenType_name[257:269],
-	2010: _TokenType_name[269:286],
-	2011: _TokenType_name[286:297],
-	2012: _TokenType_name[297:306],
-	2013: _TokenType_name[306:319],
-	2014: _TokenType_name[319:331],
-	2015: _TokenType_name[331:340],
-	2016: _TokenType_name[340:350],
-	2017: _TokenType_name[350:362],
-	2018: _TokenType_name[362:369],
-	2019: _TokenType_name[369:381],
-	2020: _TokenType_name[381:402],
-	2021: _TokenType_name[402:419],
-	2022: _TokenType_name[419:437],
-	2023: _TokenType_name[437:457],
-	2024: _TokenType_name[457:474],
-	3000: _TokenType_name[474:481],
-	3001: _TokenType_name[481:492],
-	3002: _TokenType_name[492:504],
-	3100: _TokenType_name[504:517],
-	3101: _TokenType_name[517:535],
-	3102: _TokenType_name[535:552],
-	3103: _TokenType_name[552:573],
-	3104: _TokenType_name[573:593],
-	3105: _TokenType_name[593:610],
-	3106: _TokenType_name[610:632],
-	3107: _TokenType_name[632:648],
-	3108: _TokenType_name[648:667],
-	3109: _TokenType_name[667:686],
-	3110: _TokenType_name[686:706],
-	3111: _TokenType_name[706:727],
-	3112: _TokenType_name[727:744],
-	3113: _TokenType_name[744:762],
-	3114: _TokenType_name[762:780],
-	3115: _TokenType_name[780:799],
-	3116: _TokenType_name[799:818],
-	3200: _TokenType_name[818:831],
-	3201: _TokenType_name[831:847],
-	3202: _TokenType_name[847:865],
-	3203: _TokenType_name[865:881],
-	3204: _TokenType_name[881:901],
-	3205: _TokenType_name[901:925],
-	3206: _TokenType_name[925:941],
-	4000: _TokenType_name[941:949],
-	4001: _TokenType_name[949:961],
-	5000: _TokenType_name[961:972],
-	6000: _TokenType_name[972:979],
-	6001: _TokenType_name[979:994],
-	6002: _TokenType_name[994:1010],
-	6003: _TokenType_name[1010:1023],
-	6004: _TokenType_name[1023:1037],
-	6100: _TokenType_name[1037:1051],
-	6101: _TokenType_name[1051:1069],
-	7000: _TokenType_name[1069:1076],
-	7001: _TokenType_name[1076:1090],
-	7002: _TokenType_name[1090:1101],
-	7003: _TokenType_name[1101:1113],
-	7004: _TokenType_name[1113:1127],
-	7005: _TokenType_name[1127:1142],
-	7006: _TokenType_name[1142:1155],
-	7007: _TokenType_name[1155:1168],
-	7008: _TokenType_name[1168:1181],
-	7009: _TokenType_name[1181:1198],
-	7010: _TokenType_name[1198:1214],
-	7011: _TokenType_name[1214:1230],
-	8000: _TokenType_name[1230:1234],
-	8001: _TokenType_name[1234:1248],
-	8002: _TokenType_name[1248:1258],
-	8003: _TokenType_name[1258:1273],
+	-6:   _TokenType_name[0:4],
+	-5:   _TokenType_name[4:9],
+	-4:   _TokenType_name[9:14],
+	-3:   _TokenType_name[14:27],
+	-2:   _TokenType_name[27:38],
+	-1:   _TokenType_name[38:48],
+	1000: _TokenType_name[48:55],
+	1001: _TokenType_name[55:70],
+	1002: _TokenType_name[70:88],
+	1003: _TokenType_name[88:104],
+	1004: _TokenType_name[104:117],
+	1005: _TokenType_name[117:132],
+	1006: _TokenType_name[132:143],
+	2000: _TokenType_name[143:147],
+	2001: _TokenType_name[147:160],
+	2002: _TokenType_name[160:171],
+	2003: _TokenType_name[171:188],
+	2004: _TokenType_name[188:197],
+	2005: _TokenType_name[197:209],
+	2006: _TokenType_name[209:222],
+	2007: _TokenType_name[222:232],
+	2008: _TokenType_name[232:245],
+	2009: _TokenType_name[245:257],
+	2010: _TokenType_name[257:274],
+	2011: _TokenType_name[274:285],
+	2012: _TokenType_name[285:294],
+	2013: _TokenType_name[294:307],
+	2014: _TokenType_name[307:319],
+	2015: _TokenType_name[319:328],
+	2016: _TokenType_name[328:338],
+	2017: _TokenType_name[338:350],
+	2018: _TokenType_name[350:357],
+	2019: _TokenType_name[357:369],
+	2020: _TokenType_name[369:390],
+	2021: _TokenType_name[390:407],
+	2022: _TokenType_name[407:425],
+	2023: _TokenType_name[425:445],
+	2024: _TokenType_name[445:462],
+	3000: _TokenType_name[462:469],
+	3001: _TokenType_name[469:480],
+	3002: _TokenType_name[480:492],
+	3100: _TokenType_name[492:505],
+	3101: _TokenType_name[505:523],
+	3102: _TokenType_name[523:540],
+	3103: _TokenType_name[540:561],
+	3104: _TokenType_name[561:581],
+	3105: _TokenType_name[581:598],
+	3106: _TokenType_name[598:620],
+	3107: _TokenType_name[620:636],
+	3108: _TokenType_name[636:655],
+	3109: _TokenType_name[655:674],
+	3110: _TokenType_name[674:694],
+	3111: _TokenType_name[694:715],
+	3112: _TokenType_name[715:732],
+	3113: _TokenType_name[732:750],
+	3114: _TokenType_name[750:768],
+	3115: _TokenType_name[768:787],
+	3116: _TokenType_name[787:806],
+	3200: _TokenType_name[806:819],
+	3201: _TokenType_name[819:835],
+	3202: _TokenType_name[835:853],
+	3203: _TokenType_name[853:869],
+	3204: _TokenType_name[869:889],
+	3205: _TokenType_name[889:913],
+	3206: _TokenType_name[913:929],
+	4000: _TokenType_name[929:937],
+	4001: _TokenType_name[937:949],
+	5000: _TokenType_name[949:960],
+	6000: _TokenType_name[960:967],
+	6001: _TokenType_name[967:982],
+	6002: _TokenType_name[982:998],
+	6003: _TokenType_name[998:1011],
+	6004: _TokenType_name[1011:1025],
+	6100: _TokenType_name[1025:1039],
+	6101: _TokenType_name[1039:1057],
+	7000: _TokenType_name[1057:1064],
+	7001: _TokenType_name[1064:1078],
+	7002: _TokenType_name[1078:1089],
+	7003: _TokenType_name[1089:1101],
+	7004: _TokenType_name[1101:1115],
+	7005: _TokenType_name[1115:1130],
+	7006: _TokenType_name[1130:1143],
+	7007: _TokenType_name[1143:1156],
+	7008: _TokenType_name[1156:1169],
+	7009: _TokenType_name[1169:1186],
+	7010: _TokenType_name[1186:1202],
+	7011: _TokenType_name[1202:1218],
+	8000: _TokenType_name[1218:1222],
+	8001: _TokenType_name[1222:1236],
+	8002: _TokenType_name[1236:1246],
+	8003: _TokenType_name[1246:1261],
 }
 
 func (i TokenType) String() string {