
Pass *LexerState as context to emitters

Useful for accessing named capture groups, context set by `mutators`,
and the other fields and methods that LexerState provides.
Siavash Askari Nasr 2021-05-06 14:37:30 +04:30 committed by Alec Thomas
parent 2cc8645d62
commit 225e1862d3
5 changed files with 30 additions and 28 deletions
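
The practical effect: a custom emitter now receives the running *LexerState instead of the bare Lexer, and reaches the lexer (plus the current Groups and mutator context) through it. A minimal sketch of an emitter written against the new signature, as if inside the chroma package — the emitter and its group layout are hypothetical; EmitterFunc, Literator, Concaterator, TokeniseOptions, and state.Lexer.Tokenise all appear in the diff below:

	// Hypothetical emitter: emit group 1 as a Keyword, then re-tokenise
	// group 2 using the lexer carried by the state.
	var exampleEmitter = EmitterFunc(func(groups []string, state *LexerState) Iterator {
		it, err := state.Lexer.Tokenise(
			&TokeniseOptions{State: "root", Nested: true},
			groups[2],
		)
		if err != nil {
			return Literator(Token{Error, groups[0]})
		}
		return Concaterator(Literator(Token{Keyword, groups[1]}), it)
	})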

View File

@@ -37,14 +37,14 @@ func httpRules() Rules {
 	}
 }
 
-func httpContentBlock(groups []string, lexer Lexer) Iterator {
+func httpContentBlock(groups []string, state *LexerState) Iterator {
 	tokens := []Token{
 		{Generic, groups[0]},
 	}
 	return Literator(tokens...)
 }
 
-func httpHeaderBlock(groups []string, lexer Lexer) Iterator {
+func httpHeaderBlock(groups []string, state *LexerState) Iterator {
 	tokens := []Token{
 		{Name, groups[1]},
 		{Text, groups[2]},
@@ -56,7 +56,7 @@ func httpHeaderBlock(groups []string, lexer Lexer) Iterator {
 	return Literator(tokens...)
 }
 
-func httpContinuousHeaderBlock(groups []string, lexer Lexer) Iterator {
+func httpContinuousHeaderBlock(groups []string, state *LexerState) Iterator {
 	tokens := []Token{
 		{Text, groups[1]},
 		{Literal, groups[2]},

View File

@@ -1319,7 +1319,7 @@ func makeRuleAndPushMaybe(config RuleMakingConfig) *CompiledRule {
 
 // Emitter for colon pairs, changes token state based on key and brackets
 func colonPair(tokenClass TokenType) Emitter {
-	return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
+	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
 		iterators := []Iterator{}
 		tokens := []Token{
 			{Punctuation, groups[1]},
@@ -1343,7 +1343,7 @@ func colonPair(tokenClass TokenType) Emitter {
 
 		// Use token state to Tokenise key
 		if keyTokenState != "" {
-			iterator, err := lexer.Tokenise(
+			iterator, err := state.Lexer.Tokenise(
 				&TokeniseOptions{
 					State:  keyTokenState,
 					Nested: true,
@@ -1374,7 +1374,7 @@ func colonPair(tokenClass TokenType) Emitter {
 
 		// Use token state to Tokenise value
 		if valueTokenState != "" {
-			iterator, err := lexer.Tokenise(
+			iterator, err := state.Lexer.Tokenise(
 				&TokeniseOptions{
 					State:  valueTokenState,
 					Nested: true,
@@ -1394,7 +1394,7 @@ func colonPair(tokenClass TokenType) Emitter {
 }
 
 // Emitter for quoting constructs, changes token state based on quote name and adverbs
-func quote(groups []string, lexer Lexer) Iterator {
+func quote(groups []string, state *LexerState) Iterator {
 	iterators := []Iterator{}
 	tokens := []Token{
 		{Keyword, groups[1]},
@@ -1443,7 +1443,7 @@ func quote(groups []string, lexer Lexer) Iterator {
 		tokenState = "Q"
 	}
 
-	iterator, err := lexer.Tokenise(
+	iterator, err := state.Lexer.Tokenise(
 		&TokeniseOptions{
 			State:  tokenState,
 			Nested: true,
@@ -1462,9 +1462,9 @@ func quote(groups []string, lexer Lexer) Iterator {
 }
 
 // Emitter for pod config, tokenises the properties with "colon-pair-attribute" state
-func podConfig(groups []string, lexer Lexer) Iterator {
+func podConfig(groups []string, state *LexerState) Iterator {
 	// Tokenise pod config
-	iterator, err := lexer.Tokenise(
+	iterator, err := state.Lexer.Tokenise(
 		&TokeniseOptions{
 			State:  "colon-pair-attribute",
 			Nested: true,
@@ -1478,7 +1478,7 @@ func podConfig(groups []string, lexer Lexer) Iterator {
 }
 
 // Emitter for pod code, tokenises the code based on the lang specified
-func podCode(groups []string, lexer Lexer) Iterator {
+func podCode(groups []string, state *LexerState) Iterator {
 	iterators := []Iterator{}
 	tokens := []Token{
 		{Keyword, groups[1]},
@@ -1496,7 +1496,7 @@ func podCode(groups []string, lexer Lexer) Iterator {
 	iterators = append(iterators, Literator(tokens[:4]...))
 
 	// Tokenise pod config
-	iterators = append(iterators, podConfig([]string{groups[5]}, lexer))
+	iterators = append(iterators, podConfig([]string{groups[5]}, state))
 
 	langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(groups[5])
 	var lang string
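
The other motivation named in the commit message is mutator context. Besides Lexer and Groups, LexerState carries a MutatorContext with Get/Set helpers; assuming those helpers, an emitter can now branch on flags that an earlier rule's mutator recorded. A rough sketch — the "in-pod" key and the emitter are invented for illustration:

	// Hypothetical emitter branching on mutator context; some earlier
	// rule's mutator is assumed to have called state.Set("in-pod", true).
	var podAware = EmitterFunc(func(groups []string, state *LexerState) Iterator {
		if in, ok := state.Get("in-pod").(bool); ok && in {
			return Literator(Token{Comment, groups[0]})
		}
		return Literator(Token{Text, groups[0]})
	})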

View File

@@ -63,7 +63,7 @@ func restructuredtextRules() Rules {
 	}
 }
 
-func rstCodeBlock(groups []string, lexer Lexer) Iterator {
+func rstCodeBlock(groups []string, state *LexerState) Iterator {
 	iterators := []Iterator{}
 	tokens := []Token{
 		{Punctuation, groups[1]},
@@ -75,7 +75,7 @@ func rstCodeBlock(groups []string, lexer Lexer) Iterator {
 		{Text, groups[7]},
 	}
 	code := strings.Join(groups[8:], "")
-	lexer = internal.Get(groups[6])
+	lexer := internal.Get(groups[6])
 	if lexer == nil {
 		tokens = append(tokens, Token{String, code})
 		iterators = append(iterators, Literator(tokens...))
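
A subtlety in this file: with the lexer parameter removed from rstCodeBlock, the plain assignment lexer = internal.Get(groups[6]) no longer has anything to assign to, so it becomes a short variable declaration. The lookup itself is unchanged (sketch of the surrounding logic, as in the diff):

	// Declare a fresh local now that there is no lexer parameter to reuse;
	// internal.Get returns nil when no lexer matches the given name.
	lexer := internal.Get(groups[6])
	if lexer == nil {
		// fall back to emitting the code as a plain String token
	}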

View File

@@ -22,25 +22,27 @@ type Rule struct {
 // An Emitter takes group matches and returns tokens.
 type Emitter interface {
 	// Emit tokens for the given regex groups.
-	Emit(groups []string, lexer Lexer) Iterator
+	Emit(groups []string, state *LexerState) Iterator
 }
 
 // EmitterFunc is a function that is an Emitter.
-type EmitterFunc func(groups []string, lexer Lexer) Iterator
+type EmitterFunc func(groups []string, state *LexerState) Iterator
 
 // Emit tokens for groups.
-func (e EmitterFunc) Emit(groups []string, lexer Lexer) Iterator { return e(groups, lexer) }
+func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator {
+	return e(groups, state)
+}
 
 // ByGroups emits a token for each matching group in the rule's regex.
 func ByGroups(emitters ...Emitter) Emitter {
-	return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
+	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
 		iterators := make([]Iterator, 0, len(groups)-1)
 		if len(emitters) != len(groups)-1 {
-			iterators = append(iterators, Error.Emit(groups, lexer))
+			iterators = append(iterators, Error.Emit(groups, state))
 			// panic(errors.Errorf("number of groups %q does not match number of emitters %v", groups, emitters))
 		} else {
 			for i, group := range groups[1:] {
-				iterators = append(iterators, emitters[i].Emit([]string{group}, lexer))
+				iterators = append(iterators, emitters[i].Emit([]string{group}, state))
 			}
 		}
 		return Concaterator(iterators...)
@@ -88,7 +90,7 @@ func ByGroups(emitters ...Emitter) Emitter {
 // Note: panic's if the number emitters does not equal the number of matched
 // groups in the regex.
 func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
-	return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
+	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
 		// bounds check
 		if len(emitters) != len(groups)-1 {
 			panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
@@ -107,7 +109,7 @@ func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGro
 					panic(err)
 				}
 			} else {
-				iterators[i] = emitters[i].Emit([]string{group}, lexer)
+				iterators[i] = emitters[i].Emit([]string{group}, state)
 			}
 		}
 
@@ -117,7 +119,7 @@ func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGro
 
 // Using returns an Emitter that uses a given Lexer for parsing and emitting.
 func Using(lexer Lexer) Emitter {
-	return EmitterFunc(func(groups []string, _ Lexer) Iterator {
+	return EmitterFunc(func(groups []string, _ *LexerState) Iterator {
 		it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
 		if err != nil {
 			panic(err)
@@ -127,9 +129,9 @@ func Using(lexer Lexer) Emitter {
 }
 
 // UsingSelf is like Using, but uses the current Lexer.
-func UsingSelf(state string) Emitter {
-	return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
-		it, err := lexer.Tokenise(&TokeniseOptions{State: state, Nested: true}, groups[0])
+func UsingSelf(stateName string) Emitter {
+	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
+		it, err := state.Lexer.Tokenise(&TokeniseOptions{State: stateName, Nested: true}, groups[0])
 		if err != nil {
 			panic(err)
 		}
@@ -329,7 +331,7 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit
 			}
 		}
 		if rule.Type != nil {
-			l.iteratorStack = append(l.iteratorStack, rule.Type.Emit(l.Groups, l.Lexer))
+			l.iteratorStack = append(l.iteratorStack, rule.Type.Emit(l.Groups, l))
		}
	}
	// Exhaust the IteratorStack, if any.
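
Note the rename of UsingSelf's parameter from state to stateName: it keeps that name free for the new state *LexerState closure argument. Call sites are unaffected; a typical rule still reads like this (pattern and state name are illustrative):

	Rules{
		"root": {
			// Re-enter this lexer's own "root" state for ${...} interpolations.
			{`\$\{[^}]*\}`, UsingSelf("root"), nil},
		},
	}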

View File

@@ -342,6 +342,6 @@ func (t TokenType) InSubCategory(other TokenType) bool {
 	return t/100 == other/100
 }
 
-func (t TokenType) Emit(groups []string, lexer Lexer) Iterator {
+func (t TokenType) Emit(groups []string, _ *LexerState) Iterator {
 	return Literator(Token{Type: t, Value: groups[0]})
 }
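
TokenType.Emit simply discards the state, which is what lets a bare token type keep serving as an Emitter in rule definitions. Something like this keeps working unchanged (illustrative rule):

	Rules{
		"root": {
			// A plain TokenType satisfies Emitter; its Emit ignores the *LexerState.
			{`\d+`, Number, nil},
		},
	}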