Mirror of https://github.com/alecthomas/chroma.git

Pass *LexerState as context to emitters

Useful for accessing named capture groups, context set by
`mutators`, and the other fields and methods that LexerState provides.
Siavash Askari Nasr
2021-05-06 14:37:30 +04:30
committed by Alec Thomas
parent 2cc8645d62
commit 225e1862d3
5 changed files with 30 additions and 28 deletions
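
For illustration, a minimal sketch of an emitter written against the new signature, reading context that a mutator stored earlier. This is not part of the commit; the "in-macro" context key is hypothetical and assumes some mutator has called state.Set("in-macro", true) beforehand. It is written as if inside the chroma package, like the code in the diffs below:

// highlightMacro emits the whole match as a function name only when a
// mutator has previously set the hypothetical "in-macro" context key.
var highlightMacro = EmitterFunc(func(groups []string, state *LexerState) Iterator {
	if inMacro, _ := state.Get("in-macro").(bool); inMacro {
		return Literator(Token{Type: NameFunction, Value: groups[0]})
	}
	return Literator(Token{Type: Text, Value: groups[0]})
})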

View File

@@ -37,14 +37,14 @@ func httpRules() Rules {
 	}
 }
 
-func httpContentBlock(groups []string, lexer Lexer) Iterator {
+func httpContentBlock(groups []string, state *LexerState) Iterator {
 	tokens := []Token{
 		{Generic, groups[0]},
 	}
 	return Literator(tokens...)
 }
 
-func httpHeaderBlock(groups []string, lexer Lexer) Iterator {
+func httpHeaderBlock(groups []string, state *LexerState) Iterator {
 	tokens := []Token{
 		{Name, groups[1]},
 		{Text, groups[2]},
@@ -56,7 +56,7 @@ func httpHeaderBlock(groups []string, lexer Lexer) Iterator {
 	return Literator(tokens...)
 }
 
-func httpContinuousHeaderBlock(groups []string, lexer Lexer) Iterator {
+func httpContinuousHeaderBlock(groups []string, state *LexerState) Iterator {
 	tokens := []Token{
 		{Text, groups[1]},
 		{Literal, groups[2]},

View File

@@ -1319,7 +1319,7 @@ func makeRuleAndPushMaybe(config RuleMakingConfig) *CompiledRule {
 
 // Emitter for colon pairs, changes token state based on key and brackets
 func colonPair(tokenClass TokenType) Emitter {
-	return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
+	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
 		iterators := []Iterator{}
 		tokens := []Token{
 			{Punctuation, groups[1]},
@@ -1343,7 +1343,7 @@ func colonPair(tokenClass TokenType) Emitter {
 
 		// Use token state to Tokenise key
 		if keyTokenState != "" {
-			iterator, err := lexer.Tokenise(
+			iterator, err := state.Lexer.Tokenise(
 				&TokeniseOptions{
 					State:  keyTokenState,
 					Nested: true,
@@ -1374,7 +1374,7 @@ func colonPair(tokenClass TokenType) Emitter {
 
 		// Use token state to Tokenise value
 		if valueTokenState != "" {
-			iterator, err := lexer.Tokenise(
+			iterator, err := state.Lexer.Tokenise(
 				&TokeniseOptions{
 					State:  valueTokenState,
 					Nested: true,
@@ -1394,7 +1394,7 @@ func colonPair(tokenClass TokenType) Emitter {
 }
 
 // Emitter for quoting constructs, changes token state based on quote name and adverbs
-func quote(groups []string, lexer Lexer) Iterator {
+func quote(groups []string, state *LexerState) Iterator {
 	iterators := []Iterator{}
 	tokens := []Token{
 		{Keyword, groups[1]},
@@ -1443,7 +1443,7 @@ func quote(groups []string, lexer Lexer) Iterator {
 		tokenState = "Q"
 	}
 
-	iterator, err := lexer.Tokenise(
+	iterator, err := state.Lexer.Tokenise(
 		&TokeniseOptions{
 			State:  tokenState,
 			Nested: true,
@@ -1462,9 +1462,9 @@ func quote(groups []string, lexer Lexer) Iterator {
 }
 
 // Emitter for pod config, tokenises the properties with "colon-pair-attribute" state
-func podConfig(groups []string, lexer Lexer) Iterator {
+func podConfig(groups []string, state *LexerState) Iterator {
 	// Tokenise pod config
-	iterator, err := lexer.Tokenise(
+	iterator, err := state.Lexer.Tokenise(
 		&TokeniseOptions{
 			State:  "colon-pair-attribute",
 			Nested: true,
@@ -1478,7 +1478,7 @@ func podConfig(groups []string, lexer Lexer) Iterator {
 }
 
 // Emitter for pod code, tokenises the code based on the lang specified
-func podCode(groups []string, lexer Lexer) Iterator {
+func podCode(groups []string, state *LexerState) Iterator {
 	iterators := []Iterator{}
 	tokens := []Token{
 		{Keyword, groups[1]},
@@ -1496,7 +1496,7 @@ func podCode(groups []string, lexer Lexer) Iterator {
 	iterators = append(iterators, Literator(tokens[:4]...))
 
 	// Tokenise pod config
-	iterators = append(iterators, podConfig([]string{groups[5]}, lexer))
+	iterators = append(iterators, podConfig([]string{groups[5]}, state))
 
 	langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(groups[5])
 	var lang string

View File

@@ -63,7 +63,7 @@ func restructuredtextRules() Rules {
 	}
 }
 
-func rstCodeBlock(groups []string, lexer Lexer) Iterator {
+func rstCodeBlock(groups []string, state *LexerState) Iterator {
 	iterators := []Iterator{}
 	tokens := []Token{
 		{Punctuation, groups[1]},
@@ -75,7 +75,7 @@ func rstCodeBlock(groups []string, lexer Lexer) Iterator {
 		{Text, groups[7]},
 	}
 	code := strings.Join(groups[8:], "")
-	lexer = internal.Get(groups[6])
+	lexer := internal.Get(groups[6])
 	if lexer == nil {
 		tokens = append(tokens, Token{String, code})
 		iterators = append(iterators, Literator(tokens...))

View File

@@ -22,25 +22,27 @@ type Rule struct {
 // An Emitter takes group matches and returns tokens.
 type Emitter interface {
 	// Emit tokens for the given regex groups.
-	Emit(groups []string, lexer Lexer) Iterator
+	Emit(groups []string, state *LexerState) Iterator
 }
 
 // EmitterFunc is a function that is an Emitter.
-type EmitterFunc func(groups []string, lexer Lexer) Iterator
+type EmitterFunc func(groups []string, state *LexerState) Iterator
 
 // Emit tokens for groups.
-func (e EmitterFunc) Emit(groups []string, lexer Lexer) Iterator { return e(groups, lexer) }
+func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator {
+	return e(groups, state)
+}
 
 // ByGroups emits a token for each matching group in the rule's regex.
 func ByGroups(emitters ...Emitter) Emitter {
-	return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
+	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
 		iterators := make([]Iterator, 0, len(groups)-1)
 		if len(emitters) != len(groups)-1 {
-			iterators = append(iterators, Error.Emit(groups, lexer))
+			iterators = append(iterators, Error.Emit(groups, state))
 			// panic(errors.Errorf("number of groups %q does not match number of emitters %v", groups, emitters))
 		} else {
 			for i, group := range groups[1:] {
-				iterators = append(iterators, emitters[i].Emit([]string{group}, lexer))
+				iterators = append(iterators, emitters[i].Emit([]string{group}, state))
 			}
 		}
 		return Concaterator(iterators...)
@@ -88,7 +90,7 @@ func ByGroups(emitters ...Emitter) Emitter {
 // Note: panic's if the number emitters does not equal the number of matched
 // groups in the regex.
 func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
-	return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
+	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
 		// bounds check
 		if len(emitters) != len(groups)-1 {
 			panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
@@ -107,7 +109,7 @@ func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGro
 				panic(err)
 			}
 		} else {
-			iterators[i] = emitters[i].Emit([]string{group}, lexer)
+			iterators[i] = emitters[i].Emit([]string{group}, state)
 		}
 	}
 
@@ -117,7 +119,7 @@ func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGro
 
 // Using returns an Emitter that uses a given Lexer for parsing and emitting.
 func Using(lexer Lexer) Emitter {
-	return EmitterFunc(func(groups []string, _ Lexer) Iterator {
+	return EmitterFunc(func(groups []string, _ *LexerState) Iterator {
 		it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
 		if err != nil {
 			panic(err)
@@ -127,9 +129,9 @@ func Using(lexer Lexer) Emitter {
 }
 
 // UsingSelf is like Using, but uses the current Lexer.
-func UsingSelf(state string) Emitter {
-	return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
-		it, err := lexer.Tokenise(&TokeniseOptions{State: state, Nested: true}, groups[0])
+func UsingSelf(stateName string) Emitter {
+	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
+		it, err := state.Lexer.Tokenise(&TokeniseOptions{State: stateName, Nested: true}, groups[0])
 		if err != nil {
 			panic(err)
 		}
@@ -329,7 +331,7 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit
 			}
 		}
 		if rule.Type != nil {
-			l.iteratorStack = append(l.iteratorStack, rule.Type.Emit(l.Groups, l.Lexer))
+			l.iteratorStack = append(l.iteratorStack, rule.Type.Emit(l.Groups, l))
 		}
 	}
 	// Exhaust the IteratorStack, if any.
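
The migration for custom emitters is mechanical: the old lexer parameter becomes state, and nested tokenising goes through state.Lexer. As a rough sketch (not from this commit), this is essentially what UsingSelf now does internally; the "interp" state name is hypothetical:

var interp = EmitterFunc(func(groups []string, state *LexerState) Iterator {
	// state.Lexer replaces the old lexer argument for nested tokenising.
	it, err := state.Lexer.Tokenise(&TokeniseOptions{State: "interp", Nested: true}, groups[0])
	if err != nil {
		panic(err)
	}
	return it
})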

View File

@@ -342,6 +342,6 @@ func (t TokenType) InSubCategory(other TokenType) bool {
 	return t/100 == other/100
 }
 
-func (t TokenType) Emit(groups []string, lexer Lexer) Iterator {
+func (t TokenType) Emit(groups []string, _ *LexerState) Iterator {
 	return Literator(Token{Type: t, Value: groups[0]})
 }