mirror of
https://github.com/alecthomas/chroma.git
synced 2025-07-15 01:14:21 +02:00
Pass *LexerState as context to emitters
Useful for accessing named capture groups, context set by `mutators`, and the other fields and methods that LexerState provides.
This commit is contained in:
committed by
Alec Thomas
parent
2cc8645d62
commit
225e1862d3
@ -37,14 +37,14 @@ func httpRules() Rules {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func httpContentBlock(groups []string, lexer Lexer) Iterator {
|
func httpContentBlock(groups []string, state *LexerState) Iterator {
|
||||||
tokens := []Token{
|
tokens := []Token{
|
||||||
{Generic, groups[0]},
|
{Generic, groups[0]},
|
||||||
}
|
}
|
||||||
return Literator(tokens...)
|
return Literator(tokens...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func httpHeaderBlock(groups []string, lexer Lexer) Iterator {
|
func httpHeaderBlock(groups []string, state *LexerState) Iterator {
|
||||||
tokens := []Token{
|
tokens := []Token{
|
||||||
{Name, groups[1]},
|
{Name, groups[1]},
|
||||||
{Text, groups[2]},
|
{Text, groups[2]},
|
||||||
@ -56,7 +56,7 @@ func httpHeaderBlock(groups []string, lexer Lexer) Iterator {
|
|||||||
return Literator(tokens...)
|
return Literator(tokens...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func httpContinuousHeaderBlock(groups []string, lexer Lexer) Iterator {
|
func httpContinuousHeaderBlock(groups []string, state *LexerState) Iterator {
|
||||||
tokens := []Token{
|
tokens := []Token{
|
||||||
{Text, groups[1]},
|
{Text, groups[1]},
|
||||||
{Literal, groups[2]},
|
{Literal, groups[2]},
|
||||||
|
@ -1319,7 +1319,7 @@ func makeRuleAndPushMaybe(config RuleMakingConfig) *CompiledRule {
|
|||||||
|
|
||||||
// Emitter for colon pairs, changes token state based on key and brackets
|
// Emitter for colon pairs, changes token state based on key and brackets
|
||||||
func colonPair(tokenClass TokenType) Emitter {
|
func colonPair(tokenClass TokenType) Emitter {
|
||||||
return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
|
return EmitterFunc(func(groups []string, state *LexerState) Iterator {
|
||||||
iterators := []Iterator{}
|
iterators := []Iterator{}
|
||||||
tokens := []Token{
|
tokens := []Token{
|
||||||
{Punctuation, groups[1]},
|
{Punctuation, groups[1]},
|
||||||
@ -1343,7 +1343,7 @@ func colonPair(tokenClass TokenType) Emitter {
|
|||||||
|
|
||||||
// Use token state to Tokenise key
|
// Use token state to Tokenise key
|
||||||
if keyTokenState != "" {
|
if keyTokenState != "" {
|
||||||
iterator, err := lexer.Tokenise(
|
iterator, err := state.Lexer.Tokenise(
|
||||||
&TokeniseOptions{
|
&TokeniseOptions{
|
||||||
State: keyTokenState,
|
State: keyTokenState,
|
||||||
Nested: true,
|
Nested: true,
|
||||||
@ -1374,7 +1374,7 @@ func colonPair(tokenClass TokenType) Emitter {
|
|||||||
|
|
||||||
// Use token state to Tokenise value
|
// Use token state to Tokenise value
|
||||||
if valueTokenState != "" {
|
if valueTokenState != "" {
|
||||||
iterator, err := lexer.Tokenise(
|
iterator, err := state.Lexer.Tokenise(
|
||||||
&TokeniseOptions{
|
&TokeniseOptions{
|
||||||
State: valueTokenState,
|
State: valueTokenState,
|
||||||
Nested: true,
|
Nested: true,
|
||||||
@ -1394,7 +1394,7 @@ func colonPair(tokenClass TokenType) Emitter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Emitter for quoting constructs, changes token state based on quote name and adverbs
|
// Emitter for quoting constructs, changes token state based on quote name and adverbs
|
||||||
func quote(groups []string, lexer Lexer) Iterator {
|
func quote(groups []string, state *LexerState) Iterator {
|
||||||
iterators := []Iterator{}
|
iterators := []Iterator{}
|
||||||
tokens := []Token{
|
tokens := []Token{
|
||||||
{Keyword, groups[1]},
|
{Keyword, groups[1]},
|
||||||
@ -1443,7 +1443,7 @@ func quote(groups []string, lexer Lexer) Iterator {
|
|||||||
tokenState = "Q"
|
tokenState = "Q"
|
||||||
}
|
}
|
||||||
|
|
||||||
iterator, err := lexer.Tokenise(
|
iterator, err := state.Lexer.Tokenise(
|
||||||
&TokeniseOptions{
|
&TokeniseOptions{
|
||||||
State: tokenState,
|
State: tokenState,
|
||||||
Nested: true,
|
Nested: true,
|
||||||
@ -1462,9 +1462,9 @@ func quote(groups []string, lexer Lexer) Iterator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Emitter for pod config, tokenises the properties with "colon-pair-attribute" state
|
// Emitter for pod config, tokenises the properties with "colon-pair-attribute" state
|
||||||
func podConfig(groups []string, lexer Lexer) Iterator {
|
func podConfig(groups []string, state *LexerState) Iterator {
|
||||||
// Tokenise pod config
|
// Tokenise pod config
|
||||||
iterator, err := lexer.Tokenise(
|
iterator, err := state.Lexer.Tokenise(
|
||||||
&TokeniseOptions{
|
&TokeniseOptions{
|
||||||
State: "colon-pair-attribute",
|
State: "colon-pair-attribute",
|
||||||
Nested: true,
|
Nested: true,
|
||||||
@ -1478,7 +1478,7 @@ func podConfig(groups []string, lexer Lexer) Iterator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Emitter for pod code, tokenises the code based on the lang specified
|
// Emitter for pod code, tokenises the code based on the lang specified
|
||||||
func podCode(groups []string, lexer Lexer) Iterator {
|
func podCode(groups []string, state *LexerState) Iterator {
|
||||||
iterators := []Iterator{}
|
iterators := []Iterator{}
|
||||||
tokens := []Token{
|
tokens := []Token{
|
||||||
{Keyword, groups[1]},
|
{Keyword, groups[1]},
|
||||||
@ -1496,7 +1496,7 @@ func podCode(groups []string, lexer Lexer) Iterator {
|
|||||||
iterators = append(iterators, Literator(tokens[:4]...))
|
iterators = append(iterators, Literator(tokens[:4]...))
|
||||||
|
|
||||||
// Tokenise pod config
|
// Tokenise pod config
|
||||||
iterators = append(iterators, podConfig([]string{groups[5]}, lexer))
|
iterators = append(iterators, podConfig([]string{groups[5]}, state))
|
||||||
|
|
||||||
langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(groups[5])
|
langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(groups[5])
|
||||||
var lang string
|
var lang string
|
||||||
|
@ -63,7 +63,7 @@ func restructuredtextRules() Rules {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func rstCodeBlock(groups []string, lexer Lexer) Iterator {
|
func rstCodeBlock(groups []string, state *LexerState) Iterator {
|
||||||
iterators := []Iterator{}
|
iterators := []Iterator{}
|
||||||
tokens := []Token{
|
tokens := []Token{
|
||||||
{Punctuation, groups[1]},
|
{Punctuation, groups[1]},
|
||||||
@ -75,7 +75,7 @@ func rstCodeBlock(groups []string, lexer Lexer) Iterator {
|
|||||||
{Text, groups[7]},
|
{Text, groups[7]},
|
||||||
}
|
}
|
||||||
code := strings.Join(groups[8:], "")
|
code := strings.Join(groups[8:], "")
|
||||||
lexer = internal.Get(groups[6])
|
lexer := internal.Get(groups[6])
|
||||||
if lexer == nil {
|
if lexer == nil {
|
||||||
tokens = append(tokens, Token{String, code})
|
tokens = append(tokens, Token{String, code})
|
||||||
iterators = append(iterators, Literator(tokens...))
|
iterators = append(iterators, Literator(tokens...))
|
||||||
|
28
regexp.go
28
regexp.go
@ -22,25 +22,27 @@ type Rule struct {
|
|||||||
// An Emitter takes group matches and returns tokens.
|
// An Emitter takes group matches and returns tokens.
|
||||||
type Emitter interface {
|
type Emitter interface {
|
||||||
// Emit tokens for the given regex groups.
|
// Emit tokens for the given regex groups.
|
||||||
Emit(groups []string, lexer Lexer) Iterator
|
Emit(groups []string, state *LexerState) Iterator
|
||||||
}
|
}
|
||||||
|
|
||||||
// EmitterFunc is a function that is an Emitter.
|
// EmitterFunc is a function that is an Emitter.
|
||||||
type EmitterFunc func(groups []string, lexer Lexer) Iterator
|
type EmitterFunc func(groups []string, state *LexerState) Iterator
|
||||||
|
|
||||||
// Emit tokens for groups.
|
// Emit tokens for groups.
|
||||||
func (e EmitterFunc) Emit(groups []string, lexer Lexer) Iterator { return e(groups, lexer) }
|
func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator {
|
||||||
|
return e(groups, state)
|
||||||
|
}
|
||||||
|
|
||||||
// ByGroups emits a token for each matching group in the rule's regex.
|
// ByGroups emits a token for each matching group in the rule's regex.
|
||||||
func ByGroups(emitters ...Emitter) Emitter {
|
func ByGroups(emitters ...Emitter) Emitter {
|
||||||
return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
|
return EmitterFunc(func(groups []string, state *LexerState) Iterator {
|
||||||
iterators := make([]Iterator, 0, len(groups)-1)
|
iterators := make([]Iterator, 0, len(groups)-1)
|
||||||
if len(emitters) != len(groups)-1 {
|
if len(emitters) != len(groups)-1 {
|
||||||
iterators = append(iterators, Error.Emit(groups, lexer))
|
iterators = append(iterators, Error.Emit(groups, state))
|
||||||
// panic(errors.Errorf("number of groups %q does not match number of emitters %v", groups, emitters))
|
// panic(errors.Errorf("number of groups %q does not match number of emitters %v", groups, emitters))
|
||||||
} else {
|
} else {
|
||||||
for i, group := range groups[1:] {
|
for i, group := range groups[1:] {
|
||||||
iterators = append(iterators, emitters[i].Emit([]string{group}, lexer))
|
iterators = append(iterators, emitters[i].Emit([]string{group}, state))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Concaterator(iterators...)
|
return Concaterator(iterators...)
|
||||||
@ -88,7 +90,7 @@ func ByGroups(emitters ...Emitter) Emitter {
|
|||||||
// Note: panic's if the number emitters does not equal the number of matched
|
// Note: panic's if the number emitters does not equal the number of matched
|
||||||
// groups in the regex.
|
// groups in the regex.
|
||||||
func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
|
func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
|
||||||
return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
|
return EmitterFunc(func(groups []string, state *LexerState) Iterator {
|
||||||
// bounds check
|
// bounds check
|
||||||
if len(emitters) != len(groups)-1 {
|
if len(emitters) != len(groups)-1 {
|
||||||
panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
|
panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
|
||||||
@ -107,7 +109,7 @@ func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGro
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
iterators[i] = emitters[i].Emit([]string{group}, lexer)
|
iterators[i] = emitters[i].Emit([]string{group}, state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,7 +119,7 @@ func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGro
|
|||||||
|
|
||||||
// Using returns an Emitter that uses a given Lexer for parsing and emitting.
|
// Using returns an Emitter that uses a given Lexer for parsing and emitting.
|
||||||
func Using(lexer Lexer) Emitter {
|
func Using(lexer Lexer) Emitter {
|
||||||
return EmitterFunc(func(groups []string, _ Lexer) Iterator {
|
return EmitterFunc(func(groups []string, _ *LexerState) Iterator {
|
||||||
it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
|
it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
@ -127,9 +129,9 @@ func Using(lexer Lexer) Emitter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// UsingSelf is like Using, but uses the current Lexer.
|
// UsingSelf is like Using, but uses the current Lexer.
|
||||||
func UsingSelf(state string) Emitter {
|
func UsingSelf(stateName string) Emitter {
|
||||||
return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
|
return EmitterFunc(func(groups []string, state *LexerState) Iterator {
|
||||||
it, err := lexer.Tokenise(&TokeniseOptions{State: state, Nested: true}, groups[0])
|
it, err := state.Lexer.Tokenise(&TokeniseOptions{State: stateName, Nested: true}, groups[0])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@ -329,7 +331,7 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if rule.Type != nil {
|
if rule.Type != nil {
|
||||||
l.iteratorStack = append(l.iteratorStack, rule.Type.Emit(l.Groups, l.Lexer))
|
l.iteratorStack = append(l.iteratorStack, rule.Type.Emit(l.Groups, l))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Exhaust the IteratorStack, if any.
|
// Exhaust the IteratorStack, if any.
|
||||||
|
2
types.go
2
types.go
@ -342,6 +342,6 @@ func (t TokenType) InSubCategory(other TokenType) bool {
|
|||||||
return t/100 == other/100
|
return t/100 == other/100
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t TokenType) Emit(groups []string, lexer Lexer) Iterator {
|
func (t TokenType) Emit(groups []string, _ *LexerState) Iterator {
|
||||||
return Literator(Token{Type: t, Value: groups[0]})
|
return Literator(Token{Type: t, Value: groups[0]})
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user