mirror of https://github.com/alecthomas/chroma.git synced 2025-07-15 01:14:21 +02:00

Add ByGroupNames function, same as ByGroups but use named groups (#519)

For named groups that are given no emitter, an Error token is emitted instead.

This also handles the case where an Emitter for group `0` is or is not provided,
since numbers can also be used as group names.
It may be overkill, though, because why would anyone use ByGroupNames
if they wanted to assign a token to the whole match?
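To make the intended usage concrete, here is a minimal sketch; it mirrors the tests added in this commit, and the helper name exampleByGroupNames is illustrative only, not part of the commit:

package chroma

// exampleByGroupNames is an illustrative helper, not part of this commit.
// Each named group gets the emitter registered under its name; a named
// group with no entry in the map is emitted as an Error token.
func exampleByGroupNames() ([]Token, error) {
	l := Coalesce(MustNewLexer(nil, Rules{
		"root": {
			{
				`(?<key>\w+)(?<operator>=)(?<value>\w+)`,
				ByGroupNames(map[string]Emitter{
					`key`:      String,
					`operator`: Operator,
					`value`:    String,
				}),
				nil,
			},
		},
	}))
	it, err := l.Tokenise(nil, `abc=123`)
	if err != nil {
		return nil, err
	}
	// Expected tokens: {String, `abc`}, {Operator, `=`}, {String, `123`}
	return it.Tokens(), nil
}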
Author: Siavash Askari Nasr
Date: 2021-06-08 16:56:59 +04:30
Committed by: GitHub
Parent: 22cbca546a
Commit: 10329f849e
2 changed files with 113 additions and 0 deletions


@@ -52,6 +52,34 @@ func ByGroups(emitters ...Emitter) Emitter {
	})
}

// ByGroupNames emits a token for each named matching group in the rule's regex.
func ByGroupNames(emitters map[string]Emitter) Emitter {
	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
		iterators := make([]Iterator, 0, len(state.NamedGroups)-1)
		if len(state.NamedGroups)-1 == 0 {
			if emitter, ok := emitters[`0`]; ok {
				iterators = append(iterators, emitter.Emit(groups, state))
			} else {
				iterators = append(iterators, Error.Emit(groups, state))
			}
		} else {
			ruleRegex := state.Rules[state.State][state.Rule].Regexp
			for i := 1; i < len(state.NamedGroups); i++ {
				groupName := ruleRegex.GroupNameFromNumber(i)
				group := state.NamedGroups[groupName]
				if emitter, ok := emitters[groupName]; ok {
					if emitter != nil {
						iterators = append(iterators, emitter.Emit([]string{group}, state))
					}
				} else {
					iterators = append(iterators, Error.Emit([]string{group}, state))
				}
			}
		}
		return Concaterator(iterators...)
	})
}

// UsingByGroup emits tokens for the matched groups in the regex using a
// "sublexer". Used when lexing code blocks where the name of a sublexer is
// contained within the block, for example on a Markdown text block or SQL
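
One path worth calling out in the implementation above: when the rule's regex yields no named groups beyond group `0` (the whole match), as with the `\w+=\w+` regex in the last test case below, ByGroupNames falls back to the emitter registered under the key `0`, or emits an Error token for the whole match if no such entry exists. A hedged sketch of that fallback, using the same rule shape as the tests; the rule and map are illustrative and not part of the commit:

{
	// No capture groups at all, so only group 0 (the whole match) exists.
	`\w+=\w+`,
	ByGroupNames(map[string]Emitter{
		// Illustrative: with this entry the whole match, e.g. `abc=123`,
		// is emitted as a single String token; without it, the whole
		// match becomes an Error token (the last test case below
		// exercises that branch).
		`0`: String,
	}),
	nil,
},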


@@ -99,3 +99,88 @@ func TestEnsureLFFunc(t *testing.T) {
		assert.Equal(t, out, test.out)
	}
}

func TestByGroupNames(t *testing.T) {
	l := Coalesce(MustNewLexer(nil, Rules{ // nolint: forbidigo
		"root": {
			{
				`(?<key>\w+)(?<operator>=)(?<value>\w+)`,
				ByGroupNames(map[string]Emitter{
					`key`:      String,
					`operator`: Operator,
					`value`:    String,
				}),
				nil,
			},
		},
	}))
	it, err := l.Tokenise(nil, `abc=123`)
	assert.NoError(t, err)
	assert.Equal(t, []Token{{String, `abc`}, {Operator, `=`}, {String, `123`}}, it.Tokens())

	l = Coalesce(MustNewLexer(nil, Rules{ // nolint: forbidigo
		"root": {
			{
				`(?<key>\w+)(?<operator>=)(?<value>\w+)`,
				ByGroupNames(map[string]Emitter{
					`key`:   String,
					`value`: String,
				}),
				nil,
			},
		},
	}))
	it, err = l.Tokenise(nil, `abc=123`)
	assert.NoError(t, err)
	assert.Equal(t, []Token{{String, `abc`}, {Error, `=`}, {String, `123`}}, it.Tokens())

	l = Coalesce(MustNewLexer(nil, Rules{ // nolint: forbidigo
		"root": {
			{
				`(?<key>\w+)=(?<value>\w+)`,
				ByGroupNames(map[string]Emitter{
					`key`:   String,
					`value`: String,
				}),
				nil,
			},
		},
	}))
	it, err = l.Tokenise(nil, `abc=123`)
	assert.NoError(t, err)
	assert.Equal(t, []Token{{String, `abc123`}}, it.Tokens())

	l = Coalesce(MustNewLexer(nil, Rules{ // nolint: forbidigo
		"root": {
			{
				`(?<key>\w+)(?<op>=)(?<value>\w+)`,
				ByGroupNames(map[string]Emitter{
					`key`:      String,
					`operator`: Operator,
					`value`:    String,
				}),
				nil,
			},
		},
	}))
	it, err = l.Tokenise(nil, `abc=123`)
	assert.NoError(t, err)
	assert.Equal(t, []Token{{String, `abc`}, {Error, `=`}, {String, `123`}}, it.Tokens())

	l = Coalesce(MustNewLexer(nil, Rules{ // nolint: forbidigo
		"root": {
			{
				`\w+=\w+`,
				ByGroupNames(map[string]Emitter{
					`key`:      String,
					`operator`: Operator,
					`value`:    String,
				}),
				nil,
			},
		},
	}))
	it, err = l.Tokenise(nil, `abc=123`)
	assert.NoError(t, err)
	assert.Equal(t, []Token{{Error, `abc=123`}}, it.Tokens())
}