1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-07-05 00:38:52 +02:00

Don't output extra whitespace in YAML multiline (#993)

This resolves a particular issue with parsing YAML multiline, for
example:
```yaml
a: |
  multiline literal
  line 2
```

The regex used would capture the amount of indentation in the third
capture group and then use that as a kind of "status" to know which
lines are part of the indented multiline. However, because it's a
captured group, it has to be assigned a token, which was `TextWhitespace`.
This meant that the indentation was output after the multiline.
Technically it should be a non-capturing group, but then it's no
longer possible to refer to it in the regex. Therefore I've gone with the
solution to add a new token, Ignore, which will not be emitted as a
token in the iterator, which can safely be used to make use of capture
groups but not have them show up in the output.

## Before

![image](https://github.com/user-attachments/assets/c29353c5-9e15-4f14-a733-57a60fb51910)

## After

![image](https://github.com/user-attachments/assets/57b5d129-a9d3-4b84-ae1f-dc05182b9ad3)
This commit is contained in:
Gusted
2024-08-22 22:58:31 +02:00
committed by GitHub
parent 895a0488b5
commit 4d11870090
6 changed files with 448 additions and 424 deletions

View File

@ -192,3 +192,14 @@ func TestByGroupNames(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, []Token{{Error, `abc=123`}}, it.Tokens())
}
func TestIgnoreToken(t *testing.T) {
l := Coalesce(mustNewLexer(t, &Config{EnsureNL: true}, Rules{ // nolint: forbidigo
"root": {
{`(\s*)(\w+)(?:\1)(\n)`, ByGroups(Ignore, Keyword, Whitespace), nil},
},
}))
it, err := l.Tokenise(nil, ` hello `)
assert.NoError(t, err)
assert.Equal(t, []Token{{Keyword, "hello"}, {TextWhitespace, "\n"}}, it.Tokens())
}