Mirror of https://github.com/alecthomas/chroma.git, synced 2025-10-08 22:52:04 +02:00
When tokens are split into lines, a token that ends with a newline is
emitted again as an empty token, which is confusing and has no benefit.
This conversion shouldn't emit an empty tail token.

Adds a test.

I noticed this issue via a CI failure: the output for a Go program changed
because of d0ad679444, and the new output contained an empty whitespace
token at the beginning of most lines.
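To make the change concrete, here is a small sketch of the behaviour being fixed, using the SplitTokensIntoLines helper shown further down (assuming the github.com/alecthomas/chroma/v2 module path; the token values are made up for illustration):

package main

import (
	"fmt"

	"github.com/alecthomas/chroma/v2"
)

func main() {
	tokens := []chroma.Token{
		{Type: chroma.Keyword, Value: "if"},
		{Type: chroma.Text, Value: " true {\n"},
		{Type: chroma.Text, Value: "}\n"},
	}
	for i, line := range chroma.SplitTokensIntoLines(tokens) {
		fmt.Println(i, line)
	}
	// Previously each "\n"-terminated token also produced an empty tail token,
	// so lines after the first began with an empty token.
	// After this change only the two real lines are emitted.
}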
94 lines · 2.1 KiB · Go
package chroma

import "strings"
// An Iterator across tokens.
//
// EOF will be returned at the end of the Token stream.
//
// If an error occurs within an Iterator, it may propagate this in a panic. Formatters should recover.
type Iterator func() Token
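The panic note above matters to anyone consuming an Iterator directly. A minimal sketch of a defensive consumer (illustrative only, written as if inside the package and assuming fmt is imported; this is not code from chroma itself):

// drainSafely consumes an Iterator, converting a panic from the underlying
// lexer into an ordinary error instead of crashing the caller.
func drainSafely(it Iterator) (tokens []Token, err error) {
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("iterator panicked: %v", r)
		}
	}()
	for t := it(); t != EOF; t = it() {
		tokens = append(tokens, t)
	}
	return tokens, nil
}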
// Tokens consumes all tokens from the iterator and returns them as a slice.
func (i Iterator) Tokens() []Token {
	var out []Token
	for t := i(); t != EOF; t = i() {
		out = append(out, t)
	}
	return out
}
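A quick usage sketch, written as if inside the package and using Literator, which is defined later in this file:

// tokensExample materialises a fixed token stream into a slice.
func tokensExample() []Token {
	it := Literator(
		Token{Type: Keyword, Value: "return"},
		Token{Type: Text, Value: " nil\n"},
	)
	return it.Tokens() // [{Keyword "return"} {Text " nil\n"}]
}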
// Stdlib converts a Chroma iterator to a Go 1.23-compatible iterator.
func (i Iterator) Stdlib() func(yield func(Token) bool) {
	return func(yield func(Token) bool) {
		for t := i(); t != EOF; t = i() {
			if !yield(t) {
				return
			}
		}
	}
}
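With Go 1.23's range-over-func support, the function returned by Stdlib can be ranged over directly; a small sketch (as if inside the package):

// tokenValues collects the value of every token using a range-over-func loop.
func tokenValues(it Iterator) []string {
	var values []string
	for tok := range it.Stdlib() {
		values = append(values, tok.Value)
	}
	return values
}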
// Concaterator concatenates tokens from a series of iterators.
func Concaterator(iterators ...Iterator) Iterator {
	return func() Token {
		for len(iterators) > 0 {
			t := iterators[0]()
			if t != EOF {
				return t
			}
			iterators = iterators[1:]
		}
		return EOF
	}
}
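For example, Concaterator can stitch a synthetic header onto an existing stream; a sketch (as if inside the package, token type and value chosen for illustration):

// withHeader prepends a comment token to an existing token stream.
func withHeader(body Iterator) Iterator {
	header := Literator(Token{Type: Comment, Value: "// generated\n"})
	return Concaterator(header, body)
}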
// Literator converts a sequence of literal Tokens into an Iterator.
func Literator(tokens ...Token) Iterator {
	return func() Token {
		if len(tokens) == 0 {
			return EOF
		}
		token := tokens[0]
		tokens = tokens[1:]
		return token
	}
}
// SplitTokensIntoLines splits tokens containing newlines in two.
func SplitTokensIntoLines(tokens []Token) (out [][]Token) {
	var line []Token // nolint: prealloc
tokenLoop:
	for _, token := range tokens {
		for strings.Contains(token.Value, "\n") {
			parts := strings.SplitAfterN(token.Value, "\n", 2)
			// Token becomes the tail.
			token.Value = parts[1]

			// Append the head to the line and flush the line.
			clone := token.Clone()
			clone.Value = parts[0]
			line = append(line, clone)
			out = append(out, line)
			line = nil

			// If the tail token is empty, don't emit it.
			if len(token.Value) == 0 {
				continue tokenLoop
			}
		}
		line = append(line, token)
	}
	if len(line) > 0 {
		out = append(out, line)
	}
	// Strip empty trailing token line.
	if len(out) > 0 {
		last := out[len(out)-1]
		if len(last) == 1 && last[0].Value == "" {
			out = out[:len(out)-1]
		}
	}
	return out
}
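A test along the lines mentioned in the commit message might look like this (a sketch only, assuming the standard testing package; the actual test added in the repository may differ):

// TestSplitTokensIntoLinesNoEmptyTail checks that a token ending in a newline
// does not leave an empty tail token at the start of the next line.
func TestSplitTokensIntoLinesNoEmptyTail(t *testing.T) {
	in := []Token{
		{Type: Keyword, Value: "package"},
		{Type: Text, Value: " main\n"},
		{Type: Keyword, Value: "import"},
	}
	lines := SplitTokensIntoLines(in)
	if len(lines) != 2 {
		t.Fatalf("expected 2 lines, got %d", len(lines))
	}
	if first := lines[1][0]; first.Value != "import" {
		t.Fatalf("second line should start with %q, got %q", "import", first.Value)
	}
}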