fix: don't emit empty tail tokens (#1121)

When tokens are split into lines, a token that ends with a newline leaves an empty remainder that is emitted as an extra empty token, which is confusing and has no benefit. This conversion shouldn't emit an empty tail token. Adds a test. I noticed this issue through a CI failure: the output of a Go program changed because of d0ad679444, and the new output contained an empty whitespace token at the beginning of most lines.
2025-10-08 22:52:04 +02:00 · 2025-08-27 11:51:01 +02:00
parent 4e1403e088
commit e9ffd5a79a
2 changed files with 49 additions and 1 deletions
--- a/formatters/html/html_test.go
+++ b/formatters/html/html_test.go
@@ -49,6 +49,48 @@ func TestSplitTokensIntoLines(t *testing.T) {
 	}
 	actual := chroma.SplitTokensIntoLines(in)
 	assert.Equal(t, expected, actual)
+
+	in = []chroma.Token{
+		{Value: "func", Type: chroma.KeywordDeclaration},
+		{Value: " ", Type: chroma.TextWhitespace},
+		{Value: "main", Type: chroma.NameFunction},
+		{Value: "()", Type: chroma.Punctuation},
+		{Value: " ", Type: chroma.TextWhitespace},
+		{Value: "{", Type: chroma.Punctuation},
+		{Value: "\n\t", Type: chroma.TextWhitespace},
+		{Value: "println", Type: chroma.NameBuiltin},
+		{Value: "(", Type: chroma.Punctuation},
+		{Value: `"mark this"`, Type: chroma.LiteralString},
+		{Value: ")", Type: chroma.Punctuation},
+		{Value: "\n", Type: chroma.TextWhitespace},
+		{Value: "}", Type: chroma.Punctuation},
+		{Value: "\n", Type: chroma.TextWhitespace},
+	}
+	expected = [][]chroma.Token{
+		{
+			{Type: chroma.KeywordDeclaration, Value: "func"},
+			{Type: chroma.TextWhitespace, Value: " "},
+			{Type: chroma.NameFunction, Value: "main"},
+			{Type: chroma.Punctuation, Value: "()"},
+			{Type: chroma.TextWhitespace, Value: " "},
+			{Type: chroma.Punctuation, Value: "{"},
+			{Type: chroma.TextWhitespace, Value: "\n"},
+		},
+		{
+			{Type: chroma.TextWhitespace, Value: "\t"},
+			{Type: chroma.NameBuiltin, Value: "println"},
+			{Type: chroma.Punctuation, Value: "("},
+			{Type: chroma.LiteralString, Value: `"mark this"`},
+			{Type: chroma.Punctuation, Value: ")"},
+			{Type: chroma.TextWhitespace, Value: "\n"},
+		},
+		{
+			{Type: chroma.Punctuation, Value: "}"},
+			{Type: chroma.TextWhitespace, Value: "\n"},
+		},
+	}
+	actual = chroma.SplitTokensIntoLines(in)
+	assert.Equal(t, expected, actual)
 }

 func TestFormatterStyleToCSS(t *testing.T) {
--- a/iterator.go
+++ b/iterator.go
@@ -58,6 +58,7 @@ func Literator(tokens ...Token) Iterator {
 // SplitTokensIntoLines splits tokens containing newlines in two.
 func SplitTokensIntoLines(tokens []Token) (out [][]Token) {
 	var line []Token // nolint: prealloc
+tokenLoop:
 	for _, token := range tokens {
 		for strings.Contains(token.Value, "\n") {
 			parts := strings.SplitAfterN(token.Value, "\n", 2)
@@ -70,6 +71,11 @@ func SplitTokensIntoLines(tokens []Token) (out [][]Token) {
 			line = append(line, clone)
 			out = append(out, line)
 			line = nil
+
+			// If the tail token is empty, don't emit it.
+			if len(token.Value) == 0 {
+				continue tokenLoop
+			}
 		}
 		line = append(line, token)
 	}
@@ -83,5 +89,5 @@ func SplitTokensIntoLines(tokens []Token) (out [][]Token) {
 			out = out[:len(out)-1]
 		}
 	}
-	return
+	return out
 }