From e9ffd5a79aa07afbfc6347f0822a5d7d5f6be48a Mon Sep 17 00:00:00 2001
From: Gusted <postmaster@gusted.xyz>
Date: Wed, 27 Aug 2025 11:51:01 +0200
Subject: [PATCH] fix: don't emit empty tail tokens (#1121)

When tokens are split into lines, tokens that end with a newline are
emitted again as an empty token, which is confusing and doesn't have any
benefit. This conversion shouldn't emit an empty tail token.

Adds a test.

I noticed this issue through a CI failure: the output of a Go program
changed because of
d0ad67944442bcb84ff4710fe8dfa8f76ecedc01, and the new output contained an
empty whitespace token at the beginning of most lines.
---
 formatters/html/html_test.go | 42 ++++++++++++++++++++++++++++++++++++
 iterator.go                  |  8 ++++++-
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/formatters/html/html_test.go b/formatters/html/html_test.go
index 7bd0cde..acf4ae2 100644
--- a/formatters/html/html_test.go
+++ b/formatters/html/html_test.go
@@ -49,6 +49,48 @@ func TestSplitTokensIntoLines(t *testing.T) {
 	}
 	actual := chroma.SplitTokensIntoLines(in)
 	assert.Equal(t, expected, actual)
+
+	in = []chroma.Token{
+		{Value: "func", Type: chroma.KeywordDeclaration},
+		{Value: " ", Type: chroma.TextWhitespace},
+		{Value: "main", Type: chroma.NameFunction},
+		{Value: "()", Type: chroma.Punctuation},
+		{Value: " ", Type: chroma.TextWhitespace},
+		{Value: "{", Type: chroma.Punctuation},
+		{Value: "\n\t", Type: chroma.TextWhitespace},
+		{Value: "println", Type: chroma.NameBuiltin},
+		{Value: "(", Type: chroma.Punctuation},
+		{Value: `"mark this"`, Type: chroma.LiteralString},
+		{Value: ")", Type: chroma.Punctuation},
+		{Value: "\n", Type: chroma.TextWhitespace},
+		{Value: "}", Type: chroma.Punctuation},
+		{Value: "\n", Type: chroma.TextWhitespace},
+	}
+	expected = [][]chroma.Token{
+		{
+			{Type: chroma.KeywordDeclaration, Value: "func"},
+			{Type: chroma.TextWhitespace, Value: " "},
+			{Type: chroma.NameFunction, Value: "main"},
+			{Type: chroma.Punctuation, Value: "()"},
+			{Type: chroma.TextWhitespace, Value: " "},
+			{Type: chroma.Punctuation, Value: "{"},
+			{Type: chroma.TextWhitespace, Value: "\n"},
+		},
+		{
+			{Type: chroma.TextWhitespace, Value: "\t"},
+			{Type: chroma.NameBuiltin, Value: "println"},
+			{Type: chroma.Punctuation, Value: "("},
+			{Type: chroma.LiteralString, Value: `"mark this"`},
+			{Type: chroma.Punctuation, Value: ")"},
+			{Type: chroma.TextWhitespace, Value: "\n"},
+		},
+		{
+			{Type: chroma.Punctuation, Value: "}"},
+			{Type: chroma.TextWhitespace, Value: "\n"},
+		},
+	}
+	actual = chroma.SplitTokensIntoLines(in)
+	assert.Equal(t, expected, actual)
 }
 
 func TestFormatterStyleToCSS(t *testing.T) {
diff --git a/iterator.go b/iterator.go
index b1e0b57..cf39bb5 100644
--- a/iterator.go
+++ b/iterator.go
@@ -58,6 +58,7 @@ func Literator(tokens ...Token) Iterator {
 // SplitTokensIntoLines splits tokens containing newlines in two.
 func SplitTokensIntoLines(tokens []Token) (out [][]Token) {
 	var line []Token // nolint: prealloc
+tokenLoop:
 	for _, token := range tokens {
 		for strings.Contains(token.Value, "\n") {
 			parts := strings.SplitAfterN(token.Value, "\n", 2)
@@ -70,6 +71,11 @@ func SplitTokensIntoLines(tokens []Token) (out [][]Token) {
 			line = append(line, clone)
 			out = append(out, line)
 			line = nil
+
+			// If the tail token is empty, don't emit it.
+			if len(token.Value) == 0 {
+				continue tokenLoop
+			}
 		}
 		line = append(line, token)
 	}
@@ -83,5 +89,5 @@ func SplitTokensIntoLines(tokens []Token) (out [][]Token) {
 			out = out[:len(out)-1]
 		}
 	}
-	return
+	return out
 }