From e9ffd5a79aa07afbfc6347f0822a5d7d5f6be48a Mon Sep 17 00:00:00 2001 From: Gusted Date: Wed, 27 Aug 2025 11:51:01 +0200 Subject: [PATCH] fix: don't emit empty tail tokens (#1121) When tokens are split into lines, tokens that end with a newline are emitted again as an empty token, which is confusing and doesn't have any benefit. This conversion shouldn't emit an empty tail token. Adds a test. I noticed this issue by a CI failure for the output of a Go program that was changed because of d0ad67944442bcb84ff4710fe8dfa8f76ecedc01 and the new output contained an empty whitespace token at the beginning of most lines. --- formatters/html/html_test.go | 42 ++++++++++++++++++++++++++++++++++++ iterator.go | 8 ++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/formatters/html/html_test.go b/formatters/html/html_test.go index 7bd0cde..acf4ae2 100644 --- a/formatters/html/html_test.go +++ b/formatters/html/html_test.go @@ -49,6 +49,48 @@ func TestSplitTokensIntoLines(t *testing.T) { } actual := chroma.SplitTokensIntoLines(in) assert.Equal(t, expected, actual) + + in = []chroma.Token{ + {Value: "func", Type: chroma.KeywordDeclaration}, + {Value: " ", Type: chroma.TextWhitespace}, + {Value: "main", Type: chroma.NameFunction}, + {Value: "()", Type: chroma.Punctuation}, + {Value: " ", Type: chroma.TextWhitespace}, + {Value: "{", Type: chroma.Punctuation}, + {Value: "\n\t", Type: chroma.TextWhitespace}, + {Value: "println", Type: chroma.NameBuiltin}, + {Value: "(", Type: chroma.Punctuation}, + {Value: `"mark this"`, Type: chroma.LiteralString}, + {Value: ")", Type: chroma.Punctuation}, + {Value: "\n", Type: chroma.TextWhitespace}, + {Value: "}", Type: chroma.Punctuation}, + {Value: "\n", Type: chroma.TextWhitespace}, + } + expected = [][]chroma.Token{ + { + {Type: chroma.KeywordDeclaration, Value: "func"}, + {Type: chroma.TextWhitespace, Value: " "}, + {Type: chroma.NameFunction, Value: "main"}, + {Type: chroma.Punctuation, Value: "()"}, + {Type: 
chroma.TextWhitespace, Value: " "}, + {Type: chroma.Punctuation, Value: "{"}, + {Type: chroma.TextWhitespace, Value: "\n"}, + }, + { + {Type: chroma.TextWhitespace, Value: "\t"}, + {Type: chroma.NameBuiltin, Value: "println"}, + {Type: chroma.Punctuation, Value: "("}, + {Type: chroma.LiteralString, Value: `"mark this"`}, + {Type: chroma.Punctuation, Value: ")"}, + {Type: chroma.TextWhitespace, Value: "\n"}, + }, + { + {Type: chroma.Punctuation, Value: "}"}, + {Type: chroma.TextWhitespace, Value: "\n"}, + }, + } + actual = chroma.SplitTokensIntoLines(in) + assert.Equal(t, expected, actual) } func TestFormatterStyleToCSS(t *testing.T) { diff --git a/iterator.go b/iterator.go index b1e0b57..cf39bb5 100644 --- a/iterator.go +++ b/iterator.go @@ -58,6 +58,7 @@ func Literator(tokens ...Token) Iterator { // SplitTokensIntoLines splits tokens containing newlines in two. func SplitTokensIntoLines(tokens []Token) (out [][]Token) { var line []Token // nolint: prealloc +tokenLoop: for _, token := range tokens { for strings.Contains(token.Value, "\n") { parts := strings.SplitAfterN(token.Value, "\n", 2) @@ -70,6 +71,11 @@ func SplitTokensIntoLines(tokens []Token) (out [][]Token) { line = append(line, clone) out = append(out, line) line = nil + + // If the tail token is empty, don't emit it. + if len(token.Value) == 0 { + continue tokenLoop + } } line = append(line, token) } @@ -83,5 +89,5 @@ func SplitTokensIntoLines(tokens []Token) (out [][]Token) { out = out[:len(out)-1] } } - return + return out }