// truncate returns the longest prefix of s that is at most n bytes long and
// does not end in the middle of a multi-byte UTF-8 encoding.
//
// If n >= len(s), s is returned unchanged. A negative n is treated as 0 and
// yields the empty string. A character whose encoding ends exactly at byte n
// is kept; only a character that straddles byte n is dropped.
//
// Adapted from Tailscale's truncate helper.
// See https://github.com/tailscale/tailscale/blob/main/util/truncate/truncate.go
func truncate(s string, n int) string {
	if n >= len(s) {
		return s
	}
	if n <= 0 {
		// Guard against a negative length, which would make s[:n] panic.
		return ""
	}

	// Here 0 < n < len(s), so s[n] is the first byte that would be cut off.
	// While that byte is a continuation byte (0b10xxxxxx), position n falls
	// inside a multi-byte encoding; step back until s[n] is the first byte
	// of a character (or we reach the start of the string).
	for n > 0 && s[n]&0xc0 == 0x80 {
		n--
	}

	return s[:n]
}