// mailpit/internal/tools/snippets.go

package tools

import (
	"regexp"
	"strings"

	"github.com/axllent/mailpit/internal/html2text"
)

// snippetSpaceRe matches runs of whitespace. It is compiled once at package
// scope so CreateSnippet does not recompile the pattern on every call.
var snippetSpaceRe = regexp.MustCompile(`\s+`)

// CreateSnippet returns a short plain-text preview of a message body.
//
// Both inputs are trimmed of surrounding whitespace first. If the HTML body is
// non-empty it is always used (converted with html2text.Strip); otherwise the
// text body is used with U+FEFF replaced by a space and every run of
// whitespace collapsed to a single space. When both are empty the result is "".
//
// A preview longer than 200 bytes is cut back to at most 200 bytes on a UTF-8
// boundary by truncate and has "..." appended.
func CreateSnippet(text, html string) string {
	// Maximum snippet length in bytes, not counting the "..." suffix.
	const limit = 200

	text = strings.TrimSpace(text)
	html = strings.TrimSpace(html)

	var snippet string

	switch {
	case html != "":
		// The HTML part wins whenever it is present, even if a text part exists.
		snippet = html2text.Strip(html, false)
	case text != "":
		// replace \uFEFF with space, see https://github.com/golang/go/issues/42274#issuecomment-1017258184
		text = strings.ReplaceAll(text, "\uFEFF", " ")
		snippet = strings.TrimSpace(snippetSpaceRe.ReplaceAllString(text, " "))
	default:
		return ""
	}

	if len(snippet) <= limit {
		return snippet
	}

	return truncate(snippet, limit) + "..."
}

// truncate returns the longest prefix of s that is at most n bytes long and
// does not end in the middle of a UTF-8 encoded rune.
//
// If s already fits within n bytes it is returned unchanged. A zero or
// negative n yields the empty string instead of panicking.
//
// Adapted from Tailscale's truncate helper, see
// https://github.com/tailscale/tailscale/blob/main/util/truncate/truncate.go
func truncate(s string, n int) string {
	if n >= len(s) {
		return s
	}

	if n <= 0 {
		return ""
	}

	// Here 0 < n < len(s), so s[n] is the first byte that would be dropped.
	// If it is a continuation byte (0b10xxxxxx) the cut would split a rune, so
	// back up until the cut lands on the first byte of an encoding. A rune
	// that ends exactly at the limit is therefore kept.
	for n > 0 && s[n]&0xc0 == 0x80 {
		n--
	}

	return s[:n]
}
}