From b7a49efa88d5c711d1da32163e95734f2b02f758 Mon Sep 17 00:00:00 2001 From: Gani Georgiev Date: Fri, 27 Oct 2023 17:34:45 +0300 Subject: [PATCH] fixed excerpt modifier to properly add spaces after block tags --- CHANGELOG.md | 2 ++ tools/rest/excerpt_modifier.go | 43 +++++++++++++++++------------ tools/rest/excerpt_modifier_test.go | 4 +-- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78474a7d..6a6262e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ An option to return also the empty found tokens was also added via `Tokenizer.KeepEmptyTokens(true)`. _This should fix the parsing of whitespace charactes around view query column names when no quotes are used ([#3616](https://github.com/pocketbase/pocketbase/discussions/3616#discussioncomment-7398564))._ +- Fixed the `:excerpt(max, withEllipsis?)` `field` query param modifier to properly add space to the generated text fragment after block tags. + ## v0.19.0 diff --git a/tools/rest/excerpt_modifier.go b/tools/rest/excerpt_modifier.go index 107a8fcf..07da1a06 100644 --- a/tools/rest/excerpt_modifier.go +++ b/tools/rest/excerpt_modifier.go @@ -78,8 +78,7 @@ func (m *excerptModifier) Modify(value any) (any, error) { return "", err } - var isNotEmpty bool - var needSpace bool + var hasPrevSpace bool // for all node types and more details check // https://pkg.go.dev/golang.org/x/net/html#Parse @@ -87,37 +86,47 @@ func (m *excerptModifier) Modify(value any) (any, error) { stripTags = func(n *html.Node) { switch n.Type { case html.TextNode: - if txt := strings.TrimSpace(whitespaceRegex.ReplaceAllString(n.Data, " ")); txt != "" { - if isNotEmpty && needSpace { - needSpace = false - builder.WriteString(" ") - } + // collapse multiple spaces into one + txt := whitespaceRegex.ReplaceAllString(n.Data, " ") + + if hasPrevSpace { + txt = strings.TrimLeft(txt, " ") + } + + if txt != "" { + hasPrevSpace = strings.HasSuffix(txt, " ") builder.WriteString(txt) - - if !isNotEmpty { - isNotEmpty = true - } - } - case html.ElementNode: - if !needSpace && !list.ExistInSlice(n.Data, inlineTags) { - needSpace = true } } - if builder.Len() > m.max { + // excerpt max has been reached => no need to further iterate + // (+2 for the extra whitespace suffix/prefix that will be trimmed later) + if builder.Len() > m.max+2 { return } for c := n.FirstChild; c != nil; c = c.NextSibling { if c.Type != html.ElementNode || !list.ExistInSlice(c.Data, excludeTags) { + isBlock := c.Type == html.ElementNode && !list.ExistInSlice(c.Data, inlineTags) + + if isBlock && !hasPrevSpace { + builder.WriteString(" ") + hasPrevSpace = true + } + stripTags(c) + + if isBlock && !hasPrevSpace { + builder.WriteString(" ") + hasPrevSpace = true + } } } } stripTags(doc) - result := builder.String() + result := strings.TrimSpace(builder.String()) if len(result) > m.max { result = strings.TrimSpace(result[:m.max]) diff --git a/tools/rest/excerpt_modifier_test.go b/tools/rest/excerpt_modifier_test.go index 67c69850..eedb25a8 100644 --- a/tools/rest/excerpt_modifier_test.go +++ b/tools/rest/excerpt_modifier_test.go @@ -86,9 +86,9 @@ func TestNewExcerptModifier(t *testing.T) { func TestExcerptModifierModify(t *testing.T) { // plain text value: "Hello t est12 3 word" html := `

Hello

t est12 - 3

word

` + 3456word 7 89!? a b c#

title

` - plainText := "Hello t est12 3 word" + plainText := "Hello t est12 3456 word 7 89!? a b c# title" scenarios := []struct { name string