From 6013d14bc691cdcaa652b72482a0a6f48b812dc1 Mon Sep 17 00:00:00 2001 From: Gani Georgiev Date: Mon, 18 Sep 2023 15:16:06 +0300 Subject: [PATCH] added support for :excerpt(max, withEllipsis?) fields modifier --- CHANGELOG.md | 8 ++ tools/rest/excerpt_modifier.go | 131 +++++++++++++++++++++++ tools/rest/excerpt_modifier_test.go | 157 ++++++++++++++++++++++++++++ tools/rest/json_serializer.go | 140 ++++++++++++++++++++----- tools/rest/json_serializer_test.go | 27 +++++ 5 files changed, 434 insertions(+), 29 deletions(-) create mode 100644 tools/rest/excerpt_modifier.go create mode 100644 tools/rest/excerpt_modifier_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e60540b..2865334f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ - Added Patreon OAuth2 provider ([#3323](https://github.com/pocketbase/pocketbase/pull/3323); thanks @ghostdevv). +- (@todo docs) Added support for `:excerpt(max, withEllipsis?)` `fields` modifier that will return a short plain text version of any string value (html tags are stripped). + This could be used to minimize the downloaded json data when listing records with large `editor` html values. 
+ ```js + await pb.collection("example").getList(1, 20, { + "fields": "*,description:excerpt(100)" + }) + ``` + ## v0.18.6 diff --git a/tools/rest/excerpt_modifier.go b/tools/rest/excerpt_modifier.go new file mode 100644 index 00000000..107a8fcf --- /dev/null +++ b/tools/rest/excerpt_modifier.go @@ -0,0 +1,131 @@ +package rest + +import ( + "errors" + "regexp" + "strings" + + "github.com/pocketbase/pocketbase/tools/list" + "github.com/spf13/cast" + "golang.org/x/net/html" +) + +var whitespaceRegex = regexp.MustCompile(`\s+`) + +var excludeTags = []string{ + "head", "style", "script", "iframe", "embed", "applet", "object", + "svg", "img", "picture", "dialog", "template", "button", "form", + "textarea", "input", "select", "option", +} + +var inlineTags = []string{ + "a", "abbr", "acronym", "b", "bdo", "big", "br", "button", + "cite", "code", "em", "i", "label", "q", "small", "span", + "strong", "strike", "sub", "sup", "time", +} + +var _ FieldModifier = (*excerptModifier)(nil) + +type excerptModifier struct { + max int // approximate max excerpt length + withEllipsis bool // if enabled will add ellipsis when the plain text length > max +} + +// newExcerptModifier validates the specified raw string arguments and +// initializes a new excerptModifier. +// +// This method is usually invoked in initModifer(). +func newExcerptModifier(args ...string) (*excerptModifier, error) { + totalArgs := len(args) + + if totalArgs == 0 { + return nil, errors.New("max argument is required - expected (max, withEllipsis?)") + } + + if totalArgs > 2 { + return nil, errors.New("too many arguments - expected (max, withEllipsis?)") + } + + max := cast.ToInt(args[0]) + if max <= 0 { // negative values would otherwise panic when the excerpt is sliced in Modify + return nil, errors.New("max argument must be > 0") + } + + var withEllipsis bool + if totalArgs > 1 { + withEllipsis = cast.ToBool(args[1]) + } + + return &excerptModifier{max, withEllipsis}, nil +} + +// Modify implements the [FieldModifier.Modify] interface method.
+// +// It returns a plain text excerpt/short-description from a formatted +// html string (non-string values are kept untouched). +func (m *excerptModifier) Modify(value any) (any, error) { + strValue, ok := value.(string) + if !ok { + // not a string -> return as it is without applying the modifier + // (we don't throw an error because the modifier could be applied for a missing expand field) + return value, nil + } + + var builder strings.Builder + + doc, err := html.Parse(strings.NewReader(strValue)) + if err != nil { + return "", err + } + + var isNotEmpty bool + var needSpace bool + + // for all node types and more details check + // https://pkg.go.dev/golang.org/x/net/html#Parse + var stripTags func(*html.Node) + stripTags = func(n *html.Node) { + switch n.Type { + case html.TextNode: + if txt := strings.TrimSpace(whitespaceRegex.ReplaceAllString(n.Data, " ")); txt != "" { + if isNotEmpty && needSpace { + needSpace = false + builder.WriteString(" ") + } + + builder.WriteString(txt) + + if !isNotEmpty { + isNotEmpty = true + } + } + case html.ElementNode: + if !needSpace && !list.ExistInSlice(n.Data, inlineTags) { + needSpace = true + } + } + + if len([]rune(builder.String())) > m.max { // rune count, so multi-byte text doesn't stop the walk early + return + } + + for c := n.FirstChild; c != nil; c = c.NextSibling { + if c.Type != html.ElementNode || !list.ExistInSlice(c.Data, excludeTags) { + stripTags(c) + } + } + } + stripTags(doc) + + result := builder.String() + + if runes := []rune(result); len(runes) > m.max { // truncate on rune boundaries to keep the result valid UTF-8 + result = strings.TrimSpace(string(runes[:m.max])) + + if m.withEllipsis { + result += "..."
+ } + } + + return result, nil +} diff --git a/tools/rest/excerpt_modifier_test.go b/tools/rest/excerpt_modifier_test.go new file mode 100644 index 00000000..67c69850 --- /dev/null +++ b/tools/rest/excerpt_modifier_test.go @@ -0,0 +1,157 @@ +package rest + +import ( + "fmt" + "testing" + + "github.com/spf13/cast" +) + +func TestNewExcerptModifier(t *testing.T) { + scenarios := []struct { + name string + args []string + expectError bool + }{ + { + "no arguments", + nil, + true, + }, + { + "too many arguments", + []string{"12", "false", "something"}, + true, + }, + { + "non-numeric max argument", + []string{"something"}, // should fallback to 0 which is not allowed + true, + }, + { + "numeric max argument", + []string{"12"}, + false, + }, + { + "non-bool withEllipsis argument", + []string{"12", "something"}, // should fallback to false which is allowed + false, + }, + { + "truthy withEllipsis argument", + []string{"12", "t"}, + false, + }, + } + + for _, s := range scenarios { + t.Run(s.name, func(t *testing.T) { + m, err := newExcerptModifier(s.args...) + + hasErr := err != nil + if hasErr != s.expectError { + t.Fatalf("Expected hasErr %v, got %v (%v)", s.expectError, hasErr, err) + } + + if hasErr { + if m != nil { + t.Fatalf("Expected nil modifier, got %v", m) + } + + return + } + + var argMax int + if len(s.args) > 0 { + argMax = cast.ToInt(s.args[0]) + } + + var argWithEllipsis bool + if len(s.args) > 1 { + argWithEllipsis = cast.ToBool(s.args[1]) + } + + if m.max != argMax { + t.Fatalf("Expected max %d, got %d", argMax, m.max) + } + + if m.withEllipsis != argWithEllipsis { + t.Fatalf("Expected withEllipsis %v, got %v", argWithEllipsis, m.withEllipsis) + } + }) + } +} + +func TestExcerptModifierModify(t *testing.T) { + // plain text value: "Hello t est12 3 word" + html := `

Hello

t est12 + 3

word

` + + plainText := "Hello t est12 3 word" + + scenarios := []struct { + name string + args []string + value string + expected string + }{ + // without ellipsis + { + "only max < len(plainText)", + []string{"2"}, + html, + plainText[:2], + }, + { + "only max = len(plainText)", + []string{fmt.Sprint(len(plainText))}, + html, + plainText, + }, + { + "only max > len(plainText)", + []string{fmt.Sprint(len(plainText) + 5)}, + html, + plainText, + }, + + // with ellipsis + { + "with ellipsis and max < len(plainText)", + []string{"2", "t"}, + html, + plainText[:2] + "...", + }, + { + "with ellipsis and max = len(plainText)", + []string{fmt.Sprint(len(plainText)), "t"}, + html, + plainText, + }, + { + "with ellipsis and max > len(plainText)", + []string{fmt.Sprint(len(plainText) + 5), "t"}, + html, + plainText, + }, + } + + for _, s := range scenarios { + t.Run(s.name, func(t *testing.T) { + m, err := newExcerptModifier(s.args...) + if err != nil { + t.Fatal(err) + } + + raw, err := m.Modify(s.value) + if err != nil { + t.Fatal(err) + } + + if v := cast.ToString(raw); v != s.expected { + t.Fatalf("Expected %q, got %q", s.expected, v) + } + }) + } +} diff --git a/tools/rest/json_serializer.go b/tools/rest/json_serializer.go index cda7b422..a6581105 100644 --- a/tools/rest/json_serializer.go +++ b/tools/rest/json_serializer.go @@ -2,13 +2,19 @@ package rest import ( "encoding/json" + "fmt" "strings" "github.com/labstack/echo/v5" - "github.com/pocketbase/pocketbase/tools/list" "github.com/pocketbase/pocketbase/tools/search" + "github.com/pocketbase/pocketbase/tools/tokenizer" ) +type FieldModifier interface { + // Modify executes the modifier and returns a new modified value. + Modify(value any) (any, error) +} + // Serializer represents custom REST JSON serializer based on echo.DefaultJSONSerializer, // with support for additional generic response data transformation (eg. fields picker). 
type Serializer struct { @@ -28,14 +34,14 @@ func (s *Serializer) Serialize(c echo.Context, i any, indent string) error { statusCode := c.Response().Status - param := c.QueryParam(fieldsParam) - if param == "" || statusCode < 200 || statusCode > 299 { + rawFields := c.QueryParam(fieldsParam) + if rawFields == "" || statusCode < 200 || statusCode > 299 { return s.DefaultJSONSerializer.Serialize(c, i, indent) } - fields := strings.Split(param, ",") - for i, f := range fields { - fields[i] = strings.TrimSpace(f) + parsedFields, err := parseFields(rawFields) + if err != nil { + return err } encoded, err := json.Marshal(i) @@ -44,13 +50,11 @@ func (s *Serializer) Serialize(c echo.Context, i any, indent string) error { } var decoded any - if err := json.Unmarshal(encoded, &decoded); err != nil { return err } var isSearchResult bool - switch i.(type) { case search.Result, *search.Result: isSearchResult = true @@ -58,49 +62,111 @@ func (s *Serializer) Serialize(c echo.Context, i any, indent string) error { if isSearchResult { if decodedMap, ok := decoded.(map[string]any); ok { - pickFields(decodedMap["items"], fields) + pickFields(decodedMap["items"], parsedFields) } } else { - pickFields(decoded, fields) + pickFields(decoded, parsedFields) } return s.DefaultJSONSerializer.Serialize(c, decoded, indent) } -func pickFields(data any, fields []string) { +func parseFields(rawFields string) (map[string]FieldModifier, error) { + t := tokenizer.NewFromString(rawFields) + + fields, err := t.ScanAll() + if err != nil { + return nil, err + } + + result := make(map[string]FieldModifier, len(fields)) + + for _, f := range fields { + parts := strings.SplitN(strings.TrimSpace(f), ":", 2) + + if len(parts) > 1 { + m, err := initModifer(parts[1]) + if err != nil { + return nil, err + } + result[parts[0]] = m + } else { + result[parts[0]] = nil + } + } + + return result, nil +} + +func initModifer(rawModifier string) (FieldModifier, error) { + t := tokenizer.NewFromString(rawModifier) + 
t.Separators('(', ')', ',', ' ') + t.IgnoreParenthesis(true) + + parts, err := t.ScanAll() + if err != nil { + return nil, err + } + + if len(parts) == 0 { + return nil, fmt.Errorf("invalid or empty modifier expression %q", rawModifier) + } + + name := parts[0] + args := parts[1:] + + switch name { + case "excerpt": + m, err := newExcerptModifier(args...) + if err != nil { + return nil, fmt.Errorf("invalid excerpt modifier: %w", err) + } + return m, nil + } + + return nil, fmt.Errorf("missing or invalid modifier %q", name) +} + +func pickFields(data any, fields map[string]FieldModifier) error { switch v := data.(type) { case map[string]any: - pickMapFields(v, fields) + return pickMapFields(v, fields) // surface modifier errors for single objects too case []map[string]any: for _, item := range v { - pickMapFields(item, fields) + if err := pickMapFields(item, fields); err != nil { + return err + } } case []any: if len(v) == 0 { - return // nothing to pick + return nil // nothing to pick } if _, ok := v[0].(map[string]any); !ok { - return // for now ignore non-map values + return nil // for now ignore non-map values } for _, item := range v { - pickMapFields(item.(map[string]any), fields) + if err := pickMapFields(item.(map[string]any), fields); err != nil { + return err // propagate instead of swallowing the modifier error + } } } + + return nil } -func pickMapFields(data map[string]any, fields []string) { +func pickMapFields(data map[string]any, fields map[string]FieldModifier) error { if len(fields) == 0 { - return // nothing to pick + return nil // nothing to pick } - if list.ExistInSlice("*", fields) { + if m, ok := fields["*"]; ok { // append all missing root level data keys for k := range data { var exists bool - for _, f := range fields { + for f := range fields { if strings.HasPrefix(f+".", k+".") { exists = true break @@ -108,17 +174,17 @@ func pickMapFields(data map[string]any, fields []string) { } if !exists { - fields = append(fields, k) + fields[k] = m } } } DataLoop: for k := range data { - matchingFields := make([]string, 0, len(fields)) - for _, f := range fields { + matchingFields := 
make(map[string]FieldModifier, len(fields)) + for f, m := range fields { if strings.HasPrefix(f+".", k+".") { - matchingFields = append(matchingFields, f) + matchingFields[f] = m continue } } @@ -128,15 +194,31 @@ DataLoop: continue DataLoop } - // trim the key from the fields - for i, v := range matchingFields { - trimmed := strings.TrimSuffix(strings.TrimPrefix(v+".", k+"."), ".") - if trimmed == "" { + nested := make(map[string]FieldModifier, len(matchingFields)) // matching field paths with the current key removed + for f, m := range matchingFields { + remains := strings.TrimSuffix(strings.TrimPrefix(f+".", k+"."), ".") + + // final key + if remains == "" { + if m != nil { + var err error + data[k], err = m.Modify(data[k]) + if err != nil { + return err + } + } continue DataLoop } - matchingFields[i] = trimmed + + // keep the rest of the field path in a separate map because inserting into + // matchingFields while ranging over it may revisit already trimmed keys + nested[remains] = m } - pickFields(data[k], matchingFields) + if err := pickFields(data[k], nested); err != nil { + return err + } } + + return nil } diff --git a/tools/rest/json_serializer_test.go b/tools/rest/json_serializer_test.go index 6c13f500..3de0f0fa 100644 --- a/tools/rest/json_serializer_test.go +++ b/tools/rest/json_serializer_test.go @@ -274,6 +274,33 @@ func TestSerialize(t *testing.T) { "fields=id,rel.*,rel.sub.id", `{"id":"123","rel":{"id":"456","sub":{"id":"789"},"title":"rel_title"}}`, }, + { + "invalid excerpt modifier", + rest.Serializer{}, + 400, + map[string]any{"a": 1, "b": 2, "c": "test"}, + "fields=*:excerpt", + `{"a":1,"b":2,"c":"test"}`, + }, + { + "valid excerpt modifier", + rest.Serializer{}, + 200, + map[string]any{ + "id": "123", + "title": "lorem", + "rel": map[string]any{ + "id": "456", + "title": "

rel_title

", + "sub": map[string]any{ + "id": "789", + "title": "sub_title", + }, + }, + }, + "fields=*:excerpt(2),rel.title:excerpt(3, true)", + `{"id":"12","rel":{"title":"rel..."},"title":"lo"}`, + }, } for _, s := range scenarios {