From 08b856faeb38931d448bc67062b22abe71da9c17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20Paj=C4=85k?= Date: Thu, 28 Dec 2023 18:21:44 +0100 Subject: [PATCH] baggage: Member.String encodes only when necessary (#4775) --- CHANGELOG.md | 1 + baggage/baggage.go | 64 ++++++++++++++++++++++++++++++++++--- baggage/baggage_test.go | 70 ++++++++++++++++++++++++++++++++++++----- 3 files changed, 123 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c283b9cbe..b94281d2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Improve `go.opentelemetry.io/otel/trace.TraceState`'s performance. (#4722) - Improve `go.opentelemetry.io/otel/propagation.TraceContext`'s performance. (#4721) - Improve `go.opentelemetry.io/otel/baggage` performance. (#4743) +- `Member.String` in `go.opentelemetry.io/otel/baggage` percent-encodes only when necessary. (#4775) ### Fixed diff --git a/baggage/baggage.go b/baggage/baggage.go index 5869a4ed6..89a066420 100644 --- a/baggage/baggage.go +++ b/baggage/baggage.go @@ -323,7 +323,7 @@ func (m Member) String() string { // A key is just an ASCII string. A value is restricted to be // US-ASCII characters excluding CTLs, whitespace, // DQUOTE, comma, semicolon, and backslash. - s := fmt.Sprintf("%s%s%s", m.key, keyValueDelimiter, url.PathEscape(m.value)) + s := fmt.Sprintf("%s%s%s", m.key, keyValueDelimiter, valueEscape(m.value)) if len(m.properties) > 0 { s = fmt.Sprintf("%s%s%s", s, propertyDelimiter, m.properties.String()) } @@ -652,9 +652,65 @@ func validateValue(s string) bool { } func validateValueChar(c int32) bool { - return (c >= 0x23 && c <= 0x2b) || + return c == 0x21 || + (c >= 0x23 && c <= 0x2b) || (c >= 0x2d && c <= 0x3a) || (c >= 0x3c && c <= 0x5b) || - (c >= 0x5d && c <= 0x7e) || - c == 0x21 + (c >= 0x5d && c <= 0x7e) +} + +// valueEscape escapes the string so it can be safely placed inside a baggage value, +// replacing special characters with %XX sequences as needed. +// +// The implementation is based on: +// https://github.com/golang/go/blob/f6509cf5cdbb5787061b784973782933c47f1782/src/net/url/url.go#L285. +func valueEscape(s string) string { + hexCount := 0 + for i := 0; i < len(s); i++ { + c := s[i] + if shouldEscape(c) { + hexCount++ + } + } + + if hexCount == 0 { + return s + } + + var buf [64]byte + var t []byte + + required := len(s) + 2*hexCount + if required <= len(buf) { + t = buf[:required] + } else { + t = make([]byte, required) + } + + j := 0 + for i := 0; i < len(s); i++ { + c := s[i] + if shouldEscape(s[i]) { + const upperhex = "0123456789ABCDEF" + t[j] = '%' + t[j+1] = upperhex[c>>4] + t[j+2] = upperhex[c&15] + j += 3 + } else { + t[j] = c + j++ + } + } + + return string(t) +} + +// shouldEscape returns true if the specified byte should be escaped when +// appearing in a baggage value string. +func shouldEscape(c byte) bool { + if c == '%' { + // The percent character must be encoded so that percent-encoding can work. + return true + } + return !validateValueChar(int32(c)) } diff --git a/baggage/baggage_test.go b/baggage/baggage_test.go index 19b2c6e94..e6664f0e0 100644 --- a/baggage/baggage_test.go +++ b/baggage/baggage_test.go @@ -22,6 +22,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/internal/baggage" ) @@ -390,12 +391,19 @@ func TestBaggageParse(t *testing.T) { }, }, { - name: "url encoded value", + name: "encoded ASCII string", in: "key1=val%252%2C", want: baggage.List{ "key1": {Value: "val%2,"}, }, }, + { + name: "encoded UTF-8 string", + in: "foo=%C4%85%C5%9B%C4%87", + want: baggage.List{ + "foo": {Value: "ąść"}, + }, + }, { name: "invalid member: empty", in: "foo=,,bar=", @@ -501,13 +509,7 @@ func TestBaggageString(t *testing.T) { // Meaning, US-ASCII characters excluding CTLs, whitespace, // DQUOTE, comma, semicolon, and backslash. All excluded // characters need to be percent encoded. - // - // Ideally, the want result is: - // out: "foo=%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20!%22#$%25&'()*+%2C-./0123456789:%3B<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[%5C]^_%60abcdefghijklmnopqrstuvwxyz{|}~%7F", - // However, the following characters are escaped: - // !#'()*/<>?[]^{|} - // It is not necessary, but still provides a correct result. - out: "foo=%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20%21%22%23$%25&%27%28%29%2A+%2C-.%2F0123456789:%3B%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~%7F", + out: "foo=%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20!%22#$%25&'()*+%2C-./0123456789:%3B<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[%5C]^_`abcdefghijklmnopqrstuvwxyz{|}~%7F", baggage: baggage.List{ "foo": {Value: func() string { // All US-ASCII characters. @@ -519,6 +521,13 @@ func TestBaggageString(t *testing.T) { }()}, }, }, + { + name: "non-ASCII UTF-8 string", + out: "foo=%C4%85%C5%9B%C4%87", + baggage: baggage.List{ + "foo": {Value: "ąść"}, + }, + }, { name: "plus", out: "foo=1+1", @@ -937,3 +946,48 @@ func BenchmarkParse(b *testing.B) { benchBaggage, _ = Parse(`userId=alice,serverNode = DF28 , isProduction = false,hasProp=stuff;propKey;propWValue=value`) } } + +func BenchmarkString(b *testing.B) { + var members []Member + addMember := func(k, v string) { + m, err := NewMember(k, valueEscape(v)) + require.NoError(b, err) + members = append(members, m) + } + + addMember("key1", "val1") + addMember("key2", " ;,%") + addMember("key3", "Witaj świecie!") + addMember("key4", strings.Repeat("Hello world!", 10)) + + bg, err := New(members...) + require.NoError(b, err) + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + _ = bg.String() + } +} + +func BenchmarkValueEscape(b *testing.B) { + testCases := []struct { + name string + in string + }{ + {name: "nothing to escape", in: "value"}, + {name: "requires escaping", in: " ;,%"}, + {name: "long value", in: strings.Repeat("Hello world!", 20)}, + } + + for _, tc := range testCases { + b.Run(tc.name, func(b *testing.B) { + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + _ = valueEscape(tc.in) + } + }) + } +}