diff --git a/CHANGELOG.md b/CHANGELOG.md index c01e6998e..dcb4dec15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Fix stale timestamps reported by the last-value aggregation. (#5517) - Indicate the `Exporter` in `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp` must be created by the `New` method. (#5521) - Improved performance in all `{Bool,Int64,Float64,String}SliceValue` functions of `go.opentelemetry.io/attributes` by reducing the number of allocations. (#5549) +- Replace invalid percent-encoded octet sequences with replacement char in `go.opentelemetry.io/otel/baggage`. (#5528) ## [1.27.0/0.49.0/0.3.0] 2024-05-21 diff --git a/baggage/baggage.go b/baggage/baggage.go index c40c896cc..42cd46d84 100644 --- a/baggage/baggage.go +++ b/baggage/baggage.go @@ -294,19 +294,45 @@ func parseMember(member string) (Member, error) { return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidKey, key) } - val := strings.TrimSpace(v) - if !validateValue(val) { + rawVal := strings.TrimSpace(v) + if !validateValue(rawVal) { return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidValue, v) } // Decode a percent-encoded value. - value, err := url.PathUnescape(val) + unescapeVal, err := url.PathUnescape(rawVal) if err != nil { return newInvalidMember(), fmt.Errorf("%w: %w", errInvalidValue, err) } + + value := replaceInvalidUTF8Sequences(len(rawVal), unescapeVal) return Member{key: key, value: value, properties: props, hasData: true}, nil } +// replaceInvalidUTF8Sequences replaces invalid UTF-8 sequences with '�'. +func replaceInvalidUTF8Sequences(cap int, unescapeVal string) string { + if utf8.ValidString(unescapeVal) { + return unescapeVal + } + // W3C baggage spec: + // https://github.com/w3c/baggage/blob/8c215efbeebd3fa4b1aceb937a747e56444f22f3/baggage/HTTP_HEADER_FORMAT.md?plain=1#L69 + + var b strings.Builder + b.Grow(cap) + for i := 0; i < len(unescapeVal); { + r, size := utf8.DecodeRuneInString(unescapeVal[i:]) + if r == utf8.RuneError && size == 1 { + // Invalid UTF-8 sequence found, replace it with '�' + _, _ = b.WriteString("�") + } else { + _, _ = b.WriteRune(r) + } + i += size + } + + return b.String() +} + // validate ensures m conforms to the W3C Baggage specification. // A key must be an ASCII string, returning an error otherwise. func (m Member) validate() error { @@ -607,10 +633,12 @@ func parsePropertyInternal(s string) (p Property, ok bool) { } // Decode a percent-encoded value. - value, err := url.PathUnescape(s[valueStart:valueEnd]) + rawVal := s[valueStart:valueEnd] + unescapeVal, err := url.PathUnescape(rawVal) if err != nil { return } + value := replaceInvalidUTF8Sequences(len(rawVal), unescapeVal) ok = true p.key = s[keyStart:keyEnd] diff --git a/baggage/baggage_test.go b/baggage/baggage_test.go index 30150e4f9..146673846 100644 --- a/baggage/baggage_test.go +++ b/baggage/baggage_test.go @@ -9,6 +9,7 @@ import ( "slices" "strings" "testing" + "unicode/utf8" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -469,6 +470,18 @@ func TestBaggageParse(t *testing.T) { in: tooManyMembers, err: errMemberNumber, }, + { + name: "percent-encoded octet sequences do not match the UTF-8 encoding scheme", + in: "k=aa%ffcc;p=d%fff", + want: baggage.List{ + "k": { + Value: "aa�cc", + Properties: []baggage.Property{ + {Key: "p", Value: "d�f", HasValue: true}, + }, + }, + }, + }, } for _, tc := range testcases { @@ -480,6 +493,53 @@ func TestBaggageParse(t *testing.T) { } } +func TestBaggageParseValue(t *testing.T) { + testcases := []struct { + name string + in string + valueWant string + valueWantSize int + }{ + { + name: "percent encoded octet sequence matches UTF-8 encoding scheme", + in: "k=aa%26cc", + valueWant: "aa&cc", + valueWantSize: 5, + }, + { + name: "percent encoded octet sequence doesn't match UTF-8 encoding scheme", + in: "k=aa%ffcc", + valueWant: "aa�cc", + valueWantSize: 7, + }, + { + name: "multiple percent encoded octet sequences don't match UTF-8 encoding scheme", + in: "k=aa%ffcc%fedd%fa", + valueWant: "aa�cc�dd�", + valueWantSize: 15, + }, + { + name: "raw value", + in: "k=aacc", + valueWant: "aacc", + valueWantSize: 4, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + b, err := Parse(tc.in) + assert.Empty(t, err) + + val := b.Members()[0].Value() + + assert.EqualValues(t, val, tc.valueWant) + assert.Equal(t, len(val), tc.valueWantSize) + assert.True(t, utf8.ValidString(val)) + }) + } +} + func TestBaggageString(t *testing.T) { testcases := []struct { name string @@ -979,7 +1039,7 @@ func BenchmarkParse(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { - benchBaggage, _ = Parse(`userId=alice,serverNode = DF28 , isProduction = false,hasProp=stuff;propKey;propWValue=value`) + benchBaggage, _ = Parse("userId=alice,serverNode = DF28 , isProduction = false,hasProp=stuff;propKey;propWValue=value, invalidUtf8=pr%ffo%ffp%fcValue") } }