You've already forked opentelemetry-go
mirror of
https://github.com/open-telemetry/opentelemetry-go.git
synced 2025-08-10 22:31:50 +02:00
baggage: Fix invalid percent-encoded octet sequences (#5528)
# Goal Replace the percent encoded octet sequence with the replacement code point (U+FFFD) when it doesn't match the UTF-8 encoding schema. Issue: https://github.com/open-telemetry/opentelemetry-go/issues/5519 Current behavior: ``` package main import ( "fmt" "log" "unicode/utf8" "go.opentelemetry.io/otel/baggage" ) func main() { kv := "k=aa%ffcc" b, err := baggage.Parse(kv) if err != nil { log.Fatal(err) } val := b.Members()[0].Value() fmt.Println(len(val)) # 5 fmt.Println(utf8.ValidString(val)) # false } ``` Expected behavior: ``` package main import ( "fmt" "log" "unicode/utf8" "go.opentelemetry.io/otel/baggage" ) func main() { kv := "k=aa%ffcc" b, err := baggage.Parse(kv) if err != nil { log.Fatal(err) } val := b.Members()[0].Value() fmt.Println(len(val)) # 7 fmt.Println(utf8.ValidString(val)) # true } ``` ## Benchmark - `go test -bench=BenchmarkParse -count 20 > old.txt` ``` goos: darwin goarch: arm64 pkg: go.opentelemetry.io/otel/baggage BenchmarkParse-10 1548118 774.3 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1547653 786.0 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1544949 770.5 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1558972 770.2 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1554973 774.7 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1550200 779.6 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1545100 774.3 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1549634 777.5 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1552530 769.6 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1536499 855.0 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1552244 770.4 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1560225 767.4 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1562738 772.3 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1556679 838.9 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1562500 777.1 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1530901 836.5 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1000000 1372 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1534678 780.3 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1366180 822.4 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1539852 796.8 ns/op 864 B/op 8 allocs/op PASS ok go.opentelemetry.io/otel/baggage 40.839s ``` - `go test -bench=BenchmarkParse -count 20 > new.txt` ``` goos: darwin goarch: arm64 pkg: go.opentelemetry.io/otel/baggage BenchmarkParse-10 1355893 886.6 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1349192 883.1 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1363053 880.4 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1372404 875.7 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1359979 880.7 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1360497 874.7 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1375520 870.2 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1375268 882.8 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1361998 964.8 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1373461 961.5 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1378065 872.6 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1377290 879.0 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1362094 885.6 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1352175 915.9 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1364914 887.9 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1355782 890.5 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1361848 1245 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1163396 878.8 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1370886 916.6 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1340149 1175 ns/op 888 B/op 9 allocs/op PASS ok go.opentelemetry.io/otel/baggage 44.347s ``` - `benchstat old.txt new.txt` ``` goos: darwin goarch: arm64 pkg: go.opentelemetry.io/otel/baggage │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ Parse-10 777.3n ± 3% 884.4n ± 4% +13.77% (p=0.000 n=20) │ old.txt │ new.txt │ │ B/op │ B/op vs base │ Parse-10 864.0 ± 0% 888.0 ± 0% +2.78% (p=0.000 n=20) │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ Parse-10 8.000 ± 0% 9.000 ± 0% +12.50% (p=0.000 n=20) ``` --------- Co-authored-by: Damien Mathieu <42@dmathieu.com> Co-authored-by: Robert Pająk <pellared@hotmail.com>
This commit is contained in:
@@ -49,6 +49,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
|
||||
- Fix stale timestamps reported by the last-value aggregation. (#5517)
|
||||
- Indicate the `Exporter` in `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp` must be created by the `New` method. (#5521)
|
||||
- Improved performance in all `{Bool,Int64,Float64,String}SliceValue` functions of `go.opentelemetry.io/attributes` by reducing the number of allocations. (#5549)
|
||||
- Replace invalid percent-encoded octet sequences with replacement char in `go.opentelemetry.io/otel/baggage`. (#5528)
|
||||
|
||||
## [1.27.0/0.49.0/0.3.0] 2024-05-21
|
||||
|
||||
|
@@ -294,19 +294,45 @@ func parseMember(member string) (Member, error) {
|
||||
return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidKey, key)
|
||||
}
|
||||
|
||||
val := strings.TrimSpace(v)
|
||||
if !validateValue(val) {
|
||||
rawVal := strings.TrimSpace(v)
|
||||
if !validateValue(rawVal) {
|
||||
return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidValue, v)
|
||||
}
|
||||
|
||||
// Decode a percent-encoded value.
|
||||
value, err := url.PathUnescape(val)
|
||||
unescapeVal, err := url.PathUnescape(rawVal)
|
||||
if err != nil {
|
||||
return newInvalidMember(), fmt.Errorf("%w: %w", errInvalidValue, err)
|
||||
}
|
||||
|
||||
value := replaceInvalidUTF8Sequences(len(rawVal), unescapeVal)
|
||||
return Member{key: key, value: value, properties: props, hasData: true}, nil
|
||||
}
|
||||
|
||||
// replaceInvalidUTF8Sequences replaces invalid UTF-8 sequences with '�'.
|
||||
func replaceInvalidUTF8Sequences(cap int, unescapeVal string) string {
|
||||
if utf8.ValidString(unescapeVal) {
|
||||
return unescapeVal
|
||||
}
|
||||
// W3C baggage spec:
|
||||
// https://github.com/w3c/baggage/blob/8c215efbeebd3fa4b1aceb937a747e56444f22f3/baggage/HTTP_HEADER_FORMAT.md?plain=1#L69
|
||||
|
||||
var b strings.Builder
|
||||
b.Grow(cap)
|
||||
for i := 0; i < len(unescapeVal); {
|
||||
r, size := utf8.DecodeRuneInString(unescapeVal[i:])
|
||||
if r == utf8.RuneError && size == 1 {
|
||||
// Invalid UTF-8 sequence found, replace it with '�'
|
||||
_, _ = b.WriteString("�")
|
||||
} else {
|
||||
_, _ = b.WriteRune(r)
|
||||
}
|
||||
i += size
|
||||
}
|
||||
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// validate ensures m conforms to the W3C Baggage specification.
|
||||
// A key must be an ASCII string, returning an error otherwise.
|
||||
func (m Member) validate() error {
|
||||
@@ -607,10 +633,12 @@ func parsePropertyInternal(s string) (p Property, ok bool) {
|
||||
}
|
||||
|
||||
// Decode a percent-encoded value.
|
||||
value, err := url.PathUnescape(s[valueStart:valueEnd])
|
||||
rawVal := s[valueStart:valueEnd]
|
||||
unescapeVal, err := url.PathUnescape(rawVal)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
value := replaceInvalidUTF8Sequences(len(rawVal), unescapeVal)
|
||||
|
||||
ok = true
|
||||
p.key = s[keyStart:keyEnd]
|
||||
|
@@ -9,6 +9,7 @@ import (
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -469,6 +470,18 @@ func TestBaggageParse(t *testing.T) {
|
||||
in: tooManyMembers,
|
||||
err: errMemberNumber,
|
||||
},
|
||||
{
|
||||
name: "percent-encoded octet sequences do not match the UTF-8 encoding scheme",
|
||||
in: "k=aa%ffcc;p=d%fff",
|
||||
want: baggage.List{
|
||||
"k": {
|
||||
Value: "aa�cc",
|
||||
Properties: []baggage.Property{
|
||||
{Key: "p", Value: "d�f", HasValue: true},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testcases {
|
||||
@@ -480,6 +493,53 @@ func TestBaggageParse(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBaggageParseValue(t *testing.T) {
|
||||
testcases := []struct {
|
||||
name string
|
||||
in string
|
||||
valueWant string
|
||||
valueWantSize int
|
||||
}{
|
||||
{
|
||||
name: "percent encoded octet sequence matches UTF-8 encoding scheme",
|
||||
in: "k=aa%26cc",
|
||||
valueWant: "aa&cc",
|
||||
valueWantSize: 5,
|
||||
},
|
||||
{
|
||||
name: "percent encoded octet sequence doesn't match UTF-8 encoding scheme",
|
||||
in: "k=aa%ffcc",
|
||||
valueWant: "aa�cc",
|
||||
valueWantSize: 7,
|
||||
},
|
||||
{
|
||||
name: "multiple percent encoded octet sequences don't match UTF-8 encoding scheme",
|
||||
in: "k=aa%ffcc%fedd%fa",
|
||||
valueWant: "aa�cc�dd�",
|
||||
valueWantSize: 15,
|
||||
},
|
||||
{
|
||||
name: "raw value",
|
||||
in: "k=aacc",
|
||||
valueWant: "aacc",
|
||||
valueWantSize: 4,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testcases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
b, err := Parse(tc.in)
|
||||
assert.Empty(t, err)
|
||||
|
||||
val := b.Members()[0].Value()
|
||||
|
||||
assert.EqualValues(t, val, tc.valueWant)
|
||||
assert.Equal(t, len(val), tc.valueWantSize)
|
||||
assert.True(t, utf8.ValidString(val))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBaggageString(t *testing.T) {
|
||||
testcases := []struct {
|
||||
name string
|
||||
@@ -979,7 +1039,7 @@ func BenchmarkParse(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
benchBaggage, _ = Parse(`userId=alice,serverNode = DF28 , isProduction = false,hasProp=stuff;propKey;propWValue=value`)
|
||||
benchBaggage, _ = Parse("userId=alice,serverNode = DF28 , isProduction = false,hasProp=stuff;propKey;propWValue=value, invalidUtf8=pr%ffo%ffp%fcValue")
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user