You've already forked opentelemetry-go
							
							
				mirror of
				https://github.com/open-telemetry/opentelemetry-go.git
				synced 2025-10-31 00:07:40 +02:00 
			
		
		
		
	baggage: Fix invalid percent-encoded octet sequences (#5528)
# Goal Replace the percent encoded octet sequence with the replacement code point (U+FFFD) when it doesn't match the UTF-8 encoding schema. Issue: https://github.com/open-telemetry/opentelemetry-go/issues/5519 Current behavior: ``` package main import ( "fmt" "log" "unicode/utf8" "go.opentelemetry.io/otel/baggage" ) func main() { kv := "k=aa%ffcc" b, err := baggage.Parse(kv) if err != nil { log.Fatal(err) } val := b.Members()[0].Value() fmt.Println(len(val)) # 5 fmt.Println(utf8.ValidString(val)) # false } ``` Expected behavior: ``` package main import ( "fmt" "log" "unicode/utf8" "go.opentelemetry.io/otel/baggage" ) func main() { kv := "k=aa%ffcc" b, err := baggage.Parse(kv) if err != nil { log.Fatal(err) } val := b.Members()[0].Value() fmt.Println(len(val)) # 7 fmt.Println(utf8.ValidString(val)) # true } ``` ## Benchmark - `go test -bench=BenchmarkParse -count 20 > old.txt` ``` goos: darwin goarch: arm64 pkg: go.opentelemetry.io/otel/baggage BenchmarkParse-10 1548118 774.3 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1547653 786.0 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1544949 770.5 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1558972 770.2 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1554973 774.7 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1550200 779.6 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1545100 774.3 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1549634 777.5 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1552530 769.6 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1536499 855.0 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1552244 770.4 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1560225 767.4 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1562738 772.3 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1556679 838.9 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1562500 777.1 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1530901 836.5 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1000000 1372 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1534678 780.3 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1366180 822.4 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1539852 796.8 ns/op 864 B/op 8 allocs/op PASS ok go.opentelemetry.io/otel/baggage 40.839s ``` - `go test -bench=BenchmarkParse -count 20 > new.txt` ``` goos: darwin goarch: arm64 pkg: go.opentelemetry.io/otel/baggage BenchmarkParse-10 1355893 886.6 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1349192 883.1 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1363053 880.4 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1372404 875.7 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1359979 880.7 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1360497 874.7 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1375520 870.2 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1375268 882.8 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1361998 964.8 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1373461 961.5 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1378065 872.6 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1377290 879.0 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1362094 885.6 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1352175 915.9 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1364914 887.9 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1355782 890.5 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1361848 1245 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1163396 878.8 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1370886 916.6 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1340149 1175 ns/op 888 B/op 9 allocs/op PASS ok go.opentelemetry.io/otel/baggage 44.347s ``` - `benchstat old.txt new.txt` ``` goos: darwin goarch: arm64 pkg: go.opentelemetry.io/otel/baggage │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ Parse-10 777.3n ± 3% 884.4n ± 4% +13.77% (p=0.000 n=20) │ old.txt │ new.txt │ │ B/op │ B/op vs base │ Parse-10 864.0 ± 0% 888.0 ± 0% +2.78% (p=0.000 n=20) │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ Parse-10 8.000 ± 0% 9.000 ± 0% +12.50% (p=0.000 n=20) ``` --------- Co-authored-by: Damien Mathieu <42@dmathieu.com> Co-authored-by: Robert Pająk <pellared@hotmail.com>
This commit is contained in:
		| @@ -49,6 +49,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm | ||||
| - Fix stale timestamps reported by the last-value aggregation. (#5517) | ||||
| - Indicate the `Exporter` in `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp` must be created by the `New` method. (#5521) | ||||
| - Improved performance in all `{Bool,Int64,Float64,String}SliceValue` functions of `go.opentelemetry.io/attributes` by reducing the number of allocations. (#5549) | ||||
| - Replace invalid percent-encoded octet sequences with replacement char in `go.opentelemetry.io/otel/baggage`. (#5528) | ||||
|  | ||||
| ## [1.27.0/0.49.0/0.3.0] 2024-05-21 | ||||
|  | ||||
|   | ||||
| @@ -294,19 +294,45 @@ func parseMember(member string) (Member, error) { | ||||
| 		return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidKey, key) | ||||
| 	} | ||||
|  | ||||
| 	val := strings.TrimSpace(v) | ||||
| 	if !validateValue(val) { | ||||
| 	rawVal := strings.TrimSpace(v) | ||||
| 	if !validateValue(rawVal) { | ||||
| 		return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidValue, v) | ||||
| 	} | ||||
|  | ||||
| 	// Decode a percent-encoded value. | ||||
| 	value, err := url.PathUnescape(val) | ||||
| 	unescapeVal, err := url.PathUnescape(rawVal) | ||||
| 	if err != nil { | ||||
| 		return newInvalidMember(), fmt.Errorf("%w: %w", errInvalidValue, err) | ||||
| 	} | ||||
|  | ||||
| 	value := replaceInvalidUTF8Sequences(len(rawVal), unescapeVal) | ||||
| 	return Member{key: key, value: value, properties: props, hasData: true}, nil | ||||
| } | ||||
|  | ||||
| // replaceInvalidUTF8Sequences replaces invalid UTF-8 sequences with '�'. | ||||
| func replaceInvalidUTF8Sequences(cap int, unescapeVal string) string { | ||||
| 	if utf8.ValidString(unescapeVal) { | ||||
| 		return unescapeVal | ||||
| 	} | ||||
| 	// W3C baggage spec: | ||||
| 	// https://github.com/w3c/baggage/blob/8c215efbeebd3fa4b1aceb937a747e56444f22f3/baggage/HTTP_HEADER_FORMAT.md?plain=1#L69 | ||||
|  | ||||
| 	var b strings.Builder | ||||
| 	b.Grow(cap) | ||||
| 	for i := 0; i < len(unescapeVal); { | ||||
| 		r, size := utf8.DecodeRuneInString(unescapeVal[i:]) | ||||
| 		if r == utf8.RuneError && size == 1 { | ||||
| 			// Invalid UTF-8 sequence found, replace it with '�' | ||||
| 			_, _ = b.WriteString("�") | ||||
| 		} else { | ||||
| 			_, _ = b.WriteRune(r) | ||||
| 		} | ||||
| 		i += size | ||||
| 	} | ||||
|  | ||||
| 	return b.String() | ||||
| } | ||||
|  | ||||
| // validate ensures m conforms to the W3C Baggage specification. | ||||
| // A key must be an ASCII string, returning an error otherwise. | ||||
| func (m Member) validate() error { | ||||
| @@ -607,10 +633,12 @@ func parsePropertyInternal(s string) (p Property, ok bool) { | ||||
| 	} | ||||
|  | ||||
| 	// Decode a percent-encoded value. | ||||
| 	value, err := url.PathUnescape(s[valueStart:valueEnd]) | ||||
| 	rawVal := s[valueStart:valueEnd] | ||||
| 	unescapeVal, err := url.PathUnescape(rawVal) | ||||
| 	if err != nil { | ||||
| 		return | ||||
| 	} | ||||
| 	value := replaceInvalidUTF8Sequences(len(rawVal), unescapeVal) | ||||
|  | ||||
| 	ok = true | ||||
| 	p.key = s[keyStart:keyEnd] | ||||
|   | ||||
| @@ -9,6 +9,7 @@ import ( | ||||
| 	"slices" | ||||
| 	"strings" | ||||
| 	"testing" | ||||
| 	"unicode/utf8" | ||||
|  | ||||
| 	"github.com/stretchr/testify/assert" | ||||
| 	"github.com/stretchr/testify/require" | ||||
| @@ -469,6 +470,18 @@ func TestBaggageParse(t *testing.T) { | ||||
| 			in:   tooManyMembers, | ||||
| 			err:  errMemberNumber, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name: "percent-encoded octet sequences do not match the UTF-8 encoding scheme", | ||||
| 			in:   "k=aa%ffcc;p=d%fff", | ||||
| 			want: baggage.List{ | ||||
| 				"k": { | ||||
| 					Value: "aa�cc", | ||||
| 					Properties: []baggage.Property{ | ||||
| 						{Key: "p", Value: "d�f", HasValue: true}, | ||||
| 					}, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	for _, tc := range testcases { | ||||
| @@ -480,6 +493,53 @@ func TestBaggageParse(t *testing.T) { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestBaggageParseValue(t *testing.T) { | ||||
| 	testcases := []struct { | ||||
| 		name          string | ||||
| 		in            string | ||||
| 		valueWant     string | ||||
| 		valueWantSize int | ||||
| 	}{ | ||||
| 		{ | ||||
| 			name:          "percent encoded octet sequence matches UTF-8 encoding scheme", | ||||
| 			in:            "k=aa%26cc", | ||||
| 			valueWant:     "aa&cc", | ||||
| 			valueWantSize: 5, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:          "percent encoded octet sequence doesn't match UTF-8 encoding scheme", | ||||
| 			in:            "k=aa%ffcc", | ||||
| 			valueWant:     "aa�cc", | ||||
| 			valueWantSize: 7, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:          "multiple percent encoded octet sequences don't match UTF-8 encoding scheme", | ||||
| 			in:            "k=aa%ffcc%fedd%fa", | ||||
| 			valueWant:     "aa�cc�dd�", | ||||
| 			valueWantSize: 15, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:          "raw value", | ||||
| 			in:            "k=aacc", | ||||
| 			valueWant:     "aacc", | ||||
| 			valueWantSize: 4, | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	for _, tc := range testcases { | ||||
| 		t.Run(tc.name, func(t *testing.T) { | ||||
| 			b, err := Parse(tc.in) | ||||
| 			assert.Empty(t, err) | ||||
|  | ||||
| 			val := b.Members()[0].Value() | ||||
|  | ||||
| 			assert.EqualValues(t, val, tc.valueWant) | ||||
| 			assert.Equal(t, len(val), tc.valueWantSize) | ||||
| 			assert.True(t, utf8.ValidString(val)) | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestBaggageString(t *testing.T) { | ||||
| 	testcases := []struct { | ||||
| 		name    string | ||||
| @@ -979,7 +1039,7 @@ func BenchmarkParse(b *testing.B) { | ||||
| 	b.ReportAllocs() | ||||
|  | ||||
| 	for i := 0; i < b.N; i++ { | ||||
| 		benchBaggage, _ = Parse(`userId=alice,serverNode = DF28 , isProduction = false,hasProp=stuff;propKey;propWValue=value`) | ||||
| 		benchBaggage, _ = Parse("userId=alice,serverNode = DF28 , isProduction = false,hasProp=stuff;propKey;propWValue=value, invalidUtf8=pr%ffo%ffp%fcValue") | ||||
| 	} | ||||
| } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user