1
0
mirror of https://github.com/open-telemetry/opentelemetry-go.git synced 2025-11-29 23:07:45 +02:00

baggage: Accept non-ASCII keys (#5132)

resolves #4946

I also add additional test cases to cover more lines.

benchmark results:

```
goos: darwin
goarch: arm64
pkg: go.opentelemetry.io/otel/baggage
                                 │   old.txt   │               new.txt               │
                                 │   sec/op    │   sec/op     vs base                │
New-10                             402.3n ± 1%   422.4n ± 6%   +4.98% (p=0.000 n=10)
NewMemberRaw-10                    10.82n ± 0%   13.90n ± 1%  +28.51% (p=0.000 n=10)
Parse-10                           803.8n ± 1%   795.0n ± 1%   -1.09% (p=0.011 n=10)
String-10                          682.6n ± 0%   610.0n ± 2%  -10.63% (p=0.000 n=10)
ValueEscape/nothing_to_escape-10   4.856n ± 0%   4.849n ± 0%        ~ (p=0.279 n=10)
ValueEscape/requires_escaping-10   22.47n ± 1%   22.36n ± 1%        ~ (p=0.342 n=10)
ValueEscape/long_value-10          513.3n ± 1%   510.1n ± 0%   -0.62% (p=0.006 n=10)
MemberString-10                    430.8n ± 2%   471.3n ± 2%   +9.41% (p=0.000 n=10)
geomean                            124.5n        128.5n        +3.22%

                                 │   old.txt    │               new.txt                │
                                 │     B/op     │    B/op     vs base                  │
New-10                             704.0 ± 0%     704.0 ± 0%        ~ (p=1.000 n=10) ¹
NewMemberRaw-10                    0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=10) ¹
Parse-10                           888.0 ± 0%     888.0 ± 0%        ~ (p=1.000 n=10) ¹
String-10                          936.0 ± 0%     840.0 ± 0%  -10.26% (p=0.000 n=10)
ValueEscape/nothing_to_escape-10   0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=10) ¹
ValueEscape/requires_escaping-10   16.00 ± 0%     16.00 ± 0%        ~ (p=1.000 n=10) ¹
ValueEscape/long_value-10          576.0 ± 0%     576.0 ± 0%        ~ (p=1.000 n=10) ¹
MemberString-10                    656.0 ± 0%     656.0 ± 0%        ~ (p=1.000 n=10) ¹
geomean                                       ²                -1.34%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean

                                 │    old.txt    │               new.txt                │
                                 │   allocs/op   │ allocs/op   vs base                  │
New-10                              8.000 ± 0%     8.000 ± 0%        ~ (p=1.000 n=10) ¹
NewMemberRaw-10                     0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=10) ¹
Parse-10                            9.000 ± 0%     9.000 ± 0%        ~ (p=1.000 n=10) ¹
String-10                          10.000 ± 0%     8.000 ± 0%  -20.00% (p=0.000 n=10)
ValueEscape/nothing_to_escape-10    0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=10) ¹
ValueEscape/requires_escaping-10    1.000 ± 0%     1.000 ± 0%        ~ (p=1.000 n=10) ¹
ValueEscape/long_value-10           2.000 ± 0%     2.000 ± 0%        ~ (p=1.000 n=10) ¹
MemberString-10                     4.000 ± 0%     4.000 ± 0%        ~ (p=1.000 n=10) ¹
geomean                                        ²                -2.75%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean
```

FYI, the old implementation of `NewMemberRaw` didn't verify the value,
so the benchmark result of `NewMemberRaw` is not an apple-to-apple
comparison of `utf8.ValidString` and `validateKey`.

---------

Co-authored-by: Chester Cheung <cheung.zhy.csu@gmail.com>
Co-authored-by: Robert Pająk <pellared@hotmail.com>
This commit is contained in:
Sam Xie
2024-08-15 16:50:34 -07:00
committed by GitHub
parent 1dc952215d
commit d61bbf18f5
3 changed files with 235 additions and 20 deletions

View File

@@ -26,6 +26,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- `SimpleProcessor` in `go.opentelemetry.io/otel/sdk/log` synchronizes `OnEmit` calls. (#5666)
- The `SimpleProcessor` type in `go.opentelemetry.io/otel/sdk/log` is no longer comparable. (#5693)
- The `BatchProcessor` type in `go.opentelemetry.io/otel/sdk/log` is no longer comparable. (#5693)
- `NewMemberRaw`, `NewKeyProperty` and `NewKeyValuePropertyRaw` in `go.opentelemetry.io/otel/baggage` allow UTF-8 string in key. (#5132)
### Fixed

View File

@@ -44,9 +44,15 @@ type Property struct {
// NewKeyProperty returns a new Property for key.
//
// The passed key must be valid, non-empty UTF-8 string.
// If key is invalid, an error will be returned.
// However, the specific Propagators that are used to transmit baggage entries across
// component boundaries may impose their own restrictions on Property key.
// For example, the W3C Baggage specification restricts the Property keys to strings that
// satisfy the token definition from RFC7230, Section 3.2.6.
// For maximum compatibility, alpha-numeric value are strongly recommended to be used as Property key.
func NewKeyProperty(key string) (Property, error) {
if !validateKey(key) {
if !validateBaggageName(key) {
return newInvalidProperty(), fmt.Errorf("%w: %q", errInvalidKey, key)
}
@@ -62,6 +68,10 @@ func NewKeyProperty(key string) (Property, error) {
// Notice: Consider using [NewKeyValuePropertyRaw] instead
// that does not require percent-encoding of the value.
func NewKeyValueProperty(key, value string) (Property, error) {
if !validateKey(key) {
return newInvalidProperty(), fmt.Errorf("%w: %q", errInvalidKey, key)
}
if !validateValue(value) {
return newInvalidProperty(), fmt.Errorf("%w: %q", errInvalidValue, value)
}
@@ -74,11 +84,20 @@ func NewKeyValueProperty(key, value string) (Property, error) {
// NewKeyValuePropertyRaw returns a new Property for key with value.
//
// The passed key must be compliant with W3C Baggage specification.
// The passed key must be valid, non-empty UTF-8 string.
// The passed value must be valid UTF-8 string.
// However, the specific Propagators that are used to transmit baggage entries across
// component boundaries may impose their own restrictions on Property key.
// For example, the W3C Baggage specification restricts the Property keys to strings that
// satisfy the token definition from RFC7230, Section 3.2.6.
// For maximum compatibility, alpha-numeric value are strongly recommended to be used as Property key.
func NewKeyValuePropertyRaw(key, value string) (Property, error) {
if !validateKey(key) {
if !validateBaggageName(key) {
return newInvalidProperty(), fmt.Errorf("%w: %q", errInvalidKey, key)
}
if !validateBaggageValue(value) {
return newInvalidProperty(), fmt.Errorf("%w: %q", errInvalidValue, value)
}
p := Property{
key: key,
@@ -115,12 +134,15 @@ func (p Property) validate() error {
return fmt.Errorf("invalid property: %w", err)
}
if !validateKey(p.key) {
if !validateBaggageName(p.key) {
return errFunc(fmt.Errorf("%w: %q", errInvalidKey, p.key))
}
if !p.hasValue && p.value != "" {
return errFunc(errors.New("inconsistent value"))
}
if p.hasValue && !validateBaggageValue(p.value) {
return errFunc(fmt.Errorf("%w: %q", errInvalidValue, p.value))
}
return nil
}
@@ -138,7 +160,15 @@ func (p Property) Value() (string, bool) {
// String encodes Property into a header string compliant with the W3C Baggage
// specification.
// It would return empty string if the key is invalid with the W3C Baggage
// specification. This could happen for a UTF-8 key, as it may contain
// invalid characters.
func (p Property) String() string {
// W3C Baggage specification does not allow percent-encoded keys.
if !validateKey(p.key) {
return ""
}
if p.hasValue {
return fmt.Sprintf("%s%s%v", p.key, keyValueDelimiter, valueEscape(p.value))
}
@@ -203,9 +233,14 @@ func (p properties) validate() error {
// String encodes properties into a header string compliant with the W3C Baggage
// specification.
func (p properties) String() string {
props := make([]string, len(p))
for i, prop := range p {
props[i] = prop.String()
props := make([]string, 0, len(p))
for _, prop := range p {
s := prop.String()
// Ignored empty properties.
if s != "" {
props = append(props, s)
}
}
return strings.Join(props, propertyDelimiter)
}
@@ -230,6 +265,10 @@ type Member struct {
// Notice: Consider using [NewMemberRaw] instead
// that does not require percent-encoding of the value.
func NewMember(key, value string, props ...Property) (Member, error) {
if !validateKey(key) {
return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidKey, key)
}
if !validateValue(value) {
return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidValue, value)
}
@@ -242,7 +281,13 @@ func NewMember(key, value string, props ...Property) (Member, error) {
// NewMemberRaw returns a new Member from the passed arguments.
//
// The passed key must be compliant with W3C Baggage specification.
// The passed key must be valid, non-empty UTF-8 string.
// The passed value must be valid UTF-8 string.
// However, the specific Propagators that are used to transmit baggage entries across
// component boundaries may impose their own restrictions on baggage key.
// For example, the W3C Baggage specification restricts the baggage keys to strings that
// satisfy the token definition from RFC7230, Section 3.2.6.
// For maximum compatibility, alpha-numeric value are strongly recommended to be used as baggage key.
func NewMemberRaw(key, value string, props ...Property) (Member, error) {
m := Member{
key: key,
@@ -340,9 +385,12 @@ func (m Member) validate() error {
return fmt.Errorf("%w: %q", errInvalidMember, m)
}
if !validateKey(m.key) {
if !validateBaggageName(m.key) {
return fmt.Errorf("%w: %q", errInvalidKey, m.key)
}
if !validateBaggageValue(m.value) {
return fmt.Errorf("%w: %q", errInvalidValue, m.value)
}
return m.properties.validate()
}
@@ -357,10 +405,15 @@ func (m Member) Properties() []Property { return m.properties.Copy() }
// String encodes Member into a header string compliant with the W3C Baggage
// specification.
// It would return empty string if the key is invalid with the W3C Baggage
// specification. This could happen for a UTF-8 key, as it may contain
// invalid characters.
func (m Member) String() string {
// A key is just an ASCII string. A value is restricted to be
// US-ASCII characters excluding CTLs, whitespace,
// DQUOTE, comma, semicolon, and backslash.
// W3C Baggage specification does not allow percent-encoded keys.
if !validateKey(m.key) {
return ""
}
s := m.key + keyValueDelimiter + valueEscape(m.value)
if len(m.properties) > 0 {
s += propertyDelimiter + m.properties.String()
@@ -554,14 +607,22 @@ func (b Baggage) Len() int {
// String encodes Baggage into a header string compliant with the W3C Baggage
// specification.
// It would ignore members where the member key is invalid with the W3C Baggage
// specification. This could happen for a UTF-8 key, as it may contain
// invalid characters.
func (b Baggage) String() string {
members := make([]string, 0, len(b.list))
for k, v := range b.list {
members = append(members, Member{
s := Member{
key: k,
value: v.Value,
properties: fromInternalProperties(v.Properties),
}.String())
}.String()
// Ignored empty members.
if s != "" {
members = append(members, s)
}
}
return strings.Join(members, listDelimiter)
}
@@ -748,6 +809,24 @@ var safeKeyCharset = [utf8.RuneSelf]bool{
'~': true,
}
// validateBaggageName checks if the string is a valid OpenTelemetry Baggage name.
// Baggage name is a valid, non-empty UTF-8 string.
func validateBaggageName(s string) bool {
if len(s) == 0 {
return false
}
return utf8.ValidString(s)
}
// validateBaggageValue checks if the string is a valid OpenTelemetry Baggage value.
// Baggage value is a valid UTF-8 strings.
// Empty string is also a valid UTF-8 string.
func validateBaggageValue(s string) bool {
return utf8.ValidString(s)
}
// validateKey checks if the string is a valid W3C Baggage key.
func validateKey(s string) bool {
if len(s) == 0 {
return false
@@ -766,6 +845,7 @@ func validateKeyChar(c int32) bool {
return c >= 0 && c < int32(utf8.RuneSelf) && safeKeyCharset[c]
}
// validateValue checks if the string is a valid W3C Baggage value.
func validateValue(s string) bool {
for _, c := range s {
if !validateValueChar(c) {

View File

@@ -124,12 +124,22 @@ func TestParsePropertyError(t *testing.T) {
func TestNewKeyProperty(t *testing.T) {
p, err := NewKeyProperty(" ")
assert.ErrorIs(t, err, errInvalidKey)
assert.Equal(t, Property{}, p)
assert.NoError(t, err)
assert.Equal(t, Property{key: " "}, p)
p, err = NewKeyProperty("key")
assert.NoError(t, err)
assert.Equal(t, Property{key: "key"}, p)
// UTF-8 key
p, err = NewKeyProperty("B% 💼")
assert.NoError(t, err)
assert.Equal(t, Property{key: "B% 💼"}, p)
// Invalid UTF-8 key
p, err = NewKeyProperty(string([]byte{255}))
assert.ErrorIs(t, err, errInvalidKey)
assert.Equal(t, Property{}, p)
}
func TestNewKeyValueProperty(t *testing.T) {
@@ -141,19 +151,46 @@ func TestNewKeyValueProperty(t *testing.T) {
assert.ErrorIs(t, err, errInvalidValue)
assert.Equal(t, Property{}, p)
// it won't use percent decoding for key
p, err = NewKeyValueProperty("%zzzzz", "value")
assert.NoError(t, err)
assert.Equal(t, Property{key: "%zzzzz", value: "value", hasValue: true}, p)
// wrong value with wrong decoding
p, err = NewKeyValueProperty("key", "%zzzzz")
assert.ErrorIs(t, err, errInvalidValue)
assert.Equal(t, Property{}, p)
p, err = NewKeyValueProperty("key", "value")
assert.NoError(t, err)
assert.Equal(t, Property{key: "key", value: "value", hasValue: true}, p)
// Percent-encoded value
p, err = NewKeyValueProperty("key", "%C4%85%C5%9B%C4%87")
assert.NoError(t, err)
assert.Equal(t, Property{key: "key", value: "ąść", hasValue: true}, p)
}
func TestNewKeyValuePropertyRaw(t *testing.T) {
p, err := NewKeyValuePropertyRaw(" ", "")
// Empty key
p, err := NewKeyValuePropertyRaw("", " ")
assert.ErrorIs(t, err, errInvalidKey)
assert.Equal(t, Property{}, p)
p, err = NewKeyValuePropertyRaw("key", "Witaj Świecie!")
// Empty value
// Empty string is also a valid UTF-8 string.
p, err = NewKeyValuePropertyRaw(" ", "")
assert.NoError(t, err)
assert.Equal(t, Property{key: "key", value: "Witaj Świecie!", hasValue: true}, p)
assert.Equal(t, Property{key: " ", hasValue: true}, p)
// Space value
p, err = NewKeyValuePropertyRaw(" ", " ")
assert.NoError(t, err)
assert.Equal(t, Property{key: " ", value: " ", hasValue: true}, p)
p, err = NewKeyValuePropertyRaw("B% 💼", "Witaj Świecie!")
assert.NoError(t, err)
assert.Equal(t, Property{key: "B% 💼", value: "Witaj Świecie!", hasValue: true}, p)
}
func TestPropertyValidate(t *testing.T) {
@@ -168,6 +205,10 @@ func TestPropertyValidate(t *testing.T) {
p.hasValue = true
assert.NoError(t, p.validate())
// Invalid value
p.value = string([]byte{255})
assert.ErrorIs(t, p.validate(), errInvalidValue)
}
func TestNewEmptyBaggage(t *testing.T) {
@@ -410,6 +451,24 @@ func TestBaggageParse(t *testing.T) {
"foo": {Value: "ąść"},
},
},
{
name: "encoded UTF-8 string in key",
in: "a=b,%C4%85%C5%9B%C4%87=%C4%85%C5%9B%C4%87",
want: baggage.List{
"a": {Value: "b"},
"%C4%85%C5%9B%C4%87": {Value: "ąść"},
},
},
{
name: "encoded UTF-8 string in property",
in: "a=b,%C4%85%C5%9B%C4%87=%C4%85%C5%9B%C4%87;%C4%85%C5%9B%C4%87=%C4%85%C5%9B%C4%87",
want: baggage.List{
"a": {Value: "b"},
"%C4%85%C5%9B%C4%87": {Value: "ąść", Properties: []baggage.Property{
{Key: "%C4%85%C5%9B%C4%87", HasValue: true, Value: "ąść"},
}},
},
},
{
name: "invalid member: empty",
in: "foo=,,bar=",
@@ -455,6 +514,11 @@ func TestBaggageParse(t *testing.T) {
in: "foo=1;key=\\",
err: errInvalidProperty,
},
{
name: "invalid property: improper url encoded value",
in: "foo=1;key=val%",
err: errInvalidProperty,
},
{
name: "invalid baggage string: too large",
in: tooLarge,
@@ -664,6 +728,29 @@ func TestBaggageString(t *testing.T) {
},
},
},
{
// W3C does not allow percent-encoded keys.
// The baggage that has percent-encoded keys should be ignored.
name: "utf-8 key and value",
out: "foo=B%25%20%F0%9F%92%BC-2;foo-1=B%25%20%F0%9F%92%BC-4;foo-2",
baggage: baggage.List{
"ąść": {
Value: "B% 💼",
Properties: []baggage.Property{
{Key: "ąść-1", Value: "B% 💼-1", HasValue: true},
{Key: "ąść-2"},
},
},
"foo": {
Value: "B% 💼-2",
Properties: []baggage.Property{
{Key: "ąść", Value: "B% 💼-3", HasValue: true},
{Key: "foo-1", Value: "B% 💼-4", HasValue: true},
{Key: "foo-2"},
},
},
},
},
}
orderer := func(s string) string {
@@ -921,6 +1008,10 @@ func TestMemberValidation(t *testing.T) {
m.hasData = true
assert.ErrorIs(t, m.validate(), errInvalidKey)
// Invalid UTF-8 in value
m.key, m.value = "k", string([]byte{255})
assert.ErrorIs(t, m.validate(), errInvalidValue)
m.key, m.value = "k", "\\"
assert.NoError(t, m.validate())
}
@@ -942,6 +1033,24 @@ func TestNewMember(t *testing.T) {
}
assert.Equal(t, expected, m)
// it won't use percent decoding for key
key = "%3B"
m, err = NewMember(key, val, p)
assert.NoError(t, err)
expected = Member{
key: key,
value: val,
properties: properties{{key: "foo"}},
hasData: true,
}
assert.Equal(t, expected, m)
// wrong value with invalid token
key = "k"
val = ";"
_, err = NewMember(key, val, p)
assert.ErrorIs(t, err, errInvalidValue)
// wrong value with wrong decoding
val = "%zzzzz"
_, err = NewMember(key, val, p)
@@ -986,6 +1095,31 @@ func TestNewMemberRaw(t *testing.T) {
assert.Equal(t, expected, m)
}
func TestBaggageUTF8(t *testing.T) {
testCases := map[string]string{
"ąść": "B% 💼",
// Case sensitive
"a": "a",
"A": "A",
}
var members []Member
for k, v := range testCases {
m, err := NewMemberRaw(k, v)
require.NoError(t, err)
members = append(members, m)
}
b, err := New(members...)
require.NoError(t, err)
for k, v := range testCases {
assert.Equal(t, v, b.Member(k).Value())
}
}
func TestPropertiesValidate(t *testing.T) {
p := properties{{}}
assert.ErrorIs(t, p.validate(), errInvalidKey)
@@ -1053,7 +1187,7 @@ func BenchmarkString(b *testing.B) {
addMember("key1", "val1")
addMember("key2", " ;,%")
addMember("key3", "Witaj świecie!")
addMember("B% 💼", "Witaj świecie!")
addMember("key4", strings.Repeat("Hello world!", 10))
bg, err := New(members...)