1
0
mirror of https://github.com/open-telemetry/opentelemetry-go.git synced 2025-04-21 11:57:04 +02:00

418 lines
9.6 KiB
Go
Raw Normal View History

// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
package trace
import (
"bytes"
"context"
"fmt"
"testing"
"github.com/stretchr/testify/assert"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/trace"
)
func TestSetStatus(t *testing.T) {
tests := []struct {
name string
span recordingSpan
code codes.Code
description string
expected Status
}{
{
"Error and description should overwrite Unset",
recordingSpan{},
codes.Error,
"description",
Status{Code: codes.Error, Description: "description"},
},
{
"Ok should overwrite Unset and ignore description",
recordingSpan{},
codes.Ok,
"description",
Status{Code: codes.Ok},
},
{
"Error and description should return error and overwrite description",
recordingSpan{status: Status{Code: codes.Error, Description: "d1"}},
codes.Error,
"d2",
Status{Code: codes.Error, Description: "d2"},
},
{
"Ok should overwrite error and remove description",
recordingSpan{status: Status{Code: codes.Error, Description: "d1"}},
codes.Ok,
"d2",
Status{Code: codes.Ok},
},
{
"Error and description should be ignored when already Ok",
recordingSpan{status: Status{Code: codes.Ok}},
codes.Error,
"d2",
Status{Code: codes.Ok},
},
{
"Ok should be noop when already Ok",
recordingSpan{status: Status{Code: codes.Ok}},
codes.Ok,
"d2",
Status{Code: codes.Ok},
},
{
"Unset should be noop when already Ok",
recordingSpan{status: Status{Code: codes.Ok}},
codes.Unset,
"d2",
Status{Code: codes.Ok},
},
{
"Unset should be noop when already Error",
recordingSpan{status: Status{Code: codes.Error, Description: "d1"}},
codes.Unset,
"d2",
Status{Code: codes.Error, Description: "d1"},
},
}
for i := range tests {
tc := &tests[i]
t.Run(tc.name, func(t *testing.T) {
tc.span.SetStatus(tc.code, tc.description)
assert.Equal(t, tc.expected, tc.span.status)
})
}
}
func TestTruncateAttr(t *testing.T) {
const key = "key"
strAttr := attribute.String(key, "value")
strSliceAttr := attribute.StringSlice(key, []string{"value-0", "value-1"})
tests := []struct {
limit int
attr, want attribute.KeyValue
}{
{
limit: -1,
attr: strAttr,
want: strAttr,
},
{
limit: -1,
attr: strSliceAttr,
want: strSliceAttr,
},
{
limit: 0,
attr: attribute.Bool(key, true),
want: attribute.Bool(key, true),
},
{
limit: 0,
attr: attribute.BoolSlice(key, []bool{true, false}),
want: attribute.BoolSlice(key, []bool{true, false}),
},
{
limit: 0,
attr: attribute.Int(key, 42),
want: attribute.Int(key, 42),
},
{
limit: 0,
attr: attribute.IntSlice(key, []int{42, -1}),
want: attribute.IntSlice(key, []int{42, -1}),
},
{
limit: 0,
attr: attribute.Int64(key, 42),
want: attribute.Int64(key, 42),
},
{
limit: 0,
attr: attribute.Int64Slice(key, []int64{42, -1}),
want: attribute.Int64Slice(key, []int64{42, -1}),
},
{
limit: 0,
attr: attribute.Float64(key, 42),
want: attribute.Float64(key, 42),
},
{
limit: 0,
attr: attribute.Float64Slice(key, []float64{42, -1}),
want: attribute.Float64Slice(key, []float64{42, -1}),
},
{
limit: 0,
attr: strAttr,
want: attribute.String(key, ""),
},
{
limit: 0,
attr: strSliceAttr,
want: attribute.StringSlice(key, []string{"", ""}),
},
{
limit: 0,
attr: attribute.Stringer(key, bytes.NewBufferString("value")),
want: attribute.String(key, ""),
},
{
limit: 1,
attr: strAttr,
want: attribute.String(key, "v"),
},
{
limit: 1,
attr: strSliceAttr,
want: attribute.StringSlice(key, []string{"v", "v"}),
},
{
limit: 5,
attr: strAttr,
want: strAttr,
},
{
limit: 7,
attr: strSliceAttr,
want: strSliceAttr,
},
{
limit: 6,
attr: attribute.StringSlice(key, []string{"value", "value-1"}),
want: attribute.StringSlice(key, []string{"value", "value-"}),
},
{
limit: 128,
attr: strAttr,
want: strAttr,
},
{
limit: 128,
attr: strSliceAttr,
want: strSliceAttr,
},
Fix attribute value truncation (#5997) Fix #5996 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/trace cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 1.2300n ± 7% 0.8757n ± 3% -28.80% (p=0.000 n=10) Truncate/Zero-8 2.341n ± 2% 1.550n ± 9% -33.77% (p=0.000 n=10) Truncate/Short-8 31.6800n ± 3% 0.9960n ± 4% -96.86% (p=0.000 n=10) Truncate/ASCII-8 8.821n ± 1% 3.567n ± 3% -59.57% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.960n ± 1% 7.163n ± 1% -40.10% (p=0.000 n=10) Truncate/InvalidUTF-8-8 56.35n ± 0% 37.34n ± 18% -33.74% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.83n ± 1% 50.00n ± 1% -38.90% (p=0.000 n=10) geomean 12.37n 4.865n -60.68% │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ``` #### Values shorter than limit This is the default code path. Most attribute values will be shorter than the default 128 limit that users will not modify. The current code, `safeTruncate` requires a full iteration of the value to determine it is valid and under the limit. The replacement, `truncate`, first checks if the number of bytes in the value are less than or equal to the limit (which guarantees the number of characters are less than or equal to the limit) and returns immediately. This will mean that invalid encoding less than the limit is not changed, which meets the specification requirements. #### Values longer than the limit For values who's number of bytes exceeds the limit, they are iterated only once with the replacement, `truncate`. In comparison, the current code, `safeTruncate`, can iterate the string up to three separate times when the string contains invalid characters.
2024-11-26 00:41:55 -08:00
}
for _, test := range tests {
name := fmt.Sprintf("%s->%s(limit:%d)", test.attr.Key, test.attr.Value.Emit(), test.limit)
t.Run(name, func(t *testing.T) {
assert.Equal(t, test.want, truncateAttr(test.limit, test.attr))
})
}
}
func TestTruncate(t *testing.T) {
type group struct {
limit int
input string
expected string
}
tests := []struct {
name string
groups []group
}{
// Edge case: limit is negative, no truncation should occur
{
name: "NoTruncation",
groups: []group{
{-1, "No truncation!", "No truncation!"},
},
},
// Edge case: string is already shorter than the limit, no truncation
// should occur
{
Fix attribute value truncation (#5997) Fix #5996 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/trace cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 1.2300n ± 7% 0.8757n ± 3% -28.80% (p=0.000 n=10) Truncate/Zero-8 2.341n ± 2% 1.550n ± 9% -33.77% (p=0.000 n=10) Truncate/Short-8 31.6800n ± 3% 0.9960n ± 4% -96.86% (p=0.000 n=10) Truncate/ASCII-8 8.821n ± 1% 3.567n ± 3% -59.57% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.960n ± 1% 7.163n ± 1% -40.10% (p=0.000 n=10) Truncate/InvalidUTF-8-8 56.35n ± 0% 37.34n ± 18% -33.74% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.83n ± 1% 50.00n ± 1% -38.90% (p=0.000 n=10) geomean 12.37n 4.865n -60.68% │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ``` #### Values shorter than limit This is the default code path. Most attribute values will be shorter than the default 128 limit that users will not modify. The current code, `safeTruncate` requires a full iteration of the value to determine it is valid and under the limit. The replacement, `truncate`, first checks if the number of bytes in the value are less than or equal to the limit (which guarantees the number of characters are less than or equal to the limit) and returns immediately. This will mean that invalid encoding less than the limit is not changed, which meets the specification requirements. #### Values longer than the limit For values who's number of bytes exceeds the limit, they are iterated only once with the replacement, `truncate`. In comparison, the current code, `safeTruncate`, can iterate the string up to three separate times when the string contains invalid characters.
2024-11-26 00:41:55 -08:00
name: "ShortText",
groups: []group{
{10, "Short text", "Short text"},
{15, "Short text", "Short text"},
{100, "Short text", "Short text"},
},
},
Fix attribute value truncation (#5997) Fix #5996 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/trace cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 1.2300n ± 7% 0.8757n ± 3% -28.80% (p=0.000 n=10) Truncate/Zero-8 2.341n ± 2% 1.550n ± 9% -33.77% (p=0.000 n=10) Truncate/Short-8 31.6800n ± 3% 0.9960n ± 4% -96.86% (p=0.000 n=10) Truncate/ASCII-8 8.821n ± 1% 3.567n ± 3% -59.57% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.960n ± 1% 7.163n ± 1% -40.10% (p=0.000 n=10) Truncate/InvalidUTF-8-8 56.35n ± 0% 37.34n ± 18% -33.74% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.83n ± 1% 50.00n ± 1% -38.90% (p=0.000 n=10) geomean 12.37n 4.865n -60.68% │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ``` #### Values shorter than limit This is the default code path. Most attribute values will be shorter than the default 128 limit that users will not modify. The current code, `safeTruncate` requires a full iteration of the value to determine it is valid and under the limit. The replacement, `truncate`, first checks if the number of bytes in the value are less than or equal to the limit (which guarantees the number of characters are less than or equal to the limit) and returns immediately. This will mean that invalid encoding less than the limit is not changed, which meets the specification requirements. #### Values longer than the limit For values who's number of bytes exceeds the limit, they are iterated only once with the replacement, `truncate`. In comparison, the current code, `safeTruncate`, can iterate the string up to three separate times when the string contains invalid characters.
2024-11-26 00:41:55 -08:00
// Edge case: truncation happens with ASCII characters only
{
Fix attribute value truncation (#5997) Fix #5996 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/trace cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 1.2300n ± 7% 0.8757n ± 3% -28.80% (p=0.000 n=10) Truncate/Zero-8 2.341n ± 2% 1.550n ± 9% -33.77% (p=0.000 n=10) Truncate/Short-8 31.6800n ± 3% 0.9960n ± 4% -96.86% (p=0.000 n=10) Truncate/ASCII-8 8.821n ± 1% 3.567n ± 3% -59.57% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.960n ± 1% 7.163n ± 1% -40.10% (p=0.000 n=10) Truncate/InvalidUTF-8-8 56.35n ± 0% 37.34n ± 18% -33.74% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.83n ± 1% 50.00n ± 1% -38.90% (p=0.000 n=10) geomean 12.37n 4.865n -60.68% │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ``` #### Values shorter than limit This is the default code path. Most attribute values will be shorter than the default 128 limit that users will not modify. The current code, `safeTruncate` requires a full iteration of the value to determine it is valid and under the limit. The replacement, `truncate`, first checks if the number of bytes in the value are less than or equal to the limit (which guarantees the number of characters are less than or equal to the limit) and returns immediately. This will mean that invalid encoding less than the limit is not changed, which meets the specification requirements. #### Values longer than the limit For values who's number of bytes exceeds the limit, they are iterated only once with the replacement, `truncate`. In comparison, the current code, `safeTruncate`, can iterate the string up to three separate times when the string contains invalid characters.
2024-11-26 00:41:55 -08:00
name: "ASCIIOnly",
groups: []group{
{1, "Hello World!", "H"},
{5, "Hello World!", "Hello"},
{12, "Hello World!", "Hello World!"},
},
},
Fix attribute value truncation (#5997) Fix #5996 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/trace cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 1.2300n ± 7% 0.8757n ± 3% -28.80% (p=0.000 n=10) Truncate/Zero-8 2.341n ± 2% 1.550n ± 9% -33.77% (p=0.000 n=10) Truncate/Short-8 31.6800n ± 3% 0.9960n ± 4% -96.86% (p=0.000 n=10) Truncate/ASCII-8 8.821n ± 1% 3.567n ± 3% -59.57% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.960n ± 1% 7.163n ± 1% -40.10% (p=0.000 n=10) Truncate/InvalidUTF-8-8 56.35n ± 0% 37.34n ± 18% -33.74% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.83n ± 1% 50.00n ± 1% -38.90% (p=0.000 n=10) geomean 12.37n 4.865n -60.68% │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ``` #### Values shorter than limit This is the default code path. Most attribute values will be shorter than the default 128 limit that users will not modify. The current code, `safeTruncate` requires a full iteration of the value to determine it is valid and under the limit. The replacement, `truncate`, first checks if the number of bytes in the value are less than or equal to the limit (which guarantees the number of characters are less than or equal to the limit) and returns immediately. This will mean that invalid encoding less than the limit is not changed, which meets the specification requirements. #### Values longer than the limit For values who's number of bytes exceeds the limit, they are iterated only once with the replacement, `truncate`. In comparison, the current code, `safeTruncate`, can iterate the string up to three separate times when the string contains invalid characters.
2024-11-26 00:41:55 -08:00
// Truncation including multi-byte characters (UTF-8)
{
Fix attribute value truncation (#5997) Fix #5996 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/trace cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 1.2300n ± 7% 0.8757n ± 3% -28.80% (p=0.000 n=10) Truncate/Zero-8 2.341n ± 2% 1.550n ± 9% -33.77% (p=0.000 n=10) Truncate/Short-8 31.6800n ± 3% 0.9960n ± 4% -96.86% (p=0.000 n=10) Truncate/ASCII-8 8.821n ± 1% 3.567n ± 3% -59.57% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.960n ± 1% 7.163n ± 1% -40.10% (p=0.000 n=10) Truncate/InvalidUTF-8-8 56.35n ± 0% 37.34n ± 18% -33.74% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.83n ± 1% 50.00n ± 1% -38.90% (p=0.000 n=10) geomean 12.37n 4.865n -60.68% │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ``` #### Values shorter than limit This is the default code path. Most attribute values will be shorter than the default 128 limit that users will not modify. The current code, `safeTruncate` requires a full iteration of the value to determine it is valid and under the limit. The replacement, `truncate`, first checks if the number of bytes in the value are less than or equal to the limit (which guarantees the number of characters are less than or equal to the limit) and returns immediately. This will mean that invalid encoding less than the limit is not changed, which meets the specification requirements. #### Values longer than the limit For values who's number of bytes exceeds the limit, they are iterated only once with the replacement, `truncate`. In comparison, the current code, `safeTruncate`, can iterate the string up to three separate times when the string contains invalid characters.
2024-11-26 00:41:55 -08:00
name: "ValidUTF-8",
groups: []group{
{7, "Hello, 世界", "Hello, "},
{8, "Hello, 世界", "Hello, 世"},
{2, "こんにちは", "こん"},
{3, "こんにちは", "こんに"},
{5, "こんにちは", "こんにちは"},
{12, "こんにちは", "こんにちは"},
},
},
// Truncation with invalid UTF-8 characters
{
name: "InvalidUTF-8",
groups: []group{
{11, "Invalid\x80text", "Invalidtext"},
// Do not modify invalid text if equal to limit.
{11, "Valid text\x80", "Valid text\x80"},
// Do not modify invalid text if under limit.
{15, "Valid text\x80", "Valid text\x80"},
{5, "Hello\x80World", "Hello"},
{11, "Hello\x80World\x80!", "HelloWorld!"},
{15, "Hello\x80World\x80Test", "HelloWorldTest"},
{15, "Hello\x80\x80\x80World\x80Test", "HelloWorldTest"},
{15, "\x80\x80\x80Hello\x80\x80\x80World\x80Test\x80\x80", "HelloWorldTest"},
},
},
// Truncation with mixed validn and invalid UTF-8 characters
{
name: "MixedUTF-8",
groups: []group{
{6, "€"[0:2] + "hello€€", "hello€"},
{6, "€" + "€"[0:2] + "hello", "€hello"},
{11, "Valid text\x80📜", "Valid text📜"},
{11, "Valid text📜\x80", "Valid text📜"},
{14, "😊 Hello\x80World🌍🚀", "😊 HelloWorld🌍🚀"},
{14, "😊\x80 Hello\x80World🌍🚀", "😊 HelloWorld🌍🚀"},
{14, "😊\x80 Hello\x80World🌍\x80🚀", "😊 HelloWorld🌍🚀"},
{14, "😊\x80 Hello\x80World🌍\x80🚀\x80", "😊 HelloWorld🌍🚀"},
{14, "\x80😊\x80 Hello\x80World🌍\x80🚀\x80", "😊 HelloWorld🌍🚀"},
},
},
// Edge case: empty string, should return empty string
{
name: "Empty",
groups: []group{
{5, "", ""},
},
},
// Edge case: limit is 0, should return an empty string
{
name: "Zero",
groups: []group{
{0, "Some text", ""},
{0, "", ""},
},
},
}
Fix attribute value truncation (#5997) Fix #5996 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/trace cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 1.2300n ± 7% 0.8757n ± 3% -28.80% (p=0.000 n=10) Truncate/Zero-8 2.341n ± 2% 1.550n ± 9% -33.77% (p=0.000 n=10) Truncate/Short-8 31.6800n ± 3% 0.9960n ± 4% -96.86% (p=0.000 n=10) Truncate/ASCII-8 8.821n ± 1% 3.567n ± 3% -59.57% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.960n ± 1% 7.163n ± 1% -40.10% (p=0.000 n=10) Truncate/InvalidUTF-8-8 56.35n ± 0% 37.34n ± 18% -33.74% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.83n ± 1% 50.00n ± 1% -38.90% (p=0.000 n=10) geomean 12.37n 4.865n -60.68% │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ``` #### Values shorter than limit This is the default code path. Most attribute values will be shorter than the default 128 limit that users will not modify. The current code, `safeTruncate` requires a full iteration of the value to determine it is valid and under the limit. The replacement, `truncate`, first checks if the number of bytes in the value are less than or equal to the limit (which guarantees the number of characters are less than or equal to the limit) and returns immediately. This will mean that invalid encoding less than the limit is not changed, which meets the specification requirements. #### Values longer than the limit For values who's number of bytes exceeds the limit, they are iterated only once with the replacement, `truncate`. In comparison, the current code, `safeTruncate`, can iterate the string up to three separate times when the string contains invalid characters.
2024-11-26 00:41:55 -08:00
for _, tt := range tests {
for _, g := range tt.groups {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
got := truncate(g.limit, g.input)
assert.Equalf(
t, g.expected, got,
"input: %q([]rune%v))\ngot: %q([]rune%v)\nwant %q([]rune%v)",
g.input, []rune(g.input),
got, []rune(got),
g.expected, []rune(g.expected),
)
})
}
}
}
func BenchmarkTruncate(b *testing.B) {
run := func(limit int, input string) func(b *testing.B) {
return func(b *testing.B) {
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
var out string
for pb.Next() {
out = truncate(limit, input)
}
_ = out
})
}
}
Fix attribute value truncation (#5997) Fix #5996 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/trace cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 1.2300n ± 7% 0.8757n ± 3% -28.80% (p=0.000 n=10) Truncate/Zero-8 2.341n ± 2% 1.550n ± 9% -33.77% (p=0.000 n=10) Truncate/Short-8 31.6800n ± 3% 0.9960n ± 4% -96.86% (p=0.000 n=10) Truncate/ASCII-8 8.821n ± 1% 3.567n ± 3% -59.57% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.960n ± 1% 7.163n ± 1% -40.10% (p=0.000 n=10) Truncate/InvalidUTF-8-8 56.35n ± 0% 37.34n ± 18% -33.74% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.83n ± 1% 50.00n ± 1% -38.90% (p=0.000 n=10) geomean 12.37n 4.865n -60.68% │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-b6264913(old).txt │ commit-54c61ac2(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ``` #### Values shorter than limit This is the default code path. Most attribute values will be shorter than the default 128 limit that users will not modify. The current code, `safeTruncate` requires a full iteration of the value to determine it is valid and under the limit. The replacement, `truncate`, first checks if the number of bytes in the value are less than or equal to the limit (which guarantees the number of characters are less than or equal to the limit) and returns immediately. This will mean that invalid encoding less than the limit is not changed, which meets the specification requirements. #### Values longer than the limit For values who's number of bytes exceeds the limit, they are iterated only once with the replacement, `truncate`. In comparison, the current code, `safeTruncate`, can iterate the string up to three separate times when the string contains invalid characters.
2024-11-26 00:41:55 -08:00
b.Run("Unlimited", run(-1, "hello 😊 world 🌍🚀"))
b.Run("Zero", run(0, "Some text"))
b.Run("Short", run(10, "Short Text"))
b.Run("ASCII", run(5, "Hello, World!"))
b.Run("ValidUTF-8", run(10, "hello 😊 world 🌍🚀"))
b.Run("InvalidUTF-8", run(6, "€"[0:2]+"hello€€"))
b.Run("MixedUTF-8", run(14, "\x80😊\x80 Hello\x80World🌍\x80🚀\x80"))
}
func TestLogDropAttrs(t *testing.T) {
orig := logDropAttrs
t.Cleanup(func() { logDropAttrs = orig })
var called bool
logDropAttrs = func() { called = true }
s := &recordingSpan{}
s.addDroppedAttr(1)
assert.True(t, called, "logDropAttrs not called")
called = false
s.addDroppedAttr(1)
assert.False(t, called, "logDropAttrs called multiple times for same Span")
}
func BenchmarkRecordingSpanSetAttributes(b *testing.B) {
var attrs []attribute.KeyValue
for i := 0; i < 100; i++ {
attr := attribute.String(fmt.Sprintf("hello.attrib%d", i), fmt.Sprintf("goodbye.attrib%d", i))
attrs = append(attrs, attr)
}
ctx := context.Background()
for _, limit := range []bool{false, true} {
b.Run(fmt.Sprintf("WithLimit/%t", limit), func(b *testing.B) {
b.ReportAllocs()
sl := NewSpanLimits()
if limit {
sl.AttributeCountLimit = 50
}
tp := NewTracerProvider(WithSampler(AlwaysSample()), WithSpanLimits(sl))
tracer := tp.Tracer("tracer")
b.ResetTimer()
for n := 0; n < b.N; n++ {
_, span := tracer.Start(ctx, "span")
span.SetAttributes(attrs...)
span.End()
}
})
}
}
func BenchmarkSpanEnd(b *testing.B) {
tracer := NewTracerProvider().Tracer("")
ctx := trace.ContextWithSpanContext(context.Background(), trace.SpanContext{})
spans := make([]trace.Span, b.N)
for i := 0; i < b.N; i++ {
_, span := tracer.Start(ctx, "")
spans[i] = span
}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
spans[i].End()
}
}