1
0
mirror of https://github.com/open-telemetry/opentelemetry-go.git synced 2025-11-25 22:41:46 +02:00
Files
opentelemetry-go/sdk/log/record.go

552 lines
14 KiB
Go
Raw Normal View History

2024-03-13 17:47:07 +01:00
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
package log // import "go.opentelemetry.io/otel/sdk/log"
import (
2024-03-14 07:40:48 -07:00
"slices"
"strings"
"sync"
2024-03-13 17:47:07 +01:00
"time"
"unicode/utf8"
2024-03-13 17:47:07 +01:00
"go.opentelemetry.io/otel/internal/global"
2024-03-13 17:47:07 +01:00
"go.opentelemetry.io/otel/log"
"go.opentelemetry.io/otel/sdk/instrumentation"
"go.opentelemetry.io/otel/sdk/resource"
"go.opentelemetry.io/otel/trace"
)
2024-03-14 07:40:48 -07:00
// attributesInlineCount is the number of attributes that are efficiently
// stored in an array within a Record. This value is borrowed from slog which
// performed a quantitative survey of log library use and found this value to
// cover 95% of all use-cases (https://go.dev/blog/slog#performance).
const attributesInlineCount = 5
var logAttrDropped = sync.OnceFunc(func() {
global.Warn("limit reached: dropping log Record attributes")
})
// indexPool is a pool of index maps used for de-duplication.
var indexPool = sync.Pool{
New: func() any { return make(map[string]int) },
}
func getIndex() map[string]int {
return indexPool.Get().(map[string]int)
}
func putIndex(index map[string]int) {
clear(index)
indexPool.Put(index)
}
2024-03-13 17:47:07 +01:00
// Record is a log record emitted by the Logger.
// A log record with non-empty event name is interpreted as an event record.
//
// Do not create instances of Record on your own in production code.
// You can use [go.opentelemetry.io/otel/sdk/log/logtest.RecordFactory]
// for testing purposes.
2024-03-14 07:40:48 -07:00
type Record struct {
// Do not embed the log.Record. Attributes need to be overwrite-able and
// deep-copying needs to be possible.
eventName string
2024-03-14 07:40:48 -07:00
timestamp time.Time
observedTimestamp time.Time
severity log.Severity
severityText string
body log.Value
// The fields below are for optimizing the implementation of Attributes and
// AddAttributes. This design is borrowed from the slog Record type:
// https://cs.opensource.google/go/go/+/refs/tags/go1.22.0:src/log/slog/record.go;l=20
// Allocation optimization: an inline array sized to hold
// the majority of log calls (based on examination of open-source
// code). It holds the start of the list of attributes.
front [attributesInlineCount]log.KeyValue
// The number of attributes in front.
nFront int
// The list of attributes except for those in front.
// Invariants:
// - len(back) > 0 if nFront == len(front)
// - Unused array elements are zero-ed. Used to detect mistakes.
back []log.KeyValue
// dropped is the count of attributes that have been dropped when limits
// were reached.
dropped int
2024-03-14 07:40:48 -07:00
traceID trace.TraceID
spanID trace.SpanID
traceFlags trace.TraceFlags
// resource represents the entity that collected the log.
resource *resource.Resource
// scope is the Scope that the Logger was created with.
scope *instrumentation.Scope
attributeValueLengthLimit int
attributeCountLimit int
// specifies whether we should deduplicate any key value collections or not
allowDupKeys bool
noCmp [0]func() //nolint: unused // This is indeed used.
2024-03-14 07:40:48 -07:00
}
2024-03-13 17:47:07 +01:00
func (r *Record) addDropped(n int) {
logAttrDropped()
r.dropped += n
}
func (r *Record) setDropped(n int) {
logAttrDropped()
r.dropped = n
}
// EventName returns the event name.
// A log record with non-empty event name is interpreted as an event record.
func (r *Record) EventName() string {
return r.eventName
}
// SetEventName sets the event name.
// A log record with non-empty event name is interpreted as an event record.
func (r *Record) SetEventName(s string) {
r.eventName = s
}
2024-03-13 17:47:07 +01:00
// Timestamp returns the time when the log record occurred.
func (r *Record) Timestamp() time.Time {
2024-03-14 07:40:48 -07:00
return r.timestamp
2024-03-13 17:47:07 +01:00
}
// SetTimestamp sets the time when the log record occurred.
func (r *Record) SetTimestamp(t time.Time) {
2024-03-14 07:40:48 -07:00
r.timestamp = t
2024-03-13 17:47:07 +01:00
}
// ObservedTimestamp returns the time when the log record was observed.
func (r *Record) ObservedTimestamp() time.Time {
2024-03-14 07:40:48 -07:00
return r.observedTimestamp
2024-03-13 17:47:07 +01:00
}
// SetObservedTimestamp sets the time when the log record was observed.
func (r *Record) SetObservedTimestamp(t time.Time) {
2024-03-14 07:40:48 -07:00
r.observedTimestamp = t
2024-03-13 17:47:07 +01:00
}
// Severity returns the severity of the log record.
func (r *Record) Severity() log.Severity {
2024-03-14 07:40:48 -07:00
return r.severity
2024-03-13 17:47:07 +01:00
}
// SetSeverity sets the severity level of the log record.
func (r *Record) SetSeverity(level log.Severity) {
2024-03-14 07:40:48 -07:00
r.severity = level
2024-03-13 17:47:07 +01:00
}
// SeverityText returns severity (also known as log level) text. This is the
// original string representation of the severity as it is known at the source.
func (r *Record) SeverityText() string {
2024-03-14 07:40:48 -07:00
return r.severityText
2024-03-13 17:47:07 +01:00
}
// SetSeverityText sets severity (also known as log level) text. This is the
// original string representation of the severity as it is known at the source.
func (r *Record) SetSeverityText(text string) {
2024-03-14 07:40:48 -07:00
r.severityText = text
2024-03-13 17:47:07 +01:00
}
// Body returns the body of the log record.
func (r *Record) Body() log.Value {
2024-03-14 07:40:48 -07:00
return r.body
2024-03-13 17:47:07 +01:00
}
// SetBody sets the body of the log record.
func (r *Record) SetBody(v log.Value) {
if !r.allowDupKeys {
r.body = r.dedupeBodyCollections(v)
} else {
r.body = v
}
2024-03-13 17:47:07 +01:00
}
// WalkAttributes walks all attributes the log record holds by calling f for
// each on each [log.KeyValue] in the [Record]. Iteration stops if f returns false.
func (r *Record) WalkAttributes(f func(log.KeyValue) bool) {
2024-03-14 07:40:48 -07:00
for i := 0; i < r.nFront; i++ {
if !f(r.front[i]) {
return
}
}
for _, a := range r.back {
if !f(a) {
return
}
}
2024-03-13 17:47:07 +01:00
}
// AddAttributes adds attributes to the log record.
// Attributes in attrs will overwrite any attribute already added to r with the same key.
2024-03-13 17:47:07 +01:00
func (r *Record) AddAttributes(attrs ...log.KeyValue) {
n := r.AttributesLen()
if n == 0 {
// Avoid the more complex duplicate map lookups below.
var drop int
if !r.allowDupKeys {
attrs, drop = dedup(attrs)
r.setDropped(drop)
}
attrs, drop := head(attrs, r.attributeCountLimit)
r.addDropped(drop)
r.addAttrs(attrs)
return
}
if !r.allowDupKeys {
// Used to find duplicates between attrs and existing attributes in r.
rIndex := r.attrIndex()
defer putIndex(rIndex)
// Unique attrs that need to be added to r. This uses the same underlying
// array as attrs.
//
// Note, do not iterate attrs twice by just calling dedup(attrs) here.
unique := attrs[:0]
// Used to find duplicates within attrs itself. The index value is the
// index of the element in unique.
uIndex := getIndex()
defer putIndex(uIndex)
// Deduplicate attrs within the scope of all existing attributes.
for _, a := range attrs {
// Last-value-wins for any duplicates in attrs.
idx, found := uIndex[a.Key]
if found {
r.addDropped(1)
unique[idx] = a
continue
}
idx, found = rIndex[a.Key]
if found {
// New attrs overwrite any existing with the same key.
r.addDropped(1)
if idx < 0 {
r.front[-(idx + 1)] = a
} else {
r.back[idx] = a
}
} else {
// Unique attribute.
unique = append(unique, a)
uIndex[a.Key] = len(unique) - 1
}
}
attrs = unique
}
if r.attributeCountLimit > 0 && n+len(attrs) > r.attributeCountLimit {
// Truncate the now unique attributes to comply with limit.
//
// Do not use head(attrs, r.attributeCountLimit - n) here. If
// (r.attributeCountLimit - n) <= 0 attrs needs to be emptied.
Allow GC to collect unneeded slice elements (#5804) ```go type interInst struct { x int } type inter interface { } var sink []inter func BenchmarkX(b *testing.B) { sink = make([]inter, b.N) for i := 0; i < b.N; i++ { sink[i] = &interInst{} } clear(sink) sink = sink[:0] runtime.GC() var ms runtime.MemStats runtime.ReadMemStats(&ms) b.Log(b.N, ms.Frees) // Frees is the cumulative count of heap objects freed. } ``` ``` clear: ioz_test.go:35: 1 589 ioz_test.go:35: 100 711 ioz_test.go:35: 10000 10729 ioz_test.go:35: 1000000 1010750 <-- 1m+ freed ioz_test.go:35: 16076874 17087643 ioz_test.go:35: 19514749 36602412 ``` ``` no clear: ioz_test.go:35: 1 585 ioz_test.go:35: 100 606 ioz_test.go:35: 10000 725 ioz_test.go:35: 1000000 10745 <-- some "overhead" objects freed, not the slice. ioz_test.go:35: 16391445 1010765 ioz_test.go:35: 21765238 17402230 ``` This is documented at https://go.dev/wiki/SliceTricks: > NOTE If the type of the element is a pointer or a struct with pointer fields, which need to be garbage collected, the above implementations of Cut and Delete have a potential memory leak problem: some elements with values are still referenced by slice a’s underlying array, just not “visible” in the slice. Because the “deleted” value is referenced in the underlying array, the deleted value is still “reachable” during GC, even though the value cannot be referenced by your code. If the underlying array is long-lived, this represents a leak. Followed by examples of how zeroing out the slice elements solves the problem. This PR does the same.
2024-11-08 17:36:35 +11:00
last := max(0, r.attributeCountLimit-n)
r.addDropped(len(attrs) - last)
attrs = attrs[:last]
}
r.addAttrs(attrs)
}
// attrIndex returns an index map for all attributes in the Record r. The index
// maps the attribute key to location the attribute is stored. If the value is
// < 0 then -(value + 1) (e.g. -1 -> 0, -2 -> 1, -3 -> 2) represents the index
// in r.nFront. Otherwise, the index is the exact index of r.back.
//
// The returned index is taken from the indexPool. It is the callers
// responsibility to return the index to that pool (putIndex) when done.
func (r *Record) attrIndex() map[string]int {
index := getIndex()
for i := 0; i < r.nFront; i++ {
key := r.front[i].Key
index[key] = -i - 1 // stored in front: negative index.
}
for i := 0; i < len(r.back); i++ {
key := r.back[i].Key
index[key] = i // stored in back: positive index.
}
return index
}
// addAttrs adds attrs to the Record r. This does not validate any limits or
// duplication of attributes, these tasks are left to the caller to handle
// prior to calling.
func (r *Record) addAttrs(attrs []log.KeyValue) {
2024-03-14 07:40:48 -07:00
var i int
for i = 0; i < len(attrs) && r.nFront < len(r.front); i++ {
a := attrs[i]
r.front[r.nFront] = r.applyAttrLimits(a)
2024-03-14 07:40:48 -07:00
r.nFront++
}
for j, a := range attrs[i:] {
attrs[i+j] = r.applyAttrLimits(a)
}
2024-03-14 07:40:48 -07:00
r.back = slices.Grow(r.back, len(attrs[i:]))
r.back = append(r.back, attrs[i:]...)
2024-03-13 17:47:07 +01:00
}
// SetAttributes sets (and overrides) attributes to the log record.
func (r *Record) SetAttributes(attrs ...log.KeyValue) {
var drop int
r.setDropped(0)
if !r.allowDupKeys {
attrs, drop = dedup(attrs)
r.setDropped(drop)
}
attrs, drop = head(attrs, r.attributeCountLimit)
r.addDropped(drop)
2024-03-14 07:40:48 -07:00
r.nFront = 0
var i int
for i = 0; i < len(attrs) && r.nFront < len(r.front); i++ {
a := attrs[i]
r.front[r.nFront] = r.applyAttrLimits(a)
2024-03-14 07:40:48 -07:00
r.nFront++
}
r.back = slices.Clone(attrs[i:])
for i, a := range r.back {
r.back[i] = r.applyAttrLimits(a)
}
2024-03-13 17:47:07 +01:00
}
// head returns the first n values of kvs along with the number of elements
// dropped. If n is less than or equal to zero, kvs is returned with 0.
func head(kvs []log.KeyValue, n int) (out []log.KeyValue, dropped int) {
if n > 0 && len(kvs) > n {
return kvs[:n], len(kvs) - n
}
return kvs, 0
}
// dedup deduplicates kvs front-to-back with the last value saved.
func dedup(kvs []log.KeyValue) (unique []log.KeyValue, dropped int) {
index := getIndex()
defer putIndex(index)
unique = kvs[:0] // Use the same underlying array as kvs.
for _, a := range kvs {
idx, found := index[a.Key]
if found {
dropped++
unique[idx] = a
} else {
unique = append(unique, a)
index[a.Key] = len(unique) - 1
}
}
return unique, dropped
}
2024-03-13 17:47:07 +01:00
// AttributesLen returns the number of attributes in the log record.
func (r *Record) AttributesLen() int {
2024-03-14 07:40:48 -07:00
return r.nFront + len(r.back)
2024-03-13 17:47:07 +01:00
}
// DroppedAttributes returns the number of attributes dropped due to limits
// being reached.
func (r *Record) DroppedAttributes() int {
return r.dropped
}
2024-03-13 17:47:07 +01:00
// TraceID returns the trace ID or empty array.
func (r *Record) TraceID() trace.TraceID {
2024-03-14 07:40:48 -07:00
return r.traceID
2024-03-13 17:47:07 +01:00
}
// SetTraceID sets the trace ID.
func (r *Record) SetTraceID(id trace.TraceID) {
2024-03-14 07:40:48 -07:00
r.traceID = id
2024-03-13 17:47:07 +01:00
}
// SpanID returns the span ID or empty array.
func (r *Record) SpanID() trace.SpanID {
2024-03-14 07:40:48 -07:00
return r.spanID
2024-03-13 17:47:07 +01:00
}
// SetSpanID sets the span ID.
func (r *Record) SetSpanID(id trace.SpanID) {
2024-03-14 07:40:48 -07:00
r.spanID = id
2024-03-13 17:47:07 +01:00
}
// TraceFlags returns the trace flags.
func (r *Record) TraceFlags() trace.TraceFlags {
2024-03-14 07:40:48 -07:00
return r.traceFlags
2024-03-13 17:47:07 +01:00
}
// SetTraceFlags sets the trace flags.
func (r *Record) SetTraceFlags(flags trace.TraceFlags) {
2024-03-14 07:40:48 -07:00
r.traceFlags = flags
2024-03-13 17:47:07 +01:00
}
// Resource returns the entity that collected the log.
func (r *Record) Resource() *resource.Resource {
return r.resource
2024-03-13 17:47:07 +01:00
}
// InstrumentationScope returns the scope that the Logger was created with.
func (r *Record) InstrumentationScope() instrumentation.Scope {
2024-03-14 07:40:48 -07:00
if r.scope == nil {
return instrumentation.Scope{}
}
return *r.scope
2024-03-13 17:47:07 +01:00
}
// Clone returns a copy of the record with no shared state. The original record
// and the clone can both be modified without interfering with each other.
func (r *Record) Clone() Record {
2024-03-14 07:40:48 -07:00
res := *r
res.back = slices.Clone(r.back)
return res
2024-03-13 17:47:07 +01:00
}
func (r *Record) applyAttrLimits(attr log.KeyValue) log.KeyValue {
attr.Value = r.applyValueLimits(attr.Value)
return attr
}
func (r *Record) applyValueLimits(val log.Value) log.Value {
switch val.Kind() {
case log.KindString:
s := val.AsString()
if len(s) > r.attributeValueLengthLimit {
Fix sdk/log record attr value limit (#6032) Fix #6004 Copy of #5997 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/log cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 0.8323n ± 3% 0.7367n ± 3% -11.49% (p=0.000 n=10) Truncate/Zero-8 1.923n ± 32% 1.359n ± 2% -29.34% (p=0.000 n=10) Truncate/Short-8 14.6050n ± 4% 0.8785n ± 1% -93.98% (p=0.000 n=10) Truncate/ASCII-8 8.205n ± 2% 3.601n ± 7% -56.12% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.335n ± 1% 7.206n ± 1% -36.43% (p=0.000 n=10) Truncate/InvalidUTF-8-8 58.26n ± 1% 36.61n ± 1% -37.17% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.16n ± 1% 52.30n ± 1% -35.56% (p=0.000 n=10) geomean 10.04n 4.601n -54.16% │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ```
2024-12-12 00:52:40 -08:00
val = log.StringValue(truncate(r.attributeValueLengthLimit, s))
}
case log.KindSlice:
sl := val.AsSlice()
for i := range sl {
sl[i] = r.applyValueLimits(sl[i])
}
val = log.SliceValue(sl...)
case log.KindMap:
kvs := val.AsMap()
if !r.allowDupKeys {
// Deduplicate then truncate. Do not do at the same time to avoid
// wasted truncation operations.
var dropped int
kvs, dropped = dedup(kvs)
r.addDropped(dropped)
}
for i := range kvs {
kvs[i] = r.applyAttrLimits(kvs[i])
}
val = log.MapValue(kvs...)
}
return val
}
func (r *Record) dedupeBodyCollections(val log.Value) log.Value {
switch val.Kind() {
case log.KindSlice:
sl := val.AsSlice()
for i := range sl {
sl[i] = r.dedupeBodyCollections(sl[i])
}
val = log.SliceValue(sl...)
case log.KindMap:
kvs, _ := dedup(val.AsMap())
for i := range kvs {
kvs[i].Value = r.dedupeBodyCollections(kvs[i].Value)
}
val = log.MapValue(kvs...)
}
return val
}
Fix sdk/log record attr value limit (#6032) Fix #6004 Copy of #5997 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/log cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 0.8323n ± 3% 0.7367n ± 3% -11.49% (p=0.000 n=10) Truncate/Zero-8 1.923n ± 32% 1.359n ± 2% -29.34% (p=0.000 n=10) Truncate/Short-8 14.6050n ± 4% 0.8785n ± 1% -93.98% (p=0.000 n=10) Truncate/ASCII-8 8.205n ± 2% 3.601n ± 7% -56.12% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.335n ± 1% 7.206n ± 1% -36.43% (p=0.000 n=10) Truncate/InvalidUTF-8-8 58.26n ± 1% 36.61n ± 1% -37.17% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.16n ± 1% 52.30n ± 1% -35.56% (p=0.000 n=10) geomean 10.04n 4.601n -54.16% │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ```
2024-12-12 00:52:40 -08:00
// truncate returns a truncated version of s such that it contains less than
// the limit number of characters. Truncation is applied by returning the limit
// number of valid characters contained in s.
//
Fix sdk/log record attr value limit (#6032) Fix #6004 Copy of #5997 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/log cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 0.8323n ± 3% 0.7367n ± 3% -11.49% (p=0.000 n=10) Truncate/Zero-8 1.923n ± 32% 1.359n ± 2% -29.34% (p=0.000 n=10) Truncate/Short-8 14.6050n ± 4% 0.8785n ± 1% -93.98% (p=0.000 n=10) Truncate/ASCII-8 8.205n ± 2% 3.601n ± 7% -56.12% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.335n ± 1% 7.206n ± 1% -36.43% (p=0.000 n=10) Truncate/InvalidUTF-8-8 58.26n ± 1% 36.61n ± 1% -37.17% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.16n ± 1% 52.30n ± 1% -35.56% (p=0.000 n=10) geomean 10.04n 4.601n -54.16% │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ```
2024-12-12 00:52:40 -08:00
// If limit is negative, it returns the original string.
//
Fix sdk/log record attr value limit (#6032) Fix #6004 Copy of #5997 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/log cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 0.8323n ± 3% 0.7367n ± 3% -11.49% (p=0.000 n=10) Truncate/Zero-8 1.923n ± 32% 1.359n ± 2% -29.34% (p=0.000 n=10) Truncate/Short-8 14.6050n ± 4% 0.8785n ± 1% -93.98% (p=0.000 n=10) Truncate/ASCII-8 8.205n ± 2% 3.601n ± 7% -56.12% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.335n ± 1% 7.206n ± 1% -36.43% (p=0.000 n=10) Truncate/InvalidUTF-8-8 58.26n ± 1% 36.61n ± 1% -37.17% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.16n ± 1% 52.30n ± 1% -35.56% (p=0.000 n=10) geomean 10.04n 4.601n -54.16% │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ```
2024-12-12 00:52:40 -08:00
// UTF-8 is supported. When truncating, all invalid characters are dropped
// before applying truncation.
//
// If s already contains less than the limit number of bytes, it is returned
// unchanged. No invalid characters are removed.
func truncate(limit int, s string) string {
// This prioritize performance in the following order based on the most
// common expected use-cases.
//
// - Short values less than the default limit (128).
// - Strings with valid encodings that exceed the limit.
// - No limit.
// - Strings with invalid encodings that exceed the limit.
if limit < 0 || len(s) <= limit {
return s
}
Fix sdk/log record attr value limit (#6032) Fix #6004 Copy of #5997 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/log cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 0.8323n ± 3% 0.7367n ± 3% -11.49% (p=0.000 n=10) Truncate/Zero-8 1.923n ± 32% 1.359n ± 2% -29.34% (p=0.000 n=10) Truncate/Short-8 14.6050n ± 4% 0.8785n ± 1% -93.98% (p=0.000 n=10) Truncate/ASCII-8 8.205n ± 2% 3.601n ± 7% -56.12% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.335n ± 1% 7.206n ± 1% -36.43% (p=0.000 n=10) Truncate/InvalidUTF-8-8 58.26n ± 1% 36.61n ± 1% -37.17% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.16n ± 1% 52.30n ± 1% -35.56% (p=0.000 n=10) geomean 10.04n 4.601n -54.16% │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ```
2024-12-12 00:52:40 -08:00
// Optimistically, assume all valid UTF-8.
var b strings.Builder
count := 0
for i, c := range s {
if c != utf8.RuneError {
count++
if count > limit {
return s[:i]
}
Fix sdk/log record attr value limit (#6032) Fix #6004 Copy of #5997 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/log cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 0.8323n ± 3% 0.7367n ± 3% -11.49% (p=0.000 n=10) Truncate/Zero-8 1.923n ± 32% 1.359n ± 2% -29.34% (p=0.000 n=10) Truncate/Short-8 14.6050n ± 4% 0.8785n ± 1% -93.98% (p=0.000 n=10) Truncate/ASCII-8 8.205n ± 2% 3.601n ± 7% -56.12% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.335n ± 1% 7.206n ± 1% -36.43% (p=0.000 n=10) Truncate/InvalidUTF-8-8 58.26n ± 1% 36.61n ± 1% -37.17% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.16n ± 1% 52.30n ± 1% -35.56% (p=0.000 n=10) geomean 10.04n 4.601n -54.16% │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ```
2024-12-12 00:52:40 -08:00
continue
}
_, size := utf8.DecodeRuneInString(s[i:])
if size == 1 {
// Invalid encoding.
b.Grow(len(s) - 1)
_, _ = b.WriteString(s[:i])
s = s[i:]
break
}
}
Fix sdk/log record attr value limit (#6032) Fix #6004 Copy of #5997 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/log cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 0.8323n ± 3% 0.7367n ± 3% -11.49% (p=0.000 n=10) Truncate/Zero-8 1.923n ± 32% 1.359n ± 2% -29.34% (p=0.000 n=10) Truncate/Short-8 14.6050n ± 4% 0.8785n ± 1% -93.98% (p=0.000 n=10) Truncate/ASCII-8 8.205n ± 2% 3.601n ± 7% -56.12% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.335n ± 1% 7.206n ± 1% -36.43% (p=0.000 n=10) Truncate/InvalidUTF-8-8 58.26n ± 1% 36.61n ± 1% -37.17% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.16n ± 1% 52.30n ± 1% -35.56% (p=0.000 n=10) geomean 10.04n 4.601n -54.16% │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ```
2024-12-12 00:52:40 -08:00
// Fast-path, no invalid input.
if b.Cap() == 0 {
return s
}
Fix sdk/log record attr value limit (#6032) Fix #6004 Copy of #5997 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/log cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ sec/op │ sec/op vs base │ Truncate/Unlimited-8 0.8323n ± 3% 0.7367n ± 3% -11.49% (p=0.000 n=10) Truncate/Zero-8 1.923n ± 32% 1.359n ± 2% -29.34% (p=0.000 n=10) Truncate/Short-8 14.6050n ± 4% 0.8785n ± 1% -93.98% (p=0.000 n=10) Truncate/ASCII-8 8.205n ± 2% 3.601n ± 7% -56.12% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.335n ± 1% 7.206n ± 1% -36.43% (p=0.000 n=10) Truncate/InvalidUTF-8-8 58.26n ± 1% 36.61n ± 1% -37.17% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.16n ± 1% 52.30n ± 1% -35.56% (p=0.000 n=10) geomean 10.04n 4.601n -54.16% │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ B/op │ B/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 16.00 ± 0% 16.00 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ commit-e9c7aac2(old).txt │ commit-878043b9(new).txt │ │ allocs/op │ allocs/op vs base │ Truncate/Unlimited-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Zero-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/Short-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ASCII-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/ValidUTF-8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/InvalidUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ Truncate/MixedUTF-8-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ```
2024-12-12 00:52:40 -08:00
// Truncate while validating UTF-8.
for i := 0; i < len(s) && count < limit; {
c := s[i]
if c < utf8.RuneSelf {
// Optimization for single byte runes (common case).
_ = b.WriteByte(c)
i++
count++
continue
}
_, size := utf8.DecodeRuneInString(s[i:])
if size == 1 {
// We checked for all 1-byte runes above, this is a RuneError.
i++
continue
}
_, _ = b.WriteString(s[i : i+size])
i += size
count++
}
return b.String()
}