1
0
mirror of https://github.com/open-telemetry/opentelemetry-go.git synced 2025-10-31 00:07:40 +02:00

Use Set hash in Distinct (2nd attempt) (#7175)

Re-opening https://github.com/open-telemetry/opentelemetry-go/pull/5028
with new benchmarks. For cases with 10 attributes, this reduces the
overhead of metric measurements by ~80-90% (depending on lock
contention). It introduces a small probability of collision for
attribute sets in the metrics SDK. For an instrument with 1 million
different attribute sets, the probability of a collision is
approximately 2 * 10^-8. For a more "normal" cardinality of 1000 on an
instrument, it is approximately 2 * 10^-17.

```
goos: linux
goarch: amd64
pkg: go.opentelemetry.io/otel/attribute
cpu: Intel(R) Xeon(R) CPU @ 2.20GHz
                                            │   main.txt    │              hash.txt               │
                                            │    sec/op     │    sec/op     vs base               │
EquivalentMapAccess/Empty-24                    32.01n ± 2%   10.12n ±  4%  -68.37% (p=0.002 n=6)
EquivalentMapAccess/1_string_attribute-24      106.25n ± 2%   10.01n ±  5%  -90.58% (p=0.002 n=6)
EquivalentMapAccess/10_string_attributes-24   826.250n ± 1%   9.982n ± 11%  -98.79% (p=0.002 n=6)
EquivalentMapAccess/1_int_attribute-24         106.65n ± 2%   10.13n ±  3%  -90.50% (p=0.002 n=6)
EquivalentMapAccess/10_int_attributes-24       833.25n ± 2%   10.04n ±  5%  -98.80% (p=0.002 n=6)
geomean                                         190.3n        10.06n        -94.72%
```

Parallel benchmarks:

```
goos: linux
goarch: amd64
pkg: go.opentelemetry.io/otel/sdk/metric
cpu: Intel(R) Xeon(R) CPU @ 2.20GHz
                                                         │  main24.txt   │              new24.txt              │
                                                         │    sec/op     │    sec/op     vs base               │
SyncMeasure/NoView/Int64Counter/Attributes/0-24             288.4n ± 13%   267.0n ± 16%        ~ (p=0.180 n=6)
SyncMeasure/NoView/Int64Counter/Attributes/1-24             372.7n ± 24%   303.3n ±  6%  -18.61% (p=0.002 n=6)
SyncMeasure/NoView/Int64Counter/Attributes/10-24           1862.5n ± 11%   302.2n ±  6%  -83.77% (p=0.002 n=6)
SyncMeasure/NoView/Float64Counter/Attributes/0-24           288.2n ±  5%   291.8n ± 14%        ~ (p=0.589 n=6)
SyncMeasure/NoView/Float64Counter/Attributes/1-24           374.8n ± 22%   326.2n ± 15%  -12.98% (p=0.002 n=6)
SyncMeasure/NoView/Float64Counter/Attributes/10-24         1984.0n ± 10%   277.9n ± 15%  -85.99% (p=0.002 n=6)
SyncMeasure/NoView/Int64UpDownCounter/Attributes/0-24       286.8n ± 13%   279.4n ± 14%        ~ (p=0.818 n=6)
SyncMeasure/NoView/Int64UpDownCounter/Attributes/1-24       415.4n ± 14%   309.5n ± 11%  -25.47% (p=0.002 n=6)
SyncMeasure/NoView/Int64UpDownCounter/Attributes/10-24     1923.0n ± 19%   294.1n ± 17%  -84.71% (p=0.002 n=6)
SyncMeasure/NoView/Float64UpDownCounter/Attributes/0-24     284.9n ±  5%   271.6n ± 11%        ~ (p=0.240 n=6)
SyncMeasure/NoView/Float64UpDownCounter/Attributes/1-24     382.9n ± 23%   295.7n ± 13%  -22.78% (p=0.002 n=6)
SyncMeasure/NoView/Float64UpDownCounter/Attributes/10-24   1787.0n ± 28%   289.2n ± 12%  -83.81% (p=0.002 n=6)
SyncMeasure/NoView/Int64Histogram/Attributes/0-24           283.4n ±  8%   269.9n ±  9%        ~ (p=0.589 n=6)
SyncMeasure/NoView/Int64Histogram/Attributes/1-24           300.7n ±  8%   270.1n ± 15%  -10.16% (p=0.026 n=6)
SyncMeasure/NoView/Int64Histogram/Attributes/10-24         1046.8n ± 24%   299.2n ± 16%  -71.42% (p=0.002 n=6)
SyncMeasure/NoView/Float64Histogram/Attributes/0-24         264.3n ± 12%   295.9n ±  5%  +11.93% (p=0.026 n=6)
SyncMeasure/NoView/Float64Histogram/Attributes/1-24         321.0n ±  8%   269.4n ± 11%  -16.09% (p=0.002 n=6)
SyncMeasure/NoView/Float64Histogram/Attributes/10-24       1052.2n ± 10%   274.6n ±  5%  -73.90% (p=0.002 n=6)
geomean                                                     540.0n         287.7n        -46.72%
```

Single-threaded benchmarks:

```
goos: linux
goarch: amd64
pkg: go.opentelemetry.io/otel/sdk/metric
cpu: Intel(R) Xeon(R) CPU @ 2.20GHz
                                                      │   main1.txt   │              new1.txt               │
                                                      │    sec/op     │    sec/op     vs base               │
SyncMeasure/NoView/Int64Counter/Attributes/0            130.95n ±  1%   97.99n ± 21%  -25.17% (p=0.002 n=6)
SyncMeasure/NoView/Int64Counter/Attributes/1             300.8n ±  7%   104.6n ±  3%  -65.21% (p=0.002 n=6)
SyncMeasure/NoView/Int64Counter/Attributes/10           1646.0n ±  2%   105.8n ±  2%  -93.58% (p=0.002 n=6)
SyncMeasure/NoView/Float64Counter/Attributes/0          132.65n ±  1%   99.28n ±  4%  -25.16% (p=0.002 n=6)
SyncMeasure/NoView/Float64Counter/Attributes/1           295.4n ±  3%   107.7n ±  3%  -63.54% (p=0.002 n=6)
SyncMeasure/NoView/Float64Counter/Attributes/10         1620.0n ±  1%   109.6n ±  4%  -93.23% (p=0.002 n=6)
SyncMeasure/NoView/Int64UpDownCounter/Attributes/0      132.85n ± 80%   99.34n ±  1%  -25.22% (p=0.002 n=6)
SyncMeasure/NoView/Int64UpDownCounter/Attributes/1       300.4n ±  1%   106.0n ±  1%  -64.71% (p=0.002 n=6)
SyncMeasure/NoView/Int64UpDownCounter/Attributes/10     1622.0n ±  1%   105.8n ±  1%  -93.48% (p=0.002 n=6)
SyncMeasure/NoView/Float64UpDownCounter/Attributes/0    134.90n ± 51%   99.16n ±  4%  -26.49% (p=0.002 n=6)
SyncMeasure/NoView/Float64UpDownCounter/Attributes/1     312.4n ± 34%   107.8n ±  2%  -65.51% (p=0.002 n=6)
SyncMeasure/NoView/Float64UpDownCounter/Attributes/10   1613.0n ± 23%   106.1n ±  1%  -93.43% (p=0.002 n=6)
SyncMeasure/NoView/Int64Histogram/Attributes/0          103.50n ± 17%   88.53n ±  1%  -14.46% (p=0.002 n=6)
SyncMeasure/NoView/Int64Histogram/Attributes/1          199.50n ± 16%   95.44n ±  2%  -52.16% (p=0.002 n=6)
SyncMeasure/NoView/Int64Histogram/Attributes/10         878.70n ±  2%   95.78n ±  2%  -89.10% (p=0.002 n=6)
SyncMeasure/NoView/Float64Histogram/Attributes/0        108.55n ± 54%   88.45n ±  1%  -18.51% (p=0.002 n=6)
SyncMeasure/NoView/Float64Histogram/Attributes/1        257.30n ± 14%   95.05n ±  2%  -63.06% (p=0.002 n=6)
SyncMeasure/NoView/Float64Histogram/Attributes/10       882.70n ± 18%   96.28n ±  1%  -89.09% (p=0.002 n=6)
geomean                                                  355.2n         100.3n        -71.77%
```

---------

Co-authored-by: Tyler Yahn <MrAlias@users.noreply.github.com>
Co-authored-by: Robert Pająk <pellared@hotmail.com>
This commit is contained in:
David Ashpole
2025-09-16 16:04:50 -04:00
committed by GitHub
parent 666f95c114
commit 9d52bde6d6
7 changed files with 654 additions and 54 deletions

View File

@@ -12,6 +12,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- Add `WithInstrumentationAttributeSet` option to `go.opentelemetry.io/otel/log`, `go.opentelemetry.io/otel/metric`, and `go.opentelemetry.io/otel/trace` packages.
This provides a concurrent-safe and performant alternative to `WithInstrumentationAttributes` by accepting a pre-constructed `attribute.Set`. (#7287)
- Greatly reduce the cost of recording metrics in `go.opentelemetry.io/otel/sdk/metric` using hashing for map keys. (#7175)
### Fixed
@@ -29,6 +30,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- `WithInstrumentationAttributes` in `go.opentelemetry.io/otel/trace` synchronously de-duplicates the passed attributes instead of delegating it to the returned `TracerOption`. (#7266)
- `WithInstrumentationAttributes` in `go.opentelemetry.io/otel/metric` synchronously de-duplicates the passed attributes instead of delegating it to the returned `MeterOption`. (#7266)
- `WithInstrumentationAttributes` in `go.opentelemetry.io/otel/log` synchronously de-duplicates the passed attributes instead of delegating it to the returned `LoggerOption`. (#7266)
- `Distinct` in `go.opentelemetry.io/otel/attribute` is no longer guaranteed to uniquely identify an attribute set. Collisions between `Distinct` values for different Sets are possible with extremely high cardinality (billions of series per instrument), but are highly unlikely. (#7175)
<!-- Released section -->
<!-- Don't change this section unless doing release -->

View File

@@ -146,11 +146,12 @@ build-tests/%:
# Tests
TEST_TARGETS := test-default test-bench test-short test-verbose test-race test-concurrent-safe
TEST_TARGETS := test-default test-bench test-short test-verbose test-race test-concurrent-safe test-fuzz
.PHONY: $(TEST_TARGETS) test
test-default test-race: ARGS=-race
test-bench: ARGS=-run=xxxxxMatchNothingxxxxx -test.benchtime=1ms -bench=.
test-short: ARGS=-short
test-fuzz: ARGS=-fuzztime=10s -fuzz
test-verbose: ARGS=-v -race
test-concurrent-safe: ARGS=-run=ConcurrentSafe -count=100 -race
test-concurrent-safe: TIMEOUT=120

92
attribute/hash.go Normal file
View File

@@ -0,0 +1,92 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
package attribute // import "go.opentelemetry.io/otel/attribute"
import (
"fmt"
"reflect"
"go.opentelemetry.io/otel/attribute/internal/fnv"
)
// Type identifiers. These identifiers are hashed before the value of the
// corresponding type. This is done to distinguish values that are hashed with
// the same value representation (e.g. `int64(1)` and `true`, []int64{0} and
// int64(0)).
//
// These are all 8 byte length strings converted to a uint64 representation. A
// uint64 is used instead of the string directly as an optimization, it avoids
// the for loop in [fnv] which adds minor overhead.
//
// Each constant is the little-endian interpretation of the quoted string,
// i.e. the first character of the string is the least significant byte. The
// identifiers only need to be distinct from one another; the strings are
// mnemonic and are never compared or parsed.
const (
boolID uint64 = 7953749933313450591 // "_boolean" (little endian)
int64ID uint64 = 7592915492740740150 // "64_bit_i" (little endian)
float64ID uint64 = 7376742710626956342 // "64_bit_f" (little endian)
stringID uint64 = 6874584755375207263 // "_string_" (little endian)
boolSliceID uint64 = 6875993255270243167 // "_[]bool_" (little endian)
int64SliceID uint64 = 3762322556277578591 // "_[]int64" (little endian)
float64SliceID uint64 = 7308324551835016539 // "[]double" (little endian)
stringSliceID uint64 = 7453010373645655387 // "[]string" (little endian)
)
// hashKVs folds every element of kvs, in slice order, into a freshly seeded
// FNV-1a hash and returns the result. A nil or empty kvs yields the seed
// value unchanged.
func hashKVs(kvs []KeyValue) fnv.Hash {
	sum := fnv.New()
	for i := range kvs {
		sum = hashKV(sum, kvs[i])
	}
	return sum
}
// hashKV returns the FNV-1a hash of kv with h as the base.
//
// The key is hashed first, followed by the identifier of the value's type and
// then the value itself. An INVALID value contributes only the key. hashKV
// panics if the value reports a type that is not handled here.
func hashKV(h fnv.Hash, kv KeyValue) fnv.Hash {
	h = h.String(string(kv.Key))

	switch kv.Value.Type() {
	case BOOL:
		h = h.Uint64(boolID)
		h = h.Uint64(kv.Value.numeric)
	case INT64:
		h = h.Uint64(int64ID)
		h = h.Uint64(kv.Value.numeric)
	case FLOAT64:
		h = h.Uint64(float64ID)
		// Assumes numeric stored with math.Float64bits.
		h = h.Uint64(kv.Value.numeric)
	case STRING:
		h = h.Uint64(stringID)
		h = h.String(kv.Value.stringly)
	case BOOLSLICE:
		h = h.Uint64(boolSliceID)
		rv := reflect.ValueOf(kv.Value.slice)
		for i := 0; i < rv.Len(); i++ {
			h = h.Bool(rv.Index(i).Bool())
		}
	case INT64SLICE:
		h = h.Uint64(int64SliceID)
		rv := reflect.ValueOf(kv.Value.slice)
		for i := 0; i < rv.Len(); i++ {
			h = h.Int64(rv.Index(i).Int())
		}
	case FLOAT64SLICE:
		h = h.Uint64(float64SliceID)
		rv := reflect.ValueOf(kv.Value.slice)
		for i := 0; i < rv.Len(); i++ {
			h = h.Float64(rv.Index(i).Float())
		}
	case STRINGSLICE:
		h = h.Uint64(stringSliceID)
		rv := reflect.ValueOf(kv.Value.slice)
		for i := 0; i < rv.Len(); i++ {
			s := rv.Index(i).String()
			// Strings are hashed as raw bytes with no terminator, so hashing
			// the elements back to back loses where one ends and the next
			// begins: []string{"ab", "c"} and []string{"a", "bc"} would always
			// collide, as would []string{} and []string{""}. Mixing in each
			// element's length first keeps the boundaries in the hash.
			h = h.Int64(int64(len(s)))
			h = h.String(s)
		}
	case INVALID:
		// No type identifier or value is hashed; only the key contributes.
	default:
		// Logging is an alternative, but using the internal logger here
		// causes an import cycle so it is not done.
		v := kv.Value.AsInterface()
		msg := fmt.Sprintf("unknown value type: %[1]v (%[1]T)", v)
		panic(msg)
	}
	return h
}

317
attribute/hash_test.go Normal file
View File

@@ -0,0 +1,317 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
package attribute // import "go.opentelemetry.io/otel/attribute"
import (
"cmp"
"fmt"
"math"
"reflect"
"slices"
"strings"
"testing"
"go.opentelemetry.io/otel/attribute/internal/fnv"
)
// keyVals is all the KeyValue generators that are used for testing. This is
// not []KeyValue so different keys can be used with the test Values.
//
// Every value type is represented by two generators. Several entries are
// chosen on purpose because they could share a raw representation with an
// entry of a different type (see the inline notes); the hashes must still
// differ.
var keyVals = []func(string) KeyValue{
func(k string) KeyValue { return Bool(k, true) },
func(k string) KeyValue { return Bool(k, false) },
func(k string) KeyValue { return BoolSlice(k, []bool{false, true}) },
func(k string) KeyValue { return BoolSlice(k, []bool{true, true, false}) },
func(k string) KeyValue { return Int(k, -1278) },
func(k string) KeyValue { return Int(k, 0) }, // Should be different than false above.
func(k string) KeyValue { return IntSlice(k, []int{3, 23, 21, -8, 0}) },
func(k string) KeyValue { return IntSlice(k, []int{1}) },
func(k string) KeyValue { return Int64(k, 1) }, // Should be different from true and []int{1}.
func(k string) KeyValue { return Int64(k, 29369) },
func(k string) KeyValue { return Int64Slice(k, []int64{3826, -38, -29, -1}) },
func(k string) KeyValue { return Int64Slice(k, []int64{8, -328, 29, 0}) },
func(k string) KeyValue { return Float64(k, -0.3812381) },
func(k string) KeyValue { return Float64(k, 1e32) },
func(k string) KeyValue { return Float64Slice(k, []float64{0.1, -3.8, -29., 0.3321}) },
func(k string) KeyValue { return Float64Slice(k, []float64{-13e8, -32.8, 4., 1e28}) },
func(k string) KeyValue { return String(k, "foo") },
func(k string) KeyValue { return String(k, "bar") },
func(k string) KeyValue { return StringSlice(k, []string{"foo", "bar", "baz"}) },
func(k string) KeyValue { return StringSlice(k, []string{"[]i1"}) },
}
// TestHashKVsEquality hashes every ordered combination of one, two, and three
// keyVals generators for each test key (plus the empty input) and verifies
// that no two distinct combinations share a hash.
func TestHashKVsEquality(t *testing.T) {
	type testcase struct {
		hash fnv.Hash
		kvs  []KeyValue
	}

	keys := []string{"k0", "k1"}

	// Test all combinations up to length 3.
	n := len(keyVals)
	cases := make([]testcase, 0, 1+len(keys)*(n+(n*n)+(n*n*n)))
	record := func(kvs []KeyValue) {
		cases = append(cases, testcase{hash: hashKVs(kvs), kvs: kvs})
	}

	record(nil)
	for _, key := range keys {
		for _, first := range keyVals {
			record([]KeyValue{first(key)})
			for _, second := range keyVals {
				record([]KeyValue{first(key), second(key)})
				for _, third := range keyVals {
					record([]KeyValue{first(key), second(key), third(key)})
				}
			}
		}
	}

	for i, a := range cases {
		for j, b := range cases {
			m := msg{i: i, j: j, hI: a.hash, hJ: b.hash, kvI: a.kvs, kvJ: b.kvs}
			switch {
			case i == j:
				m.cmp = "=="
				if a.hash != b.hash {
					t.Errorf("hashes not equal: %s", m)
				}
			default:
				m.cmp = "!="
				if a.hash == b.hash {
					// Do not use testify/assert here. It is slow.
					t.Errorf("hashes equal: %s", m)
				}
			}
		}
	}
}
// msg describes one pairwise hash comparison so a failure can be reported
// with both operands: their indexes, hashes, and attributes.
type msg struct {
	cmp      string
	i, j     int
	hI, hJ   fnv.Hash
	kvI, kvJ []KeyValue
}

// String renders m as "(i: hash)attrs cmp (j: hash)attrs".
func (m msg) String() string {
	lhs := fmt.Sprintf("(%d: %d)%s", m.i, m.hI, slice(m.kvI))
	rhs := fmt.Sprintf("(%d: %d)%s", m.j, m.hJ, slice(m.kvJ))
	return lhs + " " + m.cmp + " " + rhs
}
// slice formats kvs as "[key:value,key:value,...]" using each value's Emit
// form. An empty or nil kvs is rendered as "[]".
func slice(kvs []KeyValue) string {
	var out strings.Builder
	_, _ = out.WriteRune('[')
	for idx, kv := range kvs {
		if idx > 0 {
			_, _ = out.WriteRune(',')
		}
		_, _ = out.WriteString(string(kv.Key))
		_, _ = out.WriteRune(':')
		_, _ = out.WriteString(kv.Value.Emit())
	}
	_, _ = out.WriteRune(']')
	return out.String()
}
// BenchmarkHashKVs measures hashing one attribute list that contains every
// keyVals generator applied to the key "k".
func BenchmarkHashKVs(b *testing.B) {
	attrs := make([]KeyValue, 0, len(keyVals))
	for _, gen := range keyVals {
		attrs = append(attrs, gen("k"))
	}

	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		hashKVs(attrs)
	}
}
// FuzzHashKVs builds an attribute list of 0-10 entries from the fuzz inputs,
// sorts and de-duplicates it the way a Set would, and checks that hashKVs is
// deterministic and non-zero for it. It also hashes a copy whose first value
// is perturbed and logs, without failing, when the two hashes match.
func FuzzHashKVs(f *testing.F) {
	// Add seed inputs to ensure coverage of edge cases.
	f.Add("", "", "", "", "", "", 0, int64(0), 0.0, false, uint8(0))
	f.Add("key", "value", "🌍", "test", "bool", "float", -1, int64(-1), -1.0, true, uint8(1))
	f.Add("duplicate", "duplicate", "duplicate", "duplicate", "duplicate", "NaN",
		0, int64(0), math.Inf(1), false, uint8(2))

	f.Fuzz(func(t *testing.T, k1, k2, k3, k4, k5, s string, i int, i64 int64, fVal float64, b bool, sliceType uint8) {
		// Test variable number of attributes (0-10).
		numAttrs := len(k1) % 11 // Use key length to determine number of attributes.
		if numAttrs == 0 && k1 == "" {
			// Test empty set.
			if h := hashKVs(nil); h == 0 {
				t.Error("hash of empty slice should not be zero")
			}
			return
		}

		var kvs []KeyValue

		// Add basic types.
		if numAttrs > 0 {
			kvs = append(kvs, String(k1, s))
		}
		if numAttrs > 1 {
			kvs = append(kvs, Int(k2, i))
		}
		if numAttrs > 2 {
			kvs = append(kvs, Int64(k3, i64))
		}
		if numAttrs > 3 {
			kvs = append(kvs, Float64(k4, fVal))
		}
		if numAttrs > 4 {
			kvs = append(kvs, Bool(k5, b))
		}

		// Add slice types based on sliceType parameter.
		if numAttrs > 5 {
			switch sliceType % 4 {
			case 0:
				// Test BoolSlice with variable length.
				bools := make([]bool, len(s)%5) // 0-4 elements
				for idx := range bools {
					bools[idx] = (idx+len(k1))%2 == 0
				}
				kvs = append(kvs, BoolSlice("boolslice", bools))
			case 1:
				// Test IntSlice with variable length.
				ints := make([]int, len(s)%6) // 0-5 elements
				for idx := range ints {
					ints[idx] = idx + len(k2)
				}
				kvs = append(kvs, IntSlice("intslice", ints))
			case 2:
				// Test Int64Slice with variable length.
				int64s := make([]int64, len(s)%4) // 0-3 elements
				for idx := range int64s {
					int64s[idx] = int64(idx) + i64
				}
				kvs = append(kvs, Int64Slice("int64slice", int64s))
			case 3:
				// Test Float64Slice with variable length and special values.
				float64s := make([]float64, len(s)%5) // 0-4 elements
				for idx := range float64s {
					switch idx % 4 {
					case 0:
						float64s[idx] = fVal
					case 1:
						float64s[idx] = math.Inf(1) // +Inf
					case 2:
						float64s[idx] = math.Inf(-1) // -Inf
					case 3:
						float64s[idx] = math.NaN() // NaN
					}
				}
				kvs = append(kvs, Float64Slice("float64slice", float64s))
			}
		}

		// Add StringSlice. The local is not named "strings" so it does not
		// shadow the imported package.
		if numAttrs > 6 {
			strs := make([]string, len(k1)%4) // 0-3 elements
			for idx := range strs {
				strs[idx] = fmt.Sprintf("%s_%d", s, idx)
			}
			kvs = append(kvs, StringSlice("stringslice", strs))
		}

		// Test duplicate keys (should be handled by Set construction).
		if numAttrs > 7 && k1 != "" {
			kvs = append(kvs, String(k1, "duplicate_key_value"))
		}

		// Add more attributes with Unicode keys.
		if numAttrs > 8 {
			kvs = append(kvs, String("🔑", "unicode_key"))
		}
		if numAttrs > 9 {
			kvs = append(kvs, String("empty", ""))
		}

		// Sort to ensure consistent ordering (as Set would do). The sort must
		// be stable: entries with equal keys have to stay in insertion order
		// so that the de-duplication below really keeps the later value.
		slices.SortStableFunc(kvs, func(x, y KeyValue) int {
			return cmp.Compare(string(x.Key), string(y.Key))
		})

		// Remove duplicates (as Set will do), keeping the later value for
		// duplicate keys.
		if len(kvs) > 1 {
			last := 0
			for idx := 1; idx < len(kvs); idx++ {
				if kvs[last].Key != kvs[idx].Key {
					last++
				}
				kvs[last] = kvs[idx]
			}
			kvs = kvs[:last+1]
		}

		// Hash the key-value pairs.
		h1 := hashKVs(kvs)
		h2 := hashKVs(kvs) // Should be deterministic
		if h1 != h2 {
			t.Errorf("hash is not deterministic: %d != %d for kvs=%v", h1, h2, kvs)
		}
		if h1 == 0 && len(kvs) > 0 {
			t.Errorf("hash should not be zero for non-empty input: kvs=%v", kvs)
		}

		// Test that different inputs produce different hashes (most of the time).
		// This is a probabilistic test - collisions are possible but rare.
		if len(kvs) == 0 {
			return
		}

		// Modify one value slightly.
		modifiedKvs := slices.Clone(kvs)
		first := modifiedKvs[0]
		key := string(first.Key)
		switch first.Value.Type() {
		case STRING:
			modifiedKvs[0] = String(key, first.Value.AsString()+"_modified")
		case INT64:
			modifiedKvs[0] = Int64(key, first.Value.AsInt64()+1)
		case BOOL:
			modifiedKvs[0] = Bool(key, !first.Value.AsBool())
		case FLOAT64:
			val := first.Value.AsFloat64()
			if !math.IsNaN(val) && !math.IsInf(val, 0) {
				modifiedKvs[0] = Float64(key, val+1.0)
			}
		}

		h3 := hashKVs(modifiedKvs)
		// Note: We don't assert h1 != h3 because hash collisions are theoretically possible
		// but we can log suspicious cases for manual review.
		if h1 == h3 && !reflect.DeepEqual(kvs, modifiedKvs) {
			t.Logf("Potential hash collision detected: original=%v, modified=%v, hash=%d", kvs, modifiedKvs, h1)
		}
	})
}

View File

@@ -0,0 +1,76 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package fnv provides an efficient and allocation free implementation of the
// FNV-1a, non-cryptographic hash functions created by Glenn Fowler, Landon
// Curt Noll, and Phong Vo. See
// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function.
//
// This implementation is provided as an alternative to "hash/fnv". The
// built-in implementation requires two allocations per Write for a string (one
// for the hash pointer and the other to convert a string to a []byte). This
// implementation is more efficient and does not require any allocations.
package fnv // import "go.opentelemetry.io/otel/attribute/internal/fnv"
import (
"math"
)
// FNV-1a 64-bit parameters, taken from "hash/fnv". Verified at:
//
//   - https://datatracker.ietf.org/doc/html/draft-eastlake-fnv-17.html
//   - http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-param
const (
	offset64 = 14695981039346656037
	prime64  = 1099511628211
)

// Hash is the running state of a 64-bit FNV-1a hash. Every method returns a
// new Hash with additional data mixed in and leaves the receiver untouched.
type Hash uint64

// New returns a Hash initialized to the 64-bit FNV-1a offset basis.
func New() Hash {
	return Hash(offset64)
}

// Uint64 returns h extended with the eight bytes of val, starting with the
// most significant byte. The eight steps are written out instead of looped.
func (h Hash) Uint64(val uint64) Hash {
	sum := uint64(h)
	sum = (sum ^ uint64(byte(val>>56))) * prime64
	sum = (sum ^ uint64(byte(val>>48))) * prime64
	sum = (sum ^ uint64(byte(val>>40))) * prime64
	sum = (sum ^ uint64(byte(val>>32))) * prime64
	sum = (sum ^ uint64(byte(val>>24))) * prime64
	sum = (sum ^ uint64(byte(val>>16))) * prime64
	sum = (sum ^ uint64(byte(val>>8))) * prime64
	sum = (sum ^ uint64(byte(val))) * prime64
	return Hash(sum)
}

// Bool returns h extended with val, hashed as the uint64 1 for true and 0
// for false.
func (h Hash) Bool(val bool) Hash { // nolint:revive // val is not a flag.
	var bit uint64
	if val {
		bit = 1
	}
	return h.Uint64(bit)
}

// Float64 returns h extended with the IEEE 754 bit pattern of val.
func (h Hash) Float64(val float64) Hash {
	bits := math.Float64bits(val)
	return h.Uint64(bits)
}

// Int64 returns h extended with val reinterpreted as a uint64.
func (h Hash) Int64(val int64) Hash {
	return h.Uint64(uint64(val)) // nolint:gosec // overflow doesn't matter since we are hashing.
}

// String returns h extended with the bytes of val, in order. Nothing is
// hashed for an empty string, so h is returned unchanged.
func (h Hash) String(val string) Hash {
	sum := uint64(h)
	for _, c := range []byte(val) {
		sum = (sum ^ uint64(c)) * prime64
	}
	return Hash(sum)
}

View File

@@ -0,0 +1,98 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package fnv
import (
"encoding/binary"
"hash/fnv"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestStringHashCorrectness compares Hash.String against the standard
// library's 64-bit FNV-1a implementation for several inputs.
func TestStringHashCorrectness(t *testing.T) {
	ref := fnv.New64a()
	for _, in := range []string{"", "a", "ab", "abc", "世界"} {
		got := New().String(in)

		ref.Reset()
		n, err := ref.Write([]byte(in))
		require.NoError(t, err)
		require.Equalf(t, len(in), n, "wrote only %d out of %d bytes", n, len(in))

		assert.Equal(t, ref.Sum64(), uint64(got), in)
	}
}
// TestUint64HashCorrectness compares Hash.Uint64 against the standard
// library's 64-bit FNV-1a implementation fed the big-endian encoding of the
// same value.
func TestUint64HashCorrectness(t *testing.T) {
	var scratch [8]byte
	ref := fnv.New64a()
	for _, in := range []uint64{0, 10, 312984238623, 1024} {
		got := New().Uint64(in)

		ref.Reset()
		binary.BigEndian.PutUint64(scratch[:], in)
		n, err := ref.Write(scratch[:])
		require.NoError(t, err)
		require.Equalf(t, 8, n, "wrote only %d out of 8 bytes", n)

		assert.Equal(t, ref.Sum64(), uint64(got), in)
	}
}
// TestIntegrity verifies that hashing ten bytes as one string gives the same
// result as hashing the first two as a string and the remaining eight as a
// big-endian uint64.
func TestIntegrity(t *testing.T) {
	data := []byte{'1', '2', 3, 4, 5, 6, 7, 8, 9, 10}
	head, tail := data[:2], data[2:]

	want := New().String(string(data))
	got := New().String(string(head)).Uint64(binary.BigEndian.Uint64(tail))

	assert.Equal(t, want, got)
}
// result receives every benchmark's output at package scope.
var result Hash

// BenchmarkStringKB measures Hash.String on a 1024-byte string.
func BenchmarkStringKB(b *testing.B) {
	var raw [1024]byte
	for idx := range raw {
		raw[idx] = byte(idx)
	}
	s := string(raw[:])
	h := New()

	b.SetBytes(1024)
	b.ReportAllocs()
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		result = h.String(s)
	}
}
// BenchmarkUint64KB measures Hash.Uint64 on a single 8-byte value.
func BenchmarkUint64KB(b *testing.B) {
	const input = uint64(192386739218721)
	h := New()

	b.SetBytes(8)
	b.ReportAllocs()
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		result = h.Uint64(input)
	}
}

View File

@@ -9,6 +9,8 @@ import (
"reflect"
"slices"
"sort"
"go.opentelemetry.io/otel/attribute/internal/fnv"
)
type (
@@ -23,19 +25,19 @@ type (
// the Equals method to ensure stable equivalence checking.
//
// Users should also use the Distinct returned from Equivalent as a map key
// instead of a Set directly. In addition to that type providing guarantees
// on stable equivalence, it may also provide performance improvements.
// instead of a Set directly. Set has relatively poor performance when used
// as a map key compared to Distinct.
Set struct {
equivalent Distinct
hash fnv.Hash
data any
}
// Distinct is a unique identifier of a Set.
// Distinct is an identifier of a Set which is very likely to be unique.
//
// Distinct is designed to ensure equivalence stability: comparisons will
// return the same value across versions. For this reason, Distinct should
// always be used as a map key instead of a Set.
// Distinct should be used as a map key instead of a Set to provide better
// performance for map operations.
Distinct struct {
iface any
hash fnv.Hash
}
// Sortable implements sort.Interface, used for sorting KeyValue.
@@ -46,6 +48,14 @@ type (
Sortable []KeyValue
)
// Compile time check these types remain comparable.
var (
_ = isComparable(Set{})
_ = isComparable(Distinct{})
)
// isComparable returns t unchanged. Its type parameter is constrained to
// comparable, so calling it with a value only compiles while that value's
// type is comparable.
func isComparable[T comparable](t T) T {
	return t
}
var (
// keyValueType is used in computeDistinctReflect.
keyValueType = reflect.TypeOf(KeyValue{})
@@ -56,15 +66,13 @@ var (
//
// This is kept for backwards compatibility, but should not be used in new code.
userDefinedEmptySet = &Set{
equivalent: Distinct{
iface: [0]KeyValue{},
},
hash: fnv.New(),
data: [0]KeyValue{},
}
emptySet = Set{
equivalent: Distinct{
iface: [0]KeyValue{},
},
hash: fnv.New(),
data: [0]KeyValue{},
}
)
@@ -79,30 +87,28 @@ func EmptySet() *Set {
return userDefinedEmptySet
}
// reflectValue abbreviates reflect.ValueOf(d).
func (d Distinct) reflectValue() reflect.Value {
return reflect.ValueOf(d.iface)
}
// Valid reports whether this value refers to a valid Set.
func (d Distinct) Valid() bool {
return d.iface != nil
func (d Distinct) Valid() bool { return d.hash != 0 }
// reflectValue abbreviates reflect.ValueOf(l.data).
func (l Set) reflectValue() reflect.Value {
return reflect.ValueOf(l.data)
}
// Len returns the number of attributes in this set.
func (l *Set) Len() int {
if l == nil || !l.equivalent.Valid() {
if l == nil || l.hash == 0 {
return 0
}
return l.equivalent.reflectValue().Len()
return l.reflectValue().Len()
}
// Get returns the KeyValue at ordered position idx in this set.
func (l *Set) Get(idx int) (KeyValue, bool) {
if l == nil || !l.equivalent.Valid() {
if l == nil || l.hash == 0 {
return KeyValue{}, false
}
value := l.equivalent.reflectValue()
value := l.reflectValue()
if idx >= 0 && idx < value.Len() {
// Note: The Go compiler successfully avoids an allocation for
@@ -115,10 +121,10 @@ func (l *Set) Get(idx int) (KeyValue, bool) {
// Value returns the value of a specified key in this set.
func (l *Set) Value(k Key) (Value, bool) {
if l == nil || !l.equivalent.Valid() {
if l == nil || l.hash == 0 {
return Value{}, false
}
rValue := l.equivalent.reflectValue()
rValue := l.reflectValue()
vlen := rValue.Len()
idx := sort.Search(vlen, func(idx int) bool {
@@ -158,20 +164,29 @@ func (l *Set) ToSlice() []KeyValue {
return iter.ToSlice()
}
// Equivalent returns a value that may be used as a map key. The Distinct type
// guarantees that the result will equal the equivalent. Distinct value of any
// Equivalent returns a value that may be used as a map key. Equal Distinct
// values are very likely to be equivalent attribute Sets. The same Distinct
// value is returned for any
// attribute set with the same elements as this, where sets are made unique by
// choosing the last value in the input for any given key.
func (l *Set) Equivalent() Distinct {
if l == nil || !l.equivalent.Valid() {
return emptySet.equivalent
if l == nil || l.hash == 0 {
return Distinct{hash: emptySet.hash}
}
return l.equivalent
return Distinct{hash: l.hash}
}
// Equals reports whether the argument set is equivalent to this set.
func (l *Set) Equals(o *Set) bool {
return l.Equivalent() == o.Equivalent()
if l.Equivalent() != o.Equivalent() {
return false
}
if l == nil || l.hash == 0 {
l = &emptySet
}
if o == nil || o.hash == 0 {
o = &emptySet
}
return l.data == o.data
}
// Encoded returns the encoded form of this set, according to encoder.
@@ -241,10 +256,10 @@ func NewSetWithFiltered(kvs []KeyValue, filter Filter) (Set, []KeyValue) {
if filter != nil {
if div := filteredToFront(kvs, filter); div != 0 {
return Set{equivalent: computeDistinct(kvs[div:])}, kvs[:div]
return newSet(kvs[div:]), kvs[:div]
}
}
return Set{equivalent: computeDistinct(kvs)}, nil
return newSet(kvs), nil
}
// NewSetWithSortableFiltered returns a new Set.
@@ -324,7 +339,7 @@ func (l *Set) Filter(re Filter) (Set, []KeyValue) {
if first == 0 {
// It is safe to assume len(slice) >= 1 given we found at least one
// attribute above that needs to be filtered out.
return Set{equivalent: computeDistinct(slice[1:])}, slice[:1]
return newSet(slice[1:]), slice[:1]
}
// Move the filtered slice[first] to the front (preserving order).
@@ -334,25 +349,24 @@ func (l *Set) Filter(re Filter) (Set, []KeyValue) {
// Do not re-evaluate re(slice[first+1:]).
div := filteredToFront(slice[1:first+1], re) + 1
return Set{equivalent: computeDistinct(slice[div:])}, slice[:div]
return newSet(slice[div:]), slice[:div]
}
// computeDistinct returns a Distinct using either the fixed- or
// reflect-oriented code path, depending on the size of the input. The input
// slice is assumed to already be sorted and de-duplicated.
func computeDistinct(kvs []KeyValue) Distinct {
iface := computeDistinctFixed(kvs)
if iface == nil {
iface = computeDistinctReflect(kvs)
// newSet returns a new set based on the sorted and uniqued kvs.
func newSet(kvs []KeyValue) Set {
s := Set{
hash: hashKVs(kvs),
data: computeDataFixed(kvs),
}
return Distinct{
iface: iface,
if s.data == nil {
s.data = computeDataReflect(kvs)
}
return s
}
// computeDistinctFixed computes a Distinct for small slices. It returns nil
// if the input is too large for this code path.
func computeDistinctFixed(kvs []KeyValue) any {
// computeDataFixed computes a Set data for small slices. It returns nil if the
// input is too large for this code path.
func computeDataFixed(kvs []KeyValue) any {
switch len(kvs) {
case 1:
return [1]KeyValue(kvs)
@@ -379,9 +393,9 @@ func computeDistinctFixed(kvs []KeyValue) any {
}
}
// computeDistinctReflect computes a Distinct using reflection, works for any
// size input.
func computeDistinctReflect(kvs []KeyValue) any {
// computeDataReflect computes a Set data using reflection, works for any size
// input.
func computeDataReflect(kvs []KeyValue) any {
at := reflect.New(reflect.ArrayOf(len(kvs), keyValueType)).Elem()
for i, keyValue := range kvs {
*(at.Index(i).Addr().Interface().(*KeyValue)) = keyValue
@@ -391,7 +405,7 @@ func computeDistinctReflect(kvs []KeyValue) any {
// MarshalJSON returns the JSON encoding of the Set.
func (l *Set) MarshalJSON() ([]byte, error) {
return json.Marshal(l.equivalent.iface)
return json.Marshal(l.data)
}
// MarshalLog is the marshaling function used by the logging system to represent this Set.