You've already forked opentelemetry-go
mirror of
https://github.com/open-telemetry/opentelemetry-go.git
synced 2026-06-03 18:35:08 +02:00
Use Set hash in Distinct (2nd attempt) (#7175)
Re-opening https://github.com/open-telemetry/opentelemetry-go/pull/5028 with new benchmarks. For cases with 10 attributes, this reduces the overhead of metric measurements by ~80-90% (depending on lock contention). It introduces a small probability of collision for attribute sets in the metrics SDK. For an instrument with 1 million different attribute sets, the probability of a collision is approximately 2 * 10^-8. For a more "normal" cardinality of 1000 on an instrument, it is approximately 2 * 10^-17. ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/attribute cpu: Intel(R) Xeon(R) CPU @ 2.20GHz │ main.txt │ hash.txt │ │ sec/op │ sec/op vs base │ EquivalentMapAccess/Empty-24 32.01n ± 2% 10.12n ± 4% -68.37% (p=0.002 n=6) EquivalentMapAccess/1_string_attribute-24 106.25n ± 2% 10.01n ± 5% -90.58% (p=0.002 n=6) EquivalentMapAccess/10_string_attributes-24 826.250n ± 1% 9.982n ± 11% -98.79% (p=0.002 n=6) EquivalentMapAccess/1_int_attribute-24 106.65n ± 2% 10.13n ± 3% -90.50% (p=0.002 n=6) EquivalentMapAccess/10_int_attributes-24 833.25n ± 2% 10.04n ± 5% -98.80% (p=0.002 n=6) geomean 190.3n 10.06n -94.72% ``` Parallel benchmarks: ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/metric cpu: Intel(R) Xeon(R) CPU @ 2.20GHz │ main24.txt │ new24.txt │ │ sec/op │ sec/op vs base │ SyncMeasure/NoView/Int64Counter/Attributes/0-24 288.4n ± 13% 267.0n ± 16% ~ (p=0.180 n=6) SyncMeasure/NoView/Int64Counter/Attributes/1-24 372.7n ± 24% 303.3n ± 6% -18.61% (p=0.002 n=6) SyncMeasure/NoView/Int64Counter/Attributes/10-24 1862.5n ± 11% 302.2n ± 6% -83.77% (p=0.002 n=6) SyncMeasure/NoView/Float64Counter/Attributes/0-24 288.2n ± 5% 291.8n ± 14% ~ (p=0.589 n=6) SyncMeasure/NoView/Float64Counter/Attributes/1-24 374.8n ± 22% 326.2n ± 15% -12.98% (p=0.002 n=6) SyncMeasure/NoView/Float64Counter/Attributes/10-24 1984.0n ± 10% 277.9n ± 15% -85.99% (p=0.002 n=6) SyncMeasure/NoView/Int64UpDownCounter/Attributes/0-24 286.8n ± 13% 279.4n ± 14% ~ (p=0.818 n=6) 
SyncMeasure/NoView/Int64UpDownCounter/Attributes/1-24 415.4n ± 14% 309.5n ± 11% -25.47% (p=0.002 n=6) SyncMeasure/NoView/Int64UpDownCounter/Attributes/10-24 1923.0n ± 19% 294.1n ± 17% -84.71% (p=0.002 n=6) SyncMeasure/NoView/Float64UpDownCounter/Attributes/0-24 284.9n ± 5% 271.6n ± 11% ~ (p=0.240 n=6) SyncMeasure/NoView/Float64UpDownCounter/Attributes/1-24 382.9n ± 23% 295.7n ± 13% -22.78% (p=0.002 n=6) SyncMeasure/NoView/Float64UpDownCounter/Attributes/10-24 1787.0n ± 28% 289.2n ± 12% -83.81% (p=0.002 n=6) SyncMeasure/NoView/Int64Histogram/Attributes/0-24 283.4n ± 8% 269.9n ± 9% ~ (p=0.589 n=6) SyncMeasure/NoView/Int64Histogram/Attributes/1-24 300.7n ± 8% 270.1n ± 15% -10.16% (p=0.026 n=6) SyncMeasure/NoView/Int64Histogram/Attributes/10-24 1046.8n ± 24% 299.2n ± 16% -71.42% (p=0.002 n=6) SyncMeasure/NoView/Float64Histogram/Attributes/0-24 264.3n ± 12% 295.9n ± 5% +11.93% (p=0.026 n=6) SyncMeasure/NoView/Float64Histogram/Attributes/1-24 321.0n ± 8% 269.4n ± 11% -16.09% (p=0.002 n=6) SyncMeasure/NoView/Float64Histogram/Attributes/10-24 1052.2n ± 10% 274.6n ± 5% -73.90% (p=0.002 n=6) geomean 540.0n 287.7n -46.72% ``` Single-threaded benchmarks: ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/metric cpu: Intel(R) Xeon(R) CPU @ 2.20GHz │ main1.txt │ new1.txt │ │ sec/op │ sec/op vs base │ SyncMeasure/NoView/Int64Counter/Attributes/0 130.95n ± 1% 97.99n ± 21% -25.17% (p=0.002 n=6) SyncMeasure/NoView/Int64Counter/Attributes/1 300.8n ± 7% 104.6n ± 3% -65.21% (p=0.002 n=6) SyncMeasure/NoView/Int64Counter/Attributes/10 1646.0n ± 2% 105.8n ± 2% -93.58% (p=0.002 n=6) SyncMeasure/NoView/Float64Counter/Attributes/0 132.65n ± 1% 99.28n ± 4% -25.16% (p=0.002 n=6) SyncMeasure/NoView/Float64Counter/Attributes/1 295.4n ± 3% 107.7n ± 3% -63.54% (p=0.002 n=6) SyncMeasure/NoView/Float64Counter/Attributes/10 1620.0n ± 1% 109.6n ± 4% -93.23% (p=0.002 n=6) SyncMeasure/NoView/Int64UpDownCounter/Attributes/0 132.85n ± 80% 99.34n ± 1% -25.22% (p=0.002 n=6) 
SyncMeasure/NoView/Int64UpDownCounter/Attributes/1 300.4n ± 1% 106.0n ± 1% -64.71% (p=0.002 n=6) SyncMeasure/NoView/Int64UpDownCounter/Attributes/10 1622.0n ± 1% 105.8n ± 1% -93.48% (p=0.002 n=6) SyncMeasure/NoView/Float64UpDownCounter/Attributes/0 134.90n ± 51% 99.16n ± 4% -26.49% (p=0.002 n=6) SyncMeasure/NoView/Float64UpDownCounter/Attributes/1 312.4n ± 34% 107.8n ± 2% -65.51% (p=0.002 n=6) SyncMeasure/NoView/Float64UpDownCounter/Attributes/10 1613.0n ± 23% 106.1n ± 1% -93.43% (p=0.002 n=6) SyncMeasure/NoView/Int64Histogram/Attributes/0 103.50n ± 17% 88.53n ± 1% -14.46% (p=0.002 n=6) SyncMeasure/NoView/Int64Histogram/Attributes/1 199.50n ± 16% 95.44n ± 2% -52.16% (p=0.002 n=6) SyncMeasure/NoView/Int64Histogram/Attributes/10 878.70n ± 2% 95.78n ± 2% -89.10% (p=0.002 n=6) SyncMeasure/NoView/Float64Histogram/Attributes/0 108.55n ± 54% 88.45n ± 1% -18.51% (p=0.002 n=6) SyncMeasure/NoView/Float64Histogram/Attributes/1 257.30n ± 14% 95.05n ± 2% -63.06% (p=0.002 n=6) SyncMeasure/NoView/Float64Histogram/Attributes/10 882.70n ± 18% 96.28n ± 1% -89.09% (p=0.002 n=6) geomean 355.2n 100.3n -71.77% ``` --------- Co-authored-by: Tyler Yahn <MrAlias@users.noreply.github.com> Co-authored-by: Robert Pająk <pellared@hotmail.com>
This commit is contained in:
@@ -0,0 +1,92 @@
|
||||
// Copyright The OpenTelemetry Authors
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package attribute // import "go.opentelemetry.io/otel/attribute"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute/internal/fnv"
|
||||
)
|
||||
|
||||
// Type identifiers. Each identifier is mixed into the hash ahead of the value
// of the corresponding type so that values sharing a raw representation still
// hash differently (e.g. `int64(1)` and `true`, or `[]int64{0}` and
// `int64(0)`).
//
// Every identifier is an 8 byte string packed into a uint64 in little-endian
// order, i.e. the first character is the least-significant byte. A uint64 is
// used rather than the string itself as an optimization: it avoids the
// per-byte for loop that [fnv] uses for strings.
const (
	boolID         uint64 = 0x6e61656c6f6f625f // "_boolean"
	int64ID        uint64 = 0x695f7469625f3436 // "64_bit_i"
	float64ID      uint64 = 0x665f7469625f3436 // "64_bit_f"
	stringID       uint64 = 0x5f676e697274735f // "_string_"
	boolSliceID    uint64 = 0x5f6c6f6f625d5b5f // "_[]bool_"
	int64SliceID   uint64 = 0x3436746e695d5b5f // "_[]int64"
	float64SliceID uint64 = 0x656c62756f645d5b // "[]double"
	stringSliceID  uint64 = 0x676e697274735d5b // "[]string"
)
|
||||
|
||||
// hashKVs returns a new FNV-1a hash of kvs.
|
||||
func hashKVs(kvs []KeyValue) fnv.Hash {
|
||||
h := fnv.New()
|
||||
for _, kv := range kvs {
|
||||
h = hashKV(h, kv)
|
||||
}
|
||||
return h
|
||||
}
|
||||
|
||||
// hashKV returns the FNV-1a hash of kv with h as the base.
|
||||
func hashKV(h fnv.Hash, kv KeyValue) fnv.Hash {
|
||||
h = h.String(string(kv.Key))
|
||||
|
||||
switch kv.Value.Type() {
|
||||
case BOOL:
|
||||
h = h.Uint64(boolID)
|
||||
h = h.Uint64(kv.Value.numeric)
|
||||
case INT64:
|
||||
h = h.Uint64(int64ID)
|
||||
h = h.Uint64(kv.Value.numeric)
|
||||
case FLOAT64:
|
||||
h = h.Uint64(float64ID)
|
||||
// Assumes numeric stored with math.Float64bits.
|
||||
h = h.Uint64(kv.Value.numeric)
|
||||
case STRING:
|
||||
h = h.Uint64(stringID)
|
||||
h = h.String(kv.Value.stringly)
|
||||
case BOOLSLICE:
|
||||
h = h.Uint64(boolSliceID)
|
||||
rv := reflect.ValueOf(kv.Value.slice)
|
||||
for i := 0; i < rv.Len(); i++ {
|
||||
h = h.Bool(rv.Index(i).Bool())
|
||||
}
|
||||
case INT64SLICE:
|
||||
h = h.Uint64(int64SliceID)
|
||||
rv := reflect.ValueOf(kv.Value.slice)
|
||||
for i := 0; i < rv.Len(); i++ {
|
||||
h = h.Int64(rv.Index(i).Int())
|
||||
}
|
||||
case FLOAT64SLICE:
|
||||
h = h.Uint64(float64SliceID)
|
||||
rv := reflect.ValueOf(kv.Value.slice)
|
||||
for i := 0; i < rv.Len(); i++ {
|
||||
h = h.Float64(rv.Index(i).Float())
|
||||
}
|
||||
case STRINGSLICE:
|
||||
h = h.Uint64(stringSliceID)
|
||||
rv := reflect.ValueOf(kv.Value.slice)
|
||||
for i := 0; i < rv.Len(); i++ {
|
||||
h = h.String(rv.Index(i).String())
|
||||
}
|
||||
case INVALID:
|
||||
default:
|
||||
// Logging is an alternative, but using the internal logger here
|
||||
// causes an import cycle so it is not done.
|
||||
v := kv.Value.AsInterface()
|
||||
msg := fmt.Sprintf("unknown value type: %[1]v (%[1]T)", v)
|
||||
panic(msg)
|
||||
}
|
||||
return h
|
||||
}
|
||||
@@ -0,0 +1,317 @@
|
||||
// Copyright The OpenTelemetry Authors
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package attribute // import "go.opentelemetry.io/otel/attribute"
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute/internal/fnv"
|
||||
)
|
||||
|
||||
// keyVals is all the KeyValue generators that are used for testing. This is
|
||||
// not []KeyValue so different keys can be used with the test Values.
|
||||
var keyVals = []func(string) KeyValue{
|
||||
func(k string) KeyValue { return Bool(k, true) },
|
||||
func(k string) KeyValue { return Bool(k, false) },
|
||||
func(k string) KeyValue { return BoolSlice(k, []bool{false, true}) },
|
||||
func(k string) KeyValue { return BoolSlice(k, []bool{true, true, false}) },
|
||||
func(k string) KeyValue { return Int(k, -1278) },
|
||||
func(k string) KeyValue { return Int(k, 0) }, // Should be different than false above.
|
||||
func(k string) KeyValue { return IntSlice(k, []int{3, 23, 21, -8, 0}) },
|
||||
func(k string) KeyValue { return IntSlice(k, []int{1}) },
|
||||
func(k string) KeyValue { return Int64(k, 1) }, // Should be different from true and []int{1}.
|
||||
func(k string) KeyValue { return Int64(k, 29369) },
|
||||
func(k string) KeyValue { return Int64Slice(k, []int64{3826, -38, -29, -1}) },
|
||||
func(k string) KeyValue { return Int64Slice(k, []int64{8, -328, 29, 0}) },
|
||||
func(k string) KeyValue { return Float64(k, -0.3812381) },
|
||||
func(k string) KeyValue { return Float64(k, 1e32) },
|
||||
func(k string) KeyValue { return Float64Slice(k, []float64{0.1, -3.8, -29., 0.3321}) },
|
||||
func(k string) KeyValue { return Float64Slice(k, []float64{-13e8, -32.8, 4., 1e28}) },
|
||||
func(k string) KeyValue { return String(k, "foo") },
|
||||
func(k string) KeyValue { return String(k, "bar") },
|
||||
func(k string) KeyValue { return StringSlice(k, []string{"foo", "bar", "baz"}) },
|
||||
func(k string) KeyValue { return StringSlice(k, []string{"[]i1"}) },
|
||||
}
|
||||
|
||||
func TestHashKVsEquality(t *testing.T) {
|
||||
type testcase struct {
|
||||
hash fnv.Hash
|
||||
kvs []KeyValue
|
||||
}
|
||||
|
||||
keys := []string{"k0", "k1"}
|
||||
|
||||
// Test all combinations up to length 3.
|
||||
n := len(keyVals)
|
||||
result := make([]testcase, 0, 1+len(keys)*(n+(n*n)+(n*n*n)))
|
||||
|
||||
result = append(result, testcase{hashKVs(nil), nil})
|
||||
|
||||
for _, key := range keys {
|
||||
for i := 0; i < len(keyVals); i++ {
|
||||
kvs := []KeyValue{keyVals[i](key)}
|
||||
hash := hashKVs(kvs)
|
||||
result = append(result, testcase{hash, kvs})
|
||||
|
||||
for j := 0; j < len(keyVals); j++ {
|
||||
kvs := []KeyValue{
|
||||
keyVals[i](key),
|
||||
keyVals[j](key),
|
||||
}
|
||||
hash := hashKVs(kvs)
|
||||
result = append(result, testcase{hash, kvs})
|
||||
|
||||
for k := 0; k < len(keyVals); k++ {
|
||||
kvs := []KeyValue{
|
||||
keyVals[i](key),
|
||||
keyVals[j](key),
|
||||
keyVals[k](key),
|
||||
}
|
||||
hash := hashKVs(kvs)
|
||||
result = append(result, testcase{hash, kvs})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < len(result); i++ {
|
||||
hI, kvI := result[i].hash, result[i].kvs
|
||||
for j := 0; j < len(result); j++ {
|
||||
hJ, kvJ := result[j].hash, result[j].kvs
|
||||
m := msg{i: i, j: j, hI: hI, hJ: hJ, kvI: kvI, kvJ: kvJ}
|
||||
if i == j {
|
||||
m.cmp = "=="
|
||||
if hI != hJ {
|
||||
t.Errorf("hashes not equal: %s", m)
|
||||
}
|
||||
} else {
|
||||
m.cmp = "!="
|
||||
if hI == hJ {
|
||||
// Do not use testify/assert here. It is slow.
|
||||
t.Errorf("hashes equal: %s", m)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type msg struct {
|
||||
cmp string
|
||||
i, j int
|
||||
hI, hJ fnv.Hash
|
||||
kvI, kvJ []KeyValue
|
||||
}
|
||||
|
||||
func (m msg) String() string {
|
||||
return fmt.Sprintf(
|
||||
"(%d: %d)%s %s (%d: %d)%s",
|
||||
m.i, m.hI, slice(m.kvI), m.cmp, m.j, m.hJ, slice(m.kvJ),
|
||||
)
|
||||
}
|
||||
|
||||
func slice(kvs []KeyValue) string {
|
||||
if len(kvs) == 0 {
|
||||
return "[]"
|
||||
}
|
||||
|
||||
var b strings.Builder
|
||||
_, _ = b.WriteRune('[')
|
||||
_, _ = b.WriteString(string(kvs[0].Key))
|
||||
_, _ = b.WriteRune(':')
|
||||
_, _ = b.WriteString(kvs[0].Value.Emit())
|
||||
for _, kv := range kvs[1:] {
|
||||
_, _ = b.WriteRune(',')
|
||||
_, _ = b.WriteString(string(kv.Key))
|
||||
_, _ = b.WriteRune(':')
|
||||
_, _ = b.WriteString(kv.Value.Emit())
|
||||
}
|
||||
_, _ = b.WriteRune(']')
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func BenchmarkHashKVs(b *testing.B) {
|
||||
attrs := make([]KeyValue, len(keyVals))
|
||||
for i := range keyVals {
|
||||
attrs[i] = keyVals[i]("k")
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
for b.Loop() {
|
||||
hashKVs(attrs)
|
||||
}
|
||||
}
|
||||
|
||||
func FuzzHashKVs(f *testing.F) {
|
||||
// Add seed inputs to ensure coverage of edge cases.
|
||||
f.Add("", "", "", "", "", "", 0, int64(0), 0.0, false, uint8(0))
|
||||
f.Add("key", "value", "🌍", "test", "bool", "float", -1, int64(-1), -1.0, true, uint8(1))
|
||||
f.Add("duplicate", "duplicate", "duplicate", "duplicate", "duplicate", "NaN",
|
||||
0, int64(0), math.Inf(1), false, uint8(2))
|
||||
|
||||
f.Fuzz(func(t *testing.T, k1, k2, k3, k4, k5, s string, i int, i64 int64, fVal float64, b bool, sliceType uint8) {
|
||||
// Test variable number of attributes (0-10).
|
||||
numAttrs := len(k1) % 11 // Use key length to determine number of attributes.
|
||||
if numAttrs == 0 && k1 == "" {
|
||||
// Test empty set.
|
||||
h := hashKVs(nil)
|
||||
if h == 0 {
|
||||
t.Error("hash of empty slice should not be zero")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
var kvs []KeyValue
|
||||
|
||||
// Add basic types.
|
||||
if numAttrs > 0 {
|
||||
kvs = append(kvs, String(k1, s))
|
||||
}
|
||||
if numAttrs > 1 {
|
||||
kvs = append(kvs, Int(k2, i))
|
||||
}
|
||||
if numAttrs > 2 {
|
||||
kvs = append(kvs, Int64(k3, i64))
|
||||
}
|
||||
if numAttrs > 3 {
|
||||
kvs = append(kvs, Float64(k4, fVal))
|
||||
}
|
||||
if numAttrs > 4 {
|
||||
kvs = append(kvs, Bool(k5, b))
|
||||
}
|
||||
|
||||
// Add slice types based on sliceType parameter
|
||||
if numAttrs > 5 {
|
||||
switch sliceType % 4 {
|
||||
case 0:
|
||||
// Test BoolSlice with variable length.
|
||||
bools := make([]bool, len(s)%5) // 0-4 elements
|
||||
for i := range bools {
|
||||
bools[i] = (i+len(k1))%2 == 0
|
||||
}
|
||||
kvs = append(kvs, BoolSlice("boolslice", bools))
|
||||
case 1:
|
||||
// Test IntSlice with variable length.
|
||||
ints := make([]int, len(s)%6) // 0-5 elements
|
||||
for i := range ints {
|
||||
ints[i] = i + len(k2)
|
||||
}
|
||||
kvs = append(kvs, IntSlice("intslice", ints))
|
||||
case 2:
|
||||
// Test Int64Slice with variable length.
|
||||
int64s := make([]int64, len(s)%4) // 0-3 elements
|
||||
for i := range int64s {
|
||||
int64s[i] = int64(i) + i64
|
||||
}
|
||||
kvs = append(kvs, Int64Slice("int64slice", int64s))
|
||||
case 3:
|
||||
// Test Float64Slice with variable length and special values.
|
||||
float64s := make([]float64, len(s)%5) // 0-4 elements
|
||||
for i := range float64s {
|
||||
switch i % 4 {
|
||||
case 0:
|
||||
float64s[i] = fVal
|
||||
case 1:
|
||||
float64s[i] = math.Inf(1) // +Inf
|
||||
case 2:
|
||||
float64s[i] = math.Inf(-1) // -Inf
|
||||
case 3:
|
||||
float64s[i] = math.NaN() // NaN
|
||||
}
|
||||
}
|
||||
kvs = append(kvs, Float64Slice("float64slice", float64s))
|
||||
}
|
||||
}
|
||||
|
||||
// Add StringSlice.
|
||||
if numAttrs > 6 {
|
||||
strings := make([]string, len(k1)%4) // 0-3 elements
|
||||
for i := range strings {
|
||||
strings[i] = fmt.Sprintf("%s_%d", s, i)
|
||||
}
|
||||
kvs = append(kvs, StringSlice("stringslice", strings))
|
||||
}
|
||||
|
||||
// Test duplicate keys (should be handled by Set construction).
|
||||
if numAttrs > 7 && k1 != "" {
|
||||
kvs = append(kvs, String(k1, "duplicate_key_value"))
|
||||
}
|
||||
|
||||
// Add more attributes with Unicode keys.
|
||||
if numAttrs > 8 {
|
||||
kvs = append(kvs, String("🔑", "unicode_key"))
|
||||
}
|
||||
if numAttrs > 9 {
|
||||
kvs = append(kvs, String("empty", ""))
|
||||
}
|
||||
|
||||
// Sort to ensure consistent ordering (as Set would do).
|
||||
slices.SortFunc(kvs, func(a, b KeyValue) int {
|
||||
return cmp.Compare(string(a.Key), string(b.Key))
|
||||
})
|
||||
|
||||
// Remove duplicates (as Set will do).
|
||||
if len(kvs) > 1 {
|
||||
j := 0
|
||||
for i := 1; i < len(kvs); i++ {
|
||||
if kvs[j].Key != kvs[i].Key {
|
||||
j++
|
||||
kvs[j] = kvs[i]
|
||||
} else {
|
||||
// Keep the later value for duplicate keys.
|
||||
kvs[j] = kvs[i]
|
||||
}
|
||||
}
|
||||
kvs = kvs[:j+1]
|
||||
}
|
||||
|
||||
// Hash the key-value pairs.
|
||||
h1 := hashKVs(kvs)
|
||||
h2 := hashKVs(kvs) // Should be deterministic
|
||||
|
||||
if h1 != h2 {
|
||||
t.Errorf("hash is not deterministic: %d != %d for kvs=%v", h1, h2, kvs)
|
||||
}
|
||||
|
||||
if h1 == 0 && len(kvs) > 0 {
|
||||
t.Errorf("hash should not be zero for non-empty input: kvs=%v", kvs)
|
||||
}
|
||||
|
||||
// Test that different inputs produce different hashes (most of the time).
|
||||
// This is a probabilistic test - collisions are possible but rare.
|
||||
if len(kvs) > 0 {
|
||||
// Modify one value slightly.
|
||||
modifiedKvs := make([]KeyValue, len(kvs))
|
||||
copy(modifiedKvs, kvs)
|
||||
if len(modifiedKvs) > 0 {
|
||||
switch modifiedKvs[0].Value.Type() {
|
||||
case STRING:
|
||||
modifiedKvs[0] = String(string(modifiedKvs[0].Key), modifiedKvs[0].Value.AsString()+"_modified")
|
||||
case INT64:
|
||||
modifiedKvs[0] = Int64(string(modifiedKvs[0].Key), modifiedKvs[0].Value.AsInt64()+1)
|
||||
case BOOL:
|
||||
modifiedKvs[0] = Bool(string(modifiedKvs[0].Key), !modifiedKvs[0].Value.AsBool())
|
||||
case FLOAT64:
|
||||
val := modifiedKvs[0].Value.AsFloat64()
|
||||
if !math.IsNaN(val) && !math.IsInf(val, 0) {
|
||||
modifiedKvs[0] = Float64(string(modifiedKvs[0].Key), val+1.0)
|
||||
}
|
||||
}
|
||||
|
||||
h3 := hashKVs(modifiedKvs)
|
||||
// Note: We don't assert h1 != h3 because hash collisions are theoretically possible
|
||||
// but we can log suspicious cases for manual review.
|
||||
if h1 == h3 && !reflect.DeepEqual(kvs, modifiedKvs) {
|
||||
t.Logf("Potential hash collision detected: original=%v, modified=%v, hash=%d", kvs, modifiedKvs, h1)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
// Copyright The OpenTelemetry Authors
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package fnv provides an efficient and allocation free implementation of the
|
||||
// FNV-1a, non-cryptographic hash functions created by Glenn Fowler, Landon
|
||||
// Curt Noll, and Phong Vo. See
|
||||
// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function.
|
||||
//
|
||||
// This implementation is provided as an alternative to "hash/fnv". The
|
||||
// built-in implementation requires two allocations per Write for a string (one
|
||||
// for the hash pointer and the other to convert a string to a []byte). This
|
||||
// implementation is more efficient and does not require any allocations.
|
||||
package fnv // import "go.opentelemetry.io/otel/attribute/internal/fnv"
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
// FNV-1a 64-bit parameters, taken from "hash/fnv". Verified at:
//
//   - https://datatracker.ietf.org/doc/html/draft-eastlake-fnv-17.html
//   - http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-param
const (
	offset64 = 14695981039346656037
	prime64  = 1099511628211
)

// Hash is the running state of a 64-bit FNV-1a hash. Its methods do not
// modify the receiver; each returns a new Hash with the given value folded
// in.
type Hash uint64

// New returns a new 64-bit FNV-1a Hash initialized to the FNV offset basis.
// Multi-byte values are folded into it in big-endian byte order.
func New() Hash { return Hash(offset64) }

// Uint64 returns h with the eight bytes of val folded in, most-significant
// byte first. The per-byte steps are written out rather than looped.
func (h Hash) Uint64(val uint64) Hash {
	sum := uint64(h)
	sum = (sum ^ uint64(byte(val>>56))) * prime64
	sum = (sum ^ uint64(byte(val>>48))) * prime64
	sum = (sum ^ uint64(byte(val>>40))) * prime64
	sum = (sum ^ uint64(byte(val>>32))) * prime64
	sum = (sum ^ uint64(byte(val>>24))) * prime64
	sum = (sum ^ uint64(byte(val>>16))) * prime64
	sum = (sum ^ uint64(byte(val>>8))) * prime64
	sum = (sum ^ uint64(byte(val))) * prime64
	return Hash(sum)
}

// Bool returns h with val folded in as the uint64 1 (true) or 0 (false).
func (h Hash) Bool(val bool) Hash { // nolint:revive // val is not a flag.
	var bit uint64
	if val {
		bit = 1
	}
	return h.Uint64(bit)
}

// Float64 returns h with the IEEE 754 bit pattern of val (as given by
// math.Float64bits) folded in.
func (h Hash) Float64(val float64) Hash {
	bits := math.Float64bits(val)
	return h.Uint64(bits)
}

// Int64 returns h with val folded in, reinterpreted as a uint64.
func (h Hash) Int64(val int64) Hash {
	return h.Uint64(uint64(val)) // nolint:gosec // overflow doesn't matter since we are hashing.
}

// String returns h with every byte of val folded in, in order. The length of
// val is not included, and an empty val leaves the hash unchanged.
func (h Hash) String(val string) Hash {
	sum := uint64(h)
	for _, c := range []byte(val) {
		sum = (sum ^ uint64(c)) * prime64
	}
	return Hash(sum)
}
|
||||
@@ -0,0 +1,98 @@
|
||||
// Copyright The OpenTelemetry Authors
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package fnv
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"hash/fnv"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestStringHashCorrectness(t *testing.T) {
|
||||
input := []string{"", "a", "ab", "abc", "世界"}
|
||||
|
||||
refH := fnv.New64a()
|
||||
for _, in := range input {
|
||||
h := New()
|
||||
got := h.String(in)
|
||||
|
||||
refH.Reset()
|
||||
n, err := refH.Write([]byte(in))
|
||||
require.NoError(t, err)
|
||||
require.Equalf(t, len(in), n, "wrote only %d out of %d bytes", n, len(in))
|
||||
want := refH.Sum64()
|
||||
|
||||
assert.Equal(t, want, uint64(got), in)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUint64HashCorrectness(t *testing.T) {
|
||||
input := []uint64{0, 10, 312984238623, 1024}
|
||||
|
||||
buf := make([]byte, 8)
|
||||
refH := fnv.New64a()
|
||||
for _, in := range input {
|
||||
h := New()
|
||||
got := h.Uint64(in)
|
||||
|
||||
refH.Reset()
|
||||
binary.BigEndian.PutUint64(buf, in)
|
||||
n, err := refH.Write(buf)
|
||||
require.NoError(t, err)
|
||||
require.Equalf(t, 8, n, "wrote only %d out of 8 bytes", n)
|
||||
want := refH.Sum64()
|
||||
|
||||
assert.Equal(t, want, uint64(got), in)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegrity(t *testing.T) {
|
||||
data := []byte{'1', '2', 3, 4, 5, 6, 7, 8, 9, 10}
|
||||
h0 := New()
|
||||
want := h0.String(string(data))
|
||||
|
||||
h1 := New()
|
||||
got := h1.String(string(data[:2]))
|
||||
num := binary.BigEndian.Uint64(data[2:])
|
||||
got = got.Uint64(num)
|
||||
|
||||
assert.Equal(t, want, got)
|
||||
}
|
||||
|
||||
var result Hash
|
||||
|
||||
func BenchmarkStringKB(b *testing.B) {
|
||||
b.SetBytes(1024)
|
||||
data := make([]byte, 1024)
|
||||
for i := range data {
|
||||
data[i] = byte(i)
|
||||
}
|
||||
s := string(data)
|
||||
h := New()
|
||||
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
result = h.String(s)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkUint64KB(b *testing.B) {
|
||||
b.SetBytes(8)
|
||||
i := uint64(192386739218721)
|
||||
h := New()
|
||||
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
result = h.Uint64(i)
|
||||
}
|
||||
}
|
||||
+67
-53
@@ -9,6 +9,8 @@ import (
|
||||
"reflect"
|
||||
"slices"
|
||||
"sort"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute/internal/fnv"
|
||||
)
|
||||
|
||||
type (
|
||||
@@ -23,19 +25,19 @@ type (
|
||||
// the Equals method to ensure stable equivalence checking.
|
||||
//
|
||||
// Users should also use the Distinct returned from Equivalent as a map key
|
||||
// instead of a Set directly. In addition to that type providing guarantees
|
||||
// on stable equivalence, it may also provide performance improvements.
|
||||
// instead of a Set directly. Set has relatively poor performance when used
|
||||
// as a map key compared to Distinct.
|
||||
Set struct {
|
||||
equivalent Distinct
|
||||
hash fnv.Hash
|
||||
data any
|
||||
}
|
||||
|
||||
// Distinct is a unique identifier of a Set.
|
||||
// Distinct is an identifier of a Set which is very likely to be unique.
|
||||
//
|
||||
// Distinct is designed to ensure equivalence stability: comparisons will
|
||||
// return the same value across versions. For this reason, Distinct should
|
||||
// always be used as a map key instead of a Set.
|
||||
// Distinct should be used as a map key instead of a Set to provide better
|
||||
// performance for map operations.
|
||||
Distinct struct {
|
||||
iface any
|
||||
hash fnv.Hash
|
||||
}
|
||||
|
||||
// Sortable implements sort.Interface, used for sorting KeyValue.
|
||||
@@ -46,6 +48,14 @@ type (
|
||||
Sortable []KeyValue
|
||||
)
|
||||
|
||||
// Compile time check these types remain comparable.
|
||||
var (
|
||||
_ = isComparable(Set{})
|
||||
_ = isComparable(Distinct{})
|
||||
)
|
||||
|
||||
func isComparable[T comparable](t T) T { return t }
|
||||
|
||||
var (
|
||||
// keyValueType is used in computeDistinctReflect.
|
||||
keyValueType = reflect.TypeOf(KeyValue{})
|
||||
@@ -56,15 +66,13 @@ var (
|
||||
//
|
||||
// This is kept for backwards compatibility, but should not be used in new code.
|
||||
userDefinedEmptySet = &Set{
|
||||
equivalent: Distinct{
|
||||
iface: [0]KeyValue{},
|
||||
},
|
||||
hash: fnv.New(),
|
||||
data: [0]KeyValue{},
|
||||
}
|
||||
|
||||
emptySet = Set{
|
||||
equivalent: Distinct{
|
||||
iface: [0]KeyValue{},
|
||||
},
|
||||
hash: fnv.New(),
|
||||
data: [0]KeyValue{},
|
||||
}
|
||||
)
|
||||
|
||||
@@ -79,30 +87,28 @@ func EmptySet() *Set {
|
||||
return userDefinedEmptySet
|
||||
}
|
||||
|
||||
// reflectValue abbreviates reflect.ValueOf(d).
|
||||
func (d Distinct) reflectValue() reflect.Value {
|
||||
return reflect.ValueOf(d.iface)
|
||||
}
|
||||
|
||||
// Valid reports whether this value refers to a valid Set.
|
||||
func (d Distinct) Valid() bool {
|
||||
return d.iface != nil
|
||||
func (d Distinct) Valid() bool { return d.hash != 0 }
|
||||
|
||||
// reflectValue abbreviates reflect.ValueOf(l.data).
|
||||
func (l Set) reflectValue() reflect.Value {
|
||||
return reflect.ValueOf(l.data)
|
||||
}
|
||||
|
||||
// Len returns the number of attributes in this set.
|
||||
func (l *Set) Len() int {
|
||||
if l == nil || !l.equivalent.Valid() {
|
||||
if l == nil || l.hash == 0 {
|
||||
return 0
|
||||
}
|
||||
return l.equivalent.reflectValue().Len()
|
||||
return l.reflectValue().Len()
|
||||
}
|
||||
|
||||
// Get returns the KeyValue at ordered position idx in this set.
|
||||
func (l *Set) Get(idx int) (KeyValue, bool) {
|
||||
if l == nil || !l.equivalent.Valid() {
|
||||
if l == nil || l.hash == 0 {
|
||||
return KeyValue{}, false
|
||||
}
|
||||
value := l.equivalent.reflectValue()
|
||||
value := l.reflectValue()
|
||||
|
||||
if idx >= 0 && idx < value.Len() {
|
||||
// Note: The Go compiler successfully avoids an allocation for
|
||||
@@ -115,10 +121,10 @@ func (l *Set) Get(idx int) (KeyValue, bool) {
|
||||
|
||||
// Value returns the value of a specified key in this set.
|
||||
func (l *Set) Value(k Key) (Value, bool) {
|
||||
if l == nil || !l.equivalent.Valid() {
|
||||
if l == nil || l.hash == 0 {
|
||||
return Value{}, false
|
||||
}
|
||||
rValue := l.equivalent.reflectValue()
|
||||
rValue := l.reflectValue()
|
||||
vlen := rValue.Len()
|
||||
|
||||
idx := sort.Search(vlen, func(idx int) bool {
|
||||
@@ -158,20 +164,29 @@ func (l *Set) ToSlice() []KeyValue {
|
||||
return iter.ToSlice()
|
||||
}
|
||||
|
||||
// Equivalent returns a value that may be used as a map key. The Distinct type
|
||||
// guarantees that the result will equal the equivalent. Distinct value of any
|
||||
// Equivalent returns a value that may be used as a map key. Equal Distinct
|
||||
// values are very likely to be equivalent attribute Sets. Distinct value of any
|
||||
// attribute set with the same elements as this, where sets are made unique by
|
||||
// choosing the last value in the input for any given key.
|
||||
func (l *Set) Equivalent() Distinct {
|
||||
if l == nil || !l.equivalent.Valid() {
|
||||
return emptySet.equivalent
|
||||
if l == nil || l.hash == 0 {
|
||||
return Distinct{hash: emptySet.hash}
|
||||
}
|
||||
return l.equivalent
|
||||
return Distinct{hash: l.hash}
|
||||
}
|
||||
|
||||
// Equals reports whether the argument set is equivalent to this set.
|
||||
func (l *Set) Equals(o *Set) bool {
|
||||
return l.Equivalent() == o.Equivalent()
|
||||
if l.Equivalent() != o.Equivalent() {
|
||||
return false
|
||||
}
|
||||
if l == nil || l.hash == 0 {
|
||||
l = &emptySet
|
||||
}
|
||||
if o == nil || o.hash == 0 {
|
||||
o = &emptySet
|
||||
}
|
||||
return l.data == o.data
|
||||
}
|
||||
|
||||
// Encoded returns the encoded form of this set, according to encoder.
|
||||
@@ -241,10 +256,10 @@ func NewSetWithFiltered(kvs []KeyValue, filter Filter) (Set, []KeyValue) {
|
||||
|
||||
if filter != nil {
|
||||
if div := filteredToFront(kvs, filter); div != 0 {
|
||||
return Set{equivalent: computeDistinct(kvs[div:])}, kvs[:div]
|
||||
return newSet(kvs[div:]), kvs[:div]
|
||||
}
|
||||
}
|
||||
return Set{equivalent: computeDistinct(kvs)}, nil
|
||||
return newSet(kvs), nil
|
||||
}
|
||||
|
||||
// NewSetWithSortableFiltered returns a new Set.
|
||||
@@ -324,7 +339,7 @@ func (l *Set) Filter(re Filter) (Set, []KeyValue) {
|
||||
if first == 0 {
|
||||
// It is safe to assume len(slice) >= 1 given we found at least one
|
||||
// attribute above that needs to be filtered out.
|
||||
return Set{equivalent: computeDistinct(slice[1:])}, slice[:1]
|
||||
return newSet(slice[1:]), slice[:1]
|
||||
}
|
||||
|
||||
// Move the filtered slice[first] to the front (preserving order).
|
||||
@@ -334,25 +349,24 @@ func (l *Set) Filter(re Filter) (Set, []KeyValue) {
|
||||
|
||||
// Do not re-evaluate re(slice[first+1:]).
|
||||
div := filteredToFront(slice[1:first+1], re) + 1
|
||||
return Set{equivalent: computeDistinct(slice[div:])}, slice[:div]
|
||||
return newSet(slice[div:]), slice[:div]
|
||||
}
|
||||
|
||||
// computeDistinct returns a Distinct using either the fixed- or
|
||||
// reflect-oriented code path, depending on the size of the input. The input
|
||||
// slice is assumed to already be sorted and de-duplicated.
|
||||
func computeDistinct(kvs []KeyValue) Distinct {
|
||||
iface := computeDistinctFixed(kvs)
|
||||
if iface == nil {
|
||||
iface = computeDistinctReflect(kvs)
|
||||
// newSet returns a new set based on the sorted and uniqued kvs.
|
||||
func newSet(kvs []KeyValue) Set {
|
||||
s := Set{
|
||||
hash: hashKVs(kvs),
|
||||
data: computeDataFixed(kvs),
|
||||
}
|
||||
return Distinct{
|
||||
iface: iface,
|
||||
if s.data == nil {
|
||||
s.data = computeDataReflect(kvs)
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// computeDistinctFixed computes a Distinct for small slices. It returns nil
|
||||
// if the input is too large for this code path.
|
||||
func computeDistinctFixed(kvs []KeyValue) any {
|
||||
// computeDataFixed computes a Set data for small slices. It returns nil if the
|
||||
// input is too large for this code path.
|
||||
func computeDataFixed(kvs []KeyValue) any {
|
||||
switch len(kvs) {
|
||||
case 1:
|
||||
return [1]KeyValue(kvs)
|
||||
@@ -379,9 +393,9 @@ func computeDistinctFixed(kvs []KeyValue) any {
|
||||
}
|
||||
}
|
||||
|
||||
// computeDistinctReflect computes a Distinct using reflection, works for any
|
||||
// size input.
|
||||
func computeDistinctReflect(kvs []KeyValue) any {
|
||||
// computeDataReflect computes a Set data using reflection, works for any size
|
||||
// input.
|
||||
func computeDataReflect(kvs []KeyValue) any {
|
||||
at := reflect.New(reflect.ArrayOf(len(kvs), keyValueType)).Elem()
|
||||
for i, keyValue := range kvs {
|
||||
*(at.Index(i).Addr().Interface().(*KeyValue)) = keyValue
|
||||
@@ -391,7 +405,7 @@ func computeDistinctReflect(kvs []KeyValue) any {
|
||||
|
||||
// MarshalJSON returns the JSON encoding of the Set.
|
||||
func (l *Set) MarshalJSON() ([]byte, error) {
|
||||
return json.Marshal(l.equivalent.iface)
|
||||
return json.Marshal(l.data)
|
||||
}
|
||||
|
||||
// MarshalLog is the marshaling function used by the logging system to represent this Set.
|
||||
|
||||
Reference in New Issue
Block a user