1
0
mirror of https://github.com/open-telemetry/opentelemetry-go.git synced 2026-06-03 18:35:08 +02:00
Files
opentelemetry-go/attribute/hash.go
T
Preeti Dewani 49292857b7 Replace fnv with xxhash (#7497)
**Objective**:
- Performance comparison between fnv and xxhash in terms of ops/sec,
allocations and collisions
- Implement xxhash according to first objective

**Changes**:
- fnv is replaced by xxhash. 
  Perform stats:
  - **Collision**: No collision upto 100M
  - **Allocations**: Same in both cases
- **Ops/sec**: xxhash performed better in cases with medium to large
strings
 
 **Benchmarks**:
 ```
 benchstat old.txt new.txt
goos: darwin
goarch: arm64
pkg: go.opentelemetry.io/otel/attribute
cpu: Apple M2
│ old.txt │ new.txt │
│ sec/op │ sec/op vs base │
NewSet-8 205.5n ± 1% 229.4n ± 1% +11.61% (p=0.002 n=6)
NewSetSmallStrings-8 160.5n ± 1% 169.0n ± 5% +5.26% (p=0.002 n=6)
NewSetMediumStrings-8 263.8n ± 6% 185.0n ± 1% -29.89% (p=0.002 n=6)
NewSetLargeStrings-8 426.4n ± 9% 210.2n ± 1% -50.72% (p=0.002 n=6)
NewSetVeryLargeStrings-8 1012.5n ± 7% 238.7n ± 2% -76.43% (p=0.002 n=6)
NewSetHugeStrings-8 3622.0n ± 8% 397.1n ± 1% -89.04% (p=0.002 n=6)
geomean                     488.6n        228.6n       -53.21%

│ old.txt │ new.txt │
│ B/op │ B/op vs base │
NewSet-8 448.0 ± 0% 448.0 ± 0% ~ (p=1.000 n=6) ¹
NewSetSmallStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹
NewSetMediumStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹
NewSetLargeStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹
NewSetVeryLargeStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹
NewSetHugeStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹
geomean                    338.5        338.5       +0.00%
¹ all samples are equal

│ old.txt │ new.txt │
│ allocs/op │ allocs/op vs base │
NewSet-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹
NewSetSmallStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹
NewSetMediumStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹
NewSetLargeStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹
NewSetVeryLargeStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹
NewSetHugeStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹
geomean                    1.000        1.000       +0.00%
¹ all samples are equal
```

Previous implementation for reference: 
https://github.com/open-telemetry/opentelemetry-go/blame/d0483a7c89d936dcced557fb523465daeac16967/CHANGELOG.md#L16

---------

Co-authored-by: Robert Pająk <pellared@hotmail.com>
2025-11-19 11:06:20 +01:00

93 lines
2.8 KiB
Go

// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
package attribute // import "go.opentelemetry.io/otel/attribute"
import (
"fmt"
"reflect"
"go.opentelemetry.io/otel/attribute/internal/xxhash"
)
// Type identifiers. These identifiers are hashed before the value of the
// corresponding type. This is done to distinguish values that are hashed with
// the same value representation (e.g. `int64(1)` and `true`, []int64{0} and
// int64(0)).
//
// These are all 8 byte length strings converted to a uint64 representation. A
// uint64 is used instead of the string directly as an optimization, it avoids
// the for loop in [xxhash] which adds minor overhead.
const (
boolID uint64 = 7953749933313450591 // "_boolean" (little endian)
int64ID uint64 = 7592915492740740150 // "64_bit_i" (little endian)
float64ID uint64 = 7376742710626956342 // "64_bit_f" (little endian)
stringID uint64 = 6874584755375207263 // "_string_" (little endian)
boolSliceID uint64 = 6875993255270243167 // "_[]bool_" (little endian)
int64SliceID uint64 = 3762322556277578591 // "_[]int64" (little endian)
float64SliceID uint64 = 7308324551835016539 // "[]double" (little endian)
stringSliceID uint64 = 7453010373645655387 // "[]string" (little endian)
)
// hashKVs returns a new xxHash64 hash of kvs.
func hashKVs(kvs []KeyValue) uint64 {
h := xxhash.New()
for _, kv := range kvs {
h = hashKV(h, kv)
}
return h.Sum64()
}
// hashKV returns the xxHash64 hash of kv with h as the base.
func hashKV(h xxhash.Hash, kv KeyValue) xxhash.Hash {
h = h.String(string(kv.Key))
switch kv.Value.Type() {
case BOOL:
h = h.Uint64(boolID)
h = h.Uint64(kv.Value.numeric)
case INT64:
h = h.Uint64(int64ID)
h = h.Uint64(kv.Value.numeric)
case FLOAT64:
h = h.Uint64(float64ID)
// Assumes numeric stored with math.Float64bits.
h = h.Uint64(kv.Value.numeric)
case STRING:
h = h.Uint64(stringID)
h = h.String(kv.Value.stringly)
case BOOLSLICE:
h = h.Uint64(boolSliceID)
rv := reflect.ValueOf(kv.Value.slice)
for i := 0; i < rv.Len(); i++ {
h = h.Bool(rv.Index(i).Bool())
}
case INT64SLICE:
h = h.Uint64(int64SliceID)
rv := reflect.ValueOf(kv.Value.slice)
for i := 0; i < rv.Len(); i++ {
h = h.Int64(rv.Index(i).Int())
}
case FLOAT64SLICE:
h = h.Uint64(float64SliceID)
rv := reflect.ValueOf(kv.Value.slice)
for i := 0; i < rv.Len(); i++ {
h = h.Float64(rv.Index(i).Float())
}
case STRINGSLICE:
h = h.Uint64(stringSliceID)
rv := reflect.ValueOf(kv.Value.slice)
for i := 0; i < rv.Len(); i++ {
h = h.String(rv.Index(i).String())
}
case INVALID:
default:
// Logging is an alternative, but using the internal logger here
// causes an import cycle so it is not done.
v := kv.Value.AsInterface()
msg := fmt.Sprintf("unknown value type: %[1]v (%[1]T)", v)
panic(msg)
}
return h
}