You've already forked opentelemetry-go
mirror of
https://github.com/open-telemetry/opentelemetry-go.git
synced 2026-06-03 18:35:08 +02:00
49292857b7
**Objective**: - Performance comparison between fnv and xxhash in terms of ops/sec, allocations and collisions - Implement xxhash according to first objective **Changes**: - fnv is replaced by xxhash. Performance stats: - **Collision**: No collisions up to 100M - **Allocations**: Same in both cases - **Ops/sec**: xxhash performed better in cases with medium to large strings **Benchmarks**: ``` benchstat old.txt new.txt goos: darwin goarch: arm64 pkg: go.opentelemetry.io/otel/attribute cpu: Apple M2 │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ NewSet-8 205.5n ± 1% 229.4n ± 1% +11.61% (p=0.002 n=6) NewSetSmallStrings-8 160.5n ± 1% 169.0n ± 5% +5.26% (p=0.002 n=6) NewSetMediumStrings-8 263.8n ± 6% 185.0n ± 1% -29.89% (p=0.002 n=6) NewSetLargeStrings-8 426.4n ± 9% 210.2n ± 1% -50.72% (p=0.002 n=6) NewSetVeryLargeStrings-8 1012.5n ± 7% 238.7n ± 2% -76.43% (p=0.002 n=6) NewSetHugeStrings-8 3622.0n ± 8% 397.1n ± 1% -89.04% (p=0.002 n=6) geomean 488.6n 228.6n -53.21% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ NewSet-8 448.0 ± 0% 448.0 ± 0% ~ (p=1.000 n=6) ¹ NewSetSmallStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹ NewSetMediumStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹ NewSetLargeStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹ NewSetVeryLargeStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹ NewSetHugeStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹ geomean 338.5 338.5 +0.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ NewSet-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹ NewSetSmallStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹ NewSetMediumStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹ NewSetLargeStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹ NewSetVeryLargeStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹ NewSetHugeStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` Previous implementation for reference: 
https://github.com/open-telemetry/opentelemetry-go/blame/d0483a7c89d936dcced557fb523465daeac16967/CHANGELOG.md#L16 --------- Co-authored-by: Robert Pająk <pellared@hotmail.com>
93 lines
2.8 KiB
Go
93 lines
2.8 KiB
Go
// Copyright The OpenTelemetry Authors
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
package attribute // import "go.opentelemetry.io/otel/attribute"
|
|
|
|
import (
|
|
"fmt"
|
|
"reflect"
|
|
|
|
"go.opentelemetry.io/otel/attribute/internal/xxhash"
|
|
)
|
|
|
|
// Type identifiers. One of these is hashed ahead of every attribute value so
// that values of different types sharing the same raw representation (e.g.
// `int64(1)` and `true`, or `[]int64{0}` and `int64(0)`) do not produce the
// same hash input.
//
// Each identifier is an 8 byte ASCII tag packed into a uint64 in little-endian
// order. A uint64 is used rather than the string itself as an optimization:
// it avoids the for loop in [xxhash], which adds minor overhead.
//
// The values are written in hexadecimal so the tag bytes are visible in the
// literal: the least significant byte is the first character of the tag, so
// the tag reads right to left. The equivalent decimal value is kept in the
// trailing comment for reference.
const (
	boolID         uint64 = 0x6e61656c6f6f625f // "_boolean" (little endian), 7953749933313450591
	int64ID        uint64 = 0x695f7469625f3436 // "64_bit_i" (little endian), 7592915492740740150
	float64ID      uint64 = 0x665f7469625f3436 // "64_bit_f" (little endian), 7376742710626956342
	stringID       uint64 = 0x5f676e697274735f // "_string_" (little endian), 6874584755375207263
	boolSliceID    uint64 = 0x5f6c6f6f625d5b5f // "_[]bool_" (little endian), 6875993255270243167
	int64SliceID   uint64 = 0x3436746e695d5b5f // "_[]int64" (little endian), 3762322556277578591
	float64SliceID uint64 = 0x656c62756f645d5b // "[]double" (little endian), 7308324551835016539
	stringSliceID  uint64 = 0x676e697274735d5b // "[]string" (little endian), 7453010373645655387
)
|
|
|
|
// hashKVs returns a new xxHash64 hash of kvs.
|
|
func hashKVs(kvs []KeyValue) uint64 {
|
|
h := xxhash.New()
|
|
for _, kv := range kvs {
|
|
h = hashKV(h, kv)
|
|
}
|
|
return h.Sum64()
|
|
}
|
|
|
|
// hashKV returns the xxHash64 hash of kv with h as the base.
|
|
func hashKV(h xxhash.Hash, kv KeyValue) xxhash.Hash {
|
|
h = h.String(string(kv.Key))
|
|
|
|
switch kv.Value.Type() {
|
|
case BOOL:
|
|
h = h.Uint64(boolID)
|
|
h = h.Uint64(kv.Value.numeric)
|
|
case INT64:
|
|
h = h.Uint64(int64ID)
|
|
h = h.Uint64(kv.Value.numeric)
|
|
case FLOAT64:
|
|
h = h.Uint64(float64ID)
|
|
// Assumes numeric stored with math.Float64bits.
|
|
h = h.Uint64(kv.Value.numeric)
|
|
case STRING:
|
|
h = h.Uint64(stringID)
|
|
h = h.String(kv.Value.stringly)
|
|
case BOOLSLICE:
|
|
h = h.Uint64(boolSliceID)
|
|
rv := reflect.ValueOf(kv.Value.slice)
|
|
for i := 0; i < rv.Len(); i++ {
|
|
h = h.Bool(rv.Index(i).Bool())
|
|
}
|
|
case INT64SLICE:
|
|
h = h.Uint64(int64SliceID)
|
|
rv := reflect.ValueOf(kv.Value.slice)
|
|
for i := 0; i < rv.Len(); i++ {
|
|
h = h.Int64(rv.Index(i).Int())
|
|
}
|
|
case FLOAT64SLICE:
|
|
h = h.Uint64(float64SliceID)
|
|
rv := reflect.ValueOf(kv.Value.slice)
|
|
for i := 0; i < rv.Len(); i++ {
|
|
h = h.Float64(rv.Index(i).Float())
|
|
}
|
|
case STRINGSLICE:
|
|
h = h.Uint64(stringSliceID)
|
|
rv := reflect.ValueOf(kv.Value.slice)
|
|
for i := 0; i < rv.Len(); i++ {
|
|
h = h.String(rv.Index(i).String())
|
|
}
|
|
case INVALID:
|
|
default:
|
|
// Logging is an alternative, but using the internal logger here
|
|
// causes an import cycle so it is not done.
|
|
v := kv.Value.AsInterface()
|
|
msg := fmt.Sprintf("unknown value type: %[1]v (%[1]T)", v)
|
|
panic(msg)
|
|
}
|
|
return h
|
|
}
|