1
0
mirror of https://github.com/open-telemetry/opentelemetry-go.git synced 2026-06-03 18:35:08 +02:00

Replace fnv with xxhash (#7497)

**Objective**:
- Performance comparison between fnv and xxhash in terms of ops/sec,
allocations and collisions
- Implement xxhash according to first objective

**Changes**:
- fnv is replaced by xxhash. 
  Performance stats:
  - **Collision**: No collisions up to 100M
  - **Allocations**: Same in both cases
- **Ops/sec**: xxhash performed better in cases with medium to large
strings
 
 **Benchmarks**:
 ```
 benchstat old.txt new.txt
goos: darwin
goarch: arm64
pkg: go.opentelemetry.io/otel/attribute
cpu: Apple M2
│ old.txt │ new.txt │
│ sec/op │ sec/op vs base │
NewSet-8 205.5n ± 1% 229.4n ± 1% +11.61% (p=0.002 n=6)
NewSetSmallStrings-8 160.5n ± 1% 169.0n ± 5% +5.26% (p=0.002 n=6)
NewSetMediumStrings-8 263.8n ± 6% 185.0n ± 1% -29.89% (p=0.002 n=6)
NewSetLargeStrings-8 426.4n ± 9% 210.2n ± 1% -50.72% (p=0.002 n=6)
NewSetVeryLargeStrings-8 1012.5n ± 7% 238.7n ± 2% -76.43% (p=0.002 n=6)
NewSetHugeStrings-8 3622.0n ± 8% 397.1n ± 1% -89.04% (p=0.002 n=6)
geomean                     488.6n        228.6n       -53.21%

│ old.txt │ new.txt │
│ B/op │ B/op vs base │
NewSet-8 448.0 ± 0% 448.0 ± 0% ~ (p=1.000 n=6) ¹
NewSetSmallStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹
NewSetMediumStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹
NewSetLargeStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹
NewSetVeryLargeStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹
NewSetHugeStrings-8 320.0 ± 0% 320.0 ± 0% ~ (p=1.000 n=6) ¹
geomean                    338.5        338.5       +0.00%
¹ all samples are equal

│ old.txt │ new.txt │
│ allocs/op │ allocs/op vs base │
NewSet-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹
NewSetSmallStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹
NewSetMediumStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹
NewSetLargeStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹
NewSetVeryLargeStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹
NewSetHugeStrings-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=6) ¹
geomean                    1.000        1.000       +0.00%
¹ all samples are equal
```

Previous implementation for reference: 
https://github.com/open-telemetry/opentelemetry-go/blame/d0483a7c89d936dcced557fb523465daeac16967/CHANGELOG.md#L16

---------

Co-authored-by: Robert Pająk <pellared@hotmail.com>
This commit is contained in:
Preeti Dewani
2025-11-19 15:36:20 +05:30
committed by GitHub
parent 98eb065c75
commit 49292857b7
55 changed files with 403 additions and 195 deletions
+8 -8
View File
@@ -7,7 +7,7 @@ import (
"fmt"
"reflect"
"go.opentelemetry.io/otel/attribute/internal/fnv"
"go.opentelemetry.io/otel/attribute/internal/xxhash"
)
// Type identifiers. These identifiers are hashed before the value of the
@@ -17,7 +17,7 @@ import (
//
// These are all 8 byte length strings converted to a uint64 representation. A
// uint64 is used instead of the string directly as an optimization, it avoids
// the for loop in [fnv] which adds minor overhead.
// the for loop in [xxhash] which adds minor overhead.
const (
boolID uint64 = 7953749933313450591 // "_boolean" (little endian)
int64ID uint64 = 7592915492740740150 // "64_bit_i" (little endian)
@@ -29,17 +29,17 @@ const (
stringSliceID uint64 = 7453010373645655387 // "[]string" (little endian)
)
// hashKVs returns a new FNV-1a hash of kvs.
func hashKVs(kvs []KeyValue) fnv.Hash {
h := fnv.New()
// hashKVs returns a new xxHash64 hash of kvs.
func hashKVs(kvs []KeyValue) uint64 {
h := xxhash.New()
for _, kv := range kvs {
h = hashKV(h, kv)
}
return h
return h.Sum64()
}
// hashKV returns the FNV-1a hash of kv with h as the base.
func hashKV(h fnv.Hash, kv KeyValue) fnv.Hash {
// hashKV returns the xxHash64 hash of kv with h as the base.
func hashKV(h xxhash.Hash, kv KeyValue) xxhash.Hash {
h = h.String(string(kv.Key))
switch kv.Value.Type() {
+2 -4
View File
@@ -11,8 +11,6 @@ import (
"slices"
"strings"
"testing"
"go.opentelemetry.io/otel/attribute/internal/fnv"
)
// keyVals is all the KeyValue generators that are used for testing. This is
@@ -42,7 +40,7 @@ var keyVals = []func(string) KeyValue{
func TestHashKVsEquality(t *testing.T) {
type testcase struct {
hash fnv.Hash
hash uint64
kvs []KeyValue
}
@@ -105,7 +103,7 @@ func TestHashKVsEquality(t *testing.T) {
type msg struct {
cmp string
i, j int
hI, hJ fnv.Hash
hI, hJ uint64
kvI, kvJ []KeyValue
}
-76
View File
@@ -1,76 +0,0 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package fnv provides an efficient and allocation free implementation of the
// FNV-1a, non-cryptographic hash functions created by Glenn Fowler, Landon
// Curt Noll, and Phong Vo. See
// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function.
//
// This implementation is provided as an alternative to "hash/fnv". The
// built-in implementation requires two allocations per Write for a string (one
// for the hash pointer and the other to convert a string to a []byte). This
// implementation is more efficient and does not require any allocations.
package fnv // import "go.opentelemetry.io/otel/attribute/internal/fnv"
import (
"math"
)
// FNV-1a 64-bit parameters, copied from "hash/fnv" and cross-checked against:
//
//   - https://datatracker.ietf.org/doc/html/draft-eastlake-fnv-17.html
//   - http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-param
const (
	// offset64 is the value a fresh hash starts from.
	offset64 = 0xcbf29ce484222325
	// prime64 is the multiplier applied after each byte is XORed in.
	prime64 = 0x100000001b3
)
// Hash is the running state of a 64-bit FNV-1a hash. Its methods use value
// receivers: each one takes the value to mix in and returns the updated
// state, leaving the receiver itself unchanged.
type Hash uint64
// New returns a Hash holding the 64-bit FNV-1a starting value, ready to have
// data mixed into it. Its value is laid out in big-endian byte order.
func New() Hash {
	const seed Hash = offset64
	return seed
}
// Uint64 returns the hash obtained by mixing the eight bytes of val into h,
// most-significant byte first. The receiver is not modified.
func (h Hash) Uint64(val uint64) Hash {
	sum := uint64(h)
	// Walk from the top byte (shift 56) down to the bottom byte (shift 0),
	// applying the FNV-1a step each time: XOR the byte in, then multiply.
	for i := 0; i < 8; i++ {
		shift := uint(56 - 8*i)
		sum ^= (val >> shift) & 0xFF
		sum *= prime64
	}
	return Hash(sum)
}
// Bool returns the hash obtained by mixing val into h, encoded as the uint64
// 1 for true and 0 for false.
func (h Hash) Bool(val bool) Hash { // nolint:revive // val is not a flag.
	var encoded uint64
	if val {
		encoded = 1
	}
	return h.Uint64(encoded)
}
// Float64 returns the hash obtained by mixing the IEEE 754 bit pattern of val
// into h.
func (h Hash) Float64(val float64) Hash {
	bits := math.Float64bits(val)
	return h.Uint64(bits)
}
// Int64 returns the hash obtained by mixing val into h after reinterpreting
// it as a uint64.
func (h Hash) Int64(val int64) Hash {
	reinterpreted := uint64(val) // nolint:gosec // overflow doesn't matter since we are hashing.
	return h.Uint64(reinterpreted)
}
// String returns the hash obtained by mixing every byte of val into h, in
// order. An empty string returns h unchanged.
func (h Hash) String(val string) Hash {
	sum := uint64(h)
	for _, b := range []byte(val) {
		// FNV-1a step: XOR the byte in, then multiply by the prime.
		sum = (sum ^ uint64(b)) * prime64
	}
	return Hash(sum)
}
-98
View File
@@ -1,98 +0,0 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package fnv
import (
"encoding/binary"
"hash/fnv"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestStringHashCorrectness(t *testing.T) {
input := []string{"", "a", "ab", "abc", "世界"}
refH := fnv.New64a()
for _, in := range input {
h := New()
got := h.String(in)
refH.Reset()
n, err := refH.Write([]byte(in))
require.NoError(t, err)
require.Equalf(t, len(in), n, "wrote only %d out of %d bytes", n, len(in))
want := refH.Sum64()
assert.Equal(t, want, uint64(got), in)
}
}
func TestUint64HashCorrectness(t *testing.T) {
input := []uint64{0, 10, 312984238623, 1024}
buf := make([]byte, 8)
refH := fnv.New64a()
for _, in := range input {
h := New()
got := h.Uint64(in)
refH.Reset()
binary.BigEndian.PutUint64(buf, in)
n, err := refH.Write(buf)
require.NoError(t, err)
require.Equalf(t, 8, n, "wrote only %d out of 8 bytes", n)
want := refH.Sum64()
assert.Equal(t, want, uint64(got), in)
}
}
// TestIntegrity checks that hashing ten bytes with String alone gives the
// same result as hashing the first two with String and the remaining eight
// with Uint64 (read as a big-endian integer).
func TestIntegrity(t *testing.T) {
	raw := []byte{'1', '2', 3, 4, 5, 6, 7, 8, 9, 10}
	head, tail := raw[:2], raw[2:]

	expected := New().String(string(raw))
	actual := New().String(string(head)).Uint64(binary.BigEndian.Uint64(tail))

	assert.Equal(t, expected, actual)
}
// result receives the output of the benchmarked calls; presumably it exists
// so the compiler cannot discard them as unused.
var result Hash

// BenchmarkStringKB measures Hash.String on a 1024-byte string.
func BenchmarkStringKB(b *testing.B) {
	const size = 1024
	b.SetBytes(size)

	payload := make([]byte, size)
	for idx := range payload {
		payload[idx] = byte(idx)
	}
	input := string(payload)
	seed := New()

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		result = seed.String(input)
	}
}
// BenchmarkUint64KB measures Hash.Uint64 on a single fixed 8-byte value.
func BenchmarkUint64KB(b *testing.B) {
	const value uint64 = 192386739218721
	b.SetBytes(8)
	seed := New()

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		result = seed.Uint64(value)
	}
}
+64
View File
@@ -0,0 +1,64 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
// Package xxhash provides a wrapper around the xxhash library for attribute hashing.
package xxhash // import "go.opentelemetry.io/otel/attribute/internal/xxhash"
import (
"encoding/binary"
"math"
"github.com/cespare/xxhash/v2"
)
// Hash wraps xxhash.Digest to provide an API friendly for hashing attribute values.
//
// The digest is held by pointer and every method uses a value receiver, so
// copies of a Hash (including the values returned by its methods) all write
// to and read from the same underlying digest.
type Hash struct {
	// d is the digest that accumulates everything written through the Hash
	// methods.
	d *xxhash.Digest
}
// New returns a Hash backed by a freshly created xxhash digest.
func New() Hash {
	var h Hash
	h.d = xxhash.New()
	return h
}
// Uint64 writes val to the underlying digest as eight little-endian bytes and
// returns h so that calls can be chained.
func (h Hash) Uint64(val uint64) Hash {
	var encoded [8]byte
	binary.LittleEndian.PutUint64(encoded[:], val)
	// Write on an xxhash digest always returns a nil error; panic if that
	// ever stops being true rather than silently dropping data.
	if _, err := h.d.Write(encoded[:]); err != nil {
		panic("xxhash write of uint64 failed: " + err.Error())
	}
	return h
}
// Bool hashes val as the uint64 1 for true and 0 for false, and returns h so
// that calls can be chained.
func (h Hash) Bool(val bool) Hash { // nolint:revive // This is a hashing function.
	var encoded uint64
	if val {
		encoded = 1
	}
	return h.Uint64(encoded)
}
// Float64 hashes the IEEE 754 bit pattern of val and returns h so that calls
// can be chained.
func (h Hash) Float64(val float64) Hash {
	bits := math.Float64bits(val)
	return h.Uint64(bits)
}
// Int64 hashes val after reinterpreting it as a uint64 and returns h so that
// calls can be chained.
func (h Hash) Int64(val int64) Hash {
	reinterpreted := uint64(val) // nolint:gosec // Overflow doesn't matter since we are hashing.
	return h.Uint64(reinterpreted)
}
// String writes the bytes of val to the underlying digest and returns h so
// that calls can be chained.
func (h Hash) String(val string) Hash {
	// WriteString on an xxhash digest always returns a nil error; panic if
	// that ever stops being true rather than silently dropping data.
	if _, err := h.d.WriteString(val); err != nil {
		panic("xxhash write of string failed: " + err.Error())
	}
	return h
}
// Sum64 reports the 64-bit hash of everything written to the underlying
// digest so far.
func (h Hash) Sum64() uint64 {
	sum := h.d.Sum64()
	return sum
}
+197
View File
@@ -0,0 +1,197 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
package xxhash
import (
"encoding/binary"
"testing"
"github.com/stretchr/testify/assert"
)
// TestIntegrity checks that hashing ten bytes with String alone gives the
// same digest as hashing the first two with String and the remaining eight
// with Uint64 (read as a little-endian integer).
func TestIntegrity(t *testing.T) {
	payload := []byte{'1', '2', 3, 4, 5, 6, 7, 8, 9, 10}

	whole := New().String(string(payload))

	split := New().String(string(payload[:2]))
	split = split.Uint64(binary.LittleEndian.Uint64(payload[2:]))

	assert.Equal(t, whole.Sum64(), split.Sum64())
}
// TestNew checks that two freshly created hashes report the same initial sum.
func TestNew(t *testing.T) {
	first, second := New().Sum64(), New().Sum64()
	if first == second {
		return
	}
	t.Errorf("New() should return consistent initial value: %d != %d", first, second)
}
// TestUint64 checks that Uint64 is deterministic for equal inputs and gives
// different sums for 42 and 43.
func TestUint64(t *testing.T) {
	base := New().Uint64(42).Sum64()

	if repeat := New().Uint64(42).Sum64(); base != repeat {
		t.Errorf("Uint64() should be deterministic: %d != %d", base, repeat)
	}

	if other := New().Uint64(43).Sum64(); base == other {
		t.Errorf("Different inputs should produce different hashes: %d == %d", base, other)
	}
}
// TestBool checks that Bool is deterministic for equal inputs and gives
// different sums for true and false.
func TestBool(t *testing.T) {
	base := New().Bool(true).Sum64()

	if repeat := New().Bool(true).Sum64(); base != repeat {
		t.Errorf("Bool() should be deterministic: %d != %d", base, repeat)
	}

	if other := New().Bool(false).Sum64(); base == other {
		t.Errorf("Different bool values should produce different hashes: %d == %d", base, other)
	}
}
// TestFloat64 checks that Float64 is deterministic for equal inputs and gives
// different sums for 3.14 and 2.71.
func TestFloat64(t *testing.T) {
	base := New().Float64(3.14).Sum64()

	if repeat := New().Float64(3.14).Sum64(); base != repeat {
		t.Errorf("Float64() should be deterministic: %d != %d", base, repeat)
	}

	if other := New().Float64(2.71).Sum64(); base == other {
		t.Errorf("Different float values should produce different hashes: %d == %d", base, other)
	}
}
// TestInt64 checks that Int64 is deterministic for equal inputs and gives
// different sums for 42 and 43.
func TestInt64(t *testing.T) {
	base := New().Int64(42).Sum64()

	if repeat := New().Int64(42).Sum64(); base != repeat {
		t.Errorf("Int64() should be deterministic: %d != %d", base, repeat)
	}

	if other := New().Int64(43).Sum64(); base == other {
		t.Errorf("Different int64 values should produce different hashes: %d == %d", base, other)
	}
}
// TestString checks that String is deterministic for equal inputs and gives
// different sums for "hello" and "world".
func TestString(t *testing.T) {
	base := New().String("hello").Sum64()

	if repeat := New().String("hello").Sum64(); base != repeat {
		t.Errorf("String() should be deterministic: %d != %d", base, repeat)
	}

	if other := New().String("world").Sum64(); base == other {
		t.Errorf("Different strings should produce different hashes: %d == %d", base, other)
	}
}
// TestChaining checks that a chain of String, Uint64 and Bool calls is
// deterministic, and that changing one value in the chain changes the sum.
func TestChaining(t *testing.T) {
	// chain hashes the same key and flag, varying only the number.
	chain := func(n uint64) uint64 {
		return New().String("key").Uint64(n).Bool(true).Sum64()
	}

	base, repeat, other := chain(42), chain(42), chain(43)

	if base != repeat {
		t.Errorf("Chained operations should be deterministic: %d != %d", base, repeat)
	}
	if base == other {
		t.Errorf("Different chained operations should produce different hashes: %d == %d", base, other)
	}
}
// BenchmarkStringKB measures Hash.String on a 1024-byte string. The same Hash
// is reused for every iteration.
func BenchmarkStringKB(b *testing.B) {
	const size = 1024
	b.SetBytes(size)

	payload := make([]byte, size)
	for idx := range payload {
		payload[idx] = byte(idx)
	}
	input := string(payload)
	hasher := New()

	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		hasher.String(input)
	}
}
// BenchmarkUint64KB measures Hash.Uint64 on a single fixed 8-byte value. The
// same Hash is reused for every iteration.
func BenchmarkUint64KB(b *testing.B) {
	const value uint64 = 192386739218721
	b.SetBytes(8)
	hasher := New()

	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		hasher.Uint64(value)
	}
}
// BenchmarkUint64 measures Hash.Uint64 with a different value on every
// iteration, feeding each result into the next call.
func BenchmarkUint64(b *testing.B) {
	hasher := New()
	b.ResetTimer()
	b.ReportAllocs()
	for n := range b.N {
		hasher = hasher.Uint64(uint64(n))
	}
}
// BenchmarkString measures Hash.String on a short fixed string, feeding each
// result into the next call.
func BenchmarkString(b *testing.B) {
	const input = "benchmark_string_value"
	hasher := New()
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		hasher = hasher.String(input)
	}
}
// BenchmarkBool measures Hash.Bool, alternating between true and false on
// successive iterations.
func BenchmarkBool(b *testing.B) {
	hasher := New()
	b.ResetTimer()
	b.ReportAllocs()
	for n := range b.N {
		even := n%2 == 0
		hasher = hasher.Bool(even)
	}
}
// BenchmarkFloat64 measures Hash.Float64 with a different value on every
// iteration.
func BenchmarkFloat64(b *testing.B) {
	hasher := New()
	b.ResetTimer()
	b.ReportAllocs()
	for n := range b.N {
		value := float64(n) * 3.14159
		hasher = hasher.Float64(value)
	}
}
// BenchmarkInt64 measures Hash.Int64 with a different value on every
// iteration.
func BenchmarkInt64(b *testing.B) {
	hasher := New()
	b.ResetTimer()
	b.ReportAllocs()
	for n := range b.N {
		hasher = hasher.Int64(int64(n))
	}
}
// BenchmarkSum64 measures reading the sum from a Hash that already has a
// string, a uint64 and a bool written to it.
func BenchmarkSum64(b *testing.B) {
	h := New().String("key").Uint64(42).Bool(true)
	b.ReportAllocs()
	// The result of Sum64 is discarded, so use b.Loop: it keeps the compiler
	// from optimizing the call away and resets the timer on its first call,
	// which makes an explicit ResetTimer unnecessary.
	for b.Loop() {
		_ = h.Sum64()
	}
}
+8 -5
View File
@@ -10,7 +10,7 @@ import (
"slices"
"sort"
"go.opentelemetry.io/otel/attribute/internal/fnv"
"go.opentelemetry.io/otel/attribute/internal/xxhash"
)
type (
@@ -28,7 +28,7 @@ type (
// instead of a Set directly. Set has relatively poor performance when used
// as a map key compared to Distinct.
Set struct {
hash fnv.Hash
hash uint64
data any
}
@@ -37,7 +37,7 @@ type (
// Distinct should be used as a map key instead of a Set to provide better
// performance for map operations.
Distinct struct {
hash fnv.Hash
hash uint64
}
// Sortable implements sort.Interface, used for sorting KeyValue.
@@ -60,18 +60,21 @@ var (
// keyValueType is used in computeDistinctReflect.
keyValueType = reflect.TypeOf(KeyValue{})
// emptyHash is the hash of an empty set.
emptyHash = xxhash.New().Sum64()
// userDefinedEmptySet is an empty set. It was mistakenly exposed to users
// as something they can assign to, so it must remain addressable and
// mutable.
//
// This is kept for backwards compatibility, but should not be used in new code.
userDefinedEmptySet = &Set{
hash: fnv.New(),
hash: emptyHash,
data: [0]KeyValue{},
}
emptySet = Set{
hash: fnv.New(),
hash: emptyHash,
data: [0]KeyValue{},
}
)
+54 -4
View File
@@ -528,8 +528,6 @@ func BenchmarkFiltering(b *testing.B) {
b.Run("AllDropped", benchFn(func(attribute.KeyValue) bool { return false }))
}
var sinkSet attribute.Set
func BenchmarkNewSet(b *testing.B) {
attrs := []attribute.KeyValue{
attribute.String("B1", "2"),
@@ -542,7 +540,59 @@ func BenchmarkNewSet(b *testing.B) {
}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
sinkSet = attribute.NewSet(attrs...)
for b.Loop() {
attribute.NewSet(attrs...)
}
}
// generateStringAttrsWithSize creates 5 string attributes whose keys and
// values are built from base strings of keyLen and valueLen characters.
//
// The key base cycles through 'a'-'z' and the value base through '0'-'9'.
// Each attribute then gets a distinct one-character suffix on both key and
// value, so the final strings are one character longer than the requested
// lengths. Non-positive lengths yield an empty base.
func generateStringAttrsWithSize(keyLen, valueLen int) []attribute.KeyValue {
	// Fill pre-sized byte buffers rather than growing a string with += in a
	// loop, which re-allocates and copies the whole string on every step.
	keyBuf := make([]byte, max(keyLen, 0))
	for i := range keyBuf {
		keyBuf[i] = byte('a' + i%26)
	}
	valueBuf := make([]byte, max(valueLen, 0))
	for i := range valueBuf {
		valueBuf[i] = byte('0' + i%10)
	}
	keyBase, valueBase := string(keyBuf), string(valueBuf)

	// Different suffixes keep the five attributes unique.
	return []attribute.KeyValue{
		attribute.String(keyBase+"1", valueBase+"x"),
		attribute.String(keyBase+"2", valueBase+"y"),
		attribute.String(keyBase+"3", valueBase+"z"),
		attribute.String(keyBase+"4", valueBase+"w"),
		attribute.String(keyBase+"5", valueBase+"v"),
	}
}
func BenchmarkNewSetStringAttrs(b *testing.B) {
testCases := []struct {
name string
keyLen int
valueLen int
}{
{"SmallStrings", 2, 1}, // B1="2"
{"MediumStrings", 10, 10}, // realistic service names, etc.
{"LargeStrings", 25, 25}, // longer service names, URLs, etc.
{"VeryLargeStrings", 50, 100}, // very long values like URLs, descriptions
{"HugeStrings", 100, 500}, // extremely large like full URLs, JSON, etc.
}
for _, tc := range testCases {
b.Run(tc.name, func(b *testing.B) {
attrs := generateStringAttrsWithSize(tc.keyLen, tc.valueLen)
b.ReportAllocs()
b.ResetTimer()
for b.Loop() {
attribute.NewSet(attrs...)
}
})
}
}