Use Set hash in Distinct (2nd attempt) (#7175)

Re-opening https://github.com/open-telemetry/opentelemetry-go/pull/5028 with new benchmarks. For cases with 10 attributes, this reduces the overhead of metric measurements by ~80-90% (depending on lock contention). It introduces a small probability of collision for attribute sets in the metrics SDK. For an instrument with 1 million different attribute sets, the probability of a collision is approximately 2 * 10^-8. For a more "normal" cardinality of 1000 on an instrument, it is approximately 2 * 10^-17. ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/attribute cpu: Intel(R) Xeon(R) CPU @ 2.20GHz │ main.txt │ hash.txt │ │ sec/op │ sec/op vs base │ EquivalentMapAccess/Empty-24 32.01n ± 2% 10.12n ± 4% -68.37% (p=0.002 n=6) EquivalentMapAccess/1_string_attribute-24 106.25n ± 2% 10.01n ± 5% -90.58% (p=0.002 n=6) EquivalentMapAccess/10_string_attributes-24 826.250n ± 1% 9.982n ± 11% -98.79% (p=0.002 n=6) EquivalentMapAccess/1_int_attribute-24 106.65n ± 2% 10.13n ± 3% -90.50% (p=0.002 n=6) EquivalentMapAccess/10_int_attributes-24 833.25n ± 2% 10.04n ± 5% -98.80% (p=0.002 n=6) geomean 190.3n 10.06n -94.72% ``` Parallel benchmarks: ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/metric cpu: Intel(R) Xeon(R) CPU @ 2.20GHz │ main24.txt │ new24.txt │ │ sec/op │ sec/op vs base │ SyncMeasure/NoView/Int64Counter/Attributes/0-24 288.4n ± 13% 267.0n ± 16% ~ (p=0.180 n=6) SyncMeasure/NoView/Int64Counter/Attributes/1-24 372.7n ± 24% 303.3n ± 6% -18.61% (p=0.002 n=6) SyncMeasure/NoView/Int64Counter/Attributes/10-24 1862.5n ± 11% 302.2n ± 6% -83.77% (p=0.002 n=6) SyncMeasure/NoView/Float64Counter/Attributes/0-24 288.2n ± 5% 291.8n ± 14% ~ (p=0.589 n=6) SyncMeasure/NoView/Float64Counter/Attributes/1-24 374.8n ± 22% 326.2n ± 15% -12.98% (p=0.002 n=6) SyncMeasure/NoView/Float64Counter/Attributes/10-24 1984.0n ± 10% 277.9n ± 15% -85.99% (p=0.002 n=6) SyncMeasure/NoView/Int64UpDownCounter/Attributes/0-24 286.8n ± 13% 279.4n ± 14% ~ (p=0.818 n=6) 
SyncMeasure/NoView/Int64UpDownCounter/Attributes/1-24 415.4n ± 14% 309.5n ± 11% -25.47% (p=0.002 n=6) SyncMeasure/NoView/Int64UpDownCounter/Attributes/10-24 1923.0n ± 19% 294.1n ± 17% -84.71% (p=0.002 n=6) SyncMeasure/NoView/Float64UpDownCounter/Attributes/0-24 284.9n ± 5% 271.6n ± 11% ~ (p=0.240 n=6) SyncMeasure/NoView/Float64UpDownCounter/Attributes/1-24 382.9n ± 23% 295.7n ± 13% -22.78% (p=0.002 n=6) SyncMeasure/NoView/Float64UpDownCounter/Attributes/10-24 1787.0n ± 28% 289.2n ± 12% -83.81% (p=0.002 n=6) SyncMeasure/NoView/Int64Histogram/Attributes/0-24 283.4n ± 8% 269.9n ± 9% ~ (p=0.589 n=6) SyncMeasure/NoView/Int64Histogram/Attributes/1-24 300.7n ± 8% 270.1n ± 15% -10.16% (p=0.026 n=6) SyncMeasure/NoView/Int64Histogram/Attributes/10-24 1046.8n ± 24% 299.2n ± 16% -71.42% (p=0.002 n=6) SyncMeasure/NoView/Float64Histogram/Attributes/0-24 264.3n ± 12% 295.9n ± 5% +11.93% (p=0.026 n=6) SyncMeasure/NoView/Float64Histogram/Attributes/1-24 321.0n ± 8% 269.4n ± 11% -16.09% (p=0.002 n=6) SyncMeasure/NoView/Float64Histogram/Attributes/10-24 1052.2n ± 10% 274.6n ± 5% -73.90% (p=0.002 n=6) geomean 540.0n 287.7n -46.72% ``` Single-threaded benchmarks: ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/metric cpu: Intel(R) Xeon(R) CPU @ 2.20GHz │ main1.txt │ new1.txt │ │ sec/op │ sec/op vs base │ SyncMeasure/NoView/Int64Counter/Attributes/0 130.95n ± 1% 97.99n ± 21% -25.17% (p=0.002 n=6) SyncMeasure/NoView/Int64Counter/Attributes/1 300.8n ± 7% 104.6n ± 3% -65.21% (p=0.002 n=6) SyncMeasure/NoView/Int64Counter/Attributes/10 1646.0n ± 2% 105.8n ± 2% -93.58% (p=0.002 n=6) SyncMeasure/NoView/Float64Counter/Attributes/0 132.65n ± 1% 99.28n ± 4% -25.16% (p=0.002 n=6) SyncMeasure/NoView/Float64Counter/Attributes/1 295.4n ± 3% 107.7n ± 3% -63.54% (p=0.002 n=6) SyncMeasure/NoView/Float64Counter/Attributes/10 1620.0n ± 1% 109.6n ± 4% -93.23% (p=0.002 n=6) SyncMeasure/NoView/Int64UpDownCounter/Attributes/0 132.85n ± 80% 99.34n ± 1% -25.22% (p=0.002 n=6) 
SyncMeasure/NoView/Int64UpDownCounter/Attributes/1 300.4n ± 1% 106.0n ± 1% -64.71% (p=0.002 n=6) SyncMeasure/NoView/Int64UpDownCounter/Attributes/10 1622.0n ± 1% 105.8n ± 1% -93.48% (p=0.002 n=6) SyncMeasure/NoView/Float64UpDownCounter/Attributes/0 134.90n ± 51% 99.16n ± 4% -26.49% (p=0.002 n=6) SyncMeasure/NoView/Float64UpDownCounter/Attributes/1 312.4n ± 34% 107.8n ± 2% -65.51% (p=0.002 n=6) SyncMeasure/NoView/Float64UpDownCounter/Attributes/10 1613.0n ± 23% 106.1n ± 1% -93.43% (p=0.002 n=6) SyncMeasure/NoView/Int64Histogram/Attributes/0 103.50n ± 17% 88.53n ± 1% -14.46% (p=0.002 n=6) SyncMeasure/NoView/Int64Histogram/Attributes/1 199.50n ± 16% 95.44n ± 2% -52.16% (p=0.002 n=6) SyncMeasure/NoView/Int64Histogram/Attributes/10 878.70n ± 2% 95.78n ± 2% -89.10% (p=0.002 n=6) SyncMeasure/NoView/Float64Histogram/Attributes/0 108.55n ± 54% 88.45n ± 1% -18.51% (p=0.002 n=6) SyncMeasure/NoView/Float64Histogram/Attributes/1 257.30n ± 14% 95.05n ± 2% -63.06% (p=0.002 n=6) SyncMeasure/NoView/Float64Histogram/Attributes/10 882.70n ± 18% 96.28n ± 1% -89.09% (p=0.002 n=6) geomean 355.2n 100.3n -71.77% ``` --------- Co-authored-by: Tyler Yahn <MrAlias@users.noreply.github.com> Co-authored-by: Robert Pająk <pellared@hotmail.com>
2026-06-03 18:35:08 +02:00 · 2025-09-16 16:04:50 -04:00
parent 666f95c114
commit 9d52bde6d6
7 changed files with 654 additions and 54 deletions
@@ -0,0 +1,92 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package attribute // import "go.opentelemetry.io/otel/attribute"
+
+import (
+	"fmt"
+	"reflect"
+
+	"go.opentelemetry.io/otel/attribute/internal/fnv"
+)
+
+// Type identifiers. These identifiers are hashed before the value of the
+// corresponding type. This is done to distinguish values that are hashed with
+// the same value representation (e.g. `int64(1)` and `true`, []int64{0} and
+// int64(0)).
+//
+// Each identifier is an 8 byte string packed into a uint64 (little endian, as
+// noted next to each constant). A uint64 is used instead of the string itself
+// as an optimization: hashing it avoids the per-byte for loop used for
+// strings in [fnv], which adds minor overhead.
+const (
+	boolID         uint64 = 7953749933313450591 // "_boolean" (little endian)
+	int64ID        uint64 = 7592915492740740150 // "64_bit_i" (little endian)
+	float64ID      uint64 = 7376742710626956342 // "64_bit_f" (little endian)
+	stringID       uint64 = 6874584755375207263 // "_string_" (little endian)
+	boolSliceID    uint64 = 6875993255270243167 // "_[]bool_" (little endian)
+	int64SliceID   uint64 = 3762322556277578591 // "_[]int64" (little endian)
+	float64SliceID uint64 = 7308324551835016539 // "[]double" (little endian)
+	stringSliceID  uint64 = 7453010373645655387 // "[]string" (little endian)
+)
+
+// hashKVs folds every attribute of kvs, in order, into a fresh FNV-1a hash
+// and returns the result. A nil or empty kvs yields the initial hash value.
+func hashKVs(kvs []KeyValue) fnv.Hash {
+	sum := fnv.New()
+	for i := range kvs {
+		sum = hashKV(sum, kvs[i])
+	}
+	return sum
+}
+
+// hashKV returns the FNV-1a hash of kv with h as the base.
+//
+// The key is hashed first, followed by a type identifier and then the value.
+// Every variable-length component (the key, a string value, a slice, and each
+// element of a string slice) is preceded by its length. Without that framing
+// the hashed byte stream is ambiguous and distinct attributes collide
+// deterministically, e.g. []string{"ab", "c"} and []string{"a", "bc"}, or a
+// slice whose trailing elements mimic the key of the next attribute.
+//
+// An INVALID value contributes only its key. Any other unknown value type
+// causes a panic.
+func hashKV(h fnv.Hash, kv KeyValue) fnv.Hash {
+	key := string(kv.Key)
+	h = h.Int64(int64(len(key)))
+	h = h.String(key)
+
+	switch kv.Value.Type() {
+	case BOOL:
+		h = h.Uint64(boolID)
+		h = h.Uint64(kv.Value.numeric)
+	case INT64:
+		h = h.Uint64(int64ID)
+		h = h.Uint64(kv.Value.numeric)
+	case FLOAT64:
+		h = h.Uint64(float64ID)
+		// Assumes numeric stored with math.Float64bits.
+		h = h.Uint64(kv.Value.numeric)
+	case STRING:
+		h = h.Uint64(stringID)
+		h = h.Int64(int64(len(kv.Value.stringly)))
+		h = h.String(kv.Value.stringly)
+	case BOOLSLICE:
+		h = h.Uint64(boolSliceID)
+		rv := reflect.ValueOf(kv.Value.slice)
+		n := rv.Len()
+		h = h.Int64(int64(n))
+		for i := 0; i < n; i++ {
+			h = h.Bool(rv.Index(i).Bool())
+		}
+	case INT64SLICE:
+		h = h.Uint64(int64SliceID)
+		rv := reflect.ValueOf(kv.Value.slice)
+		n := rv.Len()
+		h = h.Int64(int64(n))
+		for i := 0; i < n; i++ {
+			h = h.Int64(rv.Index(i).Int())
+		}
+	case FLOAT64SLICE:
+		h = h.Uint64(float64SliceID)
+		rv := reflect.ValueOf(kv.Value.slice)
+		n := rv.Len()
+		h = h.Int64(int64(n))
+		for i := 0; i < n; i++ {
+			h = h.Float64(rv.Index(i).Float())
+		}
+	case STRINGSLICE:
+		h = h.Uint64(stringSliceID)
+		rv := reflect.ValueOf(kv.Value.slice)
+		n := rv.Len()
+		h = h.Int64(int64(n))
+		for i := 0; i < n; i++ {
+			// Frame each element so element boundaries affect the hash.
+			elem := rv.Index(i).String()
+			h = h.Int64(int64(len(elem)))
+			h = h.String(elem)
+		}
+	case INVALID:
+		// Nothing beyond the key is hashed for an invalid value.
+	default:
+		// Logging is an alternative, but using the internal logger here
+		// causes an import cycle so it is not done.
+		v := kv.Value.AsInterface()
+		msg := fmt.Sprintf("unknown value type: %[1]v (%[1]T)", v)
+		panic(msg)
+	}
+	return h
+}
@@ -0,0 +1,317 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package attribute // import "go.opentelemetry.io/otel/attribute"
+
+import (
+	"cmp"
+	"fmt"
+	"math"
+	"reflect"
+	"slices"
+	"strings"
+	"testing"
+
+	"go.opentelemetry.io/otel/attribute/internal/fnv"
+)
+
+// keyVals holds every KeyValue generator used for testing. Each generator
+// takes the key to use, which is why this is not a plain []KeyValue: the same
+// test Values can be paired with different keys.
+var keyVals = []func(string) KeyValue{
+	func(k string) KeyValue { return Bool(k, true) },
+	func(k string) KeyValue { return Bool(k, false) },
+	func(k string) KeyValue { return BoolSlice(k, []bool{false, true}) },
+	func(k string) KeyValue { return BoolSlice(k, []bool{true, true, false}) },
+	func(k string) KeyValue { return Int(k, -1278) },
+	func(k string) KeyValue { return Int(k, 0) }, // Should be different than false above.
+	func(k string) KeyValue { return IntSlice(k, []int{3, 23, 21, -8, 0}) },
+	func(k string) KeyValue { return IntSlice(k, []int{1}) },
+	func(k string) KeyValue { return Int64(k, 1) }, // Should be different from true and []int{1}.
+	func(k string) KeyValue { return Int64(k, 29369) },
+	func(k string) KeyValue { return Int64Slice(k, []int64{3826, -38, -29, -1}) },
+	func(k string) KeyValue { return Int64Slice(k, []int64{8, -328, 29, 0}) },
+	func(k string) KeyValue { return Float64(k, -0.3812381) },
+	func(k string) KeyValue { return Float64(k, 1e32) },
+	func(k string) KeyValue { return Float64Slice(k, []float64{0.1, -3.8, -29., 0.3321}) },
+	func(k string) KeyValue { return Float64Slice(k, []float64{-13e8, -32.8, 4., 1e28}) },
+	func(k string) KeyValue { return String(k, "foo") },
+	func(k string) KeyValue { return String(k, "bar") },
+	func(k string) KeyValue { return StringSlice(k, []string{"foo", "bar", "baz"}) },
+	func(k string) KeyValue { return StringSlice(k, []string{"[]i1"}) },
+}
+
+// TestHashKVsEquality hashes the empty input plus, for each key, every ordered
+// selection of one, two, and three keyVals generators, and then verifies that
+// no two of those inputs share a hash.
+func TestHashKVsEquality(t *testing.T) {
+	type testcase struct {
+		hash fnv.Hash
+		kvs  []KeyValue
+	}
+
+	keys := []string{"k0", "k1"}
+
+	// Test all combinations up to length 3.
+	n := len(keyVals)
+	cases := make([]testcase, 0, 1+len(keys)*(n+(n*n)+(n*n*n)))
+
+	// record hashes kvs and stores the pair for the pairwise comparison below.
+	record := func(kvs []KeyValue) {
+		cases = append(cases, testcase{hash: hashKVs(kvs), kvs: kvs})
+	}
+
+	record(nil)
+	for _, key := range keys {
+		for _, first := range keyVals {
+			record([]KeyValue{first(key)})
+
+			for _, second := range keyVals {
+				record([]KeyValue{first(key), second(key)})
+
+				for _, third := range keyVals {
+					record([]KeyValue{first(key), second(key), third(key)})
+				}
+			}
+		}
+	}
+
+	for i, a := range cases {
+		for j, b := range cases {
+			m := msg{i: i, j: j, hI: a.hash, hJ: b.hash, kvI: a.kvs, kvJ: b.kvs}
+			switch {
+			case i == j:
+				m.cmp = "=="
+				if a.hash != b.hash {
+					t.Errorf("hashes not equal: %s", m)
+				}
+			default:
+				m.cmp = "!="
+				if a.hash == b.hash {
+					// Do not use testify/assert here. It is slow.
+					t.Errorf("hashes equal: %s", m)
+				}
+			}
+		}
+	}
+}
+
+// msg describes one pairwise hash comparison for test failure output: the
+// indexes, hashes, and attributes of both sides plus the comparison operator
+// that was expected to hold.
+type msg struct {
+	cmp      string
+	i, j     int
+	hI, hJ   fnv.Hash
+	kvI, kvJ []KeyValue
+}
+
+// String renders the comparison as "(i: hash)[attrs] cmp (j: hash)[attrs]".
+func (m msg) String() string {
+	lhs := fmt.Sprintf("(%d: %d)%s", m.i, m.hI, slice(m.kvI))
+	rhs := fmt.Sprintf("(%d: %d)%s", m.j, m.hJ, slice(m.kvJ))
+	return lhs + " " + m.cmp + " " + rhs
+}
+
+// slice formats kvs as a bracketed, comma-separated list of "key:value"
+// pairs. An empty kvs is rendered as "[]".
+func slice(kvs []KeyValue) string {
+	var b strings.Builder
+	_ = b.WriteByte('[')
+	for i, kv := range kvs {
+		if i > 0 {
+			_ = b.WriteByte(',')
+		}
+		_, _ = b.WriteString(string(kv.Key))
+		_ = b.WriteByte(':')
+		_, _ = b.WriteString(kv.Value.Emit())
+	}
+	_ = b.WriteByte(']')
+	return b.String()
+}
+
+// BenchmarkHashKVs measures hashing one attribute of every kind in keyVals,
+// all sharing the key "k".
+func BenchmarkHashKVs(b *testing.B) {
+	attrs := make([]KeyValue, 0, len(keyVals))
+	for _, gen := range keyVals {
+		attrs = append(attrs, gen("k"))
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	for b.Loop() {
+		hashKVs(attrs)
+	}
+}
+
+// FuzzHashKVs builds an attribute list from the fuzzed inputs, sorts and
+// de-duplicates it by key, and checks that hashKVs is deterministic and never
+// returns zero. It also hashes a slightly modified copy and logs (without
+// failing) when the two hashes match.
+func FuzzHashKVs(f *testing.F) {
+	// Add seed inputs to ensure coverage of edge cases.
+	f.Add("", "", "", "", "", "", 0, int64(0), 0.0, false, uint8(0))
+	f.Add("key", "value", "🌍", "test", "bool", "float", -1, int64(-1), -1.0, true, uint8(1))
+	f.Add("duplicate", "duplicate", "duplicate", "duplicate", "duplicate", "NaN",
+		0, int64(0), math.Inf(1), false, uint8(2))
+
+	f.Fuzz(func(t *testing.T, k1, k2, k3, k4, k5, s string, i int, i64 int64, fVal float64, b bool, sliceType uint8) {
+		if k1 == "" {
+			// Test empty set.
+			if h := hashKVs(nil); h == 0 {
+				t.Error("hash of empty slice should not be zero")
+			}
+			return
+		}
+
+		// Test variable number of attributes (0-10). The key length
+		// determines the number of attributes.
+		numAttrs := len(k1) % 11
+
+		var kvs []KeyValue
+
+		// Add basic types.
+		if numAttrs > 0 {
+			kvs = append(kvs, String(k1, s))
+		}
+		if numAttrs > 1 {
+			kvs = append(kvs, Int(k2, i))
+		}
+		if numAttrs > 2 {
+			kvs = append(kvs, Int64(k3, i64))
+		}
+		if numAttrs > 3 {
+			kvs = append(kvs, Float64(k4, fVal))
+		}
+		if numAttrs > 4 {
+			kvs = append(kvs, Bool(k5, b))
+		}
+
+		// Add slice types based on sliceType parameter.
+		if numAttrs > 5 {
+			switch sliceType % 4 {
+			case 0:
+				// Test BoolSlice with variable length.
+				bools := make([]bool, len(s)%5) // 0-4 elements
+				for idx := range bools {
+					bools[idx] = (idx+len(k1))%2 == 0
+				}
+				kvs = append(kvs, BoolSlice("boolslice", bools))
+			case 1:
+				// Test IntSlice with variable length.
+				ints := make([]int, len(s)%6) // 0-5 elements
+				for idx := range ints {
+					ints[idx] = idx + len(k2)
+				}
+				kvs = append(kvs, IntSlice("intslice", ints))
+			case 2:
+				// Test Int64Slice with variable length.
+				int64s := make([]int64, len(s)%4) // 0-3 elements
+				for idx := range int64s {
+					int64s[idx] = int64(idx) + i64
+				}
+				kvs = append(kvs, Int64Slice("int64slice", int64s))
+			case 3:
+				// Test Float64Slice with variable length and special values.
+				float64s := make([]float64, len(s)%5) // 0-4 elements
+				for idx := range float64s {
+					switch idx % 4 {
+					case 0:
+						float64s[idx] = fVal
+					case 1:
+						float64s[idx] = math.Inf(1) // +Inf
+					case 2:
+						float64s[idx] = math.Inf(-1) // -Inf
+					case 3:
+						float64s[idx] = math.NaN() // NaN
+					}
+				}
+				kvs = append(kvs, Float64Slice("float64slice", float64s))
+			}
+		}
+
+		// Add StringSlice. The local is not named "strings" so the imported
+		// package of that name is not shadowed.
+		if numAttrs > 6 {
+			strs := make([]string, len(k1)%4) // 0-3 elements
+			for idx := range strs {
+				strs[idx] = fmt.Sprintf("%s_%d", s, idx)
+			}
+			kvs = append(kvs, StringSlice("stringslice", strs))
+		}
+
+		// Test duplicate keys (should be handled by Set construction). k1 is
+		// never empty here: numAttrs > 7 requires at least 8 bytes of key.
+		if numAttrs > 7 {
+			kvs = append(kvs, String(k1, "duplicate_key_value"))
+		}
+
+		// Add more attributes with Unicode keys.
+		if numAttrs > 8 {
+			kvs = append(kvs, String("🔑", "unicode_key"))
+		}
+		if numAttrs > 9 {
+			kvs = append(kvs, String("empty", ""))
+		}
+
+		// Sort to ensure consistent ordering (as Set would do). The sort must
+		// be stable so that, among duplicate keys, input order is preserved
+		// and the de-duplication below really keeps the later value.
+		slices.SortStableFunc(kvs, func(a, b KeyValue) int {
+			return cmp.Compare(string(a.Key), string(b.Key))
+		})
+
+		// Remove duplicates (as Set will do), keeping the later value for
+		// duplicate keys.
+		if len(kvs) > 1 {
+			last := 0
+			for idx := 1; idx < len(kvs); idx++ {
+				if kvs[last].Key != kvs[idx].Key {
+					last++
+				}
+				kvs[last] = kvs[idx]
+			}
+			kvs = kvs[:last+1]
+		}
+
+		// Hash the key-value pairs.
+		h1 := hashKVs(kvs)
+		h2 := hashKVs(kvs) // Should be deterministic
+
+		if h1 != h2 {
+			t.Errorf("hash is not deterministic: %d != %d for kvs=%v", h1, h2, kvs)
+		}
+
+		if h1 == 0 && len(kvs) > 0 {
+			t.Errorf("hash should not be zero for non-empty input: kvs=%v", kvs)
+		}
+
+		// Test that different inputs produce different hashes (most of the time).
+		// This is a probabilistic test - collisions are possible but rare.
+		if len(kvs) == 0 {
+			return
+		}
+
+		// Modify the first value slightly.
+		modifiedKvs := slices.Clone(kvs)
+		first := &modifiedKvs[0]
+		firstKey := string(first.Key)
+		switch first.Value.Type() {
+		case STRING:
+			*first = String(firstKey, first.Value.AsString()+"_modified")
+		case INT64:
+			*first = Int64(firstKey, first.Value.AsInt64()+1)
+		case BOOL:
+			*first = Bool(firstKey, !first.Value.AsBool())
+		case FLOAT64:
+			val := first.Value.AsFloat64()
+			if !math.IsNaN(val) && !math.IsInf(val, 0) {
+				*first = Float64(firstKey, val+1.0)
+			}
+		}
+
+		h3 := hashKVs(modifiedKvs)
+		// Note: We don't assert h1 != h3 because hash collisions are theoretically possible
+		// but we can log suspicious cases for manual review.
+		if h1 == h3 && !reflect.DeepEqual(kvs, modifiedKvs) {
+			t.Logf("Potential hash collision detected: original=%v, modified=%v, hash=%d", kvs, modifiedKvs, h1)
+		}
+	})
+}
@@ -0,0 +1,76 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package fnv provides an efficient and allocation free implementation of the
+// FNV-1a, non-cryptographic hash functions created by Glenn Fowler, Landon
+// Curt Noll, and Phong Vo. See
+// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function.
+//
+// This implementation is provided as an alternative to "hash/fnv". The
+// built-in implementation requires two allocations per Write for a string (one
+// for the hash pointer and the other to convert a string to a []byte). This
+// implementation is more efficient and does not require any allocations.
+package fnv // import "go.opentelemetry.io/otel/attribute/internal/fnv"
+
+import (
+	"math"
+)
+
+// Taken from "hash/fnv". Verified at:
+//
+//   - https://datatracker.ietf.org/doc/html/draft-eastlake-fnv-17.html
+//   - http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-param
+const (
+	offset64 = 14695981039346656037
+	prime64  = 1099511628211
+)
+
+// Hash is a 64-bit FNV-1a hash value. Its methods each fold one typed value
+// into the hash and return the updated Hash; the receiver is not modified.
+type Hash uint64
+
+// New returns a Hash initialized to the 64-bit FNV offset basis, ready to have
+// values folded into it.
+func New() Hash {
+	return Hash(offset64)
+}
+
+// Uint64 returns the FNV-1a hash of the eight bytes of val, taken from the
+// most significant byte to the least significant byte, with h as the base.
+//
+// The eight rounds are written out explicitly rather than as a loop.
+func (h Hash) Uint64(val uint64) Hash {
+	v := uint64(h)
+	v = (v ^ ((val >> 56) & 0xFF)) * prime64
+	v = (v ^ ((val >> 48) & 0xFF)) * prime64
+	v = (v ^ ((val >> 40) & 0xFF)) * prime64
+	v = (v ^ ((val >> 32) & 0xFF)) * prime64
+	v = (v ^ ((val >> 24) & 0xFF)) * prime64
+	v = (v ^ ((val >> 16) & 0xFF)) * prime64
+	v = (v ^ ((val >> 8) & 0xFF)) * prime64
+	v = (v ^ ((val >> 0) & 0xFF)) * prime64
+	return Hash(v)
+}
+
+func (h Hash) Bool(val bool) Hash { // nolint:revive  // val is not a flag.
+	if val {
+		return h.Uint64(1)
+	}
+	return h.Uint64(0)
+}
+
+// Float64 returns the FNV-1a hash of the IEEE 754 bit pattern of val (as
+// given by math.Float64bits) with h as the base.
+func (h Hash) Float64(val float64) Hash {
+	bits := math.Float64bits(val)
+	return h.Uint64(bits)
+}
+
+// Int64 returns the FNV-1a hash of val, reinterpreted as a uint64, with h as
+// the base.
+func (h Hash) Int64(val int64) Hash {
+	bits := uint64(val) // nolint:gosec // overflow doesn't matter since we are hashing.
+	return h.Uint64(bits)
+}
+
+// String returns the FNV-1a hash of the bytes of val, in order, with h as the
+// base. An empty val returns h unchanged.
+func (h Hash) String(val string) Hash {
+	sum := uint64(h)
+	for i := 0; i < len(val); i++ {
+		sum = (sum ^ uint64(val[i])) * prime64
+	}
+	return Hash(sum)
+}
@@ -0,0 +1,98 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package fnv
+
+import (
+	"encoding/binary"
+	"hash/fnv"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestStringHashCorrectness checks Hash.String against the standard library
+// 64-bit FNV-1a implementation for a handful of strings.
+func TestStringHashCorrectness(t *testing.T) {
+	ref := fnv.New64a()
+	for _, in := range []string{"", "a", "ab", "abc", "世界"} {
+		ref.Reset()
+		n, err := ref.Write([]byte(in))
+		require.NoError(t, err)
+		require.Equalf(t, len(in), n, "wrote only %d out of %d bytes", n, len(in))
+
+		got := New().String(in)
+		assert.Equal(t, ref.Sum64(), uint64(got), in)
+	}
+}
+
+func TestUint64HashCorrectness(t *testing.T) {
+	input := []uint64{0, 10, 312984238623, 1024}
+
+	buf := make([]byte, 8)
+	refH := fnv.New64a()
+	for _, in := range input {
+		h := New()
+		got := h.Uint64(in)
+
+		refH.Reset()
+		binary.BigEndian.PutUint64(buf, in)
+		n, err := refH.Write(buf)
+		require.NoError(t, err)
+		require.Equalf(t, 8, n, "wrote only %d out of 8 bytes", n)
+		want := refH.Sum64()
+
+		assert.Equal(t, want, uint64(got), in)
+	}
+}
+
+// TestIntegrity checks that hashing ten bytes as one string gives the same
+// result as hashing the first two as a string and the remaining eight as a
+// big-endian uint64.
+func TestIntegrity(t *testing.T) {
+	data := []byte{'1', '2', 3, 4, 5, 6, 7, 8, 9, 10}
+
+	want := New().String(string(data))
+
+	tail := binary.BigEndian.Uint64(data[2:])
+	got := New().String(string(data[:2])).Uint64(tail)
+
+	assert.Equal(t, want, got)
+}
+
+// result receives the hash computed by each benchmark iteration; presumably
+// it exists so the compiler cannot discard the benchmarked calls.
+var result Hash
+
+func BenchmarkStringKB(b *testing.B) {
+	b.SetBytes(1024)
+	data := make([]byte, 1024)
+	for i := range data {
+		data[i] = byte(i)
+	}
+	s := string(data)
+	h := New()
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	for range b.N {
+		result = h.String(s)
+	}
+}
+
+// BenchmarkUint64KB measures Hash.Uint64 on a single 8 byte value.
+func BenchmarkUint64KB(b *testing.B) {
+	b.SetBytes(8)
+	input := uint64(192386739218721)
+	base := New()
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		result = base.Uint64(input)
+	}
+}
@@ -9,6 +9,8 @@ import (
 	"reflect"
 	"slices"
 	"sort"
+
+	"go.opentelemetry.io/otel/attribute/internal/fnv"
 )

 type (
@@ -23,19 +25,19 @@ type (
 	// the Equals method to ensure stable equivalence checking.
 	//
 	// Users should also use the Distinct returned from Equivalent as a map key
-	// instead of a Set directly. In addition to that type providing guarantees
-	// on stable equivalence, it may also provide performance improvements.
+	// instead of a Set directly. Set has relatively poor performance when used
+	// as a map key compared to Distinct.
 	Set struct {
-		equivalent Distinct
+		hash fnv.Hash
+		data any
 	}

-	// Distinct is a unique identifier of a Set.
+	// Distinct is an identifier of a Set which is very likely to be unique.
 	//
-	// Distinct is designed to ensure equivalence stability: comparisons will
-	// return the same value across versions. For this reason, Distinct should
-	// always be used as a map key instead of a Set.
+	// Distinct should be used as a map key instead of a Set to provide better
+	// performance for map operations.
 	Distinct struct {
-		iface any
+		hash fnv.Hash
 	}

 	// Sortable implements sort.Interface, used for sorting KeyValue.
@@ -46,6 +48,14 @@ type (
 	Sortable []KeyValue
 )

+// Compile time check these types remain comparable.
+var (
+	_ = isComparable(Set{})
+	_ = isComparable(Distinct{})
+)
+
+func isComparable[T comparable](t T) T { return t }
+
 var (
 	// keyValueType is used in computeDistinctReflect.
 	keyValueType = reflect.TypeOf(KeyValue{})
@@ -56,15 +66,13 @@ var (
 	//
 	// This is kept for backwards compatibility, but should not be used in new code.
 	userDefinedEmptySet = &Set{
-		equivalent: Distinct{
-			iface: [0]KeyValue{},
-		},
+		hash: fnv.New(),
+		data: [0]KeyValue{},
 	}

 	emptySet = Set{
-		equivalent: Distinct{
-			iface: [0]KeyValue{},
-		},
+		hash: fnv.New(),
+		data: [0]KeyValue{},
 	}
 )

@@ -79,30 +87,28 @@ func EmptySet() *Set {
 	return userDefinedEmptySet
 }

-// reflectValue abbreviates reflect.ValueOf(d).
-func (d Distinct) reflectValue() reflect.Value {
-	return reflect.ValueOf(d.iface)
-}
-
 // Valid reports whether this value refers to a valid Set.
-func (d Distinct) Valid() bool {
-	return d.iface != nil
+func (d Distinct) Valid() bool { return d.hash != 0 }
+
+// reflectValue abbreviates reflect.ValueOf(l.data).
+func (l Set) reflectValue() reflect.Value {
+	return reflect.ValueOf(l.data)
 }

 // Len returns the number of attributes in this set.
 func (l *Set) Len() int {
-	if l == nil || !l.equivalent.Valid() {
+	if l == nil || l.hash == 0 {
 		return 0
 	}
-	return l.equivalent.reflectValue().Len()
+	return l.reflectValue().Len()
 }

 // Get returns the KeyValue at ordered position idx in this set.
 func (l *Set) Get(idx int) (KeyValue, bool) {
-	if l == nil || !l.equivalent.Valid() {
+	if l == nil || l.hash == 0 {
 		return KeyValue{}, false
 	}
-	value := l.equivalent.reflectValue()
+	value := l.reflectValue()

 	if idx >= 0 && idx < value.Len() {
 		// Note: The Go compiler successfully avoids an allocation for
@@ -115,10 +121,10 @@ func (l *Set) Get(idx int) (KeyValue, bool) {

 // Value returns the value of a specified key in this set.
 func (l *Set) Value(k Key) (Value, bool) {
-	if l == nil || !l.equivalent.Valid() {
+	if l == nil || l.hash == 0 {
 		return Value{}, false
 	}
-	rValue := l.equivalent.reflectValue()
+	rValue := l.reflectValue()
 	vlen := rValue.Len()

 	idx := sort.Search(vlen, func(idx int) bool {
@@ -158,20 +164,29 @@ func (l *Set) ToSlice() []KeyValue {
 	return iter.ToSlice()
 }

-// Equivalent returns a value that may be used as a map key. The Distinct type
-// guarantees that the result will equal the equivalent. Distinct value of any
+// Equivalent returns a value that may be used as a map key. Equal Distinct
+// values are very likely to be equivalent attribute Sets. Distinct value of any
 // attribute set with the same elements as this, where sets are made unique by
 // choosing the last value in the input for any given key.
 func (l *Set) Equivalent() Distinct {
-	if l == nil || !l.equivalent.Valid() {
-		return emptySet.equivalent
+	if l == nil || l.hash == 0 {
+		return Distinct{hash: emptySet.hash}
 	}
-	return l.equivalent
+	return Distinct{hash: l.hash}
 }

 // Equals reports whether the argument set is equivalent to this set.
 func (l *Set) Equals(o *Set) bool {
-	return l.Equivalent() == o.Equivalent()
+	if l.Equivalent() != o.Equivalent() {
+		return false
+	}
+	if l == nil || l.hash == 0 {
+		l = &emptySet
+	}
+	if o == nil || o.hash == 0 {
+		o = &emptySet
+	}
+	return l.data == o.data
 }

 // Encoded returns the encoded form of this set, according to encoder.
@@ -241,10 +256,10 @@ func NewSetWithFiltered(kvs []KeyValue, filter Filter) (Set, []KeyValue) {

 	if filter != nil {
 		if div := filteredToFront(kvs, filter); div != 0 {
-			return Set{equivalent: computeDistinct(kvs[div:])}, kvs[:div]
+			return newSet(kvs[div:]), kvs[:div]
 		}
 	}
-	return Set{equivalent: computeDistinct(kvs)}, nil
+	return newSet(kvs), nil
 }

 // NewSetWithSortableFiltered returns a new Set.
@@ -324,7 +339,7 @@ func (l *Set) Filter(re Filter) (Set, []KeyValue) {
 	if first == 0 {
 		// It is safe to assume len(slice) >= 1 given we found at least one
 		// attribute above that needs to be filtered out.
-		return Set{equivalent: computeDistinct(slice[1:])}, slice[:1]
+		return newSet(slice[1:]), slice[:1]
 	}

 	// Move the filtered slice[first] to the front (preserving order).
@@ -334,25 +349,24 @@ func (l *Set) Filter(re Filter) (Set, []KeyValue) {

 	// Do not re-evaluate re(slice[first+1:]).
 	div := filteredToFront(slice[1:first+1], re) + 1
-	return Set{equivalent: computeDistinct(slice[div:])}, slice[:div]
+	return newSet(slice[div:]), slice[:div]
 }

-// computeDistinct returns a Distinct using either the fixed- or
-// reflect-oriented code path, depending on the size of the input. The input
-// slice is assumed to already be sorted and de-duplicated.
-func computeDistinct(kvs []KeyValue) Distinct {
-	iface := computeDistinctFixed(kvs)
-	if iface == nil {
-		iface = computeDistinctReflect(kvs)
+// newSet returns a new set based on the sorted and uniqued kvs.
+func newSet(kvs []KeyValue) Set {
+	s := Set{
+		hash: hashKVs(kvs),
+		data: computeDataFixed(kvs),
 	}
-	return Distinct{
-		iface: iface,
+	if s.data == nil {
+		s.data = computeDataReflect(kvs)
 	}
+	return s
 }

-// computeDistinctFixed computes a Distinct for small slices. It returns nil
-// if the input is too large for this code path.
-func computeDistinctFixed(kvs []KeyValue) any {
+// computeDataFixed computes a Set data for small slices. It returns nil if the
+// input is too large for this code path.
+func computeDataFixed(kvs []KeyValue) any {
 	switch len(kvs) {
 	case 1:
 		return [1]KeyValue(kvs)
@@ -379,9 +393,9 @@ func computeDistinctFixed(kvs []KeyValue) any {
 	}
 }

-// computeDistinctReflect computes a Distinct using reflection, works for any
-// size input.
-func computeDistinctReflect(kvs []KeyValue) any {
+// computeDataReflect computes a Set data using reflection, works for any size
+// input.
+func computeDataReflect(kvs []KeyValue) any {
 	at := reflect.New(reflect.ArrayOf(len(kvs), keyValueType)).Elem()
 	for i, keyValue := range kvs {
 		*(at.Index(i).Addr().Interface().(*KeyValue)) = keyValue
@@ -391,7 +405,7 @@ func computeDistinctReflect(kvs []KeyValue) any {

 // MarshalJSON returns the JSON encoding of the Set.
 func (l *Set) MarshalJSON() ([]byte, error) {
-	return json.Marshal(l.equivalent.iface)
+	return json.Marshal(l.data)
 }

 // MarshalLog is the marshaling function used by the logging system to represent this Set.