From eb4f1dc4a13cd7bd1b89a8a34490c78cc2dfcecc Mon Sep 17 00:00:00 2001
From: David Ashpole
Date: Wed, 6 Aug 2025 14:45:22 -0400
Subject: [PATCH] Add benchmark for map access using attribute Equivalent (#7123)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I am looking into https://promlabs.com/blog/2025/07/17/why-i-recommend-native-prometheus-instrumentation-over-opentelemetry/#comparing-counter-increment-performance, and was trying to figure out why incrementing a counter with 10 attributes was so much slower than incrementing a counter with no attributes, or 1 attribute:

```
$ go test -run=xxxxxMatchNothingxxxxx -cpu=1 -test.benchtime=1s -bench=BenchmarkSyncMeasure/NoView/Int64Counter/Attributes
goos: linux
goarch: amd64
pkg: go.opentelemetry.io/otel/sdk/metric
cpu: Intel(R) Xeon(R) CPU @ 2.20GHz
BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/0     9905773    121.3 ns/op
BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/1     4079145    296.5 ns/op
BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/10     781627     1531 ns/op
```

Looking at the profile, most of the time is spent in "runtime.mapKeyError2" within "runtime.mapaccess2". My best guess is that whatever we are using for Equivalent() is not very performant when used as a map key. This seems like a good opportunity to greatly improve the performance of our metrics (and probably other signals) API + SDK.

To start, I'm adding a simple benchmark within the attribute package to isolate the issue.
Results: ``` $ go test -run '^$' -bench '^BenchmarkEquivalentMapAccess' -benchtime .1s -cpu 1 -benchmem goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/attribute cpu: Intel(R) Xeon(R) CPU @ 2.20GHz BenchmarkEquivalentMapAccess/Empty 2220508 53.58 ns/op 0 B/op 0 allocs/op BenchmarkEquivalentMapAccess/1_string_attribute 622770 196.7 ns/op 0 B/op 0 allocs/op BenchmarkEquivalentMapAccess/10_string_attributes 77462 1558 ns/op 0 B/op 0 allocs/op BenchmarkEquivalentMapAccess/1_int_attribute 602163 197.7 ns/op 0 B/op 0 allocs/op BenchmarkEquivalentMapAccess/10_int_attributes 76603 1569 ns/op 0 B/op 0 allocs/op ``` This shows that it is the map lookup and storage itself that is making the metrics API+SDK perform much worse with more attributes. Some optimization ideas include: * Most attribute sets are likely to be just numbers and strings. Can we make a fast path for sets that don't include complex attributes? * We encourage improving performance of the metrics API by re-using attribute sets where possible. If we can lazily compute+cache a "faster" map key, that will have a big performance improvement when attribute sets are re-used. * compute a uint64 hash using something like https://github.com/gohugoio/hashstructure, or something similar to what prometheus/client_golang does: https://github.com/prometheus/common/blob/c79a891c6c28ce135a2ac082b721c2dacc2269a8/model/signature.go#L31 --------- Co-authored-by: Tyler Yahn Co-authored-by: Flcă‚› --- attribute/benchmark_test.go | 56 +++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/attribute/benchmark_test.go b/attribute/benchmark_test.go index d81bbe829..1822123e8 100644 --- a/attribute/benchmark_test.go +++ b/attribute/benchmark_test.go @@ -271,3 +271,59 @@ func BenchmarkStringSlice(b *testing.B) { }) b.Run("Emit", benchmarkEmit(kv)) } + +// BenchmarkEquivalentMapAccess measures how expensive it is to use +// Equivalent() as a map key. 
+// This is on the hot path for making synchronous
+// measurements on the metrics API/SDK. It will likely be on the hot path for
+// the trace and logs API/SDK in the future.
+//
+// Every sub-benchmark builds its attribute set first and then hands it to
+// benchmarkEquivalentMapAccess, which resets the timer before the measured loop.
+func BenchmarkEquivalentMapAccess(b *testing.B) {
+	// keys names the attributes of the ten-element sets.
+	keys := []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"}
+	// run registers a sub-benchmark; build runs inside it, ahead of the timer reset.
+	run := func(name string, build func() *attribute.Set) {
+		b.Run(name, func(b *testing.B) {
+			benchmarkEquivalentMapAccess(b, build())
+		})
+	}
+	// setOf wraps attribute.NewSet and returns a pointer to the new set.
+	setOf := func(kvs ...attribute.KeyValue) *attribute.Set {
+		s := attribute.NewSet(kvs...)
+		return &s
+	}
+	run("Empty", attribute.EmptySet)
+	run("1 string attribute", func() *attribute.Set {
+		return setOf(attribute.String("string", "42"))
+	})
+	run("10 string attributes", func() *attribute.Set {
+		kvs := make([]attribute.KeyValue, 0, len(keys))
+		for _, k := range keys {
+			kvs = append(kvs, attribute.String(k, "42"))
+		}
+		return setOf(kvs...)
+	})
+	run("1 int attribute", func() *attribute.Set {
+		return setOf(attribute.Int("string", 42))
+	})
+	run("10 int attributes", func() *attribute.Set {
+		kvs := make([]attribute.KeyValue, 0, len(keys))
+		for _, k := range keys {
+			kvs = append(kvs, attribute.Int(k, 42))
+		}
+		return setOf(kvs...)
+	})
+}
+
+// benchmarkEquivalentMapAccess increments a map entry keyed by
+// set.Equivalent() b.N times, resetting the timer after the map is created.
+func benchmarkEquivalentMapAccess(b *testing.B, set *attribute.Set) {
+	counts := map[attribute.Distinct]int{}
+	b.ResetTimer()
+	for range b.N {
+		key := set.Equivalent()
+		counts[key]++
+	}
+}