From b7610a72a7a3e510f45b6268420db4137b4aaa7c Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Wed, 6 Aug 2025 08:50:18 -0400 Subject: [PATCH] Testing: Run sync measure benchmarks in parallel (#7113) I am looking into https://promlabs.com/blog/2025/07/17/why-i-recommend-native-prometheus-instrumentation-over-opentelemetry/#comparing-counter-increment-performance, which seems to suggest the OTel metrics SDK performs poorly when a counter is incremented concurrently. It is potentially a bit of an artificial benchmark, but does suggest there is some contention beyond just the fact that they are incrementing an atomic integer... Original benchmarks from the blog post: https://github.com/promlabs/prometheus-otel-benchmarks/blob/main/otel_test.go ``` $ go test -run=xxxxxMatchNothingxxxxx -cpu=24 -test.benchtime=1s -bench=BenchmarkSyncMeasure/NoView/ goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/metric cpu: Intel(R) Xeon(R) CPU @ 2.20GHz BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/0-24 3946789 313.2 ns/op BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/1-24 3420992 374.4 ns/op BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/10-24 574608 1745 ns/op BenchmarkSyncMeasure/NoView/Float64Counter/Attributes/0-24 3996166 281.1 ns/op BenchmarkSyncMeasure/NoView/Float64Counter/Attributes/1-24 3091573 367.1 ns/op BenchmarkSyncMeasure/NoView/Float64Counter/Attributes/10-24 705693 1660 ns/op BenchmarkSyncMeasure/NoView/Int64UpDownCounter/Attributes/0-24 4098727 296.4 ns/op BenchmarkSyncMeasure/NoView/Int64UpDownCounter/Attributes/1-24 3029276 355.4 ns/op BenchmarkSyncMeasure/NoView/Int64UpDownCounter/Attributes/10-24 605174 1803 ns/op BenchmarkSyncMeasure/NoView/Float64UpDownCounter/Attributes/0-24 4057765 298.6 ns/op BenchmarkSyncMeasure/NoView/Float64UpDownCounter/Attributes/1-24 3384812 366.9 ns/op BenchmarkSyncMeasure/NoView/Float64UpDownCounter/Attributes/10-24 714900 1742 ns/op BenchmarkSyncMeasure/NoView/Int64Histogram/Attributes/0-24 3274644 364.3 ns/op BenchmarkSyncMeasure/NoView/Int64Histogram/Attributes/1-24 3780115 316.1 ns/op BenchmarkSyncMeasure/NoView/Int64Histogram/Attributes/10-24 1294364 993.5 ns/op BenchmarkSyncMeasure/NoView/Float64Histogram/Attributes/0-24 3543817 343.2 ns/op BenchmarkSyncMeasure/NoView/Float64Histogram/Attributes/1-24 3523102 335.8 ns/op BenchmarkSyncMeasure/NoView/Float64Histogram/Attributes/10-24 1329352 956.3 ns/op PASS ok go.opentelemetry.io/otel/sdk/metric 27.504s ``` ``` $ go test -run=xxxxxMatchNothingxxxxx -cpu=1 -test.benchtime=1s -bench=BenchmarkSyncMeasure/NoView/ goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/metric cpu: Intel(R) Xeon(R) CPU @ 2.20GHz BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/0 9905773 121.3 ns/op BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/1 4079145 296.5 ns/op BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/10 781627 1531 ns/op BenchmarkSyncMeasure/NoView/Float64Counter/Attributes/0 10017988 120.2 ns/op BenchmarkSyncMeasure/NoView/Float64Counter/Attributes/1 4055418 296.4 ns/op BenchmarkSyncMeasure/NoView/Float64Counter/Attributes/10 761139 1540 ns/op BenchmarkSyncMeasure/NoView/Int64UpDownCounter/Attributes/0 10017126 121.1 ns/op BenchmarkSyncMeasure/NoView/Int64UpDownCounter/Attributes/1 4037232 295.3 ns/op BenchmarkSyncMeasure/NoView/Int64UpDownCounter/Attributes/10 757010 1539 ns/op BenchmarkSyncMeasure/NoView/Float64UpDownCounter/Attributes/0 10122925 119.0 ns/op BenchmarkSyncMeasure/NoView/Float64UpDownCounter/Attributes/1 4070942 293.8 ns/op BenchmarkSyncMeasure/NoView/Float64UpDownCounter/Attributes/10 788176 1542 ns/op BenchmarkSyncMeasure/NoView/Int64Histogram/Attributes/0 10794142 110.8 ns/op BenchmarkSyncMeasure/NoView/Int64Histogram/Attributes/1 5929494 201.0 ns/op BenchmarkSyncMeasure/NoView/Int64Histogram/Attributes/10 1449292 825.4 ns/op BenchmarkSyncMeasure/NoView/Float64Histogram/Attributes/0 10875385 110.1 ns/op BenchmarkSyncMeasure/NoView/Float64Histogram/Attributes/1 5903116 202.4 ns/op BenchmarkSyncMeasure/NoView/Float64Histogram/Attributes/10 1459578 827.4 ns/op PASS ok go.opentelemetry.io/otel/sdk/metric 25.688s ``` Results are significantly worse (almost > 2x in some cases) with parallelism, but don't initially seem as bad as the blog post suggests. I only have 24 cores, so I can't test higher numbers. Do we want to have parallel benchmarks in addition to our current non-parallel ones? --- sdk/metric/benchmark_test.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/sdk/metric/benchmark_test.go b/sdk/metric/benchmark_test.go index bf6296370..d73759cbd 100644 --- a/sdk/metric/benchmark_test.go +++ b/sdk/metric/benchmark_test.go @@ -113,19 +113,19 @@ func benchMeasAttrs(meas measF) func(*testing.B) { return func(b *testing.B) { b.Run("Attributes/0", func(b *testing.B) { f := meas(*attribute.EmptySet()) - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - f() - } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + f() + } + }) }) b.Run("Attributes/1", func(b *testing.B) { f := meas(attribute.NewSet(attribute.Bool("K", true))) - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - f() - } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + f() + } + }) }) b.Run("Attributes/10", func(b *testing.B) { n := 10 @@ -135,11 +135,11 @@ func benchMeasAttrs(meas measF) func(*testing.B) { attrs = append(attrs, attribute.Int(strconv.Itoa(i), i)) } f := meas(attribute.NewSet(attrs...)) - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - f() - } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + f() + } + }) }) } }