From 047df28b88f837688fd7ab3ea9d92c3495d8c0c3 Mon Sep 17 00:00:00 2001 From: Tyler Yahn Date: Mon, 3 Jun 2024 02:11:20 -0700 Subject: [PATCH] Guard rng in exemplar rand computation (#5456) Fix #5455 The `math/rand.Rand` type is not safe for concurrent access. Concurrent measurements, and therefore concurrent exemplar computation, are allowed. Ensure this concurrent design does not lead to data races with `rng`. --- CHANGELOG.md | 1 + sdk/metric/internal/exemplar/rand.go | 18 +++++++++++++----- sdk/metric/internal/exemplar/rand_test.go | 16 ++++++++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6fe8f662..eb1b86f31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Identify the `Meter` returned from the global `MeterProvider` in `go.opentelemetry.io/otel/global` with its schema URL. (#5426) - Log a warning to the OpenTelemetry internal logger when a `Span` in `go.opentelemetry.io/otel/sdk/trace` drops an attribute, event, or link due to a limit being reached. (#5434) - Document instrument name requirements in `go.opentelemetry.io/otel/metric`. (#5435) +- Prevent random number generation data-race for experimental rand exemplars in `go.opentelemetry.io/otel/sdk/metric`. (#5456) ## [1.27.0/0.49.0/0.3.0] 2024-05-21 diff --git a/sdk/metric/internal/exemplar/rand.go b/sdk/metric/internal/exemplar/rand.go index 6753e1166..199a2608f 100644 --- a/sdk/metric/internal/exemplar/rand.go +++ b/sdk/metric/internal/exemplar/rand.go @@ -7,16 +7,21 @@ import ( "context" "math" "math/rand" + "sync" "time" "go.opentelemetry.io/otel/attribute" ) -// rng is used to make sampling decisions. -// -// Do not use crypto/rand. There is no reason for the decrease in performance -// given this is not a security sensitive decision. -var rng = rand.New(rand.NewSource(time.Now().UnixNano())) +var ( + // rng is used to make sampling decisions. + // + // Do not use crypto/rand. There is no reason for the decrease in performance + // given this is not a security sensitive decision. + rng = rand.New(rand.NewSource(time.Now().UnixNano())) + // Ensure concurrent safe accecess to rng and its underlying source. + rngMu sync.Mutex +) // random returns, as a float64, a uniform pseudo-random number in the open // interval (0.0,1.0). @@ -38,6 +43,9 @@ func random() float64 { // // There are likely many other methods to explore here as well. + rngMu.Lock() + defer rngMu.Unlock() + f := rng.Float64() for f == 0 { f = rng.Float64() diff --git a/sdk/metric/internal/exemplar/rand_test.go b/sdk/metric/internal/exemplar/rand_test.go index 7668d3e21..a4c42dcf7 100644 --- a/sdk/metric/internal/exemplar/rand_test.go +++ b/sdk/metric/internal/exemplar/rand_test.go @@ -7,6 +7,7 @@ import ( "context" "math" "slices" + "sync" "testing" "github.com/stretchr/testify/assert" @@ -49,3 +50,18 @@ func TestFixedSizeSamplingCorrectness(t *testing.T) { // ensuring no bias in our random sampling algorithm. assert.InDelta(t, 1/mean, intensity, 0.02) // Within 5σ. } + +func TestRandomConcurrentSafe(t *testing.T) { + const goRoutines = 10 + + var wg sync.WaitGroup + for n := 0; n < goRoutines; n++ { + wg.Add(1) + go func() { + defer wg.Done() + _ = random() + }() + } + + wg.Wait() +}