From 047df28b88f837688fd7ab3ea9d92c3495d8c0c3 Mon Sep 17 00:00:00 2001
From: Tyler Yahn <MrAlias@users.noreply.github.com>
Date: Mon, 3 Jun 2024 02:11:20 -0700
Subject: [PATCH] Guard rng in exemplar rand computation (#5456)

Fix #5455

The `math/rand.Rand` type is not safe for concurrent access. Concurrent
measurements, and therefore concurrent exemplar computation, are
allowed. Ensure this concurrent design does not lead to data races with
`rng`.
---
 CHANGELOG.md                              |  1 +
 sdk/metric/internal/exemplar/rand.go      | 18 +++++++++++++-----
 sdk/metric/internal/exemplar/rand_test.go | 16 ++++++++++++++++
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c6fe8f662..eb1b86f31 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 - Identify the `Meter` returned from the global `MeterProvider` in `go.opentelemetry.io/otel/global` with its schema URL. (#5426)
 - Log a warning to the OpenTelemetry internal logger when a `Span` in `go.opentelemetry.io/otel/sdk/trace` drops an attribute, event, or link due to a limit being reached. (#5434)
 - Document instrument name requirements in `go.opentelemetry.io/otel/metric`. (#5435)
+- Prevent random number generation data-race for experimental rand exemplars in `go.opentelemetry.io/otel/sdk/metric`. (#5456)
 
 ## [1.27.0/0.49.0/0.3.0] 2024-05-21
 
diff --git a/sdk/metric/internal/exemplar/rand.go b/sdk/metric/internal/exemplar/rand.go
index 6753e1166..199a2608f 100644
--- a/sdk/metric/internal/exemplar/rand.go
+++ b/sdk/metric/internal/exemplar/rand.go
@@ -7,16 +7,21 @@ import (
 	"context"
 	"math"
 	"math/rand"
+	"sync"
 	"time"
 
 	"go.opentelemetry.io/otel/attribute"
 )
 
-// rng is used to make sampling decisions.
-//
-// Do not use crypto/rand. There is no reason for the decrease in performance
-// given this is not a security sensitive decision.
-var rng = rand.New(rand.NewSource(time.Now().UnixNano()))
+var (
+	// rng is used to make sampling decisions.
+	//
+	// Do not use crypto/rand. There is no reason for the decrease in performance
+	// given this is not a security sensitive decision.
+	rng = rand.New(rand.NewSource(time.Now().UnixNano()))
+	// Ensure concurrent safe accecess to rng and its underlying source.
+	rngMu sync.Mutex
+)
 
 // random returns, as a float64, a uniform pseudo-random number in the open
 // interval (0.0,1.0).
@@ -38,6 +43,9 @@ func random() float64 {
 	//
 	// There are likely many other methods to explore here as well.
 
+	rngMu.Lock()
+	defer rngMu.Unlock()
+
 	f := rng.Float64()
 	for f == 0 {
 		f = rng.Float64()
diff --git a/sdk/metric/internal/exemplar/rand_test.go b/sdk/metric/internal/exemplar/rand_test.go
index 7668d3e21..a4c42dcf7 100644
--- a/sdk/metric/internal/exemplar/rand_test.go
+++ b/sdk/metric/internal/exemplar/rand_test.go
@@ -7,6 +7,7 @@ import (
 	"context"
 	"math"
 	"slices"
+	"sync"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -49,3 +50,18 @@ func TestFixedSizeSamplingCorrectness(t *testing.T) {
 	// ensuring no bias in our random sampling algorithm.
 	assert.InDelta(t, 1/mean, intensity, 0.02) // Within 5σ.
 }
+
+func TestRandomConcurrentSafe(t *testing.T) {
+	const goRoutines = 10
+
+	var wg sync.WaitGroup
+	for n := 0; n < goRoutines; n++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			_ = random()
+		}()
+	}
+
+	wg.Wait()
+}