From 8fbaa970843f0899228bc19c1e45a31f5a5bb73e Mon Sep 17 00:00:00 2001 From: Warnar Boekkooi Date: Tue, 8 Oct 2024 09:43:14 +0200 Subject: [PATCH] Reduce `newEvictedQueueLink` and `newEvictedQueueEvent` memory allocations (#5858) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Good day, While doing some profile with pprof on one of our services I notices that `sync.OnceFunc` was allocating a nice amount of objects. These `sync.OnceFunc` calls where done by `newEvictedQueueEvent` and `newEvictedQueueLink`. So to avoid these extra allocation I created this PR which replaces the `sync.OnceFunc` with `sync.Once` which is now part of the evictedQueue. This resulted in the following benchstat result (commit https://github.com/open-telemetry/opentelemetry-go/pull/5858/commits/baad07e574017ebcc38254b0228b2470f3ef1dc2): ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/trace cpu: 11th Gen Intel(R) Core(TM) i5-11400H @ 2.70GHz │ 1727937489-attr-old.txt │ evic-new1728018933.txt │ │ sec/op │ sec/op vs base │ RecordingSpanSetAttributes/WithLimit/false-12 6.579µ ± 9% 5.875µ ± 8% -10.71% (p=0.000 n=10) RecordingSpanSetAttributes/WithLimit/true-12 14.27µ ± 10% 11.82µ ± 16% -17.18% (p=0.009 n=10) SpanEnd-12 63.44n ± 0% 75.97n ± 2% +19.75% (p=0.000 n=10) TraceStart/with_a_simple_span-12 728.0n ± 3% 412.7n ± 1% -43.31% (p=0.000 n=10) TraceStart/with_several_links-12 881.5n ± 1% 553.2n ± 3% -37.25% (p=0.000 n=10) TraceStart/with_attributes-12 916.5n ± 2% 598.4n ± 2% -34.70% (p=0.000 n=10) SpanProcessorOnEnd/batch:_10,_spans:_10-12 151.9n ± 3% 144.1n ± 3% -5.13% (p=0.000 n=10) SpanProcessorOnEnd/batch:_10,_spans:_100-12 1.497µ ± 2% 1.435µ ± 2% -4.14% (p=0.000 n=10) SpanProcessorOnEnd/batch:_100,_spans:_10-12 148.5n ± 2% 143.0n ± 2% -3.67% (p=0.000 n=10) SpanProcessorOnEnd/batch:_100,_spans:_100-12 1.481µ ± 3% 1.442µ ± 4% -2.67% (p=0.022 n=10) SpanProcessorVerboseLogging-12 9.376µ ± 1% 6.662µ ± 2% -28.94% (p=0.000 n=10) SpanLimits/AttributeValueLengthLimit-12 7.221µ ± 4% 6.605µ ± 3% -8.54% (p=0.002 n=10) SpanLimits/AttributeCountLimit-12 6.300µ ± 6% 6.091µ ± 6% -3.30% (p=0.007 n=10) SpanLimits/EventCountLimit-12 5.921µ ± 4% 5.506µ ± 7% -7.02% (p=0.011 n=10) SpanLimits/LinkCountLimit-12 6.073µ ± 6% 5.423µ ± 3% -10.71% (p=0.000 n=10) SpanLimits/AttributePerEventCountLimit-12 6.455µ ± 3% 5.294µ ± 9% -17.98% (p=0.000 n=10) SpanLimits/AttributePerLinkCountLimit-12 6.263µ ± 4% 5.850µ ± 7% -6.59% (p=0.000 n=10) SpanSetAttributesOverCapacity-12 1.683µ ± 1% 1.391µ ± 0% -17.35% (p=0.000 n=10) StartEndSpan/AlwaysSample-12 803.0n ± 4% 531.4n ± 5% -33.82% (p=0.000 n=10) StartEndSpan/NeverSample-12 216.0n ± 2% 212.7n ± 2% ~ (p=0.055 n=10) SpanWithAttributes_4/AlwaysSample-12 1216.5n ± 2% 877.6n ± 9% -27.85% (p=0.000 n=10) SpanWithAttributes_4/NeverSample-12 370.4n ± 5% 374.5n ± 3% ~ (p=0.197 n=10) SpanWithAttributes_8/AlwaysSample-12 1.494µ ± 4% 1.117µ ± 4% -25.27% (p=0.000 n=10) SpanWithAttributes_8/NeverSample-12 477.1n ± 3% 475.1n ± 7% ~ (p=0.739 n=10) SpanWithAttributes_all/AlwaysSample-12 1310.5n ± 5% 971.5n ± 6% -25.87% (p=0.000 n=10) SpanWithAttributes_all/NeverSample-12 388.0n ± 5% 389.1n ± 6% ~ (p=0.699 n=10) SpanWithAttributes_all_2x/AlwaysSample-12 1.664µ ± 2% 1.236µ ± 3% -25.72% (p=0.000 n=10) SpanWithAttributes_all_2x/NeverSample-12 522.7n ± 7% 527.2n ± 9% ~ (p=0.912 n=10) SpanWithEvents_4/AlwaysSample-12 1.411µ ± 4% 1.058µ ± 2% -24.99% (p=0.000 n=10) SpanWithEvents_4/NeverSample-12 218.4n ± 3% 218.9n ± 2% ~ (p=0.971 n=10) SpanWithEvents_8/AlwaysSample-12 1.997µ ± 5% 1.657µ ± 4% -17.03% (p=0.000 n=10) SpanWithEvents_8/NeverSample-12 226.6n ± 3% 221.7n ± 2% -2.14% (p=0.015 n=10) SpanWithEvents_WithStackTrace/AlwaysSample-12 1016.5n ± 2% 712.5n ± 2% -29.91% (p=0.000 n=10) SpanWithEvents_WithStackTrace/NeverSample-12 242.0n ± 3% 241.0n ± 1% ~ (p=0.254 n=10) SpanWithEvents_WithTimestamp/AlwaysSample-12 1019.5n ± 3% 713.2n ± 6% -30.04% (p=0.000 n=10) SpanWithEvents_WithTimestamp/NeverSample-12 276.4n ± 3% 276.6n ± 2% ~ (p=0.723 n=10) TraceID_DotString-12 71.94n ± 3% 75.71n ± 3% +5.23% (p=0.001 n=10) SpanID_DotString-12 50.88n ± 3% 52.35n ± 1% +2.88% (p=0.001 n=10) geomean 952.8n 822.2n -13.71% │ 1727937489-attr-old.txt │ evic-new1728018933.txt │ │ B/op │ B/op vs base │ RecordingSpanSetAttributes/WithLimit/false-12 7.062Ki ± 0% 6.891Ki ± 0% -2.43% (p=0.000 n=10) RecordingSpanSetAttributes/WithLimit/true-12 7.564Ki ± 0% 7.393Ki ± 0% -2.27% (p=0.000 n=10) SpanEnd-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ TraceStart/with_a_simple_span-12 704.0 ± 0% 528.0 ± 0% -25.00% (p=0.000 n=10) TraceStart/with_several_links-12 880.0 ± 0% 704.0 ± 0% -20.00% (p=0.000 n=10) TraceStart/with_attributes-12 960.0 ± 0% 784.0 ± 0% -18.33% (p=0.000 n=10) SpanProcessorOnEnd/batch:_10,_spans:_10-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorOnEnd/batch:_10,_spans:_100-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorOnEnd/batch:_100,_spans:_10-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorOnEnd/batch:_100,_spans:_100-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorVerboseLogging-12 10.938Ki ± 0% 9.219Ki ± 0% -15.71% (p=0.000 n=10) SpanLimits/AttributeValueLengthLimit-12 10.76Ki ± 0% 10.59Ki ± 0% -1.60% (p=0.000 n=10) SpanLimits/AttributeCountLimit-12 10.016Ki ± 0% 9.844Ki ± 0% -1.72% (p=0.000 n=10) SpanLimits/EventCountLimit-12 9.594Ki ± 0% 9.422Ki ± 0% -1.79% (p=0.000 n=10) SpanLimits/LinkCountLimit-12 9.203Ki ± 0% 9.031Ki ± 0% -1.87% (p=0.000 n=10) SpanLimits/AttributePerEventCountLimit-12 10.64Ki ± 0% 10.47Ki ± 0% -1.62% (p=0.000 n=10) SpanLimits/AttributePerLinkCountLimit-12 10.64Ki ± 0% 10.47Ki ± 0% -1.62% (p=0.000 n=10) SpanSetAttributesOverCapacity-12 768.0 ± 0% 592.0 ± 0% -22.92% (p=0.000 n=10) StartEndSpan/AlwaysSample-12 704.0 ± 0% 528.0 ± 0% -25.00% (p=0.000 n=10) StartEndSpan/NeverSample-12 144.0 ± 0% 144.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_4/AlwaysSample-12 1.188Ki ± 0% 1.016Ki ± 0% -14.47% (p=0.000 n=10) SpanWithAttributes_4/NeverSample-12 400.0 ± 0% 400.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_8/AlwaysSample-12 1.688Ki ± 0% 1.516Ki ± 0% -10.19% (p=0.000 n=10) SpanWithAttributes_8/NeverSample-12 656.0 ± 0% 656.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_all/AlwaysSample-12 1.312Ki ± 0% 1.141Ki ± 0% -13.10% (p=0.000 n=10) SpanWithAttributes_all/NeverSample-12 464.0 ± 0% 464.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_all_2x/AlwaysSample-12 2.062Ki ± 0% 1.891Ki ± 0% -8.33% (p=0.000 n=10) SpanWithAttributes_all_2x/NeverSample-12 848.0 ± 0% 848.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_4/AlwaysSample-12 1.188Ki ± 0% 1.016Ki ± 0% -14.47% (p=0.000 n=10) SpanWithEvents_4/NeverSample-12 144.0 ± 0% 144.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_8/AlwaysSample-12 1.812Ki ± 0% 1.641Ki ± 0% -9.48% (p=0.000 n=10) SpanWithEvents_8/NeverSample-12 144.0 ± 0% 144.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_WithStackTrace/AlwaysSample-12 800.0 ± 0% 624.0 ± 0% -22.00% (p=0.000 n=10) SpanWithEvents_WithStackTrace/NeverSample-12 160.0 ± 0% 160.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_WithTimestamp/AlwaysSample-12 824.0 ± 0% 648.0 ± 0% -21.36% (p=0.000 n=10) SpanWithEvents_WithTimestamp/NeverSample-12 184.0 ± 0% 184.0 ± 0% ~ (p=1.000 n=10) ¹ TraceID_DotString-12 64.00 ± 0% 64.00 ± 0% ~ (p=1.000 n=10) ¹ SpanID_DotString-12 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² -7.15% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ 1727937489-attr-old.txt │ evic-new1728018933.txt │ │ allocs/op │ allocs/op vs base │ RecordingSpanSetAttributes/WithLimit/false-12 15.000 ± 0% 3.000 ± 0% -80.00% (p=0.000 n=10) RecordingSpanSetAttributes/WithLimit/true-12 20.000 ± 0% 8.000 ± 0% -60.00% (p=0.000 n=10) SpanEnd-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ TraceStart/with_a_simple_span-12 14.000 ± 0% 2.000 ± 0% -85.71% (p=0.000 n=10) TraceStart/with_several_links-12 15.000 ± 0% 3.000 ± 0% -80.00% (p=0.000 n=10) TraceStart/with_attributes-12 16.000 ± 0% 4.000 ± 0% -75.00% (p=0.000 n=10) SpanProcessorOnEnd/batch:_10,_spans:_10-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorOnEnd/batch:_10,_spans:_100-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorOnEnd/batch:_100,_spans:_10-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorOnEnd/batch:_100,_spans:_100-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorVerboseLogging-12 155.00 ± 0% 35.00 ± 0% -77.42% (p=0.000 n=10) SpanLimits/AttributeValueLengthLimit-12 54.00 ± 0% 42.00 ± 0% -22.22% (p=0.000 n=10) SpanLimits/AttributeCountLimit-12 50.00 ± 0% 38.00 ± 0% -24.00% (p=0.000 n=10) SpanLimits/EventCountLimit-12 47.00 ± 0% 35.00 ± 0% -25.53% (p=0.000 n=10) SpanLimits/LinkCountLimit-12 47.00 ± 0% 35.00 ± 0% -25.53% (p=0.000 n=10) SpanLimits/AttributePerEventCountLimit-12 50.00 ± 0% 38.00 ± 0% -24.00% (p=0.000 n=10) SpanLimits/AttributePerLinkCountLimit-12 50.00 ± 0% 38.00 ± 0% -24.00% (p=0.000 n=10) SpanSetAttributesOverCapacity-12 15.000 ± 0% 3.000 ± 0% -80.00% (p=0.000 n=10) StartEndSpan/AlwaysSample-12 14.000 ± 0% 2.000 ± 0% -85.71% (p=0.000 n=10) StartEndSpan/NeverSample-12 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_4/AlwaysSample-12 16.000 ± 0% 4.000 ± 0% -75.00% (p=0.000 n=10) SpanWithAttributes_4/NeverSample-12 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_8/AlwaysSample-12 16.000 ± 0% 4.000 ± 0% -75.00% (p=0.000 n=10) SpanWithAttributes_8/NeverSample-12 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_all/AlwaysSample-12 16.000 ± 0% 4.000 ± 0% -75.00% (p=0.000 n=10) SpanWithAttributes_all/NeverSample-12 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_all_2x/AlwaysSample-12 16.000 ± 0% 4.000 ± 0% -75.00% (p=0.000 n=10) SpanWithAttributes_all_2x/NeverSample-12 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_4/AlwaysSample-12 17.000 ± 0% 5.000 ± 0% -70.59% (p=0.000 n=10) SpanWithEvents_4/NeverSample-12 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_8/AlwaysSample-12 18.000 ± 0% 6.000 ± 0% -66.67% (p=0.000 n=10) SpanWithEvents_8/NeverSample-12 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_WithStackTrace/AlwaysSample-12 16.000 ± 0% 4.000 ± 0% -75.00% (p=0.000 n=10) SpanWithEvents_WithStackTrace/NeverSample-12 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_WithTimestamp/AlwaysSample-12 17.000 ± 0% 5.000 ± 0% -70.59% (p=0.000 n=10) SpanWithEvents_WithTimestamp/NeverSample-12 4.000 ± 0% 4.000 ± 0% ~ (p=1.000 n=10) ¹ TraceID_DotString-12 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) ¹ SpanID_DotString-12 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² -47.73% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ``` --------- Co-authored-by: Damien Mathieu <42@dmathieu.com> Co-authored-by: Robert Pająk --- CHANGELOG.md | 1 + sdk/trace/evictedqueue.go | 21 +++++++++++++-------- sdk/trace/evictedqueue_test.go | 23 +++++++++++++++++------ 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0b871ebc..68a8a9d7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - `FilterProcessor.Enabled` in `go.opentelemetry.io/otel/sdk/log/internal/x` now accepts `EnabledParameters` instead of `Record`. (#5791) - The `Record` type in `go.opentelemetry.io/otel/log` is no longer comparable. (#5847) - Performance improvements for the trace SDK `SetAttributes` method in `Span`. (#5864) +- Reduce memory allocations for the `Event` and `Link` lists in `Span`. (#5858) ### Deprecated diff --git a/sdk/trace/evictedqueue.go b/sdk/trace/evictedqueue.go index 821c83faa..8c308dd60 100644 --- a/sdk/trace/evictedqueue.go +++ b/sdk/trace/evictedqueue.go @@ -12,25 +12,26 @@ import ( // evictedQueue is a FIFO queue with a configurable capacity. type evictedQueue[T any] struct { - queue []T - capacity int - droppedCount int - logDropped func() + queue []T + capacity int + droppedCount int + logDroppedMsg string + logDroppedOnce sync.Once } func newEvictedQueueEvent(capacity int) evictedQueue[Event] { // Do not pre-allocate queue, do this lazily. return evictedQueue[Event]{ - capacity: capacity, - logDropped: sync.OnceFunc(func() { global.Warn("limit reached: dropping trace trace.Event") }), + capacity: capacity, + logDroppedMsg: "limit reached: dropping trace trace.Event", } } func newEvictedQueueLink(capacity int) evictedQueue[Link] { // Do not pre-allocate queue, do this lazily. return evictedQueue[Link]{ - capacity: capacity, - logDropped: sync.OnceFunc(func() { global.Warn("limit reached: dropping trace trace.Link") }), + capacity: capacity, + logDroppedMsg: "limit reached: dropping trace trace.Link", } } @@ -53,6 +54,10 @@ func (eq *evictedQueue[T]) add(value T) { eq.queue = append(eq.queue, value) } +func (eq *evictedQueue[T]) logDropped() { + eq.logDroppedOnce.Do(func() { global.Warn(eq.logDroppedMsg) }) +} + // copy returns a copy of the evictedQueue. func (eq *evictedQueue[T]) copy() []T { return slices.Clone(eq.queue) diff --git a/sdk/trace/evictedqueue_test.go b/sdk/trace/evictedqueue_test.go index 7b88d63d0..662693958 100644 --- a/sdk/trace/evictedqueue_test.go +++ b/sdk/trace/evictedqueue_test.go @@ -7,7 +7,11 @@ import ( "reflect" "testing" + "github.com/go-logr/logr" + "github.com/go-logr/logr/funcr" "github.com/stretchr/testify/assert" + + "go.opentelemetry.io/otel/internal/global" ) func init() { @@ -36,18 +40,25 @@ func TestCopy(t *testing.T) { func TestDropCount(t *testing.T) { q := newEvictedQueueEvent(3) - var called bool - q.logDropped = func() { called = true } + + var called int + t.Cleanup(func(l logr.Logger) func() { + return func() { global.SetLogger(l) } + }(global.GetLogger())) + global.SetLogger(funcr.New(func(prefix, args string) { + called++ + }, funcr.Options{Verbosity: 1})) q.add(Event{Name: "value1"}) - assert.False(t, called, `"value1" logged as dropped`) + assert.Equal(t, 0, called, `"value1" logged as dropped`) q.add(Event{Name: "value2"}) - assert.False(t, called, `"value2" logged as dropped`) + assert.Equal(t, 0, called, `"value2" logged as dropped`) q.add(Event{Name: "value3"}) - assert.False(t, called, `"value3" logged as dropped`) + assert.Equal(t, 0, called, `"value3" logged as dropped`) q.add(Event{Name: "value1"}) - assert.True(t, called, `"value2" not logged as dropped`) + assert.Equal(t, 1, called, `"value2" not logged as dropped`) q.add(Event{Name: "value4"}) + assert.Equal(t, 1, called, `"value4" logged as dropped`) if wantLen, gotLen := 3, len(q.queue); wantLen != gotLen { t.Errorf("got queue length %d want %d", gotLen, wantLen) }