1
0
mirror of https://github.com/open-telemetry/opentelemetry-go.git synced 2025-06-06 23:16:14 +02:00
opentelemetry-go/sdk/trace/batch_span_processor.go
Johannes Liebermann c3c4273ecc
Add RO/RW span interfaces (#1360)
* Store span data directly in the span

- Nesting only some of a span's data in a `data` field (with the rest
  of the data living direclty in the `span` struct) is confusing.
- export.SpanData is meant to be an immutable *snapshot* of a span,
  not the "authoritative" state of the span.
- Refactor attributesMap.toSpanData into toKeyValue and make it
  return a []label.KeyValue which is clearer than modifying a struct
  passed to the function.
- Read droppedCount from the attributesMap as a separate operation
  instead of setting it from within attributesMap.toSpanData.
- Set a span's end time in the span itself rather than in the
  SpanData to allow reading the span's end time after a span has
  ended.
- Set a span's end time as soon as possible within span.End so that
  we don't influence the span's end time with operations such as
  fetching span processors and generating span data.
- Remove error handling for uninitialized spans. This check seems to
  be necessary only because we used to have an *export.SpanData field
  which could be nil. Now that we no longer have this field I think we
  can safely remove the check. The error isn't used anywhere else so
  remove it, too.

* Store parent as trace.SpanContext

The spec requires that the parent field of a Span be a Span, a
SpanContext or null.

Rather than extracting the parent's span ID from the trace.SpanContext
which we get from the tracer, store the trace.SpanContext as is and
explicitly extract the parent's span ID where necessary.

* Add ReadOnlySpan interface

Use this interface instead of export.SpanData in places where reading
information from a span is necessary. Use export.SpanData only when
exporting spans.

* Add ReadWriteSpan interface

Use this interface instead of export.SpanData in places where it is
necessary to read information from a span and write to it at the same
time.

* Rename export.SpanData to SpanSnapshot

SpanSnapshot represents the nature of this type as well as its
intended use more accurately.

Clarify the purpose of SpanSnapshot in the docs and emphasize what
should and should not be done with it.

* Rephrase attributesMap doc comment

"refreshes" is wrong for plural ("updates").

* Refactor span.End()

- Improve accuracy of span duration. Record span end time ASAP. We
  want to measure a user operation as accurately as possible, which
  means we want to mark the end time of a span as soon as possible
  after span.End() is called. Any operations we do inside span.End()
  before storing the end time affect the total duration of the span,
  and although these operations are rather fast at the moment they
  still seem to affect the duration of the span by "artificially"
  adding time between the start and end timestamps. This is relevant
  only in cases where the end time isn't explicitly specified.
- Remove redundant idempotence check. Now that IsRecording() is based
  on the value of span.endTime, IsRecording() will always return
  false after span.End() had been called because span.endTime won't
  be zero. This means we no longer need span.endOnce.
- Improve TestEndSpanTwice so that it also ensures subsequent calls
  to span.End() don't modify the span's end time.

* Update changelog

Co-authored-by: Tyler Yahn <codingalias@gmail.com>
Co-authored-by: Tyler Yahn <MrAlias@users.noreply.github.com>
2020-12-10 21:15:44 -08:00

280 lines
7.1 KiB
Go

// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package trace // import "go.opentelemetry.io/otel/sdk/trace"
import (
"context"
"runtime"
"sync"
"sync/atomic"
"time"
"go.opentelemetry.io/otel"
export "go.opentelemetry.io/otel/sdk/export/trace"
)
const (
DefaultMaxQueueSize = 2048
DefaultBatchTimeout = 5000 * time.Millisecond
DefaultMaxExportBatchSize = 512
)
type BatchSpanProcessorOption func(o *BatchSpanProcessorOptions)
type BatchSpanProcessorOptions struct {
// MaxQueueSize is the maximum queue size to buffer spans for delayed processing. If the
// queue gets full it drops the spans. Use BlockOnQueueFull to change this behavior.
// The default value of MaxQueueSize is 2048.
MaxQueueSize int
// BatchTimeout is the maximum duration for constructing a batch. Processor
// forcefully sends available spans when timeout is reached.
// The default value of BatchTimeout is 5000 msec.
BatchTimeout time.Duration
// MaxExportBatchSize is the maximum number of spans to process in a single batch.
// If there are more than one batch worth of spans then it processes multiple batches
// of spans one batch after the other without any delay.
// The default value of MaxExportBatchSize is 512.
MaxExportBatchSize int
// BlockOnQueueFull blocks onEnd() and onStart() method if the queue is full
// AND if BlockOnQueueFull is set to true.
// Blocking option should be used carefully as it can severely affect the performance of an
// application.
BlockOnQueueFull bool
}
// BatchSpanProcessor is a SpanProcessor that batches asynchronously-received
// SpanSnapshots and sends them to a trace.Exporter when complete.
type BatchSpanProcessor struct {
e export.SpanExporter
o BatchSpanProcessorOptions
queue chan *export.SpanSnapshot
dropped uint32
batch []*export.SpanSnapshot
batchMutex sync.Mutex
timer *time.Timer
stopWait sync.WaitGroup
stopOnce sync.Once
stopCh chan struct{}
}
var _ SpanProcessor = (*BatchSpanProcessor)(nil)
// NewBatchSpanProcessor creates a new BatchSpanProcessor that will send
// SpanSnapshot batches to the exporters with the supplied options.
//
// The returned BatchSpanProcessor needs to be registered with the SDK using
// the RegisterSpanProcessor method for it to process spans.
//
// If the exporter is nil, the span processor will preform no action.
func NewBatchSpanProcessor(exporter export.SpanExporter, options ...BatchSpanProcessorOption) *BatchSpanProcessor {
o := BatchSpanProcessorOptions{
BatchTimeout: DefaultBatchTimeout,
MaxQueueSize: DefaultMaxQueueSize,
MaxExportBatchSize: DefaultMaxExportBatchSize,
}
for _, opt := range options {
opt(&o)
}
bsp := &BatchSpanProcessor{
e: exporter,
o: o,
batch: make([]*export.SpanSnapshot, 0, o.MaxExportBatchSize),
timer: time.NewTimer(o.BatchTimeout),
queue: make(chan *export.SpanSnapshot, o.MaxQueueSize),
stopCh: make(chan struct{}),
}
bsp.stopWait.Add(1)
go func() {
defer bsp.stopWait.Done()
bsp.processQueue()
bsp.drainQueue()
}()
return bsp
}
// OnStart method does nothing.
func (bsp *BatchSpanProcessor) OnStart(parent context.Context, s ReadWriteSpan) {}
// OnEnd method enqueues a ReadOnlySpan for later processing.
func (bsp *BatchSpanProcessor) OnEnd(s ReadOnlySpan) {
// Do not enqueue spans if we are just going to drop them.
if bsp.e == nil {
return
}
bsp.enqueue(s.Snapshot())
}
// Shutdown flushes the queue and waits until all spans are processed.
// It only executes once. Subsequent call does nothing.
func (bsp *BatchSpanProcessor) Shutdown(ctx context.Context) error {
var err error
bsp.stopOnce.Do(func() {
wait := make(chan struct{})
go func() {
close(bsp.stopCh)
bsp.stopWait.Wait()
close(wait)
}()
// Wait until the wait group is done or the context is cancelled
select {
case <-wait:
case <-ctx.Done():
err = ctx.Err()
}
})
return err
}
// ForceFlush exports all ended spans that have not yet been exported.
func (bsp *BatchSpanProcessor) ForceFlush() {
bsp.exportSpans()
}
func WithMaxQueueSize(size int) BatchSpanProcessorOption {
return func(o *BatchSpanProcessorOptions) {
o.MaxQueueSize = size
}
}
func WithMaxExportBatchSize(size int) BatchSpanProcessorOption {
return func(o *BatchSpanProcessorOptions) {
o.MaxExportBatchSize = size
}
}
func WithBatchTimeout(delay time.Duration) BatchSpanProcessorOption {
return func(o *BatchSpanProcessorOptions) {
o.BatchTimeout = delay
}
}
func WithBlocking() BatchSpanProcessorOption {
return func(o *BatchSpanProcessorOptions) {
o.BlockOnQueueFull = true
}
}
// exportSpans is a subroutine of processing and draining the queue.
func (bsp *BatchSpanProcessor) exportSpans() {
bsp.timer.Reset(bsp.o.BatchTimeout)
bsp.batchMutex.Lock()
defer bsp.batchMutex.Unlock()
if len(bsp.batch) > 0 {
if err := bsp.e.ExportSpans(context.Background(), bsp.batch); err != nil {
otel.Handle(err)
}
bsp.batch = bsp.batch[:0]
}
}
// processQueue removes spans from the `queue` channel until processor
// is shut down. It calls the exporter in batches of up to MaxExportBatchSize
// waiting up to BatchTimeout to form a batch.
func (bsp *BatchSpanProcessor) processQueue() {
defer bsp.timer.Stop()
for {
select {
case <-bsp.stopCh:
return
case <-bsp.timer.C:
bsp.exportSpans()
case sd := <-bsp.queue:
bsp.batchMutex.Lock()
bsp.batch = append(bsp.batch, sd)
shouldExport := len(bsp.batch) == bsp.o.MaxExportBatchSize
bsp.batchMutex.Unlock()
if shouldExport {
if !bsp.timer.Stop() {
<-bsp.timer.C
}
bsp.exportSpans()
}
}
}
}
// drainQueue awaits the any caller that had added to bsp.stopWait
// to finish the enqueue, then exports the final batch.
func (bsp *BatchSpanProcessor) drainQueue() {
for {
select {
case sd := <-bsp.queue:
if sd == nil {
bsp.exportSpans()
return
}
bsp.batchMutex.Lock()
bsp.batch = append(bsp.batch, sd)
shouldExport := len(bsp.batch) == bsp.o.MaxExportBatchSize
bsp.batchMutex.Unlock()
if shouldExport {
bsp.exportSpans()
}
default:
close(bsp.queue)
}
}
}
func (bsp *BatchSpanProcessor) enqueue(sd *export.SpanSnapshot) {
if !sd.SpanContext.IsSampled() {
return
}
// This ensures the bsp.queue<- below does not panic as the
// processor shuts down.
defer func() {
x := recover()
switch err := x.(type) {
case nil:
return
case runtime.Error:
if err.Error() == "send on closed channel" {
return
}
}
panic(x)
}()
select {
case <-bsp.stopCh:
return
default:
}
if bsp.o.BlockOnQueueFull {
bsp.queue <- sd
return
}
select {
case bsp.queue <- sd:
default:
atomic.AddUint32(&bsp.dropped, 1)
}
}