opentelemetry-go/sdk/metric/sdk.go
Joshua MacDonald 9878f3b700 Metrics stdout export pipeline (#265)
* Add MetricAggregator.Merge() implementations

* Update from feedback

* Type

* Ckpt

* Ckpt

* Add push controller

* Ckpt

* Add aggregator interfaces, stdout encoder

* Modify basic main.go

* Main is working

* Batch stdout output

* Sum update

* Rename stdout

* Add stateless/stateful Batcher options

* Undo a for-loop in the example, remove a done TODO

* Update imports

* Add note

* Rename defaultkeys

* Support variable label encoder to speed OpenMetrics/Statsd export

* Lint

* Doc

* Precommit/lint

* Simplify Aggregator API

* Record->Identifier

* Remove export.Record a.k.a. Identifier

* Checkpoint

* Propagate errors to the SDK, remove a bunch of 'TODO warn'

* Checkpoint

* Introduce export.Labels

* Comments in export/metric.go

* Comment

* More merge

* More doc

* Complete example

* Lint fixes

* Add a testable example

* Lint

* Let Export return an error

* add a basic stdout exporter test

* Add measure test; fix aggregator APIs

* Use JSON numbers, not strings

* Test stdout exporter error

* Add a test for the call to RangeTest

* Add error handler API to improve correctness test; return errors from RecordOne

* Undo the previous -- do not expose errors

* Add simple selector variations, test

* Repair examples

* Test push controller error handling

* Add SDK label encoder tests

* Add a defaultkeys batcher test

* Add an ungrouped batcher test

* Lint new tests

* Respond to krnowak's feedback

* Undo comment

* Use concrete receivers for export records and labels, since the constructors return structs not pointers

* Bug fix for stateful batchers; clone an aggregator for long term storage

* Remove TODO addressed in #318

* Add errors to all aggregator interfaces

* Handle ErrNoLastValue case in stdout exporter

* Move aggregator API into sdk/export/metric/aggregator

* Update all aggregator exported-method comments

* Document the aggregator APIs

* More aggregator comments

* Add multiple updates to the ungrouped test

* Fixes for feedback from Gustavo and Liz

* Producer->CheckpointSet; add FinishedCollection

* Process takes an export.Record

* ReadCheckpoint->CheckpointSet

* EncodeLabels->Encode

* Format a better inconsistent type error; add more aggregator API tests

* More RangeTest test coverage

* Make benbjohnson/clock a test-only dependency

* Handle ErrNoLastValue in stress_test
2019-11-15 13:01:20 -08:00


// Copyright 2019, OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package metric

import (
"context"
"fmt"
"os"
"sort"
"sync"
"sync/atomic"
"unsafe"
"go.opentelemetry.io/otel/api/core"
"go.opentelemetry.io/otel/api/metric"
api "go.opentelemetry.io/otel/api/metric"
export "go.opentelemetry.io/otel/sdk/export/metric"
"go.opentelemetry.io/otel/sdk/export/metric/aggregator"
)

type (
// SDK implements the OpenTelemetry Meter API. The SDK is
// bound to a single export.Batcher in `New()`.
//
// The SDK supports a Collect() API to gather and export
// current data. Collect() should be arranged according to
// the batcher model. Push-based batchers will set up a
// timer to call Collect() periodically. Pull-based batchers
// will call Collect() when a pull request arrives.
SDK struct {
// current maps `mapkey` to *record.
current sync.Map
// empty is the (singleton) result of Labels()
// w/ zero arguments.
empty labels
// records is the head of both the primary and the
// reclaim records lists.
records doublePtr
// currentEpoch is the current epoch number. It is
// incremented in `Collect()`.
currentEpoch int64
// batcher is the configured batcher+configuration.
batcher export.Batcher
// labelEncoder determines how labels are uniquely encoded.
labelEncoder export.LabelEncoder
// collectLock prevents simultaneous calls to Collect().
collectLock sync.Mutex
// errorHandler supports delivering errors to the user.
errorHandler ErrorHandler
}
instrument struct {
descriptor *export.Descriptor
meter *SDK
}
// sortedLabels are used to de-duplicate and canonicalize labels.
sortedLabels []core.KeyValue
// labels implements the OpenTelemetry LabelSet API,
// represents an internalized set of labels that may be used
// repeatedly.
labels struct {
meter *SDK
sorted sortedLabels
encoded string
}
// mapkey uniquely describes a metric instrument in terms of
// its InstrumentID and the encoded form of its LabelSet.
mapkey struct {
descriptor *export.Descriptor
encoded string
}
// record maintains the state of one metric instrument. Due to
// the use of lock-free algorithms, there may be more than one
// `record` in existence at a time, although at most one can
// be referenced from the `SDK.current` map.
record struct {
// labels is the LabelSet passed by the user.
labels *labels
// descriptor describes the metric instrument.
descriptor *export.Descriptor
// refcount counts the number of active handles
// referring to this record. Active handles prevent
// removal of the record from the current map.
refcount int64
// collectedEpoch is the epoch number for which this
// record has been exported. This is modified by the
// `Collect()` method.
collectedEpoch int64
// modifiedEpoch is the latest epoch number for which
// this record was updated. Generally, if
// modifiedEpoch is less than collectedEpoch, this
// record is due for reclamation.
modifiedEpoch int64
// reclaim is an atomic to control the start of reclaiming.
reclaim int64
// recorder implements the actual RecordOne() API,
// depending on the type of aggregation. If nil, the
// metric was disabled by the exporter.
recorder export.Aggregator
// next contains the next pointer for both the primary
// and the reclaim lists.
next doublePtr
}
// ErrorHandler is a function for processing errors
// encountered by the SDK; see SetErrorHandler.
ErrorHandler func(error)
// singlePointer wraps an unsafe.Pointer and supports basic
// load(), store(), clear(), and swapNil() operations.
singlePtr struct {
ptr unsafe.Pointer
}
// doublePtr is used for the head and next links of two lists.
doublePtr struct {
primary singlePtr
reclaim singlePtr
}
)

var (
_ api.Meter = &SDK{}
_ api.LabelSet = &labels{}
_ api.InstrumentImpl = &instrument{}
_ api.HandleImpl = &record{}
// hazardRecord is used as a pointer value that indicates the
// record is not included in any list. (`nil` would be
// ambiguous, since the final element in a list has `nil` as
// the next pointer).
hazardRecord = &record{}
)

func (i *instrument) Meter() api.Meter {
return i.meter
}
// SetErrorHandler installs a handler for errors encountered by the SDK.
func (m *SDK) SetErrorHandler(f ErrorHandler) {
m.errorHandler = f
}
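
// A minimal sketch of installing a custom handler in place of
// DefaultErrorHandler (the logger below is illustrative only):
//
//    sdk.SetErrorHandler(func(err error) {
//        log.Printf("metrics SDK error: %v", err)
//    })
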
func (i *instrument) acquireHandle(ls *labels) *record {
// Create lookup key for sync.Map (one allocation)
mk := mapkey{
descriptor: i.descriptor,
encoded: ls.encoded,
}
if actual, ok := i.meter.current.Load(mk); ok {
// Existing record case, only one allocation so far.
rec := actual.(*record)
atomic.AddInt64(&rec.refcount, 1)
return rec
}
// There's a memory allocation here.
rec := &record{
labels: ls,
descriptor: i.descriptor,
refcount: 1,
collectedEpoch: -1,
modifiedEpoch: 0,
recorder: i.meter.batcher.AggregatorFor(i.descriptor),
}
// Load/Store: there's a memory allocation to place `mk` into
// an interface here.
if actual, loaded := i.meter.current.LoadOrStore(mk, rec); loaded {
// Existing record case.
rec = actual.(*record)
atomic.AddInt64(&rec.refcount, 1)
return rec
}
i.meter.addPrimary(rec)
return rec
}
func (i *instrument) AcquireHandle(ls api.LabelSet) api.HandleImpl {
labs := i.meter.labsFor(ls)
return i.acquireHandle(labs)
}
func (i *instrument) RecordOne(ctx context.Context, number core.Number, ls api.LabelSet) {
ourLs := i.meter.labsFor(ls)
h := i.acquireHandle(ourLs)
defer h.Release()
h.RecordOne(ctx, number)
}
// New constructs a new SDK for the given batcher. This SDK supports
// only a single batcher.
//
// The SDK does not start any background process to collect itself
// periodically; this responsibility typically lies with the
// batcher, depending on the type of export. For example, a pull-based
// batcher will call Collect() when it receives a request to scrape
// current metric values. A push-based batcher should configure its
// own periodic collection.
func New(batcher export.Batcher, labelEncoder export.LabelEncoder) *SDK {
m := &SDK{
batcher: batcher,
labelEncoder: labelEncoder,
errorHandler: DefaultErrorHandler,
}
m.empty.meter = m
return m
}
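
// A minimal usage sketch, assuming a batcher and label encoder
// obtained from an exporter (the names below are illustrative, not
// part of this package):
//
//    sdk := metric.New(batcher, labelEncoder)
//    counter := sdk.NewInt64Counter("requests.count")
//    counter.Add(ctx, 1, sdk.Labels(key.String("route", "/items")))
//
// A pull-based batcher would then call sdk.Collect(ctx) when scraped;
// a push-based batcher would call it on its own timer.
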
// DefaultErrorHandler prints errors to os.Stderr; it is the error
// handler installed by New until SetErrorHandler is called.
func DefaultErrorHandler(err error) {
fmt.Fprintln(os.Stderr, "Metrics SDK error:", err)
}
// Labels returns a LabelSet corresponding to the arguments. Passed
// labels are de-duplicated, with last-value-wins semantics.
func (m *SDK) Labels(kvs ...core.KeyValue) api.LabelSet {
// Note: This computes a canonical encoding of the labels to
// use as a map key. It happens to use the encoding used by
// statsd for labels, allowing an optimization for statsd
// batchers. This could be made configurable in the
// constructor, to support the same optimization for different
// batchers.
// Check for empty set.
if len(kvs) == 0 {
return &m.empty
}
ls := &labels{
meter: m,
sorted: kvs,
}
// Sort and de-duplicate.
sort.Stable(&ls.sorted)
oi := 1
for i := 1; i < len(ls.sorted); i++ {
if ls.sorted[i-1].Key == ls.sorted[i].Key {
ls.sorted[oi-1] = ls.sorted[i]
continue
}
ls.sorted[oi] = ls.sorted[i]
oi++
}
ls.sorted = ls.sorted[0:oi]
ls.encoded = m.labelEncoder.Encode(ls.sorted)
return ls
}
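
// For example, with last-value-wins semantics a duplicated key keeps
// only its final value (key.String here comes from the api/key package
// and is shown for illustration):
//
//    ls := sdk.Labels(
//        key.String("user", "alice"),
//        key.String("user", "bob"),
//    )
//    // ls contains only user=bob, sorted and encoded once.
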
// labsFor sanitizes the input LabelSet. If the input was created by
// another Meter instance, for example, it is replaced by the empty
// LabelSet.
func (m *SDK) labsFor(ls api.LabelSet) *labels {
if l, _ := ls.(*labels); l != nil && l.meter == m {
return l
}
return &m.empty
}
func (m *SDK) newInstrument(name string, metricKind export.MetricKind, numberKind core.NumberKind, opts *api.Options) *instrument {
descriptor := export.NewDescriptor(
name,
metricKind,
opts.Keys,
opts.Description,
opts.Unit,
numberKind,
opts.Alternate)
return &instrument{
descriptor: descriptor,
meter: m,
}
}
func (m *SDK) newCounterInstrument(name string, numberKind core.NumberKind, cos ...api.CounterOptionApplier) *instrument {
opts := api.Options{}
api.ApplyCounterOptions(&opts, cos...)
return m.newInstrument(name, export.CounterKind, numberKind, &opts)
}
func (m *SDK) newGaugeInstrument(name string, numberKind core.NumberKind, gos ...api.GaugeOptionApplier) *instrument {
opts := api.Options{}
api.ApplyGaugeOptions(&opts, gos...)
return m.newInstrument(name, export.GaugeKind, numberKind, &opts)
}
func (m *SDK) newMeasureInstrument(name string, numberKind core.NumberKind, mos ...api.MeasureOptionApplier) *instrument {
opts := api.Options{}
api.ApplyMeasureOptions(&opts, mos...)
return m.newInstrument(name, export.MeasureKind, numberKind, &opts)
}
func (m *SDK) NewInt64Counter(name string, cos ...api.CounterOptionApplier) api.Int64Counter {
return api.WrapInt64CounterInstrument(m.newCounterInstrument(name, core.Int64NumberKind, cos...))
}
func (m *SDK) NewFloat64Counter(name string, cos ...api.CounterOptionApplier) api.Float64Counter {
return api.WrapFloat64CounterInstrument(m.newCounterInstrument(name, core.Float64NumberKind, cos...))
}
func (m *SDK) NewInt64Gauge(name string, gos ...api.GaugeOptionApplier) api.Int64Gauge {
return api.WrapInt64GaugeInstrument(m.newGaugeInstrument(name, core.Int64NumberKind, gos...))
}
func (m *SDK) NewFloat64Gauge(name string, gos ...api.GaugeOptionApplier) api.Float64Gauge {
return api.WrapFloat64GaugeInstrument(m.newGaugeInstrument(name, core.Float64NumberKind, gos...))
}
func (m *SDK) NewInt64Measure(name string, mos ...api.MeasureOptionApplier) api.Int64Measure {
return api.WrapInt64MeasureInstrument(m.newMeasureInstrument(name, core.Int64NumberKind, mos...))
}
func (m *SDK) NewFloat64Measure(name string, mos ...api.MeasureOptionApplier) api.Float64Measure {
return api.WrapFloat64MeasureInstrument(m.newMeasureInstrument(name, core.Float64NumberKind, mos...))
}
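
// A sketch of the two recording paths offered by these constructors:
// direct calls, which look up the record on every use, and handles,
// which acquire the record once and reuse it. Instrument and label
// names below are illustrative:
//
//    counter := sdk.NewInt64Counter("requests.count")
//    labels := sdk.Labels(key.String("route", "/items"))
//
//    counter.Add(ctx, 1, labels)             // direct call
//
//    handle := counter.AcquireHandle(labels) // reusable handle
//    defer handle.Release()
//    handle.Add(ctx, 1)
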
// saveFromReclaim puts a record onto the "reclaim" list when it
// detects an attempt to delete the record while it is still in use.
func (m *SDK) saveFromReclaim(rec *record) {
for {
reclaimed := atomic.LoadInt64(&rec.reclaim)
if reclaimed != 0 {
return
}
if atomic.CompareAndSwapInt64(&rec.reclaim, 0, 1) {
break
}
}
m.addReclaim(rec)
}
// Collect traverses the list of active records and exports data for
// each active instrument. Collect() may not be called concurrently.
//
// During the collection pass, the export.Batcher will receive
// one Process() call per current aggregation.
//
// Returns the number of records that were checkpointed.
func (m *SDK) Collect(ctx context.Context) int {
m.collectLock.Lock()
defer m.collectLock.Unlock()
checkpointed := 0
var next *record
for inuse := m.records.primary.swapNil(); inuse != nil; inuse = next {
next = inuse.next.primary.load()
refcount := atomic.LoadInt64(&inuse.refcount)
if refcount > 0 {
checkpointed += m.checkpoint(ctx, inuse)
m.addPrimary(inuse)
continue
}
modified := atomic.LoadInt64(&inuse.modifiedEpoch)
collected := atomic.LoadInt64(&inuse.collectedEpoch)
checkpointed += m.checkpoint(ctx, inuse)
if modified >= collected {
atomic.StoreInt64(&inuse.collectedEpoch, m.currentEpoch)
m.addPrimary(inuse)
continue
}
// Remove this entry.
m.current.Delete(inuse.mapkey())
inuse.next.primary.store(hazardRecord)
}
for chances := m.records.reclaim.swapNil(); chances != nil; chances = next {
atomic.StoreInt64(&chances.collectedEpoch, m.currentEpoch)
next = chances.next.reclaim.load()
chances.next.reclaim.clear()
atomic.StoreInt64(&chances.reclaim, 0)
if chances.next.primary.load() == hazardRecord {
checkpointed += m.checkpoint(ctx, chances)
m.addPrimary(chances)
}
}
m.currentEpoch++
return checkpointed
}
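
// A sketch of a push-style loop built on Collect(); the ticker and
// surrounding wiring are illustrative, not part of this package:
//
//    ticker := time.NewTicker(10 * time.Second)
//    defer ticker.Stop()
//    for range ticker.C {
//        // Each pass delivers one Process() call per current
//        // aggregation to the configured batcher.
//        _ = sdk.Collect(ctx)
//    }
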
func (m *SDK) checkpoint(ctx context.Context, r *record) int {
if r.recorder == nil {
return 0
}
r.recorder.Checkpoint(ctx, r.descriptor)
labels := export.NewLabels(r.labels.sorted, r.labels.encoded, m.labelEncoder)
err := m.batcher.Process(ctx, export.NewRecord(r.descriptor, labels, r.recorder))
if err != nil {
m.errorHandler(err)
}
return 1
}
// RecordBatch enters a batch of metric events.
func (m *SDK) RecordBatch(ctx context.Context, ls api.LabelSet, measurements ...api.Measurement) {
for _, meas := range measurements {
meas.InstrumentImpl().RecordOne(ctx, meas.Number(), ls)
}
}
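
// A sketch of recording several measurements against one label set;
// counter and latency are assumed to be instruments created from this
// SDK, and Measurement() comes from the typed instrument wrappers:
//
//    sdk.RecordBatch(ctx, labels,
//        counter.Measurement(1),
//        latency.Measurement(0.123),
//    )
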
// GetDescriptor returns the descriptor of an instrument. This method
// is not part of the public metric API.
func (m *SDK) GetDescriptor(inst metric.InstrumentImpl) *export.Descriptor {
if ii, ok := inst.(*instrument); ok {
return ii.descriptor
}
return nil
}
func (r *record) RecordOne(ctx context.Context, number core.Number) {
if r.recorder == nil {
// The instrument is disabled according to the AggregationSelector.
return
}
if err := aggregator.RangeTest(number, r.descriptor); err != nil {
r.labels.meter.errorHandler(err)
return
}
if err := r.recorder.Update(ctx, number, r.descriptor); err != nil {
r.labels.meter.errorHandler(err)
return
}
}
func (r *record) Release() {
for {
collected := atomic.LoadInt64(&r.collectedEpoch)
modified := atomic.LoadInt64(&r.modifiedEpoch)
updated := collected + 1
if modified == updated {
// No change
break
}
if !atomic.CompareAndSwapInt64(&r.modifiedEpoch, modified, updated) {
continue
}
if modified < collected {
// This record could have been reclaimed.
r.labels.meter.saveFromReclaim(r)
}
break
}
_ = atomic.AddInt64(&r.refcount, -1)
}
func (r *record) mapkey() mapkey {
return mapkey{
descriptor: r.descriptor,
encoded: r.labels.encoded,
}
}