mirror of
https://github.com/open-telemetry/opentelemetry-go.git
synced 2025-01-12 02:28:07 +02:00
31661dd6be
* Centralize the retry package for the otlp exporters * Add dependabot config * Use unified internal/retry module in otlp exporters * Remove otlpmetric and otlptrace internal/retry module * Run go mod tidy in otlp{metric,trace}
276 lines
8.3 KiB
Go
276 lines
8.3 KiB
Go
// Copyright The OpenTelemetry Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package otlpmetricgrpc // import "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"sync"
|
|
"time"
|
|
|
|
"google.golang.org/genproto/googleapis/rpc/errdetails"
|
|
"google.golang.org/grpc"
|
|
"google.golang.org/grpc/codes"
|
|
"google.golang.org/grpc/metadata"
|
|
"google.golang.org/grpc/status"
|
|
|
|
"go.opentelemetry.io/otel/exporters/otlp/internal/retry"
|
|
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric"
|
|
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/internal/otlpconfig"
|
|
colmetricpb "go.opentelemetry.io/proto/otlp/collector/metrics/v1"
|
|
metricpb "go.opentelemetry.io/proto/otlp/metrics/v1"
|
|
)
|
|
|
|
type client struct {
|
|
endpoint string
|
|
dialOpts []grpc.DialOption
|
|
metadata metadata.MD
|
|
exportTimeout time.Duration
|
|
requestFunc retry.RequestFunc
|
|
|
|
// stopCtx is used as a parent context for all exports. Therefore, when it
|
|
// is canceled with the stopFunc all exports are canceled.
|
|
stopCtx context.Context
|
|
// stopFunc cancels stopCtx, stopping any active exports.
|
|
stopFunc context.CancelFunc
|
|
|
|
// ourConn keeps track of where conn was created: true if created here on
|
|
// Start, or false if passed with an option. This is important on Shutdown
|
|
// as the conn should only be closed if created here on start. Otherwise,
|
|
// it is up to the processes that passed the conn to close it.
|
|
ourConn bool
|
|
conn *grpc.ClientConn
|
|
mscMu sync.RWMutex
|
|
msc colmetricpb.MetricsServiceClient
|
|
}
|
|
|
|
// Compile time check *client implements otlpmetric.Client.
|
|
var _ otlpmetric.Client = (*client)(nil)
|
|
|
|
// NewClient creates a new gRPC metric client.
|
|
func NewClient(opts ...Option) otlpmetric.Client {
|
|
return newClient(opts...)
|
|
}
|
|
|
|
func newClient(opts ...Option) *client {
|
|
cfg := otlpconfig.NewGRPCConfig(asGRPCOptions(opts)...)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
c := &client{
|
|
endpoint: cfg.Metrics.Endpoint,
|
|
exportTimeout: cfg.Metrics.Timeout,
|
|
requestFunc: cfg.RetryConfig.RequestFunc(retryable),
|
|
dialOpts: cfg.DialOptions,
|
|
stopCtx: ctx,
|
|
stopFunc: cancel,
|
|
conn: cfg.GRPCConn,
|
|
}
|
|
|
|
if len(cfg.Metrics.Headers) > 0 {
|
|
c.metadata = metadata.New(cfg.Metrics.Headers)
|
|
}
|
|
|
|
return c
|
|
}
|
|
|
|
// Start establishes a gRPC connection to the collector.
|
|
func (c *client) Start(ctx context.Context) error {
|
|
if c.conn == nil {
|
|
// If the caller did not provide a ClientConn when the client was
|
|
// created, create one using the configuration they did provide.
|
|
conn, err := grpc.DialContext(ctx, c.endpoint, c.dialOpts...)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// Keep track that we own the lifecycle of this conn and need to close
|
|
// it on Shutdown.
|
|
c.ourConn = true
|
|
c.conn = conn
|
|
}
|
|
|
|
// The otlpmetric.Client interface states this method is called just once,
|
|
// so no need to check if already started.
|
|
c.mscMu.Lock()
|
|
c.msc = colmetricpb.NewMetricsServiceClient(c.conn)
|
|
c.mscMu.Unlock()
|
|
|
|
return nil
|
|
}
|
|
|
|
var errAlreadyStopped = errors.New("the client is already stopped")
|
|
|
|
// Stop shuts down the client.
|
|
//
|
|
// Any active connections to a remote endpoint are closed if they were created
|
|
// by the client. Any gRPC connection passed during creation using
|
|
// WithGRPCConn will not be closed. It is the caller's responsibility to
|
|
// handle cleanup of that resource.
|
|
//
|
|
// This method synchronizes with the UploadMetrics method of the client. It
|
|
// will wait for any active calls to that method to complete unimpeded, or it
|
|
// will cancel any active calls if ctx expires. If ctx expires, the context
|
|
// error will be forwarded as the returned error. All client held resources
|
|
// will still be released in this situation.
|
|
//
|
|
// If the client has already stopped, an error will be returned describing
|
|
// this.
|
|
func (c *client) Stop(ctx context.Context) error {
|
|
// Acquire the c.mscMu lock within the ctx lifetime.
|
|
acquired := make(chan struct{})
|
|
go func() {
|
|
c.mscMu.Lock()
|
|
close(acquired)
|
|
}()
|
|
var err error
|
|
select {
|
|
case <-ctx.Done():
|
|
// The Stop timeout is reached. Kill any remaining exports to force
|
|
// the clear of the lock and save the timeout error to return and
|
|
// signal the shutdown timed out before cleanly stopping.
|
|
c.stopFunc()
|
|
err = ctx.Err()
|
|
|
|
// To ensure the client is not left in a dirty state c.msc needs to be
|
|
// set to nil. To avoid the race condition when doing this, ensure
|
|
// that all the exports are killed (initiated by c.stopFunc).
|
|
<-acquired
|
|
case <-acquired:
|
|
}
|
|
// Hold the mscMu lock for the rest of the function to ensure no new
|
|
// exports are started.
|
|
defer c.mscMu.Unlock()
|
|
|
|
// The otlpmetric.Client interface states this method is called only
|
|
// once, but there is no guarantee it is called after Start. Ensure the
|
|
// client is started before doing anything and let the called know if they
|
|
// made a mistake.
|
|
if c.msc == nil {
|
|
return errAlreadyStopped
|
|
}
|
|
|
|
// Clear c.msc to signal the client is stopped.
|
|
c.msc = nil
|
|
|
|
if c.ourConn {
|
|
closeErr := c.conn.Close()
|
|
// A context timeout error takes precedence over this error.
|
|
if err == nil && closeErr != nil {
|
|
err = closeErr
|
|
}
|
|
}
|
|
return err
|
|
}
|
|
|
|
var errShutdown = errors.New("the client is shutdown")
|
|
|
|
// UploadMetrics sends a batch of spans.
|
|
//
|
|
// Retryable errors from the server will be handled according to any
|
|
// RetryConfig the client was created with.
|
|
func (c *client) UploadMetrics(ctx context.Context, protoMetrics []*metricpb.ResourceMetrics) error {
|
|
// Hold a read lock to ensure a shut down initiated after this starts does
|
|
// not abandon the export. This read lock acquire has less priority than a
|
|
// write lock acquire (i.e. Stop), meaning if the client is shutting down
|
|
// this will come after the shut down.
|
|
c.mscMu.RLock()
|
|
defer c.mscMu.RUnlock()
|
|
|
|
if c.msc == nil {
|
|
return errShutdown
|
|
}
|
|
|
|
ctx, cancel := c.exportContext(ctx)
|
|
defer cancel()
|
|
|
|
return c.requestFunc(ctx, func(iCtx context.Context) error {
|
|
_, err := c.msc.Export(iCtx, &colmetricpb.ExportMetricsServiceRequest{
|
|
ResourceMetrics: protoMetrics,
|
|
})
|
|
// nil is converted to OK.
|
|
if status.Code(err) == codes.OK {
|
|
// Success.
|
|
return nil
|
|
}
|
|
return err
|
|
})
|
|
}
|
|
|
|
// exportContext returns a copy of parent with an appropriate deadline and
|
|
// cancellation function.
|
|
//
|
|
// It is the callers responsibility to cancel the returned context once its
|
|
// use is complete, via the parent or directly with the returned CancelFunc, to
|
|
// ensure all resources are correctly released.
|
|
func (c *client) exportContext(parent context.Context) (context.Context, context.CancelFunc) {
|
|
var (
|
|
ctx context.Context
|
|
cancel context.CancelFunc
|
|
)
|
|
|
|
if c.exportTimeout > 0 {
|
|
ctx, cancel = context.WithTimeout(parent, c.exportTimeout)
|
|
} else {
|
|
ctx, cancel = context.WithCancel(parent)
|
|
}
|
|
|
|
if c.metadata.Len() > 0 {
|
|
ctx = metadata.NewOutgoingContext(ctx, c.metadata)
|
|
}
|
|
|
|
// Unify the client stopCtx with the parent.
|
|
go func() {
|
|
select {
|
|
case <-ctx.Done():
|
|
case <-c.stopCtx.Done():
|
|
// Cancel the export as the shutdown has timed out.
|
|
cancel()
|
|
}
|
|
}()
|
|
|
|
return ctx, cancel
|
|
}
|
|
|
|
// retryable returns if err identifies a request that can be retried and a
|
|
// duration to wait for if an explicit throttle time is included in err.
|
|
func retryable(err error) (bool, time.Duration) {
|
|
//func retryable(err error) (bool, time.Duration) {
|
|
s := status.Convert(err)
|
|
switch s.Code() {
|
|
case codes.Canceled,
|
|
codes.DeadlineExceeded,
|
|
codes.ResourceExhausted,
|
|
codes.Aborted,
|
|
codes.OutOfRange,
|
|
codes.Unavailable,
|
|
codes.DataLoss:
|
|
return true, throttleDelay(s)
|
|
}
|
|
|
|
// Not a retry-able error.
|
|
return false, 0
|
|
}
|
|
|
|
// throttleDelay returns a duration to wait for if an explicit throttle time
|
|
// is included in the response status.
|
|
func throttleDelay(status *status.Status) time.Duration {
|
|
for _, detail := range status.Details() {
|
|
if t, ok := detail.(*errdetails.RetryInfo); ok {
|
|
return t.RetryDelay.AsDuration()
|
|
}
|
|
}
|
|
return 0
|
|
}
|