mirror of
https://github.com/open-telemetry/opentelemetry-go.git
synced 2025-01-26 03:52:03 +02:00
31661dd6be
* Centralize the retry package for the otlp exporters * Add dependabot config * Use unified internal/retry module in otlp exporters * Remove otlpmetric and otlptrace internal/retry module * Run go mod tidy in otlp{metric,trace}
151 lines
4.5 KiB
Go
151 lines
4.5 KiB
Go
// Copyright The OpenTelemetry Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Package retry provides request retry functionality that can perform
|
|
// configurable exponential backoff for transient errors and honor any
|
|
// explicit throttle responses received.
|
|
package retry // import "go.opentelemetry.io/otel/exporters/otlp/internal/retry"
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/cenkalti/backoff/v4"
|
|
)
|
|
|
|
// DefaultConfig are the recommended defaults to use.
|
|
var DefaultConfig = Config{
|
|
Enabled: true,
|
|
InitialInterval: 5 * time.Second,
|
|
MaxInterval: 30 * time.Second,
|
|
MaxElapsedTime: time.Minute,
|
|
}
|
|
|
|
// Config defines configuration for retrying batches in case of export failure
|
|
// using an exponential backoff.
|
|
type Config struct {
|
|
// Enabled indicates whether to not retry sending batches in case of
|
|
// export failure.
|
|
Enabled bool
|
|
// InitialInterval the time to wait after the first failure before
|
|
// retrying.
|
|
InitialInterval time.Duration
|
|
// MaxInterval is the upper bound on backoff interval. Once this value is
|
|
// reached the delay between consecutive retries will always be
|
|
// `MaxInterval`.
|
|
MaxInterval time.Duration
|
|
// MaxElapsedTime is the maximum amount of time (including retries) spent
|
|
// trying to send a request/batch. Once this value is reached, the data
|
|
// is discarded.
|
|
MaxElapsedTime time.Duration
|
|
}
|
|
|
|
// RequestFunc wraps a request with retry logic.
|
|
type RequestFunc func(context.Context, func(context.Context) error) error
|
|
|
|
// EvaluateFunc returns if an error is retry-able and if an explicit throttle
|
|
// duration should be honored that was included in the error.
|
|
//
|
|
// The function must return true if the error argument is retry-able,
|
|
// otherwise it must return false for the first return parameter.
|
|
//
|
|
// The function must return a non-zero time.Duration if the error contains
|
|
// explicit throttle duration that should be honored, otherwise it must return
|
|
// a zero valued time.Duration.
|
|
type EvaluateFunc func(error) (bool, time.Duration)
|
|
|
|
// RequestFunc returns a RequestFunc using the evaluate function to determine
|
|
// if requests can be retried and based on the exponential backoff
|
|
// configuration of c.
|
|
func (c Config) RequestFunc(evaluate EvaluateFunc) RequestFunc {
|
|
if !c.Enabled {
|
|
return func(ctx context.Context, fn func(context.Context) error) error {
|
|
return fn(ctx)
|
|
}
|
|
}
|
|
|
|
// Do not use NewExponentialBackOff since it calls Reset and the code here
|
|
// must call Reset after changing the InitialInterval (this saves an
|
|
// unnecessary call to Now).
|
|
b := &backoff.ExponentialBackOff{
|
|
InitialInterval: c.InitialInterval,
|
|
RandomizationFactor: backoff.DefaultRandomizationFactor,
|
|
Multiplier: backoff.DefaultMultiplier,
|
|
MaxInterval: c.MaxInterval,
|
|
MaxElapsedTime: c.MaxElapsedTime,
|
|
Stop: backoff.Stop,
|
|
Clock: backoff.SystemClock,
|
|
}
|
|
b.Reset()
|
|
|
|
return func(ctx context.Context, fn func(context.Context) error) error {
|
|
for {
|
|
err := fn(ctx)
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
|
|
retryable, throttle := evaluate(err)
|
|
if !retryable {
|
|
return err
|
|
}
|
|
|
|
bOff := b.NextBackOff()
|
|
if bOff == backoff.Stop {
|
|
return fmt.Errorf("max retry time elapsed: %w", err)
|
|
}
|
|
|
|
// Wait for the greater of the backoff or throttle delay.
|
|
var delay time.Duration
|
|
if bOff > throttle {
|
|
delay = bOff
|
|
} else {
|
|
elapsed := b.GetElapsedTime()
|
|
if b.MaxElapsedTime != 0 && elapsed+throttle > b.MaxElapsedTime {
|
|
return fmt.Errorf("max retry time would elapse: %w", err)
|
|
}
|
|
delay = throttle
|
|
}
|
|
|
|
if err := waitFunc(ctx, delay); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Allow override for testing.
|
|
var waitFunc = wait
|
|
|
|
func wait(ctx context.Context, delay time.Duration) error {
|
|
timer := time.NewTimer(delay)
|
|
defer timer.Stop()
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
// Handle the case where the timer and context deadline end
|
|
// simultaneously by prioritizing the timer expiration nil value
|
|
// response.
|
|
select {
|
|
case <-timer.C:
|
|
default:
|
|
return ctx.Err()
|
|
}
|
|
case <-timer.C:
|
|
}
|
|
|
|
return nil
|
|
}
|