1
0
mirror of https://github.com/open-telemetry/opentelemetry-go.git synced 2025-11-23 22:34:47 +02:00
Files
ian 5dd35ce873 feat: logs SDK observability - otlploggrpc exporter metrics (#7353)
This PR adds support for the following experimental metrics in `otlploggrpc`:

- `otel.sdk.exporter.log.inflight`
- `otel.sdk.exporter.log.exported`
- `otel.sdk.exporter.operation.duration`

References:

-  #7084 
-  https://github.com/open-telemetry/opentelemetry-go/issues/7019
- [Follow guidelines](https://github.com/open-telemetry/opentelemetry-go/blob/a5dcd68ebb/CONTRIBUTING.md#encapsulation).

-----
```txt
goos: darwin
goarch: arm64
pkg: go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc
cpu: Apple M3
                                   │ disabled.txt │          enabled.txt          │
                                   │    sec/op    │   sec/op     vs base          │
ExporterExportLogs/Observability-8    681.5µ ± 3%   684.3µ ± 6%  ~ (p=0.315 n=10)

                                   │ disabled.txt │          enabled.txt           │
                                   │     B/op     │     B/op      vs base          │
ExporterExportLogs/Observability-8   672.8Ki ± 0%   673.6Ki ± 1%  ~ (p=0.247 n=10)

                                   │ disabled.txt │            enabled.txt             │
                                   │  allocs/op   │  allocs/op   vs base               │
ExporterExportLogs/Observability-8    9.224k ± 0%   9.232k ± 0%  +0.09% (p=0.000 n=10)
```

-----
```txt
goos: darwin
goarch: arm64
pkg: go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc/internal/observ
cpu: Apple M3
                                         │  bench.txt  │
                                         │   sec/op    │
InstrumentationExportLogs/NoError-8        162.6n ± 3%
InstrumentationExportLogs/PartialError-8   705.5n ± 5%
InstrumentationExportLogs/FullError-8      592.1n ± 1%
geomean                                    408.0n

                                         │ bench.txt  │
                                         │    B/op    │
InstrumentationExportLogs/NoError-8        152.0 ± 0%
InstrumentationExportLogs/PartialError-8   697.0 ± 0%
InstrumentationExportLogs/FullError-8      616.0 ± 0%
geomean                                    402.6

                                         │ bench.txt  │
                                         │ allocs/op  │
InstrumentationExportLogs/NoError-8        3.000 ± 0%
InstrumentationExportLogs/PartialError-8   10.00 ± 0%
InstrumentationExportLogs/FullError-8      8.000 ± 0%
geomean                                    6.214
```

-----
```txt
pkg: go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc/internal/observ
cpu: Apple M3
                                 │ parse_target.txt │
                                 │      sec/op      │
ParseTarget/HostName-8                 38.00n ± ∞ ¹
ParseTarget/HostPort-8                 51.33n ± ∞ ¹
ParseTarget/IPv4WithoutPort-8          44.74n ± ∞ ¹
ParseTarget/IPv4WithPort-8             62.56n ± ∞ ¹
ParseTarget/IPv6Bare-8                 94.89n ± ∞ ¹
ParseTarget/IPv6Bracket-8              93.78n ± ∞ ¹
ParseTarget/IPv6WithPort-8             57.57n ± ∞ ¹
ParseTarget/UnixSocket-8               8.329n ± ∞ ¹
ParseTarget/UnixAbstractSocket-8       9.082n ± ∞ ¹
ParseTarget/Passthrough-8              58.06n ± ∞ ¹
geomean                                40.64n
¹ need >= 6 samples for confidence interval at level 0.95

                                 │ parse_target.txt │
                                 │       B/op       │
ParseTarget/HostName-8                  48.00 ± ∞ ¹
ParseTarget/HostPort-8                  48.00 ± ∞ ¹
ParseTarget/IPv4WithoutPort-8           16.00 ± ∞ ¹
ParseTarget/IPv4WithPort-8              48.00 ± ∞ ¹
ParseTarget/IPv6Bare-8                  16.00 ± ∞ ¹
ParseTarget/IPv6Bracket-8               16.00 ± ∞ ¹
ParseTarget/IPv6WithPort-8              48.00 ± ∞ ¹
ParseTarget/UnixSocket-8                0.000 ± ∞ ¹
ParseTarget/UnixAbstractSocket-8        0.000 ± ∞ ¹
ParseTarget/Passthrough-8               48.00 ± ∞ ¹
geomean                                           ²
¹ need >= 6 samples for confidence interval at level 0.95
² summaries must be >0 to compute geomean

                                 │ parse_target.txt │
                                 │    allocs/op     │
ParseTarget/HostName-8                  1.000 ± ∞ ¹
ParseTarget/HostPort-8                  1.000 ± ∞ ¹
ParseTarget/IPv4WithoutPort-8           1.000 ± ∞ ¹
ParseTarget/IPv4WithPort-8              1.000 ± ∞ ¹
ParseTarget/IPv6Bare-8                  1.000 ± ∞ ¹
ParseTarget/IPv6Bracket-8               1.000 ± ∞ ¹
ParseTarget/IPv6WithPort-8              1.000 ± ∞ ¹
ParseTarget/UnixSocket-8                0.000 ± ∞ ¹
ParseTarget/UnixAbstractSocket-8        0.000 ± ∞ ¹
ParseTarget/Passthrough-8               1.000 ± ∞ ¹
geomean                                           ²
¹ need >= 6 samples for confidence interval at level 0.95
² summaries must be >0 to compute geomean
```

---------

Co-authored-by: Tyler Yahn <MrAlias@users.noreply.github.com>
2025-10-02 10:15:41 -07:00

285 lines
8.4 KiB
Go

// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
package otlploggrpc // import "go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc"
import (
"context"
"errors"
"sync/atomic"
"time"
collogpb "go.opentelemetry.io/proto/otlp/collector/logs/v1"
logpb "go.opentelemetry.io/proto/otlp/logs/v1"
"google.golang.org/genproto/googleapis/rpc/errdetails"
"google.golang.org/grpc"
"google.golang.org/grpc/backoff"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/encoding/gzip"
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/status"
"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc/internal"
"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc/internal/observ"
"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc/internal/retry"
)
// client is the gRPC transport used to upload logs to an OTLP endpoint.
//
// The methods of this type are not expected to be called concurrently.
type client struct {
// metadata is the gRPC metadata built from the configured headers. When it
// is non-empty, exportContext attaches it to the outgoing context.
metadata metadata.MD
// exportTimeout bounds each export. When it is not positive, exportContext
// applies no timeout and only makes the context cancelable.
exportTimeout time.Duration
// requestFunc runs an export attempt using the configured retry settings.
requestFunc retry.RequestFunc
// ourConn keeps track of where conn was created: true if created here in
// newClient, or false if passed with an option. This is important on
// Shutdown as conn should only be closed if we created it. Otherwise,
// it is up to the processes that passed conn to close it.
ourConn bool
// conn is the gRPC connection that lsc is created from.
conn *grpc.ClientConn
// lsc is the logs service client used to send export requests.
lsc collogpb.LogsServiceClient
// instrumentation records metrics about exports. UploadLogs skips
// recording when it is nil.
instrumentation *observ.Instrumentation
}
// newGRPCClientFn is the constructor newClient uses to create a gRPC
// connection when none was supplied in the configuration. It is a variable so
// that it can be replaced in tests.
var newGRPCClientFn = grpc.NewClient
// newClient builds a gRPC log client from cfg.
//
// If cfg does not carry a ready-made gRPC connection, one is created from the
// configured endpoint and dial options, and the client is marked as the owner
// of that connection so Shutdown closes it. A connection supplied through cfg
// is used as-is and is never closed by the client.
//
// An error from creating the connection yields a nil client. An error from
// setting up the instrumentation is returned together with the client.
func newClient(cfg config) (*client, error) {
	cl := &client{
		conn:          cfg.gRPCConn.Value,
		requestFunc:   cfg.retryCfg.Value.RequestFunc(retryable),
		exportTimeout: cfg.timeout.Value,
	}

	if hdrs := cfg.headers.Value; len(hdrs) > 0 {
		cl.metadata = metadata.New(hdrs)
	}

	if cl.conn == nil {
		// No connection was provided by the caller: dial one ourselves and
		// remember that its lifecycle belongs to this client.
		created, err := newGRPCClientFn(cfg.endpoint.Value, newGRPCDialOptions(cfg)...)
		if err != nil {
			return nil, err
		}
		cl.conn, cl.ourConn = created, true
	}

	cl.lsc = collogpb.NewLogsServiceClient(cl.conn)

	inst, err := observ.NewInstrumentation(nextExporterID(), cl.conn.CanonicalTarget())
	cl.instrumentation = inst
	return cl, err
}
// exporterN is the number of exporter IDs handed out so far.
var exporterN atomic.Int64

// nextExporterID returns the next unique ID for an exporter. The first call
// returns 0 and every following call returns a value one larger.
func nextExporterID() int64 {
	// Add reports the value after the increment; step back by one to get the
	// ID that this call claimed.
	return exporterN.Add(1) - 1
}
// newGRPCDialOptions translates cfg into the gRPC dial options used when the
// client has to create its own connection.
//
// The user agent option comes first, followed by any dial options set
// directly in cfg, and then the options derived from the other settings:
// service config, transport credentials, compression, and reconnection
// period.
func newGRPCDialOptions(cfg config) []grpc.DialOption {
	opts := append(
		[]grpc.DialOption{grpc.WithUserAgent("OTel Go OTLP over gRPC logs exporter/" + Version())},
		cfg.dialOptions.Value...,
	)

	// Service config.
	if sc := cfg.serviceConfig.Value; sc != "" {
		opts = append(opts, grpc.WithDefaultServiceConfig(sc))
	}

	// Transport credentials: explicit gRPC credentials win over the insecure
	// flag (passing both is an error); without either, fall back to TLS using
	// the host's root CA.
	var creds credentials.TransportCredentials
	switch {
	case cfg.gRPCCredentials.Value != nil:
		creds = cfg.gRPCCredentials.Value
	case cfg.insecure.Value:
		creds = insecure.NewCredentials()
	default:
		creds = credentials.NewTLS(nil)
	}
	opts = append(opts, grpc.WithTransportCredentials(creds))

	// Compression.
	if cfg.compression.Value == GzipCompression {
		opts = append(opts, grpc.WithDefaultCallOptions(grpc.UseCompressor(gzip.Name)))
	}

	// Reconnection period.
	if period := cfg.reconnectionPeriod.Value; period != 0 {
		opts = append(opts, grpc.WithConnectParams(grpc.ConnectParams{
			Backoff:           backoff.DefaultConfig,
			MinConnectTimeout: period,
		}))
	}

	return opts
}
// UploadLogs sends proto logs to the connected endpoint.
//
// Retryable errors from the server will be handled according to any
// RetryConfig the client was created with.
//
// If ctx is already done, nothing is sent and ctx.Err() is returned. If the
// server reports a partial success (rejected log records or an error
// message), that is included in the returned error alongside any error from
// the request itself.
//
// The otlplog.Exporter synchronizes access to client methods, and
// ensures this is not called after the Exporter is shutdown. Only thing
// to do here is send data.
func (c *client) UploadLogs(ctx context.Context, rl []*logpb.ResourceLogs) (uploadErr error) {
	select {
	case <-ctx.Done():
		// Do not upload if the context is already expired.
		return ctx.Err()
	default:
	}

	ctx, cancel := c.exportContext(ctx)
	defer cancel()

	if c.instrumentation != nil {
		// The exporter metrics are expressed in log records, so count the
		// records themselves rather than the ResourceLogs that group them.
		var count int64
		for _, r := range rl {
			for _, sl := range r.GetScopeLogs() {
				count += int64(len(sl.GetLogRecords()))
			}
		}

		eo := c.instrumentation.ExportLogs(ctx, count)
		defer func() {
			// uploadErr is the named result, so this observes the final
			// error returned to the caller.
			eo.End(uploadErr)
		}()
	}

	// Run the request to completion before reading uploadErr. The closure
	// below writes to uploadErr, and Go does not specify whether a variable
	// operand is read before or after a function call in the same expression.
	reqErr := c.requestFunc(ctx, func(ctx context.Context) error {
		resp, err := c.lsc.Export(ctx, &collogpb.ExportLogsServiceRequest{
			ResourceLogs: rl,
		})
		if resp != nil && resp.PartialSuccess != nil {
			msg := resp.PartialSuccess.GetErrorMessage()
			n := resp.PartialSuccess.GetRejectedLogRecords()
			if n != 0 || msg != "" {
				pErr := internal.LogPartialSuccessError(n, msg)
				uploadErr = errors.Join(uploadErr, pErr)
			}
		}
		// nil is converted to OK.
		if status.Code(err) == codes.OK {
			// Success.
			return nil
		}
		return err
	})

	return errors.Join(uploadErr, reqErr)
}
// Shutdown releases every resource held by the client.
//
// A gRPC connection that the client created itself is closed. A connection
// handed in at creation time with WithGRPCConn is left open; cleaning it up
// remains the caller's responsibility.
//
// If ctx is already done, its error is returned in preference to any error
// from closing the connection.
//
// The otlplog.Exporter synchronizes access to client methods and
// ensures this is called only once, so all that is left to do here is to
// release the computational resources the client holds.
func (c *client) Shutdown(ctx context.Context) error {
	conn, owned := c.conn, c.ourConn

	// Drop every reference the client holds.
	c.metadata = nil
	c.requestFunc = nil
	c.lsc = nil
	c.conn = nil

	ctxErr := ctx.Err()
	if !owned {
		// The connection belongs to whoever passed it in.
		return ctxErr
	}

	closeErr := conn.Close()
	if ctxErr != nil {
		// A context error takes precedence over the close error.
		return ctxErr
	}
	return closeErr
}
// exportContext derives the context used for a single export from parent.
//
// When the client has a positive export timeout, the returned context carries
// that timeout; otherwise it is only cancelable. When the client has metadata
// configured, it is added to the context's outgoing gRPC metadata, joined
// with any outgoing metadata already present.
//
// The caller must cancel the returned context once it is no longer needed,
// either through the parent or with the returned CancelFunc, so that all
// resources are released.
func (c *client) exportContext(parent context.Context) (context.Context, context.CancelFunc) {
	var (
		ctx    context.Context
		cancel context.CancelFunc
	)
	switch {
	case c.exportTimeout > 0:
		cause := errors.New("exporter export timeout")
		ctx, cancel = context.WithTimeoutCause(parent, c.exportTimeout, cause)
	default:
		ctx, cancel = context.WithCancel(parent)
	}

	if c.metadata.Len() == 0 {
		// Nothing to attach.
		return ctx, cancel
	}

	outgoing := c.metadata
	if existing, ok := metadata.FromOutgoingContext(ctx); ok {
		outgoing = metadata.Join(outgoing, existing)
	}
	return metadata.NewOutgoingContext(ctx, outgoing), cancel
}
// noopClient is a client that does nothing: uploads and shutdown always
// succeed without performing any work.
type noopClient struct{}

// newNoopClient returns a new noopClient.
func newNoopClient() *noopClient {
	return new(noopClient)
}

// UploadLogs discards the logs and reports success.
func (*noopClient) UploadLogs(context.Context, []*logpb.ResourceLogs) error {
	return nil
}

// Shutdown has nothing to release and reports success.
func (*noopClient) Shutdown(context.Context) error {
	return nil
}
// retryable returns if err identifies a request that can be retried and a
// duration to wait for if an explicit throttle time is included in err.
func retryable(err error) (bool, time.Duration) {
s := status.Convert(err)
return retryableGRPCStatus(s)
}
// retryableGRPCStatus reports whether a request that ended with status s can
// be retried, and how long to wait before retrying when s carries an explicit
// delay.
//
// The retryable codes follow
// https://opentelemetry.io/docs/specs/otlp/#failures
func retryableGRPCStatus(s *status.Status) (bool, time.Duration) {
	code := s.Code()

	if code == codes.ResourceExhausted {
		// Retry only if the server signals that recovery from the resource
		// exhaustion is possible, i.e. it attached RetryInfo.
		return throttleDelay(s)
	}

	switch code {
	case codes.Canceled,
		codes.DeadlineExceeded,
		codes.Aborted,
		codes.OutOfRange,
		codes.Unavailable,
		codes.DataLoss:
		// Always retryable; honor RetryInfo for the delay if it is present.
		_, delay := throttleDelay(s)
		return true, delay
	default:
		// Not a retry-able error.
		return false, 0
	}
}
// throttleDelay reports whether the details of s include a RetryInfo and, if
// so, the retry delay from the first RetryInfo found. It returns false and a
// zero duration when s has no RetryInfo detail.
func throttleDelay(s *status.Status) (bool, time.Duration) {
	for _, d := range s.Details() {
		info, isRetryInfo := d.(*errdetails.RetryInfo)
		if !isRetryInfo {
			continue
		}
		return true, info.RetryDelay.AsDuration()
	}
	return false, 0
}