mirror of
https://github.com/open-telemetry/opentelemetry-go.git
synced 2025-01-30 04:40:41 +02:00
Split large jaeger span batch to admire the udp packet size limit (#1853)
* Split large jaeger span batch to admire the udp packet size * Refactory EmitBatch and produce complaining error msg when serialization fails * Add tests for large jaeger spans Update CHANGELOG.md * Update CHANGELOG.md * Fix compatibility-test on windows. * Add test case for exporting spans with multiple errors.
This commit is contained in:
parent
42a845093e
commit
c99d5e999c
@ -93,6 +93,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
|
|||||||
- `OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE`
|
- `OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE`
|
||||||
- `OTEL_EXPORTER_OTLP_METRICS_CERTIFICATE`
|
- `OTEL_EXPORTER_OTLP_METRICS_CERTIFICATE`
|
||||||
- Adds `otlpgrpc.WithTimeout` option for configuring timeout to the otlp/gRPC exporter. (#1821)
|
- Adds `otlpgrpc.WithTimeout` option for configuring timeout to the otlp/gRPC exporter. (#1821)
|
||||||
|
- Adds `jaeger.WithMaxPacketSize` option for configuring maximum UDP packet size used when connecting to the Jaeger agent. (#1853)
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|
||||||
@ -104,6 +105,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
|
|||||||
- Zipkin Exporter: Ensure mapping between OTel and Zipkin span data complies with the specification. (#1688)
|
- Zipkin Exporter: Ensure mapping between OTel and Zipkin span data complies with the specification. (#1688)
|
||||||
- Fixed typo for default service name in Jaeger Exporter. (#1797)
|
- Fixed typo for default service name in Jaeger Exporter. (#1797)
|
||||||
- Fix flaky OTLP for the reconnnection of the client connection. (#1527, #1814)
|
- Fix flaky OTLP for the reconnnection of the client connection. (#1527, #1814)
|
||||||
|
- Fix Jaeger exporter dropping of span batches that exceed the UDP packet size limit.
|
||||||
|
Instead, the exporter now splits the batch into smaller sendable batches. (#1828)
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
|
||||||
|
@ -20,6 +20,7 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"net"
|
"net"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"go.opentelemetry.io/otel/exporters/trace/jaeger/internal/third_party/thrift/lib/go/thrift"
|
"go.opentelemetry.io/otel/exporters/trace/jaeger/internal/third_party/thrift/lib/go/thrift"
|
||||||
@ -36,10 +37,11 @@ type agentClientUDP struct {
|
|||||||
genAgent.Agent
|
genAgent.Agent
|
||||||
io.Closer
|
io.Closer
|
||||||
|
|
||||||
connUDP udpConn
|
connUDP udpConn
|
||||||
client *genAgent.AgentClient
|
client *genAgent.AgentClient
|
||||||
maxPacketSize int // max size of datagram in bytes
|
maxPacketSize int // max size of datagram in bytes
|
||||||
thriftBuffer *thrift.TMemoryBuffer // buffer used to calculate byte size of a span
|
thriftBuffer *thrift.TMemoryBuffer // buffer used to calculate byte size of a span
|
||||||
|
thriftProtocol thrift.TProtocol
|
||||||
}
|
}
|
||||||
|
|
||||||
type udpConn interface {
|
type udpConn interface {
|
||||||
@ -75,6 +77,7 @@ func newAgentClientUDP(params agentClientUDPParams) (*agentClientUDP, error) {
|
|||||||
|
|
||||||
thriftBuffer := thrift.NewTMemoryBufferLen(params.MaxPacketSize)
|
thriftBuffer := thrift.NewTMemoryBufferLen(params.MaxPacketSize)
|
||||||
protocolFactory := thrift.NewTCompactProtocolFactoryConf(&thrift.TConfiguration{})
|
protocolFactory := thrift.NewTCompactProtocolFactoryConf(&thrift.TConfiguration{})
|
||||||
|
thriftProtocol := protocolFactory.GetProtocol(thriftBuffer)
|
||||||
client := genAgent.NewAgentClientFactory(thriftBuffer, protocolFactory)
|
client := genAgent.NewAgentClientFactory(thriftBuffer, protocolFactory)
|
||||||
|
|
||||||
var connUDP udpConn
|
var connUDP udpConn
|
||||||
@ -103,15 +106,78 @@ func newAgentClientUDP(params agentClientUDPParams) (*agentClientUDP, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return &agentClientUDP{
|
return &agentClientUDP{
|
||||||
connUDP: connUDP,
|
connUDP: connUDP,
|
||||||
client: client,
|
client: client,
|
||||||
maxPacketSize: params.MaxPacketSize,
|
maxPacketSize: params.MaxPacketSize,
|
||||||
thriftBuffer: thriftBuffer,
|
thriftBuffer: thriftBuffer,
|
||||||
|
thriftProtocol: thriftProtocol,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// EmitBatch implements EmitBatch() of Agent interface
|
// EmitBatch buffers batch to fit into UDP packets and sends the data to the agent.
|
||||||
func (a *agentClientUDP) EmitBatch(ctx context.Context, batch *gen.Batch) error {
|
func (a *agentClientUDP) EmitBatch(ctx context.Context, batch *gen.Batch) error {
|
||||||
|
var errs []error
|
||||||
|
processSize, err := a.calcSizeOfSerializedThrift(ctx, batch.Process)
|
||||||
|
if err != nil {
|
||||||
|
// drop the batch if serialization of process fails.
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
totalSize := processSize
|
||||||
|
var spans []*gen.Span
|
||||||
|
for _, span := range batch.Spans {
|
||||||
|
spanSize, err := a.calcSizeOfSerializedThrift(ctx, span)
|
||||||
|
if err != nil {
|
||||||
|
errs = append(errs, fmt.Errorf("thrift serialization failed: %v", span))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if spanSize+processSize >= a.maxPacketSize {
|
||||||
|
// drop the span that exceeds the limit.
|
||||||
|
errs = append(errs, fmt.Errorf("span too large to send: %v", span))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if totalSize+spanSize >= a.maxPacketSize {
|
||||||
|
if err := a.flush(ctx, &gen.Batch{
|
||||||
|
Process: batch.Process,
|
||||||
|
Spans: spans,
|
||||||
|
}); err != nil {
|
||||||
|
errs = append(errs, err)
|
||||||
|
}
|
||||||
|
spans = spans[:0]
|
||||||
|
totalSize = processSize
|
||||||
|
}
|
||||||
|
totalSize += spanSize
|
||||||
|
spans = append(spans, span)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(spans) > 0 {
|
||||||
|
if err := a.flush(ctx, &gen.Batch{
|
||||||
|
Process: batch.Process,
|
||||||
|
Spans: spans,
|
||||||
|
}); err != nil {
|
||||||
|
errs = append(errs, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(errs) == 1 {
|
||||||
|
return errs[0]
|
||||||
|
} else if len(errs) > 1 {
|
||||||
|
joined := a.makeJoinedErrorString(errs)
|
||||||
|
return fmt.Errorf("multiple errors during transform: %s", joined)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// makeJoinedErrorString join all the errors to one error message.
|
||||||
|
func (a *agentClientUDP) makeJoinedErrorString(errs []error) string {
|
||||||
|
var errMsgs []string
|
||||||
|
for _, err := range errs {
|
||||||
|
errMsgs = append(errMsgs, err.Error())
|
||||||
|
}
|
||||||
|
return strings.Join(errMsgs, ", ")
|
||||||
|
}
|
||||||
|
|
||||||
|
// flush will send the batch of spans to the agent.
|
||||||
|
func (a *agentClientUDP) flush(ctx context.Context, batch *gen.Batch) error {
|
||||||
a.thriftBuffer.Reset()
|
a.thriftBuffer.Reset()
|
||||||
if err := a.client.EmitBatch(ctx, batch); err != nil {
|
if err := a.client.EmitBatch(ctx, batch); err != nil {
|
||||||
return err
|
return err
|
||||||
@ -124,6 +190,13 @@ func (a *agentClientUDP) EmitBatch(ctx context.Context, batch *gen.Batch) error
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// calcSizeOfSerializedThrift calculate the serialized thrift packet size.
|
||||||
|
func (a *agentClientUDP) calcSizeOfSerializedThrift(ctx context.Context, thriftStruct thrift.TStruct) (int, error) {
|
||||||
|
a.thriftBuffer.Reset()
|
||||||
|
err := thriftStruct.Write(ctx, a.thriftProtocol)
|
||||||
|
return a.thriftBuffer.Len(), err
|
||||||
|
}
|
||||||
|
|
||||||
// Close implements Close() of io.Closer and closes the underlying UDP connection.
|
// Close implements Close() of io.Closer and closes the underlying UDP connection.
|
||||||
func (a *agentClientUDP) Close() error {
|
func (a *agentClientUDP) Close() error {
|
||||||
return a.connUDP.Close()
|
return a.connUDP.Close()
|
||||||
|
@ -14,12 +14,16 @@
|
|||||||
package jaeger
|
package jaeger
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"log"
|
"log"
|
||||||
"net"
|
"net"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"go.opentelemetry.io/otel"
|
||||||
|
tracesdk "go.opentelemetry.io/otel/sdk/trace"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestNewAgentClientUDPWithParamsBadHostport(t *testing.T) {
|
func TestNewAgentClientUDPWithParamsBadHostport(t *testing.T) {
|
||||||
@ -99,3 +103,74 @@ func TestNewAgentClientUDPWithParamsReconnectingDisabled(t *testing.T) {
|
|||||||
|
|
||||||
assert.NoError(t, agentClient.Close())
|
assert.NoError(t, agentClient.Close())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type errorHandler struct{ t *testing.T }
|
||||||
|
|
||||||
|
func (eh errorHandler) Handle(err error) { assert.NoError(eh.t, err) }
|
||||||
|
|
||||||
|
func TestJaegerAgentUDPLimitBatching(t *testing.T) {
|
||||||
|
otel.SetErrorHandler(errorHandler{t})
|
||||||
|
|
||||||
|
// 1500 spans, size 79559, does not fit within one UDP packet with the default size of 65000.
|
||||||
|
n := 1500
|
||||||
|
s := make([]*tracesdk.SpanSnapshot, n)
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
s[i] = &tracesdk.SpanSnapshot{}
|
||||||
|
}
|
||||||
|
|
||||||
|
exp, err := NewRawExporter(
|
||||||
|
WithAgentEndpoint(WithAgentHost("localhost"), WithAgentPort("6831")),
|
||||||
|
)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
assert.NoError(t, exp.ExportSpans(ctx, s))
|
||||||
|
assert.NoError(t, exp.Shutdown(ctx))
|
||||||
|
}
|
||||||
|
|
||||||
|
// generateALargeSpan generates a span with a long name.
|
||||||
|
func generateALargeSpan() *tracesdk.SpanSnapshot {
|
||||||
|
span := &tracesdk.SpanSnapshot{
|
||||||
|
Name: "a-longer-name-that-makes-it-exceeds-limit",
|
||||||
|
}
|
||||||
|
return span
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSpanExceedsMaxPacketLimit(t *testing.T) {
|
||||||
|
otel.SetErrorHandler(errorHandler{t})
|
||||||
|
|
||||||
|
// 106 is the serialized size of a span with default values.
|
||||||
|
maxSize := 106
|
||||||
|
span := generateALargeSpan()
|
||||||
|
|
||||||
|
largeSpans := []*tracesdk.SpanSnapshot{span, {}}
|
||||||
|
normalSpans := []*tracesdk.SpanSnapshot{{}, {}}
|
||||||
|
|
||||||
|
exp, err := NewRawExporter(
|
||||||
|
WithAgentEndpoint(WithAgentHost("localhost"), WithAgentPort("6831"), WithMaxPacketSize(maxSize+1)),
|
||||||
|
)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
assert.Error(t, exp.ExportSpans(ctx, largeSpans))
|
||||||
|
assert.NoError(t, exp.ExportSpans(ctx, normalSpans))
|
||||||
|
assert.NoError(t, exp.Shutdown(ctx))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmitBatchWithMultipleErrors(t *testing.T) {
|
||||||
|
otel.SetErrorHandler(errorHandler{t})
|
||||||
|
|
||||||
|
span := generateALargeSpan()
|
||||||
|
largeSpans := []*tracesdk.SpanSnapshot{span, span}
|
||||||
|
// make max packet size smaller than span
|
||||||
|
maxSize := len(span.Name)
|
||||||
|
exp, err := NewRawExporter(
|
||||||
|
WithAgentEndpoint(WithAgentHost("localhost"), WithAgentPort("6831"), WithMaxPacketSize(maxSize)),
|
||||||
|
)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
err = exp.ExportSpans(ctx, largeSpans)
|
||||||
|
assert.Error(t, err)
|
||||||
|
require.Contains(t, err.Error(), "multiple errors")
|
||||||
|
}
|
||||||
|
@ -114,6 +114,13 @@ func WithAttemptReconnectingInterval(interval time.Duration) AgentEndpointOption
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithMaxPacketSize sets the maximum UDP packet size for transport to the Jaeger agent.
|
||||||
|
func WithMaxPacketSize(size int) AgentEndpointOption {
|
||||||
|
return func(o *AgentEndpointOptions) {
|
||||||
|
o.MaxPacketSize = size
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// WithCollectorEndpoint defines the full url to the Jaeger HTTP Thrift collector. This will
|
// WithCollectorEndpoint defines the full url to the Jaeger HTTP Thrift collector. This will
|
||||||
// use the following environment variables for configuration if no explicit option is provided:
|
// use the following environment variables for configuration if no explicit option is provided:
|
||||||
//
|
//
|
||||||
|
Loading…
x
Reference in New Issue
Block a user