From bfaff3a097d9bf695e396e499c7bef1757d70a10 Mon Sep 17 00:00:00 2001 From: Damien Mathieu <42@dmathieu.com> Date: Thu, 22 Sep 2022 16:23:16 +0200 Subject: [PATCH] Sanitize metric names in the prometheus exporter (#3212) * sanitize metric names in the prometheus exporter * add changelog entry * fix now invalid comment * replace sanitizeName algorithm with one based on strings.Map * check that digits as first characters are replaced * Update exporters/prometheus/exporter.go Co-authored-by: Tyler Yahn * add missing utf8 import * add explicit tests for sanitizeName * fix testdata with digit prepend Co-authored-by: Tyler Yahn Co-authored-by: Chester Cheung --- CHANGELOG.md | 1 + exporters/prometheus/exporter.go | 60 ++++++++++++++++++- exporters/prometheus/exporter_test.go | 42 +++++++++++-- .../prometheus/testdata/sanitized_names.txt | 24 ++++++++ 4 files changed, 118 insertions(+), 9 deletions(-) create mode 100644 exporters/prometheus/testdata/sanitized_names.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 03ea381e1..208d45e11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - The metric SDK in `go.opentelemetry.io/otel/sdk/metric` is completely refactored to comply with the OpenTelemetry specification. Please see the package documentation for how the new SDK is initialized and configured. (#3175) - Update the minimum supported go version to go1.18. Removes support for go1.17 (#3179) +- Instead of dropping metric, the Prometheus exporter will replace any invalid character in metric names with `_`. (#3212) ### Removed diff --git a/exporters/prometheus/exporter.go b/exporters/prometheus/exporter.go index 498b90eb2..3fe848f1e 100644 --- a/exporters/prometheus/exporter.go +++ b/exporters/prometheus/exporter.go @@ -19,6 +19,7 @@ import ( "sort" "strings" "unicode" + "unicode/utf8" "github.com/prometheus/client_golang/prometheus" @@ -142,7 +143,7 @@ func getHistogramMetricData(histogram metricdata.Histogram, m metricdata.Metrics dataPoints := make([]*metricData, 0, len(histogram.DataPoints)) for _, dp := range histogram.DataPoints { keys, values := getAttrs(dp.Attributes) - desc := prometheus.NewDesc(m.Name, m.Description, keys, nil) + desc := prometheus.NewDesc(sanitizeName(m.Name), m.Description, keys, nil) buckets := make(map[float64]uint64, len(dp.Bounds)) for i, bound := range dp.Bounds { buckets[bound] = dp.BucketCounts[i] @@ -165,7 +166,7 @@ func getSumMetricData[N int64 | float64](sum metricdata.Sum[N], m metricdata.Met dataPoints := make([]*metricData, 0, len(sum.DataPoints)) for _, dp := range sum.DataPoints { keys, values := getAttrs(dp.Attributes) - desc := prometheus.NewDesc(m.Name, m.Description, keys, nil) + desc := prometheus.NewDesc(sanitizeName(m.Name), m.Description, keys, nil) md := &metricData{ name: m.Name, description: desc, @@ -182,7 +183,7 @@ func getGaugeMetricData[N int64 | float64](gauge metricdata.Gauge[N], m metricda dataPoints := make([]*metricData, 0, len(gauge.DataPoints)) for _, dp := range gauge.DataPoints { keys, values := getAttrs(dp.Attributes) - desc := prometheus.NewDesc(m.Name, m.Description, keys, nil) + desc := prometheus.NewDesc(sanitizeName(m.Name), m.Description, keys, nil) md := &metricData{ name: m.Name, description: desc, @@ -230,3 +231,56 @@ func sanitizeRune(r rune) rune { } return '_' } + +func sanitizeName(n string) string { + // This algorithm is based on strings.Map from Go 1.19. + const replacement = '_' + + valid := func(i int, r rune) bool { + // Taken from + // https://github.com/prometheus/common/blob/dfbc25bd00225c70aca0d94c3c4bb7744f28ace0/model/metric.go#L92-L102 + if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || r == '_' || r == ':' || (r >= '0' && r <= '9' && i > 0) { + return true + } + return false + } + + // This output buffer b is initialized on demand, the first time a + // character needs to be replaced. + var b strings.Builder + for i, c := range n { + if valid(i, c) { + continue + } + + if i == 0 && c >= '0' && c <= '9' { + // Prefix leading number with replacement character. + b.Grow(len(n) + 1) + b.WriteByte(byte(replacement)) + break + } + b.Grow(len(n)) + b.WriteString(n[:i]) + b.WriteByte(byte(replacement)) + width := utf8.RuneLen(c) + n = n[i+width:] + break + } + + // Fast path for unchanged input. + if b.Cap() == 0 { // b.Grow was not called above. + return n + } + + for _, c := range n { + // Due to inlining, it is more performant to invoke WriteByte rather then + // WriteRune. + if valid(1, c) { // We are guaranteed to not be at the start. + b.WriteByte(byte(c)) + } else { + b.WriteByte(byte(replacement)) + } + } + + return b.String() +} diff --git a/exporters/prometheus/exporter_test.go b/exporters/prometheus/exporter_test.go index 461b9d709..6d46c3be0 100644 --- a/exporters/prometheus/exporter_test.go +++ b/exporters/prometheus/exporter_test.go @@ -103,8 +103,8 @@ func TestPrometheusExporter(t *testing.T) { }, }, { - name: "invalid instruments are dropped", - expectedFile: "testdata/gauge.txt", + name: "invalid instruments are renamed", + expectedFile: "testdata/sanitized_names.txt", recordMetrics: func(ctx context.Context, meter otelmetric.Meter) { attrs := []attribute.KeyValue{ attribute.Key("A").String("B"), @@ -116,16 +116,16 @@ func TestPrometheusExporter(t *testing.T) { gauge.Add(ctx, 100, attrs...) gauge.Add(ctx, -25, attrs...) - // Invalid, should be dropped. - gauge, err = meter.SyncFloat64().UpDownCounter("invalid.gauge.name") + // Invalid, will be renamed. + gauge, err = meter.SyncFloat64().UpDownCounter("invalid.gauge.name", instrument.WithDescription("a gauge with an invalid name")) require.NoError(t, err) gauge.Add(ctx, 100, attrs...) - counter, err := meter.SyncFloat64().Counter("invalid.counter.name") + counter, err := meter.SyncFloat64().Counter("0invalid.counter.name", instrument.WithDescription("a counter with an invalid name")) require.NoError(t, err) counter.Add(ctx, 100, attrs...) - histogram, err := meter.SyncFloat64().Histogram("invalid.hist.name") + histogram, err := meter.SyncFloat64().Histogram("invalid.hist.name", instrument.WithDescription("a histogram with an invalid name")) require.NoError(t, err) histogram.Record(ctx, 23, attrs...) }, @@ -155,3 +155,33 @@ func TestPrometheusExporter(t *testing.T) { }) } } + +func TestSantitizeName(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"nam€_with_3_width_rune", "nam__with_3_width_rune"}, + {"`", "_"}, + { + `! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWKYZ[]\^_abcdefghijklmnopqrstuvwkyz{|}~`, + `________________0123456789:______ABCDEFGHIJKLMNOPQRSTUVWKYZ_____abcdefghijklmnopqrstuvwkyz____`, + }, + + // Test cases taken from + // https://github.com/prometheus/common/blob/dfbc25bd00225c70aca0d94c3c4bb7744f28ace0/model/metric_test.go#L85-L136 + {"Avalid_23name", "Avalid_23name"}, + {"_Avalid_23name", "_Avalid_23name"}, + {"1valid_23name", "_1valid_23name"}, + {"avalid_23name", "avalid_23name"}, + {"Ava:lid_23name", "Ava:lid_23name"}, + {"a lid_23name", "a_lid_23name"}, + {":leading_colon", ":leading_colon"}, + {"colon:in:the:middle", "colon:in:the:middle"}, + {"", ""}, + } + + for _, test := range tests { + require.Equalf(t, test.want, sanitizeName(test.input), "input: %q", test.input) + } +} diff --git a/exporters/prometheus/testdata/sanitized_names.txt b/exporters/prometheus/testdata/sanitized_names.txt new file mode 100644 index 000000000..5e9516716 --- /dev/null +++ b/exporters/prometheus/testdata/sanitized_names.txt @@ -0,0 +1,24 @@ +# HELP bar a fun little gauge +# TYPE bar counter +bar{A="B",C="D"} 75 +# HELP _0invalid_counter_name a counter with an invalid name +# TYPE _0invalid_counter_name counter +_0invalid_counter_name{A="B",C="D"} 100 +# HELP invalid_gauge_name a gauge with an invalid name +# TYPE invalid_gauge_name counter +invalid_gauge_name{A="B",C="D"} 100 +# HELP invalid_hist_name a histogram with an invalid name +# TYPE invalid_hist_name histogram +invalid_hist_name_bucket{A="B",C="D",le="0"} 0 +invalid_hist_name_bucket{A="B",C="D",le="5"} 0 +invalid_hist_name_bucket{A="B",C="D",le="10"} 0 +invalid_hist_name_bucket{A="B",C="D",le="25"} 1 +invalid_hist_name_bucket{A="B",C="D",le="50"} 0 +invalid_hist_name_bucket{A="B",C="D",le="75"} 0 +invalid_hist_name_bucket{A="B",C="D",le="100"} 0 +invalid_hist_name_bucket{A="B",C="D",le="250"} 0 +invalid_hist_name_bucket{A="B",C="D",le="500"} 0 +invalid_hist_name_bucket{A="B",C="D",le="1000"} 0 +invalid_hist_name_bucket{A="B",C="D",le="+Inf"} 1 +invalid_hist_name_sum{A="B",C="D"} 23 +invalid_hist_name_count{A="B",C="D"} 1