// Copyright 2013 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package model

import (
	"errors"
	"fmt"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"unicode/utf8"

	dto "github.com/prometheus/client_model/go"
	"google.golang.org/protobuf/proto"
)

var (
	// NameValidationScheme determines the method of name validation to be used by
	// all calls to IsValidMetricName() and LabelName IsValid(). Setting UTF-8 mode
	// in isolation from other components that don't support UTF-8 may result in
	// bugs or other undefined behavior. This value is intended to be set by
	// UTF-8-aware binaries as part of their startup. To avoid the need for
	// locking, this value should be set once, ideally in an init(), before
	// multiple goroutines are started.
	//
	// The default is LegacyValidation. IsValidMetricName panics if this is set
	// to anything other than LegacyValidation or UTF8Validation.
	NameValidationScheme = LegacyValidation

	// NameEscapingScheme defines the default way that names will be escaped when
	// presented to systems that do not support UTF-8 names. If the Content-Type
	// "escaping" term is specified, that will override this value.
	// NameEscapingScheme should not be set to the NoEscaping value. That string
	// is used in content negotiation to indicate that a system supports UTF-8 and
	// has that feature enabled.
	//
	// The default is UnderscoreEscaping.
	NameEscapingScheme = UnderscoreEscaping
)

// ValidationScheme is a Go enum for determining how metric and label names will
// be validated by this library.
type ValidationScheme int

const (
	// LegacyValidation is a setting that requires that metric and label names
	// conform to the original Prometheus character requirements described by
	// MetricNameRE and LabelNameRE.
	LegacyValidation ValidationScheme = iota
	// UTF8Validation only requires that metric and label names be valid UTF-8
	// strings.
	UTF8Validation
)

// EscapingScheme is a Go enum selecting how EscapeName and UnescapeName
// translate names that do not conform to the legacy character requirements.
type EscapingScheme int

const (
	// NoEscaping indicates that a name will not be escaped. Unescaped names that
	// do not conform to the legacy validity check will use a new exposition
	// format syntax that will be officially standardized in future versions.
	NoEscaping EscapingScheme = iota
	// UnderscoreEscaping replaces all legacy-invalid characters with underscores.
	// This cannot be reversed: UnescapeName returns such names unchanged.
	UnderscoreEscaping
	// DotsEscaping is similar to UnderscoreEscaping, except that dots are
	// converted to `_dot_` and pre-existing underscores are converted to `__`.
	// Any other legacy-invalid character is also written as `__`.
	DotsEscaping
	// ValueEncodingEscaping prepends the name with `U__` and replaces all invalid
	// characters with the unicode value (in hexadecimal), surrounded by
	// underscores. Single underscores are replaced with double underscores.
	// Names that are already legacy-valid are left untouched.
	ValueEncodingEscaping
)

const (
	// EscapingKey is the key in an Accept or Content-Type header that defines how
	// metric and label names that do not conform to the legacy character
	// requirements should be escaped when being scraped by a legacy prometheus
	// system. If a system does not explicitly pass an escaping parameter in the
	// Accept header, the default NameEscapingScheme will be used.
	EscapingKey = "escaping"

	// Possible values for EscapingKey. Each one is the string form of the
	// EscapingScheme noted next to it (see EscapingScheme.String and
	// ToEscapingScheme).
	AllowUTF8         = "allow-utf-8" // No escaping required (NoEscaping).
	EscapeUnderscores = "underscores" // UnderscoreEscaping.
	EscapeDots        = "dots"        // DotsEscaping.
	EscapeValues      = "values"      // ValueEncodingEscaping.
)

// MetricNameRE is a regular expression matching valid metric
// names. Note that the IsValidMetricName function performs the same
// check but faster than a match with this regular expression.
var MetricNameRE = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`) // A Metric is similar to a LabelSet, but the key difference is that a Metric is // a singleton and refers to one and only one stream of samples. type Metric LabelSet // Equal compares the metrics. func (m Metric) Equal(o Metric) bool { return LabelSet(m).Equal(LabelSet(o)) } // Before compares the metrics' underlying label sets. func (m Metric) Before(o Metric) bool { return LabelSet(m).Before(LabelSet(o)) } // Clone returns a copy of the Metric. func (m Metric) Clone() Metric { clone := make(Metric, len(m)) for k, v := range m { clone[k] = v } return clone } func (m Metric) String() string { metricName, hasName := m[MetricNameLabel] numLabels := len(m) - 1 if !hasName { numLabels = len(m) } labelStrings := make([]string, 0, numLabels) for label, value := range m { if label != MetricNameLabel { labelStrings = append(labelStrings, fmt.Sprintf("%s=%q", label, value)) } } switch numLabels { case 0: if hasName { return string(metricName) } return "{}" default: sort.Strings(labelStrings) return fmt.Sprintf("%s{%s}", metricName, strings.Join(labelStrings, ", ")) } } // Fingerprint returns a Metric's Fingerprint. func (m Metric) Fingerprint() Fingerprint { return LabelSet(m).Fingerprint() } // FastFingerprint returns a Metric's Fingerprint calculated by a faster hashing // algorithm, which is, however, more susceptible to hash collisions. func (m Metric) FastFingerprint() Fingerprint { return LabelSet(m).FastFingerprint() } // IsValidMetricName returns true iff name matches the pattern of MetricNameRE // for legacy names, and iff it's valid UTF-8 if the UTF8Validation scheme is // selected. 
func IsValidMetricName(n LabelValue) bool { switch NameValidationScheme { case LegacyValidation: return IsValidLegacyMetricName(string(n)) case UTF8Validation: if len(n) == 0 { return false } return utf8.ValidString(string(n)) default: panic(fmt.Sprintf("Invalid name validation scheme requested: %d", NameValidationScheme)) } } // IsValidLegacyMetricName is similar to IsValidMetricName but always uses the // legacy validation scheme regardless of the value of NameValidationScheme. // This function, however, does not use MetricNameRE for the check but a much // faster hardcoded implementation. func IsValidLegacyMetricName(n string) bool { if len(n) == 0 { return false } for i, b := range n { if !isValidLegacyRune(b, i) { return false } } return true } // EscapeMetricFamily escapes the given metric names and labels with the given // escaping scheme. Returns a new object that uses the same pointers to fields // when possible and creates new escaped versions so as not to mutate the // input. func EscapeMetricFamily(v *dto.MetricFamily, scheme EscapingScheme) *dto.MetricFamily { if v == nil { return nil } if scheme == NoEscaping { return v } out := &dto.MetricFamily{ Help: v.Help, Type: v.Type, Unit: v.Unit, } // If the name is nil, copy as-is, don't try to escape. 
if v.Name == nil || IsValidLegacyMetricName(v.GetName()) { out.Name = v.Name } else { out.Name = proto.String(EscapeName(v.GetName(), scheme)) } for _, m := range v.Metric { if !metricNeedsEscaping(m) { out.Metric = append(out.Metric, m) continue } escaped := &dto.Metric{ Gauge: m.Gauge, Counter: m.Counter, Summary: m.Summary, Untyped: m.Untyped, Histogram: m.Histogram, TimestampMs: m.TimestampMs, } for _, l := range m.Label { if l.GetName() == MetricNameLabel { if l.Value == nil || IsValidLegacyMetricName(l.GetValue()) { escaped.Label = append(escaped.Label, l) continue } escaped.Label = append(escaped.Label, &dto.LabelPair{ Name: proto.String(MetricNameLabel), Value: proto.String(EscapeName(l.GetValue(), scheme)), }) continue } if l.Name == nil || IsValidLegacyMetricName(l.GetName()) { escaped.Label = append(escaped.Label, l) continue } escaped.Label = append(escaped.Label, &dto.LabelPair{ Name: proto.String(EscapeName(l.GetName(), scheme)), Value: l.Value, }) } out.Metric = append(out.Metric, escaped) } return out } func metricNeedsEscaping(m *dto.Metric) bool { for _, l := range m.Label { if l.GetName() == MetricNameLabel && !IsValidLegacyMetricName(l.GetValue()) { return true } if !IsValidLegacyMetricName(l.GetName()) { return true } } return false } // EscapeName escapes the incoming name according to the provided escaping // scheme. Depending on the rules of escaping, this may cause no change in the // string that is returned. (Especially NoEscaping, which by definition is a // noop). This function does not do any validation of the name. 
func EscapeName(name string, scheme EscapingScheme) string { if len(name) == 0 { return name } var escaped strings.Builder switch scheme { case NoEscaping: return name case UnderscoreEscaping: if IsValidLegacyMetricName(name) { return name } for i, b := range name { if isValidLegacyRune(b, i) { escaped.WriteRune(b) } else { escaped.WriteRune('_') } } return escaped.String() case DotsEscaping: // Do not early return for legacy valid names, we still escape underscores. for i, b := range name { if b == '_' { escaped.WriteString("__") } else if b == '.' { escaped.WriteString("_dot_") } else if isValidLegacyRune(b, i) { escaped.WriteRune(b) } else { escaped.WriteString("__") } } return escaped.String() case ValueEncodingEscaping: if IsValidLegacyMetricName(name) { return name } escaped.WriteString("U__") for i, b := range name { if b == '_' { escaped.WriteString("__") } else if isValidLegacyRune(b, i) { escaped.WriteRune(b) } else if !utf8.ValidRune(b) { escaped.WriteString("_FFFD_") } else { escaped.WriteRune('_') escaped.WriteString(strconv.FormatInt(int64(b), 16)) escaped.WriteRune('_') } } return escaped.String() default: panic(fmt.Sprintf("invalid escaping scheme %d", scheme)) } } // lower function taken from strconv.atoi func lower(c byte) byte { return c | ('x' - 'X') } // UnescapeName unescapes the incoming name according to the provided escaping // scheme if possible. Some schemes are partially or totally non-roundtripable. // If any error is enountered, returns the original input. func UnescapeName(name string, scheme EscapingScheme) string { if len(name) == 0 { return name } switch scheme { case NoEscaping: return name case UnderscoreEscaping: // It is not possible to unescape from underscore replacement. 
return name case DotsEscaping: name = strings.ReplaceAll(name, "_dot_", ".") name = strings.ReplaceAll(name, "__", "_") return name case ValueEncodingEscaping: escapedName, found := strings.CutPrefix(name, "U__") if !found { return name } var unescaped strings.Builder TOP: for i := 0; i < len(escapedName); i++ { // All non-underscores are treated normally. if escapedName[i] != '_' { unescaped.WriteByte(escapedName[i]) continue } i++ if i >= len(escapedName) { return name } // A double underscore is a single underscore. if escapedName[i] == '_' { unescaped.WriteByte('_') continue } // We think we are in a UTF-8 code, process it. var utf8Val uint for j := 0; i < len(escapedName); j++ { // This is too many characters for a utf8 value based on the MaxRune // value of '\U0010FFFF'. if j >= 6 { return name } // Found a closing underscore, convert to a rune, check validity, and append. if escapedName[i] == '_' { utf8Rune := rune(utf8Val) if !utf8.ValidRune(utf8Rune) { return name } unescaped.WriteRune(utf8Rune) continue TOP } r := lower(escapedName[i]) utf8Val *= 16 if r >= '0' && r <= '9' { utf8Val += uint(r) - '0' } else if r >= 'a' && r <= 'f' { utf8Val += uint(r) - 'a' + 10 } else { return name } i++ } // Didn't find closing underscore, invalid. 
return name } return unescaped.String() default: panic(fmt.Sprintf("invalid escaping scheme %d", scheme)) } } func isValidLegacyRune(b rune, i int) bool { return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == ':' || (b >= '0' && b <= '9' && i > 0) } func (e EscapingScheme) String() string { switch e { case NoEscaping: return AllowUTF8 case UnderscoreEscaping: return EscapeUnderscores case DotsEscaping: return EscapeDots case ValueEncodingEscaping: return EscapeValues default: panic(fmt.Sprintf("unknown format scheme %d", e)) } } func ToEscapingScheme(s string) (EscapingScheme, error) { if s == "" { return NoEscaping, errors.New("got empty string instead of escaping scheme") } switch s { case AllowUTF8: return NoEscaping, nil case EscapeUnderscores: return UnderscoreEscaping, nil case EscapeDots: return DotsEscaping, nil case EscapeValues: return ValueEncodingEscaping, nil default: return NoEscaping, fmt.Errorf("unknown format scheme %s", s) } }