metric.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. // Copyright 2013 The Prometheus Authors
  2. // Licensed under the Apache License, Version 2.0 (the "License");
  3. // you may not use this file except in compliance with the License.
  4. // You may obtain a copy of the License at
  5. //
  6. // http://www.apache.org/licenses/LICENSE-2.0
  7. //
  8. // Unless required by applicable law or agreed to in writing, software
  9. // distributed under the License is distributed on an "AS IS" BASIS,
  10. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package model
  14. import (
  15. "errors"
  16. "fmt"
  17. "regexp"
  18. "sort"
  19. "strconv"
  20. "strings"
  21. "unicode/utf8"
  22. dto "github.com/prometheus/client_model/go"
  23. "google.golang.org/protobuf/proto"
  24. )
  25. var (
  26. // NameValidationScheme determines the global default method of the name
  27. // validation to be used by all calls to IsValidMetricName() and LabelName
  28. // IsValid().
  29. //
  30. // Deprecated: This variable should not be used and might be removed in the
  31. // far future. If you wish to stick to the legacy name validation use
  32. // `IsValidLegacyMetricName()` and `LabelName.IsValidLegacy()` methods
  33. // instead. This variable is here as an escape hatch for emergency cases,
  34. // given the recent change from `LegacyValidation` to `UTF8Validation`, e.g.,
  35. // to delay UTF-8 migrations in time or aid in debugging unforeseen results of
  36. // the change. In such a case, a temporary assignment to `LegacyValidation`
  37. // value in the `init()` function in your main.go or so, could be considered.
  38. //
  39. // Historically we opted for a global variable for feature gating different
  40. // validation schemes in operations that were not otherwise easily adjustable
  41. // (e.g. Labels yaml unmarshaling). That could have been a mistake, a separate
  42. // Labels structure or package might have been a better choice. Given the
  43. // change was made and many upgraded the common already, we leave this as-is
  44. // with this warning and learning for the future.
  45. NameValidationScheme = UTF8Validation
  46. // NameEscapingScheme defines the default way that names will be escaped when
  47. // presented to systems that do not support UTF-8 names. If the Content-Type
  48. // "escaping" term is specified, that will override this value.
  49. // NameEscapingScheme should not be set to the NoEscaping value. That string
  50. // is used in content negotiation to indicate that a system supports UTF-8 and
  51. // has that feature enabled.
  52. NameEscapingScheme = UnderscoreEscaping
  53. )
  54. // ValidationScheme is a Go enum for determining how metric and label names will
  55. // be validated by this library.
  56. type ValidationScheme int
  57. const (
  58. // LegacyValidation is a setting that requires that all metric and label names
  59. // conform to the original Prometheus character requirements described by
  60. // MetricNameRE and LabelNameRE.
  // It is declared first with iota, so it is the zero value of
  // ValidationScheme.
  61. LegacyValidation ValidationScheme = iota
  62. // UTF8Validation only requires that metric and label names be valid UTF-8
  63. // strings.
  64. UTF8Validation
  65. )
  // EscapingScheme is a Go enum identifying how metric and label names are
  // rewritten by EscapeName and restored by UnescapeName.
  66. type EscapingScheme int
  67. const (
  68. // NoEscaping indicates that a name will not be escaped. Unescaped names that
  69. // do not conform to the legacy validity check will use a new exposition
  70. // format syntax that will be officially standardized in future versions.
  // It is declared first with iota, so it is the zero value of
  // EscapingScheme.
  71. NoEscaping EscapingScheme = iota
  72. // UnderscoreEscaping replaces all legacy-invalid characters with underscores.
  73. UnderscoreEscaping
  74. // DotsEscaping is similar to UnderscoreEscaping, except that dots are
  75. // converted to `_dot_` and pre-existing underscores are converted to `__`.
  76. DotsEscaping
  77. // ValueEncodingEscaping prepends the name with `U__` and replaces all invalid
  78. // characters with the unicode value, surrounded by underscores. Single
  79. // underscores are replaced with double underscores.
  80. ValueEncodingEscaping
  81. )
  82. const (
  83. // EscapingKey is the key in an Accept or Content-Type header that defines how
  84. // metric and label names that do not conform to the legacy character
  85. // requirements should be escaped when being scraped by a legacy prometheus
  86. // system. If a system does not explicitly pass an escaping parameter in the
  87. // Accept header, the default NameEscapingScheme will be used.
  88. EscapingKey = "escaping"
  89. // Possible values for Escaping Key:
  // Each value is the string form of one EscapingScheme, as produced by
  // EscapingScheme.String and parsed by ToEscapingScheme.
  90. AllowUTF8 = "allow-utf-8" // No escaping required.
  91. EscapeUnderscores = "underscores" // Corresponds to UnderscoreEscaping.
  92. EscapeDots = "dots" // Corresponds to DotsEscaping.
  93. EscapeValues = "values" // Corresponds to ValueEncodingEscaping.
  94. )
  95. // MetricNameRE is a regular expression matching valid legacy metric
  96. // names. Note that the IsValidLegacyMetricName function performs the same
  97. // check but faster than a match with this regular expression.
  // IsValidMetricName only applies this legacy rule when NameValidationScheme
  // is set to LegacyValidation.
  98. var MetricNameRE = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`)
  99. // A Metric is similar to a LabelSet, but the key difference is that a Metric is
  100. // a singleton and refers to one and only one stream of samples.
  // Metric is declared with LabelSet as its underlying type, so values convert
  // directly between the two.
  101. type Metric LabelSet
  102. // Equal compares the metrics.
  103. func (m Metric) Equal(o Metric) bool {
  104. return LabelSet(m).Equal(LabelSet(o))
  105. }
  106. // Before compares the metrics' underlying label sets.
  107. func (m Metric) Before(o Metric) bool {
  108. return LabelSet(m).Before(LabelSet(o))
  109. }
  110. // Clone returns a copy of the Metric.
  111. func (m Metric) Clone() Metric {
  112. clone := make(Metric, len(m))
  113. for k, v := range m {
  114. clone[k] = v
  115. }
  116. return clone
  117. }
  118. func (m Metric) String() string {
  119. metricName, hasName := m[MetricNameLabel]
  120. numLabels := len(m) - 1
  121. if !hasName {
  122. numLabels = len(m)
  123. }
  124. labelStrings := make([]string, 0, numLabels)
  125. for label, value := range m {
  126. if label != MetricNameLabel {
  127. labelStrings = append(labelStrings, fmt.Sprintf("%s=%q", label, value))
  128. }
  129. }
  130. switch numLabels {
  131. case 0:
  132. if hasName {
  133. return string(metricName)
  134. }
  135. return "{}"
  136. default:
  137. sort.Strings(labelStrings)
  138. return fmt.Sprintf("%s{%s}", metricName, strings.Join(labelStrings, ", "))
  139. }
  140. }
  141. // Fingerprint returns a Metric's Fingerprint.
  142. func (m Metric) Fingerprint() Fingerprint {
  143. return LabelSet(m).Fingerprint()
  144. }
  145. // FastFingerprint returns a Metric's Fingerprint calculated by a faster hashing
  146. // algorithm, which is, however, more susceptible to hash collisions.
  147. func (m Metric) FastFingerprint() Fingerprint {
  148. return LabelSet(m).FastFingerprint()
  149. }
  150. // IsValidMetricName returns true iff name matches the pattern of MetricNameRE
  151. // for legacy names, and iff it's valid UTF-8 if the UTF8Validation scheme is
  152. // selected.
  153. func IsValidMetricName(n LabelValue) bool {
  154. switch NameValidationScheme {
  155. case LegacyValidation:
  156. return IsValidLegacyMetricName(string(n))
  157. case UTF8Validation:
  158. if len(n) == 0 {
  159. return false
  160. }
  161. return utf8.ValidString(string(n))
  162. default:
  163. panic(fmt.Sprintf("Invalid name validation scheme requested: %d", NameValidationScheme))
  164. }
  165. }
  166. // IsValidLegacyMetricName is similar to IsValidMetricName but always uses the
  167. // legacy validation scheme regardless of the value of NameValidationScheme.
  168. // This function, however, does not use MetricNameRE for the check but a much
  169. // faster hardcoded implementation.
  170. func IsValidLegacyMetricName(n string) bool {
  171. if len(n) == 0 {
  172. return false
  173. }
  174. for i, b := range n {
  175. if !isValidLegacyRune(b, i) {
  176. return false
  177. }
  178. }
  179. return true
  180. }
  181. // EscapeMetricFamily escapes the given metric names and labels with the given
  182. // escaping scheme. Returns a new object that uses the same pointers to fields
  183. // when possible and creates new escaped versions so as not to mutate the
  184. // input.
  185. func EscapeMetricFamily(v *dto.MetricFamily, scheme EscapingScheme) *dto.MetricFamily {
  186. if v == nil {
  187. return nil
  188. }
  189. if scheme == NoEscaping {
  190. return v
  191. }
  192. out := &dto.MetricFamily{
  193. Help: v.Help,
  194. Type: v.Type,
  195. Unit: v.Unit,
  196. }
  197. // If the name is nil, copy as-is, don't try to escape.
  198. if v.Name == nil || IsValidLegacyMetricName(v.GetName()) {
  199. out.Name = v.Name
  200. } else {
  201. out.Name = proto.String(EscapeName(v.GetName(), scheme))
  202. }
  203. for _, m := range v.Metric {
  204. if !metricNeedsEscaping(m) {
  205. out.Metric = append(out.Metric, m)
  206. continue
  207. }
  208. escaped := &dto.Metric{
  209. Gauge: m.Gauge,
  210. Counter: m.Counter,
  211. Summary: m.Summary,
  212. Untyped: m.Untyped,
  213. Histogram: m.Histogram,
  214. TimestampMs: m.TimestampMs,
  215. }
  216. for _, l := range m.Label {
  217. if l.GetName() == MetricNameLabel {
  218. if l.Value == nil || IsValidLegacyMetricName(l.GetValue()) {
  219. escaped.Label = append(escaped.Label, l)
  220. continue
  221. }
  222. escaped.Label = append(escaped.Label, &dto.LabelPair{
  223. Name: proto.String(MetricNameLabel),
  224. Value: proto.String(EscapeName(l.GetValue(), scheme)),
  225. })
  226. continue
  227. }
  228. if l.Name == nil || IsValidLegacyMetricName(l.GetName()) {
  229. escaped.Label = append(escaped.Label, l)
  230. continue
  231. }
  232. escaped.Label = append(escaped.Label, &dto.LabelPair{
  233. Name: proto.String(EscapeName(l.GetName(), scheme)),
  234. Value: l.Value,
  235. })
  236. }
  237. out.Metric = append(out.Metric, escaped)
  238. }
  239. return out
  240. }
  241. func metricNeedsEscaping(m *dto.Metric) bool {
  242. for _, l := range m.Label {
  243. if l.GetName() == MetricNameLabel && !IsValidLegacyMetricName(l.GetValue()) {
  244. return true
  245. }
  246. if !IsValidLegacyMetricName(l.GetName()) {
  247. return true
  248. }
  249. }
  250. return false
  251. }
  252. // EscapeName escapes the incoming name according to the provided escaping
  253. // scheme. Depending on the rules of escaping, this may cause no change in the
  254. // string that is returned. (Especially NoEscaping, which by definition is a
  255. // noop). This function does not do any validation of the name.
  256. func EscapeName(name string, scheme EscapingScheme) string {
  257. if len(name) == 0 {
  258. return name
  259. }
  260. var escaped strings.Builder
  261. switch scheme {
  262. case NoEscaping:
  263. return name
  264. case UnderscoreEscaping:
  265. if IsValidLegacyMetricName(name) {
  266. return name
  267. }
  268. for i, b := range name {
  269. if isValidLegacyRune(b, i) {
  270. escaped.WriteRune(b)
  271. } else {
  272. escaped.WriteRune('_')
  273. }
  274. }
  275. return escaped.String()
  276. case DotsEscaping:
  277. // Do not early return for legacy valid names, we still escape underscores.
  278. for i, b := range name {
  279. if b == '_' {
  280. escaped.WriteString("__")
  281. } else if b == '.' {
  282. escaped.WriteString("_dot_")
  283. } else if isValidLegacyRune(b, i) {
  284. escaped.WriteRune(b)
  285. } else {
  286. escaped.WriteString("__")
  287. }
  288. }
  289. return escaped.String()
  290. case ValueEncodingEscaping:
  291. if IsValidLegacyMetricName(name) {
  292. return name
  293. }
  294. escaped.WriteString("U__")
  295. for i, b := range name {
  296. if b == '_' {
  297. escaped.WriteString("__")
  298. } else if isValidLegacyRune(b, i) {
  299. escaped.WriteRune(b)
  300. } else if !utf8.ValidRune(b) {
  301. escaped.WriteString("_FFFD_")
  302. } else {
  303. escaped.WriteRune('_')
  304. escaped.WriteString(strconv.FormatInt(int64(b), 16))
  305. escaped.WriteRune('_')
  306. }
  307. }
  308. return escaped.String()
  309. default:
  310. panic(fmt.Sprintf("invalid escaping scheme %d", scheme))
  311. }
  312. }
  // lower sets the bit that separates 'X' from 'x' in c, which maps the ASCII
  // letters 'A'-'Z' to 'a'-'z' and leaves the digits '0'-'9' unchanged. The
  // function is taken from strconv.atoi.
  func lower(c byte) byte {
  	const caseBit = 'x' - 'X'
  	return c | caseBit
  }
  317. // UnescapeName unescapes the incoming name according to the provided escaping
  318. // scheme if possible. Some schemes are partially or totally non-roundtripable.
  319. // If any error is enountered, returns the original input.
  320. func UnescapeName(name string, scheme EscapingScheme) string {
  321. if len(name) == 0 {
  322. return name
  323. }
  324. switch scheme {
  325. case NoEscaping:
  326. return name
  327. case UnderscoreEscaping:
  328. // It is not possible to unescape from underscore replacement.
  329. return name
  330. case DotsEscaping:
  331. name = strings.ReplaceAll(name, "_dot_", ".")
  332. name = strings.ReplaceAll(name, "__", "_")
  333. return name
  334. case ValueEncodingEscaping:
  335. escapedName, found := strings.CutPrefix(name, "U__")
  336. if !found {
  337. return name
  338. }
  339. var unescaped strings.Builder
  340. TOP:
  341. for i := 0; i < len(escapedName); i++ {
  342. // All non-underscores are treated normally.
  343. if escapedName[i] != '_' {
  344. unescaped.WriteByte(escapedName[i])
  345. continue
  346. }
  347. i++
  348. if i >= len(escapedName) {
  349. return name
  350. }
  351. // A double underscore is a single underscore.
  352. if escapedName[i] == '_' {
  353. unescaped.WriteByte('_')
  354. continue
  355. }
  356. // We think we are in a UTF-8 code, process it.
  357. var utf8Val uint
  358. for j := 0; i < len(escapedName); j++ {
  359. // This is too many characters for a utf8 value based on the MaxRune
  360. // value of '\U0010FFFF'.
  361. if j >= 6 {
  362. return name
  363. }
  364. // Found a closing underscore, convert to a rune, check validity, and append.
  365. if escapedName[i] == '_' {
  366. utf8Rune := rune(utf8Val)
  367. if !utf8.ValidRune(utf8Rune) {
  368. return name
  369. }
  370. unescaped.WriteRune(utf8Rune)
  371. continue TOP
  372. }
  373. r := lower(escapedName[i])
  374. utf8Val *= 16
  375. if r >= '0' && r <= '9' {
  376. utf8Val += uint(r) - '0'
  377. } else if r >= 'a' && r <= 'f' {
  378. utf8Val += uint(r) - 'a' + 10
  379. } else {
  380. return name
  381. }
  382. i++
  383. }
  384. // Didn't find closing underscore, invalid.
  385. return name
  386. }
  387. return unescaped.String()
  388. default:
  389. panic(fmt.Sprintf("invalid escaping scheme %d", scheme))
  390. }
  391. }
  // isValidLegacyRune reports whether rune b is allowed at position i of a
  // legacy name: ASCII letters, '_' and ':' are allowed anywhere, ASCII digits
  // everywhere except position 0.
  func isValidLegacyRune(b rune, i int) bool {
  	switch {
  	case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z':
  		return true
  	case b == '_', b == ':':
  		return true
  	case '0' <= b && b <= '9':
  		return i > 0
  	default:
  		return false
  	}
  }
  395. func (e EscapingScheme) String() string {
  396. switch e {
  397. case NoEscaping:
  398. return AllowUTF8
  399. case UnderscoreEscaping:
  400. return EscapeUnderscores
  401. case DotsEscaping:
  402. return EscapeDots
  403. case ValueEncodingEscaping:
  404. return EscapeValues
  405. default:
  406. panic(fmt.Sprintf("unknown format scheme %d", e))
  407. }
  408. }
  409. func ToEscapingScheme(s string) (EscapingScheme, error) {
  410. if s == "" {
  411. return NoEscaping, errors.New("got empty string instead of escaping scheme")
  412. }
  413. switch s {
  414. case AllowUTF8:
  415. return NoEscaping, nil
  416. case EscapeUnderscores:
  417. return UnderscoreEscaping, nil
  418. case EscapeDots:
  419. return DotsEscaping, nil
  420. case EscapeValues:
  421. return ValueEncodingEscaping, nil
  422. default:
  423. return NoEscaping, fmt.Errorf("unknown format scheme %s", s)
  424. }
  425. }