metric.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. // Copyright 2013 The Prometheus Authors
  2. // Licensed under the Apache License, Version 2.0 (the "License");
  3. // you may not use this file except in compliance with the License.
  4. // You may obtain a copy of the License at
  5. //
  6. // http://www.apache.org/licenses/LICENSE-2.0
  7. //
  8. // Unless required by applicable law or agreed to in writing, software
  9. // distributed under the License is distributed on an "AS IS" BASIS,
  10. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package model
  14. import (
  15. "errors"
  16. "fmt"
  17. "regexp"
  18. "sort"
  19. "strconv"
  20. "strings"
  21. "unicode/utf8"
  22. dto "github.com/prometheus/client_model/go"
  23. "google.golang.org/protobuf/proto"
  24. )
  25. var (
  26. // NameValidationScheme determines the method of name validation to be used by
  27. // all calls to IsValidMetricName() and LabelName IsValid(). Setting UTF-8
  28. // mode in isolation from other components that don't support UTF-8 may result
  29. // in bugs or other undefined behavior. This value can be set to
  30. // LegacyValidation during startup if a binary is not UTF-8-aware binaries. To
  31. // avoid need for locking, this value should be set once, ideally in an
  32. // init(), before multiple goroutines are started.
  33. NameValidationScheme = UTF8Validation
  34. // NameEscapingScheme defines the default way that names will be escaped when
  35. // presented to systems that do not support UTF-8 names. If the Content-Type
  36. // "escaping" term is specified, that will override this value.
  37. // NameEscapingScheme should not be set to the NoEscaping value. That string
  38. // is used in content negotiation to indicate that a system supports UTF-8 and
  39. // has that feature enabled.
  40. NameEscapingScheme = UnderscoreEscaping
  41. )
  // ValidationScheme enumerates the ways this library can validate metric and
  // label names.
  type ValidationScheme int

  const (
  	// LegacyValidation requires metric and label names to conform to the
  	// original Prometheus character requirements described by MetricNameRE
  	// and LabelNameRE.
  	LegacyValidation ValidationScheme = iota

  	// UTF8Validation only requires metric and label names to be valid UTF-8
  	// strings.
  	UTF8Validation
  )
  // EscapingScheme enumerates the ways a metric or label name can be escaped
  // for systems that only accept legacy names.
  type EscapingScheme int

  const (
  	// NoEscaping leaves names untouched. Unescaped names that fail the legacy
  	// validity check use a new exposition format syntax that will be
  	// officially standardized in future versions.
  	NoEscaping EscapingScheme = iota

  	// UnderscoreEscaping substitutes an underscore for every legacy-invalid
  	// character.
  	UnderscoreEscaping

  	// DotsEscaping behaves like UnderscoreEscaping, except that dots become
  	// `_dot_` and pre-existing underscores become `__`.
  	DotsEscaping

  	// ValueEncodingEscaping prefixes the name with `U__` and replaces each
  	// invalid character with its unicode value wrapped in underscores. Single
  	// underscores are doubled.
  	ValueEncodingEscaping
  )
  const (
  	// EscapingKey is the Accept / Content-Type header key that tells how
  	// metric and label names outside the legacy character set should be
  	// escaped when scraped by a legacy prometheus system. When the Accept
  	// header carries no explicit escaping parameter, the default
  	// NameEscapingScheme is used.
  	EscapingKey = "escaping"

  	// The constants below are the possible values for EscapingKey.

  	// AllowUTF8 means no escaping is required.
  	AllowUTF8 = "allow-utf-8"
  	// EscapeUnderscores is the header value for UnderscoreEscaping.
  	EscapeUnderscores = "underscores"
  	// EscapeDots is the header value for DotsEscaping.
  	EscapeDots = "dots"
  	// EscapeValues is the header value for ValueEncodingEscaping.
  	EscapeValues = "values"
  )
  // MetricNameRE is the regular expression that legacy metric names must match:
  // a leading letter, underscore or colon followed by any number of letters,
  // digits, underscores or colons. IsValidLegacyMetricName performs the same
  // check with a hardcoded loop, which is faster than matching this expression.
  var MetricNameRE = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`)
  87. // A Metric is similar to a LabelSet, but the key difference is that a Metric is
  88. // a singleton and refers to one and only one stream of samples.
  89. type Metric LabelSet
  90. // Equal compares the metrics.
  91. func (m Metric) Equal(o Metric) bool {
  92. return LabelSet(m).Equal(LabelSet(o))
  93. }
  94. // Before compares the metrics' underlying label sets.
  95. func (m Metric) Before(o Metric) bool {
  96. return LabelSet(m).Before(LabelSet(o))
  97. }
  98. // Clone returns a copy of the Metric.
  99. func (m Metric) Clone() Metric {
  100. clone := make(Metric, len(m))
  101. for k, v := range m {
  102. clone[k] = v
  103. }
  104. return clone
  105. }
  106. func (m Metric) String() string {
  107. metricName, hasName := m[MetricNameLabel]
  108. numLabels := len(m) - 1
  109. if !hasName {
  110. numLabels = len(m)
  111. }
  112. labelStrings := make([]string, 0, numLabels)
  113. for label, value := range m {
  114. if label != MetricNameLabel {
  115. labelStrings = append(labelStrings, fmt.Sprintf("%s=%q", label, value))
  116. }
  117. }
  118. switch numLabels {
  119. case 0:
  120. if hasName {
  121. return string(metricName)
  122. }
  123. return "{}"
  124. default:
  125. sort.Strings(labelStrings)
  126. return fmt.Sprintf("%s{%s}", metricName, strings.Join(labelStrings, ", "))
  127. }
  128. }
  129. // Fingerprint returns a Metric's Fingerprint.
  130. func (m Metric) Fingerprint() Fingerprint {
  131. return LabelSet(m).Fingerprint()
  132. }
  133. // FastFingerprint returns a Metric's Fingerprint calculated by a faster hashing
  134. // algorithm, which is, however, more susceptible to hash collisions.
  135. func (m Metric) FastFingerprint() Fingerprint {
  136. return LabelSet(m).FastFingerprint()
  137. }
  138. // IsValidMetricName returns true iff name matches the pattern of MetricNameRE
  139. // for legacy names, and iff it's valid UTF-8 if the UTF8Validation scheme is
  140. // selected.
  141. func IsValidMetricName(n LabelValue) bool {
  142. switch NameValidationScheme {
  143. case LegacyValidation:
  144. return IsValidLegacyMetricName(string(n))
  145. case UTF8Validation:
  146. if len(n) == 0 {
  147. return false
  148. }
  149. return utf8.ValidString(string(n))
  150. default:
  151. panic(fmt.Sprintf("Invalid name validation scheme requested: %d", NameValidationScheme))
  152. }
  153. }
  154. // IsValidLegacyMetricName is similar to IsValidMetricName but always uses the
  155. // legacy validation scheme regardless of the value of NameValidationScheme.
  156. // This function, however, does not use MetricNameRE for the check but a much
  157. // faster hardcoded implementation.
  158. func IsValidLegacyMetricName(n string) bool {
  159. if len(n) == 0 {
  160. return false
  161. }
  162. for i, b := range n {
  163. if !isValidLegacyRune(b, i) {
  164. return false
  165. }
  166. }
  167. return true
  168. }
  169. // EscapeMetricFamily escapes the given metric names and labels with the given
  170. // escaping scheme. Returns a new object that uses the same pointers to fields
  171. // when possible and creates new escaped versions so as not to mutate the
  172. // input.
  173. func EscapeMetricFamily(v *dto.MetricFamily, scheme EscapingScheme) *dto.MetricFamily {
  174. if v == nil {
  175. return nil
  176. }
  177. if scheme == NoEscaping {
  178. return v
  179. }
  180. out := &dto.MetricFamily{
  181. Help: v.Help,
  182. Type: v.Type,
  183. Unit: v.Unit,
  184. }
  185. // If the name is nil, copy as-is, don't try to escape.
  186. if v.Name == nil || IsValidLegacyMetricName(v.GetName()) {
  187. out.Name = v.Name
  188. } else {
  189. out.Name = proto.String(EscapeName(v.GetName(), scheme))
  190. }
  191. for _, m := range v.Metric {
  192. if !metricNeedsEscaping(m) {
  193. out.Metric = append(out.Metric, m)
  194. continue
  195. }
  196. escaped := &dto.Metric{
  197. Gauge: m.Gauge,
  198. Counter: m.Counter,
  199. Summary: m.Summary,
  200. Untyped: m.Untyped,
  201. Histogram: m.Histogram,
  202. TimestampMs: m.TimestampMs,
  203. }
  204. for _, l := range m.Label {
  205. if l.GetName() == MetricNameLabel {
  206. if l.Value == nil || IsValidLegacyMetricName(l.GetValue()) {
  207. escaped.Label = append(escaped.Label, l)
  208. continue
  209. }
  210. escaped.Label = append(escaped.Label, &dto.LabelPair{
  211. Name: proto.String(MetricNameLabel),
  212. Value: proto.String(EscapeName(l.GetValue(), scheme)),
  213. })
  214. continue
  215. }
  216. if l.Name == nil || IsValidLegacyMetricName(l.GetName()) {
  217. escaped.Label = append(escaped.Label, l)
  218. continue
  219. }
  220. escaped.Label = append(escaped.Label, &dto.LabelPair{
  221. Name: proto.String(EscapeName(l.GetName(), scheme)),
  222. Value: l.Value,
  223. })
  224. }
  225. out.Metric = append(out.Metric, escaped)
  226. }
  227. return out
  228. }
  229. func metricNeedsEscaping(m *dto.Metric) bool {
  230. for _, l := range m.Label {
  231. if l.GetName() == MetricNameLabel && !IsValidLegacyMetricName(l.GetValue()) {
  232. return true
  233. }
  234. if !IsValidLegacyMetricName(l.GetName()) {
  235. return true
  236. }
  237. }
  238. return false
  239. }
  240. // EscapeName escapes the incoming name according to the provided escaping
  241. // scheme. Depending on the rules of escaping, this may cause no change in the
  242. // string that is returned. (Especially NoEscaping, which by definition is a
  243. // noop). This function does not do any validation of the name.
  244. func EscapeName(name string, scheme EscapingScheme) string {
  245. if len(name) == 0 {
  246. return name
  247. }
  248. var escaped strings.Builder
  249. switch scheme {
  250. case NoEscaping:
  251. return name
  252. case UnderscoreEscaping:
  253. if IsValidLegacyMetricName(name) {
  254. return name
  255. }
  256. for i, b := range name {
  257. if isValidLegacyRune(b, i) {
  258. escaped.WriteRune(b)
  259. } else {
  260. escaped.WriteRune('_')
  261. }
  262. }
  263. return escaped.String()
  264. case DotsEscaping:
  265. // Do not early return for legacy valid names, we still escape underscores.
  266. for i, b := range name {
  267. if b == '_' {
  268. escaped.WriteString("__")
  269. } else if b == '.' {
  270. escaped.WriteString("_dot_")
  271. } else if isValidLegacyRune(b, i) {
  272. escaped.WriteRune(b)
  273. } else {
  274. escaped.WriteString("__")
  275. }
  276. }
  277. return escaped.String()
  278. case ValueEncodingEscaping:
  279. if IsValidLegacyMetricName(name) {
  280. return name
  281. }
  282. escaped.WriteString("U__")
  283. for i, b := range name {
  284. if b == '_' {
  285. escaped.WriteString("__")
  286. } else if isValidLegacyRune(b, i) {
  287. escaped.WriteRune(b)
  288. } else if !utf8.ValidRune(b) {
  289. escaped.WriteString("_FFFD_")
  290. } else {
  291. escaped.WriteRune('_')
  292. escaped.WriteString(strconv.FormatInt(int64(b), 16))
  293. escaped.WriteRune('_')
  294. }
  295. }
  296. return escaped.String()
  297. default:
  298. panic(fmt.Sprintf("invalid escaping scheme %d", scheme))
  299. }
  300. }
  // lower is borrowed from strconv's atoi code. It sets the bit that separates
  // ASCII upper-case letters from lower-case ones, so 'A'..'Z' become 'a'..'z'
  // while lower-case letters and decimal digits are left unchanged. Other bytes
  // may be altered, so callers must range-check the result.
  func lower(c byte) byte {
  	const caseBit = 'x' - 'X'
  	return c | caseBit
  }
  305. // UnescapeName unescapes the incoming name according to the provided escaping
  306. // scheme if possible. Some schemes are partially or totally non-roundtripable.
  307. // If any error is enountered, returns the original input.
  308. func UnescapeName(name string, scheme EscapingScheme) string {
  309. if len(name) == 0 {
  310. return name
  311. }
  312. switch scheme {
  313. case NoEscaping:
  314. return name
  315. case UnderscoreEscaping:
  316. // It is not possible to unescape from underscore replacement.
  317. return name
  318. case DotsEscaping:
  319. name = strings.ReplaceAll(name, "_dot_", ".")
  320. name = strings.ReplaceAll(name, "__", "_")
  321. return name
  322. case ValueEncodingEscaping:
  323. escapedName, found := strings.CutPrefix(name, "U__")
  324. if !found {
  325. return name
  326. }
  327. var unescaped strings.Builder
  328. TOP:
  329. for i := 0; i < len(escapedName); i++ {
  330. // All non-underscores are treated normally.
  331. if escapedName[i] != '_' {
  332. unescaped.WriteByte(escapedName[i])
  333. continue
  334. }
  335. i++
  336. if i >= len(escapedName) {
  337. return name
  338. }
  339. // A double underscore is a single underscore.
  340. if escapedName[i] == '_' {
  341. unescaped.WriteByte('_')
  342. continue
  343. }
  344. // We think we are in a UTF-8 code, process it.
  345. var utf8Val uint
  346. for j := 0; i < len(escapedName); j++ {
  347. // This is too many characters for a utf8 value based on the MaxRune
  348. // value of '\U0010FFFF'.
  349. if j >= 6 {
  350. return name
  351. }
  352. // Found a closing underscore, convert to a rune, check validity, and append.
  353. if escapedName[i] == '_' {
  354. utf8Rune := rune(utf8Val)
  355. if !utf8.ValidRune(utf8Rune) {
  356. return name
  357. }
  358. unescaped.WriteRune(utf8Rune)
  359. continue TOP
  360. }
  361. r := lower(escapedName[i])
  362. utf8Val *= 16
  363. if r >= '0' && r <= '9' {
  364. utf8Val += uint(r) - '0'
  365. } else if r >= 'a' && r <= 'f' {
  366. utf8Val += uint(r) - 'a' + 10
  367. } else {
  368. return name
  369. }
  370. i++
  371. }
  372. // Didn't find closing underscore, invalid.
  373. return name
  374. }
  375. return unescaped.String()
  376. default:
  377. panic(fmt.Sprintf("invalid escaping scheme %d", scheme))
  378. }
  379. }
  // isValidLegacyRune reports whether rune b may appear at position i of a
  // legacy name: ASCII letters, '_' and ':' are allowed anywhere, ASCII digits
  // only after the first position.
  func isValidLegacyRune(b rune, i int) bool {
  	switch {
  	case b >= 'a' && b <= 'z', b >= 'A' && b <= 'Z', b == '_', b == ':':
  		return true
  	case b >= '0' && b <= '9':
  		return i > 0
  	default:
  		return false
  	}
  }
  383. func (e EscapingScheme) String() string {
  384. switch e {
  385. case NoEscaping:
  386. return AllowUTF8
  387. case UnderscoreEscaping:
  388. return EscapeUnderscores
  389. case DotsEscaping:
  390. return EscapeDots
  391. case ValueEncodingEscaping:
  392. return EscapeValues
  393. default:
  394. panic(fmt.Sprintf("unknown format scheme %d", e))
  395. }
  396. }
  397. func ToEscapingScheme(s string) (EscapingScheme, error) {
  398. if s == "" {
  399. return NoEscaping, errors.New("got empty string instead of escaping scheme")
  400. }
  401. switch s {
  402. case AllowUTF8:
  403. return NoEscaping, nil
  404. case EscapeUnderscores:
  405. return UnderscoreEscaping, nil
  406. case EscapeDots:
  407. return DotsEscaping, nil
  408. case EscapeValues:
  409. return ValueEncodingEscaping, nil
  410. default:
  411. return NoEscaping, fmt.Errorf("unknown format scheme %s", s)
  412. }
  413. }