1use anyhow::{anyhow, bail, Result};
5use const_format::formatcp;
6use dyn_clone::DynClone;
7use once_cell::sync::Lazy;
8use regex::Regex;
9use schemars::{
10 gen::SchemaGenerator,
11 schema::{InstanceType, Metadata, ObjectValidation, Schema, SchemaObject, SingleOrVec},
12 JsonSchema,
13};
14use serde::{Deserialize, Serialize};
15use serde_json::json;
16use std::{
17 borrow::Cow,
18 collections::{BTreeMap, BTreeSet, HashMap},
19 module_path,
20};
21
// Type tag returned by `<Histogram as MetricLike>::metric_type`.
// NOTE(review): `dead_code` is presumably allowed because nothing visible in this file
// calls `metric_type()` — confirm before removing the attribute.
#[allow(dead_code)]
const HISTOGRAM_TYPE: &str = "HISTOGRAM";
// Type tag returned by `<Summary as MetricLike>::metric_type`.
#[allow(dead_code)]
const SUMMARY_TYPE: &str = "SUMMARY";
26
27static METRIC_REGEX_NO_LABEL: Lazy<&Regex> = Lazy::new(|| {
28 static RE: once_cell::sync::OnceCell<Regex> = once_cell::sync::OnceCell::new();
29 RE.get_or_init(|| Regex::new(r"([a-zA-Z_:][a-zA-Z0-9_:]*)\s(-?[\d.]+(?:e-?\d+)?|NaN)").unwrap())
30});
31
32static METRIC_REGEX_WITH_LABEL: Lazy<&Regex> = Lazy::new(|| {
33 static RE: once_cell::sync::OnceCell<Regex> = once_cell::sync::OnceCell::new();
34 RE.get_or_init(|| {
35 Regex::new(r"[a-zA-Z_:][a-zA-Z0-9_:]*\{(.*)\}\s(-?[\d.]+(?:e-?\d+)?|NaN)").unwrap()
36 })
37});
38
39static LABELS_REGEX: Lazy<&Regex> = Lazy::new(|| {
40 static RE: once_cell::sync::OnceCell<Regex> = once_cell::sync::OnceCell::new();
41 RE.get_or_init(|| Regex::new("([a-zA-Z0-9_:]*)=\"([^\"]+)\"").unwrap())
42});
43
44static MULTI_NEWLINE: Lazy<&Regex> = Lazy::new(|| {
45 static RE: once_cell::sync::OnceCell<Regex> = once_cell::sync::OnceCell::new();
46 RE.get_or_init(|| Regex::new(r"\n\n").unwrap())
47});
48
// Label name -> label value, as captured by `LABELS_REGEX`.
type Labels = HashMap<String, String>;
// Sample values are kept as the exact text captured from the input (including `NaN`),
// not parsed into a number.
type Value = String;
51
// Top-level container for parsed Prometheus metrics; built by `PrometheusData::from_string`.
//
// Plain `//` comments are used here on purpose: this type derives `JsonSchema`, and
// schemars reads `///` doc comments as schema metadata.
#[derive(Clone, Serialize, JsonSchema)]
// `Debug` cannot be derived: `MetricFamily` stores `Box<dyn MetricLike>` and `MetricLike`
// has no `Debug` bound.
#[allow(missing_debug_implementations)]
#[schemars(title = "Metrics data", description = "Prometheus metrics data")]
pub struct PrometheusData {
    // One entry per `# HELP` / `# TYPE` group found in the input.
    metrics: Vec<MetricFamily>,
}
59
60impl PrometheusData {
61 pub(crate) fn from_string(s: &str) -> Result<PrometheusData> {
63 let text = MULTI_NEWLINE.replace_all(s, "\n");
64 let mut metrics = Vec::new();
65 let mut metric_lines = Vec::new();
66 let mut num_comment_lines = 0;
67 for line in text.lines() {
68 if line.starts_with('#') {
69 if num_comment_lines == 2 {
70 metrics.push(MetricFamily::from_raw(&metric_lines)?);
72 metric_lines = vec![line];
73 num_comment_lines = 1;
74 } else {
75 num_comment_lines += 1;
76 metric_lines.push(line);
77 }
78 } else {
79 metric_lines.push(line)
80 }
81 }
82 Ok(PrometheusData { metrics })
83 }
84}
85
/// A single sample: its optional label set and its value.
///
/// Built from one sample line by `Metric::from_string`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct Metric {
    /// `None` when the sample line has no `{...}` label set.
    labels: Option<Labels>,
    /// The sample value exactly as captured from the line.
    value: Value,
}
91
92impl JsonSchema for Metric {
93 fn schema_name() -> String {
94 "gauge".to_owned()
95 }
96
97 fn schema_id() -> Cow<'static, str> {
98 Cow::Borrowed(formatcp!("{}::Metric", module_path!()))
99 }
100
101 fn json_schema(gen: &mut SchemaGenerator) -> Schema {
102 let type_schema = SchemaObject {
103 instance_type: Some(SingleOrVec::Single(InstanceType::String.into())),
104 const_value: Some(json!("metric")),
105 ..Default::default()
106 };
107
108 let schema = SchemaObject {
109 instance_type: Some(SingleOrVec::Single(InstanceType::Object.into())),
110 metadata: Some(Box::new(Metadata {
111 title: Some("Gauge data".to_string()),
112 description: Some("A gauge metric".to_string()),
113 ..Default::default()
114 })),
115 object: Some(Box::new(ObjectValidation {
116 properties: BTreeMap::from([
117 ("type".to_string(), Schema::Object(type_schema)),
118 ("labels".to_string(), <Option<Labels>>::json_schema(gen)),
119 ("value".to_string(), <String>::json_schema(gen)),
120 ]),
121 required: BTreeSet::from(["type".to_string(), "value".to_string()]),
122 ..Default::default()
123 })),
124 ..Default::default()
125 };
126
127 schema.into()
128 }
129}
130
/// One summary series, assembled from its quantile, `_sum` and `_count` lines
/// by `Summary::from_raw`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct Summary {
    /// Labels other than `quantile` found on the quantile lines.
    labels: Option<Labels>,
    /// Data collected from the `quantile` labels of the series.
    quantiles: Labels,
    /// Value of the `<name>_count` line (empty string if no such line was seen).
    count: Value,
    /// Value of the `<name>_sum` line (empty string if no such line was seen).
    sum: Value,
}
138
139#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
140struct Histogram {
141 labels: Option<HashMap<String, String>>,
142 buckets: Labels,
143 count: Value,
144 sum: Value,
145}
146
// Kind of a metric family, serialized in lowercase ("gauge", "histogram", "summary").
//
// Plain `//` comments are used on purpose: this enum derives `JsonSchema`, and schemars
// reads `///` doc comments as schema descriptions.
#[derive(Debug, Clone, PartialEq, Serialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
#[schemars(title = "Metric type")]
enum MetricType {
    // Also used for `counter` families (see `MetricFamily::metric_name_and_type`).
    Gauge,
    Histogram,
    Summary,
}
155
/// A metric family: the metadata from its `# HELP` / `# TYPE` lines plus the
/// samples that follow them.
#[derive(Clone, Serialize)]
struct MetricFamily {
    /// Kind parsed from the `# TYPE` line.
    metric_type: MetricType,
    /// Metric name parsed from the `# TYPE` line.
    metric_name: String,
    /// Help text parsed from the `# HELP` line.
    help: String,
    /// Parsed samples; the concrete type (`Metric`, `Histogram` or `Summary`)
    /// depends on `metric_type`.
    data: Vec<Box<dyn MetricLike>>,
}
163
164impl JsonSchema for MetricFamily {
165 fn schema_name() -> String {
166 "metric".to_owned()
167 }
168
169 fn schema_id() -> Cow<'static, str> {
170 Cow::Borrowed(formatcp!("{}::MetricFamily", module_path!()))
171 }
172
173 fn json_schema(gen: &mut SchemaGenerator) -> Schema {
174 struct DataConditional {
175 if_schema: Schema,
176 then_schema: Schema,
177 else_schema: Schema,
178 }
179
180 fn data_conditional(gen: &mut SchemaGenerator) -> DataConditional {
181 let if_schema = SchemaObject {
182 instance_type: None,
183 object: Some(Box::new(ObjectValidation {
184 properties: BTreeMap::from([(
185 "metric_type".to_owned(),
186 Schema::Object(SchemaObject {
187 instance_type: Some(SingleOrVec::Single(InstanceType::String.into())),
188 const_value: Some(json!("gauge")),
189 ..Default::default()
190 }),
191 )]),
192 ..Default::default()
193 })),
194 ..Default::default()
195 };
196
197 let then_schema = SchemaObject {
198 instance_type: None,
199 object: Some(Box::new(ObjectValidation {
200 properties: BTreeMap::from([("data".to_string(), <Metric>::json_schema(gen))]),
201 ..Default::default()
202 })),
203 ..Default::default()
204 };
205
206 DataConditional {
207 if_schema: Schema::Object(if_schema),
208 then_schema: Schema::Object(then_schema),
209 else_schema: Schema::Bool(false),
210 }
211 }
212
213 let mut schema = SchemaObject {
214 instance_type: Some(SingleOrVec::Single(InstanceType::Object.into())),
215 metadata: Some(Box::new(Metadata {
216 title: Some("Metric family".to_string()),
217 description: Some("A prometheus gauge, summary, or histogram metric".to_string()),
218 ..Default::default()
219 })),
220 object: Some(Box::new(ObjectValidation {
221 properties: BTreeMap::from([
222 ("metric_type".to_string(), <MetricType>::json_schema(gen)),
223 ("metric_name".to_string(), <String>::json_schema(gen)),
224 ("help".to_string(), <String>::json_schema(gen)),
225 ]),
226 required: BTreeSet::from([
227 "metric_type".to_string(),
228 "metric_name".to_string(),
229 "help".to_string(),
230 "data".to_string(),
231 ]),
232 ..Default::default()
233 })),
234 ..Default::default()
235 };
236
237 let data = data_conditional(gen);
238 schema.subschemas().if_schema = Some(Box::new(data.if_schema));
239 schema.subschemas().then_schema = Some(Box::new(data.then_schema));
240 schema.subschemas().else_schema = Some(Box::new(data.else_schema));
241
242 schema.into()
243 }
244}
245
246#[typetag::serde(tag = "type")]
247trait MetricLike: DynClone {
248 fn parse_from_string(s: &str) -> Result<(Value, Option<Labels>)>
249 where
250 Self: Sized,
251 {
252 if let Some(caps) = METRIC_REGEX_NO_LABEL.captures(s) {
253 Ok((caps[2].to_string(), None))
254 } else if let Some(caps) = METRIC_REGEX_WITH_LABEL.captures(s) {
255 let value = caps[2].to_string();
256 let mut labels: HashMap<String, String> = HashMap::new();
257 for cap in LABELS_REGEX.captures_iter(&caps[1]) {
258 labels.insert(cap[1].to_string(), cap[2].to_string());
259 }
260 Ok((value, Some(labels)))
261 } else {
262 Err(anyhow!("invalid format {}", s))
263 }
264 }
265
266 fn metric_type() -> String
267 where
268 Self: Sized;
269}
270
// Implements `Clone` for `Box<dyn MetricLike>`, which `MetricFamily`'s `#[derive(Clone)]`
// needs for its `data` field.
dyn_clone::clone_trait_object!(MetricLike);
272
273impl Metric {
274 fn from_string(s: &str) -> Result<Metric> {
275 let (value, labels) = Self::parse_from_string(s)?;
276 Ok(Metric { labels, value })
277 }
278}
279
280#[typetag::serde(name = "metric")]
281impl MetricLike for Metric {
282 fn metric_type() -> String {
283 String::from("DEFAULT")
284 }
285}
286
287impl Summary {
288 fn from_raw(metric_name: &str, raw_lines: &Vec<&str>) -> Result<Summary> {
289 let mut sum = String::from("");
290 let mut count = String::from("");
291 let sum_prefix = format!("{}_sum", metric_name);
292 let count_prefix = format!("{}_count", metric_name);
293 let mut labels = HashMap::new();
294 let mut quantiles = HashMap::new();
295 for raw_line in raw_lines {
296 if raw_line.starts_with(&sum_prefix) {
297 sum = Summary::parse_from_string(raw_line)?.0;
298 } else if raw_line.starts_with(&count_prefix) {
299 count = Summary::parse_from_string(raw_line)?.0;
300 } else if let Some(caps) = METRIC_REGEX_WITH_LABEL.captures(raw_line) {
301 for cap in LABELS_REGEX.captures_iter(&caps[1]) {
302 let key = &cap[1];
303 let value = &cap[2];
304 match key {
305 "quantile" => quantiles.insert(key.to_string(), value.to_string()),
306 _ => labels.insert(key.to_string(), value.to_string()),
307 };
308 }
309 } else {
310 bail!("invalid format {}", raw_line);
311 }
312 }
313
314 Ok(Summary {
315 sum,
316 count,
317 labels: Some(labels),
318 quantiles,
319 })
320 }
321}
322
323#[typetag::serde]
324impl MetricLike for Summary {
325 fn metric_type() -> String {
326 String::from(SUMMARY_TYPE)
327 }
328}
329
330impl Histogram {
331 fn from_raw(metric_name: &str, raw_lines: &Vec<&str>) -> Result<Histogram> {
332 let mut sum = String::from("");
333 let mut count = String::from("");
334 let sum_prefix = format!("{}_sum", metric_name);
335 let count_prefix = format!("{}_count", metric_name);
336 let mut labels: HashMap<String, String> = HashMap::new();
337 let mut buckets: HashMap<String, String> = HashMap::new();
338 for raw_line in raw_lines {
339 if raw_line.starts_with(&sum_prefix) {
340 sum = Summary::parse_from_string(raw_line)?.0;
341 } else if raw_line.starts_with(&count_prefix) {
342 count = Summary::parse_from_string(raw_line)?.0;
343 } else if let Some(caps) = METRIC_REGEX_WITH_LABEL.captures(raw_line) {
344 for cap in LABELS_REGEX.captures_iter(&caps[1]) {
345 let key = &cap[1];
346 let value = &cap[2];
347 match key {
348 "le" => buckets.insert(value.to_string(), caps[2].to_string()),
349 _ => labels.insert(key.to_string(), value.to_string()),
350 };
351 }
352 } else {
353 bail!("invalid format {}", raw_line)
354 }
355 }
356
357 Ok(Histogram {
358 sum,
359 count,
360 labels: Some(labels),
361 buckets,
362 })
363 }
364}
365
366#[typetag::serde]
367impl MetricLike for Histogram {
368 fn metric_type() -> String {
369 String::from(HISTOGRAM_TYPE)
370 }
371}
372
373impl MetricFamily {
374 fn from_raw(raw: &[&str]) -> Result<MetricFamily> {
375 let mut raw_iter = raw.iter();
376 let help = MetricFamily::metric_help_fron_raw(
377 raw_iter
378 .next()
379 .ok_or(anyhow!("invalid metric help{}", raw.join("\n")))?,
380 );
381 let (metric_name, metric_type) = MetricFamily::metric_name_and_type(
382 raw_iter
383 .next()
384 .ok_or(anyhow!("invalid metric name/type {}", raw.join("\n")))?,
385 )?;
386 let mut data: Vec<Box<dyn MetricLike>> = Vec::new();
387 match metric_type {
388 MetricType::Gauge => {
389 for raw_line in raw_iter {
390 data.push(Box::new(Metric::from_string(raw_line)?))
391 }
392 }
393 MetricType::Histogram => {
394 let count_prefix = format!("{}_count", metric_name);
395 let mut histogram_lines: Vec<&str> = Vec::new();
396 for raw_line in raw_iter {
397 histogram_lines.push(raw_line);
398 if raw_line.starts_with(&count_prefix) {
399 data.push(Box::new(Histogram::from_raw(
400 &metric_name,
401 &histogram_lines,
402 )?));
403 histogram_lines = Vec::new();
404 }
405 }
406 }
407 MetricType::Summary => {
408 let count_prefix = format!("{}_count", metric_name);
409 let mut summary_lines: Vec<&str> = Vec::new();
410 for raw_line in raw_iter {
411 summary_lines.push(raw_line);
412 if raw_line.starts_with(&count_prefix) {
413 data.push(Box::new(Summary::from_raw(&metric_name, &summary_lines)?));
414 summary_lines = Vec::new();
415 }
416 }
417 }
418 }
419 Ok(MetricFamily {
420 metric_type,
421 metric_name,
422 help,
423 data,
424 })
425 }
426
427 fn metric_name_and_type(type_line: &str) -> Result<(String, MetricType)> {
428 let tags: Vec<&str> = type_line.split_whitespace().collect();
429 let (name, type_raw) = (tags[2], tags[3]);
430 let metric_type = match type_raw {
431 "gauge" => MetricType::Gauge,
432 "counter" => MetricType::Gauge,
433 "histogram" => MetricType::Histogram,
434 "summary" => MetricType::Summary,
435 _ => bail!("invalid metric type {}", type_raw),
436 };
437
438 Ok((name.to_string(), metric_type))
439 }
440
441 fn metric_help_fron_raw(help_line: &str) -> String {
442 let tags: Vec<&str> = help_line.split_whitespace().collect();
443 tags[3..].join(" ").to_string()
444 }
445}
446
#[cfg(test)]
mod test {
    use super::*;
    use maplit::hashmap;

    // A single sample line parses into a `Metric`, both without and with a label set.
    #[test]
    fn parse_metric() {
        assert_eq!(
            Metric {
                labels: None,
                value: String::from("205632")
            },
            Metric::from_string("go_memstats_mspan_inuse_bytes 205632").unwrap()
        );
        assert_eq!(
            Metric {
                labels: Some(hashmap!{
                    "dialer_name".to_string() => "default".to_string(),
                    "reason".to_string() => "unknown".to_string(),
                }),
                value: String::from("0")
            },
            Metric::from_string("net_conntrack_dialer_conn_failed_total{dialer_name=\"default\",reason=\"unknown\"} 0").unwrap()
        )
    }

    // Two gauge families in one input; only the type of the first family is checked.
    #[test]
    fn parse_metric_raw_data() {
        let raw_data = "# HELP go_goroutines Number of goroutines that currently exist.
# TYPE go_goroutines gauge
go_goroutines 31
# HELP go_info Information about the Go environment.
# TYPE go_info gauge
go_info{version=\"go1.15.5\"} 1";
        let prom_data = PrometheusData::from_string(raw_data).unwrap();
        assert_eq!(MetricType::Gauge, prom_data.metrics[0].metric_type)
    }

    // One summary series: checks the parsed `sum` and that `quantile` is kept out of
    // `labels`.
    #[test]
    fn parse_metric_summary() {
        let raw_data =
            "prometheus_engine_query_duration_seconds{slice=\"inner_eval\",quantile=\"0.5\"} NaN
prometheus_engine_query_duration_seconds{slice=\"inner_eval\",quantile=\"0.9\"} NaN
prometheus_engine_query_duration_seconds{slice=\"inner_eval\",quantile=\"0.99\"} NaN
prometheus_engine_query_duration_seconds_sum{slice=\"inner_eval\"} 12
prometheus_engine_query_duration_seconds_count{slice=\"inner_eval\"} 0";
        let summary = Summary::from_raw(
            "prometheus_engine_query_duration_seconds",
            &raw_data.lines().collect(),
        )
        .unwrap();
        assert_eq!(summary.sum, "12".to_string());
        assert_eq!(
            summary.labels,
            Some(hashmap! {"slice".to_string() => "inner_eval".to_string()})
        );
    }

    // One histogram series: checks the parsed `sum` and that `le` is kept out of `labels`.
    #[test]
    fn parse_metric_histogram() {
        let raw_data = r#"prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="0.1"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="0.2"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="0.4"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="1"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="3"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="8"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="20"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="60"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="120"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="+Inf"} 10871
prometheus_http_request_duration_seconds_sum{handler="/metrics"} 67.48398663499978
prometheus_http_request_duration_seconds_count{handler="/metrics"} 10871"#;
        let histogram = Histogram::from_raw(
            "prometheus_http_request_duration_seconds",
            &raw_data.lines().collect(),
        )
        .unwrap();
        assert_eq!(histogram.sum, "67.48398663499978");
        assert_eq!(
            histogram.labels,
            Some(hashmap! {"handler".to_string() => "/metrics".to_string()})
        );
    }

    // Round-trips several blank-line-separated gauge families through JSON and checks the
    // value of the first sample of the first family.
    #[test]
    fn parse_metric_collection_to_json() {
        let raw_data = r#"# HELP homestar_process_disk_total_read_bytes Total bytes read from disk.
# TYPE homestar_process_disk_total_read_bytes gauge
homestar_process_disk_total_read_bytes 45969408

# HELP homestar_process_virtual_memory_bytes The virtual memory size in bytes.
# TYPE homestar_process_virtual_memory_bytes gauge
homestar_process_virtual_memory_bytes 418935930880

# HELP homestar_network_received_bytes The bytes received since last refresh.
# TYPE homestar_network_received_bytes gauge
homestar_network_received_bytes 0

# HELP homestar_system_available_memory_bytes The amount of available memory.
# TYPE homestar_system_available_memory_bytes gauge
homestar_system_available_memory_bytes 0

# HELP homestar_system_disk_available_space_bytes The total amount of available disk space.
# TYPE homestar_system_disk_available_space_bytes gauge
homestar_system_disk_available_space_bytes 0

# HELP homestar_system_load_average_percentage The load average over a five minute interval.
# TYPE homestar_system_load_average_percentage gauge
homestar_system_load_average_percentage 6.26611328125"#;

        let prom_data = PrometheusData::from_string(raw_data).unwrap();
        let json_string = serde_json::to_string(&prom_data).unwrap();
        let root: serde_json::Value = serde_json::from_str(&json_string).unwrap();

        // Walk `metrics[0].data[0].value` in the serialized JSON.
        let check = root
            .get("metrics")
            .and_then(|v| v.get(0))
            .and_then(|v| v.get("data"))
            .and_then(|v| v.get(0))
            .and_then(|v| v.get("value"))
            .unwrap();

        assert_eq!(check, &serde_json::Value::String("45969408".to_string()));
    }
}