homestar_runtime/network/webserver/
prom.rs

/// A module to parse Prometheus metrics data into JSON.
///
/// Influenced by https://crates.io/crates/prom2jsonrs/0.1.0.
4use anyhow::{anyhow, bail, Result};
5use const_format::formatcp;
6use dyn_clone::DynClone;
7use once_cell::sync::Lazy;
8use regex::Regex;
9use schemars::{
10    gen::SchemaGenerator,
11    schema::{InstanceType, Metadata, ObjectValidation, Schema, SchemaObject, SingleOrVec},
12    JsonSchema,
13};
14use serde::{Deserialize, Serialize};
15use serde_json::json;
16use std::{
17    borrow::Cow,
18    collections::{BTreeMap, BTreeSet, HashMap},
19    module_path,
20};
21
/// Type label returned by `Histogram::metric_type`.
#[allow(dead_code)]
const HISTOGRAM_TYPE: &str = "HISTOGRAM";
/// Type label returned by `Summary::metric_type`.
#[allow(dead_code)]
const SUMMARY_TYPE: &str = "SUMMARY";
26
27static METRIC_REGEX_NO_LABEL: Lazy<&Regex> = Lazy::new(|| {
28    static RE: once_cell::sync::OnceCell<Regex> = once_cell::sync::OnceCell::new();
29    RE.get_or_init(|| Regex::new(r"([a-zA-Z_:][a-zA-Z0-9_:]*)\s(-?[\d.]+(?:e-?\d+)?|NaN)").unwrap())
30});
31
32static METRIC_REGEX_WITH_LABEL: Lazy<&Regex> = Lazy::new(|| {
33    static RE: once_cell::sync::OnceCell<Regex> = once_cell::sync::OnceCell::new();
34    RE.get_or_init(|| {
35        Regex::new(r"[a-zA-Z_:][a-zA-Z0-9_:]*\{(.*)\}\s(-?[\d.]+(?:e-?\d+)?|NaN)").unwrap()
36    })
37});
38
39static LABELS_REGEX: Lazy<&Regex> = Lazy::new(|| {
40    static RE: once_cell::sync::OnceCell<Regex> = once_cell::sync::OnceCell::new();
41    RE.get_or_init(|| Regex::new("([a-zA-Z0-9_:]*)=\"([^\"]+)\"").unwrap())
42});
43
44static MULTI_NEWLINE: Lazy<&Regex> = Lazy::new(|| {
45    static RE: once_cell::sync::OnceCell<Regex> = once_cell::sync::OnceCell::new();
46    RE.get_or_init(|| Regex::new(r"\n\n").unwrap())
47});
48
/// Label name to label value mapping attached to a sample.
type Labels = HashMap<String, String>;
/// A sample value, kept as the text captured from the input line.
type Value = String;
51
#[derive(Clone, Serialize, JsonSchema)]
/// A parsed representation of the prometheus metrics data
// `Debug` is not derived here: `MetricFamily` does not implement it.
#[allow(missing_debug_implementations)]
#[schemars(title = "Metrics data", description = "Prometheus metrics data")]
pub struct PrometheusData {
    // Metric families in the order they were parsed from the input.
    // NOTE(review): a plain comment is used on purpose; a doc comment on this
    // field would presumably surface in the derived JSON schema.
    metrics: Vec<MetricFamily>,
}
59
60impl PrometheusData {
61    /// Parse promethues metric data from string
62    pub(crate) fn from_string(s: &str) -> Result<PrometheusData> {
63        let text = MULTI_NEWLINE.replace_all(s, "\n");
64        let mut metrics = Vec::new();
65        let mut metric_lines = Vec::new();
66        let mut num_comment_lines = 0;
67        for line in text.lines() {
68            if line.starts_with('#') {
69                if num_comment_lines == 2 {
70                    // One set complete
71                    metrics.push(MetricFamily::from_raw(&metric_lines)?);
72                    metric_lines = vec![line];
73                    num_comment_lines = 1;
74                } else {
75                    num_comment_lines += 1;
76                    metric_lines.push(line);
77                }
78            } else {
79                metric_lines.push(line)
80            }
81        }
82        Ok(PrometheusData { metrics })
83    }
84}
85
/// A single sample: its value plus the labels attached to it, if any.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct Metric {
    /// Labels parsed from the sample line; `None` when the line has no label block.
    labels: Option<Labels>,
    /// The sample value as captured from the line.
    value: Value,
}
91
92impl JsonSchema for Metric {
93    fn schema_name() -> String {
94        "gauge".to_owned()
95    }
96
97    fn schema_id() -> Cow<'static, str> {
98        Cow::Borrowed(formatcp!("{}::Metric", module_path!()))
99    }
100
101    fn json_schema(gen: &mut SchemaGenerator) -> Schema {
102        let type_schema = SchemaObject {
103            instance_type: Some(SingleOrVec::Single(InstanceType::String.into())),
104            const_value: Some(json!("metric")),
105            ..Default::default()
106        };
107
108        let schema = SchemaObject {
109            instance_type: Some(SingleOrVec::Single(InstanceType::Object.into())),
110            metadata: Some(Box::new(Metadata {
111                title: Some("Gauge data".to_string()),
112                description: Some("A gauge metric".to_string()),
113                ..Default::default()
114            })),
115            object: Some(Box::new(ObjectValidation {
116                properties: BTreeMap::from([
117                    ("type".to_string(), Schema::Object(type_schema)),
118                    ("labels".to_string(), <Option<Labels>>::json_schema(gen)),
119                    ("value".to_string(), <String>::json_schema(gen)),
120                ]),
121                required: BTreeSet::from(["type".to_string(), "value".to_string()]),
122                ..Default::default()
123            })),
124            ..Default::default()
125        };
126
127        schema.into()
128    }
129}
130
/// One summary series assembled from its quantile, `_sum` and `_count` lines.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct Summary {
    /// Labels other than `quantile` found on the quantile lines.
    labels: Option<Labels>,
    /// Entries collected from the `quantile` labels of the sample lines.
    quantiles: Labels,
    /// Value of the `<name>_count` line; empty if no such line was seen.
    count: Value,
    /// Value of the `<name>_sum` line; empty if no such line was seen.
    sum: Value,
}
138
139#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
140struct Histogram {
141    labels: Option<HashMap<String, String>>,
142    buckets: Labels,
143    count: Value,
144    sum: Value,
145}
146
// The kind of a metric family. Variants are serialized in lowercase
// (`rename_all = "lowercase"`). `MetricFamily::metric_name_and_type` maps both
// the `gauge` and the `counter` keyword to `Gauge`.
// NOTE(review): plain comments are used on purpose; doc comments on this enum
// or its variants would presumably change the derived JSON schema.
#[derive(Debug, Clone, PartialEq, Serialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
#[schemars(title = "Metric type")]
enum MetricType {
    Gauge,
    Histogram,
    Summary,
}
155
/// A group of samples that share one metric name, type and help text.
#[derive(Clone, Serialize)]
struct MetricFamily {
    /// Kind of metric, taken from the family's type line.
    metric_type: MetricType,
    /// Metric name, taken from the family's type line.
    metric_name: String,
    /// Help text, taken from the family's help line.
    help: String,
    /// Parsed samples; the concrete type (`Metric`, `Histogram` or `Summary`)
    /// follows `metric_type`.
    data: Vec<Box<dyn MetricLike>>,
}
163
164impl JsonSchema for MetricFamily {
165    fn schema_name() -> String {
166        "metric".to_owned()
167    }
168
169    fn schema_id() -> Cow<'static, str> {
170        Cow::Borrowed(formatcp!("{}::MetricFamily", module_path!()))
171    }
172
173    fn json_schema(gen: &mut SchemaGenerator) -> Schema {
174        struct DataConditional {
175            if_schema: Schema,
176            then_schema: Schema,
177            else_schema: Schema,
178        }
179
180        fn data_conditional(gen: &mut SchemaGenerator) -> DataConditional {
181            let if_schema = SchemaObject {
182                instance_type: None,
183                object: Some(Box::new(ObjectValidation {
184                    properties: BTreeMap::from([(
185                        "metric_type".to_owned(),
186                        Schema::Object(SchemaObject {
187                            instance_type: Some(SingleOrVec::Single(InstanceType::String.into())),
188                            const_value: Some(json!("gauge")),
189                            ..Default::default()
190                        }),
191                    )]),
192                    ..Default::default()
193                })),
194                ..Default::default()
195            };
196
197            let then_schema = SchemaObject {
198                instance_type: None,
199                object: Some(Box::new(ObjectValidation {
200                    properties: BTreeMap::from([("data".to_string(), <Metric>::json_schema(gen))]),
201                    ..Default::default()
202                })),
203                ..Default::default()
204            };
205
206            DataConditional {
207                if_schema: Schema::Object(if_schema),
208                then_schema: Schema::Object(then_schema),
209                else_schema: Schema::Bool(false),
210            }
211        }
212
213        let mut schema = SchemaObject {
214            instance_type: Some(SingleOrVec::Single(InstanceType::Object.into())),
215            metadata: Some(Box::new(Metadata {
216                title: Some("Metric family".to_string()),
217                description: Some("A prometheus gauge, summary, or histogram metric".to_string()),
218                ..Default::default()
219            })),
220            object: Some(Box::new(ObjectValidation {
221                properties: BTreeMap::from([
222                    ("metric_type".to_string(), <MetricType>::json_schema(gen)),
223                    ("metric_name".to_string(), <String>::json_schema(gen)),
224                    ("help".to_string(), <String>::json_schema(gen)),
225                ]),
226                required: BTreeSet::from([
227                    "metric_type".to_string(),
228                    "metric_name".to_string(),
229                    "help".to_string(),
230                    "data".to_string(),
231                ]),
232                ..Default::default()
233            })),
234            ..Default::default()
235        };
236
237        let data = data_conditional(gen);
238        schema.subschemas().if_schema = Some(Box::new(data.if_schema));
239        schema.subschemas().then_schema = Some(Box::new(data.then_schema));
240        schema.subschemas().else_schema = Some(Box::new(data.else_schema));
241
242        schema.into()
243    }
244}
245
246#[typetag::serde(tag = "type")]
247trait MetricLike: DynClone {
248    fn parse_from_string(s: &str) -> Result<(Value, Option<Labels>)>
249    where
250        Self: Sized,
251    {
252        if let Some(caps) = METRIC_REGEX_NO_LABEL.captures(s) {
253            Ok((caps[2].to_string(), None))
254        } else if let Some(caps) = METRIC_REGEX_WITH_LABEL.captures(s) {
255            let value = caps[2].to_string();
256            let mut labels: HashMap<String, String> = HashMap::new();
257            for cap in LABELS_REGEX.captures_iter(&caps[1]) {
258                labels.insert(cap[1].to_string(), cap[2].to_string());
259            }
260            Ok((value, Some(labels)))
261        } else {
262            Err(anyhow!("invalid format {}", s))
263        }
264    }
265
266    fn metric_type() -> String
267    where
268        Self: Sized;
269}
270
271dyn_clone::clone_trait_object!(MetricLike);
272
273impl Metric {
274    fn from_string(s: &str) -> Result<Metric> {
275        let (value, labels) = Self::parse_from_string(s)?;
276        Ok(Metric { labels, value })
277    }
278}
279
280#[typetag::serde(name = "metric")]
281impl MetricLike for Metric {
282    fn metric_type() -> String {
283        String::from("DEFAULT")
284    }
285}
286
287impl Summary {
288    fn from_raw(metric_name: &str, raw_lines: &Vec<&str>) -> Result<Summary> {
289        let mut sum = String::from("");
290        let mut count = String::from("");
291        let sum_prefix = format!("{}_sum", metric_name);
292        let count_prefix = format!("{}_count", metric_name);
293        let mut labels = HashMap::new();
294        let mut quantiles = HashMap::new();
295        for raw_line in raw_lines {
296            if raw_line.starts_with(&sum_prefix) {
297                sum = Summary::parse_from_string(raw_line)?.0;
298            } else if raw_line.starts_with(&count_prefix) {
299                count = Summary::parse_from_string(raw_line)?.0;
300            } else if let Some(caps) = METRIC_REGEX_WITH_LABEL.captures(raw_line) {
301                for cap in LABELS_REGEX.captures_iter(&caps[1]) {
302                    let key = &cap[1];
303                    let value = &cap[2];
304                    match key {
305                        "quantile" => quantiles.insert(key.to_string(), value.to_string()),
306                        _ => labels.insert(key.to_string(), value.to_string()),
307                    };
308                }
309            } else {
310                bail!("invalid format {}", raw_line);
311            }
312        }
313
314        Ok(Summary {
315            sum,
316            count,
317            labels: Some(labels),
318            quantiles,
319        })
320    }
321}
322
323#[typetag::serde]
324impl MetricLike for Summary {
325    fn metric_type() -> String {
326        String::from(SUMMARY_TYPE)
327    }
328}
329
330impl Histogram {
331    fn from_raw(metric_name: &str, raw_lines: &Vec<&str>) -> Result<Histogram> {
332        let mut sum = String::from("");
333        let mut count = String::from("");
334        let sum_prefix = format!("{}_sum", metric_name);
335        let count_prefix = format!("{}_count", metric_name);
336        let mut labels: HashMap<String, String> = HashMap::new();
337        let mut buckets: HashMap<String, String> = HashMap::new();
338        for raw_line in raw_lines {
339            if raw_line.starts_with(&sum_prefix) {
340                sum = Summary::parse_from_string(raw_line)?.0;
341            } else if raw_line.starts_with(&count_prefix) {
342                count = Summary::parse_from_string(raw_line)?.0;
343            } else if let Some(caps) = METRIC_REGEX_WITH_LABEL.captures(raw_line) {
344                for cap in LABELS_REGEX.captures_iter(&caps[1]) {
345                    let key = &cap[1];
346                    let value = &cap[2];
347                    match key {
348                        "le" => buckets.insert(value.to_string(), caps[2].to_string()),
349                        _ => labels.insert(key.to_string(), value.to_string()),
350                    };
351                }
352            } else {
353                bail!("invalid format {}", raw_line)
354            }
355        }
356
357        Ok(Histogram {
358            sum,
359            count,
360            labels: Some(labels),
361            buckets,
362        })
363    }
364}
365
366#[typetag::serde]
367impl MetricLike for Histogram {
368    fn metric_type() -> String {
369        String::from(HISTOGRAM_TYPE)
370    }
371}
372
373impl MetricFamily {
374    fn from_raw(raw: &[&str]) -> Result<MetricFamily> {
375        let mut raw_iter = raw.iter();
376        let help = MetricFamily::metric_help_fron_raw(
377            raw_iter
378                .next()
379                .ok_or(anyhow!("invalid metric help{}", raw.join("\n")))?,
380        );
381        let (metric_name, metric_type) = MetricFamily::metric_name_and_type(
382            raw_iter
383                .next()
384                .ok_or(anyhow!("invalid metric name/type {}", raw.join("\n")))?,
385        )?;
386        let mut data: Vec<Box<dyn MetricLike>> = Vec::new();
387        match metric_type {
388            MetricType::Gauge => {
389                for raw_line in raw_iter {
390                    data.push(Box::new(Metric::from_string(raw_line)?))
391                }
392            }
393            MetricType::Histogram => {
394                let count_prefix = format!("{}_count", metric_name);
395                let mut histogram_lines: Vec<&str> = Vec::new();
396                for raw_line in raw_iter {
397                    histogram_lines.push(raw_line);
398                    if raw_line.starts_with(&count_prefix) {
399                        data.push(Box::new(Histogram::from_raw(
400                            &metric_name,
401                            &histogram_lines,
402                        )?));
403                        histogram_lines = Vec::new();
404                    }
405                }
406            }
407            MetricType::Summary => {
408                let count_prefix = format!("{}_count", metric_name);
409                let mut summary_lines: Vec<&str> = Vec::new();
410                for raw_line in raw_iter {
411                    summary_lines.push(raw_line);
412                    if raw_line.starts_with(&count_prefix) {
413                        data.push(Box::new(Summary::from_raw(&metric_name, &summary_lines)?));
414                        summary_lines = Vec::new();
415                    }
416                }
417            }
418        }
419        Ok(MetricFamily {
420            metric_type,
421            metric_name,
422            help,
423            data,
424        })
425    }
426
427    fn metric_name_and_type(type_line: &str) -> Result<(String, MetricType)> {
428        let tags: Vec<&str> = type_line.split_whitespace().collect();
429        let (name, type_raw) = (tags[2], tags[3]);
430        let metric_type = match type_raw {
431            "gauge" => MetricType::Gauge,
432            "counter" => MetricType::Gauge,
433            "histogram" => MetricType::Histogram,
434            "summary" => MetricType::Summary,
435            _ => bail!("invalid metric type {}", type_raw),
436        };
437
438        Ok((name.to_string(), metric_type))
439    }
440
441    fn metric_help_fron_raw(help_line: &str) -> String {
442        let tags: Vec<&str> = help_line.split_whitespace().collect();
443        tags[3..].join(" ").to_string()
444    }
445}
446
#[cfg(test)]
mod test {
    use super::*;
    use maplit::hashmap;

    /// Single sample lines parse with and without a label block.
    #[test]
    fn parse_metric() {
        let unlabelled = Metric {
            labels: None,
            value: String::from("205632"),
        };
        assert_eq!(
            unlabelled,
            Metric::from_string("go_memstats_mspan_inuse_bytes 205632").unwrap()
        );

        let labelled = Metric {
            labels: Some(hashmap! {
                "dialer_name".to_string() => "default".to_string(),
                "reason".to_string() => "unknown".to_string(),
            }),
            value: String::from("0"),
        };
        assert_eq!(
            labelled,
            Metric::from_string("net_conntrack_dialer_conn_failed_total{dialer_name=\"default\",reason=\"unknown\"} 0").unwrap()
        );
    }

    /// The first family of a two-family document is reported as a gauge.
    #[test]
    fn parse_metric_raw_data() {
        let raw_data = "# HELP go_goroutines Number of goroutines that currently exist.
# TYPE go_goroutines gauge
go_goroutines 31
# HELP go_info Information about the Go environment.
# TYPE go_info gauge
go_info{version=\"go1.15.5\"} 1";
        let parsed = PrometheusData::from_string(raw_data).unwrap();
        let first_family = &parsed.metrics[0];
        assert_eq!(MetricType::Gauge, first_family.metric_type);
    }

    /// A summary series yields its sum and its non-quantile labels.
    #[test]
    fn parse_metric_summary() {
        let raw_data =
            "prometheus_engine_query_duration_seconds{slice=\"inner_eval\",quantile=\"0.5\"} NaN
prometheus_engine_query_duration_seconds{slice=\"inner_eval\",quantile=\"0.9\"} NaN
prometheus_engine_query_duration_seconds{slice=\"inner_eval\",quantile=\"0.99\"} NaN
prometheus_engine_query_duration_seconds_sum{slice=\"inner_eval\"} 12
prometheus_engine_query_duration_seconds_count{slice=\"inner_eval\"} 0";
        let lines: Vec<&str> = raw_data.lines().collect();
        let summary =
            Summary::from_raw("prometheus_engine_query_duration_seconds", &lines).unwrap();

        assert_eq!(summary.sum, "12".to_string());

        let expected_labels = hashmap! {"slice".to_string() => "inner_eval".to_string()};
        assert_eq!(summary.labels, Some(expected_labels));
    }

    /// A histogram series yields its sum and its non-`le` labels.
    #[test]
    fn parse_metric_histogram() {
        let raw_data = r#"prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="0.1"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="0.2"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="0.4"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="1"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="3"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="8"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="20"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="60"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="120"} 10871
prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="+Inf"} 10871
prometheus_http_request_duration_seconds_sum{handler="/metrics"} 67.48398663499978
prometheus_http_request_duration_seconds_count{handler="/metrics"} 10871"#;
        let lines: Vec<&str> = raw_data.lines().collect();
        let histogram =
            Histogram::from_raw("prometheus_http_request_duration_seconds", &lines).unwrap();

        assert_eq!(histogram.sum, "67.48398663499978");

        let expected_labels = hashmap! {"handler".to_string() => "/metrics".to_string()};
        assert_eq!(histogram.labels, Some(expected_labels));
    }

    /// A parsed document serializes to JSON with the first sample value
    /// reachable at `metrics[0].data[0].value`.
    #[test]
    fn parse_metric_collection_to_json() {
        let raw_data = r#"# HELP homestar_process_disk_total_read_bytes Total bytes read from disk.
# TYPE homestar_process_disk_total_read_bytes gauge
homestar_process_disk_total_read_bytes 45969408

# HELP homestar_process_virtual_memory_bytes The virtual memory size in bytes.
# TYPE homestar_process_virtual_memory_bytes gauge
homestar_process_virtual_memory_bytes 418935930880

# HELP homestar_network_received_bytes The bytes received since last refresh.
# TYPE homestar_network_received_bytes gauge
homestar_network_received_bytes 0

# HELP homestar_system_available_memory_bytes The amount of available memory.
# TYPE homestar_system_available_memory_bytes gauge
homestar_system_available_memory_bytes 0

# HELP homestar_system_disk_available_space_bytes The total amount of available disk space.
# TYPE homestar_system_disk_available_space_bytes gauge
homestar_system_disk_available_space_bytes 0

# HELP homestar_system_load_average_percentage The load average over a five minute interval.
# TYPE homestar_system_load_average_percentage gauge
homestar_system_load_average_percentage 6.26611328125"#;

        let parsed = PrometheusData::from_string(raw_data).unwrap();
        let serialized = serde_json::to_string(&parsed).unwrap();
        let document: serde_json::Value = serde_json::from_str(&serialized).unwrap();

        let first_value = document.pointer("/metrics/0/data/0/value").unwrap();

        assert_eq!(
            first_value,
            &serde_json::Value::String("45969408".to_string())
        );
    }
}