prometheus_scrape/
lib.rs

1use std::collections::HashMap;
2use std::io;
3
4extern crate chrono;
5use chrono::{DateTime, TimeZone, Utc};
6
7#[macro_use]
8extern crate lazy_static;
9
10extern crate regex;
11use regex::Regex;
12
13lazy_static! {
14    static ref HELP_RE: Regex = Regex::new(r"^#\s+HELP\s+(\w+)\s+(.+)$").unwrap();
15    static ref TYPE_RE: Regex = Regex::new(r"^#\s+TYPE\s+(\w+)\s+(\w+)").unwrap();
16    static ref SAMPLE_RE: Regex = Regex::new(
17        r"^(?P<name>\w+)(\{(?P<labels>[^}]+)\})?\s+(?P<value>\S+)(\s+(?P<timestamp>\S+))?"
18    )
19    .unwrap();
20}
21
/// The classification of a single line of exposition text, as produced by
/// `LineInfo::parse`.
///
/// All borrowed fields point into the line that was parsed.
#[derive(Debug, Eq, PartialEq)]
pub enum LineInfo<'a> {
    /// A `# HELP` comment line carrying documentation for a metric.
    Doc {
        metric_name: &'a str,
        doc: &'a str,
    },
    /// A `# TYPE` comment line.
    ///
    /// `metric_name` is owned because `LineInfo::parse` appends `_bucket`
    /// to it when the declared type is a histogram.
    Type {
        metric_name: String,
        sample_type: SampleType,
    },
    /// A sample line; every field is the raw, unparsed text of that part.
    Sample {
        metric_name: &'a str,
        // Text between `{` and `}`, when the line has a label list.
        labels: Option<&'a str>,
        value: &'a str,
        timestamp: Option<&'a str>,
    },
    /// The line was empty after trimming whitespace.
    Empty,
    /// The line matched none of the recognised forms.
    Ignored,
}
41
/// The metric type declared by a `# TYPE` line.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
pub enum SampleType {
    Counter,
    Gauge,
    Histogram,
    Summary,
    Untyped,
}

impl SampleType {
    /// Maps a type keyword to its `SampleType`.
    ///
    /// The comparison is exact (case-sensitive); any keyword that is not one
    /// of the four known names yields `SampleType::Untyped`.
    pub fn parse(s: &str) -> SampleType {
        const KNOWN: [(&str, SampleType); 4] = [
            ("counter", SampleType::Counter),
            ("gauge", SampleType::Gauge),
            ("histogram", SampleType::Histogram),
            ("summary", SampleType::Summary),
        ];
        KNOWN
            .iter()
            .find(|&&(keyword, _)| keyword == s)
            .map_or(SampleType::Untyped, |&(_, sample_type)| sample_type)
    }
}
62
63impl<'a> LineInfo<'a> {
64    pub fn parse(line: &'a str) -> LineInfo<'a> {
65        let line = line.trim();
66        if line.len() == 0 {
67            return LineInfo::Empty;
68        }
69        match HELP_RE.captures(line) {
70            Some(ref caps) => {
71                return match (caps.get(1), caps.get(2)) {
72                    (Some(ref metric_name), Some(ref doc)) => LineInfo::Doc {
73                        metric_name: metric_name.as_str(),
74                        doc: doc.as_str(),
75                    },
76                    _ => LineInfo::Ignored,
77                }
78            }
79            None => {}
80        }
81        match TYPE_RE.captures(line) {
82            Some(ref caps) => {
83                return match (caps.get(1), caps.get(2)) {
84                    (Some(ref metric_name), Some(ref sample_type)) => {
85                        let sample_type = SampleType::parse(sample_type.as_str());
86                        LineInfo::Type {
87                            metric_name: match sample_type {
88                                SampleType::Histogram => format!("{}_bucket", metric_name.as_str()),
89                                _ => metric_name.as_str().to_string(),
90                            },
91                            sample_type: sample_type,
92                        }
93                    }
94                    _ => LineInfo::Ignored,
95                }
96            }
97            None => {}
98        }
99        match SAMPLE_RE.captures(line) {
100            Some(ref caps) => {
101                return match (
102                    caps.name("name"),
103                    caps.name("labels"),
104                    caps.name("value"),
105                    caps.name("timestamp"),
106                ) {
107                    (Some(ref name), labels, Some(ref value), timestamp) => LineInfo::Sample {
108                        metric_name: name.as_str(),
109                        labels: labels.map_or(None, |c| Some(c.as_str())),
110                        value: value.as_str(),
111                        timestamp: timestamp.map_or(None, |c| Some(c.as_str())),
112                    },
113                    _ => LineInfo::Ignored,
114                }
115            }
116            None => LineInfo::Ignored,
117        }
118    }
119}
120
121#[derive(Debug, PartialEq)]
122pub struct Sample {
123    metric: String,
124    value: Value,
125    labels: Labels,
126    timestamp: DateTime<Utc>,
127}
128
129fn parse_bucket(s: &str, label: &str) -> Option<f64> {
130    if let Some(kv) = s.split(",").next() {
131        let kvpair = kv.split("=").collect::<Vec<_>>();
132        let (k, v) = (kvpair[0], kvpair[1].trim_matches('"'));
133        if k == label {
134            match parse_golang_float(v) {
135                Ok(v) => Some(v),
136                Err(_) => None,
137            }
138        } else {
139            None
140        }
141    } else {
142        None
143    }
144}
145
/// One bucket of a histogram sample.
///
/// Fields are public so that consumers of `Value::Histogram` can read them;
/// the type is a plain pair of floats and is therefore `Copy`.
#[derive(Debug, PartialEq, Clone, Copy)]
pub struct HistogramCount {
    /// Upper bound of the bucket, taken from the `le` label.
    pub less_than: f64,
    /// Value reported on the bucket's sample line.
    pub count: f64,
}
151
/// One quantile of a summary sample.
///
/// Fields are public so that consumers of `Value::Summary` can read them;
/// the type is a plain pair of floats and is therefore `Copy`.
#[derive(Debug, PartialEq, Clone, Copy)]
pub struct SummaryCount {
    /// The quantile, taken from the `quantile` label.
    pub quantile: f64,
    /// Value reported on the quantile's sample line.
    pub count: f64,
}
157
/// The label set attached to a sample, keyed by label name.
#[derive(Debug, Eq, PartialEq)]
pub struct Labels(HashMap<String, String>);

impl Labels {
    /// Creates an empty label set.
    fn new() -> Labels {
        Labels(HashMap::new())
    }

    /// Parses the text found between `{` and `}` on a sample line.
    ///
    /// Pairs are separated by commas that are outside double quotes, and each
    /// pair is split at its first `=`, so quoted values may themselves contain
    /// `,` and `=`. Quoted values have their surrounding quotes removed and
    /// the escapes `\\`, `\"` and `\n` decoded. Pairs without `=` or with an
    /// empty name are skipped, which also tolerates leading and trailing
    /// commas.
    fn parse(s: &str) -> Labels {
        let mut map = HashMap::new();
        for pair in Labels::split_pairs(s) {
            let mut parts = pair.splitn(2, '=');
            let (name, raw) = match (parts.next(), parts.next()) {
                (Some(name), Some(raw)) => (name.trim(), raw.trim()),
                _ => continue,
            };
            if name.is_empty() {
                continue;
            }
            map.insert(name.to_string(), Labels::unquote(raw));
        }
        Labels(map)
    }

    /// Splits a raw label list on the commas that sit outside double quotes.
    fn split_pairs(s: &str) -> Vec<&str> {
        let mut pairs = Vec::new();
        let mut start = 0;
        let mut in_quotes = false;
        let mut escaped = false;
        for (idx, ch) in s.char_indices() {
            if escaped {
                // The character after a backslash is taken literally.
                escaped = false;
                continue;
            }
            match ch {
                '\\' if in_quotes => escaped = true,
                '"' => in_quotes = !in_quotes,
                ',' if !in_quotes => {
                    pairs.push(&s[start..idx]);
                    start = idx + 1;
                }
                _ => {}
            }
        }
        pairs.push(&s[start..]);
        pairs
    }

    /// Turns the raw text of a label value into the value itself.
    fn unquote(raw: &str) -> String {
        let is_quoted = raw.len() >= 2 && raw.starts_with('"') && raw.ends_with('"');
        if !is_quoted {
            // Lenient path for unquoted or half-quoted values: drop stray quotes only.
            return raw.trim_matches('"').to_string();
        }
        let inner = &raw[1..raw.len() - 1];
        let mut out = String::with_capacity(inner.len());
        let mut chars = inner.chars();
        while let Some(ch) = chars.next() {
            if ch != '\\' {
                out.push(ch);
                continue;
            }
            match chars.next() {
                Some('n') => out.push('\n'),
                Some('"') => out.push('"'),
                Some('\\') => out.push('\\'),
                // Unknown escapes are kept verbatim rather than rejected.
                Some(other) => {
                    out.push('\\');
                    out.push(other);
                }
                None => out.push('\\'),
            }
        }
        out
    }

    /// Returns the value of the label called `name`, if present.
    pub fn get(&self, name: &str) -> Option<&str> {
        self.0.get(name).map(String::as_str)
    }
}
183
184#[derive(Debug, PartialEq)]
185pub enum Value {
186    Counter(f64),
187    Gauge(f64),
188    Histogram(Vec<HistogramCount>),
189    Summary(Vec<SummaryCount>),
190    Untyped(f64),
191}
192
193impl Value {
194    fn push_histogram(&mut self, h: HistogramCount) {
195        match self {
196            &mut Value::Histogram(ref mut hs) => hs.push(h),
197            _ => {}
198        }
199    }
200    fn push_summary(&mut self, s: SummaryCount) {
201        match self {
202            &mut Value::Summary(ref mut ss) => ss.push(s),
203            _ => {}
204        }
205    }
206}
207
/// The result of parsing a complete exposition document.
#[derive(Debug)]
pub struct Scrape {
    /// Help text from `# HELP` lines, keyed by metric name.
    pub docs: HashMap<String, String>,
    /// Every parsed sample. Plain samples come first in input order; the
    /// aggregated histogram and summary samples are appended after them.
    pub samples: Vec<Sample>,
}
213
/// Parses a float, ignoring letter case.
///
/// The input is lower-cased before parsing so that mixed-case spellings such
/// as `NaN`, `Inf`, `+Inf` and `-Inf` are accepted; `nan` is mapped to NaN
/// explicitly rather than being left to the standard parser.
fn parse_golang_float(s: &str) -> Result<f64, <f64 as std::str::FromStr>::Err> {
    let lowered = s.to_lowercase();
    if lowered == "nan" {
        return Ok(std::f64::NAN);
    }
    lowered.parse::<f64>()
}
220
221impl Scrape {
222    pub fn parse(lines: impl Iterator<Item = io::Result<String>>) -> io::Result<Scrape> {
223        Scrape::parse_at(lines, Utc::now())
224    }
225    pub fn parse_at(
226        lines: impl Iterator<Item = io::Result<String>>,
227        sample_time: DateTime<Utc>,
228    ) -> io::Result<Scrape> {
229        let mut docs: HashMap<String, String> = HashMap::new();
230        let mut types: HashMap<String, SampleType> = HashMap::new();
231        let mut buckets: HashMap<String, Sample> = HashMap::new();
232        let mut samples: Vec<Sample> = vec![];
233
234        for read_line in lines {
235            let line = match read_line {
236                Ok(line) => line,
237                Err(e) => return Err(e),
238            };
239            match LineInfo::parse(&line) {
240                LineInfo::Doc {
241                    ref metric_name,
242                    ref doc,
243                } => {
244                    docs.insert(metric_name.to_string(), doc.to_string());
245                }
246                LineInfo::Type {
247                    ref metric_name,
248                    ref sample_type,
249                } => {
250                    types.insert(metric_name.to_string(), *sample_type);
251                }
252                LineInfo::Sample {
253                    metric_name,
254                    ref labels,
255                    value,
256                    timestamp,
257                } => {
258                    // Parse value or skip
259                    let fvalue = if let Ok(v) = parse_golang_float(value) {
260                        v
261                    } else {
262                        continue;
263                    };
264                    // Parse timestamp or use given sample time
265                    let timestamp = if let Some(Ok(ts_millis)) = timestamp.map(|x| x.parse::<i64>())
266                    {
267                        Utc.timestamp_millis(ts_millis)
268                    } else {
269                        sample_time
270                    };
271                    match (types.get(metric_name), labels) {
272                        (Some(SampleType::Histogram), Some(labels)) => {
273                            if let Some(lt) = parse_bucket(labels, "le") {
274                                let sample =
275                                    buckets.entry(metric_name.to_string()).or_insert(Sample {
276                                        metric: metric_name.to_string(),
277                                        labels: Labels::new(),
278                                        value: Value::Histogram(vec![]),
279                                        timestamp: timestamp,
280                                    });
281                                sample.value.push_histogram(HistogramCount {
282                                    less_than: lt,
283                                    count: fvalue,
284                                })
285                            }
286                        }
287                        (Some(SampleType::Summary), Some(labels)) => {
288                            if let Some(q) = parse_bucket(labels, "quantile") {
289                                let sample =
290                                    buckets.entry(metric_name.to_string()).or_insert(Sample {
291                                        metric: metric_name.to_string(),
292                                        labels: Labels::new(),
293                                        value: Value::Summary(vec![]),
294                                        timestamp: timestamp,
295                                    });
296                                sample.value.push_summary(SummaryCount {
297                                    quantile: q,
298                                    count: fvalue,
299                                })
300                            }
301                        }
302                        (ty, labels) => samples.push(Sample {
303                            metric: metric_name.to_string(),
304                            labels: labels.map_or(Labels::new(), |x| Labels::parse(x)),
305                            value: match ty {
306                                Some(SampleType::Counter) => Value::Counter(fvalue),
307                                Some(SampleType::Gauge) => Value::Gauge(fvalue),
308                                _ => Value::Untyped(fvalue),
309                            },
310                            timestamp: timestamp,
311                        }),
312                    };
313                }
314                _ => {}
315            }
316        }
317        samples.extend(buckets.drain().map(|(_k, v)| v).collect::<Vec<_>>());
318        Ok(Scrape {
319            docs: docs,
320            samples: samples,
321        })
322    }
323}
324
325#[cfg(test)]
326mod tests {
327    use super::*;
328    use std::io::BufRead;
329
    /// Checks how `LineInfo::parse` classifies each kind of line.
    #[test]
    fn test_lineinfo_parse() {
        // Bare sample: name and value only.
        assert_eq!(
            LineInfo::parse("foo 2"),
            LineInfo::Sample {
                metric_name: "foo",
                value: "2",
                labels: None,
                timestamp: None,
            }
        );
        // The line parser does not validate numbers; fields are kept as raw text.
        assert_eq!(
            LineInfo::parse("foo wtf -1"),
            LineInfo::Sample {
                metric_name: "foo",
                value: "wtf",
                labels: None,
                timestamp: Some("-1"),
            }
        );
        // No whitespace between name and value: not a sample line.
        assert_eq!(LineInfo::parse("foo=2"), LineInfo::Ignored,);
        // Sample with a timestamp.
        assert_eq!(
            LineInfo::parse("foo 2 1543182234"),
            LineInfo::Sample {
                metric_name: "foo",
                value: "2",
                labels: None,
                timestamp: Some("1543182234"),
            }
        );
        // Labels are returned as the raw text between the braces.
        assert_eq!(
            LineInfo::parse("foo{bar=baz} 2 1543182234"),
            LineInfo::Sample {
                metric_name: "foo",
                value: "2",
                labels: Some("bar=baz"),
                timestamp: Some("1543182234"),
            }
        );
        assert_eq!(
            LineInfo::parse("foo{bar=baz,quux=nonce} 2 1543182234"),
            LineInfo::Sample {
                metric_name: "foo",
                value: "2",
                labels: Some("bar=baz,quux=nonce"),
                timestamp: Some("1543182234"),
            }
        );
        // HELP comment.
        assert_eq!(
            LineInfo::parse("# HELP foo this is a docstring"),
            LineInfo::Doc {
                metric_name: "foo",
                doc: "this is a docstring"
            },
        );
        // TYPE comment with an unknown type keyword falls back to Untyped.
        assert_eq!(
            LineInfo::parse("# TYPE foobar bazquux"),
            LineInfo::Type {
                metric_name: "foobar".to_string(),
                sample_type: SampleType::Untyped,
            },
        );
    }
393
394    fn pair_to_string(pair: &(&str, &str)) -> (String, String) {
395        (pair.0.to_string(), pair.1.to_string())
396    }
397
    /// Checks `Labels::parse` on well-formed and sloppy label lists.
    #[test]
    fn test_labels_parse() {
        // Unquoted value.
        assert_eq!(
            Labels::parse("foo=bar"),
            Labels([("foo", "bar")].iter().map(pair_to_string).collect())
        );
        // Trailing and leading commas are tolerated.
        assert_eq!(
            Labels::parse("foo=bar,"),
            Labels([("foo", "bar")].iter().map(pair_to_string).collect())
        );
        assert_eq!(
            Labels::parse(",foo=bar,"),
            Labels([("foo", "bar")].iter().map(pair_to_string).collect())
        );
        // A pair with an empty name is skipped.
        assert_eq!(
            Labels::parse("=,foo=bar,"),
            Labels([("foo", "bar")].iter().map(pair_to_string).collect())
        );
        // Surrounding double quotes are removed from values.
        assert_eq!(
            Labels::parse(r#"foo="bar""#),
            Labels([("foo", "bar")].iter().map(pair_to_string).collect())
        );
        assert_eq!(
            Labels::parse(r#"foo="bar",baz="quux""#),
            Labels(
                [("foo", "bar"), ("baz", "quux")]
                    .iter()
                    .map(pair_to_string)
                    .collect()
            )
        );
        // Spaces inside quoted values are preserved.
        assert_eq!(
            Labels::parse(r#"foo="foo bar",baz="baz quux""#),
            Labels(
                [("foo", "foo bar"), ("baz", "baz quux")]
                    .iter()
                    .map(pair_to_string)
                    .collect()
            )
        );
        // Nothing but separators yields an empty label set.
        assert_eq!(Labels::parse("==="), Labels(HashMap::new()),);
    }
440
441    #[test]
442    fn test_golang_float() {
443        assert_eq!(parse_golang_float("1.0"), Ok(1.0f64));
444        assert_eq!(parse_golang_float("-1.0"), Ok(-1.0f64));
445        assert!(parse_golang_float("NaN").unwrap().is_nan());
446        assert_eq!(parse_golang_float("Inf"), Ok(std::f64::INFINITY));
447        assert_eq!(parse_golang_float("+Inf"), Ok(std::f64::INFINITY));
448        assert_eq!(parse_golang_float("-Inf"), Ok(std::f64::NEG_INFINITY));
449    }
450
    /// End-to-end check of `Scrape::parse` on a document that mixes counters,
    /// untyped samples, a histogram and a summary.
    #[test]
    fn test_parse_samples() {
        let scrape = r#"
# HELP http_requests_total The total number of HTTP requests.
# TYPE http_requests_total counter
http_requests_total{method="post",code="200"} 1027 1395066363000
http_requests_total{method="post",code="400"}    3 1395066363000

# Escaping in label values:
msdos_file_access_time_seconds{path="C:\\DIR\\FILE.TXT",error="Cannot find file:\n\"FILE.TXT\""} 1.458255915e9

# Minimalistic line:
metric_without_timestamp_and_labels 12.47

# A weird metric from before the epoch:
something_weird{problem="division by zero"} +Inf -3982045

# A histogram, which has a pretty complex representation in the text format:
# HELP http_request_duration_seconds A histogram of the request duration.
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{le="0.05"} 24054
http_request_duration_seconds_bucket{le="0.1"} 33444
http_request_duration_seconds_bucket{le="0.2"} 100392
http_request_duration_seconds_bucket{le="0.5"} 129389
http_request_duration_seconds_bucket{le="1"} 133988
http_request_duration_seconds_bucket{le="+Inf"} 144320
http_request_duration_seconds_sum 53423
http_request_duration_seconds_count 144320

# Finally a summary, which has a complex representation, too:
# HELP rpc_duration_seconds A summary of the RPC duration in seconds.
# TYPE rpc_duration_seconds summary
rpc_duration_seconds{quantile="0.01"} 3102
rpc_duration_seconds{quantile="0.05"} 3272
rpc_duration_seconds{quantile="0.5"} 4773
rpc_duration_seconds{quantile="0.9"} 9001
rpc_duration_seconds{quantile="0.99"} 76656
rpc_duration_seconds_sum 1.7560473e+07
rpc_duration_seconds_count 2693
"#;
        let br = io::BufReader::new(scrape.as_bytes());
        let s = Scrape::parse(br.lines()).unwrap();
        // 5 plain samples, the histogram and summary folded into one sample
        // each, plus their separate `_sum` and `_count` lines.
        assert_eq!(s.samples.len(), 11);

        // Returns the first sample accepted by `f`, panicking when none matches.
        fn assert_match_sample<'a, F>(samples: &'a Vec<Sample>, f: F) -> &'a Sample
        where
            for<'r> F: FnMut(&'r &'a Sample) -> bool,
        {
            samples.iter().filter(f).next().as_ref().unwrap()
        }
        // The two counter samples are told apart by their `code` label.
        assert_eq!(
            assert_match_sample(&s.samples, |s| s.metric == "http_requests_total"
                && s.labels.get("code") == Some("200")),
            &Sample {
                metric: "http_requests_total".to_string(),
                value: Value::Counter(1027f64),
                labels: Labels(
                    [("method", "post"), ("code", "200")]
                        .iter()
                        .map(pair_to_string)
                        .collect()
                ),
                timestamp: Utc.timestamp_millis(1395066363000),
            }
        );
        assert_eq!(
            assert_match_sample(&s.samples, |s| s.metric == "http_requests_total"
                && s.labels.get("code") == Some("400")),
            &Sample {
                metric: "http_requests_total".to_string(),
                value: Value::Counter(3f64),
                labels: Labels(
                    [("method", "post"), ("code", "400")]
                        .iter()
                        .map(pair_to_string)
                        .collect()
                ),
                timestamp: Utc.timestamp_millis(1395066363000),
            }
        );
    }
532}