1use std::collections::HashMap;
2use std::io;
3
4extern crate chrono;
5use chrono::{DateTime, TimeZone, Utc};
6
7#[macro_use]
8extern crate lazy_static;
9
10extern crate regex;
11use regex::Regex;
12
lazy_static! {
    // Matches a `# HELP <metric_name> <doc text>` line.
    // Group 1 is the metric name (`\w+`), group 2 is the rest of the line.
    static ref HELP_RE: Regex = Regex::new(r"^#\s+HELP\s+(\w+)\s+(.+)$").unwrap();
    // Matches a `# TYPE <metric_name> <type>` line.
    // Group 1 is the metric name, group 2 the type word. The pattern is not
    // anchored at the end, so anything after the type word is ignored.
    static ref TYPE_RE: Regex = Regex::new(r"^#\s+TYPE\s+(\w+)\s+(\w+)").unwrap();
    // Matches a sample line: `<name>[{<labels>}] <value> [<timestamp>]`.
    // `labels` is captured without the braces and may contain anything but `}`;
    // `value` and `timestamp` are captured as raw non-whitespace tokens and are
    // not validated here.
    static ref SAMPLE_RE: Regex = Regex::new(
        r"^(?P<name>\w+)(\{(?P<labels>[^}]+)\})?\s+(?P<value>\S+)(\s+(?P<timestamp>\S+))?"
    )
    .unwrap();
}
21
/// Classification of a single line of scrape text, as produced by
/// `LineInfo::parse`.
///
/// The borrowed string slices point into the line that was parsed.
#[derive(Debug, Eq, PartialEq)]
pub enum LineInfo<'a> {
    /// A `# HELP` line: the metric name and its documentation text.
    Doc {
        metric_name: &'a str,
        doc: &'a str,
    },
    /// A `# TYPE` line.
    ///
    /// `metric_name` is owned because `LineInfo::parse` appends `_bucket` to
    /// the name when the declared type is a histogram.
    Type {
        metric_name: String,
        sample_type: SampleType,
    },
    /// A sample line. All fields are the raw, unparsed text of the line:
    /// `labels` is the content between `{` and `}` (if any), `value` and
    /// `timestamp` are the whitespace-delimited tokens that follow.
    Sample {
        metric_name: &'a str,
        labels: Option<&'a str>,
        value: &'a str,
        timestamp: Option<&'a str>,
    },
    /// The line was empty after trimming whitespace.
    Empty,
    /// The line matched none of the recognised forms.
    Ignored,
}
41
/// The metric type declared by a `# TYPE` line.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
pub enum SampleType {
    Counter,
    Gauge,
    Histogram,
    Summary,
    /// Used for any type word that is not one of the four known names.
    Untyped,
}

impl SampleType {
    /// Maps the type word of a `# TYPE` line to a `SampleType`.
    ///
    /// The comparison is exact (case-sensitive); every unrecognised word,
    /// including the empty string, yields `SampleType::Untyped`.
    pub fn parse(s: &str) -> SampleType {
        const KNOWN: [(&str, SampleType); 4] = [
            ("counter", SampleType::Counter),
            ("gauge", SampleType::Gauge),
            ("histogram", SampleType::Histogram),
            ("summary", SampleType::Summary),
        ];
        KNOWN
            .iter()
            .find(|&&(word, _)| word == s)
            .map(|&(_, sample_type)| sample_type)
            .unwrap_or(SampleType::Untyped)
    }
}
62
63impl<'a> LineInfo<'a> {
64 pub fn parse(line: &'a str) -> LineInfo<'a> {
65 let line = line.trim();
66 if line.len() == 0 {
67 return LineInfo::Empty;
68 }
69 match HELP_RE.captures(line) {
70 Some(ref caps) => {
71 return match (caps.get(1), caps.get(2)) {
72 (Some(ref metric_name), Some(ref doc)) => LineInfo::Doc {
73 metric_name: metric_name.as_str(),
74 doc: doc.as_str(),
75 },
76 _ => LineInfo::Ignored,
77 }
78 }
79 None => {}
80 }
81 match TYPE_RE.captures(line) {
82 Some(ref caps) => {
83 return match (caps.get(1), caps.get(2)) {
84 (Some(ref metric_name), Some(ref sample_type)) => {
85 let sample_type = SampleType::parse(sample_type.as_str());
86 LineInfo::Type {
87 metric_name: match sample_type {
88 SampleType::Histogram => format!("{}_bucket", metric_name.as_str()),
89 _ => metric_name.as_str().to_string(),
90 },
91 sample_type: sample_type,
92 }
93 }
94 _ => LineInfo::Ignored,
95 }
96 }
97 None => {}
98 }
99 match SAMPLE_RE.captures(line) {
100 Some(ref caps) => {
101 return match (
102 caps.name("name"),
103 caps.name("labels"),
104 caps.name("value"),
105 caps.name("timestamp"),
106 ) {
107 (Some(ref name), labels, Some(ref value), timestamp) => LineInfo::Sample {
108 metric_name: name.as_str(),
109 labels: labels.map_or(None, |c| Some(c.as_str())),
110 value: value.as_str(),
111 timestamp: timestamp.map_or(None, |c| Some(c.as_str())),
112 },
113 _ => LineInfo::Ignored,
114 }
115 }
116 None => LineInfo::Ignored,
117 }
118 }
119}
120
/// One parsed metric sample.
#[derive(Debug, PartialEq)]
pub struct Sample {
    /// Metric name exactly as it appeared on the sample line.
    metric: String,
    /// Parsed value; the variant is chosen from the `# TYPE` seen for `metric`.
    value: Value,
    /// Parsed label set. `Scrape::parse_at` leaves this empty for the
    /// aggregated histogram and summary samples it builds.
    labels: Labels,
    /// Timestamp from the sample line when present and parseable, otherwise
    /// the sample time passed to `Scrape::parse_at`.
    timestamp: DateTime<Utc>,
}
128
129fn parse_bucket(s: &str, label: &str) -> Option<f64> {
130 if let Some(kv) = s.split(",").next() {
131 let kvpair = kv.split("=").collect::<Vec<_>>();
132 let (k, v) = (kvpair[0], kvpair[1].trim_matches('"'));
133 if k == label {
134 match parse_golang_float(v) {
135 Ok(v) => Some(v),
136 Err(_) => None,
137 }
138 } else {
139 None
140 }
141 } else {
142 None
143 }
144}
145
/// One bucket of a histogram as collected by `Scrape::parse_at`.
#[derive(Debug, PartialEq)]
pub struct HistogramCount {
    /// Value parsed from the bucket's `le` label.
    less_than: f64,
    /// Value of the bucket's sample line.
    count: f64,
}
151
/// One quantile entry of a summary as collected by `Scrape::parse_at`.
#[derive(Debug, PartialEq)]
pub struct SummaryCount {
    /// Value parsed from the sample's `quantile` label.
    quantile: f64,
    /// Value of the quantile's sample line.
    count: f64,
}
157
/// The label set of a sample, keyed by label name.
#[derive(Debug, Eq, PartialEq)]
pub struct Labels(HashMap<String, String>);

impl Labels {
    /// Creates an empty label set.
    fn new() -> Labels {
        Labels(HashMap::new())
    }

    /// Parses the raw text found between `{` and `}` on a sample line.
    ///
    /// The text is split on `,` into pairs and each pair is split on its
    /// *first* `=`, so a value may itself contain `=` (for example
    /// `query="a=b"`). Such labels used to be dropped. Surrounding `"`
    /// characters are stripped from the value. Pairs with no `=` or with an
    /// empty name are skipped.
    ///
    /// Limitation: commas inside quoted values are still treated as pair
    /// separators.
    fn parse(s: &str) -> Labels {
        let mut l = HashMap::new();
        for kv in s.split(',') {
            let mut parts = kv.splitn(2, '=');
            let (name, value) = match (parts.next(), parts.next()) {
                (Some(name), Some(value)) if !name.is_empty() => (name, value),
                _ => continue,
            };
            l.insert(name.to_string(), value.trim_matches('"').to_string());
        }
        Labels(l)
    }

    /// Returns the value of the label called `name`, if present.
    pub fn get(&self, name: &str) -> Option<&str> {
        self.0.get(name).map(|value| value.as_str())
    }
}
183
184#[derive(Debug, PartialEq)]
185pub enum Value {
186 Counter(f64),
187 Gauge(f64),
188 Histogram(Vec<HistogramCount>),
189 Summary(Vec<SummaryCount>),
190 Untyped(f64),
191}
192
193impl Value {
194 fn push_histogram(&mut self, h: HistogramCount) {
195 match self {
196 &mut Value::Histogram(ref mut hs) => hs.push(h),
197 _ => {}
198 }
199 }
200 fn push_summary(&mut self, s: SummaryCount) {
201 match self {
202 &mut Value::Summary(ref mut ss) => ss.push(s),
203 _ => {}
204 }
205 }
206}
207
/// The result of parsing a complete scrape.
#[derive(Debug)]
pub struct Scrape {
    /// Documentation text from `# HELP` lines, keyed by metric name.
    pub docs: HashMap<String, String>,
    /// All samples found: plain samples in input order, followed by the
    /// aggregated histogram and summary samples.
    pub samples: Vec<Sample>,
}
213
/// Parses a float from a sample value or label value.
///
/// The input is lowercased first. The exact text `nan` (in any case) yields
/// NaN; everything else is handed to `f64`'s `FromStr` implementation, whose
/// error is returned unchanged.
fn parse_golang_float(s: &str) -> Result<f64, <f64 as std::str::FromStr>::Err> {
    let lowered = s.to_lowercase();
    if lowered == "nan" {
        return Ok(std::f64::NAN);
    }
    lowered.parse::<f64>()
}
220
221impl Scrape {
222 pub fn parse(lines: impl Iterator<Item = io::Result<String>>) -> io::Result<Scrape> {
223 Scrape::parse_at(lines, Utc::now())
224 }
225 pub fn parse_at(
226 lines: impl Iterator<Item = io::Result<String>>,
227 sample_time: DateTime<Utc>,
228 ) -> io::Result<Scrape> {
229 let mut docs: HashMap<String, String> = HashMap::new();
230 let mut types: HashMap<String, SampleType> = HashMap::new();
231 let mut buckets: HashMap<String, Sample> = HashMap::new();
232 let mut samples: Vec<Sample> = vec![];
233
234 for read_line in lines {
235 let line = match read_line {
236 Ok(line) => line,
237 Err(e) => return Err(e),
238 };
239 match LineInfo::parse(&line) {
240 LineInfo::Doc {
241 ref metric_name,
242 ref doc,
243 } => {
244 docs.insert(metric_name.to_string(), doc.to_string());
245 }
246 LineInfo::Type {
247 ref metric_name,
248 ref sample_type,
249 } => {
250 types.insert(metric_name.to_string(), *sample_type);
251 }
252 LineInfo::Sample {
253 metric_name,
254 ref labels,
255 value,
256 timestamp,
257 } => {
258 let fvalue = if let Ok(v) = parse_golang_float(value) {
260 v
261 } else {
262 continue;
263 };
264 let timestamp = if let Some(Ok(ts_millis)) = timestamp.map(|x| x.parse::<i64>())
266 {
267 Utc.timestamp_millis(ts_millis)
268 } else {
269 sample_time
270 };
271 match (types.get(metric_name), labels) {
272 (Some(SampleType::Histogram), Some(labels)) => {
273 if let Some(lt) = parse_bucket(labels, "le") {
274 let sample =
275 buckets.entry(metric_name.to_string()).or_insert(Sample {
276 metric: metric_name.to_string(),
277 labels: Labels::new(),
278 value: Value::Histogram(vec![]),
279 timestamp: timestamp,
280 });
281 sample.value.push_histogram(HistogramCount {
282 less_than: lt,
283 count: fvalue,
284 })
285 }
286 }
287 (Some(SampleType::Summary), Some(labels)) => {
288 if let Some(q) = parse_bucket(labels, "quantile") {
289 let sample =
290 buckets.entry(metric_name.to_string()).or_insert(Sample {
291 metric: metric_name.to_string(),
292 labels: Labels::new(),
293 value: Value::Summary(vec![]),
294 timestamp: timestamp,
295 });
296 sample.value.push_summary(SummaryCount {
297 quantile: q,
298 count: fvalue,
299 })
300 }
301 }
302 (ty, labels) => samples.push(Sample {
303 metric: metric_name.to_string(),
304 labels: labels.map_or(Labels::new(), |x| Labels::parse(x)),
305 value: match ty {
306 Some(SampleType::Counter) => Value::Counter(fvalue),
307 Some(SampleType::Gauge) => Value::Gauge(fvalue),
308 _ => Value::Untyped(fvalue),
309 },
310 timestamp: timestamp,
311 }),
312 };
313 }
314 _ => {}
315 }
316 }
317 samples.extend(buckets.drain().map(|(_k, v)| v).collect::<Vec<_>>());
318 Ok(Scrape {
319 docs: docs,
320 samples: samples,
321 })
322 }
323}
324
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::BufRead;

    /// Turns a borrowed `(key, value)` pair into an owned pair of `String`s.
    fn pair_to_string(pair: &(&str, &str)) -> (String, String) {
        let &(key, value) = pair;
        (key.to_string(), value.to_string())
    }

    /// Builds the expected `Labels` from literal `(key, value)` pairs.
    fn labels_of(pairs: &[(&str, &str)]) -> Labels {
        Labels(pairs.iter().map(pair_to_string).collect())
    }

    /// Shorthand for the `LineInfo::Sample` expected for metric `foo`.
    fn foo_sample(
        value: &'static str,
        labels: Option<&'static str>,
        timestamp: Option<&'static str>,
    ) -> LineInfo<'static> {
        LineInfo::Sample {
            metric_name: "foo",
            value,
            labels,
            timestamp,
        }
    }

    /// Returns the first sample accepted by `f`, panicking when none matches.
    fn assert_match_sample<'a, F>(samples: &'a Vec<Sample>, f: F) -> &'a Sample
    where
        for<'r> F: FnMut(&'r &'a Sample) -> bool,
    {
        samples.iter().find(f).unwrap()
    }

    #[test]
    fn test_lineinfo_parse() {
        // Each case pairs an input line with the classification it must get.
        let cases = vec![
            ("foo 2", foo_sample("2", None, None)),
            ("foo wtf -1", foo_sample("wtf", None, Some("-1"))),
            ("foo=2", LineInfo::Ignored),
            ("foo 2 1543182234", foo_sample("2", None, Some("1543182234"))),
            (
                "foo{bar=baz} 2 1543182234",
                foo_sample("2", Some("bar=baz"), Some("1543182234")),
            ),
            (
                "foo{bar=baz,quux=nonce} 2 1543182234",
                foo_sample("2", Some("bar=baz,quux=nonce"), Some("1543182234")),
            ),
            (
                "# HELP foo this is a docstring",
                LineInfo::Doc {
                    metric_name: "foo",
                    doc: "this is a docstring",
                },
            ),
            (
                "# TYPE foobar bazquux",
                LineInfo::Type {
                    metric_name: "foobar".to_string(),
                    sample_type: SampleType::Untyped,
                },
            ),
        ];
        for (input, expected) in cases {
            assert_eq!(LineInfo::parse(input), expected);
        }
    }

    #[test]
    fn test_labels_parse() {
        // Stray separators and optional quoting all reduce to the same pair.
        let single_pair_inputs = [
            "foo=bar",
            "foo=bar,",
            ",foo=bar,",
            "=,foo=bar,",
            r#"foo="bar""#,
        ];
        for input in single_pair_inputs.iter() {
            assert_eq!(Labels::parse(input), labels_of(&[("foo", "bar")]));
        }
        assert_eq!(
            Labels::parse(r#"foo="bar",baz="quux""#),
            labels_of(&[("foo", "bar"), ("baz", "quux")])
        );
        assert_eq!(
            Labels::parse(r#"foo="foo bar",baz="baz quux""#),
            labels_of(&[("foo", "foo bar"), ("baz", "baz quux")])
        );
        assert_eq!(Labels::parse("==="), labels_of(&[]));
    }

    #[test]
    fn test_golang_float() {
        let exact_cases = [
            ("1.0", 1.0f64),
            ("-1.0", -1.0f64),
            ("Inf", std::f64::INFINITY),
            ("+Inf", std::f64::INFINITY),
            ("-Inf", std::f64::NEG_INFINITY),
        ];
        for &(input, expected) in exact_cases.iter() {
            assert_eq!(parse_golang_float(input), Ok(expected));
        }
        // NaN never compares equal, so it is checked separately.
        assert!(parse_golang_float("NaN").unwrap().is_nan());
    }

    #[test]
    fn test_parse_samples() {
        let scrape = r#"
# HELP http_requests_total The total number of HTTP requests.
# TYPE http_requests_total counter
http_requests_total{method="post",code="200"} 1027 1395066363000
http_requests_total{method="post",code="400"} 3 1395066363000

# Escaping in label values:
msdos_file_access_time_seconds{path="C:\\DIR\\FILE.TXT",error="Cannot find file:\n\"FILE.TXT\""} 1.458255915e9

# Minimalistic line:
metric_without_timestamp_and_labels 12.47

# A weird metric from before the epoch:
something_weird{problem="division by zero"} +Inf -3982045

# A histogram, which has a pretty complex representation in the text format:
# HELP http_request_duration_seconds A histogram of the request duration.
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{le="0.05"} 24054
http_request_duration_seconds_bucket{le="0.1"} 33444
http_request_duration_seconds_bucket{le="0.2"} 100392
http_request_duration_seconds_bucket{le="0.5"} 129389
http_request_duration_seconds_bucket{le="1"} 133988
http_request_duration_seconds_bucket{le="+Inf"} 144320
http_request_duration_seconds_sum 53423
http_request_duration_seconds_count 144320

# Finally a summary, which has a complex representation, too:
# HELP rpc_duration_seconds A summary of the RPC duration in seconds.
# TYPE rpc_duration_seconds summary
rpc_duration_seconds{quantile="0.01"} 3102
rpc_duration_seconds{quantile="0.05"} 3272
rpc_duration_seconds{quantile="0.5"} 4773
rpc_duration_seconds{quantile="0.9"} 9001
rpc_duration_seconds{quantile="0.99"} 76656
rpc_duration_seconds_sum 1.7560473e+07
rpc_duration_seconds_count 2693
"#;
        let reader = io::BufReader::new(scrape.as_bytes());
        let s = Scrape::parse(reader.lines()).unwrap();
        assert_eq!(s.samples.len(), 11);

        // Both counter samples share everything except the `code` label and
        // the counted value.
        let counter_cases = [("200", 1027f64), ("400", 3f64)];
        for &(code, count) in counter_cases.iter() {
            let found = assert_match_sample(&s.samples, |sample| {
                sample.metric == "http_requests_total" && sample.labels.get("code") == Some(code)
            });
            let expected = Sample {
                metric: "http_requests_total".to_string(),
                value: Value::Counter(count),
                labels: labels_of(&[("method", "post"), ("code", code)]),
                timestamp: Utc.timestamp_millis(1395066363000),
            };
            assert_eq!(found, &expected);
        }
    }
}