1#![doc(html_root_url = "https://docs.rs/grok/2.0.0")]
8
9include!(concat!(env!("OUT_DIR"), "/default_patterns.rs"));
10
11use onig::{Captures, Regex};
12use std::collections::btree_map::Iter as MapIter;
13use std::collections::{BTreeMap, HashMap};
14use std::error::Error as StdError;
15use std::fmt;
16
/// Upper bound on the number of expansion passes `Grok::compile` performs
/// before it gives up with `Error::RecursionTooDeep`.
const MAX_RECURSION: usize = 1024;

/// Regex recognizing a single grok placeholder: `%{PATTERN}`,
/// `%{PATTERN:alias}`, optionally followed by an inline `=definition` before
/// the closing brace.
///
/// NOTE(review): the `A-z` ranges also cover the ASCII punctuation located
/// between `Z` and `a` (`[`, `\`, `]`, `^`, `_` and the backtick), which is
/// what lets pattern names contain underscores. Narrowing the range would
/// change which placeholders are recognized.
const GROK_PATTERN: &str = r"%\{(?<name>(?<pattern>[A-z0-9]+)(?::(?<alias>[A-z0-9_:;\/\s\.]+))?)(?:=(?<definition>(?:(?:[^{}]+|\.+)+)+))?\}";
/// Capture group of `GROK_PATTERN` holding the pattern name plus the optional `:alias`.
const NAME_INDEX: usize = 1;
/// Capture group of `GROK_PATTERN` holding only the pattern name.
const PATTERN_INDEX: usize = 2;
/// Capture group of `GROK_PATTERN` holding the alias, when one was given.
const ALIAS_INDEX: usize = 3;
/// Capture group of `GROK_PATTERN` holding the inline definition, when one was given.
const DEFINITION_INDEX: usize = 4;
24
/// Returns the built-in pattern table as a slice of `(name, definition)` pairs.
///
/// The slice is the `PATTERNS` table that this file pulls in from the build
/// output directory through `include!`.
pub fn patterns<'a>() -> &'a [(&'a str, &'a str)] {
    PATTERNS
}
29
/// The outcome of successfully matching a `Pattern` against a text.
///
/// Pairs the regex captures with the name table of the pattern that produced
/// them, so captured text can be looked up by name or alias.
#[derive(Debug)]
pub struct Matches<'a> {
    /// Capture groups reported by the regex engine for this match.
    captures: Captures<'a>,
    /// Maps every capture name or alias to its capture group index.
    names: &'a BTreeMap<String, u32>,
}
36
37impl<'a> Matches<'a> {
38 fn new(captures: Captures<'a>, names: &'a BTreeMap<String, u32>) -> Self {
40 Matches { captures, names }
41 }
42
43 pub fn get(&self, name_or_alias: &str) -> Option<&str> {
45 match self.names.get(name_or_alias) {
46 Some(found) => self.captures.at(*found as usize),
47 None => None,
48 }
49 }
50
51 pub fn len(&self) -> usize {
53 self.captures.len() - 1
54 }
55
56 pub fn is_empty(&self) -> bool {
58 self.len() == 0
59 }
60
61 pub fn iter(&'a self) -> MatchesIter<'a> {
65 MatchesIter {
66 captures: &self.captures,
67 names: self.names.iter(),
68 }
69 }
70}
71
72impl<'a> IntoIterator for &'a Matches<'a> {
73 type Item = (&'a str, &'a str);
74 type IntoIter = MatchesIter<'a>;
75
76 fn into_iter(self) -> Self::IntoIter {
77 self.iter()
78 }
79}
80
/// Iterator over the `(name, captured text)` pairs of a `Matches` value.
pub struct MatchesIter<'a> {
    /// Captures the values are read from.
    captures: &'a Captures<'a>,
    /// Remaining `(name, group index)` entries still to be visited.
    names: MapIter<'a, String, u32>,
}
86
87impl<'a> Iterator for MatchesIter<'a> {
88 type Item = (&'a str, &'a str);
89
90 fn next(&mut self) -> Option<Self::Item> {
91 for (k, v) in self.names.by_ref() {
93 match self.captures.at(*v as usize) {
94 Some(value) => return Some((k.as_str(), value)),
95 None => {
96 continue;
97 }
98 }
99 }
100 None
101 }
102}
103
/// A compiled grok pattern, ready to be matched against text.
#[derive(Debug)]
pub struct Pattern {
    /// The fully expanded regular expression.
    regex: Regex,
    /// Maps every capture name or alias to its capture group index.
    names: BTreeMap<String, u32>,
}
110
111impl Pattern {
112 fn new(regex: &str, alias: &HashMap<String, String>) -> Result<Self, Error> {
115 match Regex::new(regex) {
116 Ok(r) => Ok({
117 let mut names = BTreeMap::new();
118 r.foreach_name(|cap_name, cap_idx| {
119 let name = match alias.iter().find(|&(_k, v)| *v == cap_name) {
120 Some(item) => item.0.clone(),
121 None => String::from(cap_name),
122 };
123 names.insert(name, cap_idx[0]);
124 true
125 });
126 Pattern { regex: r, names }
127 }),
128 Err(_) => Err(Error::RegexCompilationFailed(regex.into())),
129 }
130 }
131
132 pub fn match_against<'a>(&'a self, text: &'a str) -> Option<Matches<'a>> {
134 self.regex
135 .captures(text)
136 .map(|cap| Matches::new(cap, &self.names))
137 }
138
139 pub fn capture_names(&self) -> impl Iterator<Item = &str> {
141 self.names.keys().map(|s| s.as_str())
142 }
143}
144
/// Registry of named pattern definitions from which `Pattern`s are compiled.
#[derive(Debug)]
pub struct Grok {
    /// Maps a pattern name to its definition, which may itself contain
    /// `%{...}` placeholders.
    patterns: BTreeMap<String, String>,
}
150
151impl Grok {
152 pub fn empty() -> Self {
154 Grok {
155 patterns: BTreeMap::new(),
156 }
157 }
158
159 pub fn with_default_patterns() -> Self {
161 let mut grok = Grok::empty();
162 for &(key, value) in PATTERNS {
163 grok.add_pattern(String::from(key), String::from(value));
164 }
165 grok
166 }
167
168 pub fn add_pattern<S: Into<String>>(&mut self, name: S, pattern: S) {
170 self.patterns.insert(name.into(), pattern.into());
171 }
172
173 pub fn compile(&mut self, pattern: &str, with_alias_only: bool) -> Result<Pattern, Error> {
175 let mut named_regex = String::from(pattern);
176 let mut alias: HashMap<String, String> = HashMap::new();
177
178 let mut index = 0;
179 let mut iteration_left = MAX_RECURSION;
180 let mut continue_iteration = true;
181
182 let grok_regex = match Regex::new(GROK_PATTERN) {
183 Ok(r) => r,
184 Err(_) => return Err(Error::RegexCompilationFailed(GROK_PATTERN.into())),
185 };
186
187 while continue_iteration {
188 continue_iteration = false;
189 if iteration_left == 0 {
190 return Err(Error::RecursionTooDeep);
191 }
192 iteration_left -= 1;
193
194 if let Some(m) = grok_regex.captures(&named_regex.clone()) {
195 continue_iteration = true;
196 let raw_pattern = match m.at(PATTERN_INDEX) {
197 Some(p) => p,
198 None => {
199 return Err(Error::GenericCompilationFailure(
200 "Could not find pattern in matches".into(),
201 ))
202 }
203 };
204
205 let mut name = match m.at(NAME_INDEX) {
206 Some(n) => String::from(n),
207 None => {
208 return Err(Error::GenericCompilationFailure(
209 "Could not find name in matches".into(),
210 ))
211 }
212 };
213
214 if let Some(definition) = m.at(DEFINITION_INDEX) {
215 self.add_pattern(raw_pattern, definition);
216 name = format!("{}={}", name, definition);
217 }
218
219 for _ in 0..named_regex.matches(&format!("%{{{}}}", name)).count() {
223 let pattern_definition = match self.patterns.get(raw_pattern) {
226 Some(d) => d,
227 None => return Err(Error::DefinitionNotFound(raw_pattern.into())),
228 };
229
230 let replacement = if with_alias_only && m.at(ALIAS_INDEX).is_none() {
236 format!("(?:{})", pattern_definition)
237 } else {
238 alias.insert(
242 match m.at(ALIAS_INDEX) {
243 Some(a) => a.into(),
244 None => name.clone(),
245 },
246 format!("name{}", index),
247 );
248
249 format!("(?<name{}>{})", index, pattern_definition)
250 };
251
252 named_regex = named_regex.replacen(&format!("%{{{}}}", name), &replacement, 1);
256
257 index += 1;
258 }
259 }
260 }
261
262 if named_regex.is_empty() {
263 Err(Error::CompiledPatternIsEmpty(pattern.into()))
264 } else {
265 Pattern::new(&named_regex, &alias)
266 }
267 }
268}
269
270impl Default for Grok {
272 fn default() -> Grok {
273 Grok::with_default_patterns()
274 }
275}
276
277impl<S: Into<String>> FromIterator<(S, S)> for Grok {
285 fn from_iter<I: IntoIterator<Item = (S, S)>>(iter: I) -> Self {
286 let mut grok = Grok::empty();
287 for (k, v) in iter {
288 grok.add_pattern(k, v);
289 }
290 grok
291 }
292}
293
294impl<S: Into<String>, const N: usize> From<[(S, S); N]> for Grok {
301 fn from(arr: [(S, S); N]) -> Self {
302 Self::from_iter(arr)
303 }
304}
305
/// Errors that can occur while compiling a grok pattern.
#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub enum Error {
    /// Placeholders were still being expanded when the pass limit was reached.
    RecursionTooDeep,
    /// The expanded pattern was empty; carries the pattern as given.
    CompiledPatternIsEmpty(String),
    /// A placeholder referred to an unknown pattern; carries that pattern name.
    DefinitionNotFound(String),
    /// The regex engine rejected a regex; carries the offending regex.
    RegexCompilationFailed(String),
    /// Any other compilation failure; carries a description.
    GenericCompilationFailure(String),
}
321
322impl StdError for Error {
323 fn description(&self) -> &str {
324 match *self {
325 Error::RecursionTooDeep => "compilation recursion reached the limit",
326 Error::CompiledPatternIsEmpty(_) => "compiled pattern is empty",
327 Error::DefinitionNotFound(_) => "pattern definition not found while compiling",
328 Error::RegexCompilationFailed(_) => "regex compilation in the engine failed",
329 Error::GenericCompilationFailure(_) => {
330 "something happened during the compilation phase"
331 }
332 }
333 }
334
335 fn cause(&self) -> Option<&dyn StdError> {
336 None
337 }
338}
339
340impl fmt::Display for Error {
341 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
342 match *self {
343 Error::RecursionTooDeep => write!(
344 f,
345 "Recursion while compiling reached the limit of {}",
346 MAX_RECURSION
347 ),
348 Error::CompiledPatternIsEmpty(ref p) => write!(
349 f,
350 "The given pattern \"{}\" ended up compiling into an empty regex",
351 p
352 ),
353 Error::DefinitionNotFound(ref d) => write!(
354 f,
355 "The given pattern definition name \"{}\" could not be found in the definition map",
356 d
357 ),
358 Error::RegexCompilationFailed(ref r) => write!(
359 f,
360 "The given regex \"{}\" failed compilation in the underlying engine",
361 r
362 ),
363 Error::GenericCompilationFailure(ref d) => write!(
364 f,
365 "Something unexpected happened during the compilation phase: \"{}\"",
366 d
367 ),
368 }
369 }
370}
371
#[cfg(test)]
mod tests {

    use super::*;

    // Pattern definitions shared by several tests.
    const USERNAME_RE: &str = r"[a-zA-Z0-9._-]+";
    const YEAR_RE: &str = r"(\d\d){1,2}";
    const MONTH_RE: &str = r"\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b";
    const DAY_RE: &str = r"(?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)";
    const SPACE_RE: &str = r"\s*";

    /// A `Grok` that only knows `USERNAME`.
    fn username_grok() -> Grok {
        let mut grok = Grok::empty();
        grok.add_pattern("USERNAME", USERNAME_RE);
        grok
    }

    /// A `Grok` that knows `USERNAME` and `USER`, the latter referring to the former.
    fn user_alias_grok() -> Grok {
        let mut grok = username_grok();
        grok.add_pattern("USER", r"%{USERNAME}");
        grok
    }

    /// A `Grok` that knows `MAC` as an alternation of three MAC address notations.
    fn mac_grok() -> Grok {
        let mut grok = Grok::empty();
        grok.add_pattern("MAC", r"(?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})");
        grok.add_pattern("CISCOMAC", r"(?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})");
        grok.add_pattern("WINDOWSMAC", r"(?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})");
        grok.add_pattern("COMMONMAC", r"(?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})");
        grok
    }

    /// A `Grok` that knows `YEAR`, `MONTH` and `DAY`.
    fn calendar_grok() -> Grok {
        let mut grok = Grok::empty();
        grok.add_pattern("YEAR", YEAR_RE);
        grok.add_pattern("MONTH", MONTH_RE);
        grok.add_pattern("DAY", DAY_RE);
        grok
    }

    /// `calendar_grok()` extended with `USERNAME` and `SPACE`.
    fn calendar_user_grok() -> Grok {
        let mut grok = calendar_grok();
        grok.add_pattern("USERNAME", USERNAME_RE);
        grok.add_pattern("SPACE", SPACE_RE);
        grok
    }

    #[test]
    fn test_simple_anonymous_pattern() {
        let mut grok = username_grok();
        let pattern = grok
            .compile("%{USERNAME}", false)
            .expect("Error while compiling!");

        let matches = pattern.match_against("root").expect("No matches found!");
        assert_eq!("root", matches.get("USERNAME").unwrap());
        assert_eq!(1, matches.len());
        let matches = pattern
            .match_against("john doe")
            .expect("No matches found!");
        assert_eq!("john", matches.get("USERNAME").unwrap());
        assert_eq!(1, matches.len());
    }

    #[test]
    fn test_from_iter() {
        let patterns = [("USERNAME", USERNAME_RE)];
        let mut grok = Grok::from_iter(patterns);
        let pattern = grok
            .compile("%{USERNAME}", false)
            .expect("Error while compiling!");

        let matches = pattern.match_against("root").expect("No matches found!");
        assert_eq!("root", matches.get("USERNAME").unwrap());
        assert_eq!(1, matches.len());
        let matches = pattern
            .match_against("john doe")
            .expect("No matches found!");
        assert_eq!("john", matches.get("USERNAME").unwrap());
        assert_eq!(1, matches.len());
    }

    #[test]
    fn test_from() {
        let mut grok = Grok::from([("USERNAME", USERNAME_RE)]);
        let pattern = grok
            .compile("%{USERNAME}", false)
            .expect("Error while compiling!");

        let matches = pattern.match_against("root").expect("No matches found!");
        assert_eq!("root", matches.get("USERNAME").unwrap());
        assert_eq!(1, matches.len());
        let matches = pattern
            .match_against("john doe")
            .expect("No matches found!");
        assert_eq!("john", matches.get("USERNAME").unwrap());
        assert_eq!(1, matches.len());
    }

    #[test]
    fn test_simple_named_pattern() {
        let mut grok = username_grok();
        let pattern = grok
            .compile("%{USERNAME:usr}", false)
            .expect("Error while compiling!");

        let matches = pattern.match_against("root").expect("No matches found!");
        assert_eq!("root", matches.get("usr").unwrap());
        assert_eq!(1, matches.len());
        let matches = pattern
            .match_against("john doe")
            .expect("No matches found!");
        assert_eq!("john", matches.get("usr").unwrap());
        assert_eq!(1, matches.len());
    }

    #[test]
    fn test_alias_anonymous_pattern() {
        let mut grok = user_alias_grok();
        let pattern = grok
            .compile("%{USER}", false)
            .expect("Error while compiling!");

        let matches = pattern.match_against("root").expect("No matches found!");
        assert_eq!("root", matches.get("USER").unwrap());
        let matches = pattern
            .match_against("john doe")
            .expect("No matches found!");
        assert_eq!("john", matches.get("USER").unwrap());
    }

    #[test]
    fn test_alias_named_pattern() {
        let mut grok = user_alias_grok();
        let pattern = grok
            .compile("%{USER:usr}", false)
            .expect("Error while compiling!");

        let matches = pattern.match_against("root").expect("No matches found!");
        assert_eq!("root", matches.get("usr").unwrap());
        let matches = pattern
            .match_against("john doe")
            .expect("No matches found!");
        assert_eq!("john", matches.get("usr").unwrap());
    }

    #[test]
    fn test_composite_or_pattern() {
        let mut grok = mac_grok();
        let pattern = grok
            .compile("%{MAC}", false)
            .expect("Error while compiling!");

        let matches = pattern
            .match_against("5E:FF:56:A2:AF:15")
            .expect("No matches found!");
        assert_eq!("5E:FF:56:A2:AF:15", matches.get("MAC").unwrap());
        assert_eq!(4, matches.len());
        let matches = pattern
            .match_against("hello! 5E:FF:56:A2:AF:15 what?")
            .expect("No matches found!");
        assert_eq!("5E:FF:56:A2:AF:15", matches.get("MAC").unwrap());
        assert!(pattern.match_against("5E:FF").is_none());
    }

    #[test]
    fn test_multiple_patterns() {
        let mut grok = calendar_grok();
        let pattern = grok
            .compile("%{DAY} %{MONTH} %{YEAR}", false)
            .expect("Error while compiling!");

        let matches = pattern
            .match_against("Monday March 2012")
            .expect("No matches found!");
        assert_eq!("Monday", matches.get("DAY").unwrap());
        assert_eq!("March", matches.get("MONTH").unwrap());
        assert_eq!("2012", matches.get("YEAR").unwrap());
        assert_eq!(None, matches.get("unknown"));
    }

    #[test]
    fn test_with_alias_only() {
        let mut grok = mac_grok();
        let pattern = grok
            .compile("%{MAC:macaddr}", true)
            .expect("Error while compiling!");

        let matches = pattern
            .match_against("5E:FF:56:A2:AF:15")
            .expect("No matches found!");
        assert_eq!("5E:FF:56:A2:AF:15", matches.get("macaddr").unwrap());
        assert_eq!(1, matches.len());
        let matches = pattern
            .match_against("hello! 5E:FF:56:A2:AF:15 what?")
            .expect("No matches found!");
        assert_eq!("5E:FF:56:A2:AF:15", matches.get("macaddr").unwrap());
        assert!(pattern.match_against("5E:FF").is_none());
    }

    #[test]
    fn test_match_iterator() {
        let mut grok = calendar_user_grok();

        let pattern = grok
            .compile(
                "%{DAY:day} %{MONTH:month} %{YEAR:year}%{SPACE}%{USERNAME:user}?",
                true,
            )
            .expect("Error while compiling!");
        let matches = pattern
            .match_against("Monday March 2012")
            .expect("No matches found!");
        let mut found = 0;
        for (k, v) in matches.iter() {
            match k {
                "day" => assert_eq!("Monday", v),
                "month" => assert_eq!("March", v),
                "year" => assert_eq!("2012", v),
                e => panic!("{:?}", e),
            }
            found += 1;
        }
        assert_eq!(3, found);
    }

    #[test]
    fn test_matches_into_iter() {
        let mut grok = calendar_user_grok();

        let pattern = grok
            .compile(
                "%{DAY:day} %{MONTH:month} %{YEAR:year}%{SPACE}%{USERNAME:user}?",
                true,
            )
            .expect("Error while compiling!");
        let matches = pattern
            .match_against("Monday March 2012")
            .expect("No matches found!");
        let mut found = 0;
        for (k, v) in &matches {
            match k {
                "day" => assert_eq!("Monday", v),
                "month" => assert_eq!("March", v),
                "year" => assert_eq!("2012", v),
                e => panic!("{:?}", e),
            }
            found += 1;
        }
        assert_eq!(3, found);
    }

    #[test]
    fn test_loaded_default_patterns() {
        let mut grok = Grok::with_default_patterns();
        let pattern = grok
            .compile("%{DAY} %{MONTH} %{YEAR}", false)
            .expect("Error while compiling!");

        let matches = pattern
            .match_against("Monday March 2012")
            .expect("No matches found!");
        assert_eq!("Monday", matches.get("DAY").unwrap());
        assert_eq!("March", matches.get("MONTH").unwrap());
        assert_eq!("2012", matches.get("YEAR").unwrap());
        assert_eq!(None, matches.get("unknown"));
    }

    #[test]
    fn test_compilation_of_all_default_patterns() {
        let mut grok = Grok::default();
        let mut num_checked = 0;
        for &(key, _) in PATTERNS {
            let pattern = format!("%{{{}}}", key);
            // Build the failure message only when compilation actually fails.
            grok.compile(&pattern, false).unwrap_or_else(|err| {
                panic!(
                    "Pattern {} key {} failed to compile!: {:?}",
                    pattern, key, err
                )
            });
            num_checked += 1;
        }
        assert!(num_checked > 0);
    }

    #[test]
    fn test_adhoc_pattern() {
        let mut grok = Grok::default();
        let pattern = grok
            .compile(r"\[(?<threadname>[^\]]+)\]", false)
            .expect("Error while compiling!");

        let matches = pattern
            .match_against("[thread1]")
            .expect("No matches found!");
        assert_eq!("thread1", matches.get("threadname").unwrap());
    }

    #[test]
    fn test_adhoc_pattern_in_iter() {
        let mut grok = Grok::default();
        let pattern = grok
            .compile(r"\[(?<threadname>[^\]]+)\]", false)
            .expect("Error while compiling!");

        let matches = pattern
            .match_against("[thread1]")
            .expect("No matches found!");
        let mut found = 0;
        for (k, v) in matches.iter() {
            assert_eq!("threadname", k);
            assert_eq!("thread1", v);
            found += 1;
        }
        assert_eq!(1, found);
    }

    #[test]
    fn test_capture_names() {
        let mut grok = calendar_user_grok();

        let pattern = grok
            .compile("%{YEAR}%{SPACE}%{USERNAME:user}?", false)
            .expect("Error while compiling!");

        let expected = vec!["SPACE", "YEAR", "user"];
        let actual = pattern.capture_names().collect::<Vec<_>>();
        assert_eq!(expected, actual);
    }
}