1use std::{
33 collections::HashMap,
34 fs::File,
35 io::{BufRead, BufReader},
36};
37
38use regex::Regex;
39
/// Maximum number of expansion rounds `Grok::compile` performs before it
/// gives up and reports a "max recursion" error.
const MAX_RECURSION: i32 = 1024;

// Capture-group indices of `GROK_PATTERN`, in the order the groups open:
// the whole reference body, the pattern name, the optional alias and the
// optional type.
const NAME_INDEX: usize = 1;
const PATTERN_INDEX: usize = 2;
const ALIAS_INDEX: usize = 3;
const TYPE_INDEX: usize = 4;
46
/// Verbose-mode (`(?x)`) regex that recognises a single grok reference:
/// `%{PATTERN}`, `%{PATTERN:alias}` or `%{PATTERN:alias:type}`.
///
/// * `pattern` is one or more word characters.
/// * `alias` may additionally contain `@`, `.` and `-`.
/// * `type` is one of `int`, `long`, `float`, `double`, `bool`, `boolean`.
///
/// The outer `name` group spans everything between `%{` and `}`.
const GROK_PATTERN: &str = r"(?x)
%\{
 (?<name>
 (?<pattern>[[:word:]]+)
 (?:
 :(?<alias>[[[:word:]]@.-]+)
 (?:
 :(?<type>int|long|float|double|bool(?:ean)?)
 )?
 )?
 )
\}";
59
60fn load_patterns() -> HashMap<String, String> {
61 let mut patterns = HashMap::new();
62
63 for line in glob::glob("src/patterns/*")
64 .unwrap()
65 .map(|e| File::open(e.unwrap()).unwrap())
66 .flat_map(|f| BufReader::new(f).lines())
67 .map(|line| line.unwrap())
68 .filter(|line| !line.starts_with('#') && !line.is_empty())
69 {
70 let (key, value) = line.split_at(line.find(' ').unwrap());
71 patterns.insert(key.to_string(), value.trim().to_string());
72 }
73
74 patterns.insert("BOOL".into(), "true|false".into());
75
76 patterns
77}
78
lazy_static::lazy_static! {
    /// Compiled form of `GROK_PATTERN`; initialised lazily on first access.
    static ref GROK_REGEX: Regex = Regex::new(GROK_PATTERN).unwrap();
    /// Built-in pattern table produced by `load_patterns`; initialised lazily
    /// on first access.
    static ref DEFAULT_PATTERNS: HashMap<String, String> = load_patterns();
}
83
/// A single field value extracted by [`Pattern::parse`].
///
/// Which variant is produced depends on the type suffix of the grok
/// reference that captured the field.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    /// Produced for the `int` and `long` type suffixes.
    Int(i64),
    /// Produced for the `float` and `double` type suffixes.
    Float(f64),
    /// Produced for the `bool` and `boolean` type suffixes.
    Bool(bool),
    /// Produced when no type suffix was given.
    String(String),
}
91
/// `(output field name, optional type suffix)` recorded for one generated
/// capture group.
type AliasType = (String, Option<String>);
93
/// A compiled grok expression, produced by [`Grok::compile`].
#[derive(Debug)]
pub struct Pattern {
    /// The fully expanded regular expression.
    regex: Regex,
    /// Maps generated capture-group names to the field name (and optional
    /// type) the caller asked for.
    alias: HashMap<String, AliasType>,
}
99
100impl Pattern {
101 fn new(regex: Regex, alias: HashMap<String, AliasType>) -> Self {
102 Self { regex, alias }
103 }
104
105 pub fn parse(&self, s: &str) -> Result<HashMap<String, Value>, String> {
123 let mut map = HashMap::new();
124 let names = self.regex.capture_names().flatten().collect::<Vec<_>>();
125
126 let caps = match self.regex.captures(s) {
127 Some(caps) => caps,
128 None => return Ok(map),
129 };
130
131 for name in names {
132 if let Some(m) = caps.name(name) {
133 let value = m.as_str().to_string();
134 match self.alias.get(name) {
135 Some((alias, type_)) => {
136 let value = match type_ {
137 Some(type_) if type_.eq("int") || type_.eq("long") => Value::Int(
138 value.parse::<i64>().map_err(|e| format!("{e}: {value}"))?,
139 ),
140 Some(type_) if type_.eq("float") || type_.eq("double") => Value::Float(
141 value.parse::<f64>().map_err(|e| format!("{e}: {value}"))?,
142 ),
143 Some(type_) if type_.eq("bool") || type_.eq("boolean") => Value::Bool(
144 value.parse::<bool>().map_err(|e| format!("{e}: {value}"))?,
145 ),
146 _ => Value::String(value),
147 };
148 map.insert(alias.clone(), value);
149 }
150 None => {
151 map.insert(name.to_string(), Value::String(value));
152 }
153 }
154 }
155 }
156
157 Ok(map)
158 }
159}
160
/// Holds user-supplied pattern definitions and compiles grok expressions
/// into [`Pattern`]s.
#[derive(Default, Debug)]
pub struct Grok {
    /// Custom definitions, keyed by pattern name.
    patterns: HashMap<String, String>,
}
165
166impl Grok {
167 pub fn add_pattern<T: Into<String>>(&mut self, name: T, pattern: T) {
176 self.patterns.insert(name.into(), pattern.into());
177 }
178
179 pub fn compile(&self, s: &str, named_capture_only: bool) -> Result<Pattern, String> {
195 let mut alias_map = HashMap::new();
196 let mut haystack = s.to_string();
197 let mut index = 0;
198 let mut iter_left = MAX_RECURSION;
199
200 while let Some(caps) = GROK_REGEX.captures(haystack.clone().as_str()) {
201 if iter_left <= 0 {
202 return Err(format!("max recursion {MAX_RECURSION} reached"));
203 }
204 iter_left -= 1;
205
206 let name = caps.get(NAME_INDEX).ok_or("name not found")?.as_str();
207 let pattern = caps.get(PATTERN_INDEX).ok_or("pattern not found")?.as_str();
208
209 let pattern_regex = self
210 .patterns
211 .get(pattern)
212 .or(DEFAULT_PATTERNS.get(pattern))
213 .ok_or(format!("pattern: {pattern} not found"))?;
214
215 let to_replace = format!("%{{{name}}}");
216
217 while haystack.matches(&to_replace).count() > 0 {
218 let replacement = match caps.get(ALIAS_INDEX) {
219 None if named_capture_only => {
220 format!("(?:{pattern_regex})")
221 }
222 _ => {
223 let new_name = format!("name{index}");
224 let origin_alias =
225 caps.get(ALIAS_INDEX).map(|m| m.as_str()).unwrap_or(pattern);
226 let type_ = caps.get(TYPE_INDEX).map(|m| m.as_str().to_string());
227 alias_map.insert(new_name.clone(), (origin_alias.to_string(), type_));
228 format!("(?<{new_name}>{pattern_regex})")
229 }
230 };
231
232 haystack = haystack.replacen(&to_replace, &replacement, 1);
233 index += 1;
234 }
235 }
236
237 let re = Regex::new(haystack.as_str()).map_err(|e| e.to_string())?;
238 Ok(Pattern::new(re, alias_map))
239 }
240}
241
242impl<T: Into<String>> FromIterator<(T, T)> for Grok {
243 fn from_iter<I: IntoIterator<Item = (T, T)>>(iter: I) -> Self {
244 let mut grok = Grok::default();
245 for (k, v) in iter {
246 grok.add_pattern(k, v);
247 }
248 grok
249 }
250}
251
252impl<S: Into<String>, const N: usize> From<[(S, S); N]> for Grok {
253 fn from(arr: [(S, S); N]) -> Self {
254 Self::from_iter(arr)
255 }
256}
257
258#[cfg(test)]
259mod tests {
260 use super::*;
261
    /// One table-driven scenario consumed by `assert` / `asserts`.
    struct Case<'a> {
        /// Custom `(name, definition)` pairs registered before compiling.
        patterns: Vec<(&'a str, &'a str)>,
        /// The grok expression to compile.
        pattern: &'a str,
        /// The text to parse with the compiled expression.
        input: &'a str,
        /// The exact field map the parse is expected to return.
        expected: HashMap<String, Value>,
        /// Forwarded to `Grok::compile`.
        named_capture_only: bool,
    }
269
270 fn assert(c: Case<'_>) {
271 let grok = Grok::from_iter(c.patterns);
272 let pattern = grok.compile(c.pattern, c.named_capture_only).unwrap();
273 assert_eq!(c.expected, pattern.parse(c.input).unwrap());
274 }
275
276 fn asserts(cases: Vec<Case<'_>>) {
277 for c in cases {
278 assert(c);
279 }
280 }
281
282 #[test]
283 fn test_simple_add_pattern() {
284 let mut grok = Grok::default();
285 grok.add_pattern("NAME", r"[A-z0-9._-]+");
286 let pattern = grok.compile("%{NAME}", false).unwrap();
287 let expected: HashMap<String, Value> = [("NAME", "admin")]
288 .into_iter()
289 .map(|(k, v)| (k.to_string(), Value::String(v.to_string())))
290 .collect();
291
292 assert_eq!(expected, pattern.parse("admin").unwrap());
293 assert_eq!(expected, pattern.parse("admin user").unwrap());
294 }
295
296 #[test]
297 fn test_named_capture_only() {
298 let grok = Grok::default();
299 let pattern = grok
300 .compile("%{USERNAME} %{EMAILADDRESS:email}", true)
302 .unwrap();
303
304 let expected = [("email", "admin@example.com")]
305 .into_iter()
306 .map(|(k, v)| (k.to_string(), Value::String(v.to_string())))
307 .collect::<HashMap<String, Value>>();
308
309 assert_eq!(expected, pattern.parse("admin admin@example.com").unwrap());
310 }
311
312 #[test]
313 fn test_from() {
314 let expected = [("NAME", "admin")]
315 .into_iter()
316 .map(|(k, v)| (k.to_string(), Value::String(v.to_string())))
317 .collect::<HashMap<String, Value>>();
318
319 {
320 let grok = Grok::from_iter([("NAME", r"[A-z0-9._-]+")]);
321 let pattern = grok.compile("%{NAME}", false).unwrap();
322 assert_eq!(expected, pattern.parse("admin").unwrap());
323 }
324 {
325 let grok = Grok::from([("NAME", r"[A-z0-9._-]+")]);
326 let pattern = grok.compile("%{NAME}", false).unwrap();
327 assert_eq!(expected, pattern.parse("admin").unwrap());
328 }
329 }
330
    /// Each input below is expected to yield an empty field map (not an
    /// error) when parsed with `%{USERNAME}`.
    #[test]
    fn test_pattern_parse_no_captures() {
        let grok = Grok::default();
        let pattern = grok.compile("%{USERNAME}", false).unwrap();

        assert!(pattern.parse("$#@").unwrap().is_empty());
        assert!(pattern.parse("").unwrap().is_empty());
        // NOTE(review): this non-ASCII literal looks mis-encoded and spans a
        // line break; confirm the intended characters against the upstream file.
        assert!(pattern.parse("โ
๐๐").unwrap().is_empty());
        assert!(pattern.parse(" ").unwrap().is_empty());
    }
341
342 #[test]
343 fn test_composite_or_pattern() {
344 let mut grok = Grok::default();
345 grok.add_pattern("MAC", r"(?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})");
346 grok.add_pattern("CISCOMAC", r"(?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})");
347 grok.add_pattern("WINDOWSMAC", r"(?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})");
348 grok.add_pattern("COMMONMAC", r"(?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})");
349
350 let pattern = grok.compile("%{MAC}", false).unwrap();
351 let expected = [
352 ("MAC", "5E:FF:56:A2:AF:15"),
353 ("COMMONMAC", "5E:FF:56:A2:AF:15"),
354 ]
355 .into_iter()
356 .map(|(k, v)| (k.to_string(), Value::String(v.to_string())))
357 .collect::<HashMap<String, Value>>();
358
359 assert_eq!(expected, pattern.parse("5E:FF:56:A2:AF:15").unwrap());
360 assert_eq!(
361 expected,
362 pattern.parse("127.0.0.1 5E:FF:56:A2:AF:15").unwrap()
363 );
364 }
365
366 #[test]
367 fn test_multiple_patterns() {
368 let mut grok = Grok::default();
369 grok.add_pattern("YEAR", r"(\d\d){1,2}");
370 grok.add_pattern("MONTH", r"\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b");
371 grok.add_pattern("DAY", r"(?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)");
372 let pattern = grok.compile("%{DAY} %{MONTH} %{YEAR}", false).unwrap();
373
374 let expected = [("DAY", "Monday"), ("MONTH", "March"), ("YEAR", "2012")]
375 .into_iter()
376 .map(|(k, v)| (k.to_string(), Value::String(v.to_string())))
377 .collect::<HashMap<String, Value>>();
378 assert_eq!(expected, pattern.parse("Monday March 2012").unwrap());
379 }
380
381 #[test]
382 fn test_adhoc_pattern() {
383 let grok = Grok::default();
384 let pattern = grok.compile(r"\[(?<threadname>[^\]]+)\]", false).unwrap();
385 let expected = [("threadname", "thread1")]
386 .into_iter()
387 .map(|(k, v)| (k.to_string(), Value::String(v.to_string())))
388 .collect::<HashMap<String, Value>>();
389 assert_eq!(expected, pattern.parse("[thread1]").unwrap());
390 }
391
392 #[test]
393 fn test_type() {
394 let mut grok = Grok::default();
395 grok.add_pattern("NUMBER", r"\d+");
396
397 {
399 let pattern = grok.compile("%{NUMBER:digit:int}", false).unwrap();
400 let expected = [("digit", Value::Int(123))]
401 .into_iter()
402 .map(|(k, v)| (k.to_string(), v))
403 .collect::<HashMap<String, Value>>();
404 assert_eq!(expected, pattern.parse("hello 123").unwrap());
405 }
406
407 {
409 let pattern = grok.compile("%{NUMBER:digit:float}", false).unwrap();
410 let expected = [("digit", Value::Float(123.0))]
411 .into_iter()
412 .map(|(k, v)| (k.to_string(), v))
413 .collect::<HashMap<String, Value>>();
414 assert_eq!(expected, pattern.parse("hello 123.0").unwrap());
415 }
416
417 {
419 let pattern = grok.compile("%{NUMBER:digit:wrong}", false);
420 assert!(pattern.is_err());
421 }
422
423 {
424 let pattern = grok.compile("%{USERNAME:digit:float}", false).unwrap();
426 assert_eq!(
427 Err("invalid float literal: grok".to_string()),
428 pattern.parse("grok")
429 );
430 }
431 }
432
433 #[test]
434 fn test_more_patterns() {
435 let cases: Vec<Case> = [(
436 vec![
437 (
438 "NGINX_HOST",
439 r#"(?:%{IP:destination.ip}|%{NGINX_NOTSEPARATOR:destination.domain})(:%{NUMBER:destination.port})?"#,
440 ),
441 ("IP", r#"(?:\[%{IPV6}\]|%{IPV6}|%{IPV4})"#),
442 ("NGINX_NOTSEPARATOR", r#"[^\t ,:]+"#),
443 ("NUMBER", r#"\d+"#),
444 (
445 "IPV6",
446 r#"((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?"#,
447 ),
448 (
449 "IPV4",
450 r#"\b(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])\.(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])\.(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])\.(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])\b"#,
451 ),
452 ],
453 "%{NGINX_HOST}",
454 "127.0.0.1:1234",
455 vec![
456 ("destination.ip", Value::String("127.0.0.1".to_string())),
457 ("destination.port", Value::String("1234".to_string())),
458 ],
459 true,
460 ),
461 (
462 vec![
463 (
464 "NGINX_HOST",
465 r#"(?:%{IP:destination.ip}|%{NGINX_NOTSEPARATOR:destination.domain})(:%{NUMBER:destination.port})?"#,
466 ),
467 ("IP", r#"(?:\[%{IPV6}\]|%{IPV6}|%{IPV4})"#),
468 ("NGINX_NOTSEPARATOR", r#"[^\t ,:]+"#),
469 ("NUMBER", r#"\d+"#),
470 (
471 "IPV6",
472 r#"((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?"#,
473 ),
474 (
475 "IPV4",
476 r#"\b(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])\.(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])\.(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])\.(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])\b"#,
477 ),
478 ],
479 "%{NGINX_HOST}",
480 "127.0.0.1:1234",
481 vec![
482 ("destination.ip", Value::String("127.0.0.1".to_string())),
483 ("destination.port", Value::String("1234".to_string())),
484 ("NGINX_HOST", Value::String("127.0.0.1:1234".to_string())),
485 ("IPV4", Value::String("127.0.0.1".to_string())),
486 ],
487 false,
488 )
489 ].into_iter().map(|(patterns, pattern, input, expected, named_capture_only)| Case {
490 patterns: patterns.into_iter().collect(),
491 pattern,
492 input,
493 expected: expected.into_iter().map(|(k, v)| (k.to_string(), v)).collect(),
494 named_capture_only,
495 }).collect();
496
497 asserts(cases);
498 }
499
500 #[test]
501 fn test_default_patterns() {
502 let cases: Vec<Case> = [
503 (
504 vec![
505 ("NGINX_HOST", r"(?:%{IP:destination.ip}|%{NGINX_NOTSEPARATOR:destination.domain})(:%{NUMBER:destination.port})?"),
506 ("NGINX_NOTSEPARATOR", r#"[^\t ,:]+"#),
507 ],
508 "%{NGINX_HOST}",
509 "127.0.0.1:1234",
510 vec![
511 ("destination.ip", Value::String("127.0.0.1".to_string())),
512 ("destination.port", Value::String("1234".to_string())),
513 ],
514 true,
515 ),
516 (
517 vec![
518 ("NGINX_HOST", r"(?:%{IP:destination.ip}|%{NGINX_NOTSEPARATOR:destination.domain})(:%{NUMBER:destination.port})?"),
519 ("NGINX_NOTSEPARATOR", r#"[^\t ,:]+"#),
520 ],
521 "%{NGINX_HOST}",
522 "127.0.0.1:1234",
523 vec![
524 ("destination.ip", Value::String("127.0.0.1".to_string())),
525 ("destination.port", Value::String("1234".to_string())),
526 ("BASE10NUM", Value::String("1234".to_string())),
527 ("NGINX_HOST", Value::String("127.0.0.1:1234".to_string())),
528 ("IPV4", Value::String("127.0.0.1".to_string())),
529 ],
530 false,
531 ),
532 ]
533 .into_iter()
534 .map(
535 |(patterns, pattern, input, expected, named_capture_only)| Case {
536 patterns: patterns.into_iter().collect(),
537 pattern,
538 input,
539 expected: expected
540 .into_iter()
541 .map(|(k, v)| (k.to_string(), v))
542 .collect(),
543 named_capture_only,
544 },
545 )
546 .collect();
547
548 asserts(cases);
549 }
550
551 #[test]
552 fn test_default_patterns_with_type() {
553 let cases: Vec<Case> = [
554 (
555 vec![
556 ("NGINX_HOST", r"(?:%{IP:destination.ip}|%{NGINX_NOTSEPARATOR:destination.domain})(:%{NUMBER:destination.port})?"),
557 ("NGINX_NOTSEPARATOR", r#"[^\t ,:]+"#),
558 ],
559 "%{NGINX_HOST}",
560 "127.0.0.1:1234",
561 vec![
562 ("destination.ip", Value::String("127.0.0.1".to_string())),
563 ("destination.port", Value::String("1234".to_string())),
564 ("BASE10NUM", Value::String("1234".to_string())),
565 ("NGINX_HOST", Value::String("127.0.0.1:1234".to_string())),
566 ("IPV4", Value::String("127.0.0.1".to_string())),
567 ],
568 false,
569 ),
570 (
571 vec![
572 ("NGINX_HOST", r#"(?:%{IP:destination.ip}|%{NGINX_NOTSEPARATOR:destination.domain})(:%{NUMBER:destination.port:int})?"#),
573 ("NGINX_NOTSEPARATOR", r#"[^\t ,:]+"#),
574 ("BOOL", r#"true|false"#),
575 ],
576 "%{NGINX_HOST} %{BOOL:destination.boolean:boolean}",
577 "127.0.0.1:1234 true",
578 vec![
579 ("destination.ip", Value::String("127.0.0.1".to_string())),
580 ("destination.port", Value::Int(1234)),
581 ("destination.boolean", Value::Bool(true)),
582 ],
583 true,
584 ),
585 ]
586 .into_iter()
587 .map(
588 |(patterns, pattern, input, expected, named_capture_only)| Case {
589 patterns: patterns.into_iter().collect(),
590 pattern,
591 input,
592 expected: expected
593 .into_iter()
594 .map(|(k, v)| (k.to_string(), v))
595 .collect(),
596 named_capture_only,
597 },
598 )
599 .collect();
600
601 asserts(cases);
602 }
603
    /// Table-driven check of the built-in patterns: for each `(pattern,
    /// samples)` row, `%{PATTERN:result}` is compiled and every sample is
    /// expected to be captured in full as the `result` field.
    #[test]
    fn test_more_default_patterns() {
        let cases = [
            ("WORD", vec!["hello", "world123", "test_data"]),
            ("NOTSPACE", vec!["example", "text-with-dashes", "12345"]),
            ("SPACE", vec![" ", "\t", " "]),
            ("INT", vec!["123", "-456", "+789"]),
            ("NUMBER", vec!["123", "456.789", "-0.123"]),
            ("BOOL", vec!["true", "false", "true"]),
            ("BASE10NUM", vec!["123", "-123.456", "0.789"]),
            ("BASE16NUM", vec!["1a2b", "0x1A2B", "-0x1a2b3c"]),
            ("BASE16FLOAT", vec!["0x1.a2b3", "-0x1A2B3C.D", "0x123.abc"]),
            ("POSINT", vec!["123", "456", "789"]),
            ("NONNEGINT", vec!["0", "123", "456"]),
            (
                "GREEDYDATA",
                vec!["anything goes", "literally anything", "123 #@!"],
            ),
            (
                "QUOTEDSTRING",
                vec!["\"This is a quote\"", "'single quoted'"],
            ),
            (
                "UUID",
                vec![
                    "123e4567-e89b-12d3-a456-426614174000",
                    "123e4567-e89b-12d3-a456-426614174001",
                    "123e4567-e89b-12d3-a456-426614174002",
                ],
            ),
            (
                "URN",
                vec![
                    "urn:isbn:0451450523",
                    "urn:ietf:rfc:2648",
                    "urn:mpeg:mpeg7:schema:2001",
                ],
            ),
            (
                "IP",
                vec![
                    "192.168.1.1",
                    "2001:0db8:85a3:0000:0000:8a2e:0370:7334",
                    "172.16.254.1",
                ],
            ),
            (
                "IPV6",
                vec![
                    "2001:0db8:85a3:0000:0000:8a2e:0370:7334",
                    "::1",
                    "fe80::1ff:fe23:4567:890a",
                ],
            ),
            ("IPV4", vec!["192.168.1.1", "10.0.0.1", "172.16.254.1"]),
            (
                "IPORHOST",
                vec!["example.com", "192.168.1.1", "fe80::1ff:fe23:4567:890a"],
            ),
            (
                "HOSTNAME",
                vec!["example.com", "sub.domain.co.uk", "localhost"],
            ),
            ("EMAILLOCALPART", vec!["john.doe", "alice123", "bob-smith"]),
            (
                "EMAILADDRESS",
                vec![
                    "john.doe@example.com",
                    "alice123@domain.co.uk",
                    "bob-smith@localhost",
                ],
            ),
            ("USERNAME", vec!["user1", "john.doe", "alice_123"]),
            ("USER", vec!["user1", "john.doe", "alice_123"]),
            (
                "MAC",
                vec!["00:1A:2B:3C:4D:5E", "001A.2B3C.4D5E", "00-1A-2B-3C-4D-5E"],
            ),
            (
                "CISCOMAC",
                vec!["001A.2B3C.4D5E", "001B.2C3D.4E5F", "001C.2D3E.4F5A"],
            ),
            (
                "WINDOWSMAC",
                vec![
                    "00-1A-2B-3C-4D-5E",
                    "00-1B-2C-3D-4E-5F",
                    "00-1C-2D-3E-4F-5A",
                ],
            ),
            (
                "COMMONMAC",
                vec![
                    "00:1A:2B:3C:4D:5E",
                    "00:1B:2C:3D:4E:5F",
                    "00:1C:2D:3E:4F:5A",
                ],
            ),
            ("HOSTPORT", vec!["example.com:80", "192.168.1.1:8080"]),
            (
                "UNIXPATH",
                vec!["/home/user", "/var/log/syslog", "/tmp/abc_123"],
            ),
            ("TTY", vec!["/dev/pts/1", "/dev/tty0", "/dev/ttyS0"]),
            (
                "WINPATH",
                vec![
                    "C:\\Program Files\\App",
                    "D:\\Work\\project\\file.txt",
                    "E:\\New Folder\\test",
                ],
            ),
            ("URIPROTO", vec!["http", "https", "ftp"]),
            ("URIHOST", vec!["example.com", "192.168.1.1:8080"]),
            (
                "URIPATH",
                vec!["/path/to/resource", "/another/path", "/root"],
            ),
            (
                "URIQUERY",
                vec!["key=value", "name=John&Doe", "search=query&active=true"],
            ),
            (
                "URIPARAM",
                vec!["?key=value", "?name=John&Doe", "?search=query&active=true"],
            ),
            (
                "URIPATHPARAM",
                vec![
                    "/path?query=1",
                    "/resource?name=John",
                    "/folder/path?valid=true",
                ],
            ),
            (
                "URI",
                vec![
                    "http://user:password@example.com:80/path?query=string",
                    "https://example.com",
                    "ftp://192.168.1.1/upload",
                ],
            ),
            (
                "PATH",
                vec![
                    "/home/user/documents",
                    "C:\\Windows\\system32",
                    "/var/log/syslog",
                ],
            ),
            (
                "MONTH",
                vec![
                    "January",
                    "Feb",
                    "March",
                    "Apr",
                    "May",
                    "Jun",
                    "Jul",
                    "August",
                    "September",
                    "October",
                    "Nov",
                    "December",
                ],
            ),
            (
                "MONTHNUM2",
                vec![
                    "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12",
                ],
            ),
            (
                "DAY",
                vec![
                    "Monday",
                    "Tuesday",
                    "Wednesday",
                    "Thursday",
                    "Friday",
                    "Saturday",
                    "Sunday",
                ],
            ),
            ("YEAR", vec!["1999", "2000", "2021"]),
            ("HOUR", vec!["00", "12", "23"]),
            ("MINUTE", vec!["00", "30", "59"]),
            ("SECOND", vec!["00", "30", "60"]),
            ("TIME", vec!["14:30", "23:59:59", "12:00:00", "12:00:60"]),
            ("DATE_US", vec!["04/21/2022", "12-25-2020", "07/04/1999"]),
            ("DATE_EU", vec!["21.04.2022", "25/12/2020", "04-07-1999"]),
            ("ISO8601_TIMEZONE", vec!["Z", "+02:00", "-05:00"]),
            ("ISO8601_SECOND", vec!["59", "30", "60.123"]),
            (
                "TIMESTAMP_ISO8601",
                vec![
                    "2022-04-21T14:30:00Z",
                    "2020-12-25T23:59:59+02:00",
                    "1999-07-04T12:00:00-05:00",
                ],
            ),
            ("DATE", vec!["04/21/2022", "21.04.2022", "12-25-2020"]),
            (
                "DATESTAMP",
                vec!["04/21/2022 14:30", "21.04.2022 23:59", "12-25-2020 12:00"],
            ),
            ("TZ", vec!["EST", "CET", "PDT"]),
            ("DATESTAMP_RFC822", vec!["Wed Jan 12 2024 14:33 EST"]),
            (
                "DATESTAMP_RFC2822",
                vec![
                    "Tue, 12 Jan 2022 14:30 +0200",
                    "Fri, 25 Dec 2020 23:59 -0500",
                    "Sun, 04 Jul 1999 12:00 Z",
                ],
            ),
            (
                "DATESTAMP_OTHER",
                vec![
                    "Tue Jan 12 14:30 EST 2022",
                    "Fri Dec 25 23:59 CET 2020",
                    "Sun Jul 04 12:00 PDT 1999",
                ],
            ),
            (
                "DATESTAMP_EVENTLOG",
                vec!["20220421143000", "20201225235959", "19990704120000"],
            ),
            (
                "SYSLOGTIMESTAMP",
                vec!["Jan 1 00:00:00", "Mar 15 12:34:56", "Dec 31 23:59:59"],
            ),
            ("PROG", vec!["sshd", "kernel", "cron"]),
            ("SYSLOGPROG", vec!["sshd[1234]", "kernel", "cron[5678]"]),
            (
                "SYSLOGHOST",
                vec!["example.com", "192.168.1.1", "localhost"],
            ),
            ("SYSLOGFACILITY", vec!["<1.2>", "<12345.13456>"]),
            ("HTTPDATE", vec!["25/Dec/2024:14:33 4"]),
        ];

        for (pattern, values) in cases {
            // A fresh instance with no custom patterns, so only the built-in
            // definitions are exercised.
            let grok = Grok::default();
            let p = grok
                .compile(&format!("%{{{pattern}:result}}"), true)
                .unwrap();

            for value in values {
                let m = p.parse(value).unwrap();
                let result = m.get("result").unwrap();
                // The whole sample must be captured, not just a prefix.
                assert_eq!(&Value::String(value.to_string()), result);
            }
        }
    }
870
871 #[test]
872 fn test_elastic_docs() {
873 let cases = [(
874 "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes:int} %{NUMBER:duration:double}",
875 "55.3.244.1 GET /index.html 15824 0.043",
876 vec![
877 ("duration", Value::Float(0.043)),
878 ("request", Value::String("/index.html".to_string())),
879 ("method", Value::String("GET".to_string())),
880 ("bytes", Value::Int(15824)),
881 ("client", Value::String("55.3.244.1".to_string())),
882 ],
883 )];
884
885 for c in cases {
886 let grok = Grok::default();
887 let pattern = grok.compile(c.0, true).unwrap();
888 let expected =
889 c.2.into_iter()
890 .map(|(k, v)| (k.to_string(), v))
891 .collect::<HashMap<String, Value>>();
892 assert_eq!(expected, pattern.parse(c.1).unwrap());
893 }
894 }
895}