1use lazy_static::lazy_static;
35use regex::Regex;
36use regex::Split;
37
38fn words(line: &str) -> Split {
39 lazy_static! {
40 static ref RE: Regex = Regex::new(r"([ \t]|\\[nr])+").unwrap();
41 }
42 RE.split(line)
43}
44
/// Removes decoration surrounding a word.
///
/// First strips every leading `u"` and then every leading `u'` (presumably
/// Python unicode-literal prefixes — confirm against the log sources), then
/// trims quotes, punctuation, brackets and backslashes from both ends.
fn trim_quote_and_punctuation(word: &str) -> &str {
    let without_prefix = word.trim_start_matches("u\"").trim_start_matches("u'");
    without_prefix.trim_matches(|c: char| {
        matches!(
            c,
            '\'' | '"' | ',' | '.' | ';' | '(' | ')' | '[' | ']' | '{' | '}' | '>' | '<' | '\\'
        )
    })
}
65
66fn global_filter(line: &str) -> bool {
76 lazy_static! {
77 static ref RE: Regex = Regex::new(concat!(
78 r"GET / HTTP/1.1",
79 r"|\* [a-zA-Z]+: [a-zA-Z0-9\.-]*$|Trying other mirror.",
81 r"|ovs-ofctl .* (dump-ports|dump-flows|show)\b",
83 r"|(ip|eb)tables .* -L\b",
84 r"|(^\^[+*-] [a-z0-9\.>-]{5,} [0-9])",
86 r"|(^[a-f0-9s/]+>[0-9]+ )",
88 r"|(^\+\+ echo [^ ]+$)"
90 )).unwrap();
91 }
92 let is_single_word = !line.contains(|c: char| c.is_whitespace());
93 is_single_word || RE.is_match(line)
94}
95
96fn remove_numbers(word: &str) -> String {
102 lazy_static! {
103 static ref RE: Regex = Regex::new("[0-9]+").unwrap();
104 }
105 RE.replace_all(word, "N").to_string()
106}
107
108fn is_date(word: &str) -> bool {
114 lazy_static! {
115 static ref RE: Regex = Regex::new(concat!(
116 "(?i-u:^(",
117 "sunday|monday|tuesday|wednesday|thursday|friday|saturday|",
118 "january|february|march|april|may|june|july|august|september|october|november|december",
119 ")$)"
120 ))
121 .unwrap();
122 }
123 RE.is_match(word)
124}
125
126fn is_error(word: &str) -> bool {
128 lazy_static! {
129 static ref RE: Regex = Regex::new(concat!(
130 "(?i-u:^(",
131 "error|failure|failed|warning|",
132 "err|fail|warn|",
133 "denied|",
134 "assert|assertion|non-zero|",
135 "exception|traceback",
136 ")$)"
137 ))
138 .unwrap();
139 }
140 RE.is_match(word)
141}
142
143fn contains_odd_char(word: &str) -> bool {
149 lazy_static! {
150 static ref RE: Regex = Regex::new(r"[<>{}%$,*]").unwrap();
151 }
152 RE.is_match(word)
153}
154
155fn is_uid(word: &str) -> bool {
163 lazy_static! {
164 static ref RE: Regex =
165 Regex::new(concat!("^(:*", r"[\[\]0-9a-fA-FxZ]+[:.-]*", ")+$")).unwrap();
166 }
167 RE.is_match(word)
168}
169
170fn is_uuid(word: &str) -> bool {
172 lazy_static! {
173 static ref RE: Regex = Regex::new(concat!(
174 "^[a-zA-Z0-9].*-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{4}-"
175 ))
176 .unwrap();
177 }
178 RE.is_match(word)
179}
180
181fn has_many_dash(word: &str) -> bool {
183 lazy_static! {
184 static ref RE: Regex = Regex::new(concat!("^.+-.+-.+-.")).unwrap();
185 }
186 RE.is_match(word)
187}
188
189fn is_cookie(word: &str) -> bool {
190 lazy_static! {
191 static ref RE: Regex =
192 Regex::new(concat!("^(", "gAAAA|AAAA|tx[a-z]|tap|req-|AUTH_", ")")).unwrap();
193 }
194 RE.is_match(word)
195}
196
197fn is_url(word: &str) -> bool {
198 lazy_static! {
199 static ref RE: Regex =
200 Regex::new(concat!("(?i:^", "(https|http|ftp|ssh)://", ")")).unwrap();
201 }
202 RE.is_match(word)
203}
204
205fn is_base64(word: &str) -> bool {
212 lazy_static! {
213 static ref RE: Regex = Regex::new(concat!("^", "[A-Za-z0-9+/=]+", "$")).unwrap();
214 }
215 word.ends_with("==") || (word.len() > 24 && RE.is_match(word))
216}
217
218fn is_hash(word: &str) -> bool {
223 lazy_static! {
224 static ref RE: Regex = Regex::new(concat!("(?i:^", "(hash|sha|md)[0-9]*:", ")")).unwrap();
225 }
226 RE.is_match(word)
227}
228
229fn is_refs(word: &str) -> bool {
230 lazy_static! {
231 static ref RE: Regex = Regex::new(concat!(r"^\w{7}\.\.\w{7}$")).unwrap();
232 }
233 word.starts_with("refs/") || word.starts_with("repos/") || RE.is_match(word)
234}
235
/// Splits `word` at its first `=` or `:` into a `(key, value)` pair.
///
/// Returns `None` when there is no separator, or when the key does not
/// start with an ASCII letter or an underscore (an empty key is rejected).
fn is_key_value(word: &str) -> Option<(&str, &str)> {
    let (key, value) = word.split_once(|c: char| c == '=' || c == ':')?;
    let starts_like_identifier = key.starts_with(|c: char| c.is_ascii_alphabetic() || c == '_');
    if starts_like_identifier {
        Some((key, value))
    } else {
        None
    }
}
253
/// Splits `word` at its first `[`, `(`, `\` or `@` into two parts.
///
/// Any trailing `]` or `)` characters are trimmed from the second part.
/// Returns `None` when no such separator is present.
fn is_two_words(word: &str) -> Option<(&str, &str)> {
    word.split_once(|c: char| matches!(c, '[' | '(' | '\\' | '@'))
        .map(|(first, second)| {
            let second = second.trim_end_matches(|c: char| c == ']' || c == ')');
            (first, second)
        })
}
261
262fn is_key_for_id(word: &str) -> bool {
263 lazy_static! {
264 static ref RE: Regex = Regex::new(concat!(
265 "(?i:",
266 "(id|key|ref|region|token|secret|password)",
267 ")"
268 ))
269 .unwrap();
270 }
271 RE.is_match(word)
272}
273
/// Returns `true` when `word` contains `tmp/` or `/tmp` anywhere.
fn is_random_path(word: &str) -> bool {
    ["tmp/", "/tmp"].iter().any(|&marker| word.contains(marker))
}
277
/// Unit tests for the individual word classifiers.
#[cfg(test)]
mod re_tests {
    use super::*;

    #[test]
    fn test_remove_numbers() {
        let stripped = remove_numbers("test42-check");
        assert_eq!(stripped, "testN-check");
    }

    #[test]
    fn test_date() {
        // Exact day names match regardless of case.
        for name in ["sunday", "saturday", "Monday"].iter() {
            assert!(is_date(name));
        }
        // Padding, abbreviations and truncated names must not match.
        for other in ["sunday ", " saturday", " jan ", "sund"].iter() {
            assert!(!is_date(other));
        }
    }

    #[test]
    fn test_is_error() {
        assert!(is_error("FAIL"));
    }

    #[test]
    fn test_id() {
        let samples = [
            "aa:bb:cc:00:ff",
            "42.24.21.12",
            "abab-efef",
            "2022-02-03",
            "18:01:00.1",
        ];
        for sample in samples.iter() {
            assert!(is_uid(sample));
        }
    }

    #[test]
    fn test_hash() {
        for sample in ["sha256:aabbcc00", "md5:test", "MD42:abab"].iter() {
            assert!(is_hash(sample));
        }
    }

    #[test]
    fn test_composite() {
        assert_eq!(is_key_value("key=value"), Some(("key", "value")));
        assert_eq!(is_key_value("keyvalue"), None);
        assert_eq!(is_key_value("!KEY=value"), None);
    }

    #[test]
    fn test_random_path() {
        assert!(is_random_path("/tmp/test"));
        assert!(is_random_path("/var/tmp/key"));
        assert!(!is_random_path("/usr"));
    }

    #[test]
    fn test_trim_pid() {
        let name = trim_pid("systemd[42");
        assert_eq!(name, Some("systemd"));
    }
}
341
342fn parse_literal(word: &str) -> Option<&str> {
343 if is_date(word) {
344 Some("%DATE")
345 } else if is_hash(word) {
346 Some("%HASH")
347 } else if is_uid(word) {
348 Some("%ID")
349 } else if is_cookie(word) {
350 Some("%COOKIE")
351 } else if is_uuid(word) {
352 Some("%UID")
353 } else if is_url(word) {
354 Some("%URL")
355 } else if is_random_path(word) {
356 Some("%PATH")
357 } else if is_refs(word) {
358 Some("%REF")
359 } else if is_base64(word) {
360 Some("%BASE64")
361 } else {
362 None
363 }
364}
365
/// Strips a trailing `[<digits>` suffix, e.g. `systemd[42` -> `systemd`.
///
/// Returns `None` when, after removing trailing ASCII digits, the word does
/// not end with `[`.
fn trim_pid(word: &str) -> Option<&str> {
    let without_digits = word.trim_end_matches(|c: char| c.is_ascii_digit());
    without_digits.strip_suffix("[")
}
370
/// Appends an error keyword to `result` five times: once verbatim and then
/// with the suffixes `%A`, `%B`, `%C` and `%D`, separated by single spaces.
///
/// For `word = "FAIL"` this appends `FAIL FAIL%A FAIL%B FAIL%C FAIL%D`
/// (no trailing space).
fn push_error(word: &str, result: &mut String) {
    result.push_str(word);
    for suffix in ["%A", "%B", "%C", "%D"].iter() {
        result.push(' ');
        result.push_str(word);
        result.push_str(suffix);
    }
}
389
390fn do_process(mut word: &str, result: &mut String) -> bool {
392 word = trim_quote_and_punctuation(word);
393 let mut added = true;
394 if let Some(token) = parse_literal(word) {
396 result.push_str(token)
398 } else if is_error(word) {
399 push_error(word, result)
401 } else if word.len() <= 3 {
402 added = false;
406 } else if let Some(strip) = trim_pid(word) {
407 do_process(strip, result);
409 result.push_str("%PID");
410 } else if contains_odd_char(word) {
411 result.push_str("%ODD")
412 } else if let Some((key, value)) = is_key_value(word) {
413 do_process(key, result);
415 if is_key_for_id(key) {
416 result.push_str("%EQ %VALUE_ID")
417 } else {
418 result.push_str("%EQ ");
419 added = do_process(value, result)
420 }
421 } else if let Some((w1, w2)) = word.split_once('/') {
422 if do_process(w1, result) {
423 result.push_str("/ ");
424 }
425 added = do_process(w2, result);
426 } else if let Some((w1, w2)) = word.split_once('-') {
427 if has_many_dash(w2) {
428 result.push_str("%DASH")
431 } else {
432 if do_process(w1, result) {
433 result.push_str("- ");
434 }
435 added = do_process(w2, result)
436 }
437 } else if let Some((w1, w2)) = word.split_once('|') {
438 if do_process(w1, result) {
439 result.push_str("| ");
440 }
441 added = do_process(w2, result)
442 } else if word.len() >= 32 {
443 result.push_str("%BIG")
444 } else if let Some((w1, w2)) = is_two_words(word) {
445 if do_process(w1, result) {
446 result.push(' ');
447 }
448 added = do_process(w2, result);
449 } else {
450 let x = remove_numbers(word);
452 if x.len() > 3 {
453 result.push_str(&x)
454 } else {
455 added = false;
456 }
457 }
458 added
459}
460
461pub fn process(line: &str) -> String {
463 let line = line.trim();
465
466 if global_filter(line) {
468 return "%GL_FILTER".to_string();
469 }
470
471 let mut result = String::with_capacity(line.len());
473 for word in words(line) {
474 if do_process(word, &mut result) {
475 result.push(' ')
476 }
477 }
478 result.trim().to_string()
480}
481
/// Asserts that two log lines are turned into the same token string by
/// `process`. `process` must be in scope at the call site.
#[macro_export]
macro_rules! tokens_eq {
    ($left:expr, $right:expr) => {
        assert_eq!(process($left), process($right))
    };
}
490
/// End-to-end tests for `process` and `words`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that each `(input, expected)` pair tokenizes to `expected`.
    fn assert_tokens(cases: &[(&str, &str)]) {
        for (input, expected) in cases.iter() {
            assert_eq!(process(input), *expected);
        }
    }

    #[test]
    fn test_process_nl() {
        assert_tokens(&[
            ("testy\r\n", "%GL_FILTER"),
            ("* mirror: 42\n", "%GL_FILTER"),
        ]);
    }

    #[test]
    fn test_process() {
        assert_tokens(&[
            (
                "error hash mismatch 'sha256:42'",
                "error error%A error%B error%C error%D hash mismatch %HASH",
            ),
            ("getting \"http://local:4242/test\"", "getting %URL"),
            (
                "sha256://toto tata finished in 28ms by systemd[4248]",
                "%HASH tata finished systemd%PID",
            ),
            (
                "log_url=https://ansible AWS_ACCESS_KEY_ID=ASIA6CCDWXDODS7A4X53 ",
                "log_url%EQ %URL AWS_ACCESS_KEY_ID%EQ %VALUE_ID",
            ),
            (
                "** 192.168.24.1:8787/tripleovictoria/openstack-heat-api:175194d1801ec25367354976a18e3725-updated-20220125105210 **",
                "%ID/ tripleovictoria/ openstack- heat- %EQ %ID- updated- %ID",
            ),
        ]);
    }

    #[test]
    fn test_process02() {
        assert_tokens(&[
            (
                "nova::placement::password: UIbv1LPZWIXpBtaToNzsmgZI3",
                "nova%EQ :placement::password: %BASE64",
            ),
            (
                "2022-01-25 12:11:14 | ++ export OS_PASSWORD=PobDt1cxalvf40uv9Om5VTNkw",
                "%ID %ID export OS_PASSWORD%EQ %VALUE_ID",
            ),
            (
                "^+ ntp1a.example.com 1 10 377 635 -1217us[-1069us] +/- 16ms",
                "%GL_FILTER",
            ),
            ("a PobDt1cxalvf40uv9Om5VTNkw", "%ID %BASE64"),
        ]);
    }

    #[test]
    fn test_process03() {
        assert_tokens(&[(
            "2022-01-25T14:09:24.422Z|00014|jsonrpc|WARN|tcp:[fd00:fd00:fd00:2000::21e]:50504: receive error: Connection reset by peer",
            "%ID- %ID- NTN:N:N.NZ| %ID| jsonrpc| WARN WARN%A WARN%B WARN%C WARN%D| %EQ %ID receive error error%A error%B error%C error%D%EQ Connection reset peer",
        )]);
        // Lines differing only in their identifiers must tokenize identically.
        tokens_eq!(
            "Event ID: 3e75e420-761f-11ec-8d18-a0957bd68c36",
            "Event ID: f671eb00-730e-11ec-915f-abcd86bae8f1"
        );
        tokens_eq!(
            "\"mac_address\": \"12:fa:c8:b2:e0:ff\",",
            "\"mac_address\": \"12:a6:f2:17:d3:b5\","
        );
        assert_tokens(&[
            (
                "File \"nodepool/cmd/config_validator.py\", line 144, in validate",
                "File nodepool/ config_validator.py line %ID validate",
            ),
            (
                "controller | \"after\": \"3}QP5CJuNBP65S%c:y>o\"",
                "controller after%EQ %ODD",
            ),
            (
                "[Zuul] Job complete, result: FAILURE",
                "Zuul complete result%EQ FAILURE FAILURE%A FAILURE%B FAILURE%C FAILURE%D",
            ),
        ]);
    }

    #[test]
    fn test_process04() {
        assert_tokens(&[(
            "\"assertion\": \"new_dhcp is changed\"",
            "assertion assertion%A assertion%B assertion%C assertion%D%EQ new_dhcp changed",
        )]);
    }

    #[test]
    fn test_process20() {
        tokens_eq!(
            "controller | +3}QP5CJuNBP65S%c:y>o",
            "controller | +1T9,Eqb@g[VL@b0u*Et!"
        );
        tokens_eq!(
            " \"contents\": \"3}QP5CJuNBP65S%c:y>o\"",
            " \"contents\": \"U%aNO^b5ITFU^xTTa9rV\","
        );
        assert_tokens(&[(
            "pkg: openstack-tripleo-heat-templates-13.5.1-0.20220121152841.1408598.el8.noarch",
            "%EQ %DASH",
        )]);
        tokens_eq!(
            "id = \"HvXxSk-Foz9-XJE4-RZSD-KXxc-NxTt-AMi18O\"",
            "id = \"BBW6bE-58DO-3GeE-3ix2-8pLG-wfWL-aiTdAf\""
        );
        tokens_eq!(
            "rabbitmq::erlang_cookie: xkkGdfgqlUovQz3fP2CZ",
            "rabbitmq::erlang_cookie: xkkGdfgqlUovQz3fP2CZ"
        );
        tokens_eq!(
            "ZUUL_REF=Z60f0ad207fbb4c55a07d665ef44131a4",
            "ZUUL_REF=Zbffe5ccbe3ef4ab48c016783ea185dfa"
        );
        tokens_eq!("tap44302f40-8", "tap423e2e40-8");
        tokens_eq!(
            "[fd00:fd00:fd00:2000::21e]:5672 (1)",
            "[fd00:ad00:fd00:2100::21e]:5872 (1)"
        );
        tokens_eq!(
            "DHCPREQUEST(tap44302f40-82) 192.168.24.9 fa:16:3e:94:88:3f",
            "DHCPREQUEST(tap443e2140-82) 192.168.25.9 fb:16:3e:94:88:3f"
        );
        tokens_eq!(
            r"\ = Local Signing Authority, CN = caa53b4e-fff041fe-93823ed2-7ee25a11\n\n\",
            r"\ = Local Signing Authority, CN = 41319aee-68934f60-baf41d6e-158a15cd\n\n\"
        );
        tokens_eq!(
            r"Baremetal Node@83d24142-5411-4568-b344-05caac9fcfbf: {}",
            r"Baremetal Node@e54437f7-1f1d-4a9b-8cc5-ce73550f8608: {}"
        );
    }

    #[test]
    fn test_process21() {
        tokens_eq!(
            r"-netdev tap,fd=123,id=hostnet0 \",
            r"-netdev tap,fd=175,id=hostnet0 \"
        );
        tokens_eq!(
            r"-device virtio-net-pci,rx_queue_size=512,host_mtu=1292,netdev=hostnet0,id=net0,mac=fa:16:3e:a3:dc:e1,bus=pci.0,addr=0x3",
            r"-device virtio-net-pci,rx_queue_size=52,host_mtu=12920,netdev=hostnet0,id=net0,mac=fa:16:3e:1a:1c:fd,bus=pci.1,addr=0x4"
        );
    }

    #[test]
    fn test_process22() {
        tokens_eq!(
            "creating Value \"ApacheNetworks\" Stack \"undercloud-UndercloudServiceChain-sczoll7kpg37-ServiceChain-ghee7usnfx3j-17-wztq7dmj6blw-ApacheServiceBase-7nwdrcrxjpmz",
            "creating Value \"ApacheNetworks\" Stack \"undercloud-UndercloudServiceChain-dt26w6s63vd6-ServiceChain-dxxxgncfjqeg-0-yhtbooauehxj"
        );
    }

    #[test]
    fn test_process_ovn() {
        assert_tokens(&[
            ("addresses: [\"fa:16:3e:69:3c:cd\"]", "addresses%EQ %ID"),
            (
                "addresses: [\"fa:16:3e:19:15:bb 192.168.199.2\"]",
                "addresses%EQ %ID %ID",
            ),
        ]);
    }

    #[test]
    fn test_process_amqp() {
        assert_tokens(&[(
            "closing AMQP connection <0.4375.0> ([fd00:fd00:fd00:2000::40]:33588 -> [fd00:fd00:fd00:2000::21e]:5672 - nova-compute:8:08b39730-b2e6-4d1f-bcc1-318f9bcfd7c6, vhost: '/', user: 'guest')",
            "closing AMQP connection %ID %ID %ID %UID vhost%EQ user%EQ guest",
        )]);
    }

    #[test]
    fn test_kv() {
        assert_tokens(&[(
            "a name=delorean-tripleo-repos-8c402732195f680e7bf8197030cb5a25d45df5a9",
            "%ID name%EQ delorean- tripleo- repos- %ID",
        )]);
    }

    #[test]
    fn test_words() {
        // Leading and trailing separators produce empty words.
        let parts: Vec<&str> = words(" a b ").collect();
        assert_eq!(parts, ["", "a", "b", ""]);
    }
}
679}