// logreduce_tokenizer/tokenizer.rs

1// Copyright (C) 2022 Red Hat
2// SPDX-License-Identifier: Apache-2.0
3
4//! The tokenizer logic
5//!
6//! The main function is [process]. The output is designed for further feature extraction,
7//! for example with a bag of words or hashing vectorizer. It looks like this:
8//!
9//! ```rust
10//! # use logreduce_tokenizer::tokenizer::{process};
11//! assert_eq!(process(
12//!    "2017-06-24 02:52:17.732 22627 tempest.lib.common.rest_client [req-b932e095-6706-4f5a-bd75-241c407a9d01 ] Request (main): 201 POST https://10.0.1.9/identity/v3/auth/tokens"),
13//!    "%ID %ID %ID tempest.lib.common.rest_client %COOKIE Request main%EQ %ID POST %URL")
14//! ```
15//!
16//! Here are some use cases:
17//!
18//! ```rust
19//! # use logreduce_tokenizer::{tokens_eq, tokenizer::*};
20//! tokens_eq!("+ export ZUUL_REF=refs/zuul/master/6546b192211a4531859db9d8b9375154",
21//!            "+ export ZUUL_REF=refs/zuul/master/9249f6066a2041bbbeb838e2ca1cf2b4");
22//! tokens_eq!("2017-06-23 20:10:06,848 INFO:dlrn-build:DEBUG: writing output... [ 90%] configuration",
23//!            "2017-06-24 13:35:57,754 INFO:dlrn-build:DEBUG: writing output... [ 88%] configuration");
24//! tokens_eq!("tempest.lib.common.rest_client [req-b932e095-6706-4f5a-bd75-241c407a9d01 ] Request (main): 201 POST https://10.0.1.9/identity/v3/auth/tokens",
25//!            "tempest.lib.common.rest_client [req-08043549-3227-4c61-aa3b-9d02fc8437c3 ] Request (main): 201 POST https://104.130.217.34/identity/v3/auth/tokens");
26//! ```
27//!
28//! TODO: decode json object and re-order the key to pass this test:
29//! ```should_panic
30//! # use logreduce_tokenizer::tokenizer::{process};
31//! assert_eq!(process("{\"key\": true, \"oth\": 1}"), process("{\"oth\": 1, \"key\": true}"));
32//! ```
33
34use lazy_static::lazy_static;
35use regex::Regex;
36use regex::Split;
37
38fn words(line: &str) -> Split {
39    lazy_static! {
40        static ref RE: Regex = Regex::new(r"([ \t]|\\[nr])+").unwrap();
41    }
42    RE.split(line)
43}
44
/// Strip surrounding quotes, brackets and punctuation from a word,
/// including Python-style `u"` / `u'` prefixes.
fn trim_quote_and_punctuation(word: &str) -> &str {
    let bare = word.trim_start_matches("u\"").trim_start_matches("u'");
    bare.trim_matches(|c: char| {
        matches!(
            c,
            '\'' | '"' | ',' | '.' | ';' | '(' | ')' | '[' | ']' | '{' | '}' | '>' | '<' | '\\'
        )
    })
}
65
66/// Apply global filter to skip specific lines.
67/// ```rust
68/// # use logreduce_tokenizer::tokenizer::{process};
69/// assert_eq!(process("iptables -N RULES42 -L"), "%GL_FILTER");
70/// assert_eq!(process("e2b607f0bb193c9bfed94af532ba1>33 STORED"), "%GL_FILTER");
71/// assert_eq!(process("s/5bf8>28 sending key"), "%GL_FILTER");
72/// assert_eq!(process("^- srcf-ntp.example.edu 2 9 377 429 -358us[ -358us] +/- 63ms"), "%GL_FILTER");
73/// assert_eq!(process("++ echo mswAxrrS1YwyGtIut9Vd"), "%GL_FILTER");
74/// ```
75fn global_filter(line: &str) -> bool {
76    lazy_static! {
77        static ref RE: Regex = Regex::new(concat!(
78            r"GET / HTTP/1.1",
79            // yum mirrors information
80            r"|\* [a-zA-Z]+: [a-zA-Z0-9\.-]*$|Trying other mirror.",
81            // useless debug statement
82            r"|ovs-ofctl .* (dump-ports|dump-flows|show)\b",
83            r"|(ip|eb)tables .* -L\b",
84            // chrony logs
85            r"|(^\^[+*-] [a-z0-9\.>-]{5,} [0-9])",
86            // memcached logs
87            r"|(^[a-f0-9s/]+>[0-9]+ )",
88            // shell debugs
89            r"|(^\+\+ echo [^ ]+$)"
90        )).unwrap();
91    }
92    let is_single_word = !line.contains(|c: char| c.is_whitespace());
93    is_single_word || RE.is_match(line)
94}
95
96/// Replace numbers sequences with `N`.
97/// ```rust
98/// # use logreduce_tokenizer::{tokens_eq, tokenizer::*};
99/// tokens_eq!("running test42", "running test43");
100/// ```
101fn remove_numbers(word: &str) -> String {
102    lazy_static! {
103        static ref RE: Regex = Regex::new("[0-9]+").unwrap();
104    }
105    RE.replace_all(word, "N").to_string()
106}
107
108/// Check if a word matches a date.
109/// ```rust
110/// # use logreduce_tokenizer::{tokens_eq, tokenizer::*};
111/// tokens_eq!("Sunday February 6th - message", "Monday February 7th - message");
112/// ```
113fn is_date(word: &str) -> bool {
114    lazy_static! {
115        static ref RE: Regex = Regex::new(concat!(
116            "(?i-u:^(",
117            "sunday|monday|tuesday|wednesday|thursday|friday|saturday|",
118            "january|february|march|april|may|june|july|august|september|october|november|december",
119            ")$)"
120        ))
121        .unwrap();
122    }
123    RE.is_match(word)
124}
125
126/// Check if a word matches an error prefix.
127fn is_error(word: &str) -> bool {
128    lazy_static! {
129        static ref RE: Regex = Regex::new(concat!(
130            "(?i-u:^(",
131            "error|failure|failed|warning|",
132            "err|fail|warn|",
133            "denied|",
134            "assert|assertion|non-zero|",
135            "exception|traceback",
136            ")$)"
137        ))
138        .unwrap();
139    }
140    RE.is_match(word)
141}
142
143/// Check if a word contains weird char, likely in generated id.
144/// ```rust
145/// # use logreduce_tokenizer::{tokens_eq, tokenizer::*};
146/// tokens_eq!("A{$@42", "$A%TE");
147/// ```
148fn contains_odd_char(word: &str) -> bool {
149    lazy_static! {
150        static ref RE: Regex = Regex::new(r"[<>{}%$,*]").unwrap();
151    }
152    RE.is_match(word)
153}
154
155/// Check if a word only contains hexa and sep char.
156/// ```rust
157/// # use logreduce_tokenizer::{tokens_eq, tokenizer::*};
158/// tokens_eq!("the_ip is 127.0.0.1", "the_ip is ::1");
159/// tokens_eq!("the_mac is aa:bb:cc", "the_mac is 00:11:cc");
160/// tokens_eq!("the_num is 0x4243", "the_num is 0x4142");
161/// ```
162fn is_uid(word: &str) -> bool {
163    lazy_static! {
164        static ref RE: Regex =
165            Regex::new(concat!("^(:*", r"[\[\]0-9a-fA-FxZ]+[:.-]*", ")+$")).unwrap();
166    }
167    RE.is_match(word)
168}
169
170/// 3 x 4letters word separated by -
171fn is_uuid(word: &str) -> bool {
172    lazy_static! {
173        static ref RE: Regex = Regex::new(concat!(
174            "^[a-zA-Z0-9].*-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{4}-"
175        ))
176        .unwrap();
177    }
178    RE.is_match(word)
179}
180
181/// 3 dash separator
182fn has_many_dash(word: &str) -> bool {
183    lazy_static! {
184        static ref RE: Regex = Regex::new(concat!("^.+-.+-.+-.")).unwrap();
185    }
186    RE.is_match(word)
187}
188
189fn is_cookie(word: &str) -> bool {
190    lazy_static! {
191        static ref RE: Regex =
192            Regex::new(concat!("^(", "gAAAA|AAAA|tx[a-z]|tap|req-|AUTH_", ")")).unwrap();
193    }
194    RE.is_match(word)
195}
196
197fn is_url(word: &str) -> bool {
198    lazy_static! {
199        static ref RE: Regex =
200            Regex::new(concat!("(?i:^", "(https|http|ftp|ssh)://", ")")).unwrap();
201    }
202    RE.is_match(word)
203}
204
205/// ```rust
206/// # use logreduce_tokenizer::{tokens_eq, tokenizer::*};
207/// tokens_eq!("MqoplXLA2LPnJKTNMQW5JpGyMLJcLxRDDEejzh6b1im8KV/5TRKDsg7b5FwBJJoN", "fJkzOzsJdqxvhSvDFkUlAP7a/+kOBCYi1Yp1pz0v/mHLi0r1z5xtx3BemXVYHbom");
208/// tokens_eq!("a EqTsSXKlOsEjfIdFld+uwopnIIqvKI+Xu6e0RcAGYJEfj56/MG2IdH7/h1JmQ///\\nn2RZ/ocRcL5as2EHQES0b+/I12a2Gj+W+ub0OQAGDq8iL5o8P0/ogEWrpZmoBC+oi",
209///            "a MqoplXLA2LPnJKTNMQW5JpGyMLJcLxRDDEejzh6b1im8KV/5TRKDsg7b5FwBJJoN fJkzOzsJdqxvhSvDFkUlAP7a/+kOBCYi1Yp1pz0v/mHLi0r1z5xtx3BemXVYHbom");
210/// ```
211fn is_base64(word: &str) -> bool {
212    lazy_static! {
213        static ref RE: Regex = Regex::new(concat!("^", "[A-Za-z0-9+/=]+", "$")).unwrap();
214    }
215    word.ends_with("==") || (word.len() > 24 && RE.is_match(word))
216}
217
218/// ```
219/// # use logreduce_tokenizer::{tokens_eq, tokenizer::{process}};
220/// tokens_eq!("md5:d41d8cd98f00b204e9800998ecf8427e", "md5:e7b26fc34f528b5b19c4450867b9d597")
221/// ```
222fn is_hash(word: &str) -> bool {
223    lazy_static! {
224        static ref RE: Regex = Regex::new(concat!("(?i:^", "(hash|sha|md)[0-9]*:", ")")).unwrap();
225    }
226    RE.is_match(word)
227}
228
229fn is_refs(word: &str) -> bool {
230    lazy_static! {
231        static ref RE: Regex = Regex::new(concat!(r"^\w{7}\.\.\w{7}$")).unwrap();
232    }
233    word.starts_with("refs/") || word.starts_with("repos/") || RE.is_match(word)
234}
235
/// Split a `key=value` or `key:value` word when the key looks like an identifier.
///
/// Returns `None` when there is no separator or the key does not start with a
/// letter or underscore.
/// ```
/// # use logreduce_tokenizer::{tokens_eq, tokenizer::{process}};
/// tokens_eq!("key=01:02:ff", "key=aa:bb:cc")
/// ```
// TODO: check for word terminated by `:`, where the value is the next word
fn is_key_value(word: &str) -> Option<(&str, &str)> {
    // Split on the first `=` or `:`, whichever comes first.
    let (key, value) = word.split_once(|c| c == '=' || c == ':')?;
    // Only accept keys that start like an identifier, to avoid treating
    // arbitrary punctuation-lead words (e.g. `!KEY=v`) as key/value pairs.
    if key.starts_with(|c: char| c.is_ascii_alphabetic() || c == '_') {
        Some((key, value))
    } else {
        None
    }
}
253
/// Separate attached words like `DHCPOFFER(ipaddr)` in `DHCPOFFER ipaddr`.
///
/// Splits on the first `[`, `(`, `\` or `@`, trimming a closing `]` or `)`
/// from the second part. Returns `None` when no such separator is present.
fn is_two_words(word: &str) -> Option<(&str, &str)> {
    word.split_once(|c| c == '[' || c == '(' || c == '\\' || c == '@')
        .map(|(head, tail)| (head, tail.trim_end_matches(|c| c == ']' || c == ')')))
}
261
262fn is_key_for_id(word: &str) -> bool {
263    lazy_static! {
264        static ref RE: Regex = Regex::new(concat!(
265            "(?i:",
266            "(id|key|ref|region|token|secret|password)",
267            ")"
268        ))
269        .unwrap();
270    }
271    RE.is_match(word)
272}
273
/// Check if a word refers to a temporary (likely randomly named) path.
fn is_random_path(word: &str) -> bool {
    ["tmp/", "/tmp"].iter().any(|needle| word.contains(needle))
}
277
#[cfg(test)]
mod re_tests {
    use super::*;

    #[test]
    fn test_remove_numbers() {
        assert_eq!(remove_numbers("test42-check"), "testN-check");
    }

    #[test]
    fn test_date() {
        for word in ["sunday", "saturday", "Monday"] {
            assert!(is_date(word), "{} should be a date", word);
        }
        for word in ["sunday ", " saturday", " jan ", "sund"] {
            assert!(!is_date(word), "{} should not be a date", word);
        }
    }

    #[test]
    fn test_is_error() {
        assert!(is_error("FAIL"));
    }

    #[test]
    fn test_id() {
        for word in [
            "aa:bb:cc:00:ff",
            "42.24.21.12",
            "abab-efef",
            "2022-02-03",
            "18:01:00.1",
        ] {
            assert!(is_uid(word), "{} should be an uid", word);
        }
    }

    #[test]
    fn test_hash() {
        for word in ["sha256:aabbcc00", "md5:test", "MD42:abab"] {
            assert!(is_hash(word), "{} should be a hash", word);
        }
    }

    #[test]
    fn test_composite() {
        assert_eq!(is_key_value("key=value"), Some(("key", "value")));
        assert_eq!(is_key_value("keyvalue"), None);
        assert_eq!(is_key_value("!KEY=value"), None);
    }

    #[test]
    fn test_random_path() {
        assert!(is_random_path("/tmp/test"));
        assert!(is_random_path("/var/tmp/key"));
        assert!(!is_random_path("/usr"));
    }

    #[test]
    fn test_trim_pid() {
        assert_eq!(trim_pid("systemd[42"), Some("systemd"))
    }
}
341
342fn parse_literal(word: &str) -> Option<&str> {
343    if is_date(word) {
344        Some("%DATE")
345    } else if is_hash(word) {
346        Some("%HASH")
347    } else if is_uid(word) {
348        Some("%ID")
349    } else if is_cookie(word) {
350        Some("%COOKIE")
351    } else if is_uuid(word) {
352        Some("%UID")
353    } else if is_url(word) {
354        Some("%URL")
355    } else if is_random_path(word) {
356        Some("%PATH")
357    } else if is_refs(word) {
358        Some("%REF")
359    } else if is_base64(word) {
360        Some("%BASE64")
361    } else {
362        None
363    }
364}
365
/// Strip a trailing `[<digits>` process-id suffix, e.g. `systemd[42` -> `systemd`.
///
/// Returns `None` when the word does not end with `[` once trailing digits
/// are removed (i.e. it does not look like a pid-annotated program name).
fn trim_pid(word: &str) -> Option<&str> {
    word.trim_end_matches(|c: char| c.is_ascii_digit())
        .strip_suffix('[')
}
370
/// Makes error token appears bigger.
/// ```rust
/// # use logreduce_tokenizer::tokenizer::*;
/// assert_eq!(process("Test Fail"), "Test Fail Fail%A Fail%B Fail%C Fail%D");
/// ```
fn push_error(word: &str, result: &mut String) {
    // Repeat the error word with distinct suffixes so it weighs more
    // in the downstream feature vector.
    result.push_str(word);
    for tag in ["%A", "%B", "%C", "%D"] {
        result.push(' ');
        result.push_str(word);
        result.push_str(tag);
    }
}
389
/// The tokenizer main (recursive) function
///
/// Tokenizes a single `word`, appending the resulting token(s) to `result`.
/// Returns true when something was appended, so the caller knows whether to
/// emit a trailing separator after this word.
fn do_process(mut word: &str, result: &mut String) -> bool {
    word = trim_quote_and_punctuation(word);
    let mut added = true;
    // We try to process from the most specifics to the most general case
    if let Some(token) = parse_literal(word) {
        // e.g. `February` or `sha256:...`
        result.push_str(token)
    } else if is_error(word) {
        // e.g. `Traceback`
        push_error(word, result)
    } else if word.len() <= 3 {
        // Short words are dropped entirely (added stays false).
        // This is currently confusing the hashing vectorizer,
        // but it might be useful to keep small words for another feature vector
        // result.push_str("SML")
        added = false;
    } else if let Some(strip) = trim_pid(word) {
        // e.g. `"systemd[42]"`: tokenize the program name, tag the pid.
        do_process(strip, result);
        result.push_str("%PID");
    } else if contains_odd_char(word) {
        // Words with shell/meta characters are likely generated noise.
        result.push_str("%ODD")
    } else if let Some((key, value)) = is_key_value(word) {
        // e.g. TOKEN=42: keep the key, and hide id-like values.
        do_process(key, result);
        if is_key_for_id(key) {
            result.push_str("%EQ %VALUE_ID")
        } else {
            result.push_str("%EQ ");
            added = do_process(value, result)
        }
    } else if let Some((w1, w2)) = word.split_once('/') {
        // Recurse on each side of a path-like separator.
        if do_process(w1, result) {
            result.push_str("/ ");
        }
        added = do_process(w2, result);
    } else if let Some((w1, w2)) = word.split_once('-') {
        if has_many_dash(w2) {
            // when word contains more than 4 dash, then consider it noise.
            // e.g. heat uid looks like: undercloud-UndercloudServiceChain-dt26w6s63vd6-ServiceChain-dxxxgncfjqeg-0-yhtbooauehxj
            result.push_str("%DASH")
        } else {
            if do_process(w1, result) {
                result.push_str("- ");
            }
            added = do_process(w2, result)
        }
    } else if let Some((w1, w2)) = word.split_once('|') {
        // Recurse on each side of a pipe separator.
        if do_process(w1, result) {
            result.push_str("| ");
        }
        added = do_process(w2, result)
    } else if word.len() >= 32 {
        // Long unsplittable words are likely generated identifiers.
        result.push_str("%BIG")
    } else if let Some((w1, w2)) = is_two_words(word) {
        // e.g. `DHCPOFFER(ipaddr)` becomes two separate words.
        if do_process(w1, result) {
            result.push(' ');
        }
        added = do_process(w2, result);
    } else {
        // here finally the word is added, with digit runs collapsed to `N`
        let x = remove_numbers(word);
        if x.len() > 3 {
            result.push_str(&x)
        } else {
            added = false;
        }
    }
    added
}
460
461/// The tokenizer entry point
462pub fn process(line: &str) -> String {
463    // Remove surrounding whitespaces
464    let line = line.trim();
465
466    // check for global filter first
467    if global_filter(line) {
468        return "%GL_FILTER".to_string();
469    }
470
471    // split the line into space separated words.
472    let mut result = String::with_capacity(line.len());
473    for word in words(line) {
474        if do_process(word, &mut result) {
475            result.push(' ')
476        }
477    }
478    // TODO: check if result contains at least 2 word
479    result.trim().to_string()
480}
481
/// Helper macro to write short tests. `tokens_eq!("a", "b")` is `assert_eq!(process("a"), process("b"))`
#[macro_export]
macro_rules! tokens_eq {
    // Match-like arm: expands both expressions through `process` and asserts equality.
    ($a:expr,$b:expr) => {
        assert_eq!(process($a), process($b))
    };
}
490
// End-to-end tests of [process] over representative log lines.
#[cfg(test)]
mod tests {
    use super::*;

    // Trailing newlines are trimmed, leaving single words that the global filter drops.
    #[test]
    fn test_process_nl() {
        assert_eq!(process("testy\r\n"), "%GL_FILTER");
        assert_eq!(process("* mirror: 42\n"), "%GL_FILTER");
    }

    #[test]
    fn test_process() {
        assert_eq!(
            process("error hash mismatch 'sha256:42'"),
            "error error%A error%B error%C error%D hash mismatch %HASH"
        );
        assert_eq!(
            process("getting \"http://local:4242/test\""),
            "getting %URL"
        );
        assert_eq!(
            process("sha256://toto tata finished in 28ms by systemd[4248]"),
            "%HASH tata finished systemd%PID"
        );
        assert_eq!(
            process("log_url=https://ansible AWS_ACCESS_KEY_ID=ASIA6CCDWXDODS7A4X53 "),
            "log_url%EQ %URL AWS_ACCESS_KEY_ID%EQ %VALUE_ID"
        );
        assert_eq!(
            process("** 192.168.24.1:8787/tripleovictoria/openstack-heat-api:175194d1801ec25367354976a18e3725-updated-20220125105210 **"),
            "%ID/ tripleovictoria/ openstack- heat- %EQ %ID- updated- %ID"
        );
    }
    #[test]
    fn test_process02() {
        assert_eq!(
            process("nova::placement::password: UIbv1LPZWIXpBtaToNzsmgZI3"),
            "nova%EQ :placement::password: %BASE64"
        );
        assert_eq!(
            process("2022-01-25 12:11:14 | ++ export OS_PASSWORD=PobDt1cxalvf40uv9Om5VTNkw"),
            "%ID %ID export OS_PASSWORD%EQ %VALUE_ID"
        );
        assert_eq!(
            process("^+ ntp1a.example.com 1 10 377 635 -1217us[-1069us] +/- 16ms"),
            "%GL_FILTER"
        );
        assert_eq!(process("a PobDt1cxalvf40uv9Om5VTNkw"), "%ID %BASE64");
    }

    #[test]
    fn test_process03() {
        assert_eq!(
            process("2022-01-25T14:09:24.422Z|00014|jsonrpc|WARN|tcp:[fd00:fd00:fd00:2000::21e]:50504: receive error: Connection reset by peer"),
            "%ID- %ID- NTN:N:N.NZ| %ID| jsonrpc| WARN WARN%A WARN%B WARN%C WARN%D| %EQ %ID receive error error%A error%B error%C error%D%EQ Connection reset peer"
        );
        assert_eq!(
            process("Event ID: 3e75e420-761f-11ec-8d18-a0957bd68c36"),
            process("Event ID: f671eb00-730e-11ec-915f-abcd86bae8f1")
        );
        assert_eq!(
            process("\"mac_address\": \"12:fa:c8:b2:e0:ff\","),
            process("\"mac_address\": \"12:a6:f2:17:d3:b5\",")
        );
        assert_eq!(
            process("File \"nodepool/cmd/config_validator.py\", line 144, in validate"),
            "File nodepool/ config_validator.py line %ID validate"
        );
        assert_eq!(
            process("controller |             \"after\": \"3}QP5CJuNBP65S%c:y>o\"",),
            "controller after%EQ %ODD"
        );
        assert_eq!(
            process("[Zuul] Job complete, result: FAILURE"),
            "Zuul complete result%EQ FAILURE FAILURE%A FAILURE%B FAILURE%C FAILURE%D"
        );
    }

    #[test]
    fn test_process04() {
        assert_eq!(
            process("\"assertion\": \"new_dhcp is changed\""),
            "assertion assertion%A assertion%B assertion%C assertion%D%EQ new_dhcp changed"
        );
    }

    // Noisy identifiers from different runs must tokenize identically.
    #[test]
    fn test_process20() {
        assert_eq!(
            process("controller | +3}QP5CJuNBP65S%c:y>o"),
            process("controller | +1T9,Eqb@g[VL@b0u*Et!")
        );
        assert_eq!(
            process("   \"contents\": \"3}QP5CJuNBP65S%c:y>o\""),
            process("   \"contents\": \"U%aNO^b5ITFU^xTTa9rV\",")
        );
        assert_eq!(
            process(
                "pkg: openstack-tripleo-heat-templates-13.5.1-0.20220121152841.1408598.el8.noarch"
            ),
            "%EQ %DASH"
        );
        tokens_eq!(
            "id = \"HvXxSk-Foz9-XJE4-RZSD-KXxc-NxTt-AMi18O\"",
            "id = \"BBW6bE-58DO-3GeE-3ix2-8pLG-wfWL-aiTdAf\""
        );
        tokens_eq!(
            "rabbitmq::erlang_cookie: xkkGdfgqlUovQz3fP2CZ",
            "rabbitmq::erlang_cookie: xkkGdfgqlUovQz3fP2CZ"
        );
        tokens_eq!(
            "ZUUL_REF=Z60f0ad207fbb4c55a07d665ef44131a4",
            "ZUUL_REF=Zbffe5ccbe3ef4ab48c016783ea185dfa"
        );
        tokens_eq!("tap44302f40-8", "tap423e2e40-8");
        tokens_eq!(
            "[fd00:fd00:fd00:2000::21e]:5672 (1)",
            "[fd00:ad00:fd00:2100::21e]:5872 (1)"
        );
        tokens_eq!(
            "DHCPREQUEST(tap44302f40-82) 192.168.24.9 fa:16:3e:94:88:3f",
            "DHCPREQUEST(tap443e2140-82) 192.168.25.9 fb:16:3e:94:88:3f"
        );
        tokens_eq!(
            r"\ = Local Signing Authority, CN = caa53b4e-fff041fe-93823ed2-7ee25a11\n\n\",
            r"\ = Local Signing Authority, CN = 41319aee-68934f60-baf41d6e-158a15cd\n\n\"
        );
        tokens_eq!(
            r"Baremetal Node@83d24142-5411-4568-b344-05caac9fcfbf: {}",
            r"Baremetal Node@e54437f7-1f1d-4a9b-8cc5-ce73550f8608: {}"
        );
    }

    // qemu/libvirt command lines with varying fds, MTUs and MACs.
    #[test]
    fn test_process21() {
        tokens_eq!(
            r"-netdev tap,fd=123,id=hostnet0 \",
            r"-netdev tap,fd=175,id=hostnet0 \"
        );
        tokens_eq!(
            r"-device virtio-net-pci,rx_queue_size=512,host_mtu=1292,netdev=hostnet0,id=net0,mac=fa:16:3e:a3:dc:e1,bus=pci.0,addr=0x3",
            r"-device virtio-net-pci,rx_queue_size=52,host_mtu=12920,netdev=hostnet0,id=net0,mac=fa:16:3e:1a:1c:fd,bus=pci.1,addr=0x4"
        );
    }

    // Heat stack names with many dashes collapse to the same tokens.
    #[test]
    fn test_process22() {
        tokens_eq!(
            "creating Value \"ApacheNetworks\" Stack \"undercloud-UndercloudServiceChain-sczoll7kpg37-ServiceChain-ghee7usnfx3j-17-wztq7dmj6blw-ApacheServiceBase-7nwdrcrxjpmz",
            "creating Value \"ApacheNetworks\" Stack \"undercloud-UndercloudServiceChain-dt26w6s63vd6-ServiceChain-dxxxgncfjqeg-0-yhtbooauehxj"
        );
    }

    #[test]
    fn test_process_ovn() {
        assert_eq!(
            process("addresses: [\"fa:16:3e:69:3c:cd\"]"),
            "addresses%EQ %ID"
        );
        assert_eq!(
            process("addresses: [\"fa:16:3e:19:15:bb 192.168.199.2\"]"),
            "addresses%EQ %ID %ID"
        );
    }

    #[test]
    fn test_process_amqp() {
        assert_eq!(
            process("closing AMQP connection <0.4375.0> ([fd00:fd00:fd00:2000::40]:33588 -> [fd00:fd00:fd00:2000::21e]:5672 - nova-compute:8:08b39730-b2e6-4d1f-bcc1-318f9bcfd7c6, vhost: '/', user: 'guest')"),
            "closing AMQP connection %ID %ID %ID %UID vhost%EQ user%EQ guest"
        );
    }

    #[test]
    fn test_kv() {
        assert_eq!(
            process("a name=delorean-tripleo-repos-8c402732195f680e7bf8197030cb5a25d45df5a9"),
            "%ID name%EQ delorean- tripleo- repos- %ID"
        );
    }

    // The splitter keeps empty leading/trailing fields.
    #[test]
    fn test_words() {
        assert_eq!(
            words(" a b ").collect::<Vec<&str>>(),
            vec!["", "a", "b", ""]
        );
    }
}