use lazy_static::lazy_static;
use regex::Regex;
use regex::Split;
fn words(line: &str) -> Split {
lazy_static! {
static ref RE: Regex = Regex::new(r"([ \t]|\\[nr])+").unwrap();
}
RE.split(line)
}
fn trim_quote_and_punctuation(word: &str) -> &str {
word.trim_start_matches("u\"")
.trim_start_matches("u'")
.trim_matches(|c| {
c == '\''
|| c == '"'
|| c == ','
|| c == '.'
|| c == ';'
|| c == '('
|| c == ')'
|| c == '['
|| c == ']'
|| c == '{'
|| c == '}'
|| c == '>'
|| c == '<'
|| c == '\\'
})
}
fn global_filter(line: &str) -> bool {
lazy_static! {
static ref RE: Regex = Regex::new(concat!(
r"GET / HTTP/1.1",
r"|\* [a-zA-Z]+: [a-zA-Z0-9\.-]*$|Trying other mirror.",
r"|ovs-ofctl .* (dump-ports|dump-flows|show)\b",
r"|(ip|eb)tables .* -L\b",
r"|(^\^[+*-] [a-z0-9\.>-]{5,} [0-9])",
r"|(^[a-f0-9s/]+>[0-9]+ )",
r"|(^\+\+ echo [^ ]+$)"
)).unwrap();
}
let is_single_word = !line.contains(|c: char| c.is_whitespace());
is_single_word || RE.is_match(line)
}
fn remove_numbers(word: &str) -> String {
lazy_static! {
static ref RE: Regex = Regex::new("[0-9]+").unwrap();
}
RE.replace_all(word, "N").to_string()
}
fn is_date(word: &str) -> bool {
lazy_static! {
static ref RE: Regex = Regex::new(concat!(
"(?i-u:^(",
"sunday|monday|tuesday|wednesday|thursday|friday|saturday|",
"january|february|march|april|may|june|july|august|september|october|november|december",
")$)"
))
.unwrap();
}
RE.is_match(word)
}
fn is_error(word: &str) -> bool {
lazy_static! {
static ref RE: Regex = Regex::new(concat!(
"(?i-u:^(",
"error|failure|failed|warning|",
"err|fail|warn|",
"denied|",
"assert|assertion|non-zero|",
"exception|traceback",
")$)"
))
.unwrap();
}
RE.is_match(word)
}
fn contains_odd_char(word: &str) -> bool {
lazy_static! {
static ref RE: Regex = Regex::new(r"[<>{}%$,*]").unwrap();
}
RE.is_match(word)
}
fn is_uid(word: &str) -> bool {
lazy_static! {
static ref RE: Regex =
Regex::new(concat!("^(:*", r"[\[\]0-9a-fA-FxZ]+[:.-]*", ")+$")).unwrap();
}
RE.is_match(word)
}
fn is_uuid(word: &str) -> bool {
lazy_static! {
static ref RE: Regex = Regex::new(concat!(
"^[a-zA-Z0-9].*-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{4}-"
))
.unwrap();
}
RE.is_match(word)
}
fn has_many_dash(word: &str) -> bool {
lazy_static! {
static ref RE: Regex = Regex::new(concat!("^.+-.+-.+-.")).unwrap();
}
RE.is_match(word)
}
fn is_cookie(word: &str) -> bool {
lazy_static! {
static ref RE: Regex =
Regex::new(concat!("^(", "gAAAA|AAAA|tx[a-z]|tap|req-|AUTH_", ")")).unwrap();
}
RE.is_match(word)
}
fn is_url(word: &str) -> bool {
lazy_static! {
static ref RE: Regex =
Regex::new(concat!("(?i:^", "(https|http|ftp|ssh)://", ")")).unwrap();
}
RE.is_match(word)
}
fn is_base64(word: &str) -> bool {
lazy_static! {
static ref RE: Regex = Regex::new(concat!("^", "[A-Za-z0-9+/=]+", "$")).unwrap();
}
word.ends_with("==") || (word.len() > 24 && RE.is_match(word))
}
fn is_hash(word: &str) -> bool {
lazy_static! {
static ref RE: Regex = Regex::new(concat!("(?i:^", "(hash|sha|md)[0-9]*:", ")")).unwrap();
}
RE.is_match(word)
}
fn is_refs(word: &str) -> bool {
lazy_static! {
static ref RE: Regex = Regex::new(concat!(r"^\w{7}\.\.\w{7}$")).unwrap();
}
word.starts_with("refs/") || word.starts_with("repos/") || RE.is_match(word)
}
fn is_key_value(word: &str) -> Option<(&str, &str)> {
match word.split_once(|c| c == '=' || c == ':') {
Some((k, v)) => {
if k.starts_with(|c| (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_')) {
Some((k, v))
} else {
None
}
}
_ => None,
}
}
fn is_two_words(word: &str) -> Option<(&str, &str)> {
match word.split_once(|c| c == '[' || c == '(' || c == '\\' || c == '@') {
Some((k, v)) => Some((k, v.trim_end_matches(|c| c == ']' || c == ')'))),
None => None,
}
}
fn is_key_for_id(word: &str) -> bool {
lazy_static! {
static ref RE: Regex = Regex::new(concat!(
"(?i:",
"(id|key|ref|region|token|secret|password)",
")"
))
.unwrap();
}
RE.is_match(word)
}
fn is_random_path(word: &str) -> bool {
word.contains("tmp/") || word.contains("/tmp")
}
#[cfg(test)]
mod re_tests {
use super::*;
#[test]
fn test_remove_numbers() {
assert_eq!(remove_numbers("test42-check"), "testN-check");
}
#[test]
fn test_date() {
assert!(vec!["sunday", "saturday", "Monday"]
.into_iter()
.all(is_date));
assert!(vec!["sunday ", " saturday", " jan ", "sund"]
.into_iter()
.all(|v| !is_date(v)));
}
#[test]
fn test_is_error() {
assert!(is_error("FAIL"));
}
#[test]
fn test_id() {
assert!(vec![
"aa:bb:cc:00:ff",
"42.24.21.12",
"abab-efef",
"2022-02-03",
"18:01:00.1"
]
.into_iter()
.all(is_uid))
}
#[test]
fn test_hash() {
assert!(vec!["sha256:aabbcc00", "md5:test", "MD42:abab",]
.into_iter()
.all(is_hash))
}
#[test]
fn test_composite() {
assert_eq!(is_key_value("key=value"), Some(("key", "value")));
assert_eq!(is_key_value("keyvalue"), None);
assert_eq!(is_key_value("!KEY=value"), None);
}
#[test]
fn test_random_path() {
assert!(is_random_path("/tmp/test"));
assert!(is_random_path("/var/tmp/key"));
assert_eq!(is_random_path("/usr"), false);
}
#[test]
fn test_trim_pid() {
assert_eq!(trim_pid("systemd[42"), Some("systemd"))
}
}
fn parse_literal(word: &str) -> Option<&str> {
if is_date(word) {
Some("%DATE")
} else if is_hash(word) {
Some("%HASH")
} else if is_uid(word) {
Some("%ID")
} else if is_cookie(word) {
Some("%COOKIE")
} else if is_uuid(word) {
Some("%UID")
} else if is_url(word) {
Some("%URL")
} else if is_random_path(word) {
Some("%PATH")
} else if is_refs(word) {
Some("%REF")
} else if is_base64(word) {
Some("%BASE64")
} else {
None
}
}
fn trim_pid(word: &str) -> Option<&str> {
word.trim_end_matches(|c| c >= '0' && c <= '9')
.strip_suffix("[")
}
fn push_error(word: &str, result: &mut String) {
result.push_str(word);
result.push(' ');
result.push_str(word);
result.push_str("%A ");
result.push_str(word);
result.push_str("%B ");
result.push_str(word);
result.push_str("%C ");
result.push_str(word);
result.push_str("%D");
}
fn do_process(mut word: &str, result: &mut String) -> bool {
word = trim_quote_and_punctuation(word);
let mut added = true;
if let Some(token) = parse_literal(word) {
result.push_str(token)
} else if is_error(word) {
push_error(word, result)
} else if word.len() <= 3 {
added = false;
} else if let Some(strip) = trim_pid(word) {
do_process(strip, result);
result.push_str("%PID");
} else if contains_odd_char(word) {
result.push_str("%ODD")
} else if let Some((key, value)) = is_key_value(word) {
do_process(key, result);
if is_key_for_id(key) {
result.push_str("%EQ %VALUE_ID")
} else {
result.push_str("%EQ ");
added = do_process(value, result)
}
} else if let Some((w1, w2)) = word.split_once('/') {
if do_process(w1, result) {
result.push_str("/ ");
}
added = do_process(w2, result);
} else if let Some((w1, w2)) = word.split_once('-') {
if has_many_dash(w2) {
result.push_str("%DASH")
} else {
if do_process(w1, result) {
result.push_str("- ");
}
added = do_process(w2, result)
}
} else if let Some((w1, w2)) = word.split_once('|') {
if do_process(w1, result) {
result.push_str("| ");
}
added = do_process(w2, result)
} else if word.len() >= 32 {
result.push_str("%BIG")
} else if let Some((w1, w2)) = is_two_words(word) {
if do_process(w1, result) {
result.push(' ');
}
added = do_process(w2, result);
} else {
let x = remove_numbers(word);
if x.len() > 3 {
result.push_str(&x)
} else {
added = false;
}
}
added
}
pub fn process(line: &str) -> String {
let line = line.trim();
if global_filter(line) {
return "%GL_FILTER".to_string();
}
let mut result = String::with_capacity(line.len());
for word in words(line) {
if do_process(word, &mut result) {
result.push(' ')
}
}
result.trim().to_string()
}
#[macro_export]
macro_rules! tokens_eq {
($a:expr,$b:expr) => {
assert_eq!(process($a), process($b))
};
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_process_nl() {
assert_eq!(process("testy\r\n"), "%GL_FILTER");
assert_eq!(process("* mirror: 42\n"), "%GL_FILTER");
}
#[test]
fn test_process() {
assert_eq!(
process("error hash mismatch 'sha256:42'"),
"error error%A error%B error%C error%D hash mismatch %HASH"
);
assert_eq!(
process("getting \"http://local:4242/test\""),
"getting %URL"
);
assert_eq!(
process("sha256://toto tata finished in 28ms by systemd[4248]"),
"%HASH tata finished systemd%PID"
);
assert_eq!(
process("log_url=https://ansible AWS_ACCESS_KEY_ID=ASIA6CCDWXDODS7A4X53 "),
"log_url%EQ %URL AWS_ACCESS_KEY_ID%EQ %VALUE_ID"
);
assert_eq!(
process("** 192.168.24.1:8787/tripleovictoria/openstack-heat-api:175194d1801ec25367354976a18e3725-updated-20220125105210 **"),
"%ID/ tripleovictoria/ openstack- heat- %EQ %ID- updated- %ID"
);
}
#[test]
fn test_process02() {
assert_eq!(
process("nova::placement::password: UIbv1LPZWIXpBtaToNzsmgZI3"),
"nova%EQ :placement::password: %BASE64"
);
assert_eq!(
process("2022-01-25 12:11:14 | ++ export OS_PASSWORD=PobDt1cxalvf40uv9Om5VTNkw"),
"%ID %ID export OS_PASSWORD%EQ %VALUE_ID"
);
assert_eq!(
process("^+ ntp1a.example.com 1 10 377 635 -1217us[-1069us] +/- 16ms"),
"%GL_FILTER"
);
assert_eq!(process("a PobDt1cxalvf40uv9Om5VTNkw"), "%ID %BASE64");
}
#[test]
fn test_process03() {
assert_eq!(
process("2022-01-25T14:09:24.422Z|00014|jsonrpc|WARN|tcp:[fd00:fd00:fd00:2000::21e]:50504: receive error: Connection reset by peer"),
"%ID- %ID- NTN:N:N.NZ| %ID| jsonrpc| WARN WARN%A WARN%B WARN%C WARN%D| %EQ %ID receive error error%A error%B error%C error%D%EQ Connection reset peer"
);
assert_eq!(
process("Event ID: 3e75e420-761f-11ec-8d18-a0957bd68c36"),
process("Event ID: f671eb00-730e-11ec-915f-abcd86bae8f1")
);
assert_eq!(
process("\"mac_address\": \"12:fa:c8:b2:e0:ff\","),
process("\"mac_address\": \"12:a6:f2:17:d3:b5\",")
);
assert_eq!(
process("File \"nodepool/cmd/config_validator.py\", line 144, in validate"),
"File nodepool/ config_validator.py line %ID validate"
);
assert_eq!(
process("controller | \"after\": \"3}QP5CJuNBP65S%c:y>o\"",),
"controller after%EQ %ODD"
);
assert_eq!(
process("[Zuul] Job complete, result: FAILURE"),
"Zuul complete result%EQ FAILURE FAILURE%A FAILURE%B FAILURE%C FAILURE%D"
);
}
#[test]
fn test_process04() {
assert_eq!(
process("\"assertion\": \"new_dhcp is changed\""),
"assertion assertion%A assertion%B assertion%C assertion%D%EQ new_dhcp changed"
);
}
#[test]
fn test_process20() {
assert_eq!(
process("controller | +3}QP5CJuNBP65S%c:y>o"),
process("controller | +1T9,Eqb@g[VL@b0u*Et!")
);
assert_eq!(
process(" \"contents\": \"3}QP5CJuNBP65S%c:y>o\""),
process(" \"contents\": \"U%aNO^b5ITFU^xTTa9rV\",")
);
assert_eq!(
process(
"pkg: openstack-tripleo-heat-templates-13.5.1-0.20220121152841.1408598.el8.noarch"
),
"%EQ %DASH"
);
tokens_eq!(
"id = \"HvXxSk-Foz9-XJE4-RZSD-KXxc-NxTt-AMi18O\"",
"id = \"BBW6bE-58DO-3GeE-3ix2-8pLG-wfWL-aiTdAf\""
);
tokens_eq!(
"rabbitmq::erlang_cookie: xkkGdfgqlUovQz3fP2CZ",
"rabbitmq::erlang_cookie: xkkGdfgqlUovQz3fP2CZ"
);
tokens_eq!(
"ZUUL_REF=Z60f0ad207fbb4c55a07d665ef44131a4",
"ZUUL_REF=Zbffe5ccbe3ef4ab48c016783ea185dfa"
);
tokens_eq!("tap44302f40-8", "tap423e2e40-8");
tokens_eq!(
"[fd00:fd00:fd00:2000::21e]:5672 (1)",
"[fd00:ad00:fd00:2100::21e]:5872 (1)"
);
tokens_eq!(
"DHCPREQUEST(tap44302f40-82) 192.168.24.9 fa:16:3e:94:88:3f",
"DHCPREQUEST(tap443e2140-82) 192.168.25.9 fb:16:3e:94:88:3f"
);
tokens_eq!(
r"\ = Local Signing Authority, CN = caa53b4e-fff041fe-93823ed2-7ee25a11\n\n\",
r"\ = Local Signing Authority, CN = 41319aee-68934f60-baf41d6e-158a15cd\n\n\"
);
tokens_eq!(
r"Baremetal Node@83d24142-5411-4568-b344-05caac9fcfbf: {}",
r"Baremetal Node@e54437f7-1f1d-4a9b-8cc5-ce73550f8608: {}"
);
}
#[test]
fn test_process21() {
tokens_eq!(
r"-netdev tap,fd=123,id=hostnet0 \",
r"-netdev tap,fd=175,id=hostnet0 \"
);
tokens_eq!(
r"-device virtio-net-pci,rx_queue_size=512,host_mtu=1292,netdev=hostnet0,id=net0,mac=fa:16:3e:a3:dc:e1,bus=pci.0,addr=0x3",
r"-device virtio-net-pci,rx_queue_size=52,host_mtu=12920,netdev=hostnet0,id=net0,mac=fa:16:3e:1a:1c:fd,bus=pci.1,addr=0x4"
);
}
#[test]
fn test_process22() {
tokens_eq!(
"creating Value \"ApacheNetworks\" Stack \"undercloud-UndercloudServiceChain-sczoll7kpg37-ServiceChain-ghee7usnfx3j-17-wztq7dmj6blw-ApacheServiceBase-7nwdrcrxjpmz",
"creating Value \"ApacheNetworks\" Stack \"undercloud-UndercloudServiceChain-dt26w6s63vd6-ServiceChain-dxxxgncfjqeg-0-yhtbooauehxj"
);
}
#[test]
fn test_process_ovn() {
assert_eq!(
process("addresses: [\"fa:16:3e:69:3c:cd\"]"),
"addresses%EQ %ID"
);
assert_eq!(
process("addresses: [\"fa:16:3e:19:15:bb 192.168.199.2\"]"),
"addresses%EQ %ID %ID"
);
}
#[test]
fn test_process_amqp() {
assert_eq!(
process("closing AMQP connection <0.4375.0> ([fd00:fd00:fd00:2000::40]:33588 -> [fd00:fd00:fd00:2000::21e]:5672 - nova-compute:8:08b39730-b2e6-4d1f-bcc1-318f9bcfd7c6, vhost: '/', user: 'guest')"),
"closing AMQP connection %ID %ID %ID %UID vhost%EQ user%EQ guest"
);
}
#[test]
fn test_kv() {
assert_eq!(
process("a name=delorean-tripleo-repos-8c402732195f680e7bf8197030cb5a25d45df5a9"),
"%ID name%EQ delorean- tripleo- repos- %ID"
);
}
#[test]
fn test_words() {
assert_eq!(
words(" a b ").collect::<Vec<&str>>(),
vec!["", "a", "b", ""]
);
}
}