use std::sync::LazyLock;
/// Table of expression macros as `(macro name, expansion)` pairs.
///
/// Each expansion is a filter expression made of one or more clauses joined
/// by ` and `. The order of the entries is preserved by every consumer in
/// this file, so new entries should be appended rather than inserted.
pub const EXPRESSION_MACROS: &[(&str, &str)] = &[
    // Single-clause macros: each expands to exactly one `evt.name` equality.
    ("spawn_process", "evt.name = 'CreateProcess'"),
    ("create_thread", "evt.name = 'CreateThread'"),
    ("write_file", "evt.name = 'WriteFile'"),
    ("rename_file", "evt.name = 'RenameFile'"),
    ("read_file", "evt.name = 'ReadFile'"),
    ("delete_file", "evt.name = 'DeleteFile'"),
    ("set_file_information", "evt.name = 'SetFileInformation'"),
    ("load_module", "evt.name = 'LoadModule'"),
    ("unload_module", "evt.name = 'UnloadModule'"),
    ("send_socket", "evt.name = 'Send'"),
    ("recv_socket", "evt.name = 'Recv'"),
    ("connect_socket", "evt.name = 'Connect'"),
    ("accept_socket", "evt.name = 'Accept'"),
    ("virtual_alloc", "evt.name = 'VirtualAlloc'"),
    ("virtual_free", "evt.name = 'VirtualFree'"),
    ("map_view_file", "evt.name = 'MapViewFile'"),
    ("unmap_view_file", "evt.name = 'UnmapViewFile'"),
    ("duplicate_handle", "evt.name = 'DuplicateHandle'"),
    ("create_handle", "evt.name = 'CreateHandle'"),
    ("query_dns", "evt.name = 'QueryDns'"),
    ("reply_dns", "evt.name = 'ReplyDns'"),
    // Multi-clause macros: an `evt.name` equality plus further conditions.
    (
        "open_file",
        "evt.name = 'CreateFile' and file.operation = 'OPEN' and file.status = 'Success'",
    ),
    (
        "create_file",
        "evt.name = 'CreateFile' and file.operation != 'OPEN' and file.status = 'Success'",
    ),
    (
        "create_new_file",
        "evt.name = 'CreateFile' and file.operation = 'CREATE' and file.status = 'Success'",
    ),
    (
        "create_file_supersede",
        "evt.name = 'CreateFile' and file.operation = 'SUPERSEDE'",
    ),
    (
        "set_value",
        "evt.name = 'RegSetValue' and registry.status = 'Success'",
    ),
    (
        "create_key",
        "evt.name = 'RegCreateKey' and registry.status = 'Success'",
    ),
    (
        "open_process",
        "evt.name = 'OpenProcess' and ps.access.status = 'Success'",
    ),
    (
        "open_thread",
        "evt.name = 'OpenThread' and thread.access.status = 'Success'",
    ),
    (
        "open_registry",
        "evt.name = 'RegOpenKey' and registry.status = 'Success'",
    ),
];
/// One macro prepared for matching: `(name, clauses with the first top-level
/// ` = '` rewritten to ` ~= '`, clauses exactly as written in the table)`.
type MacroClauses = (&'static str, Vec<String>, Vec<String>);

/// Every entry of `EXPRESSION_MACROS`, split into clauses once on first use.
///
/// The entries keep the order of the source table. For each macro both the
/// `~=` spelling and the verbatim spelling of every clause are stored, so
/// either form can be compared against user-written clauses.
static MACRO_CLAUSES: LazyLock<Vec<MacroClauses>> = LazyLock::new(|| {
    let mut compiled: Vec<MacroClauses> = Vec::with_capacity(EXPRESSION_MACROS.len());
    for &(macro_name, expansion) in EXPRESSION_MACROS {
        // Verbatim clauses, exactly as they appear in the table.
        let verbatim: Vec<String> = split_clauses(expansion)
            .into_iter()
            .map(String::from)
            .collect();
        // Same clauses with `=` relaxed to `~=` where `to_ci_eq` applies.
        let relaxed: Vec<String> = verbatim.iter().map(|clause| to_ci_eq(clause)).collect();
        compiled.push((macro_name, relaxed, verbatim));
    }
    compiled
});
/// Returns `true` when `name` is exactly (case-sensitively) the name of an
/// entry in `EXPRESSION_MACROS`.
pub fn is_known_macro(name: &str) -> bool {
    EXPRESSION_MACROS
        .iter()
        .map(|&(known, _)| known)
        .any(|known| known == name)
}
/// Rewrites runs of clauses in `condition` that spell out a known macro's
/// expansion back into the macro's name.
///
/// The condition is split on top-level ` and `. At each clause position the
/// macros are tried from the most clauses to the fewest, so a longer
/// expansion wins over a shorter one starting at the same clause. A clause
/// matches when it equals either the macro's verbatim clause or its `~=`
/// spelling. Clauses that belong to no macro are kept as they are.
///
/// Returns an empty string for empty input. A condition with fewer than two
/// clauses that is not itself a macro is returned unchanged. In every other
/// case the result is the rewritten clauses joined with `" and "`.
pub fn recognize(condition: &str) -> String {
    if condition.is_empty() {
        return String::new();
    }
    let clauses = split_top_level_and(condition);
    if clauses.len() < 2 && !clauses.first().is_some_and(|c| matches_any_macro(c)) {
        return condition.to_string();
    }

    // Longest expansions first; the sort is stable, so macros with the same
    // clause count keep their table order.
    let mut macros: Vec<&MacroClauses> = MACRO_CLAUSES.iter().collect();
    macros.sort_by_key(|m| std::cmp::Reverse(m.1.len()));

    let mut out: Vec<String> = Vec::with_capacity(clauses.len());
    let mut i = 0;
    while i < clauses.len() {
        let matched = macros
            .iter()
            .find_map(|(name, default_clauses, cased_clauses)| {
                let len = default_clauses.len();
                // A macro with no clauses would match without consuming
                // anything and stall the scan, so it is never a candidate.
                if len == 0 {
                    return None;
                }
                // `get` yields `None` when fewer than `len` clauses remain.
                let window = clauses.get(i..i + len)?;
                clauses_match(window, default_clauses, cased_clauses).then_some((*name, len))
            });
        match matched {
            Some((name, len)) => {
                out.push(name.to_string());
                i += len;
            }
            None => {
                out.push(clauses[i].clone());
                i += 1;
            }
        }
    }
    out.join(" and ")
}
/// Returns `true` when `clause`, taken alone, is the complete expansion of
/// some macro in `MACRO_CLAUSES` (in either its verbatim or `~=` spelling).
///
/// Macros whose expansion has more than one clause never match here, because
/// `clauses_match` requires equal clause counts.
fn matches_any_macro(clause: &str) -> bool {
    // `clauses_match` works on slices of owned strings, so wrap the single
    // clause in a one-element array.
    let single = [clause.to_string()];
    MACRO_CLAUSES
        .iter()
        .any(|entry| clauses_match(&single, &entry.1, &entry.2))
}
/// Compares `slice` clause-by-clause against two alternative spellings.
///
/// Returns `true` only when all three slices have the same length and every
/// clause of `slice`, after trimming surrounding whitespace, equals the
/// trimmed clause at the same position in `default` or in `cased`. Three
/// empty slices therefore compare as a match.
fn clauses_match(slice: &[String], default: &[String], cased: &[String]) -> bool {
    let count = slice.len();
    if default.len() != count || cased.len() != count {
        return false;
    }
    slice.iter().enumerate().all(|(idx, candidate)| {
        let candidate = candidate.trim();
        candidate == default[idx].trim() || candidate == cased[idx].trim()
    })
}
fn split_top_level_and(expr: &str) -> Vec<String> {
let bytes = expr.as_bytes();
let mut out: Vec<String> = Vec::new();
let mut start = 0usize;
let mut depth = 0i32;
let mut in_str = false;
let mut i = 0usize;
while i < bytes.len() {
let b = bytes[i];
if in_str {
if b == b'\\' && i + 1 < bytes.len() {
i += 2;
continue;
}
if b == b'\'' {
in_str = false;
}
i += 1;
continue;
}
match b {
b'\'' => in_str = true,
b'(' => depth += 1,
b')' => depth -= 1,
_ => {}
}
if depth == 0 && matches_token(bytes, i, b" and ") {
let piece = expr[start..i].trim().to_string();
if !piece.is_empty() {
out.push(piece);
}
i += b" and ".len();
start = i;
continue;
}
i += 1;
}
let tail = expr[start..].trim().to_string();
if !tail.is_empty() {
out.push(tail);
}
out
}
/// Returns `true` when the bytes of `bytes` starting at offset `i` equal
/// `kw`, ignoring ASCII case.
///
/// Returns `false` when fewer than `kw.len()` bytes remain at `i`.
fn matches_token(bytes: &[u8], i: usize, kw: &[u8]) -> bool {
    // A checked range lookup covers the "not enough bytes left" case.
    bytes
        .get(i..i + kw.len())
        .is_some_and(|window| window.eq_ignore_ascii_case(kw))
}
/// Splits `src` into trimmed clauses borrowed from the input.
///
/// The separator is the token ` and ` (ASCII case-insensitive). It is only
/// recognised outside single-quoted strings and while the parenthesis depth
/// is zero. Inside a quoted string a backslash also skips the byte after it,
/// so an escaped quote does not end the string.
///
/// Every piece before a separator is pushed even when it is empty after
/// trimming; only the final piece is dropped when empty.
fn split_clauses(src: &str) -> Vec<&str> {
    const AND: &[u8] = b" and ";

    let raw = src.as_bytes();
    let mut pieces: Vec<&str> = Vec::new();
    let mut piece_start = 0usize;
    let mut nesting = 0i32;
    let mut quoted = false;
    let mut pos = 0usize;

    while pos < raw.len() {
        let byte = raw[pos];

        if quoted {
            pos += match byte {
                // Escape: skip the backslash together with the escaped byte.
                b'\\' if pos + 1 < raw.len() => 2,
                b'\'' => {
                    quoted = false;
                    1
                }
                _ => 1,
            };
            continue;
        }

        if byte == b'\'' {
            quoted = true;
        } else if byte == b'(' {
            nesting += 1;
        } else if byte == b')' {
            nesting -= 1;
        }

        if nesting != 0 || !matches_token(raw, pos, AND) {
            pos += 1;
            continue;
        }

        // Separator found: close the current piece and resume after it.
        pieces.push(src[piece_start..pos].trim());
        pos += AND.len();
        piece_start = pos;
    }

    let last = src[piece_start..].trim();
    if !last.is_empty() {
        pieces.push(last);
    }
    pieces
}
/// Rewrites the first top-level ` = '` in `clause` to ` ~= '`.
///
/// Only an occurrence outside single-quoted strings and outside parentheses
/// is rewritten, and only the first one; the rest of the clause is copied
/// unchanged. When there is no such occurrence (for example `!=`
/// comparisons or comparisons against unquoted values) the clause is
/// returned as is.
fn to_ci_eq(clause: &str) -> String {
    const EQ: &[u8] = b" = '";

    let raw = clause.as_bytes();
    let mut nesting = 0i32;
    let mut quoted = false;
    let mut pos = 0usize;

    while pos < raw.len() {
        let byte = raw[pos];

        if quoted {
            pos += match byte {
                // Escape: skip the backslash together with the escaped byte.
                b'\\' if pos + 1 < raw.len() => 2,
                b'\'' => {
                    quoted = false;
                    1
                }
                _ => 1,
            };
            continue;
        }

        if byte == b'\'' {
            quoted = true;
        } else if byte == b'(' {
            nesting += 1;
        } else if byte == b')' {
            nesting -= 1;
        }

        if nesting == 0 && matches_token(raw, pos, EQ) {
            // Splice the replacement operator between the two halves.
            return [&clause[..pos], " ~= '", &clause[pos + EQ.len()..]].concat();
        }
        pos += 1;
    }

    clause.to_string()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Registered names are found; unregistered names are not.
    #[test]
    fn known_macro_lookup() {
        for known in ["spawn_process", "create_thread", "open_file", "create_file"] {
            assert!(is_known_macro(known), "{known} should be a known macro");
        }
        for unknown in ["modify_registry", "not_a_macro"] {
            assert!(!is_known_macro(unknown), "{unknown} should not be a known macro");
        }
    }

    /// The `~=` spelling of a single-clause macro is recognized.
    #[test]
    fn recognize_spawn_process_case_insensitive_form() {
        assert_eq!(recognize("evt.name ~= 'CreateProcess'"), "spawn_process");
    }

    /// The verbatim `=` spelling of a single-clause macro is recognized.
    #[test]
    fn recognize_spawn_process_cased_form() {
        assert_eq!(recognize("evt.name = 'CreateProcess'"), "spawn_process");
    }

    /// Clauses after the macro expansion are carried over untouched.
    #[test]
    fn recognize_spawn_process_with_extra_clauses() {
        let rewritten = recognize(
            "evt.name = 'CreateProcess' and ps.exe iendswith '\\cmd.exe' and ps.cmdline icontains 'whoami'",
        );
        assert_eq!(
            rewritten,
            "spawn_process and ps.exe iendswith '\\cmd.exe' and ps.cmdline icontains 'whoami'",
        );
    }

    /// Two different single-clause file macros are both recognized.
    #[test]
    fn recognize_write_file_and_read_file() {
        assert_eq!(
            recognize("evt.name = 'WriteFile' and file.path iendswith '\\out.log'"),
            "write_file and file.path iendswith '\\out.log'"
        );
        assert_eq!(recognize("evt.name = 'ReadFile'"), "read_file");
    }

    /// A full three-clause expansion collapses into one macro name.
    #[test]
    fn recognize_open_file_three_clauses() {
        let rewritten = recognize(
            "evt.name = 'CreateFile' and file.operation ~= 'OPEN' and file.status ~= 'Success'",
        );
        assert_eq!(rewritten, "open_file");
    }

    /// A clause following a three-clause expansion is preserved.
    #[test]
    fn recognize_open_file_keeps_trailing_clauses() {
        let rewritten = recognize(
            "evt.name = 'CreateFile' and file.operation ~= 'OPEN' and file.status ~= 'Success' and file.path iendswith '\\secret.txt'",
        );
        assert_eq!(rewritten, "open_file and file.path iendswith '\\secret.txt'");
    }

    /// A two-clause registry macro is recognized with a trailing clause.
    #[test]
    fn recognize_set_value_two_clauses() {
        let rewritten = recognize(
            "evt.name = 'RegSetValue' and registry.status ~= 'Success' and registry.path icontains '\\Run\\'",
        );
        assert_eq!(rewritten, "set_value and registry.path icontains '\\Run\\'");
    }

    /// `!=` versus `~=` on the disposition selects `create_file` or `open_file`.
    #[test]
    fn recognize_create_file_matches_inequality_disposition() {
        let with_inequality = recognize(
            "evt.name = 'CreateFile' and file.operation != 'OPEN' and file.status ~= 'Success'",
        );
        assert_eq!(with_inequality, "create_file");

        let with_equality = recognize(
            "evt.name = 'CreateFile' and file.operation ~= 'OPEN' and file.status ~= 'Success'",
        );
        assert_eq!(with_equality, "open_file");
    }

    /// A two-clause macro written entirely with `=` is recognized.
    #[test]
    fn recognize_open_process_two_clauses_cased() {
        assert_eq!(
            recognize("evt.name = 'OpenProcess' and ps.access.status = 'Success'"),
            "open_process"
        );
    }

    /// An event name that is in no macro is left unchanged.
    #[test]
    fn recognize_does_not_match_with_different_value() {
        let input = "evt.name = 'TerminateProcess'";
        assert_eq!(recognize(input), "evt.name = 'TerminateProcess'");
    }

    /// An operator other than `=` / `~=` prevents a match.
    #[test]
    fn recognize_does_not_match_with_extra_modifier() {
        let input = "evt.name iendswith 'CreateProcess'";
        assert_eq!(recognize(input), "evt.name iendswith 'CreateProcess'");
    }

    /// Macro expansions inside a parenthesised `or` group are not rewritten.
    #[test]
    fn recognize_does_not_cross_or_groups() {
        let rewritten =
            recognize("(evt.name = 'CreateProcess' or evt.name = 'CreateThread') and ps.pid = 4");
        assert_eq!(
            rewritten,
            "(evt.name = 'CreateProcess' or evt.name = 'CreateThread') and ps.pid = 4",
        );
    }

    /// The full expansion matches; its first clause alone does not.
    #[test]
    fn recognize_picks_longest_match() {
        let full = recognize(
            "evt.name = 'CreateFile' and file.operation ~= 'OPEN' and file.status ~= 'Success'",
        );
        assert_eq!(full, "open_file");

        let partial = recognize("evt.name = 'CreateFile'");
        assert_eq!(partial, "evt.name = 'CreateFile'");
    }

    /// A condition with no macro expansion comes back as written.
    #[test]
    fn recognize_passes_through_when_no_macro_matches() {
        let input = "ps.exe iendswith '\\cmd.exe' and ps.cmdline icontains 'whoami'";
        assert_eq!(recognize(input), input);
    }

    /// Empty input yields empty output.
    #[test]
    fn recognize_handles_empty_input() {
        assert_eq!(recognize(""), "");
    }

    /// An `and` inside parentheses does not split the expression.
    #[test]
    fn split_keeps_paren_groups_intact() {
        let pieces = split_top_level_and("(a or b) and c and (d and e)");
        assert_eq!(pieces, vec!["(a or b)", "c", "(d and e)"]);
    }

    /// An `and` inside a quoted string does not split the expression.
    #[test]
    fn split_keeps_quoted_and_inside_strings() {
        let pieces = split_top_level_and("field = 'and inside string' and other");
        assert_eq!(pieces, vec!["field = 'and inside string'", "other"]);
    }

    /// A top-level ` = '` becomes ` ~= '`.
    #[test]
    fn to_ci_eq_substitutes_first_top_level_equality() {
        assert_eq!(
            to_ci_eq("evt.name = 'CreateProcess'"),
            "evt.name ~= 'CreateProcess'",
        );
    }

    /// `!=` comparisons are returned unchanged.
    #[test]
    fn to_ci_eq_leaves_inequality_alone() {
        assert_eq!(to_ci_eq("evt.pid != 4"), "evt.pid != 4");
        assert_eq!(
            to_ci_eq("file.operation != 'OPEN'"),
            "file.operation != 'OPEN'",
        );
    }
}