use std::collections::HashMap;
use crate::compiler::{CompiledDetection, CompiledDetectionItem, CompiledRule};
use crate::event::{Event, EventValue};
use crate::matcher::CompiledMatcher;
/// Pre-filter over compiled rules, keyed by the exact field values they look for.
///
/// Rules are referred to by `usize` index; `build` uses the rule's position in
/// the slice it is given.
pub(crate) struct RuleIndex {
/// Field name -> lowercased exact value -> indices of rules that contributed
/// that (field, value) pair.
field_index: HashMap<String, HashMap<String, Vec<usize>>>,
/// Indices of rules that could not be indexed; `candidates` always returns these.
unindexable: Vec<usize>,
/// The same indices as `unindexable`, grouped by the rule's lowercased
/// logsource product (`None` for rules that declare no product).
unindexable_by_product: HashMap<Option<String>, Vec<usize>>,
/// One more than the highest rule index seen; sizes the per-lookup `seen` table.
rule_count: usize,
}
impl RuleIndex {
    /// Creates an index that contains no rules.
    pub(crate) fn empty() -> Self {
        RuleIndex {
            field_index: HashMap::new(),
            unindexable: Vec::new(),
            unindexable_by_product: HashMap::new(),
            rule_count: 0,
        }
    }

    /// Builds an index over `rules`, using each rule's position in the slice
    /// as its rule index.
    pub(crate) fn build(rules: &[CompiledRule]) -> Self {
        let mut index = Self::empty();
        for (rule_idx, rule) in rules.iter().enumerate() {
            index.append_rule(rule_idx, rule);
        }
        index.rule_count = rules.len();
        index
    }

    /// Adds one rule to the index under `rule_idx`.
    ///
    /// The rule is indexed by its exact (field, value) pairs only when *every*
    /// named detection contributes at least one pair; otherwise it is recorded
    /// as unindexable (and bucketed by its lowercased logsource product) so
    /// that lookups always return it.
    ///
    /// NOTE(review): pairs are gathered from every detection without looking
    /// at how the rule's condition combines them. A condition that negates a
    /// detection would presumably need the rule to stay unindexable; confirm
    /// that this is handled before rules reach the index.
    pub(crate) fn append_rule(&mut self, rule_idx: usize, rule: &CompiledRule) {
        let mut all_pairs: Vec<(String, String)> = Vec::new();
        let mut every_detection_has_pairs = true;
        for detection in rule.detections.values() {
            let pairs = extract_exact_pairs(detection);
            every_detection_has_pairs &= !pairs.is_empty();
            all_pairs.extend(pairs);
        }

        if all_pairs.is_empty() || !every_detection_has_pairs {
            self.unindexable.push(rule_idx);
            let product_key = rule.logsource.product.as_deref().map(str::to_lowercase);
            self.unindexable_by_product
                .entry(product_key)
                .or_default()
                .push(rule_idx);
        } else {
            for (field, value) in all_pairs {
                let postings = self
                    .field_index
                    .entry(field)
                    .or_default()
                    .entry(value)
                    .or_default();
                // All pushes for this rule happen within this call, so a repeat
                // of the same pair can only sit at the tail of the list.
                if postings.last() != Some(&rule_idx) {
                    postings.push(rule_idx);
                }
            }
        }

        // Keep `rule_count` large enough to size the `seen` table in lookups.
        if rule_idx + 1 > self.rule_count {
            self.rule_count = rule_idx + 1;
        }
    }

    /// Returns the indices of rules that may match `event`.
    ///
    /// The result holds every indexed rule with a (field, value) pair present
    /// in the event, followed by every unindexable rule, without duplicates.
    /// The order of the indexed part follows `HashMap` iteration and is not
    /// stable. When nothing is indexed, all indices `0..rule_count` are
    /// returned.
    pub(crate) fn candidates(&self, event: &impl Event) -> Vec<usize> {
        if self.field_index.is_empty() {
            return (0..self.rule_count).collect();
        }
        let mut seen = vec![false; self.rule_count];
        let mut result = Vec::new();
        self.collect_indexed(event, &mut seen, &mut result);
        Self::push_unseen(&self.unindexable, &mut seen, &mut result);
        result
    }

    /// Like [`RuleIndex::candidates`], but skips unindexable rules whose
    /// logsource product differs from `event_product` (compared lowercased).
    ///
    /// Unindexable rules without a product are always included. Indexed rules
    /// are not filtered by product here. With `event_product == None` this is
    /// exactly `candidates(event)`.
    pub(crate) fn candidates_with_logsource(
        &self,
        event: &impl Event,
        event_product: Option<&str>,
    ) -> Vec<usize> {
        let product = match event_product {
            Some(p) => p.to_lowercase(),
            None => return self.candidates(event),
        };
        let mut seen = vec![false; self.rule_count];
        let mut result = Vec::new();
        self.collect_indexed(event, &mut seen, &mut result);
        for bucket in [
            self.unindexable_by_product.get(&None),
            self.unindexable_by_product.get(&Some(product)),
        ]
        .into_iter()
        .flatten()
        {
            Self::push_unseen(bucket, &mut seen, &mut result);
        }
        result
    }

    /// Counts the unindexable rules that `candidates_with_logsource` would
    /// skip for `event_product`: those whose product is set and differs from it.
    ///
    /// Returns 0 when `event_product` is `None`.
    pub(crate) fn conflicting_unindexable_count(&self, event_product: Option<&str>) -> usize {
        let product = match event_product {
            Some(p) => p.to_lowercase(),
            None => return 0,
        };
        let none_len = self.unindexable_by_product.get(&None).map_or(0, Vec::len);
        let match_len = self
            .unindexable_by_product
            .get(&Some(product))
            .map_or(0, Vec::len);
        // Every unindexable rule sits in exactly one product bucket, so the
        // two compatible buckets can never outnumber the full list.
        self.unindexable.len() - none_len - match_len
    }

    /// Probes the field index with the values carried by `event` and appends
    /// every not-yet-seen rule index to `result`.
    fn collect_indexed(&self, event: &impl Event, seen: &mut [bool], result: &mut Vec<usize>) {
        // Reused across fields to avoid one allocation per probed field.
        let mut keys: Vec<String> = Vec::new();
        for (field_name, value_map) in &self.field_index {
            if let Some(event_value) = event.get_field(field_name) {
                keys.clear();
                collect_lowercase_keys(&event_value, &mut keys);
                for key in &keys {
                    if let Some(rule_indices) = value_map.get(key) {
                        Self::push_unseen(rule_indices, seen, result);
                    }
                }
            }
        }
    }

    /// Appends each index from `indices` that is not yet marked in `seen`,
    /// marking it as it goes.
    fn push_unseen(indices: &[usize], seen: &mut [bool], result: &mut Vec<usize>) {
        for &idx in indices {
            if !seen[idx] {
                seen[idx] = true;
                result.push(idx);
            }
        }
    }

    /// Number of rule slots the index covers (test helper).
    #[cfg(test)]
    pub(crate) fn rule_count(&self) -> usize {
        self.rule_count
    }

    /// Number of rules recorded as unindexable (test helper).
    #[cfg(test)]
    pub(crate) fn unindexable_count(&self) -> usize {
        self.unindexable.len()
    }

    /// Number of distinct field names present in the field index (test helper).
    #[cfg(test)]
    pub(crate) fn indexed_field_count(&self) -> usize {
        self.field_index.len()
    }
}
/// Collects the lowercased exact (field, value) pairs that `detection` depends on.
///
/// An empty result means the detection cannot be used for index lookups.
/// Conjunctions (`AllOf`, `And`) contribute the union of their parts' pairs.
/// A disjunction (`AnyOf`) contributes pairs only when *every* alternative
/// has at least one: an alternative without pairs could match an event that
/// carries none of the other alternatives' values, and indexing the rule
/// would then hide it from such events.
fn extract_exact_pairs(detection: &CompiledDetection) -> Vec<(String, String)> {
    let mut pairs = Vec::new();
    match detection {
        CompiledDetection::AllOf(items) => {
            for item in items {
                extract_from_item(item, &mut pairs);
            }
        }
        CompiledDetection::AnyOf(subs) => {
            for sub in subs {
                let sub_pairs = extract_exact_pairs(sub);
                if sub_pairs.is_empty() {
                    // One unindexable alternative makes the whole
                    // disjunction unindexable.
                    pairs.clear();
                    break;
                }
                pairs.extend(sub_pairs);
            }
        }
        CompiledDetection::And(subs) => {
            for sub in subs {
                pairs.extend(extract_exact_pairs(sub));
            }
        }
        // These detection kinds expose no exact field values.
        CompiledDetection::ArrayMatch { .. } => {}
        CompiledDetection::Conditional { .. } => {}
        CompiledDetection::Keywords(_) => {}
    }
    pairs
}
/// Appends the exact pairs of a single detection item to `out`.
///
/// Items without a field name contribute nothing.
fn extract_from_item(item: &CompiledDetectionItem, out: &mut Vec<(String, String)>) {
    if let Some(name) = &item.field {
        extract_from_matcher(&item.matcher, name.as_str(), out);
    }
}
/// Appends to `out` the lowercased exact values that `matcher` requires of `field`.
///
/// * `Exact` contributes its value.
/// * `AllOf` contributes whatever any of its children contribute, since every
///   child must match.
/// * `AnyOf` contributes only when *every* child contributes. A child without
///   an exact value (a substring or regex matcher, say) could match on its
///   own, so indexing by the remaining children would hide the rule from
///   events that it matches.
/// * `CaseInsensitiveGroup` is handled like `AnyOf`. Whether the group needs
///   any or all of its children is not visible here, so the conservative
///   reading is used. NOTE(review): relax this to the `AllOf` behaviour if
///   the group is known to be a conjunction.
/// * Every other matcher contributes nothing.
fn extract_from_matcher(matcher: &CompiledMatcher, field: &str, out: &mut Vec<(String, String)>) {
    match matcher {
        CompiledMatcher::Exact { value, .. } => {
            out.push((field.to_string(), value.to_lowercase()));
        }
        CompiledMatcher::AllOf(children) => {
            for child in children {
                extract_from_matcher(child, field, out);
            }
        }
        CompiledMatcher::AnyOf(children) => {
            extract_from_alternatives(children, field, out);
        }
        CompiledMatcher::CaseInsensitiveGroup { children, .. } => {
            extract_from_alternatives(children, field, out);
        }
        _ => {}
    }
}

/// Appends the pairs of all `children`, or nothing at all if any single child
/// yields no pair. Entries already present in `out` are left untouched.
fn extract_from_alternatives<'a>(
    children: impl IntoIterator<Item = &'a CompiledMatcher>,
    field: &str,
    out: &mut Vec<(String, String)>,
) {
    let checkpoint = out.len();
    for child in children {
        let before = out.len();
        extract_from_matcher(child, field, out);
        if out.len() == before {
            // This alternative is not covered by any exact value: roll back.
            out.truncate(checkpoint);
            return;
        }
    }
}
/// Pushes the index lookup key(s) for an event value onto `out`.
///
/// Strings are lowercased; integers, floats and booleans use their `to_string`
/// form; arrays are flattened recursively in element order. Any other kind of
/// value produces no key.
fn collect_lowercase_keys(value: &EventValue, out: &mut Vec<String>) {
    let key = match value {
        EventValue::Array(elements) => {
            for element in elements {
                collect_lowercase_keys(element, out);
            }
            return;
        }
        EventValue::Str(text) => text.to_lowercase(),
        EventValue::Int(number) => number.to_string(),
        EventValue::Float(number) => number.to_string(),
        EventValue::Bool(flag) => flag.to_string(),
        _ => return,
    };
    out.push(key);
}
#[cfg(test)]
mod tests {
use super::*;
use crate::Engine;
use crate::event::JsonEvent;
use rsigma_parser::parse_sigma_yaml;
use serde_json::json;
/// Parses `yaml`, loads the collection into a fresh `Engine`, and indexes the
/// engine's rules. Panics if parsing or loading fails.
fn build_index(yaml: &str) -> (Engine, RuleIndex) {
    let mut engine = Engine::new();
    let parsed = parse_sigma_yaml(yaml).unwrap();
    engine.add_collection(&parsed).unwrap();
    let rule_index = RuleIndex::build(engine.rules());
    (engine, rule_index)
}
#[test]
fn test_exact_match_indexed() {
    // One plain-value field: the rule is indexed under exactly one field name.
    let yaml = r#"
title: Login Event
logsource:
product: windows
detection:
selection:
EventType: 'login'
condition: selection
"#;
    let (_, rule_index) = build_index(yaml);
    assert_eq!(rule_index.rule_count(), 1);
    assert_eq!(rule_index.unindexable_count(), 0);
    assert_eq!(rule_index.indexed_field_count(), 1);
}
#[test]
fn test_contains_only_unindexable() {
    // A `contains` modifier offers no exact value, so nothing is indexed.
    let yaml = r#"
title: Whoami Detection
logsource:
product: windows
detection:
selection:
CommandLine|contains: 'whoami'
condition: selection
"#;
    let (_, rule_index) = build_index(yaml);
    assert_eq!(rule_index.rule_count(), 1);
    assert_eq!(rule_index.unindexable_count(), 1);
    assert_eq!(rule_index.indexed_field_count(), 0);
}
#[test]
fn test_mixed_items_in_allof_detection() {
    // An exact item next to a `contains` item still makes the rule indexable.
    let yaml = r#"
title: Process Create
logsource:
product: windows
detection:
selection:
EventType: 'process_create'
CommandLine|contains: 'whoami'
condition: selection
"#;
    let (_, rule_index) = build_index(yaml);
    assert_eq!(rule_index.unindexable_count(), 0);
    assert!(rule_index.indexed_field_count() > 0);
}
#[test]
fn test_candidates_returns_matching_rule() {
    // An event carrying the indexed value selects the rule.
    let yaml = r#"
title: Login Event
logsource:
product: windows
detection:
selection:
EventType: 'login'
condition: selection
"#;
    let (_, rule_index) = build_index(yaml);
    let payload = json!({"EventType": "login", "User": "admin"});
    let hits = rule_index.candidates(&JsonEvent::borrow(&payload));
    assert_eq!(hits, vec![0]);
}
#[test]
fn test_candidates_skips_non_matching() {
    // An event with a different value for the indexed field selects nothing.
    let yaml = r#"
title: Login Event
logsource:
product: windows
detection:
selection:
EventType: 'login'
condition: selection
"#;
    let (_, rule_index) = build_index(yaml);
    let payload = json!({"EventType": "file_create", "User": "admin"});
    let hits = rule_index.candidates(&JsonEvent::borrow(&payload));
    assert!(hits.is_empty());
}
#[test]
fn test_unindexable_always_returned() {
    // Unindexable rules are candidates for every event, related or not.
    let yaml = r#"
title: Wildcard Rule
logsource:
product: windows
detection:
selection:
CommandLine|contains: 'whoami'
condition: selection
"#;
    let (_, rule_index) = build_index(yaml);
    let payload = json!({"SomeField": "whatever"});
    let hits = rule_index.candidates(&JsonEvent::borrow(&payload));
    assert_eq!(hits, vec![0]);
}
#[test]
fn test_case_insensitive_lookup() {
    // The rule value is upper-case, the event value lower-case; lookup still hits.
    let yaml = r#"
title: Login Event
logsource:
product: windows
detection:
selection:
EventType: 'LOGIN'
condition: selection
"#;
    let (_, rule_index) = build_index(yaml);
    let payload = json!({"EventType": "login"});
    let hits = rule_index.candidates(&JsonEvent::borrow(&payload));
    assert_eq!(hits, vec![0]);
}
#[test]
fn test_multiple_rules_selective_candidates() {
    // Three rules on the same field with different values: each event selects
    // only the rule whose value it carries.
    let (_, rule_index) = build_index(
        r#"
title: Login
logsource:
product: windows
detection:
selection:
EventType: 'login'
condition: selection
---
title: File Create
logsource:
product: windows
detection:
selection:
EventType: 'file_create'
condition: selection
---
title: Process Create
logsource:
product: windows
detection:
selection:
EventType: 'process_create'
condition: selection
"#,
    );
    assert_eq!(rule_index.rule_count(), 3);
    assert_eq!(rule_index.unindexable_count(), 0);

    let login = json!({"EventType": "login"});
    assert_eq!(rule_index.candidates(&JsonEvent::borrow(&login)), vec![0]);

    let process_create = json!({"EventType": "process_create"});
    assert_eq!(
        rule_index.candidates(&JsonEvent::borrow(&process_create)),
        vec![2]
    );
}
#[test]
fn test_or_with_mixed_indexable_unindexable_detections() {
    // One detection has an exact value and the other does not, so the whole
    // rule has to stay unindexable.
    let yaml = r#"
title: Mixed OR
logsource:
product: windows
detection:
selection_a:
EventType: 'login'
selection_b:
CommandLine|contains: 'whoami'
condition: 1 of selection_*
"#;
    let (_, rule_index) = build_index(yaml);
    assert_eq!(rule_index.unindexable_count(), 1);
}
#[test]
fn test_anyof_exact_values_indexed() {
    // A list of exact values: each listed value selects the rule, others do not.
    let yaml = r#"
title: Multi Value
logsource:
product: windows
detection:
selection:
EventType:
- 'login'
- 'logout'
condition: selection
"#;
    let (_, rule_index) = build_index(yaml);
    assert_eq!(rule_index.unindexable_count(), 0);

    let expectations: [(&str, Vec<usize>); 3] = [
        ("login", vec![0]),
        ("logout", vec![0]),
        ("file_create", vec![]),
    ];
    for (event_type, expected) in expectations {
        let payload = json!({"EventType": event_type});
        assert_eq!(rule_index.candidates(&JsonEvent::borrow(&payload)), expected);
    }
}
#[test]
fn test_numeric_event_value_lookup() {
    // The rule value is the string '443'; the event carries the number 443.
    let yaml = r#"
title: Port Check
logsource:
product: windows
detection:
selection:
DestinationPort: '443'
condition: selection
"#;
    let (_, rule_index) = build_index(yaml);
    let payload = json!({"DestinationPort": 443});
    let hits = rule_index.candidates(&JsonEvent::borrow(&payload));
    assert_eq!(hits, vec![0]);
}
#[test]
fn test_empty_index_returns_all() {
    let index = RuleIndex::empty();
    assert_eq!(index.rule_count(), 0);
    // With nothing indexed, `candidates` returns every rule index, which for
    // an empty index is no index at all.
    let ev = json!({"EventType": "login"});
    assert!(index.candidates(&JsonEvent::borrow(&ev)).is_empty());
}
#[test]
fn test_dedup_candidates() {
    // The event hits the rule through two indexed fields; it is reported once.
    let (_, rule_index) = build_index(
        r#"
title: Multi Field
logsource:
product: windows
detection:
selection:
EventType: 'login'
Protocol: 'TCP'
condition: selection
"#,
    );
    let payload = json!({"EventType": "login", "Protocol": "TCP"});
    let hits = rule_index.candidates(&JsonEvent::borrow(&payload));
    assert_eq!(hits, vec![0]);
}
#[test]
fn test_keyword_detection_unindexable() {
    // Keyword detections have no field to index on.
    let yaml = r#"
title: Keyword Rule
logsource:
product: windows
detection:
keywords:
- 'suspicious'
- 'malware'
condition: keywords
"#;
    let (_, rule_index) = build_index(yaml);
    assert_eq!(rule_index.unindexable_count(), 1);
}
#[test]
fn test_regex_only_unindexable() {
    // A regex matcher offers no exact value, so nothing is indexed.
    let yaml = r#"
title: Regex Rule
logsource:
product: windows
detection:
selection:
CommandLine|re: '(?i)whoami.*'
condition: selection
"#;
    let (_, rule_index) = build_index(yaml);
    assert_eq!(rule_index.unindexable_count(), 1);
    assert_eq!(rule_index.indexed_field_count(), 0);
}
#[test]
fn test_append_rule_matches_build() {
    // Appending rules one at a time must give the same index as `build`.
    let collection = parse_sigma_yaml(
        r#"
title: Login
logsource:
product: windows
detection:
selection:
EventType: 'login'
condition: selection
---
title: File Create
logsource:
product: windows
detection:
selection:
EventType: 'file_create'
Protocol: 'TCP'
condition: selection
---
title: Keyword Rule
logsource:
product: windows
detection:
keywords:
- 'malware'
condition: keywords
---
title: Multi Value
logsource:
product: windows
detection:
selection:
EventType:
- 'logon'
- 'logoff'
condition: selection
---
title: Regex Rule
logsource:
product: windows
detection:
selection:
CommandLine|re: '(?i)whoami.*'
condition: selection
"#,
    )
    .unwrap();
    let mut engine = Engine::new();
    engine.add_collection(&collection).unwrap();
    let rules = engine.rules();

    let batched = RuleIndex::build(rules);
    let mut incremental = RuleIndex::empty();
    for (position, rule) in rules.iter().enumerate() {
        incremental.append_rule(position, rule);
    }

    // The summary counters agree.
    assert_eq!(incremental.rule_count(), batched.rule_count());
    assert_eq!(incremental.unindexable_count(), batched.unindexable_count());
    assert_eq!(
        incremental.indexed_field_count(),
        batched.indexed_field_count()
    );

    // The candidate sets agree for a spread of events (order is not compared).
    let events = [
        json!({"EventType": "login"}),
        json!({"EventType": "logoff"}),
        json!({"EventType": "file_create", "Protocol": "TCP"}),
        json!({"CommandLine": "whoami /all"}),
        json!({"SomeField": "nothing"}),
    ];
    for ev in &events {
        let wrapped = JsonEvent::borrow(ev);
        let mut from_build = batched.candidates(&wrapped);
        let mut from_append = incremental.candidates(&wrapped);
        from_build.sort_unstable();
        from_append.sort_unstable();
        assert_eq!(from_build, from_append, "verdicts diverge for event {ev}");
    }
}
#[test]
fn test_append_rule_grows_rule_count() {
    // `rule_count` follows the appended indices, and each newly appended rule
    // is usable for lookups straight away.
    let collection = parse_sigma_yaml(
        r#"
title: A
logsource:
product: windows
detection:
selection:
EventType: 'a'
condition: selection
---
title: B
logsource:
product: windows
detection:
selection:
EventType: 'b'
condition: selection
"#,
    )
    .unwrap();
    let mut engine = Engine::new();
    engine.add_collection(&collection).unwrap();
    let rules = engine.rules();

    let mut rule_index = RuleIndex::empty();
    assert_eq!(rule_index.rule_count(), 0);

    rule_index.append_rule(0, &rules[0]);
    assert_eq!(rule_index.rule_count(), 1);
    let first = json!({"EventType": "a"});
    assert_eq!(rule_index.candidates(&JsonEvent::borrow(&first)), vec![0]);

    rule_index.append_rule(1, &rules[1]);
    assert_eq!(rule_index.rule_count(), 2);
    let second = json!({"EventType": "b"});
    assert_eq!(rule_index.candidates(&JsonEvent::borrow(&second)), vec![1]);
}
#[test]
fn test_unindexable_partitioned_by_product() {
    // Unindexable rules are bucketed by lowercased product; a rule without a
    // product goes into the `None` bucket.
    let (_, rule_index) = build_index(
        r#"
title: Win
logsource:
product: Windows
detection:
selection:
CommandLine|contains: 'whoami'
condition: selection
---
title: Lin
logsource:
product: linux
detection:
selection:
CommandLine|contains: 'whoami'
condition: selection
---
title: Generic
detection:
selection:
CommandLine|contains: 'whoami'
condition: selection
"#,
    );
    assert_eq!(rule_index.unindexable_count(), 3);

    let bucket_len = |product: Option<&str>| {
        rule_index
            .unindexable_by_product
            .get(&product.map(String::from))
            .map(Vec::len)
    };
    assert_eq!(bucket_len(Some("windows")), Some(1));
    assert_eq!(bucket_len(Some("linux")), Some(1));
    assert_eq!(bucket_len(None), Some(1));
}
#[test]
fn test_candidates_with_logsource_prunes_and_falls_back() {
    // With a product, unindexable rules for other products are dropped;
    // without one, the lookup behaves like plain `candidates`.
    let (_, rule_index) = build_index(
        r#"
title: Win
logsource:
product: windows
detection:
selection:
CommandLine|contains: 'whoami'
condition: selection
---
title: Lin
logsource:
product: linux
detection:
selection:
CommandLine|contains: 'whoami'
condition: selection
---
title: Generic
detection:
selection:
CommandLine|contains: 'whoami'
condition: selection
"#,
    );
    let payload = json!({"CommandLine": "whoami"});
    let wrapped = JsonEvent::borrow(&payload);
    let sorted_hits = |product: Option<&str>| {
        let mut hits = rule_index.candidates_with_logsource(&wrapped, product);
        hits.sort_unstable();
        hits
    };

    assert_eq!(sorted_hits(Some("windows")), vec![0, 2]);
    assert_eq!(sorted_hits(Some("macos")), vec![2]);
    assert_eq!(sorted_hits(None), vec![0, 1, 2]);
}
#[test]
fn test_candidates_with_logsource_equivalent_to_postfilter() {
    use rsigma_parser::LogSource;

    // Pruning by product inside the index must not change what survives a
    // full logsource compatibility filter applied afterwards.
    let (engine, rule_index) = build_index(
        r#"
title: Win Exact
logsource:
product: windows
detection:
selection:
EventID: '1'
condition: selection
---
title: Lin Exact
logsource:
product: linux
detection:
selection:
EventID: '1'
condition: selection
---
title: Win Contains
logsource:
product: windows
category: process_creation
detection:
selection:
CommandLine|contains: 'whoami'
condition: selection
---
title: Lin Contains
logsource:
product: linux
detection:
selection:
CommandLine|contains: 'whoami'
condition: selection
---
title: Generic Contains
detection:
selection:
CommandLine|contains: 'whoami'
condition: selection
"#,
    );
    let rules = engine.rules();

    /// Reference compatibility check: a rule conflicts with an event only
    /// when both sides set a logsource attribute and the values differ,
    /// ignoring ASCII case.
    fn compatible(rule: &LogSource, event: &LogSource) -> bool {
        fn conflicts(r: &Option<String>, e: &Option<String>) -> bool {
            match (r, e) {
                (Some(r), Some(e)) => !r.eq_ignore_ascii_case(e),
                _ => false,
            }
        }
        !conflicts(&rule.product, &event.product)
            && !conflicts(&rule.service, &event.service)
            && !conflicts(&rule.category, &event.category)
    }

    // Each case is (event, event product, event category).
    let cases: [(serde_json::Value, Option<&str>, Option<&str>); 5] = [
        (
            json!({"EventID": "1", "CommandLine": "whoami"}),
            Some("windows"),
            None,
        ),
        (
            json!({"EventID": "1", "CommandLine": "whoami"}),
            Some("linux"),
            None,
        ),
        (json!({"CommandLine": "whoami"}), Some("macos"), None),
        (
            json!({"EventID": "1", "CommandLine": "whoami"}),
            Some("windows"),
            Some("process_creation"),
        ),
        (json!({"CommandLine": "whoami"}), None, None),
    ];

    for (ev, product, category) in cases {
        let wrapped = JsonEvent::borrow(&ev);
        let event_ls = LogSource {
            product: product.map(String::from),
            category: category.map(String::from),
            ..Default::default()
        };

        let mut expected: Vec<usize> = rule_index
            .candidates(&wrapped)
            .into_iter()
            .filter(|&idx| compatible(&rules[idx].logsource, &event_ls))
            .collect();
        expected.sort_unstable();

        let mut actual: Vec<usize> = rule_index
            .candidates_with_logsource(&wrapped, product)
            .into_iter()
            .filter(|&idx| compatible(&rules[idx].logsource, &event_ls))
            .collect();
        actual.sort_unstable();

        assert_eq!(expected, actual, "diverge for {ev} product={product:?}");
    }
}
}