use std::collections::HashMap;
use serde_json::Value;
use crate::compiler::{CompiledDetection, CompiledDetectionItem, CompiledRule};
use crate::event::Event;
use crate::matcher::CompiledMatcher;
/// Pre-filter over compiled rules keyed on exact field values.
///
/// Built by `RuleIndex::build`; queried per event through
/// `RuleIndex::candidates`, which returns the indices of the rules that should
/// be evaluated for that event.
pub(crate) struct RuleIndex {
/// field name -> lowercased exact value -> indices of the rules that
/// reference that (field, value) pair.
field_index: HashMap<String, HashMap<String, Vec<usize>>>,
/// Indices of rules that could not be indexed; `candidates` always
/// includes them.
unindexable: Vec<usize>,
/// Number of rules the index was built from (`rules.len()` in `build`).
rule_count: usize,
}
impl RuleIndex {
    /// Creates an index that covers no rules.
    ///
    /// With no indexed fields, [`RuleIndex::candidates`] takes its fast path
    /// and returns `0..rule_count`, which is empty here.
    pub(crate) fn empty() -> Self {
        RuleIndex {
            field_index: HashMap::new(),
            unindexable: Vec::new(),
            rule_count: 0,
        }
    }

    /// Builds the index for `rules`.
    ///
    /// A rule's index is its position in `rules`. For every rule, the exact
    /// (field, lowercased value) pairs of all its detections are collected via
    /// `extract_exact_pairs`:
    ///
    /// * if the rule has no pairs at all, or at least one of its detections
    ///   yields no pairs, the rule is recorded as unindexable and will be
    ///   returned for every event;
    /// * otherwise the rule is registered under each of its pairs.
    ///
    /// NOTE(review): detections are indexed without looking at the rule's
    /// condition expression. A rule whose condition negates an indexed
    /// detection (e.g. `not selection`) would presumably only become a
    /// candidate when that detection's value is present — confirm how the
    /// caller handles such rules.
    pub(crate) fn build(rules: &[CompiledRule]) -> Self {
        let mut field_index: HashMap<String, HashMap<String, Vec<usize>>> = HashMap::new();
        let mut unindexable: Vec<usize> = Vec::new();

        for (rule_idx, rule) in rules.iter().enumerate() {
            let mut all_pairs: Vec<(String, String)> = Vec::new();
            let mut every_detection_has_pairs = true;
            for detection in rule.detections.values() {
                let pairs = extract_exact_pairs(detection);
                if pairs.is_empty() {
                    every_detection_has_pairs = false;
                }
                all_pairs.extend(pairs);
            }

            if all_pairs.is_empty() || !every_detection_has_pairs {
                unindexable.push(rule_idx);
                continue;
            }

            for (field, value) in all_pairs {
                let bucket = field_index
                    .entry(field)
                    .or_default()
                    .entry(value)
                    .or_default();
                // Rules are processed in ascending order, so a repeated pair of
                // the same rule can only collide with the bucket's last entry.
                if bucket.last() != Some(&rule_idx) {
                    bucket.push(rule_idx);
                }
            }
        }

        RuleIndex {
            field_index,
            unindexable,
            rule_count: rules.len(),
        }
    }

    /// Returns the indices of the rules worth evaluating against `event`,
    /// each at most once and in ascending order.
    ///
    /// A rule is a candidate when it is unindexable, or when the event carries
    /// an indexed field whose value (rendered by `value_to_lowercase_string`)
    /// equals one of the values the rule was registered under. If nothing is
    /// indexed at all, every rule is returned.
    pub(crate) fn candidates(&self, event: &Event) -> Vec<usize> {
        if self.field_index.is_empty() {
            return (0..self.rule_count).collect();
        }

        // One flag per rule: deduplicates hits and lets the result be emitted
        // in rule order instead of hash-map iteration order.
        let mut selected = vec![false; self.rule_count];

        for (field_name, value_map) in &self.field_index {
            let Some(event_value) = event.get_field(field_name) else {
                continue;
            };
            let Some(search_key) = value_to_lowercase_string(event_value) else {
                continue;
            };
            let Some(rule_indices) = value_map.get(&search_key) else {
                continue;
            };
            for &idx in rule_indices {
                selected[idx] = true;
            }
        }

        for &idx in &self.unindexable {
            selected[idx] = true;
        }

        selected
            .iter()
            .enumerate()
            .filter_map(|(idx, &hit)| hit.then_some(idx))
            .collect()
    }

    /// Number of rules the index was built from.
    #[cfg(test)]
    pub(crate) fn rule_count(&self) -> usize {
        self.rule_count
    }

    /// Number of rules that could not be indexed.
    #[cfg(test)]
    pub(crate) fn unindexable_count(&self) -> usize {
        self.unindexable.len()
    }

    /// Number of distinct field names present in the index.
    #[cfg(test)]
    pub(crate) fn indexed_field_count(&self) -> usize {
        self.field_index.len()
    }
}
/// Collects the exact (field, lowercased value) pairs that must be present in
/// an event for `detection` to be able to match.
///
/// * `AllOf`: the pairs of every item, gathered through `extract_from_item`.
/// * `AnyOf`: the pairs of every alternative, but only if each alternative
///   contributes at least one pair. An alternative without pairs can match
///   without any indexed value being present, so the whole detection is then
///   reported as having no pairs.
/// * `Keywords`: never any pairs.
///
/// An empty result means the detection cannot be indexed.
fn extract_exact_pairs(detection: &CompiledDetection) -> Vec<(String, String)> {
    match detection {
        CompiledDetection::AllOf(items) => {
            let mut pairs = Vec::new();
            for item in items {
                extract_from_item(item, &mut pairs);
            }
            pairs
        }
        CompiledDetection::AnyOf(subs) => {
            let mut pairs = Vec::new();
            for sub in subs {
                let sub_pairs = extract_exact_pairs(sub);
                if sub_pairs.is_empty() {
                    // This alternative is not covered by any exact pair, so
                    // indexing the others would hide matches made through it.
                    return Vec::new();
                }
                pairs.extend(sub_pairs);
            }
            pairs
        }
        CompiledDetection::Keywords(_) => Vec::new(),
    }
}
/// Appends to `out` the exact pairs of a single detection item.
///
/// Items without a field name contribute nothing; for all others the item's
/// matcher is walked by `extract_from_matcher` under that field name.
fn extract_from_item(item: &CompiledDetectionItem, out: &mut Vec<(String, String)>) {
    if let Some(name) = &item.field {
        extract_from_matcher(&item.matcher, name.as_str(), out);
    }
}
/// Appends to `out` the exact (field, lowercased value) pairs implied by
/// `matcher` when it is applied to `field`.
///
/// * `Exact`: one pair with the value lowercased.
/// * `AllOf`: the pairs of every child; children that yield nothing are
///   simply skipped, since the remaining pairs are still required.
/// * `AnyOf`: the pairs of every child, but only if each child yields at
///   least one. A child without pairs can match a value that is not indexed,
///   so in that case nothing is appended.
/// * any other matcher kind: nothing.
fn extract_from_matcher(matcher: &CompiledMatcher, field: &str, out: &mut Vec<(String, String)>) {
    match matcher {
        CompiledMatcher::Exact { value, .. } => {
            out.push((field.to_string(), value.to_lowercase()));
        }
        CompiledMatcher::AnyOf(children) => {
            // Stage the alternatives so a non-indexable one can veto them all
            // without disturbing what is already in `out`.
            let mut alternatives = Vec::new();
            for child in children {
                let before = alternatives.len();
                extract_from_matcher(child, field, &mut alternatives);
                if alternatives.len() == before {
                    return;
                }
            }
            out.append(&mut alternatives);
        }
        CompiledMatcher::AllOf(children) => {
            for child in children {
                extract_from_matcher(child, field, out);
            }
        }
        _ => {}
    }
}
/// Renders a scalar JSON value as the key used for index lookups.
///
/// Strings are lowercased; numbers and booleans use their `to_string`
/// rendering. `null`, arrays and objects have no key and yield `None`.
fn value_to_lowercase_string(value: &Value) -> Option<String> {
    let key = match value {
        Value::String(text) => text.to_lowercase(),
        Value::Number(number) => number.to_string(),
        Value::Bool(flag) => flag.to_string(),
        Value::Null | Value::Array(_) | Value::Object(_) => return None,
    };
    Some(key)
}
// Unit tests for `RuleIndex`: which rules end up indexed vs. unindexable, and
// which rule indices `candidates` returns for sample events.
#[cfg(test)]
mod tests {
use super::*;
use crate::Engine;
use rsigma_parser::parse_sigma_yaml;
use serde_json::json;
/// Parses `yaml`, loads it into a fresh `Engine`, and builds a `RuleIndex`
/// from the engine's rules. Panics (via `unwrap`) if parsing or loading fails.
fn build_index(yaml: &str) -> (Engine, RuleIndex) {
let collection = parse_sigma_yaml(yaml).unwrap();
let mut engine = Engine::new();
engine.add_collection(&collection).unwrap();
let index = RuleIndex::build(engine.rules());
(engine, index)
}
// A single exact-value selection is indexed under exactly one field.
#[test]
fn test_exact_match_indexed() {
let (_, index) = build_index(
r#"
title: Login Event
logsource:
product: windows
detection:
selection:
EventType: 'login'
condition: selection
"#,
);
assert_eq!(index.rule_count(), 1);
assert_eq!(index.unindexable_count(), 0);
assert_eq!(index.indexed_field_count(), 1);
}
// A selection that only uses `contains` has no exact values, so the rule is
// unindexable and nothing is added to the field index.
#[test]
fn test_contains_only_unindexable() {
let (_, index) = build_index(
r#"
title: Whoami Detection
logsource:
product: windows
detection:
selection:
CommandLine|contains: 'whoami'
condition: selection
"#,
);
assert_eq!(index.rule_count(), 1);
assert_eq!(index.unindexable_count(), 1);
assert_eq!(index.indexed_field_count(), 0);
}
// A selection mixing an exact item with a `contains` item is still indexed.
#[test]
fn test_mixed_items_in_allof_detection() {
let (_, index) = build_index(
r#"
title: Process Create
logsource:
product: windows
detection:
selection:
EventType: 'process_create'
CommandLine|contains: 'whoami'
condition: selection
"#,
);
assert_eq!(index.unindexable_count(), 0);
assert!(index.indexed_field_count() > 0);
}
// An event carrying the indexed value yields the rule as a candidate.
#[test]
fn test_candidates_returns_matching_rule() {
let (_, index) = build_index(
r#"
title: Login Event
logsource:
product: windows
detection:
selection:
EventType: 'login'
condition: selection
"#,
);
let ev = json!({"EventType": "login", "User": "admin"});
let event = Event::from_value(&ev);
let candidates = index.candidates(&event);
assert_eq!(candidates, vec![0]);
}
// An event with a different value for the indexed field yields no candidates.
#[test]
fn test_candidates_skips_non_matching() {
let (_, index) = build_index(
r#"
title: Login Event
logsource:
product: windows
detection:
selection:
EventType: 'login'
condition: selection
"#,
);
let ev = json!({"EventType": "file_create", "User": "admin"});
let event = Event::from_value(&ev);
let candidates = index.candidates(&event);
assert!(candidates.is_empty());
}
// An unindexable rule is a candidate even for an unrelated event.
#[test]
fn test_unindexable_always_returned() {
let (_, index) = build_index(
r#"
title: Wildcard Rule
logsource:
product: windows
detection:
selection:
CommandLine|contains: 'whoami'
condition: selection
"#,
);
let ev = json!({"SomeField": "whatever"});
let event = Event::from_value(&ev);
let candidates = index.candidates(&event);
assert_eq!(candidates, vec![0]);
}
// The rule value is upper-case and the event value lower-case; the lookup
// still finds the rule.
#[test]
fn test_case_insensitive_lookup() {
let (_, index) = build_index(
r#"
title: Login Event
logsource:
product: windows
detection:
selection:
EventType: 'LOGIN'
condition: selection
"#,
);
let ev = json!({"EventType": "login"});
let event = Event::from_value(&ev);
let candidates = index.candidates(&event);
assert_eq!(candidates, vec![0]);
}
// With three rules keyed on different values of the same field, only the rule
// whose value appears in the event is returned.
#[test]
fn test_multiple_rules_selective_candidates() {
let yaml = r#"
title: Login
logsource:
product: windows
detection:
selection:
EventType: 'login'
condition: selection
---
title: File Create
logsource:
product: windows
detection:
selection:
EventType: 'file_create'
condition: selection
---
title: Process Create
logsource:
product: windows
detection:
selection:
EventType: 'process_create'
condition: selection
"#;
let (_, index) = build_index(yaml);
assert_eq!(index.rule_count(), 3);
assert_eq!(index.unindexable_count(), 0);
let ev = json!({"EventType": "login"});
let event = Event::from_value(&ev);
let candidates = index.candidates(&event);
assert_eq!(candidates, vec![0]);
let ev2 = json!({"EventType": "process_create"});
let event2 = Event::from_value(&ev2);
let candidates2 = index.candidates(&event2);
assert_eq!(candidates2, vec![2]);
}
// One detection has an exact value, the other only `contains`; because one
// detection has no exact pairs, the whole rule is unindexable.
#[test]
fn test_or_with_mixed_indexable_unindexable_detections() {
let (_, index) = build_index(
r#"
title: Mixed OR
logsource:
product: windows
detection:
selection_a:
EventType: 'login'
selection_b:
CommandLine|contains: 'whoami'
condition: 1 of selection_*
"#,
);
assert_eq!(index.unindexable_count(), 1);
}
// A list of exact values on one field indexes the rule under each value.
#[test]
fn test_anyof_exact_values_indexed() {
let (_, index) = build_index(
r#"
title: Multi Value
logsource:
product: windows
detection:
selection:
EventType:
- 'login'
- 'logout'
condition: selection
"#,
);
assert_eq!(index.unindexable_count(), 0);
let ev_login = json!({"EventType": "login"});
let ev_logout = json!({"EventType": "logout"});
let ev_other = json!({"EventType": "file_create"});
assert_eq!(index.candidates(&Event::from_value(&ev_login)), vec![0]);
assert_eq!(index.candidates(&Event::from_value(&ev_logout)), vec![0]);
assert!(index.candidates(&Event::from_value(&ev_other)).is_empty());
}
// The rule value is the string '443' and the event value is the JSON number
// 443; the lookup still finds the rule.
#[test]
fn test_numeric_event_value_lookup() {
let (_, index) = build_index(
r#"
title: Port Check
logsource:
product: windows
detection:
selection:
DestinationPort: '443'
condition: selection
"#,
);
let ev = json!({"DestinationPort": 443});
let event = Event::from_value(&ev);
let candidates = index.candidates(&event);
assert_eq!(candidates, vec![0]);
}
// Only checks the rule count of an empty index; `candidates` is not
// exercised here.
#[test]
fn test_empty_index_returns_all() {
let index = RuleIndex::empty();
assert_eq!(index.rule_count(), 0);
}
// The rule is indexed under two fields and the event matches both; the rule
// index must still appear only once.
#[test]
fn test_dedup_candidates() {
let yaml = r#"
title: Multi Field
logsource:
product: windows
detection:
selection:
EventType: 'login'
Protocol: 'TCP'
condition: selection
"#;
let (_, index) = build_index(yaml);
let ev = json!({"EventType": "login", "Protocol": "TCP"});
let event = Event::from_value(&ev);
let candidates = index.candidates(&event);
assert_eq!(candidates, vec![0]);
}
// A keyword-only detection is unindexable.
#[test]
fn test_keyword_detection_unindexable() {
let (_, index) = build_index(
r#"
title: Keyword Rule
logsource:
product: windows
detection:
keywords:
- 'suspicious'
- 'malware'
condition: keywords
"#,
);
assert_eq!(index.unindexable_count(), 1);
}
// A regex-only selection is unindexable and adds nothing to the field index.
#[test]
fn test_regex_only_unindexable() {
let (_, index) = build_index(
r#"
title: Regex Rule
logsource:
product: windows
detection:
selection:
CommandLine|re: '(?i)whoami.*'
condition: selection
"#,
);
assert_eq!(index.unindexable_count(), 1);
assert_eq!(index.indexed_field_count(), 0);
}
}