#[cfg(feature = "toml-rules")]
use crate::core::index::CompiledRuleIndex;
#[cfg(feature = "toml-rules")]
use crate::core::types::{CompiledPattern, PatternMapping, RuleEntry};
#[cfg(feature = "toml-rules")]
use crate::error::{Error, Result};
#[cfg(feature = "toml-rules")]
use crate::vm::bytecode::{Instruction, Program};
/// Compiles every rule of a parsed TOML rule file into a [`CompiledRuleIndex`].
///
/// Each rule contributes exactly one pattern (a regex when its `pattern_type`
/// is `Regex`, a literal otherwise), one [`RuleEntry`] named after the rule's
/// `id` with no tags, and one four-instruction [`Program`]
/// (`PushStringCount 0`, `PushImmediate 1`, `Gte`, `Halt`).
/// NOTE(review): that program presumably means "string 0 matched at least
/// once" — confirm against the VM's opcode semantics.
///
/// Rule ids and pattern ids are both the rule's position in the file, and the
/// single string of rule `N` is identified as `$sN`.
///
/// # Errors
///
/// Returns [`Error::Pattern`] when the file contains no rules, when the number
/// of rules does not fit in a `u32` id, or when the pattern set fails to build.
#[cfg(feature = "toml-rules")]
pub fn compile_toml_rules(rule_file: &rulekit::RuleFile) -> Result<CompiledRuleIndex> {
    let rules = rule_file.rules();
    if rules.is_empty() {
        return Err(Error::Pattern {
            message: "no rules to compile".to_string(),
        });
    }

    // Ids are stored as `u32`; reject oversized inputs up front instead of
    // silently truncating the index with an `as` cast.
    let rule_count = rules.len();
    if u32::try_from(rule_count).is_err() {
        return Err(Error::Pattern {
            message: format!("too many rules to compile: {rule_count}"),
        });
    }

    // Every vector ends up with exactly one element per rule.
    let mut rule_entries = Vec::with_capacity(rule_count);
    let mut compiled_patterns = Vec::with_capacity(rule_count);
    let mut programs = Vec::with_capacity(rule_count);
    let mut pattern_to_rules = Vec::with_capacity(rule_count);
    let mut rule_list = Vec::with_capacity(rule_count);
    let mut string_local_ids = Vec::with_capacity(rule_count);
    let mut builder = warpstate::PatternSet::builder();

    // The counter cannot overflow: `rule_count` was checked to fit in `u32`,
    // and `zip` stops as soon as the rule iterator is exhausted.
    for (rule, rule_id) in rules.iter().zip(0u32..) {
        // One pattern per rule, so both id spaces coincide.
        let pattern_id = rule_id;
        let is_regex = matches!(rule.pattern_type, rulekit::PatternType::Regex);
        builder = if is_regex {
            builder.regex(&rule.value)
        } else {
            builder.literal(&rule.value)
        };

        let string_id = format!("$s{rule_id}");
        rule_entries.push(RuleEntry {
            name: rule.id.clone(),
            tags: Vec::new(),
            strings: vec![string_id.clone()],
        });
        compiled_patterns.push(CompiledPattern {
            pattern_id,
            rule_id,
            // Each rule owns a single string, so its rule-local id is always 0.
            string_id: 0,
            identifier: string_id,
            source: rule.value.clone(),
            is_regex,
        });
        programs.push(Program {
            instructions: vec![
                Instruction::new(crate::vm::bytecode::Opcode::PushStringCount, 0),
                Instruction::new(crate::vm::bytecode::Opcode::PushImmediate, 1),
                Instruction::new(crate::vm::bytecode::Opcode::Gte, 0),
                Instruction::new(crate::vm::bytecode::Opcode::Halt, 0),
            ],
        });

        // `rule_list` holds exactly one entry per previously processed rule,
        // so this pattern's `[start, len]` slice starts at `rule_id`.
        pattern_to_rules.push([rule_id, 1]);
        rule_list.push(rule_id);
        string_local_ids.push(0u32);
    }

    let pattern_set = builder.build().map_err(|e| Error::Pattern {
        message: format!("failed to compile pattern set: {e}"),
    })?;

    let mapping = PatternMapping {
        pattern_to_rules,
        rule_list,
        string_local_ids,
    };
    Ok(CompiledRuleIndex::build(
        rule_entries,
        compiled_patterns,
        mapping,
        programs,
        pattern_set,
    ))
}
/// Compiles every `.toml` rule file directly inside `dir` into one
/// [`CompiledRuleIndex`].
///
/// Only entries whose extension is exactly `toml` are considered;
/// subdirectories are not descended into. Files are processed in sorted path
/// order so that the ids assigned to rules (and their `$sN` string
/// identifiers) are reproducible regardless of the order in which the
/// operating system lists the directory.
///
/// Each rule contributes one pattern, one [`RuleEntry`] named after the rule's
/// `id`, and one four-instruction [`Program`] (`PushStringCount 0`,
/// `PushImmediate 1`, `Gte`, `Halt`). Ids are assigned sequentially across all
/// files.
///
/// # Errors
///
/// Returns [`Error::Pattern`] when the directory or one of its entries cannot
/// be read, when a `.toml` file fails to parse, when no `.toml` file is found,
/// when the files contain no rules at all, when the total number of rules does
/// not fit in a `u32` id, or when the pattern set fails to build.
#[cfg(feature = "toml-rules")]
pub fn compile_rules_dir(dir: impl AsRef<std::path::Path>) -> Result<CompiledRuleIndex> {
    let dir = dir.as_ref();
    let entries = std::fs::read_dir(dir).map_err(|e| Error::Pattern {
        message: format!("failed to read rules directory {}: {e}", dir.display()),
    })?;

    let mut toml_paths = Vec::new();
    for entry in entries {
        let entry = entry.map_err(|e| Error::Pattern {
            message: format!("failed to read directory entry: {e}"),
        })?;
        let path = entry.path();
        if path.extension().is_some_and(|ext| ext == "toml") {
            toml_paths.push(path);
        }
    }
    if toml_paths.is_empty() {
        return Err(Error::Pattern {
            message: format!("no .toml rule files found in {}", dir.display()),
        });
    }
    // `read_dir` makes no ordering guarantee; sort so rule ids are stable
    // across runs, platforms and filesystems.
    toml_paths.sort();

    let mut rule_files = Vec::with_capacity(toml_paths.len());
    for path in &toml_paths {
        let rule_file = rulekit::RuleFile::parse_path(path).map_err(|e| Error::Pattern {
            message: format!("failed to parse {}: {e}", path.display()),
        })?;
        rule_files.push(rule_file);
    }

    // Mirror `compile_toml_rules`: an input without any rule is an error, and
    // the rule count must fit in the `u32` id space.
    let total_rules: usize = rule_files.iter().map(|file| file.rules().len()).sum();
    if total_rules == 0 {
        return Err(Error::Pattern {
            message: format!("no rules to compile in {}", dir.display()),
        });
    }
    if u32::try_from(total_rules).is_err() {
        return Err(Error::Pattern {
            message: format!("too many rules to compile: {total_rules}"),
        });
    }

    let mut all_rule_entries = Vec::with_capacity(total_rules);
    let mut all_compiled = Vec::with_capacity(total_rules);
    let mut all_programs = Vec::with_capacity(total_rules);
    let mut all_p2r = Vec::with_capacity(total_rules);
    let mut all_rule_list = Vec::with_capacity(total_rules);
    let mut all_string_local_ids = Vec::with_capacity(total_rules);
    let mut builder = warpstate::PatternSet::builder();

    // Sequential id shared by the rule, its pattern and its `$sN` identifier.
    // It cannot overflow because `total_rules` was checked to fit in `u32`.
    let mut next_id = 0u32;
    for rule_file in &rule_files {
        for rule in rule_file.rules() {
            let is_regex = matches!(rule.pattern_type, rulekit::PatternType::Regex);
            builder = if is_regex {
                builder.regex(&rule.value)
            } else {
                builder.literal(&rule.value)
            };

            let identifier = format!("$s{next_id}");
            all_rule_entries.push(RuleEntry {
                name: rule.id.clone(),
                tags: Vec::new(),
                strings: vec![identifier.clone()],
            });
            all_compiled.push(CompiledPattern {
                pattern_id: next_id,
                rule_id: next_id,
                // Each rule owns a single string, so its rule-local id is 0.
                string_id: 0,
                identifier,
                source: rule.value.clone(),
                is_regex,
            });
            all_programs.push(Program {
                instructions: vec![
                    Instruction::new(crate::vm::bytecode::Opcode::PushStringCount, 0),
                    Instruction::new(crate::vm::bytecode::Opcode::PushImmediate, 1),
                    Instruction::new(crate::vm::bytecode::Opcode::Gte, 0),
                    Instruction::new(crate::vm::bytecode::Opcode::Halt, 0),
                ],
            });

            // `all_rule_list` holds one entry per previously processed rule,
            // so this pattern's `[start, len]` slice starts at `next_id`.
            all_p2r.push([next_id, 1]);
            all_rule_list.push(next_id);
            all_string_local_ids.push(0u32);
            next_id += 1;
        }
    }

    let pattern_set = builder.build().map_err(|e| Error::Pattern {
        message: format!("failed to compile pattern set: {e}"),
    })?;

    Ok(CompiledRuleIndex::build(
        all_rule_entries,
        all_compiled,
        PatternMapping {
            pattern_to_rules: all_p2r,
            rule_list: all_rule_list,
            string_local_ids: all_string_local_ids,
        },
        all_programs,
        pattern_set,
    ))
}
#[cfg(all(test, feature = "toml-rules"))]
mod tests {
    use super::*;

    /// Rule file with two literal rules (`password` and `eval(`) shared by
    /// most of the tests below.
    fn sample_toml() -> &'static str {
        r#"
[[rules]]
id = "cred.literal"
name = "Credential literal"
severity = "High"
pattern_type = "Literal"
value = "password"
[rules.metadata]
description = "Detects a literal password"
[[rules]]
id = "eval.call"
name = "Eval call"
severity = "Critical"
pattern_type = "Literal"
value = "eval("
"#
    }

    /// Both rules of the sample file end up in the compiled index.
    #[test]
    fn compile_toml_rules_produces_valid_index() {
        let rule_file = rulekit::RuleFile::parse_str(sample_toml()).unwrap();
        let index = compile_toml_rules(&rule_file).unwrap();
        assert_eq!(index.rule_count(), 2);
    }

    /// Input containing both literals reports both rules by name.
    #[test]
    fn compiled_index_scans_cpu_correctly() {
        let rule_file = rulekit::RuleFile::parse_str(sample_toml()).unwrap();
        let index = compile_toml_rules(&rule_file).unwrap();
        let matches = index.scan_cpu(b"the password is eval(secret)").unwrap();
        assert_eq!(matches.len(), 2, "should match both rules");
        let names: Vec<&str> = matches.iter().map(|m| m.rule_name.as_str()).collect();
        assert!(names.contains(&"cred.literal"));
        assert!(names.contains(&"eval.call"));
    }

    /// Input containing neither literal reports nothing.
    #[test]
    fn compiled_index_no_false_positives() {
        let rule_file = rulekit::RuleFile::parse_str(sample_toml()).unwrap();
        let index = compile_toml_rules(&rule_file).unwrap();
        let matches = index.scan_cpu(b"nothing suspicious here").unwrap();
        assert!(matches.is_empty(), "should find no matches in clean input");
    }

    /// Input containing only one of the literals reports only that rule.
    #[test]
    fn compiled_index_partial_match() {
        let rule_file = rulekit::RuleFile::parse_str(sample_toml()).unwrap();
        let index = compile_toml_rules(&rule_file).unwrap();
        let matches = index.scan_cpu(b"my password is safe").unwrap();
        assert_eq!(matches.len(), 1, "should match only password rule");
        assert_eq!(matches[0].rule_name, "cred.literal");
    }

    /// A rule file with a single rule compiles successfully.
    ///
    /// This test was previously named `empty_rules_rejected` although it never
    /// exercised an empty rule set; the "no rules to compile" error path is
    /// not covered by this module.
    #[test]
    fn single_rule_compiles() {
        let rule_file = rulekit::RuleFile::parse_str("[[rules]]\nid = \"x\"\nname = \"X\"\npattern_type = \"Literal\"\nvalue = \"abc\"\n").unwrap();
        let index = compile_toml_rules(&rule_file);
        assert!(index.is_ok(), "single rule should compile");
    }

    /// A `Regex` rule is compiled as a regex and matches a 32-digit hex run.
    #[test]
    fn regex_rules_compile_and_match() {
        let toml = r#"
[[rules]]
id = "hex.detect"
name = "Hex string"
severity = "Medium"
pattern_type = "Regex"
value = "[0-9a-f]{32}"
"#;
        let rule_file = rulekit::RuleFile::parse_str(toml).unwrap();
        let index = compile_toml_rules(&rule_file).unwrap();
        let matches = index
            .scan_cpu(b"hash: 0123456789abcdef0123456789abcdef done")
            .unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].rule_name, "hex.detect");
    }

    /// A `.toml` file in the directory is discovered, compiled and scannable.
    #[test]
    fn compile_rules_dir_finds_toml_files() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("test.toml"), sample_toml()).unwrap();
        let index = compile_rules_dir(dir.path()).unwrap();
        assert_eq!(index.rule_count(), 2);
        let matches = index.scan_cpu(b"eval(password)").unwrap();
        assert_eq!(matches.len(), 2);
    }

    /// A directory without any file is rejected.
    #[test]
    fn compile_rules_dir_empty_directory_errors() {
        let dir = tempfile::tempdir().unwrap();
        let result = compile_rules_dir(dir.path());
        assert!(result.is_err());
    }

    /// Files without the `.toml` extension are ignored, so a directory that
    /// contains only such files is rejected like an empty one.
    #[test]
    fn compile_rules_dir_ignores_non_toml_files() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("rules.txt"), sample_toml()).unwrap();
        let result = compile_rules_dir(dir.path());
        assert!(result.is_err());
    }
}