use std::collections::HashSet;
use crate::compiled_rules::CompiledRules;
use crate::config::Registry;
use crate::condition;
use crate::error::SyaraError;
use crate::models::Rule;
pub struct Compiler;
impl Compiler {
pub fn compile(mut rules: Vec<Rule>, registry: Registry) -> Result<CompiledRules, SyaraError> {
for rule in &mut rules {
Self::validate_and_compile(rule)?;
}
Ok(CompiledRules::new(rules, registry))
}
fn validate_and_compile(rule: &mut Rule) -> Result<(), SyaraError> {
let mut declared: HashSet<String> = HashSet::new();
for r in &rule.strings {
if !declared.insert(r.identifier.clone()) {
return Err(SyaraError::DuplicateIdentifier(
r.identifier.clone(),
rule.name.clone(),
));
}
}
for r in &rule.similarity {
if !declared.insert(r.identifier.clone()) {
return Err(SyaraError::DuplicateIdentifier(
r.identifier.clone(),
rule.name.clone(),
));
}
}
for r in &rule.phash {
if !declared.insert(r.identifier.clone()) {
return Err(SyaraError::DuplicateIdentifier(
r.identifier.clone(),
rule.name.clone(),
));
}
}
for r in &rule.classifier {
if !declared.insert(r.identifier.clone()) {
return Err(SyaraError::DuplicateIdentifier(
r.identifier.clone(),
rule.name.clone(),
));
}
}
for r in &rule.llm {
if !declared.insert(r.identifier.clone()) {
return Err(SyaraError::DuplicateIdentifier(
r.identifier.clone(),
rule.name.clone(),
));
}
}
if !rule.condition.is_empty() {
let expr = condition::parse(&rule.condition).map_err(|e| {
SyaraError::ConditionParse(format!("rule '{}': {}", rule.name, e))
})?;
let id_re = regex::Regex::new(r"[#$]\w+").unwrap();
let cond = &rule.condition;
for m in id_re.find_iter(cond) {
if cond[m.end()..].starts_with('*') {
continue;
}
let id = m.as_str();
let normalized = if let Some(rest) = id.strip_prefix('#') {
format!("${}", rest)
} else {
id.to_owned()
};
if !declared.contains(&normalized) {
return Err(SyaraError::UndefinedIdentifier {
identifier: id.to_owned(),
rule: rule.name.clone(),
});
}
}
rule.compiled_condition = Some(expr);
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::models::{Rule, StringRule};
use std::collections::HashMap;
fn rule_with(strings: Vec<StringRule>, condition: &str) -> Rule {
Rule {
name: "r".to_owned(),
tags: vec![],
meta: HashMap::new(),
strings,
similarity: vec![],
phash: vec![],
classifier: vec![],
llm: vec![],
condition: condition.to_owned(),
compiled_condition: None,
}
}
fn string_rule(id: &str, pattern: &str) -> StringRule {
StringRule {
identifier: id.to_owned(),
pattern: pattern.to_owned(),
modifiers: vec![],
is_regex: false,
}
}
#[test]
fn test_undefined_count_identifier_errors() {
let mut rule = rule_with(vec![], "#s_missing >= 1");
let err = Compiler::validate_and_compile(&mut rule).unwrap_err();
match err {
SyaraError::UndefinedIdentifier { identifier, .. } => {
assert_eq!(identifier, "#s_missing");
}
other => panic!("expected UndefinedIdentifier, got {:?}", other),
}
}
#[test]
fn test_defined_count_identifier_ok() {
let mut rule = rule_with(vec![string_rule("$s1", "hello")], "#s1 >= 1");
let result = Compiler::validate_and_compile(&mut rule);
assert!(result.is_ok(), "expected Ok, got {:?}", result);
assert!(rule.compiled_condition.is_some());
}
#[test]
fn test_undefined_dollar_identifier_still_errors() {
let mut rule = rule_with(vec![], "$s_missing");
let err = Compiler::validate_and_compile(&mut rule).unwrap_err();
assert!(matches!(err, SyaraError::UndefinedIdentifier { .. }));
}
}