use std::collections::HashSet;
use crate::compiled_rules::CompiledRules;
use crate::config::Registry;
use crate::condition;
use crate::error::SyaraError;
use crate::models::Rule;
/// Stateless entry point for validating rules and compiling their conditions.
///
/// The type carries no data; all functionality is exposed through associated
/// functions on its `impl` block.
pub struct Compiler;
impl Compiler {
    /// Validates every rule, compiles its condition, and bundles the result
    /// together with `registry` into a [`CompiledRules`].
    ///
    /// Rules are processed in order and processing stops at the first failure.
    ///
    /// # Errors
    ///
    /// Returns the first error produced while validating a rule:
    /// [`SyaraError::DuplicateIdentifier`], [`SyaraError::ConditionParse`] or
    /// [`SyaraError::UndefinedIdentifier`].
    pub fn compile(mut rules: Vec<Rule>, registry: Registry) -> Result<CompiledRules, SyaraError> {
        for rule in &mut rules {
            Self::validate_and_compile(rule)?;
        }
        Ok(CompiledRules::new(rules, registry))
    }

    /// Returns the pattern used to find `$identifier` references in a
    /// condition's source text.
    ///
    /// The regex is built on first use and then shared, so compiling many
    /// rules does not rebuild it once per rule.
    fn identifier_pattern() -> &'static regex::Regex {
        static PATTERN: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
        PATTERN.get_or_init(|| {
            regex::Regex::new(r"\$\w+").expect("hard-coded identifier pattern is a valid regex")
        })
    }

    /// Checks a single rule and stores its parsed condition in
    /// `rule.compiled_condition`.
    ///
    /// Steps, in order:
    /// 1. Every identifier declared in the `strings`, `similarity`, `phash`,
    ///    `classifier` and `llm` sections must be unique within the rule.
    /// 2. A non-empty condition is parsed with [`condition::parse`].
    /// 3. Every `$identifier` appearing in the condition text must have been
    ///    declared in step 1.
    ///
    /// A rule with an empty condition only goes through step 1 and its
    /// `compiled_condition` is left untouched.
    ///
    /// # Errors
    ///
    /// * [`SyaraError::DuplicateIdentifier`] if an identifier is declared twice.
    /// * [`SyaraError::ConditionParse`] if the condition cannot be parsed.
    /// * [`SyaraError::UndefinedIdentifier`] if the condition references an
    ///   identifier that was never declared.
    fn validate_and_compile(rule: &mut Rule) -> Result<(), SyaraError> {
        // Walk the sections in a fixed order so the first duplicate reported
        // is deterministic.
        let identifiers = rule
            .strings
            .iter()
            .map(|entry| entry.identifier.as_str())
            .chain(rule.similarity.iter().map(|entry| entry.identifier.as_str()))
            .chain(rule.phash.iter().map(|entry| entry.identifier.as_str()))
            .chain(rule.classifier.iter().map(|entry| entry.identifier.as_str()))
            .chain(rule.llm.iter().map(|entry| entry.identifier.as_str()));

        let mut declared: HashSet<&str> = HashSet::new();
        for identifier in identifiers {
            // `insert` returns false when the value was already present.
            if !declared.insert(identifier) {
                return Err(SyaraError::DuplicateIdentifier(
                    identifier.to_owned(),
                    rule.name.clone(),
                ));
            }
        }

        if rule.condition.is_empty() {
            return Ok(());
        }

        let expr = condition::parse(&rule.condition).map_err(|e| {
            SyaraError::ConditionParse(format!("rule '{}': {}", rule.name, e))
        })?;

        // References are located by scanning the raw condition text rather
        // than by walking the parsed expression.
        let source: &str = &rule.condition;
        for reference in Self::identifier_pattern().find_iter(source) {
            // A match directly followed by `*` is not checked against the
            // declarations (presumably a wildcard reference such as `$foo*`
            // -- confirm against the condition grammar).
            if source[reference.end()..].starts_with('*') {
                continue;
            }
            let identifier = reference.as_str();
            if !declared.contains(identifier) {
                return Err(SyaraError::UndefinedIdentifier {
                    identifier: identifier.to_owned(),
                    rule: rule.name.clone(),
                });
            }
        }

        rule.compiled_condition = Some(expr);
        Ok(())
    }
}