use std::collections::HashMap;
use crate::condition::Expr;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Modifier {
NoCase,
Wide,
Ascii,
Dotall,
FullWord,
}
impl Modifier {
pub(crate) fn from_str(s: &str) -> Option<Self> {
match s {
"nocase" => Some(Self::NoCase),
"wide" => Some(Self::Wide),
"ascii" => Some(Self::Ascii),
"dotall" => Some(Self::Dotall),
"fullword" => Some(Self::FullWord),
_ => None,
}
}
}
#[derive(Debug, Clone)]
pub struct StringRule {
pub identifier: String,
pub pattern: String,
pub modifiers: Vec<Modifier>,
pub is_regex: bool,
}
#[derive(Debug, Clone)]
pub struct SimilarityRule {
pub identifier: String,
pub pattern: String,
pub threshold: f64,
pub cleaner_name: String,
pub chunker_name: String,
pub matcher_name: String,
}
impl Default for SimilarityRule {
fn default() -> Self {
Self {
identifier: String::new(),
pattern: String::new(),
threshold: 0.8,
cleaner_name: "default_cleaning".into(),
chunker_name: "no_chunking".into(),
matcher_name: "sbert".into(),
}
}
}
#[derive(Debug, Clone)]
pub struct PHashRule {
pub identifier: String,
pub file_path: String,
pub threshold: f64,
pub phash_name: String,
}
impl Default for PHashRule {
fn default() -> Self {
Self {
identifier: String::new(),
file_path: String::new(),
threshold: 0.9,
phash_name: "imagehash".into(),
}
}
}
#[derive(Debug, Clone)]
pub struct ClassifierRule {
pub identifier: String,
pub pattern: String,
pub threshold: f64,
pub cleaner_name: String,
pub chunker_name: String,
pub classifier_name: String,
}
impl Default for ClassifierRule {
fn default() -> Self {
Self {
identifier: String::new(),
pattern: String::new(),
threshold: 0.7,
cleaner_name: "default_cleaning".into(),
chunker_name: "no_chunking".into(),
classifier_name: "tuned-sbert".into(),
}
}
}
#[derive(Debug, Clone)]
pub struct LLMRule {
pub identifier: String,
pub pattern: String,
pub llm_name: String,
pub cleaner_name: String,
pub chunker_name: String,
}
impl Default for LLMRule {
fn default() -> Self {
Self {
identifier: String::new(),
pattern: String::new(),
llm_name: "openai-api-compatible".into(),
cleaner_name: "no_op".into(),
chunker_name: "no_chunking".into(),
}
}
}
#[derive(Debug, Clone, Default)]
pub struct Rule {
pub name: String,
pub tags: Vec<String>,
pub meta: HashMap<String, String>,
pub strings: Vec<StringRule>,
pub similarity: Vec<SimilarityRule>,
pub phash: Vec<PHashRule>,
pub classifier: Vec<ClassifierRule>,
pub llm: Vec<LLMRule>,
pub condition: String,
pub compiled_condition: Option<Expr>,
}
#[derive(Debug, Clone)]
pub struct MatchDetail {
pub identifier: String,
pub matched_text: String,
pub start_pos: Option<usize>,
pub end_pos: Option<usize>,
pub score: f64,
pub explanation: String,
}
impl MatchDetail {
pub fn new(identifier: impl Into<String>, matched_text: impl Into<String>) -> Self {
Self {
identifier: identifier.into(),
matched_text: matched_text.into(),
start_pos: None,
end_pos: None,
score: 1.0,
explanation: String::new(),
}
}
pub fn with_position(mut self, start: usize, end: usize) -> Self {
self.start_pos = Some(start);
self.end_pos = Some(end);
self
}
pub fn with_score(mut self, score: f64) -> Self {
self.score = score;
self
}
}
#[derive(Debug, Clone)]
pub struct Match {
pub rule_name: String,
pub tags: Vec<String>,
pub meta: HashMap<String, String>,
pub matched: bool,
pub matched_patterns: HashMap<String, Vec<MatchDetail>>,
}
impl Match {
pub fn no_match(rule: &Rule) -> Self {
Self {
rule_name: rule.name.clone(),
tags: Vec::new(),
meta: HashMap::new(),
matched: false,
matched_patterns: HashMap::new(),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_match_detail_default_positions_are_none() {
let detail = MatchDetail::new("$s1", "hello");
assert_eq!(detail.start_pos, None);
assert_eq!(detail.end_pos, None);
}
#[test]
fn test_match_detail_with_position() {
let detail = MatchDetail::new("$s1", "hello").with_position(10, 15);
assert_eq!(detail.start_pos, Some(10));
assert_eq!(detail.end_pos, Some(15));
}
#[test]
fn test_match_detail_with_score() {
let detail = MatchDetail::new("$s1", "hello").with_score(0.95);
assert!((detail.score - 0.95).abs() < f64::EPSILON);
}
#[test]
fn test_match_detail_default_score_is_one() {
let detail = MatchDetail::new("$s1", "hello");
assert!((detail.score - 1.0).abs() < f64::EPSILON);
}
#[test]
fn test_no_match_has_empty_patterns() {
let rule = Rule {
name: "test".to_string(),
..Default::default()
};
let m = Match::no_match(&rule);
assert!(!m.matched);
assert!(m.matched_patterns.is_empty());
assert_eq!(m.rule_name, "test");
}
#[test]
fn test_no_match_does_not_clone_tags_or_meta() {
let rule = Rule {
name: "tagged".to_string(),
tags: vec!["security".to_string(), "test".to_string()],
meta: {
let mut m = HashMap::new();
m.insert("author".to_string(), "tester".to_string());
m
},
..Default::default()
};
let m = Match::no_match(&rule);
assert!(!m.matched);
assert!(m.tags.is_empty(), "non-match should have empty tags");
assert!(m.meta.is_empty(), "non-match should have empty meta");
}
#[test]
fn test_modifier_from_str() {
assert_eq!(Modifier::from_str("nocase"), Some(Modifier::NoCase));
assert_eq!(Modifier::from_str("wide"), Some(Modifier::Wide));
assert_eq!(Modifier::from_str("ascii"), Some(Modifier::Ascii));
assert_eq!(Modifier::from_str("dotall"), Some(Modifier::Dotall));
assert_eq!(Modifier::from_str("fullword"), Some(Modifier::FullWord));
assert_eq!(Modifier::from_str("unknown"), None);
}
#[test]
fn test_match_display_matched() {
let m = Match {
rule_name: "test_rule".to_string(),
tags: vec![],
meta: HashMap::new(),
matched: true,
matched_patterns: {
let mut mp = HashMap::new();
mp.insert(
"$s1".to_string(),
vec![MatchDetail::new("$s1", "x")],
);
mp
},
};
let display = format!("{}", m);
assert!(display.contains("matched=true"));
assert!(display.contains("patterns=1"));
}
#[test]
fn test_match_display_not_matched() {
let rule = Rule {
name: "test_rule".to_string(),
..Default::default()
};
let m = Match::no_match(&rule);
let display = format!("{}", m);
assert!(display.contains("matched=false"));
}
}
impl std::fmt::Display for Match {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if !self.matched {
write!(f, "Match(rule='{}', matched=false)", self.rule_name)
} else {
let count: usize = self.matched_patterns.values().map(|v| v.len()).sum();
write!(
f,
"Match(rule='{}', matched=true, patterns={})",
self.rule_name, count
)
}
}
}