use std::collections::HashMap;
use std::fmt;

use crate::condition::Expr;

/// Flag that can be attached to the pattern of a [`StringRule`].
///
/// Each variant corresponds to exactly one lowercase keyword accepted by
/// `Modifier::from_str`. This type only names the flags; how a flag changes
/// matching is decided by whatever consumes [`StringRule::modifiers`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Modifier {
    /// Keyword `nocase`.
    // NOTE(review): presumably requests case-insensitive matching — confirm in the matcher.
    NoCase,
    /// Keyword `wide`.
    Wide,
    /// Keyword `ascii`.
    Ascii,
    /// Keyword `dotall`.
    Dotall,
    /// Keyword `fullword`.
    FullWord,
}

15impl Modifier {
16 pub(crate) fn from_str(s: &str) -> Option<Self> {
17 match s {
18 "nocase" => Some(Self::NoCase),
19 "wide" => Some(Self::Wide),
20 "ascii" => Some(Self::Ascii),
21 "dotall" => Some(Self::Dotall),
22 "fullword" => Some(Self::FullWord),
23 _ => None,
24 }
25 }
26}
27
28#[derive(Debug, Clone)]
30pub struct StringRule {
31 pub identifier: String,
32 pub pattern: String,
33 pub modifiers: Vec<Modifier>,
34 pub is_regex: bool,
35}
36
/// One entry of a rule's `similarity` list.
///
/// The `*_name` fields hold plain strings that select a cleaner, a chunker and
/// a matcher. The `Default` impl fills them with `"default_cleaning"`,
/// `"no_chunking"` and `"sbert"`, and uses a threshold of `0.8`.
#[derive(Debug, Clone, PartialEq)]
pub struct SimilarityRule {
    /// Identifier of this pattern.
    pub identifier: String,
    /// Reference text of the pattern.
    pub pattern: String,
    /// Threshold associated with the pattern.
    // NOTE(review): presumably a minimum similarity score in 0.0..=1.0 — confirm.
    pub threshold: f64,
    /// Name of the cleaner to use.
    pub cleaner_name: String,
    /// Name of the chunker to use.
    pub chunker_name: String,
    /// Name of the matcher to use.
    pub matcher_name: String,
}

48impl Default for SimilarityRule {
49 fn default() -> Self {
50 Self {
51 identifier: String::new(),
52 pattern: String::new(),
53 threshold: 0.8,
54 cleaner_name: "default_cleaning".into(),
55 chunker_name: "no_chunking".into(),
56 matcher_name: "sbert".into(),
57 }
58 }
59}
60
/// One entry of a rule's `phash` list.
///
/// The `Default` impl uses a threshold of `0.9` and the hash name
/// `"imagehash"`.
#[derive(Debug, Clone, PartialEq)]
pub struct PHashRule {
    /// Identifier of this pattern.
    pub identifier: String,
    /// Path of the file this pattern refers to.
    // NOTE(review): presumably the reference image to hash — confirm against the loader.
    pub file_path: String,
    /// Threshold associated with the pattern.
    // NOTE(review): presumably a minimum similarity score in 0.0..=1.0 — confirm.
    pub threshold: f64,
    /// Name of the hashing component to use.
    pub phash_name: String,
}

70impl Default for PHashRule {
71 fn default() -> Self {
72 Self {
73 identifier: String::new(),
74 file_path: String::new(),
75 threshold: 0.9,
76 phash_name: "imagehash".into(),
77 }
78 }
79}
80
/// One entry of a rule's `classifier` list.
///
/// The `Default` impl uses a threshold of `0.7` and the component names
/// `"default_cleaning"`, `"no_chunking"` and `"tuned-sbert"`.
#[derive(Debug, Clone, PartialEq)]
pub struct ClassifierRule {
    /// Identifier of this pattern.
    pub identifier: String,
    /// Pattern text handed to the classifier.
    // NOTE(review): whether this is a label, a prompt or example text is not visible here.
    pub pattern: String,
    /// Threshold associated with the pattern.
    // NOTE(review): presumably a minimum classifier score in 0.0..=1.0 — confirm.
    pub threshold: f64,
    /// Name of the cleaner to use.
    pub cleaner_name: String,
    /// Name of the chunker to use.
    pub chunker_name: String,
    /// Name of the classifier to use.
    pub classifier_name: String,
}

92impl Default for ClassifierRule {
93 fn default() -> Self {
94 Self {
95 identifier: String::new(),
96 pattern: String::new(),
97 threshold: 0.7,
98 cleaner_name: "default_cleaning".into(),
99 chunker_name: "no_chunking".into(),
100 classifier_name: "tuned-sbert".into(),
101 }
102 }
103}
104
/// One entry of a rule's `llm` list.
///
/// Unlike the similarity and classifier rules this type carries no threshold.
/// The `Default` impl uses the names `"openai-api-compatible"`, `"no_op"` and
/// `"no_chunking"`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LLMRule {
    /// Identifier of this pattern.
    pub identifier: String,
    /// Pattern text handed to the LLM component.
    // NOTE(review): presumably a prompt or instruction — confirm against the evaluator.
    pub pattern: String,
    /// Name of the LLM backend to use.
    pub llm_name: String,
    /// Name of the cleaner to use.
    pub cleaner_name: String,
    /// Name of the chunker to use.
    pub chunker_name: String,
}

115impl Default for LLMRule {
116 fn default() -> Self {
117 Self {
118 identifier: String::new(),
119 pattern: String::new(),
120 llm_name: "openai-api-compatible".into(),
121 cleaner_name: "no_op".into(),
122 chunker_name: "no_chunking".into(),
123 }
124 }
125}
126
127#[derive(Debug, Clone, Default)]
129pub struct Rule {
130 pub name: String,
131 pub tags: Vec<String>,
132 pub meta: HashMap<String, String>,
133 pub strings: Vec<StringRule>,
134 pub similarity: Vec<SimilarityRule>,
135 pub phash: Vec<PHashRule>,
136 pub classifier: Vec<ClassifierRule>,
137 pub llm: Vec<LLMRule>,
138 pub condition: String,
139 pub compiled_condition: Option<Expr>,
141}
142
/// A single hit recorded for one pattern.
#[derive(Debug, Clone, PartialEq)]
pub struct MatchDetail {
    /// Identifier of the pattern that produced this hit (e.g. `$s1`).
    pub identifier: String,
    /// Text recorded for the hit.
    pub matched_text: String,
    /// Start of the hit, when a position is known.
    // NOTE(review): unit (byte vs. char offset) and inclusivity are not visible here.
    pub start_pos: Option<usize>,
    /// End of the hit, when a position is known.
    pub end_pos: Option<usize>,
    /// Score of the hit; `MatchDetail::new` initialises it to `1.0`.
    pub score: f64,
    /// Free-form explanation; empty unless set by the producer.
    pub explanation: String,
}

154impl MatchDetail {
155 pub fn new(identifier: impl Into<String>, matched_text: impl Into<String>) -> Self {
156 Self {
157 identifier: identifier.into(),
158 matched_text: matched_text.into(),
159 start_pos: None,
160 end_pos: None,
161 score: 1.0,
162 explanation: String::new(),
163 }
164 }
165
166 pub fn with_position(mut self, start: usize, end: usize) -> Self {
167 self.start_pos = Some(start);
168 self.end_pos = Some(end);
169 self
170 }
171
172 pub fn with_score(mut self, score: f64) -> Self {
173 self.score = score;
174 self
175 }
176}
177
178#[derive(Debug, Clone)]
180pub struct Match {
181 pub rule_name: String,
182 pub tags: Vec<String>,
183 pub meta: HashMap<String, String>,
184 pub matched: bool,
185 pub matched_patterns: HashMap<String, Vec<MatchDetail>>,
186}
187
188impl Match {
189 pub fn no_match(rule: &Rule) -> Self {
192 Self {
193 rule_name: rule.name.clone(),
194 tags: Vec::new(),
195 meta: HashMap::new(),
196 matched: false,
197 matched_patterns: HashMap::new(),
198 }
199 }
200}
201
202#[cfg(test)]
203mod tests {
204 use super::*;
205
206 #[test]
209 fn test_match_detail_default_positions_are_none() {
210 let detail = MatchDetail::new("$s1", "hello");
211 assert_eq!(detail.start_pos, None);
212 assert_eq!(detail.end_pos, None);
213 }
214
215 #[test]
216 fn test_match_detail_with_position() {
217 let detail = MatchDetail::new("$s1", "hello").with_position(10, 15);
218 assert_eq!(detail.start_pos, Some(10));
219 assert_eq!(detail.end_pos, Some(15));
220 }
221
222 #[test]
223 fn test_match_detail_with_score() {
224 let detail = MatchDetail::new("$s1", "hello").with_score(0.95);
225 assert!((detail.score - 0.95).abs() < f64::EPSILON);
226 }
227
228 #[test]
229 fn test_match_detail_default_score_is_one() {
230 let detail = MatchDetail::new("$s1", "hello");
231 assert!((detail.score - 1.0).abs() < f64::EPSILON);
232 }
233
234 #[test]
235 fn test_no_match_has_empty_patterns() {
236 let rule = Rule {
237 name: "test".to_string(),
238 ..Default::default()
239 };
240 let m = Match::no_match(&rule);
241 assert!(!m.matched);
242 assert!(m.matched_patterns.is_empty());
243 assert_eq!(m.rule_name, "test");
244 }
245
246 #[test]
249 fn test_no_match_does_not_clone_tags_or_meta() {
250 let rule = Rule {
251 name: "tagged".to_string(),
252 tags: vec!["security".to_string(), "test".to_string()],
253 meta: {
254 let mut m = HashMap::new();
255 m.insert("author".to_string(), "tester".to_string());
256 m
257 },
258 ..Default::default()
259 };
260 let m = Match::no_match(&rule);
261 assert!(!m.matched);
262 assert!(m.tags.is_empty(), "non-match should have empty tags");
263 assert!(m.meta.is_empty(), "non-match should have empty meta");
264 }
265
266 #[test]
267 fn test_modifier_from_str() {
268 assert_eq!(Modifier::from_str("nocase"), Some(Modifier::NoCase));
269 assert_eq!(Modifier::from_str("wide"), Some(Modifier::Wide));
270 assert_eq!(Modifier::from_str("ascii"), Some(Modifier::Ascii));
271 assert_eq!(Modifier::from_str("dotall"), Some(Modifier::Dotall));
272 assert_eq!(Modifier::from_str("fullword"), Some(Modifier::FullWord));
273 assert_eq!(Modifier::from_str("unknown"), None);
274 }
275
276 #[test]
277 fn test_match_display_matched() {
278 let m = Match {
279 rule_name: "test_rule".to_string(),
280 tags: vec![],
281 meta: HashMap::new(),
282 matched: true,
283 matched_patterns: {
284 let mut mp = HashMap::new();
285 mp.insert(
286 "$s1".to_string(),
287 vec![MatchDetail::new("$s1", "x")],
288 );
289 mp
290 },
291 };
292 let display = format!("{}", m);
293 assert!(display.contains("matched=true"));
294 assert!(display.contains("patterns=1"));
295 }
296
297 #[test]
298 fn test_match_display_not_matched() {
299 let rule = Rule {
300 name: "test_rule".to_string(),
301 ..Default::default()
302 };
303 let m = Match::no_match(&rule);
304 let display = format!("{}", m);
305 assert!(display.contains("matched=false"));
306 }
307}
308
309impl std::fmt::Display for Match {
310 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
311 if !self.matched {
312 write!(f, "Match(rule='{}', matched=false)", self.rule_name)
313 } else {
314 let count: usize = self.matched_patterns.values().map(|v| v.len()).sum();
315 write!(
316 f,
317 "Match(rule='{}', matched=true, patterns={})",
318 self.rule_name, count
319 )
320 }
321 }
322}