Skip to main content

syara_x/
models.rs

1use std::collections::HashMap;
2
3use crate::condition::Expr;
4
/// Modifiers applicable to a string pattern.
///
/// Each variant corresponds to one lowercase keyword recognised by
/// [`Modifier::from_str`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Modifier {
    /// Parsed from the keyword `nocase`.
    NoCase,
    /// Parsed from the keyword `wide`.
    Wide,
    /// Parsed from the keyword `ascii`.
    Ascii,
    /// Parsed from the keyword `dotall`.
    Dotall,
    /// Parsed from the keyword `fullword`.
    FullWord,
}

impl Modifier {
    /// Converts a modifier keyword into its [`Modifier`] variant.
    ///
    /// The comparison is exact (case-sensitive, no trimming). Any keyword
    /// other than the five listed on the enum yields `None`.
    pub(crate) fn from_str(s: &str) -> Option<Self> {
        let parsed = match s {
            "nocase" => Self::NoCase,
            "wide" => Self::Wide,
            "ascii" => Self::Ascii,
            "dotall" => Self::Dotall,
            "fullword" => Self::FullWord,
            // Unrecognised keyword: let the caller decide how to report it.
            _ => return None,
        };
        Some(parsed)
    }
}
27
28/// Traditional string or regex pattern.
29#[derive(Debug, Clone)]
30pub struct StringRule {
31    pub identifier: String,
32    pub pattern: String,
33    pub modifiers: Vec<Modifier>,
34    pub is_regex: bool,
35}
36
/// Semantic similarity pattern.
#[derive(Clone, Debug)]
pub struct SimilarityRule {
    /// Identifier under which the pattern is declared in the rule.
    pub identifier: String,
    /// Reference text for the similarity comparison.
    pub pattern: String,
    /// Similarity threshold; `Default` uses `0.8`.
    pub threshold: f64,
    /// Name of the cleaner; `Default` uses `"default_cleaning"`.
    pub cleaner_name: String,
    /// Name of the chunker; `Default` uses `"no_chunking"`.
    pub chunker_name: String,
    /// Name of the matcher; `Default` uses `"sbert"`.
    pub matcher_name: String,
}

impl Default for SimilarityRule {
    /// Builds a rule with an empty identifier and pattern and the stock
    /// threshold and component names.
    fn default() -> Self {
        Self {
            threshold: 0.8,
            cleaner_name: String::from("default_cleaning"),
            chunker_name: String::from("no_chunking"),
            matcher_name: String::from("sbert"),
            identifier: String::new(),
            pattern: String::new(),
        }
    }
}
60
/// Perceptual hash pattern for binary files (images, audio, video).
#[derive(Clone, Debug)]
pub struct PHashRule {
    /// Identifier under which the pattern is declared in the rule.
    pub identifier: String,
    /// Path of the file associated with this pattern, stored as a string.
    pub file_path: String,
    /// Similarity threshold; `Default` uses `0.9`.
    pub threshold: f64,
    /// Name of the perceptual hash implementation; `Default` uses
    /// `"imagehash"`.
    pub phash_name: String,
}

impl Default for PHashRule {
    /// Builds a rule with an empty identifier and file path and the stock
    /// threshold and hash name.
    fn default() -> Self {
        Self {
            threshold: 0.9,
            phash_name: String::from("imagehash"),
            identifier: String::new(),
            file_path: String::new(),
        }
    }
}
80
/// ML classifier-based pattern.
#[derive(Clone, Debug)]
pub struct ClassifierRule {
    /// Identifier under which the pattern is declared in the rule.
    pub identifier: String,
    /// Pattern text handed to the classifier.
    pub pattern: String,
    /// Score threshold; `Default` uses `0.7`.
    pub threshold: f64,
    /// Name of the cleaner; `Default` uses `"default_cleaning"`.
    pub cleaner_name: String,
    /// Name of the chunker; `Default` uses `"no_chunking"`.
    pub chunker_name: String,
    /// Name of the classifier; `Default` uses `"tuned-sbert"`.
    pub classifier_name: String,
}

impl Default for ClassifierRule {
    /// Builds a rule with an empty identifier and pattern and the stock
    /// threshold and component names.
    fn default() -> Self {
        Self {
            threshold: 0.7,
            cleaner_name: String::from("default_cleaning"),
            chunker_name: String::from("no_chunking"),
            classifier_name: String::from("tuned-sbert"),
            identifier: String::new(),
            pattern: String::new(),
        }
    }
}
104
/// LLM-based evaluation pattern.
#[derive(Clone, Debug)]
pub struct LLMRule {
    /// Identifier under which the pattern is declared in the rule.
    pub identifier: String,
    /// Pattern text handed to the LLM evaluator.
    pub pattern: String,
    /// Name of the LLM backend; `Default` uses `"openai-api-compatible"`.
    pub llm_name: String,
    /// Name of the cleaner; `Default` uses `"no_op"`.
    pub cleaner_name: String,
    /// Name of the chunker; `Default` uses `"no_chunking"`.
    pub chunker_name: String,
}

impl Default for LLMRule {
    /// Builds a rule with an empty identifier and pattern and the stock
    /// component names.
    fn default() -> Self {
        Self {
            llm_name: String::from("openai-api-compatible"),
            cleaner_name: String::from("no_op"),
            chunker_name: String::from("no_chunking"),
            identifier: String::new(),
            pattern: String::new(),
        }
    }
}
126
127/// A complete parsed rule.
128#[derive(Debug, Clone, Default)]
129pub struct Rule {
130    pub name: String,
131    pub tags: Vec<String>,
132    pub meta: HashMap<String, String>,
133    pub strings: Vec<StringRule>,
134    pub similarity: Vec<SimilarityRule>,
135    pub phash: Vec<PHashRule>,
136    pub classifier: Vec<ClassifierRule>,
137    pub llm: Vec<LLMRule>,
138    pub condition: String,
139    /// Pre-compiled condition AST, populated by the compiler.
140    pub compiled_condition: Option<Expr>,
141}
142
/// Details of a single matched pattern within a rule.
#[derive(Debug, Clone)]
pub struct MatchDetail {
    /// Identifier of the pattern that produced this detail.
    pub identifier: String,
    /// Text recorded for the match.
    pub matched_text: String,
    /// Start position of the match, or `None` when no position was recorded.
    /// NOTE(review): the unit (bytes vs. chars) is not defined in this
    /// module; confirm against the code that calls `with_position`.
    pub start_pos: Option<usize>,
    /// End position of the match, or `None` when no position was recorded.
    /// NOTE(review): whether this bound is inclusive or exclusive is not
    /// defined in this module; confirm against the callers.
    pub end_pos: Option<usize>,
    /// Score attached to the match; [`MatchDetail::new`] sets it to `1.0`.
    pub score: f64,
    /// Free-form explanation; [`MatchDetail::new`] leaves it empty.
    pub explanation: String,
}

impl MatchDetail {
    /// Creates a detail with no position, a score of `1.0`, and an empty
    /// explanation.
    ///
    /// Both arguments accept anything convertible into `String`, so callers
    /// can pass `&str` or an owned `String`.
    #[must_use]
    pub fn new(identifier: impl Into<String>, matched_text: impl Into<String>) -> Self {
        Self {
            identifier: identifier.into(),
            matched_text: matched_text.into(),
            start_pos: None,
            end_pos: None,
            score: 1.0,
            explanation: String::new(),
        }
    }

    /// Records the start and end positions and returns the updated detail.
    ///
    /// The values are stored as given; no ordering check is performed.
    /// This consumes `self`, so the returned value must be used or the
    /// detail is dropped.
    #[must_use]
    pub fn with_position(mut self, start: usize, end: usize) -> Self {
        self.start_pos = Some(start);
        self.end_pos = Some(end);
        self
    }

    /// Replaces the score and returns the updated detail.
    ///
    /// The value is stored as given; no range check or clamping is
    /// performed. This consumes `self`, so the returned value must be used
    /// or the detail is dropped.
    #[must_use]
    pub fn with_score(mut self, score: f64) -> Self {
        self.score = score;
        self
    }
}
177
178/// Result of evaluating a single rule against input.
179#[derive(Debug, Clone)]
180pub struct Match {
181    pub rule_name: String,
182    pub tags: Vec<String>,
183    pub meta: HashMap<String, String>,
184    pub matched: bool,
185    pub matched_patterns: HashMap<String, Vec<MatchDetail>>,
186}
187
188impl Match {
189    /// BUG-024: non-matching results carry only the rule name — no cloned
190    /// tags/meta, since consumers filter on `matched` first.
191    pub fn no_match(rule: &Rule) -> Self {
192        Self {
193            rule_name: rule.name.clone(),
194            tags: Vec::new(),
195            meta: HashMap::new(),
196            matched: false,
197            matched_patterns: HashMap::new(),
198        }
199    }
200}
201
202#[cfg(test)]
203mod tests {
204    use super::*;
205
206    // ── BUG-017: positions use Option<usize> instead of i64 sentinels ───
207
208    #[test]
209    fn test_match_detail_default_positions_are_none() {
210        let detail = MatchDetail::new("$s1", "hello");
211        assert_eq!(detail.start_pos, None);
212        assert_eq!(detail.end_pos, None);
213    }
214
215    #[test]
216    fn test_match_detail_with_position() {
217        let detail = MatchDetail::new("$s1", "hello").with_position(10, 15);
218        assert_eq!(detail.start_pos, Some(10));
219        assert_eq!(detail.end_pos, Some(15));
220    }
221
222    #[test]
223    fn test_match_detail_with_score() {
224        let detail = MatchDetail::new("$s1", "hello").with_score(0.95);
225        assert!((detail.score - 0.95).abs() < f64::EPSILON);
226    }
227
228    #[test]
229    fn test_match_detail_default_score_is_one() {
230        let detail = MatchDetail::new("$s1", "hello");
231        assert!((detail.score - 1.0).abs() < f64::EPSILON);
232    }
233
234    #[test]
235    fn test_no_match_has_empty_patterns() {
236        let rule = Rule {
237            name: "test".to_string(),
238            ..Default::default()
239        };
240        let m = Match::no_match(&rule);
241        assert!(!m.matched);
242        assert!(m.matched_patterns.is_empty());
243        assert_eq!(m.rule_name, "test");
244    }
245
246    // ── BUG-024: no_match uses empty vecs, not clones ─────────────────────
247
248    #[test]
249    fn test_no_match_does_not_clone_tags_or_meta() {
250        let rule = Rule {
251            name: "tagged".to_string(),
252            tags: vec!["security".to_string(), "test".to_string()],
253            meta: {
254                let mut m = HashMap::new();
255                m.insert("author".to_string(), "tester".to_string());
256                m
257            },
258            ..Default::default()
259        };
260        let m = Match::no_match(&rule);
261        assert!(!m.matched);
262        assert!(m.tags.is_empty(), "non-match should have empty tags");
263        assert!(m.meta.is_empty(), "non-match should have empty meta");
264    }
265
266    #[test]
267    fn test_modifier_from_str() {
268        assert_eq!(Modifier::from_str("nocase"), Some(Modifier::NoCase));
269        assert_eq!(Modifier::from_str("wide"), Some(Modifier::Wide));
270        assert_eq!(Modifier::from_str("ascii"), Some(Modifier::Ascii));
271        assert_eq!(Modifier::from_str("dotall"), Some(Modifier::Dotall));
272        assert_eq!(Modifier::from_str("fullword"), Some(Modifier::FullWord));
273        assert_eq!(Modifier::from_str("unknown"), None);
274    }
275
276    #[test]
277    fn test_match_display_matched() {
278        let m = Match {
279            rule_name: "test_rule".to_string(),
280            tags: vec![],
281            meta: HashMap::new(),
282            matched: true,
283            matched_patterns: {
284                let mut mp = HashMap::new();
285                mp.insert(
286                    "$s1".to_string(),
287                    vec![MatchDetail::new("$s1", "x")],
288                );
289                mp
290            },
291        };
292        let display = format!("{}", m);
293        assert!(display.contains("matched=true"));
294        assert!(display.contains("patterns=1"));
295    }
296
297    #[test]
298    fn test_match_display_not_matched() {
299        let rule = Rule {
300            name: "test_rule".to_string(),
301            ..Default::default()
302        };
303        let m = Match::no_match(&rule);
304        let display = format!("{}", m);
305        assert!(display.contains("matched=false"));
306    }
307}
308
309impl std::fmt::Display for Match {
310    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
311        if !self.matched {
312            write!(f, "Match(rule='{}', matched=false)", self.rule_name)
313        } else {
314            let count: usize = self.matched_patterns.values().map(|v| v.len()).sum();
315            write!(
316                f,
317                "Match(rule='{}', matched=true, patterns={})",
318                self.rule_name, count
319            )
320        }
321    }
322}