1use std::collections::HashMap;
6use std::path::Path;
7
8use crate::cache::TextCache;
9use crate::condition;
10use crate::config::Registry;
11use crate::engine::string_matcher::StringMatcher;
12#[cfg(any(feature = "sbert", feature = "phash", feature = "classifier", feature = "llm", feature = "burn-llm"))]
13use crate::error::SyaraError;
14use crate::models::{Match, MatchDetail, Rule};
15
/// A set of rules bundled with the registry used to resolve the named
/// components (cleaners, chunkers, matchers, ...) those rules refer to.
pub struct CompiledRules {
    /// The rules evaluated by the scan methods, in the order they are stored.
    pub(crate) rules: Vec<Rule>,
    /// Registry queried for cleaners, chunkers and matchers while executing rules.
    pub(crate) registry: Registry,
}
20
21impl CompiledRules {
22 pub fn rule_count(&self) -> usize {
24 self.rules.len()
25 }
26
27 pub fn rule_names(&self) -> impl Iterator<Item = &str> {
29 self.rules.iter().map(|r| r.name.as_str())
30 }
31}
32
33impl CompiledRules {
34 pub(crate) fn new(rules: Vec<Rule>, registry: Registry) -> Self {
35 Self { rules, registry }
36 }
37
38 pub fn scan(&self, text: &str) -> Vec<Match> {
41 let mut cache = TextCache::new();
42 let mut string_matcher = StringMatcher::new();
43 let mut results = Vec::with_capacity(self.rules.len());
44
45 for rule in &self.rules {
46 let m = self.execute_rule(rule, text, None, &mut cache, &mut string_matcher);
47 results.push(m);
48 }
49
50 cache.clear();
51 #[cfg(any(feature = "llm", feature = "burn-llm"))]
52 self.registry.clear_llm_caches();
53 results
54 }
55
56 pub fn scan_file(&self, path: &Path) -> Vec<Match> {
61 let mut cache = TextCache::new();
62 let mut string_matcher = StringMatcher::new();
63 let mut results = Vec::new();
64
65 let text = std::fs::read(path)
67 .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
68 .unwrap_or_default();
69
70 for rule in &self.rules {
71 if !rule.phash.is_empty() {
72 let m =
73 self.execute_rule(rule, &text, Some(path), &mut cache, &mut string_matcher);
74 results.push(m);
75 }
76 }
77
78 cache.clear();
79 #[cfg(any(feature = "llm", feature = "burn-llm"))]
80 self.registry.clear_llm_caches();
81 results
82 }
83
84 #[allow(unused_variables)]
85 fn execute_rule(
86 &self,
87 rule: &Rule,
88 text: &str,
89 file_path: Option<&Path>,
90 cache: &mut TextCache,
91 string_matcher: &mut StringMatcher,
92 ) -> Match {
93 let mut pattern_matches: HashMap<String, Vec<MatchDetail>> = HashMap::new();
96 for r in &rule.strings {
97 pattern_matches.insert(r.identifier.clone(), vec![]);
98 }
99 for r in &rule.similarity {
100 pattern_matches.insert(r.identifier.clone(), vec![]);
101 }
102 for r in &rule.phash {
103 pattern_matches.insert(r.identifier.clone(), vec![]);
104 }
105 for r in &rule.classifier {
106 pattern_matches.insert(r.identifier.clone(), vec![]);
107 }
108 for r in &rule.llm {
109 pattern_matches.insert(r.identifier.clone(), vec![]);
110 }
111
112 for string_rule in &rule.strings {
114 match string_matcher.match_rule(string_rule, text) {
115 Ok(hits) if !hits.is_empty() => {
116 pattern_matches.insert(string_rule.identifier.clone(), hits);
117 }
118 _ => {}
119 }
120 }
121
122 #[cfg(feature = "sbert")]
124 for sim_rule in &rule.similarity {
125 if let Ok(hits) = self.execute_similarity(sim_rule, text, cache) {
126 if !hits.is_empty() {
127 pattern_matches.insert(sim_rule.identifier.clone(), hits);
128 }
129 }
130 }
131
132 #[cfg(feature = "phash")]
134 if let Some(fp) = file_path {
135 for phash_rule in &rule.phash {
136 if let Ok(hits) = self.execute_phash(phash_rule, fp) {
137 if !hits.is_empty() {
138 pattern_matches.insert(phash_rule.identifier.clone(), hits);
139 }
140 }
141 }
142 }
143
144 #[cfg(feature = "classifier")]
146 for cls_rule in &rule.classifier {
147 if let Ok(hits) = self.execute_classifier(cls_rule, text, cache) {
148 if !hits.is_empty() {
149 pattern_matches.insert(cls_rule.identifier.clone(), hits);
150 }
151 }
152 }
153
154 #[cfg(any(feature = "llm", feature = "burn-llm"))]
156 if let Some(ref expr) = rule.compiled_condition {
157 for llm_rule in &rule.llm {
158 if condition::is_identifier_needed(
159 &llm_rule.identifier,
160 expr,
161 &pattern_matches,
162 ) {
163 if let Ok(hits) = self.execute_llm(llm_rule, text, cache) {
164 if !hits.is_empty() {
165 pattern_matches.insert(llm_rule.identifier.clone(), hits);
166 }
167 }
168 }
169 }
170 }
171
172 let matched = self.evaluate_condition(rule, &pattern_matches);
174
175 Match {
176 rule_name: rule.name.clone(),
177 tags: rule.tags.clone(),
178 meta: rule.meta.clone(),
179 matched,
180 matched_patterns: if matched { pattern_matches } else { HashMap::new() },
181 }
182 }
183
184 fn evaluate_condition(
185 &self,
186 rule: &Rule,
187 pattern_matches: &HashMap<String, Vec<MatchDetail>>,
188 ) -> bool {
189 match rule.compiled_condition {
190 Some(ref expr) => condition::evaluate(expr, pattern_matches),
191 None => false,
192 }
193 }
194
195 #[cfg(feature = "sbert")]
198 fn execute_similarity(
199 &self,
200 rule: &crate::models::SimilarityRule,
201 text: &str,
202 cache: &mut TextCache,
203 ) -> Result<Vec<MatchDetail>, SyaraError> {
204 let cleaner = self.registry.get_cleaner(&rule.cleaner_name)?;
205 let chunker = self.registry.get_chunker(&rule.chunker_name)?;
206 let cleaned = get_cleaned(cache, text, cleaner, &rule.cleaner_name);
207 let chunks = chunker.chunk(&cleaned);
208 let matcher = self.registry.get_semantic_matcher(&rule.matcher_name)?;
209 matcher.match_chunks(rule, &chunks)
210 }
211
212 #[cfg(feature = "phash")]
213 fn execute_phash(
214 &self,
215 rule: &crate::models::PHashRule,
216 file_path: &Path,
217 ) -> Result<Vec<MatchDetail>, SyaraError> {
218 let matcher = self.registry.get_phash_matcher(&rule.phash_name)?;
219 matcher.match_rule(rule, file_path)
220 }
221
222 #[cfg(feature = "classifier")]
223 fn execute_classifier(
224 &self,
225 rule: &crate::models::ClassifierRule,
226 text: &str,
227 cache: &mut TextCache,
228 ) -> Result<Vec<MatchDetail>, SyaraError> {
229 let cleaner = self.registry.get_cleaner(&rule.cleaner_name)?;
230 let chunker = self.registry.get_chunker(&rule.chunker_name)?;
231 let cleaned = get_cleaned(cache, text, cleaner, &rule.cleaner_name);
232 let chunks = chunker.chunk(&cleaned);
233 let classifier = self.registry.get_classifier(&rule.classifier_name)?;
234 classifier.classify_chunks(rule, &chunks)
235 }
236
237 #[cfg(any(feature = "llm", feature = "burn-llm"))]
238 fn execute_llm(
239 &self,
240 rule: &crate::models::LLMRule,
241 text: &str,
242 cache: &mut TextCache,
243 ) -> Result<Vec<MatchDetail>, SyaraError> {
244 let cleaner = self.registry.get_cleaner(&rule.cleaner_name)?;
245 let chunker = self.registry.get_chunker(&rule.chunker_name)?;
246 let cleaned = get_cleaned(cache, text, cleaner, &rule.cleaner_name);
247 let chunks = chunker.chunk(&cleaned);
248 let evaluator = self.registry.get_llm_evaluator(&rule.llm_name)?;
249 evaluator.evaluate_chunks(rule, &chunks)
250 }
251
252 pub fn register_cleaner(
255 &mut self,
256 name: impl Into<String>,
257 cleaner: Box<dyn crate::engine::cleaner::TextCleaner>,
258 ) {
259 self.registry.register_cleaner(name, cleaner);
260 }
261
262 pub fn register_chunker(
263 &mut self,
264 name: impl Into<String>,
265 chunker: Box<dyn crate::engine::chunker::Chunker>,
266 ) {
267 self.registry.register_chunker(name, chunker);
268 }
269
270 #[cfg(feature = "sbert")]
271 pub fn register_semantic_matcher(
272 &mut self,
273 name: impl Into<String>,
274 matcher: Box<dyn crate::engine::semantic_matcher::SemanticMatcher>,
275 ) {
276 self.registry.register_semantic_matcher(name, matcher);
277 }
278
279 #[cfg(feature = "classifier")]
280 pub fn register_classifier(
281 &mut self,
282 name: impl Into<String>,
283 classifier: Box<dyn crate::engine::classifier::TextClassifier>,
284 ) {
285 self.registry.register_classifier(name, classifier);
286 }
287
288 #[cfg(any(feature = "llm", feature = "burn-llm"))]
289 pub fn register_llm_evaluator(
290 &mut self,
291 name: impl Into<String>,
292 evaluator: Box<dyn crate::engine::llm_evaluator::LLMEvaluator>,
293 ) {
294 self.registry.register_llm_evaluator(name, evaluator);
295 }
296
297 #[cfg(feature = "phash")]
298 pub fn register_phash_matcher(
299 &mut self,
300 name: impl Into<String>,
301 matcher: Box<dyn crate::engine::phash_matcher::PHashMatcher>,
302 ) {
303 self.registry.register_phash_matcher(name, matcher);
304 }
305}
306
307#[allow(dead_code)]
308fn get_cleaned(
310 cache: &mut TextCache,
311 text: &str,
312 cleaner: &dyn crate::engine::cleaner::TextCleaner,
313 cleaner_name: &str,
314) -> String {
315 if let Some(cached) = cache.get(text, cleaner_name) {
316 return cached.to_owned();
317 }
318 let cleaned = cleaner.clean(text);
319 cache.insert(text, cleaner_name, cleaned.clone());
320 cleaned
321}