1use crate::hash::Hash;
2use crate::{key, Match, Mode};
3use crate::ClassifyResult;
4use std::collections::HashMap;
5use std::fs;
6
7#[cfg(feature = "regex")]
8use regex::RegexSet;
9
10#[cfg(feature = "save")]
11use serde::{Serialize, Deserialize};
12
13const BSIZE: usize = 6;
14
/// Immutable word dictionary: a bucketed hash table of words mapped to `u8`
/// keys, plus per-word and per-key weights and (optionally) regex patterns.
pub struct Dictionary {
    /// Number of hash buckets; `0` when no words were loaded.
    bcount: usize,
    /// Per-bucket `(start, count)` range into `entries`.
    blocks: Vec<(usize, usize)>,
    /// Flattened bucket contents as `(masked secondary hash, key)` pairs.
    entries: Vec<(u64, u8)>,
    /// Bit `k` is set when a word with key `k` (for `k < 128`) was loaded.
    mask: u128,
    /// Per-word weight, indexed by the word's primary hash value (`Hash::a`).
    word_weights: HashMap<u64, f32>,
    /// Per-key weight multiplier, indexed by key value.
    key_weights: [f32; 256],
    /// Byte length of the longest loaded word, never less than 16.
    max_word_len: usize,
    /// Compiled pattern set and `(key, weight)` metadata in pattern order.
    #[cfg(feature = "regex")]
    regex_patterns: Option<(RegexSet, Vec<(u8, f32)>)>,
}
38
/// Accumulates words, key weights and (optionally) regex patterns before
/// they are compiled into a [`Dictionary`] by `build`.
pub struct DictionaryBuilder {
    /// Plain words as `(word, key, weight)`.
    words: Vec<(String, u8, f32)>,
    /// Per-key weight multiplier, indexed by key value; starts at `1.0`.
    key_weights: [f32; 256],
    /// Regex patterns as `(pattern, key, weight)`.
    #[cfg(feature = "regex")]
    regex_words: Vec<(String, u8, f32)>,
}
49
50impl DictionaryBuilder {
51 pub fn new() -> Self {
52 DictionaryBuilder {
53 words: Vec::new(),
54 key_weights: [1.0; 256],
55 #[cfg(feature = "regex")]
56 regex_words: Vec::new(),
57 }
58 }
59
60 pub fn add(mut self, word: &str, key: u8) -> Self {
62 if !word.is_empty() { self.words.push((word.to_string(), key, 1.0)); }
63 self
64 }
65
66 pub fn add_weighted(mut self, word: &str, key: u8, weight: f32) -> Self {
68 if !word.is_empty() { self.words.push((word.to_string(), key, weight)); }
69 self
70 }
71
72 pub fn add_many(mut self, words: &[&str], key: u8) -> Self {
74 for &w in words {
75 if !w.is_empty() { self.words.push((w.to_string(), key, 1.0)); }
76 }
77 self
78 }
79
80 pub fn set_key_weight(mut self, key: u8, weight: f32) -> Self {
82 self.key_weights[key as usize] = weight;
83 self
84 }
85
86 pub fn load_str(mut self, data: &str) -> Self {
91 for line in data.lines() {
92 let line = line.trim();
93 if line.is_empty() || line.starts_with('#') { continue; }
94 let mut parts = line.splitn(3, '\t');
95 let word = parts.next().unwrap().trim();
96 if word.is_empty() { continue; }
97 let k: u8 = parts.next()
100 .map(|s| s.trim())
101 .and_then(|s| {
102 s.parse::<u8>().ok()
103 .or_else(|| u8::from_str_radix(s, 16).ok())
104 })
105 .unwrap_or(key::DEFAULT);
106 let w: f32 = parts.next()
107 .and_then(|s| s.trim().parse().ok())
108 .unwrap_or(1.0);
109 #[cfg(feature = "regex")]
111 if word.starts_with('/') && word.ends_with('/') && word.len() > 2 {
112 let pattern = &word[1..word.len() - 1];
113 self.regex_words.push((pattern.to_string(), k, w));
114 continue;
115 }
116 self.words.push((word.to_string(), k, w));
117 }
118 self
119 }
120
121 pub fn load_file(self, path: &str) -> Result<Self, String> {
123 let data = fs::read_to_string(path)
124 .map_err(|e| format!("cannot read {path}: {e}"))?;
125 Ok(self.load_str(&data))
126 }
127
128 pub fn merge(mut self, other: DictionaryBuilder) -> Self {
130 self.words.extend(other.words);
131 #[cfg(feature = "regex")]
132 self.regex_words.extend(other.regex_words);
133 self
134 }
135
136 pub fn build(self) -> Dictionary {
138 #[cfg(feature = "regex")]
139 {
140 Dictionary::build_from(self.words, self.key_weights, self.regex_words)
141 }
142 #[cfg(not(feature = "regex"))]
143 {
144 Dictionary::build_from(self.words, self.key_weights)
145 }
146 }
147}
148
149impl Default for DictionaryBuilder {
150 fn default() -> Self { Self::new() }
151}
152
153impl Dictionary {
156 pub fn builder() -> DictionaryBuilder { DictionaryBuilder::new() }
158
159 pub fn from_file(path: &str) -> Result<Self, String> {
161 DictionaryBuilder::new().load_file(path).map(|b| b.build())
162 }
163
164 pub fn from_text(data: &str) -> Self {
166 DictionaryBuilder::new().load_str(data).build()
167 }
168
169 pub fn seek(&self, word: &str) -> Option<u8> {
171 let h = Hash::from_bytes(word.as_bytes());
172 match self.lookup(&h) {
173 255 => None,
174 k => Some(k),
175 }
176 }
177
178 pub fn mask(&self) -> u128 { self.mask }
181
182 pub fn scan(&self, text: &str, mode: Mode) -> Vec<Match> {
188 let scan_text = if mode.contains(Mode::IGNORE_CASE) {
189 std::borrow::Cow::Owned(text.to_lowercase())
190 } else {
191 std::borrow::Cow::Borrowed(text)
192 };
193 let mut matches = self.scan_bytes(scan_text.as_bytes(), mode);
194 #[cfg(feature = "regex")]
195 self.scan_regex(&scan_text, mode, &mut matches);
196 matches.sort_unstable_by_key(|m| m.position);
197 matches
198 }
199
200 pub fn filter(&self, text: &str, mode: Mode) -> String {
204 let matches = self.scan(text, mode);
205 if matches.is_empty() { return text.to_string(); }
206 let mut buf = text.as_bytes().to_vec();
207 for m in &matches {
208 buf[m.position..m.position + m.length].fill(b'*');
209 }
210 String::from_utf8_lossy(&buf).into_owned()
211 }
212
213 pub fn scan_first(&self, text: &str, mode: Mode) -> Option<Match> {
217 if mode.contains(Mode::IGNORE_CASE) {
218 self.scan_bytes_first(text.to_lowercase().as_bytes(), mode)
219 } else {
220 self.scan_bytes_first(text.as_bytes(), mode)
221 }
222 }
223
224 pub fn contains(&self, text: &str, mode: Mode) -> bool {
226 self.scan_first(text, mode).is_some()
227 }
228
229 pub fn severity(&self, text: &str, mode: Mode) -> Option<Match> {
231 self.scan(text, mode).into_iter().min_by_key(|m| m.key)
232 }
233
234 pub fn score(&self, text: &str, mode: Mode) -> HashMap<u8, f32> {
238 self.score_inner(text, mode, None)
239 }
240
241 pub fn score_with_weights(&self, text: &str, mode: Mode, runtime_weights: &[(u8, f32)]) -> HashMap<u8, f32> {
246 self.score_inner(text, mode, Some(runtime_weights))
247 }
248
249 pub fn classify(&self, text: &str, mode: Mode) -> Option<ClassifyResult> {
251 self.classify_from(self.score(text, mode))
252 }
253
254 pub fn classify_with_weights(&self, text: &str, mode: Mode, runtime_weights: &[(u8, f32)]) -> Option<ClassifyResult> {
259 self.classify_from(self.score_with_weights(text, mode, runtime_weights))
260 }
261
262 fn score_inner(&self, text: &str, mode: Mode, runtime_weights: Option<&[(u8, f32)]>) -> HashMap<u8, f32> {
263 let mut scores: HashMap<u8, f32> = HashMap::new();
264 for m in self.scan(text, mode) {
265 let h = Hash::from_bytes(text[m.position..m.position + m.length].as_bytes());
266 let word_w = self.word_weights.get(&h.a).copied().unwrap_or(1.0);
267 let dict_kw = self.key_weights[m.key as usize];
268 let runtime_kw = runtime_weights
269 .and_then(|rw| rw.iter().find(|(k, _)| *k == m.key))
270 .map(|(_, w)| *w)
271 .unwrap_or(1.0);
272 *scores.entry(m.key).or_insert(0.0) += word_w * dict_kw * runtime_kw;
273 }
274 scores
275 }
276
277 fn classify_from(&self, scores: HashMap<u8, f32>) -> Option<ClassifyResult> {
278 scores.into_iter()
279 .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
280 .map(|(k, score)| ClassifyResult { key: k, score })
281 }
282
283 pub fn scan_key(&self, text: &str, key: u8, mode: Mode) -> Vec<Match> {
285 self.scan(text, mode).into_iter().filter(|m| m.key == key).collect()
286 }
287
/// Appends one `Match` per regex hit in `text` to `matches`.
///
/// Does nothing when no regex patterns are loaded. Without `Mode::FORBID`,
/// patterns whose key is below `key::FORBID_THRESHOLD` are skipped.
///
/// NOTE(review): each pattern that hits is recompiled with `Regex::new` on
/// every call in order to locate match spans; caching compiled patterns
/// would need a change to the stored `regex_patterns` type.
#[cfg(feature = "regex")]
fn scan_regex(&self, text: &str, mode: Mode, matches: &mut Vec<Match>) {
    let (set, meta) = match self.regex_patterns.as_ref() {
        Some(pair) => pair,
        None => return,
    };
    let include_all = mode.contains(Mode::FORBID);
    let sources = set.patterns();

    for idx in set.matches(text) {
        let pattern_key = meta[idx].0;
        let below_threshold = (pattern_key as u32) < key::FORBID_THRESHOLD as u32;
        if !include_all && below_threshold {
            continue;
        }
        // A pattern that fails to recompile is skipped silently.
        let Ok(re) = regex::Regex::new(sources[idx].as_str()) else {
            continue;
        };
        matches.extend(re.find_iter(text).map(|hit| Match {
            position: hit.start(),
            length: hit.len(),
            key: pattern_key,
        }));
    }
}
304
/// Serializes the dictionary into a postcard-encoded byte vector.
///
/// # Errors
/// Returns the serializer's error rendered as a string.
#[cfg(feature = "save")]
pub fn save(&self) -> Result<Vec<u8>, String> {
    match postcard::to_allocvec(&DictSnapshot::from_dict(self)) {
        Ok(encoded) => Ok(encoded),
        Err(e) => Err(e.to_string()),
    }
}
312
/// Serializes the dictionary with `save` and writes the bytes to `path`.
///
/// # Errors
/// Returns the serialization error, or `"cannot write {path}: {error}"`
/// when the write fails.
#[cfg(feature = "save")]
pub fn save_to_file(&self, path: &str) -> Result<(), String> {
    let encoded = self.save()?;
    match fs::write(path, encoded) {
        Ok(()) => Ok(()),
        Err(e) => Err(format!("cannot write {path}: {e}")),
    }
}
318
/// Restores a dictionary from bytes produced by `save`.
///
/// The decoded table is checked before use: the number of blocks must
/// equal `bcount`, and every block's `(start, count)` range must lie inside
/// the entry table. `lookup` indexes both without bounds handling, so
/// without this check a truncated or corrupted snapshot would load
/// successfully and panic on the first lookup.
///
/// # Errors
/// Returns the decoder's error string, or a `corrupt snapshot: ...`
/// message when the table layout is inconsistent.
#[cfg(feature = "save")]
pub fn load(bytes: &[u8]) -> Result<Self, String> {
    let snapshot: DictSnapshot = postcard::from_bytes(bytes).map_err(|e| e.to_string())?;

    if snapshot.blocks.len() != snapshot.bcount {
        return Err(format!(
            "corrupt snapshot: {} blocks but bcount is {}",
            snapshot.blocks.len(),
            snapshot.bcount
        ));
    }
    let total = snapshot.entries.len();
    let in_bounds = |&(start, count): &(usize, usize)| {
        start.checked_add(count).is_some_and(|end| end <= total)
    };
    if !snapshot.blocks.iter().all(in_bounds) {
        return Err("corrupt snapshot: block range exceeds entry table".to_string());
    }

    Ok(snapshot.into_dict())
}
324
/// Reads `path` and restores a dictionary from its bytes via `load`.
///
/// # Errors
/// Returns `"cannot read {path}: {error}"` when the file cannot be read,
/// or the error from `load`.
#[cfg(feature = "save")]
pub fn load_from_file(path: &str) -> Result<Self, String> {
    match fs::read(path) {
        Ok(raw) => Self::load(&raw),
        Err(e) => Err(format!("cannot read {path}: {e}")),
    }
}
330
/// Builds the hash table from `words` and compiles `regex_words` into a
/// `RegexSet` with parallel `(key, weight)` metadata.
///
/// Patterns that do not compile are dropped individually. Previously one
/// invalid pattern made `RegexSet::new` fail, which silently discarded
/// every regex pattern. If the set still cannot be built from the valid
/// patterns, the dictionary is left without regex patterns.
#[cfg(feature = "regex")]
fn build_from(words: Vec<(String, u8, f32)>, key_weights: [f32; 256], regex_words: Vec<(String, u8, f32)>) -> Self {
    let mut dict = Self::build_hash(words, key_weights);

    // Keep pattern text and metadata in lock-step so set indices map to `meta`.
    let (patterns, meta): (Vec<&str>, Vec<(u8, f32)>) = regex_words
        .iter()
        .filter(|(p, _, _)| regex::Regex::new(p).is_ok())
        .map(|(p, k, w)| (p.as_str(), (*k, *w)))
        .unzip();

    if !patterns.is_empty() {
        dict.regex_patterns = RegexSet::new(&patterns).ok().map(|set| (set, meta));
    }
    dict
}
345
346 #[cfg(not(feature = "regex"))]
347 fn build_from(words: Vec<(String, u8, f32)>, key_weights: [f32; 256]) -> Self {
348 Self::build_hash(words, key_weights)
349 }
350
351 fn build_hash(words: Vec<(String, u8, f32)>, key_weights: [f32; 256]) -> Self {
352 let mut hashes: Vec<Hash> = Vec::new();
353 let mut mask: u128 = 0;
354 let mut word_weights: HashMap<u64, f32> = HashMap::new();
355
356 let max_word_len = words.iter().map(|(w, _, _)| w.len()).max().unwrap_or(16).max(16);
357
358 for (word, key, weight) in &words {
359 let bytes = word.as_bytes();
360 let mut tmp = bytes.to_vec();
361 let mut j = bytes.len();
362 while j > 16 {
363 j = (j - 1) & !0xf;
364 tmp[j] = 0;
365 let mut h = Hash::from_bytes(&tmp);
366 h.f = 255;
367 hashes.push(h);
368 tmp[j] = bytes[j];
369 }
370 let mut h = Hash::from_bytes(bytes);
371 h.f = *key;
372 word_weights.insert(h.a, *weight);
373 hashes.push(h);
374 if (*key as u32) < 128 { mask |= 1u128 << key; }
375 }
376
377 let msize = hashes.len();
378 if msize == 0 {
379 return Dictionary {
380 bcount: 0, blocks: vec![], entries: vec![],
381 mask: 0, word_weights, key_weights, max_word_len,
382 #[cfg(feature = "regex")]
383 regex_patterns: None,
384 };
385 }
386 let bcount = msize.div_ceil(BSIZE);
387
388 hashes.sort_by(|a, b| {
389 let ba = (a.a % bcount as u64) as usize;
390 let bb = (b.a % bcount as u64) as usize;
391 ba.cmp(&bb)
392 .then(a.b_masked().cmp(&b.b_masked()))
393 .then(a.f.cmp(&b.f))
394 });
395 hashes.dedup_by(|a, b| a.a == b.a && a.b == b.b);
396
397 let mut blocks = vec![(0usize, 0usize); bcount];
398 let mut entries: Vec<(u64, u8)> = Vec::with_capacity(hashes.len());
399 let mut cur_blk = usize::MAX;
400
401 for h in &hashes {
402 let blk = (h.a % bcount as u64) as usize;
403 if blk != cur_blk {
404 cur_blk = blk;
405 blocks[blk].0 = entries.len();
406 }
407 blocks[blk].1 += 1;
408 entries.push((h.b_masked(), h.f));
409 }
410
411 Dictionary {
412 bcount, blocks, entries, mask, word_weights, key_weights, max_word_len,
413 #[cfg(feature = "regex")]
414 regex_patterns: None,
415 }
416 }
417
418 fn lookup(&self, h: &Hash) -> u8 {
421 if self.bcount == 0 { return 255; }
422 let blk = (h.a % self.bcount as u64) as usize;
423 let (start, count) = self.blocks[blk];
424 let target = h.b_masked();
425 let slice = &self.entries[start..start + count];
426 match slice.binary_search_by_key(&target, |&(hb, _)| hb) {
427 Ok(i) => slice[i].1,
428 Err(_) => 255,
429 }
430 }
431
432 fn scan_bytes_first(&self, s: &[u8], mode: Mode) -> Option<Match> {
435 let size = s.len();
437 if size == 0 || self.bcount == 0 { return None; }
438
439 let html = mode.contains(Mode::HTML);
440 let forbid = mode.contains(Mode::FORBID);
441 let english = mode.contains(Mode::ENGLISH);
442
443 let mut w = vec![0u8; size];
444 let mut v = vec![0u8; size];
445 let mut hashes = vec![Hash::default(); 256];
446 let mut active = vec![false; 256];
447
448 let mut i = 0;
449 while i < size {
450 if html && s[i] == b'<' {
451 i = skip_tag(s, i + 1);
452 continue;
453 }
454
455 let slot = i & 0xff;
456 hashes[slot] = Hash::default();
457 active[slot] = true;
458
459 for j in 0..256usize {
460 if !active[j] { continue; }
461 hashes[j].feed(s[i]);
462 let r = self.lookup(&hashes[j]);
463
464 if i.wrapping_sub(j) >= self.max_word_len && r == 255 {
465 active[j] = false;
466 }
467
468 if r != 255 {
469 let start = i.wrapping_sub(i.wrapping_sub(j) & 0xff);
470 if english {
471 let ok_before = start == 0 || !is_word_char(s[start - 1]);
472 let ok_after = i + 1 >= size || !is_word_char(s[i + 1]);
473 if !ok_before || !ok_after { continue; }
474 }
475 w[start] = (i - j + 1) as u8;
476 v[start] = r;
477 }
478 }
479 i += 1;
480 }
481
482 let mut i = 0;
484 while i < size {
485 if w[i] != 0 {
486 let k = v[i];
487 if forbid || k >= key::FORBID_THRESHOLD {
488 return Some(Match { position: i, length: w[i] as usize, key: k });
489 }
490 i += (w[i] - 1) as usize;
491 }
492 i += 1;
493 }
494 None
495 }
496
497 fn scan_bytes(&self, s: &[u8], mode: Mode) -> Vec<Match> {
498 let size = s.len();
499 if size == 0 || self.bcount == 0 { return vec![]; }
500
501 let html = mode.contains(Mode::HTML);
502 let forbid = mode.contains(Mode::FORBID);
503 let english = mode.contains(Mode::ENGLISH);
504
505 let mut w = vec![0u8; size];
506 let mut v = vec![0u8; size];
507 let mut hashes = vec![Hash::default(); 256];
508 let mut active = vec![false; 256];
509
510 let mut i = 0;
511 while i < size {
512 if html && s[i] == b'<' {
513 i = skip_tag(s, i + 1);
514 continue;
515 }
516
517 let slot = i & 0xff;
518 hashes[slot] = Hash::default();
519 active[slot] = true;
520
521 for j in 0..256usize {
522 if !active[j] { continue; }
523 hashes[j].feed(s[i]);
524 let r = self.lookup(&hashes[j]);
525
526 if i.wrapping_sub(j) >= self.max_word_len && r == 255 {
527 active[j] = false;
528 }
529
530 if r != 255 {
531 let start = i.wrapping_sub(i.wrapping_sub(j) & 0xff);
532 if english {
533 let ok_before = start == 0 || !is_word_char(s[start - 1]);
534 let ok_after = i + 1 >= size || !is_word_char(s[i + 1]);
535 if !ok_before || !ok_after { continue; }
536 }
537 w[start] = (i - j + 1) as u8;
538 v[start] = r;
539 }
540 }
541 i += 1;
542 }
543
544 let mut results = Vec::new();
545 let mut i = 0;
546 while i < size {
547 if w[i] != 0 {
548 let k = v[i];
549 if forbid || k >= key::FORBID_THRESHOLD {
550 results.push(Match { position: i, length: w[i] as usize, key: k });
551 }
552 i += (w[i] - 1) as usize;
553 }
554 i += 1;
555 }
556 results
557 }
558}
559
/// Skips the remainder of an HTML tag and returns the index just past its
/// closing `>`.
///
/// `i` is the index of the first byte after the opening `<`. Quoted
/// attribute values (single or double quotes) are skipped whole, so a `>`
/// inside quotes does not end the tag. For an unterminated tag or quote the
/// returned index lies beyond `s.len()`.
fn skip_tag(s: &[u8], mut i: usize) -> usize {
    let end = s.len();
    loop {
        match s.get(i).copied() {
            // End of input or end of tag: step past it.
            None | Some(b'>') => return i + 1,
            Some(quote @ (b'"' | b'\'')) => {
                let from = i + 1;
                let closing = s[from..]
                    .iter()
                    .position(|&b| b == quote)
                    .map_or(end, |off| from + off);
                // Continue after the closing quote (or past the end).
                i = closing + 1;
            }
            Some(_) => i += 1,
        }
    }
}
571
/// Reports whether `c` counts as part of a word for boundary checks:
/// ASCII letters and digits, `_` and `@`.
fn is_word_char(c: u8) -> bool {
    matches!(c, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_' | b'@')
}
575
/// Serializable mirror of [`Dictionary`] used by `save` / `load`.
///
/// Collections that are not stored directly are flattened: the weight map
/// becomes a list of pairs, the key-weight array a vector, and the compiled
/// regex set a list of pattern strings with their metadata.
#[cfg(feature = "save")]
#[derive(Serialize, Deserialize)]
struct DictSnapshot {
    /// Number of hash buckets.
    bcount: usize,
    /// Per-bucket `(start, count)` range into `entries`.
    blocks: Vec<(usize, usize)>,
    /// Flattened bucket contents as `(masked secondary hash, key)` pairs.
    entries: Vec<(u64, u8)>,
    /// Bitmask of loaded keys below 128.
    mask: u128,
    /// Per-word weights as `(primary hash, weight)` pairs.
    word_weights: Vec<(u64, f32)>,
    /// Per-key weights, indexed by key value.
    key_weights: Vec<f32>,
    /// Byte length of the longest loaded word (at least 16).
    max_word_len: usize,
    /// Regex patterns as `(pattern, key, weight)`; empty when none are
    /// loaded or the `regex` feature is disabled.
    regex_patterns: Vec<(String, u8, f32)>,
}
591
592#[cfg(feature = "save")]
593impl DictSnapshot {
594 fn from_dict(d: &Dictionary) -> Self {
595 DictSnapshot {
596 bcount: d.bcount,
597 blocks: d.blocks.clone(),
598 entries: d.entries.clone(),
599 mask: d.mask,
600 word_weights: d.word_weights.iter().map(|(&k, &v)| (k, v)).collect(),
601 key_weights: d.key_weights.to_vec(),
602 max_word_len: d.max_word_len,
603 regex_patterns: {
604 #[cfg(feature = "regex")]
605 {
606 d.regex_patterns.as_ref().map(|(set, meta)| {
607 set.patterns().iter().zip(meta.iter())
608 .map(|(p, &(k, w))| (p.clone(), k, w))
609 .collect()
610 }).unwrap_or_default()
611 }
612 #[cfg(not(feature = "regex"))]
613 { vec![] }
614 },
615 }
616 }
617
618 fn into_dict(self) -> Dictionary {
619 let word_weights: HashMap<u64, f32> = self.word_weights.into_iter().collect();
620 let mut key_weights = [1.0f32; 256];
621 for (i, w) in self.key_weights.iter().enumerate().take(256) {
622 key_weights[i] = *w;
623 }
624 Dictionary {
625 bcount: self.bcount,
626 blocks: self.blocks,
627 entries: self.entries,
628 mask: self.mask,
629 word_weights,
630 key_weights,
631 max_word_len: self.max_word_len,
632 #[cfg(feature = "regex")]
633 regex_patterns: if self.regex_patterns.is_empty() {
634 None
635 } else {
636 let patterns: Vec<&str> = self.regex_patterns.iter().map(|(p, _, _)| p.as_str()).collect();
637 let meta: Vec<(u8, f32)> = self.regex_patterns.iter().map(|(_, k, w)| (*k, *w)).collect();
638 RegexSet::new(&patterns).ok().map(|set| (set, meta))
639 },
640 }
641 }
642}
643
/// Unit tests for the dictionary: exact lookup, scanning modes, filtering,
/// builder helpers, text loading, key mask, scoring and classification.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{key, Mode};

    /// Fixture: a small dictionary with one or two words per action key.
    fn action_dict() -> Dictionary {
        Dictionary::builder()
            .add("shutdown", key::BLOCK)
            .add("crash", key::BLOCK)
            .add("disk_full", key::ALERT)
            .add("oom", key::ALERT)
            .add("deprecated_api", key::FLAG)
            .add("slow_query", key::THROTTLE)
            .add("retry", key::THROTTLE)
            .add("user_login", key::LOG)
            .add("health_check", key::PASS)
            .add("ping", key::PASS)
            .build()
    }

    /// Fixture: a dictionary loaded from tab-separated text with numeric keys.
    fn compat_dict() -> Dictionary {
        Dictionary::from_text(
            "apple\t9\nspam\t2\nadult_word\t1\nfree\t2\nprize\t2\n"
        )
    }

    // ---- seek: exact lookup -------------------------------------------

    #[test]
    fn seek_block() {
        let d = action_dict();
        assert_eq!(d.seek("shutdown"), Some(key::BLOCK));
        assert_eq!(d.seek("crash"), Some(key::BLOCK));
    }

    #[test]
    fn seek_alert() {
        let d = action_dict();
        assert_eq!(d.seek("disk_full"), Some(key::ALERT));
        assert_eq!(d.seek("oom"), Some(key::ALERT));
    }

    #[test]
    fn seek_flag() {
        let d = action_dict();
        assert_eq!(d.seek("deprecated_api"), Some(key::FLAG));
    }

    #[test]
    fn seek_throttle() {
        let d = action_dict();
        assert_eq!(d.seek("slow_query"), Some(key::THROTTLE));
        assert_eq!(d.seek("retry"), Some(key::THROTTLE));
    }

    #[test]
    fn seek_log() {
        let d = action_dict();
        assert_eq!(d.seek("user_login"), Some(key::LOG));
    }

    #[test]
    fn seek_pass() {
        let d = action_dict();
        assert_eq!(d.seek("health_check"), Some(key::PASS));
        assert_eq!(d.seek("ping"), Some(key::PASS));
    }

    #[test]
    fn seek_not_found() {
        let d = action_dict();
        assert_eq!(d.seek("unknown_event"), None);
        assert_eq!(d.seek(""), None);
    }

    #[test]
    fn seek_case_sensitive() {
        let d = action_dict();
        assert_eq!(d.seek("Shutdown"), None);
        assert_eq!(d.seek("SHUTDOWN"), None);
    }

    // ---- scan: FORBID filtering, positions, ordering --------------------

    #[test]
    fn scan_without_forbid_skips_actionable_keys() {
        let d = action_dict();
        let text = "shutdown disk_full deprecated_api slow_query user_login health_check";
        let m = d.scan(text, Mode::default());
        // Without FORBID only keys at or above the threshold are reported.
        assert!(m.iter().all(|x| x.key >= key::FORBID_THRESHOLD));
        assert!(m.iter().any(|x| x.key == key::PASS));
    }

    #[test]
    fn scan_with_forbid_returns_all() {
        let d = action_dict();
        let text = "shutdown disk_full deprecated_api slow_query user_login health_check";
        let m = d.scan(text, Mode::FORBID);
        let keys: Vec<u8> = m.iter().map(|x| x.key).collect();
        assert!(keys.contains(&key::BLOCK));
        assert!(keys.contains(&key::ALERT));
        assert!(keys.contains(&key::FLAG));
        assert!(keys.contains(&key::THROTTLE));
        assert!(keys.contains(&key::LOG));
        assert!(keys.contains(&key::PASS));
    }

    #[test]
    fn scan_empty_text() {
        let d = action_dict();
        assert!(d.scan("", Mode::FORBID).is_empty());
    }

    #[test]
    fn scan_no_match() {
        let d = action_dict();
        assert!(d.scan("everything is fine today", Mode::FORBID).is_empty());
    }

    #[test]
    fn scan_position_and_length() {
        let d = action_dict();
        let text = "system shutdown detected";
        let m = d.scan(text, Mode::FORBID);
        let hit = m.iter().find(|x| x.key == key::BLOCK).expect("shutdown not found");
        assert_eq!(hit.extract(text), "shutdown");
        assert_eq!(hit.position, 7);
        assert_eq!(hit.length, 8);
    }

    #[test]
    fn scan_multiple_matches_ordered() {
        let d = action_dict();
        let text = "crash then disk_full";
        let m = d.scan(text, Mode::FORBID);
        assert!(m.len() >= 2);
        let positions: Vec<usize> = m.iter().map(|x| x.position).collect();
        // Positions must be strictly increasing.
        assert!(positions.windows(2).all(|w| w[0] < w[1]));
    }

    #[test]
    fn scan_match_at_start() {
        let d = action_dict();
        let text = "shutdown now";
        let m = d.scan(text, Mode::FORBID);
        assert!(!m.is_empty());
        assert_eq!(m[0].position, 0);
    }

    #[test]
    fn scan_match_at_end() {
        let d = action_dict();
        let text = "system crash";
        let m = d.scan(text, Mode::FORBID);
        let hit = m.iter().find(|x| x.key == key::BLOCK).expect("crash not found");
        assert_eq!(hit.extract(text), "crash");
        assert_eq!(hit.position + hit.length, text.len());
    }

    // ---- scan: HTML mode ------------------------------------------------

    #[test]
    fn scan_html_skips_tags() {
        let d = action_dict();
        let text = r#"<meta name="shutdown"> disk_full occurred"#;
        let m = d.scan(text, Mode::HTML | Mode::FORBID);
        assert!(m.iter().all(|x| x.key != key::BLOCK), "shutdown inside tag should be skipped");
        assert!(m.iter().any(|x| x.key == key::ALERT));
    }

    #[test]
    fn scan_html_finds_text_content() {
        let d = action_dict();
        let text = "<p>system crash detected</p>";
        let m = d.scan(text, Mode::HTML | Mode::FORBID);
        assert!(m.iter().any(|x| x.key == key::BLOCK));
    }

    // ---- filter -----------------------------------------------------------

    #[test]
    fn filter_replaces_with_stars() {
        let d = action_dict();
        let out = d.filter("system shutdown detected", Mode::FORBID);
        assert!(!out.contains("shutdown"));
        assert!(out.contains('*'));
        assert_eq!(out.len(), "system shutdown detected".len());
    }

    #[test]
    fn filter_clean_text_unchanged() {
        let d = action_dict();
        let text = "everything is running smoothly";
        assert_eq!(d.filter(text, Mode::FORBID), text);
    }

    #[test]
    fn filter_multiple_words() {
        let d = action_dict();
        let out = d.filter("crash and disk_full", Mode::FORBID);
        assert!(!out.contains("crash"));
        assert!(!out.contains("disk_full"));
        assert_eq!(out.chars().filter(|&c| c == '*').count(),
                   "crash".len() + "disk_full".len());
    }

    // ---- scan_key ---------------------------------------------------------

    #[test]
    fn scan_key_returns_only_requested_key() {
        let d = action_dict();
        let text = "crash disk_full deprecated_api slow_query health_check";
        let blocks = d.scan_key(text, key::BLOCK, Mode::FORBID);
        assert!(blocks.iter().all(|x| x.key == key::BLOCK));
        assert!(!blocks.is_empty());
    }

    #[test]
    fn scan_key_empty_when_no_match() {
        let d = action_dict();
        assert!(d.scan_key("health_check ping", key::BLOCK, Mode::FORBID).is_empty());
    }

    // ---- builder ----------------------------------------------------------

    #[test]
    fn builder_add_many() {
        let d = Dictionary::builder()
            .add_many(&["crash", "panic"], key::BLOCK)
            .add_many(&["warn", "slow"], key::THROTTLE)
            .add("ok", key::PASS)
            .build();
        assert_eq!(d.seek("crash"), Some(key::BLOCK));
        assert_eq!(d.seek("panic"), Some(key::BLOCK));
        assert_eq!(d.seek("warn"), Some(key::THROTTLE));
        assert_eq!(d.seek("ok"), Some(key::PASS));
    }

    #[test]
    fn builder_merge() {
        let security = Dictionary::builder()
            .add("shutdown", key::BLOCK)
            .add("breach", key::ALERT);
        let perf = Dictionary::builder()
            .add("slow_query", key::THROTTLE)
            .add("timeout", key::FLAG);
        let d = security.merge(perf).build();
        assert_eq!(d.seek("shutdown"), Some(key::BLOCK));
        assert_eq!(d.seek("breach"), Some(key::ALERT));
        assert_eq!(d.seek("slow_query"), Some(key::THROTTLE));
        assert_eq!(d.seek("timeout"), Some(key::FLAG));
    }

    #[test]
    fn builder_empty() {
        let d = Dictionary::builder().build();
        assert_eq!(d.seek("anything"), None);
        assert!(d.scan("anything", Mode::FORBID).is_empty());
    }

    // ---- key range and text loading ---------------------------------------

    #[test]
    fn key_full_u8_range() {
        let d = Dictionary::builder()
            .add("low", 0u8)
            .add("mid", 100u8)
            .add("high", 254u8)
            .build();
        assert_eq!(d.seek("low"), Some(0));
        assert_eq!(d.seek("mid"), Some(100));
        assert_eq!(d.seek("high"), Some(254));
    }

    #[test]
    fn load_str_decimal_key() {
        let d = Dictionary::from_text("critical_event\t20\nbulk_import\t100\n");
        assert_eq!(d.seek("critical_event"), Some(20));
        assert_eq!(d.seek("bulk_import"), Some(100));
    }

    // NOTE(review): every key below also parses as decimal, so the
    // hexadecimal fallback in `load_str` is not exercised by this test.
    #[test]
    fn load_str_hex_key_compat() {
        let d = Dictionary::from_text("spam_word\t2\nadult_word\t1\napple\t9\n");
        assert_eq!(d.seek("spam_word"), Some(2));
        assert_eq!(d.seek("adult_word"), Some(1));
        assert_eq!(d.seek("apple"), Some(9));
    }

    #[test]
    fn load_str_default_key() {
        let d = Dictionary::from_text("someword\n");
        assert_eq!(d.seek("someword"), Some(key::DEFAULT));
    }

    // ---- mask ---------------------------------------------------------------

    #[test]
    fn mask_reflects_loaded_keys() {
        let d = action_dict();
        let m = d.mask();
        assert!(m & (1u128 << key::BLOCK) != 0);
        assert!(m & (1u128 << key::ALERT) != 0);
        assert!(m & (1u128 << key::FLAG) != 0);
        assert!(m & (1u128 << key::THROTTLE) != 0);
        assert!(m & (1u128 << key::LOG) != 0);
        assert!(m & (1u128 << key::PASS) != 0);
    }

    #[test]
    fn mask_empty_dict() {
        let d = Dictionary::builder().build();
        assert_eq!(d.mask(), 0);
    }

    // ---- scan_first / contains / severity -------------------------------------

    #[test]
    fn scan_first_returns_first_match() {
        let d = action_dict();
        let text = "crash then disk_full";
        let m = d.scan_first(text, Mode::FORBID).expect("should match");
        assert_eq!(m.extract(text), "crash");
    }

    #[test]
    fn scan_first_none_on_no_match() {
        let d = action_dict();
        assert!(d.scan_first("everything is fine", Mode::FORBID).is_none());
    }

    #[test]
    fn scan_first_respects_forbid() {
        let d = action_dict();
        assert!(d.scan_first("shutdown", Mode::default()).is_none());
        assert!(d.scan_first("shutdown", Mode::FORBID).is_some());
    }

    #[test]
    fn contains_true_on_match() {
        let d = action_dict();
        assert!(d.contains("system crash detected", Mode::FORBID));
    }

    #[test]
    fn contains_false_on_no_match() {
        let d = action_dict();
        assert!(!d.contains("all systems nominal", Mode::FORBID));
    }

    #[test]
    fn severity_returns_lowest_key() {
        let d = action_dict();
        let text = "disk_full and crash occurred";
        let m = d.severity(text, Mode::FORBID).expect("should match");
        assert_eq!(m.key, key::BLOCK);
        assert_eq!(m.extract(text), "crash");
    }

    #[test]
    fn severity_none_on_no_match() {
        let d = action_dict();
        assert!(d.severity("all clear", Mode::FORBID).is_none());
    }

    // ---- text-loaded dictionary -------------------------------------------------

    #[test]
    fn compat_seek() {
        let d = compat_dict();
        assert_eq!(d.seek("apple"), Some(9));
        assert_eq!(d.seek("spam"), Some(2));
        assert_eq!(d.seek("adult_word"), Some(1));
        assert_eq!(d.seek("notaword"), None);
    }

    // ---- scoring and classification -----------------------------------------------

    #[test]
    fn classify_returns_dominant_key() {
        let d = action_dict();
        let text = "crash and shutdown cause disk_full";
        let r = d.classify(text, Mode::FORBID).expect("should classify");
        assert_eq!(r.key, key::BLOCK);
        assert!((r.score - 2.0).abs() < 0.01);
    }

    #[test]
    fn classify_none_on_no_match() {
        let d = action_dict();
        assert!(d.classify("all clear nothing here", Mode::FORBID).is_none());
    }

    #[test]
    fn score_returns_per_key_scores() {
        let d = action_dict();
        let text = "crash disk_full slow_query";
        let scores = d.score(text, Mode::FORBID);
        assert!((scores[&key::BLOCK] - 1.0).abs() < 0.01);
        assert!((scores[&key::ALERT] - 1.0).abs() < 0.01);
        assert!((scores[&key::THROTTLE] - 1.0).abs() < 0.01);
    }

    #[test]
    fn word_weight_affects_score() {
        let d = Dictionary::builder()
            .add_weighted("critical_crash", key::BLOCK, 5.0)
            .add_weighted("minor_issue", key::BLOCK, 1.0)
            .build();
        let scores = d.score("critical_crash and minor_issue", Mode::FORBID);
        assert!((scores[&key::BLOCK] - 6.0).abs() < 0.01);
    }

    #[test]
    fn key_weight_affects_score() {
        let d = Dictionary::builder()
            .add("crash", key::BLOCK)
            .add("slow_query", key::THROTTLE)
            .set_key_weight(key::BLOCK, 10.0)
            .build();
        let scores = d.score("crash slow_query", Mode::FORBID);
        assert!((scores[&key::BLOCK] - 10.0).abs() < 0.01);
        assert!((scores[&key::THROTTLE] - 1.0).abs() < 0.01);
    }

    #[test]
    fn load_str_with_weight() {
        let d = Dictionary::from_text("shutdown\t0\t5.0\ndisk_full\t1\t2.0\nping\t5\n");
        let scores = d.score("shutdown disk_full ping", Mode::FORBID);
        assert!((scores[&key::BLOCK] - 5.0).abs() < 0.01);
        assert!((scores[&key::ALERT] - 2.0).abs() < 0.01);
        assert!((scores[&key::PASS] - 1.0).abs() < 0.01);
    }

    #[test]
    fn classify_with_weights_changes_winner() {
        let d = Dictionary::builder()
            .add("crash", key::BLOCK)
            .add("crash", key::BLOCK)
            .add("slow_query", key::THROTTLE)
            .build();
        let text = "crash crash slow_query";
        // Without runtime weights the two BLOCK hits dominate.
        let r = d.classify(text, Mode::FORBID).unwrap();
        assert_eq!(r.key, key::BLOCK);

        // A runtime multiplier on THROTTLE flips the winner.
        let r2 = d.classify_with_weights(text, Mode::FORBID, &[(key::THROTTLE, 5.0)]).unwrap();
        assert_eq!(r2.key, key::THROTTLE);
        assert!((r2.score - 5.0).abs() < 0.01);
    }

    #[test]
    fn score_with_weights_applies_runtime_multiplier() {
        let d = action_dict();
        let text = "crash disk_full";
        let scores = d.score_with_weights(text, Mode::FORBID, &[(key::ALERT, 3.0)]);
        assert!((scores[&key::BLOCK] - 1.0).abs() < 0.01);
        assert!((scores[&key::ALERT] - 3.0).abs() < 0.01);
    }

    #[test]
    fn compat_scan_forbid() {
        let d = compat_dict();
        let m = d.scan("get free prize now", Mode::HTML | Mode::FORBID);
        assert!(!m.is_empty());
        assert!(m.iter().any(|x| x.key == 2));
    }
}