use crate::hash::Hash;
use crate::{key, Match, Mode};
use crate::ClassifyResult;
use std::collections::HashMap;
use std::fs;
#[cfg(feature = "regex")]
use regex::RegexSet;
#[cfg(feature = "save")]
use serde::{Serialize, Deserialize};
// Divisor used to size the hash table: `build_hash` allocates
// `ceil(hash_count / BSIZE)` blocks, i.e. roughly `BSIZE` entries per block
// on average (counted before duplicate hashes are removed).
const BSIZE: usize = 6;
/// Word dictionary produced by [`DictionaryBuilder`]: maps hashed words to
/// `u8` keys and carries the weights used for scoring.
pub struct Dictionary {
/// Number of hash blocks; `0` means no hashed words were loaded.
bcount: usize,
/// Per block: `(start, count)` range into `entries`.
blocks: Vec<(usize, usize)>,
/// `(masked secondary hash, key)` pairs, grouped by block and sorted by hash within a block.
entries: Vec<(u64, u8)>,
/// Bit `k` is set when at least one word with key `k < 128` was loaded.
mask: u128,
/// Per-word weight, indexed by the word's primary hash (`Hash::a`).
word_weights: HashMap<u64, f32>,
/// Per-key weight multiplier, indexed by key.
key_weights: [f32; 256],
/// Byte length of the longest loaded word, never less than 16.
max_word_len: usize,
/// Compiled regex set plus the `(key, weight)` of each pattern, in pattern order.
#[cfg(feature = "regex")]
regex_patterns: Option<(RegexSet, Vec<(u8, f32)>)>,
}
/// Accumulates words, key weights and (optionally) regex patterns before
/// they are compiled into a [`Dictionary`] by `build`.
pub struct DictionaryBuilder {
/// Literal words as `(text, key, weight)`.
words: Vec<(String, u8, f32)>,
/// Per-key weight multiplier, indexed by key; every slot starts at `1.0`.
key_weights: [f32; 256],
/// Regex patterns as `(pattern, key, weight)`.
#[cfg(feature = "regex")]
regex_words: Vec<(String, u8, f32)>,
}
impl DictionaryBuilder {
    /// Creates an empty builder in which every key weight is `1.0`.
    pub fn new() -> Self {
        DictionaryBuilder {
            words: Vec::new(),
            key_weights: [1.0; 256],
            #[cfg(feature = "regex")]
            regex_words: Vec::new(),
        }
    }

    /// Registers `word` under `key` with a weight of `1.0`.
    /// Empty words are ignored.
    pub fn add(self, word: &str, key: u8) -> Self {
        self.add_weighted(word, key, 1.0)
    }

    /// Registers `word` under `key` with an explicit per-word `weight`.
    /// Empty words are ignored.
    pub fn add_weighted(mut self, word: &str, key: u8, weight: f32) -> Self {
        if !word.is_empty() {
            self.words.push((word.to_string(), key, weight));
        }
        self
    }

    /// Registers every non-empty entry of `words` under `key` with weight `1.0`.
    pub fn add_many(mut self, words: &[&str], key: u8) -> Self {
        self.words.extend(
            words
                .iter()
                .copied()
                .filter(|w| !w.is_empty())
                .map(|w| (w.to_string(), key, 1.0)),
        );
        self
    }

    /// Sets the score multiplier applied to every match of `key`.
    pub fn set_key_weight(mut self, key: u8, weight: f32) -> Self {
        self.key_weights[key as usize] = weight;
        self
    }

    /// Loads entries from tab-separated text, one entry per line:
    /// `word[\tkey[\tweight]]`.
    ///
    /// * Blank lines and lines starting with `#` are skipped.
    /// * `key` is parsed as a decimal `u8` first and as hexadecimal if that
    ///   fails; a missing or unparsable key becomes `key::DEFAULT`.
    /// * A missing or unparsable `weight` becomes `1.0`.
    /// * With the `regex` feature enabled, a word written as `/pattern/`
    ///   (non-empty pattern) is stored as a regex instead of a literal word.
    pub fn load_str(mut self, data: &str) -> Self {
        for line in data.lines() {
            let line = line.trim();
            if line.is_empty() || line.starts_with('#') {
                continue;
            }
            let mut parts = line.splitn(3, '\t');
            // `splitn` always yields at least one item; fall back to "" rather than panic.
            let word = parts.next().unwrap_or("").trim();
            if word.is_empty() {
                continue;
            }
            let k: u8 = parts
                .next()
                .map(str::trim)
                .and_then(|s| {
                    s.parse::<u8>()
                        .ok()
                        .or_else(|| u8::from_str_radix(s, 16).ok())
                })
                .unwrap_or(key::DEFAULT);
            let w: f32 = parts
                .next()
                .and_then(|s| s.trim().parse().ok())
                .unwrap_or(1.0);
            #[cfg(feature = "regex")]
            if word.starts_with('/') && word.ends_with('/') && word.len() > 2 {
                let pattern = &word[1..word.len() - 1];
                self.regex_words.push((pattern.to_string(), k, w));
                continue;
            }
            self.words.push((word.to_string(), k, w));
        }
        self
    }

    /// Reads `path` as UTF-8 text and feeds it to [`load_str`](Self::load_str).
    ///
    /// # Errors
    /// Returns `"cannot read {path}: {error}"` when the file cannot be read.
    pub fn load_file(self, path: &str) -> Result<Self, String> {
        let data = fs::read_to_string(path)
            .map_err(|e| format!("cannot read {path}: {e}"))?;
        Ok(self.load_str(&data))
    }

    /// Appends the words (and regex patterns) of `other` to this builder.
    ///
    /// Key weights customised on `other` are carried over as well; when both
    /// builders changed the same key's weight from the default `1.0`, the
    /// value already set on `self` is kept.
    pub fn merge(mut self, other: DictionaryBuilder) -> Self {
        self.words.extend(other.words);
        // Previously `other.key_weights` was dropped silently, so weights
        // configured on the merged-in builder never reached the dictionary.
        for (mine, theirs) in self.key_weights.iter_mut().zip(other.key_weights.iter()) {
            if *mine == 1.0 && *theirs != 1.0 {
                *mine = *theirs;
            }
        }
        #[cfg(feature = "regex")]
        self.regex_words.extend(other.regex_words);
        self
    }

    /// Consumes the builder and compiles the [`Dictionary`].
    pub fn build(self) -> Dictionary {
        #[cfg(feature = "regex")]
        {
            Dictionary::build_from(self.words, self.key_weights, self.regex_words)
        }
        #[cfg(not(feature = "regex"))]
        {
            Dictionary::build_from(self.words, self.key_weights)
        }
    }
}
impl Default for DictionaryBuilder {
fn default() -> Self { Self::new() }
}
impl Dictionary {
/// Starts a new, empty [`DictionaryBuilder`].
pub fn builder() -> DictionaryBuilder {
    DictionaryBuilder::new()
}
/// Builds a dictionary from the tab-separated word list stored at `path`.
///
/// # Errors
/// Propagates the error string produced by `DictionaryBuilder::load_file`.
pub fn from_file(path: &str) -> Result<Self, String> {
    let builder = DictionaryBuilder::new().load_file(path)?;
    Ok(builder.build())
}
/// Builds a dictionary from tab-separated word-list text
/// (same format as `DictionaryBuilder::load_str`).
pub fn from_text(data: &str) -> Self {
    let builder = DictionaryBuilder::new().load_str(data);
    builder.build()
}
/// Looks up `word` as an exact, whole-string entry.
///
/// Returns the word's key, or `None` when the lookup yields the
/// "not found" sentinel `255`.
pub fn seek(&self, word: &str) -> Option<u8> {
    let hash = Hash::from_bytes(word.as_bytes());
    let found = self.lookup(&hash);
    if found == 255 {
        None
    } else {
        Some(found)
    }
}
/// Bit set of the keys present in the dictionary: bit `k` is set when a
/// word with key `k` (for `k < 128`) was loaded.
pub fn mask(&self) -> u128 {
    self.mask
}
/// Finds dictionary words (and, with the `regex` feature, regex patterns)
/// in `text` and returns the matches ordered by position.
///
/// With `Mode::IGNORE_CASE` the scan runs on a lowercased copy of `text`,
/// so match offsets refer to that copy.
pub fn scan(&self, text: &str, mode: Mode) -> Vec<Match> {
    let ignore_case = mode.contains(Mode::IGNORE_CASE);
    let haystack: std::borrow::Cow<'_, str> = if ignore_case {
        std::borrow::Cow::Owned(text.to_lowercase())
    } else {
        std::borrow::Cow::Borrowed(text)
    };
    let mut found = self.scan_bytes(haystack.as_bytes(), mode);
    #[cfg(feature = "regex")]
    self.scan_regex(&haystack, mode, &mut found);
    found.sort_unstable_by_key(|hit| hit.position);
    found
}
/// Returns a copy of `text` in which every matched byte range is replaced
/// by `*` characters. Text without matches is returned unchanged.
///
/// Match offsets come from [`scan`](Self::scan). Under `Mode::IGNORE_CASE`
/// they refer to the lowercased copy of `text`, whose byte length can
/// differ from the original for some non-ASCII characters. Ranges are
/// therefore clamped to the buffer so such input can no longer cause an
/// out-of-bounds panic; in that case the masked span may be shifted
/// relative to the original characters.
pub fn filter(&self, text: &str, mode: Mode) -> String {
    let matches = self.scan(text, mode);
    if matches.is_empty() {
        return text.to_string();
    }
    let mut buf = text.as_bytes().to_vec();
    let limit = buf.len();
    for m in &matches {
        // `start <= end <= limit` always holds after clamping.
        let start = m.position.min(limit);
        let end = m.position.saturating_add(m.length).min(limit);
        buf[start..end].fill(b'*');
    }
    // Lossy conversion: masking part of a multi-byte character must not fail.
    String::from_utf8_lossy(&buf).into_owned()
}
/// Returns the first reportable hashed-word match in `text`, if any.
///
/// With `Mode::IGNORE_CASE` the scan runs on a lowercased copy of `text`.
/// Regex patterns are not consulted by this method.
pub fn scan_first(&self, text: &str, mode: Mode) -> Option<Match> {
    if !mode.contains(Mode::IGNORE_CASE) {
        return self.scan_bytes_first(text.as_bytes(), mode);
    }
    let lowered = text.to_lowercase();
    self.scan_bytes_first(lowered.as_bytes(), mode)
}
/// Reports whether `scan_first` finds at least one match in `text`.
pub fn contains(&self, text: &str, mode: Mode) -> bool {
    let first_hit = self.scan_first(text, mode);
    first_hit.is_some()
}
/// Returns the match with the numerically smallest key, or `None` when
/// nothing matches. Among matches sharing that key, the earliest one in
/// scan order is returned.
pub fn severity(&self, text: &str, mode: Mode) -> Option<Match> {
    let hits = self.scan(text, mode);
    hits.into_iter().min_by(|lhs, rhs| lhs.key.cmp(&rhs.key))
}
/// Sums, per key, the weighted score of every match in `text`
/// (word weight × dictionary key weight), with no runtime weights applied.
pub fn score(&self, text: &str, mode: Mode) -> HashMap<u8, f32> {
    let no_runtime_weights: Option<&[(u8, f32)]> = None;
    self.score_inner(text, mode, no_runtime_weights)
}
/// Like [`score`](Self::score), but additionally multiplies each match by
/// the runtime weight listed for its key in `runtime_weights`
/// (keys not listed use `1.0`).
pub fn score_with_weights(&self, text: &str, mode: Mode, runtime_weights: &[(u8, f32)]) -> HashMap<u8, f32> {
    let overrides = Some(runtime_weights);
    self.score_inner(text, mode, overrides)
}
/// Scores `text` and returns the key with the highest score,
/// or `None` when nothing matched.
pub fn classify(&self, text: &str, mode: Mode) -> Option<ClassifyResult> {
    let scores = self.score(text, mode);
    self.classify_from(scores)
}
/// Like [`classify`](Self::classify), but scores are computed with the
/// given per-key `runtime_weights`.
pub fn classify_with_weights(&self, text: &str, mode: Mode, runtime_weights: &[(u8, f32)]) -> Option<ClassifyResult> {
    let scores = self.score_with_weights(text, mode, runtime_weights);
    self.classify_from(scores)
}
fn score_inner(&self, text: &str, mode: Mode, runtime_weights: Option<&[(u8, f32)]>) -> HashMap<u8, f32> {
let mut scores: HashMap<u8, f32> = HashMap::new();
for m in self.scan(text, mode) {
let h = Hash::from_bytes(text[m.position..m.position + m.length].as_bytes());
let word_w = self.word_weights.get(&h.a).copied().unwrap_or(1.0);
let dict_kw = self.key_weights[m.key as usize];
let runtime_kw = runtime_weights
.and_then(|rw| rw.iter().find(|(k, _)| *k == m.key))
.map(|(_, w)| *w)
.unwrap_or(1.0);
*scores.entry(m.key).or_insert(0.0) += word_w * dict_kw * runtime_kw;
}
scores
}
/// Picks the entry with the highest score from `scores`.
///
/// Scores that cannot be ordered (NaN) compare as equal. Returns `None`
/// for an empty map.
fn classify_from(&self, scores: HashMap<u8, f32>) -> Option<ClassifyResult> {
    let best = scores.into_iter().max_by(|lhs, rhs| {
        lhs.1
            .partial_cmp(&rhs.1)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    match best {
        Some((k, score)) => Some(ClassifyResult { key: k, score }),
        None => None,
    }
}
/// Scans `text` and keeps only the matches whose key equals `key`.
pub fn scan_key(&self, text: &str, key: u8, mode: Mode) -> Vec<Match> {
    let mut hits = self.scan(text, mode);
    hits.retain(|hit| hit.key == key);
    hits
}
/// Appends one `Match` per regex hit in `text` to `matches`.
///
/// Only patterns that the set reports as matching are searched. Unless
/// `Mode::FORBID` is set, patterns whose key is below
/// `key::FORBID_THRESHOLD` are skipped. Does nothing when the dictionary
/// has no regex patterns.
#[cfg(feature = "regex")]
fn scan_regex(&self, text: &str, mode: Mode, matches: &mut Vec<Match>) {
    let (set, meta) = match &self.regex_patterns {
        Some(compiled) => compiled,
        None => return,
    };
    let report_all = mode.contains(Mode::FORBID);
    for idx in set.matches(text) {
        let pattern_key = meta[idx].0;
        if !report_all && (pattern_key as u32) < key::FORBID_THRESHOLD as u32 {
            continue;
        }
        // The set only says *which* patterns match; the individual regex is
        // needed to obtain the hit positions.
        let re = match regex::Regex::new(set.patterns()[idx].as_str()) {
            Ok(re) => re,
            Err(_) => continue,
        };
        for hit in re.find_iter(text) {
            matches.push(Match {
                position: hit.start(),
                length: hit.len(),
                key: pattern_key,
            });
        }
    }
}
/// Serializes the dictionary to bytes via a `DictSnapshot` encoded with `postcard`.
///
/// # Errors
/// Returns the encoder's error message as a string.
#[cfg(feature = "save")]
pub fn save(&self) -> Result<Vec<u8>, String> {
    let snapshot = DictSnapshot::from_dict(self);
    match postcard::to_allocvec(&snapshot) {
        Ok(bytes) => Ok(bytes),
        Err(e) => Err(e.to_string()),
    }
}
/// Serializes the dictionary with [`save`](Self::save) and writes it to `path`.
///
/// # Errors
/// Returns the serialization error, or `"cannot write {path}: {error}"`
/// when the file cannot be written.
#[cfg(feature = "save")]
pub fn save_to_file(&self, path: &str) -> Result<(), String> {
    let bytes = self.save()?;
    match fs::write(path, bytes) {
        Ok(()) => Ok(()),
        Err(e) => Err(format!("cannot write {path}: {e}")),
    }
}
/// Restores a dictionary from bytes produced by [`save`](Self::save).
///
/// # Errors
/// Returns the decoder's error message as a string.
#[cfg(feature = "save")]
pub fn load(bytes: &[u8]) -> Result<Self, String> {
    let snapshot: DictSnapshot = match postcard::from_bytes(bytes) {
        Ok(snapshot) => snapshot,
        Err(e) => return Err(e.to_string()),
    };
    Ok(snapshot.into_dict())
}
/// Reads `path` and restores the dictionary stored in it with [`load`](Self::load).
///
/// # Errors
/// Returns `"cannot read {path}: {error}"` when the file cannot be read,
/// or the decoding error from `load`.
#[cfg(feature = "save")]
pub fn load_from_file(path: &str) -> Result<Self, String> {
    match fs::read(path) {
        Ok(bytes) => Self::load(&bytes),
        Err(e) => Err(format!("cannot read {path}: {e}")),
    }
}
/// Builds the dictionary from literal `words` and attaches the compiled
/// regex patterns from `regex_words` (`(pattern, key, weight)`).
///
/// Patterns that do not compile on their own are dropped individually.
/// Previously a single invalid pattern made `RegexSet::new` fail and
/// silently disabled every regex entry. If the combined set still cannot
/// be built, the dictionary is returned without regex patterns.
#[cfg(feature = "regex")]
fn build_from(words: Vec<(String, u8, f32)>, key_weights: [f32; 256], regex_words: Vec<(String, u8, f32)>) -> Self {
    let mut dict = Self::build_hash(words, key_weights);
    // Keep pattern and metadata together while filtering so indices stay aligned.
    let valid: Vec<&(String, u8, f32)> = regex_words
        .iter()
        .filter(|entry| regex::Regex::new(entry.0.as_str()).is_ok())
        .collect();
    if !valid.is_empty() {
        let patterns: Vec<&str> = valid.iter().map(|entry| entry.0.as_str()).collect();
        let meta: Vec<(u8, f32)> = valid.iter().map(|entry| (entry.1, entry.2)).collect();
        if let Ok(set) = RegexSet::new(&patterns) {
            dict.regex_patterns = Some((set, meta));
        }
    }
    dict
}
#[cfg(not(feature = "regex"))]
fn build_from(words: Vec<(String, u8, f32)>, key_weights: [f32; 256]) -> Self {
Self::build_hash(words, key_weights)
}
/// Builds the block-partitioned hash table for the literal `words`
/// (`(text, key, weight)` triples) and stores `key_weights` unchanged.
///
/// The regex slot is always left as `None` here; `build_from` fills it in.
// NOTE(review): a word registered with key 255 is stored with `f == 255`,
// which `lookup`/`seek` treat as "not found" — confirm callers never use 255.
fn build_hash(words: Vec<(String, u8, f32)>, key_weights: [f32; 256]) -> Self {
let mut hashes: Vec<Hash> = Vec::new();
let mut mask: u128 = 0;
let mut word_weights: HashMap<u64, f32> = HashMap::new();
// Longest word in bytes, but never less than 16.
let max_word_len = words.iter().map(|(w, _, _)| w.len()).max().unwrap_or(16).max(16);
for (word, key, weight) in &words {
let bytes = word.as_bytes();
let mut tmp = bytes.to_vec();
let mut j = bytes.len();
// For words longer than 16 bytes, register a marker entry (f = 255) for
// each cut point at a multiple of 16 strictly below the current length.
// The byte at the cut is zeroed before hashing and restored afterwards —
// presumably `Hash::from_bytes` stops at the zero byte; TODO confirm.
while j > 16 {
j = (j - 1) & !0xf;
tmp[j] = 0;
let mut h = Hash::from_bytes(&tmp);
h.f = 255;
hashes.push(h);
tmp[j] = bytes[j];
}
// The full word carries its real key; its weight is indexed by `h.a` only,
// so a later word with the same `a` overwrites the weight.
let mut h = Hash::from_bytes(bytes);
h.f = *key;
word_weights.insert(h.a, *weight);
hashes.push(h);
// Only keys below 128 fit into the 128-bit presence mask.
if (*key as u32) < 128 { mask |= 1u128 << key; }
}
let msize = hashes.len();
if msize == 0 {
return Dictionary {
bcount: 0, blocks: vec![], entries: vec![],
mask: 0, word_weights, key_weights, max_word_len,
#[cfg(feature = "regex")]
regex_patterns: None,
};
}
// Block count is fixed from the pre-dedup hash count and used for every
// `a % bcount` below and in `lookup`.
let bcount = msize.div_ceil(BSIZE);
// Order by block, then masked secondary hash, then key, so that each block
// is contiguous and binary-searchable and the lowest key sorts first.
hashes.sort_by(|a, b| {
let ba = (a.a % bcount as u64) as usize;
let bb = (b.a % bcount as u64) as usize;
ba.cmp(&bb)
.then(a.b_masked().cmp(&b.b_masked()))
.then(a.f.cmp(&b.f))
});
// Drop consecutive entries with identical (a, b); the first one (lowest
// key) is the one that is kept.
hashes.dedup_by(|a, b| a.a == b.a && a.b == b.b);
let mut blocks = vec![(0usize, 0usize); bcount];
let mut entries: Vec<(u64, u8)> = Vec::with_capacity(hashes.len());
let mut cur_blk = usize::MAX;
for h in &hashes {
let blk = (h.a % bcount as u64) as usize;
// First entry of a new block: remember where the block starts.
if blk != cur_blk {
cur_blk = blk;
blocks[blk].0 = entries.len();
}
blocks[blk].1 += 1;
entries.push((h.b_masked(), h.f));
}
Dictionary {
bcount, blocks, entries, mask, word_weights, key_weights, max_word_len,
#[cfg(feature = "regex")]
regex_patterns: None,
}
}
/// Looks up a hash in the block table and returns the stored key, or the
/// sentinel `255` when the hash is absent (marker entries for long-word
/// prefixes also carry `255`).
///
/// The block is chosen by `h.a % bcount`; within the block the masked
/// secondary hash is binary-searched.
fn lookup(&self, h: &Hash) -> u8 {
    if self.bcount == 0 {
        return 255;
    }
    let blk = (h.a % self.bcount as u64) as usize;
    // Checked access: a dictionary restored from a corrupt or hand-crafted
    // snapshot may have `bcount`, `blocks` and `entries` out of sync, which
    // must read as "not found" rather than panic.
    let Some(&(start, count)) = self.blocks.get(blk) else {
        return 255;
    };
    let Some(slice) = start
        .checked_add(count)
        .and_then(|end| self.entries.get(start..end))
    else {
        return 255;
    };
    let target = h.b_masked();
    match slice.binary_search_by_key(&target, |&(hb, _)| hb) {
        Ok(i) => slice[i].1,
        Err(_) => 255,
    }
}
fn scan_bytes_first(&self, s: &[u8], mode: Mode) -> Option<Match> {
let size = s.len();
if size == 0 || self.bcount == 0 { return None; }
let html = mode.contains(Mode::HTML);
let forbid = mode.contains(Mode::FORBID);
let english = mode.contains(Mode::ENGLISH);
let mut w = vec![0u8; size];
let mut v = vec![0u8; size];
let mut hashes = vec![Hash::default(); 256];
let mut active = vec![false; 256];
let mut i = 0;
while i < size {
if html && s[i] == b'<' {
i = skip_tag(s, i + 1);
continue;
}
let slot = i & 0xff;
hashes[slot] = Hash::default();
active[slot] = true;
for j in 0..256usize {
if !active[j] { continue; }
hashes[j].feed(s[i]);
let r = self.lookup(&hashes[j]);
if i.wrapping_sub(j) >= self.max_word_len && r == 255 {
active[j] = false;
}
if r != 255 {
let start = i.wrapping_sub(i.wrapping_sub(j) & 0xff);
if english {
let ok_before = start == 0 || !is_word_char(s[start - 1]);
let ok_after = i + 1 >= size || !is_word_char(s[i + 1]);
if !ok_before || !ok_after { continue; }
}
w[start] = (i - j + 1) as u8;
v[start] = r;
}
}
i += 1;
}
let mut i = 0;
while i < size {
if w[i] != 0 {
let k = v[i];
if forbid || k >= key::FORBID_THRESHOLD {
return Some(Match { position: i, length: w[i] as usize, key: k });
}
i += (w[i] - 1) as usize;
}
i += 1;
}
None
}
fn scan_bytes(&self, s: &[u8], mode: Mode) -> Vec<Match> {
let size = s.len();
if size == 0 || self.bcount == 0 { return vec![]; }
let html = mode.contains(Mode::HTML);
let forbid = mode.contains(Mode::FORBID);
let english = mode.contains(Mode::ENGLISH);
let mut w = vec![0u8; size];
let mut v = vec![0u8; size];
let mut hashes = vec![Hash::default(); 256];
let mut active = vec![false; 256];
let mut i = 0;
while i < size {
if html && s[i] == b'<' {
i = skip_tag(s, i + 1);
continue;
}
let slot = i & 0xff;
hashes[slot] = Hash::default();
active[slot] = true;
for j in 0..256usize {
if !active[j] { continue; }
hashes[j].feed(s[i]);
let r = self.lookup(&hashes[j]);
if i.wrapping_sub(j) >= self.max_word_len && r == 255 {
active[j] = false;
}
if r != 255 {
let start = i.wrapping_sub(i.wrapping_sub(j) & 0xff);
if english {
let ok_before = start == 0 || !is_word_char(s[start - 1]);
let ok_after = i + 1 >= size || !is_word_char(s[i + 1]);
if !ok_before || !ok_after { continue; }
}
w[start] = (i - j + 1) as u8;
v[start] = r;
}
}
i += 1;
}
let mut results = Vec::new();
let mut i = 0;
while i < size {
if w[i] != 0 {
let k = v[i];
if forbid || k >= key::FORBID_THRESHOLD {
results.push(Match { position: i, length: w[i] as usize, key: k });
}
i += (w[i] - 1) as usize;
}
i += 1;
}
results
}
}
/// Advances past an HTML tag.
///
/// `i` is the index just after the opening `<`. Returns the index one past
/// the closing `>`; a `>` inside a single- or double-quoted attribute
/// value does not end the tag. For unterminated input the result lies
/// beyond `s.len()`.
fn skip_tag(s: &[u8], mut i: usize) -> usize {
    let end = s.len();
    loop {
        if i >= end {
            break;
        }
        match s[i] {
            b'>' => break,
            b'"' | b'\'' => {
                let quote = s[i];
                i += 1;
                // Run to the matching quote (or the end of input)...
                while i < end && s[i] != quote {
                    i += 1;
                }
                // ...and step over it.
                i += 1;
            }
            _ => i += 1,
        }
    }
    i + 1
}
/// Reports whether `c` counts as part of a word for `Mode::ENGLISH`
/// boundary checks: an ASCII letter or digit, `_`, or `@`.
fn is_word_char(c: u8) -> bool {
    matches!(c, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_' | b'@')
}
/// Serializable mirror of `Dictionary` used by `save`/`load`.
///
/// Holds the same data in serde-friendly shapes: the weight map as a list
/// of pairs, the key-weight array as a vector, and regex patterns as their
/// source strings with `(key, weight)`.
#[cfg(feature = "save")]
#[derive(Serialize, Deserialize)]
struct DictSnapshot {
bcount: usize,
blocks: Vec<(usize, usize)>,
entries: Vec<(u64, u8)>,
mask: u128,
// `(primary hash, weight)` pairs from `Dictionary::word_weights`.
word_weights: Vec<(u64, f32)>,
// Copy of the 256-slot key-weight array.
key_weights: Vec<f32>,
max_word_len: usize,
// `(pattern source, key, weight)`; empty when there are no regex patterns
// or the `regex` feature is disabled.
regex_patterns: Vec<(String, u8, f32)>,
}
#[cfg(feature = "save")]
impl DictSnapshot {
    /// Captures the state of `d` in serializable form.
    fn from_dict(d: &Dictionary) -> Self {
        // Regex patterns are stored as source text next to their (key, weight).
        #[cfg(feature = "regex")]
        let regex_patterns: Vec<(String, u8, f32)> = match d.regex_patterns.as_ref() {
            Some((set, meta)) => set
                .patterns()
                .iter()
                .zip(meta.iter())
                .map(|(p, &(k, w))| (p.clone(), k, w))
                .collect(),
            None => Vec::new(),
        };
        #[cfg(not(feature = "regex"))]
        let regex_patterns: Vec<(String, u8, f32)> = vec![];

        let word_weights: Vec<(u64, f32)> = d
            .word_weights
            .iter()
            .map(|(&hash, &weight)| (hash, weight))
            .collect();

        DictSnapshot {
            bcount: d.bcount,
            blocks: d.blocks.clone(),
            entries: d.entries.clone(),
            mask: d.mask,
            word_weights,
            key_weights: d.key_weights.to_vec(),
            max_word_len: d.max_word_len,
            regex_patterns,
        }
    }

    /// Rebuilds a `Dictionary` from the snapshot.
    ///
    /// Key weights missing from the snapshot stay at `1.0`, and weights past
    /// the 256th are ignored. With the `regex` feature, patterns are
    /// recompiled; if that fails the dictionary has no regex patterns.
    fn into_dict(self) -> Dictionary {
        let word_weights: HashMap<u64, f32> = self.word_weights.into_iter().collect();

        let mut key_weights = [1.0f32; 256];
        for (slot, stored) in key_weights.iter_mut().zip(self.key_weights.iter()) {
            *slot = *stored;
        }

        #[cfg(feature = "regex")]
        let regex_patterns = if self.regex_patterns.is_empty() {
            None
        } else {
            let patterns: Vec<&str> = self
                .regex_patterns
                .iter()
                .map(|(p, _, _)| p.as_str())
                .collect();
            let meta: Vec<(u8, f32)> = self
                .regex_patterns
                .iter()
                .map(|(_, k, w)| (*k, *w))
                .collect();
            RegexSet::new(&patterns).ok().map(|set| (set, meta))
        };

        Dictionary {
            bcount: self.bcount,
            blocks: self.blocks,
            entries: self.entries,
            mask: self.mask,
            word_weights,
            key_weights,
            max_word_len: self.max_word_len,
            #[cfg(feature = "regex")]
            regex_patterns,
        }
    }
}
// Unit tests for `Dictionary` and `DictionaryBuilder`: exact lookup, scanning
// in the different modes, filtering, builder helpers, text loading, the key
// mask, and scoring/classification.
// NOTE(review): every input text here is far shorter than 256 bytes, so the
// ring-buffer wrap-around in the byte scanner is never exercised.
#[cfg(test)]
mod tests {
use super::*;
use crate::{key, Mode};
// Dictionary with words under each of the named action keys, built via the builder API.
fn action_dict() -> Dictionary {
Dictionary::builder()
.add("shutdown", key::BLOCK)
.add("crash", key::BLOCK)
.add("disk_full", key::ALERT)
.add("oom", key::ALERT)
.add("deprecated_api", key::FLAG)
.add("slow_query", key::THROTTLE)
.add("retry", key::THROTTLE)
.add("user_login", key::LOG)
.add("health_check", key::PASS)
.add("ping", key::PASS)
.build()
}
// Dictionary loaded from tab-separated text with numeric keys.
fn compat_dict() -> Dictionary {
Dictionary::from_text(
"apple\t9\nspam\t2\nadult_word\t1\nfree\t2\nprize\t2\n"
)
}
// --- seek: exact whole-word lookup ---
#[test]
fn seek_block() {
let d = action_dict();
assert_eq!(d.seek("shutdown"), Some(key::BLOCK));
assert_eq!(d.seek("crash"), Some(key::BLOCK));
}
#[test]
fn seek_alert() {
let d = action_dict();
assert_eq!(d.seek("disk_full"), Some(key::ALERT));
assert_eq!(d.seek("oom"), Some(key::ALERT));
}
#[test]
fn seek_flag() {
let d = action_dict();
assert_eq!(d.seek("deprecated_api"), Some(key::FLAG));
}
#[test]
fn seek_throttle() {
let d = action_dict();
assert_eq!(d.seek("slow_query"), Some(key::THROTTLE));
assert_eq!(d.seek("retry"), Some(key::THROTTLE));
}
#[test]
fn seek_log() {
let d = action_dict();
assert_eq!(d.seek("user_login"), Some(key::LOG));
}
#[test]
fn seek_pass() {
let d = action_dict();
assert_eq!(d.seek("health_check"), Some(key::PASS));
assert_eq!(d.seek("ping"), Some(key::PASS));
}
#[test]
fn seek_not_found() {
let d = action_dict();
assert_eq!(d.seek("unknown_event"), None);
assert_eq!(d.seek(""), None);
}
// `seek` hashes the word as given; no case folding is applied.
#[test]
fn seek_case_sensitive() {
let d = action_dict();
assert_eq!(d.seek("Shutdown"), None);
assert_eq!(d.seek("SHUTDOWN"), None);
}
// --- scan: FORBID controls whether keys below the threshold are reported ---
#[test]
fn scan_without_forbid_skips_actionable_keys() {
let d = action_dict();
let text = "shutdown disk_full deprecated_api slow_query user_login health_check";
let m = d.scan(text, Mode::default());
assert!(m.iter().all(|x| x.key >= key::FORBID_THRESHOLD));
assert!(m.iter().any(|x| x.key == key::PASS));
}
#[test]
fn scan_with_forbid_returns_all() {
let d = action_dict();
let text = "shutdown disk_full deprecated_api slow_query user_login health_check";
let m = d.scan(text, Mode::FORBID);
let keys: Vec<u8> = m.iter().map(|x| x.key).collect();
assert!(keys.contains(&key::BLOCK));
assert!(keys.contains(&key::ALERT));
assert!(keys.contains(&key::FLAG));
assert!(keys.contains(&key::THROTTLE));
assert!(keys.contains(&key::LOG));
assert!(keys.contains(&key::PASS));
}
#[test]
fn scan_empty_text() {
let d = action_dict();
assert!(d.scan("", Mode::FORBID).is_empty());
}
#[test]
fn scan_no_match() {
let d = action_dict();
assert!(d.scan("everything is fine today", Mode::FORBID).is_empty());
}
// Match offsets are byte positions into the scanned text.
#[test]
fn scan_position_and_length() {
let d = action_dict();
let text = "system shutdown detected";
let m = d.scan(text, Mode::FORBID);
let hit = m.iter().find(|x| x.key == key::BLOCK).expect("shutdown not found");
assert_eq!(hit.extract(text), "shutdown");
assert_eq!(hit.position, 7);
assert_eq!(hit.length, 8);
}
#[test]
fn scan_multiple_matches_ordered() {
let d = action_dict();
let text = "crash then disk_full";
let m = d.scan(text, Mode::FORBID);
assert!(m.len() >= 2);
let positions: Vec<usize> = m.iter().map(|x| x.position).collect();
assert!(positions.windows(2).all(|w| w[0] < w[1]));
}
#[test]
fn scan_match_at_start() {
let d = action_dict();
let text = "shutdown now";
let m = d.scan(text, Mode::FORBID);
assert!(!m.is_empty());
assert_eq!(m[0].position, 0);
}
#[test]
fn scan_match_at_end() {
let d = action_dict();
let text = "system crash";
let m = d.scan(text, Mode::FORBID);
let hit = m.iter().find(|x| x.key == key::BLOCK).expect("crash not found");
assert_eq!(hit.extract(text), "crash");
assert_eq!(hit.position + hit.length, text.len());
}
// --- HTML mode: text inside `<...>` tags is not scanned ---
#[test]
fn scan_html_skips_tags() {
let d = action_dict();
let text = r#"<meta name="shutdown"> disk_full occurred"#;
let m = d.scan(text, Mode::HTML | Mode::FORBID);
assert!(m.iter().all(|x| x.key != key::BLOCK), "shutdown inside tag should be skipped");
assert!(m.iter().any(|x| x.key == key::ALERT));
}
#[test]
fn scan_html_finds_text_content() {
let d = action_dict();
let text = "<p>system crash detected</p>";
let m = d.scan(text, Mode::HTML | Mode::FORBID);
assert!(m.iter().any(|x| x.key == key::BLOCK));
}
// --- filter: matched bytes are replaced by `*`, length is preserved ---
#[test]
fn filter_replaces_with_stars() {
let d = action_dict();
let out = d.filter("system shutdown detected", Mode::FORBID);
assert!(!out.contains("shutdown"));
assert!(out.contains('*'));
assert_eq!(out.len(), "system shutdown detected".len());
}
#[test]
fn filter_clean_text_unchanged() {
let d = action_dict();
let text = "everything is running smoothly";
assert_eq!(d.filter(text, Mode::FORBID), text);
}
#[test]
fn filter_multiple_words() {
let d = action_dict();
let out = d.filter("crash and disk_full", Mode::FORBID);
assert!(!out.contains("crash"));
assert!(!out.contains("disk_full"));
assert_eq!(out.chars().filter(|&c| c == '*').count(),
"crash".len() + "disk_full".len());
}
// --- scan_key: scan restricted to one key ---
#[test]
fn scan_key_returns_only_requested_key() {
let d = action_dict();
let text = "crash disk_full deprecated_api slow_query health_check";
let blocks = d.scan_key(text, key::BLOCK, Mode::FORBID);
assert!(blocks.iter().all(|x| x.key == key::BLOCK));
assert!(!blocks.is_empty());
}
#[test]
fn scan_key_empty_when_no_match() {
let d = action_dict();
assert!(d.scan_key("health_check ping", key::BLOCK, Mode::FORBID).is_empty());
}
// --- builder helpers ---
#[test]
fn builder_add_many() {
let d = Dictionary::builder()
.add_many(&["crash", "panic"], key::BLOCK)
.add_many(&["warn", "slow"], key::THROTTLE)
.add("ok", key::PASS)
.build();
assert_eq!(d.seek("crash"), Some(key::BLOCK));
assert_eq!(d.seek("panic"), Some(key::BLOCK));
assert_eq!(d.seek("warn"), Some(key::THROTTLE));
assert_eq!(d.seek("ok"), Some(key::PASS));
}
#[test]
fn builder_merge() {
let security = Dictionary::builder()
.add("shutdown", key::BLOCK)
.add("breach", key::ALERT);
let perf = Dictionary::builder()
.add("slow_query", key::THROTTLE)
.add("timeout", key::FLAG);
let d = security.merge(perf).build();
assert_eq!(d.seek("shutdown"), Some(key::BLOCK));
assert_eq!(d.seek("breach"), Some(key::ALERT));
assert_eq!(d.seek("slow_query"), Some(key::THROTTLE));
assert_eq!(d.seek("timeout"), Some(key::FLAG));
}
#[test]
fn builder_empty() {
let d = Dictionary::builder().build();
assert_eq!(d.seek("anything"), None);
assert!(d.scan("anything", Mode::FORBID).is_empty());
}
// Keys 0..=254 are usable; 255 is what `seek` maps to `None`, so it is not tested as a key.
#[test]
fn key_full_u8_range() {
let d = Dictionary::builder()
.add("low", 0u8)
.add("mid", 100u8)
.add("high", 254u8)
.build();
assert_eq!(d.seek("low"), Some(0));
assert_eq!(d.seek("mid"), Some(100));
assert_eq!(d.seek("high"), Some(254));
}
// --- loading from text ---
#[test]
fn load_str_decimal_key() {
let d = Dictionary::from_text("critical_event\t20\nbulk_import\t100\n");
assert_eq!(d.seek("critical_event"), Some(20));
assert_eq!(d.seek("bulk_import"), Some(100));
}
// NOTE(review): every key below is also valid decimal, so the hexadecimal
// fallback in `load_str` is not actually exercised by this test.
#[test]
fn load_str_hex_key_compat() {
let d = Dictionary::from_text("spam_word\t2\nadult_word\t1\napple\t9\n");
assert_eq!(d.seek("spam_word"), Some(2));
assert_eq!(d.seek("adult_word"), Some(1));
assert_eq!(d.seek("apple"), Some(9));
}
#[test]
fn load_str_default_key() {
let d = Dictionary::from_text("someword\n");
assert_eq!(d.seek("someword"), Some(key::DEFAULT));
}
// --- key presence mask ---
#[test]
fn mask_reflects_loaded_keys() {
let d = action_dict();
let m = d.mask();
assert!(m & (1u128 << key::BLOCK) != 0);
assert!(m & (1u128 << key::ALERT) != 0);
assert!(m & (1u128 << key::FLAG) != 0);
assert!(m & (1u128 << key::THROTTLE) != 0);
assert!(m & (1u128 << key::LOG) != 0);
assert!(m & (1u128 << key::PASS) != 0);
}
#[test]
fn mask_empty_dict() {
let d = Dictionary::builder().build();
assert_eq!(d.mask(), 0);
}
// --- scan_first / contains ---
#[test]
fn scan_first_returns_first_match() {
let d = action_dict();
let text = "crash then disk_full";
let m = d.scan_first(text, Mode::FORBID).expect("should match");
assert_eq!(m.extract(text), "crash");
}
#[test]
fn scan_first_none_on_no_match() {
let d = action_dict();
assert!(d.scan_first("everything is fine", Mode::FORBID).is_none());
}
#[test]
fn scan_first_respects_forbid() {
let d = action_dict();
assert!(d.scan_first("shutdown", Mode::default()).is_none());
assert!(d.scan_first("shutdown", Mode::FORBID).is_some());
}
#[test]
fn contains_true_on_match() {
let d = action_dict();
assert!(d.contains("system crash detected", Mode::FORBID));
}
#[test]
fn contains_false_on_no_match() {
let d = action_dict();
assert!(!d.contains("all systems nominal", Mode::FORBID));
}
// --- severity: the match with the smallest key wins ---
#[test]
fn severity_returns_lowest_key() {
let d = action_dict();
let text = "disk_full and crash occurred";
let m = d.severity(text, Mode::FORBID).expect("should match");
assert_eq!(m.key, key::BLOCK);
assert_eq!(m.extract(text), "crash");
}
#[test]
fn severity_none_on_no_match() {
let d = action_dict();
assert!(d.severity("all clear", Mode::FORBID).is_none());
}
#[test]
fn compat_seek() {
let d = compat_dict();
assert_eq!(d.seek("apple"), Some(9));
assert_eq!(d.seek("spam"), Some(2));
assert_eq!(d.seek("adult_word"), Some(1));
assert_eq!(d.seek("notaword"), None);
}
// --- scoring and classification ---
#[test]
fn classify_returns_dominant_key() {
let d = action_dict();
let text = "crash and shutdown cause disk_full";
let r = d.classify(text, Mode::FORBID).expect("should classify");
assert_eq!(r.key, key::BLOCK);
assert!((r.score - 2.0).abs() < 0.01);
}
#[test]
fn classify_none_on_no_match() {
let d = action_dict();
assert!(d.classify("all clear nothing here", Mode::FORBID).is_none());
}
#[test]
fn score_returns_per_key_scores() {
let d = action_dict();
let text = "crash disk_full slow_query";
let scores = d.score(text, Mode::FORBID);
assert!((scores[&key::BLOCK] - 1.0).abs() < 0.01);
assert!((scores[&key::ALERT] - 1.0).abs() < 0.01);
assert!((scores[&key::THROTTLE] - 1.0).abs() < 0.01);
}
#[test]
fn word_weight_affects_score() {
let d = Dictionary::builder()
.add_weighted("critical_crash", key::BLOCK, 5.0)
.add_weighted("minor_issue", key::BLOCK, 1.0)
.build();
let scores = d.score("critical_crash and minor_issue", Mode::FORBID);
assert!((scores[&key::BLOCK] - 6.0).abs() < 0.01);
}
#[test]
fn key_weight_affects_score() {
let d = Dictionary::builder()
.add("crash", key::BLOCK)
.add("slow_query", key::THROTTLE)
.set_key_weight(key::BLOCK, 10.0)
.build();
let scores = d.score("crash slow_query", Mode::FORBID);
assert!((scores[&key::BLOCK] - 10.0).abs() < 0.01);
assert!((scores[&key::THROTTLE] - 1.0).abs() < 0.01);
}
// The third tab-separated column of a line is the per-word weight.
#[test]
fn load_str_with_weight() {
let d = Dictionary::from_text("shutdown\t0\t5.0\ndisk_full\t1\t2.0\nping\t5\n");
let scores = d.score("shutdown disk_full ping", Mode::FORBID);
assert!((scores[&key::BLOCK] - 5.0).abs() < 0.01);
assert!((scores[&key::ALERT] - 2.0).abs() < 0.01);
assert!((scores[&key::PASS] - 1.0).abs() < 0.01);
}
// "crash" is registered twice on purpose; the text contains it twice, so
// BLOCK scores 2.0 until the runtime weight lifts THROTTLE to 5.0.
#[test]
fn classify_with_weights_changes_winner() {
let d = Dictionary::builder()
.add("crash", key::BLOCK)
.add("crash", key::BLOCK)
.add("slow_query", key::THROTTLE)
.build();
let text = "crash crash slow_query";
let r = d.classify(text, Mode::FORBID).unwrap();
assert_eq!(r.key, key::BLOCK);
let r2 = d.classify_with_weights(text, Mode::FORBID, &[(key::THROTTLE, 5.0)]).unwrap();
assert_eq!(r2.key, key::THROTTLE);
assert!((r2.score - 5.0).abs() < 0.01);
}
#[test]
fn score_with_weights_applies_runtime_multiplier() {
let d = action_dict();
let text = "crash disk_full";
let scores = d.score_with_weights(text, Mode::FORBID, &[(key::ALERT, 3.0)]);
assert!((scores[&key::BLOCK] - 1.0).abs() < 0.01); assert!((scores[&key::ALERT] - 3.0).abs() < 0.01); }
#[test]
fn compat_scan_forbid() {
let d = compat_dict();
let m = d.scan("get free prize now", Mode::HTML | Mode::FORBID);
assert!(!m.is_empty());
assert!(m.iter().any(|x| x.key == 2));
}
}