use std::collections::HashMap;
/// Flags domain names that contain a long label with a high Shannon entropy.
///
/// A label is one of the `.`-separated parts of a domain name. Labels shorter
/// than `min_length` are never evaluated.
#[derive(Debug, Clone, PartialEq)]
pub struct EntropyDetector {
    /// Entropy, in bits per character, that a label must exceed to be flagged.
    threshold: f64,
    /// Minimum label length, in characters, for a label to be evaluated.
    min_length: usize,
}

impl EntropyDetector {
    /// Creates a detector that flags labels of at least `min_length`
    /// characters whose Shannon entropy is strictly greater than `threshold`.
    pub fn new(threshold: f64, min_length: usize) -> Self {
        Self {
            threshold,
            min_length,
        }
    }

    /// Returns `true` if any `.`-separated label of `domain` is at least
    /// `min_length` characters long and has an entropy above `threshold`.
    pub fn is_suspicious(&self, domain: &str) -> bool {
        domain
            .split('.')
            // Measure the label in characters, not UTF-8 bytes, so the length
            // filter uses the same unit as the entropy computation below.
            .filter(|label| label.chars().count() >= self.min_length)
            .any(|label| self.shannon_entropy(label) > self.threshold)
    }

    /// Computes the Shannon entropy of `s` in bits per character.
    ///
    /// Symbols are Unicode scalar values (`char`s). Returns `0.0` for an empty
    /// string and for a string made of a single repeated character. The result
    /// is bit-for-bit reproducible for a given input.
    pub fn shannon_entropy(&self, s: &str) -> f64 {
        // Count occurrences and the total length in a single pass.
        let mut counts: HashMap<char, usize> = HashMap::new();
        let mut total = 0usize;
        for c in s.chars() {
            *counts.entry(c).or_default() += 1;
            total += 1;
        }

        // With zero or one distinct symbol there is no uncertainty. Returning
        // early also avoids producing a negative zero from `-1.0 * 0.0`.
        if counts.len() <= 1 {
            return 0.0;
        }

        // `HashMap` iteration order is unspecified and floating-point addition
        // is not associative, so sum the terms in a fixed (sorted) order to
        // keep the result identical from call to call.
        let mut frequencies: Vec<usize> = counts.into_values().collect();
        frequencies.sort_unstable();

        let len = total as f64;
        frequencies
            .into_iter()
            .map(|n| {
                let p = n as f64 / len;
                -p * p.log2()
            })
            .sum()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Entropy threshold used by every test in this module.
    const THRESHOLD: f64 = 3.5;
    /// Minimum label length used by every test in this module.
    const MIN_LENGTH: usize = 20;

    /// Builds the detector configuration shared by all tests.
    fn make_detector() -> EntropyDetector {
        EntropyDetector::new(THRESHOLD, MIN_LENGTH)
    }

    #[test]
    fn repeated_char_has_lower_entropy_than_mixed() {
        let detector = make_detector();
        let repeated = detector.shannon_entropy("aaaa");
        let mixed = detector.shannon_entropy("a1b2c3d4");
        assert!(repeated < mixed);
    }

    #[test]
    fn normal_domain_is_not_suspicious() {
        // Every label here is shorter than the minimum length.
        let flagged = make_detector().is_suspicious("normal.example.com");
        assert!(!flagged);
    }

    #[test]
    fn long_but_low_entropy_label_is_not_suspicious() {
        // The first label is long enough to be evaluated but is one repeated character.
        let flagged = make_detector().is_suspicious("aaaaaaaaaaaaaaaaaaaaaaa.example.com");
        assert!(!flagged);
    }

    #[test]
    fn long_high_entropy_label_is_suspicious() {
        // The first label is long and made of mostly distinct characters.
        let flagged = make_detector().is_suspicious("k8j4h2g9f7d5s3a1q6w8e4r2t0y.example.com");
        assert!(flagged);
    }
}