/// Computes the Shannon entropy of `data`, measured in bits per byte.
///
/// The string is analysed as its raw UTF-8 byte sequence, so a multi-byte
/// character contributes each of its bytes to the histogram separately.
///
/// Returns `0.0` for an empty string and for a string consisting of a single
/// repeated byte value; the value grows as the byte values become more evenly
/// distributed.
#[inline]
pub fn shannon_entropy(data: &str) -> f64 {
    if data.is_empty() {
        return 0.0;
    }

    let bytes = data.as_bytes();

    // Histogram of byte values. `usize` counters cannot overflow here: no
    // bucket can ever exceed `bytes.len()`, which is itself a `usize`. A
    // narrower counter would wrap (or panic in debug builds) on huge inputs.
    let mut frequency = [0usize; 256];
    for &byte in bytes {
        frequency[usize::from(byte)] += 1;
    }

    let total = bytes.len() as f64;

    // H = -sum(p * log2(p)) over the byte values that actually occur; empty
    // buckets are skipped because log2(0) is undefined.
    frequency
        .iter()
        .filter(|&&count| count > 0)
        .fold(0.0_f64, |entropy, &count| {
            let p = count as f64 / total;
            entropy - p * p.log2()
        })
}
/// Returns the Shannon entropy of `data` rescaled into the range `[0.0, 1.0]`.
///
/// The raw value from [`shannon_entropy`] is divided by
/// `log2(min(byte_len, 256))`, i.e. the entropy the input would have if every
/// byte were distinct (capped at the 256 possible byte values), and the ratio
/// is clamped to `[0.0, 1.0]`.
///
/// Empty input yields `0.0`, as does any input for which that ceiling is not
/// positive (a single byte, since `log2(1) == 0`).
#[inline]
pub fn normalized_entropy(data: &str) -> f64 {
    if data.is_empty() {
        return 0.0;
    }

    let raw = shannon_entropy(data);

    // At most `min(len, 256)` distinct byte values can occur in the input.
    let distinct_cap = data.len().min(256) as f64;
    let ceiling = distinct_cap.log2();

    if ceiling <= 0.0 {
        0.0
    } else {
        (raw / ceiling).clamp(0.0, 1.0)
    }
}
/// Reports whether `observed` lies more than `threshold` standard deviations
/// away from `mean`, in either direction.
///
/// The standard deviation is taken as `variance.sqrt()`. When `variance` is
/// `0.001` or smaller the function always returns `false`, so no division by
/// a near-zero standard deviation takes place.
#[inline]
pub fn is_entropy_anomaly(observed: f64, mean: f64, variance: f64, threshold: f64) -> bool {
    // Variances at or below this floor are never reported as anomalous.
    const MIN_VARIANCE: f64 = 0.001;

    variance > MIN_VARIANCE && (observed - mean).abs() / variance.sqrt() > threshold
}
/// Returns the signed z-score of `observed` relative to `mean`:
/// `(observed - mean) / variance.sqrt()`.
///
/// A positive result means `observed` is above `mean`, a negative result
/// means it is below. When `variance` is `0.001` or smaller the function
/// returns `0.0` instead of dividing by a near-zero standard deviation.
#[inline]
pub fn entropy_z_score(observed: f64, mean: f64, variance: f64) -> f64 {
    // Variances at or below this floor produce a z-score of zero.
    const MIN_VARIANCE: f64 = 0.001;

    match variance {
        v if v <= MIN_VARIANCE => 0.0,
        v => (observed - mean) / v.sqrt(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance used when comparing floating-point results.
    const EPSILON: f64 = 0.001;

    /// Asserts that `actual` differs from `expected` by less than `EPSILON`.
    fn assert_close(actual: f64, expected: f64) {
        assert!((actual - expected).abs() < EPSILON);
    }

    // Empty input has zero entropy, raw and normalized.
    #[test]
    fn test_empty_string() {
        assert_eq!(shannon_entropy(""), 0.0);
        assert_eq!(normalized_entropy(""), 0.0);
    }

    // A single repeated symbol carries no information.
    #[test]
    fn test_uniform_string() {
        for &input in &["aaaa", "XXXXXXXX"] {
            assert_close(shannon_entropy(input), 0.0);
        }
    }

    // Two equally frequent symbols give exactly one bit, regardless of order.
    #[test]
    fn test_binary_string() {
        for &input in &["abab", "aabb"] {
            assert_close(shannon_entropy(input), 1.0);
        }
    }

    // Entropy rises strictly as the number of distinct symbols grows.
    #[test]
    fn test_increasing_entropy() {
        let one_symbol = shannon_entropy("aaaa");
        let two_symbols = shannon_entropy("aabb");
        let four_symbols = shannon_entropy("abcd");
        assert!(one_symbol < two_symbols);
        assert!(two_symbols < four_symbols);
    }

    #[test]
    fn test_english_text() {
        let bits = shannon_entropy("The quick brown fox jumps over the lazy dog");
        assert!(bits > 3.0);
        assert!(bits < 5.0);
    }

    #[test]
    fn test_base64_like() {
        let bits = shannon_entropy("SGVsbG8gV29ybGQhIQ==");
        assert!(bits > 3.5);
    }

    #[test]
    fn test_uuid_entropy() {
        let bits = shannon_entropy("550e8400-e29b-41d4-a716-446655440000");
        assert!(bits > 3.0);
        assert!(bits < 5.0);
    }

    #[test]
    fn test_normalized_entropy() {
        // A single byte normalizes to zero.
        assert_close(normalized_entropy("a"), 0.0);

        // Sixteen distinct bytes should score close to the maximum.
        let score = normalized_entropy("abcdefghijklmnop");
        assert!(score > 0.9);
    }

    #[test]
    fn test_is_entropy_anomaly() {
        // Exactly at the mean: not anomalous.
        assert!(!is_entropy_anomaly(4.0, 4.0, 1.0, 3.0));
        // Five standard deviations away with a threshold of three: anomalous.
        assert!(is_entropy_anomaly(9.0, 4.0, 1.0, 3.0));
        // Zero variance is never reported as anomalous.
        assert!(!is_entropy_anomaly(10.0, 4.0, 0.0, 3.0));
    }

    #[test]
    fn test_entropy_z_score() {
        // (observed, expected z-score) for mean 4.0 and variance 1.0.
        let cases = [(4.0, 0.0), (5.0, 1.0), (2.0, -2.0)];
        for &(observed, expected) in &cases {
            assert_close(entropy_z_score(observed, 4.0, 1.0), expected);
        }

        // Zero variance yields a z-score of exactly zero.
        assert_eq!(entropy_z_score(10.0, 4.0, 0.0), 0.0);
    }

    #[test]
    fn test_high_entropy_random_looking() {
        let bits = shannon_entropy("x7Kp9mNq2R5vL8jY");
        assert!(bits > 3.5);
    }

    #[test]
    fn test_jwt_like_token() {
        let bits = shannon_entropy("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9");
        assert!(bits > 4.0);
    }
}