use crate::statistical_tests::utils::convert_string;
use std::collections::HashMap;
/// Computes the Shannon entropy, in bits per symbol, of the symbol sequence
/// that `convert_string` yields for `text`.
///
/// Every distinct symbol with relative frequency `p` contributes
/// `-p * log2(p)` to the total. When `convert_string` yields no symbols the
/// result is `0.0`.
pub fn get_shannon_entropy(text: &str) -> f64 {
    let symbols = convert_string(text);
    if symbols.is_empty() {
        // Nothing to measure; also avoids dividing by a zero length below.
        return 0.0;
    }

    // Histogram: how many times each distinct symbol occurs.
    let mut occurrences: HashMap<_, i32> = HashMap::new();
    for symbol in &symbols {
        *occurrences.entry(*symbol).or_default() += 1;
    }

    let total = symbols.len() as f64;

    // Subtract each term from the running total; `log2` of a probability is
    // never positive, so the accumulated value ends up non-negative.
    occurrences.values().fold(0.0, |acc, &hits| {
        let p = hits as f64 / total;
        acc - p * p.log2()
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A pangram that uses many distinct letters must land strictly between
    /// 3.5 and 5.0 bits.
    #[test]
    fn test_entropy_english_text() {
        let pangram = "THEQUICKBROWNFOXJUMPSOVERTHELAZYDOG";
        let bits = get_shannon_entropy(pangram);
        assert!(bits > 3.5);
        assert!(bits < 5.0);
    }

    /// Input made of one repeated letter must score below 1.0 bit.
    #[test]
    fn test_entropy_repeated_text() {
        let bits = get_shannon_entropy("AAAAAAAAAAAAAAAAAAAA");
        assert!(bits < 1.0);
    }
}