cipher_identifier/statistical_tests/shannon_entropy.rs
//! Shannon Entropy Test
//!
//! This module implements the Shannon Entropy statistical test,
//! which measures the information content or unpredictability of the text.

use crate::statistical_tests::utils::convert_string;
use std::collections::HashMap;

/// Calculates the Shannon Entropy for the given text
///
/// Shannon Entropy is a measure of the unpredictability or information content
/// in a message. Higher entropy indicates more randomness or unpredictability.
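/// For symbol probabilities p(c), it is computed as H = -Σ p(c) * log2(p(c)).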
///
/// # Arguments
///
/// * `text` - The input text to analyze
///
/// # Returns
///
/// The Shannon Entropy value
///
/// # Examples
///
/// ```
/// use cipher_identifier::statistical_tests::shannon_entropy::get_shannon_entropy;
///
/// let text = "HELLOWORLD";
/// let entropy = get_shannon_entropy(text);
/// assert!(entropy > 0.0);
/// ```
pub fn get_shannon_entropy(text: &str) -> f64 {
    let data = convert_string(text);

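    // The entropy of an empty message is 0; returning early also avoids a 0/0 (NaN) below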
    if data.is_empty() {
        return 0.0;
    }

    // Count frequency of each character
    let mut freq_map = HashMap::new();
    for &c in &data {
        *freq_map.entry(c).or_insert(0) += 1;
    }

    // Calculate entropy
    let text_len = data.len() as f64;
    let mut entropy = 0.0;

    for &count in freq_map.values() {
        let probability = count as f64 / text_len;
        entropy -= probability * probability.log2();
    }

    entropy
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_entropy_english_text() {
        // English text typically has entropy around 4.0-4.5
        let text = "THEQUICKBROWNFOXJUMPSOVERTHELAZYDOG";
        let entropy = get_shannon_entropy(text);
        assert!(entropy > 3.5 && entropy < 5.0);
    }

    #[test]
    fn test_entropy_repeated_text() {
        // Repeated text has lower entropy
        let text = "AAAAAAAAAAAAAAAAAAAA";
        let entropy = get_shannon_entropy(text);
        assert!(entropy < 1.0);
    }
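
    // Sketch of an additional check, assuming `convert_string` keeps the four
    // distinct letters A-D as four distinct symbols: a text whose symbols are
    // all equally frequent should have entropy log2(n), here log2(4) = 2.0
    #[test]
    fn test_entropy_uniform_distribution() {
        let text = "ABCDABCDABCDABCD";
        let entropy = get_shannon_entropy(text);
        assert!((entropy - 2.0).abs() < 1e-10);
    }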
}