cipher_identifier/statistical_tests/
shannon_entropy.rs

1//! Shannon Entropy Test
2//!
3//! This module implements the Shannon Entropy statistical test,
4//! which measures the information content or unpredictability of the text.
5
6use crate::statistical_tests::utils::convert_string;
7use std::collections::HashMap;
8
9/// Calculates the Shannon Entropy for the given text
10///
11/// Shannon Entropy is a measure of the unpredictability or information content
12/// in a message. Higher entropy indicates more randomness or unpredictability.
13///
14/// # Arguments
15///
16/// * `text` - The input text to analyze
17///
18/// # Returns
19///
20/// The Shannon Entropy value
21///
22/// # Examples
23///
24/// ```
25/// use cipher_identifier::statistical_tests::shannon_entropy::get_shannon_entropy;
26///
27/// let text = "HELLOWORLD";
28/// let entropy = get_shannon_entropy(text);
29/// assert!(entropy > 0.0);
30/// ```
31pub fn get_shannon_entropy(text: &str) -> f64 {
32    let data = convert_string(text);
33    
34    if data.is_empty() {
35        return 0.0;
36    }
37    
38    // Count frequency of each character
39    let mut freq_map = HashMap::new();
40    for &c in &data {
41        *freq_map.entry(c).or_insert(0) += 1;
42    }
43    
44    // Calculate entropy
45    let text_len = data.len() as f64;
46    let mut entropy = 0.0;
47    
48    for &count in freq_map.values() {
49        let probability = count as f64 / text_len;
50        entropy -= probability * probability.log2();
51    }
52    
53    entropy
54}
55
#[cfg(test)]
mod tests {
    use super::*;

    /// English text typically has entropy around 4.0-4.5; the check accepts
    /// anything strictly between 3.5 and 5.0.
    #[test]
    fn test_entropy_english_text() {
        let pangram = "THEQUICKBROWNFOXJUMPSOVERTHELAZYDOG";
        let measured = get_shannon_entropy(pangram);
        let above_floor = measured > 3.5;
        let below_ceiling = measured < 5.0;
        assert!(above_floor && below_ceiling);
    }

    /// A run of one repeated letter must score below 1.0.
    #[test]
    fn test_entropy_repeated_text() {
        let measured = get_shannon_entropy("AAAAAAAAAAAAAAAAAAAA");
        assert!(measured < 1.0);
    }
}
75}