shannon/
lib.rs

//! Shannon entropy calculation library.
//!
//! Provides functions for calculating the Shannon entropy of byte sequences,
//! useful for analyzing randomness and information density in data.
5
6use num_traits::{Float, FromPrimitive};
7
8/// Calculates the Shannon entropy of a byte slice.
9///
10/// Shannon entropy measures the average information content per byte,
11/// ranging from 0 (completely uniform) to 8 (maximum randomness).
12///
13/// # Arguments
14///
15/// * `data` - A byte slice to analyze
16///
17/// # Returns
18///
19/// The entropy value in bits per byte (0.0 to 8.0)
20///
21/// # Example
22///
23/// ## Calculating entropy of a Vec
24///
25/// ```
26/// use shannon::entropy;
27///
28/// let uniform_data = vec![0u8; 100];
29/// let e: f64 = entropy(&uniform_data);
30/// assert_eq!(e, 0.0);
31/// ```
32///
33/// ## Calculating entropy per character of a String
34///
35/// ```
36/// use shannon::entropy;
37///
38/// let text = String::from("AABB");
39/// let e: f64 = entropy(text.as_bytes());
40/// assert_eq!(e, 1.0);
41/// ```
42pub fn entropy<F: Float + FromPrimitive>(data: &[u8]) -> F {
43    let data_len = F::from_usize(data.len()).unwrap();
44    let mut counts = [0usize; 256];
45    for byte in data {
46        counts[*byte as usize] += 1;
47    }
48    let mut entropy = F::zero();
49    for count in counts {
50        if count == 0 {
51            continue;
52        }
53        let p_x = F::from_usize(count).unwrap() / data_len;
54        entropy = entropy - p_x * p_x.log2();
55    }
56    entropy
57}
58/// Calculates the total Shannon entropy of a byte slice.
59///
60/// Shannon entropy measures the average information content per byte,
61/// multiplied by the length this returns the total entropy of the byte slice.
62///
63/// # Arguments
64///
65/// * `data` - A byte slice to analyze
66///
67/// # Returns
68///
69/// The total entropy of data
70///
71/// # Example
72///
73/// ## Calculating total entropy of a String
74///
75/// ```
76/// use shannon::total_entropy;
77///
78/// let text = String::from("AABB");
79/// let e = total_entropy::<f64>(text.as_bytes());
80/// assert_eq!(e, 4.0);
81/// ```
82///
83pub fn total_entropy<F: Float + FromPrimitive>(data: &[u8]) -> F {
84    entropy::<F>(data) * (F::from_usize(data.len()).unwrap())
85}
#[cfg(test)]
mod test {
    use super::*;

    /// The more lopsided the byte distribution, the lower the entropy.
    #[test]
    fn skewed() {
        let s_a: f64 = entropy(b"ABAB");
        let s_b: f64 = entropy(b"AAAB");
        let s_c: f64 = entropy(b"AAAAAB");
        assert!(s_a > s_b);
        assert!(s_b > s_c);
    }

    /// Entropy depends only on byte frequencies, not on byte order.
    #[test]
    fn same() {
        let s_a: f32 = entropy(b"ABAB");
        let s_b: f32 = entropy(b"AABB");
        assert_eq!(s_a, s_b);
    }

    /// Two equally frequent byte values carry exactly one bit per byte.
    #[test]
    fn exact2() {
        let s_a: f32 = entropy(b"ABAB");
        assert_eq!(s_a, 1.0);
    }

    /// Entropy does not depend on which byte values are used.
    #[test]
    fn perm() {
        let s_a: f32 = entropy(b"ABAB");
        let s_b = entropy::<f32>(b"CDCD");
        assert_eq!(s_a, s_b);
    }

    /// Empty and single-valued inputs share the same entropy; adding distinct
    /// byte values increases it.
    #[test]
    fn expanding() {
        let s_a: f32 = entropy(b"");
        let s_b: f32 = entropy(b"A");
        let s_c: f32 = entropy(b"AB");
        let s_d: f32 = entropy(b"ABCD");
        let s_e: f32 = entropy(b"AAAAAAA");
        assert_eq!(s_a, s_b);
        assert!(s_b < s_c);
        assert!(s_c < s_d);
        assert_eq!(s_a, s_e);
    }

    /// Every byte value occurring equally often reaches the 8-bit maximum.
    #[test]
    fn all_byte_values() {
        let data: Vec<u8> = (0..=255u8).collect();
        let e: f64 = entropy(&data);
        // Tolerance instead of equality: 256 terms are summed.
        assert!((e - 8.0).abs() < 1e-9);
    }

    /// Total entropy is the per-byte entropy scaled by the input length.
    #[test]
    fn total() {
        assert_eq!(total_entropy::<f64>(b"AABB"), 4.0);
        assert_eq!(total_entropy::<f64>(b"AAAA"), 0.0);
        assert_eq!(total_entropy::<f64>(b""), 0.0);
    }
}