shannon/lib.rs
1//! Shannon entropy calculation library.
2//!
3//! Provides functions for calculating Shannon entropy of byte sequences,
4//! useful for analyzing randomness and information density in data.
5
6use num_traits::{Float, FromPrimitive};
7
/// Direction of an entropy transition reported by [`detect_edges`].
///
/// The type is a plain, field-less enum, so it derives the full set of common
/// traits (including `Hash`) and can be used as a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EdgeType {
    /// Normalized entropy reached or exceeded the high threshold.
    Rising,
    /// Normalized entropy reached or dropped below the low threshold.
    Falling,
}
14
15/// Represents a detected entropy edge in a sequence of entropy values.
16#[derive(Debug, Clone, Copy, PartialEq)]
17pub struct EntropyEdge<F> {
18 /// Block index where the edge was detected
19 pub block_index: usize,
20 /// Type of edge (rising or falling)
21 pub edge_type: EdgeType,
22 /// Normalized entropy value (0.0 to 1.0) at this edge
23 pub entropy: F,
24}
25
26/// Detects rising and falling edges in a sequence of entropy values.
27///
28/// Uses hysteresis to avoid spurious edge detection: a rising edge is only
29/// detected when entropy crosses above `high_threshold`, and a falling edge
30/// when it crosses below `low_threshold`.
31///
32/// # Arguments
33///
34/// * `entropy_values` - Slice of (block_index, entropy) tuples where entropy is in bits (0-8)
35/// * `high_threshold` - Normalized threshold (0.0-1.0) for detecting rising edges
36/// * `low_threshold` - Normalized threshold (0.0-1.0) for detecting falling edges
37///
38/// # Returns
39///
40/// A vector of detected entropy edges
41///
42/// # Example
43///
44/// ```
45/// use shannon::{detect_edges, EdgeType};
46///
47/// // Entropy values in bits (0-8): starts high, drops low
48/// let values = vec![(0, 7.8_f64), (1, 7.9), (2, 2.0), (3, 1.0)];
49/// let edges = detect_edges(&values, 0.95, 0.85);
50/// assert_eq!(edges.len(), 2);
51/// assert_eq!(edges[0].edge_type, EdgeType::Rising);
52/// assert_eq!(edges[0].block_index, 0);
53/// assert_eq!(edges[1].edge_type, EdgeType::Falling);
54/// assert_eq!(edges[1].block_index, 2);
55/// ```
56pub fn detect_edges<F: Float + FromPrimitive>(
57 entropy_values: &[(usize, F)],
58 high_threshold: F,
59 low_threshold: F,
60) -> Vec<EntropyEdge<F>> {
61 let eight = F::from_f64(8.0).unwrap();
62 let mut edges = Vec::new();
63 let mut last_edge: Option<bool> = None;
64 let mut trigger_reset = true;
65
66 for &(block_index, entropy) in entropy_values {
67 let normalized = entropy / eight;
68
69 if (matches!(last_edge, None | Some(false)) && normalized > low_threshold)
70 || (matches!(last_edge, Some(true)) && normalized < high_threshold)
71 {
72 trigger_reset = true;
73 }
74
75 if trigger_reset && normalized >= high_threshold {
76 edges.push(EntropyEdge {
77 block_index,
78 edge_type: EdgeType::Rising,
79 entropy: normalized,
80 });
81 last_edge = Some(true);
82 trigger_reset = false;
83 } else if trigger_reset && normalized <= low_threshold {
84 edges.push(EntropyEdge {
85 block_index,
86 edge_type: EdgeType::Falling,
87 entropy: normalized,
88 });
89 last_edge = Some(false);
90 trigger_reset = false;
91 }
92 }
93
94 edges
95}
96
97/// Calculates the Shannon entropy of a byte slice.
98///
99/// Shannon entropy measures the average information content per byte,
100/// ranging from 0 (completely uniform) to 8 (maximum randomness).
101///
102/// # Arguments
103///
104/// * `data` - A byte slice to analyze
105///
106/// # Returns
107///
108/// The entropy value in bits per byte (0.0 to 8.0)
109///
110/// # Example
111///
112/// ## Calculating entropy of a Vec
113///
114/// ```
115/// use shannon::entropy;
116///
117/// let uniform_data = vec![0u8; 100];
118/// let e: f64 = entropy(&uniform_data);
119/// assert_eq!(e, 0.0);
120/// ```
121///
122/// ## Calculating entropy per character of a String
123///
124/// ```
125/// use shannon::entropy;
126///
127/// let text = String::from("AABB");
128/// let e: f64 = entropy(text.as_bytes());
129/// assert_eq!(e, 1.0);
130/// ```
131pub fn entropy<F: Float + FromPrimitive>(data: &[u8]) -> F {
132 let data_len = F::from_usize(data.len()).unwrap();
133 let mut counts = [0usize; 256];
134 for byte in data {
135 counts[*byte as usize] += 1;
136 }
137 let mut entropy = F::zero();
138 for count in counts {
139 if count == 0 {
140 continue;
141 }
142 let p_x = F::from_usize(count).unwrap() / data_len;
143 entropy = entropy - p_x * p_x.log2();
144 }
145 entropy
146}
147/// Calculates the total Shannon entropy of a byte slice.
148///
149/// Shannon entropy measures the average information content per byte,
150/// multiplied by the length this returns the total entropy of the byte slice.
151///
152/// # Arguments
153///
154/// * `data` - A byte slice to analyze
155///
156/// # Returns
157///
158/// The total entropy of data
159///
160/// # Example
161///
162/// ## Calculating total entropy of a String
163///
164/// ```
165/// use shannon::total_entropy;
166///
167/// let text = String::from("AABB");
168/// let e = total_entropy::<f64>(text.as_bytes());
169/// assert_eq!(e, 4.0);
170/// ```
171///
172pub fn total_entropy<F: Float + FromPrimitive>(data: &[u8]) -> F {
173 entropy::<F>(data) * (F::from_usize(data.len()).unwrap())
174}
#[cfg(test)]
mod test {
    use super::*;

    /// Entropy shrinks as one byte value increasingly dominates the input.
    #[test]
    fn skewed() {
        let balanced = entropy::<f64>(b"ABAB");
        let lopsided = entropy::<f64>(b"AAAB");
        let very_lopsided = entropy::<f64>(b"AAAAAB");
        assert!(balanced > lopsided);
        assert!(lopsided > very_lopsided);
    }

    /// Reordering the bytes does not change the entropy.
    #[test]
    fn same() {
        let interleaved = entropy::<f32>(b"ABAB");
        let grouped = entropy::<f32>(b"AABB");
        assert_eq!(interleaved, grouped);
    }

    /// Two equally frequent byte values give exactly one bit per byte.
    #[test]
    fn exact2() {
        let two_symbols = entropy::<f32>(b"ABAB");
        assert_eq!(two_symbols, 1.0);
    }

    /// Entropy depends on the frequencies only, not on which bytes occur.
    #[test]
    fn perm() {
        let with_ab = entropy::<f32>(b"ABAB");
        let with_cd = entropy::<f32>(b"CDCD");
        assert_eq!(with_ab, with_cd);
    }

    /// Empty and single-symbol inputs share the same entropy, and entropy
    /// grows with the number of distinct, equally frequent symbols.
    #[test]
    fn expanding() {
        let empty = entropy::<f32>(b"");
        let single = entropy::<f32>(b"A");
        let pair = entropy::<f32>(b"AB");
        let quad = entropy::<f32>(b"ABCD");
        let repeated = entropy::<f32>(b"AAAAAAA");
        assert_eq!(empty, single);
        assert!(single < pair);
        assert!(pair < quad);
        assert_eq!(empty, repeated);
    }
}