shannon/
lib.rs

1//! Shannon entropy calculation library.
2//!
3//! Provides functions for calculating Shannon entropy of byte sequences,
4//! useful for analyzing randomness and information density in data.
5
6use num_traits::{Float, FromPrimitive};
7
/// Direction of an entropy transition reported by [`detect_edges`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum EdgeType {
    /// The normalized entropy was at or above the high threshold.
    Rising,
    /// The normalized entropy was at or below the low threshold.
    Falling,
}
14
15/// Represents a detected entropy edge in a sequence of entropy values.
16#[derive(Debug, Clone, Copy, PartialEq)]
17pub struct EntropyEdge<F> {
18    /// Block index where the edge was detected
19    pub block_index: usize,
20    /// Type of edge (rising or falling)
21    pub edge_type: EdgeType,
22    /// Normalized entropy value (0.0 to 1.0) at this edge
23    pub entropy: F,
24}
25
26/// Detects rising and falling edges in a sequence of entropy values.
27///
28/// Uses hysteresis to avoid spurious edge detection: a rising edge is only
29/// detected when entropy crosses above `high_threshold`, and a falling edge
30/// when it crosses below `low_threshold`.
31///
32/// # Arguments
33///
34/// * `entropy_values` - Slice of (block_index, entropy) tuples where entropy is in bits (0-8)
35/// * `high_threshold` - Normalized threshold (0.0-1.0) for detecting rising edges
36/// * `low_threshold` - Normalized threshold (0.0-1.0) for detecting falling edges
37///
38/// # Returns
39///
40/// A vector of detected entropy edges
41///
42/// # Example
43///
44/// ```
45/// use shannon::{detect_edges, EdgeType};
46///
47/// // Entropy values in bits (0-8): starts high, drops low
48/// let values = vec![(0, 7.8_f64), (1, 7.9), (2, 2.0), (3, 1.0)];
49/// let edges = detect_edges(&values, 0.95, 0.85);
50/// assert_eq!(edges.len(), 2);
51/// assert_eq!(edges[0].edge_type, EdgeType::Rising);
52/// assert_eq!(edges[0].block_index, 0);
53/// assert_eq!(edges[1].edge_type, EdgeType::Falling);
54/// assert_eq!(edges[1].block_index, 2);
55/// ```
56pub fn detect_edges<F: Float + FromPrimitive>(
57    entropy_values: &[(usize, F)],
58    high_threshold: F,
59    low_threshold: F,
60) -> Vec<EntropyEdge<F>> {
61    let eight = F::from_f64(8.0).unwrap();
62    let mut edges = Vec::new();
63    let mut last_edge: Option<bool> = None;
64    let mut trigger_reset = true;
65
66    for &(block_index, entropy) in entropy_values {
67        let normalized = entropy / eight;
68
69        if (matches!(last_edge, None | Some(false)) && normalized > low_threshold)
70            || (matches!(last_edge, Some(true)) && normalized < high_threshold)
71        {
72            trigger_reset = true;
73        }
74
75        if trigger_reset && normalized >= high_threshold {
76            edges.push(EntropyEdge {
77                block_index,
78                edge_type: EdgeType::Rising,
79                entropy: normalized,
80            });
81            last_edge = Some(true);
82            trigger_reset = false;
83        } else if trigger_reset && normalized <= low_threshold {
84            edges.push(EntropyEdge {
85                block_index,
86                edge_type: EdgeType::Falling,
87                entropy: normalized,
88            });
89            last_edge = Some(false);
90            trigger_reset = false;
91        }
92    }
93
94    edges
95}
96
97/// Calculates the Shannon entropy of a byte slice.
98///
99/// Shannon entropy measures the average information content per byte,
100/// ranging from 0 (completely uniform) to 8 (maximum randomness).
101///
102/// # Arguments
103///
104/// * `data` - A byte slice to analyze
105///
106/// # Returns
107///
108/// The entropy value in bits per byte (0.0 to 8.0)
109///
110/// # Example
111///
112/// ## Calculating entropy of a Vec
113///
114/// ```
115/// use shannon::entropy;
116///
117/// let uniform_data = vec![0u8; 100];
118/// let e: f64 = entropy(&uniform_data);
119/// assert_eq!(e, 0.0);
120/// ```
121///
122/// ## Calculating entropy per character of a String
123///
124/// ```
125/// use shannon::entropy;
126///
127/// let text = String::from("AABB");
128/// let e: f64 = entropy(text.as_bytes());
129/// assert_eq!(e, 1.0);
130/// ```
131pub fn entropy<F: Float + FromPrimitive>(data: &[u8]) -> F {
132    let data_len = F::from_usize(data.len()).unwrap();
133    let mut counts = [0usize; 256];
134    for byte in data {
135        counts[*byte as usize] += 1;
136    }
137    let mut entropy = F::zero();
138    for count in counts {
139        if count == 0 {
140            continue;
141        }
142        let p_x = F::from_usize(count).unwrap() / data_len;
143        entropy = entropy - p_x * p_x.log2();
144    }
145    entropy
146}
147/// Calculates the total Shannon entropy of a byte slice.
148///
149/// Shannon entropy measures the average information content per byte,
150/// multiplied by the length this returns the total entropy of the byte slice.
151///
152/// # Arguments
153///
154/// * `data` - A byte slice to analyze
155///
156/// # Returns
157///
158/// The total entropy of data
159///
160/// # Example
161///
162/// ## Calculating total entropy of a String
163///
164/// ```
165/// use shannon::total_entropy;
166///
167/// let text = String::from("AABB");
168/// let e = total_entropy::<f64>(text.as_bytes());
169/// assert_eq!(e, 4.0);
170/// ```
171///
172pub fn total_entropy<F: Float + FromPrimitive>(data: &[u8]) -> F {
173    entropy::<F>(data) * (F::from_usize(data.len()).unwrap())
174}
#[cfg(test)]
mod test {
    use super::*;

    /// The more one byte value dominates, the lower the entropy.
    #[test]
    fn skewed() {
        let s_a: f64 = entropy(b"ABAB");
        let s_b: f64 = entropy(b"AAAB");
        let s_c: f64 = entropy(b"AAAAAB");
        assert!(s_a > s_b);
        assert!(s_b > s_c);
    }

    /// Entropy depends on byte frequencies only, not on their order.
    #[test]
    fn same() {
        let s_a: f32 = entropy(b"ABAB");
        let s_b: f32 = entropy(b"AABB");
        assert_eq!(s_a, s_b);
    }

    /// Two equally likely byte values carry exactly one bit per byte.
    #[test]
    fn exact2() {
        let s_a: f32 = entropy(b"ABAB");
        assert_eq!(s_a, 1.0);
    }

    /// Entropy does not depend on which byte values are used.
    #[test]
    fn perm() {
        let s_a: f32 = entropy(b"ABAB");
        let s_b = entropy::<f32>(b"CDCD");
        assert_eq!(s_a, s_b);
    }

    /// Empty and single-valued inputs have equal entropy; more distinct,
    /// equally frequent values increase it.
    #[test]
    fn expanding() {
        let s_a: f32 = entropy(b"");
        let s_b: f32 = entropy(b"A");
        let s_c: f32 = entropy(b"AB");
        let s_d: f32 = entropy(b"ABCD");
        let s_e: f32 = entropy(b"AAAAAAA");
        assert_eq!(s_a, s_b);
        assert!(s_b < s_c);
        assert!(s_c < s_d);
        assert_eq!(s_a, s_e);
    }

    /// The documented range bounds are reached exactly.
    #[test]
    fn bounds() {
        let every_byte: Vec<u8> = (0..=255u8).collect();
        assert_eq!(entropy::<f64>(b""), 0.0);
        assert_eq!(entropy::<f64>(&every_byte), 8.0);
    }

    /// Total entropy is the per-byte entropy times the input length.
    #[test]
    fn total() {
        assert_eq!(total_entropy::<f64>(b"AABB"), 4.0);
        assert_eq!(total_entropy::<f64>(b"ABCD"), 8.0);
        assert_eq!(total_entropy::<f64>(b""), 0.0);
    }

    /// High-then-low input yields one rising and one falling edge.
    #[test]
    fn edges_high_then_low() {
        let values = vec![(0usize, 7.8_f64), (1, 7.9), (2, 2.0), (3, 1.0)];
        let edges = detect_edges::<f64>(&values, 0.95, 0.85);
        let summary: Vec<(usize, EdgeType)> =
            edges.iter().map(|e| (e.block_index, e.edge_type)).collect();
        assert_eq!(
            summary,
            vec![(0, EdgeType::Rising), (2, EdgeType::Falling)]
        );
        assert_eq!(edges[1].entropy, 0.25);
    }

    /// Dropping below the high threshold re-arms the detector, so a second
    /// rising edge is reported without a falling edge in between.
    #[test]
    fn edges_rearm_in_dead_band() {
        let values = vec![(0usize, 1.0_f64), (1, 7.9), (2, 7.0), (3, 7.9)];
        let edges = detect_edges::<f64>(&values, 0.95, 0.85);
        let summary: Vec<(usize, EdgeType)> =
            edges.iter().map(|e| (e.block_index, e.edge_type)).collect();
        assert_eq!(
            summary,
            vec![
                (0, EdgeType::Falling),
                (1, EdgeType::Rising),
                (3, EdgeType::Rising)
            ]
        );
    }

    /// No input, or input that stays between the thresholds, yields no edges.
    #[test]
    fn edges_none() {
        assert!(detect_edges::<f64>(&[], 0.95, 0.85).is_empty());
        let values = vec![(0usize, 7.0_f64), (1, 7.1), (2, 7.2)];
        assert!(detect_edges::<f64>(&values, 0.95, 0.85).is_empty());
    }
}