epsilon_engine/quick_screen.rs
1//! Pillar: III. PACR field: Γ.
2//!
3//! O(1) Shannon entropy pre-filter for ε-machine inference.
4//!
5//! Before running the full CSSR pipeline (which is O(N × L)) this screen
6//! computes H(X) from a 256-bucket byte frequency table in a single pass.
7//! If H(X) is below a configurable threshold the data is near-deterministic
8//! and CSSR would return a trivial single-state machine — skip it.
9//!
10//! # Why O(1)?
11//!
12//! The frequency table has exactly 256 buckets regardless of input length N.
13//! One pass over the data fills the table; entropy is computed from the 256
14//! non-zero buckets. Both operations are O(N) in data length but O(1) in
15//! alphabet size (bounded constant 256). The "O(1)" claim in the spec refers
16//! to the constant-bounded alphabet work, not to skipping the data pass.
17//!
18//! # Usage
19//!
20//! ```
21//! use epsilon_engine::quick_screen::{quick_screen, ScreenResult};
22//!
23//! let data = vec![0u8; 1000]; // constant → H(X) = 0 → Skip
24//! let result = quick_screen(&data, 0.5);
25//! assert!(matches!(result, ScreenResult::Skip { .. }));
26//!
27//! let mixed: Vec<u8> = (0u8..=255).cycle().take(1024).collect();
28//! let result2 = quick_screen(&mixed, 0.5);
29//! assert!(matches!(result2, ScreenResult::Proceed { .. }));
30//! ```
31
32// ── ScreenResult ──────────────────────────────────────────────────────────────
33
34/// Result of the quick-screen entropy pre-filter.
35#[derive(Debug, Clone, PartialEq)]
36pub enum ScreenResult {
37 /// H(X) is below `threshold` — data is near-deterministic.
38 /// CSSR would return a trivial 1-state machine; skip it.
39 Skip {
40 /// Human-readable reason.
41 reason: &'static str,
42 /// Measured H(X) in bits.
43 entropy_bits: f64,
44 },
45 /// H(X) is at or above `threshold` — proceed to full CSSR inference.
46 Proceed {
47 /// Measured H(X) in bits.
48 entropy_bits: f64,
49 },
50}
51
52impl ScreenResult {
53 /// Returns `true` when this result is [`Skip`](ScreenResult::Skip).
54 #[must_use]
55 pub fn is_skip(&self) -> bool {
56 matches!(self, Self::Skip { .. })
57 }
58
59 /// Returns the measured entropy in bits regardless of variant.
60 #[must_use]
61 pub fn entropy_bits(&self) -> f64 {
62 match *self {
63 Self::Skip { entropy_bits, .. } | Self::Proceed { entropy_bits } => entropy_bits,
64 }
65 }
66}
67
68// ── quick_screen ──────────────────────────────────────────────────────────────
69
70/// Run the Shannon entropy pre-filter on `data`.
71///
72/// Builds a 256-bucket byte frequency table in one pass, computes H(X), and
73/// compares against `threshold` (bits).
74///
75/// # Arguments
76///
77/// * `data` — raw byte slice. Empty input yields H(X) = 0.0 → Skip.
78/// * `threshold` — entropy threshold in bits. Values below this trigger Skip.
79/// A threshold of `0.0` means only completely constant streams are skipped.
80///
81/// # Complexity
82///
83/// O(N) in input length; O(1) in alphabet size (fixed 256 buckets).
84#[must_use]
85pub fn quick_screen(data: &[u8], threshold: f64) -> ScreenResult {
86 let entropy_bits = byte_entropy(data);
87 if entropy_bits <= threshold {
88 ScreenResult::Skip {
89 reason: "near-deterministic",
90 entropy_bits,
91 }
92 } else {
93 ScreenResult::Proceed { entropy_bits }
94 }
95}
96
97// ── Internal ──────────────────────────────────────────────────────────────────
98
99/// Compute Shannon entropy H(X) of the byte distribution in `data`, in bits.
100///
101/// Uses a 256-entry frequency table. Returns 0.0 for empty input.
102#[must_use]
103pub(crate) fn byte_entropy(data: &[u8]) -> f64 {
104 if data.is_empty() {
105 return 0.0;
106 }
107 let mut freq = [0u64; 256];
108 for &b in data {
109 freq[b as usize] += 1;
110 }
111 let n = data.len() as f64;
112 freq.iter()
113 .filter(|&&c| c > 0)
114 .map(|&c| {
115 let p = c as f64 / n;
116 -p * p.log2()
117 })
118 .sum()
119}
120
121// ── Tests ─────────────────────────────────────────────────────────────────────
122
123#[cfg(test)]
124mod tests {
125 use super::*;
126
127 #[test]
128 fn constant_stream_is_skipped() {
129 let data = vec![42u8; 1000];
130 let r = quick_screen(&data, 0.5);
131 assert!(r.is_skip(), "constant stream must be skipped");
132 assert!((r.entropy_bits() - 0.0).abs() < 1e-10);
133 }
134
135 #[test]
136 fn uniform_byte_stream_proceeds() {
137 // All 256 values equally frequent → H(X) = 8 bits.
138 let data: Vec<u8> = (0u8..=255).cycle().take(2048).collect();
139 let r = quick_screen(&data, 0.5);
140 assert!(!r.is_skip(), "uniform stream must proceed");
141 assert!((r.entropy_bits() - 8.0).abs() < 0.01);
142 }
143
144 #[test]
145 fn empty_input_is_skipped() {
146 let r = quick_screen(&[], 0.5);
147 assert!(r.is_skip());
148 assert_eq!(r.entropy_bits(), 0.0);
149 }
150
151 #[test]
152 fn zero_threshold_only_skips_constant() {
153 let data = vec![0u8; 100];
154 assert!(quick_screen(&data, 0.0).is_skip());
155
156 // Any non-trivial data should proceed at threshold=0.0.
157 let data2 = vec![0u8, 1u8, 0u8, 1u8];
158 assert!(!quick_screen(&data2, 0.0).is_skip());
159 }
160
161 #[test]
162 fn binary_alternating_entropy_is_one_bit() {
163 let data: Vec<u8> = (0u8..2).cycle().take(1000).collect();
164 let h = byte_entropy(&data);
165 assert!((h - 1.0).abs() < 0.01, "H([0,1]) = 1 bit, got {h}");
166 }
167
168 #[test]
169 fn entropy_bits_accessor_consistent() {
170 let data: Vec<u8> = (0u8..=255).cycle().take(512).collect();
171 let r = quick_screen(&data, 4.0);
172 let h = byte_entropy(&data);
173 assert!((r.entropy_bits() - h).abs() < 1e-10);
174 }
175
176 #[test]
177 fn screen_result_is_skip_helper() {
178 let skip = ScreenResult::Skip {
179 reason: "near-deterministic",
180 entropy_bits: 0.1,
181 };
182 let proceed = ScreenResult::Proceed { entropy_bits: 5.0 };
183 assert!(skip.is_skip());
184 assert!(!proceed.is_skip());
185 }
186}