Skip to main content

epsilon_engine/
quick_screen.rs

1//! Pillar: III. PACR field: Γ.
2//!
3//! O(1) Shannon entropy pre-filter for ε-machine inference.
4//!
5//! Before running the full CSSR pipeline (which is O(N × L)) this screen
6//! computes H(X) from a 256-bucket byte frequency table in a single pass.
7//! If H(X) is below a configurable threshold the data is near-deterministic
8//! and CSSR would return a trivial single-state machine — skip it.
9//!
10//! # Why O(1)?
11//!
12//! The frequency table has exactly 256 buckets regardless of input length N.
13//! One pass over the data fills the table; entropy is computed from the 256
14//! non-zero buckets.  Both operations are O(N) in data length but O(1) in
15//! alphabet size (bounded constant 256).  The "O(1)" claim in the spec refers
16//! to the constant-bounded alphabet work, not to skipping the data pass.
17//!
18//! # Usage
19//!
20//! ```
21//! use epsilon_engine::quick_screen::{quick_screen, ScreenResult};
22//!
23//! let data = vec![0u8; 1000]; // constant → H(X) = 0 → Skip
24//! let result = quick_screen(&data, 0.5);
25//! assert!(matches!(result, ScreenResult::Skip { .. }));
26//!
27//! let mixed: Vec<u8> = (0u8..=255).cycle().take(1024).collect();
28//! let result2 = quick_screen(&mixed, 0.5);
29//! assert!(matches!(result2, ScreenResult::Proceed { .. }));
30//! ```
31
32// ── ScreenResult ──────────────────────────────────────────────────────────────
33
34/// Result of the quick-screen entropy pre-filter.
35#[derive(Debug, Clone, PartialEq)]
36pub enum ScreenResult {
37    /// H(X) is below `threshold` — data is near-deterministic.
38    /// CSSR would return a trivial 1-state machine; skip it.
39    Skip {
40        /// Human-readable reason.
41        reason: &'static str,
42        /// Measured H(X) in bits.
43        entropy_bits: f64,
44    },
45    /// H(X) is at or above `threshold` — proceed to full CSSR inference.
46    Proceed {
47        /// Measured H(X) in bits.
48        entropy_bits: f64,
49    },
50}
51
52impl ScreenResult {
53    /// Returns `true` when this result is [`Skip`](ScreenResult::Skip).
54    #[must_use]
55    pub fn is_skip(&self) -> bool {
56        matches!(self, Self::Skip { .. })
57    }
58
59    /// Returns the measured entropy in bits regardless of variant.
60    #[must_use]
61    pub fn entropy_bits(&self) -> f64 {
62        match *self {
63            Self::Skip { entropy_bits, .. } | Self::Proceed { entropy_bits } => entropy_bits,
64        }
65    }
66}
67
68// ── quick_screen ──────────────────────────────────────────────────────────────
69
70/// Run the Shannon entropy pre-filter on `data`.
71///
72/// Builds a 256-bucket byte frequency table in one pass, computes H(X), and
73/// compares against `threshold` (bits).
74///
75/// # Arguments
76///
77/// * `data`      — raw byte slice.  Empty input yields H(X) = 0.0 → Skip.
78/// * `threshold` — entropy threshold in bits.  Values below this trigger Skip.
79///   A threshold of `0.0` means only completely constant streams are skipped.
80///
81/// # Complexity
82///
83/// O(N) in input length; O(1) in alphabet size (fixed 256 buckets).
84#[must_use]
85pub fn quick_screen(data: &[u8], threshold: f64) -> ScreenResult {
86    let entropy_bits = byte_entropy(data);
87    if entropy_bits <= threshold {
88        ScreenResult::Skip {
89            reason: "near-deterministic",
90            entropy_bits,
91        }
92    } else {
93        ScreenResult::Proceed { entropy_bits }
94    }
95}
96
97// ── Internal ──────────────────────────────────────────────────────────────────
98
99/// Compute Shannon entropy H(X) of the byte distribution in `data`, in bits.
100///
101/// Uses a 256-entry frequency table.  Returns 0.0 for empty input.
102#[must_use]
103pub(crate) fn byte_entropy(data: &[u8]) -> f64 {
104    if data.is_empty() {
105        return 0.0;
106    }
107    let mut freq = [0u64; 256];
108    for &b in data {
109        freq[b as usize] += 1;
110    }
111    let n = data.len() as f64;
112    freq.iter()
113        .filter(|&&c| c > 0)
114        .map(|&c| {
115            let p = c as f64 / n;
116            -p * p.log2()
117        })
118        .sum()
119}
120
121// ── Tests ─────────────────────────────────────────────────────────────────────
122
123#[cfg(test)]
124mod tests {
125    use super::*;
126
127    #[test]
128    fn constant_stream_is_skipped() {
129        let data = vec![42u8; 1000];
130        let r = quick_screen(&data, 0.5);
131        assert!(r.is_skip(), "constant stream must be skipped");
132        assert!((r.entropy_bits() - 0.0).abs() < 1e-10);
133    }
134
135    #[test]
136    fn uniform_byte_stream_proceeds() {
137        // All 256 values equally frequent → H(X) = 8 bits.
138        let data: Vec<u8> = (0u8..=255).cycle().take(2048).collect();
139        let r = quick_screen(&data, 0.5);
140        assert!(!r.is_skip(), "uniform stream must proceed");
141        assert!((r.entropy_bits() - 8.0).abs() < 0.01);
142    }
143
144    #[test]
145    fn empty_input_is_skipped() {
146        let r = quick_screen(&[], 0.5);
147        assert!(r.is_skip());
148        assert_eq!(r.entropy_bits(), 0.0);
149    }
150
151    #[test]
152    fn zero_threshold_only_skips_constant() {
153        let data = vec![0u8; 100];
154        assert!(quick_screen(&data, 0.0).is_skip());
155
156        // Any non-trivial data should proceed at threshold=0.0.
157        let data2 = vec![0u8, 1u8, 0u8, 1u8];
158        assert!(!quick_screen(&data2, 0.0).is_skip());
159    }
160
161    #[test]
162    fn binary_alternating_entropy_is_one_bit() {
163        let data: Vec<u8> = (0u8..2).cycle().take(1000).collect();
164        let h = byte_entropy(&data);
165        assert!((h - 1.0).abs() < 0.01, "H([0,1]) = 1 bit, got {h}");
166    }
167
168    #[test]
169    fn entropy_bits_accessor_consistent() {
170        let data: Vec<u8> = (0u8..=255).cycle().take(512).collect();
171        let r = quick_screen(&data, 4.0);
172        let h = byte_entropy(&data);
173        assert!((r.entropy_bits() - h).abs() < 1e-10);
174    }
175
176    #[test]
177    fn screen_result_is_skip_helper() {
178        let skip = ScreenResult::Skip {
179            reason: "near-deterministic",
180            entropy_bits: 0.1,
181        };
182        let proceed = ScreenResult::Proceed { entropy_bits: 5.0 };
183        assert!(skip.is_skip());
184        assert!(!proceed.is_skip());
185    }
186}