Skip to main content

epsilon_engine/
bootstrap_backend.rs

1//! Pillar: III. PACR field: Γ.
2//!
3//! Pluggable bootstrap backend abstraction for ε-machine confidence intervals.
4//!
5//! # Design
6//!
7//! [`BootstrapBackend`] is a zero-cost trait that decouples the CSSR inference
8//! pipeline from the specific resampling engine.  Today only [`CpuBootstrap`]
9//! is active.  On `genesis_node` (M1 Ultra) a [`MetalBootstrap`] stub compiles
10//! in; the stub delegates to [`CpuBootstrap`] until the Metal compute pipeline
11//! is implemented in `ets-probe-ffi`.
12//!
13//! # Contract
14//!
15//! `resample_and_estimate(data, b)` returns a `Vec<f64>` of length `2 × b`
16//! laid out as `[c_0, h_0, c_1, h_1, …, c_{b-1}, h_{b-1}]` where `c_i` is
17//! the `C_μ` estimate and `h_i` is the `h_μ` estimate for bootstrap replicate `i`.
18//!
19//! This flat layout avoids heap-allocating `b` tuples and maps naturally to
20//! SIMD-friendly memory access patterns for future Metal kernels.
21
22use crate::complexity::counts_to_probs;
23use crate::cssr::{run_cssr, CausalState};
24
25// ── BootstrapBackend trait ────────────────────────────────────────────────────
26
27/// Pluggable backend for parametric bootstrap resampling of ε-machine metrics.
28///
29/// Implementors must be stateless (or internally synchronised) so that
30/// `resample_and_estimate` can be called from multiple async tasks.
31pub trait BootstrapBackend {
32    /// Run `b` bootstrap replicates on `data` and return interleaved
33    /// `[c_0, h_0, c_1, h_1, …]` estimates (length = `2 × b`).
34    ///
35    /// The caller is responsible for extracting percentiles from the returned
36    /// vector.
37    fn resample_and_estimate(&self, data: &[u8], b: usize) -> Vec<f64>;
38}
39
40// ── CpuBootstrap ──────────────────────────────────────────────────────────────
41
42/// CPU-based parametric bootstrap (the existing B=200 implementation).
43///
44/// Each replicate resamples each causal state's pooled emission counts from
45/// its empirical distribution using an Xorshift64 RNG, then recomputes
46/// (`C_μ`, `h_μ`).  The implementation mirrors [`complexity::bootstrap_ci`] but
47/// exposes the raw sample vector rather than aggregated percentiles.
48#[derive(Debug, Clone)]
49pub struct CpuBootstrap {
50    /// Maximum CSSR history depth L.
51    pub max_depth: usize,
52    /// KS significance level α.
53    pub alpha: f64,
54    /// Discrete alphabet size |A|.
55    pub alphabet_size: usize,
56}
57
58impl CpuBootstrap {
59    /// Create with explicit parameters.
60    #[must_use]
61    pub fn new(max_depth: usize, alpha: f64, alphabet_size: usize) -> Self {
62        Self {
63            max_depth,
64            alpha,
65            alphabet_size,
66        }
67    }
68}
69
70impl Default for CpuBootstrap {
71    fn default() -> Self {
72        Self {
73            max_depth: 4,
74            alpha: 0.001,
75            alphabet_size: 2,
76        }
77    }
78}
79
80impl BootstrapBackend for CpuBootstrap {
81    fn resample_and_estimate(&self, data: &[u8], b: usize) -> Vec<f64> {
82        if data.is_empty() || b == 0 {
83            return Vec::new();
84        }
85        let result = run_cssr(data, self.alphabet_size, self.max_depth, self.alpha);
86        if result.states.is_empty() {
87            return Vec::new();
88        }
89
90        // Empirical stationary distribution (visit counts per state).
91        let pi = empirical_pi(&result.states, data, result.max_depth);
92
93        let mut rng = Xorshift64::new(0xdead_beef_cafe_1234);
94        let mut out = Vec::with_capacity(2 * b);
95
96        for _ in 0..b {
97            let boot_states = resample_states(&result.states, &mut rng);
98            let (c, h) = compute_ch(&boot_states, &pi);
99            out.push(c);
100            out.push(h);
101        }
102        out
103    }
104}
105
106// ── MetalBootstrap (genesis_node stub) ───────────────────────────────────────
107
108/// GPU-accelerated bootstrap stub (M1 Ultra Metal compute pipeline).
109///
110/// Currently delegates to [`CpuBootstrap`].  When the Metal kernel is
111/// implemented in `ets-probe-ffi`, replace the body of
112/// `resample_and_estimate` with the Metal dispatch.
113#[cfg(feature = "genesis_node")]
114#[derive(Debug, Clone)]
115pub struct MetalBootstrap {
116    inner: CpuBootstrap,
117}
118
119#[cfg(feature = "genesis_node")]
120impl MetalBootstrap {
121    /// Create with explicit parameters.
122    #[must_use]
123    pub fn new(max_depth: usize, alpha: f64, alphabet_size: usize) -> Self {
124        Self {
125            inner: CpuBootstrap::new(max_depth, alpha, alphabet_size),
126        }
127    }
128}
129
130#[cfg(feature = "genesis_node")]
131impl Default for MetalBootstrap {
132    fn default() -> Self {
133        Self {
134            inner: CpuBootstrap::default(),
135        }
136    }
137}
138
139#[cfg(feature = "genesis_node")]
140impl BootstrapBackend for MetalBootstrap {
141    fn resample_and_estimate(&self, data: &[u8], b: usize) -> Vec<f64> {
142        // TODO: dispatch to Metal compute pipeline once ets-probe-ffi is ready.
143        self.inner.resample_and_estimate(data, b)
144    }
145}
146
147// ── Internal helpers ──────────────────────────────────────────────────────────
148
149/// Empirical stationary distribution: fraction of positions (past `max_depth`)
150/// assigned to each causal state.
151fn empirical_pi(states: &[CausalState], symbols: &[u8], max_depth: usize) -> Vec<f64> {
152    let k = states.len();
153    let mut visits = vec![0u64; k];
154    let n = symbols.len();
155
156    // Build a lookup: history bytes → state id.
157    let mut assignment: std::collections::HashMap<Vec<u8>, usize> =
158        std::collections::HashMap::new();
159    for s in states {
160        for h in &s.histories {
161            assignment.insert(h.clone(), s.id);
162        }
163    }
164
165    for pos in max_depth..n {
166        let mut assigned = false;
167        for d in (1..=max_depth).rev() {
168            let hist = &symbols[pos - d..pos];
169            if let Some(&sid) = assignment.get(hist) {
170                visits[sid] += 1;
171                assigned = true;
172                break;
173            }
174        }
175        if !assigned {
176            visits[0] += 1;
177        }
178    }
179    let total: u64 = visits.iter().sum();
180    if total == 0 {
181        return vec![1.0 / k as f64; k];
182    }
183    visits.iter().map(|&v| v as f64 / total as f64).collect()
184}
185
186/// Parametric resample: for each state, draw `total` new counts from its
187/// empirical distribution.
188fn resample_states(states: &[CausalState], rng: &mut Xorshift64) -> Vec<CausalState> {
189    states
190        .iter()
191        .map(|s| {
192            let total: u32 = s.pooled.iter().sum();
193            let probs = counts_to_probs(&s.pooled);
194            let mut new_counts = vec![0u32; s.pooled.len()];
195            for _ in 0..total {
196                let sym = rng.sample_categorical(&probs);
197                new_counts[sym] += 1;
198            }
199            CausalState {
200                id: s.id,
201                pooled: new_counts,
202                histories: s.histories.clone(),
203            }
204        })
205        .collect()
206}
207
208/// Compute (`C_μ`, `h_μ`) from resampled states and a fixed π.
209fn compute_ch(states: &[CausalState], pi: &[f64]) -> (f64, f64) {
210    // C_μ = H[π] = -Σ π_i log2(π_i)
211    let c_mu: f64 = pi
212        .iter()
213        .filter(|&&p| p > 1e-300)
214        .map(|&p| -p * p.log2())
215        .sum();
216    // h_μ = Σ π_i H[emission dist of state i]
217    let h_mu: f64 = states
218        .iter()
219        .zip(pi.iter())
220        .map(|(s, &pi_i)| {
221            let probs = counts_to_probs(&s.pooled);
222            let h: f64 = probs
223                .iter()
224                .filter(|&&p| p > 1e-300)
225                .map(|&p| -p * p.log2())
226                .sum();
227            pi_i * h
228        })
229        .sum();
230    (c_mu, h_mu)
231}
232
233// ── Minimal RNG ───────────────────────────────────────────────────────────────
234
235struct Xorshift64(u64);
236
237impl Xorshift64 {
238    fn new(seed: u64) -> Self {
239        Self(if seed == 0 {
240            0xcafe_babe_1234_5678
241        } else {
242            seed
243        })
244    }
245
246    fn next_u64(&mut self) -> u64 {
247        self.0 ^= self.0 << 13;
248        self.0 ^= self.0 >> 7;
249        self.0 ^= self.0 << 17;
250        self.0
251    }
252
253    fn next_f64(&mut self) -> f64 {
254        (self.next_u64() >> 11) as f64 / (1u64 << 53) as f64
255    }
256
257    fn sample_categorical(&mut self, probs: &[f64]) -> usize {
258        let u = self.next_f64();
259        let mut cum = 0.0;
260        for (i, &p) in probs.iter().enumerate() {
261            cum += p;
262            if u < cum {
263                return i;
264            }
265        }
266        probs.len() - 1
267    }
268}
269
270// ── Tests ─────────────────────────────────────────────────────────────────────
271
272#[cfg(test)]
273mod tests {
274    use super::*;
275    use crate::test_utils::gen_even_process;
276
277    #[test]
278    fn cpu_bootstrap_empty_input_returns_empty() {
279        let backend = CpuBootstrap::default();
280        assert!(backend.resample_and_estimate(&[], 200).is_empty());
281    }
282
283    #[test]
284    fn cpu_bootstrap_zero_b_returns_empty() {
285        let backend = CpuBootstrap::default();
286        let data: Vec<u8> = gen_even_process(500, 1);
287        assert!(backend.resample_and_estimate(&data, 0).is_empty());
288    }
289
290    #[test]
291    fn cpu_bootstrap_output_length_is_2b() {
292        let backend = CpuBootstrap::default();
293        let data = gen_even_process(1000, 42);
294        let out = backend.resample_and_estimate(&data, 20);
295        assert_eq!(out.len(), 40, "expected 2×b=40 entries");
296    }
297
298    #[test]
299    fn cpu_bootstrap_c_mu_non_negative() {
300        let backend = CpuBootstrap::default();
301        let data = gen_even_process(2000, 7);
302        let out = backend.resample_and_estimate(&data, 50);
303        for i in (0..out.len()).step_by(2) {
304            assert!(out[i] >= 0.0, "C_μ[{}]={} < 0", i / 2, out[i]);
305        }
306    }
307
308    #[test]
309    fn cpu_bootstrap_h_mu_non_negative() {
310        let backend = CpuBootstrap::default();
311        let data = gen_even_process(2000, 13);
312        let out = backend.resample_and_estimate(&data, 50);
313        for i in (1..out.len()).step_by(2) {
314            assert!(out[i] >= 0.0, "h_μ[{}]={} < 0", i / 2, out[i]);
315        }
316    }
317
318    #[test]
319    fn xorshift_output_in_unit_interval() {
320        let mut rng = Xorshift64::new(99);
321        for _ in 0..1000 {
322            let v = rng.next_f64();
323            assert!((0.0..1.0).contains(&v));
324        }
325    }
326
327    #[cfg(feature = "genesis_node")]
328    #[test]
329    fn metal_bootstrap_delegates_to_cpu() {
330        let cpu = CpuBootstrap::default();
331        let metal = MetalBootstrap::default();
332        let data = gen_even_process(500, 5);
333        let cpu_out = cpu.resample_and_estimate(&data, 10);
334        let metal_out = metal.resample_and_estimate(&data, 10);
335        // Both must produce the same length output (same backend, different RNG seed ok).
336        assert_eq!(cpu_out.len(), metal_out.len());
337    }
338}