Skip to main content

epsilon_engine/
complexity.rs

//! Pillar: III. PACR field: Γ.
//!
//! Statistical complexity (`C_μ`) and entropy rate (`h_μ`) computation from a
//! [`CssrResult`], plus bootstrap confidence intervals.
//!
//! ## Definitions
//!
//! Given the inferred causal states with stationary distribution π:
//!
//! ```text
//! C_μ = H[π]           = -Σ_i π_i log₂ π_i   (statistical complexity, bits)
//! h_μ = Σ_i π_i H[P(·|i)] = Σ_i π_i (-Σ_a P(a|i) log₂ P(a|i))  (entropy rate, bits/sym)
//! ```
//!
//! Bootstrap CIs use B=200 parametric resamples of the state emission counts.
16
use crate::cssr::{CausalState, CssrResult};
use pacr_types::Estimate;
19
// ── Information measures ──────────────────────────────────────────────────────
21
/// Shannon entropy of a probability vector (in bits).  Zeros are skipped.
#[must_use]
pub fn entropy(probs: &[f64]) -> f64 {
    let mut bits = 0.0;
    for &p in probs {
        // p·log₂p → 0 as p → 0; skipping near-zero terms avoids 0·(-inf) = NaN.
        if p > 1e-300 {
            bits -= p * p.log2();
        }
    }
    bits
}
31
/// Convert a count vector to probability vector.  Returns uniform if all zero.
#[must_use]
pub fn counts_to_probs(counts: &[u32]) -> Vec<f64> {
    let total: u32 = counts.iter().sum();
    match total {
        // No observations at all: fall back to a uniform distribution.
        0 => {
            let uniform = 1.0 / counts.len() as f64;
            counts.iter().map(|_| uniform).collect()
        }
        t => {
            let denom = f64::from(t);
            counts.iter().map(|&c| f64::from(c) / denom).collect()
        }
    }
}
45
// ── Stationary distribution ───────────────────────────────────────────────────
47
48/// Estimate the stationary distribution of causal states empirically from the
49/// symbol sequence.  Each position (past position `max_depth`) is assigned to
50/// the state of its longest matching history prefix.
51#[must_use]
52pub fn stationary_distribution(result: &CssrResult, symbols: &[u8]) -> Vec<f64> {
53    let k = result.states.len();
54    let mut visits = vec![0u64; k];
55    let n = symbols.len();
56    let depth = result.max_depth;
57
58    for pos in depth..n {
59        // Try longest matching history first, fall back to shorter.
60        let mut assigned = false;
61        for d in (1..=depth).rev() {
62            let hist = &symbols[pos - d..pos];
63            if let Some(&sid) = result.assignment.get(hist) {
64                visits[sid] += 1;
65                assigned = true;
66                break;
67            }
68        }
69        if !assigned {
70            visits[0] += 1; // fallback to state 0
71        }
72    }
73
74    let total: u64 = visits.iter().sum();
75    if total == 0 {
76        return vec![1.0 / k as f64; k];
77    }
78    visits.iter().map(|&v| v as f64 / total as f64).collect()
79}
80
// ── C_μ and h_μ ───────────────────────────────────────────────────────────────
82
83/// Compute (`C_μ`, `h_μ`) from causal states and their stationary distribution.
84#[must_use]
85pub fn compute_metrics(states: &[CausalState], pi: &[f64]) -> (f64, f64) {
86    let c_mu = entropy(pi);
87    let h_mu: f64 = states
88        .iter()
89        .zip(pi.iter())
90        .map(|(s, &pi_i)| {
91            let probs = counts_to_probs(&s.pooled);
92            pi_i * entropy(&probs)
93        })
94        .sum();
95    (c_mu, h_mu)
96}
97
// ── Minimal xorshift RNG (no external deps) ──────────────────────────────────
99
/// 64-bit xorshift RNG.  Deterministic, fast, zero-allocation.
struct Xorshift64(u64);

impl Xorshift64 {
    /// Seed the generator.  A zero seed would pin xorshift at zero forever,
    /// so it is replaced with a fixed non-zero constant.
    fn new(seed: u64) -> Self {
        match seed {
            0 => Self(0xcafe_babe_dead_beef),
            s => Self(s),
        }
    }

    /// Advance the state with the classic 13/7/17 xorshift triple.
    fn next_u64(&mut self) -> u64 {
        let mut x = self.0;
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        self.0 = x;
        x
    }

    /// Uniform f64 in [0, 1): top 53 bits of the state scaled by 2⁻⁵³.
    fn next_f64(&mut self) -> f64 {
        let mantissa = self.next_u64() >> 11;
        mantissa as f64 / (1u64 << 53) as f64
    }

    /// Sample multinomial: given `probs` (sum 1.0) return a symbol index
    /// via inverse-CDF lookup on a single uniform draw.
    fn sample_categorical(&mut self, probs: &[f64]) -> usize {
        let u = self.next_f64();
        let mut cumulative = 0.0;
        for (idx, &p) in probs.iter().enumerate() {
            cumulative += p;
            if u < cumulative {
                return idx;
            }
        }
        probs.len() - 1 // floating-point rounding guard
    }
}
136
// ── Bootstrap CI (B = 200) ────────────────────────────────────────────────────
138
139/// Parametric bootstrap confidence intervals for (`C_μ`, `h_μ`).
140///
141/// Each bootstrap replicate perturbs each state's pooled counts by resampling
142/// from its empirical distribution with the same total.  The 2.5th–97.5th
143/// percentile of the B=200 replicates forms the 95% CI.
144///
145/// Returns `(Estimate<f64> for C_μ, Estimate<f64> for h_μ)` where
146/// `point` is the empirical estimate and `lower`/`upper` are CI bounds.
147#[allow(clippy::cast_precision_loss)]
148#[must_use]
149pub fn bootstrap_ci(
150    result: &CssrResult,
151    symbols: &[u8],
152    b: usize,
153) -> (Estimate<f64>, Estimate<f64>) {
154    let pi = stationary_distribution(result, symbols);
155    let (c_mu_point, h_mu_point) = compute_metrics(&result.states, &pi);
156
157    let mut rng = Xorshift64::new(0xdeadbeef_12345678);
158    let mut c_samples = Vec::with_capacity(b);
159    let mut h_samples = Vec::with_capacity(b);
160
161    for _ in 0..b {
162        // Resample each state's emission counts.
163        let boot_states: Vec<crate::cssr::CausalState> = result
164            .states
165            .iter()
166            .map(|s| {
167                let total: u32 = s.pooled.iter().sum();
168                let probs = counts_to_probs(&s.pooled);
169                let mut new_counts = vec![0u32; s.pooled.len()];
170                for _ in 0..total {
171                    let sym = rng.sample_categorical(&probs);
172                    new_counts[sym] += 1;
173                }
174                crate::cssr::CausalState {
175                    id: s.id,
176                    pooled: new_counts,
177                    histories: s.histories.clone(),
178                }
179            })
180            .collect();
181
182        let (c, h) = compute_metrics(&boot_states, &pi);
183        c_samples.push(c);
184        h_samples.push(h);
185    }
186
187    c_samples.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
188    h_samples.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
189
190    let lo_idx = (b as f64 * 0.025) as usize;
191    let hi_idx = (b as f64 * 0.975) as usize;
192    let hi_idx = hi_idx.min(b - 1);
193
194    (
195        Estimate {
196            point: c_mu_point,
197            lower: c_samples[lo_idx],
198            upper: c_samples[hi_idx],
199        },
200        Estimate {
201            point: h_mu_point,
202            lower: h_samples[lo_idx],
203            upper: h_samples[hi_idx],
204        },
205    )
206}
207
// ── Tests ─────────────────────────────────────────────────────────────────────
209
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn entropy_uniform_two_symbols() {
        let h = entropy(&[0.5, 0.5]);
        assert!((h - 1.0).abs() < 1e-10, "H([0.5,0.5]) = 1.0 bit, got {h}");
    }

    #[test]
    fn entropy_deterministic_is_zero() {
        let h = entropy(&[1.0, 0.0]);
        assert!(h.abs() < 1e-10, "H([1,0]) = 0, got {h}");
    }

    #[test]
    fn entropy_one_third_two_thirds() {
        // H(1/3, 2/3) ≈ 0.9183 bits
        let h = entropy(&[1.0 / 3.0, 2.0 / 3.0]);
        assert!((h - 0.9183).abs() < 0.001, "H([1/3,2/3]) ≈ 0.9183, got {h}");
    }

    #[test]
    fn counts_to_probs_normalises() {
        let p = counts_to_probs(&[3u32, 1]);
        assert!((p[0] - 0.75).abs() < 1e-10);
        assert!((p[1] - 0.25).abs() < 1e-10);
    }

    #[test]
    fn counts_to_probs_all_zero_uniform() {
        let p = counts_to_probs(&[0u32, 0, 0]);
        assert!(p.iter().all(|&pi| (pi - 1.0 / 3.0).abs() < 1e-10));
    }

    #[test]
    fn xorshift_produces_values_in_range() {
        let mut rng = Xorshift64::new(42);
        for _ in 0..1000 {
            let v = rng.next_f64();
            assert!((0.0..1.0).contains(&v), "out of [0,1): {v}");
        }
    }
}