// epsilon_engine/complexity.rs
use crate::cssr::{CausalState, CssrResult};
18use pacr_types::Estimate;
19
/// Shannon entropy (base 2) of a probability distribution, in bits.
///
/// Terms with probability at or below `1e-300` are skipped, so a zero
/// probability contributes nothing (the limit of `-p * log2(p)` as `p -> 0`).
#[must_use]
pub fn entropy(probs: &[f64]) -> f64 {
    let mut bits = 0.0;
    for &p in probs {
        if p > 1e-300 {
            bits -= p * p.log2();
        }
    }
    bits
}
31
/// Normalise integer counts into a probability vector.
///
/// A zero total falls back to the uniform distribution over the same number
/// of cells (an empty slice therefore yields an empty vector).
#[must_use]
pub fn counts_to_probs(counts: &[u32]) -> Vec<f64> {
    let total: u32 = counts.iter().sum();
    if total == 0 {
        let uniform = 1.0 / counts.len() as f64;
        return counts.iter().map(|_| uniform).collect();
    }
    let denom = f64::from(total);
    counts.iter().map(|&c| f64::from(c) / denom).collect()
}
45
46#[must_use]
52pub fn stationary_distribution(result: &CssrResult, symbols: &[u8]) -> Vec<f64> {
53 let k = result.states.len();
54 let mut visits = vec![0u64; k];
55 let n = symbols.len();
56 let depth = result.max_depth;
57
58 for pos in depth..n {
59 let mut assigned = false;
61 for d in (1..=depth).rev() {
62 let hist = &symbols[pos - d..pos];
63 if let Some(&sid) = result.assignment.get(hist) {
64 visits[sid] += 1;
65 assigned = true;
66 break;
67 }
68 }
69 if !assigned {
70 visits[0] += 1; }
72 }
73
74 let total: u64 = visits.iter().sum();
75 if total == 0 {
76 return vec![1.0 / k as f64; k];
77 }
78 visits.iter().map(|&v| v as f64 / total as f64).collect()
79}
80
81#[must_use]
85pub fn compute_metrics(states: &[CausalState], pi: &[f64]) -> (f64, f64) {
86 let c_mu = entropy(pi);
87 let h_mu: f64 = states
88 .iter()
89 .zip(pi.iter())
90 .map(|(s, &pi_i)| {
91 let probs = counts_to_probs(&s.pooled);
92 pi_i * entropy(&probs)
93 })
94 .sum();
95 (c_mu, h_mu)
96}
97
/// Minimal xorshift64 PRNG (Marsaglia shifts 13/7/17); deterministic and
/// seedable, not cryptographically secure.
struct Xorshift64(u64);

impl Xorshift64 {
    /// Seed the generator. Zero is a fixed point of xorshift, so a zero
    /// seed is replaced with a non-zero constant.
    fn new(seed: u64) -> Self {
        match seed {
            0 => Self(0xcafe_babe_dead_beef),
            s => Self(s),
        }
    }

    /// Advance the state and return the next 64-bit value.
    fn next_u64(&mut self) -> u64 {
        let mut x = self.0;
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        self.0 = x;
        x
    }

    /// Uniform sample in `[0, 1)` using the top 53 bits (f64 mantissa width).
    fn next_f64(&mut self) -> f64 {
        (self.next_u64() >> 11) as f64 / (1u64 << 53) as f64
    }

    /// Draw an index from the categorical distribution `probs` by inverse
    /// CDF. If floating-point rounding leaves the cumulative sum below the
    /// drawn value, the last index is returned.
    fn sample_categorical(&mut self, probs: &[f64]) -> usize {
        let u = self.next_f64();
        let mut cum = 0.0;
        for (i, &p) in probs.iter().enumerate() {
            cum += p;
            if u < cum {
                return i;
            }
        }
        probs.len() - 1
    }
}
136
137#[allow(clippy::cast_precision_loss)]
148#[must_use]
149pub fn bootstrap_ci(
150 result: &CssrResult,
151 symbols: &[u8],
152 b: usize,
153) -> (Estimate<f64>, Estimate<f64>) {
154 let pi = stationary_distribution(result, symbols);
155 let (c_mu_point, h_mu_point) = compute_metrics(&result.states, &pi);
156
157 let mut rng = Xorshift64::new(0xdeadbeef_12345678);
158 let mut c_samples = Vec::with_capacity(b);
159 let mut h_samples = Vec::with_capacity(b);
160
161 for _ in 0..b {
162 let boot_states: Vec<crate::cssr::CausalState> = result
164 .states
165 .iter()
166 .map(|s| {
167 let total: u32 = s.pooled.iter().sum();
168 let probs = counts_to_probs(&s.pooled);
169 let mut new_counts = vec![0u32; s.pooled.len()];
170 for _ in 0..total {
171 let sym = rng.sample_categorical(&probs);
172 new_counts[sym] += 1;
173 }
174 crate::cssr::CausalState {
175 id: s.id,
176 pooled: new_counts,
177 histories: s.histories.clone(),
178 }
179 })
180 .collect();
181
182 let (c, h) = compute_metrics(&boot_states, &pi);
183 c_samples.push(c);
184 h_samples.push(h);
185 }
186
187 c_samples.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
188 h_samples.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
189
190 let lo_idx = (b as f64 * 0.025) as usize;
191 let hi_idx = (b as f64 * 0.975) as usize;
192 let hi_idx = hi_idx.min(b - 1);
193
194 (
195 Estimate {
196 point: c_mu_point,
197 lower: c_samples[lo_idx],
198 upper: c_samples[hi_idx],
199 },
200 Estimate {
201 point: h_mu_point,
202 lower: h_samples[lo_idx],
203 upper: h_samples[hi_idx],
204 },
205 )
206}
207
#[cfg(test)]
mod tests {
    use super::*;

    // A fair coin carries exactly one bit of entropy.
    #[test]
    fn entropy_uniform_two_symbols() {
        let h = entropy(&[0.5, 0.5]);
        assert!((h - 1.0).abs() < 1e-10, "H([0.5,0.5]) = 1.0 bit, got {h}");
    }

    // A certain outcome carries no information.
    #[test]
    fn entropy_deterministic_is_zero() {
        let h = entropy(&[1.0, 0.0]);
        assert!(h.abs() < 1e-10, "H([1,0]) = 0, got {h}");
    }

    // Known closed-form value for the (1/3, 2/3) split.
    #[test]
    fn entropy_one_third_two_thirds() {
        let h = entropy(&[1.0 / 3.0, 2.0 / 3.0]);
        assert!((h - 0.9183).abs() < 0.001, "H([1/3,2/3]) ≈ 0.9183, got {h}");
    }

    // Counts normalise to their relative frequencies.
    #[test]
    fn counts_to_probs_normalises() {
        let p = counts_to_probs(&[3u32, 1]);
        assert!((p[0] - 0.75).abs() < 1e-10);
        assert!((p[1] - 0.25).abs() < 1e-10);
    }

    // A zero total falls back to the uniform distribution.
    #[test]
    fn counts_to_probs_all_zero_uniform() {
        for &pi in &counts_to_probs(&[0u32, 0, 0]) {
            assert!((pi - 1.0 / 3.0).abs() < 1e-10);
        }
    }

    // next_f64 must stay inside the half-open unit interval.
    #[test]
    fn xorshift_produces_values_in_range() {
        let mut rng = Xorshift64::new(42);
        for _ in 0..1000 {
            let v = rng.next_f64();
            assert!((0.0..1.0).contains(&v), "out of [0,1): {v}");
        }
    }
}