epsilon_engine/
bootstrap_backend.rs1use crate::complexity::counts_to_probs;
23use crate::cssr::{run_cssr, CausalState};
24
25pub trait BootstrapBackend {
32 fn resample_and_estimate(&self, data: &[u8], b: usize) -> Vec<f64>;
38}
39
40#[derive(Debug, Clone)]
49pub struct CpuBootstrap {
50 pub max_depth: usize,
52 pub alpha: f64,
54 pub alphabet_size: usize,
56}
57
58impl CpuBootstrap {
59 #[must_use]
61 pub fn new(max_depth: usize, alpha: f64, alphabet_size: usize) -> Self {
62 Self {
63 max_depth,
64 alpha,
65 alphabet_size,
66 }
67 }
68}
69
70impl Default for CpuBootstrap {
71 fn default() -> Self {
72 Self {
73 max_depth: 4,
74 alpha: 0.001,
75 alphabet_size: 2,
76 }
77 }
78}
79
80impl BootstrapBackend for CpuBootstrap {
81 fn resample_and_estimate(&self, data: &[u8], b: usize) -> Vec<f64> {
82 if data.is_empty() || b == 0 {
83 return Vec::new();
84 }
85 let result = run_cssr(data, self.alphabet_size, self.max_depth, self.alpha);
86 if result.states.is_empty() {
87 return Vec::new();
88 }
89
90 let pi = empirical_pi(&result.states, data, result.max_depth);
92
93 let mut rng = Xorshift64::new(0xdead_beef_cafe_1234);
94 let mut out = Vec::with_capacity(2 * b);
95
96 for _ in 0..b {
97 let boot_states = resample_states(&result.states, &mut rng);
98 let (c, h) = compute_ch(&boot_states, &pi);
99 out.push(c);
100 out.push(h);
101 }
102 out
103 }
104}
105
106#[cfg(feature = "genesis_node")]
114#[derive(Debug, Clone)]
115pub struct MetalBootstrap {
116 inner: CpuBootstrap,
117}
118
119#[cfg(feature = "genesis_node")]
120impl MetalBootstrap {
121 #[must_use]
123 pub fn new(max_depth: usize, alpha: f64, alphabet_size: usize) -> Self {
124 Self {
125 inner: CpuBootstrap::new(max_depth, alpha, alphabet_size),
126 }
127 }
128}
129
130#[cfg(feature = "genesis_node")]
131impl Default for MetalBootstrap {
132 fn default() -> Self {
133 Self {
134 inner: CpuBootstrap::default(),
135 }
136 }
137}
138
139#[cfg(feature = "genesis_node")]
140impl BootstrapBackend for MetalBootstrap {
141 fn resample_and_estimate(&self, data: &[u8], b: usize) -> Vec<f64> {
142 self.inner.resample_and_estimate(data, b)
144 }
145}
146
147fn empirical_pi(states: &[CausalState], symbols: &[u8], max_depth: usize) -> Vec<f64> {
152 let k = states.len();
153 let mut visits = vec![0u64; k];
154 let n = symbols.len();
155
156 let mut assignment: std::collections::HashMap<Vec<u8>, usize> =
158 std::collections::HashMap::new();
159 for s in states {
160 for h in &s.histories {
161 assignment.insert(h.clone(), s.id);
162 }
163 }
164
165 for pos in max_depth..n {
166 let mut assigned = false;
167 for d in (1..=max_depth).rev() {
168 let hist = &symbols[pos - d..pos];
169 if let Some(&sid) = assignment.get(hist) {
170 visits[sid] += 1;
171 assigned = true;
172 break;
173 }
174 }
175 if !assigned {
176 visits[0] += 1;
177 }
178 }
179 let total: u64 = visits.iter().sum();
180 if total == 0 {
181 return vec![1.0 / k as f64; k];
182 }
183 visits.iter().map(|&v| v as f64 / total as f64).collect()
184}
185
186fn resample_states(states: &[CausalState], rng: &mut Xorshift64) -> Vec<CausalState> {
189 states
190 .iter()
191 .map(|s| {
192 let total: u32 = s.pooled.iter().sum();
193 let probs = counts_to_probs(&s.pooled);
194 let mut new_counts = vec![0u32; s.pooled.len()];
195 for _ in 0..total {
196 let sym = rng.sample_categorical(&probs);
197 new_counts[sym] += 1;
198 }
199 CausalState {
200 id: s.id,
201 pooled: new_counts,
202 histories: s.histories.clone(),
203 }
204 })
205 .collect()
206}
207
208fn compute_ch(states: &[CausalState], pi: &[f64]) -> (f64, f64) {
210 let c_mu: f64 = pi
212 .iter()
213 .filter(|&&p| p > 1e-300)
214 .map(|&p| -p * p.log2())
215 .sum();
216 let h_mu: f64 = states
218 .iter()
219 .zip(pi.iter())
220 .map(|(s, &pi_i)| {
221 let probs = counts_to_probs(&s.pooled);
222 let h: f64 = probs
223 .iter()
224 .filter(|&&p| p > 1e-300)
225 .map(|&p| -p * p.log2())
226 .sum();
227 pi_i * h
228 })
229 .sum();
230 (c_mu, h_mu)
231}
232
233struct Xorshift64(u64);
236
237impl Xorshift64 {
238 fn new(seed: u64) -> Self {
239 Self(if seed == 0 {
240 0xcafe_babe_1234_5678
241 } else {
242 seed
243 })
244 }
245
246 fn next_u64(&mut self) -> u64 {
247 self.0 ^= self.0 << 13;
248 self.0 ^= self.0 >> 7;
249 self.0 ^= self.0 << 17;
250 self.0
251 }
252
253 fn next_f64(&mut self) -> f64 {
254 (self.next_u64() >> 11) as f64 / (1u64 << 53) as f64
255 }
256
257 fn sample_categorical(&mut self, probs: &[f64]) -> usize {
258 let u = self.next_f64();
259 let mut cum = 0.0;
260 for (i, &p) in probs.iter().enumerate() {
261 cum += p;
262 if u < cum {
263 return i;
264 }
265 }
266 probs.len() - 1
267 }
268}
269
270#[cfg(test)]
273mod tests {
274 use super::*;
275 use crate::test_utils::gen_even_process;
276
277 #[test]
278 fn cpu_bootstrap_empty_input_returns_empty() {
279 let backend = CpuBootstrap::default();
280 assert!(backend.resample_and_estimate(&[], 200).is_empty());
281 }
282
283 #[test]
284 fn cpu_bootstrap_zero_b_returns_empty() {
285 let backend = CpuBootstrap::default();
286 let data: Vec<u8> = gen_even_process(500, 1);
287 assert!(backend.resample_and_estimate(&data, 0).is_empty());
288 }
289
290 #[test]
291 fn cpu_bootstrap_output_length_is_2b() {
292 let backend = CpuBootstrap::default();
293 let data = gen_even_process(1000, 42);
294 let out = backend.resample_and_estimate(&data, 20);
295 assert_eq!(out.len(), 40, "expected 2×b=40 entries");
296 }
297
298 #[test]
299 fn cpu_bootstrap_c_mu_non_negative() {
300 let backend = CpuBootstrap::default();
301 let data = gen_even_process(2000, 7);
302 let out = backend.resample_and_estimate(&data, 50);
303 for i in (0..out.len()).step_by(2) {
304 assert!(out[i] >= 0.0, "C_μ[{}]={} < 0", i / 2, out[i]);
305 }
306 }
307
308 #[test]
309 fn cpu_bootstrap_h_mu_non_negative() {
310 let backend = CpuBootstrap::default();
311 let data = gen_even_process(2000, 13);
312 let out = backend.resample_and_estimate(&data, 50);
313 for i in (1..out.len()).step_by(2) {
314 assert!(out[i] >= 0.0, "h_μ[{}]={} < 0", i / 2, out[i]);
315 }
316 }
317
318 #[test]
319 fn xorshift_output_in_unit_interval() {
320 let mut rng = Xorshift64::new(99);
321 for _ in 0..1000 {
322 let v = rng.next_f64();
323 assert!((0.0..1.0).contains(&v));
324 }
325 }
326
327 #[cfg(feature = "genesis_node")]
328 #[test]
329 fn metal_bootstrap_delegates_to_cpu() {
330 let cpu = CpuBootstrap::default();
331 let metal = MetalBootstrap::default();
332 let data = gen_even_process(500, 5);
333 let cpu_out = cpu.resample_and_estimate(&data, 10);
334 let metal_out = metal.resample_and_estimate(&data, 10);
335 assert_eq!(cpu_out.len(), metal_out.len());
337 }
338}