1#![forbid(unsafe_code)]
32#![deny(clippy::all, clippy::pedantic)]
33#![allow(
34 clippy::cast_precision_loss,
35 clippy::cast_possible_truncation,
36 clippy::cast_sign_loss,
37 clippy::similar_names,
38 clippy::doc_markdown,
39 clippy::must_use_candidate,
40 clippy::needless_pass_by_value,
41 clippy::unreadable_literal,
42 clippy::missing_panics_doc,
43 clippy::missing_errors_doc,
44 clippy::doc_overindented_list_items,
45 clippy::ptr_arg
46)]
47
48pub mod bootstrap_backend;
49pub mod complexity;
50pub mod cssr;
51pub mod quick_screen;
52pub mod symbolize;
53
54pub use symbolize::WordSymbolizer;
56
57use complexity::{bootstrap_ci, compute_metrics, stationary_distribution};
58use cssr::run_cssr;
59use pacr_types::CognitiveSplit;
60
/// Tuning parameters consumed by [`infer`] and [`infer_fast`].
///
/// `alphabet_size`, `max_depth` and `alpha` are forwarded unchanged to
/// `cssr::run_cssr`; `bootstrap_b` is forwarded to
/// `complexity::bootstrap_ci` and is therefore only read by [`infer`].
#[derive(Clone, Debug)]
pub struct Config {
    /// Depth limit handed to `run_cssr`.
    /// NOTE(review): presumably the longest history length examined — confirm
    /// against the `cssr` module.
    pub max_depth: usize,
    /// Threshold handed to `run_cssr`.
    /// NOTE(review): presumably the significance level of its statistical
    /// test — confirm against the `cssr` module.
    pub alpha: f64,
    /// Count handed to `bootstrap_ci` (presumably the number of bootstrap
    /// resamples — confirm). Not read by [`infer_fast`].
    pub bootstrap_b: usize,
    /// Alphabet size handed to `run_cssr`; symbols are supplied as `u8`.
    pub alphabet_size: usize,
}
75
76impl Default for Config {
77 fn default() -> Self {
78 Self {
79 max_depth: 4,
80 alpha: 0.001,
81 bootstrap_b: 200,
82 alphabet_size: 2,
83 }
84 }
85}
86
87#[derive(Debug, Clone)]
89pub struct InferResult {
90 pub cognitive_split: CognitiveSplit,
92 pub num_states: usize,
94}
95
96#[must_use]
110pub fn infer(symbols: &[u8], cfg: Config) -> InferResult {
111 if symbols.is_empty() {
112 return InferResult {
113 cognitive_split: zero_split(),
114 num_states: 1,
115 };
116 }
117 let result = run_cssr(symbols, cfg.alphabet_size, cfg.max_depth, cfg.alpha);
118 let num_states = result.states.len();
119 let (c_mu, h_mu) = bootstrap_ci(&result, symbols, cfg.bootstrap_b);
120 InferResult {
121 cognitive_split: CognitiveSplit {
122 statistical_complexity: c_mu,
123 entropy_rate: h_mu,
124 },
125 num_states,
126 }
127}
128
129#[must_use]
131pub fn infer_fast(symbols: &[u8], cfg: Config) -> InferResult {
132 if symbols.is_empty() {
133 return InferResult {
134 cognitive_split: zero_split(),
135 num_states: 1,
136 };
137 }
138 let result = run_cssr(symbols, cfg.alphabet_size, cfg.max_depth, cfg.alpha);
139 let pi = stationary_distribution(&result, symbols);
140 let (c_point, h_point) = compute_metrics(&result.states, &pi);
141 let num_states = result.states.len();
142 InferResult {
143 cognitive_split: CognitiveSplit {
144 statistical_complexity: pacr_types::Estimate::exact(c_point),
145 entropy_rate: pacr_types::Estimate::exact(h_point),
146 },
147 num_states,
148 }
149}
150
151fn zero_split() -> CognitiveSplit {
152 CognitiveSplit {
153 statistical_complexity: pacr_types::Estimate::exact(0.0),
154 entropy_rate: pacr_types::Estimate::exact(0.0),
155 }
156}
157
/// Shannon entropy, in bits per symbol, of the empirical single-symbol
/// distribution of `symbols`.
///
/// This is an order-0 estimate: it looks only at symbol frequencies and
/// ignores any dependence between consecutive symbols.
///
/// Symbols whose value is `>= alphabet_size` are ignored entirely: they are
/// neither counted nor included in the normalising total, so the remaining
/// frequencies always form a proper probability distribution.
///
/// Returns `0.0` when `symbols` is empty, when `alphabet_size` is 0, or when
/// no symbol falls inside the alphabet.
#[must_use]
pub fn empirical_entropy_rate(symbols: &[u8], alphabet_size: usize) -> f64 {
    // A `u8` can index at most 256 slots, so a larger histogram would only
    // waste memory (or abort on an absurd `alphabet_size`).
    let slots = alphabet_size.min(usize::from(u8::MAX) + 1);
    let mut counts = vec![0_u64; slots];
    let mut in_range = 0_u64;

    for &sym in symbols {
        if let Some(slot) = counts.get_mut(usize::from(sym)) {
            *slot += 1;
            in_range += 1;
        }
    }

    if in_range == 0 {
        return 0.0;
    }

    // Normalise by the number of symbols actually counted, not by
    // `symbols.len()`, so skipped symbols cannot deflate the probabilities.
    let total = in_range as f64;
    counts
        .iter()
        .filter(|&&count| count > 0)
        .fold(0.0, |h, &count| {
            let p = count as f64 / total;
            h - p * p.log2()
        })
}
195
#[cfg(test)]
mod test_utils {
    /// Small deterministic pseudo-random generator for tests: a 64-bit
    /// xor/shift recurrence with shift amounts 13, 7 and 17.
    pub struct TestRng(u64);

    impl TestRng {
        /// State substituted for a zero seed. An all-zero state would stay
        /// zero forever under the xor/shift update.
        const ZERO_SEED_FALLBACK: u64 = 0xdead_beef_cafe_babe;

        /// Creates a generator from `seed`; a seed of 0 is replaced by a
        /// fixed non-zero constant.
        pub fn new(seed: u64) -> Self {
            match seed {
                0 => Self(Self::ZERO_SEED_FALLBACK),
                nonzero => Self(nonzero),
            }
        }

        /// Advances the state by one step and returns the new state.
        pub fn next_u64(&mut self) -> u64 {
            let mut x = self.0;
            x ^= x << 13;
            x ^= x >> 7;
            x ^= x << 17;
            self.0 = x;
            x
        }

        /// Returns a value in `[0, 1)` built from the top 53 bits of the
        /// next 64-bit output.
        pub fn next_f64(&mut self) -> f64 {
            let top_bits = self.next_u64() >> 11;
            let scale = (1u64 << 53) as f64;
            top_bits as f64 / scale
        }
    }

    /// Generates `n` binary symbols from a two-state chain seeded by `seed`.
    ///
    /// The chain starts in state 0. From state 0 it emits `0` with
    /// probability 2/3; from state 1 it emits `0` with probability 1/3.
    /// The next state always equals the symbol just emitted.
    pub fn gen_even_process(n: usize, seed: u64) -> Vec<u8> {
        let mut rng = TestRng::new(seed);
        // The state is simply the previously emitted symbol.
        let mut last = 0u8;
        (0..n)
            .map(|_| {
                let draw = rng.next_f64();
                let p_zero = if last == 0 { 2.0 / 3.0 } else { 1.0 / 3.0 };
                last = u8::from(draw >= p_zero);
                last
            })
            .collect()
    }

    /// Generates `n` binary symbols in which a `1` is never followed by
    /// another `1`, seeded by `seed`.
    ///
    /// The chain starts in state 1. State 1 emits `0` or `1` with equal
    /// probability; emitting `1` moves to state 0, which must emit `0` and
    /// then returns to state 1.
    pub fn gen_golden_mean(n: usize, seed: u64) -> Vec<u8> {
        let mut rng = TestRng::new(seed);
        let mut out = Vec::with_capacity(n);
        let mut state = 1u8;
        for _ in 0..n {
            // Draw on every step, even in the forced state, so the random
            // stream stays aligned with the step index.
            let draw = rng.next_f64();
            let emit_one = state != 0 && draw >= 0.5;
            out.push(u8::from(emit_one));
            state = u8::from(!emit_one);
        }
        out
    }
}
297
#[cfg(test)]
mod tests {
    use super::test_utils::{gen_even_process, gen_golden_mean};
    use super::*;
    use approx::assert_relative_eq;

    /// Configuration shared by the known-answer tests below.
    fn kat_config() -> Config {
        Config {
            max_depth: 2,
            alpha: 0.001,
            bootstrap_b: 200,
            alphabet_size: 2,
        }
    }

    // ---- Edge cases -------------------------------------------------------

    #[test]
    fn infer_empty_does_not_panic() {
        let outcome = infer(&[], Config::default());
        assert_eq!(outcome.num_states, 1);
        assert!(outcome.cognitive_split.statistical_complexity.point >= 0.0);
    }

    #[test]
    fn infer_single_symbol_stream() {
        let zeros = [0u8; 1000];
        let cfg = Config {
            max_depth: 2,
            ..Config::default()
        };
        let outcome = infer(&zeros, cfg);
        assert_eq!(outcome.num_states, 1, "constant stream → 1 state");
        assert!(outcome.cognitive_split.entropy_rate.point < 0.05);
    }

    #[test]
    fn infer_alternating_stream_two_states() {
        // 0, 1, 0, 1, ... for 2000 symbols.
        let stream: Vec<u8> = [0u8, 1].iter().copied().cycle().take(2000).collect();
        let outcome = infer_fast(
            &stream,
            Config {
                max_depth: 2,
                alpha: 0.001,
                ..Config::default()
            },
        );
        assert_eq!(outcome.num_states, 2, "alternating → 2 states");
    }

    // ---- Known-answer tests: `gen_even_process` ---------------------------

    #[test]
    fn kat_even_process_state_count() {
        let stream = gen_even_process(10_000, 42);
        let outcome = infer_fast(&stream, kat_config());
        assert_eq!(
            outcome.num_states, 2,
            "Even Process must infer exactly 2 states, got {}",
            outcome.num_states
        );
    }

    #[test]
    fn kat_even_process_complexity() {
        let stream = gen_even_process(10_000, 42);
        let outcome = infer(&stream, kat_config());
        let est = &outcome.cognitive_split.statistical_complexity;
        assert_relative_eq!(est.point, 1.0, epsilon = 0.05);
        // The interval must bracket the point estimate (with float slack).
        assert!(est.lower <= est.point + 1e-9 && est.point <= est.upper + 1e-9);
    }

    #[test]
    fn kat_even_process_entropy_rate() {
        let stream = gen_even_process(10_000, 42);
        let outcome = infer(&stream, kat_config());
        let est = &outcome.cognitive_split.entropy_rate;
        assert_relative_eq!(est.point, 0.9183, epsilon = 0.05);
        // The interval must bracket the point estimate (with float slack).
        assert!(est.lower <= est.point + 1e-9 && est.point <= est.upper + 1e-9);
    }

    // ---- Known-answer tests: `gen_golden_mean` ----------------------------

    #[test]
    fn kat_golden_mean_state_count() {
        let stream = gen_golden_mean(10_000, 99);
        let outcome = infer_fast(&stream, kat_config());
        assert_eq!(
            outcome.num_states, 2,
            "Golden Mean must infer exactly 2 states, got {}",
            outcome.num_states
        );
    }

    #[test]
    fn kat_golden_mean_complexity() {
        let stream = gen_golden_mean(10_000, 99);
        let outcome = infer(&stream, kat_config());
        let est = &outcome.cognitive_split.statistical_complexity;
        assert_relative_eq!(est.point, 0.9183, epsilon = 0.05);
        // The interval must bracket the point estimate (with float slack).
        assert!(est.lower <= est.point + 1e-9 && est.point <= est.upper + 1e-9);
    }

    #[test]
    fn kat_golden_mean_entropy_rate() {
        let stream = gen_golden_mean(10_000, 99);
        let outcome = infer(&stream, kat_config());
        let est = &outcome.cognitive_split.entropy_rate;
        // NOTE(review): for the generator in `test_utils` the analytic rate
        // appears to be 2/3 ≈ 0.667; the asserted 0.6792 is inside the
        // tolerance either way — confirm the intended reference value.
        assert_relative_eq!(est.point, 0.6792, epsilon = 0.05);
        // The interval must bracket the point estimate (with float slack).
        assert!(est.lower <= est.point + 1e-9 && est.point <= est.upper + 1e-9);
    }

    // ---- Larger input -----------------------------------------------------

    #[test]
    fn large_sequence_does_not_oom() {
        // Multiplier and increment of the index-scrambling formula below.
        const MUL: u64 = 6364136223846793005;
        const INC: u64 = 1442695040888963407;

        // 50 000 symbols over a 4-letter alphabet, derived from the index.
        let stream: Vec<u8> = (0..50_000u64)
            .map(|i| (i.wrapping_mul(MUL).wrapping_add(INC) % 4) as u8)
            .collect();
        let outcome = infer_fast(
            &stream,
            Config {
                max_depth: 4,
                alpha: 0.001,
                bootstrap_b: 10,
                alphabet_size: 4,
            },
        );
        assert!(outcome.num_states >= 1);
        assert!(outcome.cognitive_split.statistical_complexity.point >= 0.0);
    }
}
477
#[cfg(test)]
mod prop_tests {
    use super::test_utils::{gen_even_process, gen_golden_mean};
    use super::*;
    use proptest::prelude::*;

    /// Depth-2 configuration with the given bootstrap count; every other
    /// field keeps its `Config::default()` value.
    fn depth2_config(bootstrap_b: usize) -> Config {
        Config {
            max_depth: 2,
            bootstrap_b,
            ..Config::default()
        }
    }

    proptest! {
        // The statistical-complexity point estimate is never negative.
        #[test]
        fn complexity_always_non_negative(seed in 0u64..u64::MAX, n in 200usize..1000usize) {
            let stream = gen_even_process(n, seed);
            let outcome = infer_fast(&stream, depth2_config(5));
            prop_assert!(outcome.cognitive_split.statistical_complexity.point >= 0.0);
        }

        // The entropy-rate point estimate is never negative.
        #[test]
        fn entropy_rate_always_non_negative(seed in 0u64..u64::MAX, n in 200usize..1000usize) {
            let stream = gen_even_process(n, seed);
            let outcome = infer_fast(&stream, depth2_config(5));
            prop_assert!(outcome.cognitive_split.entropy_rate.point >= 0.0);
        }

        // Inference always reports at least one state.
        #[test]
        fn at_least_one_state(seed in 0u64..u64::MAX, n in 50usize..500usize) {
            let stream = gen_golden_mean(n, seed);
            let outcome = infer_fast(&stream, depth2_config(5));
            prop_assert!(outcome.num_states >= 1);
        }

        // Bootstrap intervals satisfy lower <= point <= upper for both estimates.
        #[test]
        fn ci_bounds_ordered(seed in 0u64..u64::MAX, n in 500usize..2000usize) {
            let stream = gen_even_process(n, seed);
            let outcome = infer(&stream, depth2_config(20));
            let complexity = &outcome.cognitive_split.statistical_complexity;
            let entropy = &outcome.cognitive_split.entropy_rate;
            prop_assert!(complexity.lower <= complexity.point, "C lower > point");
            prop_assert!(complexity.point <= complexity.upper, "C point > upper");
            prop_assert!(entropy.lower <= entropy.point, "H lower > point");
            prop_assert!(entropy.point <= entropy.upper, "H point > upper");
        }
    }
}