#![forbid(unsafe_code)]
#![deny(clippy::all, clippy::pedantic)]
#![allow(
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::cast_sign_loss,
clippy::similar_names,
clippy::doc_markdown,
clippy::must_use_candidate,
clippy::needless_pass_by_value,
clippy::unreadable_literal,
clippy::missing_panics_doc,
clippy::missing_errors_doc,
clippy::doc_overindented_list_items,
clippy::ptr_arg
)]
pub mod bootstrap_backend;
pub mod complexity;
pub mod cssr;
pub mod quick_screen;
pub mod symbolize;
pub use symbolize::WordSymbolizer;
use complexity::{bootstrap_ci, compute_metrics, stationary_distribution};
use cssr::run_cssr;
use pacr_types::CognitiveSplit;
/// Tuning knobs consumed by [`infer`] and [`infer_fast`].
#[derive(Debug, Clone)]
pub struct Config {
    /// Passed to `run_cssr` as its maximum-depth argument.
    pub max_depth: usize,
    /// Passed to `run_cssr` as its `alpha` argument
    /// (presumably a significance level — confirm in the `cssr` module).
    pub alpha: f64,
    /// Passed to `bootstrap_ci`; only [`infer`] reads this field.
    pub bootstrap_b: usize,
    /// Passed to `run_cssr` as the alphabet size.
    pub alphabet_size: usize,
}

impl Default for Config {
    /// Baseline settings: `max_depth = 4`, `alpha = 0.001`,
    /// `bootstrap_b = 200`, `alphabet_size = 2`.
    fn default() -> Self {
        const MAX_DEPTH: usize = 4;
        const ALPHA: f64 = 0.001;
        const BOOTSTRAP_B: usize = 200;
        const ALPHABET_SIZE: usize = 2;

        Config {
            max_depth: MAX_DEPTH,
            alpha: ALPHA,
            bootstrap_b: BOOTSTRAP_B,
            alphabet_size: ALPHABET_SIZE,
        }
    }
}
/// Value returned by [`infer`] and [`infer_fast`].
#[derive(Debug, Clone)]
pub struct InferResult {
/// Statistical-complexity and entropy-rate estimates, packaged as a
/// `pacr_types::CognitiveSplit`.
pub cognitive_split: CognitiveSplit,
/// Length of the `states` collection returned by `run_cssr`; fixed at 1
/// when the input slice is empty.
pub num_states: usize,
}
#[must_use]
pub fn infer(symbols: &[u8], cfg: Config) -> InferResult {
if symbols.is_empty() {
return InferResult {
cognitive_split: zero_split(),
num_states: 1,
};
}
let result = run_cssr(symbols, cfg.alphabet_size, cfg.max_depth, cfg.alpha);
let num_states = result.states.len();
let (c_mu, h_mu) = bootstrap_ci(&result, symbols, cfg.bootstrap_b);
InferResult {
cognitive_split: CognitiveSplit {
statistical_complexity: c_mu,
entropy_rate: h_mu,
},
num_states,
}
}
/// Variant of [`infer`] that skips `bootstrap_ci` and reports point values
/// only.
///
/// After `run_cssr`, the stationary distribution is obtained from
/// `stationary_distribution(&result, symbols)` and fed, together with the
/// inferred states, to `compute_metrics`. Both resulting numbers are wrapped
/// with `pacr_types::Estimate::exact`. `cfg.bootstrap_b` is not read here.
///
/// An empty `symbols` slice yields a single state with both estimates set to
/// exactly zero.
#[must_use]
pub fn infer_fast(symbols: &[u8], cfg: Config) -> InferResult {
    if symbols.is_empty() {
        return InferResult {
            num_states: 1,
            cognitive_split: zero_split(),
        };
    }

    let machine = run_cssr(symbols, cfg.alphabet_size, cfg.max_depth, cfg.alpha);
    let stationary = stationary_distribution(&machine, symbols);
    let (complexity, rate) = compute_metrics(&machine.states, &stationary);

    let cognitive_split = CognitiveSplit {
        statistical_complexity: pacr_types::Estimate::exact(complexity),
        entropy_rate: pacr_types::Estimate::exact(rate),
    };

    InferResult {
        cognitive_split,
        num_states: machine.states.len(),
    }
}
/// Builds the split reported for empty input: both fields are
/// `Estimate::exact(0.0)`.
fn zero_split() -> CognitiveSplit {
    let exact_zero = || pacr_types::Estimate::exact(0.0);
    CognitiveSplit {
        statistical_complexity: exact_zero(),
        entropy_rate: exact_zero(),
    }
}
/// Shannon entropy, in bits per symbol, of the single-symbol histogram of
/// `symbols`.
///
/// Each symbol is counted on its own — no context or history is considered —
/// so despite the name this is the order-0 entropy `-Σ p·log2(p)` of the
/// observed frequencies.
///
/// Symbols with value `>= alphabet_size` are ignored. They are excluded from
/// both the histogram and the normalising total, so the remaining
/// frequencies always sum to one.
///
/// Returns `0.0` when `symbols` is empty or contains no in-range symbol
/// (including `alphabet_size == 0`).
#[must_use]
pub fn empirical_entropy_rate(symbols: &[u8], alphabet_size: usize) -> f64 {
    // A `u8` takes at most 256 distinct values, so a fixed table covers every
    // possible symbol no matter how large `alphabet_size` is.
    let mut counts = [0_u64; 256];
    let mut in_range = 0_u64;
    for &sym in symbols {
        let idx = usize::from(sym);
        if idx < alphabet_size {
            counts[idx] += 1;
            in_range += 1;
        }
    }

    if in_range == 0 {
        return 0.0;
    }

    // Normalise by the number of symbols actually counted; dividing by
    // `symbols.len()` would leak probability mass whenever something was
    // skipped above.
    let total = in_range as f64;
    counts
        .iter()
        .filter(|&&count| count > 0)
        .fold(0.0_f64, |h, &count| {
            let p = count as f64 / total;
            h - p * p.log2()
        })
}
#[cfg(test)]
mod test_utils {
    /// Deterministic xorshift64 generator (shift triple 13 / 7 / 17) used to
    /// build reproducible test sequences.
    pub struct TestRng(u64);

    impl TestRng {
        /// Seeds the generator. A zero seed is swapped for a fixed non-zero
        /// constant, because xor-shifting an all-zero state stays zero forever.
        pub fn new(seed: u64) -> Self {
            match seed {
                0 => Self(0xdead_beef_cafe_babe),
                nonzero => Self(nonzero),
            }
        }

        /// Advances the state by one xorshift step and returns the new state.
        pub fn next_u64(&mut self) -> u64 {
            let mut x = self.0;
            x ^= x << 13;
            x ^= x >> 7;
            x ^= x << 17;
            self.0 = x;
            x
        }

        /// Returns a value in `[0, 1)` built from the top 53 bits of the next
        /// `u64`.
        pub fn next_f64(&mut self) -> f64 {
            let top_bits = self.next_u64() >> 11;
            top_bits as f64 / (1u64 << 53) as f64
        }
    }

    /// Generates `n` binary symbols from a two-state chain whose state always
    /// equals the symbol just emitted.
    ///
    /// After a `0` (and at the start) the next symbol is `0` when the draw is
    /// below 2/3; after a `1` it is `0` when the draw is below 1/3.
    ///
    /// NOTE(review): this is a first-order Markov chain over the last symbol —
    /// confirm it is the intended "Even Process" variant.
    pub fn gen_even_process(n: usize, seed: u64) -> Vec<u8> {
        let mut rng = TestRng::new(seed);
        let mut last = 0u8;
        (0..n)
            .map(|_| {
                let draw = rng.next_f64();
                let zero_below = if last == 0 { 2.0 / 3.0 } else { 1.0 / 3.0 };
                let sym = if draw < zero_below { 0u8 } else { 1u8 };
                last = sym;
                sym
            })
            .collect()
    }

    /// Generates `n` binary symbols in which a `1` is always followed by a `0`.
    ///
    /// State 1 (the start state) emits `0` and stays put when the draw is
    /// below 0.5, otherwise it emits `1` and moves to state 0. State 0 always
    /// emits `0` and returns to state 1.
    pub fn gen_golden_mean(n: usize, seed: u64) -> Vec<u8> {
        let mut rng = TestRng::new(seed);
        let mut out = Vec::with_capacity(n);
        let mut state = 1u8;
        while out.len() < n {
            // Drawn on every step — even when state 0 ignores it — so the
            // random stream stays aligned with the step index.
            let draw = rng.next_f64();
            let (sym, next) = match state {
                0 => (0u8, 1u8),
                _ if draw < 0.5 => (0u8, 1u8),
                _ => (1u8, 0u8),
            };
            out.push(sym);
            state = next;
        }
        out
    }
}
#[cfg(test)]
mod tests {
    use super::test_utils::{gen_even_process, gen_golden_mean};
    use super::*;
    use approx::assert_relative_eq;

    /// Configuration shared by every known-answer test. All fields are spelled
    /// out so the tests do not depend on `Config::default()`.
    fn kat_cfg() -> Config {
        Config {
            max_depth: 2,
            alpha: 0.001,
            bootstrap_b: 200,
            alphabet_size: 2,
        }
    }

    /// The fixed 10 000-symbol sample used by the `kat_even_process_*` tests.
    fn even_sample() -> Vec<u8> {
        gen_even_process(10_000, 42)
    }

    /// The fixed 10 000-symbol sample used by the `kat_golden_mean_*` tests.
    fn golden_sample() -> Vec<u8> {
        gen_golden_mean(10_000, 99)
    }

    #[test]
    fn infer_empty_does_not_panic() {
        let outcome = infer(&[], Config::default());
        assert_eq!(outcome.num_states, 1);
        assert!(outcome.cognitive_split.statistical_complexity.point >= 0.0);
    }

    #[test]
    fn infer_single_symbol_stream() {
        let cfg = Config {
            max_depth: 2,
            ..Config::default()
        };
        let constant = vec![0u8; 1000];
        let outcome = infer(&constant, cfg);
        assert_eq!(outcome.num_states, 1, "constant stream → 1 state");
        assert!(outcome.cognitive_split.entropy_rate.point < 0.05);
    }

    #[test]
    fn infer_alternating_stream_two_states() {
        let cfg = Config {
            max_depth: 2,
            alpha: 0.001,
            ..Config::default()
        };
        let alternating: Vec<u8> = (0..2000).map(|i| (i % 2) as u8).collect();
        let outcome = infer_fast(&alternating, cfg);
        assert_eq!(outcome.num_states, 2, "alternating → 2 states");
    }

    #[test]
    fn kat_even_process_state_count() {
        let outcome = infer_fast(&even_sample(), kat_cfg());
        assert_eq!(
            outcome.num_states, 2,
            "Even Process must infer exactly 2 states, got {}",
            outcome.num_states
        );
    }

    #[test]
    fn kat_even_process_complexity() {
        let outcome = infer(&even_sample(), kat_cfg());
        let c = &outcome.cognitive_split.statistical_complexity;
        assert_relative_eq!(c.point, 1.0, epsilon = 0.05);
        // The point estimate must sit inside its own interval (tiny slack for
        // floating-point noise).
        assert!(c.lower <= c.point + 1e-9 && c.point <= c.upper + 1e-9);
    }

    #[test]
    fn kat_even_process_entropy_rate() {
        let outcome = infer(&even_sample(), kat_cfg());
        let h = &outcome.cognitive_split.entropy_rate;
        assert_relative_eq!(h.point, 0.9183, epsilon = 0.05);
        assert!(h.lower <= h.point + 1e-9 && h.point <= h.upper + 1e-9);
    }

    #[test]
    fn kat_golden_mean_state_count() {
        let outcome = infer_fast(&golden_sample(), kat_cfg());
        assert_eq!(
            outcome.num_states, 2,
            "Golden Mean must infer exactly 2 states, got {}",
            outcome.num_states
        );
    }

    #[test]
    fn kat_golden_mean_complexity() {
        let outcome = infer(&golden_sample(), kat_cfg());
        let c = &outcome.cognitive_split.statistical_complexity;
        assert_relative_eq!(c.point, 0.9183, epsilon = 0.05);
        assert!(c.lower <= c.point + 1e-9 && c.point <= c.upper + 1e-9);
    }

    #[test]
    fn kat_golden_mean_entropy_rate() {
        let outcome = infer(&golden_sample(), kat_cfg());
        let h = &outcome.cognitive_split.entropy_rate;
        // NOTE(review): with the generator's 0.5 branch probability the
        // analytic value looks like 2/3 ≈ 0.6667 rather than 0.6792; both are
        // inside the 0.05 tolerance — confirm the intended reference value.
        assert_relative_eq!(h.point, 0.6792, epsilon = 0.05);
        assert!(h.lower <= h.point + 1e-9 && h.point <= h.upper + 1e-9);
    }

    #[test]
    fn large_sequence_does_not_oom() {
        // One multiply-add per index, reduced modulo the alphabet size of 4.
        let scramble = |i: u64| {
            i.wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407)
        };
        let seq: Vec<u8> = (0..50_000u64).map(|i| (scramble(i) % 4) as u8).collect();
        let cfg = Config {
            max_depth: 4,
            alpha: 0.001,
            bootstrap_b: 10,
            alphabet_size: 4,
        };
        let outcome = infer_fast(&seq, cfg);
        assert!(outcome.num_states >= 1);
        assert!(outcome.cognitive_split.statistical_complexity.point >= 0.0);
    }
}
#[cfg(test)]
mod prop_tests {
    use super::test_utils::{gen_even_process, gen_golden_mean};
    use super::*;
    use proptest::prelude::*;

    /// Depth-2 configuration with the given `bootstrap_b`; every other field
    /// comes from `Config::default()`.
    fn depth2_cfg(bootstrap_b: usize) -> Config {
        Config {
            max_depth: 2,
            bootstrap_b,
            ..Config::default()
        }
    }

    proptest! {
        #[test]
        fn complexity_always_non_negative(seed in 0u64..u64::MAX, n in 200usize..1000usize) {
            let outcome = infer_fast(&gen_even_process(n, seed), depth2_cfg(5));
            prop_assert!(outcome.cognitive_split.statistical_complexity.point >= 0.0);
        }

        #[test]
        fn entropy_rate_always_non_negative(seed in 0u64..u64::MAX, n in 200usize..1000usize) {
            let outcome = infer_fast(&gen_even_process(n, seed), depth2_cfg(5));
            prop_assert!(outcome.cognitive_split.entropy_rate.point >= 0.0);
        }

        #[test]
        fn at_least_one_state(seed in 0u64..u64::MAX, n in 50usize..500usize) {
            let outcome = infer_fast(&gen_golden_mean(n, seed), depth2_cfg(5));
            prop_assert!(outcome.num_states >= 1);
        }

        #[test]
        fn ci_bounds_ordered(seed in 0u64..u64::MAX, n in 500usize..2000usize) {
            let outcome = infer(&gen_even_process(n, seed), depth2_cfg(20));
            let complexity = &outcome.cognitive_split.statistical_complexity;
            let rate = &outcome.cognitive_split.entropy_rate;
            prop_assert!(complexity.lower <= complexity.point, "C lower > point");
            prop_assert!(complexity.point <= complexity.upper, "C point > upper");
            prop_assert!(rate.lower <= rate.point, "H lower > point");
            prop_assert!(rate.point <= rate.upper, "H point > upper");
        }
    }
}