use crate::error::{StatsError, StatsResult};
struct PrefixSums {
sum: Vec<f64>, sum2: Vec<f64>, }
impl PrefixSums {
fn new(x: &[f64]) -> Self {
let n = x.len();
let mut sum = vec![0.0; n + 1];
let mut sum2 = vec![0.0; n + 1];
for (i, &v) in x.iter().enumerate() {
sum[i + 1] = sum[i] + v;
sum2[i + 1] = sum2[i] + v * v;
}
Self { sum, sum2 }
}
fn cost_l2(&self, s: usize, e: usize) -> f64 {
if e <= s {
return 0.0;
}
let n = (e - s) as f64;
let sx = self.sum[e] - self.sum[s];
let sx2 = self.sum2[e] - self.sum2[s];
sx2 - sx * sx / n
}
}
pub fn bic_penalty(n: usize, sigma: f64) -> f64 {
if n == 0 {
return 0.0;
}
sigma * sigma * (n as f64).ln()
}
pub fn pelt_detect(x: &[f64], penalty: f64) -> StatsResult<Vec<usize>> {
let n = x.len();
if n < 2 {
return Err(StatsError::InsufficientData(
"PELT requires at least 2 data points".to_string(),
));
}
let ps = PrefixSums::new(x);
let mut f = vec![f64::INFINITY; n + 1];
let mut cp = vec![0usize; n + 1];
f[0] = -penalty;
let mut cands: Vec<usize> = vec![0];
for t in 1..=n {
let mut best_f = f64::INFINITY;
let mut best_cp = 0;
let mut surviving: Vec<usize> = Vec::with_capacity(cands.len());
for &tau in &cands {
let cost = f[tau] + ps.cost_l2(tau, t) + penalty;
if cost < best_f {
best_f = cost;
best_cp = tau;
}
if f[tau] + ps.cost_l2(tau, t) <= best_f {
surviving.push(tau);
}
}
f[t] = best_f;
cp[t] = best_cp;
cands = surviving;
cands.push(t);
}
let mut change_points = Vec::new();
let mut t = n;
loop {
let prev = cp[t];
if prev == 0 {
break;
}
change_points.push(prev);
t = prev;
}
change_points.sort_unstable();
Ok(change_points)
}
pub fn cusum_detect(x: &[f64], threshold: f64) -> StatsResult<Vec<usize>> {
let n = x.len();
if n < 2 {
return Err(StatsError::InsufficientData(
"CUSUM requires at least 2 data points".to_string(),
));
}
if threshold < 0.0 {
return Err(StatsError::InvalidArgument(
"threshold must be non-negative".to_string(),
));
}
let mean = x.iter().sum::<f64>() / n as f64;
let mut change_points = Vec::new();
let mut cusum_pos = 0.0f64;
let mut cusum_neg = 0.0f64;
let mut ref_level = mean;
let mut seg_start = 0usize;
let _ = ref_level;
for (i, &xi) in x.iter().enumerate() {
cusum_pos = (cusum_pos + xi - mean).max(0.0);
cusum_neg = (cusum_neg - xi + mean).max(0.0);
if cusum_pos > threshold || cusum_neg > threshold {
if i > seg_start {
change_points.push(i);
}
cusum_pos = 0.0;
cusum_neg = 0.0;
seg_start = i + 1;
if seg_start < n {
let remaining = &x[seg_start..];
ref_level = remaining.iter().sum::<f64>() / remaining.len() as f64;
}
}
}
change_points.sort_unstable();
change_points.dedup();
Ok(change_points)
}
pub fn binary_segmentation(x: &[f64], n_bkps: usize) -> StatsResult<Vec<usize>> {
let n = x.len();
if n < 2 {
return Err(StatsError::InsufficientData(
"Binary segmentation requires at least 2 data points".to_string(),
));
}
if n_bkps == 0 {
return Ok(Vec::new());
}
let ps = PrefixSums::new(x);
let mut segments: Vec<(usize, usize)> = vec![(0, n)];
let mut change_points = Vec::new();
for _ in 0..n_bkps {
let mut best_gain = f64::NEG_INFINITY;
let mut best_split = 0usize;
let mut best_seg_idx = 0usize;
for (si, &(s, e)) in segments.iter().enumerate() {
if e - s < 2 {
continue;
}
let cost_full = ps.cost_l2(s, e);
for t in s + 1..e {
let gain = cost_full - ps.cost_l2(s, t) - ps.cost_l2(t, e);
if gain > best_gain {
best_gain = gain;
best_split = t;
best_seg_idx = si;
}
}
}
if best_gain <= 0.0 {
break; }
let (s, e) = segments[best_seg_idx];
segments.remove(best_seg_idx);
segments.push((s, best_split));
segments.push((best_split, e));
change_points.push(best_split);
}
change_points.sort_unstable();
Ok(change_points)
}
#[cfg(test)]
mod tests {
use super::*;
fn step_series(n: usize, cp: usize, level_a: f64, level_b: f64) -> Vec<f64> {
(0..n)
.map(|i| if i < cp { level_a } else { level_b })
.collect()
}
#[test]
fn test_bic_penalty_positive() {
let p = bic_penalty(100, 1.0);
assert!(p > 0.0);
}
#[test]
fn test_pelt_no_change_flat() {
let x = vec![1.0f64; 50];
let cp = pelt_detect(&x, bic_penalty(50, 1.0)).unwrap();
assert!(cp.is_empty(), "flat series should have no change points");
}
#[test]
fn test_pelt_single_step() {
let x = step_series(100, 50, 0.0, 5.0);
let cp = pelt_detect(&x, 2.0 * (100.0f64).ln()).unwrap();
assert!(!cp.is_empty(), "step series must have at least one change point");
let nearest = cp.iter().min_by_key(|&&c| (c as isize - 50).unsigned_abs()).copied().unwrap_or(0);
assert!(
(nearest as isize - 50).abs() <= 5,
"detected CP {nearest} is far from true CP 50"
);
}
#[test]
fn test_pelt_two_steps() {
let mut x: Vec<f64> = vec![0.0; 30];
for i in 10..20 {
x[i] = 4.0;
}
for i in 20..30 {
x[i] = 0.0;
}
let cp = pelt_detect(&x, 1.0).unwrap();
assert!(cp.len() >= 1, "should detect at least one CP");
}
#[test]
fn test_pelt_insufficient_data() {
assert!(pelt_detect(&[1.0], 1.0).is_err());
}
#[test]
fn test_cusum_no_change() {
let x = vec![0.0f64; 50];
let cp = cusum_detect(&x, 1.0).unwrap();
assert!(cp.is_empty());
}
#[test]
fn test_cusum_detects_step() {
let x = step_series(100, 50, 0.0, 10.0);
let std_est = 5.0; let cp = cusum_detect(&x, std_est).unwrap();
assert!(!cp.is_empty(), "should detect the step");
}
#[test]
fn test_cusum_negative_threshold_error() {
assert!(cusum_detect(&[1.0, 2.0], -1.0).is_err());
}
#[test]
fn test_binary_segmentation_zero_bkps() {
let x: Vec<f64> = (0..20).map(|i| i as f64).collect();
let cp = binary_segmentation(&x, 0).unwrap();
assert!(cp.is_empty());
}
#[test]
fn test_binary_segmentation_single_step() {
let x = step_series(60, 30, 0.0, 3.0);
let cp = binary_segmentation(&x, 1).unwrap();
assert_eq!(cp.len(), 1);
assert!(
(cp[0] as isize - 30).abs() <= 3,
"detected CP {} is far from true CP 30",
cp[0]
);
}
#[test]
fn test_binary_segmentation_sorted_output() {
let mut x: Vec<f64> = vec![0.0; 90];
for i in 30..60 { x[i] = 5.0; }
let cp = binary_segmentation(&x, 3).unwrap();
for w in cp.windows(2) {
assert!(w[0] < w[1], "output not sorted");
}
}
#[test]
fn test_binary_segmentation_insufficient_data() {
assert!(binary_segmentation(&[1.0], 1).is_err());
}
}