numrs2/new_modules/timeseries/
changepoint.rs

1//! Change Point Detection for Time Series
2//!
3//! Implements various algorithms for detecting structural breaks and change points
4//! in time series data, including changes in mean, variance, and distribution.
5//!
6//! ## Algorithms Implemented
7//!
8//! - **CUSUM** (Cumulative Sum): Forward/backward CUSUM with control limits
9//! - **PELT** (Pruned Exact Linear Time): Optimal change point detection with BIC/AIC/MBIC penalties
10//! - **Binary Segmentation**: Fast approximate change point detection via recursive splitting
11//! - **Bayesian Change Point Detection**: Online Bayesian (Adams & MacKay) with run length probabilities
12//!
13//! ## Utility Functions
14//!
15//! - Cost functions: L2, L1, normal log-likelihood
16//! - Segment statistics: mean, variance per segment
17//! - Multiple change point summary
18//!
19//! ## References
20//!
21//! - Killick, R., Fearnhead, P., & Eckley, I. A. (2012). "Optimal detection of changepoints
22//!   with a linear computational cost." *JASA*, 107(500), 1590-1598.
23//! - Adams, R. P., & MacKay, D. J. C. (2007). "Bayesian online changepoint detection."
24//!   *arXiv preprint arXiv:0710.3742*.
25//! - Page, E. S. (1954). "Continuous inspection schemes." *Biometrika*, 41(1/2), 100-115.
26
27use crate::error::{NumRs2Error, Result};
28use scirs2_core::ndarray::{s, Array1, Array2, ArrayView1};
29
30// =============================================================================
31// Cost Functions
32// =============================================================================
33
34/// Cost function type for change point detection.
35#[derive(Debug, Clone, Copy, PartialEq, Eq)]
36pub enum CostFunction {
37    /// L2 (least squares) cost: sum of squared deviations from segment mean
38    L2,
39    /// L1 (least absolute deviations) cost: sum of absolute deviations from segment median
40    L1,
41    /// Normal log-likelihood cost: negative log-likelihood under Gaussian assumption
42    NormalLogLikelihood,
43}
44
45/// Compute cost for a segment under a given cost function.
46///
47/// # Arguments
48/// * `segment` - The data segment
49/// * `cost_fn` - Which cost function to use
50///
51/// # Returns
52/// The cost value for the segment
53pub fn segment_cost(segment: &ArrayView1<f64>, cost_fn: CostFunction) -> Result<f64> {
54    let n = segment.len();
55    if n == 0 {
56        return Ok(0.0);
57    }
58
59    match cost_fn {
60        CostFunction::L2 => {
61            let mean = segment.iter().sum::<f64>() / n as f64;
62            let cost = segment.iter().map(|&x| (x - mean).powi(2)).sum::<f64>();
63            Ok(cost)
64        }
65        CostFunction::L1 => {
66            let mut sorted: Vec<f64> = segment.iter().copied().collect();
67            sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
68            let median = if n.is_multiple_of(2) {
69                (sorted[n / 2 - 1] + sorted[n / 2]) / 2.0
70            } else {
71                sorted[n / 2]
72            };
73            let cost = segment.iter().map(|&x| (x - median).abs()).sum::<f64>();
74            Ok(cost)
75        }
76        CostFunction::NormalLogLikelihood => {
77            let nf = n as f64;
78            let mean = segment.iter().sum::<f64>() / nf;
79            let variance = segment.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / nf;
80            if variance <= 1e-15 {
81                // Near-zero variance: assign a large but finite cost
82                return Ok(nf * 50.0);
83            }
84            // Negative log-likelihood: (n/2)*ln(2*pi*sigma^2) + n/2
85            Ok(0.5 * nf * (2.0 * std::f64::consts::PI * variance).ln() + 0.5 * nf)
86        }
87    }
88}
89
90// =============================================================================
91// Penalty Functions
92// =============================================================================
93
94/// Penalty type for model selection in change point detection.
95#[derive(Debug, Clone, Copy, PartialEq, Eq)]
96pub enum PenaltyType {
97    /// Bayesian Information Criterion: penalty = log(n)
98    BIC,
99    /// Akaike Information Criterion: penalty = 2
100    AIC,
101    /// Modified BIC: penalty = 3 * log(n)
102    MBIC,
103    /// Manual penalty value (stored externally)
104    Manual,
105}
106
107/// Compute penalty value for a given type.
108///
109/// # Arguments
110/// * `penalty_type` - The penalty type
111/// * `n` - Number of data points
112/// * `manual_value` - Manual penalty value (used only for `PenaltyType::Manual`)
113pub fn compute_penalty(penalty_type: PenaltyType, n: usize, manual_value: f64) -> f64 {
114    let nf = n as f64;
115    match penalty_type {
116        PenaltyType::BIC => nf.ln(),
117        PenaltyType::AIC => 2.0,
118        PenaltyType::MBIC => 3.0 * nf.ln(),
119        PenaltyType::Manual => manual_value,
120    }
121}
122
123// =============================================================================
124// Segment Statistics
125// =============================================================================
126
127/// Statistics for a single segment of a time series.
128#[derive(Debug, Clone)]
129pub struct SegmentStats {
130    /// Start index of the segment (inclusive)
131    pub start: usize,
132    /// End index of the segment (exclusive)
133    pub end: usize,
134    /// Segment mean
135    pub mean: f64,
136    /// Segment variance
137    pub variance: f64,
138    /// Number of observations in the segment
139    pub count: usize,
140}
141
142/// Compute segment statistics given data and change point locations.
143///
144/// # Arguments
145/// * `data` - The full time series
146/// * `change_points` - Sorted change point indices
147///
148/// # Returns
149/// A vector of `SegmentStats`, one per segment
150pub fn compute_segment_statistics(
151    data: &ArrayView1<f64>,
152    change_points: &[usize],
153) -> Result<Vec<SegmentStats>> {
154    let n = data.len();
155    if n == 0 {
156        return Ok(Vec::new());
157    }
158
159    let mut boundaries = vec![0];
160    for &cp in change_points {
161        if cp > 0 && cp < n {
162            boundaries.push(cp);
163        }
164    }
165    boundaries.push(n);
166    boundaries.sort_unstable();
167    boundaries.dedup();
168
169    let mut stats = Vec::with_capacity(boundaries.len() - 1);
170    for w in boundaries.windows(2) {
171        let start = w[0];
172        let end = w[1];
173        let count = end - start;
174        if count == 0 {
175            continue;
176        }
177        let segment = data.slice(s![start..end]);
178        let mean = segment.iter().sum::<f64>() / count as f64;
179        let variance = if count > 1 {
180            segment.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / count as f64
181        } else {
182            0.0
183        };
184        stats.push(SegmentStats {
185            start,
186            end,
187            mean,
188            variance,
189            count,
190        });
191    }
192
193    Ok(stats)
194}
195
196// =============================================================================
197// Change Point Result
198// =============================================================================
199
200/// Result of change point detection.
201#[derive(Debug, Clone)]
202pub struct ChangePointResult {
203    /// Detected change point locations (indices)
204    pub locations: Vec<usize>,
205    /// Cost or statistic at each change point
206    pub costs: Vec<f64>,
207    /// Total optimal cost
208    pub total_cost: f64,
209}
210
211/// Summary of a multiple change point analysis.
212#[derive(Debug, Clone)]
213pub struct ChangePointSummary {
214    /// Number of change points detected
215    pub n_changepoints: usize,
216    /// Change point locations
217    pub locations: Vec<usize>,
218    /// Per-segment statistics
219    pub segments: Vec<SegmentStats>,
220    /// Total cost
221    pub total_cost: f64,
222}
223
224/// Produce a summary of a change point detection result.
225///
226/// # Arguments
227/// * `data` - The original time series
228/// * `result` - The detection result
229pub fn summarize_changepoints(
230    data: &ArrayView1<f64>,
231    result: &ChangePointResult,
232) -> Result<ChangePointSummary> {
233    let segments = compute_segment_statistics(data, &result.locations)?;
234    Ok(ChangePointSummary {
235        n_changepoints: result.locations.len(),
236        locations: result.locations.clone(),
237        segments,
238        total_cost: result.total_cost,
239    })
240}
241
242// =============================================================================
243// CUSUM (Cumulative Sum) Detection
244// =============================================================================
245
246/// CUSUM (Cumulative Sum) control chart for change detection.
247///
248/// Implements both forward and backward CUSUM for detecting mean shifts.
249/// The two-sided CUSUM tracks both upward and downward shifts simultaneously:
250///
251/// - S_high(t) = max(0, S_high(t-1) + (x_t - mu) - k)
252/// - S_low(t) = max(0, S_low(t-1) - (x_t - mu) - k)
253///
254/// A change point is detected when either S_high or S_low exceeds the threshold h.
255///
256/// # References
257///
258/// Page, E. S. (1954). "Continuous inspection schemes." *Biometrika*, 41(1/2), 100-115.
259#[derive(Debug, Clone)]
260pub struct Cusum {
261    /// Target mean (reference value)
262    pub target: f64,
263    /// Threshold for detection (h parameter)
264    pub threshold: f64,
265    /// Drift parameter (k parameter, typically 0.5 * shift_size)
266    pub drift: f64,
267}
268
269impl Cusum {
270    /// Create a new CUSUM detector.
271    ///
272    /// # Arguments
273    /// * `target` - Target mean (reference value)
274    /// * `threshold` - Detection threshold (h), typically 4-5 standard deviations
275    /// * `drift` - Drift parameter (k), typically 0.5 * expected shift size
276    pub fn new(target: f64, threshold: f64, drift: f64) -> Self {
277        Self {
278            target,
279            threshold,
280            drift,
281        }
282    }
283
284    /// Detect change points using two-sided CUSUM.
285    ///
286    /// Returns all time indices where the CUSUM statistic exceeds the threshold.
287    pub fn detect(&self, data: &ArrayView1<f64>) -> Result<ChangePointResult> {
288        if data.is_empty() {
289            return Ok(ChangePointResult {
290                locations: Vec::new(),
291                costs: Vec::new(),
292                total_cost: 0.0,
293            });
294        }
295
296        let mut locations = Vec::new();
297        let mut costs = Vec::new();
298        let mut s_high = 0.0_f64;
299        let mut s_low = 0.0_f64;
300
301        for (i, &x) in data.iter().enumerate() {
302            s_high = (s_high + (x - self.target) - self.drift).max(0.0);
303            s_low = (s_low - (x - self.target) - self.drift).max(0.0);
304
305            if s_high > self.threshold {
306                locations.push(i);
307                costs.push(s_high);
308                s_high = 0.0;
309            } else if s_low > self.threshold {
310                locations.push(i);
311                costs.push(s_low);
312                s_low = 0.0;
313            }
314        }
315
316        let total_cost = costs.iter().sum();
317        Ok(ChangePointResult {
318            locations,
319            costs,
320            total_cost,
321        })
322    }
323
324    /// Compute forward CUSUM statistics for the entire series.
325    ///
326    /// Returns the cumulative sum statistics (high and low) at each time step.
327    pub fn forward_cusum(&self, data: &ArrayView1<f64>) -> Result<(Array1<f64>, Array1<f64>)> {
328        let n = data.len();
329        if n == 0 {
330            return Ok((Array1::zeros(0), Array1::zeros(0)));
331        }
332
333        let mut s_high = Array1::zeros(n);
334        let mut s_low = Array1::zeros(n);
335
336        s_high[0] = ((data[0] - self.target) - self.drift).max(0.0);
337        s_low[0] = (-(data[0] - self.target) - self.drift).max(0.0);
338
339        for i in 1..n {
340            s_high[i] = (s_high[i - 1] + (data[i] - self.target) - self.drift).max(0.0);
341            s_low[i] = (s_low[i - 1] - (data[i] - self.target) - self.drift).max(0.0);
342        }
343
344        Ok((s_high, s_low))
345    }
346
347    /// Compute backward CUSUM statistics (running from end to start).
348    ///
349    /// This is useful for confirming change points detected by forward CUSUM.
350    pub fn backward_cusum(&self, data: &ArrayView1<f64>) -> Result<(Array1<f64>, Array1<f64>)> {
351        let n = data.len();
352        if n == 0 {
353            return Ok((Array1::zeros(0), Array1::zeros(0)));
354        }
355
356        let mut s_high = Array1::zeros(n);
357        let mut s_low = Array1::zeros(n);
358
359        let last = n - 1;
360        s_high[last] = ((data[last] - self.target) - self.drift).max(0.0);
361        s_low[last] = (-(data[last] - self.target) - self.drift).max(0.0);
362
363        for i in (0..last).rev() {
364            s_high[i] = (s_high[i + 1] + (data[i] - self.target) - self.drift).max(0.0);
365            s_low[i] = (s_low[i + 1] - (data[i] - self.target) - self.drift).max(0.0);
366        }
367
368        Ok((s_high, s_low))
369    }
370
371    /// Compute control limits for the CUSUM chart.
372    ///
373    /// Returns (upper_limit, lower_limit) arrays. A value crossing the control
374    /// limit indicates an out-of-control condition.
375    pub fn control_limits(&self, n: usize) -> (Array1<f64>, Array1<f64>) {
376        let upper = Array1::from_elem(n, self.threshold);
377        let lower = Array1::from_elem(n, self.threshold);
378        (upper, lower)
379    }
380}
381
382// =============================================================================
383// PELT (Pruned Exact Linear Time)
384// =============================================================================
385
386/// Change type for PELT detection.
387#[derive(Debug, Clone, Copy, PartialEq, Eq)]
388pub enum ChangeType {
389    /// Detect changes in mean only
390    Mean,
391    /// Detect changes in variance only
392    Variance,
393    /// Detect changes in both mean and variance
394    MeanAndVariance,
395}
396
397/// PELT (Pruned Exact Linear Time) algorithm for optimal change point detection.
398///
399/// Efficiently finds the optimal set of change points by dynamic programming
400/// with a pruning step that achieves O(n) average time complexity.
401///
402/// Cost function: sum[C(y_{tau_i+1:tau_{i+1}})] + beta * K
403/// where C is the segment cost, beta is penalty, and K is number of change points.
404///
405/// # References
406///
407/// Killick, R., Fearnhead, P., & Eckley, I. A. (2012). "Optimal detection of changepoints
408/// with a linear computational cost." *JASA*, 107(500), 1590-1598.
409#[derive(Debug, Clone)]
410pub struct Pelt {
411    /// Penalty for each change point
412    pub penalty: f64,
413    /// Minimum segment length between change points
414    pub min_size: usize,
415    /// Cost function to use
416    pub cost_fn: CostFunction,
417    /// Penalty type (for automatic penalty computation)
418    pub penalty_type: PenaltyType,
419}
420
421impl Pelt {
422    /// Create a new PELT detector with manual penalty.
423    ///
424    /// # Arguments
425    /// * `penalty` - Penalty for each change point (typical: log(n) to 2*log(n))
426    /// * `min_size` - Minimum segment length (default: 2)
427    pub fn new(penalty: f64, min_size: usize) -> Self {
428        Self {
429            penalty,
430            min_size: min_size.max(1),
431            cost_fn: CostFunction::NormalLogLikelihood,
432            penalty_type: PenaltyType::Manual,
433        }
434    }
435
436    /// Create PELT with a specific penalty type and cost function.
437    ///
438    /// The penalty value will be computed automatically from the data length.
439    pub fn with_options(penalty_type: PenaltyType, cost_fn: CostFunction, min_size: usize) -> Self {
440        Self {
441            penalty: 0.0, // will be computed from data
442            min_size: min_size.max(1),
443            cost_fn,
444            penalty_type,
445        }
446    }
447
448    /// Detect change points based on the configured change type.
449    pub fn detect(
450        &self,
451        data: &ArrayView1<f64>,
452        change_type: ChangeType,
453    ) -> Result<ChangePointResult> {
454        match change_type {
455            ChangeType::Mean => self.detect_mean_change(data),
456            ChangeType::Variance => self.detect_variance_change(data),
457            ChangeType::MeanAndVariance => self.detect_mean_variance_change(data),
458        }
459    }
460
461    /// Detect change points in mean.
462    pub fn detect_mean_change(&self, data: &ArrayView1<f64>) -> Result<ChangePointResult> {
463        self.pelt_core(data, CostFunction::L2)
464    }
465
466    /// Detect change points in variance.
467    pub fn detect_variance_change(&self, data: &ArrayView1<f64>) -> Result<ChangePointResult> {
468        self.pelt_core(data, CostFunction::NormalLogLikelihood)
469    }
470
471    /// Detect change points in both mean and variance.
472    pub fn detect_mean_variance_change(&self, data: &ArrayView1<f64>) -> Result<ChangePointResult> {
473        self.pelt_core(data, CostFunction::NormalLogLikelihood)
474    }
475
476    /// Core PELT dynamic programming algorithm.
477    fn pelt_core(
478        &self,
479        data: &ArrayView1<f64>,
480        cost_fn: CostFunction,
481    ) -> Result<ChangePointResult> {
482        let n = data.len();
483        if n < 2 * self.min_size {
484            return Err(NumRs2Error::ValueError(
485                "Insufficient data for change point detection".to_string(),
486            ));
487        }
488
489        // Resolve penalty
490        let penalty = if self.penalty_type == PenaltyType::Manual {
491            self.penalty
492        } else {
493            compute_penalty(self.penalty_type, n, self.penalty)
494        };
495
496        // Dynamic programming arrays
497        // f[t] = optimal cost for data[0..t]
498        let mut f = vec![f64::INFINITY; n + 1];
499        let mut last_cp = vec![0_usize; n + 1]; // last change point for backtracking
500        f[0] = -penalty;
501
502        // Candidate set for pruning
503        let mut candidates: Vec<usize> = vec![0];
504
505        for t in self.min_size..=n {
506            let mut best_cost = f64::INFINITY;
507            let mut best_s = 0_usize;
508
509            for &s_val in &candidates {
510                if t - s_val >= self.min_size {
511                    let seg = data.slice(s![s_val..t]);
512                    let c = segment_cost(&seg, cost_fn)?;
513                    let total = f[s_val] + c + penalty;
514                    if total < best_cost {
515                        best_cost = total;
516                        best_s = s_val;
517                    }
518                }
519            }
520
521            if best_cost.is_infinite() {
522                // No valid candidate yet; keep going
523                candidates.push(t);
524                continue;
525            }
526
527            f[t] = best_cost;
528            last_cp[t] = best_s;
529
530            // Pruning step: retain only candidates that can still be optimal
531            candidates.retain(|&s_val| {
532                if t - s_val < self.min_size {
533                    return true;
534                }
535                if let Ok(c) = segment_cost(&data.slice(s![s_val..t]), cost_fn) {
536                    f[s_val] + c + penalty <= best_cost
537                } else {
538                    false
539                }
540            });
541
542            candidates.push(t);
543        }
544
545        // Backtrack to find change points
546        let mut locations = Vec::new();
547        let mut idx = n;
548        while idx > 0 {
549            let prev = last_cp[idx];
550            if prev > 0 {
551                locations.push(prev);
552            }
553            if prev >= idx {
554                break;
555            }
556            idx = prev;
557        }
558        locations.sort_unstable();
559        locations.dedup();
560
561        // Compute per-segment costs
562        let costs = self.compute_costs_for_segments(data, &locations, cost_fn)?;
563        let total_cost = if f[n].is_finite() { f[n] } else { 0.0 };
564
565        Ok(ChangePointResult {
566            locations,
567            costs,
568            total_cost,
569        })
570    }
571
572    /// Compute costs for each segment defined by change points.
573    fn compute_costs_for_segments(
574        &self,
575        data: &ArrayView1<f64>,
576        change_points: &[usize],
577        cost_fn: CostFunction,
578    ) -> Result<Vec<f64>> {
579        let n = data.len();
580        let mut costs = Vec::new();
581        let mut start = 0;
582
583        for &cp in change_points {
584            if cp > start && cp <= n {
585                let seg = data.slice(s![start..cp]);
586                costs.push(segment_cost(&seg, cost_fn)?);
587                start = cp;
588            }
589        }
590
591        if start < n {
592            let seg = data.slice(s![start..n]);
593            costs.push(segment_cost(&seg, cost_fn)?);
594        }
595
596        Ok(costs)
597    }
598}
599
600// =============================================================================
601// Binary Segmentation
602// =============================================================================
603
604/// Binary Segmentation for change point detection.
605///
606/// Greedy algorithm that recursively splits segments at points of maximum change
607/// statistic. Time complexity: O(n log n) average, O(n^2) worst case.
608///
609/// # Algorithm
610///
611/// 1. Compute test statistic for all candidate split points across the full series.
612/// 2. If the maximum statistic exceeds the threshold, accept the split point.
613/// 3. Recursively apply to each resulting sub-segment.
614/// 4. Stop when no segment can be split or max_changepoints is reached.
615#[derive(Debug, Clone)]
616pub struct BinarySegmentation {
617    /// Maximum number of change points to detect
618    pub max_changepoints: usize,
619    /// Threshold for accepting a change point
620    pub threshold: f64,
621    /// Minimum segment size
622    pub min_size: usize,
623    /// Cost function
624    pub cost_fn: CostFunction,
625}
626
627impl BinarySegmentation {
628    /// Create a new binary segmentation detector.
629    pub fn new(max_changepoints: usize, threshold: f64, min_size: usize) -> Self {
630        Self {
631            max_changepoints,
632            threshold,
633            min_size: min_size.max(1),
634            cost_fn: CostFunction::L2,
635        }
636    }
637
638    /// Create with a specific cost function.
639    pub fn with_cost(
640        max_changepoints: usize,
641        threshold: f64,
642        min_size: usize,
643        cost_fn: CostFunction,
644    ) -> Self {
645        Self {
646            max_changepoints,
647            threshold,
648            min_size: min_size.max(1),
649            cost_fn,
650        }
651    }
652
653    /// Detect change points in the data.
654    pub fn detect(&self, data: &ArrayView1<f64>) -> Result<ChangePointResult> {
655        let n = data.len();
656        if n < 2 * self.min_size {
657            return Ok(ChangePointResult {
658                locations: Vec::new(),
659                costs: Vec::new(),
660                total_cost: 0.0,
661            });
662        }
663
664        let mut change_points = Vec::new();
665        let mut segments: Vec<(usize, usize)> = vec![(0, n)];
666        let mut total_cost = 0.0;
667
668        for _ in 0..self.max_changepoints {
669            let mut best_seg_idx = None;
670            let mut best_cp = 0;
671            let mut best_gain = 0.0_f64;
672
673            for (idx, &(start, end)) in segments.iter().enumerate() {
674                if end - start < 2 * self.min_size {
675                    continue;
676                }
677
678                if let Ok((cp, gain)) = self.find_best_split(data, start, end) {
679                    if gain > best_gain {
680                        best_gain = gain;
681                        best_cp = cp;
682                        best_seg_idx = Some(idx);
683                    }
684                }
685            }
686
687            if best_gain < self.threshold {
688                break;
689            }
690
691            if let Some(idx) = best_seg_idx {
692                let (start, end) = segments[idx];
693                segments.remove(idx);
694                segments.push((start, best_cp));
695                segments.push((best_cp, end));
696                change_points.push(best_cp);
697                total_cost += best_gain;
698            } else {
699                break;
700            }
701        }
702
703        change_points.sort_unstable();
704
705        let costs = if change_points.is_empty() {
706            Vec::new()
707        } else {
708            vec![total_cost / change_points.len() as f64; change_points.len()]
709        };
710
711        Ok(ChangePointResult {
712            locations: change_points,
713            costs,
714            total_cost,
715        })
716    }
717
718    /// Find the best split point within a segment [start, end).
719    ///
720    /// The gain is computed as: cost(full_segment) - cost(left) - cost(right)
721    fn find_best_split(
722        &self,
723        data: &ArrayView1<f64>,
724        start: usize,
725        end: usize,
726    ) -> Result<(usize, f64)> {
727        if end - start < 2 * self.min_size {
728            return Err(NumRs2Error::ValueError("Segment too small".to_string()));
729        }
730
731        let full_seg = data.slice(s![start..end]);
732        let full_cost = segment_cost(&full_seg, self.cost_fn)?;
733
734        let mut best_cp = start + self.min_size;
735        let mut best_gain = f64::NEG_INFINITY;
736
737        for t in (start + self.min_size)..(end - self.min_size + 1) {
738            let left = data.slice(s![start..t]);
739            let right = data.slice(s![t..end]);
740
741            let left_cost = segment_cost(&left, self.cost_fn)?;
742            let right_cost = segment_cost(&right, self.cost_fn)?;
743
744            let gain = full_cost - left_cost - right_cost;
745            if gain > best_gain {
746                best_gain = gain;
747                best_cp = t;
748            }
749        }
750
751        Ok((best_cp, best_gain))
752    }
753}
754
755// =============================================================================
756// Bayesian Online Change Point Detection (Adams & MacKay 2007)
757// =============================================================================
758
759/// Bayesian Online Change Point Detection.
760///
761/// Implements the Adams & MacKay (2007) algorithm that maintains a distribution
762/// over "run lengths" (time since last change point). At each time step:
763///
764/// 1. Observe new data point x_t
765/// 2. Update run length probabilities using predictive likelihood
766/// 3. Apply hazard function to compute growth and change point probabilities
767///
768/// The hazard function H(tau) gives the probability of a change point given the
769/// current run length tau. A constant hazard 1/lambda yields a geometric prior
770/// on run lengths.
771///
772/// # References
773///
774/// Adams, R. P., & MacKay, D. J. C. (2007). "Bayesian online changepoint detection."
775/// *arXiv preprint arXiv:0710.3742*.
776#[derive(Debug, Clone)]
777pub struct BayesianChangePoint {
778    /// Hazard rate (1/lambda): probability of change at each time step
779    pub hazard_rate: f64,
780    /// Prior mean for the normal model
781    pub prior_mean: f64,
782    /// Prior variance for the normal model
783    pub prior_var: f64,
784    /// Observation noise variance
785    pub obs_var: f64,
786    /// Threshold on change point posterior probability
787    pub threshold: f64,
788}
789
790impl BayesianChangePoint {
791    /// Create a new Bayesian change point detector.
792    ///
793    /// # Arguments
794    /// * `hazard_rate` - Constant hazard rate (1/lambda). Higher values mean change
795    ///   points are more frequent a priori.
796    /// * `threshold` - Posterior probability threshold for declaring a change point.
797    pub fn new(hazard_rate: f64, threshold: f64) -> Self {
798        Self {
799            hazard_rate: hazard_rate.clamp(1e-10, 1.0 - 1e-10),
800            prior_mean: 0.0,
801            prior_var: 1.0,
802            obs_var: 1.0,
803            threshold: threshold.clamp(0.0, 1.0),
804        }
805    }
806
807    /// Create with custom prior parameters.
808    pub fn with_prior(
809        hazard_rate: f64,
810        threshold: f64,
811        prior_mean: f64,
812        prior_var: f64,
813        obs_var: f64,
814    ) -> Self {
815        Self {
816            hazard_rate: hazard_rate.clamp(1e-10, 1.0 - 1e-10),
817            prior_mean,
818            prior_var: prior_var.max(1e-10),
819            obs_var: obs_var.max(1e-10),
820            threshold: threshold.clamp(0.0, 1.0),
821        }
822    }
823
824    /// Detect change points using Bayesian online detection.
825    ///
826    /// Uses the MAP (Maximum A Posteriori) run length to detect changepoints.
827    /// With a constant hazard function, P(r_t = 0 | x_{1:t}) = h always,
828    /// so the raw run-length-zero probability cannot distinguish changepoints.
829    /// Instead, we detect times when the MAP run length drops significantly,
830    /// indicating the run length distribution has shifted to short run lengths
831    /// after a structural break in the data.
832    ///
833    /// The `threshold` parameter controls the minimum posterior probability
834    /// of the new MAP run length required to declare a changepoint, filtering
835    /// out low-confidence detections.
836    pub fn detect(&self, data: &ArrayView1<f64>) -> Result<ChangePointResult> {
837        let n = data.len();
838        if n < 2 {
839            return Ok(ChangePointResult {
840                locations: Vec::new(),
841                costs: Vec::new(),
842                total_cost: 0.0,
843            });
844        }
845
846        let (rlp, _cp_probs) = self.compute_run_length_distribution(data)?;
847
848        // Compute the MAP run length and its posterior probability at each time step
849        let mut map_rls = vec![0_usize; n + 1];
850        let mut map_probs = vec![0.0_f64; n + 1];
851        for t in 0..=n {
852            let mut best_r = 0_usize;
853            let mut best_p = 0.0_f64;
854            let max_r = t.min(n);
855            for r in 0..=max_r {
856                if rlp[[t, r]] > best_p {
857                    best_p = rlp[[t, r]];
858                    best_r = r;
859                }
860            }
861            map_rls[t] = best_r;
862            map_probs[t] = best_p;
863        }
864
865        let mut locations = Vec::new();
866        let mut costs = Vec::new();
867
868        // Detect changepoints: the MAP run length should grow by 1 each step
869        // under continuity. A significant drop indicates a changepoint.
870        for t in 2..=n {
871            let prev_rl = map_rls[t - 1];
872            let curr_rl = map_rls[t];
873
874            // Under no changepoint, we expect curr_rl ≈ prev_rl + 1.
875            // A changepoint is signalled when the MAP run length drops
876            // instead of growing (and the previous run was long enough).
877            let expected_rl = prev_rl + 1;
878            if expected_rl > 3 && curr_rl < expected_rl / 2 && map_probs[t] >= self.threshold {
879                // The changepoint occurred at approximately t - curr_rl
880                let cp_location = t.saturating_sub(curr_rl);
881
882                // Deduplicate: skip if too close to a previous detection
883                let already_found = locations
884                    .iter()
885                    .any(|&loc: &usize| loc.abs_diff(cp_location) < 5);
886
887                if !already_found && cp_location > 0 && cp_location < n {
888                    locations.push(cp_location);
889                    costs.push(map_probs[t]);
890                }
891            }
892        }
893
894        let total_cost = costs.iter().sum();
895        Ok(ChangePointResult {
896            locations,
897            costs,
898            total_cost,
899        })
900    }
901
902    /// Compute the full run length distribution over time.
903    ///
904    /// Returns:
905    /// * `run_length_probs` - Matrix where row t contains the probability of each
906    ///   run length at time t. Entry \[t\]\[r\] = P(r_t = r | x_{1:t}).
907    /// * `cp_probs` - Vector of change point posterior probabilities at each time step.
908    pub fn compute_run_length_distribution(
909        &self,
910        data: &ArrayView1<f64>,
911    ) -> Result<(Array2<f64>, Array1<f64>)> {
912        let n = data.len();
913        if n == 0 {
914            return Ok((Array2::zeros((0, 0)), Array1::zeros(0)));
915        }
916
917        // run_length_probs[t][r] = joint probability P(r_t = r, x_{1:t})
918        // We store up to (n+1) possible run lengths at each time step
919        let mut rlp = Array2::zeros((n + 1, n + 1));
920        rlp[[0, 0]] = 1.0; // Initially run length is 0 with probability 1
921
922        let mut cp_probs = Array1::zeros(n + 1);
923
924        // Sufficient statistics for the conjugate normal model per run length
925        // For each possible run length r: accumulate sum and sum-of-squares
926        let mut mu = vec![self.prior_mean; n + 1]; // posterior mean
927        let mut sigma2 = vec![self.prior_var; n + 1]; // posterior variance
928
929        for t in 0..n {
930            let x = data[t];
931
932            // Compute predictive probabilities for each run length
933            let mut pred_probs = vec![0.0_f64; t + 1];
934            for r in 0..=t {
935                // Predictive distribution: N(mu_r, sigma2_r + obs_var)
936                let pred_var = sigma2[r] + self.obs_var;
937                let diff = x - mu[r];
938                let log_pred = -0.5 * (2.0 * std::f64::consts::PI * pred_var).ln()
939                    - 0.5 * diff * diff / pred_var;
940                pred_probs[r] = log_pred.exp();
941            }
942
943            // Growth probabilities (run length grows by 1)
944            let h = self.hazard_rate;
945            let mut new_rlp = vec![0.0_f64; t + 2];
946
947            // Growth: run length increases
948            for r in 0..=t {
949                new_rlp[r + 1] += rlp[[t, r]] * pred_probs[r] * (1.0 - h);
950            }
951
952            // Change point: run length resets to 0
953            let mut cp_sum = 0.0;
954            for r in 0..=t {
955                cp_sum += rlp[[t, r]] * pred_probs[r] * h;
956            }
957            new_rlp[0] = cp_sum;
958
959            // Normalize
960            let evidence: f64 = new_rlp.iter().sum();
961            if evidence > 1e-300 {
962                for r in 0..=(t + 1) {
963                    rlp[[t + 1, r]] = new_rlp[r] / evidence;
964                }
965            }
966
967            cp_probs[t + 1] = if evidence > 1e-300 {
968                new_rlp[0] / evidence
969            } else {
970                0.0
971            };
972
973            // Update sufficient statistics for each run length
974            // Bayesian update: posterior after seeing x with prior (mu_r, sigma2_r)
975            let mut new_mu = vec![self.prior_mean; t + 2];
976            let mut new_sigma2 = vec![self.prior_var; t + 2];
977
978            for r in 0..=t {
979                let kalman_gain = sigma2[r] / (sigma2[r] + self.obs_var);
980                new_mu[r + 1] = mu[r] + kalman_gain * (x - mu[r]);
981                new_sigma2[r + 1] = sigma2[r] * (1.0 - kalman_gain);
982            }
983            // Run length 0 resets to prior
984            new_mu[0] = self.prior_mean;
985            new_sigma2[0] = self.prior_var;
986
987            mu = new_mu;
988            sigma2 = new_sigma2;
989        }
990
991        // Extract the posterior portion we filled
992        let rlp_out = rlp.slice(s![0..=n, 0..=n]).to_owned();
993        let cp_out = cp_probs.slice(s![0..=n]).to_owned();
994
995        Ok((rlp_out, cp_out))
996    }
997
998    /// Constant hazard function value.
999    pub fn hazard(&self, _run_length: usize) -> f64 {
1000        self.hazard_rate
1001    }
1002}
1003
1004// =============================================================================
1005// Tests
1006// =============================================================================
1007
1008#[cfg(test)]
1009mod tests {
1010    use super::*;
1011    use scirs2_core::ndarray::Array1;
1012
1013    // -------------------------------------------------------------------------
1014    // Helper: generate step signal with a single mean shift
1015    // -------------------------------------------------------------------------
1016    fn step_signal(n1: usize, n2: usize, mean1: f64, mean2: f64) -> Array1<f64> {
1017        let mut data = vec![mean1; n1];
1018        data.extend(vec![mean2; n2]);
1019        Array1::from_vec(data)
1020    }
1021
1022    /// Add small deterministic noise to avoid zero-variance segments.
1023    fn add_noise(data: &mut [f64]) {
1024        for (i, v) in data.iter_mut().enumerate() {
1025            *v += 0.01 * ((i as f64 * 1.23456).sin());
1026        }
1027    }
1028
1029    // -------------------------------------------------------------------------
1030    // Test 1: CUSUM - single mean shift detection
1031    // -------------------------------------------------------------------------
1032    #[test]
1033    fn test_cusum_single_mean_shift() {
1034        let mut data = vec![0.0; 50];
1035        data.extend(vec![3.0; 50]);
1036        add_noise(&mut data);
1037        let arr = Array1::from_vec(data);
1038
1039        let cusum = Cusum::new(0.0, 8.0, 0.5);
1040        let result = cusum
1041            .detect(&arr.view())
1042            .expect("CUSUM detect should succeed");
1043
1044        assert!(
1045            !result.locations.is_empty(),
1046            "CUSUM should detect at least one change point"
1047        );
1048        // First detection should be near index 50
1049        let first = result.locations[0];
1050        assert!(
1051            (40..=70).contains(&first),
1052            "First change point {first} should be near 50"
1053        );
1054    }
1055
1056    // -------------------------------------------------------------------------
1057    // Test 2: CUSUM forward and backward
1058    // -------------------------------------------------------------------------
1059    #[test]
1060    fn test_cusum_forward_backward() {
1061        let mut data = vec![0.0; 30];
1062        data.extend(vec![4.0; 30]);
1063        add_noise(&mut data);
1064        let arr = Array1::from_vec(data);
1065
1066        let cusum = Cusum::new(0.0, 5.0, 0.5);
1067        let (fwd_high, fwd_low) = cusum
1068            .forward_cusum(&arr.view())
1069            .expect("forward CUSUM should succeed");
1070        let (bwd_high, bwd_low) = cusum
1071            .backward_cusum(&arr.view())
1072            .expect("backward CUSUM should succeed");
1073
1074        assert_eq!(fwd_high.len(), 60);
1075        assert_eq!(bwd_high.len(), 60);
1076
1077        // Forward high CUSUM should rise after the shift
1078        assert!(
1079            fwd_high[59] > fwd_high[0],
1080            "Forward CUSUM should rise after shift"
1081        );
1082        // Backward high CUSUM should be large at the beginning (running backwards from shift)
1083        assert!(
1084            bwd_high[0] > bwd_high[59],
1085            "Backward CUSUM should detect shift from the other direction"
1086        );
1087    }
1088
1089    // -------------------------------------------------------------------------
1090    // Test 3: CUSUM control limits
1091    // -------------------------------------------------------------------------
1092    #[test]
1093    fn test_cusum_control_limits() {
1094        let cusum = Cusum::new(0.0, 5.0, 0.5);
1095        let (upper, lower) = cusum.control_limits(100);
1096        assert_eq!(upper.len(), 100);
1097        assert_eq!(lower.len(), 100);
1098        for i in 0..100 {
1099            assert!((upper[i] - 5.0).abs() < 1e-10);
1100            assert!((lower[i] - 5.0).abs() < 1e-10);
1101        }
1102    }
1103
1104    // -------------------------------------------------------------------------
1105    // Test 4: CUSUM - no change in stationary data
1106    // -------------------------------------------------------------------------
1107    #[test]
1108    fn test_cusum_no_change() {
1109        let mut data = vec![5.0; 100];
1110        add_noise(&mut data);
1111        let arr = Array1::from_vec(data);
1112
1113        // Target at 5.0, high threshold so no false alarms
1114        let cusum = Cusum::new(5.0, 50.0, 0.5);
1115        let result = cusum.detect(&arr.view()).expect("CUSUM should succeed");
1116
1117        assert!(
1118            result.locations.is_empty(),
1119            "CUSUM should not detect changes in stationary data with high threshold"
1120        );
1121    }
1122
1123    // -------------------------------------------------------------------------
1124    // Test 5: PELT - single mean change
1125    // -------------------------------------------------------------------------
1126    #[test]
1127    fn test_pelt_single_mean_change() {
1128        let mut raw = vec![0.0; 50];
1129        raw.extend(vec![5.0; 50]);
1130        add_noise(&mut raw);
1131        let data = Array1::from_vec(raw);
1132
1133        let pelt = Pelt::new(15.0, 5);
1134        let result = pelt
1135            .detect_mean_change(&data.view())
1136            .expect("PELT mean change should succeed");
1137
1138        assert!(
1139            !result.locations.is_empty(),
1140            "PELT should detect at least one change point"
1141        );
1142        // Should be near index 50
1143        let closest = result
1144            .locations
1145            .iter()
1146            .min_by_key(|&&cp| ((cp as i64) - 50).unsigned_abs())
1147            .copied();
1148        if let Some(cp) = closest {
1149            assert!(
1150                (cp as i64 - 50).unsigned_abs() < 10,
1151                "Closest change point {cp} should be near 50"
1152            );
1153        }
1154    }
1155
1156    // -------------------------------------------------------------------------
1157    // Test 6: PELT - multiple change points
1158    // -------------------------------------------------------------------------
1159    #[test]
1160    fn test_pelt_multiple_changepoints() {
1161        let mut raw = vec![0.0; 40];
1162        raw.extend(vec![5.0; 40]);
1163        raw.extend(vec![10.0; 40]);
1164        add_noise(&mut raw);
1165        let data = Array1::from_vec(raw);
1166
1167        let pelt = Pelt::new(15.0, 5);
1168        let result = pelt
1169            .detect_mean_change(&data.view())
1170            .expect("PELT should succeed");
1171
1172        assert!(
1173            result.locations.len() >= 2,
1174            "PELT should detect at least 2 change points, got {}",
1175            result.locations.len()
1176        );
1177    }
1178
1179    // -------------------------------------------------------------------------
1180    // Test 7: PELT - variance change
1181    // -------------------------------------------------------------------------
1182    #[test]
1183    fn test_pelt_variance_change() {
1184        // Low variance segment then high variance segment
1185        let mut raw = Vec::with_capacity(100);
1186        for i in 0..50 {
1187            raw.push(5.0 + 0.05 * (i as f64 * 0.73).sin());
1188        }
1189        for i in 50..100 {
1190            raw.push(5.0 + 3.0 * (i as f64 * 0.73).sin());
1191        }
1192        let data = Array1::from_vec(raw);
1193
1194        let pelt = Pelt::new(10.0, 5);
1195        let result = pelt
1196            .detect_variance_change(&data.view())
1197            .expect("PELT variance change should succeed");
1198
1199        assert!(result.total_cost.is_finite(), "Total cost should be finite");
1200    }
1201
1202    // -------------------------------------------------------------------------
1203    // Test 8: PELT with BIC penalty
1204    // -------------------------------------------------------------------------
1205    #[test]
1206    fn test_pelt_bic_penalty() {
1207        let mut raw = vec![0.0; 60];
1208        raw.extend(vec![6.0; 60]);
1209        add_noise(&mut raw);
1210        let data = Array1::from_vec(raw);
1211
1212        let pelt = Pelt::with_options(PenaltyType::BIC, CostFunction::L2, 5);
1213        let result = pelt
1214            .detect(&data.view(), ChangeType::Mean)
1215            .expect("PELT with BIC should succeed");
1216
1217        assert!(
1218            !result.locations.is_empty(),
1219            "PELT with BIC should detect change"
1220        );
1221    }
1222
1223    // -------------------------------------------------------------------------
1224    // Test 9: PELT with AIC penalty
1225    // -------------------------------------------------------------------------
1226    #[test]
1227    fn test_pelt_aic_penalty() {
1228        let mut raw = vec![0.0; 50];
1229        raw.extend(vec![5.0; 50]);
1230        add_noise(&mut raw);
1231        let data = Array1::from_vec(raw);
1232
1233        let pelt = Pelt::with_options(PenaltyType::AIC, CostFunction::L2, 5);
1234        let result = pelt
1235            .detect(&data.view(), ChangeType::Mean)
1236            .expect("PELT with AIC should succeed");
1237
1238        assert!(
1239            !result.locations.is_empty(),
1240            "PELT with AIC should detect change"
1241        );
1242    }
1243
1244    // -------------------------------------------------------------------------
1245    // Test 10: PELT - no change (stationary)
1246    // -------------------------------------------------------------------------
1247    #[test]
1248    fn test_pelt_no_change_stationary() {
1249        let mut raw = vec![5.0; 100];
1250        add_noise(&mut raw);
1251        let data = Array1::from_vec(raw);
1252
1253        let pelt = Pelt::new(100.0, 10); // Very high penalty
1254        let result = pelt
1255            .detect_mean_change(&data.view())
1256            .expect("PELT should succeed on stationary data");
1257
1258        assert!(
1259            result.locations.is_empty(),
1260            "PELT should not detect changes in stationary data with high penalty, got {:?}",
1261            result.locations
1262        );
1263    }
1264
1265    // -------------------------------------------------------------------------
1266    // Test 11: PELT insufficient data
1267    // -------------------------------------------------------------------------
1268    #[test]
1269    fn test_pelt_insufficient_data() {
1270        let data = Array1::from_vec(vec![1.0, 2.0, 3.0]);
1271        let pelt = Pelt::new(5.0, 5);
1272        let result = pelt.detect_mean_change(&data.view());
1273        assert!(result.is_err(), "PELT should fail with insufficient data");
1274    }
1275
1276    // -------------------------------------------------------------------------
1277    // Test 12: Binary Segmentation - basic detection
1278    // -------------------------------------------------------------------------
1279    #[test]
1280    fn test_binseg_basic_detection() {
1281        let mut raw = vec![0.0; 50];
1282        raw.extend(vec![5.0; 50]);
1283        add_noise(&mut raw);
1284        let data = Array1::from_vec(raw);
1285
1286        let bs = BinarySegmentation::new(5, 5.0, 5);
1287        let result = bs.detect(&data.view()).expect("BinSeg should succeed");
1288
1289        assert!(!result.locations.is_empty(), "BinSeg should detect change");
1290    }
1291
1292    // -------------------------------------------------------------------------
1293    // Test 13: Binary Segmentation - multiple change points
1294    // -------------------------------------------------------------------------
1295    #[test]
1296    fn test_binseg_multiple_changepoints() {
1297        let mut raw = vec![0.0; 40];
1298        raw.extend(vec![4.0; 40]);
1299        raw.extend(vec![8.0; 40]);
1300        add_noise(&mut raw);
1301        let data = Array1::from_vec(raw);
1302
1303        let bs = BinarySegmentation::new(10, 5.0, 5);
1304        let result = bs.detect(&data.view()).expect("BinSeg should succeed");
1305
1306        assert!(
1307            result.locations.len() >= 2,
1308            "BinSeg should detect at least 2 change points, got {}",
1309            result.locations.len()
1310        );
1311    }
1312
1313    // -------------------------------------------------------------------------
1314    // Test 14: PELT vs BinSeg comparison
1315    // -------------------------------------------------------------------------
1316    #[test]
1317    fn test_pelt_vs_binseg_comparison() {
1318        let mut raw = vec![0.0; 50];
1319        raw.extend(vec![5.0; 50]);
1320        add_noise(&mut raw);
1321        let data = Array1::from_vec(raw);
1322
1323        let pelt = Pelt::new(10.0, 5);
1324        let pelt_result = pelt
1325            .detect_mean_change(&data.view())
1326            .expect("PELT should succeed");
1327
1328        let bs = BinarySegmentation::new(5, 5.0, 5);
1329        let bs_result = bs.detect(&data.view()).expect("BinSeg should succeed");
1330
1331        // Both should detect at least one change point
1332        assert!(
1333            !pelt_result.locations.is_empty(),
1334            "PELT should detect change"
1335        );
1336        assert!(
1337            !bs_result.locations.is_empty(),
1338            "BinSeg should detect change"
1339        );
1340
1341        // Both should find a change point near index 50
1342        let pelt_near_50 = pelt_result
1343            .locations
1344            .iter()
1345            .any(|&cp| (cp as i64 - 50).unsigned_abs() < 15);
1346        let bs_near_50 = bs_result
1347            .locations
1348            .iter()
1349            .any(|&cp| (cp as i64 - 50).unsigned_abs() < 15);
1350
1351        assert!(pelt_near_50, "PELT should find change near 50");
1352        assert!(bs_near_50, "BinSeg should find change near 50");
1353    }
1354
1355    // -------------------------------------------------------------------------
1356    // Test 15: Bayesian online change point detection
1357    // -------------------------------------------------------------------------
1358    #[test]
1359    fn test_bayesian_online_detection() {
1360        let mut raw = vec![0.0; 50];
1361        raw.extend(vec![5.0; 50]);
1362        add_noise(&mut raw);
1363        let data = Array1::from_vec(raw);
1364
1365        let bayes = BayesianChangePoint::with_prior(0.05, 0.3, 0.0, 10.0, 1.0);
1366        let result = bayes
1367            .detect(&data.view())
1368            .expect("Bayesian detection should succeed");
1369
1370        // Should detect change near index 50
1371        assert!(
1372            !result.locations.is_empty(),
1373            "Bayesian should detect at least one change point"
1374        );
1375    }
1376
1377    // -------------------------------------------------------------------------
1378    // Test 16: Bayesian run length distribution
1379    // -------------------------------------------------------------------------
1380    #[test]
1381    fn test_bayesian_run_length_distribution() {
1382        let mut raw = vec![0.0; 30];
1383        raw.extend(vec![5.0; 30]);
1384        add_noise(&mut raw);
1385        let data = Array1::from_vec(raw);
1386
1387        let bayes = BayesianChangePoint::with_prior(0.1, 0.3, 0.0, 10.0, 1.0);
1388        let (rlp, cp_probs) = bayes
1389            .compute_run_length_distribution(&data.view())
1390            .expect("Run length computation should succeed");
1391
1392        assert_eq!(rlp.nrows(), 61); // n+1
1393        assert_eq!(cp_probs.len(), 61);
1394
1395        // Run length probabilities should sum to approximately 1 at each time step
1396        for t in 1..=60 {
1397            let row_sum: f64 = rlp.row(t).iter().sum();
1398            assert!(
1399                (row_sum - 1.0).abs() < 0.1,
1400                "Row {t} sum = {row_sum} should be near 1.0"
1401            );
1402        }
1403    }
1404
1405    // -------------------------------------------------------------------------
1406    // Test 17: Bayesian hazard function
1407    // -------------------------------------------------------------------------
1408    #[test]
1409    fn test_bayesian_hazard_function() {
1410        let bayes = BayesianChangePoint::new(0.05, 0.5);
1411        // Constant hazard
1412        assert!((bayes.hazard(0) - 0.05).abs() < 1e-10);
1413        assert!((bayes.hazard(100) - 0.05).abs() < 1e-10);
1414        assert!((bayes.hazard(1000) - 0.05).abs() < 1e-10);
1415    }
1416
1417    // -------------------------------------------------------------------------
1418    // Test 18: Cost functions
1419    // -------------------------------------------------------------------------
1420    #[test]
1421    fn test_cost_functions() {
1422        let data = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
1423
1424        let l2 = segment_cost(&data.view(), CostFunction::L2).expect("L2 cost should succeed");
1425        assert!(l2 > 0.0, "L2 cost should be positive for non-constant data");
1426
1427        let l1 = segment_cost(&data.view(), CostFunction::L1).expect("L1 cost should succeed");
1428        assert!(l1 > 0.0, "L1 cost should be positive for non-constant data");
1429
1430        let nll = segment_cost(&data.view(), CostFunction::NormalLogLikelihood)
1431            .expect("NLL cost should succeed");
1432        assert!(nll.is_finite(), "NLL cost should be finite");
1433    }
1434
1435    // -------------------------------------------------------------------------
1436    // Test 19: Segment statistics
1437    // -------------------------------------------------------------------------
1438    #[test]
1439    fn test_segment_statistics() {
1440        let data = step_signal(50, 50, 0.0, 10.0);
1441        let change_points = vec![50];
1442        let stats = compute_segment_statistics(&data.view(), &change_points)
1443            .expect("segment stats should succeed");
1444
1445        assert_eq!(stats.len(), 2);
1446        assert!((stats[0].mean - 0.0).abs() < 1e-10);
1447        assert!((stats[1].mean - 10.0).abs() < 1e-10);
1448        assert_eq!(stats[0].count, 50);
1449        assert_eq!(stats[1].count, 50);
1450    }
1451
1452    // -------------------------------------------------------------------------
1453    // Test 20: Change point summary
1454    // -------------------------------------------------------------------------
1455    #[test]
1456    fn test_changepoint_summary() {
1457        let data = step_signal(50, 50, 2.0, 8.0);
1458        let result = ChangePointResult {
1459            locations: vec![50],
1460            costs: vec![10.0],
1461            total_cost: 10.0,
1462        };
1463
1464        let summary =
1465            summarize_changepoints(&data.view(), &result).expect("summarize should succeed");
1466
1467        assert_eq!(summary.n_changepoints, 1);
1468        assert_eq!(summary.segments.len(), 2);
1469        assert!((summary.segments[0].mean - 2.0).abs() < 1e-10);
1470        assert!((summary.segments[1].mean - 8.0).abs() < 1e-10);
1471    }
1472
1473    // -------------------------------------------------------------------------
1474    // Test 21: Penalty functions
1475    // -------------------------------------------------------------------------
1476    #[test]
1477    fn test_penalty_functions() {
1478        let n = 100;
1479        let bic = compute_penalty(PenaltyType::BIC, n, 0.0);
1480        let aic = compute_penalty(PenaltyType::AIC, n, 0.0);
1481        let mbic = compute_penalty(PenaltyType::MBIC, n, 0.0);
1482        let manual = compute_penalty(PenaltyType::Manual, n, 42.0);
1483
1484        assert!((bic - (100.0_f64).ln()).abs() < 1e-10);
1485        assert!((aic - 2.0).abs() < 1e-10);
1486        assert!((mbic - 3.0 * (100.0_f64).ln()).abs() < 1e-10);
1487        assert!((manual - 42.0).abs() < 1e-10);
1488
1489        // BIC < MBIC for n > e^(2/3)
1490        assert!(bic < mbic);
1491    }
1492
1493    // -------------------------------------------------------------------------
1494    // Test 22: Edge case - short series
1495    // -------------------------------------------------------------------------
1496    #[test]
1497    fn test_edge_case_short_series() {
1498        let data = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
1499
1500        // PELT with small min_size
1501        let pelt = Pelt::new(10.0, 2);
1502        let result = pelt.detect_mean_change(&data.view());
1503        // Should succeed (5 >= 2*2)
1504        assert!(result.is_ok());
1505
1506        // BinSeg with small min_size
1507        let bs = BinarySegmentation::new(3, 0.5, 2);
1508        let result = bs.detect(&data.view());
1509        assert!(result.is_ok());
1510    }
1511
1512    // -------------------------------------------------------------------------
1513    // Test 23: Edge case - change at boundaries
1514    // -------------------------------------------------------------------------
1515    #[test]
1516    fn test_edge_case_change_at_boundary() {
1517        // Change very early
1518        let mut early = vec![10.0; 5];
1519        early.extend(vec![0.0; 95]);
1520        add_noise(&mut early);
1521        let data = Array1::from_vec(early);
1522
1523        let pelt = Pelt::new(10.0, 3);
1524        let result = pelt
1525            .detect_mean_change(&data.view())
1526            .expect("PELT should succeed");
1527        // Should detect something near the beginning
1528        assert!(
1529            !result.locations.is_empty(),
1530            "Should detect change near beginning"
1531        );
1532
1533        // Change very late
1534        let mut late = vec![0.0; 95];
1535        late.extend(vec![10.0; 5]);
1536        add_noise(&mut late);
1537        let data_late = Array1::from_vec(late);
1538
1539        let result_late = pelt
1540            .detect_mean_change(&data_late.view())
1541            .expect("PELT should succeed on late change");
1542        assert!(
1543            !result_late.locations.is_empty(),
1544            "Should detect change near end"
1545        );
1546    }
1547
1548    // -------------------------------------------------------------------------
1549    // Test 24: L1 cost function (median-based)
1550    // -------------------------------------------------------------------------
1551    #[test]
1552    fn test_l1_cost_function() {
1553        let data = Array1::from_vec(vec![1.0, 1.0, 1.0, 10.0, 1.0]);
1554        let cost = segment_cost(&data.view(), CostFunction::L1).expect("L1 cost should succeed");
1555        // Median is 1.0, sum of |x - 1| = 0+0+0+9+0 = 9
1556        assert!(
1557            (cost - 9.0).abs() < 1e-10,
1558            "L1 cost should be 9.0, got {cost}"
1559        );
1560    }
1561
1562    // -------------------------------------------------------------------------
1563    // Test 25: BinarySegmentation with L1 cost
1564    // -------------------------------------------------------------------------
1565    #[test]
1566    fn test_binseg_l1_cost() {
1567        let mut raw = vec![0.0; 50];
1568        raw.extend(vec![5.0; 50]);
1569        add_noise(&mut raw);
1570        let data = Array1::from_vec(raw);
1571
1572        let bs = BinarySegmentation::with_cost(5, 5.0, 5, CostFunction::L1);
1573        let result = bs.detect(&data.view()).expect("BinSeg L1 should succeed");
1574
1575        assert!(
1576            !result.locations.is_empty(),
1577            "BinSeg with L1 cost should detect change"
1578        );
1579    }
1580
1581    // -------------------------------------------------------------------------
1582    // Test 26: PELT detect with ChangeType enum
1583    // -------------------------------------------------------------------------
1584    #[test]
1585    fn test_pelt_detect_with_change_type() {
1586        let mut raw = vec![0.0; 50];
1587        raw.extend(vec![5.0; 50]);
1588        add_noise(&mut raw);
1589        let data = Array1::from_vec(raw);
1590
1591        let pelt = Pelt::new(10.0, 5);
1592
1593        let mean_result = pelt
1594            .detect(&data.view(), ChangeType::Mean)
1595            .expect("Mean detect should succeed");
1596        let var_result = pelt
1597            .detect(&data.view(), ChangeType::Variance)
1598            .expect("Variance detect should succeed");
1599        let mv_result = pelt
1600            .detect(&data.view(), ChangeType::MeanAndVariance)
1601            .expect("MeanAndVariance detect should succeed");
1602
1603        assert!(mean_result.total_cost.is_finite());
1604        assert!(var_result.total_cost.is_finite());
1605        assert!(mv_result.total_cost.is_finite());
1606    }
1607
1608    // -------------------------------------------------------------------------
1609    // Test 27: Empty data handling
1610    // -------------------------------------------------------------------------
1611    #[test]
1612    fn test_empty_data() {
1613        let data = Array1::<f64>::zeros(0);
1614
1615        let cusum = Cusum::new(0.0, 5.0, 0.5);
1616        let result = cusum
1617            .detect(&data.view())
1618            .expect("CUSUM empty should succeed");
1619        assert!(result.locations.is_empty());
1620
1621        let bayes = BayesianChangePoint::new(0.1, 0.5);
1622        let result = bayes
1623            .detect(&data.view())
1624            .expect("Bayes empty should succeed");
1625        assert!(result.locations.is_empty());
1626    }
1627}
numrs2/new_modules/timeseries/changepoint.rs

numrs2/new_modules/timeseries/
changepoint.rs