dsfb_database/baselines/
pelt.rs

1//! PELT — Pruned Exact Linear Time change-point detection.
2//!
3//! Reference: Killick, Fearnhead & Eckley, *Optimal Detection of
4//! Changepoints With a Linear Computational Cost*, JASA 107 (500), 2012.
5//!
6//! PELT solves the optimal-partition DP
7//!
8//! ```text
9//!   F(t) = min_{τ < t} [ F(τ) + C(y_{τ+1..t}) + β ]
10//! ```
11//!
12//! where `C` is a segment cost (here L2 on the mean, i.e.
13//! `Σ (yᵢ − ȳ)²`) and `β` is the per-change-point penalty. The
14//! pruning step removes candidates `τ` for which
15//! `F(τ) + C(τ+1:t) + K ≥ F(t)` — those can never be optimal later —
16//! and gives the algorithm its linear expected-time behaviour under a
17//! Poisson-rate change-point assumption.
18//!
19//! `K` is the cost-reduction constant; for the L2 cost `K = 0` is
20//! valid (see Killick et al. §2.2).
21
22use super::ChangePointDetector;
23
24pub struct Pelt {
25    /// BIC penalty coefficient. The BIC default is `β = k · ln(n) · σ²`
26    /// with `k = 1`; we compute `σ²` from the input series' variance so
27    /// the penalty adapts to scale.
28    pub penalty_k: f64,
29    /// Minimum segment length. Killick recommends ≥ 2; 5 damps noise
30    /// for small series.
31    pub min_seg_len: usize,
32}
33
34impl Default for Pelt {
35    fn default() -> Self {
36        Self {
37            penalty_k: 2.0,
38            min_seg_len: 5,
39        }
40    }
41}
42
43impl ChangePointDetector for Pelt {
44    fn name(&self) -> &'static str {
45        "pelt"
46    }
47
48    fn detect(&self, series: &[(f64, f64)]) -> Vec<f64> {
49        let n = series.len();
50        if n < 2 * self.min_seg_len {
51            return Vec::new();
52        }
53
54        // Prefix sums for O(1) segment cost:
55        // cost(i..=j) = Σ y² − (Σ y)² / (j - i + 1)
56        let mut prefix_sum = vec![0.0f64; n + 1];
57        let mut prefix_sq = vec![0.0f64; n + 1];
58        for i in 0..n {
59            prefix_sum[i + 1] = prefix_sum[i] + series[i].1;
60            prefix_sq[i + 1] = prefix_sq[i] + series[i].1 * series[i].1;
61        }
62        let mean = prefix_sum[n] / n as f64;
63        let var = (prefix_sq[n] / n as f64 - mean * mean).max(f64::MIN_POSITIVE);
64        let beta = self.penalty_k * (n as f64).ln() * var;
65
66        let cost = |i: usize, j: usize| -> f64 {
67            debug_assert!(i < j, "nonempty segment");
68            let k = (j - i) as f64;
69            let s = prefix_sum[j] - prefix_sum[i];
70            let sq = prefix_sq[j] - prefix_sq[i];
71            (sq - s * s / k).max(0.0)
72        };
73
74        // DP tables.
75        let mut f = vec![f64::INFINITY; n + 1];
76        let mut prev = vec![0usize; n + 1];
77        f[0] = -beta;
78        let mut candidates: Vec<usize> = vec![0];
79
80        for t in self.min_seg_len..=n {
81            let (mut best_cost, mut best_tau) = (f64::INFINITY, 0usize);
82            let mut next_cands: Vec<usize> = Vec::with_capacity(candidates.len() + 1);
83            for &tau in &candidates {
84                if t - tau < self.min_seg_len {
85                    next_cands.push(tau);
86                    continue;
87                }
88                let seg = cost(tau, t);
89                let c = f[tau] + seg + beta;
90                if c < best_cost {
91                    best_cost = c;
92                    best_tau = tau;
93                }
94                // Pruning: if f[τ] + cost(τ..t) ≥ f[t], τ cannot be the
95                // optimal last change-point at any future time either
96                // (Killick et al. §2.2, with K = 0 for L2).
97                if f[tau] + seg < f[t].min(best_cost) {
98                    next_cands.push(tau);
99                }
100            }
101            f[t] = best_cost;
102            prev[t] = best_tau;
103            // Always keep `t` as a future candidate (segment boundary).
104            next_cands.push(t);
105            candidates = next_cands;
106        }
107
108        // Backtrack.
109        let mut cuts: Vec<usize> = Vec::new();
110        let mut t = n;
111        while t > 0 {
112            let p = prev[t];
113            if p > 0 {
114                cuts.push(p);
115            }
116            if p == t {
117                break;
118            }
119            t = p;
120        }
121        cuts.sort_unstable();
122        cuts.dedup();
123        cuts.into_iter().map(|i| series[i].0).collect()
124    }
125}
126
127#[cfg(test)]
128mod tests {
129    use super::*;
130
131    #[test]
132    fn flags_one_shift() {
133        let mut series = Vec::new();
134        for i in 0..60 {
135            series.push((i as f64, 0.0));
136        }
137        for i in 60..120 {
138            series.push((i as f64, 3.0));
139        }
140        let cps = Pelt::default().detect(&series);
141        assert!(!cps.is_empty(), "PELT should flag the obvious shift");
142        assert!(cps.iter().any(|&t| (t - 60.0).abs() < 10.0));
143    }
144
145    #[test]
146    fn quiet_on_flat() {
147        let series: Vec<(f64, f64)> = (0..200).map(|i| (i as f64, 0.0)).collect();
148        let cps = Pelt::default().detect(&series);
149        assert!(cps.is_empty(), "PELT should not invent changes");
150    }
151}
dsfb_database/baselines/pelt.rs

dsfb_database/baselines/
pelt.rs