dsfb_database/baselines/
bocpd.rs

1//! BOCPD — Bayesian Online Change-Point Detection.
2//!
3//! Reference: Adams & MacKay, *Bayesian Online Changepoint Detection*,
4//! arXiv:0710.3742 (2007).
5//!
6//! Core idea: at each time `t` maintain a posterior `P(r_t | x_{1..t})`
7//! over the run length `r_t` (the number of samples since the last
8//! change-point). Updates factorise into
9//!
10//! ```text
11//!   P(r_t = r_{t-1}+1 | x_{1..t}) ∝ P(r_{t-1} | x_{1..t-1}) ·
12//!                                   UPM(x_t | x_{1..t-1}, r_{t-1}) ·
13//!                                   (1 − H(r_{t-1}+1))
14//!
15//!   P(r_t = 0 | x_{1..t}) ∝ Σ P(r_{t-1} | x_{1..t-1}) ·
16//!                              UPM(x_t | x_{1..t-1}, r_{t-1}) ·
17//!                              H(r_{t-1}+1)
18//! ```
19//!
20//! with a constant hazard `H(r) = 1/λ` (geometric run-length prior) and
21//! a Student-t UPM derived from a Normal-Gamma conjugate prior on
22//! `(μ, σ²)`.
23//!
24//! We report a change-point at time `t` when the *MAP run length*
25//! (`argmax_r P(r_t | x_{1..t})`) drops by more than one step from the
26//! previous time. This is the standard point-decision rule given in
27//! §2.1 of Adams & MacKay (and is what the cited Figure 3 plots): a
28//! MAP drop means the posterior has collapsed back toward `r = 0`,
29//! signalling a regime change. Under a constant hazard, a fixed
30//! `P(r_t = 0) > τ` rule is pinned near the hazard rate even on
31//! obvious shifts — the MAP rule avoids that degeneracy.
32
33use super::ChangePointDetector;
34use std::f64::consts::PI;
35
36pub struct Bocpd {
37    /// Expected run length. `hazard = 1 / expected_run_length`. 100 is
38    /// a common default in the BOCPD literature for traces of a few
39    /// hundred samples; we keep it here so the crate's bake-off lines up
40    /// with published benchmarks.
41    pub expected_run_length: f64,
42    /// Minimum drop in MAP run length that triggers a change-point.
43    /// 1 would fire on every slight argmax jitter; 2 damps that without
44    /// missing real regime changes.
45    pub map_drop_min: usize,
46    /// Normal-Gamma prior hyper-parameters. Weakly informative defaults
47    /// match Adams & MacKay's Figure 3 setup.
48    pub mu0: f64,
49    pub kappa0: f64,
50    pub alpha0: f64,
51    pub beta0: f64,
52}
53
54impl Default for Bocpd {
55    fn default() -> Self {
56        Self {
57            expected_run_length: 100.0,
58            map_drop_min: 2,
59            mu0: 0.0,
60            kappa0: 1.0,
61            alpha0: 1.0,
62            beta0: 1.0,
63        }
64    }
65}
66
67impl ChangePointDetector for Bocpd {
68    fn name(&self) -> &'static str {
69        "bocpd"
70    }
71
72    fn detect(&self, series: &[(f64, f64)]) -> Vec<f64> {
73        if series.len() < 2 {
74            return Vec::new();
75        }
76        let hazard = 1.0 / self.expected_run_length.max(1.0);
77
78        // Per-run-length sufficient statistics. mu[i], kappa[i], alpha[i],
79        // beta[i] are the posterior Normal-Gamma parameters for the run
80        // of length i.
81        let mut mu = vec![self.mu0];
82        let mut kappa = vec![self.kappa0];
83        let mut alpha = vec![self.alpha0];
84        let mut beta = vec![self.beta0];
85        // Run-length posterior; starts with all mass at r=0.
86        let mut run_posterior: Vec<f64> = vec![1.0];
87
88        let mut cps = Vec::new();
89        let mut prev_map: Option<usize> = None;
90        for (t, &(ts, x)) in series.iter().enumerate() {
91            // Predictive Student-t density at each extant run length.
92            let mut pred = Vec::with_capacity(run_posterior.len());
93            for i in 0..run_posterior.len() {
94                pred.push(student_t_pdf(
95                    x,
96                    mu[i],
97                    beta[i] * (kappa[i] + 1.0) / (alpha[i] * kappa[i]),
98                    2.0 * alpha[i],
99                ));
100            }
101
102            // Growth probabilities (no change).
103            let mut new_post = vec![0.0; run_posterior.len() + 1];
104            for i in 0..run_posterior.len() {
105                new_post[i + 1] = run_posterior[i] * pred[i] * (1.0 - hazard);
106            }
107            // Change probability (collapse all mass to r=0).
108            new_post[0] = run_posterior
109                .iter()
110                .zip(pred.iter())
111                .map(|(r, p)| r * p * hazard)
112                .sum();
113
114            // Normalise — protects against numerical underflow over
115            // long series.
116            let z: f64 = new_post.iter().sum();
117            if z > 0.0 {
118                for p in &mut new_post {
119                    *p /= z;
120                }
121            } else {
122                new_post[0] = 1.0;
123            }
124
125            // Update sufficient statistics: extend every existing run by
126            // 1, and prepend a fresh prior for the r=0 branch.
127            let mut new_mu = vec![self.mu0];
128            let mut new_kappa = vec![self.kappa0];
129            let mut new_alpha = vec![self.alpha0];
130            let mut new_beta = vec![self.beta0];
131            for i in 0..run_posterior.len() {
132                let k1 = kappa[i] + 1.0;
133                new_mu.push((kappa[i] * mu[i] + x) / k1);
134                new_kappa.push(k1);
135                new_alpha.push(alpha[i] + 0.5);
136                let diff = x - mu[i];
137                new_beta.push(beta[i] + kappa[i] * diff * diff / (2.0 * k1));
138            }
139
140            mu = new_mu;
141            kappa = new_kappa;
142            alpha = new_alpha;
143            beta = new_beta;
144            run_posterior = new_post;
145
146            // Decision. argmax run length is the MAP; a drop ≥
147            // `map_drop_min` between consecutive steps flags a change.
148            // Skip t=0 because MAP is trivially 0 at the boundary.
149            let (map, _) = run_posterior
150                .iter()
151                .enumerate()
152                .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
153                .map(|(i, v)| (i, *v))
154                .unwrap_or((0, 0.0));
155            if t > 0 {
156                if let Some(pm) = prev_map {
157                    if pm >= self.map_drop_min && map + self.map_drop_min <= pm {
158                        cps.push(ts);
159                    }
160                }
161            }
162            prev_map = Some(map);
163
164            // Truncation: beyond a horizon the tail contributes negligible
165            // posterior mass. 200 samples is standard in the literature
166            // and keeps the inner loop O(min(t, 200)) per step.
167            const MAX_RUN: usize = 200;
168            if run_posterior.len() > MAX_RUN {
169                run_posterior.truncate(MAX_RUN);
170                mu.truncate(MAX_RUN);
171                kappa.truncate(MAX_RUN);
172                alpha.truncate(MAX_RUN);
173                beta.truncate(MAX_RUN);
174                let z: f64 = run_posterior.iter().sum();
175                if z > 0.0 {
176                    for p in &mut run_posterior {
177                        *p /= z;
178                    }
179                }
180            }
181        }
182        cps
183    }
184}
185
186/// Student-t PDF at `x` with location `mu`, scale² `s2`, and `nu` df,
187/// computed in log-space to avoid overflow under the tight posteriors
188/// BOCPD develops over long runs of similar observations.
189fn student_t_pdf(x: f64, mu: f64, s2: f64, nu: f64) -> f64 {
190    let s = s2.max(f64::MIN_POSITIVE).sqrt();
191    let z = (x - mu) / s;
192    let log_num = ln_gamma((nu + 1.0) * 0.5) - ln_gamma(nu * 0.5);
193    let log_den = 0.5 * (nu * PI).ln() + ((nu + 1.0) * 0.5) * (1.0 + z * z / nu).ln();
194    (log_num - log_den).exp() / s
195}
196
197/// Lanczos `ln Γ(x)` approximation, g = 7, n = 9. Standard coefficients
198/// (Numerical Recipes 3e §6.1). Accurate to ~15 digits for x > 0.5, and
199/// we use the reflection formula below for the small-x tail. BOCPD's
200/// updates only ever evaluate at α-type values that climb from the
201/// prior's α₀ upward, so we stay in the accurate regime in practice.
202fn ln_gamma(x: f64) -> f64 {
203    const P: [f64; 9] = [
204        0.999_999_999_999_809_9,
205        676.520_368_121_885,
206        -1_259.139_216_722_403,
207        771.323_428_777_653_1,
208        -176.615_029_162_140_6,
209        12.507_343_278_686_905,
210        -0.138_571_095_265_720_1,
211        9.984_369_578_019_572e-6,
212        1.505_632_735_149_311_7e-7,
213    ];
214    const G: f64 = 7.0;
215    if x < 0.5 {
216        // Reflection: Γ(x)Γ(1−x) = π / sin(πx).
217        (PI / (PI * x).sin()).ln() - ln_gamma(1.0 - x)
218    } else {
219        let x = x - 1.0;
220        let mut a = P[0];
221        for (i, &p) in P.iter().enumerate().skip(1) {
222            a += p / (x + i as f64);
223        }
224        let t = x + G + 0.5;
225        0.5 * (2.0 * PI).ln() + (x + 0.5) * t.ln() - t + a.ln()
226    }
227}
228
229#[cfg(test)]
230mod tests {
231    use super::*;
232
233    #[test]
234    fn flags_mean_shift() {
235        let mut series = Vec::new();
236        for i in 0..80 {
237            series.push((i as f64, 0.0));
238        }
239        for i in 80..160 {
240            series.push((i as f64, 5.0));
241        }
242        let cps = Bocpd::default().detect(&series);
243        assert!(!cps.is_empty(), "BOCPD should flag a 5σ shift");
244    }
245
246    #[test]
247    fn quiet_on_constant() {
248        let series: Vec<(f64, f64)> = (0..100).map(|i| (i as f64, 0.0)).collect();
249        let cps = Bocpd::default().detect(&series);
250        // A completely constant series may still trigger at t=1 due to
251        // the prior being updated, but should not emit a deluge.
252        assert!(
253            cps.len() <= 2,
254            "BOCPD should be quiet on constants, got {cps:?}"
255        );
256    }
257}
dsfb_database/baselines/bocpd.rs

dsfb_database/baselines/
bocpd.rs