dsfb_database/baselines/bocpd.rs
1//! BOCPD — Bayesian Online Change-Point Detection.
2//!
3//! Reference: Adams & MacKay, *Bayesian Online Changepoint Detection*,
4//! arXiv:0710.3742 (2007).
5//!
6//! Core idea: at each time `t` maintain a posterior `P(r_t | x_{1..t})`
7//! over the run length `r_t` (the number of samples since the last
8//! change-point). Updates factorise into
9//!
10//! ```text
11//! P(r_t = r_{t-1}+1 | x_{1..t}) ∝ P(r_{t-1} | x_{1..t-1}) ·
12//! UPM(x_t | x_{1..t-1}, r_{t-1}) ·
13//! (1 − H(r_{t-1}+1))
14//!
15//! P(r_t = 0 | x_{1..t}) ∝ Σ P(r_{t-1} | x_{1..t-1}) ·
16//! UPM(x_t | x_{1..t-1}, r_{t-1}) ·
17//! H(r_{t-1}+1)
18//! ```
19//!
20//! with a constant hazard `H(r) = 1/λ` (geometric run-length prior) and
21//! a Student-t UPM derived from a Normal-Gamma conjugate prior on
22//! `(μ, σ²)`.
23//!
24//! We report a change-point at time `t` when the *MAP run length*
25//! (`argmax_r P(r_t | x_{1..t})`) drops by more than one step from the
26//! previous time. This is the standard point-decision rule given in
27//! §2.1 of Adams & MacKay (and is what the cited Figure 3 plots): a
28//! MAP drop means the posterior has collapsed back toward `r = 0`,
29//! signalling a regime change. Under a constant hazard, a fixed
30//! `P(r_t = 0) > τ` rule is pinned near the hazard rate even on
31//! obvious shifts — the MAP rule avoids that degeneracy.
32
33use super::ChangePointDetector;
34use std::f64::consts::PI;
35
36pub struct Bocpd {
37 /// Expected run length. `hazard = 1 / expected_run_length`. 100 is
38 /// a common default in the BOCPD literature for traces of a few
39 /// hundred samples; we keep it here so the crate's bake-off lines up
40 /// with published benchmarks.
41 pub expected_run_length: f64,
42 /// Minimum drop in MAP run length that triggers a change-point.
43 /// 1 would fire on every slight argmax jitter; 2 damps that without
44 /// missing real regime changes.
45 pub map_drop_min: usize,
46 /// Normal-Gamma prior hyper-parameters. Weakly informative defaults
47 /// match Adams & MacKay's Figure 3 setup.
48 pub mu0: f64,
49 pub kappa0: f64,
50 pub alpha0: f64,
51 pub beta0: f64,
52}
53
54impl Default for Bocpd {
55 fn default() -> Self {
56 Self {
57 expected_run_length: 100.0,
58 map_drop_min: 2,
59 mu0: 0.0,
60 kappa0: 1.0,
61 alpha0: 1.0,
62 beta0: 1.0,
63 }
64 }
65}
66
67impl ChangePointDetector for Bocpd {
68 fn name(&self) -> &'static str {
69 "bocpd"
70 }
71
72 fn detect(&self, series: &[(f64, f64)]) -> Vec<f64> {
73 if series.len() < 2 {
74 return Vec::new();
75 }
76 let hazard = 1.0 / self.expected_run_length.max(1.0);
77
78 // Per-run-length sufficient statistics. mu[i], kappa[i], alpha[i],
79 // beta[i] are the posterior Normal-Gamma parameters for the run
80 // of length i.
81 let mut mu = vec![self.mu0];
82 let mut kappa = vec![self.kappa0];
83 let mut alpha = vec![self.alpha0];
84 let mut beta = vec![self.beta0];
85 // Run-length posterior; starts with all mass at r=0.
86 let mut run_posterior: Vec<f64> = vec![1.0];
87
88 let mut cps = Vec::new();
89 let mut prev_map: Option<usize> = None;
90 for (t, &(ts, x)) in series.iter().enumerate() {
91 // Predictive Student-t density at each extant run length.
92 let mut pred = Vec::with_capacity(run_posterior.len());
93 for i in 0..run_posterior.len() {
94 pred.push(student_t_pdf(
95 x,
96 mu[i],
97 beta[i] * (kappa[i] + 1.0) / (alpha[i] * kappa[i]),
98 2.0 * alpha[i],
99 ));
100 }
101
102 // Growth probabilities (no change).
103 let mut new_post = vec![0.0; run_posterior.len() + 1];
104 for i in 0..run_posterior.len() {
105 new_post[i + 1] = run_posterior[i] * pred[i] * (1.0 - hazard);
106 }
107 // Change probability (collapse all mass to r=0).
108 new_post[0] = run_posterior
109 .iter()
110 .zip(pred.iter())
111 .map(|(r, p)| r * p * hazard)
112 .sum();
113
114 // Normalise — protects against numerical underflow over
115 // long series.
116 let z: f64 = new_post.iter().sum();
117 if z > 0.0 {
118 for p in &mut new_post {
119 *p /= z;
120 }
121 } else {
122 new_post[0] = 1.0;
123 }
124
125 // Update sufficient statistics: extend every existing run by
126 // 1, and prepend a fresh prior for the r=0 branch.
127 let mut new_mu = vec![self.mu0];
128 let mut new_kappa = vec![self.kappa0];
129 let mut new_alpha = vec![self.alpha0];
130 let mut new_beta = vec![self.beta0];
131 for i in 0..run_posterior.len() {
132 let k1 = kappa[i] + 1.0;
133 new_mu.push((kappa[i] * mu[i] + x) / k1);
134 new_kappa.push(k1);
135 new_alpha.push(alpha[i] + 0.5);
136 let diff = x - mu[i];
137 new_beta.push(beta[i] + kappa[i] * diff * diff / (2.0 * k1));
138 }
139
140 mu = new_mu;
141 kappa = new_kappa;
142 alpha = new_alpha;
143 beta = new_beta;
144 run_posterior = new_post;
145
146 // Decision. argmax run length is the MAP; a drop ≥
147 // `map_drop_min` between consecutive steps flags a change.
148 // Skip t=0 because MAP is trivially 0 at the boundary.
149 let (map, _) = run_posterior
150 .iter()
151 .enumerate()
152 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
153 .map(|(i, v)| (i, *v))
154 .unwrap_or((0, 0.0));
155 if t > 0 {
156 if let Some(pm) = prev_map {
157 if pm >= self.map_drop_min && map + self.map_drop_min <= pm {
158 cps.push(ts);
159 }
160 }
161 }
162 prev_map = Some(map);
163
164 // Truncation: beyond a horizon the tail contributes negligible
165 // posterior mass. 200 samples is standard in the literature
166 // and keeps the inner loop O(min(t, 200)) per step.
167 const MAX_RUN: usize = 200;
168 if run_posterior.len() > MAX_RUN {
169 run_posterior.truncate(MAX_RUN);
170 mu.truncate(MAX_RUN);
171 kappa.truncate(MAX_RUN);
172 alpha.truncate(MAX_RUN);
173 beta.truncate(MAX_RUN);
174 let z: f64 = run_posterior.iter().sum();
175 if z > 0.0 {
176 for p in &mut run_posterior {
177 *p /= z;
178 }
179 }
180 }
181 }
182 cps
183 }
184}
185
186/// Student-t PDF at `x` with location `mu`, scale² `s2`, and `nu` df,
187/// computed in log-space to avoid overflow under the tight posteriors
188/// BOCPD develops over long runs of similar observations.
189fn student_t_pdf(x: f64, mu: f64, s2: f64, nu: f64) -> f64 {
190 let s = s2.max(f64::MIN_POSITIVE).sqrt();
191 let z = (x - mu) / s;
192 let log_num = ln_gamma((nu + 1.0) * 0.5) - ln_gamma(nu * 0.5);
193 let log_den = 0.5 * (nu * PI).ln() + ((nu + 1.0) * 0.5) * (1.0 + z * z / nu).ln();
194 (log_num - log_den).exp() / s
195}
196
197/// Lanczos `ln Γ(x)` approximation, g = 7, n = 9. Standard coefficients
198/// (Numerical Recipes 3e §6.1). Accurate to ~15 digits for x > 0.5, and
199/// we use the reflection formula below for the small-x tail. BOCPD's
200/// updates only ever evaluate at α-type values that climb from the
201/// prior's α₀ upward, so we stay in the accurate regime in practice.
202fn ln_gamma(x: f64) -> f64 {
203 const P: [f64; 9] = [
204 0.999_999_999_999_809_9,
205 676.520_368_121_885,
206 -1_259.139_216_722_403,
207 771.323_428_777_653_1,
208 -176.615_029_162_140_6,
209 12.507_343_278_686_905,
210 -0.138_571_095_265_720_1,
211 9.984_369_578_019_572e-6,
212 1.505_632_735_149_311_7e-7,
213 ];
214 const G: f64 = 7.0;
215 if x < 0.5 {
216 // Reflection: Γ(x)Γ(1−x) = π / sin(πx).
217 (PI / (PI * x).sin()).ln() - ln_gamma(1.0 - x)
218 } else {
219 let x = x - 1.0;
220 let mut a = P[0];
221 for (i, &p) in P.iter().enumerate().skip(1) {
222 a += p / (x + i as f64);
223 }
224 let t = x + G + 0.5;
225 0.5 * (2.0 * PI).ln() + (x + 0.5) * t.ln() - t + a.ln()
226 }
227}
228
229#[cfg(test)]
230mod tests {
231 use super::*;
232
233 #[test]
234 fn flags_mean_shift() {
235 let mut series = Vec::new();
236 for i in 0..80 {
237 series.push((i as f64, 0.0));
238 }
239 for i in 80..160 {
240 series.push((i as f64, 5.0));
241 }
242 let cps = Bocpd::default().detect(&series);
243 assert!(!cps.is_empty(), "BOCPD should flag a 5σ shift");
244 }
245
246 #[test]
247 fn quiet_on_constant() {
248 let series: Vec<(f64, f64)> = (0..100).map(|i| (i as f64, 0.0)).collect();
249 let cps = Bocpd::default().detect(&series);
250 // A completely constant series may still trigger at t=1 due to
251 // the prior being updated, but should not emit a deluge.
252 assert!(
253 cps.len() <= 2,
254 "BOCPD should be quiet on constants, got {cps:?}"
255 );
256 }
257}