dsfb_database/baselines/mod.rs
1//! Published-baseline change-point detectors for the paper's §7 bake-off.
2//!
3//! The bake-off answers a reviewer's mandatory question: *on the same
4//! residual streams that dsfb-database consumes, how do standard
5//! change-point baselines score against the same ground-truth windows?*
6//!
7//! We implement three classics from the change-point literature:
8//!
9//! * [`adwin`] — ADWIN (Bifet & Gavaldà, 2007, *"Learning from
10//! time-changing data with adaptive windowing"*). Online, streaming,
11//! no distributional assumption.
12//! * [`bocpd`] — Bayesian Online Change-Point Detection (Adams & MacKay,
13//! 2007). Online, Gaussian Normal-Gamma conjugate predictive.
14//! * [`pelt`] — Pruned Exact Linear Time (Killick, Fearnhead & Eckley,
15//! 2012). Offline, optimal under an L2 cost for changes in mean.
16//!
17//! All three are faithful reference implementations rather than thin
18//! calls to external crates — we want every bake-off number to be
19//! reproducible from this file alone, with no hidden dependency drift.
20//!
21//! Each detector returns a list of *change-point timestamps*. The
22//! [`run_detector`] helper wraps those into [`crate::grammar::Episode`]s
23//! on the matching motif so that [`crate::metrics::evaluate`] scores the
24//! baseline with the exact same TP / FP / FN rules that grade the
25//! dsfb-database motif grammar — apples-to-apples.
26//!
27//! ### Charitable conversion choice
28//!
29//! Change-point detectors report points; motif episodes are intervals.
30//! We convert a change-point at time `t` into an episode `[t, t + dwell]`
31//! where `dwell` is the motif's `min_dwell_seconds`. This is deliberately
32//! generous to the baselines — any ground-truth window within
33//! `min_dwell` of the reported change-point counts as a TP, which
34//! maximises the baselines' recall. An adversarial reviewer can
35//! re-derive stricter numbers from the emitted CSVs by shrinking the
36//! dwell; the raw change-point times are in the CSV alongside the
37//! wrapped episodes.
38
39use crate::grammar::{Episode, MotifClass, MotifParams};
40use crate::residual::{ResidualClass, ResidualStream};
41use std::collections::BTreeMap;
42
43pub mod adwin;
44pub mod bocpd;
45pub mod pelt;
46
47/// A detector operates on an ordered univariate `(t, value)` series and
48/// reports change-point timestamps. Implementations must be deterministic
49/// for a given input — the whole bake-off is pinned to byte-identical
50/// CSVs, so any randomness kills the replay guarantee.
51pub trait ChangePointDetector {
52 /// Short kebab-case label embedded in CSV rows.
53 fn name(&self) -> &'static str;
54 fn detect(&self, series: &[(f64, f64)]) -> Vec<f64>;
55}
56
57/// Run a detector against one motif of a residual stream and emit
58/// [`Episode`]s that the standard metrics path can score.
59///
60/// The series is built per-channel: for each distinct channel string
61/// under the motif's residual class we invoke `detector.detect`
62/// independently, then union the resulting change-points. This mirrors
63/// the channel granularity the dsfb motif state machine gets for free
64/// — without it the baseline would be unfairly starved of the channel
65/// signal that the motif grammar consumes.
66pub fn run_detector(
67 detector: &dyn ChangePointDetector,
68 motif: MotifClass,
69 stream: &ResidualStream,
70) -> Vec<Episode> {
71 let class: ResidualClass = motif.residual_class();
72 let mut by_channel: BTreeMap<String, Vec<(f64, f64)>> = BTreeMap::new();
73 for s in stream.iter_class(class) {
74 let ch = s.channel.clone().unwrap_or_default();
75 by_channel.entry(ch).or_default().push((s.t, s.value));
76 }
77 let dwell = MotifParams::default_for(motif).min_dwell_seconds;
78 let mut eps = Vec::new();
79 for (channel, series) in by_channel {
80 let cps = detector.detect(&series);
81 for t in cps {
82 eps.push(Episode {
83 motif,
84 channel: if channel.is_empty() {
85 None
86 } else {
87 Some(channel.clone())
88 },
89 t_start: t,
90 t_end: t + dwell,
91 peak: 0.0,
92 ema_at_boundary: 0.0,
93 trust_sum: 1.0,
94 });
95 }
96 }
97 eps.sort_by(|a, b| {
98 a.t_start
99 .partial_cmp(&b.t_start)
100 .unwrap_or(std::cmp::Ordering::Equal)
101 });
102 eps
103}