simplebench_runtime/changepoint.rs
1//! Bayesian Online Change Point Detection
2//!
3//! Based on Adams & MacKay (2007) "Bayesian Online Changepoint Detection"
4//! Simplified implementation for detecting performance regressions in benchmark data.
5
6use crate::statistics::mean;
7
8/// Bayesian Online Change Point Detection core algorithm
9pub struct BayesianCPD {
10 /// Prior probability of a change point occurring (hazard rate)
11 hazard_rate: f64,
12}
13
14impl BayesianCPD {
15 /// Create a new Bayesian CPD detector
16 ///
17 /// # Arguments
18 /// * `hazard_rate` - Prior probability of change point (e.g., 0.1 = expect change every 10 runs)
19 pub fn new(hazard_rate: f64) -> Self {
20 Self { hazard_rate }
21 }
22
23 /// Update with a new observation and return change point probability
24 ///
25 /// # Arguments
26 /// * `value` - New observation (e.g., current benchmark mean)
27 /// * `historical` - Historical observations (e.g., previous benchmark means)
28 ///
29 /// # Returns
30 /// Probability that a change point occurred (0.0 to 1.0)
31 pub fn update(&mut self, value: f64, historical: &[f64]) -> f64 {
32 if historical.is_empty() {
33 return 0.0;
34 }
35
36 // Calculate predictive probability using all historical data
37 let likelihood = self.student_t_likelihood(value, historical);
38
39 // For simplified implementation: compare new value against historical distribution
40 // High likelihood = fits the pattern (low change probability)
41 // Low likelihood = doesn't fit (high change probability)
42
43 // Convert likelihood to change probability (invert the likelihood)
44 // If value is very different from historical mean, likelihood will be low
45 let unlikelihood = 1.0 - likelihood.min(1.0);
46
47 // Combine data evidence with prior (hazard rate)
48 // Bayesian update: P(CP|data) ∝ P(data|CP) * P(CP)
49 // Where P(CP) = hazard_rate
50 // And P(data|CP) = 1 - likelihood (assuming uniform prior under change)
51
52 // Weight the evidence by the hazard rate as a prior
53 let prior_weight = self.hazard_rate;
54 let evidence_weight = unlikelihood;
55
56 // Combine: higher hazard rate gives more weight to the prior expectation of change
57 let change_prob =
58 (evidence_weight * 0.7) + (prior_weight * 0.3) + (evidence_weight * prior_weight * 0.5);
59
60 change_prob.min(1.0)
61 }
62
63 /// Calculate Student's t-distribution likelihood
64 ///
65 /// This is used as the predictive distribution for new observations.
66 /// Student's t is more robust to outliers than normal distribution.
67 fn student_t_likelihood(&self, value: f64, historical: &[f64]) -> f64 {
68 if historical.is_empty() {
69 return 0.5;
70 }
71
72 let n = historical.len() as f64;
73 let hist_mean = mean(historical);
74 let variance = if n > 1.0 {
75 historical
76 .iter()
77 .map(|&x| (x - hist_mean).powi(2))
78 .sum::<f64>()
79 / n
80 } else {
81 1.0 // Default variance for single observation
82 };
83
84 if variance < 1e-10 {
85 // Very low variance, use normal approximation
86 let stddev = variance.sqrt().max(1e-5);
87 let z = ((value - hist_mean) / stddev).abs();
88 return (-0.5 * z * z).exp();
89 }
90
91 // Student's t-distribution with n-1 degrees of freedom
92 let df = (n - 1.0).max(1.0);
93 let t = (value - hist_mean) / variance.sqrt();
94 let t_squared = t * t;
95
96 // Simplified Student's t PDF (good enough for our purposes)
97 (1.0 + t_squared / df).powf(-(df + 1.0) / 2.0)
98 }
99}
100
101/// Simplified API: calculate change point probability for a new value
102///
103/// This is a convenience function that creates a new BayesianCPD instance
104/// and calculates the change point probability in a single call.
105///
106/// # Arguments
107/// * `new_value` - Current observation (e.g., current benchmark mean)
108/// * `historical` - Historical observations (e.g., previous benchmark means)
109/// * `hazard_rate` - Prior probability of change point
110///
111/// # Returns
112/// Probability that a change point occurred (0.0 to 1.0)
113pub fn bayesian_change_point_probability(
114 new_value: f64,
115 historical: &[f64],
116 hazard_rate: f64,
117) -> f64 {
118 let mut cpd = BayesianCPD::new(hazard_rate);
119 cpd.update(new_value, historical)
120}
121
122#[cfg(test)]
123mod tests {
124 use super::*;
125
126 #[test]
127 fn test_stable_data_low_change_probability() {
128 // Stable data should have low change probability
129 let historical = vec![1.0, 1.01, 0.99, 1.0, 1.02, 0.98, 1.0, 1.01];
130 let new_value = 1.0;
131 let prob = bayesian_change_point_probability(new_value, &historical, 0.1);
132
133 // Should be low probability since new value fits the pattern
134 assert!(prob < 0.5, "Expected low change probability, got {}", prob);
135 }
136
137 #[test]
138 fn test_clear_change_point() {
139 // Historical data around 1.0, then sudden jump to 2.0
140 let historical = vec![1.0, 1.01, 0.99, 1.0, 1.02, 0.98, 1.0, 1.01];
141 let new_value = 2.0;
142 let prob = bayesian_change_point_probability(new_value, &historical, 0.1);
143
144 // Should be high probability since new value is very different
145 assert!(prob > 0.3, "Expected high change probability, got {}", prob);
146 }
147
148 #[test]
149 fn test_empty_historical() {
150 let historical: Vec<f64> = vec![];
151 let new_value = 1.0;
152 let prob = bayesian_change_point_probability(new_value, &historical, 0.1);
153
154 // Should return 0.0 for empty history
155 assert_eq!(prob, 0.0);
156 }
157
158 #[test]
159 fn test_gradual_drift() {
160 // Gradual increase should show lower change probability than sudden jump
161 let historical = vec![1.0, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3];
162 let new_value = 1.35;
163 let prob_gradual = bayesian_change_point_probability(new_value, &historical, 0.1);
164
165 let historical_stable = vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0];
166 let new_value_jump = 1.35;
167 let prob_sudden =
168 bayesian_change_point_probability(new_value_jump, &historical_stable, 0.1);
169
170 // Sudden jump should have higher change probability than gradual drift
171 assert!(
172 prob_sudden > prob_gradual,
173 "Sudden jump ({}) should have higher change probability than gradual drift ({})",
174 prob_sudden,
175 prob_gradual
176 );
177 }
178
179 #[test]
180 fn test_higher_hazard_rate_increases_change_probability() {
181 let historical = vec![1.0, 1.0, 1.0, 1.0, 1.0];
182 let new_value = 1.2;
183
184 let prob_low_hazard = bayesian_change_point_probability(new_value, &historical, 0.05);
185 let prob_high_hazard = bayesian_change_point_probability(new_value, &historical, 0.3);
186
187 // Higher hazard rate should increase change point probability
188 assert!(prob_high_hazard > prob_low_hazard,
189 "Higher hazard rate ({}) should produce higher change probability than lower hazard rate ({})",
190 prob_high_hazard, prob_low_hazard);
191 }
192}