simplebench_runtime/
changepoint.rs

1//! Bayesian Online Change Point Detection
2//!
3//! Based on Adams & MacKay (2007) "Bayesian Online Changepoint Detection"
4//! Simplified implementation for detecting performance regressions in benchmark data.
5
6use crate::statistics::mean;
7
8/// Bayesian Online Change Point Detection core algorithm
9pub struct BayesianCPD {
10    /// Prior probability of a change point occurring (hazard rate)
11    hazard_rate: f64,
12}
13
14impl BayesianCPD {
15    /// Create a new Bayesian CPD detector
16    ///
17    /// # Arguments
18    /// * `hazard_rate` - Prior probability of change point (e.g., 0.1 = expect change every 10 runs)
19    pub fn new(hazard_rate: f64) -> Self {
20        Self { hazard_rate }
21    }
22
23    /// Update with a new observation and return change point probability
24    ///
25    /// # Arguments
26    /// * `value` - New observation (e.g., current benchmark mean)
27    /// * `historical` - Historical observations (e.g., previous benchmark means)
28    ///
29    /// # Returns
30    /// Probability that a change point occurred (0.0 to 1.0)
31    pub fn update(&mut self, value: f64, historical: &[f64]) -> f64 {
32        if historical.is_empty() {
33            return 0.0;
34        }
35
36        // Calculate predictive probability using all historical data
37        let likelihood = self.student_t_likelihood(value, historical);
38
39        // For simplified implementation: compare new value against historical distribution
40        // High likelihood = fits the pattern (low change probability)
41        // Low likelihood = doesn't fit (high change probability)
42
43        // Convert likelihood to change probability (invert the likelihood)
44        // If value is very different from historical mean, likelihood will be low
45        let unlikelihood = 1.0 - likelihood.min(1.0);
46
47        // Combine data evidence with prior (hazard rate)
48        // Bayesian update: P(CP|data) ∝ P(data|CP) * P(CP)
49        // Where P(CP) = hazard_rate
50        // And P(data|CP) = 1 - likelihood (assuming uniform prior under change)
51
52        // Weight the evidence by the hazard rate as a prior
53        let prior_weight = self.hazard_rate;
54        let evidence_weight = unlikelihood;
55
56        // Combine: higher hazard rate gives more weight to the prior expectation of change
57        let change_prob =
58            (evidence_weight * 0.7) + (prior_weight * 0.3) + (evidence_weight * prior_weight * 0.5);
59
60        change_prob.min(1.0)
61    }
62
63    /// Calculate Student's t-distribution likelihood
64    ///
65    /// This is used as the predictive distribution for new observations.
66    /// Student's t is more robust to outliers than normal distribution.
67    fn student_t_likelihood(&self, value: f64, historical: &[f64]) -> f64 {
68        if historical.is_empty() {
69            return 0.5;
70        }
71
72        let n = historical.len() as f64;
73        let hist_mean = mean(historical);
74        let variance = if n > 1.0 {
75            historical
76                .iter()
77                .map(|&x| (x - hist_mean).powi(2))
78                .sum::<f64>()
79                / n
80        } else {
81            1.0 // Default variance for single observation
82        };
83
84        if variance < 1e-10 {
85            // Very low variance, use normal approximation
86            let stddev = variance.sqrt().max(1e-5);
87            let z = ((value - hist_mean) / stddev).abs();
88            return (-0.5 * z * z).exp();
89        }
90
91        // Student's t-distribution with n-1 degrees of freedom
92        let df = (n - 1.0).max(1.0);
93        let t = (value - hist_mean) / variance.sqrt();
94        let t_squared = t * t;
95
96        // Simplified Student's t PDF (good enough for our purposes)
97        (1.0 + t_squared / df).powf(-(df + 1.0) / 2.0)
98    }
99}
100
101/// Simplified API: calculate change point probability for a new value
102///
103/// This is a convenience function that creates a new BayesianCPD instance
104/// and calculates the change point probability in a single call.
105///
106/// # Arguments
107/// * `new_value` - Current observation (e.g., current benchmark mean)
108/// * `historical` - Historical observations (e.g., previous benchmark means)
109/// * `hazard_rate` - Prior probability of change point
110///
111/// # Returns
112/// Probability that a change point occurred (0.0 to 1.0)
113pub fn bayesian_change_point_probability(
114    new_value: f64,
115    historical: &[f64],
116    hazard_rate: f64,
117) -> f64 {
118    let mut cpd = BayesianCPD::new(hazard_rate);
119    cpd.update(new_value, historical)
120}
121
122#[cfg(test)]
123mod tests {
124    use super::*;
125
126    #[test]
127    fn test_stable_data_low_change_probability() {
128        // Stable data should have low change probability
129        let historical = vec![1.0, 1.01, 0.99, 1.0, 1.02, 0.98, 1.0, 1.01];
130        let new_value = 1.0;
131        let prob = bayesian_change_point_probability(new_value, &historical, 0.1);
132
133        // Should be low probability since new value fits the pattern
134        assert!(prob < 0.5, "Expected low change probability, got {}", prob);
135    }
136
137    #[test]
138    fn test_clear_change_point() {
139        // Historical data around 1.0, then sudden jump to 2.0
140        let historical = vec![1.0, 1.01, 0.99, 1.0, 1.02, 0.98, 1.0, 1.01];
141        let new_value = 2.0;
142        let prob = bayesian_change_point_probability(new_value, &historical, 0.1);
143
144        // Should be high probability since new value is very different
145        assert!(prob > 0.3, "Expected high change probability, got {}", prob);
146    }
147
148    #[test]
149    fn test_empty_historical() {
150        let historical: Vec<f64> = vec![];
151        let new_value = 1.0;
152        let prob = bayesian_change_point_probability(new_value, &historical, 0.1);
153
154        // Should return 0.0 for empty history
155        assert_eq!(prob, 0.0);
156    }
157
158    #[test]
159    fn test_gradual_drift() {
160        // Gradual increase should show lower change probability than sudden jump
161        let historical = vec![1.0, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3];
162        let new_value = 1.35;
163        let prob_gradual = bayesian_change_point_probability(new_value, &historical, 0.1);
164
165        let historical_stable = vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0];
166        let new_value_jump = 1.35;
167        let prob_sudden =
168            bayesian_change_point_probability(new_value_jump, &historical_stable, 0.1);
169
170        // Sudden jump should have higher change probability than gradual drift
171        assert!(
172            prob_sudden > prob_gradual,
173            "Sudden jump ({}) should have higher change probability than gradual drift ({})",
174            prob_sudden,
175            prob_gradual
176        );
177    }
178
179    #[test]
180    fn test_higher_hazard_rate_increases_change_probability() {
181        let historical = vec![1.0, 1.0, 1.0, 1.0, 1.0];
182        let new_value = 1.2;
183
184        let prob_low_hazard = bayesian_change_point_probability(new_value, &historical, 0.05);
185        let prob_high_hazard = bayesian_change_point_probability(new_value, &historical, 0.3);
186
187        // Higher hazard rate should increase change point probability
188        assert!(prob_high_hazard > prob_low_hazard,
189            "Higher hazard rate ({}) should produce higher change probability than lower hazard rate ({})",
190            prob_high_hazard, prob_low_hazard);
191    }
192}