Skip to main content

tacet_core/statistics/
online_stats.rs

1//! Online (streaming) statistics computation using Welford's algorithm.
2//!
3//! This module provides incremental computation of mean, variance, and lag-1
4//! autocorrelation with O(1) memory and O(1) per-sample overhead.
5//!
6//! Used for condition drift detection (spec Section 2.6, Gate 6) to compare
7//! measurement statistics from calibration vs the full test run.
8
9use crate::math::sqrt;
10
11/// Online statistics accumulator using Welford's algorithm.
12///
13/// Tracks mean, variance, and lag-1 autocorrelation incrementally with
14/// O(1) overhead per sample. This is used to detect condition drift
15/// between calibration and the adaptive loop.
16///
17/// # Example
18///
19/// ```
20/// use tacet_core::statistics::OnlineStats;
21///
22/// let mut stats = OnlineStats::new();
23/// for x in [1.0, 2.0, 3.0, 4.0, 5.0] {
24///     stats.update(x);
25/// }
26/// let snapshot = stats.finalize();
27/// assert!((snapshot.mean - 3.0).abs() < 1e-10);
28/// ```
29#[derive(Debug, Clone)]
30pub struct OnlineStats {
31    /// Number of samples seen.
32    count: usize,
33    /// Running mean.
34    mean: f64,
35    /// Welford's M2: sum of squared deviations from current mean.
36    /// Variance = m2 / (count - 1) for sample variance.
37    m2: f64,
38    /// Previous value (for lag-1 autocorrelation).
39    prev_value: f64,
40    /// Previous mean (for lag-1 autocorrelation with current mean estimate).
41    prev_mean: f64,
42    /// Sum of (x_i - mean)(x_{i-1} - mean) products for autocorrelation.
43    autocorr_sum: f64,
44    /// Number of pairs for autocorrelation (count - 1).
45    autocorr_count: usize,
46}
47
48impl Default for OnlineStats {
49    fn default() -> Self {
50        Self::new()
51    }
52}
53
54impl OnlineStats {
55    /// Create a new empty statistics accumulator.
56    pub fn new() -> Self {
57        Self {
58            count: 0,
59            mean: 0.0,
60            m2: 0.0,
61            prev_value: 0.0,
62            prev_mean: 0.0,
63            autocorr_sum: 0.0,
64            autocorr_count: 0,
65        }
66    }
67
68    /// Update statistics with a new sample.
69    ///
70    /// Uses Welford's online algorithm for numerically stable variance computation.
71    /// Also tracks lag-1 autocorrelation incrementally.
72    ///
73    /// # Arguments
74    ///
75    /// * `x` - The new sample value
76    pub fn update(&mut self, x: f64) {
77        // Welford's online algorithm for mean and variance
78        self.count += 1;
79        let delta = x - self.mean;
80        self.mean += delta / self.count as f64;
81        let delta2 = x - self.mean;
82        self.m2 += delta * delta2;
83
84        // Lag-1 autocorrelation: accumulate (x_i - μ)(x_{i-1} - μ)
85        // We use the current mean estimate for both terms, which introduces
86        // a small bias but is acceptable for drift detection purposes.
87        if self.count > 1 {
88            // Use deviation from current mean for both current and previous value
89            let dev_curr = x - self.mean;
90            let dev_prev = self.prev_value - self.mean;
91            self.autocorr_sum += dev_curr * dev_prev;
92            self.autocorr_count += 1;
93        }
94
95        self.prev_value = x;
96        self.prev_mean = self.mean;
97    }
98
99    /// Finalize and return the computed statistics.
100    ///
101    /// # Returns
102    ///
103    /// A `StatsSnapshot` containing mean, variance, and lag-1 autocorrelation.
104    /// Returns default values (0.0) if insufficient samples.
105    pub fn finalize(&self) -> StatsSnapshot {
106        if self.count < 2 {
107            return StatsSnapshot {
108                mean: self.mean,
109                variance: 0.0,
110                autocorr_lag1: 0.0,
111                count: self.count,
112            };
113        }
114
115        let variance = self.m2 / (self.count - 1) as f64;
116
117        // Autocorrelation coefficient: cov(x_i, x_{i-1}) / var(x)
118        let autocorr_lag1 = if self.autocorr_count > 0 && variance > 1e-15 {
119            let autocovariance = self.autocorr_sum / self.autocorr_count as f64;
120            (autocovariance / variance).clamp(-1.0, 1.0)
121        } else {
122            0.0
123        };
124
125        StatsSnapshot {
126            mean: self.mean,
127            variance,
128            autocorr_lag1,
129            count: self.count,
130        }
131    }
132
133    /// Get the current sample count.
134    pub fn count(&self) -> usize {
135        self.count
136    }
137
138    /// Get the current mean estimate.
139    pub fn mean(&self) -> f64 {
140        self.mean
141    }
142
143    /// Get the current variance estimate (returns 0 if count < 2).
144    pub fn variance(&self) -> f64 {
145        if self.count < 2 {
146            0.0
147        } else {
148            self.m2 / (self.count - 1) as f64
149        }
150    }
151}
152
153/// Snapshot of computed statistics at a point in time.
154///
155/// This is returned by `OnlineStats::finalize()` and used for
156/// comparing calibration vs post-test statistics.
157#[derive(Debug, Clone, Copy, PartialEq)]
158pub struct StatsSnapshot {
159    /// Sample mean.
160    pub mean: f64,
161    /// Sample variance (using n-1 denominator).
162    pub variance: f64,
163    /// Lag-1 autocorrelation coefficient, in range [-1, 1].
164    pub autocorr_lag1: f64,
165    /// Number of samples.
166    pub count: usize,
167}
168
169impl StatsSnapshot {
170    /// Get the standard deviation.
171    pub fn std_dev(&self) -> f64 {
172        sqrt(self.variance)
173    }
174}
175
176#[cfg(test)]
177mod tests {
178    use super::*;
179    use crate::math::sq;
180
181    #[test]
182    fn test_online_stats_basic() {
183        let mut stats = OnlineStats::new();
184        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
185
186        for &x in &data {
187            stats.update(x);
188        }
189
190        let snapshot = stats.finalize();
191
192        // Mean should be 3.0
193        assert!(
194            (snapshot.mean - 3.0).abs() < 1e-10,
195            "Expected mean=3.0, got {}",
196            snapshot.mean
197        );
198
199        // Variance should be 2.5 (sample variance of [1,2,3,4,5])
200        assert!(
201            (snapshot.variance - 2.5).abs() < 1e-10,
202            "Expected variance=2.5, got {}",
203            snapshot.variance
204        );
205
206        assert_eq!(snapshot.count, 5);
207    }
208
209    #[test]
210    fn test_online_stats_single_value() {
211        let mut stats = OnlineStats::new();
212        stats.update(42.0);
213
214        let snapshot = stats.finalize();
215
216        assert!((snapshot.mean - 42.0).abs() < 1e-10);
217        assert!((snapshot.variance - 0.0).abs() < 1e-10);
218        assert_eq!(snapshot.count, 1);
219    }
220
221    #[test]
222    fn test_online_stats_empty() {
223        let stats = OnlineStats::new();
224        let snapshot = stats.finalize();
225
226        assert!((snapshot.mean - 0.0).abs() < 1e-10);
227        assert!((snapshot.variance - 0.0).abs() < 1e-10);
228        assert_eq!(snapshot.count, 0);
229    }
230
231    #[test]
232    fn test_online_stats_constant_values() {
233        let mut stats = OnlineStats::new();
234        for _ in 0..100 {
235            stats.update(5.0);
236        }
237
238        let snapshot = stats.finalize();
239
240        assert!((snapshot.mean - 5.0).abs() < 1e-10);
241        assert!(
242            snapshot.variance < 1e-10,
243            "Constant values should have ~0 variance"
244        );
245        assert_eq!(snapshot.count, 100);
246    }
247
248    #[test]
249    fn test_online_stats_matches_batch() {
250        // Compare online computation with batch computation
251        let data: Vec<f64> = (0..1000).map(|i| (i as f64).sin() * 100.0).collect();
252
253        // Online computation
254        let mut stats = OnlineStats::new();
255        for &x in &data {
256            stats.update(x);
257        }
258        let online = stats.finalize();
259
260        // Batch computation
261        let n = data.len() as f64;
262        let batch_mean: f64 = data.iter().sum::<f64>() / n;
263        let batch_variance: f64 =
264            data.iter().map(|x| sq(x - batch_mean)).sum::<f64>() / (n - 1.0);
265
266        assert!(
267            (online.mean - batch_mean).abs() < 1e-10,
268            "Mean mismatch: online={}, batch={}",
269            online.mean,
270            batch_mean
271        );
272        assert!(
273            (online.variance - batch_variance).abs() < 1e-6,
274            "Variance mismatch: online={}, batch={}",
275            online.variance,
276            batch_variance
277        );
278    }
279
280    #[test]
281    fn test_online_stats_autocorr_positive() {
282        // Highly autocorrelated data: each value close to previous
283        let mut stats = OnlineStats::new();
284        let mut x = 0.0;
285        for _ in 0..1000 {
286            x += 0.1; // Monotonically increasing
287            stats.update(x);
288        }
289
290        let snapshot = stats.finalize();
291
292        // Strong positive autocorrelation expected
293        assert!(
294            snapshot.autocorr_lag1 > 0.9,
295            "Expected high positive autocorrelation, got {}",
296            snapshot.autocorr_lag1
297        );
298    }
299
300    #[test]
301    fn test_online_stats_autocorr_negative() {
302        // Alternating values: strong negative autocorrelation
303        let mut stats = OnlineStats::new();
304        for i in 0..1000 {
305            let x = if i % 2 == 0 { 100.0 } else { -100.0 };
306            stats.update(x);
307        }
308
309        let snapshot = stats.finalize();
310
311        // Strong negative autocorrelation expected
312        assert!(
313            snapshot.autocorr_lag1 < -0.9,
314            "Expected high negative autocorrelation, got {}",
315            snapshot.autocorr_lag1
316        );
317    }
318
319    #[test]
320    fn test_online_stats_autocorr_near_zero() {
321        // Random-ish data: low autocorrelation
322        let mut stats = OnlineStats::new();
323        let mut state: u64 = 12345;
324        for _ in 0..1000 {
325            // Pseudo-random using simple LCG with wrapping arithmetic
326            state = state.wrapping_mul(1103515245).wrapping_add(12345);
327            let x = (state % 1000) as f64;
328            stats.update(x);
329        }
330
331        let snapshot = stats.finalize();
332
333        // Near-zero autocorrelation expected for pseudo-random data
334        assert!(
335            snapshot.autocorr_lag1.abs() < 0.1,
336            "Expected near-zero autocorrelation, got {}",
337            snapshot.autocorr_lag1
338        );
339    }
340
341    #[test]
342    fn test_stats_snapshot_std_dev() {
343        let snapshot = StatsSnapshot {
344            mean: 5.0,
345            variance: 4.0,
346            autocorr_lag1: 0.0,
347            count: 100,
348        };
349
350        assert!((snapshot.std_dev() - 2.0).abs() < 1e-10);
351    }
352}