Skip to main content

tacet_core/statistics/
online_stats.rs

1//! Online (streaming) statistics computation using Welford's algorithm.
2//!
3//! This module provides incremental computation of mean, variance, and lag-1
4//! autocorrelation with O(1) memory and O(1) per-sample overhead.
5//!
6//! Used for condition drift detection (spec §3.5.4, Gate 4) to compare
7//! measurement statistics from calibration vs the full test run.
8
9use crate::math::sqrt;
10
11/// Online statistics accumulator using Welford's algorithm.
12///
13/// Tracks mean, variance, and lag-1 autocorrelation incrementally with
14/// O(1) overhead per sample. This is used to detect condition drift
15/// between calibration and the adaptive loop.
16///
17/// # Example
18///
19/// ```
20/// use tacet_core::statistics::OnlineStats;
21///
22/// let mut stats = OnlineStats::new();
23/// for x in [1.0, 2.0, 3.0, 4.0, 5.0] {
24///     stats.update(x);
25/// }
26/// let snapshot = stats.finalize();
27/// assert!((snapshot.mean - 3.0).abs() < 1e-10);
28/// ```
29#[derive(Debug, Clone)]
30pub struct OnlineStats {
31    /// Number of samples seen.
32    count: usize,
33    /// Running mean.
34    mean: f64,
35    /// Welford's M2: sum of squared deviations from current mean.
36    /// Variance = m2 / (count - 1) for sample variance.
37    m2: f64,
38    /// Previous value (for lag-1 autocorrelation).
39    prev_value: f64,
40    /// Previous mean (for lag-1 autocorrelation with current mean estimate).
41    prev_mean: f64,
42    /// Sum of (x_i - mean)(x_{i-1} - mean) products for autocorrelation.
43    autocorr_sum: f64,
44    /// Number of pairs for autocorrelation (count - 1).
45    autocorr_count: usize,
46}
47
48impl Default for OnlineStats {
49    fn default() -> Self {
50        Self::new()
51    }
52}
53
54impl OnlineStats {
55    /// Create a new empty statistics accumulator.
56    pub fn new() -> Self {
57        Self {
58            count: 0,
59            mean: 0.0,
60            m2: 0.0,
61            prev_value: 0.0,
62            prev_mean: 0.0,
63            autocorr_sum: 0.0,
64            autocorr_count: 0,
65        }
66    }
67
68    /// Update statistics with a new sample.
69    ///
70    /// Uses Welford's online algorithm for numerically stable variance computation.
71    /// Also tracks lag-1 autocorrelation incrementally.
72    ///
73    /// # Arguments
74    ///
75    /// * `x` - The new sample value
76    pub fn update(&mut self, x: f64) {
77        // Welford's online algorithm for mean and variance
78        self.count += 1;
79        let delta = x - self.mean;
80        self.mean += delta / self.count as f64;
81        let delta2 = x - self.mean;
82        self.m2 += delta * delta2;
83
84        // Lag-1 autocorrelation: accumulate (x_i - μ)(x_{i-1} - μ)
85        // We use the current mean estimate for both terms, which introduces
86        // a small bias but is acceptable for drift detection purposes.
87        if self.count > 1 {
88            // Use deviation from current mean for both current and previous value
89            let dev_curr = x - self.mean;
90            let dev_prev = self.prev_value - self.mean;
91            self.autocorr_sum += dev_curr * dev_prev;
92            self.autocorr_count += 1;
93        }
94
95        self.prev_value = x;
96        self.prev_mean = self.mean;
97    }
98
99    /// Finalize and return the computed statistics.
100    ///
101    /// # Returns
102    ///
103    /// A `StatsSnapshot` containing mean, variance, and lag-1 autocorrelation.
104    /// Returns default values (0.0) if insufficient samples.
105    pub fn finalize(&self) -> StatsSnapshot {
106        if self.count < 2 {
107            return StatsSnapshot {
108                mean: self.mean,
109                variance: 0.0,
110                autocorr_lag1: 0.0,
111                count: self.count,
112            };
113        }
114
115        let variance = self.m2 / (self.count - 1) as f64;
116
117        // Autocorrelation coefficient: cov(x_i, x_{i-1}) / var(x)
118        let autocorr_lag1 = if self.autocorr_count > 0 && variance > 1e-15 {
119            let autocovariance = self.autocorr_sum / self.autocorr_count as f64;
120            (autocovariance / variance).clamp(-1.0, 1.0)
121        } else {
122            0.0
123        };
124
125        StatsSnapshot {
126            mean: self.mean,
127            variance,
128            autocorr_lag1,
129            count: self.count,
130        }
131    }
132
133    /// Get the current sample count.
134    pub fn count(&self) -> usize {
135        self.count
136    }
137
138    /// Get the current mean estimate.
139    pub fn mean(&self) -> f64 {
140        self.mean
141    }
142
143    /// Get the current variance estimate (returns 0 if count < 2).
144    pub fn variance(&self) -> f64 {
145        if self.count < 2 {
146            0.0
147        } else {
148            self.m2 / (self.count - 1) as f64
149        }
150    }
151}
152
153/// Snapshot of computed statistics at a point in time.
154///
155/// This is returned by `OnlineStats::finalize()` and used for
156/// comparing calibration vs post-test statistics.
157#[derive(Debug, Clone, Copy, PartialEq)]
158pub struct StatsSnapshot {
159    /// Sample mean.
160    pub mean: f64,
161    /// Sample variance (using n-1 denominator).
162    pub variance: f64,
163    /// Lag-1 autocorrelation coefficient, in range [-1, 1].
164    pub autocorr_lag1: f64,
165    /// Number of samples.
166    pub count: usize,
167}
168
169impl StatsSnapshot {
170    /// Get the standard deviation.
171    pub fn std_dev(&self) -> f64 {
172        sqrt(self.variance)
173    }
174}
175
176#[cfg(test)]
177mod tests {
178    use super::*;
179    use crate::math::sq;
180
181    #[test]
182    fn test_online_stats_basic() {
183        let mut stats = OnlineStats::new();
184        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
185
186        for &x in &data {
187            stats.update(x);
188        }
189
190        let snapshot = stats.finalize();
191
192        // Mean should be 3.0
193        assert!(
194            (snapshot.mean - 3.0).abs() < 1e-10,
195            "Expected mean=3.0, got {}",
196            snapshot.mean
197        );
198
199        // Variance should be 2.5 (sample variance of [1,2,3,4,5])
200        assert!(
201            (snapshot.variance - 2.5).abs() < 1e-10,
202            "Expected variance=2.5, got {}",
203            snapshot.variance
204        );
205
206        assert_eq!(snapshot.count, 5);
207    }
208
209    #[test]
210    fn test_online_stats_single_value() {
211        let mut stats = OnlineStats::new();
212        stats.update(42.0);
213
214        let snapshot = stats.finalize();
215
216        assert!((snapshot.mean - 42.0).abs() < 1e-10);
217        assert!((snapshot.variance - 0.0).abs() < 1e-10);
218        assert_eq!(snapshot.count, 1);
219    }
220
221    #[test]
222    fn test_online_stats_empty() {
223        let stats = OnlineStats::new();
224        let snapshot = stats.finalize();
225
226        assert!((snapshot.mean - 0.0).abs() < 1e-10);
227        assert!((snapshot.variance - 0.0).abs() < 1e-10);
228        assert_eq!(snapshot.count, 0);
229    }
230
231    #[test]
232    fn test_online_stats_constant_values() {
233        let mut stats = OnlineStats::new();
234        for _ in 0..100 {
235            stats.update(5.0);
236        }
237
238        let snapshot = stats.finalize();
239
240        assert!((snapshot.mean - 5.0).abs() < 1e-10);
241        assert!(
242            snapshot.variance < 1e-10,
243            "Constant values should have ~0 variance"
244        );
245        assert_eq!(snapshot.count, 100);
246    }
247
248    #[test]
249    fn test_online_stats_matches_batch() {
250        // Compare online computation with batch computation
251        let data: Vec<f64> = (0..1000).map(|i| (i as f64).sin() * 100.0).collect();
252
253        // Online computation
254        let mut stats = OnlineStats::new();
255        for &x in &data {
256            stats.update(x);
257        }
258        let online = stats.finalize();
259
260        // Batch computation
261        let n = data.len() as f64;
262        let batch_mean: f64 = data.iter().sum::<f64>() / n;
263        let batch_variance: f64 = data.iter().map(|x| sq(x - batch_mean)).sum::<f64>() / (n - 1.0);
264
265        assert!(
266            (online.mean - batch_mean).abs() < 1e-10,
267            "Mean mismatch: online={}, batch={}",
268            online.mean,
269            batch_mean
270        );
271        assert!(
272            (online.variance - batch_variance).abs() < 1e-6,
273            "Variance mismatch: online={}, batch={}",
274            online.variance,
275            batch_variance
276        );
277    }
278
279    #[test]
280    fn test_online_stats_autocorr_positive() {
281        // Highly autocorrelated data: each value close to previous
282        let mut stats = OnlineStats::new();
283        let mut x = 0.0;
284        for _ in 0..1000 {
285            x += 0.1; // Monotonically increasing
286            stats.update(x);
287        }
288
289        let snapshot = stats.finalize();
290
291        // Strong positive autocorrelation expected
292        assert!(
293            snapshot.autocorr_lag1 > 0.9,
294            "Expected high positive autocorrelation, got {}",
295            snapshot.autocorr_lag1
296        );
297    }
298
299    #[test]
300    fn test_online_stats_autocorr_negative() {
301        // Alternating values: strong negative autocorrelation
302        let mut stats = OnlineStats::new();
303        for i in 0..1000 {
304            let x = if i % 2 == 0 { 100.0 } else { -100.0 };
305            stats.update(x);
306        }
307
308        let snapshot = stats.finalize();
309
310        // Strong negative autocorrelation expected
311        assert!(
312            snapshot.autocorr_lag1 < -0.9,
313            "Expected high negative autocorrelation, got {}",
314            snapshot.autocorr_lag1
315        );
316    }
317
318    #[test]
319    fn test_online_stats_autocorr_near_zero() {
320        // Random-ish data: low autocorrelation
321        let mut stats = OnlineStats::new();
322        let mut state: u64 = 12345;
323        for _ in 0..1000 {
324            // Pseudo-random using simple LCG with wrapping arithmetic
325            state = state.wrapping_mul(1103515245).wrapping_add(12345);
326            let x = (state % 1000) as f64;
327            stats.update(x);
328        }
329
330        let snapshot = stats.finalize();
331
332        // Near-zero autocorrelation expected for pseudo-random data
333        assert!(
334            snapshot.autocorr_lag1.abs() < 0.1,
335            "Expected near-zero autocorrelation, got {}",
336            snapshot.autocorr_lag1
337        );
338    }
339
340    #[test]
341    fn test_stats_snapshot_std_dev() {
342        let snapshot = StatsSnapshot {
343            mean: 5.0,
344            variance: 4.0,
345            autocorr_lag1: 0.0,
346            count: 100,
347        };
348
349        assert!((snapshot.std_dev() - 2.0).abs() < 1e-10);
350    }
351}