stats/
online.rs

1use std::fmt;
2
3use num_traits::ToPrimitive;
4use serde::{Deserialize, Serialize};
5
6use crate::Commute;
7
8/// Compute the standard deviation of a stream in constant space.
9#[inline]
10pub fn stddev<'a, I, T>(x: I) -> f64
11where
12    I: IntoIterator<Item = T>,
13    T: Into<&'a f64>,
14{
15    let it = x.into_iter();
16    stddev(it)
17}
18
19/// Compute the variance of a stream in constant space.
20#[inline]
21pub fn variance<'a, I, T>(x: I) -> f64
22where
23    I: IntoIterator<Item = T>,
24    T: Into<&'a f64>,
25{
26    let it = x.into_iter();
27    variance(it)
28}
29
30/// Compute the mean of a stream in constant space.
31#[inline]
32pub fn mean<'a, I, T>(x: I) -> f64
33where
34    I: IntoIterator<Item = T>,
35    T: Into<&'a f64>,
36{
37    let it = x.into_iter();
38    mean(it)
39}
40
41/// Online state for computing mean, variance and standard deviation.
42///
43/// Optimized memory layout for better cache performance:
44/// - 64-byte alignment for cache line efficiency
45/// - Grouped related fields together
46/// - Uses bit flags to reduce padding overhead
47#[derive(Clone, Copy, Serialize, Deserialize, PartialEq)]
48#[repr(C, align(64))]
49pub struct OnlineStats {
50    // Core statistics (40 bytes)
51    size: u64,          // 8 bytes
52    mean: f64,          // 8 bytes
53    q: f64,             // 8 bytes
54    harmonic_sum: f64,  // 8 bytes
55    geometric_sum: f64, // 8 bytes
56
57    // flags (2 bytes)
58    has_zero: bool,     // 1 byte
59    has_negative: bool, // 1 byte
60    hg_sums: bool,      // 1 byte
61
62    // Padding to reach 64 bytes for cache alignment
63    _padding: [u8; 21], // 21 bytes padding
64}
65
66impl OnlineStats {
67    /// Create initial state.
68    ///
69    /// Population size, variance and mean are set to `0`.
70    #[must_use]
71    pub fn new() -> OnlineStats {
72        Default::default()
73    }
74
75    /// Initializes `OnlineStats` from a sample.
76    #[must_use]
77    pub fn from_slice<T: ToPrimitive>(samples: &[T]) -> OnlineStats {
78        // safety: OnlineStats is only for numbers
79        samples
80            .iter()
81            .map(|n| unsafe { n.to_f64().unwrap_unchecked() })
82            .collect()
83    }
84
85    /// Return the current mean.
86    #[must_use]
87    pub const fn mean(&self) -> f64 {
88        if self.is_empty() { f64::NAN } else { self.mean }
89    }
90
91    /// Return the current standard deviation.
92    #[must_use]
93    pub fn stddev(&self) -> f64 {
94        self.variance().sqrt()
95    }
96
97    /// Return the current variance.
98    // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
99    #[must_use]
100    pub fn variance(&self) -> f64 {
101        self.q / (self.size as f64)
102    }
103
104    /// Return the current harmonic mean.
105    #[must_use]
106    pub fn harmonic_mean(&self) -> f64 {
107        if self.is_empty() || self.has_zero || self.has_negative {
108            f64::NAN
109        } else {
110            (self.size as f64) / self.harmonic_sum
111        }
112    }
113
114    /// Return the current geometric mean.
115    #[must_use]
116    pub fn geometric_mean(&self) -> f64 {
117        if self.is_empty()
118            || self.has_negative
119            || self.geometric_sum.is_infinite()
120            || self.geometric_sum.is_nan()
121        {
122            f64::NAN
123        } else if self.has_zero {
124            0.0
125        } else {
126            (self.geometric_sum / (self.size as f64)).exp()
127        }
128    }
129
130    // TODO: Calculate kurtosis
131    // also see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
132
133    /// Add a new sample.
134    #[inline]
135    pub fn add<T: ToPrimitive>(&mut self, sample: &T) {
136        // safety: we only add samples for numbers, so safe to unwrap
137        let sample = unsafe { sample.to_f64().unwrap_unchecked() };
138
139        // Taken from: https://en.wikipedia.org/wiki/Standard_deviation#Rapid_calculation_methods
140        // See also: https://api.semanticscholar.org/CorpusID:120126049
141        self.size += 1;
142        let delta = sample - self.mean;
143
144        // FMA: equivalent to: self.mean += delta * (1.0 / (self.size as f64));
145        self.mean = delta.mul_add(1.0 / (self.size as f64), self.mean);
146
147        // FMA: equivalent to: self.q += delta * (sample - self.mean);
148        self.q = delta.mul_add(sample - self.mean, self.q);
149
150        // Optimized path for positive numbers (most common case)
151        if sample > 0.0 && self.hg_sums {
152            // Fast path: compute harmonic & geometric sums directly
153            // use FMA. equivalent to: self.harmonic_sum += 1.0 / sample
154            self.harmonic_sum = (1.0 / sample).mul_add(1.0, self.harmonic_sum);
155            // use FMA. equivalent to: self.geometric_sum += ln(sample)
156            self.geometric_sum = sample.ln().mul_add(1.0, self.geometric_sum);
157            return;
158        }
159
160        // Handle special cases (zero and negative numbers)
161        if sample <= 0.0 {
162            if sample.is_sign_negative() {
163                self.has_negative = true;
164            } else {
165                self.has_zero = true;
166            }
167            self.hg_sums = !self.has_negative && !self.has_zero;
168        }
169    }
170
171    /// Add a new f64 sample.
172    /// Skipping the ToPrimitive conversion.
173    #[inline]
174    pub fn add_f64(&mut self, sample: f64) {
175        self.size += 1;
176        let delta = sample - self.mean;
177
178        self.mean = delta.mul_add(1.0 / (self.size as f64), self.mean);
179        self.q = delta.mul_add(sample - self.mean, self.q);
180
181        if sample > 0.0 && self.hg_sums {
182            self.harmonic_sum = (1.0 / sample).mul_add(1.0, self.harmonic_sum);
183            self.geometric_sum = sample.ln().mul_add(1.0, self.geometric_sum);
184            return;
185        }
186
187        if sample <= 0.0 {
188            if sample.is_sign_negative() {
189                self.has_negative = true;
190            } else {
191                self.has_zero = true;
192            }
193            self.hg_sums = !self.has_negative && !self.has_zero;
194        }
195    }
196
197    /// Add a new NULL value to the population.
198    /// This increases the population size by `1`.
199    #[inline]
200    pub fn add_null(&mut self) {
201        self.add_f64(0.0);
202    }
203
204    /// Returns the number of data points.
205    #[inline]
206    #[must_use]
207    pub const fn len(&self) -> usize {
208        self.size as usize
209    }
210
211    /// Returns if empty.
212    #[inline]
213    #[must_use]
214    pub const fn is_empty(&self) -> bool {
215        self.size == 0
216    }
217}
218
219impl Commute for OnlineStats {
220    #[inline]
221    fn merge(&mut self, v: OnlineStats) {
222        if v.is_empty() {
223            return;
224        }
225
226        // Taken from: https://en.wikipedia.org/wiki/Standard_deviation#Combining_standard_deviations
227        let (s1, s2) = (self.size as f64, v.size as f64);
228        let total = s1 + s2;
229        let meandiffsq = (self.mean - v.mean) * (self.mean - v.mean);
230
231        self.size += v.size;
232
233        //self.mean = ((s1 * self.mean) + (s2 * v.mean)) / (s1 + s2);
234        // below is the fused multiply add version of the statement above
235        // its more performant as we're taking advantage of a CPU instruction
236        self.mean = s1.mul_add(self.mean, s2 * v.mean) / total;
237
238        // self.q += v.q + meandiffsq * s1 * s2 / (s1 + s2);
239        // below is the fused multiply add version of the statement above
240        self.q += v.q + f64::mul_add(meandiffsq, s1 * s2 / total, 0.0);
241
242        self.harmonic_sum += v.harmonic_sum;
243        self.geometric_sum += v.geometric_sum;
244
245        self.has_zero |= v.has_zero;
246        self.has_negative |= v.has_negative;
247    }
248}
249
250impl Default for OnlineStats {
251    fn default() -> OnlineStats {
252        OnlineStats {
253            size: 0,
254            mean: 0.0,
255            q: 0.0,
256            harmonic_sum: 0.0,
257            geometric_sum: 0.0,
258            has_zero: false,
259            has_negative: false,
260            hg_sums: true,
261            _padding: [0; 21],
262        }
263    }
264}
265
266impl fmt::Debug for OnlineStats {
267    #[inline]
268    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
269        write!(f, "{:.10} +/- {:.10}", self.mean(), self.stddev())
270    }
271}
272
273impl<T: ToPrimitive> FromIterator<T> for OnlineStats {
274    #[inline]
275    fn from_iter<I: IntoIterator<Item = T>>(it: I) -> OnlineStats {
276        let mut v = OnlineStats::new();
277        v.extend(it);
278        v
279    }
280}
281
282impl<T: ToPrimitive> Extend<T> for OnlineStats {
283    #[inline]
284    fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
285        for sample in it {
286            self.add(&sample);
287        }
288    }
289}
290
291#[cfg(test)]
292mod test {
293    use super::OnlineStats;
294    use {crate::Commute, crate::merge_all};
295
296    #[test]
297    fn online() {
298        // TODO: Convert this to a quickcheck test.
299        let expected = OnlineStats::from_slice(&[1usize, 2, 3, 2, 4, 6]);
300
301        let var1 = OnlineStats::from_slice(&[1usize, 2, 3]);
302        let var2 = OnlineStats::from_slice(&[2usize, 4, 6]);
303        let mut got = var1;
304        got.merge(var2);
305        assert_eq!(expected.stddev(), got.stddev());
306        assert_eq!(expected.mean(), got.mean());
307        assert_eq!(expected.variance(), got.variance());
308    }
309
310    #[test]
311    fn online_empty() {
312        let expected = OnlineStats::new();
313        assert!(expected.is_empty());
314    }
315
316    #[test]
317    fn online_many() {
318        // TODO: Convert this to a quickcheck test.
319        let expected = OnlineStats::from_slice(&[1usize, 2, 3, 2, 4, 6, 3, 6, 9]);
320
321        let vars = vec![
322            OnlineStats::from_slice(&[1usize, 2, 3]),
323            OnlineStats::from_slice(&[2usize, 4, 6]),
324            OnlineStats::from_slice(&[3usize, 6, 9]),
325        ];
326        assert_eq!(
327            expected.stddev(),
328            merge_all(vars.clone().into_iter()).unwrap().stddev()
329        );
330        assert_eq!(
331            expected.mean(),
332            merge_all(vars.clone().into_iter()).unwrap().mean()
333        );
334        assert_eq!(
335            expected.variance(),
336            merge_all(vars.into_iter()).unwrap().variance()
337        );
338    }
339
340    #[test]
341    fn test_means() {
342        let mut stats = OnlineStats::new();
343        stats.extend(vec![2.0f64, 4.0, 8.0]);
344
345        // Arithmetic mean = (2 + 4 + 8) / 3 = 4.666...
346        assert!((stats.mean() - 4.666666666667).abs() < 1e-10);
347
348        // Harmonic mean = 3 / (1/2 + 1/4 + 1/8) = 3.428571429
349        assert_eq!("3.42857143", format!("{:.8}", stats.harmonic_mean()));
350
351        // Geometric mean = (2 * 4 * 8)^(1/3) = 4.0
352        assert!((stats.geometric_mean() - 4.0).abs() < 1e-10);
353    }
354
355    #[test]
356    fn test_means_with_negative() {
357        let mut stats = OnlineStats::new();
358        stats.extend(vec![-2.0f64, 2.0]);
359
360        // Arithmetic mean = (-2 + 2) / 2 = 0
361        assert!(stats.mean().abs() < 1e-10);
362
363        // Geometric mean is NaN for negative numbers
364        assert!(stats.geometric_mean().is_nan());
365
366        // Harmonic mean is undefined when values have different signs
367        assert!(stats.harmonic_mean().is_nan());
368    }
369
370    #[test]
371    fn test_means_with_zero() {
372        let mut stats = OnlineStats::new();
373        stats.extend(vec![0.0f64, 4.0, 8.0]);
374
375        // Arithmetic mean = (0 + 4 + 8) / 3 = 4
376        assert!((stats.mean() - 4.0).abs() < 1e-10);
377
378        // Geometric mean = (0 * 4 * 8)^(1/3) = 0
379        assert!(stats.geometric_mean().abs() < 1e-10);
380
381        // Harmonic mean is undefined when any value is 0
382        assert!(stats.harmonic_mean().is_nan());
383    }
384
385    #[test]
386    fn test_means_with_zero_and_negative_values() {
387        let mut stats = OnlineStats::new();
388        stats.extend(vec![-10i32, -5, 0, 5, 10]);
389
390        // Arithmetic mean = (-10 + -5 + 0 + 5 + 10) / 5 = 0
391        assert!(stats.mean().abs() < 1e-10);
392
393        // Geometric mean is NaN due to negative values
394        assert!(stats.geometric_mean().is_nan());
395
396        // Harmonic mean is NaN due to zero value
397        assert!(stats.harmonic_mean().is_nan());
398    }
399
400    #[test]
401    fn test_means_single_value() {
402        let mut stats = OnlineStats::new();
403        stats.extend(vec![5.0f64]);
404
405        // All means should equal the single value
406        assert!((stats.mean() - 5.0).abs() < 1e-10);
407        assert!((stats.geometric_mean() - 5.0).abs() < 1e-10);
408        assert!((stats.harmonic_mean() - 5.0).abs() < 1e-10);
409    }
410
411    #[test]
412    fn test_means_empty() {
413        let stats = OnlineStats::new();
414
415        // All means should be NaN for empty stats
416        assert!(stats.mean().is_nan());
417        assert!(stats.geometric_mean().is_nan());
418        assert!(stats.harmonic_mean().is_nan());
419    }
420}