ta_statistics/
statistics.rs

1use num_traits::Float;
2
3use core::iter::Sum;
4
5use crate::{
6    PairedStatistics, SingleStatistics, Window,
7    helper::{median_from_sorted_slice, quantile_from_sorted_slice},
8};
9
10/// A structure that computes various statistics over a fixed-size window of values.
11///
12/// `Statistics<T>` maintains a circular buffer of values and computes statistical measures
13/// such as mean, variance, standard deviation, median, etc. It can handle both single value
14/// statistics and paired statistics (when `T` is a tuple).
15///
16/// The structure automatically updates statistics as new values are added and old values
17/// are removed from the window, making it efficient for streaming data analysis.
18///
19/// # Type Parameters
20///
21/// * `T` - The type of values to compute statistics over. Can be a single numeric type
22///   or a tuple for paired statistics.
23#[derive(Debug, Clone)]
24pub struct Statistics<T> {
25    /// Fixed circular buffer
26    buf: Window<T>,
27    /// Fixed sorted buffer
28    sorted_buf: Window<T>,
29    /// Statistics period
30    period: usize,
31    /// Delta Degrees of Freedom
32    ddof: bool,
33    /// Latest updated value to statistics
34    current_value: Option<T>,
35    /// Previous value popped out the window, only available after full window
36    popped: Option<T>,
37    /// Sum of inputs
38    sum: Option<T>,
39    /// Sum of squares (used for variance calculation)
40    sum_sq: Option<T>,
41    /// Sum of product of x and y, used in paired statistics
42    sum_prod: Option<T>,
43    /// Current minimum value
44    min: Option<T>,
45    /// Current maximum value
46    max: Option<T>,
47    /// Maximum drawdown
48    max_drawdown: Option<T>,
49}
50
51impl<T> Statistics<T> {
52    /// Creates a new statistics object with the specified period
53    ///
54    /// # Arguments
55    ///
56    /// * `period` - The period of the statistics
57    ///
58    /// # Returns
59    ///
60    /// * `Statistics<T>` - The new statistics object
61    pub fn new(period: usize) -> Self
62    where
63        T: Copy + Default,
64    {
65        Self {
66            buf: Window::new(period),
67            sorted_buf: Window::new(period),
68            period,
69            ddof: false,
70            current_value: None,
71            popped: None,
72            sum: None,
73            sum_sq: None,
74            sum_prod: None,
75            min: None,
76            max: None,
77            max_drawdown: None,
78        }
79    }
80
81    /// Resets the statistics
82    ///
83    /// # Returns
84    ///
85    /// * `&mut Self` - The statistics object
86    pub fn reset(&mut self) -> &mut Self
87    where
88        T: Default + Copy,
89    {
90        self.buf.reset();
91        self.sorted_buf.reset();
92        self.ddof = false;
93        self.current_value = None;
94        self.popped = None;
95        self.sum = None;
96        self.sum_sq = None;
97        self.sum_prod = None;
98        self.min = None;
99        self.max = None;
100        self.max_drawdown = None;
101        self
102    }
103
104    /// Returns the current number of elements in the buffer.
105    ///
106    /// # Returns
107    ///
108    /// * `usize` - The current number of elements in the buffer
109    pub const fn len(&self) -> usize {
110        self.buf.len()
111    }
112
113    /// Check if the statistics buffer is full
114    ///
115    /// # Returns
116    ///
117    /// * `bool` - True if the statistics buffer is full
118    pub const fn is_full(&self) -> bool {
119        self.buf.is_full()
120    }
121
122    /// Returns the period of the statistics
123    ///
124    /// # Returns
125    ///
126    /// * `usize` - The period of the statistics
127    pub const fn period(&self) -> usize {
128        self.period
129    }
130
131    /// Returns the Delta Degrees of Freedom
132    ///
133    /// # Returns
134    ///
135    /// * `bool` - The Delta Degrees of Freedom
136    pub const fn ddof(&self) -> bool {
137        self.ddof
138    }
139
140    /// Sets the Delta Degrees of Freedom
141    ///
142    /// # Arguments
143    ///
144    /// * `ddof` - The Delta Degrees of Freedom
145    ///
146    /// # Returns
147    ///
148    /// * `&mut Self` - The statistics object
149    pub const fn set_ddof(&mut self, ddof: bool) -> &mut Self {
150        self.ddof = ddof;
151        self
152    }
153
154    // Copies and sorts the buf
155    fn sorted_buf(&mut self) -> &[T]
156    where
157        T: Copy + Default + PartialOrd,
158    {
159        self.sorted_buf.copy_from_slice(self.buf.as_slice());
160        self.sorted_buf.sort()
161    }
162
163    fn period_t(&self) -> Option<T>
164    where
165        T: Float,
166    {
167        T::from(self.period)
168    }
169}
170
171impl<T> Statistics<T> {}
172
173impl<T: Float + Default + Sum> SingleStatistics<T> for Statistics<T> {
174    fn next(&mut self, value: T) -> &mut Self {
175        let popped = self.buf.next(value);
176        self.current_value = Some(value);
177
178        if self.is_full() {
179            self.popped = match self.popped {
180                None => (self.buf.index() > 0).then_some(popped),
181                _ => Some(popped),
182            };
183        }
184
185        self.sum = match self.sum {
186            None => Some(value - popped),
187            Some(s) => Some(s + value - popped),
188        };
189
190        self.sum_sq = match self.sum_sq {
191            None => Some(value * value - popped * popped),
192            Some(sq) => Some(sq + value * value - popped * popped),
193        };
194
195        self
196    }
197    fn sum(&self) -> Option<T> {
198        if self.is_full() { self.sum } else { None }
199    }
200
201    fn sum_sq(&self) -> Option<T> {
202        if self.is_full() { self.sum_sq } else { None }
203    }
204
205    fn mean(&self) -> Option<T> {
206        self.sum().zip(self.period_t()).map(|(sum, n)| sum / n)
207    }
208
209    fn mean_sq(&self) -> Option<T> {
210        self.sum_sq()
211            .zip(self.period_t())
212            .map(|(sum_sq, n)| sum_sq / n)
213    }
214
215    fn mode(&mut self) -> Option<T> {
216        if !self.is_full() {
217            return None;
218        }
219
220        let slice = self.sorted_buf();
221
222        let mut mode = slice[0];
223        let mut mode_count = 1;
224
225        let mut current = slice[0];
226        let mut current_count = 1;
227
228        for &value in &slice[1..] {
229            if value == current {
230                current_count += 1;
231            } else {
232                if current_count > mode_count || (current_count == mode_count && current < mode) {
233                    mode = current;
234                    mode_count = current_count;
235                }
236                current = value;
237                current_count = 1;
238            }
239        }
240
241        if current_count > mode_count || (current_count == mode_count && current < mode) {
242            mode = current;
243        }
244
245        Some(mode)
246    }
247
248    fn median(&mut self) -> Option<T> {
249        self.is_full()
250            .then_some(median_from_sorted_slice(self.sorted_buf()))
251    }
252
253    fn min(&mut self) -> Option<T> {
254        if !self.is_full() {
255            return None;
256        }
257
258        self.min = match self.min {
259            None => self.buf.min(),
260            Some(min) => {
261                if self.popped == Some(min) {
262                    self.buf.min()
263                } else if self.current_value < Some(min) {
264                    self.current_value
265                } else {
266                    Some(min)
267                }
268            }
269        };
270        self.min
271    }
272
273    fn max(&mut self) -> Option<T> {
274        if !self.is_full() {
275            return None;
276        }
277
278        self.max = match self.max {
279            None => self.buf.max(),
280            Some(max) => {
281                if self.popped == Some(max) {
282                    self.buf.max()
283                } else if self.current_value > Some(max) {
284                    self.current_value
285                } else {
286                    Some(max)
287                }
288            }
289        };
290
291        self.max
292    }
293
294    fn mean_absolute_deviation(&self) -> Option<T> {
295        let mean = self.mean()?;
296        let abs_sum = self.buf.iter().map(|&x| (x - mean).abs()).sum::<T>();
297        self.period_t().map(|n| abs_sum / n)
298    }
299
300    fn median_absolute_deviation(&mut self) -> Option<T> {
301        let median = self.median()?;
302
303        self.sorted_buf
304            .iter_mut()
305            .zip(self.buf.as_slice())
306            .for_each(|(dev, &x)| *dev = (x - median).abs());
307
308        Some(median_from_sorted_slice(self.sorted_buf.sort()))
309    }
310
311    fn variance(&self) -> Option<T> {
312        let variance = self
313            .mean()
314            .zip(self.mean_sq())
315            .map(|(mean, mean_sq)| (mean_sq - (mean * mean)));
316
317        if self.ddof() {
318            variance
319                .zip(self.period_t())
320                .map(|(var, n)| var * (n / (n - T::one())))
321        } else {
322            variance
323        }
324    }
325
326    fn stddev(&self) -> Option<T> {
327        self.variance().map(T::sqrt)
328    }
329
330    fn zscore(&self) -> Option<T> {
331        self.mean()
332            .zip(self.stddev())
333            .zip(self.current_value)
334            .map(|((mean, stddev), x)| match stddev.abs() < T::epsilon() {
335                true => T::zero(),
336                _ => (x - mean) / stddev,
337            })
338    }
339
340    fn skew(&self) -> Option<T> {
341        let len = self.len();
342        if self.ddof() && len < 3 {
343            return None;
344        }
345
346        let (mean, stddev) = self.mean().zip(self.stddev())?;
347        if stddev.abs() < T::epsilon() {
348            return Some(T::zero());
349        }
350
351        let sum_cubed = self
352            .buf
353            .iter()
354            .map(|&x| {
355                let z = (x - mean) / stddev;
356                z * z * z
357            })
358            .sum();
359
360        let n = T::from(len)?;
361        let _1 = T::one();
362        let _2 = T::from(2.0)?;
363        if self.ddof() {
364            Some((n / ((n - _1) * (n - _2))) * sum_cubed)
365        } else {
366            Some(sum_cubed / n)
367        }
368    }
369
370    fn kurt(&self) -> Option<T> {
371        let len = self.len();
372        if len < 4 {
373            return None;
374        }
375
376        let (mean, stddev) = self.mean().zip(self.stddev())?;
377        if stddev.abs() < T::epsilon() {
378            return Some(T::zero());
379        }
380
381        let sum_fourth = self
382            .buf
383            .iter()
384            .map(|&x| {
385                let z = (x - mean) / stddev;
386                z * z * z * z
387            })
388            .sum();
389
390        let n = T::from(len)?;
391        let _1 = T::one();
392        let _2 = T::from(2)?;
393        let _3 = T::from(3)?;
394
395        let kurt = if self.ddof() {
396            let numerator = n * (n + _1) * sum_fourth;
397            let denominator = (n - _1) * (n - _2) * (n - _3);
398            let correction = _3 * ((n - _1) * (n - _1)) / ((n - _2) * (n - _3));
399            (numerator / denominator) - correction
400        } else {
401            sum_fourth / n - _3
402        };
403
404        Some(kurt)
405    }
406
407    fn linreg_slope(&self) -> Option<T> {
408        if !self.is_full() {
409            return None;
410        }
411
412        let mut s = Statistics::new(self.period);
413        for (i, &x) in self.buf.iter().enumerate() {
414            <Statistics<(T, T)> as PairedStatistics<T>>::next(&mut s, (x, T::from(i)?));
415        }
416
417        s.beta()
418    }
419
420    fn linreg_slope_intercept(&self) -> Option<(T, T)> {
421        let (mean, slope) = self.mean().zip(self.linreg_slope())?;
422        let _1 = T::one();
423        self.period_t()
424            .zip(T::from(2))
425            .map(|(p, _2)| (p - _1) / _2)
426            .map(|mt| (slope, mean - slope * mt))
427    }
428
429    fn linreg_intercept(&self) -> Option<T> {
430        self.linreg_slope_intercept()
431            .map(|(_, intercept)| intercept)
432    }
433
434    fn linreg_angle(&self) -> Option<T> {
435        self.linreg_slope().map(|slope| slope.atan())
436    }
437
438    fn linreg(&self) -> Option<T> {
439        let _1 = T::one();
440        self.linreg_slope_intercept()
441            .zip(self.period_t())
442            .map(|((slope, intercept), period)| slope * (period - _1) + intercept)
443    }
444
445    fn drawdown(&mut self) -> Option<T> {
446        self.max().zip(self.current_value).map(|(max, input)| {
447            if max <= T::zero() || input <= T::zero() {
448                T::zero()
449            } else {
450                ((max - input) / max).max(T::zero())
451            }
452        })
453    }
454
455    fn max_drawdown(&mut self) -> Option<T> {
456        let drawdown = self.drawdown()?;
457        self.max_drawdown = match self.max_drawdown {
458            Some(md) => Some(md.max(drawdown)),
459            None => Some(drawdown),
460        };
461        self.max_drawdown
462    }
463
464    fn diff(&self) -> Option<T> {
465        self.current_value
466            .zip(self.popped)
467            .map(|(input, popped)| input - popped)
468    }
469
470    fn pct_change(&self) -> Option<T> {
471        self.diff().zip(self.popped).and_then(|(diff, popped)| {
472            if popped.is_zero() {
473                None
474            } else {
475                Some(diff / popped)
476            }
477        })
478    }
479
480    fn log_return(&self) -> Option<T> {
481        self.current_value
482            .zip(self.popped)
483            .and_then(|(current, popped)| {
484                if popped <= T::zero() || current <= T::zero() {
485                    None
486                } else {
487                    Some(current.ln() - popped.ln())
488                }
489            })
490    }
491
492    fn quantile(&mut self, q: f64) -> Option<T> {
493        if !self.is_full() || !(0.0..=1.0).contains(&q) {
494            return None;
495        }
496        let period = self.period();
497        let sorted = self.sorted_buf();
498        quantile_from_sorted_slice(sorted, q, period)
499    }
500
501    fn iqr(&mut self) -> Option<T> {
502        if !self.is_full() {
503            return None;
504        }
505
506        let period = self.period();
507        let sorted = self.sorted_buf();
508
509        let q1 = quantile_from_sorted_slice(sorted, 0.25, period);
510        let q3 = quantile_from_sorted_slice(sorted, 0.75, period);
511
512        q1.zip(q3).map(|(q1, q3)| q3 - q1)
513    }
514}
515
516impl<T: Float> Statistics<(T, T)> {
517    fn mean(&self) -> Option<(T, T)> {
518        if self.is_full() {
519            let n = T::from(self.period)?;
520            return self.sum.map(|(sum_x, sum_y)| (sum_x / n, sum_y / n));
521        }
522        None
523    }
524
525    fn mean_prod(&self) -> Option<(T, T)> {
526        if self.is_full() {
527            let n = T::from(self.period)?;
528            return self.sum_prod.map(|(sum_xy, _)| (sum_xy / n, sum_xy / n));
529        }
530        None
531    }
532
533    fn mean_sq(&self) -> Option<(T, T)> {
534        if self.is_full() {
535            let n = T::from(self.period)?;
536            return self
537                .sum_sq
538                .map(|(sum_sq_x, sum_sq_y)| (sum_sq_x / n, sum_sq_y / n));
539        }
540        None
541    }
542
543    fn variance(&self) -> Option<(T, T)> {
544        let variance = self
545            .mean()
546            .zip(self.mean_sq())
547            .map(|(mean, mean_sq)| (mean_sq.0 - (mean.0 * mean.0), mean_sq.1 - (mean.1 * mean.1)));
548
549        if self.ddof() {
550            variance
551                .zip(T::from(self.period))
552                .map(|(var, n)| (var.0 * (n / (n - T::one())), var.1 * (n / (n - T::one()))))
553        } else {
554            variance
555        }
556    }
557
558    fn stddev(&self) -> Option<(T, T)> {
559        self.variance().map(|var| (var.0.sqrt(), var.1.sqrt()))
560    }
561}
562
563impl<T: Float + Default> PairedStatistics<T> for Statistics<(T, T)> {
564    fn next(&mut self, (x, y): (T, T)) -> &mut Self {
565        let popped = self.buf.next((x, y));
566        self.current_value = Some((x, y));
567
568        let x_diff = x - popped.0;
569        let y_diff = y - popped.1;
570        self.sum = match self.sum {
571            None => Some((x_diff, y_diff)),
572            Some((sx, sy)) => Some((sx + x_diff, sy + y_diff)),
573        };
574
575        let prod_diff = (x * y) - (popped.0 * popped.1);
576        self.sum_prod = match self.sum_prod {
577            None => Some((prod_diff, prod_diff)),
578            Some((spx, spy)) => Some((spx + prod_diff, spy + prod_diff)),
579        };
580
581        let sq_x_diff = (x * x) - (popped.0 * popped.0);
582        let sq_y_diff = (y * y) - (popped.1 * popped.1);
583        self.sum_sq = match self.sum_sq {
584            None => Some((sq_x_diff, sq_y_diff)),
585            Some((ssx, ssy)) => Some((ssx + sq_x_diff, ssy + sq_y_diff)),
586        };
587        self
588    }
589
590    fn cov(&self) -> Option<T> {
591        let (mean_x, mean_y) = self.mean()?;
592        let (mean_xy, _) = self.mean_prod()?;
593
594        let cov = mean_xy - mean_x * mean_y;
595
596        let n = T::from(self.period)?;
597        if self.ddof() {
598            Some(cov * (n / (n - T::one())))
599        } else {
600            Some(cov)
601        }
602    }
603
604    fn corr(&self) -> Option<T> {
605        self.cov()
606            .zip(self.stddev())
607            .and_then(|(cov, (stddev_x, stddev_y))| {
608                if stddev_x.is_zero() || stddev_y.is_zero() {
609                    None
610                } else {
611                    Some(cov / (stddev_x * stddev_y))
612                }
613            })
614    }
615
616    fn beta(&self) -> Option<T> {
617        self.cov().zip(self.variance()).and_then(
618            |(cov, (_, var))| {
619                if var.is_zero() { None } else { Some(cov / var) }
620            },
621        )
622    }
623}