math_ops/
statistics.rs

1//! Statistical methods for `Vector<T>`.
2
3use crate::vector::Vector;
4use num_traits::{Float, ToPrimitive};
5use crate::IntoVector;
6
7/// Trait definition for Statistics, generic over type T.
8/// T is expected to be a floating-point type like f32 or f64.
9pub trait Statistics<T> {
10  /// Computes the mean (average) of the data.
11  /// Returns an Option<T> where None represents an empty dataset.
12  fn mean(&self) -> Option<T>;
13
14  /// Computes the variance of the data.
15  /// Returns an Option<T>, where None represents an empty dataset.
16  /// Variance is the average of the squared deviations from the mean.
17  fn var(&self) -> Option<T>;
18
19  /// Computes the standard deviation of the data.
20  /// Returns an Option<T>, where None represents an empty dataset.
21  /// Standard deviation is the square root of the variance.
22  fn stddev(&self) -> Option<T>;
23
24  /// Computes the median of the data.
25  /// Returns an Option<T>, where None represents an empty dataset.
26  /// The median is the value separating the higher half from the lower half.
27  fn median(&self) -> Option<T>;
28
29  /// Computes the quantile for the given fraction `q`.
30  /// `q` is expected to be a floating-point value between 0 and 1.
31  /// Returns an Option<T> where None represents an empty dataset.
32  /// For example, q = 0.5 gives the median, q = 0.25 gives the 25th percentile.
33  fn quantile(&self, q: T) -> Option<T>;
34
35  /// Computes the interquartile range (IQR) of the data.
36  /// Returns an Option<T>, where None represents an empty dataset.
37  /// IQR is the range between the 25th percentile and 75th percentile.
38  fn iqr(&self) -> Option<T>;
39
40  /// Returns the minimum value in the dataset, ignoring NaN values.
41  /// Returns an Option<T>, where None represents an empty dataset.
42  fn min(&self) -> Option<T>;
43
44  /// Returns the maximum value in the dataset, ignoring NaN values.
45  /// Returns an Option<T>, where None represents an empty dataset.
46  fn max(&self) -> Option<T>;
47
48  /// Computes the cumulative sum of the data.
49  /// Returns a `Vector<T>`, where each element is the cumulative sum up to that index.
50  /// NaN values are ignored in the summation.
51  fn cumsum(&self) -> Vector<T>;
52}
53
54impl<T> Statistics<T> for Vector<T>
55where
56  T: Float + ToPrimitive + Copy + PartialOrd,
57{
58  fn mean(&self) -> Option<T> {
59    let mut sum = T::zero();
60    let mut count = 0;
61    for &x in self.iter() {
62      if !x.is_nan() {
63        sum = sum + x;
64        count += 1;
65      }
66    }
67    if count == 0 {
68      None
69    } else {
70      Some(sum / T::from(count).unwrap())
71    }
72  }
73
74  fn var(&self) -> Option<T> {
75    let mean = self.mean()?;
76    let mut sum_sq_diff = T::zero();
77    let mut count = 0;
78    for &x in self.iter() {
79      if !x.is_nan() {
80        sum_sq_diff = sum_sq_diff + (x - mean) * (x - mean);
81        count += 1;
82      }
83    }
84    if count < 2 {
85      None
86    } else {
87      Some(sum_sq_diff / T::from(count).unwrap())
88    }
89  }
90
91  fn stddev(&self) -> Option<T> {
92    self.var().map(|v| v.sqrt())
93  }
94
95  fn median(&self) -> Option<T> {
96    let mut non_nan_values: Vec<T> = self.iter().cloned().filter(|x| !x.is_nan()).collect();
97    let n = non_nan_values.len();
98    if n == 0 {
99      return None;
100    }
101    non_nan_values.sort_by(|a, b| a.partial_cmp(b).unwrap());
102    let mid = n / 2;
103    if n % 2 == 0 {
104      Some((non_nan_values[mid - 1] + non_nan_values[mid]) / T::from(2.0).unwrap())
105    } else {
106      Some(non_nan_values[mid])
107    }
108  }
109
110  fn quantile(&self, q: T) -> Option<T> {
111    if q < T::zero() || q > T::one() {
112      return None;
113    }
114    let mut non_nan_values: Vec<T> = self.iter().cloned().filter(|x| !x.is_nan()).collect();
115    let n = non_nan_values.len();
116    if n == 0 {
117      return None;
118    }
119    non_nan_values.sort_by(|a, b| a.partial_cmp(b).unwrap());
120    let pos = q * T::from(n - 1).unwrap();
121    let pos_floor = pos.floor();
122    let pos_ceil = pos.ceil();
123    let weight = pos - pos_floor;
124    let idx_floor = pos_floor.to_usize()?;
125    let idx_ceil = pos_ceil.to_usize()?;
126    if idx_floor == idx_ceil {
127      Some(non_nan_values[idx_floor])
128    } else {
129      Some(
130        non_nan_values[idx_floor]
131          + (non_nan_values[idx_ceil] - non_nan_values[idx_floor]) * weight,
132      )
133    }
134  }
135
136  fn iqr(&self) -> Option<T> {
137    let q75 = self.quantile(T::from(0.75).unwrap())?;
138    let q25 = self.quantile(T::from(0.25).unwrap())?;
139    Some(q75 - q25)
140  }
141
142  fn min(&self) -> Option<T> {
143    self.iter()
144      .cloned()
145      .filter(|x| !x.is_nan())
146      .min_by(|a, b| a.partial_cmp(b).unwrap())
147  }
148
149  fn max(&self) -> Option<T> {
150    self.iter()
151      .cloned()
152      .filter(|x| !x.is_nan())
153      .max_by(|a, b| a.partial_cmp(b).unwrap())
154  }
155
156  fn cumsum(&self) -> Vector<T> {
157    let mut cum_sum = T::zero();
158    let mut result = Vec::with_capacity(self.len());
159    for &x in self.iter() {
160      if !x.is_nan() {
161        cum_sum = cum_sum + x;
162      }
163      result.push(cum_sum);
164    }
165    result.into_vector()
166  }
167}