numra_stats/
descriptive.rs1use numra_core::Scalar;
8
9use crate::error::StatsError;
10
11pub fn mean<S: Scalar>(data: &[S]) -> Result<S, StatsError> {
13 if data.is_empty() {
14 return Err(StatsError::EmptyData);
15 }
16 let n = S::from_usize(data.len());
17 let sum: S = data.iter().copied().fold(S::ZERO, |a, b| a + b);
18 Ok(sum / n)
19}
20
21pub fn variance<S: Scalar>(data: &[S]) -> Result<S, StatsError> {
23 if data.len() < 2 {
24 return Err(StatsError::EmptyData);
25 }
26 let m = mean(data)?;
27 let n = S::from_usize(data.len());
28 let sum_sq: S = data
29 .iter()
30 .copied()
31 .fold(S::ZERO, |a, x| a + (x - m) * (x - m));
32 Ok(sum_sq / (n - S::ONE))
33}
34
35pub fn std_dev<S: Scalar>(data: &[S]) -> Result<S, StatsError> {
37 Ok(variance(data)?.sqrt())
38}
39
40pub fn median<S: Scalar>(data: &[S]) -> Result<S, StatsError> {
42 if data.is_empty() {
43 return Err(StatsError::EmptyData);
44 }
45 let mut sorted: Vec<S> = data.to_vec();
46 sorted.sort_by(|a, b| a.to_f64().partial_cmp(&b.to_f64()).unwrap());
47 let n = sorted.len();
48 if n % 2 == 1 {
49 Ok(sorted[n / 2])
50 } else {
51 let half = S::HALF;
52 Ok((sorted[n / 2 - 1] + sorted[n / 2]) * half)
53 }
54}
55
56pub fn percentile<S: Scalar>(data: &[S], p: S) -> Result<S, StatsError> {
58 if data.is_empty() {
59 return Err(StatsError::EmptyData);
60 }
61 let p_f64 = p.to_f64();
62 if !(0.0..=100.0).contains(&p_f64) {
63 return Err(StatsError::InvalidParameter(
64 "percentile must be in [0, 100]".into(),
65 ));
66 }
67 let mut sorted: Vec<S> = data.to_vec();
68 sorted.sort_by(|a, b| a.to_f64().partial_cmp(&b.to_f64()).unwrap());
69 let n = sorted.len();
70 if n == 1 {
71 return Ok(sorted[0]);
72 }
73 let rank = p_f64 / 100.0 * (n - 1) as f64;
75 let lo = rank.floor() as usize;
76 let hi = rank.ceil() as usize;
77 if lo == hi {
78 Ok(sorted[lo])
79 } else {
80 let frac = S::from_f64(rank - lo as f64);
81 Ok(sorted[lo] + frac * (sorted[hi] - sorted[lo]))
82 }
83}
84
85pub fn skewness<S: Scalar>(data: &[S]) -> Result<S, StatsError> {
87 if data.len() < 3 {
88 return Err(StatsError::EmptyData);
89 }
90 let m = mean(data)?;
91 let n = data.len() as f64;
92 let s = std_dev(data)?;
93 let m3: S = data
94 .iter()
95 .copied()
96 .fold(S::ZERO, |a, x| a + (x - m).powi(3));
97 let m3_avg = m3 / S::from_f64(n);
98 let s3 = s * s * s;
99 let adjust = n * n / ((n - 1.0) * (n - 2.0));
101 Ok(m3_avg / s3 * S::from_f64(adjust))
102}
103
104pub fn kurtosis<S: Scalar>(data: &[S]) -> Result<S, StatsError> {
106 if data.len() < 4 {
107 return Err(StatsError::EmptyData);
108 }
109 let m = mean(data)?;
110 let n = data.len() as f64;
111 let v = variance(data)?;
112 let m4: S = data
113 .iter()
114 .copied()
115 .fold(S::ZERO, |a, x| a + (x - m).powi(4));
116 let m4_avg = m4 / S::from_f64(n);
117 let v2 = v * v;
118 let term1 = n * (n + 1.0) / ((n - 1.0) * (n - 2.0) * (n - 3.0));
120 let term2 = 3.0 * (n - 1.0) * (n - 1.0) / ((n - 2.0) * (n - 3.0));
121 Ok(S::from_f64(term1) * m4_avg * S::from_f64(n) / v2 - S::from_f64(term2))
123}
124
125pub fn covariance<S: Scalar>(x: &[S], y: &[S]) -> Result<S, StatsError> {
127 if x.is_empty() {
128 return Err(StatsError::EmptyData);
129 }
130 if x.len() != y.len() {
131 return Err(StatsError::LengthMismatch {
132 expected: x.len(),
133 got: y.len(),
134 });
135 }
136 if x.len() < 2 {
137 return Err(StatsError::EmptyData);
138 }
139 let mx = mean(x)?;
140 let my = mean(y)?;
141 let n = S::from_usize(x.len());
142 let sum: S = x
143 .iter()
144 .zip(y.iter())
145 .fold(S::ZERO, |a, (&xi, &yi)| a + (xi - mx) * (yi - my));
146 Ok(sum / (n - S::ONE))
147}
148
149pub fn covariance_matrix<S: Scalar>(data: &[Vec<S>]) -> Result<Vec<S>, StatsError> {
154 if data.is_empty() {
155 return Err(StatsError::EmptyData);
156 }
157 let p = data.len();
158 let n = data[0].len();
159 for v in data {
160 if v.len() != n {
161 return Err(StatsError::LengthMismatch {
162 expected: n,
163 got: v.len(),
164 });
165 }
166 }
167 if n < 2 {
168 return Err(StatsError::EmptyData);
169 }
170
171 let mut cov = vec![S::ZERO; p * p];
172 for i in 0..p {
173 for j in i..p {
174 let c = covariance(&data[i], &data[j])?;
175 cov[i * p + j] = c;
176 cov[j * p + i] = c;
177 }
178 }
179 Ok(cov)
180}
181
182#[cfg(test)]
183mod tests {
184 use super::*;
185
186 #[test]
187 fn test_mean() {
188 let data = vec![1.0_f64, 2.0, 3.0, 4.0, 5.0];
189 assert!((mean(&data).unwrap() - 3.0).abs() < 1e-14);
190 }
191
192 #[test]
193 fn test_variance() {
194 let data = vec![2.0_f64, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
195 assert!((variance(&data).unwrap() - 4.571428571428571).abs() < 1e-10);
196 }
197
198 #[test]
199 fn test_std_dev() {
200 let data = vec![2.0_f64, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
201 let s = std_dev(&data).unwrap();
202 assert!((s * s - variance(&data).unwrap()).abs() < 1e-12);
203 }
204
205 #[test]
206 fn test_median_odd() {
207 let data = vec![3.0_f64, 1.0, 2.0];
208 assert!((median(&data).unwrap() - 2.0).abs() < 1e-14);
209 }
210
211 #[test]
212 fn test_median_even() {
213 let data = vec![1.0_f64, 2.0, 3.0, 4.0];
214 assert!((median(&data).unwrap() - 2.5).abs() < 1e-14);
215 }
216
217 #[test]
218 fn test_percentile() {
219 let data = vec![1.0_f64, 2.0, 3.0, 4.0, 5.0];
220 assert!((percentile(&data, 0.0).unwrap() - 1.0).abs() < 1e-14);
221 assert!((percentile(&data, 50.0).unwrap() - 3.0).abs() < 1e-14);
222 assert!((percentile(&data, 100.0).unwrap() - 5.0).abs() < 1e-14);
223 }
224
225 #[test]
226 fn test_skewness_symmetric() {
227 let data = vec![1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
229 assert!(skewness(&data).unwrap().abs() < 1e-10);
230 }
231
232 #[test]
233 fn test_kurtosis_normal_like() {
234 let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
236 let k = kurtosis(&data).unwrap();
237 assert!(k < 0.0, "kurtosis = {}", k); }
239
240 #[test]
241 fn test_covariance_self() {
242 let data = vec![1.0_f64, 2.0, 3.0, 4.0, 5.0];
243 let cov = covariance(&data, &data).unwrap();
244 let var = variance(&data).unwrap();
245 assert!((cov - var).abs() < 1e-12);
246 }
247
248 #[test]
249 fn test_covariance_matrix_diagonal() {
250 let x = vec![1.0_f64, 2.0, 3.0, 4.0, 5.0];
251 let y = vec![5.0_f64, 4.0, 3.0, 2.0, 1.0];
252 let cov = covariance_matrix(&[x.clone(), y.clone()]).unwrap();
253 assert_eq!(cov.len(), 4);
254 assert!((cov[0] - variance(&x).unwrap()).abs() < 1e-12);
255 assert!((cov[3] - variance(&y).unwrap()).abs() < 1e-12);
256 assert!(cov[1] < 0.0);
258 assert!((cov[1] - cov[2]).abs() < 1e-12); }
260
261 #[test]
262 fn test_empty_data() {
263 assert!(mean::<f64>(&[]).is_err());
264 assert!(variance::<f64>(&[]).is_err());
265 assert!(median::<f64>(&[]).is_err());
266 }
267}