1use cyanea_core::{CyaneaError, Result, Summarizable};
8
9#[derive(Debug, Clone)]
11pub struct DescriptiveStats {
12 pub count: usize,
14 pub mean: f64,
16 pub median: f64,
18 pub variance: f64,
20 pub sample_variance: f64,
22 pub std_dev: f64,
24 pub sample_std_dev: f64,
26 pub min: f64,
28 pub max: f64,
30 pub range: f64,
32 pub q1: f64,
34 pub q3: f64,
36 pub iqr: f64,
38 pub skewness: f64,
40 pub kurtosis: f64,
42}
43
44impl Summarizable for DescriptiveStats {
45 fn summary(&self) -> String {
46 format!(
47 "n={}, mean={:.4}, std={:.4}, min={:.4}, max={:.4}",
48 self.count, self.mean, self.std_dev, self.min, self.max,
49 )
50 }
51}
52
53pub fn describe(data: &[f64]) -> Result<DescriptiveStats> {
57 if data.is_empty() {
58 return Err(CyaneaError::InvalidInput(
59 "describe: data must not be empty".into(),
60 ));
61 }
62
63 let n = data.len();
64 let n_f = n as f64;
65
66 let mut sum = 0.0;
68 let mut min_val = f64::INFINITY;
69 let mut max_val = f64::NEG_INFINITY;
70 for &x in data {
71 sum += x;
72 if x < min_val {
73 min_val = x;
74 }
75 if x > max_val {
76 max_val = x;
77 }
78 }
79 let mean_val = sum / n_f;
80
81 let mut m2 = 0.0;
83 let mut m3 = 0.0;
84 let mut m4 = 0.0;
85 for &x in data {
86 let d = x - mean_val;
87 m2 += d * d;
88 m3 += d * d * d;
89 m4 += d * d * d * d;
90 }
91
92 let pop_var = m2 / n_f;
93 let sample_var = if n > 1 { m2 / (n_f - 1.0) } else { f64::NAN };
94 let pop_std = pop_var.sqrt();
95 let sample_std = sample_var.sqrt();
96
97 let skewness = if pop_std > 0.0 {
98 (m3 / n_f) / (pop_std * pop_std * pop_std)
99 } else {
100 0.0
101 };
102
103 let kurtosis = if pop_std > 0.0 {
104 (m4 / n_f) / (pop_var * pop_var) - 3.0
105 } else {
106 0.0
107 };
108
109 let mut sorted = data.to_vec();
111 sorted.sort_by(|a, b| a.total_cmp(b));
112
113 let median_val = compute_quantile_sorted(&sorted, 0.5);
114 let q1_val = compute_quantile_sorted(&sorted, 0.25);
115 let q3_val = compute_quantile_sorted(&sorted, 0.75);
116
117 Ok(DescriptiveStats {
118 count: n,
119 mean: mean_val,
120 median: median_val,
121 variance: pop_var,
122 sample_variance: sample_var,
123 std_dev: pop_std,
124 sample_std_dev: sample_std,
125 min: min_val,
126 max: max_val,
127 range: max_val - min_val,
128 q1: q1_val,
129 q3: q3_val,
130 iqr: q3_val - q1_val,
131 skewness,
132 kurtosis,
133 })
134}
135
136pub fn mean(data: &[f64]) -> Result<f64> {
140 if data.is_empty() {
141 return Err(CyaneaError::InvalidInput(
142 "mean: data must not be empty".into(),
143 ));
144 }
145 Ok(data.iter().sum::<f64>() / data.len() as f64)
146}
147
148pub fn median(data: &[f64]) -> Result<f64> {
150 if data.is_empty() {
151 return Err(CyaneaError::InvalidInput(
152 "median: data must not be empty".into(),
153 ));
154 }
155 let mut sorted = data.to_vec();
156 sorted.sort_by(|a, b| a.total_cmp(b));
157 Ok(compute_quantile_sorted(&sorted, 0.5))
158}
159
160pub fn variance(data: &[f64], ddof: usize) -> Result<f64> {
165 let n = data.len();
166 if n <= ddof {
167 return Err(CyaneaError::InvalidInput(format!(
168 "variance: need more than {} observations (got {})",
169 ddof, n,
170 )));
171 }
172 let m = mean(data)?;
173 let ss: f64 = data.iter().map(|&x| (x - m).powi(2)).sum();
174 Ok(ss / (n - ddof) as f64)
175}
176
177pub fn std_dev(data: &[f64], ddof: usize) -> Result<f64> {
179 Ok(variance(data, ddof)?.sqrt())
180}
181
182pub fn quantile(data: &[f64], q: f64) -> Result<f64> {
184 if data.is_empty() {
185 return Err(CyaneaError::InvalidInput(
186 "quantile: data must not be empty".into(),
187 ));
188 }
189 if !(0.0..=1.0).contains(&q) {
190 return Err(CyaneaError::InvalidInput(
191 "quantile: q must be in [0, 1]".into(),
192 ));
193 }
194 let mut sorted = data.to_vec();
195 sorted.sort_by(|a, b| a.total_cmp(b));
196 Ok(compute_quantile_sorted(&sorted, q))
197}
198
199pub fn iqr(data: &[f64]) -> Result<f64> {
201 let q1 = quantile(data, 0.25)?;
202 let q3 = quantile(data, 0.75)?;
203 Ok(q3 - q1)
204}
205
206pub fn mad(data: &[f64]) -> Result<f64> {
208 let med = median(data)?;
209 let deviations: Vec<f64> = data.iter().map(|&x| (x - med).abs()).collect();
210 median(&deviations)
211}
212
213fn compute_quantile_sorted(sorted: &[f64], q: f64) -> f64 {
217 let n = sorted.len();
218 if n == 1 {
219 return sorted[0];
220 }
221 let pos = q * (n - 1) as f64;
222 let lo = pos.floor() as usize;
223 let hi = lo + 1;
224 let frac = pos - lo as f64;
225 if hi >= n {
226 sorted[n - 1]
227 } else {
228 sorted[lo] * (1.0 - frac) + sorted[hi] * frac
229 }
230}
231
232#[cfg(test)]
235mod tests {
236 use super::*;
237
238 const TOL: f64 = 1e-10;
239
240 #[test]
241 fn describe_known_data() {
242 let data = [1.0, 2.0, 3.0, 4.0, 5.0];
243 let stats = describe(&data).unwrap();
244 assert_eq!(stats.count, 5);
245 assert!((stats.mean - 3.0).abs() < TOL);
246 assert!((stats.median - 3.0).abs() < TOL);
247 assert!((stats.min - 1.0).abs() < TOL);
248 assert!((stats.max - 5.0).abs() < TOL);
249 assert!((stats.range - 4.0).abs() < TOL);
250 assert!((stats.variance - 2.0).abs() < TOL);
252 assert!((stats.sample_variance - 2.5).abs() < TOL);
254 }
255
256 #[test]
257 fn describe_single() {
258 let data = [42.0];
259 let stats = describe(&data).unwrap();
260 assert_eq!(stats.count, 1);
261 assert!((stats.mean - 42.0).abs() < TOL);
262 assert!((stats.variance - 0.0).abs() < TOL);
263 assert!(stats.sample_variance.is_nan());
264 }
265
266 #[test]
267 fn describe_empty() {
268 assert!(describe(&[]).is_err());
269 }
270
271 #[test]
272 fn mean_basic() {
273 assert!((mean(&[2.0, 4.0, 6.0]).unwrap() - 4.0).abs() < TOL);
274 }
275
276 #[test]
277 fn mean_empty() {
278 assert!(mean(&[]).is_err());
279 }
280
281 #[test]
282 fn median_odd() {
283 assert!((median(&[3.0, 1.0, 2.0]).unwrap() - 2.0).abs() < TOL);
284 }
285
286 #[test]
287 fn median_even() {
288 assert!((median(&[4.0, 1.0, 3.0, 2.0]).unwrap() - 2.5).abs() < TOL);
289 }
290
291 #[test]
292 fn variance_population() {
293 let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
294 assert!((variance(&data, 0).unwrap() - 4.0).abs() < TOL);
295 }
296
297 #[test]
298 fn variance_sample() {
299 let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
300 let expected = 32.0 / 7.0; assert!((variance(&data, 1).unwrap() - expected).abs() < TOL);
302 }
303
304 #[test]
305 fn variance_too_few() {
306 assert!(variance(&[1.0], 1).is_err());
307 assert!(variance(&[], 0).is_err());
308 }
309
310 #[test]
311 fn quantile_basic() {
312 let data = [1.0, 2.0, 3.0, 4.0, 5.0];
313 assert!((quantile(&data, 0.0).unwrap() - 1.0).abs() < TOL);
314 assert!((quantile(&data, 1.0).unwrap() - 5.0).abs() < TOL);
315 assert!((quantile(&data, 0.5).unwrap() - 3.0).abs() < TOL);
316 }
317
318 #[test]
319 fn quantile_invalid_q() {
320 assert!(quantile(&[1.0], -0.1).is_err());
321 assert!(quantile(&[1.0], 1.1).is_err());
322 }
323
324 #[test]
325 fn iqr_basic() {
326 let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
327 let result = iqr(&data).unwrap();
328 assert!((result - 3.5).abs() < TOL);
330 }
331
332 #[test]
333 fn mad_basic() {
334 let data = [1.0, 1.0, 2.0, 2.0, 4.0, 6.0, 9.0];
335 assert!((mad(&data).unwrap() - 1.0).abs() < TOL);
337 }
338
339 #[test]
340 fn describe_skewness_symmetric() {
341 let data = [1.0, 2.0, 3.0, 4.0, 5.0];
343 let stats = describe(&data).unwrap();
344 assert!((stats.skewness).abs() < TOL);
345 }
346
347 #[test]
348 fn summarizable_impl() {
349 let stats = describe(&[1.0, 2.0, 3.0]).unwrap();
350 let s = stats.summary();
351 assert!(s.contains("n=3"));
352 assert!(s.contains("mean="));
353 }
354}
355
356#[cfg(test)]
357mod proptests {
358 use super::*;
359 use proptest::prelude::*;
360
361 fn finite_f64() -> impl Strategy<Value = f64> {
362 -1e12_f64..1e12_f64
363 }
364
365 proptest! {
366 #[test]
367 fn describe_count_equals_length(
368 data in proptest::collection::vec(finite_f64(), 1..=500)
369 ) {
370 let stats = describe(&data).unwrap();
371 prop_assert_eq!(stats.count, data.len());
372 }
373
374 #[test]
375 fn variance_nonnegative(
376 data in proptest::collection::vec(finite_f64(), 1..=500)
377 ) {
378 let stats = describe(&data).unwrap();
379 prop_assert!(stats.variance >= 0.0, "variance={} should be >= 0", stats.variance);
380 }
381
382 #[test]
383 fn min_le_mean_le_max(
384 data in proptest::collection::vec(finite_f64(), 1..=500)
385 ) {
386 let stats = describe(&data).unwrap();
387 prop_assert!(stats.min <= stats.mean + 1e-10,
388 "min={} > mean={}", stats.min, stats.mean);
389 prop_assert!(stats.mean <= stats.max + 1e-10,
390 "mean={} > max={}", stats.mean, stats.max);
391 }
392
393 #[test]
394 fn range_equals_max_minus_min(
395 data in proptest::collection::vec(finite_f64(), 1..=500)
396 ) {
397 let stats = describe(&data).unwrap();
398 prop_assert!((stats.range - (stats.max - stats.min)).abs() < 1e-10);
399 }
400
401 #[test]
402 fn median_between_min_and_max(
403 data in proptest::collection::vec(finite_f64(), 1..=500)
404 ) {
405 let stats = describe(&data).unwrap();
406 prop_assert!(stats.median >= stats.min - 1e-10,
407 "median={} < min={}", stats.median, stats.min);
408 prop_assert!(stats.median <= stats.max + 1e-10,
409 "median={} > max={}", stats.median, stats.max);
410 }
411 }
412}