d3rs/array/statistics.rs
1//! Statistical functions for data arrays
2//!
3//! Provides functions for computing summary statistics like min, max, mean,
4//! median, variance, and quantiles.
5
6use std::cmp::Ordering;
7use std::iter::Sum;
8
/// Finds the smallest element of `data` according to its [`Ord`] ordering.
///
/// Yields `None` for an empty slice. When several elements tie for the
/// minimum, the reference to the earliest one is returned.
///
/// # Example
///
/// ```
/// use d3rs::array::min;
///
/// let data = vec![3, 1, 4, 1, 5, 9];
/// assert_eq!(min(&data), Some(&1));
/// assert_eq!(min::<i32>(&[]), None);
/// ```
pub fn min<T: Ord>(data: &[T]) -> Option<&T> {
    let (head, tail) = data.split_first()?;
    let smallest = tail.iter().fold(head, |best, candidate| {
        // Replace only on a strictly smaller candidate so the first of any ties wins.
        if best.cmp(candidate) == Ordering::Greater {
            candidate
        } else {
            best
        }
    });
    Some(smallest)
}
25
/// Finds the smallest element of `data` as judged by `compare`.
///
/// `compare(a, b)` reports how `a` orders relative to `b`. An empty slice
/// yields `None`; among elements the comparator considers equal, the
/// earliest one is kept.
///
/// # Example
///
/// ```
/// use d3rs::array::min_by;
///
/// let data = vec![3.0, 1.0, f64::NAN, 4.0];
/// let result = min_by(&data, |a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
/// assert_eq!(result, Some(&1.0));
/// ```
pub fn min_by<T, F>(data: &[T], compare: F) -> Option<&T>
where
    F: Fn(&T, &T) -> Ordering,
{
    data.iter()
        .reduce(|best, candidate| match compare(best, candidate) {
            // Only a strictly smaller candidate displaces the current best.
            Ordering::Greater => candidate,
            _ => best,
        })
}
43
/// Finds the largest element of `data` according to its [`Ord`] ordering.
///
/// Yields `None` for an empty slice. When several elements tie for the
/// maximum, the reference to the latest one is returned.
///
/// # Example
///
/// ```
/// use d3rs::array::max;
///
/// let data = vec![3, 1, 4, 1, 5, 9];
/// assert_eq!(max(&data), Some(&9));
/// ```
pub fn max<T: Ord>(data: &[T]) -> Option<&T> {
    let mut remaining = data.iter();
    let mut largest = remaining.next()?;
    for candidate in remaining {
        // A candidate that is greater than or equal to the current best
        // takes over, so the last of any ties wins.
        if largest.cmp(candidate) != Ordering::Greater {
            largest = candidate;
        }
    }
    Some(largest)
}
59
/// Finds the largest element of `data` as judged by `compare`.
///
/// `compare(a, b)` reports how `a` orders relative to `b`. An empty slice
/// yields `None`; among elements the comparator considers equal, the
/// latest one is kept.
pub fn max_by<T, F>(data: &[T], compare: F) -> Option<&T>
where
    F: Fn(&T, &T) -> Ordering,
{
    let (first, rest) = data.split_first()?;
    let largest = rest.iter().fold(first, |best, candidate| {
        // The current best survives only when it is strictly greater.
        if compare(best, candidate) == Ordering::Greater {
            best
        } else {
            candidate
        }
    });
    Some(largest)
}
67
/// Computes the `(minimum, maximum)` pair of `data` under its [`Ord`] ordering.
///
/// Yields `None` for an empty slice. For tied values the minimum refers to
/// the earliest tied element and the maximum to the latest one.
///
/// # Example
///
/// ```
/// use d3rs::array::extent;
///
/// let data = vec![3, 1, 4, 1, 5, 9];
/// assert_eq!(extent(&data), Some((&1, &9)));
/// ```
pub fn extent<T: Ord>(data: &[T]) -> Option<(&T, &T)> {
    let (first, rest) = data.split_first()?;
    // Track both bounds together, starting from the first element.
    let bounds = rest.iter().fold((first, first), |(lo, hi), item| {
        let lo = if lo.cmp(item) == Ordering::Greater { item } else { lo };
        let hi = if hi.cmp(item) == Ordering::Greater { hi } else { item };
        (lo, hi)
    });
    Some(bounds)
}
88
/// Computes the `(minimum, maximum)` pair of `data` as judged by `compare`.
///
/// The slice is scanned twice, once for each bound, using the same
/// comparator. Yields `None` for an empty slice.
pub fn extent_by<T, F>(data: &[T], compare: F) -> Option<(&T, &T)>
where
    F: Fn(&T, &T) -> Ordering + Copy,
{
    let lowest = data.iter().min_by(|lhs, rhs| compare(lhs, rhs));
    let highest = data.iter().max_by(|lhs, rhs| compare(lhs, rhs));
    // Both are `None` exactly when the slice is empty.
    lowest.zip(highest)
}
101
/// Adds up every value in `data`.
///
/// Each element is cloned before being summed, so the slice itself is left
/// untouched. The result for an empty slice is whatever `T`'s [`Sum`]
/// implementation produces for an empty iterator.
///
/// # Example
///
/// ```
/// use d3rs::array::sum;
///
/// let data = vec![1, 2, 3, 4, 5];
/// assert_eq!(sum(&data), 15);
/// ```
pub fn sum<T>(data: &[T]) -> T
where
    T: Sum + Clone,
{
    let owned_values = data.iter().map(T::clone);
    T::sum(owned_values)
}
118
/// Computes the arithmetic mean of `data`.
///
/// Yields `None` for an empty slice. Every element takes part in the sum,
/// so a NaN anywhere in the input makes the result NaN.
///
/// # Example
///
/// ```
/// use d3rs::array::mean;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// assert_eq!(mean(&data), Some(3.0));
/// ```
pub fn mean(data: &[f64]) -> Option<f64> {
    match data.len() {
        0 => None,
        count => Some(data.iter().sum::<f64>() / count as f64),
    }
}
137
/// Computes the arithmetic mean of the values `accessor` extracts from `data`.
///
/// `accessor` is called once per element. Yields `None` for an empty slice.
///
/// # Example
///
/// ```
/// use d3rs::array::mean_by;
///
/// #[derive(Debug)]
/// struct Point { x: f64, y: f64 }
///
/// let data = vec![
///     Point { x: 1.0, y: 2.0 },
///     Point { x: 3.0, y: 4.0 },
/// ];
/// assert_eq!(mean_by(&data, |p| p.x), Some(2.0));
/// ```
pub fn mean_by<T, F>(data: &[T], accessor: F) -> Option<f64>
where
    F: Fn(&T) -> f64,
{
    let count = data.len();
    (count != 0).then(|| {
        let total: f64 = data.iter().map(&accessor).sum();
        total / count as f64
    })
}
163
/// Computes the median of the non-NaN values in `data`.
///
/// NaN entries are ignored. With an even number of remaining values the
/// result is the average of the two middle ones. Yields `None` when the
/// slice is empty or contains only NaN.
///
/// Although the slice is taken mutably, this implementation sorts a private
/// copy and does not reorder `data`.
///
/// # Example
///
/// ```
/// use d3rs::array::median;
///
/// let mut data = vec![3.0, 1.0, 4.0, 1.0, 5.0];
/// assert_eq!(median(&mut data), Some(3.0));
/// ```
pub fn median(data: &mut [f64]) -> Option<f64> {
    let mut sorted: Vec<f64> = data
        .iter()
        .copied()
        .filter(|value| !value.is_nan())
        .collect();
    // NaN was removed above, so every pair of values is comparable.
    sorted.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap());

    let upper = sorted.len() / 2;
    match sorted.len() {
        0 => None,
        len if len % 2 == 1 => Some(sorted[upper]),
        _ => Some((sorted[upper - 1] + sorted[upper]) / 2.0),
    }
}
194
/// Returns the p-quantile of an already sorted slice, using linear
/// interpolation between the two nearest elements.
///
/// `data` is assumed to be sorted in ascending order; this is not checked.
/// `p` must lie in `[0, 1]`: `0.0` selects the first element and `1.0` the
/// last.
///
/// Returns `None` if `data` is empty or if `p` is outside `[0, 1]`
/// (including NaN).
///
/// When the quantile falls exactly on an element, or between two equal
/// elements, that element is returned as-is. This keeps infinite values
/// intact instead of turning them into NaN through `inf - inf`.
///
/// # Example
///
/// ```
/// use d3rs::array::quantile_sorted;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// assert_eq!(quantile_sorted(&data, 0.5), Some(3.0));
/// assert_eq!(quantile_sorted(&data, 0.25), Some(2.0));
/// assert_eq!(quantile_sorted(&[1.0, f64::INFINITY], 1.0), Some(f64::INFINITY));
/// ```
pub fn quantile_sorted(data: &[f64], p: f64) -> Option<f64> {
    // The range check also rejects a NaN `p`: NaN is contained in no range.
    if data.is_empty() || !(0.0..=1.0).contains(&p) {
        return None;
    }

    // Fractional position of the quantile among the indices `0..=len - 1`.
    let position = (data.len() - 1) as f64 * p;
    let lower = position.floor() as usize;
    let upper = position.ceil() as usize;
    let (below, above) = (data[lower], data[upper]);

    if lower == upper || below == above {
        // Nothing to interpolate. Skipping the arithmetic avoids
        // `inf - inf = NaN` when the neighbouring values are infinite.
        return Some(below);
    }

    Some(below + (above - below) * (position - lower as f64))
}
223
224/// Returns the p-quantile of values in a mutable slice.
225///
226/// The slice will be partially reordered.
227///
228/// # Example
229///
230/// ```
231/// use d3rs::array::quantile;
232///
233/// let mut data = vec![3.0, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0];
234/// assert_eq!(quantile(&mut data, 0.5), Some(3.5));
235/// ```
236pub fn quantile(data: &mut [f64], p: f64) -> Option<f64> {
237 if data.is_empty() || !(0.0..=1.0).contains(&p) {
238 return None;
239 }
240 // Filter out NaN values
241 let mut valid: Vec<f64> = data.iter().copied().filter(|x| !x.is_nan()).collect();
242 if valid.is_empty() {
243 return None;
244 }
245 valid.sort_by(|a, b| a.partial_cmp(b).unwrap());
246 quantile_sorted(&valid, p)
247}
248
249/// Returns the sample variance of values in the slice.
250///
251/// Uses Bessel's correction (divides by n-1).
252/// Returns `None` if the slice has fewer than 2 elements.
253///
254/// # Example
255///
256/// ```
257/// use d3rs::array::variance;
258///
259/// let data = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
260/// let var = variance(&data).unwrap();
261/// assert!((var - 4.571428571428571).abs() < 1e-10);
262/// ```
263pub fn variance(data: &[f64]) -> Option<f64> {
264 if data.len() < 2 {
265 return None;
266 }
267 let m = mean(data)?;
268 let sum_sq: f64 = data.iter().map(|x| (x - m).powi(2)).sum();
269 Some(sum_sq / (data.len() - 1) as f64)
270}
271
272/// Returns the sample standard deviation of values in the slice.
273///
274/// Uses Bessel's correction (divides by n-1).
275/// Returns `None` if the slice has fewer than 2 elements.
276///
277/// # Example
278///
279/// ```
280/// use d3rs::array::deviation;
281///
282/// let data = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
283/// let dev = deviation(&data).unwrap();
284/// assert!((dev - 2.138089935299395).abs() < 1e-10);
285/// ```
286pub fn deviation(data: &[f64]) -> Option<f64> {
287 variance(data).map(|v| v.sqrt())
288}
289
/// Computes the running totals of `data`.
///
/// Element `i` of the result is the sum of `data[0..=i]`, accumulated from
/// left to right starting at `0.0`. The result has the same length as the
/// input, so an empty slice produces an empty vector.
///
/// # Example
///
/// ```
/// use d3rs::array::cumsum;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0];
/// assert_eq!(cumsum(&data), vec![1.0, 3.0, 6.0, 10.0]);
/// ```
pub fn cumsum(data: &[f64]) -> Vec<f64> {
    data.iter()
        .scan(0.0, |running, &value| {
            *running += value;
            Some(*running)
        })
        .collect()
}
309
/// Finds the position of the smallest element of `data`.
///
/// Yields `None` for an empty slice. When several elements tie for the
/// minimum, the index of the earliest one is returned.
///
/// # Example
///
/// ```
/// use d3rs::array::min_index;
///
/// let data = vec![3, 1, 4, 1, 5, 9];
/// assert_eq!(min_index(&data), Some(1));
/// ```
pub fn min_index<T: Ord>(data: &[T]) -> Option<usize> {
    data.iter()
        .enumerate()
        // Strict `<` means a later equal element never displaces an earlier one.
        .reduce(|best, current| if current.1 < best.1 { current } else { best })
        .map(|(index, _)| index)
}
332
/// Finds the position of the largest element of `data`.
///
/// Yields `None` for an empty slice. When several elements tie for the
/// maximum, the index of the earliest one is returned.
///
/// # Example
///
/// ```
/// use d3rs::array::max_index;
///
/// let data = vec![3, 1, 4, 1, 5, 9];
/// assert_eq!(max_index(&data), Some(5));
/// ```
pub fn max_index<T: Ord>(data: &[T]) -> Option<usize> {
    data.iter()
        .enumerate()
        // Strict `>` means a later equal element never displaces an earlier one.
        .reduce(|best, current| if current.1 > best.1 { current } else { best })
        .map(|(index, _)| index)
}
355
/// Counts how many elements of `data` make `predicate` return `true`.
///
/// `predicate` is called once per element, in slice order.
///
/// # Example
///
/// ```
/// use d3rs::array::count;
///
/// let data = vec![1, 2, 3, 4, 5, 6];
/// assert_eq!(count(&data, |x| x % 2 == 0), 3);
/// ```
pub fn count<T, F>(data: &[T], predicate: F) -> usize
where
    F: Fn(&T) -> bool,
{
    // Each match contributes 1 to the total, each miss contributes 0.
    data.iter()
        .map(|item| usize::from(predicate(item)))
        .sum()
}
372
#[cfg(test)]
mod tests {
    use super::*;

    // Integer fixture shared by the ordering-based tests.
    const DIGITS: [i32; 8] = [3, 1, 4, 1, 5, 9, 2, 6];

    #[test]
    fn test_min_max() {
        assert_eq!(min(&DIGITS), Some(&1));
        assert_eq!(max(&DIGITS), Some(&9));
    }

    #[test]
    fn test_extent() {
        assert_eq!(extent(&DIGITS), Some((&1, &9)));
        assert_eq!(extent::<i32>(&[]), None);
    }

    #[test]
    fn test_sum() {
        let values = [1, 2, 3, 4, 5];
        assert_eq!(sum(&values), 15);
    }

    #[test]
    fn test_mean() {
        let values = [1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(mean(&values), Some(3.0));
        assert_eq!(mean(&[]), None);
    }

    #[test]
    fn test_median() {
        // Odd count: the middle value of the sorted data.
        let mut odd = [3.0, 1.0, 4.0, 1.0, 5.0];
        assert_eq!(median(&mut odd), Some(3.0));

        // Even count: the average of the two middle values.
        let mut even = [3.0, 1.0, 4.0, 1.0, 5.0, 9.0];
        assert_eq!(median(&mut even), Some(3.5));
    }

    #[test]
    fn test_quantile() {
        let mut values = [1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(quantile(&mut values, 0.0), Some(1.0));
        assert_eq!(quantile(&mut values, 0.5), Some(3.0));
        assert_eq!(quantile(&mut values, 1.0), Some(5.0));
    }

    #[test]
    fn test_variance_deviation() {
        let values = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let var = variance(&values).unwrap();
        let dev = deviation(&values).unwrap();
        assert!((var - 4.571428571428571).abs() < 1e-10);
        assert!((dev - 2.138089935299395).abs() < 1e-10);
    }

    #[test]
    fn test_cumsum() {
        let values = [1.0, 2.0, 3.0, 4.0];
        assert_eq!(cumsum(&values), vec![1.0, 3.0, 6.0, 10.0]);
    }

    #[test]
    fn test_min_max_index() {
        assert_eq!(min_index(&DIGITS), Some(1));
        assert_eq!(max_index(&DIGITS), Some(5));
    }

    #[test]
    fn test_count() {
        let values = [1, 2, 3, 4, 5, 6];
        assert_eq!(count(&values, |x| x % 2 == 0), 3);
        assert_eq!(count(&values, |x| *x > 10), 0);
    }
}