linreg_core/stats.rs
1//! Basic statistical utility functions.
2//!
3//! This module provides fundamental descriptive statistics operations
4//! including measures of central tendency, dispersion, and position.
5
6#![allow(clippy::needless_range_loop)]
7
/// Computes the arithmetic mean of the values in `data`.
///
/// # Arguments
///
/// * `data` - Slice of f64 values to average
///
/// # Returns
///
/// The sum of all values divided by their count, or NaN when the slice is
/// empty. NaN and infinite inputs propagate through the floating-point sum.
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::mean;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// assert_eq!(mean(&data), 3.0);
/// assert!(mean(&[]).is_nan());
/// ```
pub fn mean(data: &[f64]) -> f64 {
    match data.len() {
        // No observations: the mean is undefined.
        0 => f64::NAN,
        count => data.iter().sum::<f64>() / count as f64,
    }
}
33
/// Computes the unbiased sample variance of `data` (denominator `n - 1`).
///
/// The squared deviations are accumulated with **Welford's online algorithm**:
/// a running mean is updated one observation at a time, which avoids the
/// catastrophic cancellation of the naive "sum of squares minus squared sum"
/// formula when the values are large relative to their spread.
///
/// # Arguments
///
/// * `data` - Slice of f64 values
///
/// # Returns
///
/// The sample variance as f64, or NaN if the slice has fewer than 2 elements.
/// NaN inputs propagate to the result.
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::variance;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// let v = variance(&data);
/// assert!((v - 2.5).abs() < 1e-10);
/// ```
pub fn variance(data: &[f64]) -> f64 {
    // At least two observations are required for an (n - 1) denominator.
    let (first, rest) = match data {
        [first, rest @ ..] if !rest.is_empty() => (*first, rest),
        _ => return f64::NAN,
    };

    let mut running_mean = first;
    let mut sq_dev_sum = 0.0;

    // `rest` begins at the second observation, so after absorbing `rest[k]`
    // the number of observations seen so far is `k + 2`.
    for (k, &value) in rest.iter().enumerate() {
        let before = value - running_mean;
        running_mean += before / (k + 2) as f64;
        let after = value - running_mean;
        sq_dev_sum += before * after;
    }

    // rest.len() == n - 1
    sq_dev_sum / rest.len() as f64
}
79
/// Computes the population variance of `data` (denominator `n`).
///
/// Use this when `data` is the entire population rather than a sample.
///
/// The squared deviations are accumulated with **Welford's online algorithm**,
/// which updates a running mean one observation at a time and so avoids the
/// catastrophic cancellation of the naive "sum of squares minus squared sum"
/// formula when the values are large relative to their spread.
///
/// # Arguments
///
/// * `data` - Slice of f64 values
///
/// # Returns
///
/// The population variance as f64, or NaN if the slice is empty. A single
/// element yields `0.0`.
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::variance_population;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// let v = variance_population(&data);
/// assert!((v - 2.0).abs() < 1e-10);
/// ```
pub fn variance_population(data: &[f64]) -> f64 {
    let (first, rest) = match data.split_first() {
        Some((&first, rest)) => (first, rest),
        None => return f64::NAN,
    };

    // Fold state is (running mean, accumulated squared deviations). `rest[k]`
    // is the (k + 2)-th observation overall.
    let (_, sq_dev_sum) = rest.iter().enumerate().fold(
        (first, 0.0_f64),
        |(running_mean, acc), (k, &value)| {
            let before = value - running_mean;
            let updated_mean = running_mean + before / (k + 2) as f64;
            (updated_mean, acc + before * (value - updated_mean))
        },
    );

    sq_dev_sum / data.len() as f64
}
125
126/// Calculates the sample standard deviation of a slice of f64 values.
127///
128/// Uses the (n-1) denominator for unbiased estimation.
129///
130/// # Arguments
131///
132/// * `data` - Slice of f64 values
133///
134/// # Returns
135///
136/// The standard deviation as f64, or NaN if the slice has fewer than 2 elements
137///
138/// # Examples
139///
140/// ```rust
141/// use linreg_core::stats::stddev;
142///
143/// let data = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
144/// let s = stddev(&data);
145/// assert!((s - 2.138089935).abs() < 1e-9);
146/// ```
147pub fn stddev(data: &[f64]) -> f64 {
148 variance(data).sqrt()
149}
150
151/// Calculates the population standard deviation of a slice of f64 values.
152///
153/// Uses the n denominator (for when data represents the entire population).
154///
155/// # Arguments
156///
157/// * `data` - Slice of f64 values
158///
159/// # Returns
160///
161/// The population standard deviation as f64, or NaN if the slice is empty
162///
163/// # Examples
164///
165/// ```rust
166/// use linreg_core::stats::stddev_population;
167///
168/// let data = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
169/// let s = stddev_population(&data);
170/// assert!((s - 2.0).abs() < 1e-9);
171/// ```
172pub fn stddev_population(data: &[f64]) -> f64 {
173 variance_population(data).sqrt()
174}
175
/// Calculates the median of a slice of f64 values.
///
/// The data is copied and sorted; for an odd number of elements the middle
/// element is returned, for an even number the average of the two middle
/// elements.
///
/// Values are ordered with [`f64::total_cmp`], which is a genuine total order
/// even when NaN is present. A positive NaN therefore sorts after `+∞` (and a
/// NaN with the sign bit set before `-∞`); the result is NaN only if a NaN
/// ends up in a middle position.
///
/// # Arguments
///
/// * `data` - Slice of f64 values
///
/// # Returns
///
/// The median as f64, or NaN if the slice is empty
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::median;
///
/// let odd = vec![1.0, 3.0, 5.0];
/// assert_eq!(median(&odd), 3.0);
///
/// let even = vec![1.0, 2.0, 3.0, 4.0];
/// assert_eq!(median(&even), 2.5);
/// ```
pub fn median(data: &[f64]) -> f64 {
    if data.is_empty() {
        return f64::NAN;
    }

    let mut sorted = data.to_vec();
    // A comparator built from `partial_cmp(..).unwrap_or(Equal)` is not a
    // total order once NaN is present: the slice may be left unsorted and the
    // sort is allowed to panic. `total_cmp` has neither problem.
    sorted.sort_unstable_by(f64::total_cmp);

    let mid = sorted.len() / 2;
    if sorted.len() % 2 == 1 {
        sorted[mid]
    } else {
        (sorted[mid - 1] + sorted[mid]) / 2.0
    }
}
212
/// Calculates a quantile of a slice of f64 values using linear interpolation.
///
/// The data is copied and sorted, and the quantile is read at the fractional
/// position `q * (n - 1)`: if that position is an integer the element there is
/// returned, otherwise the two neighbouring elements are linearly interpolated.
///
/// Values are ordered with [`f64::total_cmp`], which is a genuine total order
/// even when NaN is present. A positive NaN therefore sorts after `+∞` (and a
/// NaN with the sign bit set before `-∞`); the result is NaN only if a NaN
/// lies at a position that is read.
///
/// # Arguments
///
/// * `data` - Slice of f64 values
/// * `q` - Quantile to calculate (0.0 to 1.0)
///
/// # Returns
///
/// The quantile value as f64, or NaN if the slice is empty or `q` is outside
/// `[0.0, 1.0]` (including when `q` itself is NaN)
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::quantile;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
/// let q25 = quantile(&data, 0.25);
/// let q50 = quantile(&data, 0.50);
/// let q75 = quantile(&data, 0.75);
/// assert_eq!(q25, 3.0);
/// assert_eq!(q50, 5.0);
/// assert_eq!(q75, 7.0);
/// ```
pub fn quantile(data: &[f64], q: f64) -> f64 {
    // `contains` is false for a NaN `q`, so that case is rejected here too.
    if data.is_empty() || !(0.0..=1.0).contains(&q) {
        return f64::NAN;
    }

    let mut sorted = data.to_vec();
    // A comparator built from `partial_cmp(..).unwrap_or(Equal)` is not a
    // total order once NaN is present: the slice may be left unsorted and the
    // sort is allowed to panic. `total_cmp` has neither problem.
    sorted.sort_unstable_by(f64::total_cmp);

    let position = q * (sorted.len() - 1) as f64;
    let lower = position.floor() as usize;
    let upper = position.ceil() as usize;

    if lower == upper {
        // Exact hit: no interpolation (also avoids `inf * 0.0`).
        sorted[lower]
    } else {
        let weight = position - lower as f64;
        sorted[lower] * (1.0 - weight) + sorted[upper] * weight
    }
}
255
/// Adds up every value in `data`.
///
/// # Arguments
///
/// * `data` - Slice of f64 values
///
/// # Returns
///
/// The total as f64. An empty slice sums to zero, and NaN or infinite inputs
/// propagate through the floating-point addition.
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::sum;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// assert_eq!(sum(&data), 15.0);
/// ```
pub fn sum(data: &[f64]) -> f64 {
    let total: f64 = data.iter().copied().sum();
    total
}
277
/// Finds the minimum value in a slice of f64 values.
///
/// NaN entries are skipped as long as the slice contains at least one number,
/// because [`f64::min`] returns the non-NaN operand when only one side is NaN.
///
/// # Arguments
///
/// * `data` - Slice of f64 values
///
/// # Returns
///
/// The minimum value as f64, or NaN if the slice is empty or contains nothing
/// but NaN
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::min;
///
/// let data = vec![3.0, 1.0, 4.0, 1.0, 5.0, 9.0];
/// assert_eq!(min(&data), 1.0);
/// assert!(min(&[]).is_nan());
/// ```
pub fn min(data: &[f64]) -> f64 {
    // Reducing from the first element (rather than folding from +INFINITY)
    // means an all-NaN slice yields NaN instead of an infinity that never
    // appeared in the data; `None` (empty slice) also maps to NaN.
    data.iter().copied().reduce(f64::min).unwrap_or(f64::NAN)
}
302
/// Finds the maximum value in a slice of f64 values.
///
/// NaN entries are skipped as long as the slice contains at least one number,
/// because [`f64::max`] returns the non-NaN operand when only one side is NaN.
///
/// # Arguments
///
/// * `data` - Slice of f64 values
///
/// # Returns
///
/// The maximum value as f64, or NaN if the slice is empty or contains nothing
/// but NaN
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::max;
///
/// let data = vec![3.0, 1.0, 4.0, 1.0, 5.0, 9.0];
/// assert_eq!(max(&data), 9.0);
/// assert!(max(&[]).is_nan());
/// ```
pub fn max(data: &[f64]) -> f64 {
    // Reducing from the first element (rather than folding from -INFINITY)
    // means an all-NaN slice yields NaN instead of an infinity that never
    // appeared in the data; `None` (empty slice) also maps to NaN.
    data.iter().copied().reduce(f64::max).unwrap_or(f64::NAN)
}
327
328/// Calculates the range (max - min) of a slice of f64 values.
329///
330/// # Arguments
331///
332/// * `data` - Slice of f64 values
333///
334/// # Returns
335///
336/// The range as f64, or NaN if the slice is empty
337///
338/// # Examples
339///
340/// ```rust
341/// use linreg_core::stats::range;
342///
343/// let data = vec![3.0, 1.0, 4.0, 1.0, 5.0, 9.0];
344/// assert_eq!(range(&data), 8.0); // 9.0 - 1.0
345/// ```
346pub fn range(data: &[f64]) -> f64 {
347 max(data) - min(data)
348}
349
/// Result of the mode calculation performed by [`mode`].
///
/// Ties are reported rather than broken: `modes` holds every value that
/// reaches the highest frequency, so it has a single entry when there is a
/// clear mode and several entries otherwise (for data in which every value is
/// distinct, all of them are listed with a frequency of 1).
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct ModeResult {
    /// The mode value(s), in ascending order. May contain multiple values if
    /// there are ties.
    pub modes: Vec<f64>,
    /// The frequency of the mode(s), i.e. how many times each listed value
    /// occurs.
    pub frequency: usize,
    /// Total number of unique values in the dataset (NaN entries are not
    /// counted).
    pub unique_count: usize,
}
363
364/// Calculates the mode(s) of a slice of f64 values.
365///
366/// The mode is the value that appears most frequently in the data. This function
367/// handles ties by returning all modes and works with both discrete and continuous data.
368///
369/// # Arguments
370///
371/// * `data` - Slice of f64 values
372///
373/// # Returns
374///
375/// A `ModeResult` containing the mode(s), their frequency, and the count of unique values.
376/// Returns `None` if the slice is empty.
377///
378/// # Examples
379///
380/// ```rust
381/// use linreg_core::stats::mode;
382///
383/// let single_mode = vec![1.0, 2.0, 2.0, 3.0, 4.0];
384/// let result = mode(&single_mode).unwrap();
385/// assert_eq!(result.modes, vec![2.0]);
386/// assert_eq!(result.frequency, 2);
387///
388/// let multi_mode = vec![1.0, 1.0, 2.0, 2.0, 3.0];
389/// let result = mode(&multi_mode).unwrap();
390/// assert_eq!(result.modes, vec![1.0, 2.0]);
391/// assert_eq!(result.frequency, 2);
392/// ```
393pub fn mode(data: &[f64]) -> Option<ModeResult> {
394 if data.is_empty() {
395 return None;
396 }
397
398 // Filter out NaN values and sort for frequency counting
399 let mut sorted_data: Vec<f64> = data
400 .iter()
401 .filter(|&&v| !v.is_nan())
402 .copied()
403 .collect();
404
405 if sorted_data.is_empty() {
406 // All values were NaN
407 return None;
408 }
409
410 sorted_data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
411
412 // Count frequencies by iterating through sorted data
413 let mut frequencies: Vec<(f64, usize)> = Vec::new();
414 let mut current_value = sorted_data[0];
415 let mut count = 1;
416
417 for &value in &sorted_data[1..] {
418 // Use partial_cmp to handle f64 comparison
419 if (value - current_value).abs() < f64::EPSILON {
420 count += 1;
421 } else {
422 frequencies.push((current_value, count));
423 current_value = value;
424 count = 1;
425 }
426 }
427 // Don't forget the last value
428 frequencies.push((current_value, count));
429
430 if frequencies.is_empty() {
431 return None;
432 }
433
434 // Find the maximum frequency
435 let max_frequency = frequencies.iter().map(|&(_, freq)| freq).max().unwrap();
436
437 // Collect all values with the maximum frequency
438 let modes: Vec<f64> = frequencies
439 .into_iter()
440 .filter_map(|(value, freq)| {
441 if freq == max_frequency {
442 Some(value)
443 } else {
444 None
445 }
446 })
447 .collect();
448
449 // Count unique values
450 let unique_count = {
451 let mut count = 1;
452 for i in 1..sorted_data.len() {
453 if (sorted_data[i] - sorted_data[i - 1]).abs() > f64::EPSILON {
454 count += 1;
455 }
456 }
457 count
458 };
459
460 Some(ModeResult {
461 modes,
462 frequency: max_frequency,
463 unique_count,
464 })
465}
466
467/// Calculates the five-number summary of a slice of f64 values.
468///
469/// The five-number summary consists of:
470/// - Minimum
471/// - First quartile (Q1, 25th percentile)
472/// - Median (Q2, 50th percentile)
473/// - Third quartile (Q3, 75th percentile)
474/// - Maximum
475///
476/// This is also known as the "boxplot summary" as these values are used to create box plots.
477///
478/// # Arguments
479///
480/// * `data` - Slice of f64 values
481///
482/// # Returns
483///
484/// A `FiveNumberSummary` struct containing the summary statistics, or `None` if the slice is empty.
485///
486/// # Examples
487///
488/// ```rust
489/// use linreg_core::stats::five_number_summary;
490///
491/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
492/// let summary = five_number_summary(&data).unwrap();
493/// assert_eq!(summary.min, 1.0);
494/// assert_eq!(summary.max, 9.0);
495/// assert_eq!(summary.median, 5.0);
496/// ```
497pub fn five_number_summary(data: &[f64]) -> Option<FiveNumberSummary> {
498 if data.is_empty() {
499 return None;
500 }
501
502 let min_val = min(data);
503 let max_val = max(data);
504 let q1 = quantile(data, 0.25);
505 let median_val = median(data);
506 let q3 = quantile(data, 0.75);
507
508 Some(FiveNumberSummary {
509 min: min_val,
510 q1,
511 median: median_val,
512 q3,
513 max: max_val,
514 })
515}
516
/// Five-number summary statistics.
///
/// Contains the minimum, first quartile, median, third quartile, and maximum
/// of a dataset. Used for box plots and exploratory data analysis; values of
/// this type are produced by [`five_number_summary`].
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize)]
pub struct FiveNumberSummary {
    /// Minimum value
    pub min: f64,
    /// First quartile (25th percentile)
    pub q1: f64,
    /// Median (50th percentile)
    pub median: f64,
    /// Third quartile (75th percentile)
    pub q3: f64,
    /// Maximum value
    pub max: f64,
}
534
535impl FiveNumberSummary {
536 /// Calculates the interquartile range (IQR).
537 ///
538 /// The IQR is Q3 - Q1 and represents the spread of the middle 50% of the data.
539 pub fn iqr(&self) -> f64 {
540 self.q3 - self.q1
541 }
542
543 /// Determines if a value is an outlier using the 1.5*IQR rule.
544 ///
545 /// A value is considered an outlier if it is below Q1 - 1.5*IQR
546 /// or above Q3 + 1.5*IQR.
547 pub fn is_outlier(&self, value: f64) -> bool {
548 let iqr = self.iqr();
549 let lower_fence = self.q1 - 1.5 * iqr;
550 let upper_fence = self.q3 + 1.5 * iqr;
551 value < lower_fence || value > upper_fence
552 }
553}
554
/// Calculates the correlation coefficient (Pearson's r) between two slices.
///
/// This implementation uses a **numerically stable single-pass algorithm**
/// in the style of Welford's method: the running means, the sums of squared
/// deviations of `x` and `y`, and their co-moment are all updated one
/// observation at a time, which avoids catastrophic cancellation.
///
/// The normalising denominator is computed as `sqrt(m2_x) * sqrt(m2_y)` rather
/// than `sqrt(m2_x * m2_y)`, so that data on a very large or very small scale
/// does not overflow or underflow the intermediate product. The final value is
/// clamped to `[-1, 1]` to absorb rounding overshoot.
///
/// # Arguments
///
/// * `x` - First slice of f64 values
/// * `y` - Second slice of f64 values (must be same length as x)
///
/// # Returns
///
/// The correlation coefficient as f64 in `[-1, 1]`, or NaN if the slices
/// differ in length, hold fewer than 2 elements, or either one has zero
/// variance. NaN inputs propagate to the result.
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::correlation;
///
/// let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// let y = vec![2.0, 4.0, 5.0, 4.0, 5.0];
/// let r = correlation(&x, &y);
/// assert!((r - 0.7746).abs() < 1e-4);
/// ```
pub fn correlation(x: &[f64], y: &[f64]) -> f64 {
    if x.len() != y.len() || x.len() < 2 {
        return f64::NAN;
    }

    // Running state: means, sums of squared deviations, and the co-moment.
    let mut mean_x = x[0];
    let mut mean_y = y[0];
    let mut m2_x = 0.0_f64;
    let mut m2_y = 0.0_f64;
    let mut co_moment = 0.0_f64;

    // The first pair seeds the means above; `k + 1` is the number of pairs
    // seen once pair `k` has been absorbed.
    for (k, (&xi, &yi)) in x.iter().zip(y).enumerate().skip(1) {
        let delta_x = xi - mean_x;
        let delta_y = yi - mean_y;

        let inv_count = 1.0 / (k + 1) as f64;
        mean_x += delta_x * inv_count;
        mean_y += delta_y * inv_count;

        // Deviations from the *updated* means.
        let delta_x_new = xi - mean_x;
        let delta_y_new = yi - mean_y;

        m2_x += delta_x * delta_x_new;
        m2_y += delta_y * delta_y_new;
        co_moment += delta_x * delta_y_new;
    }

    // Taking the roots separately keeps the product within f64 range even when
    // `m2_x * m2_y` would overflow to infinity or underflow to zero.
    let denom = m2_x.sqrt() * m2_y.sqrt();
    if denom == 0.0 {
        // At least one variable is constant: correlation is undefined.
        return f64::NAN;
    }

    // NaN passes through `clamp` unchanged.
    (co_moment / denom).clamp(-1.0, 1.0)
}
620
// Unit tests for the descriptive-statistics helpers in this module: basic
// behaviour first, then NaN/Inf handling, mode, and the five-number summary.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_mean() {
        assert_eq!(mean(&[1.0, 2.0, 3.0, 4.0, 5.0]), 3.0);
        // Empty input has no mean.
        assert!(mean(&[]).is_nan());
    }

    #[test]
    fn test_variance() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let v = variance(&data);
        assert!((v - 2.5).abs() < 1e-10);
        // Sample variance needs at least two observations.
        assert!(variance(&[1.0]).is_nan());
    }

    #[test]
    fn test_variance_population() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let v = variance_population(&data);
        assert!((v - 2.0).abs() < 1e-10);
    }

    #[test]
    fn test_stddev() {
        let data = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let s = stddev(&data);
        assert!((s - 2.138089935).abs() < 1e-9);
    }

    #[test]
    fn test_median() {
        // Odd length: middle element; even length: mean of the two middle elements.
        assert_eq!(median(&[1.0, 3.0, 5.0]), 3.0);
        assert_eq!(median(&[1.0, 2.0, 3.0, 4.0]), 2.5);
        assert!(median(&[]).is_nan());
    }

    #[test]
    fn test_quantile() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
        // q = 0 and q = 1 are the extremes of the data.
        assert_eq!(quantile(&data, 0.0), 1.0);
        assert_eq!(quantile(&data, 0.5), 5.0);
        assert_eq!(quantile(&data, 1.0), 9.0);
    }

    #[test]
    fn test_min_max() {
        let data = vec![3.0, 1.0, 4.0, 1.0, 5.0, 9.0];
        assert_eq!(min(&data), 1.0);
        assert_eq!(max(&data), 9.0);
    }

    #[test]
    fn test_sum() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(sum(&data), 15.0);
        // Empty slice returns 0.0 (standard Rust behavior)
        assert_eq!(sum(&[]), 0.0);
    }

    #[test]
    fn test_range() {
        let data = vec![3.0, 1.0, 4.0, 1.0, 5.0, 9.0];
        assert_eq!(range(&data), 8.0); // 9.0 - 1.0
        // Empty slice returns NaN (max - min = NaN - NaN)
        assert!(range(&[]).is_nan());
    }

    #[test]
    fn test_correlation() {
        let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let y = vec![2.0, 4.0, 5.0, 4.0, 5.0];
        let r = correlation(&x, &y);
        // Expected value: Sxy = 6, Sxx = 10, Syy = 6, so r = 6/sqrt(60) ≈ 0.7746
        assert!((r - 0.7746).abs() < 1e-4);
    }

    // ============================================================================
    // NaN/Inf Handling Tests
    // ============================================================================

    #[test]
    fn test_mean_with_nan() {
        let data = vec![1.0, f64::NAN, 3.0, 4.0, 5.0];
        // mean propagates NaN (standard Rust behavior for sum)
        assert!(mean(&data).is_nan());
    }

    #[test]
    fn test_mean_with_inf() {
        // A single infinity dominates the sum, so the mean keeps its sign.
        let data = vec![1.0, 2.0, f64::INFINITY, 4.0, 5.0];
        assert_eq!(mean(&data), f64::INFINITY);

        let data2 = vec![1.0, 2.0, f64::NEG_INFINITY, 4.0, 5.0];
        assert_eq!(mean(&data2), f64::NEG_INFINITY);
    }

    #[test]
    fn test_variance_with_nan() {
        let data = vec![1.0, f64::NAN, 3.0, 4.0, 5.0];
        // NaN propagates through the running mean and squared deviations.
        assert!(variance(&data).is_nan());
    }

    #[test]
    fn test_variance_with_inf() {
        let data = vec![1.0, 2.0, f64::INFINITY, 4.0, 5.0];
        // Variance with INF should be NaN or INF depending on calculation
        let v = variance(&data);
        assert!(v.is_nan() || v.is_infinite());
    }

    #[test]
    fn test_correlation_with_nan() {
        let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let y = vec![2.0, f64::NAN, 5.0, 4.0, 5.0];
        // correlation with NaN in data returns NaN
        assert!(correlation(&x, &y).is_nan());
    }

    #[test]
    fn test_correlation_with_inf() {
        let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let y = vec![2.0, f64::INFINITY, 5.0, 4.0, 5.0];
        // correlation with INF in data must not produce a finite value
        // (either NaN or an infinity is accepted)
        let r = correlation(&x, &y);
        assert!(r.is_nan() || r.is_infinite());
    }

    #[test]
    fn test_correlation_single_value() {
        let x = vec![1.0];
        let y = vec![2.0];
        // Single value arrays should return NaN (undefined correlation)
        assert!(correlation(&x, &y).is_nan());
    }

    #[test]
    fn test_correlation_mismatched_lengths() {
        let x = vec![1.0, 2.0, 3.0];
        let y = vec![1.0, 2.0];
        // Mismatched lengths should return NaN
        assert!(correlation(&x, &y).is_nan());
    }

    #[test]
    fn test_min_max_with_nan() {
        let data = vec![3.0, 1.0, f64::NAN, 4.0, 5.0];
        // min/max skip NaN: f64::min / f64::max return the non-NaN operand
        assert_eq!(min(&data), 1.0);
        assert_eq!(max(&data), 5.0);
    }

    #[test]
    fn test_min_max_with_inf() {
        let data = vec![3.0, 1.0, f64::INFINITY, 4.0, 5.0];
        // Infinity is an ordinary (largest) value for max and irrelevant for min.
        assert_eq!(min(&data), 1.0);
        assert_eq!(max(&data), f64::INFINITY);
    }

    #[test]
    fn test_median_with_inf() {
        let data = vec![1.0, 2.0, f64::INFINITY, 4.0, 5.0];
        // INF sorts to the end
        assert_eq!(median(&data), 4.0);
    }

    #[test]
    fn test_stddev_single_value() {
        // Single element should return NaN (undefined sample stddev)
        assert!(stddev(&[1.0]).is_nan());
    }

    #[test]
    fn test_stddev_population_single_value() {
        // Single element population stddev should be 0
        assert_eq!(stddev_population(&[1.0]), 0.0);
    }

    // ============================================================================
    // Mode Tests
    // ============================================================================

    #[test]
    fn test_mode_single() {
        let data = vec![1.0, 2.0, 2.0, 3.0, 4.0];
        let result = mode(&data).unwrap();
        assert_eq!(result.modes, vec![2.0]);
        assert_eq!(result.frequency, 2);
    }

    #[test]
    fn test_mode_multiple() {
        let data = vec![1.0, 1.0, 2.0, 2.0, 3.0];
        let result = mode(&data).unwrap();
        // Ties are all reported, in ascending order.
        assert_eq!(result.modes, vec![1.0, 2.0]);
        assert_eq!(result.frequency, 2);
    }

    #[test]
    fn test_mode_all_unique() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let result = mode(&data).unwrap();
        // All values are modes with frequency 1
        assert_eq!(result.modes.len(), 5);
        assert_eq!(result.frequency, 1);
    }

    #[test]
    fn test_mode_with_nan() {
        let data = vec![1.0, f64::NAN, 2.0, 2.0, 3.0];
        // NaN entries are ignored by mode.
        let result = mode(&data).unwrap();
        assert_eq!(result.modes, vec![2.0]);
        assert_eq!(result.frequency, 2);
    }

    #[test]
    fn test_mode_empty() {
        assert!(mode(&[]).is_none());
    }

    #[test]
    fn test_mode_all_nan() {
        let data = vec![f64::NAN, f64::NAN, f64::NAN];
        // Nothing is left once NaN entries are dropped.
        assert!(mode(&data).is_none());
    }

    // ============================================================================
    // Five-Number Summary Tests
    // ============================================================================

    #[test]
    fn test_five_number_summary_basic() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
        let summary = five_number_summary(&data).unwrap();
        assert_eq!(summary.min, 1.0);
        assert_eq!(summary.max, 9.0);
        assert_eq!(summary.median, 5.0);
        // Q1 and Q3 use linear interpolation
        assert!((summary.q1 - 3.0).abs() < 0.1);
        assert!((summary.q3 - 7.0).abs() < 0.1);
    }

    #[test]
    fn test_five_number_summary_iqr() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
        let summary = five_number_summary(&data).unwrap();
        let iqr = summary.iqr();
        assert!(iqr > 0.0);
        assert_eq!(iqr, summary.q3 - summary.q1);
    }

    #[test]
    fn test_five_number_summary_outlier_detection() {
        let data: Vec<f64> = (1..=20).map(|v| v as f64).collect();
        let summary = five_number_summary(&data).unwrap();

        // 50 should be an outlier (well above the upper fence)
        assert!(summary.is_outlier(50.0));

        // 10 should not be an outlier
        assert!(!summary.is_outlier(10.0));
    }

    #[test]
    fn test_five_number_summary_empty() {
        assert!(five_number_summary(&[]).is_none());
    }

    #[test]
    fn test_five_number_summary_single_value() {
        let data = vec![5.0];
        let summary = five_number_summary(&data).unwrap();
        assert_eq!(summary.min, 5.0);
        assert_eq!(summary.max, 5.0);
        assert_eq!(summary.median, 5.0);
        // Q1 and Q3 equal the single value
        assert_eq!(summary.q1, 5.0);
        assert_eq!(summary.q3, 5.0);
    }
}