linreg_core/
stats.rs

1//! Basic statistical utility functions.
2//!
3//! This module provides fundamental descriptive statistics operations
4//! including measures of central tendency, dispersion, and position.
5
6#![allow(clippy::needless_range_loop)]
7
/// Computes the arithmetic mean of a slice of f64 values.
///
/// # Arguments
///
/// * `data` - Values to average
///
/// # Returns
///
/// The sum of the values divided by their count, or NaN for an empty slice.
/// NaN entries propagate into the result.
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::mean;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// assert_eq!(mean(&data), 3.0);
/// ```
pub fn mean(data: &[f64]) -> f64 {
    match data.len() {
        // No observations: the mean is undefined.
        0 => f64::NAN,
        count => data.iter().sum::<f64>() / count as f64,
    }
}
33
/// Computes the unbiased sample variance of a slice of f64 values.
///
/// The sum of squared deviations is divided by `n - 1`.
///
/// The deviations are accumulated with **Welford's online algorithm**: the
/// running mean is updated element by element, which avoids the catastrophic
/// cancellation of the naive `sum(x^2) - n * mean^2` formula when the values
/// are large relative to their spread.
///
/// # Arguments
///
/// * `data` - Sample values
///
/// # Returns
///
/// The sample variance, or NaN if the slice has fewer than 2 elements
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::variance;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// let v = variance(&data);
/// assert!((v - 2.5).abs() < 1e-10);
/// ```
pub fn variance(data: &[f64]) -> f64 {
    if data.len() < 2 {
        return f64::NAN;
    }

    // The first element seeds the running mean; every later element is paired
    // with the number of elements seen once it has been included.
    let mut running_mean = data[0];
    let mut sum_sq_dev = 0.0;

    for (&value, seen) in data[1..].iter().zip(2_usize..) {
        let dev_before = value - running_mean;
        running_mean += dev_before / seen as f64;
        let dev_after = value - running_mean;
        sum_sq_dev += dev_before * dev_after;
    }

    sum_sq_dev / (data.len() - 1) as f64
}
79
/// Computes the population variance of a slice of f64 values.
///
/// The sum of squared deviations is divided by `n`, which is appropriate when
/// `data` is the whole population rather than a sample of it.
///
/// The deviations are accumulated with **Welford's online algorithm**, which
/// avoids the catastrophic cancellation of the naive
/// `sum(x^2) - n * mean^2` formula when the values are large relative to
/// their spread.
///
/// # Arguments
///
/// * `data` - Population values
///
/// # Returns
///
/// The population variance, or NaN if the slice is empty. A single-element
/// slice has variance 0.
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::variance_population;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// let v = variance_population(&data);
/// assert!((v - 2.0).abs() < 1e-10);
/// ```
pub fn variance_population(data: &[f64]) -> f64 {
    let Some((&first, rest)) = data.split_first() else {
        return f64::NAN;
    };

    // Fold state: (running mean, accumulated sum of squared deviations).
    // `seen` is the element count once the current value has been included.
    let (_, sum_sq_dev) = rest.iter().zip(2_usize..).fold(
        (first, 0.0_f64),
        |(running_mean, acc), (&value, seen)| {
            let dev_before = value - running_mean;
            let updated_mean = running_mean + dev_before / seen as f64;
            (updated_mean, acc + dev_before * (value - updated_mean))
        },
    );

    sum_sq_dev / data.len() as f64
}
125
126/// Calculates the sample standard deviation of a slice of f64 values.
127///
128/// Uses the (n-1) denominator for unbiased estimation.
129///
130/// # Arguments
131///
132/// * `data` - Slice of f64 values
133///
134/// # Returns
135///
136/// The standard deviation as f64, or NaN if the slice has fewer than 2 elements
137///
138/// # Examples
139///
140/// ```rust
141/// use linreg_core::stats::stddev;
142///
143/// let data = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
144/// let s = stddev(&data);
145/// assert!((s - 2.138089935).abs() < 1e-9);
146/// ```
147pub fn stddev(data: &[f64]) -> f64 {
148    variance(data).sqrt()
149}
150
151/// Calculates the population standard deviation of a slice of f64 values.
152///
153/// Uses the n denominator (for when data represents the entire population).
154///
155/// # Arguments
156///
157/// * `data` - Slice of f64 values
158///
159/// # Returns
160///
161/// The population standard deviation as f64, or NaN if the slice is empty
162///
163/// # Examples
164///
165/// ```rust
166/// use linreg_core::stats::stddev_population;
167///
168/// let data = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
169/// let s = stddev_population(&data);
170/// assert!((s - 2.0).abs() < 1e-9);
171/// ```
172pub fn stddev_population(data: &[f64]) -> f64 {
173    variance_population(data).sqrt()
174}
175
/// Calculates the median of a slice of f64 values.
///
/// The input is copied and sorted; the caller's slice is left untouched.
/// For an even number of elements the two middle values are averaged.
///
/// # Arguments
///
/// * `data` - Slice of f64 values
///
/// # Returns
///
/// The median as f64, or NaN if the slice is empty or contains a NaN
/// (NaN has no position in an ordering, so the median is undefined).
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::median;
///
/// let odd = vec![1.0, 3.0, 5.0];
/// assert_eq!(median(&odd), 3.0);
///
/// let even = vec![1.0, 2.0, 3.0, 4.0];
/// assert_eq!(median(&even), 2.5);
///
/// assert!(median(&[1.0, f64::NAN, 3.0]).is_nan());
/// ```
pub fn median(data: &[f64]) -> f64 {
    // Reject NaN up front. Sorting NaN with a `partial_cmp`-based comparator
    // is not a total order: the result would depend on the input order and
    // `sort_by` is allowed to panic on such comparators.
    if data.is_empty() || data.iter().any(|v| v.is_nan()) {
        return f64::NAN;
    }

    let mut sorted = data.to_vec();
    // With NaN excluded `partial_cmp` always succeeds; the fallback is unreachable.
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

    let mid = sorted.len() / 2;
    if sorted.len() % 2 == 1 {
        sorted[mid]
    } else {
        (sorted[mid - 1] + sorted[mid]) / 2.0
    }
}
212
/// Calculates a quantile of a slice of f64 values using linear interpolation.
///
/// The input is copied and sorted. The quantile position is
/// `q * (n - 1)` in the sorted data; when that position falls between two
/// elements the result is interpolated linearly between them.
///
/// # Arguments
///
/// * `data` - Slice of f64 values
/// * `q` - Quantile to calculate (0.0 to 1.0)
///
/// # Returns
///
/// The quantile value as f64, or NaN if the slice is empty, contains a NaN,
/// or `q` is outside `[0.0, 1.0]` (including a NaN `q`)
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::quantile;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
/// assert_eq!(quantile(&data, 0.25), 3.0);
/// assert_eq!(quantile(&data, 0.50), 5.0);
/// assert_eq!(quantile(&data, 0.75), 7.0);
/// ```
pub fn quantile(data: &[f64], q: f64) -> f64 {
    // `contains` is false for a NaN `q`, so that case is rejected here too.
    if data.is_empty() || !(0.0..=1.0).contains(&q) {
        return f64::NAN;
    }
    // Reject NaN data. Sorting NaN with a `partial_cmp`-based comparator is
    // not a total order: the result would depend on the input order and
    // `sort_by` is allowed to panic on such comparators.
    if data.iter().any(|v| v.is_nan()) {
        return f64::NAN;
    }

    let mut sorted = data.to_vec();
    // With NaN excluded `partial_cmp` always succeeds; the fallback is unreachable.
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

    // Fractional position in the sorted data; lies in [0, n - 1] because q is in [0, 1].
    let position = q * (sorted.len() - 1) as f64;
    let lower = position.floor() as usize;
    let upper = position.ceil() as usize;

    if lower == upper {
        // Exact hit: no interpolation, which also avoids `inf * 0.0`.
        sorted[lower]
    } else {
        let weight = position - lower as f64;
        sorted[lower] * (1.0 - weight) + sorted[upper] * weight
    }
}
255
/// Adds up all values in a slice of f64 values.
///
/// # Arguments
///
/// * `data` - Values to add
///
/// # Returns
///
/// The total as f64; an empty slice sums to zero
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::sum;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// assert_eq!(sum(&data), 15.0);
/// ```
pub fn sum(data: &[f64]) -> f64 {
    data.iter().copied().sum::<f64>()
}
277
/// Finds the minimum value in a slice of f64 values.
///
/// NaN entries are skipped: the result is the smallest non-NaN value.
///
/// # Arguments
///
/// * `data` - Slice of f64 values
///
/// # Returns
///
/// The minimum value as f64, or NaN if the slice is empty or holds only NaN
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::min;
///
/// let data = vec![3.0, 1.0, 4.0, 1.0, 5.0, 9.0];
/// assert_eq!(min(&data), 1.0);
/// ```
pub fn min(data: &[f64]) -> f64 {
    // `f64::min` returns the other operand when one side is NaN, so a NaN seed
    // is replaced by the first real value and survives only when there is
    // none. An infinite seed would leak out as the "minimum" of all-NaN input.
    data.iter().copied().fold(f64::NAN, f64::min)
}
302
/// Finds the maximum value in a slice of f64 values.
///
/// NaN entries are skipped: the result is the largest non-NaN value.
///
/// # Arguments
///
/// * `data` - Slice of f64 values
///
/// # Returns
///
/// The maximum value as f64, or NaN if the slice is empty or holds only NaN
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::max;
///
/// let data = vec![3.0, 1.0, 4.0, 1.0, 5.0, 9.0];
/// assert_eq!(max(&data), 9.0);
/// ```
pub fn max(data: &[f64]) -> f64 {
    // `f64::max` returns the other operand when one side is NaN, so a NaN seed
    // is replaced by the first real value and survives only when there is
    // none. An infinite seed would leak out as the "maximum" of all-NaN input.
    data.iter().copied().fold(f64::NAN, f64::max)
}
327
/// Calculates the range (max - min) of a slice of f64 values.
///
/// NaN entries are skipped when locating the extremes. Both extremes are
/// found in a single pass over the data.
///
/// # Arguments
///
/// * `data` - Slice of f64 values
///
/// # Returns
///
/// The range as f64, or NaN if the slice is empty or holds only NaN
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::range;
///
/// let data = vec![3.0, 1.0, 4.0, 1.0, 5.0, 9.0];
/// assert_eq!(range(&data), 8.0);  // 9.0 - 1.0
/// ```
pub fn range(data: &[f64]) -> f64 {
    // `f64::min`/`f64::max` return the other operand when one side is NaN, so
    // the NaN seeds are replaced by the first real value. They survive only
    // when there is none, which makes the difference NaN instead of the
    // negative infinity that infinite seeds would produce.
    let (lowest, highest) = data
        .iter()
        .fold((f64::NAN, f64::NAN), |(lo, hi), &value| {
            (lo.min(value), hi.max(value))
        });
    highest - lowest
}
349
350/// Result of the mode calculation.
351///
352/// For continuous or near-continuous data, returns all values that appear
353/// most frequently. For discrete data with a clear mode, returns a single value.
354#[derive(Debug, Clone, PartialEq, serde::Serialize)]
355pub struct ModeResult {
356    /// The mode value(s). May contain multiple values if there are ties.
357    pub modes: Vec<f64>,
358    /// The frequency of the mode(s).
359    pub frequency: usize,
360    /// Total number of unique values in the dataset.
361    pub unique_count: usize,
362}
363
364/// Calculates the mode(s) of a slice of f64 values.
365///
366/// The mode is the value that appears most frequently in the data. This function
367/// handles ties by returning all modes and works with both discrete and continuous data.
368///
369/// # Arguments
370///
371/// * `data` - Slice of f64 values
372///
373/// # Returns
374///
375/// A `ModeResult` containing the mode(s), their frequency, and the count of unique values.
376/// Returns `None` if the slice is empty.
377///
378/// # Examples
379///
380/// ```rust
381/// use linreg_core::stats::mode;
382///
383/// let single_mode = vec![1.0, 2.0, 2.0, 3.0, 4.0];
384/// let result = mode(&single_mode).unwrap();
385/// assert_eq!(result.modes, vec![2.0]);
386/// assert_eq!(result.frequency, 2);
387///
388/// let multi_mode = vec![1.0, 1.0, 2.0, 2.0, 3.0];
389/// let result = mode(&multi_mode).unwrap();
390/// assert_eq!(result.modes, vec![1.0, 2.0]);
391/// assert_eq!(result.frequency, 2);
392/// ```
393pub fn mode(data: &[f64]) -> Option<ModeResult> {
394    if data.is_empty() {
395        return None;
396    }
397
398    // Filter out NaN values and sort for frequency counting
399    let mut sorted_data: Vec<f64> = data
400        .iter()
401        .filter(|&&v| !v.is_nan())
402        .copied()
403        .collect();
404
405    if sorted_data.is_empty() {
406        // All values were NaN
407        return None;
408    }
409
410    sorted_data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
411
412    // Count frequencies by iterating through sorted data
413    let mut frequencies: Vec<(f64, usize)> = Vec::new();
414    let mut current_value = sorted_data[0];
415    let mut count = 1;
416
417    for &value in &sorted_data[1..] {
418        // Use partial_cmp to handle f64 comparison
419        if (value - current_value).abs() < f64::EPSILON {
420            count += 1;
421        } else {
422            frequencies.push((current_value, count));
423            current_value = value;
424            count = 1;
425        }
426    }
427    // Don't forget the last value
428    frequencies.push((current_value, count));
429
430    if frequencies.is_empty() {
431        return None;
432    }
433
434    // Find the maximum frequency
435    let max_frequency = frequencies.iter().map(|&(_, freq)| freq).max().unwrap();
436
437    // Collect all values with the maximum frequency
438    let modes: Vec<f64> = frequencies
439        .into_iter()
440        .filter_map(|(value, freq)| {
441            if freq == max_frequency {
442                Some(value)
443            } else {
444                None
445            }
446        })
447        .collect();
448
449    // Count unique values
450    let unique_count = {
451        let mut count = 1;
452        for i in 1..sorted_data.len() {
453            if (sorted_data[i] - sorted_data[i - 1]).abs() > f64::EPSILON {
454                count += 1;
455            }
456        }
457        count
458    };
459
460    Some(ModeResult {
461        modes,
462        frequency: max_frequency,
463        unique_count,
464    })
465}
466
467/// Calculates the five-number summary of a slice of f64 values.
468///
469/// The five-number summary consists of:
470/// - Minimum
471/// - First quartile (Q1, 25th percentile)
472/// - Median (Q2, 50th percentile)
473/// - Third quartile (Q3, 75th percentile)
474/// - Maximum
475///
476/// This is also known as the "boxplot summary" as these values are used to create box plots.
477///
478/// # Arguments
479///
480/// * `data` - Slice of f64 values
481///
482/// # Returns
483///
484/// A `FiveNumberSummary` struct containing the summary statistics, or `None` if the slice is empty.
485///
486/// # Examples
487///
488/// ```rust
489/// use linreg_core::stats::five_number_summary;
490///
491/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
492/// let summary = five_number_summary(&data).unwrap();
493/// assert_eq!(summary.min, 1.0);
494/// assert_eq!(summary.max, 9.0);
495/// assert_eq!(summary.median, 5.0);
496/// ```
497pub fn five_number_summary(data: &[f64]) -> Option<FiveNumberSummary> {
498    if data.is_empty() {
499        return None;
500    }
501
502    let min_val = min(data);
503    let max_val = max(data);
504    let q1 = quantile(data, 0.25);
505    let median_val = median(data);
506    let q3 = quantile(data, 0.75);
507
508    Some(FiveNumberSummary {
509        min: min_val,
510        q1,
511        median: median_val,
512        q3,
513        max: max_val,
514    })
515}
516
517/// Five-number summary statistics.
518///
519/// Contains the minimum, first quartile, median, third quartile, and maximum
520/// of a dataset. Used for box plots and exploratory data analysis.
521#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize)]
522pub struct FiveNumberSummary {
523    /// Minimum value
524    pub min: f64,
525    /// First quartile (25th percentile)
526    pub q1: f64,
527    /// Median (50th percentile)
528    pub median: f64,
529    /// Third quartile (75th percentile)
530    pub q3: f64,
531    /// Maximum value
532    pub max: f64,
533}
534
535impl FiveNumberSummary {
536    /// Calculates the interquartile range (IQR).
537    ///
538    /// The IQR is Q3 - Q1 and represents the spread of the middle 50% of the data.
539    pub fn iqr(&self) -> f64 {
540        self.q3 - self.q1
541    }
542
543    /// Determines if a value is an outlier using the 1.5*IQR rule.
544    ///
545    /// A value is considered an outlier if it is below Q1 - 1.5*IQR
546    /// or above Q3 + 1.5*IQR.
547    pub fn is_outlier(&self, value: f64) -> bool {
548        let iqr = self.iqr();
549        let lower_fence = self.q1 - 1.5 * iqr;
550        let upper_fence = self.q3 + 1.5 * iqr;
551        value < lower_fence || value > upper_fence
552    }
553}
554
/// Calculates the correlation coefficient (Pearson's r) between two slices.
///
/// The means, the two sums of squared deviations and the co-moment are
/// accumulated in a **single pass** with Welford-style updates, which avoids
/// the catastrophic cancellation of the naive sum-of-products formulas.
///
/// The normalising factor is computed as `sqrt(m2_x) * sqrt(m2_y)` rather than
/// `sqrt(m2_x * m2_y)`, so that data of very small or very large magnitude do
/// not underflow or overflow the intermediate product. The result is clamped
/// to `[-1, 1]` to absorb rounding error.
///
/// # Arguments
///
/// * `x` - First slice of f64 values
/// * `y` - Second slice of f64 values (must be same length as x)
///
/// # Returns
///
/// The correlation coefficient as f64 in `[-1, 1]`, or NaN if the slices
/// differ in length, hold fewer than 2 elements, contain NaN, or either
/// slice has zero variance
///
/// # Examples
///
/// ```rust
/// use linreg_core::stats::correlation;
///
/// let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// let y = vec![2.0, 4.0, 5.0, 4.0, 5.0];
/// let r = correlation(&x, &y);
/// assert!((r - 0.7746).abs() < 1e-4);
/// ```
pub fn correlation(x: &[f64], y: &[f64]) -> f64 {
    if x.len() != y.len() || x.len() < 2 {
        return f64::NAN;
    }

    // Running means, sums of squared deviations and the co-moment, seeded
    // with the first observation of each series.
    let mut mean_x = x[0];
    let mut mean_y = y[0];
    let mut m2_x = 0.0_f64;
    let mut m2_y = 0.0_f64;
    let mut co_moment = 0.0_f64;

    // `seen` is the number of observations once the current pair is included.
    for ((&xi, &yi), seen) in x[1..].iter().zip(&y[1..]).zip(2_usize..) {
        let inv_seen = 1.0 / seen as f64;

        let delta_x = xi - mean_x;
        let delta_y = yi - mean_y;
        mean_x += delta_x * inv_seen;
        mean_y += delta_y * inv_seen;

        // Deviations from the *updated* means complete the Welford step.
        let delta_x_new = xi - mean_x;
        let delta_y_new = yi - mean_y;

        m2_x += delta_x * delta_x_new;
        m2_y += delta_y * delta_y_new;
        co_moment += delta_x * delta_y_new;
    }

    // Separate square roots keep the product in range: `m2_x * m2_y` can
    // underflow to 0 or overflow to infinity even when r is well defined.
    let denom = m2_x.sqrt() * m2_y.sqrt();
    if denom == 0.0 {
        // At least one series is constant: correlation is undefined.
        return f64::NAN;
    }

    // Rounding can push |r| marginally past 1; NaN passes through `clamp` unchanged.
    (co_moment / denom).clamp(-1.0, 1.0)
}
620
#[cfg(test)]
mod tests {
    use super::*;

    /// Values 1 through 5, shared by several basic checks.
    const ONE_TO_FIVE: [f64; 5] = [1.0, 2.0, 3.0, 4.0, 5.0];
    /// Values 1 through 9, shared by the quantile and summary checks.
    const ONE_TO_NINE: [f64; 9] = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];

    #[test]
    fn test_mean() {
        assert_eq!(mean(&ONE_TO_FIVE), 3.0);
        assert!(mean(&[]).is_nan());
    }

    #[test]
    fn test_variance() {
        let sample_var = variance(&ONE_TO_FIVE);
        assert!((sample_var - 2.5).abs() < 1e-10);
        assert!(variance(&[1.0]).is_nan());
    }

    #[test]
    fn test_variance_population() {
        let population_var = variance_population(&ONE_TO_FIVE);
        assert!((population_var - 2.0).abs() < 1e-10);
    }

    #[test]
    fn test_stddev() {
        let values = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let sample_sd = stddev(&values);
        assert!((sample_sd - 2.138089935).abs() < 1e-9);
    }

    #[test]
    fn test_median() {
        assert_eq!(median(&[1.0, 3.0, 5.0]), 3.0);
        assert_eq!(median(&[1.0, 2.0, 3.0, 4.0]), 2.5);
        assert!(median(&[]).is_nan());
    }

    #[test]
    fn test_quantile() {
        assert_eq!(quantile(&ONE_TO_NINE, 0.0), 1.0);
        assert_eq!(quantile(&ONE_TO_NINE, 0.5), 5.0);
        assert_eq!(quantile(&ONE_TO_NINE, 1.0), 9.0);
    }

    #[test]
    fn test_min_max() {
        let values = [3.0, 1.0, 4.0, 1.0, 5.0, 9.0];
        assert_eq!(min(&values), 1.0);
        assert_eq!(max(&values), 9.0);
    }

    #[test]
    fn test_sum() {
        assert_eq!(sum(&ONE_TO_FIVE), 15.0);
        // Summing nothing gives zero.
        assert_eq!(sum(&[]), 0.0);
    }

    #[test]
    fn test_range() {
        let values = [3.0, 1.0, 4.0, 1.0, 5.0, 9.0];
        assert_eq!(range(&values), 8.0); // 9.0 - 1.0
        // An empty slice has no range.
        assert!(range(&[]).is_nan());
    }

    #[test]
    fn test_correlation() {
        let ys = [2.0, 4.0, 5.0, 4.0, 5.0];
        let coefficient = correlation(&ONE_TO_FIVE, &ys);
        // Expected value: r = 6 / sqrt(60) ≈ 0.7746
        assert!((coefficient - 0.7746).abs() < 1e-4);
    }

    // ============================================================================
    // NaN/Inf Handling Tests
    // ============================================================================

    #[test]
    fn test_mean_with_nan() {
        // A NaN entry poisons the sum, and therefore the mean.
        assert!(mean(&[1.0, f64::NAN, 3.0, 4.0, 5.0]).is_nan());
    }

    #[test]
    fn test_mean_with_inf() {
        let with_pos_inf = [1.0, 2.0, f64::INFINITY, 4.0, 5.0];
        assert_eq!(mean(&with_pos_inf), f64::INFINITY);

        let with_neg_inf = [1.0, 2.0, f64::NEG_INFINITY, 4.0, 5.0];
        assert_eq!(mean(&with_neg_inf), f64::NEG_INFINITY);
    }

    #[test]
    fn test_variance_with_nan() {
        assert!(variance(&[1.0, f64::NAN, 3.0, 4.0, 5.0]).is_nan());
    }

    #[test]
    fn test_variance_with_inf() {
        // An infinite entry must not produce a finite variance.
        let result = variance(&[1.0, 2.0, f64::INFINITY, 4.0, 5.0]);
        assert!(result.is_nan() || result.is_infinite());
    }

    #[test]
    fn test_correlation_with_nan() {
        let ys = [2.0, f64::NAN, 5.0, 4.0, 5.0];
        // A NaN observation makes the coefficient NaN.
        assert!(correlation(&ONE_TO_FIVE, &ys).is_nan());
    }

    #[test]
    fn test_correlation_with_inf() {
        let ys = [2.0, f64::INFINITY, 5.0, 4.0, 5.0];
        // An infinite observation must not produce a finite coefficient.
        let coefficient = correlation(&ONE_TO_FIVE, &ys);
        assert!(coefficient.is_nan() || coefficient.is_infinite());
    }

    #[test]
    fn test_correlation_single_value() {
        // Correlation is undefined for a single observation.
        assert!(correlation(&[1.0], &[2.0]).is_nan());
    }

    #[test]
    fn test_correlation_mismatched_lengths() {
        // Series of different lengths cannot be paired.
        assert!(correlation(&[1.0, 2.0, 3.0], &[1.0, 2.0]).is_nan());
    }

    #[test]
    fn test_min_max_with_nan() {
        let values = [3.0, 1.0, f64::NAN, 4.0, 5.0];
        // NaN entries are skipped by both min and max.
        assert_eq!(min(&values), 1.0);
        assert_eq!(max(&values), 5.0);
    }

    #[test]
    fn test_min_max_with_inf() {
        let values = [3.0, 1.0, f64::INFINITY, 4.0, 5.0];
        assert_eq!(min(&values), 1.0);
        assert_eq!(max(&values), f64::INFINITY);
    }

    #[test]
    fn test_median_with_inf() {
        // Infinity sorts last, leaving 4.0 in the middle.
        assert_eq!(median(&[1.0, 2.0, f64::INFINITY, 4.0, 5.0]), 4.0);
    }

    #[test]
    fn test_stddev_single_value() {
        // The sample standard deviation needs at least two observations.
        assert!(stddev(&[1.0]).is_nan());
    }

    #[test]
    fn test_stddev_population_single_value() {
        // A one-element population has no spread.
        assert_eq!(stddev_population(&[1.0]), 0.0);
    }

    // ============================================================================
    // Mode Tests
    // ============================================================================

    #[test]
    fn test_mode_single() {
        let outcome = mode(&[1.0, 2.0, 2.0, 3.0, 4.0]).unwrap();
        assert_eq!(outcome.modes, vec![2.0]);
        assert_eq!(outcome.frequency, 2);
    }

    #[test]
    fn test_mode_multiple() {
        let outcome = mode(&[1.0, 1.0, 2.0, 2.0, 3.0]).unwrap();
        assert_eq!(outcome.modes, vec![1.0, 2.0]);
        assert_eq!(outcome.frequency, 2);
    }

    #[test]
    fn test_mode_all_unique() {
        let outcome = mode(&ONE_TO_FIVE).unwrap();
        // Every value occurs once, so every value is a mode.
        assert_eq!(outcome.modes.len(), 5);
        assert_eq!(outcome.frequency, 1);
    }

    #[test]
    fn test_mode_with_nan() {
        let outcome = mode(&[1.0, f64::NAN, 2.0, 2.0, 3.0]).unwrap();
        assert_eq!(outcome.modes, vec![2.0]);
        assert_eq!(outcome.frequency, 2);
    }

    #[test]
    fn test_mode_empty() {
        assert!(mode(&[]).is_none());
    }

    #[test]
    fn test_mode_all_nan() {
        assert!(mode(&[f64::NAN, f64::NAN, f64::NAN]).is_none());
    }

    // ============================================================================
    // Five-Number Summary Tests
    // ============================================================================

    #[test]
    fn test_five_number_summary_basic() {
        let stats = five_number_summary(&ONE_TO_NINE).unwrap();
        assert_eq!(stats.min, 1.0);
        assert_eq!(stats.max, 9.0);
        assert_eq!(stats.median, 5.0);
        // The quartiles come from linear interpolation.
        assert!((stats.q1 - 3.0).abs() < 0.1);
        assert!((stats.q3 - 7.0).abs() < 0.1);
    }

    #[test]
    fn test_five_number_summary_iqr() {
        let stats = five_number_summary(&ONE_TO_NINE).unwrap();
        let spread = stats.iqr();
        assert!(spread > 0.0);
        assert_eq!(spread, stats.q3 - stats.q1);
    }

    #[test]
    fn test_five_number_summary_outlier_detection() {
        let values: Vec<f64> = (1..=20_i32).map(f64::from).collect();
        let stats = five_number_summary(&values).unwrap();

        // 50 lies far beyond the upper fence.
        assert!(stats.is_outlier(50.0));

        // 10 sits in the middle of the data.
        assert!(!stats.is_outlier(10.0));
    }

    #[test]
    fn test_five_number_summary_empty() {
        assert!(five_number_summary(&[]).is_none());
    }

    #[test]
    fn test_five_number_summary_single_value() {
        let stats = five_number_summary(&[5.0]).unwrap();
        assert_eq!(stats.min, 5.0);
        assert_eq!(stats.max, 5.0);
        assert_eq!(stats.median, 5.0);
        // With one observation both quartiles collapse onto it.
        assert_eq!(stats.q1, 5.0);
        assert_eq!(stats.q3, 5.0);
    }
}
linreg_core/stats.rs

linreg_core/
stats.rs