scirs2_stats/
quantile.rs

1//! Quantile-based statistics
2//!
3//! This module provides functions for computing quantile-based statistics
4//! including percentiles, quartiles, quantiles, and related summary statistics.
5
6use crate::error::{StatsError, StatsResult};
7use scirs2_core::ndarray::{Array1, ArrayView1};
8use scirs2_core::numeric::{Float, NumCast};
9
10/// Helper to convert f64 constants to generic Float type
11#[inline(always)]
12fn const_f64<F: Float + NumCast>(value: f64) -> F {
13    F::from(value).expect("Failed to convert constant to target float type")
14}
15
16/// Methods for interpolating quantiles
17///
18/// These methods correspond to the methods in scipy.stats.quantile.
19#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
20pub enum QuantileInterpolation {
21    /// Return the first data point whose position equals or exceeds the quantile.
22    /// Also known as type 1 in Hyndman and Fan's classification.
23    InvertedCdf,
24
25    /// Return the average of the two data points closest to the quantile.
26    /// Also known as type 2 in Hyndman and Fan's classification.
27    AveragedInvertedCdf,
28
29    /// Return the closest data point to the quantile.
30    /// Also known as type 3 in Hyndman and Fan's classification.
31    ClosestObservation,
32
33    /// Use linear interpolation of the inverted CDF, with m=0.
34    /// Also known as type 4 in Hyndman and Fan's classification.
35    InterpolatedInvertedCdf,
36
37    /// Use linear interpolation with m=0.5 (Hazen's formula).
38    /// Also known as type 5 in Hyndman and Fan's classification.
39    Hazen,
40
41    /// Use linear interpolation with m=p (Weibull's formula).
42    /// Also known as type 6 in Hyndman and Fan's classification.
43    Weibull,
44
45    /// Use linear interpolation with m=1-p (standard linear interpolation).
46    /// Also known as type 7 in Hyndman and Fan's classification (default in R).
47    #[default]
48    Linear,
49
50    /// Use linear interpolation with m=p/3 + 1/3 (median-unbiased).
51    /// Also known as type 8 in Hyndman and Fan's classification.
52    MedianUnbiased,
53
54    /// Use linear interpolation with m=p/4 + 3/8 (normal-unbiased).
55    /// Also known as type 9 in Hyndman and Fan's classification.
56    NormalUnbiased,
57
58    /// Use a midpoint interpolation.
59    Midpoint,
60
61    /// Use nearest interpolation.
62    Nearest,
63
64    /// Use lower interpolation.
65    Lower,
66
67    /// Use higher interpolation.
68    Higher,
69}
70
71/// Compute the quantile of a dataset.
72///
73/// A quantile is a value below which a specified portion of the data falls.
74/// For example, the 0.5 quantile is the median, the 0.25 quantile is the first quartile,
75/// and the 0.75 quantile is the third quartile.
76///
77/// # Arguments
78///
79/// * `x` - Input data
80/// * `q` - Quantile to compute, must be between 0 and 1 inclusive
81/// * `method` - Interpolation method to use
82///
83/// # Returns
84///
85/// * The quantile value
86///
87/// # Examples
88///
89/// ```
90/// use scirs2_core::ndarray::array;
91/// use scirs2_stats::quantile;
92/// use scirs2_stats::QuantileInterpolation;
93///
94/// let data = array![1.0, 3.0, 5.0, 7.0, 9.0];
95///
96/// // Compute the median (0.5 quantile)
97/// let median = quantile(&data.view(), 0.5, QuantileInterpolation::Linear).expect("Test/example failed");
98/// assert_eq!(median, 5.0);
99///
100/// // Compute the first quartile (0.25 quantile)
101/// let q1 = quantile(&data.view(), 0.25, QuantileInterpolation::Linear).expect("Test/example failed");
102/// assert_eq!(q1, 3.0);
103///
104/// // Compute the third quartile (0.75 quantile)
105/// let q3 = quantile(&data.view(), 0.75, QuantileInterpolation::Linear).expect("Test/example failed");
106/// assert_eq!(q3, 7.0);
107/// ```
108#[allow(dead_code)]
109pub fn quantile<F>(x: &ArrayView1<F>, q: F, method: QuantileInterpolation) -> StatsResult<F>
110where
111    F: Float + NumCast + std::fmt::Display,
112{
113    // Check for empty array
114    if x.is_empty() {
115        return Err(StatsError::InvalidArgument(
116            "Empty array provided".to_string(),
117        ));
118    }
119
120    // Validate the quantile value
121    if q < F::zero() || q > F::one() {
122        return Err(StatsError::InvalidArgument(
123            "Quantile must be between 0 and 1".to_string(),
124        ));
125    }
126
127    // Make a sorted copy of the data
128    let mut sorteddata: Vec<F> = x.iter().cloned().collect();
129    sorteddata.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
130
131    // Calculate index and interpolation value based on method
132    let n = F::from(sorteddata.len()).expect("Test/example failed");
133
134    match method {
135        QuantileInterpolation::Lower => {
136            let index = (q * (n - F::one()))
137                .floor()
138                .to_usize()
139                .expect("Failed to convert to usize");
140            Ok(sorteddata[index])
141        }
142        QuantileInterpolation::Higher => {
143            let index = (q * (n - F::one()))
144                .ceil()
145                .to_usize()
146                .expect("Failed to convert to usize")
147                .min(sorteddata.len() - 1);
148            Ok(sorteddata[index])
149        }
150        QuantileInterpolation::Nearest => {
151            let index = (q * (n - F::one()))
152                .round()
153                .to_usize()
154                .expect("Failed to convert to usize")
155                .min(sorteddata.len() - 1);
156            Ok(sorteddata[index])
157        }
158        QuantileInterpolation::Midpoint => {
159            let i_lower = (q * (n - F::one()))
160                .floor()
161                .to_usize()
162                .expect("Failed to convert to usize");
163            let i_upper = (q * (n - F::one()))
164                .ceil()
165                .to_usize()
166                .expect("Failed to convert to usize")
167                .min(sorteddata.len() - 1);
168            Ok((sorteddata[i_lower] + sorteddata[i_upper]) / const_f64::<F>(2.0))
169        }
170        QuantileInterpolation::InvertedCdf => {
171            let jg = q * n;
172            let j = jg.floor().to_usize().expect("Failed to convert to usize");
173            let g = if jg % F::one() > F::zero() {
174                F::one()
175            } else {
176                F::zero()
177            };
178
179            let j = j.min(sorteddata.len() - 1);
180            let jp1 = (j + 1).min(sorteddata.len() - 1);
181
182            if g <= F::epsilon() {
183                Ok(sorteddata[j])
184            } else {
185                Ok(sorteddata[jp1])
186            }
187        }
188        QuantileInterpolation::AveragedInvertedCdf => {
189            let jg = q * n;
190            let j = jg.floor().to_usize().expect("Failed to convert to usize");
191            let g = if jg % F::one() > F::zero() {
192                const_f64::<F>(0.5)
193            } else {
194                F::zero()
195            };
196
197            let j = j.min(sorteddata.len() - 1);
198            let jp1 = (j + 1).min(sorteddata.len() - 1);
199
200            if g <= F::epsilon() {
201                Ok(sorteddata[j])
202            } else {
203                Ok(sorteddata[j] * (F::one() - g) + sorteddata[jp1] * g)
204            }
205        }
206        QuantileInterpolation::ClosestObservation => {
207            let jg = q * n - const_f64::<F>(0.5);
208            let j = jg.floor().to_usize().expect("Failed to convert to usize");
209
210            // Determine g value for closest observation
211            let g = if jg % F::one() == F::zero() && j % 2 == 1 {
212                F::zero()
213            } else {
214                F::one()
215            };
216
217            let j = j.min(sorteddata.len() - 1);
218            let jp1 = (j + 1).min(sorteddata.len() - 1);
219
220            if g <= F::epsilon() {
221                Ok(sorteddata[j])
222            } else {
223                Ok(sorteddata[jp1])
224            }
225        }
226        // Use linear interpolation with different m values
227        _ => {
228            // Get the m value based on method
229            let m = match method {
230                QuantileInterpolation::InterpolatedInvertedCdf => F::zero(),
231                QuantileInterpolation::Hazen => const_f64::<F>(0.5),
232                QuantileInterpolation::Weibull => q,
233                QuantileInterpolation::Linear => F::one() - q,
234                QuantileInterpolation::MedianUnbiased => {
235                    q / const_f64::<F>(3.0) + const_f64::<F>(1.0 / 3.0)
236                }
237                QuantileInterpolation::NormalUnbiased => {
238                    q / const_f64::<F>(4.0) + const_f64::<F>(3.0 / 8.0)
239                }
240                _ => unreachable!(),
241            };
242
243            let jg = q * n + m - F::one();
244            let j = jg.floor().to_usize().expect("Failed to convert to usize");
245            let g = jg % F::one();
246
247            // Boundary handling
248            let j = if jg < F::zero() {
249                0
250            } else {
251                j.min(sorteddata.len() - 1)
252            };
253            let jp1 = (j + 1).min(sorteddata.len() - 1);
254            let g = if jg < F::zero() { F::zero() } else { g };
255
256            // Linear interpolation
257            Ok((F::one() - g) * sorteddata[j] + g * sorteddata[jp1])
258        }
259    }
260}
261
262/// Compute the percentile of a dataset.
263///
264/// A percentile is a value below which a specified percentage of the data falls.
265/// For example, the 50th percentile is the median, the 25th percentile is the first quartile,
266/// and the 75th percentile is the third quartile.
267///
268/// # Arguments
269///
270/// * `x` - Input data
271/// * `p` - Percentile to compute, must be between 0 and 100 inclusive
272/// * `method` - Interpolation method to use
273///
274/// # Returns
275///
276/// * The percentile value
277///
278/// # Examples
279///
280/// ```
281/// use scirs2_core::ndarray::array;
282/// use scirs2_stats::percentile;
283/// use scirs2_stats::QuantileInterpolation;
284///
285/// let data = array![1.0, 3.0, 5.0, 7.0, 9.0];
286///
287/// // Compute the median (50th percentile)
288/// let median = percentile(&data.view(), 50.0, QuantileInterpolation::Linear).expect("Test/example failed");
289/// assert_eq!(median, 5.0);
290///
291/// // Compute the first quartile (25th percentile)
292/// let q1 = percentile(&data.view(), 25.0, QuantileInterpolation::Linear).expect("Test/example failed");
293/// assert_eq!(q1, 3.0);
294///
295/// // Compute the third quartile (75th percentile)
296/// let q3 = percentile(&data.view(), 75.0, QuantileInterpolation::Linear).expect("Test/example failed");
297/// assert_eq!(q3, 7.0);
298/// ```
299#[allow(dead_code)]
300pub fn percentile<F>(x: &ArrayView1<F>, p: F, method: QuantileInterpolation) -> StatsResult<F>
301where
302    F: Float + NumCast + std::fmt::Display,
303{
304    // Check for empty array
305    if x.is_empty() {
306        return Err(StatsError::InvalidArgument(
307            "Empty array provided".to_string(),
308        ));
309    }
310
311    // Validate the percentile value
312    if p < F::zero() || p > const_f64::<F>(100.0) {
313        return Err(StatsError::InvalidArgument(
314            "Percentile must be between 0 and 100".to_string(),
315        ));
316    }
317
318    // Convert percentile to quantile and calculate
319    let q = p / const_f64::<F>(100.0);
320    quantile(x, q, method)
321}
322
323/// Compute the quartiles of a dataset.
324///
325/// Quartiles divide the dataset into four equal parts.
326/// The first quartile (Q1) is the value that 25% of the data falls below.
327/// The second quartile (Q2) is the median (50%).
328/// The third quartile (Q3) is the value that 75% of the data falls below.
329///
330/// # Arguments
331///
332/// * `x` - Input data
333/// * `method` - Interpolation method to use
334///
335/// # Returns
336///
337/// * An array containing the three quartiles: [Q1, Q2, Q3]
338///
339/// # Examples
340///
341/// ```
342/// use scirs2_core::ndarray::array;
343/// use scirs2_stats::quartiles;
344/// use scirs2_stats::QuantileInterpolation;
345///
346/// let data = array![1.0, 3.0, 5.0, 7.0, 9.0];
347///
348/// let q = quartiles(&data.view(), QuantileInterpolation::Linear).expect("Test/example failed");
349/// assert_eq!(q[0], 3.0);  // Q1 (25th percentile)
350/// assert_eq!(q[1], 5.0);  // Q2 (median)
351/// assert_eq!(q[2], 7.0);  // Q3 (75th percentile)
352/// ```
353#[allow(dead_code)]
354pub fn quartiles<F>(x: &ArrayView1<F>, method: QuantileInterpolation) -> StatsResult<Array1<F>>
355where
356    F: Float + NumCast + std::fmt::Display,
357{
358    // Check for empty array
359    if x.is_empty() {
360        return Err(StatsError::InvalidArgument(
361            "Empty array provided".to_string(),
362        ));
363    }
364
365    // Calculate the quartiles
366    let q1 = quantile(x, const_f64::<F>(0.25), method)?;
367    let q2 = quantile(x, const_f64::<F>(0.5), method)?;
368    let q3 = quantile(x, const_f64::<F>(0.75), method)?;
369
370    // Return as array
371    Ok(Array1::from(vec![q1, q2, q3]))
372}
373
374/// Compute the quintiles of a dataset.
375///
376/// Quintiles divide the dataset into five equal parts.
377/// The quintiles are the values that divide the data at 20%, 40%, 60%, and 80%.
378///
379/// # Arguments
380///
381/// * `x` - Input data
382/// * `method` - Interpolation method to use
383///
384/// # Returns
385///
386/// * An array containing the four quintiles: [Q1, Q2, Q3, Q4]
387///
388/// # Examples
389///
390/// ```
391/// use scirs2_core::ndarray::array;
392/// use scirs2_stats::quintiles;
393/// use scirs2_stats::QuantileInterpolation;
394///
395/// let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
396///
397/// let q = quintiles(&data.view(), QuantileInterpolation::Linear).expect("Test/example failed");
398/// assert_eq!(q[0], 2.8);  // 20th percentile
399/// assert_eq!(q[1], 4.6);  // 40th percentile
400/// assert_eq!(q[2], 6.4);  // 60th percentile
401/// assert_eq!(q[3], 8.2);  // 80th percentile
402/// ```
403#[allow(dead_code)]
404pub fn quintiles<F>(x: &ArrayView1<F>, method: QuantileInterpolation) -> StatsResult<Array1<F>>
405where
406    F: Float + NumCast + std::fmt::Display,
407{
408    // Check for empty array
409    if x.is_empty() {
410        return Err(StatsError::InvalidArgument(
411            "Empty array provided".to_string(),
412        ));
413    }
414
415    // Calculate the quintiles
416    let q1 = quantile(x, const_f64::<F>(0.2), method)?;
417    let q2 = quantile(x, const_f64::<F>(0.4), method)?;
418    let q3 = quantile(x, const_f64::<F>(0.6), method)?;
419    let q4 = quantile(x, const_f64::<F>(0.8), method)?;
420
421    // Return as array
422    Ok(Array1::from(vec![q1, q2, q3, q4]))
423}
424
425/// Compute the deciles of a dataset.
426///
427/// Deciles divide the dataset into ten equal parts.
428/// The deciles are the values that divide the data at 10%, 20%, ..., 90%.
429///
430/// # Arguments
431///
432/// * `x` - Input data
433/// * `method` - Interpolation method to use
434///
435/// # Returns
436///
437/// * An array containing the nine deciles: [D1, D2, D3, D4, D5, D6, D7, D8, D9]
438///
439/// # Examples
440///
441/// ```
442/// use scirs2_core::ndarray::array;
443/// use scirs2_stats::deciles;
444/// use scirs2_stats::QuantileInterpolation;
445///
446/// let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
447///
448/// let d = deciles(&data.view(), QuantileInterpolation::Linear).expect("Test/example failed");
449/// assert_eq!(d[0], 1.9);  // 10th percentile
450/// assert_eq!(d[4], 5.5);  // 50th percentile (median)
451/// assert_eq!(d[8], 9.1);  // 90th percentile
452/// ```
453#[allow(dead_code)]
454pub fn deciles<F>(x: &ArrayView1<F>, method: QuantileInterpolation) -> StatsResult<Array1<F>>
455where
456    F: Float + NumCast + std::fmt::Display,
457{
458    // Check for empty array
459    if x.is_empty() {
460        return Err(StatsError::InvalidArgument(
461            "Empty array provided".to_string(),
462        ));
463    }
464
465    // Calculate the deciles
466    let mut result = Vec::with_capacity(9);
467    for i in 1..=9 {
468        let p = F::from(i * 10).expect("Test/example failed");
469        let decile = percentile(x, p, method)?;
470        result.push(decile);
471    }
472
473    // Return as array
474    Ok(Array1::from(result))
475}
476
477/// Compute boxplot statistics for a dataset.
478///
479/// Boxplot statistics include the median, quartiles, and whiskers of the data.
480/// The whiskers extend to the most extreme data points within a certain range
481/// of the box (by default, 1.5 times the interquartile range).
482///
483/// # Arguments
484///
485/// * `x` - Input data
486/// * `whis` - Range of whiskers as a factor of the IQR (default 1.5)
487/// * `method` - Interpolation method for quartiles
488///
489/// # Returns
490///
491/// * A tuple containing:
492///   - q1: First quartile
493///   - q2: Median (second quartile)
494///   - q3: Third quartile
495///   - whislo: Lower whisker
496///   - whishi: Upper whisker
497///   - outliers: Array of outliers
498///
499/// # Examples
500///
501/// ```
502/// use scirs2_core::ndarray::array;
503/// use scirs2_stats::boxplot_stats;
504/// use scirs2_stats::QuantileInterpolation;
505///
506/// let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 20.0];  // Note the outlier
507///
508/// let (q1, q2, q3, whislo, whishi, outliers) =
509///     boxplot_stats(&data.view(), Some(1.5), QuantileInterpolation::Linear).expect("Test/example failed");
510///
511/// assert_eq!(q1, 3.25);  // 25th percentile
512/// assert_eq!(q2, 5.5);   // median
513/// assert_eq!(q3, 7.75);  // 75th percentile
514/// assert_eq!(whislo, 1.0);  // lowest value within 1.5*IQR of Q1
515/// assert_eq!(whishi, 9.0);  // highest value within 1.5*IQR of Q3
516/// assert_eq!(outliers[0], 20.0);  // outlier beyond the whiskers
517/// ```
518#[allow(dead_code)]
519pub fn boxplot_stats<F>(
520    x: &ArrayView1<F>,
521    whis: Option<F>,
522    method: QuantileInterpolation,
523) -> StatsResult<(F, F, F, F, F, Vec<F>)>
524where
525    F: Float + NumCast + std::fmt::Debug + std::fmt::Display,
526{
527    // Check for empty array
528    if x.is_empty() {
529        return Err(StatsError::InvalidArgument(
530            "Empty array provided".to_string(),
531        ));
532    }
533
534    // Set the whisker range (defaults to 1.5)
535    let whis_factor = whis.unwrap_or(const_f64::<F>(1.5));
536    if whis_factor < F::zero() {
537        return Err(StatsError::InvalidArgument(
538            "Whisker range must be non-negative".to_string(),
539        ));
540    }
541
542    // Calculate quartiles
543    let q1 = quantile(x, const_f64::<F>(0.25), method)?;
544    let q2 = quantile(x, const_f64::<F>(0.5), method)?;
545    let q3 = quantile(x, const_f64::<F>(0.75), method)?;
546
547    // Calculate interquartile range (IQR)
548    let iqr = q3 - q1;
549
550    // Calculate whisker limits
551    let whislo_limit = q1 - whis_factor * iqr;
552    let whishi_limit = q3 + whis_factor * iqr;
553
554    // Find actual whisker positions and outliers
555    let mut sorteddata: Vec<F> = x.iter().cloned().collect();
556    sorteddata.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
557
558    // Find the lowest and highest values within the whisker limits
559    let whislo = *sorteddata
560        .iter()
561        .find(|&&val| val >= whislo_limit)
562        .unwrap_or(&sorteddata[0]);
563
564    let whishi = *sorteddata
565        .iter()
566        .rev()
567        .find(|&&val| val <= whishi_limit)
568        .unwrap_or(&sorteddata[sorteddata.len() - 1]);
569
570    // Collect outliers (values outside the whiskers)
571    let outliers: Vec<F> = sorteddata
572        .iter()
573        .filter(|&&val| val < whislo || val > whishi)
574        .cloned()
575        .collect();
576
577    Ok((q1, q2, q3, whislo, whishi, outliers))
578}
579
580/// Compute the winsorized mean of a dataset.
581///
582/// The winsorized mean is calculated by replacing the specified proportion
583/// of extreme values (both low and high) with the values at the corresponding
584/// percentiles, and then calculating the mean of the resulting array.
585///
586/// # Arguments
587///
588/// * `x` - Input data
589/// * `limits` - Proportion of values to replace at each end (must be between 0 and 0.5)
590///
591/// # Returns
592///
593/// * The winsorized mean
594///
595/// # Examples
596///
597/// ```
598/// use scirs2_core::ndarray::array;
599/// use scirs2_stats::winsorized_mean;
600///
601/// let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 100.0];  // Note the outlier
602///
603/// // 10% winsorization (replace lowest and highest values)
604/// let mean_10 = winsorized_mean(&data.view(), 0.1).expect("Test/example failed");
605/// // This will replace the lowest 10% (1.0) with 2.0 and highest 10% (100.0) with 9.0
606/// // Then calculate the mean of [2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0]
607/// assert!((mean_10 - 5.5f64).abs() < 1e-10f64);
608///
609/// // 20% winsorization
610/// let mean_20 = winsorized_mean(&data.view(), 0.2).expect("Test/example failed");
611/// // This will replace the lowest 20% (1.0, 2.0) with 3.0 and highest 20% (9.0, 100.0) with 8.0
612/// // Then calculate the mean of [3.0, 3.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 8.0, 8.0]
613/// assert!((mean_20 - 5.5f64).abs() < 1e-10f64);
614/// ```
615#[allow(dead_code)]
616pub fn winsorized_mean<F>(x: &ArrayView1<F>, limits: F) -> StatsResult<F>
617where
618    F: Float + NumCast + std::iter::Sum + std::fmt::Display,
619{
620    // Check for empty array
621    if x.is_empty() {
622        return Err(StatsError::InvalidArgument(
623            "Empty array provided".to_string(),
624        ));
625    }
626
627    // Validate limits (must be between 0 and 0.5)
628    if limits < F::zero() || limits > const_f64::<F>(0.5) {
629        return Err(StatsError::InvalidArgument(
630            "Limits must be between 0 and 0.5".to_string(),
631        ));
632    }
633
634    // If limits is 0, return the regular mean
635    if limits == F::zero() {
636        return Ok(x.iter().cloned().sum::<F>()
637            / F::from(x.len()).expect("Failed to convert length to float"));
638    }
639
640    // Make a copy of the data for winsorization
641    let mut data: Vec<F> = x.iter().cloned().collect();
642
643    // Sort the data
644    data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
645
646    // Determine the number of values to replace on each end
647    let n = data.len();
648    let n_replace = (F::from(n).expect("Failed to convert to float") * limits)
649        .to_usize()
650        .expect("Failed to convert to usize")
651        .max(1);
652
653    // Get the replacement values
654    let low_val = data[n_replace];
655    let high_val = data[n - n_replace - 1];
656
657    // Replace the extreme values
658    for i in 0..n_replace {
659        data[i] = low_val;
660        data[n - i - 1] = high_val;
661    }
662
663    // Calculate the mean of the winsorized data
664    let mean = data.iter().cloned().sum::<F>() / F::from(n).expect("Test/example failed");
665
666    Ok(mean)
667}
668
669/// Compute the winsorized variance of a dataset.
670///
671/// The winsorized variance is calculated by replacing the specified proportion
672/// of extreme values (both low and high) with the values at the corresponding
673/// percentiles, and then calculating the variance of the resulting array.
674///
675/// # Arguments
676///
677/// * `x` - Input data
678/// * `limits` - Proportion of values to replace at each end (must be between 0 and 0.5)
679/// * `ddof` - Delta degrees of freedom (0 for population variance, 1 for sample variance)
680///
681/// # Returns
682///
683/// * The winsorized variance
684///
685/// # Examples
686///
687/// ```
688/// use scirs2_core::ndarray::array;
689/// use scirs2_stats::winsorized_variance;
690///
691/// let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 100.0];  // Note the outlier
692///
693/// // 10% winsorization with ddof=1 (sample variance)
694/// let var_10 = winsorized_variance(&data.view(), 0.1, 1).expect("Test/example failed");
695/// // Variance of the winsorized data [2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0]
696/// assert!((var_10 - 7.3888888888889f64).abs() < 1e-10f64);
697/// ```
698#[allow(dead_code)]
699pub fn winsorized_variance<F>(x: &ArrayView1<F>, limits: F, ddof: usize) -> StatsResult<F>
700where
701    F: Float + NumCast + std::iter::Sum + std::fmt::Display,
702{
703    // Check for empty array
704    if x.is_empty() {
705        return Err(StatsError::InvalidArgument(
706            "Empty array provided".to_string(),
707        ));
708    }
709
710    // Validate limits (must be between 0 and 0.5)
711    if limits < F::zero() || limits > const_f64::<F>(0.5) {
712        return Err(StatsError::InvalidArgument(
713            "Limits must be between 0 and 0.5".to_string(),
714        ));
715    }
716
717    // Check degrees of freedom
718    if ddof >= x.len() {
719        return Err(StatsError::InvalidArgument(
720            "Degrees of freedom must be less than the number of observations".to_string(),
721        ));
722    }
723
724    // Make a copy of the data for winsorization
725    let mut data: Vec<F> = x.iter().cloned().collect();
726
727    // Sort the data
728    data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
729
730    // Determine the number of values to replace on each end
731    let n = data.len();
732    let n_replace = (F::from(n).expect("Failed to convert to float") * limits)
733        .to_usize()
734        .expect("Failed to convert to usize")
735        .max(1);
736
737    // Get the replacement values
738    let low_val = data[n_replace];
739    let high_val = data[n - n_replace - 1];
740
741    // Replace the extreme values
742    for i in 0..n_replace {
743        data[i] = low_val;
744        data[n - i - 1] = high_val;
745    }
746
747    // Calculate the mean of the winsorized data
748    let mean = data.iter().cloned().sum::<F>() / F::from(n).expect("Test/example failed");
749
750    // Calculate the variance
751    let sum_sq_dev = data.iter().map(|&x| (x - mean).powi(2)).sum::<F>();
752    let denom = F::from(n - ddof).expect("Test/example failed");
753
754    Ok(sum_sq_dev / denom)
755}
756
757#[cfg(test)]
758mod tests {
759    use super::*;
760    use approx::assert_abs_diff_eq;
761    use scirs2_core::ndarray::array;
762
763    #[test]
764    fn test_quantile() {
765        let data = array![1.0, 3.0, 5.0, 7.0, 9.0];
766
767        // Test linear interpolation (default)
768        let median = quantile(&data.view(), 0.5, QuantileInterpolation::Linear)
769            .expect("Test/example failed");
770        assert_abs_diff_eq!(median, 5.0, epsilon = 1e-10);
771
772        let q1 = quantile(&data.view(), 0.25, QuantileInterpolation::Linear)
773            .expect("Test/example failed");
774        assert_abs_diff_eq!(q1, 3.0, epsilon = 1e-10);
775
776        let q3 = quantile(&data.view(), 0.75, QuantileInterpolation::Linear)
777            .expect("Test/example failed");
778        assert_abs_diff_eq!(q3, 7.0, epsilon = 1e-10);
779
780        // Test with interpolation methods
781        let q_lower =
782            quantile(&data.view(), 0.4, QuantileInterpolation::Lower).expect("Test/example failed");
783        assert_abs_diff_eq!(q_lower, 3.0, epsilon = 1e-10);
784
785        let q_higher = quantile(&data.view(), 0.4, QuantileInterpolation::Higher)
786            .expect("Test/example failed");
787        assert_abs_diff_eq!(q_higher, 5.0, epsilon = 1e-10);
788
789        let q_midpoint = quantile(&data.view(), 0.4, QuantileInterpolation::Midpoint)
790            .expect("Test/example failed");
791        assert_abs_diff_eq!(q_midpoint, 4.0, epsilon = 1e-10);
792    }
793
794    #[test]
795    fn test_quantile_r_methods() {
796        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
797
798        // Test methods equivalent to R's quantile types 1-9
799        let q_type1 = quantile(&data.view(), 0.4, QuantileInterpolation::InvertedCdf)
800            .expect("Test/example failed");
801        assert_abs_diff_eq!(q_type1, 5.0, epsilon = 1e-10);
802
803        let q_type2 = quantile(
804            &data.view(),
805            0.4,
806            QuantileInterpolation::AveragedInvertedCdf,
807        )
808        .expect("Test/example failed");
809        assert_abs_diff_eq!(q_type2, 4.5, epsilon = 1e-10);
810
811        let q_type3 = quantile(&data.view(), 0.4, QuantileInterpolation::ClosestObservation)
812            .expect("Test/example failed");
813        assert_abs_diff_eq!(q_type3, 5.0, epsilon = 1e-10);
814
815        let q_type4 = quantile(
816            &data.view(),
817            0.4,
818            QuantileInterpolation::InterpolatedInvertedCdf,
819        )
820        .expect("Test/example failed");
821        assert_abs_diff_eq!(q_type4, 3.6, epsilon = 1e-10);
822
823        let q_type5 =
824            quantile(&data.view(), 0.4, QuantileInterpolation::Hazen).expect("Test/example failed");
825        assert_abs_diff_eq!(q_type5, 4.1, epsilon = 1e-10);
826
827        let q_type6 = quantile(&data.view(), 0.4, QuantileInterpolation::Weibull)
828            .expect("Test/example failed");
829        assert_abs_diff_eq!(q_type6, 4.0, epsilon = 1e-10);
830
831        let q_type7 = quantile(&data.view(), 0.4, QuantileInterpolation::Linear)
832            .expect("Test/example failed");
833        assert_abs_diff_eq!(q_type7, 4.2, epsilon = 1e-10);
834
835        let q_type8 = quantile(&data.view(), 0.4, QuantileInterpolation::MedianUnbiased)
836            .expect("Test/example failed");
837        assert_abs_diff_eq!(q_type8, 4.066666666666666, epsilon = 1e-10);
838
839        let q_type9 = quantile(&data.view(), 0.4, QuantileInterpolation::NormalUnbiased)
840            .expect("Test/example failed");
841        assert_abs_diff_eq!(q_type9, 4.075, epsilon = 1e-10);
842    }
843
844    #[test]
845    fn test_percentile() {
846        let data = array![1.0, 3.0, 5.0, 7.0, 9.0];
847
848        // Test percentiles
849        let p50 = percentile(&data.view(), 50.0, QuantileInterpolation::Linear)
850            .expect("Test/example failed");
851        assert_abs_diff_eq!(p50, 5.0, epsilon = 1e-10);
852
853        let p25 = percentile(&data.view(), 25.0, QuantileInterpolation::Linear)
854            .expect("Test/example failed");
855        assert_abs_diff_eq!(p25, 3.0, epsilon = 1e-10);
856
857        let p75 = percentile(&data.view(), 75.0, QuantileInterpolation::Linear)
858            .expect("Test/example failed");
859        assert_abs_diff_eq!(p75, 7.0, epsilon = 1e-10);
860
861        // Test out-of-range values
862        assert!(percentile(&data.view(), -1.0, QuantileInterpolation::Linear).is_err());
863        assert!(percentile(&data.view(), 101.0, QuantileInterpolation::Linear).is_err());
864    }
865
866    #[test]
867    fn test_quartiles() {
868        let data = array![1.0, 3.0, 5.0, 7.0, 9.0];
869
870        // Test quartiles
871        let q =
872            quartiles(&data.view(), QuantileInterpolation::Linear).expect("Test/example failed");
873        assert_abs_diff_eq!(q[0], 3.0, epsilon = 1e-10); // Q1
874        assert_abs_diff_eq!(q[1], 5.0, epsilon = 1e-10); // Q2 (median)
875        assert_abs_diff_eq!(q[2], 7.0, epsilon = 1e-10); // Q3
876    }
877
878    #[test]
879    fn test_quintiles() {
880        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
881
882        // Test quintiles
883        let q =
884            quintiles(&data.view(), QuantileInterpolation::Linear).expect("Test/example failed");
885        assert_abs_diff_eq!(q[0], 2.8, epsilon = 1e-10); // 20th percentile
886        assert_abs_diff_eq!(q[1], 4.6, epsilon = 1e-10); // 40th percentile
887        assert_abs_diff_eq!(q[2], 6.4, epsilon = 1e-10); // 60th percentile
888        assert_abs_diff_eq!(q[3], 8.2, epsilon = 1e-10); // 80th percentile
889    }
890
891    #[test]
892    fn test_deciles() {
893        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
894
895        // Test deciles
896        let d = deciles(&data.view(), QuantileInterpolation::Linear).expect("Test/example failed");
897        assert_abs_diff_eq!(d[0], 1.9, epsilon = 1e-10); // 10th percentile
898        assert_abs_diff_eq!(d[4], 5.5, epsilon = 1e-10); // 50th percentile (median)
899        assert_abs_diff_eq!(d[8], 9.1, epsilon = 1e-10); // 90th percentile
900    }
901
902    #[test]
903    fn test_boxplot_stats() {
904        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 20.0]; // Note the outlier
905
906        // Test boxplot statistics
907        let (q1, q2, q3, whislo, whishi, outliers) =
908            boxplot_stats(&data.view(), Some(1.5), QuantileInterpolation::Linear)
909                .expect("Test/example failed");
910
911        assert_abs_diff_eq!(q1, 3.25, epsilon = 1e-10); // 25th percentile
912        assert_abs_diff_eq!(q2, 5.5, epsilon = 1e-10); // median
913        assert_abs_diff_eq!(q3, 7.75, epsilon = 1e-10); // 75th percentile
914
915        // IQR = 8.25 - 2.75 = 5.5
916        // Lower whisker limit = 2.75 - 1.5*5.5 = -5.5, so whislo is the minimum value
917        assert_abs_diff_eq!(whislo, 1.0, epsilon = 1e-10);
918
919        // Upper whisker limit = 8.25 + 1.5*5.5 = 16.5, so whishi is the highest value below 16.5
920        assert_abs_diff_eq!(whishi, 9.0, epsilon = 1e-10);
921
922        // Outliers beyond the whiskers
923        assert_eq!(outliers.len(), 1);
924        assert_abs_diff_eq!(outliers[0], 20.0, epsilon = 1e-10);
925    }
926
927    #[test]
928    fn test_winsorized_mean() {
929        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 100.0]; // Note the outlier
930
931        // 10% winsorization (replace 1 lowest and 1 highest values)
932        let mean_10 = winsorized_mean(&data.view(), 0.1).expect("Test/example failed");
933        // This will replace 1.0 with 2.0 and 100.0 with 9.0
934        // New data: [2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0]
935        // Mean = 55/10 = 5.5
936        assert_abs_diff_eq!(mean_10, 5.5, epsilon = 1e-10);
937
938        // 20% winsorization (replace 2 lowest and 2 highest values)
939        let mean_20 = winsorized_mean(&data.view(), 0.2).expect("Test/example failed");
940        // This will replace 1.0, 2.0 with 3.0 and 9.0, 100.0 with 8.0
941        // New data: [3.0, 3.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 8.0, 8.0]
942        // Mean = 55/10 = 5.5
943        assert_abs_diff_eq!(mean_20, 5.5, epsilon = 1e-10);
944
945        // With 0 winsorization, should be the regular mean
946        let mean_0 = winsorized_mean(&data.view(), 0.0).expect("Test/example failed");
947        let expected_mean = data.iter().sum::<f64>() / data.len() as f64;
948        assert_abs_diff_eq!(mean_0, expected_mean, epsilon = 1e-10);
949    }
950
951    #[test]
952    fn test_winsorized_variance() {
953        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 100.0]; // Note the outlier
954
955        // 10% winsorization with ddof=1 (sample variance)
956        let var_10 = winsorized_variance(&data.view(), 0.1, 1).expect("Test/example failed");
957        // Winsorized data: [2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0]
958        // Mean = 5.5
959        // Sum of squared deviations = (2-5.5)^2 + (2-5.5)^2 + ... + (9-5.5)^2 + (9-5.5)^2
960        //                            = 12.25 + 12.25 + 6.25 + 2.25 + 0.25 + 0.25 + 2.25 + 6.25 + 12.25 + 12.25
961        //                            = 64.5
962        // Variance = 66.5 / 9 = 7.38888...
963        assert_abs_diff_eq!(var_10, 7.388888888888889, epsilon = 1e-10);
964
965        // 20% winsorization with ddof=0 (population variance)
966        let var_20 = winsorized_variance(&data.view(), 0.2, 0).expect("Test/example failed");
967        // Winsorized data: [3.0, 3.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 8.0, 8.0]
968        // Mean = 5.5
969        // Sum of squared deviations = (3-5.5)^2 * 3 + (4-5.5)^2 + ... + (8-5.5)^2 * 3
970        //                            = 6.25*3 + 2.25 + 0.25 + 0.25 + 2.25 + 6.25*3
971        //                            = 42.5
972        // Variance = 42.5 / 10 = 4.25
973        assert_abs_diff_eq!(var_20, 4.25, epsilon = 1e-10);
974    }
975}
scirs2_stats/quantile.rs

scirs2_stats/
quantile.rs