ferray_stats/reductions/
quantile.rs

1// ferray-stats: Quantile-based reductions — median, percentile, quantile (REQ-1)
2// Also nanmedian, nanpercentile (REQ-3)
3
4use ferray_core::error::{FerrayError, FerrayResult};
5use ferray_core::{Array, Dimension, Element, IxDyn};
6use num_traits::Float;
7
8use super::{collect_data, make_result, output_shape, reduce_axis_general, validate_axis};
9
10// ---------------------------------------------------------------------------
11// Helpers
12// ---------------------------------------------------------------------------
13
14/// Interpolation method for [`quantile_with_method`] and its `percentile`
15/// / `median` friends.
16///
17/// Matches all 13 NumPy quantile methods (#462, #566). The continuous
18/// methods use the Hyndman-Fan 1996 `(alpha, beta)` parameterization:
19/// `virtual_index = n*q + alpha + q*(1 - alpha - beta) - 1` with
20/// linear interpolation between the two bracketing sorted elements.
21/// The discrete methods compute an integer index via method-specific
22/// rules and return the exact sorted element (no interpolation).
23#[derive(Debug, Clone, Copy, PartialEq, Eq)]
24pub enum QuantileMethod {
25    /// NumPy default. Continuous with `alpha = beta = 1`. Returns
26    /// `lo_val * (1 - frac) + hi_val * frac`.
27    Linear,
28    /// Pick the element at `floor(q * (n - 1))` — the lower of the two
29    /// bracketing sorted elements.
30    Lower,
31    /// Pick the element at `ceil(q * (n - 1))` — the upper of the two
32    /// bracketing sorted elements.
33    Higher,
34    /// Pick the sorted element nearest to `q * (n - 1)`, with ties
35    /// (frac = 0.5) broken to the even index (matches NumPy's
36    /// round-half-to-even convention).
37    Nearest,
38    /// Average of the two bracketing sorted elements: `(lo + hi) / 2`.
39    Midpoint,
40    /// Discrete method, Hyndman definition 1. Returns
41    /// `sorted[ceil(n * q) - 1]` — a step function with jumps at
42    /// `k / n`. NumPy's `'inverted_cdf'`.
43    InvertedCdf,
44    /// Discrete method, Hyndman definition 2. Same as `InvertedCdf`
45    /// except that when `n * q` is an integer the result is the
46    /// average of the two bracketing sorted elements. NumPy's
47    /// `'averaged_inverted_cdf'`.
48    AveragedInvertedCdf,
49    /// Discrete method, Hyndman definition 3. `sorted[k]` where
50    /// `k = round_half_to_even(n*q - 0.5)`. NumPy's
51    /// `'closest_observation'`.
52    ClosestObservation,
53    /// Continuous method, Hyndman definition 4. `alpha = 0`, `beta = 1`.
54    /// NumPy's `'interpolated_inverted_cdf'`.
55    InterpolatedInvertedCdf,
56    /// Continuous method, Hyndman definition 5. `alpha = beta = 0.5`.
57    /// NumPy's `'hazen'`.
58    Hazen,
59    /// Continuous method, Hyndman definition 6. `alpha = beta = 0`.
60    /// NumPy's `'weibull'`.
61    Weibull,
62    /// Continuous method, Hyndman definition 8. `alpha = beta = 1/3`.
63    /// NumPy's `'median_unbiased'`.
64    MedianUnbiased,
65    /// Continuous method, Hyndman definition 9. `alpha = beta = 3/8`.
66    /// NumPy's `'normal_unbiased'`.
67    NormalUnbiased,
68}
69
70/// Compute a Hyndman-Fan virtual index for the continuous quantile
71/// methods. Returns `(lo_i, gamma)` with `lo_i` clamped to `[0, n - 1]`
72/// and `gamma` in `[0, 1]`.
73///
74/// `virtual_index = n * q + alpha + q * (1 - alpha - beta) - 1`
75#[inline]
76fn continuous_vidx<T: Float>(n: usize, q: T, alpha: T, beta: T) -> (usize, T) {
77    let nf = T::from(n).unwrap();
78    let zero = T::zero();
79    let one = T::one();
80    let n_minus_1 = T::from(n - 1).unwrap();
81
82    let vidx = nf * q + alpha + q * (one - alpha - beta) - one;
83
84    // Clamp the virtual index into the addressable range.
85    let vidx_clamped = if vidx < zero {
86        zero
87    } else if vidx > n_minus_1 {
88        n_minus_1
89    } else {
90        vidx
91    };
92
93    let lo = vidx_clamped.floor();
94    let lo_i = lo.to_usize().unwrap_or(0).min(n - 1);
95    let gamma = vidx_clamped - lo;
96    (lo_i, gamma)
97}
98
99/// Round-half-to-even (banker's rounding) for a float.
100#[inline]
101fn round_half_to_even<T: Float>(x: T) -> T {
102    let floor = x.floor();
103    let frac = x - floor;
104    let half = T::from(0.5).unwrap();
105    if frac < half {
106        floor
107    } else if frac > half {
108        floor + T::one()
109    } else {
110        // Exactly 0.5 — pick the even neighbor.
111        let floor_i = floor.to_i64().unwrap_or(0);
112        if floor_i.rem_euclid(2) == 0 {
113            floor
114        } else {
115            floor + T::one()
116        }
117    }
118}
119
120/// Compute `(lo_i, gamma)` for a given quantile method, where the
121/// result of the quantile is `(1 - gamma) * sorted[lo_i] + gamma *
122/// sorted[lo_i + 1]`. For all discrete methods and for integer virtual
123/// indices `gamma = 0`, which short-circuits to `sorted[lo_i]` and
124/// avoids the second-pass scan for `hi_val`.
125fn method_index_and_gamma<T: Float>(n: usize, q: T, method: QuantileMethod) -> (usize, T) {
126    let zero = T::zero();
127    let one = T::one();
128    let half = T::from(0.5).unwrap();
129    let nf = T::from(n).unwrap();
130
131    match method {
132        // --- Continuous methods via (alpha, beta). Linear is (1, 1). ---
133        QuantileMethod::Linear => continuous_vidx(n, q, one, one),
134        QuantileMethod::Weibull => continuous_vidx(n, q, zero, zero),
135        QuantileMethod::Hazen => continuous_vidx(n, q, half, half),
136        QuantileMethod::InterpolatedInvertedCdf => continuous_vidx(n, q, zero, one),
137        QuantileMethod::MedianUnbiased => {
138            let third = T::from(1.0 / 3.0).unwrap();
139            continuous_vidx(n, q, third, third)
140        }
141        QuantileMethod::NormalUnbiased => {
142            let ae = T::from(3.0 / 8.0).unwrap();
143            continuous_vidx(n, q, ae, ae)
144        }
145
146        // --- Old discrete classics: reuse the linear virtual index
147        //     and apply their specific rounding rules.
148        QuantileMethod::Lower => {
149            let vidx = q * T::from(n - 1).unwrap();
150            let lo_i = vidx.floor().to_usize().unwrap_or(0).min(n - 1);
151            (lo_i, zero)
152        }
153        QuantileMethod::Higher => {
154            let vidx = q * T::from(n - 1).unwrap();
155            let lo = vidx.floor();
156            let lo_i = lo.to_usize().unwrap_or(0).min(n - 1);
157            let frac = vidx - lo;
158            // If there's a fractional part, gamma=1 picks sorted[lo_i+1]
159            // exactly. If not, lo_val itself is the ceiling.
160            if frac > zero && lo_i + 1 < n {
161                (lo_i, one)
162            } else {
163                (lo_i, zero)
164            }
165        }
166        QuantileMethod::Nearest => {
167            let vidx = q * T::from(n - 1).unwrap();
168            let lo = vidx.floor();
169            let lo_i = lo.to_usize().unwrap_or(0).min(n - 1);
170            let frac = vidx - lo;
171            if frac < half {
172                (lo_i, zero)
173            } else if frac > half {
174                if lo_i + 1 < n {
175                    (lo_i, one)
176                } else {
177                    (lo_i, zero)
178                }
179            } else {
180                // Tie: round to the even lo_i.
181                if lo_i % 2 == 0 || lo_i + 1 >= n {
182                    (lo_i, zero)
183                } else {
184                    (lo_i, one)
185                }
186            }
187        }
188        QuantileMethod::Midpoint => {
189            let vidx = q * T::from(n - 1).unwrap();
190            let lo = vidx.floor();
191            let lo_i = lo.to_usize().unwrap_or(0).min(n - 1);
192            let frac = vidx - lo;
193            if frac > zero && lo_i + 1 < n {
194                (lo_i, half)
195            } else {
196                (lo_i, zero)
197            }
198        }
199
200        // --- Discrete step-function methods ---
201        QuantileMethod::InvertedCdf => {
202            // k = ceil(n * q) - 1, clamped to [0, n - 1].
203            let nq = nf * q;
204            let k = if nq <= zero {
205                0
206            } else {
207                nq.ceil()
208                    .to_usize()
209                    .unwrap_or(0)
210                    .saturating_sub(1)
211                    .min(n - 1)
212            };
213            (k, zero)
214        }
215        QuantileMethod::AveragedInvertedCdf => {
216            // Same as InvertedCdf for non-integer n*q; for exact
217            // integer n*q the result is (sorted[k-1] + sorted[k]) / 2.
218            let nq = nf * q;
219            let floor_nq = nq.floor();
220            let is_integer = nq == floor_nq;
221            if is_integer && nq > zero && nq < nf {
222                // nq is in (0, n), so k = floor(nq) = floor(nq) - 1 + 1
223                // and we average sorted[k-1] and sorted[k].
224                let k = floor_nq.to_usize().unwrap_or(0);
225                let lo_i = k.saturating_sub(1).min(n - 1);
226                if lo_i + 1 < n {
227                    (lo_i, half)
228                } else {
229                    (lo_i, zero)
230                }
231            } else {
232                // Non-integer, or at the boundary — same as InvertedCdf.
233                let k = if nq <= zero {
234                    0
235                } else {
236                    nq.ceil()
237                        .to_usize()
238                        .unwrap_or(0)
239                        .saturating_sub(1)
240                        .min(n - 1)
241                };
242                (k, zero)
243            }
244        }
245        QuantileMethod::ClosestObservation => {
246            // k = round_half_to_even(n * q - 0.5), clamped to [0, n-1].
247            let nq = nf * q;
248            let adj = nq - half;
249            let rounded = round_half_to_even(adj);
250            let k = if rounded < zero {
251                0
252            } else {
253                rounded.to_usize().unwrap_or(0).min(n - 1)
254            };
255            (k, zero)
256        }
257    }
258}
259
260/// Compute a single quantile value from an unsorted slice using
261/// `select_nth_unstable_by` rather than a full `sort_by`.
262///
263/// The selection algorithm gives an O(n) average-time path (quickselect)
264/// instead of the O(n log n) full sort the previous implementation used
265/// (#175). All 13 NumPy quantile methods are supported: every method
266/// produces a `(lo_i, gamma)` pair via [`method_index_and_gamma`] and
267/// the kernel applies a single uniform interpolation formula.
268///
269/// `data` is consumed: it is partitioned in place so the caller should
270/// pass an owned buffer (or a clone they no longer need).
271fn quantile_select<T: Float>(mut data: Vec<T>, q: T, method: QuantileMethod) -> T {
272    let n = data.len();
273    if n == 0 {
274        return T::nan();
275    }
276    if n == 1 {
277        return data[0];
278    }
279
280    let cmp = |a: &T, b: &T| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
281    let (lo_i, gamma) = method_index_and_gamma(n, q, method);
282
283    // First selection: place the lo_i-th smallest at position lo_i.
284    data.select_nth_unstable_by(lo_i, cmp);
285    let lo_val = data[lo_i];
286
287    // Fast exit: no interpolation needed for discrete methods or
288    // whenever the virtual index landed exactly on an integer position.
289    if gamma == T::zero() || lo_i >= n - 1 {
290        return lo_val;
291    }
292
293    // After the partial select, every element in `data[lo_i + 1..]` is
294    // ordered-after `lo_val`; the smallest of them is the
295    // `(lo_i + 1)`-th smallest element overall, which is the `hi_val`
296    // the interpolation formula needs.
297    let hi_val = data[lo_i + 1..]
298        .iter()
299        .copied()
300        .reduce(|a, b| match cmp(&a, &b) {
301            std::cmp::Ordering::Less | std::cmp::Ordering::Equal => a,
302            std::cmp::Ordering::Greater => b,
303        })
304        .unwrap_or(lo_val);
305
306    (T::one() - gamma) * lo_val + gamma * hi_val
307}
308
309/// Compute quantile on a lane using a caller-chosen interpolation method.
310fn lane_quantile_with_method<T: Float>(lane: &[T], q: T, method: QuantileMethod) -> T {
311    quantile_select(lane.to_vec(), q, method)
312}
313
314/// Compute quantile on a lane, excluding NaNs.
315fn lane_nanquantile<T: Float>(lane: &[T], q: T) -> T {
316    let filtered: Vec<T> = lane.iter().copied().filter(|x| !x.is_nan()).collect();
317    if filtered.is_empty() {
318        return T::nan();
319    }
320    quantile_select(filtered, q, QuantileMethod::Linear)
321}
322
323// ---------------------------------------------------------------------------
324// quantile
325// ---------------------------------------------------------------------------
326
327/// Compute the q-th quantile of array data along a given axis.
328///
329/// `q` must be in \[0, 1\]. Uses linear interpolation (NumPy default method).
330/// Equivalent to `numpy.quantile`. See [`quantile_with_method`] for the
331/// variant that accepts a [`QuantileMethod`] selector (#462).
332pub fn quantile<T, D>(a: &Array<T, D>, q: T, axis: Option<usize>) -> FerrayResult<Array<T, IxDyn>>
333where
334    T: Element + Float,
335    D: Dimension,
336{
337    quantile_with_method(a, q, axis, QuantileMethod::Linear)
338}
339
340/// Compute the q-th quantile of array data along a given axis using a
341/// specific interpolation method.
342///
343/// Equivalent to `numpy.quantile(a, q, axis=axis, method=method)` for the
344/// five classic methods exposed via [`QuantileMethod`]. `q` must be in
345/// \[0, 1\]. Added for #462.
346///
347/// # Errors
348/// - `FerrayError::InvalidValue` if `q` is outside \[0, 1\] or the array
349///   is empty.
350/// - `FerrayError::AxisOutOfBounds` if `axis` is out of range.
351pub fn quantile_with_method<T, D>(
352    a: &Array<T, D>,
353    q: T,
354    axis: Option<usize>,
355    method: QuantileMethod,
356) -> FerrayResult<Array<T, IxDyn>>
357where
358    T: Element + Float,
359    D: Dimension,
360{
361    if q < <T as Element>::zero() || q > <T as Element>::one() {
362        return Err(FerrayError::invalid_value("quantile q must be in [0, 1]"));
363    }
364    if a.is_empty() {
365        return Err(FerrayError::invalid_value(
366            "cannot compute quantile of empty array",
367        ));
368    }
369    let data = collect_data(a);
370    match axis {
371        None => {
372            let val = lane_quantile_with_method(&data, q, method);
373            make_result(&[], vec![val])
374        }
375        Some(ax) => {
376            validate_axis(ax, a.ndim())?;
377            let shape = a.shape();
378            let out_s = output_shape(shape, ax);
379            let result = reduce_axis_general(&data, shape, ax, |lane| {
380                lane_quantile_with_method(lane, q, method)
381            });
382            make_result(&out_s, result)
383        }
384    }
385}
386
387// ---------------------------------------------------------------------------
388// percentile
389// ---------------------------------------------------------------------------
390
391/// Compute the q-th percentile of array data along a given axis.
392///
393/// `q` must be in \[0, 100\]. Uses linear interpolation. See
394/// [`percentile_with_method`] for the variant that accepts a
395/// [`QuantileMethod`] selector.
396///
397/// Equivalent to `numpy.percentile`.
398pub fn percentile<T, D>(a: &Array<T, D>, q: T, axis: Option<usize>) -> FerrayResult<Array<T, IxDyn>>
399where
400    T: Element + Float,
401    D: Dimension,
402{
403    percentile_with_method(a, q, axis, QuantileMethod::Linear)
404}
405
406/// Compute the q-th percentile of array data along a given axis using a
407/// specific interpolation method.
408///
409/// `q` must be in \[0, 100\]. Equivalent to
410/// `numpy.percentile(a, q, axis=axis, method=method)` for the five
411/// classic methods exposed via [`QuantileMethod`].
412pub fn percentile_with_method<T, D>(
413    a: &Array<T, D>,
414    q: T,
415    axis: Option<usize>,
416    method: QuantileMethod,
417) -> FerrayResult<Array<T, IxDyn>>
418where
419    T: Element + Float,
420    D: Dimension,
421{
422    let hundred = T::from(100.0).unwrap();
423    if q < <T as Element>::zero() || q > hundred {
424        return Err(FerrayError::invalid_value(
425            "percentile q must be in [0, 100]",
426        ));
427    }
428    quantile_with_method(a, q / hundred, axis, method)
429}
430
431// ---------------------------------------------------------------------------
432// median
433// ---------------------------------------------------------------------------
434
435/// Compute the median of array elements along a given axis.
436///
437/// Equivalent to `numpy.median`.
438pub fn median<T, D>(a: &Array<T, D>, axis: Option<usize>) -> FerrayResult<Array<T, IxDyn>>
439where
440    T: Element + Float,
441    D: Dimension,
442{
443    let half = T::from(0.5).unwrap();
444    quantile(a, half, axis)
445}
446
447// ---------------------------------------------------------------------------
448// NaN-aware variants
449// ---------------------------------------------------------------------------
450
451/// Median, skipping NaN values.
452///
453/// Equivalent to `numpy.nanmedian`.
454pub fn nanmedian<T, D>(a: &Array<T, D>, axis: Option<usize>) -> FerrayResult<Array<T, IxDyn>>
455where
456    T: Element + Float,
457    D: Dimension,
458{
459    let half = T::from(0.5).unwrap();
460    nanquantile(a, half, axis)
461}
462
463/// Percentile, skipping NaN values.
464///
465/// Equivalent to `numpy.nanpercentile`.
466pub fn nanpercentile<T, D>(
467    a: &Array<T, D>,
468    q: T,
469    axis: Option<usize>,
470) -> FerrayResult<Array<T, IxDyn>>
471where
472    T: Element + Float,
473    D: Dimension,
474{
475    let hundred = T::from(100.0).unwrap();
476    if q < <T as Element>::zero() || q > hundred {
477        return Err(FerrayError::invalid_value(
478            "nanpercentile q must be in [0, 100]",
479        ));
480    }
481    nanquantile(a, q / hundred, axis)
482}
483
484/// Quantile, skipping NaN values. Equivalent to `numpy.nanquantile`
485/// (#93 — was previously private, only accessible indirectly through
486/// `nanmedian`/`nanpercentile`).
487pub fn nanquantile<T, D>(
488    a: &Array<T, D>,
489    q: T,
490    axis: Option<usize>,
491) -> FerrayResult<Array<T, IxDyn>>
492where
493    T: Element + Float,
494    D: Dimension,
495{
496    if q < <T as Element>::zero() || q > <T as Element>::one() {
497        return Err(FerrayError::invalid_value("quantile q must be in [0, 1]"));
498    }
499    if a.is_empty() {
500        return Err(FerrayError::invalid_value(
501            "cannot compute nanquantile of empty array",
502        ));
503    }
504    let data = collect_data(a);
505    match axis {
506        None => {
507            let val = lane_nanquantile(&data, q);
508            make_result(&[], vec![val])
509        }
510        Some(ax) => {
511            validate_axis(ax, a.ndim())?;
512            let shape = a.shape();
513            let out_s = output_shape(shape, ax);
514            let result = reduce_axis_general(&data, shape, ax, |lane| lane_nanquantile(lane, q));
515            make_result(&out_s, result)
516        }
517    }
518}
519
520#[cfg(test)]
521mod tests {
522    use super::*;
523    use ferray_core::Ix1;
524
525    #[test]
526    fn test_median_odd() {
527        let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![5.0, 1.0, 3.0, 2.0, 4.0]).unwrap();
528        let m = median(&a, None).unwrap();
529        assert!((m.iter().next().unwrap() - 3.0).abs() < 1e-12);
530    }
531
532    #[test]
533    fn test_median_even() {
534        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![4.0, 1.0, 3.0, 2.0]).unwrap();
535        let m = median(&a, None).unwrap();
536        assert!((m.iter().next().unwrap() - 2.5).abs() < 1e-12);
537    }
538
539    #[test]
540    fn test_percentile_0_50_100() {
541        let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
542        let p0 = percentile(&a, 0.0, None).unwrap();
543        let p50 = percentile(&a, 50.0, None).unwrap();
544        let p100 = percentile(&a, 100.0, None).unwrap();
545        assert!((p0.iter().next().unwrap() - 1.0).abs() < 1e-12);
546        assert!((p50.iter().next().unwrap() - 3.0).abs() < 1e-12);
547        assert!((p100.iter().next().unwrap() - 5.0).abs() < 1e-12);
548    }
549
550    #[test]
551    fn test_quantile_bounds() {
552        let a = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
553        assert!(quantile(&a, -0.1, None).is_err());
554        assert!(quantile(&a, 1.1, None).is_err());
555    }
556
557    #[test]
558    fn test_quantile_interpolation() {
559        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
560        let q = quantile(&a, 0.25, None).unwrap();
561        // index = 0.25 * 3 = 0.75, interp between 1.0 and 2.0 -> 1.75
562        assert!((q.iter().next().unwrap() - 1.75).abs() < 1e-12);
563    }
564
565    #[test]
566    fn test_nanmedian() {
567        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, f64::NAN, 3.0, 5.0]).unwrap();
568        let m = nanmedian(&a, None).unwrap();
569        // non-nan sorted: [1, 3, 5], median = 3.0
570        assert!((m.iter().next().unwrap() - 3.0).abs() < 1e-12);
571    }
572
573    #[test]
574    fn test_nanpercentile() {
575        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, f64::NAN, 3.0, 5.0]).unwrap();
576        let p = nanpercentile(&a, 50.0, None).unwrap();
577        assert!((p.iter().next().unwrap() - 3.0).abs() < 1e-12);
578    }
579
580    #[test]
581    fn test_nanmedian_all_nan() {
582        let a = Array::<f64, Ix1>::from_vec(Ix1::new([2]), vec![f64::NAN, f64::NAN]).unwrap();
583        let m = nanmedian(&a, None).unwrap();
584        assert!(m.iter().next().unwrap().is_nan());
585    }
586
587    // ---- quantile interpolation methods (#462) ----
588
589    fn arr_1_5() -> Array<f64, Ix1> {
590        Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap()
591    }
592
593    #[test]
594    fn test_quantile_method_linear_matches_legacy() {
595        // Default quantile uses Linear; explicit Linear must match.
596        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
597        let legacy = quantile(&a, 0.25, None).unwrap();
598        let with_flag = quantile_with_method(&a, 0.25, None, QuantileMethod::Linear).unwrap();
599        assert_eq!(
600            legacy.iter().next().unwrap(),
601            with_flag.iter().next().unwrap()
602        );
603    }
604
605    #[test]
606    fn test_quantile_method_lower() {
607        // n=5, q=0.25 → idx=1.0 → lo_i=1, frac=0.0 (integer index)
608        // All methods agree: result = 2.0
609        let a = arr_1_5();
610        let q = quantile_with_method(&a, 0.25, None, QuantileMethod::Lower).unwrap();
611        assert!((q.iter().next().unwrap() - 2.0).abs() < 1e-12);
612
613        // n=4, q=0.25 → idx=0.75, lo_i=0, frac=0.75 → Lower = lo_val = 1.0
614        let a4 = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
615        let q = quantile_with_method(&a4, 0.25, None, QuantileMethod::Lower).unwrap();
616        assert!((q.iter().next().unwrap() - 1.0).abs() < 1e-12);
617    }
618
619    #[test]
620    fn test_quantile_method_higher() {
621        // n=4, q=0.25 → idx=0.75 → Higher = hi_val = 2.0
622        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
623        let q = quantile_with_method(&a, 0.25, None, QuantileMethod::Higher).unwrap();
624        assert!((q.iter().next().unwrap() - 2.0).abs() < 1e-12);
625    }
626
627    #[test]
628    fn test_quantile_method_nearest_round_down() {
629        // n=4, q=0.2 → idx=0.6, frac=0.6 > 0.5 → pick hi_val (index 1) = 2.0
630        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
631        let q = quantile_with_method(&a, 0.2, None, QuantileMethod::Nearest).unwrap();
632        assert!((q.iter().next().unwrap() - 2.0).abs() < 1e-12);
633
634        // n=4, q=0.1 → idx=0.3, frac=0.3 < 0.5 → pick lo_val (index 0) = 1.0
635        let q2 = quantile_with_method(&a, 0.1, None, QuantileMethod::Nearest).unwrap();
636        assert!((q2.iter().next().unwrap() - 1.0).abs() < 1e-12);
637    }
638
639    #[test]
640    fn test_quantile_method_nearest_tie_even() {
641        // n=5, q=0.125 → idx=0.5, frac=0.5, lo_i=0 (even) → pick lo_val = 1.0
642        let a = arr_1_5();
643        let q = quantile_with_method(&a, 0.125, None, QuantileMethod::Nearest).unwrap();
644        assert!((q.iter().next().unwrap() - 1.0).abs() < 1e-12);
645
646        // n=5, q=0.375 → idx=1.5, frac=0.5, lo_i=1 (odd) → pick hi_val = 3.0
647        let q2 = quantile_with_method(&a, 0.375, None, QuantileMethod::Nearest).unwrap();
648        assert!((q2.iter().next().unwrap() - 3.0).abs() < 1e-12);
649    }
650
651    #[test]
652    fn test_quantile_method_midpoint() {
653        // n=4, q=0.25 → idx=0.75, lo_val=1.0, hi_val=2.0 → midpoint = 1.5
654        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
655        let q = quantile_with_method(&a, 0.25, None, QuantileMethod::Midpoint).unwrap();
656        assert!((q.iter().next().unwrap() - 1.5).abs() < 1e-12);
657
658        // n=4, q=0.75 → idx=2.25, lo_val=3.0, hi_val=4.0 → midpoint = 3.5
659        let q2 = quantile_with_method(&a, 0.75, None, QuantileMethod::Midpoint).unwrap();
660        assert!((q2.iter().next().unwrap() - 3.5).abs() < 1e-12);
661    }
662
663    #[test]
664    fn test_quantile_method_integer_index_all_agree() {
665        // n=5, q=0.5 → idx=2.0, exactly on sorted[2]. All five methods
666        // must return the same value.
667        let a = arr_1_5();
668        let linear = quantile_with_method(&a, 0.5, None, QuantileMethod::Linear).unwrap();
669        let lower = quantile_with_method(&a, 0.5, None, QuantileMethod::Lower).unwrap();
670        let higher = quantile_with_method(&a, 0.5, None, QuantileMethod::Higher).unwrap();
671        let nearest = quantile_with_method(&a, 0.5, None, QuantileMethod::Nearest).unwrap();
672        let midpoint = quantile_with_method(&a, 0.5, None, QuantileMethod::Midpoint).unwrap();
673        let expected = 3.0;
674        assert!((linear.iter().next().unwrap() - expected).abs() < 1e-12);
675        assert!((lower.iter().next().unwrap() - expected).abs() < 1e-12);
676        assert!((higher.iter().next().unwrap() - expected).abs() < 1e-12);
677        assert!((nearest.iter().next().unwrap() - expected).abs() < 1e-12);
678        assert!((midpoint.iter().next().unwrap() - expected).abs() < 1e-12);
679    }
680
681    #[test]
682    fn test_quantile_method_axis_variant() {
683        // Per-row quantile with a non-linear method.
684        use ferray_core::Ix2;
685        let a = Array::<f64, Ix2>::from_vec(
686            Ix2::new([2, 4]),
687            vec![1.0, 2.0, 3.0, 4.0, 10.0, 20.0, 30.0, 40.0],
688        )
689        .unwrap();
690        // q=0.25, n=4, idx=0.75 → Lower picks lo_val (index 0).
691        let r = quantile_with_method(&a, 0.25, Some(1), QuantileMethod::Lower).unwrap();
692        assert_eq!(r.as_slice().unwrap(), &[1.0, 10.0]);
693    }
694
695    #[test]
696    fn test_percentile_with_method_50() {
697        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
698        // q=50 (percentile) → 0.5 (quantile), n=4, idx=1.5
699        // Linear: 2.5, Lower: 2.0, Higher: 3.0, Nearest (tie, lo_i=1 odd): 3.0, Midpoint: 2.5
700        let lin = percentile_with_method(&a, 50.0, None, QuantileMethod::Linear).unwrap();
701        let lo = percentile_with_method(&a, 50.0, None, QuantileMethod::Lower).unwrap();
702        let hi = percentile_with_method(&a, 50.0, None, QuantileMethod::Higher).unwrap();
703        let nr = percentile_with_method(&a, 50.0, None, QuantileMethod::Nearest).unwrap();
704        let mp = percentile_with_method(&a, 50.0, None, QuantileMethod::Midpoint).unwrap();
705        assert!((lin.iter().next().unwrap() - 2.5).abs() < 1e-12);
706        assert!((lo.iter().next().unwrap() - 2.0).abs() < 1e-12);
707        assert!((hi.iter().next().unwrap() - 3.0).abs() < 1e-12);
708        assert!((nr.iter().next().unwrap() - 3.0).abs() < 1e-12);
709        assert!((mp.iter().next().unwrap() - 2.5).abs() < 1e-12);
710    }
711
712    #[test]
713    fn test_percentile_with_method_rejects_out_of_range() {
714        let a = arr_1_5();
715        assert!(percentile_with_method(&a, -1.0, None, QuantileMethod::Linear).is_err());
716        assert!(percentile_with_method(&a, 101.0, None, QuantileMethod::Linear).is_err());
717    }
718
719    // ---- remaining 8 NumPy quantile methods (#566) ----
720    //
721    // Hand-verified expected values come from Hyndman & Fan 1996 / NumPy
722    // source. For continuous methods the virtual index is
723    //   vidx = n*q + alpha + q*(1 - alpha - beta) - 1
724    // and the result is (1 - gamma) * sorted[lo_i] + gamma * sorted[lo_i+1]
725    // with gamma = frac(vidx_clamped), lo_i = floor(vidx_clamped).
726
727    fn arr_1_4() -> Array<f64, Ix1> {
728        Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap()
729    }
730
731    #[test]
732    fn test_quantile_weibull_q_half() {
733        // n=4, q=0.5, alpha=beta=0:
734        //   vidx = 4*0.5 + 0 + 0.5*(1 - 0 - 0) - 1 = 2 + 0.5 - 1 = 1.5
735        //   lo_i=1, gamma=0.5 → 0.5*sorted[1] + 0.5*sorted[2] = 0.5*2 + 0.5*3 = 2.5
736        let a = arr_1_4();
737        let q = quantile_with_method(&a, 0.5, None, QuantileMethod::Weibull).unwrap();
738        assert!((q.iter().next().copied().unwrap() - 2.5).abs() < 1e-12);
739    }
740
741    #[test]
742    fn test_quantile_weibull_q_quarter() {
743        // n=4, q=0.25, alpha=beta=0:
744        //   vidx = 4*0.25 + 0 + 0.25 - 1 = 0.25
745        //   lo_i=0, gamma=0.25 → 0.75*1 + 0.25*2 = 1.25
746        let a = arr_1_4();
747        let q = quantile_with_method(&a, 0.25, None, QuantileMethod::Weibull).unwrap();
748        assert!((q.iter().next().copied().unwrap() - 1.25).abs() < 1e-12);
749    }
750
751    #[test]
752    fn test_quantile_hazen_q_quarter() {
753        // n=4, q=0.25, alpha=beta=0.5:
754        //   vidx = 4*0.25 + 0.5 + 0.25*(1 - 1) - 1 = 1 + 0.5 - 1 = 0.5
755        //   lo_i=0, gamma=0.5 → 0.5*1 + 0.5*2 = 1.5
756        let a = arr_1_4();
757        let q = quantile_with_method(&a, 0.25, None, QuantileMethod::Hazen).unwrap();
758        assert!((q.iter().next().copied().unwrap() - 1.5).abs() < 1e-12);
759    }
760
761    #[test]
762    fn test_quantile_median_unbiased_q_half() {
763        // n=4, q=0.5, alpha=beta=1/3:
764        //   vidx = 4*0.5 + 1/3 + 0.5*(1 - 2/3) - 1
765        //        = 2 + 1/3 + 0.5*(1/3) - 1
766        //        = 2 + 1/3 + 1/6 - 1
767        //        = 2 + 0.5 - 1 = 1.5
768        //   lo_i=1, gamma=0.5 → 2.5 (same as Linear's median for n=4)
769        let a = arr_1_4();
770        let q = quantile_with_method(&a, 0.5, None, QuantileMethod::MedianUnbiased).unwrap();
771        assert!((q.iter().next().copied().unwrap() - 2.5).abs() < 1e-12);
772    }
773
774    #[test]
775    fn test_quantile_normal_unbiased_q_half() {
776        // n=4, q=0.5, alpha=beta=3/8:
777        //   vidx = 4*0.5 + 3/8 + 0.5*(1 - 6/8) - 1
778        //        = 2 + 0.375 + 0.5*0.25 - 1
779        //        = 2 + 0.375 + 0.125 - 1
780        //        = 1.5
781        //   → 2.5 (matches Linear median at n=4)
782        let a = arr_1_4();
783        let q = quantile_with_method(&a, 0.5, None, QuantileMethod::NormalUnbiased).unwrap();
784        assert!((q.iter().next().copied().unwrap() - 2.5).abs() < 1e-12);
785    }
786
787    #[test]
788    fn test_quantile_interpolated_inverted_cdf_q_half() {
789        // n=4, q=0.5, alpha=0, beta=1:
790        //   vidx = 4*0.5 + 0 + 0.5*(1 - 0 - 1) - 1
791        //        = 2 + 0 + 0 - 1 = 1
792        //   lo_i=1, gamma=0 → sorted[1] = 2
793        // This is different from the median-family methods.
794        let a = arr_1_4();
795        let q =
796            quantile_with_method(&a, 0.5, None, QuantileMethod::InterpolatedInvertedCdf).unwrap();
797        assert!((q.iter().next().copied().unwrap() - 2.0).abs() < 1e-12);
798    }
799
800    #[test]
801    fn test_quantile_inverted_cdf_q_half() {
802        // n=4, q=0.5: nq=2, ceil-1=1 → sorted[1] = 2
803        let a = arr_1_4();
804        let q = quantile_with_method(&a, 0.5, None, QuantileMethod::InvertedCdf).unwrap();
805        assert!((q.iter().next().copied().unwrap() - 2.0).abs() < 1e-12);
806    }
807
808    #[test]
809    fn test_quantile_inverted_cdf_step_function() {
810        // n=5, q values straddling the k/n steps:
811        //   q=0.19 → nq=0.95 → ceil-1 = 0 → sorted[0] = 1
812        //   q=0.21 → nq=1.05 → ceil-1 = 1 → sorted[1] = 2
813        let a = arr_1_5();
814        let q1 = quantile_with_method(&a, 0.19, None, QuantileMethod::InvertedCdf).unwrap();
815        assert!((q1.iter().next().copied().unwrap() - 1.0).abs() < 1e-12);
816        let q2 = quantile_with_method(&a, 0.21, None, QuantileMethod::InvertedCdf).unwrap();
817        assert!((q2.iter().next().copied().unwrap() - 2.0).abs() < 1e-12);
818    }
819
820    #[test]
821    fn test_quantile_averaged_inverted_cdf_integer_nq_averages() {
822        // n=4, q=0.5 → nq=2 (integer) → average of sorted[1] and sorted[2]
823        // = 0.5*2 + 0.5*3 = 2.5
824        let a = arr_1_4();
825        let q = quantile_with_method(&a, 0.5, None, QuantileMethod::AveragedInvertedCdf).unwrap();
826        assert!((q.iter().next().copied().unwrap() - 2.5).abs() < 1e-12);
827    }
828
829    #[test]
830    fn test_quantile_averaged_inverted_cdf_non_integer_nq_matches_inverted_cdf() {
831        // n=5, q=0.3 → nq=1.5 (non-integer) → same as InvertedCdf:
832        //   ceil(1.5) - 1 = 1 → sorted[1] = 2
833        let a = arr_1_5();
834        let q1 = quantile_with_method(&a, 0.3, None, QuantileMethod::AveragedInvertedCdf).unwrap();
835        let q2 = quantile_with_method(&a, 0.3, None, QuantileMethod::InvertedCdf).unwrap();
836        assert_eq!(
837            q1.iter().next().copied().unwrap(),
838            q2.iter().next().copied().unwrap()
839        );
840        assert!((q1.iter().next().copied().unwrap() - 2.0).abs() < 1e-12);
841    }
842
843    #[test]
844    fn test_quantile_closest_observation_half_to_even() {
845        // n=4, q=0.5:
846        //   nq=2, adj=1.5, round_half_to_even(1.5) = 2 (2 is even) → k=2
847        //   → sorted[2] = 3
848        let a = arr_1_4();
849        let q = quantile_with_method(&a, 0.5, None, QuantileMethod::ClosestObservation).unwrap();
850        assert!((q.iter().next().copied().unwrap() - 3.0).abs() < 1e-12);
851
852        // n=4, q=0.125:
853        //   nq=0.5, adj=0, round_half_to_even(0) = 0 → k=0 → sorted[0] = 1
854        let q2 = quantile_with_method(&a, 0.125, None, QuantileMethod::ClosestObservation).unwrap();
855        assert!((q2.iter().next().copied().unwrap() - 1.0).abs() < 1e-12);
856    }
857
858    #[test]
859    fn test_quantile_closest_observation_nq_0_875_rounds_up() {
860        // n=4, q=0.875:
861        //   nq=3.5, adj=3, round_half_to_even(3) = 3 → k=3 → sorted[3] = 4
862        let a = arr_1_4();
863        let q = quantile_with_method(&a, 0.875, None, QuantileMethod::ClosestObservation).unwrap();
864        assert!((q.iter().next().copied().unwrap() - 4.0).abs() < 1e-12);
865    }
866
867    #[test]
868    fn test_quantile_continuous_methods_agree_at_q_0_and_q_1() {
869        // At q=0 all continuous methods should return the min; at q=1 all
870        // should return the max (clamping). This is a sanity check that
871        // the virtual-index clamp works in every branch.
872        let a = arr_1_5();
873        let methods = [
874            QuantileMethod::Linear,
875            QuantileMethod::Weibull,
876            QuantileMethod::Hazen,
877            QuantileMethod::InterpolatedInvertedCdf,
878            QuantileMethod::MedianUnbiased,
879            QuantileMethod::NormalUnbiased,
880        ];
881        for &m in &methods {
882            let q0 = quantile_with_method(&a, 0.0, None, m).unwrap();
883            let q1 = quantile_with_method(&a, 1.0, None, m).unwrap();
884            assert!(
885                (q0.iter().next().copied().unwrap() - 1.0).abs() < 1e-12,
886                "method {m:?} at q=0 should be min"
887            );
888            assert!(
889                (q1.iter().next().copied().unwrap() - 5.0).abs() < 1e-12,
890                "method {m:?} at q=1 should be max"
891            );
892        }
893    }
894
895    #[test]
896    fn test_quantile_discrete_methods_agree_at_q_1() {
897        // At q=1.0 every method returns the max.
898        let a = arr_1_5();
899        let methods = [
900            QuantileMethod::InvertedCdf,
901            QuantileMethod::AveragedInvertedCdf,
902            QuantileMethod::ClosestObservation,
903        ];
904        for &m in &methods {
905            let q = quantile_with_method(&a, 1.0, None, m).unwrap();
906            assert!(
907                (q.iter().next().copied().unwrap() - 5.0).abs() < 1e-12,
908                "method {m:?} at q=1 should be max"
909            );
910        }
911    }
912
913    #[test]
914    fn test_quantile_all_13_methods_at_integer_index_agree() {
915        // When the virtual index lands on a real integer position,
916        // EVERY method should return that exact sorted element because
917        // each method's dispatch produces gamma=0 or the continuous
918        // formula yields fractional = 0. On n=5 with q=0.5, the linear
919        // virtual index is exactly 2.0 → sorted[2] = 3.
920        let a = arr_1_5();
921        let all_methods = [
922            QuantileMethod::Linear,
923            QuantileMethod::Lower,
924            QuantileMethod::Higher,
925            QuantileMethod::Nearest,
926            QuantileMethod::Midpoint,
927            QuantileMethod::Weibull,
928            QuantileMethod::Hazen,
929            QuantileMethod::MedianUnbiased,
930            QuantileMethod::NormalUnbiased,
931            // These three use different virtual indices so they MAY
932            // disagree at q=0.5 even for odd n; check them separately.
933        ];
934        for &m in &all_methods {
935            let r = quantile_with_method(&a, 0.5, None, m).unwrap();
936            assert!(
937                (r.iter().next().copied().unwrap() - 3.0).abs() < 1e-12,
938                "method {m:?} at odd n, q=0.5 should be 3.0"
939            );
940        }
941    }
942
943    #[test]
944    fn test_quantile_method_axis_variant_weibull() {
945        use ferray_core::Ix2;
946        // (2, 4) rows; per-row Weibull quantile at q=0.5 on [1,2,3,4]
947        // is 2.5, and on [10,20,30,40] is 25.0.
948        let a = Array::<f64, Ix2>::from_vec(
949            Ix2::new([2, 4]),
950            vec![1.0, 2.0, 3.0, 4.0, 10.0, 20.0, 30.0, 40.0],
951        )
952        .unwrap();
953        let r = quantile_with_method(&a, 0.5, Some(1), QuantileMethod::Weibull).unwrap();
954        assert_eq!(r.shape(), &[2]);
955        let s = r.as_slice().unwrap();
956        assert!((s[0] - 2.5).abs() < 1e-12);
957        assert!((s[1] - 25.0).abs() < 1e-12);
958    }
959}
ferray_stats/reductions/quantile.rs

ferray_stats/reductions/
quantile.rs