ferray_stats/reductions/
quantile.rs

1// ferray-stats: Quantile-based reductions — median, percentile, quantile (REQ-1)
2// Also nanmedian, nanpercentile (REQ-3)
3
4use ferray_core::error::{FerrayError, FerrayResult};
5use ferray_core::{Array, Dimension, Element, IxDyn};
6use num_traits::Float;
7
8use super::{collect_data, make_result, output_shape, reduce_axis_general, validate_axis};
9
10// ---------------------------------------------------------------------------
11// Helpers
12// ---------------------------------------------------------------------------
13
14/// Interpolation method for [`quantile_with_method`] and its `percentile`
15/// / `median` friends.
16///
17/// Matches all 13 `NumPy` quantile methods (#462, #566). The continuous
18/// methods use the Hyndman-Fan 1996 `(alpha, beta)` parameterization:
19/// `virtual_index = n*q + alpha + q*(1 - alpha - beta) - 1` with
20/// linear interpolation between the two bracketing sorted elements.
21/// The discrete methods compute an integer index via method-specific
22/// rules and return the exact sorted element (no interpolation).
23#[derive(Debug, Clone, Copy, PartialEq, Eq)]
24pub enum QuantileMethod {
25    /// `NumPy` default. Continuous with `alpha = beta = 1`. Returns
26    /// `lo_val * (1 - frac) + hi_val * frac`.
27    Linear,
28    /// Pick the element at `floor(q * (n - 1))` — the lower of the two
29    /// bracketing sorted elements.
30    Lower,
31    /// Pick the element at `ceil(q * (n - 1))` — the upper of the two
32    /// bracketing sorted elements.
33    Higher,
34    /// Pick the sorted element nearest to `q * (n - 1)`, with ties
35    /// (frac = 0.5) broken to the even index (matches `NumPy`'s
36    /// round-half-to-even convention).
37    Nearest,
38    /// Average of the two bracketing sorted elements: `(lo + hi) / 2`.
39    Midpoint,
40    /// Discrete method, Hyndman definition 1. Returns
41    /// `sorted[ceil(n * q) - 1]` — a step function with jumps at
42    /// `k / n`. `NumPy`'s `'inverted_cdf'`.
43    InvertedCdf,
44    /// Discrete method, Hyndman definition 2. Same as `InvertedCdf`
45    /// except that when `n * q` is an integer the result is the
46    /// average of the two bracketing sorted elements. `NumPy`'s
47    /// `'averaged_inverted_cdf'`.
48    AveragedInvertedCdf,
49    /// Discrete method, Hyndman definition 3. `sorted[k]` where
50    /// `k = round_half_to_even(n*q - 0.5)`. `NumPy`'s
51    /// `'closest_observation'`.
52    ClosestObservation,
53    /// Continuous method, Hyndman definition 4. `alpha = 0`, `beta = 1`.
54    /// `NumPy`'s `'interpolated_inverted_cdf'`.
55    InterpolatedInvertedCdf,
56    /// Continuous method, Hyndman definition 5. `alpha = beta = 0.5`.
57    /// `NumPy`'s `'hazen'`.
58    Hazen,
59    /// Continuous method, Hyndman definition 6. `alpha = beta = 0`.
60    /// `NumPy`'s `'weibull'`.
61    Weibull,
62    /// Continuous method, Hyndman definition 8. `alpha = beta = 1/3`.
63    /// `NumPy`'s `'median_unbiased'`.
64    MedianUnbiased,
65    /// Continuous method, Hyndman definition 9. `alpha = beta = 3/8`.
66    /// `NumPy`'s `'normal_unbiased'`.
67    NormalUnbiased,
68}
69
70/// Compute a Hyndman-Fan virtual index for the continuous quantile
71/// methods. Returns `(lo_i, gamma)` with `lo_i` clamped to `[0, n - 1]`
72/// and `gamma` in `[0, 1]`.
73///
74/// `virtual_index = n * q + alpha + q * (1 - alpha - beta) - 1`
75#[inline]
76fn continuous_vidx<T: Float>(n: usize, q: T, alpha: T, beta: T) -> (usize, T) {
77    let nf = T::from(n).unwrap();
78    let zero = T::zero();
79    let one = T::one();
80    let n_minus_1 = T::from(n - 1).unwrap();
81
82    let vidx = nf * q + alpha + q * (one - alpha - beta) - one;
83
84    // Clamp the virtual index into the addressable range.
85    let vidx_clamped = if vidx < zero {
86        zero
87    } else if vidx > n_minus_1 {
88        n_minus_1
89    } else {
90        vidx
91    };
92
93    let lo = vidx_clamped.floor();
94    let lo_i = lo.to_usize().unwrap_or(0).min(n - 1);
95    let gamma = vidx_clamped - lo;
96    (lo_i, gamma)
97}
98
99/// Round-half-to-even (banker's rounding) for a float.
100#[inline]
101fn round_half_to_even<T: Float>(x: T) -> T {
102    let floor = x.floor();
103    let frac = x - floor;
104    let half = T::from(0.5).unwrap();
105    if frac < half {
106        floor
107    } else if frac > half {
108        floor + T::one()
109    } else {
110        // Exactly 0.5 — pick the even neighbor.
111        let floor_i = floor.to_i64().unwrap_or(0);
112        if floor_i.rem_euclid(2) == 0 {
113            floor
114        } else {
115            floor + T::one()
116        }
117    }
118}
119
120/// Compute `(lo_i, gamma)` for a given quantile method, where the
121/// result of the quantile is `(1 - gamma) * sorted[lo_i] + gamma *
122/// sorted[lo_i + 1]`. For all discrete methods and for integer virtual
123/// indices `gamma = 0`, which short-circuits to `sorted[lo_i]` and
124/// avoids the second-pass scan for `hi_val`.
125//
126// Each `QuantileMethod` arm reproduces the corresponding NumPy formula
127// verbatim; splitting the dispatch into helpers would scatter the spec
128// across the file and make maintenance harder than the line count.
129#[allow(clippy::too_many_lines)]
130fn method_index_and_gamma<T: Float>(n: usize, q: T, method: QuantileMethod) -> (usize, T) {
131    let zero = T::zero();
132    let one = T::one();
133    let half = T::from(0.5).unwrap();
134    let nf = T::from(n).unwrap();
135
136    match method {
137        // --- Continuous methods via (alpha, beta). Linear is (1, 1). ---
138        QuantileMethod::Linear => continuous_vidx(n, q, one, one),
139        QuantileMethod::Weibull => continuous_vidx(n, q, zero, zero),
140        QuantileMethod::Hazen => continuous_vidx(n, q, half, half),
141        QuantileMethod::InterpolatedInvertedCdf => continuous_vidx(n, q, zero, one),
142        QuantileMethod::MedianUnbiased => {
143            let third = T::from(1.0 / 3.0).unwrap();
144            continuous_vidx(n, q, third, third)
145        }
146        QuantileMethod::NormalUnbiased => {
147            let ae = T::from(3.0 / 8.0).unwrap();
148            continuous_vidx(n, q, ae, ae)
149        }
150
151        // --- Old discrete classics: reuse the linear virtual index
152        //     and apply their specific rounding rules.
153        QuantileMethod::Lower => {
154            let vidx = q * T::from(n - 1).unwrap();
155            let lo_i = vidx.floor().to_usize().unwrap_or(0).min(n - 1);
156            (lo_i, zero)
157        }
158        QuantileMethod::Higher => {
159            let vidx = q * T::from(n - 1).unwrap();
160            let lo = vidx.floor();
161            let lo_i = lo.to_usize().unwrap_or(0).min(n - 1);
162            let frac = vidx - lo;
163            // If there's a fractional part, gamma=1 picks sorted[lo_i+1]
164            // exactly. If not, lo_val itself is the ceiling.
165            if frac > zero && lo_i + 1 < n {
166                (lo_i, one)
167            } else {
168                (lo_i, zero)
169            }
170        }
171        QuantileMethod::Nearest => {
172            let vidx = q * T::from(n - 1).unwrap();
173            let lo = vidx.floor();
174            let lo_i = lo.to_usize().unwrap_or(0).min(n - 1);
175            let frac = vidx - lo;
176            if frac < half {
177                (lo_i, zero)
178            } else if frac > half {
179                if lo_i + 1 < n {
180                    (lo_i, one)
181                } else {
182                    (lo_i, zero)
183                }
184            } else {
185                // Tie: round to the even lo_i.
186                if lo_i % 2 == 0 || lo_i + 1 >= n {
187                    (lo_i, zero)
188                } else {
189                    (lo_i, one)
190                }
191            }
192        }
193        QuantileMethod::Midpoint => {
194            let vidx = q * T::from(n - 1).unwrap();
195            let lo = vidx.floor();
196            let lo_i = lo.to_usize().unwrap_or(0).min(n - 1);
197            let frac = vidx - lo;
198            if frac > zero && lo_i + 1 < n {
199                (lo_i, half)
200            } else {
201                (lo_i, zero)
202            }
203        }
204
205        // --- Discrete step-function methods ---
206        QuantileMethod::InvertedCdf => {
207            // k = ceil(n * q) - 1, clamped to [0, n - 1].
208            let nq = nf * q;
209            let k = if nq <= zero {
210                0
211            } else {
212                nq.ceil()
213                    .to_usize()
214                    .unwrap_or(0)
215                    .saturating_sub(1)
216                    .min(n - 1)
217            };
218            (k, zero)
219        }
220        QuantileMethod::AveragedInvertedCdf => {
221            // Same as InvertedCdf for non-integer n*q; for exact
222            // integer n*q the result is (sorted[k-1] + sorted[k]) / 2.
223            let nq = nf * q;
224            let floor_nq = nq.floor();
225            let is_integer = nq == floor_nq;
226            if is_integer && nq > zero && nq < nf {
227                // nq is in (0, n), so k = floor(nq) = floor(nq) - 1 + 1
228                // and we average sorted[k-1] and sorted[k].
229                let k = floor_nq.to_usize().unwrap_or(0);
230                let lo_i = k.saturating_sub(1).min(n - 1);
231                if lo_i + 1 < n {
232                    (lo_i, half)
233                } else {
234                    (lo_i, zero)
235                }
236            } else {
237                // Non-integer, or at the boundary — same as InvertedCdf.
238                let k = if nq <= zero {
239                    0
240                } else {
241                    nq.ceil()
242                        .to_usize()
243                        .unwrap_or(0)
244                        .saturating_sub(1)
245                        .min(n - 1)
246                };
247                (k, zero)
248            }
249        }
250        QuantileMethod::ClosestObservation => {
251            // k = round_half_to_even(n * q - 0.5), clamped to [0, n-1].
252            let nq = nf * q;
253            let adj = nq - half;
254            let rounded = round_half_to_even(adj);
255            let k = if rounded < zero {
256                0
257            } else {
258                rounded.to_usize().unwrap_or(0).min(n - 1)
259            };
260            (k, zero)
261        }
262    }
263}
264
265/// Compute a single quantile value from an unsorted slice using
266/// `select_nth_unstable_by` rather than a full `sort_by`.
267///
268/// The selection algorithm gives an O(n) average-time path (quickselect)
269/// instead of the O(n log n) full sort the previous implementation used
270/// (#175). All 13 `NumPy` quantile methods are supported: every method
271/// produces a `(lo_i, gamma)` pair via [`method_index_and_gamma`] and
272/// the kernel applies a single uniform interpolation formula.
273///
274/// `data` is consumed: it is partitioned in place so the caller should
275/// pass an owned buffer (or a clone they no longer need).
276fn quantile_select<T: Float>(mut data: Vec<T>, q: T, method: QuantileMethod) -> T {
277    let n = data.len();
278    if n == 0 {
279        return T::nan();
280    }
281    if n == 1 {
282        return data[0];
283    }
284
285    let cmp = |a: &T, b: &T| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
286    let (lo_i, gamma) = method_index_and_gamma(n, q, method);
287
288    // First selection: place the lo_i-th smallest at position lo_i.
289    data.select_nth_unstable_by(lo_i, cmp);
290    let lo_val = data[lo_i];
291
292    // Fast exit: no interpolation needed for discrete methods or
293    // whenever the virtual index landed exactly on an integer position.
294    if gamma == T::zero() || lo_i >= n - 1 {
295        return lo_val;
296    }
297
298    // After the partial select, every element in `data[lo_i + 1..]` is
299    // ordered-after `lo_val`; the smallest of them is the
300    // `(lo_i + 1)`-th smallest element overall, which is the `hi_val`
301    // the interpolation formula needs.
302    let hi_val = data[lo_i + 1..]
303        .iter()
304        .copied()
305        .reduce(|a, b| match cmp(&a, &b) {
306            std::cmp::Ordering::Less | std::cmp::Ordering::Equal => a,
307            std::cmp::Ordering::Greater => b,
308        })
309        .unwrap_or(lo_val);
310
311    (T::one() - gamma) * lo_val + gamma * hi_val
312}
313
314/// Compute quantile on a lane using a caller-chosen interpolation method.
315fn lane_quantile_with_method<T: Float>(lane: &[T], q: T, method: QuantileMethod) -> T {
316    quantile_select(lane.to_vec(), q, method)
317}
318
319/// Compute quantile on a lane, excluding NaNs.
320fn lane_nanquantile<T: Float>(lane: &[T], q: T) -> T {
321    let filtered: Vec<T> = lane.iter().copied().filter(|x| !x.is_nan()).collect();
322    if filtered.is_empty() {
323        return T::nan();
324    }
325    quantile_select(filtered, q, QuantileMethod::Linear)
326}
327
328// ---------------------------------------------------------------------------
329// quantile
330// ---------------------------------------------------------------------------
331
332/// Compute the q-th quantile of array data along a given axis.
333///
334/// `q` must be in \[0, 1\]. Uses linear interpolation (`NumPy` default method).
335/// Equivalent to `numpy.quantile`. See [`quantile_with_method`] for the
336/// variant that accepts a [`QuantileMethod`] selector (#462).
337pub fn quantile<T, D>(a: &Array<T, D>, q: T, axis: Option<usize>) -> FerrayResult<Array<T, IxDyn>>
338where
339    T: Element + Float,
340    D: Dimension,
341{
342    quantile_with_method(a, q, axis, QuantileMethod::Linear)
343}
344
345/// Compute the q-th quantile of array data along a given axis using a
346/// specific interpolation method.
347///
348/// Equivalent to `numpy.quantile(a, q, axis=axis, method=method)` for the
349/// five classic methods exposed via [`QuantileMethod`]. `q` must be in
350/// \[0, 1\]. Added for #462.
351///
352/// # Errors
353/// - `FerrayError::InvalidValue` if `q` is outside \[0, 1\] or the array
354///   is empty.
355/// - `FerrayError::AxisOutOfBounds` if `axis` is out of range.
356pub fn quantile_with_method<T, D>(
357    a: &Array<T, D>,
358    q: T,
359    axis: Option<usize>,
360    method: QuantileMethod,
361) -> FerrayResult<Array<T, IxDyn>>
362where
363    T: Element + Float,
364    D: Dimension,
365{
366    if q < <T as Element>::zero() || q > <T as Element>::one() {
367        return Err(FerrayError::invalid_value("quantile q must be in [0, 1]"));
368    }
369    if a.is_empty() {
370        return Err(FerrayError::invalid_value(
371            "cannot compute quantile of empty array",
372        ));
373    }
374    let data = collect_data(a);
375    match axis {
376        None => {
377            let val = lane_quantile_with_method(&data, q, method);
378            make_result(&[], vec![val])
379        }
380        Some(ax) => {
381            validate_axis(ax, a.ndim())?;
382            let shape = a.shape();
383            let out_s = output_shape(shape, ax);
384            let result = reduce_axis_general(&data, shape, ax, |lane| {
385                lane_quantile_with_method(lane, q, method)
386            });
387            make_result(&out_s, result)
388        }
389    }
390}
391
392// ---------------------------------------------------------------------------
393// percentile
394// ---------------------------------------------------------------------------
395
396/// Compute the q-th percentile of array data along a given axis.
397///
398/// `q` must be in \[0, 100\]. Uses linear interpolation. See
399/// [`percentile_with_method`] for the variant that accepts a
400/// [`QuantileMethod`] selector.
401///
402/// Equivalent to `numpy.percentile`.
403pub fn percentile<T, D>(a: &Array<T, D>, q: T, axis: Option<usize>) -> FerrayResult<Array<T, IxDyn>>
404where
405    T: Element + Float,
406    D: Dimension,
407{
408    percentile_with_method(a, q, axis, QuantileMethod::Linear)
409}
410
411/// Compute the q-th percentile of array data along a given axis using a
412/// specific interpolation method.
413///
414/// `q` must be in \[0, 100\]. Equivalent to
415/// `numpy.percentile(a, q, axis=axis, method=method)` for the five
416/// classic methods exposed via [`QuantileMethod`].
417pub fn percentile_with_method<T, D>(
418    a: &Array<T, D>,
419    q: T,
420    axis: Option<usize>,
421    method: QuantileMethod,
422) -> FerrayResult<Array<T, IxDyn>>
423where
424    T: Element + Float,
425    D: Dimension,
426{
427    let hundred = T::from(100.0).unwrap();
428    if q < <T as Element>::zero() || q > hundred {
429        return Err(FerrayError::invalid_value(
430            "percentile q must be in [0, 100]",
431        ));
432    }
433    quantile_with_method(a, q / hundred, axis, method)
434}
435
436// ---------------------------------------------------------------------------
437// median
438// ---------------------------------------------------------------------------
439
440/// Compute the median of array elements along a given axis.
441///
442/// Equivalent to `numpy.median`.
443pub fn median<T, D>(a: &Array<T, D>, axis: Option<usize>) -> FerrayResult<Array<T, IxDyn>>
444where
445    T: Element + Float,
446    D: Dimension,
447{
448    let half = T::from(0.5).unwrap();
449    quantile(a, half, axis)
450}
451
452// ---------------------------------------------------------------------------
453// NaN-aware variants
454// ---------------------------------------------------------------------------
455
456/// Median, skipping NaN values.
457///
458/// Equivalent to `numpy.nanmedian`.
459pub fn nanmedian<T, D>(a: &Array<T, D>, axis: Option<usize>) -> FerrayResult<Array<T, IxDyn>>
460where
461    T: Element + Float,
462    D: Dimension,
463{
464    let half = T::from(0.5).unwrap();
465    nanquantile(a, half, axis)
466}
467
468/// Percentile, skipping NaN values.
469///
470/// Equivalent to `numpy.nanpercentile`.
471pub fn nanpercentile<T, D>(
472    a: &Array<T, D>,
473    q: T,
474    axis: Option<usize>,
475) -> FerrayResult<Array<T, IxDyn>>
476where
477    T: Element + Float,
478    D: Dimension,
479{
480    let hundred = T::from(100.0).unwrap();
481    if q < <T as Element>::zero() || q > hundred {
482        return Err(FerrayError::invalid_value(
483            "nanpercentile q must be in [0, 100]",
484        ));
485    }
486    nanquantile(a, q / hundred, axis)
487}
488
489/// Quantile, skipping NaN values. Equivalent to `numpy.nanquantile`
490/// (#93 — was previously private, only accessible indirectly through
491/// `nanmedian`/`nanpercentile`).
492pub fn nanquantile<T, D>(
493    a: &Array<T, D>,
494    q: T,
495    axis: Option<usize>,
496) -> FerrayResult<Array<T, IxDyn>>
497where
498    T: Element + Float,
499    D: Dimension,
500{
501    if q < <T as Element>::zero() || q > <T as Element>::one() {
502        return Err(FerrayError::invalid_value("quantile q must be in [0, 1]"));
503    }
504    if a.is_empty() {
505        return Err(FerrayError::invalid_value(
506            "cannot compute nanquantile of empty array",
507        ));
508    }
509    let data = collect_data(a);
510    match axis {
511        None => {
512            let val = lane_nanquantile(&data, q);
513            make_result(&[], vec![val])
514        }
515        Some(ax) => {
516            validate_axis(ax, a.ndim())?;
517            let shape = a.shape();
518            let out_s = output_shape(shape, ax);
519            let result = reduce_axis_general(&data, shape, ax, |lane| lane_nanquantile(lane, q));
520            make_result(&out_s, result)
521        }
522    }
523}
524
525#[cfg(test)]
526mod tests {
527    use super::*;
528    use ferray_core::Ix1;
529
530    #[test]
531    fn test_median_odd() {
532        let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![5.0, 1.0, 3.0, 2.0, 4.0]).unwrap();
533        let m = median(&a, None).unwrap();
534        assert!((m.iter().next().unwrap() - 3.0).abs() < 1e-12);
535    }
536
537    #[test]
538    fn test_median_even() {
539        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![4.0, 1.0, 3.0, 2.0]).unwrap();
540        let m = median(&a, None).unwrap();
541        assert!((m.iter().next().unwrap() - 2.5).abs() < 1e-12);
542    }
543
544    #[test]
545    fn test_percentile_0_50_100() {
546        let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
547        let p0 = percentile(&a, 0.0, None).unwrap();
548        let p50 = percentile(&a, 50.0, None).unwrap();
549        let p100 = percentile(&a, 100.0, None).unwrap();
550        assert!((p0.iter().next().unwrap() - 1.0).abs() < 1e-12);
551        assert!((p50.iter().next().unwrap() - 3.0).abs() < 1e-12);
552        assert!((p100.iter().next().unwrap() - 5.0).abs() < 1e-12);
553    }
554
555    #[test]
556    fn test_quantile_bounds() {
557        let a = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
558        assert!(quantile(&a, -0.1, None).is_err());
559        assert!(quantile(&a, 1.1, None).is_err());
560    }
561
562    #[test]
563    fn test_quantile_interpolation() {
564        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
565        let q = quantile(&a, 0.25, None).unwrap();
566        // index = 0.25 * 3 = 0.75, interp between 1.0 and 2.0 -> 1.75
567        assert!((q.iter().next().unwrap() - 1.75).abs() < 1e-12);
568    }
569
570    #[test]
571    fn test_nanmedian() {
572        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, f64::NAN, 3.0, 5.0]).unwrap();
573        let m = nanmedian(&a, None).unwrap();
574        // non-nan sorted: [1, 3, 5], median = 3.0
575        assert!((m.iter().next().unwrap() - 3.0).abs() < 1e-12);
576    }
577
578    #[test]
579    fn test_nanpercentile() {
580        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, f64::NAN, 3.0, 5.0]).unwrap();
581        let p = nanpercentile(&a, 50.0, None).unwrap();
582        assert!((p.iter().next().unwrap() - 3.0).abs() < 1e-12);
583    }
584
585    #[test]
586    fn test_nanmedian_all_nan() {
587        let a = Array::<f64, Ix1>::from_vec(Ix1::new([2]), vec![f64::NAN, f64::NAN]).unwrap();
588        let m = nanmedian(&a, None).unwrap();
589        assert!(m.iter().next().unwrap().is_nan());
590    }
591
592    // ---- quantile interpolation methods (#462) ----
593
594    fn arr_1_5() -> Array<f64, Ix1> {
595        Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap()
596    }
597
598    #[test]
599    fn test_quantile_method_linear_matches_legacy() {
600        // Default quantile uses Linear; explicit Linear must match.
601        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
602        let legacy = quantile(&a, 0.25, None).unwrap();
603        let with_flag = quantile_with_method(&a, 0.25, None, QuantileMethod::Linear).unwrap();
604        assert_eq!(
605            legacy.iter().next().unwrap(),
606            with_flag.iter().next().unwrap()
607        );
608    }
609
610    #[test]
611    fn test_quantile_method_lower() {
612        // n=5, q=0.25 → idx=1.0 → lo_i=1, frac=0.0 (integer index)
613        // All methods agree: result = 2.0
614        let a = arr_1_5();
615        let q = quantile_with_method(&a, 0.25, None, QuantileMethod::Lower).unwrap();
616        assert!((q.iter().next().unwrap() - 2.0).abs() < 1e-12);
617
618        // n=4, q=0.25 → idx=0.75, lo_i=0, frac=0.75 → Lower = lo_val = 1.0
619        let a4 = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
620        let q = quantile_with_method(&a4, 0.25, None, QuantileMethod::Lower).unwrap();
621        assert!((q.iter().next().unwrap() - 1.0).abs() < 1e-12);
622    }
623
624    #[test]
625    fn test_quantile_method_higher() {
626        // n=4, q=0.25 → idx=0.75 → Higher = hi_val = 2.0
627        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
628        let q = quantile_with_method(&a, 0.25, None, QuantileMethod::Higher).unwrap();
629        assert!((q.iter().next().unwrap() - 2.0).abs() < 1e-12);
630    }
631
632    #[test]
633    fn test_quantile_method_nearest_round_down() {
634        // n=4, q=0.2 → idx=0.6, frac=0.6 > 0.5 → pick hi_val (index 1) = 2.0
635        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
636        let q = quantile_with_method(&a, 0.2, None, QuantileMethod::Nearest).unwrap();
637        assert!((q.iter().next().unwrap() - 2.0).abs() < 1e-12);
638
639        // n=4, q=0.1 → idx=0.3, frac=0.3 < 0.5 → pick lo_val (index 0) = 1.0
640        let q2 = quantile_with_method(&a, 0.1, None, QuantileMethod::Nearest).unwrap();
641        assert!((q2.iter().next().unwrap() - 1.0).abs() < 1e-12);
642    }
643
644    #[test]
645    fn test_quantile_method_nearest_tie_even() {
646        // n=5, q=0.125 → idx=0.5, frac=0.5, lo_i=0 (even) → pick lo_val = 1.0
647        let a = arr_1_5();
648        let q = quantile_with_method(&a, 0.125, None, QuantileMethod::Nearest).unwrap();
649        assert!((q.iter().next().unwrap() - 1.0).abs() < 1e-12);
650
651        // n=5, q=0.375 → idx=1.5, frac=0.5, lo_i=1 (odd) → pick hi_val = 3.0
652        let q2 = quantile_with_method(&a, 0.375, None, QuantileMethod::Nearest).unwrap();
653        assert!((q2.iter().next().unwrap() - 3.0).abs() < 1e-12);
654    }
655
656    #[test]
657    fn test_quantile_method_midpoint() {
658        // n=4, q=0.25 → idx=0.75, lo_val=1.0, hi_val=2.0 → midpoint = 1.5
659        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
660        let q = quantile_with_method(&a, 0.25, None, QuantileMethod::Midpoint).unwrap();
661        assert!((q.iter().next().unwrap() - 1.5).abs() < 1e-12);
662
663        // n=4, q=0.75 → idx=2.25, lo_val=3.0, hi_val=4.0 → midpoint = 3.5
664        let q2 = quantile_with_method(&a, 0.75, None, QuantileMethod::Midpoint).unwrap();
665        assert!((q2.iter().next().unwrap() - 3.5).abs() < 1e-12);
666    }
667
668    #[test]
669    fn test_quantile_method_integer_index_all_agree() {
670        // n=5, q=0.5 → idx=2.0, exactly on sorted[2]. All five methods
671        // must return the same value.
672        let a = arr_1_5();
673        let linear = quantile_with_method(&a, 0.5, None, QuantileMethod::Linear).unwrap();
674        let lower = quantile_with_method(&a, 0.5, None, QuantileMethod::Lower).unwrap();
675        let higher = quantile_with_method(&a, 0.5, None, QuantileMethod::Higher).unwrap();
676        let nearest = quantile_with_method(&a, 0.5, None, QuantileMethod::Nearest).unwrap();
677        let midpoint = quantile_with_method(&a, 0.5, None, QuantileMethod::Midpoint).unwrap();
678        let expected = 3.0;
679        assert!((linear.iter().next().unwrap() - expected).abs() < 1e-12);
680        assert!((lower.iter().next().unwrap() - expected).abs() < 1e-12);
681        assert!((higher.iter().next().unwrap() - expected).abs() < 1e-12);
682        assert!((nearest.iter().next().unwrap() - expected).abs() < 1e-12);
683        assert!((midpoint.iter().next().unwrap() - expected).abs() < 1e-12);
684    }
685
686    #[test]
687    fn test_quantile_method_axis_variant() {
688        // Per-row quantile with a non-linear method.
689        use ferray_core::Ix2;
690        let a = Array::<f64, Ix2>::from_vec(
691            Ix2::new([2, 4]),
692            vec![1.0, 2.0, 3.0, 4.0, 10.0, 20.0, 30.0, 40.0],
693        )
694        .unwrap();
695        // q=0.25, n=4, idx=0.75 → Lower picks lo_val (index 0).
696        let r = quantile_with_method(&a, 0.25, Some(1), QuantileMethod::Lower).unwrap();
697        assert_eq!(r.as_slice().unwrap(), &[1.0, 10.0]);
698    }
699
700    #[test]
701    fn test_percentile_with_method_50() {
702        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
703        // q=50 (percentile) → 0.5 (quantile), n=4, idx=1.5
704        // Linear: 2.5, Lower: 2.0, Higher: 3.0, Nearest (tie, lo_i=1 odd): 3.0, Midpoint: 2.5
705        let lin = percentile_with_method(&a, 50.0, None, QuantileMethod::Linear).unwrap();
706        let lo = percentile_with_method(&a, 50.0, None, QuantileMethod::Lower).unwrap();
707        let hi = percentile_with_method(&a, 50.0, None, QuantileMethod::Higher).unwrap();
708        let nr = percentile_with_method(&a, 50.0, None, QuantileMethod::Nearest).unwrap();
709        let mp = percentile_with_method(&a, 50.0, None, QuantileMethod::Midpoint).unwrap();
710        assert!((lin.iter().next().unwrap() - 2.5).abs() < 1e-12);
711        assert!((lo.iter().next().unwrap() - 2.0).abs() < 1e-12);
712        assert!((hi.iter().next().unwrap() - 3.0).abs() < 1e-12);
713        assert!((nr.iter().next().unwrap() - 3.0).abs() < 1e-12);
714        assert!((mp.iter().next().unwrap() - 2.5).abs() < 1e-12);
715    }
716
717    #[test]
718    fn test_percentile_with_method_rejects_out_of_range() {
719        let a = arr_1_5();
720        assert!(percentile_with_method(&a, -1.0, None, QuantileMethod::Linear).is_err());
721        assert!(percentile_with_method(&a, 101.0, None, QuantileMethod::Linear).is_err());
722    }
723
724    // ---- remaining 8 NumPy quantile methods (#566) ----
725    //
726    // Hand-verified expected values come from Hyndman & Fan 1996 / NumPy
727    // source. For continuous methods the virtual index is
728    //   vidx = n*q + alpha + q*(1 - alpha - beta) - 1
729    // and the result is (1 - gamma) * sorted[lo_i] + gamma * sorted[lo_i+1]
730    // with gamma = frac(vidx_clamped), lo_i = floor(vidx_clamped).
731
732    fn arr_1_4() -> Array<f64, Ix1> {
733        Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap()
734    }
735
736    #[test]
737    fn test_quantile_weibull_q_half() {
738        // n=4, q=0.5, alpha=beta=0:
739        //   vidx = 4*0.5 + 0 + 0.5*(1 - 0 - 0) - 1 = 2 + 0.5 - 1 = 1.5
740        //   lo_i=1, gamma=0.5 → 0.5*sorted[1] + 0.5*sorted[2] = 0.5*2 + 0.5*3 = 2.5
741        let a = arr_1_4();
742        let q = quantile_with_method(&a, 0.5, None, QuantileMethod::Weibull).unwrap();
743        assert!((q.iter().next().copied().unwrap() - 2.5).abs() < 1e-12);
744    }
745
746    #[test]
747    fn test_quantile_weibull_q_quarter() {
748        // n=4, q=0.25, alpha=beta=0:
749        //   vidx = 4*0.25 + 0 + 0.25 - 1 = 0.25
750        //   lo_i=0, gamma=0.25 → 0.75*1 + 0.25*2 = 1.25
751        let a = arr_1_4();
752        let q = quantile_with_method(&a, 0.25, None, QuantileMethod::Weibull).unwrap();
753        assert!((q.iter().next().copied().unwrap() - 1.25).abs() < 1e-12);
754    }
755
756    #[test]
757    fn test_quantile_hazen_q_quarter() {
758        // n=4, q=0.25, alpha=beta=0.5:
759        //   vidx = 4*0.25 + 0.5 + 0.25*(1 - 1) - 1 = 1 + 0.5 - 1 = 0.5
760        //   lo_i=0, gamma=0.5 → 0.5*1 + 0.5*2 = 1.5
761        let a = arr_1_4();
762        let q = quantile_with_method(&a, 0.25, None, QuantileMethod::Hazen).unwrap();
763        assert!((q.iter().next().copied().unwrap() - 1.5).abs() < 1e-12);
764    }
765
766    #[test]
767    fn test_quantile_median_unbiased_q_half() {
768        // n=4, q=0.5, alpha=beta=1/3:
769        //   vidx = 4*0.5 + 1/3 + 0.5*(1 - 2/3) - 1
770        //        = 2 + 1/3 + 0.5*(1/3) - 1
771        //        = 2 + 1/3 + 1/6 - 1
772        //        = 2 + 0.5 - 1 = 1.5
773        //   lo_i=1, gamma=0.5 → 2.5 (same as Linear's median for n=4)
774        let a = arr_1_4();
775        let q = quantile_with_method(&a, 0.5, None, QuantileMethod::MedianUnbiased).unwrap();
776        assert!((q.iter().next().copied().unwrap() - 2.5).abs() < 1e-12);
777    }
778
779    #[test]
780    fn test_quantile_normal_unbiased_q_half() {
781        // n=4, q=0.5, alpha=beta=3/8:
782        //   vidx = 4*0.5 + 3/8 + 0.5*(1 - 6/8) - 1
783        //        = 2 + 0.375 + 0.5*0.25 - 1
784        //        = 2 + 0.375 + 0.125 - 1
785        //        = 1.5
786        //   → 2.5 (matches Linear median at n=4)
787        let a = arr_1_4();
788        let q = quantile_with_method(&a, 0.5, None, QuantileMethod::NormalUnbiased).unwrap();
789        assert!((q.iter().next().copied().unwrap() - 2.5).abs() < 1e-12);
790    }
791
792    #[test]
793    fn test_quantile_interpolated_inverted_cdf_q_half() {
794        // n=4, q=0.5, alpha=0, beta=1:
795        //   vidx = 4*0.5 + 0 + 0.5*(1 - 0 - 1) - 1
796        //        = 2 + 0 + 0 - 1 = 1
797        //   lo_i=1, gamma=0 → sorted[1] = 2
798        // This is different from the median-family methods.
799        let a = arr_1_4();
800        let q =
801            quantile_with_method(&a, 0.5, None, QuantileMethod::InterpolatedInvertedCdf).unwrap();
802        assert!((q.iter().next().copied().unwrap() - 2.0).abs() < 1e-12);
803    }
804
805    #[test]
806    fn test_quantile_inverted_cdf_q_half() {
807        // n=4, q=0.5: nq=2, ceil-1=1 → sorted[1] = 2
808        let a = arr_1_4();
809        let q = quantile_with_method(&a, 0.5, None, QuantileMethod::InvertedCdf).unwrap();
810        assert!((q.iter().next().copied().unwrap() - 2.0).abs() < 1e-12);
811    }
812
813    #[test]
814    fn test_quantile_inverted_cdf_step_function() {
815        // n=5, q values straddling the k/n steps:
816        //   q=0.19 → nq=0.95 → ceil-1 = 0 → sorted[0] = 1
817        //   q=0.21 → nq=1.05 → ceil-1 = 1 → sorted[1] = 2
818        let a = arr_1_5();
819        let q1 = quantile_with_method(&a, 0.19, None, QuantileMethod::InvertedCdf).unwrap();
820        assert!((q1.iter().next().copied().unwrap() - 1.0).abs() < 1e-12);
821        let q2 = quantile_with_method(&a, 0.21, None, QuantileMethod::InvertedCdf).unwrap();
822        assert!((q2.iter().next().copied().unwrap() - 2.0).abs() < 1e-12);
823    }
824
825    #[test]
826    fn test_quantile_averaged_inverted_cdf_integer_nq_averages() {
827        // n=4, q=0.5 → nq=2 (integer) → average of sorted[1] and sorted[2]
828        // = 0.5*2 + 0.5*3 = 2.5
829        let a = arr_1_4();
830        let q = quantile_with_method(&a, 0.5, None, QuantileMethod::AveragedInvertedCdf).unwrap();
831        assert!((q.iter().next().copied().unwrap() - 2.5).abs() < 1e-12);
832    }
833
834    #[test]
835    fn test_quantile_averaged_inverted_cdf_non_integer_nq_matches_inverted_cdf() {
836        // n=5, q=0.3 → nq=1.5 (non-integer) → same as InvertedCdf:
837        //   ceil(1.5) - 1 = 1 → sorted[1] = 2
838        let a = arr_1_5();
839        let q1 = quantile_with_method(&a, 0.3, None, QuantileMethod::AveragedInvertedCdf).unwrap();
840        let q2 = quantile_with_method(&a, 0.3, None, QuantileMethod::InvertedCdf).unwrap();
841        assert_eq!(
842            q1.iter().next().copied().unwrap(),
843            q2.iter().next().copied().unwrap()
844        );
845        assert!((q1.iter().next().copied().unwrap() - 2.0).abs() < 1e-12);
846    }
847
848    #[test]
849    fn test_quantile_closest_observation_half_to_even() {
850        // n=4, q=0.5:
851        //   nq=2, adj=1.5, round_half_to_even(1.5) = 2 (2 is even) → k=2
852        //   → sorted[2] = 3
853        let a = arr_1_4();
854        let q = quantile_with_method(&a, 0.5, None, QuantileMethod::ClosestObservation).unwrap();
855        assert!((q.iter().next().copied().unwrap() - 3.0).abs() < 1e-12);
856
857        // n=4, q=0.125:
858        //   nq=0.5, adj=0, round_half_to_even(0) = 0 → k=0 → sorted[0] = 1
859        let q2 = quantile_with_method(&a, 0.125, None, QuantileMethod::ClosestObservation).unwrap();
860        assert!((q2.iter().next().copied().unwrap() - 1.0).abs() < 1e-12);
861    }
862
863    #[test]
864    fn test_quantile_closest_observation_nq_0_875_rounds_up() {
865        // n=4, q=0.875:
866        //   nq=3.5, adj=3, round_half_to_even(3) = 3 → k=3 → sorted[3] = 4
867        let a = arr_1_4();
868        let q = quantile_with_method(&a, 0.875, None, QuantileMethod::ClosestObservation).unwrap();
869        assert!((q.iter().next().copied().unwrap() - 4.0).abs() < 1e-12);
870    }
871
872    #[test]
873    fn test_quantile_continuous_methods_agree_at_q_0_and_q_1() {
874        // At q=0 all continuous methods should return the min; at q=1 all
875        // should return the max (clamping). This is a sanity check that
876        // the virtual-index clamp works in every branch.
877        let a = arr_1_5();
878        let methods = [
879            QuantileMethod::Linear,
880            QuantileMethod::Weibull,
881            QuantileMethod::Hazen,
882            QuantileMethod::InterpolatedInvertedCdf,
883            QuantileMethod::MedianUnbiased,
884            QuantileMethod::NormalUnbiased,
885        ];
886        for &m in &methods {
887            let q0 = quantile_with_method(&a, 0.0, None, m).unwrap();
888            let q1 = quantile_with_method(&a, 1.0, None, m).unwrap();
889            assert!(
890                (q0.iter().next().copied().unwrap() - 1.0).abs() < 1e-12,
891                "method {m:?} at q=0 should be min"
892            );
893            assert!(
894                (q1.iter().next().copied().unwrap() - 5.0).abs() < 1e-12,
895                "method {m:?} at q=1 should be max"
896            );
897        }
898    }
899
900    #[test]
901    fn test_quantile_discrete_methods_agree_at_q_1() {
902        // At q=1.0 every method returns the max.
903        let a = arr_1_5();
904        let methods = [
905            QuantileMethod::InvertedCdf,
906            QuantileMethod::AveragedInvertedCdf,
907            QuantileMethod::ClosestObservation,
908        ];
909        for &m in &methods {
910            let q = quantile_with_method(&a, 1.0, None, m).unwrap();
911            assert!(
912                (q.iter().next().copied().unwrap() - 5.0).abs() < 1e-12,
913                "method {m:?} at q=1 should be max"
914            );
915        }
916    }
917
918    #[test]
919    fn test_quantile_all_13_methods_at_integer_index_agree() {
920        // When the virtual index lands on a real integer position,
921        // EVERY method should return that exact sorted element because
922        // each method's dispatch produces gamma=0 or the continuous
923        // formula yields fractional = 0. On n=5 with q=0.5, the linear
924        // virtual index is exactly 2.0 → sorted[2] = 3.
925        let a = arr_1_5();
926        let all_methods = [
927            QuantileMethod::Linear,
928            QuantileMethod::Lower,
929            QuantileMethod::Higher,
930            QuantileMethod::Nearest,
931            QuantileMethod::Midpoint,
932            QuantileMethod::Weibull,
933            QuantileMethod::Hazen,
934            QuantileMethod::MedianUnbiased,
935            QuantileMethod::NormalUnbiased,
936            // These three use different virtual indices so they MAY
937            // disagree at q=0.5 even for odd n; check them separately.
938        ];
939        for &m in &all_methods {
940            let r = quantile_with_method(&a, 0.5, None, m).unwrap();
941            assert!(
942                (r.iter().next().copied().unwrap() - 3.0).abs() < 1e-12,
943                "method {m:?} at odd n, q=0.5 should be 3.0"
944            );
945        }
946    }
947
948    #[test]
949    fn test_quantile_method_axis_variant_weibull() {
950        use ferray_core::Ix2;
951        // (2, 4) rows; per-row Weibull quantile at q=0.5 on [1,2,3,4]
952        // is 2.5, and on [10,20,30,40] is 25.0.
953        let a = Array::<f64, Ix2>::from_vec(
954            Ix2::new([2, 4]),
955            vec![1.0, 2.0, 3.0, 4.0, 10.0, 20.0, 30.0, 40.0],
956        )
957        .unwrap();
958        let r = quantile_with_method(&a, 0.5, Some(1), QuantileMethod::Weibull).unwrap();
959        assert_eq!(r.shape(), &[2]);
960        let s = r.as_slice().unwrap();
961        assert!((s[0] - 2.5).abs() < 1e-12);
962        assert!((s[1] - 25.0).abs() < 1e-12);
963    }
964}
ferray_stats/reductions/quantile.rs

ferray_stats/reductions/
quantile.rs