Skip to main content

polars_core/chunked_array/ops/aggregate/
quantile.rs

1use num_traits::AsPrimitive;
2use polars_compute::rolling::QuantileMethod;
3#[cfg(feature = "dtype-f16")]
4use polars_utils::float16::pf16;
5
6use super::*;
7
8pub trait QuantileAggSeries {
9    /// Get the median of the [`ChunkedArray`] as a new [`Series`] of length 1.
10    fn median_reduce(&self) -> Scalar;
11
12    /// Get the quantile of the [`ChunkedArray`] as a new [`Series`] of length 1.
13    fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar>;
14
15    /// Get the quantiles of the [`ChunkedArray`] as a new [`Series`] of same length as quantiles
16    fn quantiles_reduce(&self, _quantiles: &[f64], _method: QuantileMethod)
17    -> PolarsResult<Scalar>;
18}
19
20/// helper
21fn quantile_idx(
22    quantile: f64,
23    length: usize,
24    null_count: usize,
25    method: QuantileMethod,
26) -> (usize, f64, usize) {
27    let nonnull_count = (length - null_count) as f64;
28    let float_idx = (nonnull_count - 1.0) * quantile + null_count as f64;
29    let mut base_idx = match method {
30        QuantileMethod::Nearest => {
31            let idx = float_idx.round() as usize;
32            return (idx, 0.0, idx);
33        },
34        QuantileMethod::Lower | QuantileMethod::Midpoint | QuantileMethod::Linear => {
35            float_idx as usize
36        },
37        QuantileMethod::Higher => float_idx.ceil() as usize,
38        QuantileMethod::Equiprobable => {
39            let idx = ((nonnull_count * quantile).ceil() - 1.0).max(0.0) as usize + null_count;
40            return (idx, 0.0, idx);
41        },
42    };
43
44    base_idx = base_idx.clamp(0, length - 1);
45    let top_idx = f64::ceil(float_idx) as usize;
46
47    (base_idx, float_idx, top_idx)
48}
49
50/// helper
51fn linear_interpol<T: Float>(lower: T, upper: T, idx: usize, float_idx: f64) -> T {
52    if lower == upper {
53        lower
54    } else {
55        let proportion: T = T::from(float_idx).unwrap() - T::from(idx).unwrap();
56        proportion * (upper - lower) + lower
57    }
58}
59fn midpoint_interpol<T: Float>(lower: T, upper: T) -> T {
60    if lower == upper {
61        lower
62    } else {
63        (lower + upper) / (T::one() + T::one())
64    }
65}
66
67// Quickselect algorithm is used when
68//    1. The data is not already sorted
69//    2. We can make a contiguous slice of the data
70//    3. We only need a single quantile
71fn quantile_slice<T: ToPrimitive + TotalOrd + Copy>(
72    vals: &mut [T],
73    quantile: f64,
74    method: QuantileMethod,
75) -> PolarsResult<Option<f64>> {
76    polars_ensure!((0.0..=1.0).contains(&quantile),
77        ComputeError: "quantile should be between 0.0 and 1.0",
78    );
79    if vals.is_empty() {
80        return Ok(None);
81    }
82    if vals.len() == 1 {
83        return Ok(vals[0].to_f64());
84    }
85    let (idx, float_idx, top_idx) = quantile_idx(quantile, vals.len(), 0, method);
86
87    let (_lhs, lower, rhs) = vals.select_nth_unstable_by(idx, TotalOrd::tot_cmp);
88    if idx == top_idx {
89        Ok(lower.to_f64())
90    } else {
91        match method {
92            QuantileMethod::Midpoint => {
93                let upper = rhs.iter().copied().min_by(TotalOrd::tot_cmp).unwrap();
94                Ok(Some(midpoint_interpol(
95                    lower.to_f64().unwrap(),
96                    upper.to_f64().unwrap(),
97                )))
98            },
99            QuantileMethod::Linear => {
100                let upper = rhs.iter().copied().min_by(TotalOrd::tot_cmp).unwrap();
101                Ok(linear_interpol(
102                    lower.to_f64().unwrap(),
103                    upper.to_f64().unwrap(),
104                    idx,
105                    float_idx,
106                )
107                .to_f64())
108            },
109            _ => Ok(lower.to_f64()),
110        }
111    }
112}
113
114// This function is called if multiple quantiles are requested
115// but we are able to make a contiguous slice of the data.
116// Right now, we do the same thing as the generic function: sort once and
117// get all quantiles from the sorted data. But we could consider multi-quickselect
118// algorithms in the future.
119fn quantiles_slice<T: ToPrimitive + TotalOrd + Copy>(
120    vals: &mut [T],
121    quantiles: &[f64],
122    method: QuantileMethod,
123) -> PolarsResult<Vec<Option<f64>>> {
124    // Validate all quantiles
125    for &q in quantiles {
126        polars_ensure!(
127            (0.0..=1.0).contains(&q),
128            ComputeError: "quantile should be between 0.0 and 1.0"
129        );
130    }
131
132    if vals.is_empty() {
133        return Ok(vec![None; quantiles.len()]);
134    }
135    if vals.len() == 1 {
136        let v = vals[0].to_f64();
137        return Ok(vec![v; quantiles.len()]);
138    }
139
140    // For multiple quantiles, just sort once
141    vals.sort_by(TotalOrd::tot_cmp);
142    let n = vals.len();
143
144    let mut out = Vec::with_capacity(quantiles.len());
145
146    for &q in quantiles {
147        let (idx, float_idx, top_idx) = quantile_idx(q, n, 0, method);
148
149        // No nulls here, so unwrap is safe
150        let lower = vals[idx].to_f64().unwrap();
151
152        let opt = match method {
153            QuantileMethod::Midpoint => {
154                if top_idx == idx {
155                    Some(lower)
156                } else {
157                    let upper = vals[idx + 1].to_f64().unwrap();
158                    midpoint_interpol(lower, upper).to_f64()
159                }
160            },
161            QuantileMethod::Linear => {
162                if top_idx == idx {
163                    Some(lower)
164                } else {
165                    let upper = vals[idx + 1].to_f64().unwrap();
166                    linear_interpol(lower, upper, idx, float_idx).to_f64()
167                }
168            },
169            _ => Some(lower),
170        };
171
172        out.push(opt);
173    }
174
175    Ok(out)
176}
177
178// This function is called if data is already sorted or we cannot make a contiguous slice
179fn generic_quantiles<T>(
180    ca: ChunkedArray<T>,
181    quantiles: &[f64],
182    method: QuantileMethod,
183) -> PolarsResult<Vec<Option<f64>>>
184where
185    T: PolarsNumericType,
186{
187    // Validate all quantiles
188    for &q in quantiles {
189        polars_ensure!(
190            (0.0..=1.0).contains(&q),
191            ComputeError: "`quantile` should be between 0.0 and 1.0",
192        );
193    }
194
195    let null_count = ca.null_count();
196    let length = ca.len();
197
198    if null_count == length {
199        return Ok(vec![None; quantiles.len()]);
200    }
201
202    let sorted = ca.sort(false);
203    let mut out = Vec::with_capacity(quantiles.len());
204
205    for &q in quantiles {
206        let (idx, float_idx, top_idx) = quantile_idx(q, length, null_count, method);
207
208        let lower = sorted.get(idx).map(|v| v.to_f64().unwrap());
209
210        let opt = match method {
211            QuantileMethod::Midpoint => {
212                if top_idx == idx {
213                    lower
214                } else {
215                    let upper = sorted.get(idx + 1).map(|v| v.to_f64().unwrap());
216                    midpoint_interpol(lower.unwrap(), upper.unwrap()).to_f64()
217                }
218            },
219            QuantileMethod::Linear => {
220                if top_idx == idx {
221                    lower
222                } else {
223                    let upper = sorted.get(idx + 1).map(|v| v.to_f64().unwrap());
224                    linear_interpol(lower.unwrap(), upper.unwrap(), idx, float_idx).to_f64()
225                }
226            },
227            _ => lower,
228        };
229
230        out.push(opt);
231    }
232
233    Ok(out)
234}
235
236impl<T> ChunkQuantile<f64> for ChunkedArray<T>
237where
238    T: PolarsIntegerType,
239    T::Native: TotalOrd,
240{
241    fn quantile(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Option<f64>> {
242        // in case of sorted data, the sort is free, so don't take quickselect route
243        if let (Ok(slice), false) = (self.cont_slice(), self.is_sorted_ascending_flag()) {
244            let mut owned = slice.to_vec();
245            quantile_slice(&mut owned, quantile, method)
246        } else {
247            let re_val = generic_quantiles(self.clone(), &[quantile], method)?;
248            Ok(re_val.into_iter().next().unwrap())
249        }
250    }
251
252    fn quantiles(
253        &self,
254        quantiles: &[f64],
255        method: QuantileMethod,
256    ) -> PolarsResult<Vec<Option<f64>>> {
257        // in case of sorted data, the sort is free, so don't take quickselect route
258        if let (Ok(slice), false) = (self.cont_slice(), self.is_sorted_ascending_flag()) {
259            let mut owned = slice.to_vec();
260            quantiles_slice(&mut owned, quantiles, method)
261        } else {
262            generic_quantiles(self.clone(), quantiles, method)
263        }
264    }
265
266    fn median(&self) -> Option<f64> {
267        self.quantile(0.5, QuantileMethod::Linear).unwrap() // unwrap fine since quantile in range
268    }
269}
270
271// Version of quantile/median that don't need a memcpy
272impl<T> ChunkedArray<T>
273where
274    T: PolarsIntegerType,
275    T::Native: TotalOrd,
276{
277    pub(crate) fn quantile_faster(
278        mut self,
279        quantile: f64,
280        method: QuantileMethod,
281    ) -> PolarsResult<Option<f64>> {
282        // in case of sorted data, the sort is free, so don't take quickselect route
283        let is_sorted = self.is_sorted_ascending_flag();
284        if let (Some(slice), false) = (self.cont_slice_mut(), is_sorted) {
285            quantile_slice(slice, quantile, method)
286        } else {
287            self.quantile(quantile, method)
288        }
289    }
290
291    pub(crate) fn median_faster(self) -> Option<f64> {
292        self.quantile_faster(0.5, QuantileMethod::Linear).unwrap()
293    }
294}
295
296macro_rules! impl_chunk_quantile_for_float_chunked {
297    ($T:ty, $CA:ty) => {
298        impl ChunkQuantile<$T> for $CA {
299            fn quantile(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Option<$T>> {
300                // in case of sorted data, the sort is free, so don't take quickselect route
301                let out = if let (Ok(slice), false) =
302                    (self.cont_slice(), self.is_sorted_ascending_flag())
303                {
304                    let mut owned = slice.to_vec();
305                    quantile_slice(&mut owned, quantile, method)
306                } else {
307                    let re_val = generic_quantiles(self.clone(), &[quantile], method)?;
308                    Ok(re_val.into_iter().next().unwrap())
309                };
310                out.map(|v| v.map(|v| v.as_()))
311            }
312
313            fn quantiles(
314                &self,
315                quantiles: &[f64],
316                method: QuantileMethod,
317            ) -> PolarsResult<Vec<Option<$T>>> {
318                // in case of sorted data, the sort is free, so don't take quickselect route
319                let out = if let (Ok(slice), false) =
320                    (self.cont_slice(), self.is_sorted_ascending_flag())
321                {
322                    let mut owned = slice.to_vec();
323                    quantiles_slice(&mut owned, quantiles, method)
324                } else {
325                    generic_quantiles(self.clone(), quantiles, method)
326                };
327
328                out.map(|vec_t| {
329                    vec_t
330                        .into_iter()
331                        .map(|opt| opt.map(|v| AsPrimitive::<$T>::as_(v)))
332                        .collect::<Vec<Option<$T>>>()
333                })
334            }
335
336            fn median(&self) -> Option<$T> {
337                self.quantile(0.5, QuantileMethod::Linear).unwrap() // unwrap fine since quantile in range
338            }
339        }
340    };
341}
342
343#[cfg(feature = "dtype-f16")]
344impl_chunk_quantile_for_float_chunked!(pf16, Float16Chunked);
345impl_chunk_quantile_for_float_chunked!(f32, Float32Chunked);
346impl_chunk_quantile_for_float_chunked!(f64, Float64Chunked);
347
348macro_rules! impl_float_chunked {
349    ($T:ty, $CA:ty) => {
350        impl $CA {
351            pub(crate) fn quantile_faster(
352                mut self,
353                quantile: f64,
354                method: QuantileMethod,
355            ) -> PolarsResult<Option<$T>> {
356                // in case of sorted data, the sort is free, so don't take quickselect route
357                let is_sorted = self.is_sorted_ascending_flag();
358                if let (Some(slice), false) = (self.cont_slice_mut(), is_sorted) {
359                    Ok(quantile_slice(slice, quantile, method)?.map(AsPrimitive::as_))
360                } else {
361                    Ok(self.quantile(quantile, method)?.map(AsPrimitive::as_))
362                }
363            }
364
365            pub(crate) fn median_faster(self) -> Option<$T> {
366                self.quantile_faster(0.5.into(), QuantileMethod::Linear)
367                    .unwrap()
368            }
369        }
370    };
371}
372
373#[cfg(feature = "dtype-f16")]
374impl_float_chunked!(pf16, Float16Chunked);
375impl_float_chunked!(f32, Float32Chunked);
376impl_float_chunked!(f64, Float64Chunked);
377
378impl ChunkQuantile<String> for StringChunked {}
379impl ChunkQuantile<Series> for ListChunked {}
380#[cfg(feature = "dtype-array")]
381impl ChunkQuantile<Series> for ArrayChunked {}
382#[cfg(feature = "object")]
383impl<T: PolarsObject> ChunkQuantile<Series> for ObjectChunked<T> {}
384impl ChunkQuantile<bool> for BooleanChunked {}