polars_core/chunked_array/ops/aggregate/
quantile.rs1use num_traits::AsPrimitive;
2use polars_compute::rolling::QuantileMethod;
3#[cfg(feature = "dtype-f16")]
4use polars_utils::float16::pf16;
5
6use super::*;
7
8pub trait QuantileAggSeries {
9 fn median_reduce(&self) -> Scalar;
11
12 fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar>;
14
15 fn quantiles_reduce(&self, _quantiles: &[f64], _method: QuantileMethod)
17 -> PolarsResult<Scalar>;
18}
19
20fn quantile_idx(
22 quantile: f64,
23 length: usize,
24 null_count: usize,
25 method: QuantileMethod,
26) -> (usize, f64, usize) {
27 let nonnull_count = (length - null_count) as f64;
28 let float_idx = (nonnull_count - 1.0) * quantile + null_count as f64;
29 let mut base_idx = match method {
30 QuantileMethod::Nearest => {
31 let idx = float_idx.round() as usize;
32 return (idx, 0.0, idx);
33 },
34 QuantileMethod::Lower | QuantileMethod::Midpoint | QuantileMethod::Linear => {
35 float_idx as usize
36 },
37 QuantileMethod::Higher => float_idx.ceil() as usize,
38 QuantileMethod::Equiprobable => {
39 let idx = ((nonnull_count * quantile).ceil() - 1.0).max(0.0) as usize + null_count;
40 return (idx, 0.0, idx);
41 },
42 };
43
44 base_idx = base_idx.clamp(0, length - 1);
45 let top_idx = f64::ceil(float_idx) as usize;
46
47 (base_idx, float_idx, top_idx)
48}
49
50fn linear_interpol<T: Float>(lower: T, upper: T, idx: usize, float_idx: f64) -> T {
52 if lower == upper {
53 lower
54 } else {
55 let proportion: T = T::from(float_idx).unwrap() - T::from(idx).unwrap();
56 proportion * (upper - lower) + lower
57 }
58}
59fn midpoint_interpol<T: Float>(lower: T, upper: T) -> T {
60 if lower == upper {
61 lower
62 } else {
63 (lower + upper) / (T::one() + T::one())
64 }
65}
66
67fn quantile_slice<T: ToPrimitive + TotalOrd + Copy>(
72 vals: &mut [T],
73 quantile: f64,
74 method: QuantileMethod,
75) -> PolarsResult<Option<f64>> {
76 polars_ensure!((0.0..=1.0).contains(&quantile),
77 ComputeError: "quantile should be between 0.0 and 1.0",
78 );
79 if vals.is_empty() {
80 return Ok(None);
81 }
82 if vals.len() == 1 {
83 return Ok(vals[0].to_f64());
84 }
85 let (idx, float_idx, top_idx) = quantile_idx(quantile, vals.len(), 0, method);
86
87 let (_lhs, lower, rhs) = vals.select_nth_unstable_by(idx, TotalOrd::tot_cmp);
88 if idx == top_idx {
89 Ok(lower.to_f64())
90 } else {
91 match method {
92 QuantileMethod::Midpoint => {
93 let upper = rhs.iter().copied().min_by(TotalOrd::tot_cmp).unwrap();
94 Ok(Some(midpoint_interpol(
95 lower.to_f64().unwrap(),
96 upper.to_f64().unwrap(),
97 )))
98 },
99 QuantileMethod::Linear => {
100 let upper = rhs.iter().copied().min_by(TotalOrd::tot_cmp).unwrap();
101 Ok(linear_interpol(
102 lower.to_f64().unwrap(),
103 upper.to_f64().unwrap(),
104 idx,
105 float_idx,
106 )
107 .to_f64())
108 },
109 _ => Ok(lower.to_f64()),
110 }
111 }
112}
113
114fn quantiles_slice<T: ToPrimitive + TotalOrd + Copy>(
120 vals: &mut [T],
121 quantiles: &[f64],
122 method: QuantileMethod,
123) -> PolarsResult<Vec<Option<f64>>> {
124 for &q in quantiles {
126 polars_ensure!(
127 (0.0..=1.0).contains(&q),
128 ComputeError: "quantile should be between 0.0 and 1.0"
129 );
130 }
131
132 if vals.is_empty() {
133 return Ok(vec![None; quantiles.len()]);
134 }
135 if vals.len() == 1 {
136 let v = vals[0].to_f64();
137 return Ok(vec![v; quantiles.len()]);
138 }
139
140 vals.sort_by(TotalOrd::tot_cmp);
142 let n = vals.len();
143
144 let mut out = Vec::with_capacity(quantiles.len());
145
146 for &q in quantiles {
147 let (idx, float_idx, top_idx) = quantile_idx(q, n, 0, method);
148
149 let lower = vals[idx].to_f64().unwrap();
151
152 let opt = match method {
153 QuantileMethod::Midpoint => {
154 if top_idx == idx {
155 Some(lower)
156 } else {
157 let upper = vals[idx + 1].to_f64().unwrap();
158 midpoint_interpol(lower, upper).to_f64()
159 }
160 },
161 QuantileMethod::Linear => {
162 if top_idx == idx {
163 Some(lower)
164 } else {
165 let upper = vals[idx + 1].to_f64().unwrap();
166 linear_interpol(lower, upper, idx, float_idx).to_f64()
167 }
168 },
169 _ => Some(lower),
170 };
171
172 out.push(opt);
173 }
174
175 Ok(out)
176}
177
178fn generic_quantiles<T>(
180 ca: ChunkedArray<T>,
181 quantiles: &[f64],
182 method: QuantileMethod,
183) -> PolarsResult<Vec<Option<f64>>>
184where
185 T: PolarsNumericType,
186{
187 for &q in quantiles {
189 polars_ensure!(
190 (0.0..=1.0).contains(&q),
191 ComputeError: "`quantile` should be between 0.0 and 1.0",
192 );
193 }
194
195 let null_count = ca.null_count();
196 let length = ca.len();
197
198 if null_count == length {
199 return Ok(vec![None; quantiles.len()]);
200 }
201
202 let sorted = ca.sort(false);
203 let mut out = Vec::with_capacity(quantiles.len());
204
205 for &q in quantiles {
206 let (idx, float_idx, top_idx) = quantile_idx(q, length, null_count, method);
207
208 let lower = sorted.get(idx).map(|v| v.to_f64().unwrap());
209
210 let opt = match method {
211 QuantileMethod::Midpoint => {
212 if top_idx == idx {
213 lower
214 } else {
215 let upper = sorted.get(idx + 1).map(|v| v.to_f64().unwrap());
216 midpoint_interpol(lower.unwrap(), upper.unwrap()).to_f64()
217 }
218 },
219 QuantileMethod::Linear => {
220 if top_idx == idx {
221 lower
222 } else {
223 let upper = sorted.get(idx + 1).map(|v| v.to_f64().unwrap());
224 linear_interpol(lower.unwrap(), upper.unwrap(), idx, float_idx).to_f64()
225 }
226 },
227 _ => lower,
228 };
229
230 out.push(opt);
231 }
232
233 Ok(out)
234}
235
236impl<T> ChunkQuantile<f64> for ChunkedArray<T>
237where
238 T: PolarsIntegerType,
239 T::Native: TotalOrd,
240{
241 fn quantile(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Option<f64>> {
242 if let (Ok(slice), false) = (self.cont_slice(), self.is_sorted_ascending_flag()) {
244 let mut owned = slice.to_vec();
245 quantile_slice(&mut owned, quantile, method)
246 } else {
247 let re_val = generic_quantiles(self.clone(), &[quantile], method)?;
248 Ok(re_val.into_iter().next().unwrap())
249 }
250 }
251
252 fn quantiles(
253 &self,
254 quantiles: &[f64],
255 method: QuantileMethod,
256 ) -> PolarsResult<Vec<Option<f64>>> {
257 if let (Ok(slice), false) = (self.cont_slice(), self.is_sorted_ascending_flag()) {
259 let mut owned = slice.to_vec();
260 quantiles_slice(&mut owned, quantiles, method)
261 } else {
262 generic_quantiles(self.clone(), quantiles, method)
263 }
264 }
265
266 fn median(&self) -> Option<f64> {
267 self.quantile(0.5, QuantileMethod::Linear).unwrap() }
269}
270
271impl<T> ChunkedArray<T>
273where
274 T: PolarsIntegerType,
275 T::Native: TotalOrd,
276{
277 pub(crate) fn quantile_faster(
278 mut self,
279 quantile: f64,
280 method: QuantileMethod,
281 ) -> PolarsResult<Option<f64>> {
282 let is_sorted = self.is_sorted_ascending_flag();
284 if let (Some(slice), false) = (self.cont_slice_mut(), is_sorted) {
285 quantile_slice(slice, quantile, method)
286 } else {
287 self.quantile(quantile, method)
288 }
289 }
290
291 pub(crate) fn median_faster(self) -> Option<f64> {
292 self.quantile_faster(0.5, QuantileMethod::Linear).unwrap()
293 }
294}
295
296macro_rules! impl_chunk_quantile_for_float_chunked {
297 ($T:ty, $CA:ty) => {
298 impl ChunkQuantile<$T> for $CA {
299 fn quantile(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Option<$T>> {
300 let out = if let (Ok(slice), false) =
302 (self.cont_slice(), self.is_sorted_ascending_flag())
303 {
304 let mut owned = slice.to_vec();
305 quantile_slice(&mut owned, quantile, method)
306 } else {
307 let re_val = generic_quantiles(self.clone(), &[quantile], method)?;
308 Ok(re_val.into_iter().next().unwrap())
309 };
310 out.map(|v| v.map(|v| v.as_()))
311 }
312
313 fn quantiles(
314 &self,
315 quantiles: &[f64],
316 method: QuantileMethod,
317 ) -> PolarsResult<Vec<Option<$T>>> {
318 let out = if let (Ok(slice), false) =
320 (self.cont_slice(), self.is_sorted_ascending_flag())
321 {
322 let mut owned = slice.to_vec();
323 quantiles_slice(&mut owned, quantiles, method)
324 } else {
325 generic_quantiles(self.clone(), quantiles, method)
326 };
327
328 out.map(|vec_t| {
329 vec_t
330 .into_iter()
331 .map(|opt| opt.map(|v| AsPrimitive::<$T>::as_(v)))
332 .collect::<Vec<Option<$T>>>()
333 })
334 }
335
336 fn median(&self) -> Option<$T> {
337 self.quantile(0.5, QuantileMethod::Linear).unwrap() }
339 }
340 };
341}
342
343#[cfg(feature = "dtype-f16")]
344impl_chunk_quantile_for_float_chunked!(pf16, Float16Chunked);
345impl_chunk_quantile_for_float_chunked!(f32, Float32Chunked);
346impl_chunk_quantile_for_float_chunked!(f64, Float64Chunked);
347
348macro_rules! impl_float_chunked {
349 ($T:ty, $CA:ty) => {
350 impl $CA {
351 pub(crate) fn quantile_faster(
352 mut self,
353 quantile: f64,
354 method: QuantileMethod,
355 ) -> PolarsResult<Option<$T>> {
356 let is_sorted = self.is_sorted_ascending_flag();
358 if let (Some(slice), false) = (self.cont_slice_mut(), is_sorted) {
359 Ok(quantile_slice(slice, quantile, method)?.map(AsPrimitive::as_))
360 } else {
361 Ok(self.quantile(quantile, method)?.map(AsPrimitive::as_))
362 }
363 }
364
365 pub(crate) fn median_faster(self) -> Option<$T> {
366 self.quantile_faster(0.5.into(), QuantileMethod::Linear)
367 .unwrap()
368 }
369 }
370 };
371}
372
373#[cfg(feature = "dtype-f16")]
374impl_float_chunked!(pf16, Float16Chunked);
375impl_float_chunked!(f32, Float32Chunked);
376impl_float_chunked!(f64, Float64Chunked);
377
378impl ChunkQuantile<String> for StringChunked {}
379impl ChunkQuantile<Series> for ListChunked {}
380#[cfg(feature = "dtype-array")]
381impl ChunkQuantile<Series> for ArrayChunked {}
382#[cfg(feature = "object")]
383impl<T: PolarsObject> ChunkQuantile<Series> for ObjectChunked<T> {}
384impl ChunkQuantile<bool> for BooleanChunked {}