ndarray_histogram/quantile/
mod.rs

1use self::interpolate::{Interpolate, higher_index, lower_index};
2use crate::errors::QuantileError;
3use crate::errors::{EmptyInput, MinMaxError, MinMaxError::UndefinedOrder};
4use crate::{MaybeNan, MaybeNanExt};
5use ndarray::prelude::*;
6use ndarray::{Data, DataMut, RemoveAxis, Zip};
7use ndarray_slice::Slice1Ext;
8use num_traits::Float;
9use std::{cmp, collections::HashMap, fmt::Debug};
10
11/// Quantile methods for `ArrayBase`.
12pub trait QuantileExt<A, S, D>
13where
14	A: Send,
15	S: Data<Elem = A>,
16	D: Dimension,
17{
18	/// Finds the index of the minimum value of the array.
19	///
20	/// Returns `Err(MinMaxError::UndefinedOrder)` if any of the pairwise
21	/// orderings tested by the function are undefined. (For example, this
22	/// occurs if there are any floating-point NaN values in the array.)
23	///
24	/// Returns `Err(MinMaxError::EmptyInput)` if the array is empty.
25	///
26	/// Even if there are multiple (equal) elements that are minima, only one
27	/// index is returned. (Which one is returned is unspecified and may depend
28	/// on the memory layout of the array.)
29	///
30	/// # Example
31	///
32	/// ```
33	/// use ndarray::array;
34	/// use ndarray_histogram::QuantileExt;
35	///
36	/// let a = array![[1., 3., 5.], [2., 0., 6.]];
37	/// assert_eq!(a.argmin(), Ok((1, 1)));
38	/// ```
39	fn argmin(&self) -> Result<D::Pattern, MinMaxError>
40	where
41		A: PartialOrd + Send;
42
43	/// Finds the index of the minimum value of the array skipping NaN values.
44	///
45	/// Returns `Err(EmptyInput)` if the array is empty or none of the values in the array
46	/// are non-NaN values.
47	///
48	/// Even if there are multiple (equal) elements that are minima, only one
49	/// index is returned. (Which one is returned is unspecified and may depend
50	/// on the memory layout of the array.)
51	///
52	/// # Example
53	///
54	/// ```
55	/// use ndarray::array;
56	/// use ndarray_histogram::QuantileExt;
57	///
58	/// let a = array![[::std::f64::NAN, 3., 5.], [2., 0., 6.]];
59	/// assert_eq!(a.argmin_skipnan(), Ok((1, 1)));
60	/// ```
61	fn argmin_skipnan(&self) -> Result<D::Pattern, EmptyInput>
62	where
63		A: MaybeNan,
64		A::NotNan: Ord + Send;
65
66	/// Finds the elementwise minimum of the array.
67	///
68	/// Returns `Err(MinMaxError::UndefinedOrder)` if any of the pairwise
69	/// orderings tested by the function are undefined. (For example, this
70	/// occurs if there are any floating-point NaN values in the array.)
71	///
72	/// Returns `Err(MinMaxError::EmptyInput)` if the array is empty.
73	///
74	/// Even if there are multiple (equal) elements that are minima, only one
75	/// is returned. (Which one is returned is unspecified and may depend on
76	/// the memory layout of the array.)
77	fn min(&self) -> Result<&A, MinMaxError>
78	where
79		A: PartialOrd + Send;
80
81	/// Finds the elementwise minimum of the array, skipping NaN values.
82	///
83	/// Even if there are multiple (equal) elements that are minima, only one
84	/// is returned. (Which one is returned is unspecified and may depend on
85	/// the memory layout of the array.)
86	///
87	/// **Warning** This method will return a NaN value if none of the values
88	/// in the array are non-NaN values. Note that the NaN value might not be
89	/// in the array.
90	fn min_skipnan(&self) -> &A
91	where
92		A: MaybeNan,
93		A::NotNan: Ord + Send;
94
95	/// Finds the index of the maximum value of the array.
96	///
97	/// Returns `Err(MinMaxError::UndefinedOrder)` if any of the pairwise
98	/// orderings tested by the function are undefined. (For example, this
99	/// occurs if there are any floating-point NaN values in the array.)
100	///
101	/// Returns `Err(MinMaxError::EmptyInput)` if the array is empty.
102	///
103	/// Even if there are multiple (equal) elements that are maxima, only one
104	/// index is returned. (Which one is returned is unspecified and may depend
105	/// on the memory layout of the array.)
106	///
107	/// # Example
108	///
109	/// ```
110	/// use ndarray::array;
111	/// use ndarray_histogram::QuantileExt;
112	///
113	/// let a = array![[1., 3., 7.], [2., 5., 6.]];
114	/// assert_eq!(a.argmax(), Ok((0, 2)));
115	/// ```
116	fn argmax(&self) -> Result<D::Pattern, MinMaxError>
117	where
118		A: PartialOrd + Send;
119
120	/// Finds the index of the maximum value of the array skipping NaN values.
121	///
122	/// Returns `Err(EmptyInput)` if the array is empty or none of the values in the array
123	/// are non-NaN values.
124	///
125	/// Even if there are multiple (equal) elements that are maxima, only one
126	/// index is returned. (Which one is returned is unspecified and may depend
127	/// on the memory layout of the array.)
128	///
129	/// # Example
130	///
131	/// ```
132	/// use ndarray::array;
133	/// use ndarray_histogram::QuantileExt;
134	///
135	/// let a = array![[::std::f64::NAN, 3., 5.], [2., 0., 6.]];
136	/// assert_eq!(a.argmax_skipnan(), Ok((1, 2)));
137	/// ```
138	fn argmax_skipnan(&self) -> Result<D::Pattern, EmptyInput>
139	where
140		A: MaybeNan,
141		A::NotNan: Ord + Send;
142
143	/// Finds the elementwise maximum of the array.
144	///
145	/// Returns `Err(MinMaxError::UndefinedOrder)` if any of the pairwise
146	/// orderings tested by the function are undefined. (For example, this
147	/// occurs if there are any floating-point NaN values in the array.)
148	///
149	/// Returns `Err(EmptyInput)` if the array is empty.
150	///
151	/// Even if there are multiple (equal) elements that are maxima, only one
152	/// is returned. (Which one is returned is unspecified and may depend on
153	/// the memory layout of the array.)
154	fn max(&self) -> Result<&A, MinMaxError>
155	where
156		A: PartialOrd + Send;
157
158	/// Finds the elementwise maximum of the array, skipping NaN values.
159	///
160	/// Even if there are multiple (equal) elements that are maxima, only one
161	/// is returned. (Which one is returned is unspecified and may depend on
162	/// the memory layout of the array.)
163	///
164	/// **Warning** This method will return a NaN value if none of the values
165	/// in the array are non-NaN values. Note that the NaN value might not be
166	/// in the array.
167	fn max_skipnan(&self) -> &A
168	where
169		A: MaybeNan,
170		A::NotNan: Ord + Send;
171
172	/// Return the qth quantile of the data along the specified axis.
173	///
174	/// `q` needs to be a float between 0 and 1, bounds included.
175	/// The qth quantile for a 1-dimensional lane of length `N` is defined
176	/// as the element that would be indexed as `(N-1)q` if the lane were to be sorted
177	/// in increasing order.
178	/// If `(N-1)q` is not an integer the desired quantile lies between
179	/// two data points: we return the lower, nearest, higher or interpolated
180	/// value depending on the `interpolate` strategy.
181	///
182	/// Some examples (`q=0` and `q=1` are considered improper quantiles):
183	///
184	///   - `q=0.` returns the minimum along each 1-dimensional lane;
185	///   - `q=0.5` returns the median along each 1-dimensional lane;
186	///   - `q=1.` returns the maximum along each 1-dimensional lane.
187	///
188	/// The array is shuffled **in place** along each 1-dimensional lane in
189	/// order to produce the required quantile without allocating a copy
190	/// of the original array. Each 1-dimensional lane is shuffled independently
191	/// from the others.
192	/// No assumptions should be made on the ordering of the array elements
193	/// after this computation.
194	///
195	/// Complexity ([quickselect](https://en.wikipedia.org/wiki/Quickselect)):
196	///
197	///   - average case: O(`m`);
198	///   - worst case: O(`m`^2);
199	///
200	/// where `m` is the number of elements in the array.
201	///
202	/// Returns `Err(EmptyInput)` when the specified axis has length 0.
203	///
204	/// Returns `Err(InvalidQuantile(q))` if `q` is not between `0.` and `1.` (inclusive).
205	///
206	/// **Panics** if `axis` is out of bounds.
207	fn quantile_axis_mut<F, I>(
208		&mut self,
209		axis: Axis,
210		q: F,
211		interpolate: &I,
212	) -> Result<Array<A, D::Smaller>, QuantileError<F>>
213	where
214		D: RemoveAxis,
215		A: Ord + Send + Clone,
216		S: DataMut,
217		F: Float + Debug,
218		I: Interpolate<A>;
219
220	/// A bulk version of [`quantile_axis_mut`], optimized to retrieve multiple
221	/// quantiles at once.
222	///
223	/// Returns an `Array`, where subviews along `axis` of the array correspond
224	/// to the elements of `qs`.
225	///
226	/// See [`quantile_axis_mut`] for additional details on quantiles and the algorithm
227	/// used to retrieve them.
228	///
229	/// Returns `Err(EmptyInput)` when the specified axis has length 0.
230	///
231	/// Returns `Err(InvalidQuantile(q))` if any `q` in `qs` is not between `0.` and `1.` (inclusive).
232	///
233	/// **Panics** if `axis` is out of bounds.
234	///
235	/// [`quantile_axis_mut`]: #tymethod.quantile_axis_mut
236	///
237	/// # Example
238	///
239	/// ```rust
240	/// use ndarray::{Axis, array, aview1};
241	/// use ndarray_histogram::{QuantileExt, interpolate::Nearest};
242	///
243	/// let mut data = array![[3, 4, 5], [6, 7, 8]];
244	/// let axis = Axis(1);
245	/// let qs = &[0.3, 0.7];
246	/// let quantiles = data
247	/// 	.quantiles_axis_mut(axis, &aview1(qs), &Nearest)
248	/// 	.unwrap();
249	/// for (&q, quantile) in qs.iter().zip(quantiles.axis_iter(axis)) {
250	/// 	assert_eq!(quantile, data.quantile_axis_mut(axis, q, &Nearest).unwrap());
251	/// }
252	/// ```
253	fn quantiles_axis_mut<S2, F, I>(
254		&mut self,
255		axis: Axis,
256		qs: &ArrayBase<S2, Ix1>,
257		interpolate: &I,
258	) -> Result<Array<A, D>, QuantileError<F>>
259	where
260		D: RemoveAxis,
261		A: Ord + Send + Clone,
262		S: DataMut,
263		S2: Data<Elem = F>,
264		F: Float + Debug,
265		I: Interpolate<A>;
266
267	/// Return the `q`th quantile of the data along the specified axis, skipping NaN values.
268	///
269	/// See [`quantile_axis_mut`](#tymethod.quantile_axis_mut) for details.
270	fn quantile_axis_skipnan_mut<F, I>(
271		&mut self,
272		axis: Axis,
273		q: F,
274		interpolate: &I,
275	) -> Result<Array<A, D::Smaller>, QuantileError<F>>
276	where
277		D: RemoveAxis,
278		A: MaybeNan,
279		A::NotNan: Clone + Ord + Send,
280		S: DataMut,
281		F: Float + Debug,
282		I: Interpolate<A::NotNan>;
283
284	private_decl! {}
285}
286
287impl<A, S, D> QuantileExt<A, S, D> for ArrayBase<S, D>
288where
289	A: Send,
290	S: Data<Elem = A>,
291	D: Dimension,
292{
293	fn argmin(&self) -> Result<D::Pattern, MinMaxError>
294	where
295		A: PartialOrd + Send,
296	{
297		let mut current_min = self.first().ok_or(EmptyInput)?;
298		let mut current_pattern_min = D::zeros(self.ndim()).into_pattern();
299
300		for (pattern, elem) in self.indexed_iter() {
301			if elem.partial_cmp(current_min).ok_or(UndefinedOrder)? == cmp::Ordering::Less {
302				current_pattern_min = pattern;
303				current_min = elem
304			}
305		}
306
307		Ok(current_pattern_min)
308	}
309
310	fn argmin_skipnan(&self) -> Result<D::Pattern, EmptyInput>
311	where
312		A: MaybeNan,
313		A::NotNan: Ord + Send,
314	{
315		let mut pattern_min = D::zeros(self.ndim()).into_pattern();
316		let min = self.indexed_fold_skipnan(None, |current_min, (pattern, elem)| {
317			Some(match current_min {
318				Some(m) if (m <= elem) => m,
319				_ => {
320					pattern_min = pattern;
321					elem
322				}
323			})
324		});
325		if min.is_some() {
326			Ok(pattern_min)
327		} else {
328			Err(EmptyInput)
329		}
330	}
331
332	fn min(&self) -> Result<&A, MinMaxError>
333	where
334		A: PartialOrd + Send,
335	{
336		let first = self.first().ok_or(EmptyInput)?;
337		self.fold(Ok(first), |acc, elem| {
338			let acc = acc?;
339			match elem.partial_cmp(acc).ok_or(UndefinedOrder)? {
340				cmp::Ordering::Less => Ok(elem),
341				_ => Ok(acc),
342			}
343		})
344	}
345
346	fn min_skipnan(&self) -> &A
347	where
348		A: MaybeNan,
349		A::NotNan: Ord + Send,
350	{
351		let first = self.first().and_then(|v| v.try_as_not_nan());
352		A::from_not_nan_ref_opt(self.fold_skipnan(first, |acc, elem| {
353			Some(match acc {
354				Some(acc) => acc.min(elem),
355				None => elem,
356			})
357		}))
358	}
359
360	fn argmax(&self) -> Result<D::Pattern, MinMaxError>
361	where
362		A: PartialOrd + Send,
363	{
364		let mut current_max = self.first().ok_or(EmptyInput)?;
365		let mut current_pattern_max = D::zeros(self.ndim()).into_pattern();
366
367		for (pattern, elem) in self.indexed_iter() {
368			if elem.partial_cmp(current_max).ok_or(UndefinedOrder)? == cmp::Ordering::Greater {
369				current_pattern_max = pattern;
370				current_max = elem
371			}
372		}
373
374		Ok(current_pattern_max)
375	}
376
377	fn argmax_skipnan(&self) -> Result<D::Pattern, EmptyInput>
378	where
379		A: MaybeNan,
380		A::NotNan: Ord + Send,
381	{
382		let mut pattern_max = D::zeros(self.ndim()).into_pattern();
383		let max = self.indexed_fold_skipnan(None, |current_max, (pattern, elem)| {
384			Some(match current_max {
385				Some(m) if m >= elem => m,
386				_ => {
387					pattern_max = pattern;
388					elem
389				}
390			})
391		});
392		if max.is_some() {
393			Ok(pattern_max)
394		} else {
395			Err(EmptyInput)
396		}
397	}
398
399	fn max(&self) -> Result<&A, MinMaxError>
400	where
401		A: PartialOrd + Send,
402	{
403		let first = self.first().ok_or(EmptyInput)?;
404		self.fold(Ok(first), |acc, elem| {
405			let acc = acc?;
406			match elem.partial_cmp(acc).ok_or(UndefinedOrder)? {
407				cmp::Ordering::Greater => Ok(elem),
408				_ => Ok(acc),
409			}
410		})
411	}
412
413	fn max_skipnan(&self) -> &A
414	where
415		A: MaybeNan,
416		A::NotNan: Ord + Send,
417	{
418		let first = self.first().and_then(|v| v.try_as_not_nan());
419		A::from_not_nan_ref_opt(self.fold_skipnan(first, |acc, elem| {
420			Some(match acc {
421				Some(acc) => acc.max(elem),
422				None => elem,
423			})
424		}))
425	}
426
427	fn quantiles_axis_mut<S2, F, I>(
428		&mut self,
429		axis: Axis,
430		qs: &ArrayBase<S2, Ix1>,
431		interpolate: &I,
432	) -> Result<Array<A, D>, QuantileError<F>>
433	where
434		D: RemoveAxis,
435		A: Ord + Send + Clone,
436		S: DataMut,
437		S2: Data<Elem = F>,
438		F: Float + Debug,
439		I: Interpolate<A>,
440	{
441		// Minimize number of type parameters to avoid monomorphization bloat.
442		fn quantiles_axis_mut<A, D, F, I>(
443			mut data: ArrayViewMut<'_, A, D>,
444			axis: Axis,
445			qs: ArrayView1<'_, F>,
446			_interpolate: &I,
447		) -> Result<Array<A, D>, QuantileError<F>>
448		where
449			D: RemoveAxis,
450			A: Ord + Send + Clone,
451			F: Float + Debug,
452			I: Interpolate<A>,
453		{
454			for &q in qs {
455				if !(F::from(0.).unwrap()..=F::from(1.).unwrap()).contains(&q) {
456					return Err(QuantileError::InvalidQuantile(q));
457				}
458			}
459
460			let axis_len = data.len_of(axis);
461			if axis_len == 0 {
462				return Err(QuantileError::EmptyInput);
463			}
464
465			let mut results_shape = data.raw_dim();
466			results_shape[axis.index()] = qs.len();
467			if results_shape.size() == 0 {
468				return Ok(Array::from_shape_vec(results_shape, Vec::new()).unwrap());
469			}
470
471			let mut searched_indexes = Vec::with_capacity(2 * qs.len());
472			for &q in &qs {
473				if I::needs_lower(q, axis_len) {
474					searched_indexes.push(lower_index(q, axis_len));
475				}
476				if I::needs_higher(q, axis_len) {
477					searched_indexes.push(higher_index(q, axis_len));
478				}
479			}
480			let mut indexes = Array1::from_vec(searched_indexes);
481			indexes.sort_unstable();
482			let (indexes, _duplicates) = indexes.partition_dedup();
483
484			let mut results = Array::from_elem(results_shape, data.first().unwrap().clone());
485			Zip::from(results.lanes_mut(axis))
486				.and(data.lanes_mut(axis))
487				.for_each(|mut results, mut data| {
488					#[cfg(feature = "rayon")]
489					let values = {
490						let mut values = Vec::new();
491						data.par_select_many_nth_unstable(&indexes, &mut values);
492						HashMap::<usize, &mut A>::from_iter(
493							indexes.iter().copied().zip(values.into_iter()),
494						)
495					};
496					#[cfg(not(feature = "rayon"))]
497					let values = {
498						let mut values = HashMap::new();
499						data.select_many_nth_unstable(&indexes, &mut values);
500						values
501					};
502					for (result, &q) in results.iter_mut().zip(qs) {
503						let lower = if I::needs_lower(q, axis_len) {
504							Some(values[&lower_index(q, axis_len)].clone())
505						} else {
506							None
507						};
508						let higher = if I::needs_higher(q, axis_len) {
509							Some(values[&higher_index(q, axis_len)].clone())
510						} else {
511							None
512						};
513						*result = I::interpolate(lower, higher, q, axis_len);
514					}
515				});
516			Ok(results)
517		}
518
519		quantiles_axis_mut(self.view_mut(), axis, qs.view(), interpolate)
520	}
521
522	fn quantile_axis_mut<F, I>(
523		&mut self,
524		axis: Axis,
525		q: F,
526		interpolate: &I,
527	) -> Result<Array<A, D::Smaller>, QuantileError<F>>
528	where
529		D: RemoveAxis,
530		A: Ord + Send + Clone,
531		S: DataMut,
532		F: Float + Debug,
533		I: Interpolate<A>,
534	{
535		self.quantiles_axis_mut(axis, &aview1(&[q]), interpolate)
536			.map(|a| a.index_axis_move(axis, 0))
537	}
538
539	fn quantile_axis_skipnan_mut<F, I>(
540		&mut self,
541		axis: Axis,
542		q: F,
543		interpolate: &I,
544	) -> Result<Array<A, D::Smaller>, QuantileError<F>>
545	where
546		D: RemoveAxis,
547		A: MaybeNan,
548		A::NotNan: Clone + Ord + Send,
549		S: DataMut,
550		F: Float + Debug,
551		I: Interpolate<A::NotNan>,
552	{
553		if !(F::from(0.).unwrap()..=F::from(1.).unwrap()).contains(&q) {
554			return Err(QuantileError::InvalidQuantile(q));
555		}
556
557		if self.len_of(axis) == 0 {
558			return Err(QuantileError::EmptyInput);
559		}
560
561		let quantile = self.map_axis_mut(axis, |lane| {
562			let mut not_nan = A::remove_nan_mut(lane);
563			A::from_not_nan_opt(if not_nan.is_empty() {
564				None
565			} else {
566				Some(
567					not_nan
568						.quantile_axis_mut::<F, I>(Axis(0), q, interpolate)
569						.unwrap()
570						.into_scalar(),
571				)
572			})
573		});
574		Ok(quantile)
575	}
576
577	private_impl! {}
578}
579
580/// Quantile methods for 1-D arrays.
581pub trait Quantile1dExt<A, S>
582where
583	A: Send,
584	S: Data<Elem = A>,
585{
586	/// Return the qth quantile of the data.
587	///
588	/// `q` needs to be a float between 0 and 1, bounds included.
589	/// The qth quantile for a 1-dimensional array of length `N` is defined
590	/// as the element that would be indexed as `(N-1)q` if the array were to be sorted
591	/// in increasing order.
592	/// If `(N-1)q` is not an integer the desired quantile lies between
593	/// two data points: we return the lower, nearest, higher or interpolated
594	/// value depending on the `interpolate` strategy.
595	///
596	/// Some examples (`q=0` and `q=1` are considered improper quantiles):
597	///
598	///   - `q=0.` returns the minimum;
599	///   - `q=0.5` returns the median;
600	///   - `q=1.` returns the maximum.
601	///
602	/// The array is shuffled **in place** in order to produce the required quantile
603	/// without allocating a copy.
604	/// No assumptions should be made on the ordering of the array elements
605	/// after this computation.
606	///
607	/// Complexity ([quickselect](https://en.wikipedia.org/wiki/Quickselect)):
608	///
609	///   - average case: O(`m`),
610	///   - worst case: O(`m`^2),
611	///
612	/// where `m` is the number of elements in the array.
613	///
614	/// Returns `Err(EmptyInput)` if the array is empty.
615	///
616	/// Returns `Err(InvalidQuantile(q))` if `q` is not between `0.` and `1.` (inclusive).
617	fn quantile_mut<F, I>(&mut self, q: F, interpolate: &I) -> Result<A, QuantileError<F>>
618	where
619		A: Ord + Send + Clone,
620		S: DataMut,
621		F: Float + Debug,
622		I: Interpolate<A>;
623
624	/// A bulk version of [`quantile_mut`], optimized to retrieve multiple
625	/// quantiles at once.
626	///
627	/// Returns an `Array`, where the elements of the array correspond to the
628	/// elements of `qs`.
629	///
630	/// Returns `Err(EmptyInput)` if the array is empty.
631	///
632	/// Returns `Err(InvalidQuantile(q))` if any `q` in
633	/// `qs` is not between `0.` and `1.` (inclusive).
634	///
635	/// See [`quantile_mut`] for additional details on quantiles and the algorithm
636	/// used to retrieve them.
637	///
638	/// [`quantile_mut`]: #tymethod.quantile_mut
639	fn quantiles_mut<S2, F, I>(
640		&mut self,
641		qs: &ArrayBase<S2, Ix1>,
642		interpolate: &I,
643	) -> Result<Array1<A>, QuantileError<F>>
644	where
645		A: Ord + Send + Clone,
646		S: DataMut,
647		F: Float + Debug,
648		S2: Data<Elem = F>,
649		I: Interpolate<A>;
650
651	private_decl! {}
652}
653
654impl<A, S> Quantile1dExt<A, S> for ArrayBase<S, Ix1>
655where
656	A: Send,
657	S: Data<Elem = A>,
658{
659	fn quantile_mut<F, I>(&mut self, q: F, interpolate: &I) -> Result<A, QuantileError<F>>
660	where
661		A: Ord + Send + Clone,
662		S: DataMut,
663		F: Float + Debug,
664		I: Interpolate<A>,
665	{
666		Ok(self
667			.quantile_axis_mut(Axis(0), q, interpolate)?
668			.into_scalar())
669	}
670
671	fn quantiles_mut<S2, F, I>(
672		&mut self,
673		qs: &ArrayBase<S2, Ix1>,
674		interpolate: &I,
675	) -> Result<Array1<A>, QuantileError<F>>
676	where
677		A: Ord + Send + Clone,
678		S: DataMut,
679		F: Float + Debug,
680		S2: Data<Elem = F>,
681		I: Interpolate<A>,
682	{
683		self.quantiles_axis_mut(Axis(0), qs, interpolate)
684	}
685
686	private_impl! {}
687}
688
689pub mod interpolate;