ndarray_histogram/histogram/
bins.rs

1use ndarray::prelude::*;
2use std::ops::{Index, Range};
3
4#[cfg(feature = "rayon")]
5use rayon::slice::ParallelSliceMut;
6
7/// A sorted collection of type `A` elements used to represent the boundaries of intervals, i.e.
8/// [`Bins`] on a 1-dimensional axis.
9///
10/// **Note** that all intervals are left-closed and right-open. See examples below.
11///
12/// # Examples
13///
14/// ```
15/// use ndarray_histogram::{
16/// 	histogram::{Bins, Edges},
17/// 	o64,
18/// };
19///
20/// let unit_edges = Edges::from(vec![o64(0.), o64(1.)]);
21/// let unit_interval = Bins::new(unit_edges);
22/// // left-closed
23/// assert_eq!(unit_interval.range_of(&o64(0.)).unwrap(), o64(0.)..o64(1.),);
24/// // right-open
25/// assert_eq!(unit_interval.range_of(&o64(1.)), None);
26/// ```
27///
28/// [`Bins`]: struct.Bins.html
29#[derive(Clone, Debug, Eq, PartialEq)]
30pub struct Edges<A: Ord + Send> {
31	edges: Vec<A>,
32}
33
34impl<A: Ord + Send> From<Vec<A>> for Edges<A> {
35	/// Converts a `Vec<A>` into an `Edges<A>`, consuming the edges.
36	/// The vector will be sorted in increasing order using an unstable sorting algorithm, with
37	/// duplicates removed.
38	///
39	/// # Current implementation
40	///
41	/// The current sorting algorithm is the same as [`std::slice::sort_unstable()`][sort],
42	/// which is based on [pattern-defeating quicksort][pdqsort].
43	///
44	/// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not allocate)
45	/// , and O(n log n) worst-case.
46	///
47	/// # Examples
48	///
49	/// ```
50	/// use ndarray::array;
51	/// use ndarray_histogram::histogram::Edges;
52	///
53	/// let edges = Edges::from(array![1, 15, 10, 10, 20]);
54	/// // The array gets sorted!
55	/// assert_eq!(edges[2], 15);
56	/// ```
57	///
58	/// [sort]: https://doc.rust-lang.org/stable/std/primitive.slice.html#method.sort_unstable
59	/// [pdqsort]: https://github.com/orlp/pdqsort
60	fn from(mut edges: Vec<A>) -> Self {
61		// sort the array in-place
62		#[cfg(feature = "rayon")]
63		edges.par_sort_unstable();
64		#[cfg(not(feature = "rayon"))]
65		edges.sort_unstable();
66		// remove duplicates
67		edges.dedup();
68		Edges { edges }
69	}
70}
71
72impl<A: Ord + Send + Clone> From<Array1<A>> for Edges<A> {
73	/// Converts an `Array1<A>` into an `Edges<A>`, consuming the 1-dimensional array.
74	/// The array will be sorted in increasing order using an unstable sorting algorithm, with
75	/// duplicates removed.
76	///
77	/// # Current implementation
78	///
79	/// The current sorting algorithm is the same as [`std::slice::sort_unstable()`][sort],
80	/// which is based on [pattern-defeating quicksort][pdqsort].
81	///
82	/// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not allocate)
83	/// , and O(n log n) worst-case.
84	///
85	/// # Examples
86	///
87	/// ```
88	/// use ndarray_histogram::histogram::Edges;
89	///
90	/// let edges = Edges::from(vec![1, 15, 10, 20]);
91	/// // The vec gets sorted!
92	/// assert_eq!(edges[1], 10);
93	/// ```
94	///
95	/// [sort]: https://doc.rust-lang.org/stable/std/primitive.slice.html#method.sort_unstable
96	/// [pdqsort]: https://github.com/orlp/pdqsort
97	fn from(edges: Array1<A>) -> Self {
98		let edges = edges.to_vec();
99		Self::from(edges)
100	}
101}
102
103impl<A: Ord + Send> Index<usize> for Edges<A> {
104	type Output = A;
105
106	/// Returns a reference to the `i`-th edge in `self`.
107	///
108	/// # Panics
109	///
110	/// Panics if the index `i` is out of bounds.
111	///
112	/// # Examples
113	///
114	/// ```
115	/// use ndarray_histogram::histogram::Edges;
116	///
117	/// let edges = Edges::from(vec![1, 5, 10, 20]);
118	/// assert_eq!(edges[1], 5);
119	/// ```
120	fn index(&self, i: usize) -> &Self::Output {
121		&self.edges[i]
122	}
123}
124
125impl<A: Ord + Send> Edges<A> {
126	/// Returns the number of edges in `self`.
127	///
128	/// # Examples
129	///
130	/// ```
131	/// use ndarray_histogram::{histogram::Edges, o64};
132	///
133	/// let edges = Edges::from(vec![o64(0.), o64(1.), o64(3.)]);
134	/// assert_eq!(edges.len(), 3);
135	/// ```
136	#[must_use]
137	pub fn len(&self) -> usize {
138		self.edges.len()
139	}
140
141	/// Returns `true` if `self` contains no edges.
142	///
143	/// # Examples
144	///
145	/// ```
146	/// use ndarray_histogram::{O64, histogram::Edges, o64};
147	///
148	/// let edges = Edges::<O64>::from(vec![]);
149	/// assert_eq!(edges.is_empty(), true);
150	///
151	/// let edges = Edges::from(vec![o64(0.), o64(2.), o64(5.)]);
152	/// assert_eq!(edges.is_empty(), false);
153	/// ```
154	#[must_use]
155	pub fn is_empty(&self) -> bool {
156		self.edges.is_empty()
157	}
158
159	/// Returns an immutable 1-dimensional array view of edges.
160	///
161	/// # Examples
162	///
163	/// ```
164	/// use ndarray::array;
165	/// use ndarray_histogram::histogram::Edges;
166	///
167	/// let edges = Edges::from(vec![0, 5, 3]);
168	/// assert_eq!(edges.as_array_view(), array![0, 3, 5].view());
169	/// ```
170	#[must_use]
171	pub fn as_array_view(&self) -> ArrayView1<'_, A> {
172		ArrayView1::from(&self.edges)
173	}
174
175	/// Returns indices of two consecutive `edges` in `self`, if the interval they represent
176	/// contains the given `value`, or returns `None` otherwise.
177	///
178	/// That is to say, it returns
179	///
180	///   - `Some((left, right))`,
181	///
182	/// where `left` and `right` are the indices of two consecutive edges in `self` and
183	/// `right == left + 1`, if `self[left] <= value < self[right]` it returns
184	///
185	///   - `None`,
186	///
187	/// else otherwise.
188	///
189	/// # Examples
190	///
191	/// ```
192	/// use ndarray_histogram::histogram::Edges;
193	///
194	/// let edges = Edges::from(vec![0, 2, 3]);
195	/// // `1` is in the interval [0, 2), whose indices are (0, 1)
196	/// assert_eq!(edges.indices_of(&1), Some((0, 1)));
197	/// // `5` is not in any of intervals
198	/// assert_eq!(edges.indices_of(&5), None);
199	/// ```
200	pub fn indices_of(&self, value: &A) -> Option<(usize, usize)> {
201		// binary search for the correct bin
202		let n_edges = self.len();
203		match self.edges.binary_search(value) {
204			Ok(i) if i == n_edges - 1 => None,
205			Ok(i) => Some((i, i + 1)),
206			Err(i) => match i {
207				0 => None,
208				j if j == n_edges => None,
209				j => Some((j - 1, j)),
210			},
211		}
212	}
213
214	/// Returns an iterator over the `edges` in `self`.
215	pub fn iter(&self) -> impl Iterator<Item = &A> {
216		self.edges.iter()
217	}
218}
219
220/// A sorted collection of non-overlapping 1-dimensional intervals.
221///
222/// **Note** that all intervals are left-closed and right-open.
223///
224/// # Examples
225///
226/// ```
227/// use ndarray_histogram::{
228/// 	histogram::{Bins, Edges},
229/// 	o64,
230/// };
231///
232/// let edges = Edges::from(vec![o64(0.), o64(1.), o64(2.)]);
233/// let bins = Bins::new(edges);
234/// // first bin
235/// assert_eq!(
236/// 	bins.index(0),
237/// 	o64(0.)..o64(1.) // o64(1.) is not included in the bin!
238/// );
239/// // second bin
240/// assert_eq!(bins.index(1), o64(1.)..o64(2.));
241/// ```
242#[derive(Clone, Debug, Eq, PartialEq)]
243pub struct Bins<A: Ord + Send> {
244	edges: Edges<A>,
245}
246
247impl<A: Ord + Send> Bins<A> {
248	/// Returns a `Bins` instance where each bin corresponds to two consecutive members of the given
249	/// [`Edges`], consuming the edges.
250	///
251	/// [`Edges`]: struct.Edges.html
252	#[must_use]
253	pub fn new(edges: Edges<A>) -> Self {
254		Bins { edges }
255	}
256
257	/// Returns the number of bins in `self`.
258	///
259	/// # Examples
260	///
261	/// ```
262	/// use ndarray_histogram::{
263	/// 	histogram::{Bins, Edges},
264	/// 	o64,
265	/// };
266	///
267	/// let edges = Edges::from(vec![o64(0.), o64(1.), o64(2.)]);
268	/// let bins = Bins::new(edges);
269	/// assert_eq!(bins.len(), 2);
270	/// ```
271	#[must_use]
272	pub fn len(&self) -> usize {
273		match self.edges.len() {
274			0 => 0,
275			n => n - 1,
276		}
277	}
278
279	/// Returns `true` if the number of bins is zero, i.e. if the number of edges is 0 or 1.
280	///
281	/// # Examples
282	///
283	/// ```
284	/// use ndarray_histogram::{
285	/// 	O64,
286	/// 	histogram::{Bins, Edges},
287	/// 	o64,
288	/// };
289	///
290	/// // At least 2 edges is needed to represent 1 interval
291	/// let edges = Edges::from(vec![o64(0.), o64(1.), o64(3.)]);
292	/// let bins = Bins::new(edges);
293	/// assert_eq!(bins.is_empty(), false);
294	///
295	/// // No valid interval == Empty
296	/// let edges = Edges::<O64>::from(vec![]);
297	/// let bins = Bins::new(edges);
298	/// assert_eq!(bins.is_empty(), true);
299	/// let edges = Edges::from(vec![o64(0.)]);
300	/// let bins = Bins::new(edges);
301	/// assert_eq!(bins.is_empty(), true);
302	/// ```
303	#[must_use]
304	pub fn is_empty(&self) -> bool {
305		self.len() == 0
306	}
307
308	/// Returns the index of the bin in `self` that contains the given `value`,
309	/// or returns `None` if `value` does not belong to any bins in `self`.
310	///
311	/// # Examples
312	///
313	/// Basic usage:
314	///
315	/// ```
316	/// use ndarray_histogram::histogram::{Bins, Edges};
317	///
318	/// let edges = Edges::from(vec![0, 2, 4, 6]);
319	/// let bins = Bins::new(edges);
320	/// let value = 1;
321	/// // The first bin [0, 2) contains `1`
322	/// assert_eq!(bins.index_of(&1), Some(0));
323	/// // No bin contains 100
324	/// assert_eq!(bins.index_of(&100), None)
325	/// ```
326	///
327	/// Chaining [`Bins::index`] and [`Bins::index_of`] to get the boundaries of the bin containing
328	/// the value:
329	///
330	/// ```
331	/// # use ndarray_histogram::histogram::{Edges, Bins};
332	/// # let edges = Edges::from(vec![0, 2, 4, 6]);
333	/// # let bins = Bins::new(edges);
334	/// # let value = 1;
335	/// assert_eq!(
336	/// 	// using `Option::map` to avoid panic on index out-of-bounds
337	/// 	bins.index_of(&1).map(|i| bins.index(i)),
338	/// 	Some(0..2)
339	/// );
340	/// ```
341	pub fn index_of(&self, value: &A) -> Option<usize> {
342		self.edges.indices_of(value).map(|t| t.0)
343	}
344
345	/// Returns a range as the bin which contains the given `value`, or returns `None` otherwise.
346	///
347	/// # Examples
348	///
349	/// ```
350	/// use ndarray_histogram::histogram::{Bins, Edges};
351	///
352	/// let edges = Edges::from(vec![0, 2, 4, 6]);
353	/// let bins = Bins::new(edges);
354	/// // [0, 2) contains `1`
355	/// assert_eq!(bins.range_of(&1), Some(0..2));
356	/// // `10` is not in any interval
357	/// assert_eq!(bins.range_of(&10), None);
358	/// ```
359	pub fn range_of(&self, value: &A) -> Option<Range<A>>
360	where
361		A: Clone,
362	{
363		let edges_indexes = self.edges.indices_of(value);
364		edges_indexes.map(|(left, right)| Range {
365			start: self.edges[left].clone(),
366			end: self.edges[right].clone(),
367		})
368	}
369
370	/// Returns a range as the bin at the given `index` position.
371	///
372	/// # Panics
373	///
374	/// Panics if `index` is out of bounds.
375	///
376	/// # Examples
377	///
378	/// ```
379	/// use ndarray_histogram::histogram::{Bins, Edges};
380	///
381	/// let edges = Edges::from(vec![1, 5, 10, 20]);
382	/// let bins = Bins::new(edges);
383	/// assert_eq!(bins.index(1), 5..10);
384	/// ```
385	#[must_use]
386	pub fn index(&self, index: usize) -> Range<A>
387	where
388		A: Clone,
389	{
390		// It was not possible to implement this functionality
391		// using the `Index` trait unless we were willing to
392		// allocate a `Vec<Range<A>>` in the struct.
393		// Index, in fact, forces you to return a reference.
394		Range {
395			start: self.edges[index].clone(),
396			end: self.edges[index + 1].clone(),
397		}
398	}
399}
400
401#[cfg(test)]
402mod edges_tests {
403	use super::{Array1, Edges};
404	use quickcheck_macros::quickcheck;
405	use std::collections::BTreeSet;
406
407	#[quickcheck]
408	fn check_sorted_from_vec(v: Vec<i32>) -> bool {
409		let edges = Edges::from(v);
410		let n = edges.len();
411		for i in 1..n {
412			if edges[i - 1] > edges[i] {
413				return false;
414			}
415		}
416		true
417	}
418
419	#[quickcheck]
420	fn check_sorted_from_array(v: Vec<i32>) -> bool {
421		let a = Array1::from(v);
422		let edges = Edges::from(a);
423		let n = edges.len();
424		for i in 1..n {
425			if edges[i - 1] > edges[i] {
426				return false;
427			}
428		}
429		true
430	}
431
432	#[quickcheck]
433	fn edges_are_right_open(v: Vec<i32>) -> bool {
434		let edges = Edges::from(v);
435		let view = edges.as_array_view();
436		if view.is_empty() {
437			true
438		} else {
439			let last = view[view.len() - 1];
440			edges.indices_of(&last).is_none()
441		}
442	}
443
444	#[quickcheck]
445	fn edges_are_left_closed(v: Vec<i32>) -> bool {
446		let edges = Edges::from(v);
447		if let 1 = edges.len() {
448			true
449		} else {
450			let view = edges.as_array_view();
451			if view.is_empty() {
452				true
453			} else {
454				let first = view[0];
455				edges.indices_of(&first).is_some()
456			}
457		}
458	}
459
460	#[quickcheck]
461	#[allow(clippy::needless_pass_by_value)]
462	fn edges_are_deduped(v: Vec<i32>) -> bool {
463		let unique_elements = v.iter().collect::<BTreeSet<&i32>>();
464		let edges = Edges::from(v.clone());
465		let view = edges.as_array_view();
466		let unique_edges = view.iter().collect::<BTreeSet<&i32>>();
467		unique_edges == unique_elements
468	}
469}
470
471#[cfg(test)]
472mod bins_tests {
473	use super::{Bins, Edges};
474
475	#[test]
476	#[should_panic]
477	#[allow(unused_must_use)]
478	fn get_panics_for_out_of_bounds_indexes() {
479		let edges = Edges::from(vec![0]);
480		let bins = Bins::new(edges);
481		// we need at least two edges to make a valid bin!
482		bins.index(0);
483	}
484}