ndarray_histogram/histogram/bins.rs
1use ndarray::prelude::*;
2use std::ops::{Index, Range};
3
4#[cfg(feature = "rayon")]
5use rayon::slice::ParallelSliceMut;
6
7/// A sorted collection of type `A` elements used to represent the boundaries of intervals, i.e.
8/// [`Bins`] on a 1-dimensional axis.
9///
10/// **Note** that all intervals are left-closed and right-open. See examples below.
11///
12/// # Examples
13///
14/// ```
15/// use ndarray_histogram::{
16/// histogram::{Bins, Edges},
17/// o64,
18/// };
19///
20/// let unit_edges = Edges::from(vec![o64(0.), o64(1.)]);
21/// let unit_interval = Bins::new(unit_edges);
22/// // left-closed
23/// assert_eq!(unit_interval.range_of(&o64(0.)).unwrap(), o64(0.)..o64(1.),);
24/// // right-open
25/// assert_eq!(unit_interval.range_of(&o64(1.)), None);
26/// ```
27///
28/// [`Bins`]: struct.Bins.html
29#[derive(Clone, Debug, Eq, PartialEq)]
30pub struct Edges<A: Ord + Send> {
31 edges: Vec<A>,
32}
33
34impl<A: Ord + Send> From<Vec<A>> for Edges<A> {
35 /// Converts a `Vec<A>` into an `Edges<A>`, consuming the edges.
36 /// The vector will be sorted in increasing order using an unstable sorting algorithm, with
37 /// duplicates removed.
38 ///
39 /// # Current implementation
40 ///
41 /// The current sorting algorithm is the same as [`std::slice::sort_unstable()`][sort],
42 /// which is based on [pattern-defeating quicksort][pdqsort].
43 ///
44 /// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not allocate)
45 /// , and O(n log n) worst-case.
46 ///
47 /// # Examples
48 ///
49 /// ```
50 /// use ndarray::array;
51 /// use ndarray_histogram::histogram::Edges;
52 ///
53 /// let edges = Edges::from(array![1, 15, 10, 10, 20]);
54 /// // The array gets sorted!
55 /// assert_eq!(edges[2], 15);
56 /// ```
57 ///
58 /// [sort]: https://doc.rust-lang.org/stable/std/primitive.slice.html#method.sort_unstable
59 /// [pdqsort]: https://github.com/orlp/pdqsort
60 fn from(mut edges: Vec<A>) -> Self {
61 // sort the array in-place
62 #[cfg(feature = "rayon")]
63 edges.par_sort_unstable();
64 #[cfg(not(feature = "rayon"))]
65 edges.sort_unstable();
66 // remove duplicates
67 edges.dedup();
68 Edges { edges }
69 }
70}
71
72impl<A: Ord + Send + Clone> From<Array1<A>> for Edges<A> {
73 /// Converts an `Array1<A>` into an `Edges<A>`, consuming the 1-dimensional array.
74 /// The array will be sorted in increasing order using an unstable sorting algorithm, with
75 /// duplicates removed.
76 ///
77 /// # Current implementation
78 ///
79 /// The current sorting algorithm is the same as [`std::slice::sort_unstable()`][sort],
80 /// which is based on [pattern-defeating quicksort][pdqsort].
81 ///
82 /// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not allocate)
83 /// , and O(n log n) worst-case.
84 ///
85 /// # Examples
86 ///
87 /// ```
88 /// use ndarray_histogram::histogram::Edges;
89 ///
90 /// let edges = Edges::from(vec![1, 15, 10, 20]);
91 /// // The vec gets sorted!
92 /// assert_eq!(edges[1], 10);
93 /// ```
94 ///
95 /// [sort]: https://doc.rust-lang.org/stable/std/primitive.slice.html#method.sort_unstable
96 /// [pdqsort]: https://github.com/orlp/pdqsort
97 fn from(edges: Array1<A>) -> Self {
98 let edges = edges.to_vec();
99 Self::from(edges)
100 }
101}
102
103impl<A: Ord + Send> Index<usize> for Edges<A> {
104 type Output = A;
105
106 /// Returns a reference to the `i`-th edge in `self`.
107 ///
108 /// # Panics
109 ///
110 /// Panics if the index `i` is out of bounds.
111 ///
112 /// # Examples
113 ///
114 /// ```
115 /// use ndarray_histogram::histogram::Edges;
116 ///
117 /// let edges = Edges::from(vec![1, 5, 10, 20]);
118 /// assert_eq!(edges[1], 5);
119 /// ```
120 fn index(&self, i: usize) -> &Self::Output {
121 &self.edges[i]
122 }
123}
124
125impl<A: Ord + Send> Edges<A> {
126 /// Returns the number of edges in `self`.
127 ///
128 /// # Examples
129 ///
130 /// ```
131 /// use ndarray_histogram::{histogram::Edges, o64};
132 ///
133 /// let edges = Edges::from(vec![o64(0.), o64(1.), o64(3.)]);
134 /// assert_eq!(edges.len(), 3);
135 /// ```
136 #[must_use]
137 pub fn len(&self) -> usize {
138 self.edges.len()
139 }
140
141 /// Returns `true` if `self` contains no edges.
142 ///
143 /// # Examples
144 ///
145 /// ```
146 /// use ndarray_histogram::{O64, histogram::Edges, o64};
147 ///
148 /// let edges = Edges::<O64>::from(vec![]);
149 /// assert_eq!(edges.is_empty(), true);
150 ///
151 /// let edges = Edges::from(vec![o64(0.), o64(2.), o64(5.)]);
152 /// assert_eq!(edges.is_empty(), false);
153 /// ```
154 #[must_use]
155 pub fn is_empty(&self) -> bool {
156 self.edges.is_empty()
157 }
158
159 /// Returns an immutable 1-dimensional array view of edges.
160 ///
161 /// # Examples
162 ///
163 /// ```
164 /// use ndarray::array;
165 /// use ndarray_histogram::histogram::Edges;
166 ///
167 /// let edges = Edges::from(vec![0, 5, 3]);
168 /// assert_eq!(edges.as_array_view(), array![0, 3, 5].view());
169 /// ```
170 #[must_use]
171 pub fn as_array_view(&self) -> ArrayView1<'_, A> {
172 ArrayView1::from(&self.edges)
173 }
174
175 /// Returns indices of two consecutive `edges` in `self`, if the interval they represent
176 /// contains the given `value`, or returns `None` otherwise.
177 ///
178 /// That is to say, it returns
179 ///
180 /// - `Some((left, right))`,
181 ///
182 /// where `left` and `right` are the indices of two consecutive edges in `self` and
183 /// `right == left + 1`, if `self[left] <= value < self[right]` it returns
184 ///
185 /// - `None`,
186 ///
187 /// else otherwise.
188 ///
189 /// # Examples
190 ///
191 /// ```
192 /// use ndarray_histogram::histogram::Edges;
193 ///
194 /// let edges = Edges::from(vec![0, 2, 3]);
195 /// // `1` is in the interval [0, 2), whose indices are (0, 1)
196 /// assert_eq!(edges.indices_of(&1), Some((0, 1)));
197 /// // `5` is not in any of intervals
198 /// assert_eq!(edges.indices_of(&5), None);
199 /// ```
200 pub fn indices_of(&self, value: &A) -> Option<(usize, usize)> {
201 // binary search for the correct bin
202 let n_edges = self.len();
203 match self.edges.binary_search(value) {
204 Ok(i) if i == n_edges - 1 => None,
205 Ok(i) => Some((i, i + 1)),
206 Err(i) => match i {
207 0 => None,
208 j if j == n_edges => None,
209 j => Some((j - 1, j)),
210 },
211 }
212 }
213
214 /// Returns an iterator over the `edges` in `self`.
215 pub fn iter(&self) -> impl Iterator<Item = &A> {
216 self.edges.iter()
217 }
218}
219
220/// A sorted collection of non-overlapping 1-dimensional intervals.
221///
222/// **Note** that all intervals are left-closed and right-open.
223///
224/// # Examples
225///
226/// ```
227/// use ndarray_histogram::{
228/// histogram::{Bins, Edges},
229/// o64,
230/// };
231///
232/// let edges = Edges::from(vec![o64(0.), o64(1.), o64(2.)]);
233/// let bins = Bins::new(edges);
234/// // first bin
235/// assert_eq!(
236/// bins.index(0),
237/// o64(0.)..o64(1.) // o64(1.) is not included in the bin!
238/// );
239/// // second bin
240/// assert_eq!(bins.index(1), o64(1.)..o64(2.));
241/// ```
242#[derive(Clone, Debug, Eq, PartialEq)]
243pub struct Bins<A: Ord + Send> {
244 edges: Edges<A>,
245}
246
247impl<A: Ord + Send> Bins<A> {
248 /// Returns a `Bins` instance where each bin corresponds to two consecutive members of the given
249 /// [`Edges`], consuming the edges.
250 ///
251 /// [`Edges`]: struct.Edges.html
252 #[must_use]
253 pub fn new(edges: Edges<A>) -> Self {
254 Bins { edges }
255 }
256
257 /// Returns the number of bins in `self`.
258 ///
259 /// # Examples
260 ///
261 /// ```
262 /// use ndarray_histogram::{
263 /// histogram::{Bins, Edges},
264 /// o64,
265 /// };
266 ///
267 /// let edges = Edges::from(vec![o64(0.), o64(1.), o64(2.)]);
268 /// let bins = Bins::new(edges);
269 /// assert_eq!(bins.len(), 2);
270 /// ```
271 #[must_use]
272 pub fn len(&self) -> usize {
273 match self.edges.len() {
274 0 => 0,
275 n => n - 1,
276 }
277 }
278
279 /// Returns `true` if the number of bins is zero, i.e. if the number of edges is 0 or 1.
280 ///
281 /// # Examples
282 ///
283 /// ```
284 /// use ndarray_histogram::{
285 /// O64,
286 /// histogram::{Bins, Edges},
287 /// o64,
288 /// };
289 ///
290 /// // At least 2 edges is needed to represent 1 interval
291 /// let edges = Edges::from(vec![o64(0.), o64(1.), o64(3.)]);
292 /// let bins = Bins::new(edges);
293 /// assert_eq!(bins.is_empty(), false);
294 ///
295 /// // No valid interval == Empty
296 /// let edges = Edges::<O64>::from(vec![]);
297 /// let bins = Bins::new(edges);
298 /// assert_eq!(bins.is_empty(), true);
299 /// let edges = Edges::from(vec![o64(0.)]);
300 /// let bins = Bins::new(edges);
301 /// assert_eq!(bins.is_empty(), true);
302 /// ```
303 #[must_use]
304 pub fn is_empty(&self) -> bool {
305 self.len() == 0
306 }
307
308 /// Returns the index of the bin in `self` that contains the given `value`,
309 /// or returns `None` if `value` does not belong to any bins in `self`.
310 ///
311 /// # Examples
312 ///
313 /// Basic usage:
314 ///
315 /// ```
316 /// use ndarray_histogram::histogram::{Bins, Edges};
317 ///
318 /// let edges = Edges::from(vec![0, 2, 4, 6]);
319 /// let bins = Bins::new(edges);
320 /// let value = 1;
321 /// // The first bin [0, 2) contains `1`
322 /// assert_eq!(bins.index_of(&1), Some(0));
323 /// // No bin contains 100
324 /// assert_eq!(bins.index_of(&100), None)
325 /// ```
326 ///
327 /// Chaining [`Bins::index`] and [`Bins::index_of`] to get the boundaries of the bin containing
328 /// the value:
329 ///
330 /// ```
331 /// # use ndarray_histogram::histogram::{Edges, Bins};
332 /// # let edges = Edges::from(vec![0, 2, 4, 6]);
333 /// # let bins = Bins::new(edges);
334 /// # let value = 1;
335 /// assert_eq!(
336 /// // using `Option::map` to avoid panic on index out-of-bounds
337 /// bins.index_of(&1).map(|i| bins.index(i)),
338 /// Some(0..2)
339 /// );
340 /// ```
341 pub fn index_of(&self, value: &A) -> Option<usize> {
342 self.edges.indices_of(value).map(|t| t.0)
343 }
344
345 /// Returns a range as the bin which contains the given `value`, or returns `None` otherwise.
346 ///
347 /// # Examples
348 ///
349 /// ```
350 /// use ndarray_histogram::histogram::{Bins, Edges};
351 ///
352 /// let edges = Edges::from(vec![0, 2, 4, 6]);
353 /// let bins = Bins::new(edges);
354 /// // [0, 2) contains `1`
355 /// assert_eq!(bins.range_of(&1), Some(0..2));
356 /// // `10` is not in any interval
357 /// assert_eq!(bins.range_of(&10), None);
358 /// ```
359 pub fn range_of(&self, value: &A) -> Option<Range<A>>
360 where
361 A: Clone,
362 {
363 let edges_indexes = self.edges.indices_of(value);
364 edges_indexes.map(|(left, right)| Range {
365 start: self.edges[left].clone(),
366 end: self.edges[right].clone(),
367 })
368 }
369
370 /// Returns a range as the bin at the given `index` position.
371 ///
372 /// # Panics
373 ///
374 /// Panics if `index` is out of bounds.
375 ///
376 /// # Examples
377 ///
378 /// ```
379 /// use ndarray_histogram::histogram::{Bins, Edges};
380 ///
381 /// let edges = Edges::from(vec![1, 5, 10, 20]);
382 /// let bins = Bins::new(edges);
383 /// assert_eq!(bins.index(1), 5..10);
384 /// ```
385 #[must_use]
386 pub fn index(&self, index: usize) -> Range<A>
387 where
388 A: Clone,
389 {
390 // It was not possible to implement this functionality
391 // using the `Index` trait unless we were willing to
392 // allocate a `Vec<Range<A>>` in the struct.
393 // Index, in fact, forces you to return a reference.
394 Range {
395 start: self.edges[index].clone(),
396 end: self.edges[index + 1].clone(),
397 }
398 }
399}
400
401#[cfg(test)]
402mod edges_tests {
403 use super::{Array1, Edges};
404 use quickcheck_macros::quickcheck;
405 use std::collections::BTreeSet;
406
407 #[quickcheck]
408 fn check_sorted_from_vec(v: Vec<i32>) -> bool {
409 let edges = Edges::from(v);
410 let n = edges.len();
411 for i in 1..n {
412 if edges[i - 1] > edges[i] {
413 return false;
414 }
415 }
416 true
417 }
418
419 #[quickcheck]
420 fn check_sorted_from_array(v: Vec<i32>) -> bool {
421 let a = Array1::from(v);
422 let edges = Edges::from(a);
423 let n = edges.len();
424 for i in 1..n {
425 if edges[i - 1] > edges[i] {
426 return false;
427 }
428 }
429 true
430 }
431
432 #[quickcheck]
433 fn edges_are_right_open(v: Vec<i32>) -> bool {
434 let edges = Edges::from(v);
435 let view = edges.as_array_view();
436 if view.is_empty() {
437 true
438 } else {
439 let last = view[view.len() - 1];
440 edges.indices_of(&last).is_none()
441 }
442 }
443
444 #[quickcheck]
445 fn edges_are_left_closed(v: Vec<i32>) -> bool {
446 let edges = Edges::from(v);
447 if let 1 = edges.len() {
448 true
449 } else {
450 let view = edges.as_array_view();
451 if view.is_empty() {
452 true
453 } else {
454 let first = view[0];
455 edges.indices_of(&first).is_some()
456 }
457 }
458 }
459
460 #[quickcheck]
461 #[allow(clippy::needless_pass_by_value)]
462 fn edges_are_deduped(v: Vec<i32>) -> bool {
463 let unique_elements = v.iter().collect::<BTreeSet<&i32>>();
464 let edges = Edges::from(v.clone());
465 let view = edges.as_array_view();
466 let unique_edges = view.iter().collect::<BTreeSet<&i32>>();
467 unique_edges == unique_elements
468 }
469}
470
471#[cfg(test)]
472mod bins_tests {
473 use super::{Bins, Edges};
474
475 #[test]
476 #[should_panic]
477 #[allow(unused_must_use)]
478 fn get_panics_for_out_of_bounds_indexes() {
479 let edges = Edges::from(vec![0]);
480 let bins = Bins::new(edges);
481 // we need at least two edges to make a valid bin!
482 bins.index(0);
483 }
484}