ndarray_stats/histogram/bins.rs
1#![warn(missing_docs, clippy::all, clippy::pedantic)]
2
3use ndarray::prelude::*;
4use std::ops::{Index, Range};
5
6/// A sorted collection of type `A` elements used to represent the boundaries of intervals, i.e.
7/// [`Bins`] on a 1-dimensional axis.
8///
9/// **Note** that all intervals are left-closed and right-open. See examples below.
10///
11/// # Examples
12///
13/// ```
14/// use ndarray_stats::histogram::{Bins, Edges};
15/// use noisy_float::types::n64;
16///
17/// let unit_edges = Edges::from(vec![n64(0.), n64(1.)]);
18/// let unit_interval = Bins::new(unit_edges);
19/// // left-closed
20/// assert_eq!(
21/// unit_interval.range_of(&n64(0.)).unwrap(),
22/// n64(0.)..n64(1.),
23/// );
24/// // right-open
25/// assert_eq!(
26/// unit_interval.range_of(&n64(1.)),
27/// None
28/// );
29/// ```
30///
31/// [`Bins`]: struct.Bins.html
32#[derive(Clone, Debug, Eq, PartialEq)]
33pub struct Edges<A: Ord> {
34 edges: Vec<A>,
35}
36
37impl<A: Ord> From<Vec<A>> for Edges<A> {
38 /// Converts a `Vec<A>` into an `Edges<A>`, consuming the edges.
39 /// The vector will be sorted in increasing order using an unstable sorting algorithm, with
40 /// duplicates removed.
41 ///
42 /// # Current implementation
43 ///
44 /// The current sorting algorithm is the same as [`std::slice::sort_unstable()`][sort],
45 /// which is based on [pattern-defeating quicksort][pdqsort].
46 ///
47 /// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not allocate)
48 /// , and O(n log n) worst-case.
49 ///
50 /// # Examples
51 ///
52 /// ```
53 /// use ndarray::array;
54 /// use ndarray_stats::histogram::Edges;
55 ///
56 /// let edges = Edges::from(array![1, 15, 10, 10, 20]);
57 /// // The array gets sorted!
58 /// assert_eq!(
59 /// edges[2],
60 /// 15
61 /// );
62 /// ```
63 ///
64 /// [sort]: https://doc.rust-lang.org/stable/std/primitive.slice.html#method.sort_unstable
65 /// [pdqsort]: https://github.com/orlp/pdqsort
66 fn from(mut edges: Vec<A>) -> Self {
67 // sort the array in-place
68 edges.sort_unstable();
69 // remove duplicates
70 edges.dedup();
71 Edges { edges }
72 }
73}
74
75impl<A: Ord + Clone> From<Array1<A>> for Edges<A> {
76 /// Converts an `Array1<A>` into an `Edges<A>`, consuming the 1-dimensional array.
77 /// The array will be sorted in increasing order using an unstable sorting algorithm, with
78 /// duplicates removed.
79 ///
80 /// # Current implementation
81 ///
82 /// The current sorting algorithm is the same as [`std::slice::sort_unstable()`][sort],
83 /// which is based on [pattern-defeating quicksort][pdqsort].
84 ///
85 /// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not allocate)
86 /// , and O(n log n) worst-case.
87 ///
88 /// # Examples
89 ///
90 /// ```
91 /// use ndarray_stats::histogram::Edges;
92 ///
93 /// let edges = Edges::from(vec![1, 15, 10, 20]);
94 /// // The vec gets sorted!
95 /// assert_eq!(
96 /// edges[1],
97 /// 10
98 /// );
99 /// ```
100 ///
101 /// [sort]: https://doc.rust-lang.org/stable/std/primitive.slice.html#method.sort_unstable
102 /// [pdqsort]: https://github.com/orlp/pdqsort
103 fn from(edges: Array1<A>) -> Self {
104 let edges = edges.to_vec();
105 Self::from(edges)
106 }
107}
108
109impl<A: Ord> Index<usize> for Edges<A> {
110 type Output = A;
111
112 /// Returns a reference to the `i`-th edge in `self`.
113 ///
114 /// # Panics
115 ///
116 /// Panics if the index `i` is out of bounds.
117 ///
118 /// # Examples
119 ///
120 /// ```
121 /// use ndarray_stats::histogram::Edges;
122 ///
123 /// let edges = Edges::from(vec![1, 5, 10, 20]);
124 /// assert_eq!(
125 /// edges[1],
126 /// 5
127 /// );
128 /// ```
129 fn index(&self, i: usize) -> &Self::Output {
130 &self.edges[i]
131 }
132}
133
134impl<A: Ord> Edges<A> {
135 /// Returns the number of edges in `self`.
136 ///
137 /// # Examples
138 ///
139 /// ```
140 /// use ndarray_stats::histogram::Edges;
141 /// use noisy_float::types::n64;
142 ///
143 /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(3.)]);
144 /// assert_eq!(
145 /// edges.len(),
146 /// 3
147 /// );
148 /// ```
149 #[must_use]
150 pub fn len(&self) -> usize {
151 self.edges.len()
152 }
153
154 /// Returns `true` if `self` contains no edges.
155 ///
156 /// # Examples
157 ///
158 /// ```
159 /// use ndarray_stats::histogram::Edges;
160 /// use noisy_float::types::{N64, n64};
161 ///
162 /// let edges = Edges::<N64>::from(vec![]);
163 /// assert_eq!(edges.is_empty(), true);
164 ///
165 /// let edges = Edges::from(vec![n64(0.), n64(2.), n64(5.)]);
166 /// assert_eq!(edges.is_empty(), false);
167 /// ```
168 #[must_use]
169 pub fn is_empty(&self) -> bool {
170 self.edges.is_empty()
171 }
172
173 /// Returns an immutable 1-dimensional array view of edges.
174 ///
175 /// # Examples
176 ///
177 /// ```
178 /// use ndarray::array;
179 /// use ndarray_stats::histogram::Edges;
180 ///
181 /// let edges = Edges::from(vec![0, 5, 3]);
182 /// assert_eq!(
183 /// edges.as_array_view(),
184 /// array![0, 3, 5].view()
185 /// );
186 /// ```
187 #[must_use]
188 pub fn as_array_view(&self) -> ArrayView1<'_, A> {
189 ArrayView1::from(&self.edges)
190 }
191
192 /// Returns indices of two consecutive `edges` in `self`, if the interval they represent
193 /// contains the given `value`, or returns `None` otherwise.
194 ///
195 /// That is to say, it returns
196 /// - `Some((left, right))`, where `left` and `right` are the indices of two consecutive edges
197 /// in `self` and `right == left + 1`, if `self[left] <= value < self[right]`;
198 /// - `None`, otherwise.
199 ///
200 /// # Examples
201 ///
202 /// ```
203 /// use ndarray_stats::histogram::Edges;
204 ///
205 /// let edges = Edges::from(vec![0, 2, 3]);
206 /// // `1` is in the interval [0, 2), whose indices are (0, 1)
207 /// assert_eq!(
208 /// edges.indices_of(&1),
209 /// Some((0, 1))
210 /// );
211 /// // `5` is not in any of intervals
212 /// assert_eq!(
213 /// edges.indices_of(&5),
214 /// None
215 /// );
216 /// ```
217 pub fn indices_of(&self, value: &A) -> Option<(usize, usize)> {
218 // binary search for the correct bin
219 let n_edges = self.len();
220 match self.edges.binary_search(value) {
221 Ok(i) if i == n_edges - 1 => None,
222 Ok(i) => Some((i, i + 1)),
223 Err(i) => match i {
224 0 => None,
225 j if j == n_edges => None,
226 j => Some((j - 1, j)),
227 },
228 }
229 }
230
231 /// Returns an iterator over the `edges` in `self`.
232 pub fn iter(&self) -> impl Iterator<Item = &A> {
233 self.edges.iter()
234 }
235}
236
237/// A sorted collection of non-overlapping 1-dimensional intervals.
238///
239/// **Note** that all intervals are left-closed and right-open.
240///
241/// # Examples
242///
243/// ```
244/// use ndarray_stats::histogram::{Edges, Bins};
245/// use noisy_float::types::n64;
246///
247/// let edges = Edges::from(vec![n64(0.), n64(1.), n64(2.)]);
248/// let bins = Bins::new(edges);
249/// // first bin
250/// assert_eq!(
251/// bins.index(0),
252/// n64(0.)..n64(1.) // n64(1.) is not included in the bin!
253/// );
254/// // second bin
255/// assert_eq!(
256/// bins.index(1),
257/// n64(1.)..n64(2.)
258/// );
259/// ```
260#[derive(Clone, Debug, Eq, PartialEq)]
261pub struct Bins<A: Ord> {
262 edges: Edges<A>,
263}
264
265impl<A: Ord> Bins<A> {
266 /// Returns a `Bins` instance where each bin corresponds to two consecutive members of the given
267 /// [`Edges`], consuming the edges.
268 ///
269 /// [`Edges`]: struct.Edges.html
270 #[must_use]
271 pub fn new(edges: Edges<A>) -> Self {
272 Bins { edges }
273 }
274
275 /// Returns the number of bins in `self`.
276 ///
277 /// # Examples
278 ///
279 /// ```
280 /// use ndarray_stats::histogram::{Edges, Bins};
281 /// use noisy_float::types::n64;
282 ///
283 /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(2.)]);
284 /// let bins = Bins::new(edges);
285 /// assert_eq!(
286 /// bins.len(),
287 /// 2
288 /// );
289 /// ```
290 #[must_use]
291 pub fn len(&self) -> usize {
292 match self.edges.len() {
293 0 => 0,
294 n => n - 1,
295 }
296 }
297
298 /// Returns `true` if the number of bins is zero, i.e. if the number of edges is 0 or 1.
299 ///
300 /// # Examples
301 ///
302 /// ```
303 /// use ndarray_stats::histogram::{Edges, Bins};
304 /// use noisy_float::types::{N64, n64};
305 ///
306 /// // At least 2 edges is needed to represent 1 interval
307 /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(3.)]);
308 /// let bins = Bins::new(edges);
309 /// assert_eq!(bins.is_empty(), false);
310 ///
311 /// // No valid interval == Empty
312 /// let edges = Edges::<N64>::from(vec![]);
313 /// let bins = Bins::new(edges);
314 /// assert_eq!(bins.is_empty(), true);
315 /// let edges = Edges::from(vec![n64(0.)]);
316 /// let bins = Bins::new(edges);
317 /// assert_eq!(bins.is_empty(), true);
318 /// ```
319 #[must_use]
320 pub fn is_empty(&self) -> bool {
321 self.len() == 0
322 }
323
324 /// Returns the index of the bin in `self` that contains the given `value`,
325 /// or returns `None` if `value` does not belong to any bins in `self`.
326 ///
327 /// # Examples
328 ///
329 /// Basic usage:
330 ///
331 /// ```
332 /// use ndarray_stats::histogram::{Edges, Bins};
333 ///
334 /// let edges = Edges::from(vec![0, 2, 4, 6]);
335 /// let bins = Bins::new(edges);
336 /// let value = 1;
337 /// // The first bin [0, 2) contains `1`
338 /// assert_eq!(
339 /// bins.index_of(&1),
340 /// Some(0)
341 /// );
342 /// // No bin contains 100
343 /// assert_eq!(
344 /// bins.index_of(&100),
345 /// None
346 /// )
347 /// ```
348 ///
349 /// Chaining [`Bins::index`] and [`Bins::index_of`] to get the boundaries of the bin containing
350 /// the value:
351 ///
352 /// ```
353 /// # use ndarray_stats::histogram::{Edges, Bins};
354 /// # let edges = Edges::from(vec![0, 2, 4, 6]);
355 /// # let bins = Bins::new(edges);
356 /// # let value = 1;
357 /// assert_eq!(
358 /// // using `Option::map` to avoid panic on index out-of-bounds
359 /// bins.index_of(&1).map(|i| bins.index(i)),
360 /// Some(0..2)
361 /// );
362 /// ```
363 pub fn index_of(&self, value: &A) -> Option<usize> {
364 self.edges.indices_of(value).map(|t| t.0)
365 }
366
367 /// Returns a range as the bin which contains the given `value`, or returns `None` otherwise.
368 ///
369 /// # Examples
370 ///
371 /// ```
372 /// use ndarray_stats::histogram::{Edges, Bins};
373 ///
374 /// let edges = Edges::from(vec![0, 2, 4, 6]);
375 /// let bins = Bins::new(edges);
376 /// // [0, 2) contains `1`
377 /// assert_eq!(
378 /// bins.range_of(&1),
379 /// Some(0..2)
380 /// );
381 /// // `10` is not in any interval
382 /// assert_eq!(
383 /// bins.range_of(&10),
384 /// None
385 /// );
386 /// ```
387 pub fn range_of(&self, value: &A) -> Option<Range<A>>
388 where
389 A: Clone,
390 {
391 let edges_indexes = self.edges.indices_of(value);
392 edges_indexes.map(|(left, right)| Range {
393 start: self.edges[left].clone(),
394 end: self.edges[right].clone(),
395 })
396 }
397
398 /// Returns a range as the bin at the given `index` position.
399 ///
400 /// # Panics
401 ///
402 /// Panics if `index` is out of bounds.
403 ///
404 /// # Examples
405 ///
406 /// ```
407 /// use ndarray_stats::histogram::{Edges, Bins};
408 ///
409 /// let edges = Edges::from(vec![1, 5, 10, 20]);
410 /// let bins = Bins::new(edges);
411 /// assert_eq!(
412 /// bins.index(1),
413 /// 5..10
414 /// );
415 /// ```
416 #[must_use]
417 pub fn index(&self, index: usize) -> Range<A>
418 where
419 A: Clone,
420 {
421 // It was not possible to implement this functionality
422 // using the `Index` trait unless we were willing to
423 // allocate a `Vec<Range<A>>` in the struct.
424 // Index, in fact, forces you to return a reference.
425 Range {
426 start: self.edges[index].clone(),
427 end: self.edges[index + 1].clone(),
428 }
429 }
430}
431
432#[cfg(test)]
433mod edges_tests {
434 use super::{Array1, Edges};
435 use quickcheck_macros::quickcheck;
436 use std::collections::BTreeSet;
437 use std::iter::FromIterator;
438
439 #[quickcheck]
440 fn check_sorted_from_vec(v: Vec<i32>) -> bool {
441 let edges = Edges::from(v);
442 let n = edges.len();
443 for i in 1..n {
444 if edges[i - 1] > edges[i] {
445 return false;
446 }
447 }
448 true
449 }
450
451 #[quickcheck]
452 fn check_sorted_from_array(v: Vec<i32>) -> bool {
453 let a = Array1::from(v);
454 let edges = Edges::from(a);
455 let n = edges.len();
456 for i in 1..n {
457 if edges[i - 1] > edges[i] {
458 return false;
459 }
460 }
461 true
462 }
463
464 #[quickcheck]
465 fn edges_are_right_open(v: Vec<i32>) -> bool {
466 let edges = Edges::from(v);
467 let view = edges.as_array_view();
468 if view.is_empty() {
469 true
470 } else {
471 let last = view[view.len() - 1];
472 edges.indices_of(&last).is_none()
473 }
474 }
475
476 #[quickcheck]
477 fn edges_are_left_closed(v: Vec<i32>) -> bool {
478 let edges = Edges::from(v);
479 if let 1 = edges.len() {
480 true
481 } else {
482 let view = edges.as_array_view();
483 if view.is_empty() {
484 true
485 } else {
486 let first = view[0];
487 edges.indices_of(&first).is_some()
488 }
489 }
490 }
491
492 #[quickcheck]
493 #[allow(clippy::needless_pass_by_value)]
494 fn edges_are_deduped(v: Vec<i32>) -> bool {
495 let unique_elements = BTreeSet::from_iter(v.iter());
496 let edges = Edges::from(v.clone());
497 let view = edges.as_array_view();
498 let unique_edges = BTreeSet::from_iter(view.iter());
499 unique_edges == unique_elements
500 }
501}
502
503#[cfg(test)]
504mod bins_tests {
505 use super::{Bins, Edges};
506
507 #[test]
508 #[should_panic]
509 #[allow(unused_must_use)]
510 fn get_panics_for_out_of_bounds_indexes() {
511 let edges = Edges::from(vec![0]);
512 let bins = Bins::new(edges);
513 // we need at least two edges to make a valid bin!
514 bins.index(0);
515 }
516}