ndarray_stats/histogram/
bins.rs

1#![warn(missing_docs, clippy::all, clippy::pedantic)]
2
3use ndarray::prelude::*;
4use std::ops::{Index, Range};
5
6/// A sorted collection of type `A` elements used to represent the boundaries of intervals, i.e.
7/// [`Bins`] on a 1-dimensional axis.
8///
9/// **Note** that all intervals are left-closed and right-open. See examples below.
10///
11/// # Examples
12///
13/// ```
14/// use ndarray_stats::histogram::{Bins, Edges};
15/// use noisy_float::types::n64;
16///
17/// let unit_edges = Edges::from(vec![n64(0.), n64(1.)]);
18/// let unit_interval = Bins::new(unit_edges);
19/// // left-closed
20/// assert_eq!(
21///     unit_interval.range_of(&n64(0.)).unwrap(),
22///     n64(0.)..n64(1.),
23/// );
24/// // right-open
25/// assert_eq!(
26///     unit_interval.range_of(&n64(1.)),
27///     None
28/// );
29/// ```
30///
31/// [`Bins`]: struct.Bins.html
32#[derive(Clone, Debug, Eq, PartialEq)]
33pub struct Edges<A: Ord> {
34    edges: Vec<A>,
35}
36
37impl<A: Ord> From<Vec<A>> for Edges<A> {
38    /// Converts a `Vec<A>` into an `Edges<A>`, consuming the edges.
39    /// The vector will be sorted in increasing order using an unstable sorting algorithm, with
40    /// duplicates removed.
41    ///
42    /// # Current implementation
43    ///
44    /// The current sorting algorithm is the same as [`std::slice::sort_unstable()`][sort],
45    /// which is based on [pattern-defeating quicksort][pdqsort].
46    ///
47    /// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not allocate)
48    /// , and O(n log n) worst-case.
49    ///
50    /// # Examples
51    ///
52    /// ```
53    /// use ndarray::array;
54    /// use ndarray_stats::histogram::Edges;
55    ///
56    /// let edges = Edges::from(array![1, 15, 10, 10, 20]);
57    /// // The array gets sorted!
58    /// assert_eq!(
59    ///     edges[2],
60    ///     15
61    /// );
62    /// ```
63    ///
64    /// [sort]: https://doc.rust-lang.org/stable/std/primitive.slice.html#method.sort_unstable
65    /// [pdqsort]: https://github.com/orlp/pdqsort
66    fn from(mut edges: Vec<A>) -> Self {
67        // sort the array in-place
68        edges.sort_unstable();
69        // remove duplicates
70        edges.dedup();
71        Edges { edges }
72    }
73}
74
75impl<A: Ord + Clone> From<Array1<A>> for Edges<A> {
76    /// Converts an `Array1<A>` into an `Edges<A>`, consuming the 1-dimensional array.
77    /// The array will be sorted in increasing order using an unstable sorting algorithm, with
78    /// duplicates removed.
79    ///
80    /// # Current implementation
81    ///
82    /// The current sorting algorithm is the same as [`std::slice::sort_unstable()`][sort],
83    /// which is based on [pattern-defeating quicksort][pdqsort].
84    ///
85    /// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not allocate)
86    /// , and O(n log n) worst-case.
87    ///
88    /// # Examples
89    ///
90    /// ```
91    /// use ndarray_stats::histogram::Edges;
92    ///
93    /// let edges = Edges::from(vec![1, 15, 10, 20]);
94    /// // The vec gets sorted!
95    /// assert_eq!(
96    ///     edges[1],
97    ///     10
98    /// );
99    /// ```
100    ///
101    /// [sort]: https://doc.rust-lang.org/stable/std/primitive.slice.html#method.sort_unstable
102    /// [pdqsort]: https://github.com/orlp/pdqsort
103    fn from(edges: Array1<A>) -> Self {
104        let edges = edges.to_vec();
105        Self::from(edges)
106    }
107}
108
109impl<A: Ord> Index<usize> for Edges<A> {
110    type Output = A;
111
112    /// Returns a reference to the `i`-th edge in `self`.
113    ///
114    /// # Panics
115    ///
116    /// Panics if the index `i` is out of bounds.
117    ///
118    /// # Examples
119    ///
120    /// ```
121    /// use ndarray_stats::histogram::Edges;
122    ///
123    /// let edges = Edges::from(vec![1, 5, 10, 20]);
124    /// assert_eq!(
125    ///     edges[1],
126    ///     5
127    /// );
128    /// ```
129    fn index(&self, i: usize) -> &Self::Output {
130        &self.edges[i]
131    }
132}
133
134impl<A: Ord> Edges<A> {
135    /// Returns the number of edges in `self`.
136    ///
137    /// # Examples
138    ///
139    /// ```
140    /// use ndarray_stats::histogram::Edges;
141    /// use noisy_float::types::n64;
142    ///
143    /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(3.)]);
144    /// assert_eq!(
145    ///     edges.len(),
146    ///     3
147    /// );
148    /// ```
149    #[must_use]
150    pub fn len(&self) -> usize {
151        self.edges.len()
152    }
153
154    /// Returns `true` if `self` contains no edges.
155    ///
156    /// # Examples
157    ///
158    /// ```
159    /// use ndarray_stats::histogram::Edges;
160    /// use noisy_float::types::{N64, n64};
161    ///
162    /// let edges = Edges::<N64>::from(vec![]);
163    /// assert_eq!(edges.is_empty(), true);
164    ///
165    /// let edges = Edges::from(vec![n64(0.), n64(2.), n64(5.)]);
166    /// assert_eq!(edges.is_empty(), false);
167    /// ```
168    #[must_use]
169    pub fn is_empty(&self) -> bool {
170        self.edges.is_empty()
171    }
172
173    /// Returns an immutable 1-dimensional array view of edges.
174    ///
175    /// # Examples
176    ///
177    /// ```
178    /// use ndarray::array;
179    /// use ndarray_stats::histogram::Edges;
180    ///
181    /// let edges = Edges::from(vec![0, 5, 3]);
182    /// assert_eq!(
183    ///     edges.as_array_view(),
184    ///     array![0, 3, 5].view()
185    /// );
186    /// ```
187    #[must_use]
188    pub fn as_array_view(&self) -> ArrayView1<'_, A> {
189        ArrayView1::from(&self.edges)
190    }
191
192    /// Returns indices of two consecutive `edges` in `self`, if the interval they represent
193    /// contains the given `value`, or returns `None` otherwise.
194    ///
195    /// That is to say, it returns
196    /// - `Some((left, right))`, where `left` and `right` are the indices of two consecutive edges
197    /// in `self` and `right == left + 1`, if `self[left] <= value < self[right]`;
198    /// - `None`, otherwise.
199    ///
200    /// # Examples
201    ///
202    /// ```
203    /// use ndarray_stats::histogram::Edges;
204    ///
205    /// let edges = Edges::from(vec![0, 2, 3]);
206    /// // `1` is in the interval [0, 2), whose indices are (0, 1)
207    /// assert_eq!(
208    ///     edges.indices_of(&1),
209    ///     Some((0, 1))
210    /// );
211    /// // `5` is not in any of intervals
212    /// assert_eq!(
213    ///     edges.indices_of(&5),
214    ///     None
215    /// );
216    /// ```
217    pub fn indices_of(&self, value: &A) -> Option<(usize, usize)> {
218        // binary search for the correct bin
219        let n_edges = self.len();
220        match self.edges.binary_search(value) {
221            Ok(i) if i == n_edges - 1 => None,
222            Ok(i) => Some((i, i + 1)),
223            Err(i) => match i {
224                0 => None,
225                j if j == n_edges => None,
226                j => Some((j - 1, j)),
227            },
228        }
229    }
230
231    /// Returns an iterator over the `edges` in `self`.
232    pub fn iter(&self) -> impl Iterator<Item = &A> {
233        self.edges.iter()
234    }
235}
236
237/// A sorted collection of non-overlapping 1-dimensional intervals.
238///
239/// **Note** that all intervals are left-closed and right-open.
240///
241/// # Examples
242///
243/// ```
244/// use ndarray_stats::histogram::{Edges, Bins};
245/// use noisy_float::types::n64;
246///
247/// let edges = Edges::from(vec![n64(0.), n64(1.), n64(2.)]);
248/// let bins = Bins::new(edges);
249/// // first bin
250/// assert_eq!(
251///     bins.index(0),
252///     n64(0.)..n64(1.) // n64(1.) is not included in the bin!
253/// );
254/// // second bin
255/// assert_eq!(
256///     bins.index(1),
257///     n64(1.)..n64(2.)
258/// );
259/// ```
260#[derive(Clone, Debug, Eq, PartialEq)]
261pub struct Bins<A: Ord> {
262    edges: Edges<A>,
263}
264
265impl<A: Ord> Bins<A> {
266    /// Returns a `Bins` instance where each bin corresponds to two consecutive members of the given
267    /// [`Edges`], consuming the edges.
268    ///
269    /// [`Edges`]: struct.Edges.html
270    #[must_use]
271    pub fn new(edges: Edges<A>) -> Self {
272        Bins { edges }
273    }
274
275    /// Returns the number of bins in `self`.
276    ///
277    /// # Examples
278    ///
279    /// ```
280    /// use ndarray_stats::histogram::{Edges, Bins};
281    /// use noisy_float::types::n64;
282    ///
283    /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(2.)]);
284    /// let bins = Bins::new(edges);
285    /// assert_eq!(
286    ///     bins.len(),
287    ///     2
288    /// );
289    /// ```
290    #[must_use]
291    pub fn len(&self) -> usize {
292        match self.edges.len() {
293            0 => 0,
294            n => n - 1,
295        }
296    }
297
298    /// Returns `true` if the number of bins is zero, i.e. if the number of edges is 0 or 1.
299    ///
300    /// # Examples
301    ///
302    /// ```
303    /// use ndarray_stats::histogram::{Edges, Bins};
304    /// use noisy_float::types::{N64, n64};
305    ///
306    /// // At least 2 edges is needed to represent 1 interval
307    /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(3.)]);
308    /// let bins = Bins::new(edges);
309    /// assert_eq!(bins.is_empty(), false);
310    ///
311    /// // No valid interval == Empty
312    /// let edges = Edges::<N64>::from(vec![]);
313    /// let bins = Bins::new(edges);
314    /// assert_eq!(bins.is_empty(), true);
315    /// let edges = Edges::from(vec![n64(0.)]);
316    /// let bins = Bins::new(edges);
317    /// assert_eq!(bins.is_empty(), true);
318    /// ```
319    #[must_use]
320    pub fn is_empty(&self) -> bool {
321        self.len() == 0
322    }
323
324    /// Returns the index of the bin in `self` that contains the given `value`,
325    /// or returns `None` if `value` does not belong to any bins in `self`.
326    ///
327    /// # Examples
328    ///
329    /// Basic usage:
330    ///
331    /// ```
332    /// use ndarray_stats::histogram::{Edges, Bins};
333    ///
334    /// let edges = Edges::from(vec![0, 2, 4, 6]);
335    /// let bins = Bins::new(edges);
336    /// let value = 1;
337    /// // The first bin [0, 2) contains `1`
338    /// assert_eq!(
339    ///     bins.index_of(&1),
340    ///     Some(0)
341    /// );
342    /// // No bin contains 100
343    /// assert_eq!(
344    ///     bins.index_of(&100),
345    ///     None
346    /// )
347    /// ```
348    ///
349    /// Chaining [`Bins::index`] and [`Bins::index_of`] to get the boundaries of the bin containing
350    /// the value:
351    ///
352    /// ```
353    /// # use ndarray_stats::histogram::{Edges, Bins};
354    /// # let edges = Edges::from(vec![0, 2, 4, 6]);
355    /// # let bins = Bins::new(edges);
356    /// # let value = 1;
357    /// assert_eq!(
358    ///     // using `Option::map` to avoid panic on index out-of-bounds
359    ///     bins.index_of(&1).map(|i| bins.index(i)),
360    ///     Some(0..2)
361    /// );
362    /// ```
363    pub fn index_of(&self, value: &A) -> Option<usize> {
364        self.edges.indices_of(value).map(|t| t.0)
365    }
366
367    /// Returns a range as the bin which contains the given `value`, or returns `None` otherwise.
368    ///
369    /// # Examples
370    ///
371    /// ```
372    /// use ndarray_stats::histogram::{Edges, Bins};
373    ///
374    /// let edges = Edges::from(vec![0, 2, 4, 6]);
375    /// let bins = Bins::new(edges);
376    /// // [0, 2) contains `1`
377    /// assert_eq!(
378    ///     bins.range_of(&1),
379    ///     Some(0..2)
380    /// );
381    /// // `10` is not in any interval
382    /// assert_eq!(
383    ///     bins.range_of(&10),
384    ///     None
385    /// );
386    /// ```
387    pub fn range_of(&self, value: &A) -> Option<Range<A>>
388    where
389        A: Clone,
390    {
391        let edges_indexes = self.edges.indices_of(value);
392        edges_indexes.map(|(left, right)| Range {
393            start: self.edges[left].clone(),
394            end: self.edges[right].clone(),
395        })
396    }
397
398    /// Returns a range as the bin at the given `index` position.
399    ///
400    /// # Panics
401    ///
402    /// Panics if `index` is out of bounds.
403    ///
404    /// # Examples
405    ///
406    /// ```
407    /// use ndarray_stats::histogram::{Edges, Bins};
408    ///
409    /// let edges = Edges::from(vec![1, 5, 10, 20]);
410    /// let bins = Bins::new(edges);
411    /// assert_eq!(
412    ///     bins.index(1),
413    ///     5..10
414    /// );
415    /// ```
416    #[must_use]
417    pub fn index(&self, index: usize) -> Range<A>
418    where
419        A: Clone,
420    {
421        // It was not possible to implement this functionality
422        // using the `Index` trait unless we were willing to
423        // allocate a `Vec<Range<A>>` in the struct.
424        // Index, in fact, forces you to return a reference.
425        Range {
426            start: self.edges[index].clone(),
427            end: self.edges[index + 1].clone(),
428        }
429    }
430}
431
432#[cfg(test)]
433mod edges_tests {
434    use super::{Array1, Edges};
435    use quickcheck_macros::quickcheck;
436    use std::collections::BTreeSet;
437    use std::iter::FromIterator;
438
439    #[quickcheck]
440    fn check_sorted_from_vec(v: Vec<i32>) -> bool {
441        let edges = Edges::from(v);
442        let n = edges.len();
443        for i in 1..n {
444            if edges[i - 1] > edges[i] {
445                return false;
446            }
447        }
448        true
449    }
450
451    #[quickcheck]
452    fn check_sorted_from_array(v: Vec<i32>) -> bool {
453        let a = Array1::from(v);
454        let edges = Edges::from(a);
455        let n = edges.len();
456        for i in 1..n {
457            if edges[i - 1] > edges[i] {
458                return false;
459            }
460        }
461        true
462    }
463
464    #[quickcheck]
465    fn edges_are_right_open(v: Vec<i32>) -> bool {
466        let edges = Edges::from(v);
467        let view = edges.as_array_view();
468        if view.is_empty() {
469            true
470        } else {
471            let last = view[view.len() - 1];
472            edges.indices_of(&last).is_none()
473        }
474    }
475
476    #[quickcheck]
477    fn edges_are_left_closed(v: Vec<i32>) -> bool {
478        let edges = Edges::from(v);
479        if let 1 = edges.len() {
480            true
481        } else {
482            let view = edges.as_array_view();
483            if view.is_empty() {
484                true
485            } else {
486                let first = view[0];
487                edges.indices_of(&first).is_some()
488            }
489        }
490    }
491
492    #[quickcheck]
493    #[allow(clippy::needless_pass_by_value)]
494    fn edges_are_deduped(v: Vec<i32>) -> bool {
495        let unique_elements = BTreeSet::from_iter(v.iter());
496        let edges = Edges::from(v.clone());
497        let view = edges.as_array_view();
498        let unique_edges = BTreeSet::from_iter(view.iter());
499        unique_edges == unique_elements
500    }
501}
502
503#[cfg(test)]
504mod bins_tests {
505    use super::{Bins, Edges};
506
507    #[test]
508    #[should_panic]
509    #[allow(unused_must_use)]
510    fn get_panics_for_out_of_bounds_indexes() {
511        let edges = Edges::from(vec![0]);
512        let bins = Bins::new(edges);
513        // we need at least two edges to make a valid bin!
514        bins.index(0);
515    }
516}