kn0sys_ndarray_stats/histogram/bins.rs
1#![warn(missing_docs, clippy::all, clippy::pedantic)]
2
3use ndarray::prelude::*;
4use std::ops::{Index, Range};
5
6/// A sorted collection of type `A` elements used to represent the boundaries of intervals, i.e.
7/// [`Bins`] on a 1-dimensional axis.
8///
9/// **Note** that all intervals are left-closed and right-open. See examples below.
10///
11/// # Examples
12///
13/// ```
14/// use kn0sys_ndarray_stats::histogram::{Bins, Edges};
15/// use noisy_float::types::n64;
16///
17/// let unit_edges = Edges::from(vec![n64(0.), n64(1.)]);
18/// let unit_interval = Bins::new(unit_edges);
19/// // left-closed
20/// assert_eq!(
21/// unit_interval.range_of(&n64(0.)).unwrap(),
22/// n64(0.)..n64(1.),
23/// );
24/// // right-open
25/// assert_eq!(
26/// unit_interval.range_of(&n64(1.)),
27/// None
28/// );
29/// ```
30///
31/// [`Bins`]: struct.Bins.html
32#[derive(Clone, Debug, Eq, PartialEq)]
33pub struct Edges<A: Ord> {
34 edges: Vec<A>,
35}
36
37impl<A: Ord> From<Vec<A>> for Edges<A> {
38 /// Converts a `Vec<A>` into an `Edges<A>`, consuming the edges.
39 /// The vector will be sorted in increasing order using an unstable sorting algorithm, with
40 /// duplicates removed.
41 ///
42 /// # Current implementation
43 ///
44 /// The current sorting algorithm is the same as [`std::slice::sort_unstable()`][sort],
45 /// which is based on [pattern-defeating quicksort][pdqsort].
46 ///
47 /// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not allocate)
48 /// , and O(n log n) worst-case.
49 ///
50 /// # Examples
51 ///
52 /// ```
53 /// use ndarray::array;
54 /// use kn0sys_ndarray_stats::histogram::Edges;
55 ///
56 /// let edges = Edges::from(array![1, 15, 10, 10, 20]);
57 /// // The array gets sorted!
58 /// assert_eq!(
59 /// edges[2],
60 /// 15
61 /// );
62 /// ```
63 ///
64 /// [sort]: https://doc.rust-lang.org/stable/std/primitive.slice.html#method.sort_unstable
65 /// [pdqsort]: https://github.com/orlp/pdqsort
66 fn from(mut edges: Vec<A>) -> Self {
67 // sort the array in-place
68 edges.sort_unstable();
69 // remove duplicates
70 edges.dedup();
71 Edges { edges }
72 }
73}
74
75impl<A: Ord + Clone> From<Array1<A>> for Edges<A> {
76 /// Converts an `Array1<A>` into an `Edges<A>`, consuming the 1-dimensional array.
77 /// The array will be sorted in increasing order using an unstable sorting algorithm, with
78 /// duplicates removed.
79 ///
80 /// # Current implementation
81 ///
82 /// The current sorting algorithm is the same as [`std::slice::sort_unstable()`][sort],
83 /// which is based on [pattern-defeating quicksort][pdqsort].
84 ///
85 /// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not allocate)
86 /// , and O(n log n) worst-case.
87 ///
88 /// # Examples
89 ///
90 /// ```
91 /// use kn0sys_ndarray_stats::histogram::Edges;
92 ///
93 /// let edges = Edges::from(vec![1, 15, 10, 20]);
94 /// // The vec gets sorted!
95 /// assert_eq!(
96 /// edges[1],
97 /// 10
98 /// );
99 /// ```
100 ///
101 /// [sort]: https://doc.rust-lang.org/stable/std/primitive.slice.html#method.sort_unstable
102 /// [pdqsort]: https://github.com/orlp/pdqsort
103 fn from(edges: Array1<A>) -> Self {
104 let edges = edges.to_vec();
105 Self::from(edges)
106 }
107}
108
109impl<A: Ord> Index<usize> for Edges<A> {
110 type Output = A;
111
112 /// Returns a reference to the `i`-th edge in `self`.
113 ///
114 /// # Panics
115 ///
116 /// Panics if the index `i` is out of bounds.
117 ///
118 /// # Examples
119 ///
120 /// ```
121 /// use kn0sys_ndarray_stats::histogram::Edges;
122 ///
123 /// let edges = Edges::from(vec![1, 5, 10, 20]);
124 /// assert_eq!(
125 /// edges[1],
126 /// 5
127 /// );
128 /// ```
129 fn index(&self, i: usize) -> &Self::Output {
130 &self.edges[i]
131 }
132}
133
134impl<A: Ord> Edges<A> {
135 /// Returns the number of edges in `self`.
136 ///
137 /// # Examples
138 ///
139 /// ```
140 /// use kn0sys_ndarray_stats::histogram::Edges;
141 /// use noisy_float::types::n64;
142 ///
143 /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(3.)]);
144 /// assert_eq!(
145 /// edges.len(),
146 /// 3
147 /// );
148 /// ```
149 #[must_use]
150 pub fn len(&self) -> usize {
151 self.edges.len()
152 }
153
154 /// Returns `true` if `self` contains no edges.
155 ///
156 /// # Examples
157 ///
158 /// ```
159 /// use kn0sys_ndarray_stats::histogram::Edges;
160 /// use noisy_float::types::{N64, n64};
161 ///
162 /// let edges = Edges::<N64>::from(vec![]);
163 /// assert_eq!(edges.is_empty(), true);
164 ///
165 /// let edges = Edges::from(vec![n64(0.), n64(2.), n64(5.)]);
166 /// assert_eq!(edges.is_empty(), false);
167 /// ```
168 #[must_use]
169 pub fn is_empty(&self) -> bool {
170 self.edges.is_empty()
171 }
172
173 /// Returns an immutable 1-dimensional array view of edges.
174 ///
175 /// # Examples
176 ///
177 /// ```
178 /// use ndarray::array;
179 /// use kn0sys_ndarray_stats::histogram::Edges;
180 ///
181 /// let edges = Edges::from(vec![0, 5, 3]);
182 /// assert_eq!(
183 /// edges.as_array_view(),
184 /// array![0, 3, 5].view()
185 /// );
186 /// ```
187 #[must_use]
188 pub fn as_array_view(&self) -> ArrayView1<'_, A> {
189 ArrayView1::from(&self.edges)
190 }
191
192 /// Returns indices of two consecutive `edges` in `self`, if the interval they represent
193 /// contains the given `value`, or returns `None` otherwise.
194 ///
195 /// That is to say, it returns
196 /// - `Some((left, right))`, where `left` and `right` are the indices of two consecutive edges
197 /// in `self` and `right == left + 1`, if `self[left] <= value < self[right]`;
198 /// - `None`, otherwise.
199 ///
200 /// # Examples
201 ///
202 /// ```
203 /// use kn0sys_ndarray_stats::histogram::Edges;
204 ///
205 /// let edges = Edges::from(vec![0, 2, 3]);
206 /// // `1` is in the interval [0, 2), whose indices are (0, 1)
207 /// assert_eq!(
208 /// edges.indices_of(&1),
209 /// Some((0, 1))
210 /// );
211 /// // `5` is not in any of intervals
212 /// assert_eq!(
213 /// edges.indices_of(&5),
214 /// None
215 /// );
216 /// ```
217 pub fn indices_of(&self, value: &A) -> Option<(usize, usize)> {
218 // binary search for the correct bin
219 let n_edges = self.len();
220 match self.edges.binary_search(value) {
221 Ok(i) if i == n_edges - 1 => None,
222 Ok(i) => Some((i, i + 1)),
223 Err(i) => match i {
224 0 => None,
225 j if j == n_edges => None,
226 j => Some((j - 1, j)),
227 },
228 }
229 }
230
231 /// Returns an iterator over the `edges` in `self`.
232 pub fn iter(&self) -> impl Iterator<Item = &A> {
233 self.edges.iter()
234 }
235}
236
237/// A sorted collection of non-overlapping 1-dimensional intervals.
238///
239/// **Note** that all intervals are left-closed and right-open.
240///
241/// # Examples
242///
243/// ```
244/// use kn0sys_ndarray_stats::histogram::{Edges, Bins};
245/// use noisy_float::types::n64;
246///
247/// let edges = Edges::from(vec![n64(0.), n64(1.), n64(2.)]);
248/// let bins = Bins::new(edges);
249/// // first bin
250/// assert_eq!(
251/// bins.index(0),
252/// n64(0.)..n64(1.) // n64(1.) is not included in the bin!
253/// );
254/// // second bin
255/// assert_eq!(
256/// bins.index(1),
257/// n64(1.)..n64(2.)
258/// );
259/// ```
260#[derive(Clone, Debug, Eq, PartialEq)]
261pub struct Bins<A: Ord> {
262 edges: Edges<A>,
263}
264
265impl<A: Ord> Bins<A> {
266 /// Returns a `Bins` instance where each bin corresponds to two consecutive members of the given
267 /// [`Edges`], consuming the edges.
268 ///
269 /// [`Edges`]: struct.Edges.html
270 #[must_use]
271 pub fn new(edges: Edges<A>) -> Self {
272 Bins { edges }
273 }
274
275 /// Returns the number of bins in `self`.
276 ///
277 /// # Examples
278 ///
279 /// ```
280 /// use kn0sys_ndarray_stats::histogram::{Edges, Bins};
281 /// use noisy_float::types::n64;
282 ///
283 /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(2.)]);
284 /// let bins = Bins::new(edges);
285 /// assert_eq!(
286 /// bins.len(),
287 /// 2
288 /// );
289 /// ```
290 #[must_use]
291 pub fn len(&self) -> usize {
292 match self.edges.len() {
293 0 => 0,
294 n => n - 1,
295 }
296 }
297
298 /// Returns `true` if the number of bins is zero, i.e. if the number of edges is 0 or 1.
299 ///
300 /// # Examples
301 ///
302 /// ```
303 /// use kn0sys_ndarray_stats::histogram::{Edges, Bins};
304 /// use noisy_float::types::{N64, n64};
305 ///
306 /// // At least 2 edges is needed to represent 1 interval
307 /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(3.)]);
308 /// let bins = Bins::new(edges);
309 /// assert_eq!(bins.is_empty(), false);
310 ///
311 /// // No valid interval == Empty
312 /// let edges = Edges::<N64>::from(vec![]);
313 /// let bins = Bins::new(edges);
314 /// assert_eq!(bins.is_empty(), true);
315 /// let edges = Edges::from(vec![n64(0.)]);
316 /// let bins = Bins::new(edges);
317 /// assert_eq!(bins.is_empty(), true);
318 /// ```
319 #[must_use]
320 pub fn is_empty(&self) -> bool {
321 self.len() == 0
322 }
323
324 /// Returns the index of the bin in `self` that contains the given `value`,
325 /// or returns `None` if `value` does not belong to any bins in `self`.
326 ///
327 /// # Examples
328 ///
329 /// Basic usage:
330 ///
331 /// ```
332 /// use kn0sys_ndarray_stats::histogram::{Edges, Bins};
333 ///
334 /// let edges = Edges::from(vec![0, 2, 4, 6]);
335 /// let bins = Bins::new(edges);
336 /// let value = 1;
337 /// // The first bin [0, 2) contains `1`
338 /// assert_eq!(
339 /// bins.index_of(&1),
340 /// Some(0)
341 /// );
342 /// // No bin contains 100
343 /// assert_eq!(
344 /// bins.index_of(&100),
345 /// None
346 /// )
347 /// ```
348 ///
349 /// Chaining [`Bins::index`] and [`Bins::index_of`] to get the boundaries of the bin containing
350 /// the value:
351 ///
352 /// ```
353 /// # use kn0sys_ndarray_stats::histogram::{Edges, Bins};
354 /// # let edges = Edges::from(vec![0, 2, 4, 6]);
355 /// # let bins = Bins::new(edges);
356 /// # let value = 1;
357 /// assert_eq!(
358 /// // using `Option::map` to avoid panic on index out-of-bounds
359 /// bins.index_of(&1).map(|i| bins.index(i)),
360 /// Some(0..2)
361 /// );
362 /// ```
363 pub fn index_of(&self, value: &A) -> Option<usize> {
364 self.edges.indices_of(value).map(|t| t.0)
365 }
366
367 /// Returns a range as the bin which contains the given `value`, or returns `None` otherwise.
368 ///
369 /// # Examples
370 ///
371 /// ```
372 /// use kn0sys_ndarray_stats::histogram::{Edges, Bins};
373 ///
374 /// let edges = Edges::from(vec![0, 2, 4, 6]);
375 /// let bins = Bins::new(edges);
376 /// // [0, 2) contains `1`
377 /// assert_eq!(
378 /// bins.range_of(&1),
379 /// Some(0..2)
380 /// );
381 /// // `10` is not in any interval
382 /// assert_eq!(
383 /// bins.range_of(&10),
384 /// None
385 /// );
386 /// ```
387 pub fn range_of(&self, value: &A) -> Option<Range<A>>
388 where
389 A: Clone,
390 {
391 let edges_indexes = self.edges.indices_of(value);
392 edges_indexes.map(|(left, right)| Range {
393 start: self.edges[left].clone(),
394 end: self.edges[right].clone(),
395 })
396 }
397
398 /// Returns a range as the bin at the given `index` position.
399 ///
400 /// # Panics
401 ///
402 /// Panics if `index` is out of bounds.
403 ///
404 /// # Examples
405 ///
406 /// ```
407 /// use kn0sys_ndarray_stats::histogram::{Edges, Bins};
408 ///
409 /// let edges = Edges::from(vec![1, 5, 10, 20]);
410 /// let bins = Bins::new(edges);
411 /// assert_eq!(
412 /// bins.index(1),
413 /// 5..10
414 /// );
415 /// ```
416 #[must_use]
417 pub fn index(&self, index: usize) -> Range<A>
418 where
419 A: Clone,
420 {
421 // It was not possible to implement this functionality
422 // using the `Index` trait unless we were willing to
423 // allocate a `Vec<Range<A>>` in the struct.
424 // Index, in fact, forces you to return a reference.
425 Range {
426 start: self.edges[index].clone(),
427 end: self.edges[index + 1].clone(),
428 }
429 }
430}
431
432#[cfg(test)]
433mod edges_tests {
434 use super::{Array1, Edges};
435 use std::collections::BTreeSet;
436 use std::iter::FromIterator;
437
438 fn check_sorted_from_vec(v: Vec<i32>) -> bool {
439 let edges = Edges::from(v);
440 let n = edges.len();
441 for i in 1..n {
442 if edges[i - 1] > edges[i] {
443 return false;
444 }
445 }
446 true
447 }
448
449 fn check_sorted_from_array(v: Vec<i32>) -> bool {
450 let a = Array1::from(v);
451 let edges = Edges::from(a);
452 let n = edges.len();
453 for i in 1..n {
454 if edges[i - 1] > edges[i] {
455 return false;
456 }
457 }
458 true
459 }
460
461 fn edges_are_right_open(v: Vec<i32>) -> bool {
462 let edges = Edges::from(v);
463 let view = edges.as_array_view();
464 if view.is_empty() {
465 true
466 } else {
467 let last = view[view.len() - 1];
468 edges.indices_of(&last).is_none()
469 }
470 }
471
472 fn edges_are_left_closed(v: Vec<i32>) -> bool {
473 let edges = Edges::from(v);
474 if let 1 = edges.len() {
475 true
476 } else {
477 let view = edges.as_array_view();
478 if view.is_empty() {
479 true
480 } else {
481 let first = view[0];
482 edges.indices_of(&first).is_some()
483 }
484 }
485 }
486
487 #[allow(clippy::needless_pass_by_value)]
488 fn edges_are_deduped(v: Vec<i32>) -> bool {
489 let unique_elements = BTreeSet::from_iter(v.iter());
490 let edges = Edges::from(v.clone());
491 let view = edges.as_array_view();
492 let unique_edges = BTreeSet::from_iter(view.iter());
493 unique_edges == unique_elements
494 }
495}
496
497#[cfg(test)]
498mod bins_tests {
499 use super::{Bins, Edges};
500
501 #[test]
502 #[should_panic]
503 #[allow(unused_must_use)]
504 fn get_panics_for_out_of_bounds_indexes() {
505 let edges = Edges::from(vec![0]);
506 let bins = Bins::new(edges);
507 // we need at least two edges to make a valid bin!
508 bins.index(0);
509 }
510}