ndarray_stats/histogram/grid.rs
1#![warn(missing_docs, clippy::all, clippy::pedantic)]
2
3use super::{bins::Bins, errors::BinsBuildError, strategies::BinsBuildingStrategy};
4use itertools::izip;
5use ndarray::{ArrayBase, Axis, Data, Ix1, Ix2};
6use std::ops::Range;
7
8/// An orthogonal partition of a rectangular region in an *n*-dimensional space, e.g.
9/// [*a*<sub>0</sub>, *b*<sub>0</sub>) × ⋯ × [*a*<sub>*n*−1</sub>, *b*<sub>*n*−1</sub>),
10/// represented as a collection of rectangular *n*-dimensional bins.
11///
12/// The grid is **solely determined by the Cartesian product of its projections** on each coordinate
13/// axis. Therefore, each element in the product set should correspond to a sub-region in the grid.
14///
15/// For example, this partition can be represented as a `Grid` struct:
16///
17/// ```text
18///
19/// g +---+-------+---+
20/// | 3 | 4 | 5 |
21/// f +---+-------+---+
22/// | | | |
23/// | 0 | 1 | 2 |
24/// | | | |
25/// e +---+-------+---+
26/// a b c d
27///
28/// R0: [a, b) × [e, f)
29/// R1: [b, c) × [e, f)
30/// R2: [c, d) × [e, f)
31/// R3: [a, b) × [f, g)
32/// R4: [b, d) × [f, g)
33/// R5: [c, d) × [f, g)
34/// Grid: { [a, b), [b, c), [c, d) } × { [e, f), [f, g) } == { R0, R1, R2, R3, R4, R5 }
35/// ```
36///
37/// while the next one can't:
38///
39/// ```text
40/// g +---+-----+---+
41/// | | 2 | 3 |
42/// (f) | +-----+---+
43/// | 0 | |
44/// | | 1 |
45/// | | |
46/// e +---+-----+---+
47/// a b c d
48///
49/// R0: [a, b) × [e, g)
50/// R1: [b, d) × [e, f)
51/// R2: [b, c) × [f, g)
52/// R3: [c, d) × [f, g)
53/// // 'f', as long as 'R1', 'R2', or 'R3', doesn't appear on LHS
54/// // [b, c) × [e, g), [c, d) × [e, g) doesn't appear on RHS
55/// Grid: { [a, b), [b, c), [c, d) } × { [e, g) } != { R0, R1, R2, R3 }
56/// ```
57///
58/// # Examples
59///
60/// Basic usage, building a `Grid` via [`GridBuilder`], with optimal grid layout determined by
61/// a given [`strategy`], and generating a [`histogram`]:
62///
63/// ```
64/// use ndarray::{Array, array};
65/// use ndarray_stats::{
66/// histogram::{strategies::Auto, Bins, Edges, Grid, GridBuilder},
67/// HistogramExt,
68/// };
69///
70/// // 1-dimensional observations, as a (n_observations, n_dimension) 2-d matrix
71/// let observations = Array::from_shape_vec(
72/// (12, 1),
73/// vec![1, 4, 5, 2, 100, 20, 50, 65, 27, 40, 45, 23],
74/// ).unwrap();
75///
76/// // The optimal grid layout is inferred from the data, given a chosen strategy, Auto in this case
77/// let grid = GridBuilder::<Auto<usize>>::from_array(&observations).unwrap().build();
78///
79/// let histogram = observations.histogram(grid);
80///
81/// let histogram_matrix = histogram.counts();
82/// // Bins are left-closed, right-open!
83/// let expected = array![4, 3, 3, 1, 0, 1];
84/// assert_eq!(histogram_matrix, expected.into_dyn());
85/// ```
86///
87/// [`histogram`]: trait.HistogramExt.html
88/// [`GridBuilder`]: struct.GridBuilder.html
89/// [`strategy`]: strategies/index.html
90#[derive(Clone, Debug, Eq, PartialEq)]
91pub struct Grid<A: Ord> {
92 projections: Vec<Bins<A>>,
93}
94
95impl<A: Ord> From<Vec<Bins<A>>> for Grid<A> {
96 /// Converts a `Vec<Bins<A>>` into a `Grid<A>`, consuming the vector of bins.
97 ///
98 /// The `i`-th element in `Vec<Bins<A>>` represents the projection of the bin grid onto the
99 /// `i`-th axis.
100 ///
101 /// Alternatively, a `Grid` can be built directly from data using a [`GridBuilder`].
102 ///
103 /// [`GridBuilder`]: struct.GridBuilder.html
104 fn from(projections: Vec<Bins<A>>) -> Self {
105 Grid { projections }
106 }
107}
108
109impl<A: Ord> Grid<A> {
110 /// Returns the number of dimensions of the region partitioned by the grid.
111 ///
112 /// # Examples
113 ///
114 /// ```
115 /// use ndarray_stats::histogram::{Edges, Bins, Grid};
116 ///
117 /// let edges = Edges::from(vec![0, 1]);
118 /// let bins = Bins::new(edges);
119 /// let square_grid = Grid::from(vec![bins.clone(), bins.clone()]);
120 ///
121 /// assert_eq!(square_grid.ndim(), 2usize)
122 /// ```
123 #[must_use]
124 pub fn ndim(&self) -> usize {
125 self.projections.len()
126 }
127
128 /// Returns the numbers of bins along each coordinate axis.
129 ///
130 /// # Examples
131 ///
132 /// ```
133 /// use ndarray_stats::histogram::{Edges, Bins, Grid};
134 ///
135 /// let edges_x = Edges::from(vec![0, 1]);
136 /// let edges_y = Edges::from(vec![-1, 0, 1]);
137 /// let bins_x = Bins::new(edges_x);
138 /// let bins_y = Bins::new(edges_y);
139 /// let square_grid = Grid::from(vec![bins_x, bins_y]);
140 ///
141 /// assert_eq!(square_grid.shape(), vec![1usize, 2usize]);
142 /// ```
143 #[must_use]
144 pub fn shape(&self) -> Vec<usize> {
145 self.projections.iter().map(Bins::len).collect()
146 }
147
148 /// Returns the grid projections on each coordinate axis as a slice of immutable references.
149 #[must_use]
150 pub fn projections(&self) -> &[Bins<A>] {
151 &self.projections
152 }
153
154 /// Returns an `n-dimensional` index, of bins along each axis that contains the point, if one
155 /// exists.
156 ///
157 /// Returns `None` if the point is outside the grid.
158 ///
159 /// # Panics
160 ///
161 /// Panics if dimensionality of the point doesn't equal the grid's.
162 ///
163 /// # Examples
164 ///
165 /// Basic usage:
166 ///
167 /// ```
168 /// use ndarray::array;
169 /// use ndarray_stats::histogram::{Edges, Bins, Grid};
170 /// use noisy_float::types::n64;
171 ///
172 /// let edges = Edges::from(vec![n64(-1.), n64(0.), n64(1.)]);
173 /// let bins = Bins::new(edges);
174 /// let square_grid = Grid::from(vec![bins.clone(), bins.clone()]);
175 ///
176 /// // (0., -0.7) falls in 1st and 0th bin respectively
177 /// assert_eq!(
178 /// square_grid.index_of(&array![n64(0.), n64(-0.7)]),
179 /// Some(vec![1, 0]),
180 /// );
181 /// // Returns `None`, as `1.` is outside the grid since bins are right-open
182 /// assert_eq!(
183 /// square_grid.index_of(&array![n64(0.), n64(1.)]),
184 /// None,
185 /// );
186 /// ```
187 ///
188 /// A panic upon dimensionality mismatch:
189 ///
190 /// ```should_panic
191 /// # use ndarray::array;
192 /// # use ndarray_stats::histogram::{Edges, Bins, Grid};
193 /// # use noisy_float::types::n64;
194 /// # let edges = Edges::from(vec![n64(-1.), n64(0.), n64(1.)]);
195 /// # let bins = Bins::new(edges);
196 /// # let square_grid = Grid::from(vec![bins.clone(), bins.clone()]);
197 /// // the point has 3 dimensions, the grid expected 2 dimensions
198 /// assert_eq!(
199 /// square_grid.index_of(&array![n64(0.), n64(-0.7), n64(0.5)]),
200 /// Some(vec![1, 0, 1]),
201 /// );
202 /// ```
203 pub fn index_of<S>(&self, point: &ArrayBase<S, Ix1>) -> Option<Vec<usize>>
204 where
205 S: Data<Elem = A>,
206 {
207 assert_eq!(
208 point.len(),
209 self.ndim(),
210 "Dimension mismatch: the point has {:?} dimensions, the grid \
211 expected {:?} dimensions.",
212 point.len(),
213 self.ndim()
214 );
215 point
216 .iter()
217 .zip(self.projections.iter())
218 .map(|(v, e)| e.index_of(v))
219 .collect()
220 }
221}
222
223impl<A: Ord + Clone> Grid<A> {
224 /// Given an `n`-dimensional index, `i = (i_0, ..., i_{n-1})`, returns an `n`-dimensional bin,
225 /// `I_{i_0} x ... x I_{i_{n-1}}`, where `I_{i_j}` is the `i_j`-th interval on the `j`-th
226 /// projection of the grid on the coordinate axes.
227 ///
228 /// # Panics
229 ///
230 /// Panics if at least one in the index, `(i_0, ..., i_{n-1})`, is out of bounds on the
231 /// corresponding coordinate axis, i.e. if there exists `j` s.t.
232 /// `i_j >= self.projections[j].len()`.
233 ///
234 /// # Examples
235 ///
236 /// Basic usage:
237 ///
238 /// ```
239 /// use ndarray::array;
240 /// use ndarray_stats::histogram::{Edges, Bins, Grid};
241 ///
242 /// let edges_x = Edges::from(vec![0, 1]);
243 /// let edges_y = Edges::from(vec![2, 3, 4]);
244 /// let bins_x = Bins::new(edges_x);
245 /// let bins_y = Bins::new(edges_y);
246 /// let square_grid = Grid::from(vec![bins_x, bins_y]);
247 ///
248 /// // Query the 0-th bin on x-axis, and 1-st bin on y-axis
249 /// assert_eq!(
250 /// square_grid.index(&[0, 1]),
251 /// vec![0..1, 3..4],
252 /// );
253 /// ```
254 ///
255 /// A panic upon out-of-bounds:
256 ///
257 /// ```should_panic
258 /// # use ndarray::array;
259 /// # use ndarray_stats::histogram::{Edges, Bins, Grid};
260 /// # let edges_x = Edges::from(vec![0, 1]);
261 /// # let edges_y = Edges::from(vec![2, 3, 4]);
262 /// # let bins_x = Bins::new(edges_x);
263 /// # let bins_y = Bins::new(edges_y);
264 /// # let square_grid = Grid::from(vec![bins_x, bins_y]);
265 /// // out-of-bound on y-axis
266 /// assert_eq!(
267 /// square_grid.index(&[0, 2]),
268 /// vec![0..1, 3..4],
269 /// );
270 /// ```
271 #[must_use]
272 pub fn index(&self, index: &[usize]) -> Vec<Range<A>> {
273 assert_eq!(
274 index.len(),
275 self.ndim(),
276 "Dimension mismatch: the index has {0:?} dimensions, the grid \
277 expected {1:?} dimensions.",
278 index.len(),
279 self.ndim()
280 );
281 izip!(&self.projections, index)
282 .map(|(bins, &i)| bins.index(i))
283 .collect()
284 }
285}
286
287/// A builder used to create [`Grid`] instances for [`histogram`] computations.
288///
289/// # Examples
290///
291/// Basic usage, creating a `Grid` with some observations and a given [`strategy`]:
292///
293/// ```
294/// use ndarray::Array;
295/// use ndarray_stats::histogram::{strategies::Auto, Bins, Edges, Grid, GridBuilder};
296///
297/// // 1-dimensional observations, as a (n_observations, n_dimension) 2-d matrix
298/// let observations = Array::from_shape_vec(
299/// (12, 1),
300/// vec![1, 4, 5, 2, 100, 20, 50, 65, 27, 40, 45, 23],
301/// ).unwrap();
302///
303/// // The optimal grid layout is inferred from the data, given a chosen strategy, Auto in this case
304/// let grid = GridBuilder::<Auto<usize>>::from_array(&observations).unwrap().build();
305/// // Equivalently, build a Grid directly
306/// let expected_grid = Grid::from(vec![Bins::new(Edges::from(vec![1, 20, 39, 58, 77, 96, 115]))]);
307///
308/// assert_eq!(grid, expected_grid);
309/// ```
310///
311/// [`Grid`]: struct.Grid.html
312/// [`histogram`]: trait.HistogramExt.html
313/// [`strategy`]: strategies/index.html
314#[allow(clippy::module_name_repetitions)]
315pub struct GridBuilder<B: BinsBuildingStrategy> {
316 bin_builders: Vec<B>,
317}
318
319impl<A, B> GridBuilder<B>
320where
321 A: Ord,
322 B: BinsBuildingStrategy<Elem = A>,
323{
324 /// Returns a `GridBuilder` for building a [`Grid`] with a given [`strategy`] and some
325 /// observations in a 2-dimensionalarray with shape `(n_observations, n_dimension)`.
326 ///
327 /// # Errors
328 ///
329 /// It returns [`BinsBuildError`] if it is not possible to build a [`Grid`] given
330 /// the observed data according to the chosen [`strategy`].
331 ///
332 /// # Examples
333 ///
334 /// See [Trait-level examples] for basic usage.
335 ///
336 /// [`Grid`]: struct.Grid.html
337 /// [`strategy`]: strategies/index.html
338 /// [`BinsBuildError`]: errors/enum.BinsBuildError.html
339 /// [Trait-level examples]: struct.GridBuilder.html#examples
340 pub fn from_array<S>(array: &ArrayBase<S, Ix2>) -> Result<Self, BinsBuildError>
341 where
342 S: Data<Elem = A>,
343 {
344 let bin_builders = array
345 .axis_iter(Axis(1))
346 .map(|data| B::from_array(&data))
347 .collect::<Result<Vec<B>, BinsBuildError>>()?;
348 Ok(Self { bin_builders })
349 }
350
351 /// Returns a [`Grid`] instance, with building parameters infered in [`from_array`], according
352 /// to the specified [`strategy`] and observations provided.
353 ///
354 /// # Examples
355 ///
356 /// See [Trait-level examples] for basic usage.
357 ///
358 /// [`Grid`]: struct.Grid.html
359 /// [`strategy`]: strategies/index.html
360 /// [`from_array`]: #method.from_array.html
361 #[must_use]
362 pub fn build(&self) -> Grid<A> {
363 let projections: Vec<_> = self.bin_builders.iter().map(|b| b.build()).collect();
364 Grid::from(projections)
365 }
366}