h3ron_ndarray/
array.rs

1use std::cmp::min;
2use std::hash::Hash;
3
4use geo_types::{Coord, Rect};
5use log::debug;
6use ndarray::{ArrayView2, Axis};
7use rayon::prelude::*;
8
9use h3ron::collections::HashMap;
10use h3ron::{collections::CompactedCellVec, ToCoordinate, ToH3Cells};
11
12use crate::resolution::{nearest_h3_resolution, ResolutionSearchMode};
13use crate::{error::Error, transform::Transform};
14
15/// The order of the axis in the two-dimensional array
16#[derive(Copy, Clone)]
17#[allow(clippy::upper_case_acronyms)]
18pub enum AxisOrder {
19    /// `X,Y` ordering
20    XY,
21
22    /// `Y,X` ordering
23    ///
24    /// This is the order used by [github.com/georust/gdal](https://github.com/georust/gdal) (behind the `ndarray` feature gate)
25    YX,
26}
27
28impl AxisOrder {
29    pub const fn x_axis(&self) -> usize {
30        match self {
31            Self::XY => 0,
32            Self::YX => 1,
33        }
34    }
35
36    pub const fn y_axis(&self) -> usize {
37        match self {
38            Self::XY => 1,
39            Self::YX => 0,
40        }
41    }
42}
43
44fn find_continuous_chunks_along_axis<T>(
45    a: &ArrayView2<T>,
46    axis: usize,
47    nodata_value: &T,
48) -> Vec<(usize, usize)>
49where
50    T: Sized + PartialEq,
51{
52    let mut chunks = Vec::new();
53    let mut current_chunk_start: Option<usize> = None;
54
55    for (r0pos, r0) in a.axis_iter(Axis(axis)).enumerate() {
56        if r0.iter().any(|v| v != nodata_value) {
57            if current_chunk_start.is_none() {
58                current_chunk_start = Some(r0pos);
59            }
60        } else if let Some(begin) = current_chunk_start {
61            chunks.push((begin, r0pos - 1));
62            current_chunk_start = None;
63        }
64    }
65    if let Some(begin) = current_chunk_start {
66        chunks.push((begin, a.shape()[axis] - 1));
67    }
68    chunks
69}
70
71/// Find all boxes in the array where there are any values except the `nodata_value`
72///
73/// This implementation is far from perfect and often recognizes multiple smaller
74/// clusters as one as its based on completely empty columns and rows, but it is probably
75/// sufficient for the purpose to reduce the number of hexagons
76/// to be generated when dealing with fragmented/sparse datasets.
77fn find_boxes_containing_data<T>(
78    a: &ArrayView2<T>,
79    nodata_value: &T,
80    axis_order: &AxisOrder,
81) -> Vec<Rect<usize>>
82where
83    T: Sized + PartialEq,
84{
85    find_continuous_chunks_along_axis(a, axis_order.x_axis(), nodata_value)
86        .into_iter()
87        .flat_map(|chunk_x_raw_indexes| {
88            let sv = {
89                let x_raw_range = chunk_x_raw_indexes.0..=chunk_x_raw_indexes.1;
90                match axis_order {
91                    AxisOrder::XY => a.slice(s![x_raw_range, ..]),
92                    AxisOrder::YX => a.slice(s![.., x_raw_range]),
93                }
94            };
95            find_continuous_chunks_along_axis(&sv, axis_order.y_axis(), nodata_value)
96                .into_iter()
97                .flat_map(move |chunks_y_raw_indexes| {
98                    let sv2 = {
99                        let x_raw_range = 0..=(chunk_x_raw_indexes.1 - chunk_x_raw_indexes.0);
100                        let y_raw_range = chunks_y_raw_indexes.0..=chunks_y_raw_indexes.1;
101                        match axis_order {
102                            AxisOrder::XY => sv.slice(s![x_raw_range, y_raw_range]),
103                            AxisOrder::YX => sv.slice(s![y_raw_range, x_raw_range]),
104                        }
105                    };
106
107                    // one more iteration along axis 0 to get the specific range for that axis 1 range
108                    find_continuous_chunks_along_axis(&sv2, axis_order.x_axis(), nodata_value)
109                        .into_iter()
110                        .map(move |chunks_x_indexes| {
111                            Rect::new(
112                                Coord {
113                                    x: chunks_x_indexes.0 + chunk_x_raw_indexes.0,
114                                    y: chunks_y_raw_indexes.0,
115                                },
116                                Coord {
117                                    x: chunks_x_indexes.1 + chunk_x_raw_indexes.0,
118                                    y: chunks_y_raw_indexes.1,
119                                },
120                            )
121                        })
122                })
123        })
124        .collect::<Vec<_>>()
125}
126
127/// convert a 2-d ndarray to h3
128pub struct H3Converter<'a, T>
129where
130    T: Sized + PartialEq + Sync + Eq + Hash,
131{
132    arr: &'a ArrayView2<'a, T>,
133    nodata_value: &'a Option<T>,
134    transform: &'a Transform,
135    axis_order: AxisOrder,
136}
137
138impl<'a, T> H3Converter<'a, T>
139where
140    T: Sized + PartialEq + Sync + Eq + Hash,
141{
142    pub fn new(
143        arr: &'a ArrayView2<'a, T>,
144        nodata_value: &'a Option<T>,
145        transform: &'a Transform,
146        axis_order: AxisOrder,
147    ) -> Self {
148        Self {
149            arr,
150            nodata_value,
151            transform,
152            axis_order,
153        }
154    }
155
156    /// find the h3 resolution closest to the size of a pixel in an array
157    pub fn nearest_h3_resolution(&self, search_mode: ResolutionSearchMode) -> Result<u8, Error> {
158        nearest_h3_resolution(
159            self.arr.shape(),
160            self.transform,
161            &self.axis_order,
162            search_mode,
163        )
164    }
165
166    fn rects_with_data_with_nodata(&self, rect_size: usize, nodata: &T) -> Vec<Rect<f64>> {
167        self.arr
168            .axis_chunks_iter(Axis(self.axis_order.x_axis()), rect_size)
169            .into_par_iter() // requires T to be Sync
170            .enumerate()
171            .map(|(axis_x_chunk_i, axis_x_chunk)| {
172                let mut rects = Vec::new();
173                for chunk_x_rect in
174                    find_boxes_containing_data(&axis_x_chunk, nodata, &self.axis_order)
175                {
176                    let offset_x = (axis_x_chunk_i * rect_size) + chunk_x_rect.min().x;
177                    let chunk_rect_view = {
178                        let x_range = chunk_x_rect.min().x..chunk_x_rect.max().x;
179                        let y_range = chunk_x_rect.min().y..chunk_x_rect.max().y;
180                        match self.axis_order {
181                            AxisOrder::XY => axis_x_chunk.slice(s![x_range, y_range]),
182                            AxisOrder::YX => axis_x_chunk.slice(s![y_range, x_range]),
183                        }
184                    };
185                    rects.extend(
186                        chunk_rect_view
187                            .axis_chunks_iter(Axis(self.axis_order.y_axis()), rect_size)
188                            .enumerate()
189                            .map(|(axis_y_chunk_i, axis_y_chunk)| {
190                                let offset_y = (axis_y_chunk_i * rect_size) + chunk_x_rect.min().y;
191
192                                // the window in array coordinates
193                                Rect::new(
194                                    Coord {
195                                        x: offset_x as f64,
196                                        y: offset_y as f64,
197                                    },
198                                    // add 1 to the max coordinate to include the whole last pixel
199                                    Coord {
200                                        x: (offset_x
201                                            + axis_y_chunk.shape()[self.axis_order.x_axis()]
202                                            + 1) as f64,
203                                        y: (offset_y
204                                            + axis_y_chunk.shape()[self.axis_order.y_axis()]
205                                            + 1) as f64,
206                                    },
207                                )
208                            }),
209                    )
210                }
211                rects
212            })
213            .flatten()
214            .collect()
215    }
216
217    fn rects_with_data_without_nodata(&self, rect_size: usize) -> Vec<Rect<f64>> {
218        // just create tiles covering the complete array
219        let x_size = self.arr.shape()[self.axis_order.x_axis()];
220        let y_size = self.arr.shape()[self.axis_order.y_axis()];
221        (0..((x_size as f64 / rect_size as f64).ceil() as usize))
222            .flat_map(move |r_x| {
223                (0..((y_size as f64 / rect_size as f64).ceil() as usize)).map(move |r_y| {
224                    Rect::new(
225                        Coord {
226                            x: (r_x * rect_size) as f64,
227                            y: (r_y * rect_size) as f64,
228                        },
229                        Coord {
230                            x: (min(x_size, (r_x + 1) * rect_size)) as f64,
231                            y: (min(y_size, (r_y + 1) * rect_size)) as f64,
232                        },
233                    )
234                })
235            })
236            .collect()
237    }
238
239    fn rects_with_data(&self, rect_size: usize) -> Vec<Rect<f64>> {
240        self.nodata_value.as_ref().map_or_else(
241            || self.rects_with_data_without_nodata(rect_size),
242            |nodata| self.rects_with_data_with_nodata(rect_size, nodata),
243        )
244    }
245
246    pub fn to_h3(
247        &self,
248        h3_resolution: u8,
249        compact: bool,
250    ) -> Result<HashMap<&'a T, CompactedCellVec>, Error> {
251        let inverse_transform = self.transform.invert()?;
252
253        let rect_size = (self.arr.shape()[self.axis_order.x_axis()] / 10).clamp(10, 100);
254        let rects = self.rects_with_data(rect_size);
255        let n_rects = rects.len();
256        debug!(
257            "to_h3: found {} rects containing non-nodata values",
258            n_rects
259        );
260
261        let chunk_h3_maps = rects
262            .into_par_iter()
263            .enumerate()
264            .map(|(array_window_i, array_window)| {
265                debug!(
266                    "to_h3: rect {}/{} with size {} x {}",
267                    array_window_i,
268                    n_rects,
269                    array_window.width(),
270                    array_window.height()
271                );
272
273                // the window in geographical coordinates
274                let window_box = self.transform * &array_window;
275
276                convert_array_window(
277                    self.arr,
278                    window_box,
279                    &inverse_transform,
280                    self.axis_order,
281                    self.nodata_value,
282                    h3_resolution,
283                    compact,
284                )
285            })
286            .collect::<Result<Vec<_>, _>>()?;
287
288        // combine the results from all chunks
289        let mut h3_map = HashMap::default();
290        for chunk_h3_map in chunk_h3_maps.into_iter() {
291            for (value, mut compacted_vec) in chunk_h3_map {
292                h3_map
293                    .entry(value)
294                    .or_insert_with(CompactedCellVec::new)
295                    .append(&mut compacted_vec, false)?;
296            }
297        }
298
299        finalize_chunk_map(h3_map, compact)
300    }
301}
302
303fn convert_array_window<'a, T>(
304    arr: &'a ArrayView2<'a, T>,
305    window_box: Rect<f64>,
306    inverse_transform: &Transform,
307    axis_order: AxisOrder,
308    nodata_value: &Option<T>,
309    h3_resolution: u8,
310    compact: bool,
311) -> Result<HashMap<&'a T, CompactedCellVec>, Error>
312where
313    T: Sized + PartialEq + Sync + Eq + Hash,
314{
315    let mut chunk_h3_map = HashMap::<&T, CompactedCellVec>::default();
316    for cell in window_box.to_h3_cells(h3_resolution)?.iter() {
317        // find the array element for the coordinate of the h3ron index
318        let arr_coord = {
319            let transformed = inverse_transform * cell.to_coordinate()?;
320
321            match axis_order {
322                AxisOrder::XY => [
323                    transformed.x.floor() as usize,
324                    transformed.y.floor() as usize,
325                ],
326                AxisOrder::YX => [
327                    transformed.y.floor() as usize,
328                    transformed.x.floor() as usize,
329                ],
330            }
331        };
332        if let Some(value) = arr.get(arr_coord) {
333            if let Some(nodata) = nodata_value {
334                if nodata == value {
335                    continue;
336                }
337            }
338            chunk_h3_map
339                .entry(value)
340                .or_insert_with(CompactedCellVec::new)
341                .add_cell(cell, false)?;
342        }
343    }
344
345    // do an early compacting to free a bit of memory
346    finalize_chunk_map(chunk_h3_map, compact)
347}
348
349fn finalize_chunk_map<T>(
350    chunk_map: HashMap<&T, CompactedCellVec>,
351    compact: bool,
352) -> Result<HashMap<&T, CompactedCellVec>, Error>
353where
354    T: Sync + Eq + Hash,
355{
356    chunk_map
357        .into_par_iter()
358        .map(|(k, mut compact_vec)| {
359            if compact {
360                compact_vec.compact().map_err(Error::from)
361            } else {
362                compact_vec.dedup().map_err(Error::from)
363            }
364            .map(|_| {
365                compact_vec.shrink_to_fit();
366                (k, compact_vec)
367            })
368        })
369        .collect()
370}
371
372#[cfg(test)]
373mod tests {
374    use crate::array::find_boxes_containing_data;
375    use crate::{AxisOrder, H3Converter, ResolutionSearchMode, Transform};
376
377    #[test]
378    fn test_find_boxes_containing_data() {
379        let arr = array![
380            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
381            [0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
382            [0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0],
383            [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
384            [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
385            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
386            [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
387            [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1],
388            [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1],
389        ];
390        let mut arr_copy = arr.clone();
391
392        let n_elements = arr_copy.shape()[0] * arr_copy.shape()[1];
393        let mut n_elements_in_boxes = 0;
394
395        for rect in find_boxes_containing_data(&arr.view(), &0, &AxisOrder::YX) {
396            n_elements_in_boxes +=
397                (rect.max().x - rect.min().x + 1) * (rect.max().y - rect.min().y + 1);
398
399            for x in rect.min().x..=rect.max().x {
400                for y in rect.min().y..=rect.max().y {
401                    arr_copy[(y, x)] = 0;
402                }
403            }
404        }
405
406        // there should be far less indexes to visit now
407        assert!(n_elements_in_boxes < (n_elements / 2));
408
409        // all elements should have been removed
410        assert_eq!(arr_copy.sum(), 0);
411    }
412
413    #[test]
414    fn preserve_nan_values() {
415        use ordered_float::OrderedFloat;
416        #[rustfmt::skip]
417        let arr = array![
418            [OrderedFloat(f32::NAN), OrderedFloat(1.0_f32)],
419            [OrderedFloat(f32::NAN), OrderedFloat(1.0_f32)],
420        ];
421        let transform = Transform::from_gdal(&[11.0, 1.0, 0.0, 10.0, 1.2, 0.2]);
422
423        let view = arr.view();
424        let converter = H3Converter::new(&view, &None, &transform, AxisOrder::XY);
425        let h3_resolution = converter
426            .nearest_h3_resolution(ResolutionSearchMode::SmallerThanPixel)
427            .unwrap();
428        let cell_map = converter.to_h3(h3_resolution, false).unwrap();
429        assert!(cell_map.contains_key(&OrderedFloat(f32::NAN)));
430        assert!(cell_map.contains_key(&OrderedFloat(1.0_f32)));
431    }
432}