h3ron_polars/algorithm/chunkedarray/
cell_clusters.rs

1use crate::{Error, FromIndexIterator, IndexChunked};
2use h3ron::algorithm::{find_cell_clusters, find_cell_clusters_eq_value};
3use h3ron::H3Cell;
4use polars_core::frame::DataFrame;
5use polars_core::prelude::{NamedFrom, Series};
6
/// Clustering of neighboring H3 cells into dataframes with one row per cluster.
pub trait H3CellClusters {
    /// Find clusters of neighboring cells.
    ///
    /// Returns a new dataframe with two columns:
    /// * `cluster_num`: artificial id (u32) for the cluster.
    /// * `cells`: list of cells in the cluster
    ///
    /// # Errors
    ///
    /// Returns an error when the clustering itself fails or when the resulting
    /// dataframe can not be created.
    fn h3_cell_clusters(&self) -> Result<DataFrame, Error>;

    /// Find clusters of neighboring cells where the same value is associated with the cells.
    ///
    /// The `self` and `values` should have the same length. Any excess in either of them
    /// will be ignored.
    ///
    /// Cells are assumed to be unique, otherwise the behaviour is undefined.
    ///
    /// Returns a new dataframe with three columns, in this order:
    /// * `cluster_num`: artificial id (u32) for the cluster.
    /// * The value of the series given as the `values` parameter using the name of that series.
    /// * `cells`: list of cells in the cluster
    ///
    /// # Errors
    ///
    /// Returns an error when the clustering itself fails, when the value column can not
    /// be rebuilt from the per-cluster values, or when the resulting dataframe can not
    /// be created.
    fn h3_cell_clusters_eq_value(&self, values: &Series) -> Result<DataFrame, Error>;
}
30
31impl<'a> H3CellClusters for IndexChunked<'a, H3Cell> {
32    fn h3_cell_clusters(&self) -> Result<DataFrame, Error> {
33        let clusters = find_cell_clusters(self.iter_indexes_nonvalidated().flatten())?;
34        let capacity = clusters.len();
35
36        let (cluster_num, cells) = clusters.into_iter().enumerate().fold(
37            (Vec::with_capacity(capacity), Vec::with_capacity(capacity)),
38            |mut acc, (cluster_num, cells)| {
39                acc.0.push(cluster_num as u32);
40                acc.1.push(Series::from_index_iter(cells));
41                acc
42            },
43        );
44        DataFrame::new(vec![
45            Series::new("cluster_num", cluster_num),
46            Series::new("cells", cells),
47        ])
48        .map_err(Error::from)
49    }
50
51    fn h3_cell_clusters_eq_value(&self, values: &Series) -> Result<DataFrame, Error> {
52        self.chunked_array.rechunk(); // avoid panic in values.iter
53        let clusters = find_cell_clusters_eq_value(
54            self.iter_indexes_nonvalidated()
55                .zip(values.iter())
56                .flat_map(|(cell, value)| cell.map(|cell| (cell, value))),
57        )?;
58
59        let capacity = clusters.len();
60
61        let (cluster_num, cluster_value, cells) = clusters.into_iter().enumerate().fold(
62            (
63                Vec::with_capacity(capacity),
64                Vec::with_capacity(capacity),
65                Vec::with_capacity(capacity),
66            ),
67            |mut acc, (cluster_num, (cells, value))| {
68                acc.0.push(cluster_num as u32);
69                acc.1.push(value);
70                acc.2.push(Series::from_index_iter(cells));
71                acc
72            },
73        );
74        DataFrame::new(vec![
75            Series::new("cluster_num", cluster_num),
76            Series::from_any_values(values.name(), &cluster_value, true)?,
77            Series::new("cells", cells),
78        ])
79        .map_err(Error::from)
80    }
81}
82
#[cfg(test)]
mod tests {
    use crate::algorithm::H3CellClusters;
    use crate::{AsH3CellChunked, FromIndexIterator};
    use h3ron::H3Cell;
    use polars_core::prelude::{NamedFrom, Series, UInt64Chunked};
    use std::iter::repeat;

    /// Two disks of cells around different coordinates, each disk carrying its own
    /// value, are expected to end up as exactly two clusters.
    #[test]
    fn find_cell_clusters_simple() {
        // First disk: radius 3 around the first coordinate, associated with value 1.
        let first_disk = H3Cell::from_coordinate((12.2, 14.5).into(), 6)
            .unwrap()
            .grid_disk(3)
            .unwrap();
        let mut cells: Vec<_> = first_disk.iter().collect();
        let first_len = cells.len();

        // Second disk: radius 2 around the second coordinate, associated with value 5.
        let second_disk = H3Cell::from_coordinate((42.2, 45.5).into(), 6)
            .unwrap()
            .grid_disk(2)
            .unwrap();
        cells.extend(second_disk.iter());
        let second_len = cells.len() - first_len;

        let values: Vec<_> = repeat(1u32)
            .take(first_len)
            .chain(repeat(5u32).take(second_len))
            .collect();

        let cells = UInt64Chunked::from_index_iter::<_, H3Cell>(cells.iter());
        let values = Series::new("value", values);
        assert_eq!(cells.len(), values.len());

        let clusters = cells.h3cell().h3_cell_clusters_eq_value(&values).unwrap();
        assert_eq!(clusters.shape().0, 2);
        //dbg!(clusters);
    }
}