h3ron_polars/algorithm/frame/
resolution.rs

1use polars_core::prelude::{ChunkCompare, DataFrame, NamedFrom, Series, UInt8Chunked};
2
3use crate::algorithm::chunkedarray::H3Resolution;
4use crate::frame::H3DataFrame;
5use crate::{AsH3IndexChunked, Error, IndexValue};
6
7pub trait H3ResolutionOp {
8    /// obtain the contained H3 resolutions
9    fn h3_resolution<IX, S>(&self, index_column_name: S) -> Result<UInt8Chunked, Error>
10    where
11        IX: IndexValue,
12        S: AsRef<str>;
13
14    /// Partition the dataframe into separate frames for each H3 resolution found in the contents.
15    fn h3_partition_by_resolution<IX, S>(
16        &self,
17        index_column_name: S,
18    ) -> Result<Vec<(u8, Self)>, Error>
19    where
20        Self: Sized,
21        IX: IndexValue,
22        S: AsRef<str>;
23}
24
/// Name given to the temporary series which holds the per-row H3 resolutions while
/// a dataframe is partitioned by resolution.
const RSPLIT_R_COL_NAME: &str = "_rsplit_resolution";
26
27fn col_resolutions<IX>(df: &DataFrame, col: &str) -> Result<UInt8Chunked, Error>
28where
29    IX: IndexValue,
30{
31    let ic = df.column(col)?.u64()?.h3indexchunked::<IX>();
32    Ok(ic.h3_resolution())
33}
34
35fn h3_partition_by_resolution<IX>(
36    df: &DataFrame,
37    index_column_name: &str,
38) -> Result<Vec<(u8, DataFrame)>, Error>
39where
40    IX: IndexValue,
41{
42    let resolutions = Series::new(
43        RSPLIT_R_COL_NAME,
44        col_resolutions::<IX>(df, index_column_name)?,
45    );
46
47    let distinct_resolutions: Vec<u8> = resolutions
48        .drop_nulls()
49        .unique()?
50        .u8()?
51        .into_iter()
52        .flatten()
53        .collect();
54
55    match distinct_resolutions.len() {
56        0 => Ok(vec![]),
57        1 => Ok(vec![(distinct_resolutions[0], df.clone())]),
58        _ => {
59            let mut out_dfs = Vec::with_capacity(distinct_resolutions.len());
60            for h3_resolution in distinct_resolutions {
61                let filtered = df.filter(&resolutions.equal(h3_resolution)?)?;
62                out_dfs.push((h3_resolution, filtered))
63            }
64            Ok(out_dfs)
65        }
66    }
67}
68
69impl H3ResolutionOp for DataFrame {
70    fn h3_resolution<IX, S>(&self, index_column_name: S) -> Result<UInt8Chunked, Error>
71    where
72        IX: IndexValue,
73        S: AsRef<str>,
74    {
75        col_resolutions::<IX>(self, index_column_name.as_ref())
76    }
77
78    fn h3_partition_by_resolution<IX, S>(
79        &self,
80        index_column_name: S,
81    ) -> Result<Vec<(u8, Self)>, Error>
82    where
83        Self: Sized,
84        IX: IndexValue,
85        S: AsRef<str>,
86    {
87        h3_partition_by_resolution::<IX>(self, index_column_name.as_ref())
88    }
89}
90
91impl<IX: IndexValue> H3DataFrame<IX> {
92    /// obtain the contained H3 resolutions
93    pub fn h3_resolution(&self) -> Result<UInt8Chunked, Error> {
94        col_resolutions::<IX>(self.dataframe(), self.h3index_column_name())
95    }
96
97    /// Partition the dataframe into separate frames for each H3 resolution found in the contents.
98    pub fn h3_partition_by_resolution(&self) -> Result<Vec<(u8, Self)>, Error> {
99        self.dataframe()
100            .h3_partition_by_resolution::<IX, _>(self.h3index_column_name())
101            .map(|vc| {
102                vc.into_iter()
103                    .map(|(r, df)| {
104                        (
105                            r,
106                            H3DataFrame::from_dataframe_nonvalidated(
107                                df,
108                                self.h3index_column_name(),
109                            ),
110                        )
111                    })
112                    .collect()
113            })
114    }
115}
116
#[cfg(test)]
mod tests {
    use h3ron::{H3Cell, Index};
    use polars_core::frame::DataFrame;
    use polars_core::prelude::{NamedFrom, Series};

    use crate::algorithm::frame::H3ResolutionOp;

    /// Four cells with the resolutions 7, 8, 8 and 5 must be split into three
    /// partitions, with the partition of resolution 8 containing two rows.
    #[test]
    fn partition_frame_by_resolution() {
        // coordinate and H3 resolution for each cell of the test frame
        let locations: [((f64, f64), u8); 4] = [
            ((45.6, -45.8), 7),
            ((45.6, -10.2), 8),
            ((45.6, 50.2), 8),
            ((-23.1, -60.5), 5),
        ];
        let h3indexes: Vec<u64> = locations
            .iter()
            .map(|&(coordinate, resolution)| {
                H3Cell::from_coordinate(coordinate.into(), resolution)
                    .unwrap()
                    .h3index()
            })
            .collect();

        let cell_series = Series::new("cell", h3indexes);
        let values: Vec<u32> = (0u32..(cell_series.len() as u32)).collect();
        let value_series = Series::new("value", &values);
        let frame = DataFrame::new(vec![cell_series, value_series]).unwrap();

        let partitions = frame
            .h3_partition_by_resolution::<H3Cell, _>("cell")
            .unwrap();
        assert_eq!(partitions.len(), 3);

        for (resolution, partition) in partitions {
            let expected_rows = match resolution {
                8 => 2,
                _ => 1,
            };
            assert_eq!(partition.shape(), (expected_rows, 2));
        }
    }
}