supercluster_rs/
builder.rs

use std::collections::HashMap;

use geo_index::kdtree::KDTreeIndex;

use crate::cluster::{ClusterData, ClusterId};
use crate::options::SuperclusterOptions;
use crate::tree::TreeWithData;
use crate::Supercluster;

/// A builder used to construct a [Supercluster] instance.
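///
/// Typical usage, shown as a minimal sketch (the `supercluster_rs` crate path is an
/// assumption based on the directory name, so the example is marked `ignore`):
///
/// ```ignore
/// use supercluster_rs::SuperclusterBuilder;
///
/// let mut builder = SuperclusterBuilder::new(2);
/// builder.add(-122.4194, 37.7749); // San Francisco (lon, lat)
/// builder.add(-73.9857, 40.7484); // New York (lon, lat)
/// let _supercluster = builder.finish();
/// ```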
pub struct SuperclusterBuilder {
    options: SuperclusterOptions,
    // TODO: in the future, this should be a chunked array of geoarrow points
    points: Vec<(f64, f64)>,
    pos: usize,
    // If points are already in spherical mercator
    // preprojected: bool,
}

impl SuperclusterBuilder {
    /// Construct a new [SuperclusterBuilder] with the given number of points and default options.
    pub fn new(num_items: usize) -> Self {
        Self::new_with_options(num_items, Default::default())
    }

    /// Construct a new [SuperclusterBuilder] with the given number of points and the given options.
    pub fn new_with_options(num_items: usize, options: SuperclusterOptions) -> Self {
        let points = Vec::with_capacity(num_items);

        Self {
            options,
            points,
            pos: 0,
        }
    }

    /// Add a point to the index, returning the position at which it was added.
    pub fn add(&mut self, x: f64, y: f64) -> usize {
        let idx = self.pos;
        self.points.push((x, y));
        self.pos += 1;
        idx
    }

    /// Convert a [SuperclusterBuilder] to a [Supercluster] by running hierarchical clustering.
    pub fn finish(self) -> Supercluster {
        assert_eq!(
            self.pos,
            self.points.len(),
            "Expected {} added points, got {}.",
            self.points.len(),
            self.pos
        );

        let min_zoom = self.options.min_zoom;
        let max_zoom = self.options.max_zoom;
        let node_size = self.options.node_size;

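        // Wrap each input (lon, lat) point in a `ClusterData` entry tagged with its
        // source index so clusters can be traced back to the original points.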
        let mut data = Vec::with_capacity(self.points.len());
        for (i, (lon, lat)) in self.points.iter().enumerate() {
            data.push(ClusterData::new_geographic(
                *lon,
                *lat,
                ClusterId::new_source_id(i),
            ));
        }

        let full_res_tree = TreeWithData::new(data, node_size);

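        // One tree per zoom level in `min_zoom..=max_zoom`, plus the full-resolution
        // (unclustered) tree stored under `max_zoom + 1`.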
        let mut trees = HashMap::with_capacity(max_zoom - min_zoom + 2);
        trees.insert(max_zoom + 1, full_res_tree);

        for zoom in (min_zoom..=max_zoom).rev() {
            // The tree at the next higher zoom
            let previous_tree = trees.get_mut(&(zoom + 1)).unwrap();
            let current = self.cluster(previous_tree, zoom);

            trees.insert(zoom, current);
        }

        Supercluster::new(self.points, trees, self.options)
    }

    /// Note: this mutates `previous_tree_with_data`'s `data`.
    // This is derived from Supercluster._cluster in the original JS implementation
    fn cluster(&self, previous_tree_with_data: &mut TreeWithData, zoom: usize) -> TreeWithData {
        let radius = self.options.radius;
        let extent = self.options.extent;
        let min_points = self.options.min_points;

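        // Clustering radius for this zoom in projected world units: `radius` is in
        // pixels relative to the tile `extent`, and 2^zoom is the number of tiles
        // spanning the world at this zoom (as in the original JS implementation).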
        let r = radius / (extent * usize::pow(2, zoom.try_into().unwrap()) as f64);
        let data = &mut previous_tree_with_data.data;
        let previous_tree = &previous_tree_with_data.tree;
        let mut next_data = vec![];

        // loop through each point
        for i in 0..data.len() {
            // if we've already visited the point at this zoom level, skip it
            if data[i].zoom.is_some_and(|z| z <= zoom) {
                continue;
            }

            data[i].zoom = Some(zoom);

            // find all nearby points
            let x = data[i].x;
            let y = data[i].y;
            let neighbor_ids = previous_tree.as_ref().within(x, y, r);

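            // `neighbor_ids` index into `data`; the query point itself is expected to
            // be in the list (distance 0) but is excluded by the zoom checks below,
            // since its `zoom` was just set.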
            let num_points_origin = data[i].num_points;
            let mut num_points = num_points_origin;

            // count the number of points in a potential cluster
            for neighbor_id in &neighbor_ids {
                // filter out neighbors that are already processed

                // NOTE: in the original implementation, it checked
                // `if (data[k + OFFSET_ZOOM] > zoom)`
                // But note that the `OFFSET_ZOOM` in the data array was **initialized** to
                // `Infinity`. Therefore, this should also be true when `.zoom.is_none()`.

                if data[*neighbor_id].zoom.is_none()
                    || data[*neighbor_id].zoom.is_some_and(|z| z > zoom)
                {
                    num_points += data[*neighbor_id].num_points;
                }
            }

            // if there were neighbors to merge, and there are enough points to form a cluster
            if num_points > num_points_origin && num_points >= min_points {
                let mut wx = x * num_points_origin as f64;
                let mut wy = y * num_points_origin as f64;

                // encode both zoom and point index on which the cluster originated -- offset by total length of features
                let id = ClusterId::new(i, zoom, self.points.len());

                for neighbor_id in neighbor_ids {
                    if data[neighbor_id].zoom.is_some_and(|z| z <= zoom) {
                        continue;
                    }

                    // save the zoom (so it doesn't get processed twice)
                    data[neighbor_id].zoom = Some(zoom);

                    let num_points2 = data[neighbor_id].num_points as f64;

                    // accumulate coordinates for calculating weighted center
                    wx += data[neighbor_id].x * num_points2;
                    wy += data[neighbor_id].y * num_points2;

                    data[neighbor_id].parent_id = Some(id);
                }

                data[i].parent_id = Some(id);

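                // `zoom: None` marks the new cluster as not yet processed at any zoom,
                // mirroring the `Infinity` initialization noted above for the JS version.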
                next_data.push(ClusterData {
                    x: wx / num_points as f64,
                    y: wy / num_points as f64,
                    zoom: None,
                    source_id: id,
                    parent_id: None,
                    num_points,
                });
            } else {
                // leave points unclustered
                next_data.push(data[i].clone());

                if num_points > 1 {
                    for neighbor_id in neighbor_ids {
                        if data[neighbor_id].zoom.is_some_and(|z| z <= zoom) {
                            continue;
                        }

                        data[neighbor_id].zoom = Some(zoom);

                        next_data.push(data[neighbor_id].clone());
                    }
                }
            }
        }

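        // Index the clusters and points that survive at this zoom into a new tree.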
        TreeWithData::new(next_data, self.options.node_size)
    }
}

#[cfg(test)]
mod test {
    use crate::test::load_fixture::load_places;

    use super::*;

    #[test]
    fn test_builder() {
        let coords = load_places();
        let mut builder = SuperclusterBuilder::new(coords.len());
        for coord in coords {
            builder.add(coord[0], coord[1]);
        }
        let _supercluster = builder.finish();
        // dbg!(supercluster);
    }
}