use crate::geometry::{Feature, FeatureCollection, Geometry, Point, PropertyValue};
use rustial_math::GeoCoord;
use std::collections::HashMap;
#[derive(Debug, Clone)]
pub struct ClusterOptions {
pub radius: f64,
pub max_zoom: u8,
pub min_zoom: u8,
pub min_points: usize,
pub tile_extent: f64,
}
impl Default for ClusterOptions {
fn default() -> Self {
Self {
radius: 50.0,
max_zoom: 16,
min_zoom: 0,
min_points: 2,
tile_extent: 512.0,
}
}
}
#[derive(Debug, Clone)]
struct ClusterNode {
x: f64,
y: f64,
point_count: u32,
source_index: u32,
cluster_id: u32,
cluster_zoom: u8,
children: Vec<u32>,
properties: HashMap<String, PropertyValue>,
}
struct SpatialGrid {
cells: HashMap<u64, Vec<usize>>,
cell_size: f64,
}
impl SpatialGrid {
fn new(cell_size: f64) -> Self {
Self {
cells: HashMap::new(),
cell_size,
}
}
fn insert(&mut self, idx: usize, x: f64, y: f64) {
let key = self.cell_key(x, y);
self.cells.entry(key).or_default().push(idx);
}
fn query_radius(&self, cx: f64, cy: f64, radius: f64) -> Vec<usize> {
let min_cell_x = ((cx - radius) / self.cell_size).floor() as i32;
let max_cell_x = ((cx + radius) / self.cell_size).floor() as i32;
let min_cell_y = ((cy - radius) / self.cell_size).floor() as i32;
let max_cell_y = ((cy + radius) / self.cell_size).floor() as i32;
let mut result = Vec::new();
for gx in min_cell_x..=max_cell_x {
for gy in min_cell_y..=max_cell_y {
let key = Self::raw_cell_key(gx, gy);
if let Some(bucket) = self.cells.get(&key) {
result.extend_from_slice(bucket);
}
}
}
result
}
#[inline]
fn cell_key(&self, x: f64, y: f64) -> u64 {
let gx = (x / self.cell_size).floor() as i32;
let gy = (y / self.cell_size).floor() as i32;
Self::raw_cell_key(gx, gy)
}
#[inline]
fn raw_cell_key(gx: i32, gy: i32) -> u64 {
let ux = gx as u32;
let uy = gy as u32;
(ux as u64) << 32 | (uy as u64)
}
}
#[inline]
fn lng_x(lng: f64) -> f64 {
lng / 360.0 + 0.5
}
#[inline]
fn lat_y(lat: f64) -> f64 {
let sin_lat = (lat * std::f64::consts::PI / 180.0).sin();
let y = 0.5 - 0.25 * ((1.0 + sin_lat) / (1.0 - sin_lat)).ln() / std::f64::consts::PI;
y.clamp(0.0, 1.0)
}
#[inline]
fn x_lng(x: f64) -> f64 {
(x - 0.5) * 360.0
}
#[inline]
fn y_lat(y: f64) -> f64 {
let y2 = (180.0 - 360.0 * y) * std::f64::consts::PI / 180.0;
360.0 * y2.exp().atan() / std::f64::consts::PI - 90.0
}
pub struct PointCluster {
options: ClusterOptions,
nodes: Vec<ClusterNode>,
input_count: usize,
original_features: Vec<Feature>,
zoom_nodes: Vec<Vec<usize>>,
next_cluster_id: u32,
}
impl PointCluster {
pub fn new(options: ClusterOptions) -> Self {
let zoom_range = (options.max_zoom as usize + 2).max(1);
Self {
options,
nodes: Vec::new(),
input_count: 0,
original_features: Vec::new(),
zoom_nodes: vec![Vec::new(); zoom_range],
next_cluster_id: 0,
}
}
pub fn load(&mut self, features: &FeatureCollection) {
self.nodes.clear();
self.original_features.clear();
self.next_cluster_id = 0;
for v in &mut self.zoom_nodes {
v.clear();
}
for feature in &features.features {
if let Some(coord) = extract_point_coord(&feature.geometry) {
let node = ClusterNode {
x: lng_x(coord.lon),
y: lat_y(coord.lat),
point_count: 1,
source_index: self.nodes.len() as u32,
cluster_id: u32::MAX,
cluster_zoom: u8::MAX,
children: Vec::new(),
properties: HashMap::new(),
};
self.nodes.push(node);
self.original_features.push(feature.clone());
}
}
self.input_count = self.nodes.len();
if self.input_count == 0 {
return;
}
let top_zoom = self.options.max_zoom as usize + 1;
if top_zoom < self.zoom_nodes.len() {
self.zoom_nodes[top_zoom] = (0..self.input_count).collect();
}
for z in (self.options.min_zoom..=self.options.max_zoom).rev() {
self.cluster_zoom(z);
}
}
pub fn get_clusters_for_zoom(&self, zoom: u8) -> FeatureCollection {
let z = zoom.min(self.options.max_zoom + 1) as usize;
if z >= self.zoom_nodes.len() {
return FeatureCollection {
features: self.original_features.clone(),
};
}
let active = &self.zoom_nodes[z];
let mut out = Vec::with_capacity(active.len());
for &idx in active {
let node = &self.nodes[idx];
if node.point_count == 1 && (node.source_index as usize) < self.original_features.len()
{
out.push(self.original_features[node.source_index as usize].clone());
} else {
let coord = GeoCoord::new(y_lat(node.y), x_lng(node.x), 0.0);
let mut props = node.properties.clone();
props.insert("cluster".into(), PropertyValue::Bool(true));
props.insert(
"cluster_id".into(),
PropertyValue::Number(node.cluster_id as f64),
);
props.insert(
"point_count".into(),
PropertyValue::Number(node.point_count as f64),
);
props.insert(
"point_count_abbreviated".into(),
PropertyValue::String(abbreviate_count(node.point_count)),
);
out.push(Feature {
geometry: Geometry::Point(Point { coord }),
properties: props,
});
}
}
FeatureCollection { features: out }
}
pub fn get_cluster_expansion_zoom(&self, cluster_id: u32) -> Option<u8> {
for node in &self.nodes {
if node.cluster_id == cluster_id && node.point_count > 1 {
return Some(node.cluster_zoom.saturating_add(1));
}
}
None
}
pub fn get_cluster_children(&self, cluster_id: u32) -> Option<Vec<Feature>> {
let node = self
.nodes
.iter()
.find(|n| n.cluster_id == cluster_id && n.point_count > 1)?;
let mut children = Vec::new();
for &child_idx in &node.children {
let child_idx = child_idx as usize;
if child_idx >= self.nodes.len() {
continue;
}
let child = &self.nodes[child_idx];
if child.point_count == 1
&& (child.source_index as usize) < self.original_features.len()
{
children.push(self.original_features[child.source_index as usize].clone());
} else {
let coord = GeoCoord::new(y_lat(child.y), x_lng(child.x), 0.0);
let mut props = child.properties.clone();
props.insert("cluster".into(), PropertyValue::Bool(true));
props.insert(
"cluster_id".into(),
PropertyValue::Number(child.cluster_id as f64),
);
props.insert(
"point_count".into(),
PropertyValue::Number(child.point_count as f64),
);
children.push(Feature {
geometry: Geometry::Point(Point { coord }),
properties: props,
});
}
}
Some(children)
}
pub fn get_cluster_leaves(
&self,
cluster_id: u32,
limit: usize,
offset: usize,
) -> Option<Vec<Feature>> {
let node = self
.nodes
.iter()
.find(|n| n.cluster_id == cluster_id && n.point_count > 1)?;
let mut leaves = Vec::new();
let mut skipped = 0usize;
self.collect_leaves(node, limit, offset, &mut leaves, &mut skipped);
Some(leaves)
}
pub fn input_count(&self) -> usize {
self.input_count
}
fn cluster_zoom(&mut self, zoom: u8) {
let z = zoom as usize;
let parent_z = z + 1;
if parent_z >= self.zoom_nodes.len() {
return;
}
let r = self.options.radius / (self.options.tile_extent * (1u64 << zoom) as f64);
let parent_indices = self.zoom_nodes[parent_z].clone();
let mut grid = SpatialGrid::new(r);
for &idx in &parent_indices {
let node = &self.nodes[idx];
grid.insert(idx, node.x, node.y);
}
let mut consumed = vec![false; self.nodes.len()];
let mut active_at_zoom: Vec<usize> = Vec::new();
for &idx in &parent_indices {
if consumed[idx] {
continue;
}
let node = &self.nodes[idx];
let cx = node.x;
let cy = node.y;
let candidates = grid.query_radius(cx, cy, r);
let mut neighbours: Vec<usize> = Vec::new();
for &cand_idx in &candidates {
if cand_idx == idx || consumed[cand_idx] {
continue;
}
let cand = &self.nodes[cand_idx];
let dx = cand.x - cx;
let dy = cand.y - cy;
if dx * dx + dy * dy <= r * r {
neighbours.push(cand_idx);
}
}
let total_count: u32 = node.point_count
+ neighbours
.iter()
.map(|&i| self.nodes[i].point_count)
.sum::<u32>();
if total_count < self.options.min_points as u32 || neighbours.is_empty() {
active_at_zoom.push(idx);
continue;
}
let mut wx = cx * node.point_count as f64;
let mut wy = cy * node.point_count as f64;
let mut children = vec![idx as u32];
consumed[idx] = true;
for &ni in &neighbours {
let n = &self.nodes[ni];
wx += n.x * n.point_count as f64;
wy += n.y * n.point_count as f64;
children.push(ni as u32);
consumed[ni] = true;
}
let cluster_id = self.next_cluster_id;
self.next_cluster_id += 1;
let cluster_node = ClusterNode {
x: wx / total_count as f64,
y: wy / total_count as f64,
point_count: total_count,
source_index: u32::MAX,
cluster_id,
cluster_zoom: zoom,
children,
properties: HashMap::new(),
};
let cluster_idx = self.nodes.len();
self.nodes.push(cluster_node);
consumed.push(false);
active_at_zoom.push(cluster_idx);
}
for &idx in &parent_indices {
if !consumed[idx] {
}
}
if z < self.zoom_nodes.len() {
self.zoom_nodes[z] = active_at_zoom;
}
}
fn collect_leaves(
&self,
node: &ClusterNode,
limit: usize,
offset: usize,
leaves: &mut Vec<Feature>,
skipped: &mut usize,
) {
if leaves.len() >= limit {
return;
}
for &child_idx in &node.children {
let ci = child_idx as usize;
if ci >= self.nodes.len() {
continue;
}
let child = &self.nodes[ci];
if child.point_count == 1 {
if *skipped < offset {
*skipped += 1;
} else if leaves.len() < limit {
if let Some(f) = self.original_features.get(child.source_index as usize) {
leaves.push(f.clone());
}
}
} else {
self.collect_leaves(child, limit, offset, leaves, skipped);
}
}
}
}
fn extract_point_coord(geometry: &Geometry) -> Option<GeoCoord> {
match geometry {
Geometry::Point(p) => Some(p.coord),
Geometry::MultiPoint(mp) => mp.points.first().map(|p| p.coord),
_ => None,
}
}
fn abbreviate_count(n: u32) -> String {
if n >= 1_000_000 {
format!("{:.1}M", n as f64 / 1_000_000.0)
} else if n >= 10_000 {
format!("{:.0}k", n as f64 / 1_000.0)
} else if n >= 1_000 {
format!("{:.1}k", n as f64 / 1_000.0)
} else {
n.to_string()
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_point_feature(lat: f64, lon: f64, name: &str) -> Feature {
let mut props = HashMap::new();
props.insert("name".into(), PropertyValue::String(name.into()));
Feature {
geometry: Geometry::Point(Point {
coord: GeoCoord::new(lat, lon, 0.0),
}),
properties: props,
}
}
fn sample_features() -> FeatureCollection {
FeatureCollection {
features: vec![
make_point_feature(40.7128, -74.0060, "New York"),
make_point_feature(40.7138, -74.0070, "Near NY 1"),
make_point_feature(40.7118, -74.0050, "Near NY 2"),
make_point_feature(34.0522, -118.2437, "Los Angeles"),
make_point_feature(41.8781, -87.6298, "Chicago"),
make_point_feature(41.8791, -87.6308, "Near Chicago"),
],
}
}
#[test]
fn empty_input_produces_empty_output() {
let mut cluster = PointCluster::new(ClusterOptions::default());
cluster.load(&FeatureCollection::default());
assert_eq!(cluster.input_count(), 0);
let result = cluster.get_clusters_for_zoom(5);
assert!(result.is_empty());
}
#[test]
fn single_point_never_clusters() {
let fc = FeatureCollection {
features: vec![make_point_feature(51.5074, -0.1278, "London")],
};
let mut cluster = PointCluster::new(ClusterOptions::default());
cluster.load(&fc);
for z in 0..=20 {
let result = cluster.get_clusters_for_zoom(z);
assert_eq!(result.len(), 1, "zoom {z} should have 1 feature");
}
}
#[test]
fn nearby_points_cluster_at_low_zoom() {
let fc = sample_features();
let mut cluster = PointCluster::new(ClusterOptions {
radius: 80.0,
max_zoom: 14,
min_points: 2,
..Default::default()
});
cluster.load(&fc);
assert_eq!(cluster.input_count(), 6);
let low = cluster.get_clusters_for_zoom(2);
assert!(
low.len() < 6,
"at zoom 2 some points should cluster (got {} features)",
low.len()
);
let high = cluster.get_clusters_for_zoom(20);
assert_eq!(high.len(), 6);
}
#[test]
fn cluster_features_have_expected_properties() {
let fc = sample_features();
let mut cluster = PointCluster::new(ClusterOptions {
radius: 80.0,
max_zoom: 14,
min_points: 2,
..Default::default()
});
cluster.load(&fc);
let result = cluster.get_clusters_for_zoom(2);
for feature in &result.features {
if let Some(PropertyValue::Bool(true)) = feature.properties.get("cluster") {
let count = feature
.properties
.get("point_count")
.and_then(|v| v.as_f64())
.unwrap_or(0.0);
assert!(count >= 2.0, "cluster point_count should be >= 2");
assert!(
feature.properties.contains_key("cluster_id"),
"cluster should have cluster_id"
);
assert!(
feature.properties.contains_key("point_count_abbreviated"),
"cluster should have point_count_abbreviated"
);
}
}
}
#[test]
fn cluster_expansion_zoom() {
let fc = sample_features();
let mut cluster = PointCluster::new(ClusterOptions {
radius: 80.0,
max_zoom: 14,
min_points: 2,
..Default::default()
});
cluster.load(&fc);
let result = cluster.get_clusters_for_zoom(2);
for feature in &result.features {
if let Some(PropertyValue::Bool(true)) = feature.properties.get("cluster") {
let cid = feature
.properties
.get("cluster_id")
.and_then(|v| v.as_f64())
.unwrap_or(0.0) as u32;
let exp_zoom = cluster.get_cluster_expansion_zoom(cid);
assert!(exp_zoom.is_some(), "expansion zoom should exist");
let ez = exp_zoom.unwrap();
assert!(ez > 2, "expansion zoom should be above query zoom");
}
}
}
#[test]
fn cluster_children_and_leaves() {
let fc = sample_features();
let mut cluster = PointCluster::new(ClusterOptions {
radius: 80.0,
max_zoom: 14,
min_points: 2,
..Default::default()
});
cluster.load(&fc);
let result = cluster.get_clusters_for_zoom(2);
for feature in &result.features {
if let Some(PropertyValue::Bool(true)) = feature.properties.get("cluster") {
let cid = feature
.properties
.get("cluster_id")
.and_then(|v| v.as_f64())
.unwrap_or(0.0) as u32;
let children = cluster.get_cluster_children(cid);
assert!(children.is_some(), "children should exist");
assert!(
!children.unwrap().is_empty(),
"children should not be empty"
);
let leaves = cluster.get_cluster_leaves(cid, 100, 0);
assert!(leaves.is_some(), "leaves should exist");
assert!(!leaves.unwrap().is_empty(), "leaves should not be empty");
}
}
}
#[test]
fn high_zoom_returns_all_originals() {
let fc = sample_features();
let mut cluster = PointCluster::new(ClusterOptions::default());
cluster.load(&fc);
let result = cluster.get_clusters_for_zoom(20);
assert_eq!(result.len(), fc.len());
}
#[test]
fn non_point_features_are_ignored() {
let fc = FeatureCollection {
features: vec![
make_point_feature(40.0, -74.0, "point"),
Feature {
geometry: Geometry::LineString(crate::geometry::LineString {
coords: vec![
GeoCoord::new(40.0, -74.0, 0.0),
GeoCoord::new(41.0, -73.0, 0.0),
],
}),
properties: HashMap::new(),
},
],
};
let mut cluster = PointCluster::new(ClusterOptions::default());
cluster.load(&fc);
assert_eq!(cluster.input_count(), 1);
}
#[test]
fn abbreviate_count_formatting() {
assert_eq!(abbreviate_count(1), "1");
assert_eq!(abbreviate_count(999), "999");
assert_eq!(abbreviate_count(1_000), "1.0k");
assert_eq!(abbreviate_count(5_432), "5.4k");
assert_eq!(abbreviate_count(10_000), "10k");
assert_eq!(abbreviate_count(99_999), "100k");
assert_eq!(abbreviate_count(1_000_000), "1.0M");
assert_eq!(abbreviate_count(1_500_000), "1.5M");
}
#[test]
fn mercator_round_trip() {
let coords = [
(0.0, 0.0),
(51.5, -0.12),
(40.71, -74.0),
(-33.87, 151.21),
(85.0, 180.0),
(-85.0, -180.0),
];
for (lat, lon) in coords {
let x = lng_x(lon);
let y = lat_y(lat);
let lon2 = x_lng(x);
let lat2 = y_lat(y);
assert!(
(lat - lat2).abs() < 0.01,
"lat round-trip failed: {lat} -> {lat2}"
);
assert!(
(lon - lon2).abs() < 0.01,
"lon round-trip failed: {lon} -> {lon2}"
);
}
}
}