Skip to main content

geo_clustering/
lib.rs

1#![doc = include_str!("../README.md")]
2
3pub mod surface;
4
5use std::collections::BTreeMap;
6
7use geo_core::{BBox, Coordinate};
8use serde::{Deserialize, Serialize};
9use video_analysis_core::{DetectError, Result};
10
11fn invalid_argument(message: impl Into<String>) -> DetectError {
12    DetectError::InvalidArgument(message.into())
13}
14
15/// Point accepted by clustering indexes.
16#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
17pub struct ClusterPoint<Properties = ()> {
18    /// Stable caller-owned point id.
19    pub id: String,
20    /// Longitude in degrees.
21    pub longitude: f64,
22    /// Latitude in degrees.
23    pub latitude: f64,
24    /// Caller-owned properties kept opaque by the clustering algorithm.
25    pub properties: Properties,
26}
27
28/// Cluster returned by viewport queries.
29#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
30pub struct Cluster {
31    /// Stable cluster id for the current query.
32    pub id: String,
33    /// Cluster centroid longitude in degrees.
34    pub longitude: f64,
35    /// Cluster centroid latitude in degrees.
36    pub latitude: f64,
37    /// Number of source points represented by the cluster.
38    pub point_count: usize,
39}
40
41/// Point or aggregate cluster returned by viewport queries.
42#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
43pub enum ClusterItem<Properties = ()> {
44    /// Individual source point.
45    Point(ClusterPoint<Properties>),
46    /// Aggregate cluster.
47    Cluster(Cluster),
48}
49
50/// Bounding box in `[west, south, east, north]` order.
51pub type ClusterBounds = [f64; 4];
52
53/// Minimal grid clustering options.
54#[derive(Debug, Clone, Copy, PartialEq, Deserialize, Serialize)]
55#[serde(rename_all = "camelCase")]
56pub struct ClusterOptions {
57    /// Minimum zoom that may cluster.
58    pub min_zoom: u8,
59    /// Zooms at or above this value return individual points.
60    pub max_zoom: u8,
61    /// Approximate grid divisions at zoom zero. Higher values produce smaller clusters.
62    pub base_cell_count: u32,
63}
64
65impl Default for ClusterOptions {
66    fn default() -> Self {
67        Self {
68            min_zoom: 0,
69            max_zoom: 16,
70            base_cell_count: 1,
71        }
72    }
73}
74
75/// Format-agnostic point clustering index.
76#[derive(Debug, Clone)]
77pub struct ClusterIndex<Properties = ()> {
78    points: Vec<ClusterPoint<Properties>>,
79    options: ClusterOptions,
80}
81
82impl<Properties: Clone> ClusterIndex<Properties> {
83    /// Builds an index from finite longitude/latitude points.
84    pub fn new(
85        points: impl IntoIterator<Item = ClusterPoint<Properties>>,
86        options: ClusterOptions,
87    ) -> Result<Self> {
88        if options.min_zoom > options.max_zoom {
89            return Err(invalid_argument("min_zoom must be <= max_zoom"));
90        }
91        if options.base_cell_count == 0 {
92            return Err(invalid_argument(
93                "base_cell_count must be greater than zero",
94            ));
95        }
96        let points = points
97            .into_iter()
98            .map(validate_point)
99            .collect::<Result<Vec<_>>>()?;
100
101        Ok(Self { points, options })
102    }
103
104    /// Returns clusters or points visible in a bounding box at a zoom level.
105    pub fn get_clusters(
106        &self,
107        bounds: ClusterBounds,
108        zoom: u8,
109    ) -> Result<Vec<ClusterItem<Properties>>> {
110        validate_bounds(bounds)?;
111        let visible = self
112            .points
113            .iter()
114            .filter(|point| point_in_bounds(point.longitude, point.latitude, bounds))
115            .cloned()
116            .collect::<Vec<_>>();
117
118        if zoom < self.options.min_zoom || zoom >= self.options.max_zoom {
119            return Ok(visible.into_iter().map(ClusterItem::Point).collect());
120        }
121
122        let mut grouped: BTreeMap<(i64, i64), Vec<ClusterPoint<Properties>>> = BTreeMap::new();
123        let cell_size = cell_size_degrees(zoom, self.options);
124        for point in visible {
125            let key = (
126                ((point.longitude + 180.0) / cell_size).floor() as i64,
127                ((point.latitude + 90.0) / cell_size).floor() as i64,
128            );
129            grouped.entry(key).or_default().push(point);
130        }
131
132        Ok(grouped
133            .into_iter()
134            .flat_map(|((x, y), points)| {
135                if points.len() == 1 {
136                    vec![ClusterItem::Point(points.into_iter().next().unwrap())]
137                } else {
138                    vec![ClusterItem::Cluster(cluster_for_points(
139                        zoom, x, y, &points,
140                    ))]
141                }
142            })
143            .collect())
144    }
145
146    /// Returns bounds for all indexed points.
147    pub fn get_bounds(&self) -> Option<ClusterBounds> {
148        let first = self.points.first()?;
149        let mut west = first.longitude;
150        let mut south = first.latitude;
151        let mut east = first.longitude;
152        let mut north = first.latitude;
153
154        for point in self.points.iter().skip(1) {
155            west = west.min(point.longitude);
156            south = south.min(point.latitude);
157            east = east.max(point.longitude);
158            north = north.max(point.latitude);
159        }
160
161        Some([west, south, east, north])
162    }
163
164    /// Returns source points represented by a cluster id from this index.
165    pub fn get_leaves(
166        &self,
167        cluster_id: &str,
168        limit: usize,
169        offset: usize,
170    ) -> Vec<ClusterPoint<Properties>> {
171        let Some((x, y)) = parse_cluster_id(cluster_id) else {
172            return Vec::new();
173        };
174        self.points
175            .iter()
176            .filter(|point| cluster_id_for_point(point, x.zoom, self.options) == (x.cell, y.cell))
177            .skip(offset)
178            .take(limit)
179            .cloned()
180            .collect()
181    }
182
183    /// Returns a conservative expansion zoom for the cluster.
184    pub fn get_cluster_expansion_zoom(&self, cluster_id: &str) -> usize {
185        parse_cluster_id(cluster_id)
186            .map(|(x, _)| usize::from((x.zoom + 1).min(self.options.max_zoom)))
187            .unwrap_or_else(|| usize::from(self.options.max_zoom))
188    }
189}
190
191#[derive(Debug, Clone, Copy)]
192struct ClusterCell {
193    zoom: u8,
194    cell: i64,
195}
196
197fn validate_point<Properties>(point: ClusterPoint<Properties>) -> Result<ClusterPoint<Properties>> {
198    if point.id.is_empty() {
199        return Err(invalid_argument("point id must not be empty"));
200    }
201    Coordinate::new(point.longitude, point.latitude)?.validate_geographic()?;
202    Ok(point)
203}
204
205fn validate_bounds(bounds: ClusterBounds) -> Result<()> {
206    if bounds.iter().any(|value| !value.is_finite()) {
207        return Err(invalid_argument("bounds must be finite"));
208    }
209    if bounds[1] > bounds[3] {
210        return Err(invalid_argument("bounds south must be <= north"));
211    }
212    BBox::new([
213        bounds[0].min(bounds[2]),
214        bounds[1],
215        bounds[0].max(bounds[2]),
216        bounds[3],
217    ])?;
218    if bounds[1] < -90.0 || bounds[3] > 90.0 {
219        return Err(invalid_argument(
220            "bounds latitude values must be between -90 and 90",
221        ));
222    }
223    Ok(())
224}
225
226fn point_in_bounds(longitude: f64, latitude: f64, bounds: ClusterBounds) -> bool {
227    let longitude_visible = if bounds[0] <= bounds[2] {
228        longitude >= bounds[0] && longitude <= bounds[2]
229    } else {
230        longitude >= bounds[0] || longitude <= bounds[2]
231    };
232
233    longitude_visible && latitude >= bounds[1] && latitude <= bounds[3]
234}
235
236fn cell_size_degrees(zoom: u8, options: ClusterOptions) -> f64 {
237    let divisions = options
238        .base_cell_count
239        .saturating_mul(2_u32.saturating_pow(u32::from(zoom)));
240    360.0 / f64::from(divisions.max(1))
241}
242
243fn cluster_for_points<Properties>(
244    zoom: u8,
245    x: i64,
246    y: i64,
247    points: &[ClusterPoint<Properties>],
248) -> Cluster {
249    let point_count = points.len();
250    let (lon_sum, lat_sum) = points.iter().fold((0.0, 0.0), |(lon, lat), point| {
251        (lon + point.longitude, lat + point.latitude)
252    });
253    Cluster {
254        id: format!("z{zoom}:{x}:{y}"),
255        longitude: lon_sum / point_count as f64,
256        latitude: lat_sum / point_count as f64,
257        point_count,
258    }
259}
260
261fn cluster_id_for_point<Properties>(
262    point: &ClusterPoint<Properties>,
263    zoom: u8,
264    options: ClusterOptions,
265) -> (i64, i64) {
266    let cell_size = cell_size_degrees(zoom, options);
267    (
268        ((point.longitude + 180.0) / cell_size).floor() as i64,
269        ((point.latitude + 90.0) / cell_size).floor() as i64,
270    )
271}
272
273fn parse_cluster_id(id: &str) -> Option<(ClusterCell, ClusterCell)> {
274    let mut parts = id.split(':');
275    let zoom = parts.next()?.strip_prefix('z')?.parse::<u8>().ok()?;
276    let x = parts.next()?.parse::<i64>().ok()?;
277    let y = parts.next()?.parse::<i64>().ok()?;
278    Some((ClusterCell { zoom, cell: x }, ClusterCell { zoom, cell: y }))
279}
280
281#[cfg(test)]
282mod tests {
283    use super::*;
284
285    fn point(id: &str, longitude: f64, latitude: f64) -> ClusterPoint {
286        ClusterPoint {
287            id: id.to_string(),
288            longitude,
289            latitude,
290            properties: (),
291        }
292    }
293
294    #[test]
295    fn validates_points_and_bounds() {
296        assert!(validate_point(point("bad-lon", 181.0, 0.0)).is_err());
297        assert!(validate_point(point("bad-lat", 0.0, 91.0)).is_err());
298        assert!(validate_point(point("", 0.0, 0.0)).is_err());
299
300        assert!(validate_bounds([0.0, 1.0, 1.0, 0.0]).is_err());
301        assert!(validate_bounds([0.0, -91.0, 1.0, 0.0]).is_err());
302        assert!(validate_bounds([170.0, -10.0, -170.0, 10.0]).is_ok());
303    }
304
305    #[test]
306    fn cell_size_decreases_as_zoom_increases() {
307        let options = ClusterOptions {
308            base_cell_count: 2,
309            ..ClusterOptions::default()
310        };
311
312        assert!(cell_size_degrees(4, options) < cell_size_degrees(3, options));
313    }
314
315    #[test]
316    fn parses_generated_cluster_ids_and_rejects_malformed_ids() {
317        let cluster = cluster_for_points(3, 42, 17, &[point("a", 0.0, 0.0), point("b", 1.0, 1.0)]);
318        let (x, y) = parse_cluster_id(&cluster.id).expect("generated cluster id");
319
320        assert_eq!(x.zoom, 3);
321        assert_eq!(x.cell, 42);
322        assert_eq!(y.zoom, 3);
323        assert_eq!(y.cell, 17);
324        assert!(parse_cluster_id("3:42:17").is_none());
325        assert!(parse_cluster_id("z3:42").is_none());
326        assert!(parse_cluster_id("z3:x:17").is_none());
327    }
328
329    #[test]
330    fn clusters_points_for_bbox_and_zoom() {
331        let index = ClusterIndex::new(
332            [
333                point("a", 13.0, 52.0),
334                point("b", 13.0001, 52.0001),
335                point("far", 80.0, 0.0),
336            ],
337            ClusterOptions::default(),
338        )
339        .unwrap();
340
341        let items = index.get_clusters([12.0, 51.0, 14.0, 53.0], 1).unwrap();
342
343        assert_eq!(items.len(), 1);
344        assert!(matches!(
345            &items[0],
346            ClusterItem::Cluster(Cluster { point_count: 2, .. })
347        ));
348    }
349
350    #[test]
351    fn clusters_geo_core_coordinate_derived_points() {
352        let coordinates = [
353            Coordinate::new(13.0, 52.0).unwrap(),
354            Coordinate::new(13.0001, 52.0001).unwrap(),
355        ];
356        let points = coordinates
357            .into_iter()
358            .enumerate()
359            .map(|(index, coordinate)| point(&format!("p{index}"), coordinate.lon, coordinate.lat));
360        let index = ClusterIndex::new(points, ClusterOptions::default()).unwrap();
361
362        let items = index.get_clusters([12.0, 51.0, 14.0, 53.0], 1).unwrap();
363
364        assert!(matches!(
365            &items[0],
366            ClusterItem::Cluster(Cluster { point_count: 2, .. })
367        ));
368    }
369
370    #[test]
371    fn returns_points_at_max_zoom() {
372        let index = ClusterIndex::new([point("a", 13.0, 52.0)], ClusterOptions::default()).unwrap();
373
374        let items = index.get_clusters([12.0, 51.0, 14.0, 53.0], 16).unwrap();
375
376        assert_eq!(items, vec![ClusterItem::Point(point("a", 13.0, 52.0))]);
377    }
378}