spatio/compute/spatial/
algorithms.rs

1//! Spatial operations using the geo crate.
2
3use crate::error::{Result, SpatioError};
4use geo::{
5    BoundingRect, ChamberlainDuquetteArea, Contains, ConvexHull, Distance, GeodesicArea,
6    Intersects, Rect, Rhumb,
7};
8use spatio_types::geo::{Point, Polygon};
9use std::cmp::Ordering;
10use std::collections::BinaryHeap;
11
12/// Distance metric for spatial calculations.
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
14pub enum DistanceMetric {
15    #[default]
16    Haversine,
17    Geodesic,
18    Rhumb,
19    Euclidean,
20}
21
22/// Distance between two points in meters.
23pub fn distance_between(point1: &Point, point2: &Point, metric: DistanceMetric) -> f64 {
24    match metric {
25        DistanceMetric::Haversine => point1.haversine_distance(point2),
26        DistanceMetric::Geodesic => point1.geodesic_distance(point2),
27        DistanceMetric::Rhumb => Rhumb.distance(*point1.inner(), *point2.inner()),
28        DistanceMetric::Euclidean => point1.euclidean_distance(point2),
29    }
30}
31
32/// Helper struct for KNN heap ordering (max-heap by distance, so we pop largest)
33#[derive(Clone)]
34struct KnnEntry<'a, T> {
35    point: Point,
36    distance: f64,
37    data: &'a T,
38}
39
40impl<'a, T> PartialEq for KnnEntry<'a, T> {
41    fn eq(&self, other: &Self) -> bool {
42        self.distance == other.distance
43    }
44}
45
46impl<'a, T> Eq for KnnEntry<'a, T> {}
47
48impl<'a, T> PartialOrd for KnnEntry<'a, T> {
49    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
50        Some(self.cmp(other))
51    }
52}
53
54impl<'a, T> Ord for KnnEntry<'a, T> {
55    fn cmp(&self, other: &Self) -> Ordering {
56        // Max-heap: larger distances have higher priority
57        self.distance
58            .partial_cmp(&other.distance)
59            .unwrap_or(Ordering::Equal)
60    }
61}
62
63/// K nearest neighbors. Returns (point, distance, data) sorted by distance (ascending).
64///
65/// Uses a bounded max-heap to efficiently find K nearest neighbors without sorting all points.
66/// Complexity: O(n log k) instead of O(n log n), with O(k) clones instead of O(n).
67///
68/// # Behavior
69///
70/// - If `k == 0` or `points.is_empty()`, returns an empty vector
71/// - If `k > points.len()`, returns all valid points (up to points.len())
72/// - Non-finite distances (NaN, Infinity) are automatically filtered out
73///
74/// # Arguments
75///
76/// * `center` - The query point to find neighbors around
77/// * `points` - Slice of (Point, data) pairs to search through
78/// * `k` - Maximum number of neighbors to return
79/// * `metric` - Distance calculation method (Haversine, Geodesic, Rhumb, Euclidean)
80///
81/// # Examples
82///
83/// ```
84/// use spatio::compute::spatial::{knn, DistanceMetric};
85/// use spatio::Point;
86///
87/// let center = Point::new(-74.0, 40.7);
88/// let points = vec![
89///     (Point::new(-74.1, 40.8), "NYC"),
90///     (Point::new(-73.9, 40.6), "Brooklyn"),
91///     (Point::new(-75.0, 41.0), "Far"),
92/// ];
93///
94/// let nearest = knn(&center, &points, 2, DistanceMetric::Haversine);
95/// assert_eq!(nearest.len(), 2); // Returns 2 closest points
96/// ```
97pub fn knn<T: Clone>(
98    center: &Point,
99    points: &[(Point, T)],
100    k: usize,
101    metric: DistanceMetric,
102) -> Vec<(Point, f64, T)> {
103    if k == 0 || points.is_empty() {
104        return Vec::new();
105    }
106
107    // Capacity: min(k, points.len()) to avoid over-allocation
108    let mut heap = BinaryHeap::with_capacity(k.min(points.len()));
109
110    for (pt, data) in points.iter() {
111        let dist = distance_between(center, pt, metric);
112
113        // Skip non-finite distances (NaN, Infinity)
114        // This can occur with degenerate points or extreme coordinates
115        if !dist.is_finite() {
116            continue;
117        }
118
119        if heap.len() < k {
120            heap.push(KnnEntry {
121                point: *pt,
122                distance: dist,
123                data,
124            });
125        } else if let Some(worst) = heap.peek()
126            && dist < worst.distance
127        {
128            heap.pop();
129            heap.push(KnnEntry {
130                point: *pt,
131                distance: dist,
132                data,
133            });
134        }
135    }
136
137    // Convert the max-heap to ascending results by popping then reversing.
138    let mut results = Vec::with_capacity(heap.len());
139    while let Some(entry) = heap.pop() {
140        results.push((entry.point, entry.distance, entry.data.clone()));
141    }
142    results.reverse();
143    results
144}
145
146pub fn bounding_box(min_lon: f64, min_lat: f64, max_lon: f64, max_lat: f64) -> Result<Rect> {
147    if min_lon > max_lon {
148        return Err(SpatioError::InvalidInput(format!(
149            "min_lon ({}) must be <= max_lon ({})",
150            min_lon, max_lon
151        )));
152    }
153    if min_lat > max_lat {
154        return Err(SpatioError::InvalidInput(format!(
155            "min_lat ({}) must be <= max_lat ({})",
156            min_lat, max_lat
157        )));
158    }
159
160    Ok(Rect::new(
161        geo::coord! { x: min_lon, y: min_lat },
162        geo::coord! { x: max_lon, y: max_lat },
163    ))
164}
165
166pub fn point_in_polygon(polygon: &Polygon, point: &Point) -> bool {
167    polygon.contains(point)
168}
169
170pub fn point_in_bbox(bbox: &Rect, point: &Point) -> bool {
171    bbox.contains(point.inner())
172}
173
174/// Calculate the planar area of a polygon using Chamberlain-Duquette formula.
175///
176/// This is a **fast approximation** suitable for small to medium polygons.
177/// Uses planar geometry (treats Earth as flat), which introduces errors for large areas.
178///
179/// # When to use
180///
181/// - Small areas (< 1000 km²)
182/// - Performance is critical
183/// - Approximate area is sufficient
184/// - Polygons are not near poles
185///
186/// # When to use `geodesic_polygon_area` instead
187///
188/// - Large areas (countries, oceans)
189/// - High accuracy required
190/// - Polygons span multiple UTM zones
191/// - Polygons near polar regions
192///
193/// # Examples
194///
195/// ```
196/// use spatio::compute::spatial::polygon_area;
197/// use spatio::Polygon;
198/// use geo::polygon;
199///
200/// let poly = polygon![
201///     (x: -1.0, y: -1.0),
202///     (x: 1.0, y: -1.0),
203///     (x: 1.0, y: 1.0),
204///     (x: -1.0, y: 1.0),
205/// ];
206/// let area = polygon_area(&poly.into());
207/// ```
208pub fn polygon_area(polygon: &Polygon) -> f64 {
209    polygon.inner().chamberlain_duquette_unsigned_area()
210}
211
212/// Calculate the geodesic area of a polygon on Earth's surface.
213///
214/// This uses the **Karney geodesic algorithm** for high accuracy.
215/// Accounts for Earth's curvature and ellipsoidal shape (more accurate, slower).
216///
217/// # When to use
218///
219/// - Large polygons (countries, regions, oceans)
220/// - High-accuracy requirements (land surveys, legal boundaries)
221/// - Polygons near poles (> ±60° latitude)
222/// - Any polygon where precision matters more than speed
223///
224/// # Performance note
225///
226/// ~10-100x slower than `polygon_area()`, but dramatically more accurate
227/// for large areas. For a 1000 km² polygon, planar error can be >5%.
228///
229/// # Examples
230///
231/// ```
232/// use spatio::compute::spatial::geodesic_polygon_area;
233/// use spatio::Polygon;
234/// use geo::polygon;
235///
236/// // Large country polygon
237/// let france = polygon![
238///     (x: -4.0, y: 43.0),  // Southwest
239///     (x: 8.0, y: 43.0),   // Southeast  
240///     (x: 8.0, y: 51.0),   // Northeast
241///     (x: -4.0, y: 51.0),  // Northwest
242/// ];
243/// let area_m2 = geodesic_polygon_area(&france.into());
244/// let area_km2 = area_m2 / 1_000_000.0;
245/// ```
246pub fn geodesic_polygon_area(polygon: &Polygon) -> f64 {
247    polygon.inner().geodesic_area_unsigned()
248}
249
250pub fn convex_hull(points: &[Point]) -> Option<Polygon> {
251    if points.is_empty() {
252        return None;
253    }
254    let geo_points: Vec<geo::Point> = points.iter().map(|p| (*p).into()).collect();
255    let multi_point = geo::MultiPoint::new(geo_points);
256    Some(multi_point.convex_hull().into())
257}
258
259pub fn bounding_rect_for_points(points: &[Point]) -> Option<Rect> {
260    if points.is_empty() {
261        return None;
262    }
263
264    let geo_points: Vec<geo::Point> = points.iter().map(|p| (*p).into()).collect();
265    let multi_point = geo::MultiPoint::new(geo_points);
266    multi_point.bounding_rect()
267}
268
269pub fn bboxes_intersect(bbox1: &Rect, bbox2: &Rect) -> bool {
270    bbox1.intersects(bbox2)
271}
272
273/// Expand a bounding box by a distance in all directions.
274///
275/// Creates a new bounding box that is `distance_meters` larger on all sides.
276/// Uses geodesic approximations for expansion calculations.
277///
278/// # Algorithm
279///
280/// - Latitude: Simple linear expansion (1° ≈ 111 km everywhere)
281/// - Longitude: Cosine-corrected expansion based on latitude
282///   - Uses the maximum absolute latitude for conservative (larger) expansion
283///   - Clamped at 89.9° to avoid extreme values near poles
284///
285/// # Limitations
286///
287/// - **Not recommended for polar regions** (latitude > ±80°)
288/// - Longitude expansion near poles can produce very large values
289/// - Does NOT handle date line (180°/-180°) wrapping
290/// - May produce coordinates outside ±180° longitude range
291///
292/// # Safety
293///
294/// - Latitude is automatically clamped to [-90, 90]
295/// - Longitude calculation uses max_abs_lat.min(89.9) to prevent division issues
296/// - Result longitude MAY exceed ±180° for large expansions or polar regions
297///
298/// # Examples
299///
300/// ```
301/// use spatio::compute::spatial::{bounding_box, expand_bbox};
302///
303/// // Create a bbox around NYC
304/// let bbox = bounding_box(-74.1, 40.6, -73.9, 40.8).unwrap();
305///
306/// // Expand by 10 km in all directions
307/// let expanded = expand_bbox(&bbox, 10_000.0);
308/// ```
309///
310/// # Panics
311///
312/// Does not panic. Invalid coordinates are clamped or may wrap.
313pub fn expand_bbox(bbox: &Rect, distance_meters: f64) -> Rect {
314    // 1 degree of latitude is approximately 111km everywhere
315    let lat_offset = distance_meters / 111_000.0;
316
317    // Clamp latitude to valid range [-90, 90]
318    let min_y = (bbox.min().y - lat_offset).max(-90.0);
319    let max_y = (bbox.max().y + lat_offset).min(90.0);
320
321    // Longitude expansion depends on latitude. We use the latitude closest to the pole
322    // (max absolute latitude) to be conservative (larger expansion).
323    let max_abs_lat = bbox.min().y.abs().max(bbox.max().y.abs());
324
325    // Clamp to 89.9° to avoid:
326    // - Division by zero at exactly 90°
327    // - Extreme expansion near poles (cos(89.9°) ≈ 0.00175, giving ~6.4° per km)
328    let calc_lat = max_abs_lat.min(89.9);
329
330    // Calculate longitude offset with cosine correction
331    // Note: This can produce large values (>180°) near poles or with large distances
332    let lon_offset = distance_meters / (111_000.0 * calc_lat.to_radians().cos());
333
334    // WARNING: No longitude wrapping is performed here.
335    Rect::new(
336        geo::coord! {
337            x: bbox.min().x - lon_offset,
338            y: min_y
339        },
340        geo::coord! {
341            x: bbox.max().x + lon_offset,
342            y: max_y
343        },
344    )
345}
346
347#[cfg(test)]
348mod tests {
349    use super::*;
350
351    #[test]
352    fn test_distance_between() {
353        let p1 = Point::new(-74.0060, 40.7128); // NYC
354        let p2 = Point::new(-118.2437, 34.0522); // LA
355
356        let dist_haversine = distance_between(&p1, &p2, DistanceMetric::Haversine);
357        let dist_geodesic = distance_between(&p1, &p2, DistanceMetric::Geodesic);
358
359        assert!(dist_haversine > 3_900_000.0 && dist_haversine < 4_000_000.0);
360        assert!(dist_geodesic > 3_900_000.0 && dist_geodesic < 4_000_000.0);
361
362        let diff = (dist_haversine - dist_geodesic).abs();
363        assert!(diff < 10_000.0);
364    }
365
366    #[test]
367    fn test_knn() {
368        let center = Point::new(-74.0060, 40.7128);
369        let candidates = vec![
370            (Point::new(-73.9442, 40.6782), "Brooklyn"),
371            (Point::new(-73.9356, 40.7306), "Queens"),
372            (Point::new(-118.2437, 34.0522), "LA"),
373            (Point::new(-73.9712, 40.7831), "Upper West Side"),
374        ];
375
376        let nearest = knn(&center, &candidates, 2, DistanceMetric::Haversine);
377
378        assert_eq!(nearest.len(), 2);
379        assert_ne!(nearest[0].2, "LA");
380        assert_ne!(nearest[1].2, "LA");
381    }
382
383    #[test]
384    fn test_bounding_box() {
385        let bbox = bounding_box(-74.0, 40.7, -73.9, 40.8).unwrap();
386
387        assert_eq!(bbox.min().x, -74.0);
388        assert_eq!(bbox.min().y, 40.7);
389        assert_eq!(bbox.max().x, -73.9);
390        assert_eq!(bbox.max().y, 40.8);
391    }
392
393    #[test]
394    fn test_bounding_box_invalid() {
395        let result = bounding_box(-73.9, 40.7, -74.0, 40.8);
396        assert!(result.is_err());
397    }
398
399    #[test]
400    fn test_point_in_bbox() {
401        let bbox = bounding_box(-74.0, 40.7, -73.9, 40.8).unwrap();
402
403        assert!(point_in_bbox(&bbox, &Point::new(-73.95, 40.75)));
404        assert!(!point_in_bbox(&bbox, &Point::new(-73.85, 40.75)));
405    }
406
407    #[test]
408    fn test_bboxes_intersect() {
409        let bbox1 = bounding_box(-74.0, 40.7, -73.9, 40.8).unwrap();
410        let bbox2 = bounding_box(-73.95, 40.75, -73.85, 40.85).unwrap();
411        let bbox3 = bounding_box(-73.0, 40.0, -72.9, 40.1).unwrap();
412
413        assert!(bboxes_intersect(&bbox1, &bbox2));
414        assert!(!bboxes_intersect(&bbox1, &bbox3));
415    }
416
417    #[test]
418    fn test_convex_hull() {
419        let points = vec![
420            Point::new(-74.0, 40.7),
421            Point::new(-73.9, 40.7),
422            Point::new(-73.95, 40.8),
423            Point::new(-73.95, 40.75), // Interior point
424        ];
425
426        let hull = convex_hull(&points).unwrap();
427        assert_eq!(hull.exterior().0.len(), 4);
428    }
429
430    #[test]
431    fn test_bounding_rect_for_points() {
432        let points = vec![
433            Point::new(-74.0, 40.7),
434            Point::new(-73.9, 40.8),
435            Point::new(-73.95, 40.75),
436        ];
437
438        let bbox = bounding_rect_for_points(&points).unwrap();
439        assert_eq!(bbox.min().x, -74.0);
440        assert_eq!(bbox.min().y, 40.7);
441        assert_eq!(bbox.max().x, -73.9);
442        assert_eq!(bbox.max().y, 40.8);
443    }
444
445    #[test]
446    fn test_expand_bbox() {
447        let bbox = bounding_box(-74.0, 40.7, -73.9, 40.8).unwrap();
448        let expanded = expand_bbox(&bbox, 1000.0);
449        assert!(expanded.min().x < bbox.min().x);
450        assert!(expanded.min().y < bbox.min().y);
451        assert!(expanded.max().x > bbox.max().x);
452        assert!(expanded.max().y > bbox.max().y);
453    }
454}