Skip to main content

geonative_utils/
index.rs

1//! Spatial-locality indexing helpers — primarily Hilbert curve encoding.
2//!
3//! ## What this module is
4//!
5//! Spatial keys that map 2D points to 1D distances along a space-filling
6//! curve. Used to cluster physically-near features into the same Parquet
7//! row group / SQLite page / R-tree bucket, making bbox-based predicates
8//! highly selective.
9//!
10//! ## Architecture position
11//!
12//! Originally lived in `geonative-geoparquet::hilbert`; moved here so any
13//! format crate (GeoParquet, GPKG, FlatGeoBuf, R-tree builders) can share
14//! the same primitives.
15//!
16//! ## Clever bits
17//!
18//! - **Order = 16 bits per axis** → 65,536-cell grid. Sub-meter resolution
19//!   for typical lng/lat datasets; Hilbert distances fit in u32 with room
20//!   to spare (u64 return type is just for future expansion).
21//! - **`u64::MAX` for non-quantizable inputs.** NaN / degenerate-bbox cases
22//!   sort to the end of any Hilbert-sorted sequence rather than panicking.
23//! - **Pure functions, no Indexer struct.** Callers compute their dataset
24//!   bbox via [`union_bbox`], then map each feature with
25//!   [`hilbert_distance_for`]. No hidden state, no allocation in the hot loop.
26
27/// Convert grid-quantized `(x, y)` (each `0..2^order`) to a Hilbert distance.
28/// `order` must be ≤ 32. For order = 16 the output is ≤ 2^32.
29pub fn hilbert_xy2d(order: u32, x: u32, y: u32) -> u64 {
30    let mut x = x as i64;
31    let mut y = y as i64;
32    let mut d: u64 = 0;
33    let mut s: i64 = (1i64 << order) / 2;
34    while s > 0 {
35        let rx = if (x & s) > 0 { 1i64 } else { 0 };
36        let ry = if (y & s) > 0 { 1i64 } else { 0 };
37        d = d.wrapping_add((s as u64) * (s as u64) * ((3 * rx) ^ ry) as u64);
38        // rotate quadrant
39        if ry == 0 {
40            if rx == 1 {
41                x = s - 1 - x;
42                y = s - 1 - y;
43            }
44            std::mem::swap(&mut x, &mut y);
45        }
46        s /= 2;
47    }
48    d
49}
50
51/// Quantize a real `(x, y)` to integer grid coordinates `0..2^order`,
52/// given the dataset's overall bbox. Returns `None` if any input is non-finite
53/// or the bbox is degenerate (zero width or height).
54pub fn quantize(
55    x: f64,
56    y: f64,
57    bbox: [f64; 4], // xmin, ymin, xmax, ymax
58    order: u32,
59) -> Option<(u32, u32)> {
60    if !x.is_finite() || !y.is_finite() {
61        return None;
62    }
63    let width = bbox[2] - bbox[0];
64    let height = bbox[3] - bbox[1];
65    // Reject degenerate bboxes (zero or negative width/height) AND NaN values.
66    // Plain `<= 0.0` would mishandle NaN, so we check positivity explicitly.
67    if width.is_nan() || height.is_nan() || width <= 0.0 || height <= 0.0 {
68        return None;
69    }
70    let grid = (1u64 << order) - 1; // max index
71    let nx = ((x - bbox[0]) / width).clamp(0.0, 1.0) * grid as f64;
72    let ny = ((y - bbox[1]) / height).clamp(0.0, 1.0) * grid as f64;
73    Some((nx as u32, ny as u32))
74}
75
76/// Combined: given a centroid and dataset bbox, return its Hilbert distance.
77/// Returns `u64::MAX` for non-quantizable points so they sort to the end.
78pub fn hilbert_distance_for(centroid: (f64, f64), bbox: [f64; 4], order: u32) -> u64 {
79    match quantize(centroid.0, centroid.1, bbox, order) {
80        Some((x, y)) => hilbert_xy2d(order, x, y),
81        None => u64::MAX,
82    }
83}
84
85/// Compute the union bbox of an iterator of `[xmin, ymin, xmax, ymax]`s.
86/// Returns `None` if no finite bbox is seen.
87pub fn union_bbox<I: IntoIterator<Item = [f64; 4]>>(iter: I) -> Option<[f64; 4]> {
88    let mut acc: Option<[f64; 4]> = None;
89    for b in iter {
90        if !b.iter().all(|v| v.is_finite()) {
91            continue;
92        }
93        match &mut acc {
94            None => acc = Some(b),
95            Some(a) => {
96                a[0] = a[0].min(b[0]);
97                a[1] = a[1].min(b[1]);
98                a[2] = a[2].max(b[2]);
99                a[3] = a[3].max(b[3]);
100            }
101        }
102    }
103    acc
104}
105
106#[cfg(test)]
107mod tests {
108    use super::*;
109
110    #[test]
111    fn hilbert_origin_is_zero() {
112        assert_eq!(hilbert_xy2d(16, 0, 0), 0);
113    }
114
115    #[test]
116    fn hilbert_orders_corners_uniquely() {
117        // Four corners of a 2x2 grid have unique distances 0,1,2,3.
118        let mut dists: Vec<u64> = vec![
119            hilbert_xy2d(1, 0, 0),
120            hilbert_xy2d(1, 0, 1),
121            hilbert_xy2d(1, 1, 0),
122            hilbert_xy2d(1, 1, 1),
123        ];
124        dists.sort();
125        assert_eq!(dists, vec![0, 1, 2, 3]);
126    }
127
128    #[test]
129    fn nearby_points_cluster_under_hilbert() {
130        // Two close points should have closer Hilbert distance than two far points,
131        // averaged over many random pairs. We verify on a single deterministic case:
132        // points in the same quadrant of order=4 grid → small distance diff;
133        // points across diagonally opposite quadrants → larger diff.
134        let order = 4;
135        let close_a = hilbert_xy2d(order, 1, 1);
136        let close_b = hilbert_xy2d(order, 2, 1);
137        let far_a = hilbert_xy2d(order, 0, 0);
138        let far_b = hilbert_xy2d(order, (1 << order) - 1, (1 << order) - 1);
139        assert!(close_a.abs_diff(close_b) < far_a.abs_diff(far_b));
140    }
141
142    #[test]
143    fn quantize_rejects_non_finite_or_degenerate() {
144        let bbox = [0.0, 0.0, 10.0, 10.0];
145        assert!(quantize(f64::NAN, 5.0, bbox, 16).is_none());
146        assert!(quantize(5.0, f64::NEG_INFINITY, bbox, 16).is_none());
147
148        // Degenerate (zero width)
149        let degen = [0.0, 0.0, 0.0, 10.0];
150        assert!(quantize(0.0, 5.0, degen, 16).is_none());
151    }
152
153    #[test]
154    fn quantize_maps_extremes_to_grid_edges() {
155        let bbox = [0.0, 0.0, 10.0, 10.0];
156        let max = (1u32 << 16) - 1;
157        assert_eq!(quantize(0.0, 0.0, bbox, 16), Some((0, 0)));
158        assert_eq!(quantize(10.0, 10.0, bbox, 16), Some((max, max)));
159        // Clamping
160        assert_eq!(quantize(-1.0, 11.0, bbox, 16), Some((0, max)));
161    }
162
163    #[test]
164    fn distance_for_null_inputs_sorts_last() {
165        let bbox = [0.0, 0.0, 10.0, 10.0];
166        let real = hilbert_distance_for((5.0, 5.0), bbox, 16);
167        let null = hilbert_distance_for((f64::NAN, f64::NAN), bbox, 16);
168        assert!(real < null);
169        assert_eq!(null, u64::MAX);
170    }
171
172    #[test]
173    fn union_bbox_grows_to_envelope() {
174        let bboxes = vec![
175            [0.0, 0.0, 1.0, 1.0],
176            [-1.0, 5.0, 2.0, 6.0],
177            [3.0, -2.0, 4.0, 0.5],
178        ];
179        let u = union_bbox(bboxes).unwrap();
180        assert_eq!(u, [-1.0, -2.0, 4.0, 6.0]);
181    }
182
183    #[test]
184    fn union_bbox_skips_non_finite() {
185        let bboxes = vec![[0.0, 0.0, 1.0, 1.0], [f64::NAN, 0.0, 1.0, 1.0]];
186        let u = union_bbox(bboxes).unwrap();
187        assert_eq!(u, [0.0, 0.0, 1.0, 1.0]);
188    }
189}