gistools/data_structures/
box_index.rs

1use crate::{
2    data_structures::FlatQueue,
3    geometry::{LonLat, S2CellId, S2Point},
4};
5use alloc::{vec, vec::Vec};
6use core::{cmp::max, ops::Div};
7use libm::{floor, fmax, fmin};
8use s2json::{BBox, GetXY, VectorPoint};
9
10const HILBERT_MAX: f64 = ((1 << 16) - 1) as f64;
11
12// TODO: boxes (Vec<f64>) could be simplifed to an actual BBox and sorting would be simpler
13
14/// # Box Index Accessor
15///
16/// ## Description
17/// Designed to be used with the BoxIndex.
18///
19/// Has [`BoxIndexAccessor::bbox`] and optionally updated [`BoxIndexAccessor::hilbert`] to properly index the item
20/// for the [`BoxIndex`]. hilbert is auto-generated for items whre you only supplied the [`BoxIndexAccessor::bbox`].
21/// Note that [`LonLat`] and [`S2Point`] implements the [`S2CellId`] hilbert curve for sorting.
22pub trait BoxIndexAccessor {
23    /// Get the bounding box of the item
24    fn bbox(&self) -> BBox;
25    /// Get the hilbert value of the item
26    fn hilbert(&self, index_bbox: &BBox) -> u64 {
27        // lets default to the local hilbert here (can use s2 later)
28        let width = index_bbox.right - index_bbox.left;
29        let height = index_bbox.top - index_bbox.bottom;
30        let BBox { left, bottom, right, top } = self.bbox();
31        let x = floor((HILBERT_MAX * ((left + right) / 2. - index_bbox.left)) / width) as u32;
32        let y = floor((HILBERT_MAX * ((bottom + top) / 2. - index_bbox.bottom)) / height) as u32;
33        _hilbert(x, y) as u64
34    }
35}
36impl<M: Clone + Default> BoxIndexAccessor for LonLat<M> {
37    fn bbox(&self) -> BBox {
38        BBox::new(self.lon(), self.lat(), self.lon(), self.lat())
39    }
40    fn hilbert(&self, _index_bbox: &BBox) -> u64 {
41        let s2cell: S2CellId = self.into();
42        s2cell.id
43    }
44}
45impl<M: Clone + Default> BoxIndexAccessor for VectorPoint<M> {
46    fn bbox(&self) -> BBox {
47        BBox::new(self.x, self.y, self.x, self.y)
48    }
49    fn hilbert(&self, _index_bbox: &BBox) -> u64 {
50        let s2cell: S2CellId = self.into();
51        s2cell.id
52    }
53}
54impl BoxIndexAccessor for S2Point {
55    fn bbox(&self) -> BBox {
56        let ll: LonLat = self.into();
57        BBox::new(ll.lon(), ll.lat(), ll.lon(), ll.lat())
58    }
59    fn hilbert(&self, _index_bbox: &BBox) -> u64 {
60        let s2cell: S2CellId = self.into();
61        s2cell.id
62    }
63}
64
65/// # BoxIndex
66///
67/// ## Description
68/// An Index for points and rectangles
69///
70/// A really fast static spatial index for 2D points and rectangles in JavaScript.
71/// Uses either a fast simple Hilbert curve algorithm or a more complex Hilbert curve (S2) algorithm.
72///
73/// This is a partial port/rust port of the [flatbush](https://github.com/mourner/flatbush)
74/// codebase. The port is designed to work with this library and also S2 cells.
75///
76/// The default hilbert curve ported is simpler and originally designed for simpler datasets.
77/// This library also provides the S2 hilbert curve for cm precision of the Earth using S2 cells.
78///
79/// ## Usage
80/// ```rust
81/// use gistools::data_structures::{BoxIndex, BoxIndexAccessor};
82/// use gistools::proj::Coords;
83/// use s2json::BBox;
84///
85/// // Test item
86/// #[derive(Debug, Clone, PartialEq)]
87/// struct Item {
88///     pub id: usize,
89///     pub min_x: f64,
90///     pub min_y: f64,
91///     pub max_x: f64,
92///     pub max_y: f64,
93/// }
94/// // define how to access the min_x, min_y, max_x, and max_y properties
95/// impl BoxIndexAccessor for Item {
96///     fn bbox(&self) -> BBox {
97///         BBox::new(self.min_x, self.min_y, self.max_x, self.max_y)
98///     }
99/// }
100///
101/// // create the index
102/// let mut index = BoxIndex::<Item>::new(vec![], None);
103///
104/// // make a bounding box query
105/// let items = index.search(&BBox::new(40.0, 40.0, 60.0, 60.0), None::<fn(&Item) -> bool>);
106///
107/// // make a k-nearest-neighbors query
108/// let items =
109///     index.neighbors(Coords::new_xy(50., 50.), Some(3), None, None::<fn(&Item) -> bool>);
110/// ```
111///
112/// ## Links
113/// - <https://github.com/mourner/flatbush>
114#[derive(Debug, Clone, PartialEq)]
115pub struct BoxIndex<T: BoxIndexAccessor + Clone> {
116    node_size: usize,
117    num_items: usize,
118    level_bounds: Vec<usize>,
119    pos: usize,
120    /// full bounding box of the index
121    pub bbox: BBox,
122    boxes: Vec<f64>,
123    indices: Vec<usize>,
124    items: Vec<T>,
125}
126impl<T: BoxIndexAccessor + Clone> BoxIndex<T> {
127    /// Create a BoxIndex index that will hold a given number of items.
128    ///
129    /// ## Parameters
130    /// - `items`: The items to index. Requires the item type to implement [`BoxIndexAccessor`].
131    /// - `accessor`: A function for accessing the min_x, min_y, max_x, and max_y properties of the items.
132    /// - `[node_size]`: Size of the tree node (16 by default).
133    pub fn new(items: Vec<T>, node_size: Option<usize>) -> Self {
134        let node_size = usize::min(usize::max(node_size.unwrap_or(16), 2), 65_535);
135        let num_items = items.len();
136        // calculate the total number of nodes in the R-tree to allocate space for
137        // and the index of each tree level (used in search later)
138        let mut n = num_items;
139        let mut num_nodes = n;
140        let mut level_bounds = vec![n * 4];
141        if num_items > 0 {
142            loop {
143                n = n.div_ceil(node_size);
144                num_nodes += n;
145                level_bounds.push(num_nodes * 4);
146                if n == 1 {
147                    break;
148                }
149            }
150        }
151
152        let mut box_index = Self {
153            node_size,
154            num_items,
155            level_bounds,
156            pos: 0,
157            bbox: BBox::default(),
158            boxes: vec![0.0; num_nodes * 4],
159            indices: vec![0; num_nodes],
160            items,
161        };
162
163        box_index.insert_items();
164        box_index.index();
165
166        box_index
167    }
168
169    /// Insert all a given rectangle to the boxes and indices arrays
170    fn insert_items(&mut self) {
171        for item in &self.items {
172            let index = self.pos >> 2;
173            let boxes = &mut self.boxes;
174            self.indices[index] = index;
175            let item_bbox = item.bbox();
176            self.bbox.merge_in_place(&item_bbox);
177            let BBox { left, bottom, right, top } = item_bbox;
178            boxes[self.pos] = left;
179            boxes[self.pos + 1] = bottom;
180            boxes[self.pos + 2] = right;
181            boxes[self.pos + 3] = top;
182            self.pos += 4;
183        }
184    }
185
186    /// Perform indexing of the added rectangles/points after all data has been added about the
187    /// items.
188    pub fn index(&mut self) {
189        if self.items.is_empty() {
190            return;
191        }
192        if self.pos >> 2 != self.num_items {
193            panic!("Added {} items when expected {}.", self.pos >> 2, self.num_items);
194        }
195        let boxes = &mut self.boxes;
196
197        if self.num_items <= self.node_size {
198            // only one node, skip sorting and just fill the root box
199            boxes[self.pos] = self.bbox.left;
200            self.pos += 1;
201            boxes[self.pos] = self.bbox.bottom;
202            self.pos += 1;
203            boxes[self.pos] = self.bbox.right;
204            self.pos += 1;
205            boxes[self.pos] = self.bbox.top;
206            self.pos += 1;
207            return;
208        }
209
210        let mut hilbert_values = vec![0; self.num_items];
211        // map item centers into Hilbert coordinate space and calculate Hilbert values
212        for (i, item) in self.items.iter().enumerate() {
213            hilbert_values[i] = item.hilbert(&self.bbox);
214        }
215
216        // sort items by their Hilbert value (for packing later)
217        sort(&mut hilbert_values, boxes, &mut self.indices, 0, self.num_items - 1, self.node_size);
218
219        // generate nodes at each tree level, bottom-up
220        let mut i = 0;
221        let mut pos = 0;
222        while i < self.level_bounds.len() - 1 {
223            let end = self.level_bounds[i];
224
225            // generate a parent node for each block of consecutive <node_size> nodes
226            while pos < end {
227                let node_index = pos;
228
229                // calculate bbox for the new node
230                let mut node_min_x = boxes[pos];
231                pos += 1;
232                let mut node_min_y = boxes[pos];
233                pos += 1;
234                let mut node_max_x = boxes[pos];
235                pos += 1;
236                let mut node_max_y = boxes[pos];
237                pos += 1;
238                let mut j = 1;
239                while j < self.node_size && pos < end {
240                    node_min_x = fmin(node_min_x, boxes[pos]);
241                    pos += 1;
242                    node_min_y = fmin(node_min_y, boxes[pos]);
243                    pos += 1;
244                    node_max_x = fmax(node_max_x, boxes[pos]);
245                    pos += 1;
246                    node_max_y = fmax(node_max_y, boxes[pos]);
247                    pos += 1;
248                    j += 1;
249                }
250
251                // add the new node to the tree data
252                self.indices[self.pos >> 2] = node_index;
253                boxes[self.pos] = node_min_x;
254                self.pos += 1;
255                boxes[self.pos] = node_min_y;
256                self.pos += 1;
257                boxes[self.pos] = node_max_x;
258                self.pos += 1;
259                boxes[self.pos] = node_max_y;
260                self.pos += 1;
261            }
262
263            i += 1;
264        }
265    }
266
267    /// Search the index by a bounding box.
268    ///
269    /// ## Parameters
270    /// - `min_x`: The minimum x coordinate of the query point.
271    /// - `min_y`: The minimum y coordinate of the query point.
272    /// - `max_x`: The maximum x coordinate of the query point.
273    /// - `max_y`: The maximum y coordinate of the query point.
274    /// - `[filter_fn]`: An optional function that is called on every found item; if supplied, only items
275    ///   for which this function returns true will be included in the results array.
276    ///
277    /// ## Returns
278    /// An array of indices of items intersecting or touching the given bounding box.
279    pub fn search(&self, bbox: &BBox, filter_fn: Option<impl Fn(&T) -> bool>) -> Vec<T> {
280        let BBox { left: min_x, bottom: min_y, right: max_x, top: max_y } = bbox;
281        let mut results = vec![];
282        if self.items.is_empty() {
283            return results;
284        }
285        let mut node_index = Some(self.boxes.len() - 4);
286        let mut queue = vec![];
287
288        while let Some(n_index) = node_index {
289            // find the end index of the node
290            let end =
291                usize::min(n_index + self.node_size * 4, upper_bound(n_index, &self.level_bounds));
292
293            // search through child nodes
294            for pos in (n_index..end).step_by(4) {
295                // check if node bbox intersects with query bbox
296                let x0 = self.boxes[pos];
297                if *max_x < x0 {
298                    continue;
299                }
300                let y0 = self.boxes[pos + 1];
301                if *max_y < y0 {
302                    continue;
303                }
304                let x1 = self.boxes[pos + 2];
305                if *min_x > x1 {
306                    continue;
307                }
308                let y1 = self.boxes[pos + 3];
309                if *min_y > y1 {
310                    continue;
311                }
312
313                let index = self.indices[pos >> 2];
314
315                if n_index >= self.num_items * 4 {
316                    queue.push(index); // node; add it to the search queue
317                } else if let Some(item) = self.items.get(index)
318                    && filter_fn.as_ref().is_none_or(|f| f(item))
319                {
320                    results.push(item.clone());
321                }
322            }
323
324            node_index = queue.pop();
325        }
326
327        results
328    }
329
330    /// Search items in order of distance from the given point.
331    ///
332    /// ## Parameters
333    /// - `x`: The x coordinate of the query point.
334    /// - `y`: The y coordinate of the query point.
335    /// - `[max_results]`: The maximum number of results to return.
336    /// - `[max_distance]`: The maximum distance to search.
337    /// - `[filter_fn]`: An optional function for filtering the results.
338    ///
339    /// ## Returns
340    /// An array of indices of items found.
341    pub fn neighbors<P: GetXY>(
342        &self,
343        p: P,
344        max_results: Option<usize>,
345        max_distance: Option<f64>,
346        filter_fn: Option<impl Fn(&T) -> bool>,
347    ) -> Vec<T> {
348        let mut results = vec![];
349        if self.items.is_empty() {
350            return results;
351        }
352        let x = p.x();
353        let y = p.y();
354        let max_results = max_results.unwrap_or(usize::MAX);
355        let max_distance = max_distance.unwrap_or(f64::MAX);
356        let mut q = FlatQueue::new();
357        let max_dist_squared = max_distance * max_distance;
358        let mut node_index = Some(self.boxes.len() - 4);
359
360        'outer: while let Some(n_index) = node_index {
361            // find the end index of the node
362            let end =
363                usize::min(n_index + self.node_size * 4, upper_bound(n_index, &self.level_bounds));
364
365            // add child nodes to the queue
366            for pos in (n_index..end).step_by(4) {
367                let index = self.indices[pos >> 2];
368                let min_x = self.boxes[pos];
369                let min_y = self.boxes[pos + 1];
370                let max_x = self.boxes[pos + 2];
371                let max_y = self.boxes[pos + 3];
372                let dx = if x < min_x {
373                    min_x - x
374                } else if x > max_x {
375                    x - max_x
376                } else {
377                    0.
378                };
379                let dy = if y < min_y {
380                    min_y - y
381                } else if y > max_y {
382                    y - max_y
383                } else {
384                    0.
385                };
386                let dist = dx * dx + dy * dy;
387                if dist > max_dist_squared {
388                    continue;
389                }
390
391                if n_index >= self.num_items * 4 {
392                    q.push(index << 1, dist); // node (use even id)
393                } else {
394                    q.push((index << 1) + 1, dist); // leaf item (use odd id)
395                }
396            }
397
398            // pop items from the queue
399            while !q.is_empty() && (q.peek().unwrap_or(&0) & 1) != 0 {
400                let dist = q.peek_value();
401                if dist.is_none() || dist.unwrap() > max_dist_squared {
402                    break 'outer;
403                }
404                if let Some(item) = self.items.get(q.pop().unwrap_or(0) >> 1)
405                    && filter_fn.as_ref().is_none_or(|f| f(item))
406                {
407                    results.push(item.clone());
408                }
409                if results.len() == max_results {
410                    break 'outer;
411                }
412            }
413
414            node_index = if !q.is_empty() { Some(q.pop().unwrap_or(0) >> 1) } else { None };
415        }
416
417        results
418    }
419
420    /// Get the items that have been added to the index. If taken, this index will be empty.
421    pub fn take(&mut self) -> Vec<T> {
422        core::mem::take(&mut self.items)
423    }
424}
425
426/// Binary search for the first value in the array bigger than the given.
427///
428/// ## Parameters
429/// - `value`: the value to search for
430/// - `arr`: the array to search
431///
432/// ## Returns
433/// The first value in the array bigger than the given
434fn upper_bound(value: usize, arr: &[usize]) -> usize {
435    let mut i = 0;
436    let mut j = arr.len() - 1;
437    while i < j {
438        let m = (i + j) >> 1;
439        if arr[m] > value {
440            j = m;
441        } else {
442            i = m + 1;
443        }
444    }
445
446    arr[i]
447}
448
449/// Custom quicksort that partially sorts bbox data alongside the hilbert values.
450///
451/// ## Parameters
452/// - `values`: the hilbert values
453/// - `boxes`: the boxes
454/// - `indices`: the indices
455/// - `left`: the left index
456/// - `right`: the right index
457/// - `node_size`: the node size
458fn sort(
459    values: &mut [u64],
460    boxes: &mut [f64],
461    indices: &mut [usize],
462    left: usize,
463    right: usize,
464    node_size: usize,
465) {
466    if left.div(node_size) >= right.div(node_size) {
467        return;
468    }
469
470    // apply median of three method
471    let start = values[left];
472    let mid = values[(left + right) >> 1];
473    let end = values[right];
474
475    let mut pivot = end;
476
477    let x = max(start, mid);
478    if end > x {
479        pivot = x;
480    } else if x == start {
481        pivot = max(mid, end);
482    } else if x == mid {
483        pivot = max(start, end);
484    }
485
486    let i = left as isize - 1;
487    let mut j = right + 1;
488
489    loop {
490        let mut i = (i + 1) as usize;
491        while values[i] < pivot {
492            i += 1;
493        }
494        j -= 1;
495        while values[j] > pivot {
496            j -= 1;
497        }
498        if i >= j {
499            break;
500        }
501        swap(values, boxes, indices, i, j);
502    }
503
504    sort(values, boxes, indices, left, j, node_size);
505    sort(values, boxes, indices, j + 1, right, node_size);
506}
507
508/// Swap two values and two corresponding boxes.
509///
510/// ## Parameters
511/// - `values`: the hilbert values
512/// - `boxes`: the boxes
513/// - `indices`: the indices
514/// - `i`: index
515/// - `j`: index
516fn swap(values: &mut [u64], boxes: &mut [f64], indices: &mut [usize], i: usize, j: usize) {
517    values.swap(i, j);
518
519    let k = 4 * i;
520    let m = 4 * j;
521
522    boxes.swap(k, m);
523    boxes.swap(k + 1, m + 1);
524    boxes.swap(k + 2, m + 2);
525    boxes.swap(k + 3, m + 3);
526
527    indices.swap(i, j);
528}
529
530/// Fast Hilbert curve algorithm by http://threadlocalmutex.com/
531/// Ported from C++ https://github.com/rawrunprotected/hilbert_curves (public domain)
532///
533/// ## Parameters
534/// - `x`: x coordinate
535/// - `y`: y coordinate
536///
537/// ## Returns
538/// The Hilbert value (32-bit integer)
539fn _hilbert(x: u32, y: u32) -> u32 {
540    let mut a = x ^ y;
541    let mut b = 0xffff ^ a;
542    let mut c = 0xffff ^ (x | y);
543    let mut d = x & (y ^ 0xffff);
544
545    let mut _a = a | (b >> 1);
546    let mut _b = (a >> 1) ^ a;
547    let mut _c = (c >> 1) ^ (b & (d >> 1)) ^ c;
548    let mut _d = (a & (c >> 1)) ^ (d >> 1) ^ d;
549
550    a = _a;
551    b = _b;
552    c = _c;
553    d = _d;
554    _a = (a & (a >> 2)) ^ (b & (b >> 2));
555    _b = (a & (b >> 2)) ^ (b & ((a ^ b) >> 2));
556    _c ^= (a & (c >> 2)) ^ (b & (d >> 2));
557    _d ^= (b & (c >> 2)) ^ ((a ^ b) & (d >> 2));
558
559    a = _a;
560    b = _b;
561    c = _c;
562    d = _d;
563    _a = (a & (a >> 4)) ^ (b & (b >> 4));
564    _b = (a & (b >> 4)) ^ (b & ((a ^ b) >> 4));
565    _c ^= (a & (c >> 4)) ^ (b & (d >> 4));
566    _d ^= (b & (c >> 4)) ^ ((a ^ b) & (d >> 4));
567
568    a = _a;
569    b = _b;
570    c = _c;
571    d = _d;
572    _c ^= (a & (c >> 8)) ^ (b & (d >> 8));
573    _d ^= (b & (c >> 8)) ^ ((a ^ b) & (d >> 8));
574
575    a = _c ^ (_c >> 1);
576    b = _d ^ (_d >> 1);
577
578    let mut i0 = x ^ y;
579    let mut i1 = b | (0xffff ^ (i0 | a));
580
581    i0 = (i0 | (i0 << 8)) & 0x00ff00ff;
582    i0 = (i0 | (i0 << 4)) & 0x0f0f0f0f;
583    i0 = (i0 | (i0 << 2)) & 0x33333333;
584    i0 = (i0 | (i0 << 1)) & 0x55555555;
585
586    i1 = (i1 | (i1 << 8)) & 0x00ff00ff;
587    i1 = (i1 | (i1 << 4)) & 0x0f0f0f0f;
588    i1 = (i1 | (i1 << 2)) & 0x33333333;
589    i1 = (i1 | (i1 << 1)) & 0x55555555;
590
591    (i1 << 1) | i0
592}