Skip to main content

nodedb_spatial/rtree/
bulk_load.rs

1//! Sort-Tile-Recursive (STR) bulk loading for R-tree.
2
3use super::node::{ChildRef, INTERNAL_CAPACITY, LEAF_CAPACITY, Node, NodeKind, RTreeEntry};
4use super::tree::RTree;
5
6impl RTree {
7    /// Bulk load entries using Sort-Tile-Recursive packing.
8    ///
9    /// More efficient than repeated single inserts for large datasets.
10    /// Produces better packing (less overlap between nodes).
11    pub fn bulk_load(entries: Vec<RTreeEntry>) -> Self {
12        if entries.is_empty() {
13            return Self::new();
14        }
15        let len = entries.len();
16        let mut tree = Self {
17            nodes: Vec::new(),
18            root: 0,
19            len,
20        };
21        tree.root = str_pack(&mut tree.nodes, entries);
22        tree
23    }
24}
25
26/// Recursively pack entries into leaf nodes, then group into internal nodes.
27fn str_pack(nodes: &mut Vec<Node>, mut entries: Vec<RTreeEntry>) -> usize {
28    if entries.len() <= LEAF_CAPACITY {
29        let mut node = Node::new_leaf();
30        if let NodeKind::Leaf {
31            entries: ref mut leaf,
32        } = node.kind
33        {
34            *leaf = entries;
35        }
36        node.recompute_bbox();
37        let idx = nodes.len();
38        nodes.push(node);
39        return idx;
40    }
41
42    let num_leaves = entries.len().div_ceil(LEAF_CAPACITY);
43    let num_slices = (num_leaves as f64).sqrt().ceil() as usize;
44    let slice_size = entries.len().div_ceil(num_slices);
45
46    // Sort by longitude (X).
47    entries.sort_by(|a, b| {
48        let ca = (a.bbox.min_lng + a.bbox.max_lng) / 2.0;
49        let cb = (b.bbox.min_lng + b.bbox.max_lng) / 2.0;
50        ca.partial_cmp(&cb).unwrap_or(std::cmp::Ordering::Equal)
51    });
52
53    let mut child_nodes: Vec<usize> = Vec::new();
54
55    for slice in entries.chunks(slice_size) {
56        let mut slice_vec: Vec<RTreeEntry> = slice.to_vec();
57        // Sort each slice by latitude (Y).
58        slice_vec.sort_by(|a, b| {
59            let ca = (a.bbox.min_lat + a.bbox.max_lat) / 2.0;
60            let cb = (b.bbox.min_lat + b.bbox.max_lat) / 2.0;
61            ca.partial_cmp(&cb).unwrap_or(std::cmp::Ordering::Equal)
62        });
63
64        for chunk in slice_vec.chunks(LEAF_CAPACITY) {
65            let child_idx = str_pack(nodes, chunk.to_vec());
66            child_nodes.push(child_idx);
67        }
68    }
69
70    pack_internal(nodes, child_nodes)
71}
72
73/// Recursively group child node indices into internal nodes.
74fn pack_internal(nodes: &mut Vec<Node>, child_indices: Vec<usize>) -> usize {
75    if child_indices.len() <= INTERNAL_CAPACITY {
76        let level = nodes[child_indices[0]].level + 1;
77        let mut node = Node::new_internal(level);
78        if let NodeKind::Internal { children } = &mut node.kind {
79            for idx in child_indices {
80                children.push(ChildRef {
81                    bbox: nodes[idx].bbox,
82                    node_idx: idx,
83                });
84            }
85        }
86        node.recompute_bbox();
87        let idx = nodes.len();
88        nodes.push(node);
89        return idx;
90    }
91
92    let mut new_children = Vec::new();
93    for chunk in child_indices.chunks(INTERNAL_CAPACITY) {
94        let level = nodes[chunk[0]].level + 1;
95        let mut node = Node::new_internal(level);
96        if let NodeKind::Internal { children } = &mut node.kind {
97            for &idx in chunk {
98                children.push(ChildRef {
99                    bbox: nodes[idx].bbox,
100                    node_idx: idx,
101                });
102            }
103        }
104        node.recompute_bbox();
105        let idx = nodes.len();
106        nodes.push(node);
107        new_children.push(idx);
108    }
109
110    pack_internal(nodes, new_children)
111}
112
113#[cfg(test)]
114mod tests {
115    use super::*;
116    use nodedb_types::BoundingBox;
117
118    fn make_entry(id: u64, lng: f64, lat: f64) -> RTreeEntry {
119        RTreeEntry {
120            id,
121            bbox: BoundingBox::from_point(lng, lat),
122        }
123    }
124
125    #[test]
126    fn bulk_load_500() {
127        let entries: Vec<RTreeEntry> = (0..500)
128            .map(|i| make_entry(i, (i as f64) * 0.1 - 25.0, (i as f64) * 0.06 - 15.0))
129            .collect();
130        let tree = RTree::bulk_load(entries);
131        assert_eq!(tree.len(), 500);
132
133        let all = tree.search(&BoundingBox::new(-180.0, -90.0, 180.0, 90.0));
134        assert_eq!(all.len(), 500);
135
136        let subset = tree.search(&BoundingBox::new(-5.0, -5.0, 5.0, 5.0));
137        assert!(!subset.is_empty());
138    }
139
140    #[test]
141    fn bulk_load_small() {
142        let entries = vec![make_entry(1, 0.0, 0.0), make_entry(2, 1.0, 1.0)];
143        let tree = RTree::bulk_load(entries);
144        assert_eq!(tree.len(), 2);
145    }
146
147    #[test]
148    fn bulk_load_empty() {
149        let tree = RTree::bulk_load(Vec::new());
150        assert!(tree.is_empty());
151    }
152}