Skip to main content

nodedb_spatial/rtree/
bulk_load.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Sort-Tile-Recursive (STR) bulk loading for R-tree.
4
5use nodedb_mem::{EngineId, MemoryGovernor};
6use std::sync::Arc;
7
8use super::node::{ChildRef, INTERNAL_CAPACITY, LEAF_CAPACITY, Node, NodeKind, RTreeEntry};
9use super::tree::RTree;
10
11impl RTree {
12    /// Bulk load entries using Sort-Tile-Recursive packing.
13    ///
14    /// More efficient than repeated single inserts for large datasets.
15    /// Produces better packing (less overlap between nodes).
16    pub fn bulk_load(entries: Vec<RTreeEntry>) -> Self {
17        Self::bulk_load_inner(entries, None)
18    }
19
20    /// Bulk load with an optional governor for budget accounting.
21    ///
22    /// The governor is stored on the returned tree and used for subsequent
23    /// batch operations (full-scan, checkpoint serialization).
24    pub fn bulk_load_with_governor(
25        entries: Vec<RTreeEntry>,
26        governor: Arc<MemoryGovernor>,
27    ) -> Self {
28        Self::bulk_load_inner(entries, Some(governor))
29    }
30
31    fn bulk_load_inner(entries: Vec<RTreeEntry>, governor: Option<Arc<MemoryGovernor>>) -> Self {
32        if entries.is_empty() {
33            let mut tree = Self::new();
34            if let Some(gov) = governor {
35                tree.governor = Some(gov);
36            }
37            return tree;
38        }
39        let len = entries.len();
40
41        // Reserve budget for nodes vec (approximately len / LEAF_CAPACITY nodes,
42        // each holding LEAF_CAPACITY RTreeEntry slots). Best-effort: budget
43        // pressure is a backpressure signal, not a hard gate on bulk load.
44        let _guard = governor.as_ref().and_then(|gov| {
45            let node_count = len.div_ceil(LEAF_CAPACITY) * 2; // leaves + internals
46            let bytes = node_count
47                * (std::mem::size_of::<Node>() + LEAF_CAPACITY * std::mem::size_of::<RTreeEntry>());
48            gov.reserve(EngineId::Spatial, bytes).ok()
49        });
50
51        let mut tree = Self {
52            nodes: Vec::new(),
53            root: 0,
54            len,
55            governor,
56        };
57        tree.root = str_pack(&mut tree.nodes, entries);
58        tree
59    }
60}
61
62/// Recursively pack entries into leaf nodes, then group into internal nodes.
63fn str_pack(nodes: &mut Vec<Node>, mut entries: Vec<RTreeEntry>) -> usize {
64    if entries.len() <= LEAF_CAPACITY {
65        let mut node = Node::new_leaf();
66        if let NodeKind::Leaf {
67            entries: ref mut leaf,
68        } = node.kind
69        {
70            *leaf = entries;
71        }
72        node.recompute_bbox();
73        let idx = nodes.len();
74        nodes.push(node);
75        return idx;
76    }
77
78    let num_leaves = entries.len().div_ceil(LEAF_CAPACITY);
79    let num_slices = (num_leaves as f64).sqrt().ceil() as usize;
80    let slice_size = entries.len().div_ceil(num_slices);
81
82    // Sort by longitude (X).
83    entries.sort_by(|a, b| {
84        let ca = (a.bbox.min_lng + a.bbox.max_lng) / 2.0;
85        let cb = (b.bbox.min_lng + b.bbox.max_lng) / 2.0;
86        ca.partial_cmp(&cb).unwrap_or(std::cmp::Ordering::Equal)
87    });
88
89    let mut child_nodes: Vec<usize> = Vec::new();
90
91    for slice in entries.chunks(slice_size) {
92        let mut slice_vec: Vec<RTreeEntry> = slice.to_vec();
93        // Sort each slice by latitude (Y).
94        slice_vec.sort_by(|a, b| {
95            let ca = (a.bbox.min_lat + a.bbox.max_lat) / 2.0;
96            let cb = (b.bbox.min_lat + b.bbox.max_lat) / 2.0;
97            ca.partial_cmp(&cb).unwrap_or(std::cmp::Ordering::Equal)
98        });
99
100        for chunk in slice_vec.chunks(LEAF_CAPACITY) {
101            let child_idx = str_pack(nodes, chunk.to_vec());
102            child_nodes.push(child_idx);
103        }
104    }
105
106    pack_internal(nodes, child_nodes)
107}
108
109/// Recursively group child node indices into internal nodes.
110fn pack_internal(nodes: &mut Vec<Node>, child_indices: Vec<usize>) -> usize {
111    if child_indices.len() <= INTERNAL_CAPACITY {
112        let level = nodes[child_indices[0]].level + 1;
113        let mut node = Node::new_internal(level);
114        if let NodeKind::Internal { children } = &mut node.kind {
115            for idx in child_indices {
116                children.push(ChildRef {
117                    bbox: nodes[idx].bbox,
118                    node_idx: idx,
119                });
120            }
121        }
122        node.recompute_bbox();
123        let idx = nodes.len();
124        nodes.push(node);
125        return idx;
126    }
127
128    let mut new_children = Vec::new();
129    for chunk in child_indices.chunks(INTERNAL_CAPACITY) {
130        let level = nodes[chunk[0]].level + 1;
131        let mut node = Node::new_internal(level);
132        if let NodeKind::Internal { children } = &mut node.kind {
133            for &idx in chunk {
134                children.push(ChildRef {
135                    bbox: nodes[idx].bbox,
136                    node_idx: idx,
137                });
138            }
139        }
140        node.recompute_bbox();
141        let idx = nodes.len();
142        nodes.push(node);
143        new_children.push(idx);
144    }
145
146    pack_internal(nodes, new_children)
147}
148
149#[cfg(test)]
150mod tests {
151    use super::*;
152    use nodedb_types::BoundingBox;
153
154    fn make_entry(id: u64, lng: f64, lat: f64) -> RTreeEntry {
155        RTreeEntry {
156            id,
157            bbox: BoundingBox::from_point(lng, lat),
158        }
159    }
160
161    #[test]
162    fn bulk_load_500() {
163        let entries: Vec<RTreeEntry> = (0..500)
164            .map(|i| make_entry(i, (i as f64) * 0.1 - 25.0, (i as f64) * 0.06 - 15.0))
165            .collect();
166        let tree = RTree::bulk_load(entries);
167        assert_eq!(tree.len(), 500);
168
169        let all = tree.search(&BoundingBox::new(-180.0, -90.0, 180.0, 90.0));
170        assert_eq!(all.len(), 500);
171
172        let subset = tree.search(&BoundingBox::new(-5.0, -5.0, 5.0, 5.0));
173        assert!(!subset.is_empty());
174    }
175
176    #[test]
177    fn bulk_load_small() {
178        let entries = vec![make_entry(1, 0.0, 0.0), make_entry(2, 1.0, 1.0)];
179        let tree = RTree::bulk_load(entries);
180        assert_eq!(tree.len(), 2);
181    }
182
183    #[test]
184    fn bulk_load_empty() {
185        let tree = RTree::bulk_load(Vec::new());
186        assert!(tree.is_empty());
187    }
188}