Skip to main content

nodedb_array/segment/mbr_index/
build.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Hilbert-packed R-tree bulk loader + read-only tree.
4//!
5//! Tiles in a NodeDB array segment are already written in Hilbert order
6//! (the segment writer appends them in order of `tile_id.hilbert_prefix`).
7//! That means consecutive [`TileEntry`]s are spatially clustered, so a
8//! packed R-tree built by simple chunking yields tight, well-formed
9//! leaves with zero spatial sorting work. Each leaf groups
10//! [`FANOUT`] consecutive tiles; internal levels group [`FANOUT`]
11//! children, recursively, until a single root remains.
12
13use super::node::{BBox, RNode, RNodeKind};
14use super::predicate::MbrQueryPredicate;
15use crate::segment::format::TileEntry;
16
17/// Branching factor. 16 keeps internal nodes cache-friendly while
18/// matching typical leaf-tile-per-node ratios for genomic / EO scale.
19pub const FANOUT: usize = 16;
20
21/// Read-only Hilbert-packed R-tree over a segment's tile MBRs.
22#[derive(Debug, Clone)]
23pub struct HilbertPackedRTree {
24    nodes: Vec<RNode>,
25    /// Index of the root in `nodes`. `None` iff the segment has zero
26    /// tiles (no tree to query).
27    root: Option<usize>,
28}
29
30impl HilbertPackedRTree {
31    /// Bulk-load from segment tile entries (assumed Hilbert-ordered by
32    /// the writer). Time complexity O(n) — one pass per level.
33    pub fn build(entries: &[TileEntry]) -> Self {
34        let mut nodes: Vec<RNode> = Vec::new();
35        if entries.is_empty() {
36            return Self { nodes, root: None };
37        }
38        // Leaves — chunk consecutive tiles, recording each tile's
39        // absolute index and individual bbox so per-tile filtering can
40        // happen at leaf descent time.
41        let mut current_level: Vec<usize> = Vec::new();
42        let mut cursor = 0usize;
43        for chunk in entries.chunks(FANOUT) {
44            let bbox = chunk_bbox(chunk);
45            let tiles: Vec<(usize, BBox)> = chunk
46                .iter()
47                .enumerate()
48                .map(|(i, e)| (cursor + i, BBox::from_mbr(&e.mbr)))
49                .collect();
50            cursor += chunk.len();
51            nodes.push(RNode {
52                bbox,
53                kind: RNodeKind::Leaf { tiles },
54            });
55            current_level.push(nodes.len() - 1);
56        }
57        // Internal levels
58        while current_level.len() > 1 {
59            let mut next_level: Vec<usize> = Vec::new();
60            for chunk in current_level.chunks(FANOUT) {
61                let mut bbox = BBox {
62                    min: vec![],
63                    max: vec![],
64                };
65                for &child in chunk {
66                    bbox.extend(&nodes[child].bbox);
67                }
68                let children = chunk.to_vec();
69                nodes.push(RNode {
70                    bbox,
71                    kind: RNodeKind::Internal { children },
72                });
73                next_level.push(nodes.len() - 1);
74            }
75            current_level = next_level;
76        }
77        let root = current_level.first().copied();
78        Self { nodes, root }
79    }
80
81    pub fn node_count(&self) -> usize {
82        self.nodes.len()
83    }
84
85    pub fn is_empty(&self) -> bool {
86        self.root.is_none()
87    }
88
89    /// Return the indices of tiles whose MBR intersects `pred`.
90    /// Order matches segment Hilbert order.
91    pub fn query(&self, pred: &MbrQueryPredicate) -> Vec<usize> {
92        let mut hits = Vec::new();
93        if let Some(root) = self.root {
94            self.descend(root, pred, &mut hits);
95        }
96        hits.sort_unstable();
97        hits
98    }
99
100    fn descend(&self, idx: usize, pred: &MbrQueryPredicate, hits: &mut Vec<usize>) {
101        let node = &self.nodes[idx];
102        if !pred.intersects(&node.bbox) {
103            return;
104        }
105        match &node.kind {
106            RNodeKind::Leaf { tiles } => {
107                for (idx, bbox) in tiles {
108                    if pred.intersects(bbox) {
109                        hits.push(*idx);
110                    }
111                }
112            }
113            RNodeKind::Internal { children } => {
114                for &c in children {
115                    self.descend(c, pred, hits);
116                }
117            }
118        }
119    }
120}
121
122fn chunk_bbox(chunk: &[TileEntry]) -> BBox {
123    let mut bbox = BBox {
124        min: vec![],
125        max: vec![],
126    };
127    for e in chunk {
128        bbox.extend(&BBox::from_mbr(&e.mbr));
129    }
130    bbox
131}
132
133#[cfg(test)]
134mod tests {
135    use super::super::predicate::DimPredicate;
136    use super::*;
137    use crate::segment::format::TileKind;
138    use crate::tile::mbr::{AttrStats, TileMBR};
139    use crate::types::TileId;
140    use crate::types::domain::DomainBound;
141
142    fn entry(tid: u64, lo: i64, hi: i64) -> TileEntry {
143        let mbr = TileMBR {
144            dim_mins: vec![DomainBound::Int64(lo)],
145            dim_maxs: vec![DomainBound::Int64(hi)],
146            nnz: 1,
147            attr_stats: vec![AttrStats::AllNull { null_count: 0 }],
148        };
149        TileEntry::new(TileId::snapshot(tid), TileKind::Sparse, 0, 0, mbr)
150    }
151
152    fn pred(lo: i64, hi: i64) -> MbrQueryPredicate {
153        MbrQueryPredicate::new(vec![DimPredicate {
154            lo: Some(DomainBound::Int64(lo)),
155            hi: Some(DomainBound::Int64(hi)),
156        }])
157    }
158
159    #[test]
160    fn empty_tree_returns_empty_query() {
161        let t = HilbertPackedRTree::build(&[]);
162        assert!(t.is_empty());
163        assert!(t.query(&pred(0, 100)).is_empty());
164    }
165
166    #[test]
167    fn single_tile_match_and_miss() {
168        let entries = vec![entry(1, 0, 10)];
169        let t = HilbertPackedRTree::build(&entries);
170        assert_eq!(t.query(&pred(5, 7)), vec![0]);
171        assert!(t.query(&pred(20, 30)).is_empty());
172    }
173
174    #[test]
175    fn many_tiles_packed_into_levels() {
176        // 50 tiles, ranges [0..10], [10..20], ...
177        let entries: Vec<TileEntry> = (0..50)
178            .map(|i| entry(i as u64, i * 10, i * 10 + 9))
179            .collect();
180        let t = HilbertPackedRTree::build(&entries);
181        // Predicate [25, 45] → tiles 2,3,4 (covering 20-29, 30-39, 40-49)
182        let hits = t.query(&pred(25, 45));
183        assert_eq!(hits, vec![2, 3, 4]);
184    }
185
186    #[test]
187    fn query_returns_sorted_indices() {
188        let entries: Vec<TileEntry> = (0..40).map(|i| entry(i as u64, i * 5, i * 5 + 4)).collect();
189        let t = HilbertPackedRTree::build(&entries);
190        let hits = t.query(&pred(0, 200));
191        let mut sorted = hits.clone();
192        sorted.sort_unstable();
193        assert_eq!(hits, sorted);
194        assert_eq!(hits.len(), 40);
195    }
196
197    #[test]
198    fn query_ignores_unbounded_dims() {
199        let entries: Vec<TileEntry> = (0..5)
200            .map(|i| entry(i, i as i64 * 10, i as i64 * 10 + 9))
201            .collect();
202        let t = HilbertPackedRTree::build(&entries);
203        let p = MbrQueryPredicate::new(vec![DimPredicate { lo: None, hi: None }]);
204        assert_eq!(t.query(&p).len(), 5);
205    }
206}