Skip to main content

nodedb_spatial/
hybrid.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Hybrid spatial-vector search: spatial R-tree as a pre-filter for
4//! HNSW vector search.
5//!
6//! Query pattern:
7//! ```sql
8//! SELECT * FROM restaurants
9//! WHERE ST_DWithin(location, geo_point(-73.98, 40.75), 500)
10//! ORDER BY vector_distance(embedding, $query_vec) LIMIT 10
11//! ```
12//!
13//! Pipeline:
14//! 1. R-tree range search → candidate entry IDs
15//! 2. Convert entry IDs to RoaringBitmap
16//! 3. Pass bitmap into vector search as pre-filter
17//! 4. HNSW traversal skips entries not in bitmap
18//!
19//! This is a unique NodeDB differentiator — PostGIS + pgvector can't do
20//! this in a single index scan.
21
22use nodedb_mem::{EngineId, MemoryGovernor};
23#[cfg(test)]
24use nodedb_types::BoundingBox;
25use nodedb_types::geometry::Geometry;
26use nodedb_types::geometry_bbox;
27use std::sync::Arc;
28
29use crate::predicates;
30use crate::rtree::RTree;
31
32/// Result of the spatial pre-filter phase.
33///
34/// Contains the candidate entry IDs that passed the spatial predicate.
35/// These are then passed as a filter bitmap to the vector search engine.
36pub struct SpatialPreFilterResult {
37    /// Entry IDs that passed the spatial predicate.
38    pub candidate_ids: Vec<u64>,
39    /// Number of entries evaluated by R-tree range search.
40    pub rtree_candidates: usize,
41    /// Number of entries that passed exact predicate refinement.
42    pub exact_matches: usize,
43}
44
45/// Execute the spatial pre-filter phase.
46///
47/// 1. Compute search bounding box from query geometry + optional distance expansion
48/// 2. R-tree range search → rough candidates
49/// 3. For each candidate, load geometry and apply exact predicate
50/// 4. Return filtered entry IDs
51///
52/// The caller (vector engine) uses the candidate IDs to build a RoaringBitmap
53/// that restricts HNSW graph traversal.
54///
55/// When `governor` is `Some`, the `candidate_ids` allocation is budgeted via
56/// [`MemoryGovernor::reserve`] before the `Vec::with_capacity`. Budget pressure
57/// is a backpressure signal; the allocation still proceeds on budget exhaustion.
58pub fn spatial_prefilter(
59    rtree: &RTree,
60    query_geometry: &Geometry,
61    distance_meters: Option<f64>,
62    exact_geometries: &dyn Fn(u64) -> Option<Geometry>,
63    governor: Option<&Arc<MemoryGovernor>>,
64) -> SpatialPreFilterResult {
65    // Step 1: Compute search bbox.
66    let search_bbox = if let Some(dist) = distance_meters {
67        geometry_bbox(query_geometry).expand_meters(dist)
68    } else {
69        geometry_bbox(query_geometry)
70    };
71
72    // Step 2: R-tree range search.
73    let rtree_results = rtree.search(&search_bbox);
74    let rtree_candidates = rtree_results.len();
75
76    // Step 3: Exact predicate refinement.
77    let _guard = governor.and_then(|gov| {
78        let bytes = rtree_candidates * std::mem::size_of::<u64>();
79        gov.reserve(EngineId::Spatial, bytes).ok()
80    });
81    let mut candidate_ids = Vec::with_capacity(rtree_candidates);
82    for entry in &rtree_results {
83        if let Some(doc_geom) = exact_geometries(entry.id) {
84            let passes = if let Some(dist) = distance_meters {
85                predicates::st_dwithin(&doc_geom, query_geometry, dist)
86            } else {
87                predicates::st_intersects(&doc_geom, query_geometry)
88            };
89            if passes {
90                candidate_ids.push(entry.id);
91            }
92        }
93    }
94
95    let exact_matches = candidate_ids.len();
96    SpatialPreFilterResult {
97        candidate_ids,
98        rtree_candidates,
99        exact_matches,
100    }
101}
102
103/// Convert candidate entry IDs to a packed bitmap suitable for vector
104/// engine pre-filtering.
105///
106/// The bitmap format matches the existing `filter_bitmap` used by
107/// `PhysicalPlan::VectorSearch`: byte array where bit N is set if
108/// entry N is a candidate. This is a simple bitset, not RoaringBitmap,
109/// for compatibility with the existing HNSW filter path.
110pub fn ids_to_bitmap(candidate_ids: &[u64], max_id: u64) -> Vec<u8> {
111    let num_bytes = ((max_id + 8) / 8) as usize;
112    let mut bitmap = vec![0u8; num_bytes];
113    for &id in candidate_ids {
114        let byte_idx = (id / 8) as usize;
115        let bit_idx = (id % 8) as u32;
116        if byte_idx < bitmap.len() {
117            bitmap[byte_idx] |= 1 << bit_idx;
118        }
119    }
120    bitmap
121}
122
123/// Check if an entry ID is set in a bitmap.
124pub fn bitmap_contains(bitmap: &[u8], id: u64) -> bool {
125    let byte_idx = (id / 8) as usize;
126    let bit_idx = (id % 8) as u32;
127    if byte_idx < bitmap.len() {
128        bitmap[byte_idx] & (1 << bit_idx) != 0
129    } else {
130        false
131    }
132}
133
134#[cfg(test)]
135mod tests {
136    use super::*;
137    use crate::rtree::RTreeEntry;
138
139    fn make_tree() -> RTree {
140        let mut tree = RTree::new();
141        // 10 points in a grid from (0,0) to (9,9).
142        for i in 0..10 {
143            tree.insert(RTreeEntry {
144                id: i,
145                bbox: BoundingBox::from_point(i as f64, i as f64),
146            });
147        }
148        tree
149    }
150
151    #[test]
152    fn prefilter_with_distance() {
153        let tree = make_tree();
154        let query = Geometry::point(5.0, 5.0);
155
156        // Geometries: each entry ID maps to Point(id, id).
157        let get_geom = |id: u64| -> Option<Geometry> {
158            if id < 10 {
159                Some(Geometry::point(id as f64, id as f64))
160            } else {
161                None
162            }
163        };
164
165        // 200km radius should capture several nearby points.
166        let result = spatial_prefilter(&tree, &query, Some(200_000.0), &get_geom, None);
167        assert!(!result.candidate_ids.is_empty());
168        // Point (5,5) should definitely be in results (distance = 0).
169        assert!(result.candidate_ids.contains(&5));
170    }
171
172    #[test]
173    fn prefilter_intersects_no_distance() {
174        let tree = make_tree();
175        // A polygon covering (3,3) to (7,7).
176        let query = Geometry::polygon(vec![vec![
177            [3.0, 3.0],
178            [7.0, 3.0],
179            [7.0, 7.0],
180            [3.0, 7.0],
181            [3.0, 3.0],
182        ]]);
183
184        let get_geom = |id: u64| -> Option<Geometry> {
185            if id < 10 {
186                Some(Geometry::point(id as f64, id as f64))
187            } else {
188                None
189            }
190        };
191
192        let result = spatial_prefilter(&tree, &query, None, &get_geom, None);
193        // Points 4, 5, 6 should be inside (3 is on edge → not contained by intersects returns true).
194        assert!(result.candidate_ids.contains(&4));
195        assert!(result.candidate_ids.contains(&5));
196        assert!(result.candidate_ids.contains(&6));
197        // Points 0, 1, 2, 8, 9 should not be in results.
198        assert!(!result.candidate_ids.contains(&0));
199        assert!(!result.candidate_ids.contains(&9));
200    }
201
202    #[test]
203    fn bitmap_roundtrip() {
204        let ids = vec![0, 5, 7, 63, 100];
205        let bitmap = ids_to_bitmap(&ids, 128);
206
207        for &id in &ids {
208            assert!(bitmap_contains(&bitmap, id), "id {id} should be set");
209        }
210        assert!(!bitmap_contains(&bitmap, 1));
211        assert!(!bitmap_contains(&bitmap, 64));
212    }
213
214    #[test]
215    fn empty_bitmap() {
216        let bitmap = ids_to_bitmap(&[], 0);
217        assert!(!bitmap_contains(&bitmap, 0));
218    }
219}