Skip to main content

nodedb_spatial/
hybrid.rs

1//! Hybrid spatial-vector search: spatial R-tree as a pre-filter for
2//! HNSW vector search.
3//!
4//! Query pattern:
5//! ```sql
6//! SELECT * FROM restaurants
7//! WHERE ST_DWithin(location, geo_point(-73.98, 40.75), 500)
8//! ORDER BY vector_distance(embedding, $query_vec) LIMIT 10
9//! ```
10//!
11//! Pipeline:
12//! 1. R-tree range search → candidate entry IDs
13//! 2. Convert entry IDs to RoaringBitmap
14//! 3. Pass bitmap into vector search as pre-filter
15//! 4. HNSW traversal skips entries not in bitmap
16//!
17//! This is a unique NodeDB differentiator — PostGIS + pgvector can't do
18//! this in a single index scan.
19
20#[cfg(test)]
21use nodedb_types::BoundingBox;
22use nodedb_types::geometry::Geometry;
23use nodedb_types::geometry_bbox;
24
25use crate::predicates;
26use crate::rtree::RTree;
27
28/// Result of the spatial pre-filter phase.
29///
30/// Contains the candidate entry IDs that passed the spatial predicate.
31/// These are then passed as a filter bitmap to the vector search engine.
32pub struct SpatialPreFilterResult {
33    /// Entry IDs that passed the spatial predicate.
34    pub candidate_ids: Vec<u64>,
35    /// Number of entries evaluated by R-tree range search.
36    pub rtree_candidates: usize,
37    /// Number of entries that passed exact predicate refinement.
38    pub exact_matches: usize,
39}
40
41/// Execute the spatial pre-filter phase.
42///
43/// 1. Compute search bounding box from query geometry + optional distance expansion
44/// 2. R-tree range search → rough candidates
45/// 3. For each candidate, load geometry and apply exact predicate
46/// 4. Return filtered entry IDs
47///
48/// The caller (vector engine) uses the candidate IDs to build a RoaringBitmap
49/// that restricts HNSW graph traversal.
50pub fn spatial_prefilter(
51    rtree: &RTree,
52    query_geometry: &Geometry,
53    distance_meters: Option<f64>,
54    exact_geometries: &dyn Fn(u64) -> Option<Geometry>,
55) -> SpatialPreFilterResult {
56    // Step 1: Compute search bbox.
57    let search_bbox = if let Some(dist) = distance_meters {
58        geometry_bbox(query_geometry).expand_meters(dist)
59    } else {
60        geometry_bbox(query_geometry)
61    };
62
63    // Step 2: R-tree range search.
64    let rtree_results = rtree.search(&search_bbox);
65    let rtree_candidates = rtree_results.len();
66
67    // Step 3: Exact predicate refinement.
68    let mut candidate_ids = Vec::with_capacity(rtree_candidates);
69    for entry in &rtree_results {
70        if let Some(doc_geom) = exact_geometries(entry.id) {
71            let passes = if let Some(dist) = distance_meters {
72                predicates::st_dwithin(&doc_geom, query_geometry, dist)
73            } else {
74                predicates::st_intersects(&doc_geom, query_geometry)
75            };
76            if passes {
77                candidate_ids.push(entry.id);
78            }
79        }
80    }
81
82    let exact_matches = candidate_ids.len();
83    SpatialPreFilterResult {
84        candidate_ids,
85        rtree_candidates,
86        exact_matches,
87    }
88}
89
90/// Convert candidate entry IDs to a packed bitmap suitable for vector
91/// engine pre-filtering.
92///
93/// The bitmap format matches the existing `filter_bitmap` used by
94/// `PhysicalPlan::VectorSearch`: byte array where bit N is set if
95/// entry N is a candidate. This is a simple bitset, not RoaringBitmap,
96/// for compatibility with the existing HNSW filter path.
97pub fn ids_to_bitmap(candidate_ids: &[u64], max_id: u64) -> Vec<u8> {
98    let num_bytes = ((max_id + 8) / 8) as usize;
99    let mut bitmap = vec![0u8; num_bytes];
100    for &id in candidate_ids {
101        let byte_idx = (id / 8) as usize;
102        let bit_idx = (id % 8) as u32;
103        if byte_idx < bitmap.len() {
104            bitmap[byte_idx] |= 1 << bit_idx;
105        }
106    }
107    bitmap
108}
109
110/// Check if an entry ID is set in a bitmap.
111pub fn bitmap_contains(bitmap: &[u8], id: u64) -> bool {
112    let byte_idx = (id / 8) as usize;
113    let bit_idx = (id % 8) as u32;
114    if byte_idx < bitmap.len() {
115        bitmap[byte_idx] & (1 << bit_idx) != 0
116    } else {
117        false
118    }
119}
120
121#[cfg(test)]
122mod tests {
123    use super::*;
124    use crate::rtree::RTreeEntry;
125
126    fn make_tree() -> RTree {
127        let mut tree = RTree::new();
128        // 10 points in a grid from (0,0) to (9,9).
129        for i in 0..10 {
130            tree.insert(RTreeEntry {
131                id: i,
132                bbox: BoundingBox::from_point(i as f64, i as f64),
133            });
134        }
135        tree
136    }
137
138    #[test]
139    fn prefilter_with_distance() {
140        let tree = make_tree();
141        let query = Geometry::point(5.0, 5.0);
142
143        // Geometries: each entry ID maps to Point(id, id).
144        let get_geom = |id: u64| -> Option<Geometry> {
145            if id < 10 {
146                Some(Geometry::point(id as f64, id as f64))
147            } else {
148                None
149            }
150        };
151
152        // 200km radius should capture several nearby points.
153        let result = spatial_prefilter(&tree, &query, Some(200_000.0), &get_geom);
154        assert!(!result.candidate_ids.is_empty());
155        // Point (5,5) should definitely be in results (distance = 0).
156        assert!(result.candidate_ids.contains(&5));
157    }
158
159    #[test]
160    fn prefilter_intersects_no_distance() {
161        let tree = make_tree();
162        // A polygon covering (3,3) to (7,7).
163        let query = Geometry::polygon(vec![vec![
164            [3.0, 3.0],
165            [7.0, 3.0],
166            [7.0, 7.0],
167            [3.0, 7.0],
168            [3.0, 3.0],
169        ]]);
170
171        let get_geom = |id: u64| -> Option<Geometry> {
172            if id < 10 {
173                Some(Geometry::point(id as f64, id as f64))
174            } else {
175                None
176            }
177        };
178
179        let result = spatial_prefilter(&tree, &query, None, &get_geom);
180        // Points 4, 5, 6 should be inside (3 is on edge → not contained by intersects returns true).
181        assert!(result.candidate_ids.contains(&4));
182        assert!(result.candidate_ids.contains(&5));
183        assert!(result.candidate_ids.contains(&6));
184        // Points 0, 1, 2, 8, 9 should not be in results.
185        assert!(!result.candidate_ids.contains(&0));
186        assert!(!result.candidate_ids.contains(&9));
187    }
188
189    #[test]
190    fn bitmap_roundtrip() {
191        let ids = vec![0, 5, 7, 63, 100];
192        let bitmap = ids_to_bitmap(&ids, 128);
193
194        for &id in &ids {
195            assert!(bitmap_contains(&bitmap, id), "id {id} should be set");
196        }
197        assert!(!bitmap_contains(&bitmap, 1));
198        assert!(!bitmap_contains(&bitmap, 64));
199    }
200
201    #[test]
202    fn empty_bitmap() {
203        let bitmap = ids_to_bitmap(&[], 0);
204        assert!(!bitmap_contains(&bitmap, 0));
205    }
206}