Skip to main content

haystack_core/graph/
query_planner.rs

1// Query planner — bitmap-accelerated filter evaluation.
2//
3// Two-phase approach:
4// 1. **Bitmap phase**: For Has/Missing nodes, combine tag bitmaps with
5//    set operations (AND/OR/NOT) to produce a candidate bitset.
6// 2. **Scan phase**: Iterate set bits from candidates, evaluate the
7//    full filter AST on each candidate entity.
8//
9// Returns `None` when bitmap optimization is not applicable (e.g.
10// comparison nodes), causing the caller to fall back to a full scan.
11
12use roaring::RoaringBitmap;
13
14use crate::filter::{CmpOp, FilterNode};
15use crate::kinds::Kind;
16
17use super::adjacency::RefAdjacency;
18use super::bitmap::TagBitmapIndex;
19use super::value_index::ValueIndex;
20
21/// Helper: convert a list of entity IDs into a roaring bitmap.
22fn ids_to_bitmap(entity_ids: &[usize]) -> RoaringBitmap {
23    entity_ids.iter().map(|&id| id as u32).collect()
24}
25
26/// Compute a bitmap of candidate entity ids that *may* match the given
27/// filter node, using tag presence bitmaps only.
28///
29/// Returns `None` if the filter contains terms that cannot be resolved
30/// via bitmap (e.g. comparison operators), meaning a full scan is needed.
31pub fn bitmap_candidates(
32    node: &FilterNode,
33    tag_index: &TagBitmapIndex,
34    max_id: usize,
35) -> Option<RoaringBitmap> {
36    match node {
37        FilterNode::Has(path) => {
38            if !path.is_single() {
39                return None; // multi-segment paths need entity resolution
40            }
41            tag_index.has_tag(path.first()).cloned()
42        }
43
44        FilterNode::Missing(path) => {
45            if !path.is_single() {
46                return None;
47            }
48            match tag_index.has_tag(path.first()) {
49                Some(bm) => Some(TagBitmapIndex::negate(bm, max_id)),
50                None => {
51                    // Tag never seen: all entities match "missing".
52                    if max_id == 0 {
53                        Some(RoaringBitmap::new())
54                    } else {
55                        Some(TagBitmapIndex::negate(&RoaringBitmap::new(), max_id))
56                    }
57                }
58            }
59        }
60
61        FilterNode::And(left, right) => {
62            let l = bitmap_candidates(left, tag_index, max_id);
63            let r = bitmap_candidates(right, tag_index, max_id);
64            match (l, r) {
65                (Some(lb), Some(rb)) => {
66                    if lb.is_empty() || rb.is_empty() {
67                        return Some(RoaringBitmap::new());
68                    }
69                    Some(TagBitmapIndex::intersect(&[&lb, &rb]))
70                }
71                // If only one side is optimizable, use it as the candidate
72                // set; the other side will be checked during the scan phase.
73                (Some(bm), None) | (None, Some(bm)) => Some(bm),
74                (None, None) => None,
75            }
76        }
77
78        FilterNode::Or(left, right) => {
79            let l = bitmap_candidates(left, tag_index, max_id);
80            let r = bitmap_candidates(right, tag_index, max_id);
81            match (l, r) {
82                (Some(lb), Some(rb)) => Some(TagBitmapIndex::union(&[&lb, &rb])),
83                // If either side is not optimizable, we cannot narrow.
84                _ => None,
85            }
86        }
87
88        FilterNode::Cmp { path, .. } => {
89            // Comparison nodes cannot be resolved via bitmap, but we can
90            // still use the tag existence bitmap to prune candidates.
91            if path.is_single() {
92                tag_index.has_tag(path.first()).cloned()
93            } else {
94                None
95            }
96        }
97
98        FilterNode::SpecMatch(_) => None,
99    }
100}
101
102/// Enhanced candidate computation that uses B-Tree value indexes and
103/// ref adjacency for Cmp nodes in addition to tag bitmap indexes.
104pub fn bitmap_candidates_with_values(
105    node: &FilterNode,
106    tag_index: &TagBitmapIndex,
107    value_index: &ValueIndex,
108    adjacency: &RefAdjacency,
109    max_id: usize,
110) -> Option<RoaringBitmap> {
111    match node {
112        // Ref equality: use adjacency reverse map for O(1) lookup.
113        FilterNode::Cmp {
114            path,
115            op: CmpOp::Eq,
116            val: Kind::Ref(r),
117        } if path.is_single() => {
118            let field = path.first();
119            let entity_ids = adjacency.sources_for(field, &r.val);
120            Some(ids_to_bitmap(&entity_ids))
121        }
122
123        FilterNode::Cmp { path, op, val }
124            if path.is_single() && value_index.has_index(path.first()) =>
125        {
126            let field = path.first();
127            let entity_ids = match op {
128                CmpOp::Eq => value_index.eq_lookup(field, val),
129                CmpOp::Ne => value_index.ne_lookup(field, val),
130                CmpOp::Lt => value_index.lt_lookup(field, val),
131                CmpOp::Le => value_index.le_lookup(field, val),
132                CmpOp::Gt => value_index.gt_lookup(field, val),
133                CmpOp::Ge => value_index.ge_lookup(field, val),
134            };
135
136            Some(ids_to_bitmap(&entity_ids))
137        }
138
139        FilterNode::And(left, right) => {
140            let l = bitmap_candidates_with_values(left, tag_index, value_index, adjacency, max_id);
141            let r = bitmap_candidates_with_values(right, tag_index, value_index, adjacency, max_id);
142            match (l, r) {
143                (Some(lb), Some(rb)) => {
144                    if lb.is_empty() || rb.is_empty() {
145                        return Some(RoaringBitmap::new());
146                    }
147                    Some(TagBitmapIndex::intersect(&[&lb, &rb]))
148                }
149                (Some(bm), None) | (None, Some(bm)) => Some(bm),
150                (None, None) => None,
151            }
152        }
153
154        FilterNode::Or(left, right) => {
155            let l = bitmap_candidates_with_values(left, tag_index, value_index, adjacency, max_id);
156            let r = bitmap_candidates_with_values(right, tag_index, value_index, adjacency, max_id);
157            match (l, r) {
158                (Some(lb), Some(rb)) => Some(TagBitmapIndex::union(&[&lb, &rb])),
159                _ => None,
160            }
161        }
162
163        // Delegate to base bitmap_candidates for all other node types.
164        _ => bitmap_candidates(node, tag_index, max_id),
165    }
166}
167
168#[cfg(test)]
169mod tests {
170    use super::*;
171    use crate::filter::parse_filter;
172
173    fn build_test_index() -> (TagBitmapIndex, usize) {
174        let mut idx = TagBitmapIndex::new();
175        // entity 0: site
176        idx.add(0, &["site".into(), "dis".into()]);
177        // entity 1: equip
178        idx.add(1, &["equip".into(), "dis".into()]);
179        // entity 2: point, sensor
180        idx.add(2, &["point".into(), "sensor".into(), "dis".into()]);
181        // entity 3: site, geoCity
182        idx.add(3, &["site".into(), "geoCity".into(), "dis".into()]);
183        (idx, 4) // max_id = 4
184    }
185
186    #[test]
187    fn bitmap_candidates_for_has() {
188        let (idx, max_id) = build_test_index();
189        let ast = parse_filter("site").unwrap();
190        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
191        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
192        assert_eq!(bits, vec![0, 3]);
193    }
194
195    #[test]
196    fn bitmap_candidates_for_missing() {
197        let (idx, max_id) = build_test_index();
198        let ast = parse_filter("not site").unwrap();
199        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
200        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
201        assert_eq!(bits, vec![1, 2]);
202    }
203
204    #[test]
205    fn bitmap_candidates_for_and() {
206        let (idx, max_id) = build_test_index();
207        let ast = parse_filter("site and geoCity").unwrap();
208        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
209        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
210        assert_eq!(bits, vec![3]);
211    }
212
213    #[test]
214    fn bitmap_candidates_for_or() {
215        let (idx, max_id) = build_test_index();
216        let ast = parse_filter("site or equip").unwrap();
217        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
218        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
219        assert_eq!(bits, vec![0, 1, 3]);
220    }
221
222    #[test]
223    fn bitmap_candidates_for_comparison_uses_tag_existence() {
224        let (idx, max_id) = build_test_index();
225        let ast = parse_filter("geoCity == \"Richmond\"").unwrap();
226        let bm = bitmap_candidates(&ast, &idx, max_id);
227        assert!(bm.is_some());
228        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm.unwrap()).collect();
229        assert_eq!(bits, vec![3]);
230    }
231
232    #[test]
233    fn bitmap_candidates_returns_none_for_multi_segment_path() {
234        let (idx, max_id) = build_test_index();
235        let ast = parse_filter("siteRef->area").unwrap();
236        let bm = bitmap_candidates(&ast, &idx, max_id);
237        assert!(bm.is_none());
238    }
239
240    #[test]
241    fn bitmap_candidates_returns_none_for_spec_match() {
242        let (idx, max_id) = build_test_index();
243        let ast = parse_filter("ph::Site").unwrap();
244        let bm = bitmap_candidates(&ast, &idx, max_id);
245        assert!(bm.is_none());
246    }
247
248    #[test]
249    fn and_with_one_side_optimizable() {
250        let (idx, max_id) = build_test_index();
251        let ast = parse_filter("site and dis == \"hello\"").unwrap();
252        let bm = bitmap_candidates(&ast, &idx, max_id);
253        assert!(bm.is_some());
254    }
255
256    #[test]
257    fn or_with_one_side_not_optimizable() {
258        let (idx, max_id) = build_test_index();
259        let ast = parse_filter("site or siteRef->area").unwrap();
260        let bm = bitmap_candidates(&ast, &idx, max_id);
261        assert!(bm.is_none());
262    }
263
264    #[test]
265    fn empty_index() {
266        let idx = TagBitmapIndex::new();
267        let ast = parse_filter("site").unwrap();
268        let bm = bitmap_candidates(&ast, &idx, 0);
269        assert!(bm.is_none());
270    }
271
272    #[test]
273    fn missing_unknown_tag_returns_all() {
274        let (idx, max_id) = build_test_index();
275        let ast = parse_filter("not unknownTag").unwrap();
276        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
277        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
278        assert_eq!(bits, vec![0, 1, 2, 3]);
279    }
280}