Skip to main content

haystack_core/graph/
query_planner.rs

1// Query planner — bitmap-accelerated filter evaluation.
2//
3// Two-phase approach:
4// 1. **Bitmap phase**: For Has/Missing nodes, combine tag bitmaps with
5//    set operations (AND/OR/NOT) to produce a candidate bitset.
6// 2. **Scan phase**: Iterate set bits from candidates, evaluate the
7//    full filter AST on each candidate entity.
8//
9// Returns `None` when bitmap optimization is not applicable (e.g.
10// comparison nodes), causing the caller to fall back to a full scan.
11
12use crate::filter::{CmpOp, FilterNode};
13use crate::kinds::Kind;
14
15use super::adjacency::RefAdjacency;
16use super::bitmap::TagBitmapIndex;
17use super::value_index::ValueIndex;
18
19/// Helper: convert a list of entity IDs into a bitmap.
20fn ids_to_bitmap(entity_ids: &[usize], max_id: usize) -> Vec<u64> {
21    if max_id == 0 {
22        return Vec::new();
23    }
24    let mut bitmap = vec![0u64; max_id.div_ceil(64)];
25    for &eid in entity_ids {
26        if eid < max_id {
27            bitmap[eid / 64] |= 1u64 << (eid % 64);
28        }
29    }
30    bitmap
31}
32
33/// Compute a bitmap of candidate entity ids that *may* match the given
34/// filter node, using tag presence bitmaps only.
35///
36/// Returns `None` if the filter contains terms that cannot be resolved
37/// via bitmap (e.g. comparison operators), meaning a full scan is needed.
38pub fn bitmap_candidates(
39    node: &FilterNode,
40    tag_index: &TagBitmapIndex,
41    max_id: usize,
42) -> Option<Vec<u64>> {
43    match node {
44        FilterNode::Has(path) => {
45            if !path.is_single() {
46                return None; // multi-segment paths need entity resolution
47            }
48            let bm = tag_index.has_tag(path.first())?;
49            Some(bm.clone())
50        }
51
52        FilterNode::Missing(path) => {
53            if !path.is_single() {
54                return None;
55            }
56            match tag_index.has_tag(path.first()) {
57                Some(bm) => Some(TagBitmapIndex::negate(bm, max_id)),
58                None => {
59                    // Tag never seen: all entities match "missing".
60                    if max_id == 0 {
61                        Some(Vec::new())
62                    } else {
63                        Some(TagBitmapIndex::negate(&[], max_id))
64                    }
65                }
66            }
67        }
68
69        FilterNode::And(left, right) => {
70            let l = bitmap_candidates(left, tag_index, max_id);
71            let r = bitmap_candidates(right, tag_index, max_id);
72            match (l, r) {
73                (Some(lb), Some(rb)) => {
74                    // Selectivity optimization: order by popcount.
75                    let lc = TagBitmapIndex::count_ones(&lb);
76                    let rc = TagBitmapIndex::count_ones(&rb);
77                    if lc == 0 || rc == 0 {
78                        // Short-circuit: one side is empty.
79                        return Some(Vec::new());
80                    }
81                    if lc <= rc {
82                        Some(TagBitmapIndex::intersect(&[&lb, &rb]))
83                    } else {
84                        Some(TagBitmapIndex::intersect(&[&rb, &lb]))
85                    }
86                }
87                // If only one side is optimizable, use it as the candidate
88                // set; the other side will be checked during the scan phase.
89                (Some(bm), None) | (None, Some(bm)) => Some(bm),
90                (None, None) => None,
91            }
92        }
93
94        FilterNode::Or(left, right) => {
95            let l = bitmap_candidates(left, tag_index, max_id);
96            let r = bitmap_candidates(right, tag_index, max_id);
97            match (l, r) {
98                (Some(lb), Some(rb)) => Some(TagBitmapIndex::union(&[&lb, &rb])),
99                // If either side is not optimizable, we cannot narrow.
100                _ => None,
101            }
102        }
103
104        FilterNode::Cmp { path, .. } => {
105            // Comparison nodes cannot be resolved via bitmap, but we can
106            // still use the tag existence bitmap to prune candidates.
107            if path.is_single() {
108                tag_index.has_tag(path.first()).cloned()
109            } else {
110                None
111            }
112        }
113
114        FilterNode::SpecMatch(_) => None,
115    }
116}
117
118/// Enhanced candidate computation that uses B-Tree value indexes and
119/// ref adjacency for Cmp nodes in addition to tag bitmap indexes.
120///
121/// When a `Cmp` node references a single-segment path with an available value
122/// index, this produces a precise candidate set (only entities whose value
123/// satisfies the comparison) rather than just tag-existence candidates.
124///
125/// For Ref equality (`siteRef == @site-0`), uses the adjacency reverse map
126/// to produce an exact bitmap without scanning entities.
127///
128/// Falls back to `bitmap_candidates` behavior for non-indexed fields.
129pub fn bitmap_candidates_with_values(
130    node: &FilterNode,
131    tag_index: &TagBitmapIndex,
132    value_index: &ValueIndex,
133    adjacency: &RefAdjacency,
134    max_id: usize,
135) -> Option<Vec<u64>> {
136    match node {
137        // Ref equality: use adjacency reverse map for O(1) lookup.
138        FilterNode::Cmp {
139            path,
140            op: CmpOp::Eq,
141            val: Kind::Ref(r),
142        } if path.is_single() => {
143            let field = path.first();
144            let entity_ids = adjacency.sources_for(field, &r.val);
145            Some(ids_to_bitmap(&entity_ids, max_id))
146        }
147
148        FilterNode::Cmp { path, op, val }
149            if path.is_single() && value_index.has_index(path.first()) =>
150        {
151            let field = path.first();
152            let entity_ids = match op {
153                CmpOp::Eq => value_index.eq_lookup(field, val),
154                CmpOp::Ne => value_index.ne_lookup(field, val),
155                CmpOp::Lt => value_index.lt_lookup(field, val),
156                CmpOp::Le => value_index.le_lookup(field, val),
157                CmpOp::Gt => value_index.gt_lookup(field, val),
158                CmpOp::Ge => value_index.ge_lookup(field, val),
159            };
160
161            Some(ids_to_bitmap(&entity_ids, max_id))
162        }
163
164        FilterNode::And(left, right) => {
165            let l = bitmap_candidates_with_values(left, tag_index, value_index, adjacency, max_id);
166            let r = bitmap_candidates_with_values(right, tag_index, value_index, adjacency, max_id);
167            match (l, r) {
168                (Some(lb), Some(rb)) => {
169                    let lc = TagBitmapIndex::count_ones(&lb);
170                    let rc = TagBitmapIndex::count_ones(&rb);
171                    if lc == 0 || rc == 0 {
172                        return Some(Vec::new());
173                    }
174                    if lc <= rc {
175                        Some(TagBitmapIndex::intersect(&[&lb, &rb]))
176                    } else {
177                        Some(TagBitmapIndex::intersect(&[&rb, &lb]))
178                    }
179                }
180                (Some(bm), None) | (None, Some(bm)) => Some(bm),
181                (None, None) => None,
182            }
183        }
184
185        FilterNode::Or(left, right) => {
186            let l = bitmap_candidates_with_values(left, tag_index, value_index, adjacency, max_id);
187            let r = bitmap_candidates_with_values(right, tag_index, value_index, adjacency, max_id);
188            match (l, r) {
189                (Some(lb), Some(rb)) => Some(TagBitmapIndex::union(&[&lb, &rb])),
190                _ => None,
191            }
192        }
193
194        // Delegate to base bitmap_candidates for all other node types.
195        _ => bitmap_candidates(node, tag_index, max_id),
196    }
197}
198
199#[cfg(test)]
200mod tests {
201    use super::*;
202    use crate::filter::parse_filter;
203
204    fn build_test_index() -> (TagBitmapIndex, usize) {
205        let mut idx = TagBitmapIndex::new();
206        // entity 0: site
207        idx.add(0, &["site".into(), "dis".into()]);
208        // entity 1: equip
209        idx.add(1, &["equip".into(), "dis".into()]);
210        // entity 2: point, sensor
211        idx.add(2, &["point".into(), "sensor".into(), "dis".into()]);
212        // entity 3: site, geoCity
213        idx.add(3, &["site".into(), "geoCity".into(), "dis".into()]);
214        (idx, 4) // max_id = 4
215    }
216
217    #[test]
218    fn bitmap_candidates_for_has() {
219        let (idx, max_id) = build_test_index();
220        let ast = parse_filter("site").unwrap();
221        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
222        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
223        assert_eq!(bits, vec![0, 3]);
224    }
225
226    #[test]
227    fn bitmap_candidates_for_missing() {
228        let (idx, max_id) = build_test_index();
229        let ast = parse_filter("not site").unwrap();
230        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
231        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
232        assert_eq!(bits, vec![1, 2]);
233    }
234
235    #[test]
236    fn bitmap_candidates_for_and() {
237        let (idx, max_id) = build_test_index();
238        let ast = parse_filter("site and geoCity").unwrap();
239        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
240        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
241        assert_eq!(bits, vec![3]);
242    }
243
244    #[test]
245    fn bitmap_candidates_for_or() {
246        let (idx, max_id) = build_test_index();
247        let ast = parse_filter("site or equip").unwrap();
248        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
249        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
250        assert_eq!(bits, vec![0, 1, 3]);
251    }
252
253    #[test]
254    fn bitmap_candidates_for_comparison_uses_tag_existence() {
255        let (idx, max_id) = build_test_index();
256        let ast = parse_filter("geoCity == \"Richmond\"").unwrap();
257        let bm = bitmap_candidates(&ast, &idx, max_id);
258        // Comparison on a single-segment path uses tag existence bitmap.
259        assert!(bm.is_some());
260        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm.unwrap()).collect();
261        assert_eq!(bits, vec![3]);
262    }
263
264    #[test]
265    fn bitmap_candidates_returns_none_for_multi_segment_path() {
266        let (idx, max_id) = build_test_index();
267        let ast = parse_filter("siteRef->area").unwrap();
268        let bm = bitmap_candidates(&ast, &idx, max_id);
269        assert!(bm.is_none());
270    }
271
272    #[test]
273    fn bitmap_candidates_returns_none_for_spec_match() {
274        let (idx, max_id) = build_test_index();
275        let ast = parse_filter("ph::Site").unwrap();
276        let bm = bitmap_candidates(&ast, &idx, max_id);
277        assert!(bm.is_none());
278    }
279
280    #[test]
281    fn and_with_one_side_optimizable() {
282        let (idx, max_id) = build_test_index();
283        // "site" is optimizable, "dis == \"hello\"" is a comparison.
284        let ast = parse_filter("site and dis == \"hello\"").unwrap();
285        let bm = bitmap_candidates(&ast, &idx, max_id);
286        // Should still produce a bitmap (intersection of site and dis existence).
287        assert!(bm.is_some());
288    }
289
290    #[test]
291    fn or_with_one_side_not_optimizable() {
292        let (idx, max_id) = build_test_index();
293        // "site" optimizable, "siteRef->area" is not.
294        let ast = parse_filter("site or siteRef->area").unwrap();
295        let bm = bitmap_candidates(&ast, &idx, max_id);
296        // Cannot optimize OR if one side is unknown.
297        assert!(bm.is_none());
298    }
299
300    #[test]
301    fn empty_index() {
302        let idx = TagBitmapIndex::new();
303        let ast = parse_filter("site").unwrap();
304        let bm = bitmap_candidates(&ast, &idx, 0);
305        // No bitmap for unknown tag.
306        assert!(bm.is_none());
307    }
308
309    #[test]
310    fn missing_unknown_tag_returns_all() {
311        let (idx, max_id) = build_test_index();
312        let ast = parse_filter("not unknownTag").unwrap();
313        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
314        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
315        // All 4 entities should match.
316        assert_eq!(bits, vec![0, 1, 2, 3]);
317    }
318}