Skip to main content

haystack_core/graph/
query_planner.rs

1// Query planner — bitmap-accelerated filter evaluation.
2//
3// Two-phase approach:
4// 1. **Bitmap phase**: For Has/Missing nodes, combine tag bitmaps with
5//    set operations (AND/OR/NOT) to produce a candidate bitset.
6// 2. **Scan phase**: Iterate set bits from candidates, evaluate the
7//    full filter AST on each candidate entity.
8//
9// Returns `None` when bitmap optimization is not applicable (e.g.
10// comparison nodes), causing the caller to fall back to a full scan.
11
12use crate::filter::{CmpOp, FilterNode};
13
14use super::bitmap::TagBitmapIndex;
15use super::value_index::ValueIndex;
16
17/// Compute a bitmap of candidate entity ids that *may* match the given
18/// filter node, using tag presence bitmaps only.
19///
20/// Returns `None` if the filter contains terms that cannot be resolved
21/// via bitmap (e.g. comparison operators), meaning a full scan is needed.
22pub fn bitmap_candidates(
23    node: &FilterNode,
24    tag_index: &TagBitmapIndex,
25    max_id: usize,
26) -> Option<Vec<u64>> {
27    match node {
28        FilterNode::Has(path) => {
29            if !path.is_single() {
30                return None; // multi-segment paths need entity resolution
31            }
32            let bm = tag_index.has_tag(path.first())?;
33            Some(bm.clone())
34        }
35
36        FilterNode::Missing(path) => {
37            if !path.is_single() {
38                return None;
39            }
40            match tag_index.has_tag(path.first()) {
41                Some(bm) => Some(TagBitmapIndex::negate(bm, max_id)),
42                None => {
43                    // Tag never seen: all entities match "missing".
44                    if max_id == 0 {
45                        Some(Vec::new())
46                    } else {
47                        Some(TagBitmapIndex::negate(&[], max_id))
48                    }
49                }
50            }
51        }
52
53        FilterNode::And(left, right) => {
54            let l = bitmap_candidates(left, tag_index, max_id);
55            let r = bitmap_candidates(right, tag_index, max_id);
56            match (l, r) {
57                (Some(lb), Some(rb)) => {
58                    // Selectivity optimization: order by popcount.
59                    let lc = TagBitmapIndex::count_ones(&lb);
60                    let rc = TagBitmapIndex::count_ones(&rb);
61                    if lc == 0 || rc == 0 {
62                        // Short-circuit: one side is empty.
63                        return Some(Vec::new());
64                    }
65                    if lc <= rc {
66                        Some(TagBitmapIndex::intersect(&[&lb, &rb]))
67                    } else {
68                        Some(TagBitmapIndex::intersect(&[&rb, &lb]))
69                    }
70                }
71                // If only one side is optimizable, use it as the candidate
72                // set; the other side will be checked during the scan phase.
73                (Some(bm), None) | (None, Some(bm)) => Some(bm),
74                (None, None) => None,
75            }
76        }
77
78        FilterNode::Or(left, right) => {
79            let l = bitmap_candidates(left, tag_index, max_id);
80            let r = bitmap_candidates(right, tag_index, max_id);
81            match (l, r) {
82                (Some(lb), Some(rb)) => Some(TagBitmapIndex::union(&[&lb, &rb])),
83                // If either side is not optimizable, we cannot narrow.
84                _ => None,
85            }
86        }
87
88        FilterNode::Cmp { path, .. } => {
89            // Comparison nodes cannot be resolved via bitmap, but we can
90            // still use the tag existence bitmap to prune candidates.
91            if path.is_single() {
92                tag_index.has_tag(path.first()).cloned()
93            } else {
94                None
95            }
96        }
97
98        FilterNode::SpecMatch(_) => None,
99    }
100}
101
102/// Enhanced candidate computation that uses B-Tree value indexes for Cmp nodes
103/// in addition to tag bitmap indexes.
104///
105/// When a `Cmp` node references a single-segment path with an available value
106/// index, this produces a precise candidate set (only entities whose value
107/// satisfies the comparison) rather than just tag-existence candidates.
108///
109/// Falls back to `bitmap_candidates` behavior for non-indexed fields.
110pub fn bitmap_candidates_with_values(
111    node: &FilterNode,
112    tag_index: &TagBitmapIndex,
113    value_index: &ValueIndex,
114    max_id: usize,
115) -> Option<Vec<u64>> {
116    match node {
117        FilterNode::Cmp { path, op, val }
118            if path.is_single() && value_index.has_index(path.first()) =>
119        {
120            let field = path.first();
121            let entity_ids = match op {
122                CmpOp::Eq => value_index.eq_lookup(field, val),
123                CmpOp::Ne => value_index.ne_lookup(field, val),
124                CmpOp::Lt => value_index.lt_lookup(field, val),
125                CmpOp::Le => value_index.le_lookup(field, val),
126                CmpOp::Gt => value_index.gt_lookup(field, val),
127                CmpOp::Ge => value_index.ge_lookup(field, val),
128            };
129
130            // Convert entity IDs to a bitmap.
131            let mut bitmap = vec![0u64; (max_id + 63) / 64];
132            for eid in entity_ids {
133                if eid < max_id {
134                    bitmap[eid / 64] |= 1u64 << (eid % 64);
135                }
136            }
137            Some(bitmap)
138        }
139
140        FilterNode::And(left, right) => {
141            let l = bitmap_candidates_with_values(left, tag_index, value_index, max_id);
142            let r = bitmap_candidates_with_values(right, tag_index, value_index, max_id);
143            match (l, r) {
144                (Some(lb), Some(rb)) => {
145                    let lc = TagBitmapIndex::count_ones(&lb);
146                    let rc = TagBitmapIndex::count_ones(&rb);
147                    if lc == 0 || rc == 0 {
148                        return Some(Vec::new());
149                    }
150                    if lc <= rc {
151                        Some(TagBitmapIndex::intersect(&[&lb, &rb]))
152                    } else {
153                        Some(TagBitmapIndex::intersect(&[&rb, &lb]))
154                    }
155                }
156                (Some(bm), None) | (None, Some(bm)) => Some(bm),
157                (None, None) => None,
158            }
159        }
160
161        FilterNode::Or(left, right) => {
162            let l = bitmap_candidates_with_values(left, tag_index, value_index, max_id);
163            let r = bitmap_candidates_with_values(right, tag_index, value_index, max_id);
164            match (l, r) {
165                (Some(lb), Some(rb)) => Some(TagBitmapIndex::union(&[&lb, &rb])),
166                _ => None,
167            }
168        }
169
170        // Delegate to base bitmap_candidates for all other node types.
171        _ => bitmap_candidates(node, tag_index, max_id),
172    }
173}
174
175#[cfg(test)]
176mod tests {
177    use super::*;
178    use crate::filter::parse_filter;
179
180    fn build_test_index() -> (TagBitmapIndex, usize) {
181        let mut idx = TagBitmapIndex::new();
182        // entity 0: site
183        idx.add(0, &["site".into(), "dis".into()]);
184        // entity 1: equip
185        idx.add(1, &["equip".into(), "dis".into()]);
186        // entity 2: point, sensor
187        idx.add(2, &["point".into(), "sensor".into(), "dis".into()]);
188        // entity 3: site, geoCity
189        idx.add(3, &["site".into(), "geoCity".into(), "dis".into()]);
190        (idx, 4) // max_id = 4
191    }
192
193    #[test]
194    fn bitmap_candidates_for_has() {
195        let (idx, max_id) = build_test_index();
196        let ast = parse_filter("site").unwrap();
197        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
198        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
199        assert_eq!(bits, vec![0, 3]);
200    }
201
202    #[test]
203    fn bitmap_candidates_for_missing() {
204        let (idx, max_id) = build_test_index();
205        let ast = parse_filter("not site").unwrap();
206        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
207        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
208        assert_eq!(bits, vec![1, 2]);
209    }
210
211    #[test]
212    fn bitmap_candidates_for_and() {
213        let (idx, max_id) = build_test_index();
214        let ast = parse_filter("site and geoCity").unwrap();
215        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
216        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
217        assert_eq!(bits, vec![3]);
218    }
219
220    #[test]
221    fn bitmap_candidates_for_or() {
222        let (idx, max_id) = build_test_index();
223        let ast = parse_filter("site or equip").unwrap();
224        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
225        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
226        assert_eq!(bits, vec![0, 1, 3]);
227    }
228
229    #[test]
230    fn bitmap_candidates_for_comparison_uses_tag_existence() {
231        let (idx, max_id) = build_test_index();
232        let ast = parse_filter("geoCity == \"Richmond\"").unwrap();
233        let bm = bitmap_candidates(&ast, &idx, max_id);
234        // Comparison on a single-segment path uses tag existence bitmap.
235        assert!(bm.is_some());
236        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm.unwrap()).collect();
237        assert_eq!(bits, vec![3]);
238    }
239
240    #[test]
241    fn bitmap_candidates_returns_none_for_multi_segment_path() {
242        let (idx, max_id) = build_test_index();
243        let ast = parse_filter("siteRef->area").unwrap();
244        let bm = bitmap_candidates(&ast, &idx, max_id);
245        assert!(bm.is_none());
246    }
247
248    #[test]
249    fn bitmap_candidates_returns_none_for_spec_match() {
250        let (idx, max_id) = build_test_index();
251        let ast = parse_filter("ph::Site").unwrap();
252        let bm = bitmap_candidates(&ast, &idx, max_id);
253        assert!(bm.is_none());
254    }
255
256    #[test]
257    fn and_with_one_side_optimizable() {
258        let (idx, max_id) = build_test_index();
259        // "site" is optimizable, "dis == \"hello\"" is a comparison.
260        let ast = parse_filter("site and dis == \"hello\"").unwrap();
261        let bm = bitmap_candidates(&ast, &idx, max_id);
262        // Should still produce a bitmap (intersection of site and dis existence).
263        assert!(bm.is_some());
264    }
265
266    #[test]
267    fn or_with_one_side_not_optimizable() {
268        let (idx, max_id) = build_test_index();
269        // "site" optimizable, "siteRef->area" is not.
270        let ast = parse_filter("site or siteRef->area").unwrap();
271        let bm = bitmap_candidates(&ast, &idx, max_id);
272        // Cannot optimize OR if one side is unknown.
273        assert!(bm.is_none());
274    }
275
276    #[test]
277    fn empty_index() {
278        let idx = TagBitmapIndex::new();
279        let ast = parse_filter("site").unwrap();
280        let bm = bitmap_candidates(&ast, &idx, 0);
281        // No bitmap for unknown tag.
282        assert!(bm.is_none());
283    }
284
285    #[test]
286    fn missing_unknown_tag_returns_all() {
287        let (idx, max_id) = build_test_index();
288        let ast = parse_filter("not unknownTag").unwrap();
289        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
290        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
291        // All 4 entities should match.
292        assert_eq!(bits, vec![0, 1, 2, 3]);
293    }
294}