Skip to main content

haystack_core/graph/
query_planner.rs

1// Query planner — bitmap-accelerated filter evaluation.
2//
3// Two-phase approach:
4// 1. **Bitmap phase**: For Has/Missing nodes, combine tag bitmaps with
5//    set operations (AND/OR/NOT) to produce a candidate bitset.
6// 2. **Scan phase**: Iterate set bits from candidates, evaluate the
7//    full filter AST on each candidate entity.
8//
9// Returns `None` when bitmap optimization is not applicable (e.g.
10// comparison nodes), causing the caller to fall back to a full scan.
11
12use crate::filter::FilterNode;
13
14use super::bitmap::TagBitmapIndex;
15
16/// Compute a bitmap of candidate entity ids that *may* match the given
17/// filter node, using tag presence bitmaps only.
18///
19/// Returns `None` if the filter contains terms that cannot be resolved
20/// via bitmap (e.g. comparison operators), meaning a full scan is needed.
21pub fn bitmap_candidates(
22    node: &FilterNode,
23    tag_index: &TagBitmapIndex,
24    max_id: usize,
25) -> Option<Vec<u64>> {
26    match node {
27        FilterNode::Has(path) => {
28            if !path.is_single() {
29                return None; // multi-segment paths need entity resolution
30            }
31            let bm = tag_index.has_tag(path.first())?;
32            Some(bm.clone())
33        }
34
35        FilterNode::Missing(path) => {
36            if !path.is_single() {
37                return None;
38            }
39            match tag_index.has_tag(path.first()) {
40                Some(bm) => Some(TagBitmapIndex::negate(bm, max_id)),
41                None => {
42                    // Tag never seen: all entities match "missing".
43                    if max_id == 0 {
44                        Some(Vec::new())
45                    } else {
46                        Some(TagBitmapIndex::negate(&[], max_id))
47                    }
48                }
49            }
50        }
51
52        FilterNode::And(left, right) => {
53            let l = bitmap_candidates(left, tag_index, max_id);
54            let r = bitmap_candidates(right, tag_index, max_id);
55            match (l, r) {
56                (Some(lb), Some(rb)) => {
57                    // Selectivity optimization: order by popcount.
58                    let lc = TagBitmapIndex::count_ones(&lb);
59                    let rc = TagBitmapIndex::count_ones(&rb);
60                    if lc == 0 || rc == 0 {
61                        // Short-circuit: one side is empty.
62                        return Some(Vec::new());
63                    }
64                    if lc <= rc {
65                        Some(TagBitmapIndex::intersect(&[&lb, &rb]))
66                    } else {
67                        Some(TagBitmapIndex::intersect(&[&rb, &lb]))
68                    }
69                }
70                // If only one side is optimizable, use it as the candidate
71                // set; the other side will be checked during the scan phase.
72                (Some(bm), None) | (None, Some(bm)) => Some(bm),
73                (None, None) => None,
74            }
75        }
76
77        FilterNode::Or(left, right) => {
78            let l = bitmap_candidates(left, tag_index, max_id);
79            let r = bitmap_candidates(right, tag_index, max_id);
80            match (l, r) {
81                (Some(lb), Some(rb)) => Some(TagBitmapIndex::union(&[&lb, &rb])),
82                // If either side is not optimizable, we cannot narrow.
83                _ => None,
84            }
85        }
86
87        FilterNode::Cmp { path, .. } => {
88            // Comparison nodes cannot be resolved via bitmap, but we can
89            // still use the tag existence bitmap to prune candidates.
90            if path.is_single() {
91                tag_index.has_tag(path.first()).cloned()
92            } else {
93                None
94            }
95        }
96
97        FilterNode::SpecMatch(_) => None,
98    }
99}
100
101#[cfg(test)]
102mod tests {
103    use super::*;
104    use crate::filter::parse_filter;
105
106    fn build_test_index() -> (TagBitmapIndex, usize) {
107        let mut idx = TagBitmapIndex::new();
108        // entity 0: site
109        idx.add(0, &["site".into(), "dis".into()]);
110        // entity 1: equip
111        idx.add(1, &["equip".into(), "dis".into()]);
112        // entity 2: point, sensor
113        idx.add(2, &["point".into(), "sensor".into(), "dis".into()]);
114        // entity 3: site, geoCity
115        idx.add(3, &["site".into(), "geoCity".into(), "dis".into()]);
116        (idx, 4) // max_id = 4
117    }
118
119    #[test]
120    fn bitmap_candidates_for_has() {
121        let (idx, max_id) = build_test_index();
122        let ast = parse_filter("site").unwrap();
123        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
124        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
125        assert_eq!(bits, vec![0, 3]);
126    }
127
128    #[test]
129    fn bitmap_candidates_for_missing() {
130        let (idx, max_id) = build_test_index();
131        let ast = parse_filter("not site").unwrap();
132        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
133        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
134        assert_eq!(bits, vec![1, 2]);
135    }
136
137    #[test]
138    fn bitmap_candidates_for_and() {
139        let (idx, max_id) = build_test_index();
140        let ast = parse_filter("site and geoCity").unwrap();
141        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
142        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
143        assert_eq!(bits, vec![3]);
144    }
145
146    #[test]
147    fn bitmap_candidates_for_or() {
148        let (idx, max_id) = build_test_index();
149        let ast = parse_filter("site or equip").unwrap();
150        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
151        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
152        assert_eq!(bits, vec![0, 1, 3]);
153    }
154
155    #[test]
156    fn bitmap_candidates_for_comparison_uses_tag_existence() {
157        let (idx, max_id) = build_test_index();
158        let ast = parse_filter("geoCity == \"Richmond\"").unwrap();
159        let bm = bitmap_candidates(&ast, &idx, max_id);
160        // Comparison on a single-segment path uses tag existence bitmap.
161        assert!(bm.is_some());
162        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm.unwrap()).collect();
163        assert_eq!(bits, vec![3]);
164    }
165
166    #[test]
167    fn bitmap_candidates_returns_none_for_multi_segment_path() {
168        let (idx, max_id) = build_test_index();
169        let ast = parse_filter("siteRef->area").unwrap();
170        let bm = bitmap_candidates(&ast, &idx, max_id);
171        assert!(bm.is_none());
172    }
173
174    #[test]
175    fn bitmap_candidates_returns_none_for_spec_match() {
176        let (idx, max_id) = build_test_index();
177        let ast = parse_filter("ph::Site").unwrap();
178        let bm = bitmap_candidates(&ast, &idx, max_id);
179        assert!(bm.is_none());
180    }
181
182    #[test]
183    fn and_with_one_side_optimizable() {
184        let (idx, max_id) = build_test_index();
185        // "site" is optimizable, "dis == \"hello\"" is a comparison.
186        let ast = parse_filter("site and dis == \"hello\"").unwrap();
187        let bm = bitmap_candidates(&ast, &idx, max_id);
188        // Should still produce a bitmap (intersection of site and dis existence).
189        assert!(bm.is_some());
190    }
191
192    #[test]
193    fn or_with_one_side_not_optimizable() {
194        let (idx, max_id) = build_test_index();
195        // "site" optimizable, "siteRef->area" is not.
196        let ast = parse_filter("site or siteRef->area").unwrap();
197        let bm = bitmap_candidates(&ast, &idx, max_id);
198        // Cannot optimize OR if one side is unknown.
199        assert!(bm.is_none());
200    }
201
202    #[test]
203    fn empty_index() {
204        let idx = TagBitmapIndex::new();
205        let ast = parse_filter("site").unwrap();
206        let bm = bitmap_candidates(&ast, &idx, 0);
207        // No bitmap for unknown tag.
208        assert!(bm.is_none());
209    }
210
211    #[test]
212    fn missing_unknown_tag_returns_all() {
213        let (idx, max_id) = build_test_index();
214        let ast = parse_filter("not unknownTag").unwrap();
215        let bm = bitmap_candidates(&ast, &idx, max_id).unwrap();
216        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
217        // All 4 entities should match.
218        assert_eq!(bits, vec![0, 1, 2, 3]);
219    }
220}