Skip to main content

haystack_core/graph/
bitmap.rs

1// Tag bitmap index — roaring bitmap-based tag-presence queries.
2
3use roaring::RoaringBitmap;
4use std::collections::HashMap;
5
6/// Tag-presence bitmap index.
7///
8/// Maps tag names to compressed roaring bitmaps. Each entity is assigned
9/// a numeric id; the bit at position `id` is set in every bitmap for every
10/// tag the entity carries.
11pub struct TagBitmapIndex {
12    bitmaps: HashMap<String, RoaringBitmap>,
13}
14
15impl TagBitmapIndex {
16    pub fn new() -> Self {
17        Self {
18            bitmaps: HashMap::new(),
19        }
20    }
21
22    /// Add an entity's tags to the index.
23    pub fn add(&mut self, entity_id: usize, tags: &[String]) {
24        let eid = u32::try_from(entity_id).expect("entity_id exceeds u32::MAX");
25        for tag in tags {
26            self.bitmaps.entry(tag.clone()).or_default().insert(eid);
27        }
28    }
29
30    /// Remove an entity from the given tag bitmaps.
31    ///
32    /// Empty bitmaps are cleaned up to avoid unbounded growth of the map.
33    pub fn remove(&mut self, entity_id: usize, tags: &[String]) {
34        let eid = u32::try_from(entity_id).expect("entity_id exceeds u32::MAX");
35        for tag in tags {
36            if let Some(bm) = self.bitmaps.get_mut(tag.as_str()) {
37                bm.remove(eid);
38            }
39        }
40        self.bitmaps.retain(|_, bm| !bm.is_empty());
41    }
42
43    /// Get the bitmap for a tag, if it exists.
44    pub fn has_tag(&self, tag: &str) -> Option<&RoaringBitmap> {
45        self.bitmaps.get(tag)
46    }
47
48    /// Bitwise AND of multiple bitmaps.
49    pub fn intersect(bitmaps: &[&RoaringBitmap]) -> RoaringBitmap {
50        if bitmaps.is_empty() {
51            return RoaringBitmap::new();
52        }
53        let mut result = bitmaps[0].clone();
54        for bm in &bitmaps[1..] {
55            result &= *bm;
56        }
57        result
58    }
59
60    /// Bitwise OR of multiple bitmaps.
61    pub fn union(bitmaps: &[&RoaringBitmap]) -> RoaringBitmap {
62        let mut result = RoaringBitmap::new();
63        for bm in bitmaps {
64            result |= *bm;
65        }
66        result
67    }
68
69    /// Bitwise NOT of a bitmap relative to a universe of live entity IDs.
70    pub fn negate(bitmap: &RoaringBitmap, universe: &RoaringBitmap) -> RoaringBitmap {
71        let mut result = universe.clone();
72        result -= bitmap;
73        result
74    }
75
76    /// Iterate over set bit positions.
77    pub fn iter_set_bits(bitmap: &RoaringBitmap) -> impl Iterator<Item = usize> + '_ {
78        bitmap.iter().map(|x| x as usize)
79    }
80
81    /// Count the number of set bits.
82    pub fn count_ones(bitmap: &RoaringBitmap) -> usize {
83        bitmap.len() as usize
84    }
85}
86
87impl Default for TagBitmapIndex {
88    fn default() -> Self {
89        Self::new()
90    }
91}
92
93#[cfg(test)]
94mod tests {
95    use super::*;
96
97    #[test]
98    fn set_and_check_individual_bits() {
99        let mut idx = TagBitmapIndex::new();
100        idx.add(0, &["site".into()]);
101        idx.add(5, &["site".into()]);
102        idx.add(63, &["site".into()]);
103        idx.add(64, &["site".into()]);
104
105        let bm = idx.has_tag("site").unwrap();
106        assert!(bm.contains(0));
107        assert!(bm.contains(5));
108        assert!(bm.contains(63));
109        assert!(bm.contains(64));
110    }
111
112    #[test]
113    fn add_remove_tag_tracking() {
114        let mut idx = TagBitmapIndex::new();
115        idx.add(3, &["equip".into(), "ahu".into()]);
116        assert!(idx.has_tag("equip").is_some());
117        assert!(idx.has_tag("ahu").is_some());
118
119        idx.remove(3, &["equip".into(), "ahu".into()]);
120        // Empty bitmaps are cleaned up after removal.
121        assert!(idx.has_tag("equip").is_none());
122        assert!(idx.has_tag("ahu").is_none());
123    }
124
125    #[test]
126    fn intersect_multiple_bitmaps() {
127        let mut idx = TagBitmapIndex::new();
128        idx.add(1, &["site".into(), "equip".into()]);
129        idx.add(2, &["site".into()]);
130        idx.add(3, &["equip".into()]);
131
132        let site_bm = idx.has_tag("site").unwrap();
133        let equip_bm = idx.has_tag("equip").unwrap();
134        let result = TagBitmapIndex::intersect(&[site_bm, equip_bm]);
135
136        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&result).collect();
137        assert_eq!(bits, vec![1]);
138    }
139
140    #[test]
141    fn union_multiple_bitmaps() {
142        let mut idx = TagBitmapIndex::new();
143        idx.add(1, &["site".into()]);
144        idx.add(3, &["equip".into()]);
145
146        let site_bm = idx.has_tag("site").unwrap();
147        let equip_bm = idx.has_tag("equip").unwrap();
148        let result = TagBitmapIndex::union(&[site_bm, equip_bm]);
149
150        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&result).collect();
151        assert_eq!(bits, vec![1, 3]);
152    }
153
154    #[test]
155    fn negate_bitmap() {
156        let mut bm = RoaringBitmap::new();
157        bm.insert(1);
158        bm.insert(3);
159        let universe = RoaringBitmap::from_iter(0..5u32);
160        let negated = TagBitmapIndex::negate(&bm, &universe);
161        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&negated).collect();
162        assert_eq!(bits, vec![0, 2, 4]);
163    }
164
165    #[test]
166    fn iterate_set_bits() {
167        let mut bm = RoaringBitmap::new();
168        bm.insert(0);
169        bm.insert(2);
170        bm.insert(4);
171        bm.insert(128);
172        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
173        assert_eq!(bits, vec![0, 2, 4, 128]);
174    }
175
176    #[test]
177    fn auto_grow_capacity() {
178        let mut idx = TagBitmapIndex::new();
179        idx.add(2000, &["far_away".into()]);
180        let bm = idx.has_tag("far_away").unwrap();
181        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(bm).collect();
182        assert_eq!(bits, vec![2000]);
183    }
184
185    #[test]
186    fn count_ones_popcount() {
187        let mut bm = RoaringBitmap::new();
188        for i in [0, 2, 3, 4, 64, 65] {
189            bm.insert(i);
190        }
191        assert_eq!(TagBitmapIndex::count_ones(&bm), 6);
192    }
193
194    #[test]
195    fn empty_bitmap_operations() {
196        assert_eq!(TagBitmapIndex::intersect(&[]).len(), 0);
197        assert_eq!(TagBitmapIndex::union(&[]).len(), 0);
198        assert_eq!(
199            TagBitmapIndex::negate(&RoaringBitmap::new(), &RoaringBitmap::new()).len(),
200            0
201        );
202        assert_eq!(TagBitmapIndex::count_ones(&RoaringBitmap::new()), 0);
203    }
204
205    #[test]
206    fn unknown_tag_returns_none() {
207        let idx = TagBitmapIndex::new();
208        assert!(idx.has_tag("nonexistent").is_none());
209    }
210
211    #[test]
212    fn remove_nonexistent_entity_is_noop() {
213        let mut idx = TagBitmapIndex::new();
214        idx.remove(999, &["site".into()]);
215    }
216
217    #[test]
218    fn intersect_single_bitmap() {
219        let mut bm = RoaringBitmap::new();
220        bm.insert(0);
221        bm.insert(1);
222        bm.insert(2);
223        let result = TagBitmapIndex::intersect(&[&bm]);
224        assert_eq!(result.len(), 3);
225    }
226
227    #[test]
228    fn negate_exact_word_boundary() {
229        let bm = RoaringBitmap::from_iter(0..64u32);
230        let universe = RoaringBitmap::from_iter(0..64u32);
231        let negated = TagBitmapIndex::negate(&bm, &universe);
232        assert_eq!(TagBitmapIndex::count_ones(&negated), 0);
233    }
234}