Skip to main content

haystack_core/graph/
bitmap.rs

1// Tag bitmap index — roaring bitmap-based tag-presence queries.
2
3use roaring::RoaringBitmap;
4use std::collections::HashMap;
5
6/// Tag-presence bitmap index.
7///
8/// Maps tag names to compressed roaring bitmaps. Each entity is assigned
9/// a numeric id; the bit at position `id` is set in every bitmap for every
10/// tag the entity carries.
11pub struct TagBitmapIndex {
12    bitmaps: HashMap<String, RoaringBitmap>,
13}
14
15impl TagBitmapIndex {
16    pub fn new() -> Self {
17        Self {
18            bitmaps: HashMap::new(),
19        }
20    }
21
22    /// Add an entity's tags to the index.
23    pub fn add(&mut self, entity_id: usize, tags: &[String]) {
24        let eid = entity_id as u32;
25        for tag in tags {
26            self.bitmaps.entry(tag.clone()).or_default().insert(eid);
27        }
28    }
29
30    /// Remove an entity from the given tag bitmaps.
31    pub fn remove(&mut self, entity_id: usize, tags: &[String]) {
32        let eid = entity_id as u32;
33        for tag in tags {
34            if let Some(bm) = self.bitmaps.get_mut(tag.as_str()) {
35                bm.remove(eid);
36            }
37        }
38    }
39
40    /// Get the bitmap for a tag, if it exists.
41    pub fn has_tag(&self, tag: &str) -> Option<&RoaringBitmap> {
42        self.bitmaps.get(tag)
43    }
44
45    /// Bitwise AND of multiple bitmaps.
46    pub fn intersect(bitmaps: &[&RoaringBitmap]) -> RoaringBitmap {
47        if bitmaps.is_empty() {
48            return RoaringBitmap::new();
49        }
50        let mut result = bitmaps[0].clone();
51        for bm in &bitmaps[1..] {
52            result &= *bm;
53        }
54        result
55    }
56
57    /// Bitwise OR of multiple bitmaps.
58    pub fn union(bitmaps: &[&RoaringBitmap]) -> RoaringBitmap {
59        let mut result = RoaringBitmap::new();
60        for bm in bitmaps {
61            result |= *bm;
62        }
63        result
64    }
65
66    /// Bitwise NOT of a bitmap, limited to max_id bits.
67    pub fn negate(bitmap: &RoaringBitmap, max_id: usize) -> RoaringBitmap {
68        if max_id == 0 {
69            return RoaringBitmap::new();
70        }
71        let mut all = RoaringBitmap::from_iter(0..max_id as u32);
72        all -= bitmap;
73        all
74    }
75
76    /// Iterate over set bit positions.
77    pub fn iter_set_bits(bitmap: &RoaringBitmap) -> impl Iterator<Item = usize> + '_ {
78        bitmap.iter().map(|x| x as usize)
79    }
80
81    /// Count the number of set bits.
82    pub fn count_ones(bitmap: &RoaringBitmap) -> usize {
83        bitmap.len() as usize
84    }
85}
86
87impl Default for TagBitmapIndex {
88    fn default() -> Self {
89        Self::new()
90    }
91}
92
93#[cfg(test)]
94mod tests {
95    use super::*;
96
97    #[test]
98    fn set_and_check_individual_bits() {
99        let mut idx = TagBitmapIndex::new();
100        idx.add(0, &["site".into()]);
101        idx.add(5, &["site".into()]);
102        idx.add(63, &["site".into()]);
103        idx.add(64, &["site".into()]);
104
105        let bm = idx.has_tag("site").unwrap();
106        assert!(bm.contains(0));
107        assert!(bm.contains(5));
108        assert!(bm.contains(63));
109        assert!(bm.contains(64));
110    }
111
112    #[test]
113    fn add_remove_tag_tracking() {
114        let mut idx = TagBitmapIndex::new();
115        idx.add(3, &["equip".into(), "ahu".into()]);
116        assert!(idx.has_tag("equip").is_some());
117        assert!(idx.has_tag("ahu").is_some());
118
119        idx.remove(3, &["equip".into(), "ahu".into()]);
120        let bm = idx.has_tag("equip").unwrap();
121        assert_eq!(TagBitmapIndex::count_ones(bm), 0);
122    }
123
124    #[test]
125    fn intersect_multiple_bitmaps() {
126        let mut idx = TagBitmapIndex::new();
127        idx.add(1, &["site".into(), "equip".into()]);
128        idx.add(2, &["site".into()]);
129        idx.add(3, &["equip".into()]);
130
131        let site_bm = idx.has_tag("site").unwrap();
132        let equip_bm = idx.has_tag("equip").unwrap();
133        let result = TagBitmapIndex::intersect(&[site_bm, equip_bm]);
134
135        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&result).collect();
136        assert_eq!(bits, vec![1]);
137    }
138
139    #[test]
140    fn union_multiple_bitmaps() {
141        let mut idx = TagBitmapIndex::new();
142        idx.add(1, &["site".into()]);
143        idx.add(3, &["equip".into()]);
144
145        let site_bm = idx.has_tag("site").unwrap();
146        let equip_bm = idx.has_tag("equip").unwrap();
147        let result = TagBitmapIndex::union(&[site_bm, equip_bm]);
148
149        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&result).collect();
150        assert_eq!(bits, vec![1, 3]);
151    }
152
153    #[test]
154    fn negate_bitmap() {
155        let mut bm = RoaringBitmap::new();
156        bm.insert(1);
157        bm.insert(3);
158        let negated = TagBitmapIndex::negate(&bm, 5);
159        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&negated).collect();
160        assert_eq!(bits, vec![0, 2, 4]);
161    }
162
163    #[test]
164    fn iterate_set_bits() {
165        let mut bm = RoaringBitmap::new();
166        bm.insert(0);
167        bm.insert(2);
168        bm.insert(4);
169        bm.insert(128);
170        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bm).collect();
171        assert_eq!(bits, vec![0, 2, 4, 128]);
172    }
173
174    #[test]
175    fn auto_grow_capacity() {
176        let mut idx = TagBitmapIndex::new();
177        idx.add(2000, &["far_away".into()]);
178        let bm = idx.has_tag("far_away").unwrap();
179        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(bm).collect();
180        assert_eq!(bits, vec![2000]);
181    }
182
183    #[test]
184    fn count_ones_popcount() {
185        let mut bm = RoaringBitmap::new();
186        for i in [0, 2, 3, 4, 64, 65] {
187            bm.insert(i);
188        }
189        assert_eq!(TagBitmapIndex::count_ones(&bm), 6);
190    }
191
192    #[test]
193    fn empty_bitmap_operations() {
194        assert_eq!(TagBitmapIndex::intersect(&[]).len(), 0);
195        assert_eq!(TagBitmapIndex::union(&[]).len(), 0);
196        assert_eq!(TagBitmapIndex::negate(&RoaringBitmap::new(), 0).len(), 0);
197        assert_eq!(TagBitmapIndex::count_ones(&RoaringBitmap::new()), 0);
198    }
199
200    #[test]
201    fn unknown_tag_returns_none() {
202        let idx = TagBitmapIndex::new();
203        assert!(idx.has_tag("nonexistent").is_none());
204    }
205
206    #[test]
207    fn remove_nonexistent_entity_is_noop() {
208        let mut idx = TagBitmapIndex::new();
209        idx.remove(999, &["site".into()]);
210    }
211
212    #[test]
213    fn intersect_single_bitmap() {
214        let mut bm = RoaringBitmap::new();
215        bm.insert(0);
216        bm.insert(1);
217        bm.insert(2);
218        let result = TagBitmapIndex::intersect(&[&bm]);
219        assert_eq!(result.len(), 3);
220    }
221
222    #[test]
223    fn negate_exact_word_boundary() {
224        let bm = RoaringBitmap::from_iter(0..64u32);
225        let negated = TagBitmapIndex::negate(&bm, 64);
226        assert_eq!(TagBitmapIndex::count_ones(&negated), 0);
227    }
228}