Skip to main content

haystack_core/graph/
bitmap.rs

1// Tag bitmap index — Vec<u64> bitsets for fast tag-presence queries.
2
3use std::collections::HashMap;
4
5/// Initial capacity in u64 words (covers 1024 entities).
6const INITIAL_WORDS: usize = 16;
7
8/// Tag-presence bitmap index.
9///
10/// Maps tag names to bitsets stored as `Vec<u64>`. Each entity is assigned
11/// a numeric id; the bit at position `id` is set in every bitmap for every
12/// tag the entity carries.
13pub struct TagBitmapIndex {
14    /// tag_name -> bitmap (Vec<u64>)
15    bitmaps: HashMap<String, Vec<u64>>,
16    /// Number of u64 words currently allocated per bitmap.
17    capacity: usize,
18}
19
20impl TagBitmapIndex {
21    /// Create a new empty index.
22    pub fn new() -> Self {
23        Self {
24            bitmaps: HashMap::new(),
25            capacity: INITIAL_WORDS,
26        }
27    }
28
29    /// Add an entity's tags to the index.
30    ///
31    /// Sets the bit at `entity_id` in each tag's bitmap.
32    pub fn add(&mut self, entity_id: usize, tags: &[String]) {
33        self.ensure_capacity(entity_id);
34        let word_idx = entity_id / 64;
35        let bit = 1u64 << (entity_id % 64);
36
37        for tag in tags {
38            let bm = self
39                .bitmaps
40                .entry(tag.clone())
41                .or_insert_with(|| vec![0u64; self.capacity]);
42            // Grow this bitmap if it was created before a capacity increase.
43            if bm.len() < self.capacity {
44                bm.resize(self.capacity, 0);
45            }
46            bm[word_idx] |= bit;
47        }
48    }
49
50    /// Remove an entity from the given tag bitmaps.
51    ///
52    /// Clears the bit at `entity_id` in each tag's bitmap.
53    pub fn remove(&mut self, entity_id: usize, tags: &[String]) {
54        let word_idx = entity_id / 64;
55        let bit = 1u64 << (entity_id % 64);
56
57        for tag in tags {
58            if let Some(bm) = self.bitmaps.get_mut(tag.as_str())
59                && word_idx < bm.len()
60            {
61                bm[word_idx] &= !bit;
62            }
63        }
64    }
65
66    /// Get the bitmap for a tag, if it exists.
67    pub fn has_tag(&self, tag: &str) -> Option<&Vec<u64>> {
68        self.bitmaps.get(tag)
69    }
70
71    /// Bitwise AND of multiple bitmaps.
72    ///
73    /// Returns an empty bitmap if `bitmaps` is empty.
74    pub fn intersect(bitmaps: &[&Vec<u64>]) -> Vec<u64> {
75        if bitmaps.is_empty() {
76            return Vec::new();
77        }
78        let len = bitmaps.iter().map(|b| b.len()).min().unwrap_or(0);
79        let mut result = bitmaps[0][..len].to_vec();
80        for bm in &bitmaps[1..] {
81            for (i, word) in result.iter_mut().enumerate() {
82                *word &= bm.get(i).copied().unwrap_or(0);
83            }
84        }
85        result
86    }
87
88    /// Bitwise OR of multiple bitmaps.
89    ///
90    /// Returns an empty bitmap if `bitmaps` is empty.
91    pub fn union(bitmaps: &[&Vec<u64>]) -> Vec<u64> {
92        if bitmaps.is_empty() {
93            return Vec::new();
94        }
95        let len = bitmaps.iter().map(|b| b.len()).max().unwrap_or(0);
96        let mut result = vec![0u64; len];
97        for bm in bitmaps {
98            for (i, &word) in bm.iter().enumerate() {
99                result[i] |= word;
100            }
101        }
102        result
103    }
104
105    /// Bitwise NOT of a bitmap, limited to `max_id` bits.
106    pub fn negate(bitmap: &[u64], max_id: usize) -> Vec<u64> {
107        if max_id == 0 {
108            return Vec::new();
109        }
110        let total_words = max_id.div_ceil(64);
111        let mut result = vec![0u64; total_words];
112        for (i, word) in result.iter_mut().enumerate() {
113            let src = bitmap.get(i).copied().unwrap_or(0);
114            *word = !src;
115        }
116        // Mask off bits beyond max_id in the last word.
117        let tail_bits = max_id % 64;
118        if tail_bits != 0 && !result.is_empty() {
119            let last = result.len() - 1;
120            result[last] &= (1u64 << tail_bits) - 1;
121        }
122        result
123    }
124
125    /// Iterate over positions of set bits in a bitmap.
126    pub fn iter_set_bits(bitmap: &[u64]) -> impl Iterator<Item = usize> + '_ {
127        bitmap
128            .iter()
129            .enumerate()
130            .flat_map(|(word_idx, &word)| SetBitIter {
131                word,
132                base: word_idx * 64,
133            })
134    }
135
136    /// Count the number of set bits (population count).
137    pub fn count_ones(bitmap: &[u64]) -> usize {
138        bitmap.iter().map(|w| w.count_ones() as usize).sum()
139    }
140
141    /// Ensure capacity covers `entity_id`.
142    fn ensure_capacity(&mut self, entity_id: usize) {
143        let needed = entity_id / 64 + 1;
144        if needed > self.capacity {
145            // Double until sufficient.
146            let mut new_cap = self.capacity;
147            while new_cap < needed {
148                new_cap *= 2;
149            }
150            self.capacity = new_cap;
151            // Grow all existing bitmaps.
152            for bm in self.bitmaps.values_mut() {
153                bm.resize(self.capacity, 0);
154            }
155        }
156    }
157}
158
159impl Default for TagBitmapIndex {
160    fn default() -> Self {
161        Self::new()
162    }
163}
164
165/// Iterator that yields set bit positions from a single u64 word.
166struct SetBitIter {
167    word: u64,
168    base: usize,
169}
170
171impl Iterator for SetBitIter {
172    type Item = usize;
173
174    #[inline]
175    fn next(&mut self) -> Option<usize> {
176        if self.word == 0 {
177            return None;
178        }
179        let tz = self.word.trailing_zeros() as usize;
180        // Clear the lowest set bit.
181        self.word &= self.word - 1;
182        Some(self.base + tz)
183    }
184}
185
186#[cfg(test)]
187mod tests {
188    use super::*;
189
190    #[test]
191    fn set_and_check_individual_bits() {
192        let mut idx = TagBitmapIndex::new();
193        idx.add(0, &["site".into()]);
194        idx.add(5, &["site".into()]);
195        idx.add(63, &["site".into()]);
196        idx.add(64, &["site".into()]);
197
198        let bm = idx.has_tag("site").unwrap();
199        assert_eq!(bm[0] & 1, 1); // bit 0
200        assert_ne!(bm[0] & (1u64 << 5), 0); // bit 5
201        assert_ne!(bm[0] & (1u64 << 63), 0); // bit 63
202        assert_ne!(bm[1] & 1, 0); // bit 64
203    }
204
205    #[test]
206    fn add_remove_tag_tracking() {
207        let mut idx = TagBitmapIndex::new();
208        idx.add(3, &["equip".into(), "ahu".into()]);
209        assert!(idx.has_tag("equip").is_some());
210        assert!(idx.has_tag("ahu").is_some());
211
212        idx.remove(3, &["equip".into(), "ahu".into()]);
213        // Bitmaps still exist but bit is cleared.
214        let bm = idx.has_tag("equip").unwrap();
215        assert_eq!(TagBitmapIndex::count_ones(bm), 0);
216    }
217
218    #[test]
219    fn intersect_multiple_bitmaps() {
220        let mut idx = TagBitmapIndex::new();
221        idx.add(1, &["site".into(), "equip".into()]);
222        idx.add(2, &["site".into()]);
223        idx.add(3, &["equip".into()]);
224
225        let site_bm = idx.has_tag("site").unwrap();
226        let equip_bm = idx.has_tag("equip").unwrap();
227        let result = TagBitmapIndex::intersect(&[site_bm, equip_bm]);
228
229        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&result).collect();
230        assert_eq!(bits, vec![1]); // only entity 1 has both
231    }
232
233    #[test]
234    fn union_multiple_bitmaps() {
235        let mut idx = TagBitmapIndex::new();
236        idx.add(1, &["site".into()]);
237        idx.add(3, &["equip".into()]);
238
239        let site_bm = idx.has_tag("site").unwrap();
240        let equip_bm = idx.has_tag("equip").unwrap();
241        let result = TagBitmapIndex::union(&[site_bm, equip_bm]);
242
243        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&result).collect();
244        assert_eq!(bits, vec![1, 3]);
245    }
246
247    #[test]
248    fn negate_bitmap() {
249        let bitmap = vec![0b1010u64]; // bits 1, 3 set
250        let negated = TagBitmapIndex::negate(&bitmap, 5);
251        // Bits 0, 2, 4 should be set (within max_id=5)
252        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&negated).collect();
253        assert_eq!(bits, vec![0, 2, 4]);
254    }
255
256    #[test]
257    fn iterate_set_bits() {
258        let bitmap = vec![0b10101u64, 0u64, 1u64]; // bits 0,2,4 in word 0; bit 128 in word 2
259        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(&bitmap).collect();
260        assert_eq!(bits, vec![0, 2, 4, 128]);
261    }
262
263    #[test]
264    fn auto_grow_capacity() {
265        let mut idx = TagBitmapIndex::new();
266        // Initial capacity is 16 words = 1024 bits.
267        // Add entity at id 2000 should trigger growth.
268        idx.add(2000, &["far_away".into()]);
269        let bm = idx.has_tag("far_away").unwrap();
270        let bits: Vec<usize> = TagBitmapIndex::iter_set_bits(bm).collect();
271        assert_eq!(bits, vec![2000]);
272    }
273
274    #[test]
275    fn count_ones_popcount() {
276        let bitmap = vec![0b11101u64, 0b11u64]; // 4 + 2 = 6
277        assert_eq!(TagBitmapIndex::count_ones(&bitmap), 6);
278    }
279
280    #[test]
281    fn empty_bitmap_operations() {
282        assert_eq!(TagBitmapIndex::intersect(&[]), Vec::<u64>::new());
283        assert_eq!(TagBitmapIndex::union(&[]), Vec::<u64>::new());
284        assert_eq!(TagBitmapIndex::negate(&[], 0), Vec::<u64>::new());
285        assert_eq!(TagBitmapIndex::count_ones(&[]), 0);
286    }
287
288    #[test]
289    fn unknown_tag_returns_none() {
290        let idx = TagBitmapIndex::new();
291        assert!(idx.has_tag("nonexistent").is_none());
292    }
293
294    #[test]
295    fn remove_nonexistent_entity_is_noop() {
296        let mut idx = TagBitmapIndex::new();
297        // Should not panic.
298        idx.remove(999, &["site".into()]);
299    }
300
301    #[test]
302    fn intersect_single_bitmap() {
303        let bm = vec![0b111u64];
304        let result = TagBitmapIndex::intersect(&[&bm]);
305        assert_eq!(result, vec![0b111u64]);
306    }
307
308    #[test]
309    fn negate_exact_word_boundary() {
310        // max_id = 64 means exactly 1 word, all bits valid.
311        let bitmap = vec![u64::MAX];
312        let negated = TagBitmapIndex::negate(&bitmap, 64);
313        assert_eq!(TagBitmapIndex::count_ones(&negated), 0);
314    }
315}