range_filters/
bitmap.rs

1const U64_BIT_SIZE: usize = 64;
2
3#[inline]
4pub fn set_bit(data: &mut [u64], pos: usize) {
5    if pos >= data.len() * U64_BIT_SIZE {
6        println!("pos: {} is out of bounds", pos);
7        println!("data.len(): {}", data.len());
8    }
9    data[pos / U64_BIT_SIZE] |= 1 << (pos % U64_BIT_SIZE);
10}
11
12#[inline]
13pub fn clear_bit(data: &mut [u64], pos: usize) {
14    data[pos / U64_BIT_SIZE] &= !(1 << (pos % U64_BIT_SIZE));
15}
16
17#[inline]
18pub fn get_bit(data: &[u64], pos: usize) -> bool {
19    data[pos / U64_BIT_SIZE] & (1 << (pos % U64_BIT_SIZE)) != 0
20}
21
22// count the number of 1s in the data up to the pos
23#[inline]
24pub fn rank(data: &[u64], pos: usize) -> usize {
25    let word_index = pos / U64_BIT_SIZE;
26    let bit_index = pos % U64_BIT_SIZE;
27
28    let mut count = 0;
29    for i in 0..word_index {
30        count += data[i].count_ones() as usize;
31    }
32
33    if bit_index > 0 {
34        let mask = (1 << bit_index) - 1;
35        count += (data[word_index] & mask).count_ones() as usize;
36    }
37
38    count
39}
40
41// find the position of the rank-th 1 in the data
42#[inline]
43pub fn select(data: &[u64], rank: usize) -> Option<usize> {
44    let mut count = 0;
45
46    let target = rank + 1;
47
48    for (word_index, &word) in data.iter().enumerate() {
49        let ones_in_word = word.count_ones() as usize;
50
51        if count + ones_in_word >= target {
52            let remaining = target - count;
53            let pos_in_word = select_in_word(word, remaining - 1)?;
54            return Some(word_index * U64_BIT_SIZE + pos_in_word);
55        }
56
57        count += ones_in_word;
58    }
59    None
60}
61
62#[inline]
63fn select_in_word(word: u64, rank: usize) -> Option<usize> {
64    let mut count = 0;
65
66    for i in 0..U64_BIT_SIZE {
67        if word & (1 << i) != 0 {
68            if count == rank {
69                return Some(i);
70            }
71            count += 1;
72        }
73    }
74    None
75}
76
77/// optimized rank using cached halfway popcount
78/// if pos is in second half, start counting from cached midpoint
79#[inline]
80pub fn rank_cached(data: &[u64], pos: usize, half_pos: usize, cached_popcount: usize) -> usize {
81    if pos <= half_pos {
82        // query is in first half, use regular rank
83        rank(data, pos)
84    } else {
85        // query is in second half, start from cached count
86        let word_offset = half_pos / U64_BIT_SIZE;
87        let remaining = rank(&data[word_offset..], pos - half_pos);
88        cached_popcount + remaining
89    }
90}
91
92/// optimized select using cached halfway popcount
93/// if target rank is past cached count, start from midpoint
94#[inline]
95pub fn select_cached(
96    data: &[u64],
97    rank_val: usize,
98    half_pos: usize,
99    cached_popcount: usize,
100) -> Option<usize> {
101    if rank_val < cached_popcount {
102        // target is in first half, use regular select
103        select(data, rank_val)
104    } else {
105        // target is in second half, start from cached midpoint
106        let remaining_rank = rank_val - cached_popcount;
107        let word_offset = half_pos / U64_BIT_SIZE;
108
109        // search in second half
110        select(&data[word_offset..], remaining_rank).map(|pos| pos + half_pos)
111    }
112}
113
114/// Check if there are any set bits in the range [start_pos, end_pos) (exclusive end)
115/// Optimized for range queries using bit manipulation
116#[inline]
117pub fn has_bits_in_range(data: &[u64], start_pos: usize, end_pos: usize) -> bool {
118    if start_pos >= end_pos {
119        return false;
120    }
121
122    let start_word = start_pos / U64_BIT_SIZE;
123    let end_word = (end_pos - 1) / U64_BIT_SIZE;
124
125    if start_word == end_word {
126        // Range within single word
127        let start_bit = start_pos % U64_BIT_SIZE;
128        let end_bit = end_pos % U64_BIT_SIZE;
129
130        // Create mask for bits [start_bit, end_bit)
131        let mask = if end_bit == 0 {
132            // Special case: end_bit wraps to 0, means we want all bits from start_bit to 63
133            !((1u64 << start_bit) - 1)
134        } else {
135            ((1u64 << end_bit) - 1) & !((1u64 << start_bit) - 1)
136        };
137
138        if start_word < data.len() {
139            return (data[start_word] & mask) != 0;
140        }
141        return false;
142    }
143
144    // Range spans multiple words
145
146    // Check start word (partial)
147    if start_word < data.len() {
148        let start_bit = start_pos % U64_BIT_SIZE;
149        let start_mask = !((1u64 << start_bit) - 1); // All bits from start_bit to 63
150        if (data[start_word] & start_mask) != 0 {
151            return true;
152        }
153    }
154
155    // Check complete intermediate words
156    for word_idx in (start_word + 1)..end_word.min(data.len()) {
157        if data[word_idx] != 0 {
158            return true;
159        }
160    }
161
162    // Check end word (partial)
163    if end_word < data.len() {
164        let end_bit = end_pos % U64_BIT_SIZE;
165        let end_mask = if end_bit == 0 {
166            u64::MAX // All bits if end_bit wraps to 0
167        } else {
168            (1u64 << end_bit) - 1 // Bits 0 to end_bit-1
169        };
170        if (data[end_word] & end_mask) != 0 {
171            return true;
172        }
173    }
174
175    false
176}
177
178#[cfg(test)]
179mod tests {
180    use super::*;
181
182    #[test]
183    fn test_set_and_get_bit() {
184        let mut data = vec![0u64; 2];
185
186        // set bits in first word
187        set_bit(&mut data, 0);
188        set_bit(&mut data, 5);
189        set_bit(&mut data, 63);
190
191        assert!(get_bit(&data, 0));
192        assert!(!get_bit(&data, 1));
193        assert!(get_bit(&data, 5));
194        assert!(get_bit(&data, 63));
195
196        // set bits in second word
197        set_bit(&mut data, 64);
198        set_bit(&mut data, 127);
199
200        assert!(get_bit(&data, 64));
201        assert!(get_bit(&data, 127));
202        assert!(!get_bit(&data, 100));
203    }
204
205    #[test]
206    fn test_rank() {
207        let mut data = vec![0u64; 2];
208
209        set_bit(&mut data, 0);
210        set_bit(&mut data, 2);
211        set_bit(&mut data, 4);
212        set_bit(&mut data, 64);
213        set_bit(&mut data, 65);
214        set_bit(&mut data, 127);
215
216        assert_eq!(rank(&data, 0), 0); // before bit 0
217        assert_eq!(rank(&data, 1), 1); // after bit 0
218        assert_eq!(rank(&data, 3), 2); // after bits 0, 2
219        assert_eq!(rank(&data, 5), 3); // after bits 0, 2, 4
220        assert_eq!(rank(&data, 64), 3); // before second word
221        assert_eq!(rank(&data, 65), 4); // after bit 64
222        assert_eq!(rank(&data, 128), 6); // all bits
223    }
224
225    #[test]
226    fn test_select() {
227        let mut data = vec![0u64; 2];
228
229        set_bit(&mut data, 0);
230        set_bit(&mut data, 2);
231        set_bit(&mut data, 4);
232        set_bit(&mut data, 64);
233        set_bit(&mut data, 65);
234        set_bit(&mut data, 127);
235
236        assert_eq!(select(&data, 0), Some(0)); // 0th one at position 0
237        assert_eq!(select(&data, 1), Some(2)); // 1st one at position 2
238        assert_eq!(select(&data, 2), Some(4)); // 2nd one at position 4
239        assert_eq!(select(&data, 3), Some(64)); // 3rd one at position 64
240        assert_eq!(select(&data, 4), Some(65)); // 4th one at position 65
241        assert_eq!(select(&data, 5), Some(127)); // 5th one at position 127
242        assert_eq!(select(&data, 6), None); // no 6th one
243    }
244
245    #[test]
246    fn test_select_in_word() {
247        // word = 0b10101 (bits 0, 2, 4 set)
248        let word = 0b10101u64;
249
250        assert_eq!(select_in_word(word, 0), Some(0));
251        assert_eq!(select_in_word(word, 1), Some(2));
252        assert_eq!(select_in_word(word, 2), Some(4));
253        assert_eq!(select_in_word(word, 3), None);
254    }
255
256    #[test]
257    fn test_rank_select_consistency() {
258        let mut data = vec![0u64; 4];
259
260        // set various bits
261        let positions = vec![1, 7, 15, 63, 64, 100, 200, 255];
262        for &pos in &positions {
263            set_bit(&mut data, pos);
264        }
265
266        // verify rank-select consistency
267        for (rank_, &expected_pos) in positions.iter().enumerate() {
268            assert_eq!(select(&data, rank_), Some(expected_pos));
269            assert_eq!(rank(&data, expected_pos + 1), rank_ + 1usize);
270        }
271    }
272
273    #[test]
274    fn test_has_bits_in_range() {
275        let mut data = vec![0u64; 2];
276
277        // Set bits: 5, 10, 67, 100
278        set_bit(&mut data, 5);
279        set_bit(&mut data, 10);
280        set_bit(&mut data, 67);
281        set_bit(&mut data, 100);
282
283        // Test ranges that should find bits
284        assert!(has_bits_in_range(&data, 0, 15)); // Should find bit 5 and 10
285        assert!(has_bits_in_range(&data, 5, 6)); // Should find bit 5
286        assert!(has_bits_in_range(&data, 65, 70)); // Should find bit 67
287        assert!(has_bits_in_range(&data, 90, 110)); // Should find bit 100
288
289        // Test ranges that should not find bits
290        assert!(!has_bits_in_range(&data, 0, 5)); // Before bit 5
291        assert!(!has_bits_in_range(&data, 11, 67)); // Between bits 10 and 67
292        assert!(!has_bits_in_range(&data, 101, 120)); // After bit 100
293
294        // Test edge cases
295        assert!(!has_bits_in_range(&data, 10, 10)); // Empty range
296        assert!(!has_bits_in_range(&data, 15, 10)); // Invalid range
297        assert!(has_bits_in_range(&data, 10, 11)); // Single bit range
298    }
299
300    #[test]
301    fn test_has_bits_in_range_single_word() {
302        let mut data = vec![0b10101u64]; // Bits 0, 2, 4 set
303
304        assert!(has_bits_in_range(&data, 0, 3)); // Should find bit 0 and 2
305        assert!(has_bits_in_range(&data, 2, 5)); // Should find bit 2 and 4
306        assert!(!has_bits_in_range(&data, 1, 2)); // Between bits 0 and 2
307        assert!(!has_bits_in_range(&data, 5, 10)); // After all bits
308    }
309
310    #[test]
311    fn test_has_bits_in_range_multiple_words() {
312        let mut data = vec![0u64; 3];
313
314        // Set bits in different words
315        set_bit(&mut data, 10); // First word
316        set_bit(&mut data, 70); // Second word
317        set_bit(&mut data, 130); // Third word
318
319        // Test ranges spanning multiple words
320        assert!(has_bits_in_range(&data, 0, 80)); // Should find bits 10, 70
321        assert!(has_bits_in_range(&data, 65, 135)); // Should find bits 70, 130
322        assert!(has_bits_in_range(&data, 5, 140)); // Should find all bits
323
324        // Test ranges in gaps between words
325        assert!(!has_bits_in_range(&data, 15, 65)); // Between bits 10 and 70
326        assert!(!has_bits_in_range(&data, 75, 125)); // Between bits 70 and 130
327    }
328}