Skip to main content

grafeo_core/storage/
bitvec.rs

1//! Stores booleans as individual bits - 8x smaller than `Vec<bool>`.
2//!
3//! Use this when you're tracking lots of boolean flags (like "visited" markers
4//! in graph traversals, or null bitmaps). Backed by `Vec<u64>` so bitwise
5//! operations like AND/OR/XOR stay cache-friendly.
6//!
7//! # Example
8//!
9//! ```ignore
10//! let bools = vec![true, false, true, true, false, false, true, false];
11//! let bitvec = BitVector::from_bools(&bools);
12//! // Stored as: 0b01001101 (1 byte instead of 8)
13//!
14//! assert_eq!(bitvec.get(0), Some(true));
15//! assert_eq!(bitvec.get(1), Some(false));
16//! assert_eq!(bitvec.count_ones(), 4);
17//! ```
18
19use std::io;
20
21/// Stores booleans as individual bits - 8x smaller than `Vec<bool>`.
22///
23/// Supports bitwise operations ([`and`](Self::and), [`or`](Self::or),
24/// [`not`](Self::not)) for combining filter results efficiently.
25#[derive(Debug, Clone, PartialEq, Eq)]
26pub struct BitVector {
27    /// Packed bits (little-endian within each word).
28    data: Vec<u64>,
29    /// Number of bits stored.
30    len: usize,
31}
32
33impl BitVector {
34    /// Creates an empty bit vector.
35    #[must_use]
36    pub fn new() -> Self {
37        Self {
38            data: Vec::new(),
39            len: 0,
40        }
41    }
42
43    /// Creates a bit vector with the specified capacity (in bits).
44    #[must_use]
45    pub fn with_capacity(bits: usize) -> Self {
46        let words = (bits + 63) / 64;
47        Self {
48            data: Vec::with_capacity(words),
49            len: 0,
50        }
51    }
52
53    /// Creates a bit vector from a slice of booleans.
54    #[must_use]
55    pub fn from_bools(bools: &[bool]) -> Self {
56        let num_words = (bools.len() + 63) / 64;
57        let mut data = vec![0u64; num_words];
58
59        for (i, &b) in bools.iter().enumerate() {
60            if b {
61                let word_idx = i / 64;
62                let bit_idx = i % 64;
63                data[word_idx] |= 1 << bit_idx;
64            }
65        }
66
67        Self {
68            data,
69            len: bools.len(),
70        }
71    }
72
73    /// Creates a bit vector with all bits set to the same value.
74    #[must_use]
75    pub fn filled(len: usize, value: bool) -> Self {
76        let num_words = (len + 63) / 64;
77        let fill = if value { u64::MAX } else { 0 };
78        let data = vec![fill; num_words];
79
80        Self { data, len }
81    }
82
83    /// Creates a bit vector with all bits set to false (0).
84    #[must_use]
85    pub fn zeros(len: usize) -> Self {
86        Self::filled(len, false)
87    }
88
89    /// Creates a bit vector with all bits set to true (1).
90    #[must_use]
91    pub fn ones(len: usize) -> Self {
92        Self::filled(len, true)
93    }
94
95    /// Returns the number of bits.
96    #[must_use]
97    pub fn len(&self) -> usize {
98        self.len
99    }
100
101    /// Returns whether the bit vector is empty.
102    #[must_use]
103    pub fn is_empty(&self) -> bool {
104        self.len == 0
105    }
106
107    /// Gets the bit at the given index.
108    #[must_use]
109    pub fn get(&self, index: usize) -> Option<bool> {
110        if index >= self.len {
111            return None;
112        }
113
114        let word_idx = index / 64;
115        let bit_idx = index % 64;
116        Some((self.data[word_idx] & (1 << bit_idx)) != 0)
117    }
118
119    /// Sets the bit at the given index.
120    ///
121    /// # Panics
122    ///
123    /// Panics if index >= len.
124    pub fn set(&mut self, index: usize, value: bool) {
125        assert!(index < self.len, "Index out of bounds");
126
127        let word_idx = index / 64;
128        let bit_idx = index % 64;
129
130        if value {
131            self.data[word_idx] |= 1 << bit_idx;
132        } else {
133            self.data[word_idx] &= !(1 << bit_idx);
134        }
135    }
136
137    /// Appends a bit to the end.
138    pub fn push(&mut self, value: bool) {
139        let word_idx = self.len / 64;
140        let bit_idx = self.len % 64;
141
142        if word_idx >= self.data.len() {
143            self.data.push(0);
144        }
145
146        if value {
147            self.data[word_idx] |= 1 << bit_idx;
148        }
149
150        self.len += 1;
151    }
152
153    /// Returns the number of bits set to true.
154    #[must_use]
155    pub fn count_ones(&self) -> usize {
156        if self.is_empty() {
157            return 0;
158        }
159
160        let full_words = self.len / 64;
161        let remaining_bits = self.len % 64;
162
163        let mut count: usize = self.data[..full_words]
164            .iter()
165            .map(|&w| w.count_ones() as usize)
166            .sum();
167
168        if remaining_bits > 0 && full_words < self.data.len() {
169            let mask = (1u64 << remaining_bits) - 1;
170            count += (self.data[full_words] & mask).count_ones() as usize;
171        }
172
173        count
174    }
175
176    /// Returns the number of bits set to false.
177    #[must_use]
178    pub fn count_zeros(&self) -> usize {
179        self.len - self.count_ones()
180    }
181
182    /// Converts back to a `Vec<bool>`.
183    #[must_use]
184    pub fn to_bools(&self) -> Vec<bool> {
185        (0..self.len).map(|i| self.get(i).unwrap()).collect()
186    }
187
188    /// Returns an iterator over the bits.
189    pub fn iter(&self) -> impl Iterator<Item = bool> + '_ {
190        (0..self.len).map(move |i| self.get(i).unwrap())
191    }
192
193    /// Returns an iterator over indices where bits are true.
194    pub fn ones_iter(&self) -> impl Iterator<Item = usize> + '_ {
195        (0..self.len).filter(move |&i| self.get(i).unwrap())
196    }
197
198    /// Returns an iterator over indices where bits are false.
199    pub fn zeros_iter(&self) -> impl Iterator<Item = usize> + '_ {
200        (0..self.len).filter(move |&i| !self.get(i).unwrap())
201    }
202
203    /// Returns the raw data.
204    #[must_use]
205    pub fn data(&self) -> &[u64] {
206        &self.data
207    }
208
209    /// Returns the compression ratio (original bytes / compressed bytes).
210    #[must_use]
211    pub fn compression_ratio(&self) -> f64 {
212        if self.is_empty() {
213            return 1.0;
214        }
215
216        // Original: 1 byte per bool
217        let original_size = self.len;
218        // Compressed: ceil(len / 8) bytes
219        let compressed_size = self.data.len() * 8;
220
221        if compressed_size == 0 {
222            return 1.0;
223        }
224
225        original_size as f64 / compressed_size as f64
226    }
227
228    /// Performs bitwise AND with another bit vector.
229    ///
230    /// The result has the length of the shorter vector.
231    #[must_use]
232    pub fn and(&self, other: &Self) -> Self {
233        let len = self.len.min(other.len);
234        let num_words = (len + 63) / 64;
235
236        let data: Vec<u64> = self
237            .data
238            .iter()
239            .zip(&other.data)
240            .take(num_words)
241            .map(|(&a, &b)| a & b)
242            .collect();
243
244        Self { data, len }
245    }
246
247    /// Performs bitwise OR with another bit vector.
248    ///
249    /// The result has the length of the shorter vector.
250    #[must_use]
251    pub fn or(&self, other: &Self) -> Self {
252        let len = self.len.min(other.len);
253        let num_words = (len + 63) / 64;
254
255        let data: Vec<u64> = self
256            .data
257            .iter()
258            .zip(&other.data)
259            .take(num_words)
260            .map(|(&a, &b)| a | b)
261            .collect();
262
263        Self { data, len }
264    }
265
266    /// Performs bitwise NOT.
267    #[must_use]
268    pub fn not(&self) -> Self {
269        let data: Vec<u64> = self.data.iter().map(|&w| !w).collect();
270        Self {
271            data,
272            len: self.len,
273        }
274    }
275
276    /// Performs bitwise XOR with another bit vector.
277    #[must_use]
278    pub fn xor(&self, other: &Self) -> Self {
279        let len = self.len.min(other.len);
280        let num_words = (len + 63) / 64;
281
282        let data: Vec<u64> = self
283            .data
284            .iter()
285            .zip(&other.data)
286            .take(num_words)
287            .map(|(&a, &b)| a ^ b)
288            .collect();
289
290        Self { data, len }
291    }
292
293    /// Serializes to bytes.
294    pub fn to_bytes(&self) -> Vec<u8> {
295        let mut buf = Vec::with_capacity(4 + self.data.len() * 8);
296        buf.extend_from_slice(&(self.len as u32).to_le_bytes());
297        for &word in &self.data {
298            buf.extend_from_slice(&word.to_le_bytes());
299        }
300        buf
301    }
302
303    /// Deserializes from bytes.
304    pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
305        if bytes.len() < 4 {
306            return Err(io::Error::new(
307                io::ErrorKind::InvalidData,
308                "BitVector too short",
309            ));
310        }
311
312        let len = u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as usize;
313        let num_words = (len + 63) / 64;
314
315        if bytes.len() < 4 + num_words * 8 {
316            return Err(io::Error::new(
317                io::ErrorKind::InvalidData,
318                "BitVector truncated",
319            ));
320        }
321
322        let mut data = Vec::with_capacity(num_words);
323        for i in 0..num_words {
324            let offset = 4 + i * 8;
325            let word = u64::from_le_bytes(bytes[offset..offset + 8].try_into().unwrap());
326            data.push(word);
327        }
328
329        Ok(Self { data, len })
330    }
331}
332
333impl Default for BitVector {
334    fn default() -> Self {
335        Self::new()
336    }
337}
338
339impl FromIterator<bool> for BitVector {
340    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
341        let mut bitvec = BitVector::new();
342        for b in iter {
343            bitvec.push(b);
344        }
345        bitvec
346    }
347}
348
349#[cfg(test)]
350mod tests {
351    use super::*;
352
353    #[test]
354    fn test_bitvec_basic() {
355        let bools = vec![true, false, true, true, false, false, true, false];
356        let bitvec = BitVector::from_bools(&bools);
357
358        assert_eq!(bitvec.len(), 8);
359        for (i, &expected) in bools.iter().enumerate() {
360            assert_eq!(bitvec.get(i), Some(expected));
361        }
362    }
363
364    #[test]
365    fn test_bitvec_empty() {
366        let bitvec = BitVector::new();
367        assert!(bitvec.is_empty());
368        assert_eq!(bitvec.get(0), None);
369    }
370
371    #[test]
372    fn test_bitvec_push() {
373        let mut bitvec = BitVector::new();
374        bitvec.push(true);
375        bitvec.push(false);
376        bitvec.push(true);
377
378        assert_eq!(bitvec.len(), 3);
379        assert_eq!(bitvec.get(0), Some(true));
380        assert_eq!(bitvec.get(1), Some(false));
381        assert_eq!(bitvec.get(2), Some(true));
382    }
383
384    #[test]
385    fn test_bitvec_set() {
386        let mut bitvec = BitVector::zeros(8);
387
388        bitvec.set(0, true);
389        bitvec.set(3, true);
390        bitvec.set(7, true);
391
392        assert_eq!(bitvec.get(0), Some(true));
393        assert_eq!(bitvec.get(1), Some(false));
394        assert_eq!(bitvec.get(3), Some(true));
395        assert_eq!(bitvec.get(7), Some(true));
396    }
397
398    #[test]
399    fn test_bitvec_count() {
400        let bools = vec![true, false, true, true, false, false, true, false];
401        let bitvec = BitVector::from_bools(&bools);
402
403        assert_eq!(bitvec.count_ones(), 4);
404        assert_eq!(bitvec.count_zeros(), 4);
405    }
406
407    #[test]
408    fn test_bitvec_filled() {
409        let zeros = BitVector::zeros(100);
410        assert_eq!(zeros.count_ones(), 0);
411        assert_eq!(zeros.count_zeros(), 100);
412
413        let ones = BitVector::ones(100);
414        assert_eq!(ones.count_ones(), 100);
415        assert_eq!(ones.count_zeros(), 0);
416    }
417
418    #[test]
419    fn test_bitvec_to_bools() {
420        let original = vec![true, false, true, true, false];
421        let bitvec = BitVector::from_bools(&original);
422        let restored = bitvec.to_bools();
423        assert_eq!(original, restored);
424    }
425
426    #[test]
427    fn test_bitvec_large() {
428        // Test with more than 64 bits
429        let bools: Vec<bool> = (0..200).map(|i| i % 3 == 0).collect();
430        let bitvec = BitVector::from_bools(&bools);
431
432        assert_eq!(bitvec.len(), 200);
433        for (i, &expected) in bools.iter().enumerate() {
434            assert_eq!(bitvec.get(i), Some(expected), "Mismatch at index {}", i);
435        }
436    }
437
438    #[test]
439    fn test_bitvec_and() {
440        let a = BitVector::from_bools(&[true, true, false, false]);
441        let b = BitVector::from_bools(&[true, false, true, false]);
442        let result = a.and(&b);
443
444        assert_eq!(result.to_bools(), vec![true, false, false, false]);
445    }
446
447    #[test]
448    fn test_bitvec_or() {
449        let a = BitVector::from_bools(&[true, true, false, false]);
450        let b = BitVector::from_bools(&[true, false, true, false]);
451        let result = a.or(&b);
452
453        assert_eq!(result.to_bools(), vec![true, true, true, false]);
454    }
455
456    #[test]
457    fn test_bitvec_not() {
458        let a = BitVector::from_bools(&[true, false, true, false]);
459        let result = a.not();
460
461        // Note: NOT inverts all bits in the word, so we check the relevant bits
462        assert_eq!(result.get(0), Some(false));
463        assert_eq!(result.get(1), Some(true));
464        assert_eq!(result.get(2), Some(false));
465        assert_eq!(result.get(3), Some(true));
466    }
467
468    #[test]
469    fn test_bitvec_xor() {
470        let a = BitVector::from_bools(&[true, true, false, false]);
471        let b = BitVector::from_bools(&[true, false, true, false]);
472        let result = a.xor(&b);
473
474        assert_eq!(result.to_bools(), vec![false, true, true, false]);
475    }
476
477    #[test]
478    fn test_bitvec_serialization() {
479        let bools = vec![true, false, true, true, false, false, true, false];
480        let bitvec = BitVector::from_bools(&bools);
481        let bytes = bitvec.to_bytes();
482        let restored = BitVector::from_bytes(&bytes).unwrap();
483        assert_eq!(bitvec, restored);
484    }
485
486    #[test]
487    fn test_bitvec_compression_ratio() {
488        let bitvec = BitVector::zeros(64);
489        let ratio = bitvec.compression_ratio();
490        // 64 bools = 64 bytes original, 8 bytes compressed = 8x
491        assert!((ratio - 8.0).abs() < 0.1);
492    }
493
494    #[test]
495    fn test_bitvec_ones_iter() {
496        let bools = vec![true, false, true, true, false];
497        let bitvec = BitVector::from_bools(&bools);
498        let ones: Vec<usize> = bitvec.ones_iter().collect();
499        assert_eq!(ones, vec![0, 2, 3]);
500    }
501
502    #[test]
503    fn test_bitvec_zeros_iter() {
504        let bools = vec![true, false, true, true, false];
505        let bitvec = BitVector::from_bools(&bools);
506        let zeros: Vec<usize> = bitvec.zeros_iter().collect();
507        assert_eq!(zeros, vec![1, 4]);
508    }
509
510    #[test]
511    fn test_bitvec_from_iter() {
512        let bitvec: BitVector = vec![true, false, true].into_iter().collect();
513        assert_eq!(bitvec.len(), 3);
514        assert_eq!(bitvec.get(0), Some(true));
515        assert_eq!(bitvec.get(1), Some(false));
516        assert_eq!(bitvec.get(2), Some(true));
517    }
518}