Skip to main content

grafeo_core/storage/
bitvec.rs

1//! Stores booleans as individual bits - 8x smaller than `Vec<bool>`.
2//!
3//! Use this when you're tracking lots of boolean flags (like "visited" markers
4//! in graph traversals, or null bitmaps). Backed by `Vec<u64>` so bitwise
5//! operations like AND/OR/XOR stay cache-friendly.
6//!
7//! # Example
8//!
9//! ```no_run
10//! # use grafeo_core::storage::bitvec::BitVector;
11//! let bools = vec![true, false, true, true, false, false, true, false];
12//! let bitvec = BitVector::from_bools(&bools);
13//! // Stored as: 0b01001101 (1 byte instead of 8)
14//!
15//! assert_eq!(bitvec.get(0), Some(true));
16//! assert_eq!(bitvec.get(1), Some(false));
17//! assert_eq!(bitvec.count_ones(), 4);
18//! ```
19
20use std::io;
21
22/// Stores booleans as individual bits - 8x smaller than `Vec<bool>`.
23///
24/// Supports bitwise operations ([`and`](Self::and), [`or`](Self::or),
25/// [`not`](Self::not)) for combining filter results efficiently.
26#[derive(Debug, Clone, PartialEq, Eq)]
27pub struct BitVector {
28    /// Packed bits (little-endian within each word).
29    data: Vec<u64>,
30    /// Number of bits stored.
31    len: usize,
32}
33
34impl BitVector {
35    /// Creates an empty bit vector.
36    #[must_use]
37    pub fn new() -> Self {
38        Self {
39            data: Vec::new(),
40            len: 0,
41        }
42    }
43
44    /// Creates a bit vector with the specified capacity (in bits).
45    #[must_use]
46    pub fn with_capacity(bits: usize) -> Self {
47        let words = (bits + 63) / 64;
48        Self {
49            data: Vec::with_capacity(words),
50            len: 0,
51        }
52    }
53
54    /// Creates a bit vector from a slice of booleans.
55    #[must_use]
56    pub fn from_bools(bools: &[bool]) -> Self {
57        let num_words = (bools.len() + 63) / 64;
58        let mut data = vec![0u64; num_words];
59
60        for (i, &b) in bools.iter().enumerate() {
61            if b {
62                let word_idx = i / 64;
63                let bit_idx = i % 64;
64                data[word_idx] |= 1 << bit_idx;
65            }
66        }
67
68        Self {
69            data,
70            len: bools.len(),
71        }
72    }
73
74    /// Creates a bit vector with all bits set to the same value.
75    #[must_use]
76    pub fn filled(len: usize, value: bool) -> Self {
77        let num_words = (len + 63) / 64;
78        let fill = if value { u64::MAX } else { 0 };
79        let data = vec![fill; num_words];
80
81        Self { data, len }
82    }
83
84    /// Creates a bit vector with all bits set to false (0).
85    #[must_use]
86    pub fn zeros(len: usize) -> Self {
87        Self::filled(len, false)
88    }
89
90    /// Creates a bit vector with all bits set to true (1).
91    #[must_use]
92    pub fn ones(len: usize) -> Self {
93        Self::filled(len, true)
94    }
95
96    /// Returns the number of bits.
97    #[must_use]
98    pub fn len(&self) -> usize {
99        self.len
100    }
101
102    /// Returns whether the bit vector is empty.
103    #[must_use]
104    pub fn is_empty(&self) -> bool {
105        self.len == 0
106    }
107
108    /// Gets the bit at the given index.
109    #[must_use]
110    pub fn get(&self, index: usize) -> Option<bool> {
111        if index >= self.len {
112            return None;
113        }
114
115        let word_idx = index / 64;
116        let bit_idx = index % 64;
117        Some((self.data[word_idx] & (1 << bit_idx)) != 0)
118    }
119
120    /// Sets the bit at the given index.
121    ///
122    /// # Panics
123    ///
124    /// Panics if index >= len.
125    pub fn set(&mut self, index: usize, value: bool) {
126        assert!(index < self.len, "Index out of bounds");
127
128        let word_idx = index / 64;
129        let bit_idx = index % 64;
130
131        if value {
132            self.data[word_idx] |= 1 << bit_idx;
133        } else {
134            self.data[word_idx] &= !(1 << bit_idx);
135        }
136    }
137
138    /// Appends a bit to the end.
139    pub fn push(&mut self, value: bool) {
140        let word_idx = self.len / 64;
141        let bit_idx = self.len % 64;
142
143        if word_idx >= self.data.len() {
144            self.data.push(0);
145        }
146
147        if value {
148            self.data[word_idx] |= 1 << bit_idx;
149        }
150
151        self.len += 1;
152    }
153
154    /// Returns the number of bits set to true.
155    #[must_use]
156    pub fn count_ones(&self) -> usize {
157        if self.is_empty() {
158            return 0;
159        }
160
161        let full_words = self.len / 64;
162        let remaining_bits = self.len % 64;
163
164        let mut count: usize = self.data[..full_words]
165            .iter()
166            .map(|&w| w.count_ones() as usize)
167            .sum();
168
169        if remaining_bits > 0 && full_words < self.data.len() {
170            let mask = (1u64 << remaining_bits) - 1;
171            count += (self.data[full_words] & mask).count_ones() as usize;
172        }
173
174        count
175    }
176
177    /// Returns the number of bits set to false.
178    #[must_use]
179    pub fn count_zeros(&self) -> usize {
180        self.len - self.count_ones()
181    }
182
183    /// Converts back to a `Vec<bool>`.
184    #[must_use]
185    pub fn to_bools(&self) -> Vec<bool> {
186        (0..self.len)
187            .map(|i| self.get(i).expect("index within len"))
188            .collect()
189    }
190
191    /// Returns an iterator over the bits.
192    pub fn iter(&self) -> impl Iterator<Item = bool> + '_ {
193        (0..self.len).map(move |i| self.get(i).expect("index within len"))
194    }
195
196    /// Returns an iterator over indices where bits are true.
197    pub fn ones_iter(&self) -> impl Iterator<Item = usize> + '_ {
198        (0..self.len).filter(move |&i| self.get(i).expect("index within len"))
199    }
200
201    /// Returns an iterator over indices where bits are false.
202    pub fn zeros_iter(&self) -> impl Iterator<Item = usize> + '_ {
203        (0..self.len).filter(move |&i| !self.get(i).expect("index within len"))
204    }
205
206    /// Returns the raw data.
207    #[must_use]
208    pub fn data(&self) -> &[u64] {
209        &self.data
210    }
211
212    /// Returns the compression ratio (original bytes / compressed bytes).
213    #[must_use]
214    pub fn compression_ratio(&self) -> f64 {
215        if self.is_empty() {
216            return 1.0;
217        }
218
219        // Original: 1 byte per bool
220        let original_size = self.len;
221        // Compressed: ceil(len / 8) bytes
222        let compressed_size = self.data.len() * 8;
223
224        if compressed_size == 0 {
225            return 1.0;
226        }
227
228        original_size as f64 / compressed_size as f64
229    }
230
231    /// Performs bitwise AND with another bit vector.
232    ///
233    /// The result has the length of the shorter vector.
234    #[must_use]
235    pub fn and(&self, other: &Self) -> Self {
236        let len = self.len.min(other.len);
237        let num_words = (len + 63) / 64;
238
239        let data: Vec<u64> = self
240            .data
241            .iter()
242            .zip(&other.data)
243            .take(num_words)
244            .map(|(&a, &b)| a & b)
245            .collect();
246
247        Self { data, len }
248    }
249
250    /// Performs bitwise OR with another bit vector.
251    ///
252    /// The result has the length of the shorter vector.
253    #[must_use]
254    pub fn or(&self, other: &Self) -> Self {
255        let len = self.len.min(other.len);
256        let num_words = (len + 63) / 64;
257
258        let data: Vec<u64> = self
259            .data
260            .iter()
261            .zip(&other.data)
262            .take(num_words)
263            .map(|(&a, &b)| a | b)
264            .collect();
265
266        Self { data, len }
267    }
268
269    /// Performs bitwise NOT.
270    #[must_use]
271    pub fn not(&self) -> Self {
272        let data: Vec<u64> = self.data.iter().map(|&w| !w).collect();
273        Self {
274            data,
275            len: self.len,
276        }
277    }
278
279    /// Performs bitwise XOR with another bit vector.
280    #[must_use]
281    pub fn xor(&self, other: &Self) -> Self {
282        let len = self.len.min(other.len);
283        let num_words = (len + 63) / 64;
284
285        let data: Vec<u64> = self
286            .data
287            .iter()
288            .zip(&other.data)
289            .take(num_words)
290            .map(|(&a, &b)| a ^ b)
291            .collect();
292
293        Self { data, len }
294    }
295
296    /// Serializes to bytes.
297    pub fn to_bytes(&self) -> Vec<u8> {
298        let mut buf = Vec::with_capacity(4 + self.data.len() * 8);
299        buf.extend_from_slice(&(self.len as u32).to_le_bytes());
300        for &word in &self.data {
301            buf.extend_from_slice(&word.to_le_bytes());
302        }
303        buf
304    }
305
306    /// Deserializes from bytes.
307    pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
308        if bytes.len() < 4 {
309            return Err(io::Error::new(
310                io::ErrorKind::InvalidData,
311                "BitVector too short",
312            ));
313        }
314
315        let len = u32::from_le_bytes(
316            bytes[0..4]
317                .try_into()
318                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
319        ) as usize;
320        let num_words = (len + 63) / 64;
321
322        if bytes.len() < 4 + num_words * 8 {
323            return Err(io::Error::new(
324                io::ErrorKind::InvalidData,
325                "BitVector truncated",
326            ));
327        }
328
329        let mut data = Vec::with_capacity(num_words);
330        for i in 0..num_words {
331            let offset = 4 + i * 8;
332            let word = u64::from_le_bytes(
333                bytes[offset..offset + 8]
334                    .try_into()
335                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
336            );
337            data.push(word);
338        }
339
340        Ok(Self { data, len })
341    }
342}
343
344impl Default for BitVector {
345    fn default() -> Self {
346        Self::new()
347    }
348}
349
350impl FromIterator<bool> for BitVector {
351    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
352        let mut bitvec = BitVector::new();
353        for b in iter {
354            bitvec.push(b);
355        }
356        bitvec
357    }
358}
359
360#[cfg(test)]
361mod tests {
362    use super::*;
363
364    #[test]
365    fn test_bitvec_basic() {
366        let bools = vec![true, false, true, true, false, false, true, false];
367        let bitvec = BitVector::from_bools(&bools);
368
369        assert_eq!(bitvec.len(), 8);
370        for (i, &expected) in bools.iter().enumerate() {
371            assert_eq!(bitvec.get(i), Some(expected));
372        }
373    }
374
375    #[test]
376    fn test_bitvec_empty() {
377        let bitvec = BitVector::new();
378        assert!(bitvec.is_empty());
379        assert_eq!(bitvec.get(0), None);
380    }
381
382    #[test]
383    fn test_bitvec_push() {
384        let mut bitvec = BitVector::new();
385        bitvec.push(true);
386        bitvec.push(false);
387        bitvec.push(true);
388
389        assert_eq!(bitvec.len(), 3);
390        assert_eq!(bitvec.get(0), Some(true));
391        assert_eq!(bitvec.get(1), Some(false));
392        assert_eq!(bitvec.get(2), Some(true));
393    }
394
395    #[test]
396    fn test_bitvec_set() {
397        let mut bitvec = BitVector::zeros(8);
398
399        bitvec.set(0, true);
400        bitvec.set(3, true);
401        bitvec.set(7, true);
402
403        assert_eq!(bitvec.get(0), Some(true));
404        assert_eq!(bitvec.get(1), Some(false));
405        assert_eq!(bitvec.get(3), Some(true));
406        assert_eq!(bitvec.get(7), Some(true));
407    }
408
409    #[test]
410    fn test_bitvec_count() {
411        let bools = vec![true, false, true, true, false, false, true, false];
412        let bitvec = BitVector::from_bools(&bools);
413
414        assert_eq!(bitvec.count_ones(), 4);
415        assert_eq!(bitvec.count_zeros(), 4);
416    }
417
418    #[test]
419    fn test_bitvec_filled() {
420        let zeros = BitVector::zeros(100);
421        assert_eq!(zeros.count_ones(), 0);
422        assert_eq!(zeros.count_zeros(), 100);
423
424        let ones = BitVector::ones(100);
425        assert_eq!(ones.count_ones(), 100);
426        assert_eq!(ones.count_zeros(), 0);
427    }
428
429    #[test]
430    fn test_bitvec_to_bools() {
431        let original = vec![true, false, true, true, false];
432        let bitvec = BitVector::from_bools(&original);
433        let restored = bitvec.to_bools();
434        assert_eq!(original, restored);
435    }
436
437    #[test]
438    fn test_bitvec_large() {
439        // Test with more than 64 bits
440        let bools: Vec<bool> = (0..200).map(|i| i % 3 == 0).collect();
441        let bitvec = BitVector::from_bools(&bools);
442
443        assert_eq!(bitvec.len(), 200);
444        for (i, &expected) in bools.iter().enumerate() {
445            assert_eq!(bitvec.get(i), Some(expected), "Mismatch at index {}", i);
446        }
447    }
448
449    #[test]
450    fn test_bitvec_and() {
451        let a = BitVector::from_bools(&[true, true, false, false]);
452        let b = BitVector::from_bools(&[true, false, true, false]);
453        let result = a.and(&b);
454
455        assert_eq!(result.to_bools(), vec![true, false, false, false]);
456    }
457
458    #[test]
459    fn test_bitvec_or() {
460        let a = BitVector::from_bools(&[true, true, false, false]);
461        let b = BitVector::from_bools(&[true, false, true, false]);
462        let result = a.or(&b);
463
464        assert_eq!(result.to_bools(), vec![true, true, true, false]);
465    }
466
467    #[test]
468    fn test_bitvec_not() {
469        let a = BitVector::from_bools(&[true, false, true, false]);
470        let result = a.not();
471
472        // Note: NOT inverts all bits in the word, so we check the relevant bits
473        assert_eq!(result.get(0), Some(false));
474        assert_eq!(result.get(1), Some(true));
475        assert_eq!(result.get(2), Some(false));
476        assert_eq!(result.get(3), Some(true));
477    }
478
479    #[test]
480    fn test_bitvec_xor() {
481        let a = BitVector::from_bools(&[true, true, false, false]);
482        let b = BitVector::from_bools(&[true, false, true, false]);
483        let result = a.xor(&b);
484
485        assert_eq!(result.to_bools(), vec![false, true, true, false]);
486    }
487
488    #[test]
489    fn test_bitvec_serialization() {
490        let bools = vec![true, false, true, true, false, false, true, false];
491        let bitvec = BitVector::from_bools(&bools);
492        let bytes = bitvec.to_bytes();
493        let restored = BitVector::from_bytes(&bytes).unwrap();
494        assert_eq!(bitvec, restored);
495    }
496
497    #[test]
498    fn test_bitvec_compression_ratio() {
499        let bitvec = BitVector::zeros(64);
500        let ratio = bitvec.compression_ratio();
501        // 64 bools = 64 bytes original, 8 bytes compressed = 8x
502        assert!((ratio - 8.0).abs() < 0.1);
503    }
504
505    #[test]
506    fn test_bitvec_ones_iter() {
507        let bools = vec![true, false, true, true, false];
508        let bitvec = BitVector::from_bools(&bools);
509        let ones: Vec<usize> = bitvec.ones_iter().collect();
510        assert_eq!(ones, vec![0, 2, 3]);
511    }
512
513    #[test]
514    fn test_bitvec_zeros_iter() {
515        let bools = vec![true, false, true, true, false];
516        let bitvec = BitVector::from_bools(&bools);
517        let zeros: Vec<usize> = bitvec.zeros_iter().collect();
518        assert_eq!(zeros, vec![1, 4]);
519    }
520
521    #[test]
522    fn test_bitvec_from_iter() {
523        let bitvec: BitVector = vec![true, false, true].into_iter().collect();
524        assert_eq!(bitvec.len(), 3);
525        assert_eq!(bitvec.get(0), Some(true));
526        assert_eq!(bitvec.get(1), Some(false));
527        assert_eq!(bitvec.get(2), Some(true));
528    }
529}