Skip to main content

grafeo_core/storage/
bitvec.rs

//! Stores booleans as individual bits - 8x smaller than `Vec<bool>`.
//!
//! Use this when you're tracking lots of boolean flags (like "visited" markers
//! in graph traversals, or null bitmaps). Backed by `Vec<u64>` so bitwise
//! operations like AND/OR/XOR stay cache-friendly.
//!
//! # Example
//!
//! ```no_run
//! # use grafeo_core::storage::bitvec::BitVector;
//! let bools = vec![true, false, true, true, false, false, true, false];
//! let bitvec = BitVector::from_bools(&bools);
//! // Bit `i` of the first `u64` word holds `bools[i]`: 0b01001101
//! // (the 8 flags occupy 8 bits of one word instead of 8 separate bytes)
//!
//! assert_eq!(bitvec.get(0), Some(true));
//! assert_eq!(bitvec.get(1), Some(false));
//! assert_eq!(bitvec.count_ones(), 4);
//! ```
19
20use std::io;
21
/// Stores booleans as individual bits - 8x smaller than `Vec<bool>`.
///
/// Supports bitwise operations ([`and`](Self::and), [`or`](Self::or),
/// [`xor`](Self::xor), [`not`](Self::not)) for combining filter results
/// efficiently.
///
/// # Invariants
///
/// Every constructor and mutator in this type maintains:
///
/// * `data` holds exactly `ceil(len / 64)` words, and
/// * all padding bits (bit positions `>= len` in the last word) are zero.
///
/// The second invariant is what makes the derived `PartialEq` agree with
/// logical equality and lets [`push`](Self::push) reuse a partially filled word.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BitVector {
    /// Packed bits (little-endian within each word).
    data: Vec<u64>,
    /// Number of bits stored.
    len: usize,
}

impl BitVector {
    /// Number of bits packed into one storage word.
    const WORD_BITS: usize = 64;

    /// Returns how many `u64` words are needed to hold `bits` bits.
    ///
    /// Written as quotient + remainder check so it cannot overflow, unlike
    /// `(bits + 63) / 64` for `bits` close to `usize::MAX`.
    fn words_for(bits: usize) -> usize {
        bits / Self::WORD_BITS + usize::from(bits % Self::WORD_BITS != 0)
    }

    /// Zeroes every bit at position `>= len` in the partially used word, if any.
    fn clear_padding(data: &mut [u64], len: usize) {
        let tail_bits = len % Self::WORD_BITS;
        if tail_bits != 0 {
            if let Some(last) = data.get_mut(len / Self::WORD_BITS) {
                *last &= (1u64 << tail_bits) - 1;
            }
        }
    }

    /// Reads the bit at `index` without a length check.
    ///
    /// Callers must guarantee `index < self.len`.
    fn bit(&self, index: usize) -> bool {
        (self.data[index / Self::WORD_BITS] >> (index % Self::WORD_BITS)) & 1 == 1
    }

    /// Combines two vectors word by word with `op`, truncating to the shorter one.
    fn combine(&self, other: &Self, op: impl Fn(u64, u64) -> u64) -> Self {
        let len = self.len.min(other.len);
        let mut data: Vec<u64> = self
            .data
            .iter()
            .zip(&other.data)
            .take(Self::words_for(len))
            .map(|(&a, &b)| op(a, b))
            .collect();
        // The longer operand may contribute bits past `len` in the last word.
        Self::clear_padding(&mut data, len);

        Self { data, len }
    }

    /// Creates an empty bit vector.
    #[must_use]
    pub fn new() -> Self {
        Self {
            data: Vec::new(),
            len: 0,
        }
    }

    /// Creates an empty bit vector with room for at least `bits` bits.
    #[must_use]
    pub fn with_capacity(bits: usize) -> Self {
        Self {
            data: Vec::with_capacity(Self::words_for(bits)),
            len: 0,
        }
    }

    /// Creates a bit vector from a slice of booleans.
    ///
    /// Bit `i` of the result equals `bools[i]`.
    #[must_use]
    pub fn from_bools(bools: &[bool]) -> Self {
        // Each chunk of up to 64 booleans becomes one word; bit `k` of the
        // word is the chunk's `k`-th boolean.
        let data = bools
            .chunks(Self::WORD_BITS)
            .map(|chunk| {
                chunk
                    .iter()
                    .enumerate()
                    .fold(0u64, |word, (bit, &b)| word | (u64::from(b) << bit))
            })
            .collect();

        Self {
            data,
            len: bools.len(),
        }
    }

    /// Creates a bit vector of `len` bits, all set to `value`.
    #[must_use]
    pub fn filled(len: usize, value: bool) -> Self {
        let fill = if value { u64::MAX } else { 0 };
        let mut data = vec![fill; Self::words_for(len)];
        // `u64::MAX` also sets the unused high bits of the last word.
        Self::clear_padding(&mut data, len);

        Self { data, len }
    }

    /// Creates a bit vector with all bits set to false (0).
    #[must_use]
    pub fn zeros(len: usize) -> Self {
        Self::filled(len, false)
    }

    /// Creates a bit vector with all bits set to true (1).
    #[must_use]
    pub fn ones(len: usize) -> Self {
        Self::filled(len, true)
    }

    /// Returns the number of bits.
    #[must_use]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns whether the bit vector is empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Gets the bit at the given index, or `None` if `index >= len`.
    #[must_use]
    pub fn get(&self, index: usize) -> Option<bool> {
        if index < self.len {
            Some(self.bit(index))
        } else {
            None
        }
    }

    /// Sets the bit at the given index.
    ///
    /// # Panics
    ///
    /// Panics if index >= len.
    pub fn set(&mut self, index: usize, value: bool) {
        assert!(index < self.len, "Index out of bounds");

        let mask = 1u64 << (index % Self::WORD_BITS);
        let word = &mut self.data[index / Self::WORD_BITS];
        if value {
            *word |= mask;
        } else {
            *word &= !mask;
        }
    }

    /// Appends a bit to the end.
    pub fn push(&mut self, value: bool) {
        let word_idx = self.len / Self::WORD_BITS;
        if word_idx >= self.data.len() {
            self.data.push(0);
        }

        // Write the bit explicitly in both cases so the result never depends
        // on whatever the slot previously contained.
        let mask = 1u64 << (self.len % Self::WORD_BITS);
        if value {
            self.data[word_idx] |= mask;
        } else {
            self.data[word_idx] &= !mask;
        }

        self.len += 1;
    }

    /// Returns the number of bits set to true.
    #[must_use]
    pub fn count_ones(&self) -> usize {
        // Padding bits are always zero, so a plain popcount over every word
        // counts only logical bits.
        self.data.iter().map(|w| w.count_ones() as usize).sum()
    }

    /// Returns the number of bits set to false.
    #[must_use]
    pub fn count_zeros(&self) -> usize {
        self.len - self.count_ones()
    }

    /// Converts back to a `Vec<bool>`.
    #[must_use]
    pub fn to_bools(&self) -> Vec<bool> {
        self.iter().collect()
    }

    /// Returns an iterator over the bits, in index order.
    pub fn iter(&self) -> impl Iterator<Item = bool> + '_ {
        (0..self.len).map(move |i| self.bit(i))
    }

    /// Returns an iterator over indices where bits are true.
    pub fn ones_iter(&self) -> impl Iterator<Item = usize> + '_ {
        (0..self.len).filter(move |&i| self.bit(i))
    }

    /// Returns an iterator over indices where bits are false.
    pub fn zeros_iter(&self) -> impl Iterator<Item = usize> + '_ {
        (0..self.len).filter(move |&i| !self.bit(i))
    }

    /// Returns the raw words; bit `i` lives in word `i / 64` at position `i % 64`.
    ///
    /// Bits at positions `>= len` in the last word are zero.
    #[must_use]
    pub fn data(&self) -> &[u64] {
        &self.data
    }

    /// Returns the compression ratio (original bytes / compressed bytes).
    ///
    /// An empty vector reports `1.0`.
    #[must_use]
    pub fn compression_ratio(&self) -> f64 {
        if self.is_empty() {
            return 1.0;
        }

        // Original: 1 byte per bool.
        let original_size = self.len;
        // Compressed: storage is allocated in whole 8-byte words. A non-empty
        // vector always owns at least one word, so this is never zero.
        let compressed_size = self.data.len() * 8;

        original_size as f64 / compressed_size as f64
    }

    /// Performs bitwise AND with another bit vector.
    ///
    /// The result has the length of the shorter vector.
    #[must_use]
    pub fn and(&self, other: &Self) -> Self {
        self.combine(other, |a, b| a & b)
    }

    /// Performs bitwise OR with another bit vector.
    ///
    /// The result has the length of the shorter vector.
    #[must_use]
    pub fn or(&self, other: &Self) -> Self {
        self.combine(other, |a, b| a | b)
    }

    /// Performs bitwise NOT.
    ///
    /// Only the `len` logical bits are inverted; padding bits stay zero.
    #[must_use]
    pub fn not(&self) -> Self {
        let mut data: Vec<u64> = self.data.iter().map(|&w| !w).collect();
        Self::clear_padding(&mut data, self.len);

        Self {
            data,
            len: self.len,
        }
    }

    /// Performs bitwise XOR with another bit vector.
    ///
    /// The result has the length of the shorter vector.
    #[must_use]
    pub fn xor(&self, other: &Self) -> Self {
        self.combine(other, |a, b| a ^ b)
    }

    /// Serializes to bytes.
    ///
    /// Layout: the bit length as a little-endian `u32`, followed by every
    /// storage word as a little-endian `u64`.
    ///
    /// # Panics
    ///
    /// Panics if the vector holds more than `u32::MAX` bits, because the
    /// length header could not represent it.
    pub fn to_bytes(&self) -> Vec<u8> {
        assert!(
            self.len as u64 <= u64::from(u32::MAX),
            "BitVector too long to serialize"
        );

        let mut buf = Vec::with_capacity(4 + self.data.len() * 8);
        // Lossless: the assertion above guarantees `len` fits in a `u32`.
        buf.extend_from_slice(&(self.len as u32).to_le_bytes());
        for word in &self.data {
            buf.extend_from_slice(&word.to_le_bytes());
        }
        buf
    }

    /// Deserializes from bytes produced by [`to_bytes`](Self::to_bytes).
    ///
    /// Bytes after the last required word are ignored, and any padding bits
    /// set in the input are cleared.
    ///
    /// # Errors
    ///
    /// Returns [`io::ErrorKind::InvalidData`] if `bytes` is shorter than the
    /// 4-byte header or shorter than the length recorded in the header requires.
    pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
        const HEADER_LEN: usize = 4;
        const WORD_LEN: usize = 8;

        if bytes.len() < HEADER_LEN {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "BitVector too short",
            ));
        }

        let (header, payload) = bytes.split_at(HEADER_LEN);
        let len = u32::from_le_bytes([header[0], header[1], header[2], header[3]]) as usize;
        let num_words = Self::words_for(len);

        // `num_words` is at most 2^26 (len comes from a u32), so the product
        // below cannot overflow even a 32-bit `usize`.
        if payload.len() < num_words * WORD_LEN {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "BitVector truncated",
            ));
        }

        let mut data: Vec<u64> = payload
            .chunks_exact(WORD_LEN)
            .take(num_words)
            .map(|chunk| {
                let mut raw = [0u8; WORD_LEN];
                raw.copy_from_slice(chunk);
                u64::from_le_bytes(raw)
            })
            .collect();
        // Input is external: do not trust its padding bits.
        Self::clear_padding(&mut data, len);

        Ok(Self { data, len })
    }
}
333
334impl Default for BitVector {
335    fn default() -> Self {
336        Self::new()
337    }
338}
339
340impl FromIterator<bool> for BitVector {
341    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
342        let mut bitvec = BitVector::new();
343        for b in iter {
344            bitvec.push(b);
345        }
346        bitvec
347    }
348}
349
#[cfg(test)]
mod tests {
    //! Unit tests for `BitVector`: construction, mutation, counting,
    //! bitwise combination, iteration, and byte serialization.

    use super::*;

    /// Shared 8-bit fixture with ones at indices 0, 2, 3 and 6.
    fn sample_bools() -> Vec<bool> {
        vec![true, false, true, true, false, false, true, false]
    }

    #[test]
    fn test_bitvec_basic() {
        let bools = sample_bools();
        let bitvec = BitVector::from_bools(&bools);

        assert_eq!(bitvec.len(), 8);
        for (i, &expected) in bools.iter().enumerate() {
            assert_eq!(bitvec.get(i), Some(expected));
        }
    }

    #[test]
    fn test_bitvec_empty() {
        let bitvec = BitVector::new();
        assert!(bitvec.is_empty());
        assert_eq!(bitvec.get(0), None);
    }

    #[test]
    fn test_bitvec_push() {
        let mut bitvec = BitVector::new();
        bitvec.push(true);
        bitvec.push(false);
        bitvec.push(true);

        assert_eq!(bitvec.len(), 3);
        assert_eq!(bitvec.get(0), Some(true));
        assert_eq!(bitvec.get(1), Some(false));
        assert_eq!(bitvec.get(2), Some(true));
    }

    #[test]
    fn test_bitvec_set() {
        let mut bitvec = BitVector::zeros(8);

        bitvec.set(0, true);
        bitvec.set(3, true);
        bitvec.set(7, true);

        assert_eq!(bitvec.get(0), Some(true));
        assert_eq!(bitvec.get(1), Some(false));
        assert_eq!(bitvec.get(3), Some(true));
        assert_eq!(bitvec.get(7), Some(true));
    }

    #[test]
    fn test_bitvec_count() {
        let bitvec = BitVector::from_bools(&sample_bools());

        assert_eq!(bitvec.count_ones(), 4);
        assert_eq!(bitvec.count_zeros(), 4);
    }

    #[test]
    fn test_bitvec_filled() {
        let zeros = BitVector::zeros(100);
        assert_eq!(zeros.count_ones(), 0);
        assert_eq!(zeros.count_zeros(), 100);

        let ones = BitVector::ones(100);
        assert_eq!(ones.count_ones(), 100);
        assert_eq!(ones.count_zeros(), 0);
    }

    #[test]
    fn test_bitvec_to_bools() {
        let original = vec![true, false, true, true, false];
        let bitvec = BitVector::from_bools(&original);
        let restored = bitvec.to_bools();
        assert_eq!(original, restored);
    }

    #[test]
    fn test_bitvec_large() {
        // More than 64 bits, so several storage words are exercised.
        let bools: Vec<bool> = (0..200).map(|i| i % 3 == 0).collect();
        let bitvec = BitVector::from_bools(&bools);

        assert_eq!(bitvec.len(), 200);
        for (i, &expected) in bools.iter().enumerate() {
            assert_eq!(bitvec.get(i), Some(expected), "Mismatch at index {}", i);
        }
    }

    #[test]
    fn test_bitvec_and() {
        let a = BitVector::from_bools(&[true, true, false, false]);
        let b = BitVector::from_bools(&[true, false, true, false]);
        let result = a.and(&b);

        assert_eq!(result.to_bools(), vec![true, false, false, false]);
    }

    #[test]
    fn test_bitvec_or() {
        let a = BitVector::from_bools(&[true, true, false, false]);
        let b = BitVector::from_bools(&[true, false, true, false]);
        let result = a.or(&b);

        assert_eq!(result.to_bools(), vec![true, true, true, false]);
    }

    #[test]
    fn test_bitvec_not() {
        let a = BitVector::from_bools(&[true, false, true, false]);
        let result = a.not();

        // Check each of the four logical bits individually.
        assert_eq!(result.len(), 4);
        assert_eq!(result.get(0), Some(false));
        assert_eq!(result.get(1), Some(true));
        assert_eq!(result.get(2), Some(false));
        assert_eq!(result.get(3), Some(true));
    }

    #[test]
    fn test_bitvec_xor() {
        let a = BitVector::from_bools(&[true, true, false, false]);
        let b = BitVector::from_bools(&[true, false, true, false]);
        let result = a.xor(&b);

        assert_eq!(result.to_bools(), vec![false, true, true, false]);
    }

    #[test]
    fn test_bitvec_serialization() {
        let bitvec = BitVector::from_bools(&sample_bools());
        let bytes = bitvec.to_bytes();
        let restored = BitVector::from_bytes(&bytes).unwrap();
        assert_eq!(bitvec, restored);
    }

    #[test]
    fn test_bitvec_serialization_multi_word() {
        // 200 bits span four words; header is 4 bytes, each word 8 bytes.
        let bools: Vec<bool> = (0..200).map(|i| i % 3 == 0).collect();
        let bitvec = BitVector::from_bools(&bools);
        let bytes = bitvec.to_bytes();

        assert_eq!(bytes.len(), 4 + 4 * 8);
        let restored = BitVector::from_bytes(&bytes).unwrap();
        assert_eq!(bitvec, restored);
        assert_eq!(restored.to_bools(), bools);
    }

    #[test]
    fn test_bitvec_from_bytes_rejects_bad_input() {
        // Shorter than the 4-byte length header.
        assert!(BitVector::from_bytes(&[1, 0]).is_err());
        // Header claims 65 bits (two words) but only one word follows.
        let truncated = [65u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
        assert!(BitVector::from_bytes(&truncated).is_err());
    }

    #[test]
    fn test_bitvec_compression_ratio() {
        let bitvec = BitVector::zeros(64);
        let ratio = bitvec.compression_ratio();
        // 64 bools = 64 bytes original, 8 bytes compressed = 8x
        assert!((ratio - 8.0).abs() < 0.1);
    }

    #[test]
    fn test_bitvec_ones_iter() {
        let bools = vec![true, false, true, true, false];
        let bitvec = BitVector::from_bools(&bools);
        let ones: Vec<usize> = bitvec.ones_iter().collect();
        assert_eq!(ones, vec![0, 2, 3]);
    }

    #[test]
    fn test_bitvec_zeros_iter() {
        let bools = vec![true, false, true, true, false];
        let bitvec = BitVector::from_bools(&bools);
        let zeros: Vec<usize> = bitvec.zeros_iter().collect();
        assert_eq!(zeros, vec![1, 4]);
    }

    #[test]
    fn test_bitvec_from_iter() {
        let bitvec: BitVector = vec![true, false, true].into_iter().collect();
        assert_eq!(bitvec.len(), 3);
        assert_eq!(bitvec.get(0), Some(true));
        assert_eq!(bitvec.get(1), Some(false));
        assert_eq!(bitvec.get(2), Some(true));
    }
}