Skip to main content

graphos_core/storage/
bitvec.rs

1//! Bit vector for boolean compression.
2//!
3//! Stores boolean values as individual bits, achieving 8x compression compared
4//! to storing each boolean as a byte.
5//!
6//! # Example
7//!
8//! ```ignore
9//! let bools = vec![true, false, true, true, false, false, true, false];
10//! let bitvec = BitVector::from_bools(&bools);
11//! // Stored as: 0b01001101 (1 byte instead of 8)
12//!
13//! assert_eq!(bitvec.get(0), Some(true));
14//! assert_eq!(bitvec.get(1), Some(false));
15//! ```
16
17use std::io;
18
19/// A compact bit vector for storing boolean values.
20///
21/// Each boolean is stored as a single bit, providing 8x compression
22/// compared to `Vec<bool>`.
23#[derive(Debug, Clone, PartialEq, Eq)]
24pub struct BitVector {
25    /// Packed bits (little-endian within each word).
26    data: Vec<u64>,
27    /// Number of bits stored.
28    len: usize,
29}
30
31impl BitVector {
32    /// Creates an empty bit vector.
33    #[must_use]
34    pub fn new() -> Self {
35        Self {
36            data: Vec::new(),
37            len: 0,
38        }
39    }
40
41    /// Creates a bit vector with the specified capacity (in bits).
42    #[must_use]
43    pub fn with_capacity(bits: usize) -> Self {
44        let words = (bits + 63) / 64;
45        Self {
46            data: Vec::with_capacity(words),
47            len: 0,
48        }
49    }
50
51    /// Creates a bit vector from a slice of booleans.
52    #[must_use]
53    pub fn from_bools(bools: &[bool]) -> Self {
54        let num_words = (bools.len() + 63) / 64;
55        let mut data = vec![0u64; num_words];
56
57        for (i, &b) in bools.iter().enumerate() {
58            if b {
59                let word_idx = i / 64;
60                let bit_idx = i % 64;
61                data[word_idx] |= 1 << bit_idx;
62            }
63        }
64
65        Self {
66            data,
67            len: bools.len(),
68        }
69    }
70
71    /// Creates a bit vector with all bits set to the same value.
72    #[must_use]
73    pub fn filled(len: usize, value: bool) -> Self {
74        let num_words = (len + 63) / 64;
75        let fill = if value { u64::MAX } else { 0 };
76        let data = vec![fill; num_words];
77
78        Self { data, len }
79    }
80
81    /// Creates a bit vector with all bits set to false (0).
82    #[must_use]
83    pub fn zeros(len: usize) -> Self {
84        Self::filled(len, false)
85    }
86
87    /// Creates a bit vector with all bits set to true (1).
88    #[must_use]
89    pub fn ones(len: usize) -> Self {
90        Self::filled(len, true)
91    }
92
93    /// Returns the number of bits.
94    #[must_use]
95    pub fn len(&self) -> usize {
96        self.len
97    }
98
99    /// Returns whether the bit vector is empty.
100    #[must_use]
101    pub fn is_empty(&self) -> bool {
102        self.len == 0
103    }
104
105    /// Gets the bit at the given index.
106    #[must_use]
107    pub fn get(&self, index: usize) -> Option<bool> {
108        if index >= self.len {
109            return None;
110        }
111
112        let word_idx = index / 64;
113        let bit_idx = index % 64;
114        Some((self.data[word_idx] & (1 << bit_idx)) != 0)
115    }
116
117    /// Sets the bit at the given index.
118    ///
119    /// # Panics
120    ///
121    /// Panics if index >= len.
122    pub fn set(&mut self, index: usize, value: bool) {
123        assert!(index < self.len, "Index out of bounds");
124
125        let word_idx = index / 64;
126        let bit_idx = index % 64;
127
128        if value {
129            self.data[word_idx] |= 1 << bit_idx;
130        } else {
131            self.data[word_idx] &= !(1 << bit_idx);
132        }
133    }
134
135    /// Appends a bit to the end.
136    pub fn push(&mut self, value: bool) {
137        let word_idx = self.len / 64;
138        let bit_idx = self.len % 64;
139
140        if word_idx >= self.data.len() {
141            self.data.push(0);
142        }
143
144        if value {
145            self.data[word_idx] |= 1 << bit_idx;
146        }
147
148        self.len += 1;
149    }
150
151    /// Returns the number of bits set to true.
152    #[must_use]
153    pub fn count_ones(&self) -> usize {
154        if self.is_empty() {
155            return 0;
156        }
157
158        let full_words = self.len / 64;
159        let remaining_bits = self.len % 64;
160
161        let mut count: usize = self.data[..full_words]
162            .iter()
163            .map(|&w| w.count_ones() as usize)
164            .sum();
165
166        if remaining_bits > 0 && full_words < self.data.len() {
167            let mask = (1u64 << remaining_bits) - 1;
168            count += (self.data[full_words] & mask).count_ones() as usize;
169        }
170
171        count
172    }
173
174    /// Returns the number of bits set to false.
175    #[must_use]
176    pub fn count_zeros(&self) -> usize {
177        self.len - self.count_ones()
178    }
179
180    /// Converts back to a `Vec<bool>`.
181    #[must_use]
182    pub fn to_bools(&self) -> Vec<bool> {
183        (0..self.len).map(|i| self.get(i).unwrap()).collect()
184    }
185
186    /// Returns an iterator over the bits.
187    pub fn iter(&self) -> impl Iterator<Item = bool> + '_ {
188        (0..self.len).map(move |i| self.get(i).unwrap())
189    }
190
191    /// Returns an iterator over indices where bits are true.
192    pub fn ones_iter(&self) -> impl Iterator<Item = usize> + '_ {
193        (0..self.len).filter(move |&i| self.get(i).unwrap())
194    }
195
196    /// Returns an iterator over indices where bits are false.
197    pub fn zeros_iter(&self) -> impl Iterator<Item = usize> + '_ {
198        (0..self.len).filter(move |&i| !self.get(i).unwrap())
199    }
200
201    /// Returns the raw data.
202    #[must_use]
203    pub fn data(&self) -> &[u64] {
204        &self.data
205    }
206
207    /// Returns the compression ratio (original bytes / compressed bytes).
208    #[must_use]
209    pub fn compression_ratio(&self) -> f64 {
210        if self.is_empty() {
211            return 1.0;
212        }
213
214        // Original: 1 byte per bool
215        let original_size = self.len;
216        // Compressed: ceil(len / 8) bytes
217        let compressed_size = self.data.len() * 8;
218
219        if compressed_size == 0 {
220            return 1.0;
221        }
222
223        original_size as f64 / compressed_size as f64
224    }
225
226    /// Performs bitwise AND with another bit vector.
227    ///
228    /// The result has the length of the shorter vector.
229    #[must_use]
230    pub fn and(&self, other: &Self) -> Self {
231        let len = self.len.min(other.len);
232        let num_words = (len + 63) / 64;
233
234        let data: Vec<u64> = self
235            .data
236            .iter()
237            .zip(&other.data)
238            .take(num_words)
239            .map(|(&a, &b)| a & b)
240            .collect();
241
242        Self { data, len }
243    }
244
245    /// Performs bitwise OR with another bit vector.
246    ///
247    /// The result has the length of the shorter vector.
248    #[must_use]
249    pub fn or(&self, other: &Self) -> Self {
250        let len = self.len.min(other.len);
251        let num_words = (len + 63) / 64;
252
253        let data: Vec<u64> = self
254            .data
255            .iter()
256            .zip(&other.data)
257            .take(num_words)
258            .map(|(&a, &b)| a | b)
259            .collect();
260
261        Self { data, len }
262    }
263
264    /// Performs bitwise NOT.
265    #[must_use]
266    pub fn not(&self) -> Self {
267        let data: Vec<u64> = self.data.iter().map(|&w| !w).collect();
268        Self {
269            data,
270            len: self.len,
271        }
272    }
273
274    /// Performs bitwise XOR with another bit vector.
275    #[must_use]
276    pub fn xor(&self, other: &Self) -> Self {
277        let len = self.len.min(other.len);
278        let num_words = (len + 63) / 64;
279
280        let data: Vec<u64> = self
281            .data
282            .iter()
283            .zip(&other.data)
284            .take(num_words)
285            .map(|(&a, &b)| a ^ b)
286            .collect();
287
288        Self { data, len }
289    }
290
291    /// Serializes to bytes.
292    pub fn to_bytes(&self) -> Vec<u8> {
293        let mut buf = Vec::with_capacity(4 + self.data.len() * 8);
294        buf.extend_from_slice(&(self.len as u32).to_le_bytes());
295        for &word in &self.data {
296            buf.extend_from_slice(&word.to_le_bytes());
297        }
298        buf
299    }
300
301    /// Deserializes from bytes.
302    pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
303        if bytes.len() < 4 {
304            return Err(io::Error::new(
305                io::ErrorKind::InvalidData,
306                "BitVector too short",
307            ));
308        }
309
310        let len = u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as usize;
311        let num_words = (len + 63) / 64;
312
313        if bytes.len() < 4 + num_words * 8 {
314            return Err(io::Error::new(
315                io::ErrorKind::InvalidData,
316                "BitVector truncated",
317            ));
318        }
319
320        let mut data = Vec::with_capacity(num_words);
321        for i in 0..num_words {
322            let offset = 4 + i * 8;
323            let word = u64::from_le_bytes(bytes[offset..offset + 8].try_into().unwrap());
324            data.push(word);
325        }
326
327        Ok(Self { data, len })
328    }
329}
330
331impl Default for BitVector {
332    fn default() -> Self {
333        Self::new()
334    }
335}
336
337impl FromIterator<bool> for BitVector {
338    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
339        let mut bitvec = BitVector::new();
340        for b in iter {
341            bitvec.push(b);
342        }
343        bitvec
344    }
345}
346
347#[cfg(test)]
348mod tests {
349    use super::*;
350
351    #[test]
352    fn test_bitvec_basic() {
353        let bools = vec![true, false, true, true, false, false, true, false];
354        let bitvec = BitVector::from_bools(&bools);
355
356        assert_eq!(bitvec.len(), 8);
357        for (i, &expected) in bools.iter().enumerate() {
358            assert_eq!(bitvec.get(i), Some(expected));
359        }
360    }
361
362    #[test]
363    fn test_bitvec_empty() {
364        let bitvec = BitVector::new();
365        assert!(bitvec.is_empty());
366        assert_eq!(bitvec.get(0), None);
367    }
368
369    #[test]
370    fn test_bitvec_push() {
371        let mut bitvec = BitVector::new();
372        bitvec.push(true);
373        bitvec.push(false);
374        bitvec.push(true);
375
376        assert_eq!(bitvec.len(), 3);
377        assert_eq!(bitvec.get(0), Some(true));
378        assert_eq!(bitvec.get(1), Some(false));
379        assert_eq!(bitvec.get(2), Some(true));
380    }
381
382    #[test]
383    fn test_bitvec_set() {
384        let mut bitvec = BitVector::zeros(8);
385
386        bitvec.set(0, true);
387        bitvec.set(3, true);
388        bitvec.set(7, true);
389
390        assert_eq!(bitvec.get(0), Some(true));
391        assert_eq!(bitvec.get(1), Some(false));
392        assert_eq!(bitvec.get(3), Some(true));
393        assert_eq!(bitvec.get(7), Some(true));
394    }
395
396    #[test]
397    fn test_bitvec_count() {
398        let bools = vec![true, false, true, true, false, false, true, false];
399        let bitvec = BitVector::from_bools(&bools);
400
401        assert_eq!(bitvec.count_ones(), 4);
402        assert_eq!(bitvec.count_zeros(), 4);
403    }
404
405    #[test]
406    fn test_bitvec_filled() {
407        let zeros = BitVector::zeros(100);
408        assert_eq!(zeros.count_ones(), 0);
409        assert_eq!(zeros.count_zeros(), 100);
410
411        let ones = BitVector::ones(100);
412        assert_eq!(ones.count_ones(), 100);
413        assert_eq!(ones.count_zeros(), 0);
414    }
415
416    #[test]
417    fn test_bitvec_to_bools() {
418        let original = vec![true, false, true, true, false];
419        let bitvec = BitVector::from_bools(&original);
420        let restored = bitvec.to_bools();
421        assert_eq!(original, restored);
422    }
423
424    #[test]
425    fn test_bitvec_large() {
426        // Test with more than 64 bits
427        let bools: Vec<bool> = (0..200).map(|i| i % 3 == 0).collect();
428        let bitvec = BitVector::from_bools(&bools);
429
430        assert_eq!(bitvec.len(), 200);
431        for (i, &expected) in bools.iter().enumerate() {
432            assert_eq!(bitvec.get(i), Some(expected), "Mismatch at index {}", i);
433        }
434    }
435
436    #[test]
437    fn test_bitvec_and() {
438        let a = BitVector::from_bools(&[true, true, false, false]);
439        let b = BitVector::from_bools(&[true, false, true, false]);
440        let result = a.and(&b);
441
442        assert_eq!(result.to_bools(), vec![true, false, false, false]);
443    }
444
445    #[test]
446    fn test_bitvec_or() {
447        let a = BitVector::from_bools(&[true, true, false, false]);
448        let b = BitVector::from_bools(&[true, false, true, false]);
449        let result = a.or(&b);
450
451        assert_eq!(result.to_bools(), vec![true, true, true, false]);
452    }
453
454    #[test]
455    fn test_bitvec_not() {
456        let a = BitVector::from_bools(&[true, false, true, false]);
457        let result = a.not();
458
459        // Note: NOT inverts all bits in the word, so we check the relevant bits
460        assert_eq!(result.get(0), Some(false));
461        assert_eq!(result.get(1), Some(true));
462        assert_eq!(result.get(2), Some(false));
463        assert_eq!(result.get(3), Some(true));
464    }
465
466    #[test]
467    fn test_bitvec_xor() {
468        let a = BitVector::from_bools(&[true, true, false, false]);
469        let b = BitVector::from_bools(&[true, false, true, false]);
470        let result = a.xor(&b);
471
472        assert_eq!(result.to_bools(), vec![false, true, true, false]);
473    }
474
475    #[test]
476    fn test_bitvec_serialization() {
477        let bools = vec![true, false, true, true, false, false, true, false];
478        let bitvec = BitVector::from_bools(&bools);
479        let bytes = bitvec.to_bytes();
480        let restored = BitVector::from_bytes(&bytes).unwrap();
481        assert_eq!(bitvec, restored);
482    }
483
484    #[test]
485    fn test_bitvec_compression_ratio() {
486        let bitvec = BitVector::zeros(64);
487        let ratio = bitvec.compression_ratio();
488        // 64 bools = 64 bytes original, 8 bytes compressed = 8x
489        assert!((ratio - 8.0).abs() < 0.1);
490    }
491
492    #[test]
493    fn test_bitvec_ones_iter() {
494        let bools = vec![true, false, true, true, false];
495        let bitvec = BitVector::from_bools(&bools);
496        let ones: Vec<usize> = bitvec.ones_iter().collect();
497        assert_eq!(ones, vec![0, 2, 3]);
498    }
499
500    #[test]
501    fn test_bitvec_zeros_iter() {
502        let bools = vec![true, false, true, true, false];
503        let bitvec = BitVector::from_bools(&bools);
504        let zeros: Vec<usize> = bitvec.zeros_iter().collect();
505        assert_eq!(zeros, vec![1, 4]);
506    }
507
508    #[test]
509    fn test_bitvec_from_iter() {
510        let bitvec: BitVector = vec![true, false, true].into_iter().collect();
511        assert_eq!(bitvec.len(), 3);
512        assert_eq!(bitvec.get(0), Some(true));
513        assert_eq!(bitvec.get(1), Some(false));
514        assert_eq!(bitvec.get(2), Some(true));
515    }
516}