Skip to main content

grafeo_core/codec/
bitvec.rs

1//! Stores booleans as individual bits - 8x smaller than `Vec<bool>`.
2//!
3//! Use this when you're tracking lots of boolean flags (like "visited" markers
4//! in graph traversals, or null bitmaps). Backed by `Vec<u64>` so bitwise
5//! operations like AND/OR/XOR stay cache-friendly.
6//!
7//! # Example
8//!
9//! ```no_run
10//! # use grafeo_core::codec::bitvec::BitVector;
11//! let bools = vec![true, false, true, true, false, false, true, false];
12//! let bitvec = BitVector::from_bools(&bools);
13//! // Stored as: 0b01001101 (1 byte instead of 8)
14//!
15//! assert_eq!(bitvec.get(0), Some(true));
16//! assert_eq!(bitvec.get(1), Some(false));
17//! assert_eq!(bitvec.count_ones(), 4);
18//! ```
19
20use std::io;
21
22/// Stores booleans as individual bits - 8x smaller than `Vec<bool>`.
23///
24/// Supports bitwise operations ([`and`](Self::and), [`or`](Self::or),
25/// [`not`](Self::not)) for combining filter results efficiently.
26#[derive(Debug, Clone, PartialEq, Eq)]
27pub struct BitVector {
28    /// Packed bits (little-endian within each word).
29    data: Vec<u64>,
30    /// Number of bits stored.
31    len: usize,
32}
33
34impl BitVector {
35    /// Creates an empty bit vector.
36    #[must_use]
37    pub fn new() -> Self {
38        Self {
39            data: Vec::new(),
40            len: 0,
41        }
42    }
43
44    /// Creates a bit vector with the specified capacity (in bits).
45    #[must_use]
46    pub fn with_capacity(bits: usize) -> Self {
47        let words = (bits + 63) / 64;
48        Self {
49            data: Vec::with_capacity(words),
50            len: 0,
51        }
52    }
53
54    /// Creates a bit vector from a slice of booleans.
55    #[must_use]
56    pub fn from_bools(bools: &[bool]) -> Self {
57        let num_words = (bools.len() + 63) / 64;
58        let mut data = vec![0u64; num_words];
59
60        for (i, &b) in bools.iter().enumerate() {
61            if b {
62                let word_idx = i / 64;
63                let bit_idx = i % 64;
64                data[word_idx] |= 1 << bit_idx;
65            }
66        }
67
68        Self {
69            data,
70            len: bools.len(),
71        }
72    }
73
74    /// Creates a bit vector with all bits set to the same value.
75    #[must_use]
76    pub fn filled(len: usize, value: bool) -> Self {
77        let num_words = (len + 63) / 64;
78        let fill = if value { u64::MAX } else { 0 };
79        let data = vec![fill; num_words];
80
81        Self { data, len }
82    }
83
84    /// Creates a bit vector with all bits set to false (0).
85    #[must_use]
86    pub fn zeros(len: usize) -> Self {
87        Self::filled(len, false)
88    }
89
90    /// Creates a bit vector with all bits set to true (1).
91    #[must_use]
92    pub fn ones(len: usize) -> Self {
93        Self::filled(len, true)
94    }
95
96    /// Returns the number of bits.
97    #[must_use]
98    pub fn len(&self) -> usize {
99        self.len
100    }
101
102    /// Returns whether the bit vector is empty.
103    #[must_use]
104    pub fn is_empty(&self) -> bool {
105        self.len == 0
106    }
107
108    /// Gets the bit at the given index.
109    #[must_use]
110    pub fn get(&self, index: usize) -> Option<bool> {
111        if index >= self.len {
112            return None;
113        }
114
115        let word_idx = index / 64;
116        let bit_idx = index % 64;
117        Some((self.data[word_idx] & (1 << bit_idx)) != 0)
118    }
119
120    /// Sets the bit at the given index.
121    ///
122    /// # Panics
123    ///
124    /// Panics if index >= len.
125    pub fn set(&mut self, index: usize, value: bool) {
126        assert!(index < self.len, "Index out of bounds");
127
128        let word_idx = index / 64;
129        let bit_idx = index % 64;
130
131        if value {
132            self.data[word_idx] |= 1 << bit_idx;
133        } else {
134            self.data[word_idx] &= !(1 << bit_idx);
135        }
136    }
137
138    /// Appends a bit to the end.
139    pub fn push(&mut self, value: bool) {
140        let word_idx = self.len / 64;
141        let bit_idx = self.len % 64;
142
143        if word_idx >= self.data.len() {
144            self.data.push(0);
145        }
146
147        if value {
148            self.data[word_idx] |= 1 << bit_idx;
149        }
150
151        self.len += 1;
152    }
153
154    /// Returns the number of bits set to true.
155    #[must_use]
156    pub fn count_ones(&self) -> usize {
157        if self.is_empty() {
158            return 0;
159        }
160
161        let full_words = self.len / 64;
162        let remaining_bits = self.len % 64;
163
164        let mut count: usize = self.data[..full_words]
165            .iter()
166            .map(|&w| w.count_ones() as usize)
167            .sum();
168
169        if remaining_bits > 0 && full_words < self.data.len() {
170            let mask = (1u64 << remaining_bits) - 1;
171            count += (self.data[full_words] & mask).count_ones() as usize;
172        }
173
174        count
175    }
176
177    /// Returns the number of bits set to false.
178    #[must_use]
179    pub fn count_zeros(&self) -> usize {
180        self.len - self.count_ones()
181    }
182
183    /// Converts back to a `Vec<bool>`.
184    ///
185    /// # Panics
186    ///
187    /// Panics if an internal index is out of bounds (invariant violation).
188    #[must_use]
189    pub fn to_bools(&self) -> Vec<bool> {
190        (0..self.len)
191            .map(|i| self.get(i).expect("index within len"))
192            .collect()
193    }
194
195    /// Returns an iterator over the bits.
196    ///
197    /// # Panics
198    ///
199    /// Panics if an internal index is out of bounds (invariant violation).
200    pub fn iter(&self) -> impl Iterator<Item = bool> + '_ {
201        (0..self.len).map(move |i| self.get(i).expect("index within len"))
202    }
203
204    /// Returns an iterator over indices where bits are true.
205    ///
206    /// # Panics
207    ///
208    /// Panics if an internal index is out of bounds (invariant violation).
209    pub fn ones_iter(&self) -> impl Iterator<Item = usize> + '_ {
210        (0..self.len).filter(move |&i| self.get(i).expect("index within len"))
211    }
212
213    /// Returns an iterator over indices where bits are false.
214    ///
215    /// # Panics
216    ///
217    /// Panics if an internal index is out of bounds (invariant violation).
218    pub fn zeros_iter(&self) -> impl Iterator<Item = usize> + '_ {
219        (0..self.len).filter(move |&i| !self.get(i).expect("index within len"))
220    }
221
222    /// Returns the raw data.
223    #[must_use]
224    pub fn data(&self) -> &[u64] {
225        &self.data
226    }
227
228    /// Returns the compression ratio (original bytes / compressed bytes).
229    #[must_use]
230    pub fn compression_ratio(&self) -> f64 {
231        if self.is_empty() {
232            return 1.0;
233        }
234
235        // Original: 1 byte per bool
236        let original_size = self.len;
237        // Compressed: ceil(len / 8) bytes
238        let compressed_size = self.data.len() * 8;
239
240        if compressed_size == 0 {
241            return 1.0;
242        }
243
244        original_size as f64 / compressed_size as f64
245    }
246
247    /// Performs bitwise AND with another bit vector.
248    ///
249    /// The result has the length of the shorter vector.
250    #[must_use]
251    pub fn and(&self, other: &Self) -> Self {
252        let len = self.len.min(other.len);
253        let num_words = (len + 63) / 64;
254
255        let data: Vec<u64> = self
256            .data
257            .iter()
258            .zip(&other.data)
259            .take(num_words)
260            .map(|(&a, &b)| a & b)
261            .collect();
262
263        Self { data, len }
264    }
265
266    /// Performs bitwise OR with another bit vector.
267    ///
268    /// The result has the length of the shorter vector.
269    #[must_use]
270    pub fn or(&self, other: &Self) -> Self {
271        let len = self.len.min(other.len);
272        let num_words = (len + 63) / 64;
273
274        let data: Vec<u64> = self
275            .data
276            .iter()
277            .zip(&other.data)
278            .take(num_words)
279            .map(|(&a, &b)| a | b)
280            .collect();
281
282        Self { data, len }
283    }
284
285    /// Performs bitwise NOT.
286    #[must_use]
287    pub fn not(&self) -> Self {
288        let data: Vec<u64> = self.data.iter().map(|&w| !w).collect();
289        Self {
290            data,
291            len: self.len,
292        }
293    }
294
295    /// Performs bitwise XOR with another bit vector.
296    #[must_use]
297    pub fn xor(&self, other: &Self) -> Self {
298        let len = self.len.min(other.len);
299        let num_words = (len + 63) / 64;
300
301        let data: Vec<u64> = self
302            .data
303            .iter()
304            .zip(&other.data)
305            .take(num_words)
306            .map(|(&a, &b)| a ^ b)
307            .collect();
308
309        Self { data, len }
310    }
311
312    /// Serializes to bytes.
313    pub fn to_bytes(&self) -> Vec<u8> {
314        let mut buf = Vec::with_capacity(4 + self.data.len() * 8);
315        buf.extend_from_slice(&(self.len as u32).to_le_bytes());
316        for &word in &self.data {
317            buf.extend_from_slice(&word.to_le_bytes());
318        }
319        buf
320    }
321
322    /// Deserializes from bytes.
323    ///
324    /// # Errors
325    ///
326    /// Returns `Err` if the byte slice is too short or contains invalid data.
327    pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
328        if bytes.len() < 4 {
329            return Err(io::Error::new(
330                io::ErrorKind::InvalidData,
331                "BitVector too short",
332            ));
333        }
334
335        let len = u32::from_le_bytes(
336            bytes[0..4]
337                .try_into()
338                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
339        ) as usize;
340        let num_words = (len + 63) / 64;
341
342        if bytes.len() < 4 + num_words * 8 {
343            return Err(io::Error::new(
344                io::ErrorKind::InvalidData,
345                "BitVector truncated",
346            ));
347        }
348
349        let mut data = Vec::with_capacity(num_words);
350        for i in 0..num_words {
351            let offset = 4 + i * 8;
352            let word = u64::from_le_bytes(
353                bytes[offset..offset + 8]
354                    .try_into()
355                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
356            );
357            data.push(word);
358        }
359
360        Ok(Self { data, len })
361    }
362}
363
364impl Default for BitVector {
365    fn default() -> Self {
366        Self::new()
367    }
368}
369
370impl FromIterator<bool> for BitVector {
371    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
372        let mut bitvec = BitVector::new();
373        for b in iter {
374            bitvec.push(b);
375        }
376        bitvec
377    }
378}
379
380#[cfg(test)]
381mod tests {
382    use super::*;
383
384    #[test]
385    fn test_bitvec_basic() {
386        let bools = vec![true, false, true, true, false, false, true, false];
387        let bitvec = BitVector::from_bools(&bools);
388
389        assert_eq!(bitvec.len(), 8);
390        for (i, &expected) in bools.iter().enumerate() {
391            assert_eq!(bitvec.get(i), Some(expected));
392        }
393    }
394
395    #[test]
396    fn test_bitvec_empty() {
397        let bitvec = BitVector::new();
398        assert!(bitvec.is_empty());
399        assert_eq!(bitvec.get(0), None);
400    }
401
402    #[test]
403    fn test_bitvec_push() {
404        let mut bitvec = BitVector::new();
405        bitvec.push(true);
406        bitvec.push(false);
407        bitvec.push(true);
408
409        assert_eq!(bitvec.len(), 3);
410        assert_eq!(bitvec.get(0), Some(true));
411        assert_eq!(bitvec.get(1), Some(false));
412        assert_eq!(bitvec.get(2), Some(true));
413    }
414
415    #[test]
416    fn test_bitvec_set() {
417        let mut bitvec = BitVector::zeros(8);
418
419        bitvec.set(0, true);
420        bitvec.set(3, true);
421        bitvec.set(7, true);
422
423        assert_eq!(bitvec.get(0), Some(true));
424        assert_eq!(bitvec.get(1), Some(false));
425        assert_eq!(bitvec.get(3), Some(true));
426        assert_eq!(bitvec.get(7), Some(true));
427    }
428
429    #[test]
430    fn test_bitvec_count() {
431        let bools = vec![true, false, true, true, false, false, true, false];
432        let bitvec = BitVector::from_bools(&bools);
433
434        assert_eq!(bitvec.count_ones(), 4);
435        assert_eq!(bitvec.count_zeros(), 4);
436    }
437
438    #[test]
439    fn test_bitvec_filled() {
440        let zeros = BitVector::zeros(100);
441        assert_eq!(zeros.count_ones(), 0);
442        assert_eq!(zeros.count_zeros(), 100);
443
444        let ones = BitVector::ones(100);
445        assert_eq!(ones.count_ones(), 100);
446        assert_eq!(ones.count_zeros(), 0);
447    }
448
449    #[test]
450    fn test_bitvec_to_bools() {
451        let original = vec![true, false, true, true, false];
452        let bitvec = BitVector::from_bools(&original);
453        let restored = bitvec.to_bools();
454        assert_eq!(original, restored);
455    }
456
457    #[test]
458    fn test_bitvec_large() {
459        // Test with more than 64 bits
460        let bools: Vec<bool> = (0..200).map(|i| i % 3 == 0).collect();
461        let bitvec = BitVector::from_bools(&bools);
462
463        assert_eq!(bitvec.len(), 200);
464        for (i, &expected) in bools.iter().enumerate() {
465            assert_eq!(bitvec.get(i), Some(expected), "Mismatch at index {}", i);
466        }
467    }
468
469    #[test]
470    fn test_bitvec_and() {
471        let a = BitVector::from_bools(&[true, true, false, false]);
472        let b = BitVector::from_bools(&[true, false, true, false]);
473        let result = a.and(&b);
474
475        assert_eq!(result.to_bools(), vec![true, false, false, false]);
476    }
477
478    #[test]
479    fn test_bitvec_or() {
480        let a = BitVector::from_bools(&[true, true, false, false]);
481        let b = BitVector::from_bools(&[true, false, true, false]);
482        let result = a.or(&b);
483
484        assert_eq!(result.to_bools(), vec![true, true, true, false]);
485    }
486
487    #[test]
488    fn test_bitvec_not() {
489        let a = BitVector::from_bools(&[true, false, true, false]);
490        let result = a.not();
491
492        // Note: NOT inverts all bits in the word, so we check the relevant bits
493        assert_eq!(result.get(0), Some(false));
494        assert_eq!(result.get(1), Some(true));
495        assert_eq!(result.get(2), Some(false));
496        assert_eq!(result.get(3), Some(true));
497    }
498
499    #[test]
500    fn test_bitvec_xor() {
501        let a = BitVector::from_bools(&[true, true, false, false]);
502        let b = BitVector::from_bools(&[true, false, true, false]);
503        let result = a.xor(&b);
504
505        assert_eq!(result.to_bools(), vec![false, true, true, false]);
506    }
507
508    #[test]
509    fn test_bitvec_serialization() {
510        let bools = vec![true, false, true, true, false, false, true, false];
511        let bitvec = BitVector::from_bools(&bools);
512        let bytes = bitvec.to_bytes();
513        let restored = BitVector::from_bytes(&bytes).unwrap();
514        assert_eq!(bitvec, restored);
515    }
516
517    #[test]
518    fn test_bitvec_compression_ratio() {
519        let bitvec = BitVector::zeros(64);
520        let ratio = bitvec.compression_ratio();
521        // 64 bools = 64 bytes original, 8 bytes compressed = 8x
522        assert!((ratio - 8.0).abs() < 0.1);
523    }
524
525    #[test]
526    fn test_bitvec_ones_iter() {
527        let bools = vec![true, false, true, true, false];
528        let bitvec = BitVector::from_bools(&bools);
529        let ones: Vec<usize> = bitvec.ones_iter().collect();
530        assert_eq!(ones, vec![0, 2, 3]);
531    }
532
533    #[test]
534    fn test_bitvec_zeros_iter() {
535        let bools = vec![true, false, true, true, false];
536        let bitvec = BitVector::from_bools(&bools);
537        let zeros: Vec<usize> = bitvec.zeros_iter().collect();
538        assert_eq!(zeros, vec![1, 4]);
539    }
540
541    #[test]
542    fn test_bitvec_from_iter() {
543        let bitvec: BitVector = vec![true, false, true].into_iter().collect();
544        assert_eq!(bitvec.len(), 3);
545        assert_eq!(bitvec.get(0), Some(true));
546        assert_eq!(bitvec.get(1), Some(false));
547        assert_eq!(bitvec.get(2), Some(true));
548    }
549}