Skip to main content

grafeo_core/codec/
bitvec.rs

1//! Stores booleans as individual bits - 8x smaller than `Vec<bool>`.
2//!
3//! Use this when you're tracking lots of boolean flags (like null bitmaps
4//! or set membership). Phase 3b: storage split into an immutable
5//! `BitVector` (refcounted [`Bytes`]) and a mutable [`BitVectorBuilder`]
6//! that produces one via [`freeze`](BitVectorBuilder::freeze). This is
7//! the Apache Arrow / Lance "Array + Builder" idiom; the immutable side
8//! supports zero-copy mmap-backing for Phase 3c, while the builder
9//! retains the cheap word-level mutations needed by succinct-index
10//! construction (rank/select, wavelet trees, Elias-Fano).
11//!
12//! # Example
13//!
14//! ```no_run
15//! # use grafeo_core::codec::bitvec::BitVector;
16//! let bools = vec![true, false, true, true, false, false, true, false];
17//! let bitvec = BitVector::from_bools(&bools);
18//! // Stored as: 0b01001101 (1 byte instead of 8)
19//!
20//! assert_eq!(bitvec.get(0), Some(true));
21//! assert_eq!(bitvec.get(1), Some(false));
22//! assert_eq!(bitvec.count_ones(), 4);
23//! ```
24
25use std::io;
26
27use bytes::{Bytes, BytesMut};
28use serde::de::{self, Deserialize, Deserializer, MapAccess, SeqAccess, Visitor};
29
30/// Immutable bitset stored in a refcounted [`Bytes`] buffer of LE u64
31/// words.
32///
33/// Supports bitwise combinators ([`and`](Self::and), [`or`](Self::or),
34/// [`not`](Self::not), [`xor`](Self::xor)) that *return new bitvectors*.
35/// Mutation lives on [`BitVectorBuilder`].
36#[derive(Debug, Clone)]
37pub struct BitVector {
38    /// LE u64 words concatenated, refcounted. Heap-owned and mmap-backed
39    /// columns share this type — only the constructor differs.
40    data: Bytes,
41    /// Number of bits stored.
42    len: usize,
43}
44
45impl PartialEq for BitVector {
46    fn eq(&self, other: &Self) -> bool {
47        self.len == other.len && self.data == other.data
48    }
49}
50
51impl Eq for BitVector {}
52
53impl serde::Serialize for BitVector {
54    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
55        use serde::ser::SerializeStruct;
56        // On-disk shape stays `{data: Vec<u64>, len: usize}` for backward
57        // compatibility with the v1 LpgStoreSection format. Internal
58        // storage is now `Bytes`; we materialize a `Vec<u64>` only for
59        // serialization.
60        let words: Vec<u64> = (0..self.word_count())
61            .map(|i| self.word_at(i).unwrap_or(0))
62            .collect();
63        let mut s = serializer.serialize_struct("BitVector", 2)?;
64        s.serialize_field("data", &words)?;
65        s.serialize_field("len", &self.len)?;
66        s.end()
67    }
68}
69
70impl<'de> Deserialize<'de> for BitVector {
71    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
72    where
73        D: Deserializer<'de>,
74    {
75        #[derive(serde::Deserialize)]
76        #[serde(field_identifier, rename_all = "lowercase")]
77        enum Field {
78            Data,
79            Len,
80        }
81
82        struct BitVectorVisitor;
83
84        impl<'de> Visitor<'de> for BitVectorVisitor {
85            type Value = BitVector;
86
87            fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
88                formatter.write_str("struct BitVector with consistent data and len fields")
89            }
90
91            fn visit_seq<V>(self, mut seq: V) -> Result<BitVector, V::Error>
92            where
93                V: SeqAccess<'de>,
94            {
95                let data: Vec<u64> = seq
96                    .next_element()?
97                    .ok_or_else(|| de::Error::invalid_length(0, &self))?;
98                let len: usize = seq
99                    .next_element()?
100                    .ok_or_else(|| de::Error::invalid_length(1, &self))?;
101                validate_bitvec(len, &data).map_err(de::Error::custom)
102            }
103
104            fn visit_map<V>(self, mut map: V) -> Result<BitVector, V::Error>
105            where
106                V: MapAccess<'de>,
107            {
108                let mut data: Option<Vec<u64>> = None;
109                let mut len: Option<usize> = None;
110
111                while let Some(key) = map.next_key()? {
112                    match key {
113                        Field::Data => {
114                            if data.is_some() {
115                                return Err(de::Error::duplicate_field("data"));
116                            }
117                            data = Some(map.next_value()?);
118                        }
119                        Field::Len => {
120                            if len.is_some() {
121                                return Err(de::Error::duplicate_field("len"));
122                            }
123                            len = Some(map.next_value()?);
124                        }
125                    }
126                }
127
128                let data = data.ok_or_else(|| de::Error::missing_field("data"))?;
129                let len = len.ok_or_else(|| de::Error::missing_field("len"))?;
130                validate_bitvec(len, &data).map_err(de::Error::custom)
131            }
132        }
133
134        const FIELDS: &[&str] = &["data", "len"];
135        deserializer.deserialize_struct("BitVector", FIELDS, BitVectorVisitor)
136    }
137}
138
139/// Phase 6a: format-validation error returned by [`BitVector::from_mmap`].
140///
141/// Distinct from [`io::Error`] because format mismatches are a recoverable
142/// classification problem (caller may try a different format version),
143/// whereas I/O errors signal an unrelated underlying failure.
144#[derive(Debug, Clone, PartialEq, Eq)]
145pub enum BitVectorFormatError {
146    /// The provided byte buffer doesn't match the declared bit count.
147    ByteLengthMismatch {
148        /// Declared number of bits.
149        len: usize,
150        /// Bytes the caller should have provided.
151        expected_bytes: usize,
152        /// Bytes the caller actually provided.
153        actual_bytes: usize,
154    },
155}
156
157impl std::fmt::Display for BitVectorFormatError {
158    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
159        match self {
160            Self::ByteLengthMismatch {
161                len,
162                expected_bytes,
163                actual_bytes,
164            } => write!(
165                f,
166                "BitVector format mismatch: len={len} requires {expected_bytes} bytes (ceil(len/64)*8), got {actual_bytes}"
167            ),
168        }
169    }
170}
171
172impl std::error::Error for BitVectorFormatError {}
173
174/// Validates that `len` and `data` are consistent, returning a valid
175/// `BitVector` or an error message.
176fn validate_bitvec(len: usize, data: &[u64]) -> Result<BitVector, String> {
177    let expected_words = len.div_ceil(64);
178    if data.len() != expected_words {
179        return Err(format!(
180            "BitVector invariant violated: len={len} requires {expected_words} words, but data contains {} words",
181            data.len()
182        ));
183    }
184    Ok(BitVector {
185        data: words_to_bytes(data),
186        len,
187    })
188}
189
190/// Encodes `words` as little-endian bytes wrapped in a refcounted `Bytes`.
191fn words_to_bytes(words: &[u64]) -> Bytes {
192    let mut buf = BytesMut::with_capacity(words.len() * 8);
193    for &w in words {
194        buf.extend_from_slice(&w.to_le_bytes());
195    }
196    buf.freeze()
197}
198
199impl BitVector {
200    /// Reconstructs from pre-packed raw parts (legacy: `Vec<u64>` words).
201    ///
202    /// Used by section deserialization that holds words on the heap.
203    /// Phase 3c will add [`from_bytes_storage`](Self::from_bytes_storage)
204    /// for mmap-backed construction.
205    #[must_use]
206    pub fn from_raw_parts(data: Vec<u64>, len: usize) -> Self {
207        Self {
208            data: words_to_bytes(&data),
209            len,
210        }
211    }
212
213    /// Constructs from pre-encoded bytes (Phase 3c entry point).
214    ///
215    /// `data` must be `ceil(len / 64) * 8` bytes of little-endian
216    /// `u64` words. Used by the mmap path so a column can hold a
217    /// slice of mapped memory without copying.
218    #[must_use]
219    pub fn from_bytes_storage(data: Bytes, len: usize) -> Self {
220        Self { data, len }
221    }
222
223    /// Phase 6a: zero-copy mmap constructor.
224    ///
225    /// Adopts a refcounted [`Bytes`] slice (typically produced by
226    /// `Bytes::from_owner(mmap)` or a sub-slice thereof) as the
227    /// backing storage without copying. The on-disk contract is
228    /// little-endian `u64` words, with `len` total bits stored in
229    /// `ceil(len / 64) * 8` bytes.
230    ///
231    /// # Errors
232    ///
233    /// Returns an error if the byte length doesn't match the expected
234    /// `ceil(len / 64) * 8` for the declared bit count. This guards
235    /// against truncated or oversized mmap regions.
236    pub fn from_mmap(data: Bytes, len: usize) -> Result<Self, BitVectorFormatError> {
237        let expected_bytes = len.div_ceil(64) * 8;
238        if data.len() != expected_bytes {
239            return Err(BitVectorFormatError::ByteLengthMismatch {
240                len,
241                expected_bytes,
242                actual_bytes: data.len(),
243            });
244        }
245        Ok(Self { data, len })
246    }
247
248    /// Creates an empty bit vector.
249    #[must_use]
250    pub fn new() -> Self {
251        Self {
252            data: Bytes::new(),
253            len: 0,
254        }
255    }
256
257    /// Creates a bit vector from a slice of booleans.
258    ///
259    /// Routes through a [`BitVectorBuilder`] internally; Phase 3b
260    /// preserves the original convenience API for callers that already
261    /// have the bools materialized.
262    #[must_use]
263    pub fn from_bools(bools: &[bool]) -> Self {
264        let mut builder = BitVectorBuilder::with_capacity(bools.len());
265        for &b in bools {
266            builder.push(b);
267        }
268        builder.freeze()
269    }
270
271    /// Creates a bit vector with all bits set to the same value.
272    #[must_use]
273    pub fn filled(len: usize, value: bool) -> Self {
274        BitVectorBuilder::filled(len, value).freeze()
275    }
276
277    /// Creates a bit vector with all bits set to false.
278    #[must_use]
279    pub fn zeros(len: usize) -> Self {
280        Self::filled(len, false)
281    }
282
283    /// Creates a bit vector with all bits set to true.
284    #[must_use]
285    pub fn ones(len: usize) -> Self {
286        Self::filled(len, true)
287    }
288
289    /// Returns the number of bits.
290    #[must_use]
291    pub fn len(&self) -> usize {
292        self.len
293    }
294
295    /// Returns whether the bit vector is empty.
296    #[must_use]
297    pub fn is_empty(&self) -> bool {
298        self.len == 0
299    }
300
301    /// Gets the bit at the given index.
302    #[must_use]
303    pub fn get(&self, index: usize) -> Option<bool> {
304        if index >= self.len {
305            return None;
306        }
307        let word_idx = index / 64;
308        let bit_idx = index % 64;
309        let word = self.word_at(word_idx)?;
310        Some((word & (1 << bit_idx)) != 0)
311    }
312
313    /// Returns the number of `u64` words backing this bit vector.
314    #[must_use]
315    pub fn word_count(&self) -> usize {
316        self.data.len() / 8
317    }
318
319    /// Returns the word at `idx`, or `None` if out of range.
320    ///
321    /// Reads via `from_le_bytes`; supports unaligned `Bytes` slices
322    /// (e.g., mmap-backed sub-slices in Phase 3c).
323    #[must_use]
324    pub fn word_at(&self, idx: usize) -> Option<u64> {
325        let start = idx.checked_mul(8)?;
326        let end = start.checked_add(8)?;
327        let chunk: [u8; 8] = self.data.get(start..end)?.try_into().ok()?;
328        Some(u64::from_le_bytes(chunk))
329    }
330
331    /// Returns the raw byte storage.
332    ///
333    /// Phase 3c serializers use this to write the storage out directly
334    /// (the on-disk format already matches our LE word layout).
335    #[must_use]
336    pub fn data_bytes(&self) -> &Bytes {
337        &self.data
338    }
339
340    /// Returns the number of bits set to true.
341    #[must_use]
342    pub fn count_ones(&self) -> usize {
343        if self.is_empty() {
344            return 0;
345        }
346        let full_words = self.len / 64;
347        let remaining_bits = self.len % 64;
348
349        let mut count: usize = (0..full_words)
350            .map(|i| self.word_at(i).unwrap_or(0).count_ones() as usize)
351            .sum();
352
353        if remaining_bits > 0
354            && let Some(word) = self.word_at(full_words)
355        {
356            let mask = (1u64 << remaining_bits) - 1;
357            count += (word & mask).count_ones() as usize;
358        }
359
360        count
361    }
362
363    /// Returns the number of bits set to false.
364    #[must_use]
365    pub fn count_zeros(&self) -> usize {
366        self.len - self.count_ones()
367    }
368
369    /// Converts back to a `Vec<bool>`.
370    ///
371    /// # Panics
372    ///
373    /// Panics if internal storage is shorter than `len()` bits — an
374    /// invariant violation that would indicate a bug in
375    /// [`from_bytes_storage`](Self::from_bytes_storage) caller's
376    /// data validation.
377    #[must_use]
378    pub fn to_bools(&self) -> Vec<bool> {
379        (0..self.len)
380            .map(|i| self.get(i).expect("index within len"))
381            .collect()
382    }
383
384    /// Returns an iterator over the bits.
385    ///
386    /// # Panics
387    ///
388    /// Panics if internal storage is shorter than `len()` bits.
389    pub fn iter(&self) -> impl Iterator<Item = bool> + '_ {
390        (0..self.len).map(move |i| self.get(i).expect("index within len"))
391    }
392
393    /// Returns an iterator over indices where bits are true.
394    ///
395    /// # Panics
396    ///
397    /// Panics if internal storage is shorter than `len()` bits.
398    pub fn ones_iter(&self) -> impl Iterator<Item = usize> + '_ {
399        (0..self.len).filter(move |&i| self.get(i).expect("index within len"))
400    }
401
402    /// Returns an iterator over indices where bits are false.
403    ///
404    /// # Panics
405    ///
406    /// Panics if internal storage is shorter than `len()` bits.
407    pub fn zeros_iter(&self) -> impl Iterator<Item = usize> + '_ {
408        (0..self.len).filter(move |&i| !self.get(i).expect("index within len"))
409    }
410
411    /// Returns the compression ratio (original bytes / compressed bytes).
412    #[must_use]
413    pub fn compression_ratio(&self) -> f64 {
414        if self.is_empty() {
415            return 1.0;
416        }
417        let original_size = self.len;
418        let compressed_size = self.data.len();
419        if compressed_size == 0 {
420            return 1.0;
421        }
422        original_size as f64 / compressed_size as f64
423    }
424
425    /// Performs bitwise AND with another bit vector.
426    /// The result has the length of the shorter vector.
427    #[must_use]
428    pub fn and(&self, other: &Self) -> Self {
429        let len = self.len.min(other.len);
430        let num_words = len.div_ceil(64);
431        let words: Vec<u64> = (0..num_words)
432            .map(|i| self.word_at(i).unwrap_or(0) & other.word_at(i).unwrap_or(0))
433            .collect();
434        Self {
435            data: words_to_bytes(&words),
436            len,
437        }
438    }
439
440    /// Performs bitwise OR with another bit vector.
441    #[must_use]
442    pub fn or(&self, other: &Self) -> Self {
443        let len = self.len.min(other.len);
444        let num_words = len.div_ceil(64);
445        let words: Vec<u64> = (0..num_words)
446            .map(|i| self.word_at(i).unwrap_or(0) | other.word_at(i).unwrap_or(0))
447            .collect();
448        Self {
449            data: words_to_bytes(&words),
450            len,
451        }
452    }
453
454    /// Performs bitwise NOT.
455    #[must_use]
456    pub fn not(&self) -> Self {
457        let num_words = self.word_count();
458        let words: Vec<u64> = (0..num_words)
459            .map(|i| !self.word_at(i).unwrap_or(0))
460            .collect();
461        Self {
462            data: words_to_bytes(&words),
463            len: self.len,
464        }
465    }
466
467    /// Performs bitwise XOR with another bit vector.
468    #[must_use]
469    pub fn xor(&self, other: &Self) -> Self {
470        let len = self.len.min(other.len);
471        let num_words = len.div_ceil(64);
472        let words: Vec<u64> = (0..num_words)
473            .map(|i| self.word_at(i).unwrap_or(0) ^ other.word_at(i).unwrap_or(0))
474            .collect();
475        Self {
476            data: words_to_bytes(&words),
477            len,
478        }
479    }
480
481    /// Serializes to bytes.
482    ///
483    /// # Errors
484    ///
485    /// Returns `Err` if the bit-vector length exceeds `u32::MAX`.
486    pub fn to_bytes(&self) -> io::Result<Vec<u8>> {
487        let len_u32 = u32::try_from(self.len).map_err(|_| {
488            io::Error::new(
489                io::ErrorKind::InvalidInput,
490                format!(
491                    "BitVector length {} exceeds u32::MAX, cannot serialize",
492                    self.len
493                ),
494            )
495        })?;
496        let mut buf = Vec::with_capacity(4 + self.data.len());
497        buf.extend_from_slice(&len_u32.to_le_bytes());
498        // Storage is already LE bytes — append directly.
499        buf.extend_from_slice(&self.data);
500        Ok(buf)
501    }
502
503    /// Deserializes from bytes.
504    ///
505    /// # Errors
506    ///
507    /// Returns `Err` if the byte slice is too short or contains invalid data.
508    pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
509        if bytes.len() < 4 {
510            return Err(io::Error::new(
511                io::ErrorKind::InvalidData,
512                "BitVector too short",
513            ));
514        }
515
516        let len = u32::from_le_bytes(
517            bytes[0..4]
518                .try_into()
519                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
520        ) as usize;
521        let num_words = len.div_ceil(64);
522        let needed = 4 + num_words * 8;
523
524        if bytes.len() < needed {
525            return Err(io::Error::new(
526                io::ErrorKind::InvalidData,
527                "BitVector truncated",
528            ));
529        }
530
531        Ok(Self {
532            data: Bytes::copy_from_slice(&bytes[4..needed]),
533            len,
534        })
535    }
536}
537
538impl Default for BitVector {
539    fn default() -> Self {
540        Self::new()
541    }
542}
543
544impl FromIterator<bool> for BitVector {
545    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
546        let mut builder = BitVectorBuilder::new();
547        for b in iter {
548            builder.push(b);
549        }
550        builder.freeze()
551    }
552}
553
554// ── BitVectorBuilder ─────────────────────────────────────────────────
555
556/// Mutable bit-vector builder. Word-level mutations stay cheap (`Vec<u64>`
557/// indexed access); call [`freeze`](Self::freeze) to produce an immutable
558/// [`BitVector`].
559///
560/// Mirrors the `BytesMut` → `Bytes` and Apache Arrow `BooleanBuilder` →
561/// `BooleanArray` pattern.
562#[derive(Debug, Clone)]
563pub struct BitVectorBuilder {
564    data: Vec<u64>,
565    len: usize,
566}
567
568impl BitVectorBuilder {
569    /// Creates an empty builder.
570    #[must_use]
571    pub fn new() -> Self {
572        Self {
573            data: Vec::new(),
574            len: 0,
575        }
576    }
577
578    /// Creates a builder with capacity for at least `bits` bits.
579    #[must_use]
580    pub fn with_capacity(bits: usize) -> Self {
581        let words = bits.div_ceil(64);
582        Self {
583            data: Vec::with_capacity(words),
584            len: 0,
585        }
586    }
587
588    /// Creates a builder with all bits set to `value`, length `len`.
589    #[must_use]
590    pub fn filled(len: usize, value: bool) -> Self {
591        let num_words = len.div_ceil(64);
592        let fill = if value { u64::MAX } else { 0 };
593        Self {
594            data: vec![fill; num_words],
595            len,
596        }
597    }
598
599    /// Creates a builder with all bits set to false.
600    #[must_use]
601    pub fn zeros(len: usize) -> Self {
602        Self::filled(len, false)
603    }
604
605    /// Creates a builder with all bits set to true.
606    #[must_use]
607    pub fn ones(len: usize) -> Self {
608        Self::filled(len, true)
609    }
610
611    /// Returns the current bit length.
612    #[must_use]
613    pub fn len(&self) -> usize {
614        self.len
615    }
616
617    /// Returns whether no bits have been pushed.
618    #[must_use]
619    pub fn is_empty(&self) -> bool {
620        self.len == 0
621    }
622
623    /// Gets the bit at the given index (read access during build).
624    #[must_use]
625    pub fn get(&self, index: usize) -> Option<bool> {
626        if index >= self.len {
627            return None;
628        }
629        let word_idx = index / 64;
630        let bit_idx = index % 64;
631        Some((self.data[word_idx] & (1 << bit_idx)) != 0)
632    }
633
634    /// Sets the bit at the given index.
635    ///
636    /// # Panics
637    ///
638    /// Panics if `index >= len()`.
639    pub fn set(&mut self, index: usize, value: bool) {
640        assert!(index < self.len, "Index out of bounds");
641        let word_idx = index / 64;
642        let bit_idx = index % 64;
643        if value {
644            self.data[word_idx] |= 1 << bit_idx;
645        } else {
646            self.data[word_idx] &= !(1 << bit_idx);
647        }
648    }
649
650    /// Appends a bit to the end.
651    pub fn push(&mut self, value: bool) {
652        let word_idx = self.len / 64;
653        let bit_idx = self.len % 64;
654        if word_idx >= self.data.len() {
655            self.data.push(0);
656        }
657        if value {
658            self.data[word_idx] |= 1 << bit_idx;
659        }
660        self.len += 1;
661    }
662
663    /// Freezes the builder into an immutable [`BitVector`].
664    #[must_use]
665    pub fn freeze(self) -> BitVector {
666        BitVector {
667            data: words_to_bytes(&self.data),
668            len: self.len,
669        }
670    }
671}
672
673impl Default for BitVectorBuilder {
674    fn default() -> Self {
675        Self::new()
676    }
677}
678
679impl FromIterator<bool> for BitVectorBuilder {
680    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
681        let mut builder = BitVectorBuilder::new();
682        for b in iter {
683            builder.push(b);
684        }
685        builder
686    }
687}
688
689#[cfg(test)]
690mod tests {
691    use super::*;
692
693    #[test]
694    fn test_bitvec_basic() {
695        let bools = vec![true, false, true, true, false, false, true, false];
696        let bitvec = BitVector::from_bools(&bools);
697
698        assert_eq!(bitvec.len(), 8);
699        for (i, &expected) in bools.iter().enumerate() {
700            assert_eq!(bitvec.get(i), Some(expected));
701        }
702    }
703
704    #[test]
705    fn test_bitvec_empty() {
706        let bitvec = BitVector::new();
707        assert!(bitvec.is_empty());
708        assert_eq!(bitvec.get(0), None);
709    }
710
711    #[test]
712    fn test_bitvec_builder_push() {
713        let mut builder = BitVectorBuilder::new();
714        builder.push(true);
715        builder.push(false);
716        builder.push(true);
717        let bitvec = builder.freeze();
718
719        assert_eq!(bitvec.len(), 3);
720        assert_eq!(bitvec.get(0), Some(true));
721        assert_eq!(bitvec.get(1), Some(false));
722        assert_eq!(bitvec.get(2), Some(true));
723    }
724
725    #[test]
726    fn test_bitvec_builder_set() {
727        let mut builder = BitVectorBuilder::zeros(8);
728
729        builder.set(0, true);
730        builder.set(3, true);
731        builder.set(7, true);
732        let bitvec = builder.freeze();
733
734        assert_eq!(bitvec.get(0), Some(true));
735        assert_eq!(bitvec.get(1), Some(false));
736        assert_eq!(bitvec.get(3), Some(true));
737        assert_eq!(bitvec.get(7), Some(true));
738    }
739
740    #[test]
741    fn test_bitvec_count() {
742        let bools = vec![true, false, true, true, false, false, true, false];
743        let bitvec = BitVector::from_bools(&bools);
744
745        assert_eq!(bitvec.count_ones(), 4);
746        assert_eq!(bitvec.count_zeros(), 4);
747    }
748
749    #[test]
750    fn test_bitvec_filled() {
751        let zeros = BitVector::zeros(100);
752        assert_eq!(zeros.count_ones(), 0);
753        assert_eq!(zeros.count_zeros(), 100);
754
755        let ones = BitVector::ones(100);
756        assert_eq!(ones.count_ones(), 100);
757        assert_eq!(ones.count_zeros(), 0);
758    }
759
760    #[test]
761    fn test_bitvec_to_bools() {
762        let original = vec![true, false, true, true, false];
763        let bitvec = BitVector::from_bools(&original);
764        let restored = bitvec.to_bools();
765        assert_eq!(original, restored);
766    }
767
768    #[test]
769    fn test_bitvec_large() {
770        // Test with more than 64 bits
771        let bools: Vec<bool> = (0..200).map(|i| i % 3 == 0).collect();
772        let bitvec = BitVector::from_bools(&bools);
773
774        assert_eq!(bitvec.len(), 200);
775        for (i, &expected) in bools.iter().enumerate() {
776            assert_eq!(bitvec.get(i), Some(expected), "Mismatch at index {}", i);
777        }
778    }
779
780    #[test]
781    fn test_bitvec_and() {
782        let a = BitVector::from_bools(&[true, true, false, false]);
783        let b = BitVector::from_bools(&[true, false, true, false]);
784        let result = a.and(&b);
785
786        assert_eq!(result.to_bools(), vec![true, false, false, false]);
787    }
788
789    #[test]
790    fn test_bitvec_or() {
791        let a = BitVector::from_bools(&[true, true, false, false]);
792        let b = BitVector::from_bools(&[true, false, true, false]);
793        let result = a.or(&b);
794
795        assert_eq!(result.to_bools(), vec![true, true, true, false]);
796    }
797
798    #[test]
799    fn test_bitvec_not() {
800        let a = BitVector::from_bools(&[true, false, true, false]);
801        let result = a.not();
802
803        // Note: NOT inverts all bits in the word, so we check the relevant bits
804        assert_eq!(result.get(0), Some(false));
805        assert_eq!(result.get(1), Some(true));
806        assert_eq!(result.get(2), Some(false));
807        assert_eq!(result.get(3), Some(true));
808    }
809
810    #[test]
811    fn test_bitvec_xor() {
812        let a = BitVector::from_bools(&[true, true, false, false]);
813        let b = BitVector::from_bools(&[true, false, true, false]);
814        let result = a.xor(&b);
815
816        assert_eq!(result.to_bools(), vec![false, true, true, false]);
817    }
818
819    #[test]
820    fn test_bitvec_serialization() {
821        let bools = vec![true, false, true, true, false, false, true, false];
822        let bitvec = BitVector::from_bools(&bools);
823        let bytes = bitvec.to_bytes().unwrap();
824        let restored = BitVector::from_bytes(&bytes).unwrap();
825        assert_eq!(bitvec, restored);
826    }
827
828    #[test]
829    fn test_bitvec_compression_ratio() {
830        let bitvec = BitVector::zeros(64);
831        let ratio = bitvec.compression_ratio();
832        // 64 bools = 64 bytes original, 8 bytes compressed = 8x
833        assert!((ratio - 8.0).abs() < 0.1);
834    }
835
836    #[test]
837    fn test_bitvec_ones_iter() {
838        let bools = vec![true, false, true, true, false];
839        let bitvec = BitVector::from_bools(&bools);
840        let ones: Vec<usize> = bitvec.ones_iter().collect();
841        assert_eq!(ones, vec![0, 2, 3]);
842    }
843
844    #[test]
845    fn test_bitvec_zeros_iter() {
846        let bools = vec![true, false, true, true, false];
847        let bitvec = BitVector::from_bools(&bools);
848        let zeros: Vec<usize> = bitvec.zeros_iter().collect();
849        assert_eq!(zeros, vec![1, 4]);
850    }
851
852    #[test]
853    fn test_bitvec_from_iter() {
854        let bitvec: BitVector = vec![true, false, true].into_iter().collect();
855        assert_eq!(bitvec.len(), 3);
856        assert_eq!(bitvec.get(0), Some(true));
857        assert_eq!(bitvec.get(1), Some(false));
858        assert_eq!(bitvec.get(2), Some(true));
859    }
860
861    #[test]
862    fn test_bitvec_deserialize_roundtrip() {
863        let bools = vec![true, false, true, true, false, false, true, false];
864        let original = BitVector::from_bools(&bools);
865        let json = serde_json::to_string(&original).unwrap();
866        let restored: BitVector = serde_json::from_str(&json).unwrap();
867        assert_eq!(original, restored);
868    }
869
870    #[test]
871    fn test_bitvec_deserialize_invalid_len_too_large() {
872        // len=200 requires ceil(200/64) = 4 words, but we only provide 1
873        let json = r#"{"data":[42],"len":200}"#;
874        let result: Result<BitVector, _> = serde_json::from_str(json);
875        assert!(result.is_err());
876        let err_msg = result.unwrap_err().to_string();
877        assert!(
878            err_msg.contains("invariant violated"),
879            "expected invariant error, got: {err_msg}"
880        );
881    }
882
883    #[test]
884    fn test_bitvec_deserialize_invalid_len_data_mismatch() {
885        // len=10 requires ceil(10/64) = 1 word, but we provide 3
886        let json = r#"{"data":[1,2,3],"len":10}"#;
887        let result: Result<BitVector, _> = serde_json::from_str(json);
888        assert!(result.is_err());
889        let err_msg = result.unwrap_err().to_string();
890        assert!(
891            err_msg.contains("invariant violated"),
892            "expected invariant error, got: {err_msg}"
893        );
894    }
895
896    #[test]
897    fn test_bitvec_deserialize_valid_edge_cases() {
898        // len=0 with empty data
899        let json = r#"{"data":[],"len":0}"#;
900        let bv: BitVector = serde_json::from_str(json).unwrap();
901        assert_eq!(bv.len(), 0);
902        assert!(bv.is_empty());
903
904        // len=1 with one u64
905        let json = r#"{"data":[1],"len":1}"#;
906        let bv: BitVector = serde_json::from_str(json).unwrap();
907        assert_eq!(bv.len(), 1);
908        assert_eq!(bv.get(0), Some(true));
909
910        // len=64 with one u64 (exactly fills one word)
911        let json = r#"{"data":[18446744073709551615],"len":64}"#;
912        let bv: BitVector = serde_json::from_str(json).unwrap();
913        assert_eq!(bv.len(), 64);
914        assert_eq!(bv.count_ones(), 64);
915    }
916
917    // ── Phase 3b: Bytes-backed storage ────────────────────────────────
918
919    #[test]
920    fn test_bitvec_word_at_returns_words_from_bytes() {
921        // Pattern: alternating bits → 0xAAAAAAAAAAAAAAAA in word 0.
922        let bools: Vec<bool> = (0..64).map(|i| i % 2 == 1).collect();
923        let bv = BitVector::from_bools(&bools);
924        assert_eq!(bv.word_count(), 1);
925        assert_eq!(bv.word_at(0), Some(0xAAAA_AAAA_AAAA_AAAA));
926    }
927
928    #[test]
929    fn test_bitvec_word_at_out_of_range_returns_none() {
930        let bv = BitVector::from_bools(&[true, false, true]);
931        assert!(bv.word_at(bv.word_count()).is_none());
932    }
933
934    #[test]
935    fn test_bitvec_data_bytes_length_matches_word_count() {
936        let bools: Vec<bool> = (0..200).map(|i| i % 3 == 0).collect();
937        let bv = BitVector::from_bools(&bools);
938        assert_eq!(bv.data_bytes().len(), bv.word_count() * 8);
939        // Round-trip via get(): every bit recoverable.
940        for (i, &b) in bools.iter().enumerate() {
941            assert_eq!(bv.get(i), Some(b));
942        }
943    }
944
945    #[test]
946    fn test_bitvec_serde_round_trip_with_bytes_storage() {
947        let bools: Vec<bool> = (0..130).map(|i| i % 7 == 0).collect();
948        let bv = BitVector::from_bools(&bools);
949        // Serialize to JSON, deserialize back: the on-disk shape stays
950        // `Vec<u64>` even though internal storage is now `Bytes`.
951        let json = serde_json::to_string(&bv).unwrap();
952        let bv2: BitVector = serde_json::from_str(&json).unwrap();
953        assert_eq!(bv.len(), bv2.len());
954        for (i, &b) in bools.iter().enumerate() {
955            assert_eq!(bv2.get(i), Some(b));
956        }
957    }
958
959    // ── Phase 6a: from_mmap zero-copy constructor ─────────────────────
960
961    /// Round-trip via `from_mmap`: take an existing BitVector's bytes,
962    /// adopt them as a fresh BitVector, verify all bits.
963    #[test]
964    fn alix_from_mmap_round_trips_via_data_bytes() {
965        let bools: Vec<bool> = (0..200).map(|i| i % 5 == 0).collect();
966        let original = BitVector::from_bools(&bools);
967        let bytes = original.data_bytes().clone();
968        let mmapped = BitVector::from_mmap(bytes, original.len()).expect("from_mmap");
969        assert_eq!(mmapped.len(), original.len());
970        for (i, &b) in bools.iter().enumerate() {
971            assert_eq!(mmapped.get(i), Some(b), "bit {i}");
972        }
973        // word-by-word equivalence locks the LE contract.
974        for i in 0..original.word_count() {
975            assert_eq!(mmapped.word_at(i), original.word_at(i));
976        }
977    }
978
979    /// Truncated buffer is rejected — guards against partial mmap regions.
980    #[test]
981    fn gus_from_mmap_rejects_short_buffer() {
982        // len=200 needs ceil(200/64)*8 = 32 bytes; provide only 16.
983        let short = bytes::Bytes::from(vec![0u8; 16]);
984        let result = BitVector::from_mmap(short, 200);
985        assert!(result.is_err());
986        let err = result.unwrap_err();
987        assert!(matches!(
988            err,
989            BitVectorFormatError::ByteLengthMismatch {
990                len: 200,
991                expected_bytes: 32,
992                actual_bytes: 16,
993            }
994        ));
995    }
996
997    /// Oversized buffer is also rejected — symmetric guard.
998    #[test]
999    fn vincent_from_mmap_rejects_long_buffer() {
1000        // len=10 needs ceil(10/64)*8 = 8 bytes; provide 16.
1001        let long = bytes::Bytes::from(vec![0u8; 16]);
1002        let result = BitVector::from_mmap(long, 10);
1003        assert!(result.is_err());
1004    }
1005
1006    /// Empty bitvector via from_mmap — len=0, 0 bytes.
1007    #[test]
1008    fn jules_from_mmap_handles_empty() {
1009        let empty = bytes::Bytes::new();
1010        let bv = BitVector::from_mmap(empty, 0).expect("empty mmap");
1011        assert_eq!(bv.len(), 0);
1012        assert!(bv.is_empty());
1013    }
1014
1015    /// Zero-copy assertion: mmap-backed BitVector shares the underlying
1016    /// allocation with the source `Bytes` (no heap copy).
1017    ///
1018    /// We verify by holding both Arcs and asserting `Bytes::as_ptr` returns
1019    /// the same address, then mutating one side is impossible (Bytes is
1020    /// immutable) — the test confirms shape, not write-isolation.
1021    #[test]
1022    fn mia_from_mmap_is_zero_copy() {
1023        let original = BitVector::from_bools(&[true; 1024]);
1024        let source_bytes = original.data_bytes().clone();
1025        let source_ptr = source_bytes.as_ptr();
1026
1027        let mmapped = BitVector::from_mmap(source_bytes, 1024).expect("from_mmap");
1028        // The mmapped bitvector's storage points at the same address —
1029        // no allocation occurred.
1030        assert_eq!(
1031            mmapped.data_bytes().as_ptr(),
1032            source_ptr,
1033            "from_mmap must NOT allocate; Bytes refcount sharing required"
1034        );
1035    }
1036
1037    /// LE word contract: a hand-crafted LE byte pattern decodes to the
1038    /// expected u64 words. Guards against accidental endianness drift.
1039    #[test]
1040    fn shosanna_from_mmap_locks_le_word_contract() {
1041        // Bytes [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08] in LE
1042        // decode to 0x0807060504030201.
1043        let bytes = bytes::Bytes::from(vec![
1044            0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, // word 0
1045            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // word 1: all-ones
1046        ]);
1047        let bv = BitVector::from_mmap(bytes, 128).expect("from_mmap");
1048        assert_eq!(bv.word_at(0), Some(0x0807_0605_0403_0201));
1049        assert_eq!(bv.word_at(1), Some(0xFFFF_FFFF_FFFF_FFFF));
1050    }
1051}