vers_vecs/bit_vec/
mod.rs

1//! This module contains a simple [bit vector][BitVec] implementation with no overhead and a fast succinct
2//! bit vector implementation with [rank and select queries][fast_rs_vec::RsVec].
3
4use crate::bit_vec::mask::MaskedBitVec;
5use crate::util::impl_vector_iterator;
6use std::cmp::min;
7use std::hash::{Hash, Hasher};
8use std::mem::size_of;
9
10pub mod fast_rs_vec;
11
12pub mod sparse;
13
14pub mod mask;
15
16/// Size of a word in bitvectors. All vectors operate on 64-bit words.
17const WORD_SIZE: usize = 64;
18
19/// Type alias for masked bitvectors that implement a simple bitwise binary operation.
20/// The first lifetime is for the bit vector that is being masked, the second lifetime is for the
21/// mask.
22pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>;
23
24/// A simple bit vector that does not support rank and select queries.
25/// Bits are stored in little-endian order, i.e. the least significant bit is stored first.
26/// The bit vector is stored as a sequence of 64 bit limbs.
27/// The last limb may be partially filled.
28///
29/// The bit vector has a wide range of constructors that allow for easy creation from various
30/// sources.
31/// Among them are constructors for creating an empty vector ([`BitVec::new`]),
32/// creating one from single bits of various integer types ([`BitVec::from_bits`] and variations),
33/// creating limbs from u64 values directly ([`BitVec::from_limbs`] and variations),
34/// or packing a sequence of numerical values into a dense bit sequence
35/// ([`BitVec::pack_sequence_u64`] and variations).
36///
37/// The bit vector can be modified after creation
38/// (e.g. by appending [bits](BitVec::append_bits)
39/// or [words](BitVec::append_word),
40/// [flipping](BitVec::flip_bit),
41/// or [setting](BitVec::set) bits).
42/// Bits can be [accessed](BitVec::get) by position,
43/// and [multiple bits](BitVec::get_bits) can be accessed at once.
44/// Bits can be [dropped](BitVec::drop_last) from the end.
45///
46/// # Example
47/// ```rust
48/// use vers_vecs::{BitVec, RsVec};
49///
50/// let mut bit_vec = BitVec::new();
51/// bit_vec.append_bit(0u64);
52/// bit_vec.append_bit_u32(1u32);
53/// bit_vec.append_word(0b1010_1010_1010_1010u64); // appends exactly 64 bits
54///
55/// assert_eq!(bit_vec.len(), 66);
56/// assert_eq!(bit_vec.get(0), Some(0u64));
57/// assert_eq!(bit_vec.get(1), Some(1u64));
58/// ```
59#[derive(Clone, Debug, Default)]
60#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
61#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
62pub struct BitVec {
63    data: Vec<u64>,
64    len: usize,
65}
66
67impl BitVec {
68    /// Create a new empty bit vector.
69    #[must_use]
70    pub fn new() -> Self {
71        Self::default()
72    }
73
74    /// Create a new empty bit vector with the given capacity.
75    /// The capacity is measured in bits.
76    /// The bit vector will be able to hold at least `capacity` bits without reallocating.
77    /// More memory may be allocated according to the underlying allocation strategy.
78    #[must_use]
79    pub fn with_capacity(capacity: usize) -> Self {
80        Self {
81            data: Vec::with_capacity(capacity / WORD_SIZE + 1),
82            len: 0,
83        }
84    }
85
86    /// Create a new bit vector with all zeros and the given length.
87    /// The length is measured in bits.
88    #[must_use]
89    pub fn from_zeros(len: usize) -> Self {
90        let mut data = vec![0; len / WORD_SIZE];
91        if !len.is_multiple_of(WORD_SIZE) {
92            data.push(0);
93        }
94        Self { data, len }
95    }
96
97    /// Create a new bit vector with all ones and the given length.
98    /// The length is measured in bits.
99    #[must_use]
100    pub fn from_ones(len: usize) -> Self {
101        let mut data = vec![u64::MAX; len / WORD_SIZE];
102        if !len.is_multiple_of(WORD_SIZE) {
103            data.push((1 << (len % WORD_SIZE)) - 1);
104        }
105        Self { data, len }
106    }
107
108    /// Construct a bit vector from a set of bits given as distinct u8 values.
109    /// The constructor will take the least significant bit from each value and append it to a
110    /// bit vector.
111    /// All other bits are ignored.
112    ///
113    /// See also: [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
114    ///
115    /// # Example
116    /// ```rust
117    /// use vers_vecs::BitVec;
118    ///
119    /// let bits: &[u8] = &[1, 0, 1, 1, 1, 1];
120    /// let bv = BitVec::from_bits(&bits);
121    ///
122    /// assert_eq!(bv.len(), 6);
123    /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
124    /// ```
125    ///
126    /// [`from_bits_u16`]: BitVec::from_bits_u16
127    /// [`from_bits_u32`]: BitVec::from_bits_u32
128    /// [`from_bits_u64`]: BitVec::from_bits_u64
129    /// [`from_bits_iter`]: BitVec::from_bits_iter
130    #[must_use]
131    pub fn from_bits(bits: &[u8]) -> Self {
132        let mut bv = Self::with_capacity(bits.len());
133        bits.iter().for_each(|&b| bv.append_bit(b.into()));
134        bv
135    }
136
137    /// Construct a bit vector from a set of bits given as distinct u16 values.
138    /// The constructor will take the least significant bit from each value and append it to a
139    /// bit vector.
140    /// All other bits are ignored.
141    ///
142    /// See also: [`from_bits`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
143    ///
144    /// [`from_bits`]: BitVec::from_bits
145    /// [`from_bits_u32`]: BitVec::from_bits_u32
146    /// [`from_bits_u64`]: BitVec::from_bits_u64
147    /// [`from_bits_iter`]: BitVec::from_bits_iter
148    #[must_use]
149    pub fn from_bits_u16(bits: &[u16]) -> Self {
150        let mut bv = Self::with_capacity(bits.len());
151        bits.iter().for_each(|&b| bv.append_bit_u16(b));
152        bv
153    }
154
155    /// Construct a bit vector from a set of bits given as distinct u32 values.
156    /// The constructor will take the least significant bit from each value and append it to a
157    /// bit vector.
158    /// All other bits are ignored.
159    ///
160    /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u64`], [`from_bits_iter`]
161    ///
162    /// [`from_bits`]: BitVec::from_bits
163    /// [`from_bits_u16`]: BitVec::from_bits_u16
164    /// [`from_bits_u64`]: BitVec::from_bits_u64
165    /// [`from_bits_iter`]: BitVec::from_bits_iter
166    #[must_use]
167    pub fn from_bits_u32(bits: &[u32]) -> Self {
168        let mut bv = Self::with_capacity(bits.len());
169        bits.iter().for_each(|&b| bv.append_bit_u32(b));
170        bv
171    }
172
173    /// Construct a bit vector from a set of bits given as distinct u64 values.
174    /// The constructor will take the least significant bit from each value and append it to a
175    /// bit vector.
176    /// All other bits are ignored.
177    ///
178    /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_iter`]
179    ///
180    /// [`from_bits`]: BitVec::from_bits
181    /// [`from_bits_u16`]: BitVec::from_bits_u16
182    /// [`from_bits_u32`]: BitVec::from_bits_u32
183    /// [`from_bits_iter`]: BitVec::from_bits_iter
184    #[must_use]
185    pub fn from_bits_u64(bits: &[u64]) -> Self {
186        let mut bv = Self::with_capacity(bits.len());
187        bits.iter().for_each(|&b| bv.append_bit(b));
188        bv
189    }
190
191    /// Construct a bit vector from an iterator of bits.
192    /// The constructor will take the least significant bit from each value and append it to a
193    /// bit vector.
194    /// All other bits are ignored.
195    /// The iterator must yield values that can be converted into u64 values.
196    ///
197    /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`]
198    ///
199    /// # Example
200    /// ```rust
201    /// use vers_vecs::BitVec;
202    ///
203    /// let bits = [true, false, true, true, true, true];
204    /// let bv = BitVec::from_bits_iter(bits.iter().copied());
205    ///
206    /// let bits = [0b1u8, 0b0, 0b1, 0b1, 0b1, 0b1];
207    /// let bv2 = BitVec::from_bits_iter(bits.iter().copied());
208    ///
209    /// assert_eq!(bv.len(), 6);
210    /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
211    /// assert_eq!(bv, bv2);
212    /// ```
213    ///
214    /// [`from_bits`]: BitVec::from_bits
215    /// [`from_bits_u16`]: BitVec::from_bits_u16
216    /// [`from_bits_u32`]: BitVec::from_bits_u32
217    /// [`from_bits_u64`]: BitVec::from_bits_u64
218    #[must_use]
219    pub fn from_bits_iter<I, E>(iter: I) -> Self
220    where
221        E: Into<u64>,
222        I: IntoIterator<Item = E>,
223    {
224        let iter = iter.into_iter();
225        let mut bv = Self::with_capacity(iter.size_hint().0);
226        for bit in iter {
227            bv.append_bit(bit.into());
228        }
229        bv
230    }
231
232    /// Construct a bit vector from a slice of u64 quad words.
233    /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
234    /// 64 bits to the bit vector).
235    /// Since the data is only cloned without any masking or transformation,
236    /// this is one of the fastest ways to create a bit vector.
237    ///
238    /// See also: [`from_vec`], [`from_limbs_iter`]
239    ///
240    /// # Example
241    /// ```rust
242    /// use vers_vecs::BitVec;
243    ///
244    /// let words = [0, 256, u64::MAX];
245    /// let bv = BitVec::from_limbs(&words);
246    ///
247    /// assert_eq!(bv.len(), 192);
248    /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
249    /// assert_eq!(bv.get(72), Some(1));
250    /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
251    /// ```
252    ///
253    /// [`from_vec`]: BitVec::from_vec
254    /// [`from_limbs_iter`]: BitVec::from_limbs_iter
255    #[must_use]
256    pub fn from_limbs(words: &[u64]) -> Self {
257        let len = words.len() * WORD_SIZE;
258        Self {
259            data: words.to_vec(),
260            len,
261        }
262    }
263
264    /// Construct a bit vector from an iterator of u64 quad words.
265    /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
266    /// 64 bits to the bit vector).
267    /// Since the data is only cloned without any masking or transformation,
268    /// this is one of the fastest ways to create a bit vector.
269    ///
270    /// See also: [`from_limbs`], [`from_vec`]
271    ///
272    /// # Example
273    /// ```rust
274    /// use std::iter::repeat;
275    /// use vers_vecs::BitVec;
276    ///
277    /// let zeros = repeat(0xaaaaaaaaaaaaaaaau64).take(10);
278    /// let bv = BitVec::from_limbs_iter(zeros);
279    ///
280    /// assert_eq!(bv.len(), 640);
281    /// for i in 0..640 {
282    ///    assert_eq!(bv.get(i), Some((i % 2 == 1) as u64));
283    /// }
284    /// ```
285    ///
286    /// [`from_limbs`]: BitVec::from_limbs
287    /// [`from_vec`]: BitVec::from_vec
288    pub fn from_limbs_iter<I, E>(iter: I) -> Self
289    where
290        E: Into<u64>,
291        I: IntoIterator<Item = E>,
292    {
293        let vec = iter.into_iter().map(Into::into).collect();
294        Self::from_vec(vec)
295    }
296
297    /// Construct a bit vector from a vector of u64 quad words.
298    /// The quad words are interpreted as limbs of the bit vector
299    /// (i.e. each quad word contributes 64 bits to the bit vector).
300    /// Since the data is moved without any masking or transformation, this is one of the fastest ways
301    /// to create a bit vector.
302    ///
303    /// See also: [`from_limbs`], [`from_limbs_iter`]
304    ///
305    /// # Example
306    /// ```rust
307    /// use vers_vecs::BitVec;
308    ///
309    /// let words = vec![0, 256, u64::MAX];
310    /// let bv = BitVec::from_vec(words);
311    ///
312    /// assert_eq!(bv.len(), 192);
313    /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
314    /// assert_eq!(bv.get(72), Some(1));
315    /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
316    /// ```
317    ///
318    /// [`from_limbs`]: BitVec::from_limbs
319    /// [`from_limbs_iter`]: BitVec::from_limbs_iter
320    #[must_use]
321    pub fn from_vec(data: Vec<u64>) -> Self {
322        let len = data.len() * WORD_SIZE;
323        Self { data, len }
324    }
325
326    /// Helper function for packing constructors to pack a slice of elements into a bit-vector,
327    /// taking the least significant `bits_per_element` per element.
328    fn pack_bits<T, const MAX_BITS: usize>(sequence: &[T], bits_per_element: usize) -> Self
329    where
330        T: Into<u64> + Copy,
331    {
332        let mut bv = Self::with_capacity(sequence.len() * bits_per_element);
333        for &word in sequence {
334            Self::pack_word_into_vector::<MAX_BITS>(&mut bv, word.into(), bits_per_element)
335        }
336        bv
337    }
338
339    /// Helper function for packing constructors to pack elements from an iterator into a bit-vector,
340    /// taking the least significant `bits_per_element` per element.
341    fn pack_bits_iter<T, I: IntoIterator<Item = T>, const MAX_BITS: usize>(
342        iter: I,
343        bits_per_element: usize,
344    ) -> Self
345    where
346        T: Into<u64> + Copy,
347    {
348        let mut bv = Self::new();
349
350        for word in iter {
351            Self::pack_word_into_vector::<MAX_BITS>(&mut bv, word.into(), bits_per_element)
352        }
353
354        bv
355    }
356
357    /// Helper function for packing constructors to pack `num_bits` of a given word into the given
358    /// vector.
359    #[inline(always)]
360    fn pack_word_into_vector<const MAX_BITS: usize>(bv: &mut BitVec, word: u64, num_bits: usize) {
361        if num_bits <= MAX_BITS {
362            bv.append_bits(word.into(), num_bits);
363        } else {
364            bv.append_bits(word.into(), MAX_BITS);
365            let mut rest = num_bits - MAX_BITS;
366            while rest > 0 {
367                bv.append_bits(0, min(rest, MAX_BITS));
368                rest = rest.saturating_sub(MAX_BITS);
369            }
370        }
371    }
372
373    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
374    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
375    /// The number of bits per element is given by `bits_per_element`.
376    /// The sequence is given as a slice of u64 values.
377    /// If the number of bits per element is smaller than 64, the function takes the
378    /// least significant bits of each element, and discards the rest.
379    /// If the number of bits per element is larger than 64, the function will pad the elements
380    /// with zeros.
381    /// The function will append the bits of each element to the bit vector in the order they are
382    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
383    ///
384    /// See also: [`pack_sequence_u32`], [`pack_sequence_u16`], [`pack_sequence_u8`]
385    ///
386    /// # Example
387    /// ```rust
388    /// use vers_vecs::BitVec;
389    ///
390    /// let sequence = [0b1010u64, 0b1100u64, 0b1111u64];
391    /// let bv = BitVec::pack_sequence_u64(&sequence, 4);
392    ///
393    /// assert_eq!(bv.len(), 12);
394    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
395    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
396    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
397    /// ```
398    ///
399    /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
400    /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
401    /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
402    #[must_use]
403    pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: usize) -> Self {
404        Self::pack_bits::<_, 64>(sequence, bits_per_element)
405    }
406
407    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
408    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
409    /// The number of bits per element is given by `bits_per_element`.
410    /// The sequence is given as a slice of u32 values.
411    /// If the number of bits per element is smaller than 32, the function takes the
412    /// least significant bits of each element, and discards the rest.
413    /// If the number of bits per element is larger than 32, the function will pad the elements
414    /// with zeros.
415    /// The function will append the bits of each element to the bit vector in the order they are
416    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
417    ///
418    /// See also: [`pack_sequence_u64`], [`pack_sequence_u16`], [`pack_sequence_u8`]
419    ///
420    /// # Example
421    /// ```rust
422    /// use vers_vecs::BitVec;
423    ///
424    /// let sequence = [0b1010u32, 0b1100u32, 0b1111u32];
425    /// let bv = BitVec::pack_sequence_u32(&sequence, 4);
426    ///
427    /// assert_eq!(bv.len(), 12);
428    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
429    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
430    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
431    /// ```
432    ///
433    /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
434    /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
435    /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
436    #[must_use]
437    pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: usize) -> Self {
438        Self::pack_bits::<_, 32>(sequence, bits_per_element)
439    }
440
441    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
442    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
443    /// The number of bits per element is given by `bits_per_element`.
444    /// The sequence is given as a slice of u16 values.
445    /// If the number of bits per element is smaller than 16, the function takes the
446    /// least significant bits of each element, and discards the rest.
447    /// If the number of bits per element is larger than 16, the function will pad the elements
448    /// with zeros.
449    /// The function will append the bits of each element to the bit vector in the order they are
450    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
451    ///
452    /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u8`]
453    ///
454    /// # Example
455    /// ```rust
456    /// use vers_vecs::BitVec;
457    ///
458    /// let sequence = [0b1010u16, 0b1100u16, 0b1111u16];
459    /// let bv = BitVec::pack_sequence_u16(&sequence, 4);
460    ///
461    /// assert_eq!(bv.len(), 12);
462    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
463    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
464    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
465    /// ```
466    ///
467    /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
468    /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
469    /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
470    #[must_use]
471    pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: usize) -> Self {
472        Self::pack_bits::<_, 16>(sequence, bits_per_element)
473    }
474
475    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
476    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
477    /// The number of bits per element is given by `bits_per_element`.
478    /// The sequence is given as a slice of u8 values.
479    /// If the number of bits per element is smaller than 8, the function takes the
480    /// least significant bits of each element, and discards the rest.
481    /// If the number of bits per element is larger than 8, the function will pad the elements
482    /// with zeros.
483    /// The function will append the bits of each element to the bit vector in the order they are
484    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
485    ///
486    /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u16`]
487    ///
488    /// # Example
489    /// ```rust
490    /// use vers_vecs::BitVec;
491    ///
492    /// let sequence = [0b1010u8, 0b1100u8, 0b1111u8];
493    /// let bv = BitVec::pack_sequence_u8(&sequence, 4);
494    ///
495    /// assert_eq!(bv.len(), 12);
496    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
497    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
498    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
499    /// ```
500    ///
501    /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
502    /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
503    /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
504    #[must_use]
505    pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: usize) -> Self {
506        Self::pack_bits::<_, 8>(sequence, bits_per_element)
507    }
508
509    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
510    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
511    /// The number of bits per element is given by `bits_per_element`.
512    /// The sequence is given as an iterator of u64 values.
513    /// If the number of bits per element is smaller than 64, the function takes the
514    /// least significant bits of each element, and discards the rest.
515    /// If the number of bits per element is larger than 64, the function will pad the elements
516    /// with zeros.
517    /// The function will append the bits of each element to the bit vector in the order they are
518    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
519    ///
520    /// See also: [`pack_from_iter_u32`], [`pack_from_iter_u16`], [`pack_from_iter_u8`], or the
521    /// functions to pack from a slice: [`pack_sequence_u64`]
522    ///
523    /// # Example
524    /// ```rust
525    /// use vers_vecs::BitVec;
526    ///
527    /// let sequence = [0b1010u64, 0b1100u64, 0b1111u64];
528    /// let bv = BitVec::pack_from_iter_u64(sequence.into_iter(), 4);
529    ///
530    /// assert_eq!(bv.len(), 12);
531    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
532    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
533    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
534    /// ```
535    ///
536    /// [`pack_from_iter_u32`]: BitVec::pack_from_iter_u32
537    /// [`pack_from_iter_u16`]: BitVec::pack_from_iter_u16
538    /// [`pack_from_iter_u8`]: BitVec::pack_from_iter_u8
539    /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
540    pub fn pack_from_iter_u64<I: IntoIterator<Item = u64>>(
541        iter: I,
542        bits_per_element: usize,
543    ) -> Self {
544        Self::pack_bits_iter::<_, _, 64>(iter, bits_per_element)
545    }
546
547    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
548    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
549    /// The number of bits per element is given by `bits_per_element`.
550    /// The sequence is given as an iterator of u32 values.
551    /// If the number of bits per element is smaller than 32, the function takes the
552    /// least significant bits of each element, and discards the rest.
553    /// If the number of bits per element is larger than 32, the function will pad the elements
554    /// with zeros.
555    /// The function will append the bits of each element to the bit vector in the order they are
556    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
557    ///
558    /// See also: [`pack_from_iter_u64`], [`pack_from_iter_u16`], [`pack_from_iter_u8`], or the
559    /// functions to pack from a slice: [`pack_sequence_u32`]
560    ///
561    /// # Example
562    /// ```rust
563    /// use vers_vecs::BitVec;
564    ///
565    /// let sequence = [0b1010u32, 0b1100u32, 0b1111u32];
566    /// let bv = BitVec::pack_from_iter_u32(sequence.into_iter(), 4);
567    ///
568    /// assert_eq!(bv.len(), 12);
569    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
570    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
571    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
572    /// ```
573    ///
574    /// [`pack_from_iter_u64`]: BitVec::pack_from_iter_u64
575    /// [`pack_from_iter_u16`]: BitVec::pack_from_iter_u16
576    /// [`pack_from_iter_u8`]: BitVec::pack_from_iter_u8
577    /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
578    pub fn pack_from_iter_u32<I: IntoIterator<Item = u32>>(
579        iter: I,
580        bits_per_element: usize,
581    ) -> Self {
582        Self::pack_bits_iter::<_, _, 32>(iter, bits_per_element)
583    }
584
585    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
586    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
587    /// The number of bits per element is given by `bits_per_element`.
588    /// The sequence is given as an iterator of u16 values.
589    /// If the number of bits per element is smaller than 16, the function takes the
590    /// least significant bits of each element, and discards the rest.
591    /// If the number of bits per element is larger than 16, the function will pad the elements
592    /// with zeros.
593    /// The function will append the bits of each element to the bit vector in the order they are
594    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
595    ///
596    /// See also: [`pack_from_iter_u64`], [`pack_from_iter_u32`], [`pack_from_iter_u8`], or the
597    /// functions to pack from a slice: [`pack_sequence_u16`]
598    ///
599    /// # Example
600    /// ```rust
601    /// use vers_vecs::BitVec;
602    ///
603    /// let sequence = [0b1010u16, 0b1100u16, 0b1111u16];
604    /// let bv = BitVec::pack_from_iter_u16(sequence.into_iter(), 4);
605    ///
606    /// assert_eq!(bv.len(), 12);
607    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
608    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
609    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
610    /// ```
611    ///
612    /// [`pack_from_iter_u64`]: BitVec::pack_from_iter_u64
613    /// [`pack_from_iter_u32`]: BitVec::pack_from_iter_u32
614    /// [`pack_from_iter_u8`]: BitVec::pack_from_iter_u8
615    /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
616    pub fn pack_from_iter_u16<I: IntoIterator<Item = u16>>(
617        iter: I,
618        bits_per_element: usize,
619    ) -> Self {
620        Self::pack_bits_iter::<_, _, 16>(iter, bits_per_element)
621    }
622
623    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
624    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
625    /// The number of bits per element is given by `bits_per_element`.
626    /// The sequence is given as an iterator of u8 values.
627    /// If the number of bits per element is smaller than 8, the function takes the
628    /// least significant bits of each element, and discards the rest.
629    /// If the number of bits per element is larger than 8, the function will pad the elements
630    /// with zeros.
631    /// The function will append the bits of each element to the bit vector in the order they are
632    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
633    ///
634    /// See also: [`pack_from_iter_u64`], [`pack_from_iter_u32`], [`pack_from_iter_u16`], or the
635    /// functions to pack from a slice: [`pack_sequence_u8`]
636    ///
637    /// # Example
638    /// ```rust
639    /// use vers_vecs::BitVec;
640    ///
641    /// let sequence = [0b1010u8, 0b1100u8, 0b1111u8];
642    /// let bv = BitVec::pack_from_iter_u8(sequence.into_iter(), 4);
643    ///
644    /// assert_eq!(bv.len(), 12);
645    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
646    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
647    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
648    /// ```
649    ///
650    /// [`pack_from_iter_u64`]: BitVec::pack_from_iter_u64
651    /// [`pack_from_iter_u32`]: BitVec::pack_from_iter_u32
652    /// [`pack_from_iter_u16`]: BitVec::pack_from_iter_u16
653    /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
654    pub fn pack_from_iter_u8<I: IntoIterator<Item = u8>>(iter: I, bits_per_element: usize) -> Self {
655        Self::pack_bits_iter::<_, _, 8>(iter, bits_per_element)
656    }
657
658    /// Construct a bit vector from bits given as boolean values in a slice.
659    /// The function will append the bits to the bit vector in the order they are
660    /// given in the sequence (i.e. the first bool is the first bit of the vector).
661    ///
662    /// # Example
663    /// ```rust
664    /// use vers_vecs::BitVec;
665    ///
666    /// let sequence = [true, false, true, true];
667    /// let bv = BitVec::from_bools(&sequence);
668    ///
669    /// assert_eq!(bv.len(), 4);
670    /// assert_eq!(bv.is_bit_set(0), Some(true));
671    /// assert_eq!(bv.is_bit_set(1), Some(false));
672    /// assert_eq!(bv.is_bit_set(2), Some(true));
673    /// assert_eq!(bv.is_bit_set(3), Some(true));
674    /// ```
675    pub fn from_bools(bools: &[bool]) -> Self {
676        let mut bv = BitVec::with_capacity(bools.len());
677        bools.iter().for_each(|&b| bv.append(b));
678        bv
679    }
680
681    /// Construct a bit vector from bits given as boolean values from an iterator.
682    /// The function will append the bits to the bit vector in the order they are
683    /// given in the sequence (i.e. the first bool is the first bit of the vector).
684    ///
685    /// # Example
686    /// ```rust
687    /// use vers_vecs::BitVec;
688    ///
689    /// let sequence = [true, false, true, true];
690    /// let bv = BitVec::from_bool_iter(sequence.into_iter());
691    ///
692    /// assert_eq!(bv.len(), 4);
693    /// assert_eq!(bv.is_bit_set(0), Some(true));
694    /// assert_eq!(bv.is_bit_set(1), Some(false));
695    /// assert_eq!(bv.is_bit_set(2), Some(true));
696    /// assert_eq!(bv.is_bit_set(3), Some(true));
697    /// ```
698    pub fn from_bool_iter<I: IntoIterator<Item = bool>>(iter: I) -> Self {
699        let mut bv = BitVec::new();
700        iter.into_iter().for_each(|b| bv.append(b));
701        bv
702    }
703
704    /// Append a bit encoded as a `bool` to the bit vector, where `true` means 1 and `false` means 0.
705    ///
706    /// See also: [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
707    ///
708    /// # Example
709    ///
710    /// ```rust
711    /// use vers_vecs::BitVec;
712    ///
713    /// let mut bv = BitVec::new();
714    /// bv.append(true);
715    ///
716    /// assert_eq!(bv.len(), 1);
717    /// assert_eq!(bv.get(0), Some(1));
718    /// ```
719    ///
720    /// [`append_bit`]: BitVec::append_bit
721    /// [`append_bit_u32`]: BitVec::append_bit_u32
722    /// [`append_bit_u16`]: BitVec::append_bit_u16
723    /// [`append_bit_u8`]: BitVec::append_bit_u8
724    /// [`append_word`]: BitVec::append_word
725    pub fn append(&mut self, bit: bool) {
726        if self.len.is_multiple_of(WORD_SIZE) {
727            self.data.push(0);
728        }
729        if bit {
730            self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
731        } else {
732            self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
733        }
734        self.len += 1;
735    }
736
737    /// Drop the last n bits from the bit vector. If more bits are dropped than the bit vector
738    /// contains, the bit vector is cleared.
739    ///
740    /// # Example
741    ///
742    /// ```rust
743    /// use vers_vecs::BitVec;
744    ///
745    /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
746    /// bv.drop_last(3);
747    ///
748    /// assert_eq!(bv.len(), 3);
749    /// assert_eq!(bv.get_bits(0, 3), Some(0b101u64));
750    ///
751    /// bv.drop_last(4);
752    ///
753    /// assert!(bv.is_empty());
754    /// ```
755    pub fn drop_last(&mut self, n: usize) {
756        if n > self.len {
757            self.data.clear();
758            self.len = 0;
759            return;
760        }
761
762        let new_limb_count = (self.len - n).div_ceil(WORD_SIZE);
763
764        // cut off limbs that we no longer need
765        if new_limb_count < self.data.len() {
766            self.data.truncate(new_limb_count);
767        }
768
769        // update bit vector length
770        self.len -= n;
771    }
772
773    /// Append a bit encoded in a u64.
774    /// The least significant bit is appended to the bit vector.
775    /// All other bits are ignored.
776    ///
777    /// See also: [`append`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
778    ///
779    /// # Example
780    ///
781    /// ```rust
782    /// use vers_vecs::BitVec;
783    ///
784    /// let mut bv = BitVec::new();
785    ///
786    /// bv.append_bit(1);
787    /// bv.append_bit(0);
788    ///
789    /// assert_eq!(bv.len(), 2);
790    /// assert_eq!(bv.get(0), Some(1));
791    /// assert_eq!(bv.get(1), Some(0));
792    /// ```
793    ///
794    /// [`append`]: BitVec::append
795    /// [`append_bit_u32`]: BitVec::append_bit_u32
796    /// [`append_bit_u16`]: BitVec::append_bit_u16
797    /// [`append_bit_u8`]: BitVec::append_bit_u8
798    /// [`append_word`]: BitVec::append_word
799    pub fn append_bit(&mut self, bit: u64) {
800        if self.len.is_multiple_of(WORD_SIZE) {
801            self.data.push(0);
802        }
803        if bit % 2 == 1 {
804            self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
805        } else {
806            self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
807        }
808
809        self.len += 1;
810    }
811
812    /// Append a bit from a u32. The least significant bit is appended to the bit vector.
813    /// All other bits are ignored.
814    ///
815    /// See also: [`append`], [`append_bit`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
816    ///
817    /// [`append`]: BitVec::append
818    /// [`append_bit`]: BitVec::append_bit
819    /// [`append_bit_u16`]: BitVec::append_bit_u16
820    /// [`append_bit_u8`]: BitVec::append_bit_u8
821    /// [`append_word`]: BitVec::append_word
822    pub fn append_bit_u32(&mut self, bit: u32) {
823        self.append_bit(u64::from(bit));
824    }
825
826    /// Append a bit from a u16. The least significant bit is appended to the bit vector.
827    /// All other bits are ignored.
828    ///
829    /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u8`], [`append_word`]
830    ///
831    /// [`append`]: BitVec::append
832    /// [`append_bit`]: BitVec::append_bit
833    /// [`append_bit_u32`]: BitVec::append_bit_u32
834    /// [`append_bit_u8`]: BitVec::append_bit_u8
835    /// [`append_word`]: BitVec::append_word
836    pub fn append_bit_u16(&mut self, bit: u16) {
837        self.append_bit(u64::from(bit));
838    }
839
840    /// Append a bit from a u8. The least significant bit is appended to the bit vector.
841    /// All other bits are ignored.
842    ///
843    /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_word`]
844    ///
845    /// [`append`]: BitVec::append
846    /// [`append_bit`]: BitVec::append_bit
847    /// [`append_bit_u32`]: BitVec::append_bit_u32
848    /// [`append_bit_u16`]: BitVec::append_bit_u16
849    /// [`append_word`]: BitVec::append_word
850    pub fn append_bit_u8(&mut self, bit: u8) {
851        self.append_bit(u64::from(bit));
852    }
853
854    /// Append a word to the bit vector. The bits are appended in little endian order (i.e. the first
855    /// bit of the word is appended first).
856    ///
857    /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`]
858    ///
859    /// # Example
860    ///
861    /// ```rust
862    /// use vers_vecs::BitVec;
863    ///
864    /// let mut bv = BitVec::new();
865    /// bv.append_word(0b1010_1010_1010_1010u64);
866    ///
867    /// assert_eq!(bv.len(), 64);
868    /// for i in 0..64 {
869    ///    assert_eq!(bv.get(i), Some((0b1010_1010_1010_1010u64 >> i) & 1));
870    /// }
871    /// ```
872    ///
873    /// [`append`]: BitVec::append
874    /// [`append_bit`]: BitVec::append_bit
875    /// [`append_bit_u32`]: BitVec::append_bit_u32
876    /// [`append_bit_u16`]: BitVec::append_bit_u16
877    /// [`append_bit_u8`]: BitVec::append_bit_u8
878    pub fn append_word(&mut self, word: u64) {
879        if self.len.is_multiple_of(WORD_SIZE) {
880            self.data.push(word);
881        } else {
882            // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
883            self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
884            self.data[self.len / WORD_SIZE] |= word << (self.len % WORD_SIZE);
885
886            self.data.push(word >> (WORD_SIZE - self.len % WORD_SIZE));
887        }
888        self.len += WORD_SIZE;
889    }
890
891    /// Append multiple bits to the bit vector.
892    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
893    /// The number of bits to append is given by `len`. The bits are taken from the least
894    /// significant bits of `bits`.
895    /// All other bits are ignored.
896    ///
897    /// # Example
898    ///
899    /// ```rust
900    /// use vers_vecs::BitVec;
901    ///
902    /// let mut bv = BitVec::new();
903    /// bv.append_bits(0b1010_1010_1010_1010u64, 16);
904    ///
905    /// assert_eq!(bv.len(), 16);
906    /// assert_eq!(bv.get_bits(0, 16), Some(0b1010_1010_1010_1010u64));
907    /// ```
908    ///
909    /// # Panics
910    /// Panics if `len` is larger than 64.
911    pub fn append_bits(&mut self, bits: u64, len: usize) {
912        assert!(len <= 64, "Cannot append more than 64 bits");
913
914        if self.len.is_multiple_of(WORD_SIZE) {
915            self.data.push(bits);
916        } else {
917            // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
918            self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
919            self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
920
921            if self.len % WORD_SIZE + len > WORD_SIZE {
922                self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
923            }
924        }
925        self.len += len;
926    }
927
928    /// Append multiple bits to the bit vector.
929    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
930    /// The number of bits to append is given by `len`. The bits are taken from the least
931    /// significant bits of `bits`.
932    ///
933    /// This function does not check if `len` is larger than 64.
934    ///
935    /// Furthermore, if the bit-vector has trailing bits that are not zero
936    /// (i.e. the length is not a multiple of 64, and those bits are partially set),
937    /// the function will OR the new data with the trailing bits, destroying the appended data.
938    /// This can happen, if a call `append_bits[_unchecked](word, len)` appends a word which has
939    /// set bits beyond the `len - 1`-th bit,
940    /// or if bits have been dropped from the bit vector using [`drop_last`].
941    ///
942    /// See [`append_bits`] for a checked version of this function.
943    ///
944    /// # Panics
945    /// If `len` is larger than 64, the behavior is platform-dependent, and a processor
946    /// exception might be triggered.
947    ///
948    /// [`append_bits`]: BitVec::append_bits
949    /// [`drop_last`]: BitVec::drop_last
950    pub fn append_bits_unchecked(&mut self, bits: u64, len: usize) {
951        if self.len.is_multiple_of(WORD_SIZE) {
952            self.data.push(bits);
953        } else {
954            self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
955
956            if self.len % WORD_SIZE + len > WORD_SIZE {
957                self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
958            }
959        }
960        self.len += len;
961    }
962
963    /// Append the bits of another bit vector to the end of this vector.
964    /// If this vector does not contain a multiple of 64 bits, the appended limbs need to be
965    /// shifted to the left.
966    /// This function is guaranteed to reallocate the underlying vector at most once.
967    pub fn extend_bitvec(&mut self, other: &Self) {
968        // reserve space for the new bits, ensuring at most one re-allocation
969        self.data
970            .reserve((self.len + other.len).div_ceil(WORD_SIZE) - self.data.len());
971
972        let full_limbs = other.len() / WORD_SIZE;
973        for i in 0..full_limbs {
974            self.append_bits(other.data[i], WORD_SIZE);
975        }
976
977        let partial_bits = other.len % WORD_SIZE;
978        if partial_bits > 0 {
979            self.append_bits(other.data[full_limbs], partial_bits);
980        }
981    }
982
983    /// Return the length of the bit vector. The length is measured in bits.
984    #[must_use]
985    pub fn len(&self) -> usize {
986        self.len
987    }
988
989    /// Return whether the bit vector is empty (contains no bits).
990    #[must_use]
991    pub fn is_empty(&self) -> bool {
992        self.len == 0
993    }
994
995    /// Flip the bit at the given position.
996    ///
997    /// # Example
998    ///
999    /// ```rust
1000    /// use vers_vecs::BitVec;
1001    ///
1002    /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
1003    /// bv.flip_bit(1);
1004    ///
1005    /// assert_eq!(bv.len(), 6);
1006    /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
1007    /// ```
1008    ///
1009    /// # Panics
1010    /// If the position is larger than the length of the vector, the function panics.
1011    pub fn flip_bit(&mut self, pos: usize) {
1012        assert!(pos < self.len, "Index out of bounds");
1013        self.flip_bit_unchecked(pos);
1014    }
1015
1016    /// Flip the bit at the given position.
1017    ///
1018    /// See also: [`flip_bit`]
1019    ///
1020    /// # Panics
1021    /// If the position is larger than the length of the
1022    /// vector, the function will either modify unused memory or panic.
1023    /// This will not corrupt memory.
1024    ///
1025    /// [`flip_bit`]: BitVec::flip_bit
1026    pub fn flip_bit_unchecked(&mut self, pos: usize) {
1027        self.data[pos / WORD_SIZE] ^= 1 << (pos % WORD_SIZE);
1028    }
1029
1030    /// Return the bit at the given position.
1031    /// The bit is encoded in the least significant bit of a u64 value.
1032    /// If the position is larger than the length of the vector, None is returned.
1033    ///
1034    /// See also: [`get_unchecked`]
1035    ///
1036    /// # Example
1037    ///
1038    /// ```rust
1039    /// use vers_vecs::BitVec;
1040    ///
1041    /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
1042    ///
1043    /// assert_eq!(bv.get(1), Some(0));
1044    /// assert_eq!(bv.get(2), Some(1));
1045    /// ```
1046    ///
1047    /// [`get_unchecked`]: Self::get_unchecked
1048    #[must_use]
1049    pub fn get(&self, pos: usize) -> Option<u64> {
1050        if pos >= self.len {
1051            None
1052        } else {
1053            Some(self.get_unchecked(pos))
1054        }
1055    }
1056
1057    /// Return the bit at the given position.
1058    /// The bit is encoded in the least significant bit of a u64 value.
1059    ///
1060    /// # Panics
1061    /// If the position is larger than the length of the vector,
1062    /// the function will either return unpredictable data, or panic.
1063    /// Use [`get`] to properly handle this case with an `Option`.
1064    ///
1065    /// [`get`]: BitVec::get
1066    #[must_use]
1067    pub fn get_unchecked(&self, pos: usize) -> u64 {
1068        (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1
1069    }
1070
1071    /// Set the bit at the given position.
1072    /// The bit is encoded in the least significant bit of a u64 value.
1073    ///
1074    /// See also: [`set_unchecked`]
1075    ///
1076    /// # Example
1077    ///
1078    /// ```rust
1079    /// use vers_vecs::BitVec;
1080    ///
1081    /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
1082    /// bv.set(1, 1).unwrap();
1083    ///
1084    /// assert_eq!(bv.len(), 6);
1085    /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
1086    /// ```
1087    ///
1088    /// # Errors
1089    /// If the position is out of range, the function will return `Err` with an error message,
1090    /// otherwise it will return an empty `Ok`.
1091    ///
1092    /// [`set_unchecked`]: BitVec::set_unchecked
1093    pub fn set(&mut self, pos: usize, value: u64) -> Result<(), &str> {
1094        if pos >= self.len {
1095            Err("out of range")
1096        } else {
1097            self.set_unchecked(pos, value);
1098            Ok(())
1099        }
1100    }
1101
1102    /// Set the bit at the given position.
1103    /// The bit is encoded in the least significant bit of a u64 value.
1104    ///
1105    /// # Panics
1106    /// If the position is larger than the length of the vector,
1107    /// the function will either do nothing, or panic.
1108    /// Use [`set`] to properly handle this case with a `Result`.
1109    ///
1110    /// [`set`]: BitVec::set
1111    pub fn set_unchecked(&mut self, pos: usize, value: u64) {
1112        self.data[pos / WORD_SIZE] = (self.data[pos / WORD_SIZE] & !(0x1 << (pos % WORD_SIZE)))
1113            | ((value & 0x1) << (pos % WORD_SIZE));
1114    }
1115
1116    /// Return whether the bit at the given position is set.
1117    /// If the position is larger than the length of the vector, None is returned.
1118    ///
1119    /// See also: [`is_bit_set_unchecked`]
1120    ///
1121    /// # Example
1122    ///
1123    /// ```rust
1124    /// use vers_vecs::BitVec;
1125    ///
1126    /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
1127    ///
1128    /// assert!(!bv.is_bit_set(1).unwrap());
1129    /// assert!(bv.is_bit_set(2).unwrap());
1130    /// ```
1131    ///
1132    /// [`is_bit_set_unchecked`]: BitVec::is_bit_set_unchecked
1133    #[must_use]
1134    pub fn is_bit_set(&self, pos: usize) -> Option<bool> {
1135        if pos >= self.len {
1136            None
1137        } else {
1138            Some(self.is_bit_set_unchecked(pos))
1139        }
1140    }
1141
1142    /// Return whether the bit at the given position is set.
1143    ///
1144    /// # Panics
1145    /// If the position is larger than the length of the vector,
1146    /// the function will either return unpredictable data, or panic.
1147    /// Use [`is_bit_set`] to properly handle this case with an `Option`.
1148    ///
1149    /// [`is_bit_set`]: BitVec::is_bit_set
1150    #[must_use]
1151    pub fn is_bit_set_unchecked(&self, pos: usize) -> bool {
1152        self.get_unchecked(pos) != 0
1153    }
1154
1155    /// Return multiple bits at the given position.
1156    /// The number of bits to return is given by `len`.
1157    /// At most 64 bits can be returned.
1158    /// If the position at the end of the query is larger than the length of the vector,
1159    /// None is returned (even if the query partially overlaps with the vector).
1160    /// If the length of the query is larger than 64, None is returned.
1161    ///
1162    /// The first bit at `pos` is the most significant bit of the return value
1163    /// limited to `len` bits.
1164    #[must_use]
1165    pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> {
1166        if len > WORD_SIZE || len == 0 {
1167            return None;
1168        }
1169        if pos + len > self.len {
1170            None
1171        } else {
1172            Some(self.get_bits_unchecked(pos, len))
1173        }
1174    }
1175
1176    /// Return multiple bits at the given position. The number of bits to return is given by `len`.
1177    /// At most 64 bits can be returned.
1178    ///
1179    /// This function is always inlined, because it gains a lot from loop optimization and
1180    /// can utilize the processor pre-fetcher better if it is.
1181    ///
1182    /// # Errors
1183    /// If the length of the query is larger than 64, unpredictable data will be returned.
1184    /// Use [`get_bits`] to avoid this.
1185    ///
1186    /// # Panics
1187    /// If the position or interval is larger than the length of the vector,
1188    /// the function will either return any valid results padded with unpredictable
1189    /// data or panic.
1190    ///
1191    /// [`get_bits`]: BitVec::get_bits
1192    #[must_use]
1193    #[allow(clippy::inline_always)]
1194    #[allow(clippy::comparison_chain)] // readability
1195    #[inline(always)] // inline to gain loop optimization and pipeline advantages for elias fano
1196    #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen
1197    pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 {
1198        debug_assert!(len <= WORD_SIZE);
1199        let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE);
1200        if pos % WORD_SIZE + len <= WORD_SIZE {
1201            partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
1202        } else {
1203            (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE)))
1204                & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
1205        }
1206    }
1207
1208    /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
1209    /// `index`. The number of bits per encoded element is given by `n`.
1210    ///
1211    /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
1212    /// and is equivalent to calling [`get_bits(index * n, n)`].
1213    /// It is thus safe to use this method with any index and any size n <= 64.
1214    ///
1215    /// If the element is out of bounds, None is returned.
1216    /// The element is returned as a u64 value.
1217    ///
1218    /// # Example
1219    /// ```rust
1220    /// use vers_vecs::BitVec;
1221    ///
1222    /// let sequence = [10, 100, 124, 45, 223];
1223    /// let bv = BitVec::pack_sequence_u64(&sequence, 8);
1224    ///
1225    /// assert_eq!(bv.unpack_element(0, 8), Some(10));
1226    /// assert_eq!(bv.unpack_element(2, 8), Some(124));
1227    /// ```
1228    ///
1229    /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1230    /// [`get_bits(index * n, n)`]: BitVec::get_bits
1231    #[must_use]
1232    #[allow(clippy::inline_always)]
1233    #[inline(always)] // to gain optimization if n is constant
1234    pub fn unpack_element(&self, index: usize, n: usize) -> Option<u64> {
1235        self.get_bits(index * n, n)
1236    }
1237
1238    /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
1239    /// `index`. The number of bits per encoded element is given by `n`.
1240    ///
1241    /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
1242    /// and is equivalent to calling [`get_bits_unchecked(index * n, n)`].
1243    /// It is thus safe to use this method with any index where `index * n + n` is in-bounds,
1244    /// and any size n <= 64.
1245    ///
1246    /// # Panics
1247    /// If the element is out of bounds, the function will either return unpredictable data or panic.
1248    /// Use [`unpack_element`] for a checked version of this function.
1249    ///
1250    /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1251    /// [`get_bits_unchecked(index * n, n)`]: BitVec::get_bits_unchecked
1252    /// [`unpack_element`]: BitVec::unpack_element
1253    #[must_use]
1254    #[allow(clippy::inline_always)]
1255    #[inline(always)] // to gain optimization if n is constant
1256    pub fn unpack_element_unchecked(&self, index: usize, n: usize) -> u64 {
1257        self.get_bits_unchecked(index * n, n)
1258    }
1259
1260    /// Return the number of ones in the bit vector. Since the bit vector doesn't store additional
1261    /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1262    ///
1263    /// [`RsVec`]: crate::RsVec
1264    #[must_use]
1265    #[allow(clippy::missing_panics_doc)] // can't panic because of manual bounds check
1266    pub fn count_ones(&self) -> u64 {
1267        let mut ones: u64 = self.data[0..self.len / WORD_SIZE]
1268            .iter()
1269            .map(|limb| u64::from(limb.count_ones()))
1270            .sum();
1271        if !self.len.is_multiple_of(WORD_SIZE) {
1272            ones += u64::from(
1273                (self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones(),
1274            );
1275        }
1276        ones
1277    }
1278
1279    /// Return the number of zeros in the bit vector. Since the bit vector doesn't store additional
1280    /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1281    /// This method calls [`count_ones`].
1282    ///
1283    /// [`RsVec`]: crate::RsVec
1284    /// [`count_ones`]: BitVec::count_ones
1285    #[must_use]
1286    pub fn count_zeros(&self) -> u64 {
1287        self.len as u64 - self.count_ones()
1288    }
1289
1290    /// Mask this bit vector with another bitvector using bitwise or. The mask is applied lazily
1291    /// whenever an operation on the resulting vector is performed.
1292    ///
1293    /// # Errors
1294    /// Returns an error if the length of the vector doesn't match the mask length.
1295    #[inline]
1296    pub fn mask_or<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1297        MaskedBitVec::new(self, mask, |a, b| a | b)
1298    }
1299
1300    /// Mask this bit vector with another bitvector using bitwise or.
1301    /// The mask is applied immediately, unlike in [`mask_or`].
1302    ///
1303    /// # Errors
1304    /// Returns an error if the length of the vector doesn't match the mask length.
1305    ///
1306    /// [`mask_or`]: BitVec::mask_or
1307    pub fn apply_mask_or(&mut self, mask: &BitVec) -> Result<(), String> {
1308        if self.len != mask.len {
1309            return Err(String::from(
1310                "mask cannot have different length than vector",
1311            ));
1312        }
1313
1314        for i in 0..self.data.len() {
1315            self.data[i] |= mask.data[i];
1316        }
1317
1318        Ok(())
1319    }
1320
1321    /// Mask this bit vector with another bitvector using bitwise and. The mask is applied lazily
1322    /// whenever an operation on the resulting vector is performed.
1323    ///
1324    /// # Errors
1325    /// Returns an error if the length of the vector doesn't match the mask length.
1326    #[inline]
1327    pub fn mask_and<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1328        MaskedBitVec::new(self, mask, |a, b| a & b)
1329    }
1330
1331    /// Mask this bit vector with another bitvector using bitwise and.
1332    /// The mask is applied immediately, unlike in [`mask_and`].
1333    ///
1334    /// # Errors
1335    /// Returns an error if the length of the vector doesn't match the mask length.
1336    ///
1337    /// [`mask_and`]: BitVec::mask_and
1338    pub fn apply_mask_and(&mut self, mask: &BitVec) -> Result<(), String> {
1339        if self.len != mask.len {
1340            return Err(String::from(
1341                "mask cannot have different length than vector",
1342            ));
1343        }
1344
1345        for i in 0..self.data.len() {
1346            self.data[i] &= mask.data[i];
1347        }
1348
1349        Ok(())
1350    }
1351
1352    /// Mask this bit vector with another bitvector using bitwise xor. The mask is applied lazily
1353    /// whenever an operation on the resulting vector is performed.
1354    ///
1355    /// # Errors
1356    /// Returns an error if the length of the vector doesn't match the mask length.
1357    #[inline]
1358    pub fn mask_xor<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1359        MaskedBitVec::new(self, mask, |a, b| a ^ b)
1360    }
1361
1362    /// Mask this bit vector with another bitvector using bitwise xor.
1363    /// The mask is applied immediately, unlike in [`mask_xor`].
1364    ///
1365    /// # Errors
1366    /// Returns an error if the length of the vector doesn't match the mask length.
1367    ///
1368    /// [`mask_xor`]: BitVec::mask_xor
1369    pub fn apply_mask_xor(&mut self, mask: &BitVec) -> Result<(), String> {
1370        if self.len != mask.len {
1371            return Err(String::from(
1372                "mask cannot have different length than vector",
1373            ));
1374        }
1375
1376        for i in 0..self.data.len() {
1377            self.data[i] ^= mask.data[i];
1378        }
1379
1380        Ok(())
1381    }
1382
1383    /// Mask this bit vector with another bitvector using a custom masking operation. The mask is
1384    /// applied lazily whenever an operation on the resulting vector is performed.
1385    ///
1386    /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1387    /// The last block of a bit vector might contain fewer bits, and will be padded with
1388    /// unpredictable data. Implementations may choose to modify those padding bits without
1389    /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1390    /// within the vector is unspecified.
1391    ///
1392    /// # Errors
1393    /// Returns an error if the length of the vector doesn't match the mask length.
1394    #[inline]
1395    pub fn mask_custom<'s, 'b, F>(
1396        &'s self,
1397        mask: &'b BitVec,
1398        mask_op: F,
1399    ) -> Result<MaskedBitVec<'s, 'b, F>, String>
1400    where
1401        F: Fn(u64, u64) -> u64,
1402    {
1403        MaskedBitVec::new(self, mask, mask_op)
1404    }
1405
1406    /// Mask this bit vector with another bitvector using a custom masking operation.
1407    /// The mask is applied immediately, unlike in [`mask_custom`].
1408    ///
1409    /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1410    /// The last block of a bit vector might contain fewer bits, and will be padded with
1411    /// unpredictable data. Implementations may choose to modify those padding bits without
1412    /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1413    /// within the vector is unspecified.
1414    ///
1415    /// # Errors
1416    /// Returns an error if the length of the vector doesn't match the mask length.
1417    ///
1418    /// [`mask_custom`]: BitVec::mask_custom
1419    #[inline]
1420    pub fn apply_mask_custom(
1421        &mut self,
1422        mask: &BitVec,
1423        mask_op: fn(u64, u64) -> u64,
1424    ) -> Result<(), String> {
1425        if self.len != mask.len {
1426            return Err(String::from(
1427                "mask cannot have different length than vector",
1428            ));
1429        }
1430
1431        for i in 0..self.data.len() {
1432            self.data[i] = mask_op(self.data[i], mask.data[i]);
1433        }
1434
1435        Ok(())
1436    }
1437
1438    /// Returns the number of bytes on the heap for this vector.
1439    /// Does not include allocated memory that isn't used.
1440    #[must_use]
1441    pub fn heap_size(&self) -> usize {
1442        self.data.len() * size_of::<u64>()
1443    }
1444
1445    /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1446    /// right half the remaining bits `at..`. If the split index is larger than the length of the
1447    /// vector, the vector is returned unmodified in an `Err` variant.
1448    ///
1449    /// # Errors
1450    /// If the index is out of bounds, the function will return an error
1451    /// containing the original vector.
1452    ///
1453    /// See also: [`split_at_unchecked`]
1454    ///
1455    /// [`split_at_unchecked`]: Self::split_at_unchecked
1456    pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> {
1457        if at > self.len {
1458            Err(self)
1459        } else {
1460            Ok(self.split_at_unchecked(at))
1461        }
1462    }
1463
1464    /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1465    /// right half the remaining bits `at..`.
1466    ///
1467    /// # Panics
1468    /// If the index is larger than the length of the vector the function will panic or run
1469    /// out of memory.
1470    /// Use [`split_at`] to properly handle this case.
1471    ///
1472    /// [`split_at`]: Self::split_at
1473    #[must_use]
1474    pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) {
1475        let other_len = self.len - at;
1476        let mut other = Self::with_capacity(other_len);
1477
1478        if other_len == 0 {
1479            return (self, other);
1480        }
1481
1482        let first_limb = at / WORD_SIZE;
1483        let last_limb = self.len / WORD_SIZE;
1484
1485        // First, we figure out the number of bits from the first limb to retain in this vector:
1486        let leading_partial = at % WORD_SIZE;
1487
1488        // If the split point is in the last limb, and the vector ends before the last bit, first_limb
1489        // and last_limb will be equal, and the other half is simply other_len bits off the limb
1490        // right shifted by the number of bits to retain in this vector.
1491        if first_limb == last_limb {
1492            other.append_bits_unchecked(self.data[first_limb] >> leading_partial, other_len);
1493        } else {
1494            // Otherwise, some range n..last_limb should be copied in their entirety to the other half,
1495            // with n=first_limb+1 if the split point is inside the first limb (leading_partial > 0), or
1496            // n=first_limb if the entire first limb belongs in the other half.
1497            let full_limbs = if leading_partial > 0 {
1498                // If the split point is inside the first limb, we also have to remember to copy over
1499                // the trailing bits to the new vector.
1500                other.append_bits_unchecked(
1501                    self.data[first_limb] >> leading_partial,
1502                    WORD_SIZE - leading_partial,
1503                );
1504                first_limb + 1..last_limb
1505            } else {
1506                first_limb..last_limb
1507            };
1508
1509            // Copy over any full limbs.
1510            for i in full_limbs {
1511                other.append_bits_unchecked(self.data[i], WORD_SIZE);
1512            }
1513
1514            // Finally, if the vector has a partially filled last limb, we need to put those bits
1515            // in the other half.
1516            let trailing_partial = self.len % WORD_SIZE;
1517            if trailing_partial > 0 {
1518                other.append_bits_unchecked(self.data[last_limb], trailing_partial);
1519            }
1520        }
1521
1522        // remove the copied bits from the original vector
1523        self.drop_last(other_len);
1524
1525        (self, other)
1526    }
1527
1528    /// Iterate through the u64 limbs of the bitvector.
1529    /// The limbs are encoded starting at the least significant bit (i.e., the first bit in the bit
1530    /// vector is the least significant bit in the first limb).
1531    /// Note that the last limb may be incomplete, if the number of bits in the vector is not
1532    /// divisible by 64.
1533    /// In this case, the least significant `self.len() % 64` bits are the correct bits.
1534    /// The remaining bits of the last limb are in an unspecified state and code should not rely
1535    /// on them being set or unset.
1536    #[inline]
1537    pub fn iter_limbs(&self) -> impl Iterator<Item = u64> + use<'_> {
1538        self.data.iter().copied()
1539    }
1540}
1541
1542impl_vector_iterator! { BitVec, BitVecIter, BitVecRefIter }
1543
1544/// Create a new bit vector from a slice of u64 values.
1545/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1546/// The function will append the bits of each element to the bit vector in the order they are
1547/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1548impl From<&[u64]> for BitVec {
1549    fn from(data: &[u64]) -> Self {
1550        BitVec::from_limbs(data)
1551    }
1552}
1553
1554/// Create a new bit vector from a slice of u64 values.
1555/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1556/// The function will append the bits of each element to the bit vector in the order they are
1557/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1558impl From<Vec<u64>> for BitVec {
1559    fn from(data: Vec<u64>) -> Self {
1560        BitVec::from_limbs(&data)
1561    }
1562}
1563
1564impl Extend<BitVec> for BitVec {
1565    fn extend<T: IntoIterator<Item = BitVec>>(&mut self, iter: T) {
1566        for v in iter {
1567            self.extend_bitvec(&v)
1568        }
1569    }
1570}
1571
1572impl<'t> Extend<&'t BitVec> for BitVec {
1573    fn extend<T: IntoIterator<Item = &'t BitVec>>(&mut self, iter: T) {
1574        for v in iter {
1575            self.extend_bitvec(v)
1576        }
1577    }
1578}
1579
1580/// Create a new bit vector from u64 values.
1581/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1582/// The function will append the bits of each element to the bit vector in the order they are
1583/// given in the iterator (i.e. the first element takes bits `0..64` of the vector).
1584impl FromIterator<u64> for BitVec {
1585    fn from_iter<T: IntoIterator<Item = u64>>(iter: T) -> Self {
1586        BitVec::from_limbs_iter(iter)
1587    }
1588}
1589
1590impl PartialEq for BitVec {
1591    // unlike the auto-derived implementation, this custom implementation ignores junk data at
1592    // the end of the bit vector
1593    fn eq(&self, other: &Self) -> bool {
1594        if self.len != other.len {
1595            return false;
1596        }
1597
1598        if self.len == 0 {
1599            return true;
1600        }
1601
1602        for i in 0..self.data.len() - 1 {
1603            if self.data[i] != other.data[i] {
1604                return false;
1605            }
1606        }
1607
1608        // in last limb, ignore junk data
1609        let mask = (1 << (self.len % WORD_SIZE)) - 1;
1610        if self.data[self.data.len() - 1] & mask != other.data[self.data.len() - 1] & mask {
1611            return false;
1612        }
1613
1614        true
1615    }
1616}
1617
1618impl Eq for BitVec {}
1619
1620impl Hash for BitVec {
1621    fn hash<H: Hasher>(&self, state: &mut H) {
1622        state.write_usize(self.len);
1623        if self.len > 0 {
1624            self.data[0..self.data.len() - 1]
1625                .iter()
1626                .for_each(|x| state.write_u64(*x));
1627            let masked_last_limb = self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1);
1628            state.write_u64(masked_last_limb);
1629        }
1630    }
1631}
1632
1633#[cfg(test)]
1634mod tests;
vers_vecs/bit_vec/mod.rs

vers_vecs/bit_vec/
mod.rs