vers_vecs/bit_vec/
mod.rs

1//! This module contains a simple [bit vector][BitVec] implementation with no overhead and a fast succinct
2//! bit vector implementation with [rank and select queries][fast_rs_vec::RsVec].
3
4use crate::bit_vec::mask::MaskedBitVec;
5use crate::util::impl_vector_iterator;
6use std::cmp::min;
7use std::hash::{Hash, Hasher};
8use std::mem::size_of;
9
10pub mod fast_rs_vec;
11
12pub mod sparse;
13
14pub mod mask;
15
16/// Size of a word in bitvectors. All vectors operate on 64-bit words.
17const WORD_SIZE: usize = 64;
18
19/// Type alias for masked bitvectors that implement a simple bitwise binary operation.
20/// The first lifetime is for the bit vector that is being masked, the second lifetime is for the
21/// mask.
22pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>;
23
24/// A simple bit vector that does not support rank and select queries.
25/// Bits are stored in little-endian order, i.e. the least significant bit is stored first.
26/// The bit vector is stored as a sequence of 64 bit limbs.
27/// The last limb may be partially filled.
28///
29/// The bit vector has a wide range of constructors that allow for easy creation from various
30/// sources.
31/// Among them are constructors for creating an empty vector ([`BitVec::new`]),
32/// creating one from single bits of various integer types ([`BitVec::from_bits`] and variations),
33/// creating limbs from u64 values directly ([`BitVec::from_limbs`] and variations),
34/// or packing a sequence of numerical values into a dense bit sequence
35/// ([`BitVec::pack_sequence_u64`] and variations).
36///
37/// The bit vector can be modified after creation
38/// (e.g. by appending [bits](BitVec::append_bits)
39/// or [words](BitVec::append_word),
40/// [flipping](BitVec::flip_bit),
41/// or [setting](BitVec::set) bits).
42/// Bits can be [accessed](BitVec::get) by position,
43/// and [multiple bits](BitVec::get_bits) can be accessed at once.
44/// Bits can be [dropped](BitVec::drop_last) from the end.
45///
46/// # Example
47/// ```rust
48/// use vers_vecs::{BitVec, RsVec};
49///
50/// let mut bit_vec = BitVec::new();
51/// bit_vec.append_bit(0u64);
52/// bit_vec.append_bit_u32(1u32);
53/// bit_vec.append_word(0b1010_1010_1010_1010u64); // appends exactly 64 bits
54///
55/// assert_eq!(bit_vec.len(), 66);
56/// assert_eq!(bit_vec.get(0), Some(0u64));
57/// assert_eq!(bit_vec.get(1), Some(1u64));
58/// ```
59#[derive(Clone, Debug, Default)]
60#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
61#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
62pub struct BitVec {
63    data: Vec<u64>,
64    len: usize,
65}
66
67impl BitVec {
68    /// Create a new empty bit vector.
69    #[must_use]
70    pub fn new() -> Self {
71        Self::default()
72    }
73
74    /// Create a new empty bit vector with the given capacity.
75    /// The capacity is measured in bits.
76    /// The bit vector will be able to hold at least `capacity` bits without reallocating.
77    /// More memory may be allocated according to the underlying allocation strategy.
78    #[must_use]
79    pub fn with_capacity(capacity: usize) -> Self {
80        Self {
81            data: Vec::with_capacity(capacity / WORD_SIZE + 1),
82            len: 0,
83        }
84    }
85
86    /// Create a new bit vector with all zeros and the given length.
87    /// The length is measured in bits.
88    #[must_use]
89    pub fn from_zeros(len: usize) -> Self {
90        let mut data = vec![0; len / WORD_SIZE];
91        if !len.is_multiple_of(WORD_SIZE) {
92            data.push(0);
93        }
94        Self { data, len }
95    }
96
97    /// Create a new bit vector with all ones and the given length.
98    /// The length is measured in bits.
99    #[must_use]
100    pub fn from_ones(len: usize) -> Self {
101        let mut data = vec![u64::MAX; len / WORD_SIZE];
102        if !len.is_multiple_of(WORD_SIZE) {
103            data.push((1 << (len % WORD_SIZE)) - 1);
104        }
105        Self { data, len }
106    }
107
108    /// Construct a bit vector from a set of bits given as distinct u8 values.
109    /// The constructor will take the least significant bit from each value and append it to a
110    /// bit vector.
111    /// All other bits are ignored.
112    ///
113    /// See also: [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
114    ///
115    /// # Example
116    /// ```rust
117    /// use vers_vecs::BitVec;
118    ///
119    /// let bits: &[u8] = &[1, 0, 1, 1, 1, 1];
120    /// let bv = BitVec::from_bits(&bits);
121    ///
122    /// assert_eq!(bv.len(), 6);
123    /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
124    /// ```
125    ///
126    /// [`from_bits_u16`]: BitVec::from_bits_u16
127    /// [`from_bits_u32`]: BitVec::from_bits_u32
128    /// [`from_bits_u64`]: BitVec::from_bits_u64
129    /// [`from_bits_iter`]: BitVec::from_bits_iter
130    #[must_use]
131    pub fn from_bits(bits: &[u8]) -> Self {
132        let mut bv = Self::with_capacity(bits.len());
133        bits.iter().for_each(|&b| bv.append_bit(b.into()));
134        bv
135    }
136
137    /// Construct a bit vector from a set of bits given as distinct u16 values.
138    /// The constructor will take the least significant bit from each value and append it to a
139    /// bit vector.
140    /// All other bits are ignored.
141    ///
142    /// See also: [`from_bits`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
143    ///
144    /// [`from_bits`]: BitVec::from_bits
145    /// [`from_bits_u32`]: BitVec::from_bits_u32
146    /// [`from_bits_u64`]: BitVec::from_bits_u64
147    /// [`from_bits_iter`]: BitVec::from_bits_iter
148    #[must_use]
149    pub fn from_bits_u16(bits: &[u16]) -> Self {
150        let mut bv = Self::with_capacity(bits.len());
151        bits.iter().for_each(|&b| bv.append_bit_u16(b));
152        bv
153    }
154
155    /// Construct a bit vector from a set of bits given as distinct u32 values.
156    /// The constructor will take the least significant bit from each value and append it to a
157    /// bit vector.
158    /// All other bits are ignored.
159    ///
160    /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u64`], [`from_bits_iter`]
161    ///
162    /// [`from_bits`]: BitVec::from_bits
163    /// [`from_bits_u16`]: BitVec::from_bits_u16
164    /// [`from_bits_u64`]: BitVec::from_bits_u64
165    /// [`from_bits_iter`]: BitVec::from_bits_iter
166    #[must_use]
167    pub fn from_bits_u32(bits: &[u32]) -> Self {
168        let mut bv = Self::with_capacity(bits.len());
169        bits.iter().for_each(|&b| bv.append_bit_u32(b));
170        bv
171    }
172
173    /// Construct a bit vector from a set of bits given as distinct u64 values.
174    /// The constructor will take the least significant bit from each value and append it to a
175    /// bit vector.
176    /// All other bits are ignored.
177    ///
178    /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_iter`]
179    ///
180    /// [`from_bits`]: BitVec::from_bits
181    /// [`from_bits_u16`]: BitVec::from_bits_u16
182    /// [`from_bits_u32`]: BitVec::from_bits_u32
183    /// [`from_bits_iter`]: BitVec::from_bits_iter
184    #[must_use]
185    pub fn from_bits_u64(bits: &[u64]) -> Self {
186        let mut bv = Self::with_capacity(bits.len());
187        bits.iter().for_each(|&b| bv.append_bit(b));
188        bv
189    }
190
191    /// Construct a bit vector from an iterator of bits.
192    /// The constructor will take the least significant bit from each value and append it to a
193    /// bit vector.
194    /// All other bits are ignored.
195    /// The iterator must yield values that can be converted into u64 values.
196    ///
197    /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`]
198    ///
199    /// # Example
200    /// ```rust
201    /// use vers_vecs::BitVec;
202    ///
203    /// let bits = [true, false, true, true, true, true];
204    /// let bv = BitVec::from_bits_iter(bits.iter().copied());
205    ///
206    /// let bits = [0b1u8, 0b0, 0b1, 0b1, 0b1, 0b1];
207    /// let bv2 = BitVec::from_bits_iter(bits.iter().copied());
208    ///
209    /// assert_eq!(bv.len(), 6);
210    /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
211    /// assert_eq!(bv, bv2);
212    /// ```
213    ///
214    /// [`from_bits`]: BitVec::from_bits
215    /// [`from_bits_u16`]: BitVec::from_bits_u16
216    /// [`from_bits_u32`]: BitVec::from_bits_u32
217    /// [`from_bits_u64`]: BitVec::from_bits_u64
218    #[must_use]
219    pub fn from_bits_iter<I, E>(iter: I) -> Self
220    where
221        E: Into<u64>,
222        I: IntoIterator<Item = E>,
223    {
224        let iter = iter.into_iter();
225        let mut bv = Self::with_capacity(iter.size_hint().0);
226        for bit in iter {
227            bv.append_bit(bit.into());
228        }
229        bv
230    }
231
232    /// Construct a bit vector from a slice of u64 quad words.
233    /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
234    /// 64 bits to the bit vector).
235    /// Since the data is only cloned without any masking or transformation,
236    /// this is one of the fastest ways to create a bit vector.
237    ///
238    /// See also: [`from_vec`], [`from_limbs_iter`]
239    ///
240    /// # Example
241    /// ```rust
242    /// use vers_vecs::BitVec;
243    ///
244    /// let words = [0, 256, u64::MAX];
245    /// let bv = BitVec::from_limbs(&words);
246    ///
247    /// assert_eq!(bv.len(), 192);
248    /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
249    /// assert_eq!(bv.get(72), Some(1));
250    /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
251    /// ```
252    ///
253    /// [`from_vec`]: BitVec::from_vec
254    /// [`from_limbs_iter`]: BitVec::from_limbs_iter
255    #[must_use]
256    pub fn from_limbs(words: &[u64]) -> Self {
257        let len = words.len() * WORD_SIZE;
258        Self {
259            data: words.to_vec(),
260            len,
261        }
262    }
263
264    /// Construct a bit vector from an iterator of u64 quad words.
265    /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
266    /// 64 bits to the bit vector).
267    /// Since the data is only cloned without any masking or transformation,
268    /// this is one of the fastest ways to create a bit vector.
269    ///
270    /// See also: [`from_limbs`], [`from_vec`]
271    ///
272    /// # Example
273    /// ```rust
274    /// use std::iter::repeat;
275    /// use vers_vecs::BitVec;
276    ///
277    /// let zeros = repeat(0xaaaaaaaaaaaaaaaau64).take(10);
278    /// let bv = BitVec::from_limbs_iter(zeros);
279    ///
280    /// assert_eq!(bv.len(), 640);
281    /// for i in 0..640 {
282    ///    assert_eq!(bv.get(i), Some((i % 2 == 1) as u64));
283    /// }
284    /// ```
285    ///
286    /// [`from_limbs`]: BitVec::from_limbs
287    /// [`from_vec`]: BitVec::from_vec
288    pub fn from_limbs_iter<I, E>(iter: I) -> Self
289    where
290        E: Into<u64>,
291        I: IntoIterator<Item = E>,
292    {
293        let vec = iter.into_iter().map(Into::into).collect();
294        Self::from_vec(vec)
295    }
296
297    /// Construct a bit vector from a vector of u64 quad words.
298    /// The quad words are interpreted as limbs of the bit vector
299    /// (i.e. each quad word contributes 64 bits to the bit vector).
300    /// Since the data is moved without any masking or transformation, this is one of the fastest ways
301    /// to create a bit vector.
302    ///
303    /// See also: [`from_limbs`], [`from_limbs_iter`]
304    ///
305    /// # Example
306    /// ```rust
307    /// use vers_vecs::BitVec;
308    ///
309    /// let words = vec![0, 256, u64::MAX];
310    /// let bv = BitVec::from_vec(words);
311    ///
312    /// assert_eq!(bv.len(), 192);
313    /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
314    /// assert_eq!(bv.get(72), Some(1));
315    /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
316    /// ```
317    ///
318    /// [`from_limbs`]: BitVec::from_limbs
319    /// [`from_limbs_iter`]: BitVec::from_limbs_iter
320    #[must_use]
321    pub fn from_vec(data: Vec<u64>) -> Self {
322        let len = data.len() * WORD_SIZE;
323        Self { data, len }
324    }
325
326    /// Helper function for packing constructors to pack a slice of elements into a bit-vector,
327    /// taking the least significant `bits_per_element` per element.
328    fn pack_bits<T, const MAX_BITS: usize>(sequence: &[T], bits_per_element: usize) -> Self
329    where
330        T: Into<u64> + Copy,
331    {
332        let mut bv = Self::with_capacity(sequence.len() * bits_per_element);
333        for &word in sequence {
334            Self::pack_word_into_vector::<MAX_BITS>(&mut bv, word.into(), bits_per_element)
335        }
336        bv
337    }
338
339    /// Helper function for packing constructors to pack elements from an iterator into a bit-vector,
340    /// taking the least significant `bits_per_element` per element.
341    fn pack_bits_iter<T, I: IntoIterator<Item=T>, const MAX_BITS: usize>(iter: I, bits_per_element: usize) -> Self
342        where T: Into<u64> + Copy
343    {
344        let mut bv = Self::new();
345
346        for word in iter {
347            Self::pack_word_into_vector::<MAX_BITS>(&mut bv, word.into(), bits_per_element)
348        }
349
350        bv
351    }
352
353    /// Helper function for packing constructors to pack `num_bits` of a given word into the given
354    /// vector.
355    #[inline(always)]
356    fn pack_word_into_vector<const MAX_BITS: usize>(bv: &mut BitVec, word: u64, num_bits: usize) {
357        if num_bits <= MAX_BITS {
358            bv.append_bits(word.into(), num_bits);
359        } else {
360            bv.append_bits(word.into(), MAX_BITS);
361            let mut rest = num_bits - MAX_BITS;
362            while rest > 0 {
363                bv.append_bits(0, min(rest, MAX_BITS));
364                rest = rest.saturating_sub(MAX_BITS);
365            }
366        }
367    }
368
369
370    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
371    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
372    /// The number of bits per element is given by `bits_per_element`.
373    /// The sequence is given as a slice of u64 values.
374    /// If the number of bits per element is smaller than 64, the function takes the
375    /// least significant bits of each element, and discards the rest.
376    /// If the number of bits per element is larger than 64, the function will pad the elements
377    /// with zeros.
378    /// The function will append the bits of each element to the bit vector in the order they are
379    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
380    ///
381    /// See also: [`pack_sequence_u32`], [`pack_sequence_u16`], [`pack_sequence_u8`]
382    ///
383    /// # Example
384    /// ```rust
385    /// use vers_vecs::BitVec;
386    ///
387    /// let sequence = [0b1010u64, 0b1100u64, 0b1111u64];
388    /// let bv = BitVec::pack_sequence_u64(&sequence, 4);
389    ///
390    /// assert_eq!(bv.len(), 12);
391    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
392    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
393    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
394    /// ```
395    ///
396    /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
397    /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
398    /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
399    #[must_use]
400    pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: usize) -> Self {
401        Self::pack_bits::<_, 64>(sequence, bits_per_element)
402    }
403
404    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
405    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
406    /// The number of bits per element is given by `bits_per_element`.
407    /// The sequence is given as a slice of u32 values.
408    /// If the number of bits per element is smaller than 32, the function takes the
409    /// least significant bits of each element, and discards the rest.
410    /// If the number of bits per element is larger than 32, the function will pad the elements
411    /// with zeros.
412    /// The function will append the bits of each element to the bit vector in the order they are
413    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
414    ///
415    /// See also: [`pack_sequence_u64`], [`pack_sequence_u16`], [`pack_sequence_u8`]
416    ///
417    /// # Example
418    /// ```rust
419    /// use vers_vecs::BitVec;
420    ///
421    /// let sequence = [0b1010u32, 0b1100u32, 0b1111u32];
422    /// let bv = BitVec::pack_sequence_u32(&sequence, 4);
423    ///
424    /// assert_eq!(bv.len(), 12);
425    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
426    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
427    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
428    /// ```
429    ///
430    /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
431    /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
432    /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
433    #[must_use]
434    pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: usize) -> Self {
435        Self::pack_bits::<_, 32>(sequence, bits_per_element)
436    }
437
438    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
439    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
440    /// The number of bits per element is given by `bits_per_element`.
441    /// The sequence is given as a slice of u16 values.
442    /// If the number of bits per element is smaller than 16, the function takes the
443    /// least significant bits of each element, and discards the rest.
444    /// If the number of bits per element is larger than 16, the function will pad the elements
445    /// with zeros.
446    /// The function will append the bits of each element to the bit vector in the order they are
447    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
448    ///
449    /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u8`]
450    ///
451    /// # Example
452    /// ```rust
453    /// use vers_vecs::BitVec;
454    ///
455    /// let sequence = [0b1010u16, 0b1100u16, 0b1111u16];
456    /// let bv = BitVec::pack_sequence_u16(&sequence, 4);
457    ///
458    /// assert_eq!(bv.len(), 12);
459    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
460    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
461    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
462    /// ```
463    ///
464    /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
465    /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
466    /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
467    #[must_use]
468    pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: usize) -> Self {
469        Self::pack_bits::<_, 16>(sequence, bits_per_element)
470    }
471
472    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
473    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
474    /// The number of bits per element is given by `bits_per_element`.
475    /// The sequence is given as a slice of u8 values.
476    /// If the number of bits per element is smaller than 8, the function takes the
477    /// least significant bits of each element, and discards the rest.
478    /// If the number of bits per element is larger than 8, the function will pad the elements
479    /// with zeros.
480    /// The function will append the bits of each element to the bit vector in the order they are
481    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
482    ///
483    /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u16`]
484    ///
485    /// # Example
486    /// ```rust
487    /// use vers_vecs::BitVec;
488    ///
489    /// let sequence = [0b1010u8, 0b1100u8, 0b1111u8];
490    /// let bv = BitVec::pack_sequence_u8(&sequence, 4);
491    ///
492    /// assert_eq!(bv.len(), 12);
493    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
494    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
495    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
496    /// ```
497    ///
498    /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
499    /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
500    /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
501    #[must_use]
502    pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: usize) -> Self {
503        Self::pack_bits::<_, 8>(sequence, bits_per_element)
504    }
505
506    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
507    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
508    /// The number of bits per element is given by `bits_per_element`.
509    /// The sequence is given as an iterator of u64 values.
510    /// If the number of bits per element is smaller than 64, the function takes the
511    /// least significant bits of each element, and discards the rest.
512    /// If the number of bits per element is larger than 64, the function will pad the elements
513    /// with zeros.
514    /// The function will append the bits of each element to the bit vector in the order they are
515    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
516    ///
517    /// See also: [`pack_from_iter_u32`], [`pack_from_iter_u16`], [`pack_from_iter_u8`], or the
518    /// functions to pack from a slice: [`pack_sequence_u64`]
519    ///
520    /// # Example
521    /// ```rust
522    /// use vers_vecs::BitVec;
523    ///
524    /// let sequence = [0b1010u64, 0b1100u64, 0b1111u64];
525    /// let bv = BitVec::pack_from_iter_u64(sequence.into_iter(), 4);
526    ///
527    /// assert_eq!(bv.len(), 12);
528    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
529    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
530    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
531    /// ```
532    ///
533    /// [`pack_from_iter_u32`]: BitVec::pack_from_iter_u32
534    /// [`pack_from_iter_u16`]: BitVec::pack_from_iter_u16
535    /// [`pack_from_iter_u8`]: BitVec::pack_from_iter_u8
536    /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
537    pub fn pack_from_iter_u64<I: IntoIterator<Item=u64>>(iter: I, bits_per_element: usize) -> Self {
538        Self::pack_bits_iter::<_, _, 64>(iter, bits_per_element)
539    }
540
541    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
542    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
543    /// The number of bits per element is given by `bits_per_element`.
544    /// The sequence is given as an iterator of u32 values.
545    /// If the number of bits per element is smaller than 32, the function takes the
546    /// least significant bits of each element, and discards the rest.
547    /// If the number of bits per element is larger than 32, the function will pad the elements
548    /// with zeros.
549    /// The function will append the bits of each element to the bit vector in the order they are
550    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
551    ///
552    /// See also: [`pack_from_iter_u64`], [`pack_from_iter_u16`], [`pack_from_iter_u8`], or the
553    /// functions to pack from a slice: [`pack_sequence_u32`]
554    ///
555    /// # Example
556    /// ```rust
557    /// use vers_vecs::BitVec;
558    ///
559    /// let sequence = [0b1010u32, 0b1100u32, 0b1111u32];
560    /// let bv = BitVec::pack_from_iter_u32(sequence.into_iter(), 4);
561    ///
562    /// assert_eq!(bv.len(), 12);
563    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
564    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
565    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
566    /// ```
567    ///
568    /// [`pack_from_iter_u64`]: BitVec::pack_from_iter_u64
569    /// [`pack_from_iter_u16`]: BitVec::pack_from_iter_u16
570    /// [`pack_from_iter_u8`]: BitVec::pack_from_iter_u8
571    /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
572    pub fn pack_from_iter_u32<I: IntoIterator<Item=u32>>(iter: I, bits_per_element: usize) -> Self {
573        Self::pack_bits_iter::<_, _, 32>(iter, bits_per_element)
574    }
575
576    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
577    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
578    /// The number of bits per element is given by `bits_per_element`.
579    /// The sequence is given as an iterator of u16 values.
580    /// If the number of bits per element is smaller than 16, the function takes the
581    /// least significant bits of each element, and discards the rest.
582    /// If the number of bits per element is larger than 16, the function will pad the elements
583    /// with zeros.
584    /// The function will append the bits of each element to the bit vector in the order they are
585    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
586    ///
587    /// See also: [`pack_from_iter_u64`], [`pack_from_iter_u32`], [`pack_from_iter_u8`], or the
588    /// functions to pack from a slice: [`pack_sequence_u16`]
589    ///
590    /// # Example
591    /// ```rust
592    /// use vers_vecs::BitVec;
593    ///
594    /// let sequence = [0b1010u16, 0b1100u16, 0b1111u16];
595    /// let bv = BitVec::pack_from_iter_u16(sequence.into_iter(), 4);
596    ///
597    /// assert_eq!(bv.len(), 12);
598    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
599    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
600    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
601    /// ```
602    ///
603    /// [`pack_from_iter_u64`]: BitVec::pack_from_iter_u64
604    /// [`pack_from_iter_u32`]: BitVec::pack_from_iter_u32
605    /// [`pack_from_iter_u8`]: BitVec::pack_from_iter_u8
606    /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
607    pub fn pack_from_iter_u16<I: IntoIterator<Item=u16>>(iter: I, bits_per_element: usize) -> Self {
608        Self::pack_bits_iter::<_, _, 16>(iter, bits_per_element)
609    }
610
611    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
612    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
613    /// The number of bits per element is given by `bits_per_element`.
614    /// The sequence is given as an iterator of u8 values.
615    /// If the number of bits per element is smaller than 8, the function takes the
616    /// least significant bits of each element, and discards the rest.
617    /// If the number of bits per element is larger than 8, the function will pad the elements
618    /// with zeros.
619    /// The function will append the bits of each element to the bit vector in the order they are
620    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
621    ///
622    /// See also: [`pack_from_iter_u64`], [`pack_from_iter_u32`], [`pack_from_iter_u16`], or the
623    /// functions to pack from a slice: [`pack_sequence_u8`]
624    ///
625    /// # Example
626    /// ```rust
627    /// use vers_vecs::BitVec;
628    ///
629    /// let sequence = [0b1010u8, 0b1100u8, 0b1111u8];
630    /// let bv = BitVec::pack_from_iter_u8(sequence.into_iter(), 4);
631    ///
632    /// assert_eq!(bv.len(), 12);
633    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
634    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
635    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
636    /// ```
637    ///
638    /// [`pack_from_iter_u64`]: BitVec::pack_from_iter_u64
639    /// [`pack_from_iter_u32`]: BitVec::pack_from_iter_u32
640    /// [`pack_from_iter_u16`]: BitVec::pack_from_iter_u16
641    /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
642    pub fn pack_from_iter_u8<I: IntoIterator<Item=u8>>(iter: I, bits_per_element: usize) -> Self {
643        Self::pack_bits_iter::<_, _, 8>(iter, bits_per_element)
644    }
645
646    /// Construct a bit vector from bits given as boolean values in a slice.
647    /// The function will append the bits to the bit vector in the order they are
648    /// given in the sequence (i.e. the first bool is the first bit of the vector).
649    ///
650    /// # Example
651    /// ```rust
652    /// use vers_vecs::BitVec;
653    ///
654    /// let sequence = [true, false, true, true];
655    /// let bv = BitVec::from_bools(&sequence);
656    ///
657    /// assert_eq!(bv.len(), 4);
658    /// assert_eq!(bv.is_bit_set(0), Some(true));
659    /// assert_eq!(bv.is_bit_set(1), Some(false));
660    /// assert_eq!(bv.is_bit_set(2), Some(true));
661    /// assert_eq!(bv.is_bit_set(3), Some(true));
662    /// ```
663    pub fn from_bools(bools: &[bool]) -> Self {
664        let mut bv = BitVec::with_capacity(bools.len());
665        bools.iter().for_each(|&b| bv.append(b));
666        bv
667    }
668
669    /// Construct a bit vector from bits given as boolean values from an iterator.
670    /// The function will append the bits to the bit vector in the order they are
671    /// given in the sequence (i.e. the first bool is the first bit of the vector).
672    ///
673    /// # Example
674    /// ```rust
675    /// use vers_vecs::BitVec;
676    ///
677    /// let sequence = [true, false, true, true];
678    /// let bv = BitVec::from_bool_iter(sequence.into_iter());
679    ///
680    /// assert_eq!(bv.len(), 4);
681    /// assert_eq!(bv.is_bit_set(0), Some(true));
682    /// assert_eq!(bv.is_bit_set(1), Some(false));
683    /// assert_eq!(bv.is_bit_set(2), Some(true));
684    /// assert_eq!(bv.is_bit_set(3), Some(true));
685    /// ```
686    pub fn from_bool_iter<I: IntoIterator<Item=bool>>(iter: I) -> Self {
687        let mut bv = BitVec::new();
688        iter.into_iter().for_each(|b| bv.append(b));
689        bv
690    }
691
692    /// Append a bit encoded as a `bool` to the bit vector, where `true` means 1 and `false` means 0.
693    ///
694    /// See also: [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
695    ///
696    /// # Example
697    ///
698    /// ```rust
699    /// use vers_vecs::BitVec;
700    ///
701    /// let mut bv = BitVec::new();
702    /// bv.append(true);
703    ///
704    /// assert_eq!(bv.len(), 1);
705    /// assert_eq!(bv.get(0), Some(1));
706    /// ```
707    ///
708    /// [`append_bit`]: BitVec::append_bit
709    /// [`append_bit_u32`]: BitVec::append_bit_u32
710    /// [`append_bit_u16`]: BitVec::append_bit_u16
711    /// [`append_bit_u8`]: BitVec::append_bit_u8
712    /// [`append_word`]: BitVec::append_word
713    pub fn append(&mut self, bit: bool) {
714        if self.len.is_multiple_of(WORD_SIZE) {
715            self.data.push(0);
716        }
717        if bit {
718            self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
719        } else {
720            self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
721        }
722        self.len += 1;
723    }
724
725    /// Drop the last n bits from the bit vector. If more bits are dropped than the bit vector
726    /// contains, the bit vector is cleared.
727    ///
728    /// # Example
729    ///
730    /// ```rust
731    /// use vers_vecs::BitVec;
732    ///
733    /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
734    /// bv.drop_last(3);
735    ///
736    /// assert_eq!(bv.len(), 3);
737    /// assert_eq!(bv.get_bits(0, 3), Some(0b101u64));
738    ///
739    /// bv.drop_last(4);
740    ///
741    /// assert!(bv.is_empty());
742    /// ```
743    pub fn drop_last(&mut self, n: usize) {
744        if n > self.len {
745            self.data.clear();
746            self.len = 0;
747            return;
748        }
749
750        let new_limb_count = (self.len - n).div_ceil(WORD_SIZE);
751
752        // cut off limbs that we no longer need
753        if new_limb_count < self.data.len() {
754            self.data.truncate(new_limb_count);
755        }
756
757        // update bit vector length
758        self.len -= n;
759    }
760
761    /// Append a bit encoded in a u64.
762    /// The least significant bit is appended to the bit vector.
763    /// All other bits are ignored.
764    ///
765    /// See also: [`append`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
766    ///
767    /// # Example
768    ///
769    /// ```rust
770    /// use vers_vecs::BitVec;
771    ///
772    /// let mut bv = BitVec::new();
773    ///
774    /// bv.append_bit(1);
775    /// bv.append_bit(0);
776    ///
777    /// assert_eq!(bv.len(), 2);
778    /// assert_eq!(bv.get(0), Some(1));
779    /// assert_eq!(bv.get(1), Some(0));
780    /// ```
781    ///
782    /// [`append`]: BitVec::append
783    /// [`append_bit_u32`]: BitVec::append_bit_u32
784    /// [`append_bit_u16`]: BitVec::append_bit_u16
785    /// [`append_bit_u8`]: BitVec::append_bit_u8
786    /// [`append_word`]: BitVec::append_word
787    pub fn append_bit(&mut self, bit: u64) {
788        if self.len.is_multiple_of(WORD_SIZE) {
789            self.data.push(0);
790        }
791        if bit % 2 == 1 {
792            self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
793        } else {
794            self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
795        }
796
797        self.len += 1;
798    }
799
800    /// Append a bit from a u32. The least significant bit is appended to the bit vector.
801    /// All other bits are ignored.
802    ///
803    /// See also: [`append`], [`append_bit`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
804    ///
805    /// [`append`]: BitVec::append
806    /// [`append_bit`]: BitVec::append_bit
807    /// [`append_bit_u16`]: BitVec::append_bit_u16
808    /// [`append_bit_u8`]: BitVec::append_bit_u8
809    /// [`append_word`]: BitVec::append_word
810    pub fn append_bit_u32(&mut self, bit: u32) {
811        self.append_bit(u64::from(bit));
812    }
813
814    /// Append a bit from a u16. The least significant bit is appended to the bit vector.
815    /// All other bits are ignored.
816    ///
817    /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u8`], [`append_word`]
818    ///
819    /// [`append`]: BitVec::append
820    /// [`append_bit`]: BitVec::append_bit
821    /// [`append_bit_u32`]: BitVec::append_bit_u32
822    /// [`append_bit_u8`]: BitVec::append_bit_u8
823    /// [`append_word`]: BitVec::append_word
824    pub fn append_bit_u16(&mut self, bit: u16) {
825        self.append_bit(u64::from(bit));
826    }
827
828    /// Append a bit from a u8. The least significant bit is appended to the bit vector.
829    /// All other bits are ignored.
830    ///
831    /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_word`]
832    ///
833    /// [`append`]: BitVec::append
834    /// [`append_bit`]: BitVec::append_bit
835    /// [`append_bit_u32`]: BitVec::append_bit_u32
836    /// [`append_bit_u16`]: BitVec::append_bit_u16
837    /// [`append_word`]: BitVec::append_word
838    pub fn append_bit_u8(&mut self, bit: u8) {
839        self.append_bit(u64::from(bit));
840    }
841
842    /// Append a word to the bit vector. The bits are appended in little endian order (i.e. the first
843    /// bit of the word is appended first).
844    ///
845    /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`]
846    ///
847    /// # Example
848    ///
849    /// ```rust
850    /// use vers_vecs::BitVec;
851    ///
852    /// let mut bv = BitVec::new();
853    /// bv.append_word(0b1010_1010_1010_1010u64);
854    ///
855    /// assert_eq!(bv.len(), 64);
856    /// for i in 0..64 {
857    ///    assert_eq!(bv.get(i), Some((0b1010_1010_1010_1010u64 >> i) & 1));
858    /// }
859    /// ```
860    ///
861    /// [`append`]: BitVec::append
862    /// [`append_bit`]: BitVec::append_bit
863    /// [`append_bit_u32`]: BitVec::append_bit_u32
864    /// [`append_bit_u16`]: BitVec::append_bit_u16
865    /// [`append_bit_u8`]: BitVec::append_bit_u8
866    pub fn append_word(&mut self, word: u64) {
867        if self.len.is_multiple_of(WORD_SIZE) {
868            self.data.push(word);
869        } else {
870            // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
871            self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
872            self.data[self.len / WORD_SIZE] |= word << (self.len % WORD_SIZE);
873
874            self.data.push(word >> (WORD_SIZE - self.len % WORD_SIZE));
875        }
876        self.len += WORD_SIZE;
877    }
878
879    /// Append multiple bits to the bit vector.
880    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
881    /// The number of bits to append is given by `len`. The bits are taken from the least
882    /// significant bits of `bits`.
883    /// All other bits are ignored.
884    ///
885    /// # Example
886    ///
887    /// ```rust
888    /// use vers_vecs::BitVec;
889    ///
890    /// let mut bv = BitVec::new();
891    /// bv.append_bits(0b1010_1010_1010_1010u64, 16);
892    ///
893    /// assert_eq!(bv.len(), 16);
894    /// assert_eq!(bv.get_bits(0, 16), Some(0b1010_1010_1010_1010u64));
895    /// ```
896    ///
897    /// # Panics
898    /// Panics if `len` is larger than 64.
899    pub fn append_bits(&mut self, bits: u64, len: usize) {
900        assert!(len <= 64, "Cannot append more than 64 bits");
901
902        if self.len.is_multiple_of(WORD_SIZE) {
903            self.data.push(bits);
904        } else {
905            // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
906            self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
907            self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
908
909            if self.len % WORD_SIZE + len > WORD_SIZE {
910                self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
911            }
912        }
913        self.len += len;
914    }
915
916    /// Append multiple bits to the bit vector.
917    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
918    /// The number of bits to append is given by `len`. The bits are taken from the least
919    /// significant bits of `bits`.
920    ///
921    /// This function does not check if `len` is larger than 64.
922    ///
923    /// Furthermore, if the bit-vector has trailing bits that are not zero
924    /// (i.e. the length is not a multiple of 64, and those bits are partially set),
925    /// the function will OR the new data with the trailing bits, destroying the appended data.
926    /// This can happen, if a call `append_bits[_unchecked](word, len)` appends a word which has
927    /// set bits beyond the `len - 1`-th bit,
928    /// or if bits have been dropped from the bit vector using [`drop_last`].
929    ///
930    /// See [`append_bits`] for a checked version of this function.
931    ///
932    /// # Panics
933    /// If `len` is larger than 64, the behavior is platform-dependent, and a processor
934    /// exception might be triggered.
935    ///
936    /// [`append_bits`]: BitVec::append_bits
937    /// [`drop_last`]: BitVec::drop_last
938    pub fn append_bits_unchecked(&mut self, bits: u64, len: usize) {
939        if self.len.is_multiple_of(WORD_SIZE) {
940            self.data.push(bits);
941        } else {
942            self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
943
944            if self.len % WORD_SIZE + len > WORD_SIZE {
945                self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
946            }
947        }
948        self.len += len;
949    }
950
951    /// Append the bits of another bit vector to the end of this vector.
952    /// If this vector does not contain a multiple of 64 bits, the appended limbs need to be
953    /// shifted to the left.
954    /// This function is guaranteed to reallocate the underlying vector at most once.
955    pub fn extend_bitvec(&mut self, other: &Self) {
956        // reserve space for the new bits, ensuring at most one re-allocation
957        self.data
958            .reserve((self.len + other.len).div_ceil(WORD_SIZE) - self.data.len());
959
960        let full_limbs = other.len() / WORD_SIZE;
961        for i in 0..full_limbs {
962            self.append_bits(other.data[i], WORD_SIZE);
963        }
964
965        let partial_bits = other.len % WORD_SIZE;
966        if partial_bits > 0 {
967            self.append_bits(other.data[full_limbs], partial_bits);
968        }
969    }
970
971    /// Return the length of the bit vector. The length is measured in bits.
972    #[must_use]
973    pub fn len(&self) -> usize {
974        self.len
975    }
976
977    /// Return whether the bit vector is empty (contains no bits).
978    #[must_use]
979    pub fn is_empty(&self) -> bool {
980        self.len == 0
981    }
982
983    /// Flip the bit at the given position.
984    ///
985    /// # Example
986    ///
987    /// ```rust
988    /// use vers_vecs::BitVec;
989    ///
990    /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
991    /// bv.flip_bit(1);
992    ///
993    /// assert_eq!(bv.len(), 6);
994    /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
995    /// ```
996    ///
997    /// # Panics
998    /// If the position is larger than the length of the vector, the function panics.
999    pub fn flip_bit(&mut self, pos: usize) {
1000        assert!(pos < self.len, "Index out of bounds");
1001        self.flip_bit_unchecked(pos);
1002    }
1003
1004    /// Flip the bit at the given position.
1005    ///
1006    /// See also: [`flip_bit`]
1007    ///
1008    /// # Panics
1009    /// If the position is larger than the length of the
1010    /// vector, the function will either modify unused memory or panic.
1011    /// This will not corrupt memory.
1012    ///
1013    /// [`flip_bit`]: BitVec::flip_bit
1014    pub fn flip_bit_unchecked(&mut self, pos: usize) {
1015        self.data[pos / WORD_SIZE] ^= 1 << (pos % WORD_SIZE);
1016    }
1017
1018    /// Return the bit at the given position.
1019    /// The bit is encoded in the least significant bit of a u64 value.
1020    /// If the position is larger than the length of the vector, None is returned.
1021    ///
1022    /// See also: [`get_unchecked`]
1023    ///
1024    /// # Example
1025    ///
1026    /// ```rust
1027    /// use vers_vecs::BitVec;
1028    ///
1029    /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
1030    ///
1031    /// assert_eq!(bv.get(1), Some(0));
1032    /// assert_eq!(bv.get(2), Some(1));
1033    /// ```
1034    ///
1035    /// [`get_unchecked`]: Self::get_unchecked
1036    #[must_use]
1037    pub fn get(&self, pos: usize) -> Option<u64> {
1038        if pos >= self.len {
1039            None
1040        } else {
1041            Some(self.get_unchecked(pos))
1042        }
1043    }
1044
1045    /// Return the bit at the given position.
1046    /// The bit is encoded in the least significant bit of a u64 value.
1047    ///
1048    /// # Panics
1049    /// If the position is larger than the length of the vector,
1050    /// the function will either return unpredictable data, or panic.
1051    /// Use [`get`] to properly handle this case with an `Option`.
1052    ///
1053    /// [`get`]: BitVec::get
1054    #[must_use]
1055    pub fn get_unchecked(&self, pos: usize) -> u64 {
1056        (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1
1057    }
1058
1059    /// Set the bit at the given position.
1060    /// The bit is encoded in the least significant bit of a u64 value.
1061    ///
1062    /// See also: [`set_unchecked`]
1063    ///
1064    /// # Example
1065    ///
1066    /// ```rust
1067    /// use vers_vecs::BitVec;
1068    ///
1069    /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
1070    /// bv.set(1, 1).unwrap();
1071    ///
1072    /// assert_eq!(bv.len(), 6);
1073    /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
1074    /// ```
1075    ///
1076    /// # Errors
1077    /// If the position is out of range, the function will return `Err` with an error message,
1078    /// otherwise it will return an empty `Ok`.
1079    ///
1080    /// [`set_unchecked`]: BitVec::set_unchecked
1081    pub fn set(&mut self, pos: usize, value: u64) -> Result<(), &str> {
1082        if pos >= self.len {
1083            Err("out of range")
1084        } else {
1085            self.set_unchecked(pos, value);
1086            Ok(())
1087        }
1088    }
1089
1090    /// Set the bit at the given position.
1091    /// The bit is encoded in the least significant bit of a u64 value.
1092    ///
1093    /// # Panics
1094    /// If the position is larger than the length of the vector,
1095    /// the function will either do nothing, or panic.
1096    /// Use [`set`] to properly handle this case with a `Result`.
1097    ///
1098    /// [`set`]: BitVec::set
1099    pub fn set_unchecked(&mut self, pos: usize, value: u64) {
1100        self.data[pos / WORD_SIZE] = (self.data[pos / WORD_SIZE] & !(0x1 << (pos % WORD_SIZE)))
1101            | ((value & 0x1) << (pos % WORD_SIZE));
1102    }
1103
1104    /// Return whether the bit at the given position is set.
1105    /// If the position is larger than the length of the vector, None is returned.
1106    ///
1107    /// See also: [`is_bit_set_unchecked`]
1108    ///
1109    /// # Example
1110    ///
1111    /// ```rust
1112    /// use vers_vecs::BitVec;
1113    ///
1114    /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
1115    ///
1116    /// assert!(!bv.is_bit_set(1).unwrap());
1117    /// assert!(bv.is_bit_set(2).unwrap());
1118    /// ```
1119    ///
1120    /// [`is_bit_set_unchecked`]: BitVec::is_bit_set_unchecked
1121    #[must_use]
1122    pub fn is_bit_set(&self, pos: usize) -> Option<bool> {
1123        if pos >= self.len {
1124            None
1125        } else {
1126            Some(self.is_bit_set_unchecked(pos))
1127        }
1128    }
1129
1130    /// Return whether the bit at the given position is set.
1131    ///
1132    /// # Panics
1133    /// If the position is larger than the length of the vector,
1134    /// the function will either return unpredictable data, or panic.
1135    /// Use [`is_bit_set`] to properly handle this case with an `Option`.
1136    ///
1137    /// [`is_bit_set`]: BitVec::is_bit_set
1138    #[must_use]
1139    pub fn is_bit_set_unchecked(&self, pos: usize) -> bool {
1140        self.get_unchecked(pos) != 0
1141    }
1142
1143    /// Return multiple bits at the given position.
1144    /// The number of bits to return is given by `len`.
1145    /// At most 64 bits can be returned.
1146    /// If the position at the end of the query is larger than the length of the vector,
1147    /// None is returned (even if the query partially overlaps with the vector).
1148    /// If the length of the query is larger than 64, None is returned.
1149    ///
1150    /// The first bit at `pos` is the most significant bit of the return value
1151    /// limited to `len` bits.
1152    #[must_use]
1153    pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> {
1154        if len > WORD_SIZE || len == 0 {
1155            return None;
1156        }
1157        if pos + len > self.len {
1158            None
1159        } else {
1160            Some(self.get_bits_unchecked(pos, len))
1161        }
1162    }
1163
1164    /// Return multiple bits at the given position. The number of bits to return is given by `len`.
1165    /// At most 64 bits can be returned.
1166    ///
1167    /// This function is always inlined, because it gains a lot from loop optimization and
1168    /// can utilize the processor pre-fetcher better if it is.
1169    ///
1170    /// # Errors
1171    /// If the length of the query is larger than 64, unpredictable data will be returned.
1172    /// Use [`get_bits`] to avoid this.
1173    ///
1174    /// # Panics
1175    /// If the position or interval is larger than the length of the vector,
1176    /// the function will either return any valid results padded with unpredictable
1177    /// data or panic.
1178    ///
1179    /// [`get_bits`]: BitVec::get_bits
1180    #[must_use]
1181    #[allow(clippy::inline_always)]
1182    #[allow(clippy::comparison_chain)] // readability
1183    #[inline(always)] // inline to gain loop optimization and pipeline advantages for elias fano
1184    #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen
1185    pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 {
1186        debug_assert!(len <= WORD_SIZE);
1187        let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE);
1188        if pos % WORD_SIZE + len <= WORD_SIZE {
1189            partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
1190        } else {
1191            (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE)))
1192                & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
1193        }
1194    }
1195
1196    /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
1197    /// `index`. The number of bits per encoded element is given by `n`.
1198    ///
1199    /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
1200    /// and is equivalent to calling [`get_bits(index * n, n)`].
1201    /// It is thus safe to use this method with any index and any size n <= 64.
1202    ///
1203    /// If the element is out of bounds, None is returned.
1204    /// The element is returned as a u64 value.
1205    ///
1206    /// # Example
1207    /// ```rust
1208    /// use vers_vecs::BitVec;
1209    ///
1210    /// let sequence = [10, 100, 124, 45, 223];
1211    /// let bv = BitVec::pack_sequence_u64(&sequence, 8);
1212    ///
1213    /// assert_eq!(bv.unpack_element(0, 8), Some(10));
1214    /// assert_eq!(bv.unpack_element(2, 8), Some(124));
1215    /// ```
1216    ///
1217    /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1218    /// [`get_bits(index * n, n)`]: BitVec::get_bits
1219    #[must_use]
1220    #[allow(clippy::inline_always)]
1221    #[inline(always)] // to gain optimization if n is constant
1222    pub fn unpack_element(&self, index: usize, n: usize) -> Option<u64> {
1223        self.get_bits(index * n, n)
1224    }
1225
1226    /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
1227    /// `index`. The number of bits per encoded element is given by `n`.
1228    ///
1229    /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
1230    /// and is equivalent to calling [`get_bits_unchecked(index * n, n)`].
1231    /// It is thus safe to use this method with any index where `index * n + n` is in-bounds,
1232    /// and any size n <= 64.
1233    ///
1234    /// # Panics
1235    /// If the element is out of bounds, the function will either return unpredictable data or panic.
1236    /// Use [`unpack_element`] for a checked version of this function.
1237    ///
1238    /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1239    /// [`get_bits_unchecked(index * n, n)`]: BitVec::get_bits_unchecked
1240    /// [`unpack_element`]: BitVec::unpack_element
1241    #[must_use]
1242    #[allow(clippy::inline_always)]
1243    #[inline(always)] // to gain optimization if n is constant
1244    pub fn unpack_element_unchecked(&self, index: usize, n: usize) -> u64 {
1245        self.get_bits_unchecked(index * n, n)
1246    }
1247
1248    /// Return the number of ones in the bit vector. Since the bit vector doesn't store additional
1249    /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1250    ///
1251    /// [`RsVec`]: crate::RsVec
1252    #[must_use]
1253    #[allow(clippy::missing_panics_doc)] // can't panic because of manual bounds check
1254    pub fn count_ones(&self) -> u64 {
1255        let mut ones: u64 = self.data[0..self.len / WORD_SIZE]
1256            .iter()
1257            .map(|limb| u64::from(limb.count_ones()))
1258            .sum();
1259        if !self.len.is_multiple_of(WORD_SIZE) {
1260            ones += u64::from(
1261                (self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones(),
1262            );
1263        }
1264        ones
1265    }
1266
1267    /// Return the number of zeros in the bit vector. Since the bit vector doesn't store additional
1268    /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1269    /// This method calls [`count_ones`].
1270    ///
1271    /// [`RsVec`]: crate::RsVec
1272    /// [`count_ones`]: BitVec::count_ones
1273    #[must_use]
1274    pub fn count_zeros(&self) -> u64 {
1275        self.len as u64 - self.count_ones()
1276    }
1277
1278    /// Mask this bit vector with another bitvector using bitwise or. The mask is applied lazily
1279    /// whenever an operation on the resulting vector is performed.
1280    ///
1281    /// # Errors
1282    /// Returns an error if the length of the vector doesn't match the mask length.
1283    #[inline]
1284    pub fn mask_or<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1285        MaskedBitVec::new(self, mask, |a, b| a | b)
1286    }
1287
1288    /// Mask this bit vector with another bitvector using bitwise or.
1289    /// The mask is applied immediately, unlike in [`mask_or`].
1290    ///
1291    /// # Errors
1292    /// Returns an error if the length of the vector doesn't match the mask length.
1293    ///
1294    /// [`mask_or`]: BitVec::mask_or
1295    pub fn apply_mask_or(&mut self, mask: &BitVec) -> Result<(), String> {
1296        if self.len != mask.len {
1297            return Err(String::from(
1298                "mask cannot have different length than vector",
1299            ));
1300        }
1301
1302        for i in 0..self.data.len() {
1303            self.data[i] |= mask.data[i];
1304        }
1305
1306        Ok(())
1307    }
1308
1309    /// Mask this bit vector with another bitvector using bitwise and. The mask is applied lazily
1310    /// whenever an operation on the resulting vector is performed.
1311    ///
1312    /// # Errors
1313    /// Returns an error if the length of the vector doesn't match the mask length.
1314    #[inline]
1315    pub fn mask_and<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1316        MaskedBitVec::new(self, mask, |a, b| a & b)
1317    }
1318
1319    /// Mask this bit vector with another bitvector using bitwise and.
1320    /// The mask is applied immediately, unlike in [`mask_and`].
1321    ///
1322    /// # Errors
1323    /// Returns an error if the length of the vector doesn't match the mask length.
1324    ///
1325    /// [`mask_and`]: BitVec::mask_and
1326    pub fn apply_mask_and(&mut self, mask: &BitVec) -> Result<(), String> {
1327        if self.len != mask.len {
1328            return Err(String::from(
1329                "mask cannot have different length than vector",
1330            ));
1331        }
1332
1333        for i in 0..self.data.len() {
1334            self.data[i] &= mask.data[i];
1335        }
1336
1337        Ok(())
1338    }
1339
1340    /// Mask this bit vector with another bitvector using bitwise xor. The mask is applied lazily
1341    /// whenever an operation on the resulting vector is performed.
1342    ///
1343    /// # Errors
1344    /// Returns an error if the length of the vector doesn't match the mask length.
1345    #[inline]
1346    pub fn mask_xor<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1347        MaskedBitVec::new(self, mask, |a, b| a ^ b)
1348    }
1349
1350    /// Mask this bit vector with another bitvector using bitwise xor.
1351    /// The mask is applied immediately, unlike in [`mask_xor`].
1352    ///
1353    /// # Errors
1354    /// Returns an error if the length of the vector doesn't match the mask length.
1355    ///
1356    /// [`mask_xor`]: BitVec::mask_xor
1357    pub fn apply_mask_xor(&mut self, mask: &BitVec) -> Result<(), String> {
1358        if self.len != mask.len {
1359            return Err(String::from(
1360                "mask cannot have different length than vector",
1361            ));
1362        }
1363
1364        for i in 0..self.data.len() {
1365            self.data[i] ^= mask.data[i];
1366        }
1367
1368        Ok(())
1369    }
1370
1371    /// Mask this bit vector with another bitvector using a custom masking operation. The mask is
1372    /// applied lazily whenever an operation on the resulting vector is performed.
1373    ///
1374    /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1375    /// The last block of a bit vector might contain fewer bits, and will be padded with
1376    /// unpredictable data. Implementations may choose to modify those padding bits without
1377    /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1378    /// within the vector is unspecified.
1379    ///
1380    /// # Errors
1381    /// Returns an error if the length of the vector doesn't match the mask length.
1382    #[inline]
1383    pub fn mask_custom<'s, 'b, F>(
1384        &'s self,
1385        mask: &'b BitVec,
1386        mask_op: F,
1387    ) -> Result<MaskedBitVec<'s, 'b, F>, String>
1388    where
1389        F: Fn(u64, u64) -> u64,
1390    {
1391        MaskedBitVec::new(self, mask, mask_op)
1392    }
1393
1394    /// Mask this bit vector with another bitvector using a custom masking operation.
1395    /// The mask is applied immediately, unlike in [`mask_custom`].
1396    ///
1397    /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1398    /// The last block of a bit vector might contain fewer bits, and will be padded with
1399    /// unpredictable data. Implementations may choose to modify those padding bits without
1400    /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1401    /// within the vector is unspecified.
1402    ///
1403    /// # Errors
1404    /// Returns an error if the length of the vector doesn't match the mask length.
1405    ///
1406    /// [`mask_custom`]: BitVec::mask_custom
1407    #[inline]
1408    pub fn apply_mask_custom(
1409        &mut self,
1410        mask: &BitVec,
1411        mask_op: fn(u64, u64) -> u64,
1412    ) -> Result<(), String> {
1413        if self.len != mask.len {
1414            return Err(String::from(
1415                "mask cannot have different length than vector",
1416            ));
1417        }
1418
1419        for i in 0..self.data.len() {
1420            self.data[i] = mask_op(self.data[i], mask.data[i]);
1421        }
1422
1423        Ok(())
1424    }
1425
1426    /// Returns the number of bytes on the heap for this vector.
1427    /// Does not include allocated memory that isn't used.
1428    #[must_use]
1429    pub fn heap_size(&self) -> usize {
1430        self.data.len() * size_of::<u64>()
1431    }
1432
1433    /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1434    /// right half the remaining bits `at..`. If the split index is larger than the length of the
1435    /// vector, the vector is returned unmodified in an `Err` variant.
1436    ///
1437    /// # Errors
1438    /// If the index is out of bounds, the function will return an error
1439    /// containing the original vector.
1440    ///
1441    /// See also: [`split_at_unchecked`]
1442    ///
1443    /// [`split_at_unchecked`]: Self::split_at_unchecked
1444    pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> {
1445        if at > self.len {
1446            Err(self)
1447        } else {
1448            Ok(self.split_at_unchecked(at))
1449        }
1450    }
1451
1452    /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1453    /// right half the remaining bits `at..`.
1454    ///
1455    /// # Panics
1456    /// If the index is larger than the length of the vector the function will panic or run
1457    /// out of memory.
1458    /// Use [`split_at`] to properly handle this case.
1459    ///
1460    /// [`split_at`]: Self::split_at
1461    #[must_use]
1462    pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) {
1463        let other_len = self.len - at;
1464        let mut other = Self::with_capacity(other_len);
1465
1466        if other_len == 0 {
1467            return (self, other);
1468        }
1469
1470        let first_limb = at / WORD_SIZE;
1471        let last_limb = self.len / WORD_SIZE;
1472
1473        // First, we figure out the number of bits from the first limb to retain in this vector:
1474        let leading_partial = at % WORD_SIZE;
1475
1476        // If the split point is in the last limb, and the vector ends before the last bit, first_limb
1477        // and last_limb will be equal, and the other half is simply other_len bits off the limb
1478        // right shifted by the number of bits to retain in this vector.
1479        if first_limb == last_limb {
1480            other.append_bits_unchecked(self.data[first_limb] >> leading_partial, other_len);
1481        } else {
1482            // Otherwise, some range n..last_limb should be copied in their entirety to the other half,
1483            // with n=first_limb+1 if the split point is inside the first limb (leading_partial > 0), or
1484            // n=first_limb if the entire first limb belongs in the other half.
1485            let full_limbs = if leading_partial > 0 {
1486                // If the split point is inside the first limb, we also have to remember to copy over
1487                // the trailing bits to the new vector.
1488                other.append_bits_unchecked(
1489                    self.data[first_limb] >> leading_partial,
1490                    WORD_SIZE - leading_partial,
1491                );
1492                first_limb + 1..last_limb
1493            } else {
1494                first_limb..last_limb
1495            };
1496
1497            // Copy over any full limbs.
1498            for i in full_limbs {
1499                other.append_bits_unchecked(self.data[i], WORD_SIZE);
1500            }
1501
1502            // Finally, if the vector has a partially filled last limb, we need to put those bits
1503            // in the other half.
1504            let trailing_partial = self.len % WORD_SIZE;
1505            if trailing_partial > 0 {
1506                other.append_bits_unchecked(self.data[last_limb], trailing_partial);
1507            }
1508        }
1509
1510        // remove the copied bits from the original vector
1511        self.drop_last(other_len);
1512
1513        (self, other)
1514    }
1515
1516    /// Iterate through the u64 limbs of the bitvector.
1517    /// The limbs are encoded starting at the least significant bit (i.e., the first bit in the bit
1518    /// vector is the least significant bit in the first limb).
1519    /// Note that the last limb may be incomplete, if the number of bits in the vector is not
1520    /// divisible by 64.
1521    /// In this case, the least significant `self.len() % 64` bits are the correct bits.
1522    /// The remaining bits of the last limb are in an unspecified state and code should not rely
1523    /// on them being set or unset.
1524    #[inline]
1525    pub fn iter_limbs(&self) -> impl Iterator<Item = u64> + use<'_> {
1526        self.data.iter().copied()
1527    }
1528}
1529
1530impl_vector_iterator! { BitVec, BitVecIter, BitVecRefIter }
1531
1532/// Create a new bit vector from a slice of u64 values.
1533/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1534/// The function will append the bits of each element to the bit vector in the order they are
1535/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1536impl From<&[u64]> for BitVec {
1537    fn from(data: &[u64]) -> Self {
1538        BitVec::from_limbs(data)
1539    }
1540}
1541
1542/// Create a new bit vector from a slice of u64 values.
1543/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1544/// The function will append the bits of each element to the bit vector in the order they are
1545/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1546impl From<Vec<u64>> for BitVec {
1547    fn from(data: Vec<u64>) -> Self {
1548        BitVec::from_limbs(&data)
1549    }
1550}
1551
1552impl Extend<BitVec> for BitVec {
1553    fn extend<T: IntoIterator<Item = BitVec>>(&mut self, iter: T) {
1554        for v in iter {
1555            self.extend_bitvec(&v)
1556        }
1557    }
1558}
1559
1560impl<'t> Extend<&'t BitVec> for BitVec {
1561    fn extend<T: IntoIterator<Item = &'t BitVec>>(&mut self, iter: T) {
1562        for v in iter {
1563            self.extend_bitvec(v)
1564        }
1565    }
1566}
1567
1568/// Create a new bit vector from u64 values.
1569/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1570/// The function will append the bits of each element to the bit vector in the order they are
1571/// given in the iterator (i.e. the first element takes bits `0..64` of the vector).
1572impl FromIterator<u64> for BitVec {
1573    fn from_iter<T: IntoIterator<Item = u64>>(iter: T) -> Self {
1574        BitVec::from_limbs_iter(iter)
1575    }
1576}
1577
1578impl PartialEq for BitVec {
1579    // unlike the auto-derived implementation, this custom implementation ignores junk data at
1580    // the end of the bit vector
1581    fn eq(&self, other: &Self) -> bool {
1582        if self.len != other.len {
1583            return false;
1584        }
1585
1586        if self.len == 0 {
1587            return true;
1588        }
1589
1590        for i in 0..self.data.len() - 1 {
1591            if self.data[i] != other.data[i] {
1592                return false;
1593            }
1594        }
1595
1596        // in last limb, ignore junk data
1597        let mask = (1 << (self.len % WORD_SIZE)) - 1;
1598        if self.data[self.data.len() - 1] & mask != other.data[self.data.len() - 1] & mask {
1599            return false;
1600        }
1601
1602        true
1603    }
1604}
1605
1606impl Eq for BitVec {}
1607
1608impl Hash for BitVec {
1609    fn hash<H: Hasher>(&self, state: &mut H) {
1610        state.write_usize(self.len);
1611        if self.len > 0 {
1612            self.data[0..self.data.len() - 1]
1613                .iter()
1614                .for_each(|x| state.write_u64(*x));
1615            let masked_last_limb = self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1);
1616            state.write_u64(masked_last_limb);
1617        }
1618    }
1619}
1620
1621#[cfg(test)]
1622mod tests;
vers_vecs/bit_vec/mod.rs

vers_vecs/bit_vec/
mod.rs