vers_vecs/bit_vec/
mod.rs

1//! This module contains a simple [bit vector][BitVec] implementation with no overhead and a fast succinct
2//! bit vector implementation with [rank and select queries][fast_rs_vec::RsVec].
3
4use crate::bit_vec::mask::MaskedBitVec;
5use crate::util::impl_vector_iterator;
6use std::cmp::min;
7use std::hash::{Hash, Hasher};
8use std::mem::size_of;
9
10pub mod fast_rs_vec;
11
12pub mod sparse;
13
14pub mod mask;
15
16/// Size of a word in bitvectors. All vectors operate on 64-bit words.
17const WORD_SIZE: usize = 64;
18
19/// Type alias for masked bitvectors that implement a simple bitwise binary operation.
20/// The first lifetime is for the bit vector that is being masked, the second lifetime is for the
21/// mask.
22pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>;
23
24/// A simple bit vector that does not support rank and select queries.
25/// Bits are stored in little-endian order, i.e. the least significant bit is stored first.
26/// The bit vector is stored as a sequence of 64 bit limbs.
27/// The last limb may be partially filled.
28///
29/// The bit vector has a wide range of constructors that allow for easy creation from various
30/// sources.
31/// Among them are constructors for creating an empty vector ([`BitVec::new`]),
32/// creating one from single bits of various integer types ([`BitVec::from_bits`] and variations),
33/// creating limbs from u64 values directly ([`BitVec::from_limbs`] and variations),
34/// or packing a sequence of numerical values into a dense bit sequence
35/// ([`BitVec::pack_sequence_u64`] and variations).
36///
37/// The bit vector can be modified after creation
38/// (e.g. by appending [bits](BitVec::append_bits)
39/// or [words](BitVec::append_word),
40/// [flipping](BitVec::flip_bit),
41/// or [setting](BitVec::set) bits).
42/// Bits can be [accessed](BitVec::get) by position,
43/// and [multiple bits](BitVec::get_bits) can be accessed at once.
44/// Bits can be [dropped](BitVec::drop_last) from the end.
45///
46/// # Example
47/// ```rust
48/// use vers_vecs::{BitVec, RsVec};
49///
50/// let mut bit_vec = BitVec::new();
51/// bit_vec.append_bit(0u64);
52/// bit_vec.append_bit_u32(1u32);
53/// bit_vec.append_word(0b1010_1010_1010_1010u64); // appends exactly 64 bits
54///
55/// assert_eq!(bit_vec.len(), 66);
56/// assert_eq!(bit_vec.get(0), Some(0u64));
57/// assert_eq!(bit_vec.get(1), Some(1u64));
58/// ```
59#[derive(Clone, Debug, Default)]
60#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
61pub struct BitVec {
62    data: Vec<u64>,
63    len: usize,
64}
65
66impl BitVec {
67    /// Create a new empty bit vector.
68    #[must_use]
69    pub fn new() -> Self {
70        Self::default()
71    }
72
73    /// Create a new empty bit vector with the given capacity.
74    /// The capacity is measured in bits.
75    /// The bit vector will be able to hold at least `capacity` bits without reallocating.
76    /// More memory may be allocated according to the underlying allocation strategy.
77    #[must_use]
78    pub fn with_capacity(capacity: usize) -> Self {
79        Self {
80            data: Vec::with_capacity(capacity / WORD_SIZE + 1),
81            len: 0,
82        }
83    }
84
85    /// Create a new bit vector with all zeros and the given length.
86    /// The length is measured in bits.
87    #[must_use]
88    pub fn from_zeros(len: usize) -> Self {
89        let mut data = vec![0; len / WORD_SIZE];
90        if !len.is_multiple_of(WORD_SIZE) {
91            data.push(0);
92        }
93        Self { data, len }
94    }
95
96    /// Create a new bit vector with all ones and the given length.
97    /// The length is measured in bits.
98    #[must_use]
99    pub fn from_ones(len: usize) -> Self {
100        let mut data = vec![u64::MAX; len / WORD_SIZE];
101        if !len.is_multiple_of(WORD_SIZE) {
102            data.push((1 << (len % WORD_SIZE)) - 1);
103        }
104        Self { data, len }
105    }
106
107    /// Construct a bit vector from a set of bits given as distinct u8 values.
108    /// The constructor will take the least significant bit from each value and append it to a
109    /// bit vector.
110    /// All other bits are ignored.
111    ///
112    /// See also: [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
113    ///
114    /// # Example
115    /// ```rust
116    /// use vers_vecs::BitVec;
117    ///
118    /// let bits: &[u8] = &[1, 0, 1, 1, 1, 1];
119    /// let bv = BitVec::from_bits(&bits);
120    ///
121    /// assert_eq!(bv.len(), 6);
122    /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
123    /// ```
124    ///
125    /// [`from_bits_u16`]: BitVec::from_bits_u16
126    /// [`from_bits_u32`]: BitVec::from_bits_u32
127    /// [`from_bits_u64`]: BitVec::from_bits_u64
128    /// [`from_bits_iter`]: BitVec::from_bits_iter
129    #[must_use]
130    pub fn from_bits(bits: &[u8]) -> Self {
131        let mut bv = Self::with_capacity(bits.len());
132        bits.iter().for_each(|&b| bv.append_bit(b.into()));
133        bv
134    }
135
136    /// Construct a bit vector from a set of bits given as distinct u16 values.
137    /// The constructor will take the least significant bit from each value and append it to a
138    /// bit vector.
139    /// All other bits are ignored.
140    ///
141    /// See also: [`from_bits`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
142    ///
143    /// [`from_bits`]: BitVec::from_bits
144    /// [`from_bits_u32`]: BitVec::from_bits_u32
145    /// [`from_bits_u64`]: BitVec::from_bits_u64
146    /// [`from_bits_iter`]: BitVec::from_bits_iter
147    #[must_use]
148    pub fn from_bits_u16(bits: &[u16]) -> Self {
149        let mut bv = Self::with_capacity(bits.len());
150        bits.iter().for_each(|&b| bv.append_bit_u16(b));
151        bv
152    }
153
154    /// Construct a bit vector from a set of bits given as distinct u32 values.
155    /// The constructor will take the least significant bit from each value and append it to a
156    /// bit vector.
157    /// All other bits are ignored.
158    ///
159    /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u64`], [`from_bits_iter`]
160    ///
161    /// [`from_bits`]: BitVec::from_bits
162    /// [`from_bits_u16`]: BitVec::from_bits_u16
163    /// [`from_bits_u64`]: BitVec::from_bits_u64
164    /// [`from_bits_iter`]: BitVec::from_bits_iter
165    #[must_use]
166    pub fn from_bits_u32(bits: &[u32]) -> Self {
167        let mut bv = Self::with_capacity(bits.len());
168        bits.iter().for_each(|&b| bv.append_bit_u32(b));
169        bv
170    }
171
172    /// Construct a bit vector from a set of bits given as distinct u64 values.
173    /// The constructor will take the least significant bit from each value and append it to a
174    /// bit vector.
175    /// All other bits are ignored.
176    ///
177    /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_iter`]
178    ///
179    /// [`from_bits`]: BitVec::from_bits
180    /// [`from_bits_u16`]: BitVec::from_bits_u16
181    /// [`from_bits_u32`]: BitVec::from_bits_u32
182    /// [`from_bits_iter`]: BitVec::from_bits_iter
183    #[must_use]
184    pub fn from_bits_u64(bits: &[u64]) -> Self {
185        let mut bv = Self::with_capacity(bits.len());
186        bits.iter().for_each(|&b| bv.append_bit(b));
187        bv
188    }
189
190    /// Construct a bit vector from an iterator of bits.
191    /// The constructor will take the least significant bit from each value and append it to a
192    /// bit vector.
193    /// All other bits are ignored.
194    /// The iterator must yield values that can be converted into u64 values.
195    ///
196    /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`]
197    ///
198    /// # Example
199    /// ```rust
200    /// use vers_vecs::BitVec;
201    ///
202    /// let bits = [true, false, true, true, true, true];
203    /// let bv = BitVec::from_bits_iter(bits.iter().copied());
204    ///
205    /// let bits = [0b1u8, 0b0, 0b1, 0b1, 0b1, 0b1];
206    /// let bv2 = BitVec::from_bits_iter(bits.iter().copied());
207    ///
208    /// assert_eq!(bv.len(), 6);
209    /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
210    /// assert_eq!(bv, bv2);
211    /// ```
212    ///
213    /// [`from_bits`]: BitVec::from_bits
214    /// [`from_bits_u16`]: BitVec::from_bits_u16
215    /// [`from_bits_u32`]: BitVec::from_bits_u32
216    /// [`from_bits_u64`]: BitVec::from_bits_u64
217    #[must_use]
218    pub fn from_bits_iter<I, E>(iter: I) -> Self
219    where
220        E: Into<u64>,
221        I: IntoIterator<Item = E>,
222    {
223        let iter = iter.into_iter();
224        let mut bv = Self::with_capacity(iter.size_hint().0);
225        for bit in iter {
226            bv.append_bit(bit.into());
227        }
228        bv
229    }
230
231    /// Construct a bit vector from a slice of u64 quad words.
232    /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
233    /// 64 bits to the bit vector).
234    /// Since the data is only cloned without any masking or transformation,
235    /// this is one of the fastest ways to create a bit vector.
236    ///
237    /// See also: [`from_vec`], [`from_limbs_iter`]
238    ///
239    /// # Example
240    /// ```rust
241    /// use vers_vecs::BitVec;
242    ///
243    /// let words = [0, 256, u64::MAX];
244    /// let bv = BitVec::from_limbs(&words);
245    ///
246    /// assert_eq!(bv.len(), 192);
247    /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
248    /// assert_eq!(bv.get(72), Some(1));
249    /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
250    /// ```
251    ///
252    /// [`from_vec`]: BitVec::from_vec
253    /// [`from_limbs_iter`]: BitVec::from_limbs_iter
254    #[must_use]
255    pub fn from_limbs(words: &[u64]) -> Self {
256        let len = words.len() * WORD_SIZE;
257        Self {
258            data: words.to_vec(),
259            len,
260        }
261    }
262
263    /// Construct a bit vector from an iterator of u64 quad words.
264    /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
265    /// 64 bits to the bit vector).
266    /// Since the data is only cloned without any masking or transformation,
267    /// this is one of the fastest ways to create a bit vector.
268    ///
269    /// See also: [`from_limbs`], [`from_vec`]
270    ///
271    /// # Example
272    /// ```rust
273    /// use std::iter::repeat;
274    /// use vers_vecs::BitVec;
275    ///
276    /// let zeros = repeat(0xaaaaaaaaaaaaaaaau64).take(10);
277    /// let bv = BitVec::from_limbs_iter(zeros);
278    ///
279    /// assert_eq!(bv.len(), 640);
280    /// for i in 0..640 {
281    ///    assert_eq!(bv.get(i), Some((i % 2 == 1) as u64));
282    /// }
283    /// ```
284    ///
285    /// [`from_limbs`]: BitVec::from_limbs
286    /// [`from_vec`]: BitVec::from_vec
287    pub fn from_limbs_iter<I, E>(iter: I) -> Self
288    where
289        E: Into<u64>,
290        I: IntoIterator<Item = E>,
291    {
292        let vec = iter.into_iter().map(Into::into).collect();
293        Self::from_vec(vec)
294    }
295
296    /// Construct a bit vector from a vector of u64 quad words.
297    /// The quad words are interpreted as limbs of the bit vector
298    /// (i.e. each quad word contributes 64 bits to the bit vector).
299    /// Since the data is moved without any masking or transformation, this is one of the fastest ways
300    /// to create a bit vector.
301    ///
302    /// See also: [`from_limbs`], [`from_limbs_iter`]
303    ///
304    /// # Example
305    /// ```rust
306    /// use vers_vecs::BitVec;
307    ///
308    /// let words = vec![0, 256, u64::MAX];
309    /// let bv = BitVec::from_vec(words);
310    ///
311    /// assert_eq!(bv.len(), 192);
312    /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
313    /// assert_eq!(bv.get(72), Some(1));
314    /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
315    /// ```
316    ///
317    /// [`from_limbs`]: BitVec::from_limbs
318    /// [`from_limbs_iter`]: BitVec::from_limbs_iter
319    #[must_use]
320    pub fn from_vec(data: Vec<u64>) -> Self {
321        let len = data.len() * WORD_SIZE;
322        Self { data, len }
323    }
324
325    fn pack_bits<T, const MAX_BITS: usize>(sequence: &[T], bits_per_element: usize) -> Self
326    where
327        T: Into<u64> + Copy,
328    {
329        let mut bv = Self::with_capacity(sequence.len() * bits_per_element);
330        for &word in sequence {
331            if bits_per_element <= MAX_BITS {
332                bv.append_bits(word.into(), bits_per_element);
333            } else {
334                bv.append_bits(word.into(), MAX_BITS);
335                let mut rest = bits_per_element - MAX_BITS;
336                while rest > 0 {
337                    bv.append_bits(0, min(rest, MAX_BITS));
338                    rest = rest.saturating_sub(MAX_BITS);
339                }
340            }
341        }
342        bv
343    }
344
345    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
346    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
347    /// The number of bits per element is given by `bits_per_element`.
348    /// The sequence is given as a slice of u64 values.
349    /// If the number of bits per element is smaller than 64, the function takes the
350    /// least significant bits of each element, and discards the rest.
351    /// If the number of bits per element is larger than 64, the function will pad the elements
352    /// with zeros.
353    /// The function will append the bits of each element to the bit vector in the order they are
354    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
355    ///
356    /// See also: [`pack_sequence_u32`], [`pack_sequence_u16`], [`pack_sequence_u8`]
357    ///
358    /// # Example
359    /// ```rust
360    /// use vers_vecs::BitVec;
361    ///
362    /// let sequence = [0b1010u64, 0b1100u64, 0b1111u64];
363    /// let bv = BitVec::pack_sequence_u64(&sequence, 4);
364    ///
365    /// assert_eq!(bv.len(), 12);
366    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
367    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
368    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
369    /// ```
370    ///
371    /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
372    /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
373    /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
374    #[must_use]
375    pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: usize) -> Self {
376        Self::pack_bits::<_, 64>(sequence, bits_per_element)
377    }
378
379    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
380    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
381    /// The number of bits per element is given by `bits_per_element`.
382    /// The sequence is given as a slice of u32 values.
383    /// If the number of bits per element is smaller than 32, the function takes the
384    /// least significant bits of each element, and discards the rest.
385    /// If the number of bits per element is larger than 32, the function will pad the elements
386    /// with zeros.
387    /// The function will append the bits of each element to the bit vector in the order they are
388    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
389    ///
390    /// See also: [`pack_sequence_u64`], [`pack_sequence_u16`], [`pack_sequence_u8`]
391    ///
392    /// # Example
393    /// ```rust
394    /// use vers_vecs::BitVec;
395    ///
396    /// let sequence = [0b1010u32, 0b1100u32, 0b1111u32];
397    /// let bv = BitVec::pack_sequence_u32(&sequence, 4);
398    ///
399    /// assert_eq!(bv.len(), 12);
400    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
401    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
402    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
403    /// ```
404    ///
405    /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
406    /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
407    /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
408    #[must_use]
409    pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: usize) -> Self {
410        Self::pack_bits::<_, 32>(sequence, bits_per_element)
411    }
412
413    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
414    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
415    /// The number of bits per element is given by `bits_per_element`.
416    /// The sequence is given as a slice of u16 values.
417    /// If the number of bits per element is smaller than 16, the function takes the
418    /// least significant bits of each element, and discards the rest.
419    /// If the number of bits per element is larger than 16, the function will pad the elements
420    /// with zeros.
421    /// The function will append the bits of each element to the bit vector in the order they are
422    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
423    ///
424    /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u8`]
425    ///
426    /// # Example
427    /// ```rust
428    /// use vers_vecs::BitVec;
429    ///
430    /// let sequence = [0b1010u16, 0b1100u16, 0b1111u16];
431    /// let bv = BitVec::pack_sequence_u16(&sequence, 4);
432    ///
433    /// assert_eq!(bv.len(), 12);
434    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
435    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
436    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
437    /// ```
438    ///
439    /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
440    /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
441    /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
442    #[must_use]
443    pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: usize) -> Self {
444        Self::pack_bits::<_, 16>(sequence, bits_per_element)
445    }
446
447    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
448    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
449    /// The number of bits per element is given by `bits_per_element`.
450    /// The sequence is given as a slice of u8 values.
451    /// If the number of bits per element is smaller than 8, the function takes the
452    /// least significant bits of each element, and discards the rest.
453    /// If the number of bits per element is larger than 8, the function will pad the elements
454    /// with zeros.
455    /// The function will append the bits of each element to the bit vector in the order they are
456    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
457    ///
458    /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u16`]
459    ///
460    /// # Example
461    /// ```rust
462    /// use vers_vecs::BitVec;
463    ///
464    /// let sequence = [0b1010u8, 0b1100u8, 0b1111u8];
465    /// let bv = BitVec::pack_sequence_u8(&sequence, 4);
466    ///
467    /// assert_eq!(bv.len(), 12);
468    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
469    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
470    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
471    /// ```
472    ///
473    /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
474    /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
475    /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
476    #[must_use]
477    pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: usize) -> Self {
478        Self::pack_bits::<_, 8>(sequence, bits_per_element)
479    }
480
481    /// Append a bit encoded as a `bool` to the bit vector, where `true` means 1 and `false` means 0.
482    ///
483    /// See also: [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
484    ///
485    /// # Example
486    ///
487    /// ```rust
488    /// use vers_vecs::BitVec;
489    ///
490    /// let mut bv = BitVec::new();
491    /// bv.append(true);
492    ///
493    /// assert_eq!(bv.len(), 1);
494    /// assert_eq!(bv.get(0), Some(1));
495    /// ```
496    ///
497    /// [`append_bit`]: BitVec::append_bit
498    /// [`append_bit_u32`]: BitVec::append_bit_u32
499    /// [`append_bit_u16`]: BitVec::append_bit_u16
500    /// [`append_bit_u8`]: BitVec::append_bit_u8
501    /// [`append_word`]: BitVec::append_word
502    pub fn append(&mut self, bit: bool) {
503        if self.len.is_multiple_of(WORD_SIZE) {
504            self.data.push(0);
505        }
506        if bit {
507            self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
508        } else {
509            self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
510        }
511        self.len += 1;
512    }
513
514    /// Drop the last n bits from the bit vector. If more bits are dropped than the bit vector
515    /// contains, the bit vector is cleared.
516    ///
517    /// # Example
518    ///
519    /// ```rust
520    /// use vers_vecs::BitVec;
521    ///
522    /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
523    /// bv.drop_last(3);
524    ///
525    /// assert_eq!(bv.len(), 3);
526    /// assert_eq!(bv.get_bits(0, 3), Some(0b101u64));
527    ///
528    /// bv.drop_last(4);
529    ///
530    /// assert!(bv.is_empty());
531    /// ```
532    pub fn drop_last(&mut self, n: usize) {
533        if n > self.len {
534            self.data.clear();
535            self.len = 0;
536            return;
537        }
538
539        let new_limb_count = (self.len - n).div_ceil(WORD_SIZE);
540
541        // cut off limbs that we no longer need
542        if new_limb_count < self.data.len() {
543            self.data.truncate(new_limb_count);
544        }
545
546        // update bit vector length
547        self.len -= n;
548    }
549
550    /// Append a bit encoded in a u64.
551    /// The least significant bit is appended to the bit vector.
552    /// All other bits are ignored.
553    ///
554    /// See also: [`append`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
555    ///
556    /// # Example
557    ///
558    /// ```rust
559    /// use vers_vecs::BitVec;
560    ///
561    /// let mut bv = BitVec::new();
562    ///
563    /// bv.append_bit(1);
564    /// bv.append_bit(0);
565    ///
566    /// assert_eq!(bv.len(), 2);
567    /// assert_eq!(bv.get(0), Some(1));
568    /// assert_eq!(bv.get(1), Some(0));
569    /// ```
570    ///
571    /// [`append`]: BitVec::append
572    /// [`append_bit_u32`]: BitVec::append_bit_u32
573    /// [`append_bit_u16`]: BitVec::append_bit_u16
574    /// [`append_bit_u8`]: BitVec::append_bit_u8
575    /// [`append_word`]: BitVec::append_word
576    pub fn append_bit(&mut self, bit: u64) {
577        if self.len.is_multiple_of(WORD_SIZE) {
578            self.data.push(0);
579        }
580        if bit % 2 == 1 {
581            self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
582        } else {
583            self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
584        }
585
586        self.len += 1;
587    }
588
589    /// Append a bit from a u32. The least significant bit is appended to the bit vector.
590    /// All other bits are ignored.
591    ///
592    /// See also: [`append`], [`append_bit`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
593    ///
594    /// [`append`]: BitVec::append
595    /// [`append_bit`]: BitVec::append_bit
596    /// [`append_bit_u16`]: BitVec::append_bit_u16
597    /// [`append_bit_u8`]: BitVec::append_bit_u8
598    /// [`append_word`]: BitVec::append_word
599    pub fn append_bit_u32(&mut self, bit: u32) {
600        self.append_bit(u64::from(bit));
601    }
602
603    /// Append a bit from a u16. The least significant bit is appended to the bit vector.
604    /// All other bits are ignored.
605    ///
606    /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u8`], [`append_word`]
607    ///
608    /// [`append`]: BitVec::append
609    /// [`append_bit`]: BitVec::append_bit
610    /// [`append_bit_u32`]: BitVec::append_bit_u32
611    /// [`append_bit_u8`]: BitVec::append_bit_u8
612    /// [`append_word`]: BitVec::append_word
613    pub fn append_bit_u16(&mut self, bit: u16) {
614        self.append_bit(u64::from(bit));
615    }
616
617    /// Append a bit from a u8. The least significant bit is appended to the bit vector.
618    /// All other bits are ignored.
619    ///
620    /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_word`]
621    ///
622    /// [`append`]: BitVec::append
623    /// [`append_bit`]: BitVec::append_bit
624    /// [`append_bit_u32`]: BitVec::append_bit_u32
625    /// [`append_bit_u16`]: BitVec::append_bit_u16
626    /// [`append_word`]: BitVec::append_word
627    pub fn append_bit_u8(&mut self, bit: u8) {
628        self.append_bit(u64::from(bit));
629    }
630
631    /// Append a word to the bit vector. The bits are appended in little endian order (i.e. the first
632    /// bit of the word is appended first).
633    ///
634    /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`]
635    ///
636    /// # Example
637    ///
638    /// ```rust
639    /// use vers_vecs::BitVec;
640    ///
641    /// let mut bv = BitVec::new();
642    /// bv.append_word(0b1010_1010_1010_1010u64);
643    ///
644    /// assert_eq!(bv.len(), 64);
645    /// for i in 0..64 {
646    ///    assert_eq!(bv.get(i), Some((0b1010_1010_1010_1010u64 >> i) & 1));
647    /// }
648    /// ```
649    ///
650    /// [`append`]: BitVec::append
651    /// [`append_bit`]: BitVec::append_bit
652    /// [`append_bit_u32`]: BitVec::append_bit_u32
653    /// [`append_bit_u16`]: BitVec::append_bit_u16
654    /// [`append_bit_u8`]: BitVec::append_bit_u8
655    pub fn append_word(&mut self, word: u64) {
656        if self.len.is_multiple_of(WORD_SIZE) {
657            self.data.push(word);
658        } else {
659            // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
660            self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
661            self.data[self.len / WORD_SIZE] |= word << (self.len % WORD_SIZE);
662
663            self.data.push(word >> (WORD_SIZE - self.len % WORD_SIZE));
664        }
665        self.len += WORD_SIZE;
666    }
667
668    /// Append multiple bits to the bit vector.
669    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
670    /// The number of bits to append is given by `len`. The bits are taken from the least
671    /// significant bits of `bits`.
672    /// All other bits are ignored.
673    ///
674    /// # Example
675    ///
676    /// ```rust
677    /// use vers_vecs::BitVec;
678    ///
679    /// let mut bv = BitVec::new();
680    /// bv.append_bits(0b1010_1010_1010_1010u64, 16);
681    ///
682    /// assert_eq!(bv.len(), 16);
683    /// assert_eq!(bv.get_bits(0, 16), Some(0b1010_1010_1010_1010u64));
684    /// ```
685    ///
686    /// # Panics
687    /// Panics if `len` is larger than 64.
688    pub fn append_bits(&mut self, bits: u64, len: usize) {
689        assert!(len <= 64, "Cannot append more than 64 bits");
690
691        if self.len.is_multiple_of(WORD_SIZE) {
692            self.data.push(bits);
693        } else {
694            // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
695            self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
696            self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
697
698            if self.len % WORD_SIZE + len > WORD_SIZE {
699                self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
700            }
701        }
702        self.len += len;
703    }
704
705    /// Append multiple bits to the bit vector.
706    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
707    /// The number of bits to append is given by `len`. The bits are taken from the least
708    /// significant bits of `bits`.
709    ///
710    /// This function does not check if `len` is larger than 64.
711    ///
712    /// Furthermore, if the bit-vector has trailing bits that are not zero
713    /// (i.e. the length is not a multiple of 64, and those bits are partially set),
714    /// the function will OR the new data with the trailing bits, destroying the appended data.
715    /// This can happen, if a call `append_bits[_unchecked](word, len)` appends a word which has
716    /// set bits beyond the `len - 1`-th bit,
717    /// or if bits have been dropped from the bit vector using [`drop_last`].
718    ///
719    /// See [`append_bits`] for a checked version of this function.
720    ///
721    /// # Panics
722    /// If `len` is larger than 64, the behavior is platform-dependent, and a processor
723    /// exception might be triggered.
724    ///
725    /// [`append_bits`]: BitVec::append_bits
726    /// [`drop_last`]: BitVec::drop_last
727    pub fn append_bits_unchecked(&mut self, bits: u64, len: usize) {
728        if self.len.is_multiple_of(WORD_SIZE) {
729            self.data.push(bits);
730        } else {
731            self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
732
733            if self.len % WORD_SIZE + len > WORD_SIZE {
734                self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
735            }
736        }
737        self.len += len;
738    }
739
740    /// Append the bits of another bit vector to the end of this vector.
741    /// If this vector does not contain a multiple of 64 bits, the appended limbs need to be
742    /// shifted to the left.
743    /// This function is guaranteed to reallocate the underlying vector at most once.
744    pub fn extend_bitvec(&mut self, other: &Self) {
745        // reserve space for the new bits, ensuring at most one re-allocation
746        self.data
747            .reserve((self.len + other.len).div_ceil(WORD_SIZE) - self.data.len());
748
749        let full_limbs = other.len() / WORD_SIZE;
750        for i in 0..full_limbs {
751            self.append_bits(other.data[i], WORD_SIZE);
752        }
753
754        let partial_bits = other.len % WORD_SIZE;
755        if partial_bits > 0 {
756            self.append_bits(other.data[full_limbs], partial_bits);
757        }
758    }
759
760    /// Return the length of the bit vector. The length is measured in bits.
761    #[must_use]
762    pub fn len(&self) -> usize {
763        self.len
764    }
765
766    /// Return whether the bit vector is empty (contains no bits).
767    #[must_use]
768    pub fn is_empty(&self) -> bool {
769        self.len == 0
770    }
771
772    /// Flip the bit at the given position.
773    ///
774    /// # Example
775    ///
776    /// ```rust
777    /// use vers_vecs::BitVec;
778    ///
779    /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
780    /// bv.flip_bit(1);
781    ///
782    /// assert_eq!(bv.len(), 6);
783    /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
784    /// ```
785    ///
786    /// # Panics
787    /// If the position is larger than the length of the vector, the function panics.
788    pub fn flip_bit(&mut self, pos: usize) {
789        assert!(pos < self.len, "Index out of bounds");
790        self.flip_bit_unchecked(pos);
791    }
792
793    /// Flip the bit at the given position.
794    ///
795    /// See also: [`flip_bit`]
796    ///
797    /// # Panics
798    /// If the position is larger than the length of the
799    /// vector, the function will either modify unused memory or panic.
800    /// This will not corrupt memory.
801    ///
802    /// [`flip_bit`]: BitVec::flip_bit
803    pub fn flip_bit_unchecked(&mut self, pos: usize) {
804        self.data[pos / WORD_SIZE] ^= 1 << (pos % WORD_SIZE);
805    }
806
807    /// Return the bit at the given position.
808    /// The bit is encoded in the least significant bit of a u64 value.
809    /// If the position is larger than the length of the vector, None is returned.
810    ///
811    /// See also: [`get_unchecked`]
812    ///
813    /// # Example
814    ///
815    /// ```rust
816    /// use vers_vecs::BitVec;
817    ///
818    /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
819    ///
820    /// assert_eq!(bv.get(1), Some(0));
821    /// assert_eq!(bv.get(2), Some(1));
822    /// ```
823    ///
824    /// [`get_unchecked`]: Self::get_unchecked
825    #[must_use]
826    pub fn get(&self, pos: usize) -> Option<u64> {
827        if pos >= self.len {
828            None
829        } else {
830            Some(self.get_unchecked(pos))
831        }
832    }
833
834    /// Return the bit at the given position.
835    /// The bit is encoded in the least significant bit of a u64 value.
836    ///
837    /// # Panics
838    /// If the position is larger than the length of the vector,
839    /// the function will either return unpredictable data, or panic.
840    /// Use [`get`] to properly handle this case with an `Option`.
841    ///
842    /// [`get`]: BitVec::get
843    #[must_use]
844    pub fn get_unchecked(&self, pos: usize) -> u64 {
845        (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1
846    }
847
848    /// Set the bit at the given position.
849    /// The bit is encoded in the least significant bit of a u64 value.
850    ///
851    /// See also: [`set_unchecked`]
852    ///
853    /// # Example
854    ///
855    /// ```rust
856    /// use vers_vecs::BitVec;
857    ///
858    /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
859    /// bv.set(1, 1).unwrap();
860    ///
861    /// assert_eq!(bv.len(), 6);
862    /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
863    /// ```
864    ///
865    /// # Errors
866    /// If the position is out of range, the function will return `Err` with an error message,
867    /// otherwise it will return an empty `Ok`.
868    ///
869    /// [`set_unchecked`]: BitVec::set_unchecked
870    pub fn set(&mut self, pos: usize, value: u64) -> Result<(), &str> {
871        if pos >= self.len {
872            Err("out of range")
873        } else {
874            self.set_unchecked(pos, value);
875            Ok(())
876        }
877    }
878
879    /// Set the bit at the given position.
880    /// The bit is encoded in the least significant bit of a u64 value.
881    ///
882    /// # Panics
883    /// If the position is larger than the length of the vector,
884    /// the function will either do nothing, or panic.
885    /// Use [`set`] to properly handle this case with a `Result`.
886    ///
887    /// [`set`]: BitVec::set
888    pub fn set_unchecked(&mut self, pos: usize, value: u64) {
889        self.data[pos / WORD_SIZE] = (self.data[pos / WORD_SIZE] & !(0x1 << (pos % WORD_SIZE)))
890            | ((value & 0x1) << (pos % WORD_SIZE));
891    }
892
893    /// Return whether the bit at the given position is set.
894    /// If the position is larger than the length of the vector, None is returned.
895    ///
896    /// See also: [`is_bit_set_unchecked`]
897    ///
898    /// # Example
899    ///
900    /// ```rust
901    /// use vers_vecs::BitVec;
902    ///
903    /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
904    ///
905    /// assert!(!bv.is_bit_set(1).unwrap());
906    /// assert!(bv.is_bit_set(2).unwrap());
907    /// ```
908    ///
909    /// [`is_bit_set_unchecked`]: BitVec::is_bit_set_unchecked
910    #[must_use]
911    pub fn is_bit_set(&self, pos: usize) -> Option<bool> {
912        if pos >= self.len {
913            None
914        } else {
915            Some(self.is_bit_set_unchecked(pos))
916        }
917    }
918
919    /// Return whether the bit at the given position is set.
920    ///
921    /// # Panics
922    /// If the position is larger than the length of the vector,
923    /// the function will either return unpredictable data, or panic.
924    /// Use [`is_bit_set`] to properly handle this case with an `Option`.
925    ///
926    /// [`is_bit_set`]: BitVec::is_bit_set
927    #[must_use]
928    pub fn is_bit_set_unchecked(&self, pos: usize) -> bool {
929        self.get_unchecked(pos) != 0
930    }
931
932    /// Return multiple bits at the given position.
933    /// The number of bits to return is given by `len`.
934    /// At most 64 bits can be returned.
935    /// If the position at the end of the query is larger than the length of the vector,
936    /// None is returned (even if the query partially overlaps with the vector).
937    /// If the length of the query is larger than 64, None is returned.
938    ///
939    /// The first bit at `pos` is the most significant bit of the return value
940    /// limited to `len` bits.
941    #[must_use]
942    pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> {
943        if len > WORD_SIZE || len == 0 {
944            return None;
945        }
946        if pos + len > self.len {
947            None
948        } else {
949            Some(self.get_bits_unchecked(pos, len))
950        }
951    }
952
953    /// Return multiple bits at the given position. The number of bits to return is given by `len`.
954    /// At most 64 bits can be returned.
955    ///
956    /// This function is always inlined, because it gains a lot from loop optimization and
957    /// can utilize the processor pre-fetcher better if it is.
958    ///
959    /// # Errors
960    /// If the length of the query is larger than 64, unpredictable data will be returned.
961    /// Use [`get_bits`] to avoid this.
962    ///
963    /// # Panics
964    /// If the position or interval is larger than the length of the vector,
965    /// the function will either return any valid results padded with unpredictable
966    /// data or panic.
967    ///
968    /// [`get_bits`]: BitVec::get_bits
969    #[must_use]
970    #[allow(clippy::inline_always)]
971    #[allow(clippy::comparison_chain)] // readability
972    #[inline(always)] // inline to gain loop optimization and pipeline advantages for elias fano
973    #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen
974    pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 {
975        debug_assert!(len <= WORD_SIZE);
976        let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE);
977        if pos % WORD_SIZE + len <= WORD_SIZE {
978            partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
979        } else {
980            (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE)))
981                & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
982        }
983    }
984
985    /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
986    /// `index`. The number of bits per encoded element is given by `n`.
987    ///
988    /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
989    /// and is equivalent to calling [`get_bits(index * n, n)`].
990    /// It is thus safe to use this method with any index and any size n <= 64.
991    ///
992    /// If the element is out of bounds, None is returned.
993    /// The element is returned as a u64 value.
994    ///
995    /// # Example
996    /// ```rust
997    /// use vers_vecs::BitVec;
998    ///
999    /// let sequence = [10, 100, 124, 45, 223];
1000    /// let bv = BitVec::pack_sequence_u64(&sequence, 8);
1001    ///
1002    /// assert_eq!(bv.unpack_element(0, 8), Some(10));
1003    /// assert_eq!(bv.unpack_element(2, 8), Some(124));
1004    /// ```
1005    ///
1006    /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1007    /// [`get_bits(index * n, n)`]: BitVec::get_bits
1008    #[must_use]
1009    #[allow(clippy::inline_always)]
1010    #[inline(always)] // to gain optimization if n is constant
1011    pub fn unpack_element(&self, index: usize, n: usize) -> Option<u64> {
1012        self.get_bits(index * n, n)
1013    }
1014
1015    /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
1016    /// `index`. The number of bits per encoded element is given by `n`.
1017    ///
1018    /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
1019    /// and is equivalent to calling [`get_bits_unchecked(index * n, n)`].
1020    /// It is thus safe to use this method with any index where `index * n + n` is in-bounds,
1021    /// and any size n <= 64.
1022    ///
1023    /// # Panics
1024    /// If the element is out of bounds, the function will either return unpredictable data or panic.
1025    /// Use [`unpack_element`] for a checked version of this function.
1026    ///
1027    /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1028    /// [`get_bits_unchecked(index * n, n)`]: BitVec::get_bits_unchecked
1029    /// [`unpack_element`]: BitVec::unpack_element
1030    #[must_use]
1031    #[allow(clippy::inline_always)]
1032    #[inline(always)] // to gain optimization if n is constant
1033    pub fn unpack_element_unchecked(&self, index: usize, n: usize) -> u64 {
1034        self.get_bits_unchecked(index * n, n)
1035    }
1036
1037    /// Return the number of ones in the bit vector. Since the bit vector doesn't store additional
1038    /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1039    ///
1040    /// [`RsVec`]: crate::RsVec
1041    #[must_use]
1042    #[allow(clippy::missing_panics_doc)] // can't panic because of manual bounds check
1043    pub fn count_ones(&self) -> u64 {
1044        let mut ones: u64 = self.data[0..self.len / WORD_SIZE]
1045            .iter()
1046            .map(|limb| u64::from(limb.count_ones()))
1047            .sum();
1048        if !self.len.is_multiple_of(WORD_SIZE) {
1049            ones += u64::from(
1050                (self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones(),
1051            );
1052        }
1053        ones
1054    }
1055
1056    /// Return the number of zeros in the bit vector. Since the bit vector doesn't store additional
1057    /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1058    /// This method calls [`count_ones`].
1059    ///
1060    /// [`RsVec`]: crate::RsVec
1061    /// [`count_ones`]: BitVec::count_ones
1062    #[must_use]
1063    pub fn count_zeros(&self) -> u64 {
1064        self.len as u64 - self.count_ones()
1065    }
1066
1067    /// Mask this bit vector with another bitvector using bitwise or. The mask is applied lazily
1068    /// whenever an operation on the resulting vector is performed.
1069    ///
1070    /// # Errors
1071    /// Returns an error if the length of the vector doesn't match the mask length.
1072    #[inline]
1073    pub fn mask_or<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1074        MaskedBitVec::new(self, mask, |a, b| a | b)
1075    }
1076
1077    /// Mask this bit vector with another bitvector using bitwise or.
1078    /// The mask is applied immediately, unlike in [`mask_or`].
1079    ///
1080    /// # Errors
1081    /// Returns an error if the length of the vector doesn't match the mask length.
1082    ///
1083    /// [`mask_or`]: BitVec::mask_or
1084    pub fn apply_mask_or(&mut self, mask: &BitVec) -> Result<(), String> {
1085        if self.len != mask.len {
1086            return Err(String::from(
1087                "mask cannot have different length than vector",
1088            ));
1089        }
1090
1091        for i in 0..self.data.len() {
1092            self.data[i] |= mask.data[i];
1093        }
1094
1095        Ok(())
1096    }
1097
1098    /// Mask this bit vector with another bitvector using bitwise and. The mask is applied lazily
1099    /// whenever an operation on the resulting vector is performed.
1100    ///
1101    /// # Errors
1102    /// Returns an error if the length of the vector doesn't match the mask length.
1103    #[inline]
1104    pub fn mask_and<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1105        MaskedBitVec::new(self, mask, |a, b| a & b)
1106    }
1107
1108    /// Mask this bit vector with another bitvector using bitwise and.
1109    /// The mask is applied immediately, unlike in [`mask_and`].
1110    ///
1111    /// # Errors
1112    /// Returns an error if the length of the vector doesn't match the mask length.
1113    ///
1114    /// [`mask_and`]: BitVec::mask_and
1115    pub fn apply_mask_and(&mut self, mask: &BitVec) -> Result<(), String> {
1116        if self.len != mask.len {
1117            return Err(String::from(
1118                "mask cannot have different length than vector",
1119            ));
1120        }
1121
1122        for i in 0..self.data.len() {
1123            self.data[i] &= mask.data[i];
1124        }
1125
1126        Ok(())
1127    }
1128
1129    /// Mask this bit vector with another bitvector using bitwise xor. The mask is applied lazily
1130    /// whenever an operation on the resulting vector is performed.
1131    ///
1132    /// # Errors
1133    /// Returns an error if the length of the vector doesn't match the mask length.
1134    #[inline]
1135    pub fn mask_xor<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1136        MaskedBitVec::new(self, mask, |a, b| a ^ b)
1137    }
1138
1139    /// Mask this bit vector with another bitvector using bitwise xor.
1140    /// The mask is applied immediately, unlike in [`mask_xor`].
1141    ///
1142    /// # Errors
1143    /// Returns an error if the length of the vector doesn't match the mask length.
1144    ///
1145    /// [`mask_xor`]: BitVec::mask_xor
1146    pub fn apply_mask_xor(&mut self, mask: &BitVec) -> Result<(), String> {
1147        if self.len != mask.len {
1148            return Err(String::from(
1149                "mask cannot have different length than vector",
1150            ));
1151        }
1152
1153        for i in 0..self.data.len() {
1154            self.data[i] ^= mask.data[i];
1155        }
1156
1157        Ok(())
1158    }
1159
1160    /// Mask this bit vector with another bitvector using a custom masking operation. The mask is
1161    /// applied lazily whenever an operation on the resulting vector is performed.
1162    ///
1163    /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1164    /// The last block of a bit vector might contain fewer bits, and will be padded with
1165    /// unpredictable data. Implementations may choose to modify those padding bits without
1166    /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1167    /// within the vector is unspecified.
1168    ///
1169    /// # Errors
1170    /// Returns an error if the length of the vector doesn't match the mask length.
1171    #[inline]
1172    pub fn mask_custom<'s, 'b, F>(
1173        &'s self,
1174        mask: &'b BitVec,
1175        mask_op: F,
1176    ) -> Result<MaskedBitVec<'s, 'b, F>, String>
1177    where
1178        F: Fn(u64, u64) -> u64,
1179    {
1180        MaskedBitVec::new(self, mask, mask_op)
1181    }
1182
1183    /// Mask this bit vector with another bitvector using a custom masking operation.
1184    /// The mask is applied immediately, unlike in [`mask_custom`].
1185    ///
1186    /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1187    /// The last block of a bit vector might contain fewer bits, and will be padded with
1188    /// unpredictable data. Implementations may choose to modify those padding bits without
1189    /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1190    /// within the vector is unspecified.
1191    ///
1192    /// # Errors
1193    /// Returns an error if the length of the vector doesn't match the mask length.
1194    ///
1195    /// [`mask_custom`]: BitVec::mask_custom
1196    #[inline]
1197    pub fn apply_mask_custom(
1198        &mut self,
1199        mask: &BitVec,
1200        mask_op: fn(u64, u64) -> u64,
1201    ) -> Result<(), String> {
1202        if self.len != mask.len {
1203            return Err(String::from(
1204                "mask cannot have different length than vector",
1205            ));
1206        }
1207
1208        for i in 0..self.data.len() {
1209            self.data[i] = mask_op(self.data[i], mask.data[i]);
1210        }
1211
1212        Ok(())
1213    }
1214
1215    /// Returns the number of bytes on the heap for this vector.
1216    /// Does not include allocated memory that isn't used.
1217    #[must_use]
1218    pub fn heap_size(&self) -> usize {
1219        self.data.len() * size_of::<u64>()
1220    }
1221
1222    /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1223    /// right half the remaining bits `at..`. If the split index is larger than the length of the
1224    /// vector, the vector is returned unmodified in an `Err` variant.
1225    ///
1226    /// # Errors
1227    /// If the index is out of bounds, the function will return an error
1228    /// containing the original vector.
1229    ///
1230    /// See also: [`split_at_unchecked`]
1231    ///
1232    /// [`split_at_unchecked`]: Self::split_at_unchecked
1233    pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> {
1234        if at > self.len {
1235            Err(self)
1236        } else {
1237            Ok(self.split_at_unchecked(at))
1238        }
1239    }
1240
1241    /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1242    /// right half the remaining bits `at..`.
1243    ///
1244    /// # Panics
1245    /// If the index is larger than the length of the vector the function will panic or run
1246    /// out of memory.
1247    /// Use [`split_at`] to properly handle this case.
1248    ///
1249    /// [`split_at`]: Self::split_at
1250    #[must_use]
1251    pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) {
1252        let other_len = self.len - at;
1253        let mut other = Self::with_capacity(other_len);
1254
1255        if other_len == 0 {
1256            return (self, other);
1257        }
1258
1259        let first_limb = at / WORD_SIZE;
1260        let last_limb = self.len / WORD_SIZE;
1261
1262        // First, we figure out the number of bits from the first limb to retain in this vector:
1263        let leading_partial = at % WORD_SIZE;
1264
1265        // If the split point is in the last limb, and the vector ends before the last bit, first_limb
1266        // and last_limb will be equal, and the other half is simply other_len bits off the limb
1267        // right shifted by the number of bits to retain in this vector.
1268        if first_limb == last_limb {
1269            other.append_bits_unchecked(self.data[first_limb] >> leading_partial, other_len);
1270        } else {
1271            // Otherwise, some range n..last_limb should be copied in their entirety to the other half,
1272            // with n=first_limb+1 if the split point is inside the first limb (leading_partial > 0), or
1273            // n=first_limb if the entire first limb belongs in the other half.
1274            let full_limbs = if leading_partial > 0 {
1275                // If the split point is inside the first limb, we also have to remember to copy over
1276                // the trailing bits to the new vector.
1277                other.append_bits_unchecked(
1278                    self.data[first_limb] >> leading_partial,
1279                    WORD_SIZE - leading_partial,
1280                );
1281                first_limb + 1..last_limb
1282            } else {
1283                first_limb..last_limb
1284            };
1285
1286            // Copy over any full limbs.
1287            for i in full_limbs {
1288                other.append_bits_unchecked(self.data[i], WORD_SIZE);
1289            }
1290
1291            // Finally, if the vector has a partially filled last limb, we need to put those bits
1292            // in the other half.
1293            let trailing_partial = self.len % WORD_SIZE;
1294            if trailing_partial > 0 {
1295                other.append_bits_unchecked(self.data[last_limb], trailing_partial);
1296            }
1297        }
1298
1299        // remove the copied bits from the original vector
1300        self.drop_last(other_len);
1301
1302        (self, other)
1303    }
1304}
1305
1306impl_vector_iterator! { BitVec, BitVecIter, BitVecRefIter }
1307
1308/// Create a new bit vector from a slice of u64 values.
1309/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1310/// The function will append the bits of each element to the bit vector in the order they are
1311/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1312impl From<&[u64]> for BitVec {
1313    fn from(data: &[u64]) -> Self {
1314        BitVec::from_limbs(data)
1315    }
1316}
1317
1318/// Create a new bit vector from a slice of u64 values.
1319/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1320/// The function will append the bits of each element to the bit vector in the order they are
1321/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1322impl From<Vec<u64>> for BitVec {
1323    fn from(data: Vec<u64>) -> Self {
1324        BitVec::from_limbs(&data)
1325    }
1326}
1327
1328impl Extend<BitVec> for BitVec {
1329    fn extend<T: IntoIterator<Item = BitVec>>(&mut self, iter: T) {
1330        for v in iter {
1331            self.extend_bitvec(&v)
1332        }
1333    }
1334}
1335
1336impl<'t> Extend<&'t BitVec> for BitVec {
1337    fn extend<T: IntoIterator<Item = &'t BitVec>>(&mut self, iter: T) {
1338        for v in iter {
1339            self.extend_bitvec(v)
1340        }
1341    }
1342}
1343
1344/// Create a new bit vector from u64 values.
1345/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1346/// The function will append the bits of each element to the bit vector in the order they are
1347/// given in the iterator (i.e. the first element takes bits `0..64` of the vector).
1348impl FromIterator<u64> for BitVec {
1349    fn from_iter<T: IntoIterator<Item = u64>>(iter: T) -> Self {
1350        BitVec::from_limbs_iter(iter)
1351    }
1352}
1353
1354impl PartialEq for BitVec {
1355    // unlike the auto-derived implementation, this custom implementation ignores junk data at
1356    // the end of the bit vector
1357    fn eq(&self, other: &Self) -> bool {
1358        if self.len != other.len {
1359            return false;
1360        }
1361
1362        if self.len == 0 {
1363            return true;
1364        }
1365
1366        for i in 0..self.data.len() - 1 {
1367            if self.data[i] != other.data[i] {
1368                return false;
1369            }
1370        }
1371
1372        // in last limb, ignore junk data
1373        let mask = (1 << (self.len % WORD_SIZE)) - 1;
1374        if self.data[self.data.len() - 1] & mask != other.data[self.data.len() - 1] & mask {
1375            return false;
1376        }
1377
1378        true
1379    }
1380}
1381
1382impl Eq for BitVec {}
1383
1384impl Hash for BitVec {
1385    fn hash<H: Hasher>(&self, state: &mut H) {
1386        state.write_usize(self.len);
1387        if self.len > 0 {
1388            self.data[0..self.data.len() - 1]
1389                .iter()
1390                .for_each(|x| state.write_u64(*x));
1391            let masked_last_limb = self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1);
1392            state.write_u64(masked_last_limb);
1393        }
1394    }
1395}
1396
1397#[cfg(test)]
1398mod tests;
vers_vecs/bit_vec/mod.rs

vers_vecs/bit_vec/
mod.rs