vers_vecs/bit_vec/
mod.rs

1//! This module contains a simple [bit vector][BitVec] implementation with no overhead and a fast succinct
2//! bit vector implementation with [rank and select queries][fast_rs_vec::RsVec].
3
4use crate::bit_vec::mask::MaskedBitVec;
5use crate::util::impl_vector_iterator;
6use std::cmp::min;
7use std::mem::size_of;
8
9pub mod fast_rs_vec;
10
11pub mod sparse;
12
13pub mod mask;
14
15/// Size of a word in bitvectors. All vectors operate on 64-bit words.
16const WORD_SIZE: usize = 64;
17
18/// Type alias for masked bitvectors that implement a simple bitwise binary operation.
19/// The first lifetime is for the bit vector that is being masked, the second lifetime is for the
20/// mask.
21pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>;
22
23/// A simple bit vector that does not support rank and select queries.
24/// Bits are stored in little-endian order, i.e. the least significant bit is stored first.
25/// The bit vector is stored as a sequence of 64 bit limbs.
26/// The last limb may be partially filled.
27///
28/// The bit vector has a wide range of constructors that allow for easy creation from various
29/// sources.
30/// Among them are constructors for creating an empty vector ([`BitVec::new`]),
31/// creating one from single bits of various integer types ([`BitVec::from_bits`] and variations),
32/// creating limbs from u64 values directly ([`BitVec::from_limbs`] and variations),
33/// or packing a sequence of numerical values into a dense bit sequence
34/// ([`BitVec::pack_sequence_u64`] and variations).
35///
36/// The bit vector can be modified after creation
37/// (e.g. by appending [bits](BitVec::append_bits)
38/// or [words](BitVec::append_word),
39/// [flipping](BitVec::flip_bit),
40/// or [setting](BitVec::set) bits).
41/// Bits can be [accessed](BitVec::get) by position,
42/// and [multiple bits](BitVec::get_bits) can be accessed at once.
43/// Bits can be [dropped](BitVec::drop_last) from the end.
44///
45/// # Example
46/// ```rust
47/// use vers_vecs::{BitVec, RsVec};
48///
49/// let mut bit_vec = BitVec::new();
50/// bit_vec.append_bit(0u64);
51/// bit_vec.append_bit_u32(1u32);
52/// bit_vec.append_word(0b1010_1010_1010_1010u64); // appends exactly 64 bits
53///
54/// assert_eq!(bit_vec.len(), 66);
55/// assert_eq!(bit_vec.get(0), Some(0u64));
56/// assert_eq!(bit_vec.get(1), Some(1u64));
57/// ```
58#[derive(Clone, Debug, Default, Eq, PartialEq, Hash)]
59#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
60pub struct BitVec {
61    data: Vec<u64>,
62    len: usize,
63}
64
65impl BitVec {
66    /// Create a new empty bit vector.
67    #[must_use]
68    pub fn new() -> Self {
69        Self::default()
70    }
71
72    /// Create a new empty bit vector with the given capacity.
73    /// The capacity is measured in bits.
74    /// The bit vector will be able to hold at least `capacity` bits without reallocating.
75    /// More memory may be allocated according to the underlying allocation strategy.
76    #[must_use]
77    pub fn with_capacity(capacity: usize) -> Self {
78        Self {
79            data: Vec::with_capacity(capacity / WORD_SIZE + 1),
80            len: 0,
81        }
82    }
83
84    /// Create a new bit vector with all zeros and the given length.
85    /// The length is measured in bits.
86    #[must_use]
87    pub fn from_zeros(len: usize) -> Self {
88        let mut data = vec![0; len / WORD_SIZE];
89        if len % WORD_SIZE != 0 {
90            data.push(0);
91        }
92        Self { data, len }
93    }
94
95    /// Create a new bit vector with all ones and the given length.
96    /// The length is measured in bits.
97    #[must_use]
98    pub fn from_ones(len: usize) -> Self {
99        let mut data = vec![u64::MAX; len / WORD_SIZE];
100        if len % WORD_SIZE != 0 {
101            data.push((1 << (len % WORD_SIZE)) - 1);
102        }
103        Self { data, len }
104    }
105
106    /// Construct a bit vector from a set of bits given as distinct u8 values.
107    /// The constructor will take the least significant bit from each value and append it to a
108    /// bit vector.
109    /// All other bits are ignored.
110    ///
111    /// See also: [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
112    ///
113    /// # Example
114    /// ```rust
115    /// use vers_vecs::BitVec;
116    ///
117    /// let bits: &[u8] = &[1, 0, 1, 1, 1, 1];
118    /// let bv = BitVec::from_bits(&bits);
119    ///
120    /// assert_eq!(bv.len(), 6);
121    /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
122    /// ```
123    ///
124    /// [`from_bits_u16`]: BitVec::from_bits_u16
125    /// [`from_bits_u32`]: BitVec::from_bits_u32
126    /// [`from_bits_u64`]: BitVec::from_bits_u64
127    /// [`from_bits_iter`]: BitVec::from_bits_iter
128    #[must_use]
129    pub fn from_bits(bits: &[u8]) -> Self {
130        let mut bv = Self::with_capacity(bits.len());
131        bits.iter().for_each(|&b| bv.append_bit(b.into()));
132        bv
133    }
134
135    /// Construct a bit vector from a set of bits given as distinct u16 values.
136    /// The constructor will take the least significant bit from each value and append it to a
137    /// bit vector.
138    /// All other bits are ignored.
139    ///
140    /// See also: [`from_bits`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
141    ///
142    /// [`from_bits`]: BitVec::from_bits
143    /// [`from_bits_u32`]: BitVec::from_bits_u32
144    /// [`from_bits_u64`]: BitVec::from_bits_u64
145    /// [`from_bits_iter`]: BitVec::from_bits_iter
146    #[must_use]
147    pub fn from_bits_u16(bits: &[u16]) -> Self {
148        let mut bv = Self::with_capacity(bits.len());
149        bits.iter().for_each(|&b| bv.append_bit_u16(b));
150        bv
151    }
152
153    /// Construct a bit vector from a set of bits given as distinct u32 values.
154    /// The constructor will take the least significant bit from each value and append it to a
155    /// bit vector.
156    /// All other bits are ignored.
157    ///
158    /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u64`], [`from_bits_iter`]
159    ///
160    /// [`from_bits`]: BitVec::from_bits
161    /// [`from_bits_u16`]: BitVec::from_bits_u16
162    /// [`from_bits_u64`]: BitVec::from_bits_u64
163    /// [`from_bits_iter`]: BitVec::from_bits_iter
164    #[must_use]
165    pub fn from_bits_u32(bits: &[u32]) -> Self {
166        let mut bv = Self::with_capacity(bits.len());
167        bits.iter().for_each(|&b| bv.append_bit_u32(b));
168        bv
169    }
170
171    /// Construct a bit vector from a set of bits given as distinct u64 values.
172    /// The constructor will take the least significant bit from each value and append it to a
173    /// bit vector.
174    /// All other bits are ignored.
175    ///
176    /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_iter`]
177    ///
178    /// [`from_bits`]: BitVec::from_bits
179    /// [`from_bits_u16`]: BitVec::from_bits_u16
180    /// [`from_bits_u32`]: BitVec::from_bits_u32
181    /// [`from_bits_iter`]: BitVec::from_bits_iter
182    #[must_use]
183    pub fn from_bits_u64(bits: &[u64]) -> Self {
184        let mut bv = Self::with_capacity(bits.len());
185        bits.iter().for_each(|&b| bv.append_bit(b));
186        bv
187    }
188
189    /// Construct a bit vector from an iterator of bits.
190    /// The constructor will take the least significant bit from each value and append it to a
191    /// bit vector.
192    /// All other bits are ignored.
193    /// The iterator must yield values that can be converted into u64 values.
194    ///
195    /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`]
196    ///
197    /// # Example
198    /// ```rust
199    /// use vers_vecs::BitVec;
200    ///
201    /// let bits = [true, false, true, true, true, true];
202    /// let bv = BitVec::from_bits_iter(bits.iter().copied());
203    ///
204    /// let bits = [0b1u8, 0b0, 0b1, 0b1, 0b1, 0b1];
205    /// let bv2 = BitVec::from_bits_iter(bits.iter().copied());
206    ///
207    /// assert_eq!(bv.len(), 6);
208    /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
209    /// assert_eq!(bv, bv2);
210    /// ```
211    ///
212    /// [`from_bits`]: BitVec::from_bits
213    /// [`from_bits_u16`]: BitVec::from_bits_u16
214    /// [`from_bits_u32`]: BitVec::from_bits_u32
215    /// [`from_bits_u64`]: BitVec::from_bits_u64
216    #[must_use]
217    pub fn from_bits_iter<I, E>(iter: I) -> Self
218    where
219        E: Into<u64>,
220        I: IntoIterator<Item = E>,
221    {
222        let iter = iter.into_iter();
223        let mut bv = Self::with_capacity(iter.size_hint().0);
224        for bit in iter {
225            bv.append_bit(bit.into());
226        }
227        bv
228    }
229
230    /// Construct a bit vector from a slice of u64 quad words.
231    /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
232    /// 64 bits to the bit vector).
233    /// Since the data is only cloned without any masking or transformation,
234    /// this is one of the fastest ways to create a bit vector.
235    ///
236    /// See also: [`from_vec`], [`from_limbs_iter`]
237    ///
238    /// # Example
239    /// ```rust
240    /// use vers_vecs::BitVec;
241    ///
242    /// let words = [0, 256, u64::MAX];
243    /// let bv = BitVec::from_limbs(&words);
244    ///
245    /// assert_eq!(bv.len(), 192);
246    /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
247    /// assert_eq!(bv.get(72), Some(1));
248    /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
249    /// ```
250    ///
251    /// [`from_vec`]: BitVec::from_vec
252    /// [`from_limbs_iter`]: BitVec::from_limbs_iter
253    #[must_use]
254    pub fn from_limbs(words: &[u64]) -> Self {
255        let len = words.len() * WORD_SIZE;
256        Self {
257            data: words.to_vec(),
258            len,
259        }
260    }
261
262    /// Construct a bit vector from an iterator of u64 quad words.
263    /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
264    /// 64 bits to the bit vector).
265    /// Since the data is only cloned without any masking or transformation,
266    /// this is one of the fastest ways to create a bit vector.
267    ///
268    /// See also: [`from_limbs`], [`from_vec`]
269    ///
270    /// # Example
271    /// ```rust
272    /// use std::iter::repeat;
273    /// use vers_vecs::BitVec;
274    ///
275    /// let zeros = repeat(0xaaaaaaaaaaaaaaaau64).take(10);
276    /// let bv = BitVec::from_limbs_iter(zeros);
277    ///
278    /// assert_eq!(bv.len(), 640);
279    /// for i in 0..640 {
280    ///    assert_eq!(bv.get(i), Some((i % 2 == 1) as u64));
281    /// }
282    /// ```
283    ///
284    /// [`from_limbs`]: BitVec::from_limbs
285    /// [`from_vec`]: BitVec::from_vec
286    pub fn from_limbs_iter<I, E>(iter: I) -> Self
287    where
288        E: Into<u64>,
289        I: IntoIterator<Item = E>,
290    {
291        let vec = iter.into_iter().map(Into::into).collect();
292        Self::from_vec(vec)
293    }
294
295    /// Construct a bit vector from a vector of u64 quad words.
296    /// The quad words are interpreted as limbs of the bit vector
297    /// (i.e. each quad word contributes 64 bits to the bit vector).
298    /// Since the data is moved without any masking or transformation, this is one of the fastest ways
299    /// to create a bit vector.
300    ///
301    /// See also: [`from_limbs`], [`from_limbs_iter`]
302    ///
303    /// # Example
304    /// ```rust
305    /// use vers_vecs::BitVec;
306    ///
307    /// let words = vec![0, 256, u64::MAX];
308    /// let bv = BitVec::from_vec(words);
309    ///
310    /// assert_eq!(bv.len(), 192);
311    /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
312    /// assert_eq!(bv.get(72), Some(1));
313    /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
314    /// ```
315    ///
316    /// [`from_limbs`]: BitVec::from_limbs
317    /// [`from_limbs_iter`]: BitVec::from_limbs_iter
318    #[must_use]
319    pub fn from_vec(data: Vec<u64>) -> Self {
320        let len = data.len() * WORD_SIZE;
321        Self { data, len }
322    }
323
324    fn pack_bits<T, const MAX_BITS: usize>(sequence: &[T], bits_per_element: usize) -> Self
325    where
326        T: Into<u64> + Copy,
327    {
328        let mut bv = Self::with_capacity(sequence.len() * bits_per_element);
329        for &word in sequence {
330            if bits_per_element <= MAX_BITS {
331                bv.append_bits(word.into(), bits_per_element);
332            } else {
333                bv.append_bits(word.into(), MAX_BITS);
334                let mut rest = bits_per_element - MAX_BITS;
335                while rest > 0 {
336                    bv.append_bits(0, min(rest, MAX_BITS));
337                    rest = rest.saturating_sub(MAX_BITS);
338                }
339            }
340        }
341        bv
342    }
343
344    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
345    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
346    /// The number of bits per element is given by `bits_per_element`.
347    /// The sequence is given as a slice of u64 values.
348    /// If the number of bits per element is smaller than 64, the function takes the
349    /// least significant bits of each element, and discards the rest.
350    /// If the number of bits per element is larger than 64, the function will pad the elements
351    /// with zeros.
352    /// The function will append the bits of each element to the bit vector in the order they are
353    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
354    ///
355    /// See also: [`pack_sequence_u32`], [`pack_sequence_u16`], [`pack_sequence_u8`]
356    ///
357    /// # Example
358    /// ```rust
359    /// use vers_vecs::BitVec;
360    ///
361    /// let sequence = [0b1010u64, 0b1100u64, 0b1111u64];
362    /// let bv = BitVec::pack_sequence_u64(&sequence, 4);
363    ///
364    /// assert_eq!(bv.len(), 12);
365    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
366    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
367    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
368    /// ```
369    ///
370    /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
371    /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
372    /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
373    #[must_use]
374    pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: usize) -> Self {
375        Self::pack_bits::<_, 64>(sequence, bits_per_element)
376    }
377
378    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
379    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
380    /// The number of bits per element is given by `bits_per_element`.
381    /// The sequence is given as a slice of u32 values.
382    /// If the number of bits per element is smaller than 32, the function takes the
383    /// least significant bits of each element, and discards the rest.
384    /// If the number of bits per element is larger than 32, the function will pad the elements
385    /// with zeros.
386    /// The function will append the bits of each element to the bit vector in the order they are
387    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
388    ///
389    /// See also: [`pack_sequence_u64`], [`pack_sequence_u16`], [`pack_sequence_u8`]
390    ///
391    /// # Example
392    /// ```rust
393    /// use vers_vecs::BitVec;
394    ///
395    /// let sequence = [0b1010u32, 0b1100u32, 0b1111u32];
396    /// let bv = BitVec::pack_sequence_u32(&sequence, 4);
397    ///
398    /// assert_eq!(bv.len(), 12);
399    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
400    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
401    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
402    /// ```
403    ///
404    /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
405    /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
406    /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
407    #[must_use]
408    pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: usize) -> Self {
409        Self::pack_bits::<_, 32>(sequence, bits_per_element)
410    }
411
412    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
413    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
414    /// The number of bits per element is given by `bits_per_element`.
415    /// The sequence is given as a slice of u16 values.
416    /// If the number of bits per element is smaller than 16, the function takes the
417    /// least significant bits of each element, and discards the rest.
418    /// If the number of bits per element is larger than 16, the function will pad the elements
419    /// with zeros.
420    /// The function will append the bits of each element to the bit vector in the order they are
421    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
422    ///
423    /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u8`]
424    ///
425    /// # Example
426    /// ```rust
427    /// use vers_vecs::BitVec;
428    ///
429    /// let sequence = [0b1010u16, 0b1100u16, 0b1111u16];
430    /// let bv = BitVec::pack_sequence_u16(&sequence, 4);
431    ///
432    /// assert_eq!(bv.len(), 12);
433    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
434    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
435    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
436    /// ```
437    ///
438    /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
439    /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
440    /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
441    #[must_use]
442    pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: usize) -> Self {
443        Self::pack_bits::<_, 16>(sequence, bits_per_element)
444    }
445
446    /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
447    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
448    /// The number of bits per element is given by `bits_per_element`.
449    /// The sequence is given as a slice of u8 values.
450    /// If the number of bits per element is smaller than 8, the function takes the
451    /// least significant bits of each element, and discards the rest.
452    /// If the number of bits per element is larger than 8, the function will pad the elements
453    /// with zeros.
454    /// The function will append the bits of each element to the bit vector in the order they are
455    /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
456    ///
457    /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u16`]
458    ///
459    /// # Example
460    /// ```rust
461    /// use vers_vecs::BitVec;
462    ///
463    /// let sequence = [0b1010u8, 0b1100u8, 0b1111u8];
464    /// let bv = BitVec::pack_sequence_u8(&sequence, 4);
465    ///
466    /// assert_eq!(bv.len(), 12);
467    /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
468    /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
469    /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
470    /// ```
471    ///
472    /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
473    /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
474    /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
475    #[must_use]
476    pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: usize) -> Self {
477        Self::pack_bits::<_, 8>(sequence, bits_per_element)
478    }
479
480    /// Append a bit encoded as a `bool` to the bit vector, where `true` means 1 and `false` means 0.
481    ///
482    /// See also: [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
483    ///
484    /// # Example
485    ///
486    /// ```rust
487    /// use vers_vecs::BitVec;
488    ///
489    /// let mut bv = BitVec::new();
490    /// bv.append(true);
491    ///
492    /// assert_eq!(bv.len(), 1);
493    /// assert_eq!(bv.get(0), Some(1));
494    /// ```
495    ///
496    /// [`append_bit`]: BitVec::append_bit
497    /// [`append_bit_u32`]: BitVec::append_bit_u32
498    /// [`append_bit_u16`]: BitVec::append_bit_u16
499    /// [`append_bit_u8`]: BitVec::append_bit_u8
500    /// [`append_word`]: BitVec::append_word
501    pub fn append(&mut self, bit: bool) {
502        if self.len % WORD_SIZE == 0 {
503            self.data.push(0);
504        }
505        if bit {
506            self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
507        } else {
508            self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
509        }
510        self.len += 1;
511    }
512
513    /// Drop the last n bits from the bit vector. If more bits are dropped than the bit vector
514    /// contains, the bit vector is cleared.
515    ///
516    /// # Example
517    ///
518    /// ```rust
519    /// use vers_vecs::BitVec;
520    ///
521    /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
522    /// bv.drop_last(3);
523    ///
524    /// assert_eq!(bv.len(), 3);
525    /// assert_eq!(bv.get_bits(0, 3), Some(0b101u64));
526    ///
527    /// bv.drop_last(4);
528    ///
529    /// assert!(bv.is_empty());
530    /// ```
531    pub fn drop_last(&mut self, n: usize) {
532        if n > self.len {
533            self.data.clear();
534            self.len = 0;
535            return;
536        }
537
538        let new_limb_count = (self.len - n).div_ceil(WORD_SIZE);
539
540        // cut off limbs that we no longer need
541        if new_limb_count < self.data.len() {
542            self.data.truncate(new_limb_count);
543        }
544
545        // update bit vector length
546        self.len -= n;
547    }
548
549    /// Append a bit encoded in a u64.
550    /// The least significant bit is appended to the bit vector.
551    /// All other bits are ignored.
552    ///
553    /// See also: [`append`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
554    ///
555    /// # Example
556    ///
557    /// ```rust
558    /// use vers_vecs::BitVec;
559    ///
560    /// let mut bv = BitVec::new();
561    ///
562    /// bv.append_bit(1);
563    /// bv.append_bit(0);
564    ///
565    /// assert_eq!(bv.len(), 2);
566    /// assert_eq!(bv.get(0), Some(1));
567    /// assert_eq!(bv.get(1), Some(0));
568    /// ```
569    ///
570    /// [`append`]: BitVec::append
571    /// [`append_bit_u32`]: BitVec::append_bit_u32
572    /// [`append_bit_u16`]: BitVec::append_bit_u16
573    /// [`append_bit_u8`]: BitVec::append_bit_u8
574    /// [`append_word`]: BitVec::append_word
575    pub fn append_bit(&mut self, bit: u64) {
576        if self.len % WORD_SIZE == 0 {
577            self.data.push(0);
578        }
579        if bit % 2 == 1 {
580            self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
581        } else {
582            self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
583        }
584
585        self.len += 1;
586    }
587
588    /// Append a bit from a u32. The least significant bit is appended to the bit vector.
589    /// All other bits are ignored.
590    ///
591    /// See also: [`append`], [`append_bit`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
592    ///
593    /// [`append`]: BitVec::append
594    /// [`append_bit`]: BitVec::append_bit
595    /// [`append_bit_u16`]: BitVec::append_bit_u16
596    /// [`append_bit_u8`]: BitVec::append_bit_u8
597    /// [`append_word`]: BitVec::append_word
598    pub fn append_bit_u32(&mut self, bit: u32) {
599        self.append_bit(u64::from(bit));
600    }
601
602    /// Append a bit from a u16. The least significant bit is appended to the bit vector.
603    /// All other bits are ignored.
604    ///
605    /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u8`], [`append_word`]
606    ///
607    /// [`append`]: BitVec::append
608    /// [`append_bit`]: BitVec::append_bit
609    /// [`append_bit_u32`]: BitVec::append_bit_u32
610    /// [`append_bit_u8`]: BitVec::append_bit_u8
611    /// [`append_word`]: BitVec::append_word
612    pub fn append_bit_u16(&mut self, bit: u16) {
613        self.append_bit(u64::from(bit));
614    }
615
616    /// Append a bit from a u8. The least significant bit is appended to the bit vector.
617    /// All other bits are ignored.
618    ///
619    /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_word`]
620    ///
621    /// [`append`]: BitVec::append
622    /// [`append_bit`]: BitVec::append_bit
623    /// [`append_bit_u32`]: BitVec::append_bit_u32
624    /// [`append_bit_u16`]: BitVec::append_bit_u16
625    /// [`append_word`]: BitVec::append_word
626    pub fn append_bit_u8(&mut self, bit: u8) {
627        self.append_bit(u64::from(bit));
628    }
629
630    /// Append a word to the bit vector. The bits are appended in little endian order (i.e. the first
631    /// bit of the word is appended first).
632    ///
633    /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`]
634    ///
635    /// # Example
636    ///
637    /// ```rust
638    /// use vers_vecs::BitVec;
639    ///
640    /// let mut bv = BitVec::new();
641    /// bv.append_word(0b1010_1010_1010_1010u64);
642    ///
643    /// assert_eq!(bv.len(), 64);
644    /// for i in 0..64 {
645    ///    assert_eq!(bv.get(i), Some((0b1010_1010_1010_1010u64 >> i) & 1));
646    /// }
647    /// ```
648    ///
649    /// [`append`]: BitVec::append
650    /// [`append_bit`]: BitVec::append_bit
651    /// [`append_bit_u32`]: BitVec::append_bit_u32
652    /// [`append_bit_u16`]: BitVec::append_bit_u16
653    /// [`append_bit_u8`]: BitVec::append_bit_u8
654    pub fn append_word(&mut self, word: u64) {
655        if self.len % WORD_SIZE == 0 {
656            self.data.push(word);
657        } else {
658            // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
659            self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
660            self.data[self.len / WORD_SIZE] |= word << (self.len % WORD_SIZE);
661
662            self.data.push(word >> (WORD_SIZE - self.len % WORD_SIZE));
663        }
664        self.len += WORD_SIZE;
665    }
666
667    /// Append multiple bits to the bit vector.
668    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
669    /// The number of bits to append is given by `len`. The bits are taken from the least
670    /// significant bits of `bits`.
671    /// All other bits are ignored.
672    ///
673    /// # Example
674    ///
675    /// ```rust
676    /// use vers_vecs::BitVec;
677    ///
678    /// let mut bv = BitVec::new();
679    /// bv.append_bits(0b1010_1010_1010_1010u64, 16);
680    ///
681    /// assert_eq!(bv.len(), 16);
682    /// assert_eq!(bv.get_bits(0, 16), Some(0b1010_1010_1010_1010u64));
683    /// ```
684    ///
685    /// # Panics
686    /// Panics if `len` is larger than 64.
687    pub fn append_bits(&mut self, bits: u64, len: usize) {
688        assert!(len <= 64, "Cannot append more than 64 bits");
689
690        if self.len % WORD_SIZE == 0 {
691            self.data.push(bits);
692        } else {
693            // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
694            self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
695            self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
696
697            if self.len % WORD_SIZE + len > WORD_SIZE {
698                self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
699            }
700        }
701        self.len += len;
702    }
703
704    /// Append multiple bits to the bit vector.
705    /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
706    /// The number of bits to append is given by `len`. The bits are taken from the least
707    /// significant bits of `bits`.
708    ///
709    /// This function does not check if `len` is larger than 64.
710    ///
711    /// Furthermore, if the bit-vector has trailing bits that are not zero
712    /// (i.e. the length is not a multiple of 64, and those bits are partially set),
713    /// the function will OR the new data with the trailing bits, destroying the appended data.
714    /// This can happen, if a call `append_bits[_unchecked](word, len)` appends a word which has
715    /// set bits beyond the `len - 1`-th bit,
716    /// or if bits have been dropped from the bit vector using [`drop_last`].
717    ///
718    /// See [`append_bits`] for a checked version of this function.
719    ///
720    /// # Panics
721    /// If `len` is larger than 64, the behavior is platform-dependent, and a processor
722    /// exception might be triggered.
723    ///
724    /// [`append_bits`]: BitVec::append_bits
725    /// [`drop_last`]: BitVec::drop_last
726    pub fn append_bits_unchecked(&mut self, bits: u64, len: usize) {
727        if self.len % WORD_SIZE == 0 {
728            self.data.push(bits);
729        } else {
730            self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
731
732            if self.len % WORD_SIZE + len > WORD_SIZE {
733                self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
734            }
735        }
736        self.len += len;
737    }
738
739    /// Append the bits of another bit vector to the end of this vector.
740    /// If this vector does not contain a multiple of 64 bits, the appended limbs need to be
741    /// shifted to the left.
742    /// This function is guaranteed to reallocate the underlying vector at most once.
743    pub fn extend_bitvec(&mut self, other: &Self) {
744        // reserve space for the new bits, ensuring at most one re-allocation
745        self.data
746            .reserve((self.len + other.len).div_ceil(WORD_SIZE) - self.data.len());
747
748        let full_limbs = other.len() / WORD_SIZE;
749        for i in 0..full_limbs {
750            self.append_bits(other.data[i], WORD_SIZE);
751        }
752
753        let partial_bits = other.len % WORD_SIZE;
754        if partial_bits > 0 {
755            self.append_bits(other.data[full_limbs], partial_bits);
756        }
757    }
758
759    /// Return the length of the bit vector. The length is measured in bits.
760    #[must_use]
761    pub fn len(&self) -> usize {
762        self.len
763    }
764
765    /// Return whether the bit vector is empty (contains no bits).
766    #[must_use]
767    pub fn is_empty(&self) -> bool {
768        self.len == 0
769    }
770
771    /// Flip the bit at the given position.
772    ///
773    /// # Example
774    ///
775    /// ```rust
776    /// use vers_vecs::BitVec;
777    ///
778    /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
779    /// bv.flip_bit(1);
780    ///
781    /// assert_eq!(bv.len(), 6);
782    /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
783    /// ```
784    ///
785    /// # Panics
786    /// If the position is larger than the length of the vector, the function panics.
787    pub fn flip_bit(&mut self, pos: usize) {
788        assert!(pos < self.len, "Index out of bounds");
789        self.flip_bit_unchecked(pos);
790    }
791
792    /// Flip the bit at the given position.
793    ///
794    /// See also: [`flip_bit`]
795    ///
796    /// # Panics
797    /// If the position is larger than the length of the
798    /// vector, the function will either modify unused memory or panic.
799    /// This will not corrupt memory.
800    ///
801    /// [`flip_bit`]: BitVec::flip_bit
802    pub fn flip_bit_unchecked(&mut self, pos: usize) {
803        self.data[pos / WORD_SIZE] ^= 1 << (pos % WORD_SIZE);
804    }
805
806    /// Return the bit at the given position.
807    /// The bit is encoded in the least significant bit of a u64 value.
808    /// If the position is larger than the length of the vector, None is returned.
809    ///
810    /// See also: [`get_unchecked`]
811    ///
812    /// # Example
813    ///
814    /// ```rust
815    /// use vers_vecs::BitVec;
816    ///
817    /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
818    ///
819    /// assert_eq!(bv.get(1), Some(0));
820    /// assert_eq!(bv.get(2), Some(1));
821    /// ```
822    #[must_use]
823    pub fn get(&self, pos: usize) -> Option<u64> {
824        if pos >= self.len {
825            None
826        } else {
827            Some(self.get_unchecked(pos))
828        }
829    }
830
831    /// Return the bit at the given position.
832    /// The bit is encoded in the least significant bit of a u64 value.
833    ///
834    /// # Panics
835    /// If the position is larger than the length of the vector,
836    /// the function will either return unpredictable data, or panic.
837    /// Use [`get`] to properly handle this case with an `Option`.
838    ///
839    /// [`get`]: BitVec::get
840    #[must_use]
841    pub fn get_unchecked(&self, pos: usize) -> u64 {
842        (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1
843    }
844
845    /// Set the bit at the given position.
846    /// The bit is encoded in the least significant bit of a u64 value.
847    ///
848    /// See also: [`set_unchecked`]
849    ///
850    /// # Example
851    ///
852    /// ```rust
853    /// use vers_vecs::BitVec;
854    ///
855    /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
856    /// bv.set(1, 1).unwrap();
857    ///
858    /// assert_eq!(bv.len(), 6);
859    /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
860    /// ```
861    ///
862    /// # Errors
863    /// If the position is out of range, the function will return `Err` with an error message,
864    /// otherwise it will return an empty `Ok`.
865    ///
866    /// [`set_unchecked`]: BitVec::set_unchecked
867    pub fn set(&mut self, pos: usize, value: u64) -> Result<(), &str> {
868        if pos >= self.len {
869            Err("out of range")
870        } else {
871            self.set_unchecked(pos, value);
872            Ok(())
873        }
874    }
875
876    /// Set the bit at the given position.
877    /// The bit is encoded in the least significant bit of a u64 value.
878    ///
879    /// # Panics
880    /// If the position is larger than the length of the vector,
881    /// the function will either do nothing, or panic.
882    /// Use [`set`] to properly handle this case with a `Result`.
883    ///
884    /// [`set`]: BitVec::set
885    pub fn set_unchecked(&mut self, pos: usize, value: u64) {
886        self.data[pos / WORD_SIZE] = (self.data[pos / WORD_SIZE] & !(0x1 << (pos % WORD_SIZE)))
887            | ((value & 0x1) << (pos % WORD_SIZE));
888    }
889
890    /// Return whether the bit at the given position is set.
891    /// If the position is larger than the length of the vector, None is returned.
892    ///
893    /// See also: [`is_bit_set_unchecked`]
894    ///
895    /// # Example
896    ///
897    /// ```rust
898    /// use vers_vecs::BitVec;
899    ///
900    /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
901    ///
902    /// assert!(!bv.is_bit_set(1).unwrap());
903    /// assert!(bv.is_bit_set(2).unwrap());
904    /// ```
905    ///
906    /// [`is_bit_set_unchecked`]: BitVec::is_bit_set_unchecked
907    #[must_use]
908    pub fn is_bit_set(&self, pos: usize) -> Option<bool> {
909        if pos >= self.len {
910            None
911        } else {
912            Some(self.is_bit_set_unchecked(pos))
913        }
914    }
915
916    /// Return whether the bit at the given position is set.
917    ///
918    /// # Panics
919    /// If the position is larger than the length of the vector,
920    /// the function will either return unpredictable data, or panic.
921    /// Use [`is_bit_set`] to properly handle this case with an `Option`.
922    ///
923    /// [`is_bit_set`]: BitVec::is_bit_set
924    #[must_use]
925    pub fn is_bit_set_unchecked(&self, pos: usize) -> bool {
926        self.get_unchecked(pos) != 0
927    }
928
929    /// Return multiple bits at the given position.
930    /// The number of bits to return is given by `len`.
931    /// At most 64 bits can be returned.
932    /// If the position at the end of the query is larger than the length of the vector,
933    /// None is returned (even if the query partially overlaps with the vector).
934    /// If the length of the query is larger than 64, None is returned.
935    ///
936    /// The first bit at `pos` is the most significant bit of the return value
937    /// limited to `len` bits.
938    #[must_use]
939    pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> {
940        if len > WORD_SIZE || len == 0 {
941            return None;
942        }
943        if pos + len > self.len {
944            None
945        } else {
946            Some(self.get_bits_unchecked(pos, len))
947        }
948    }
949
950    /// Return multiple bits at the given position. The number of bits to return is given by `len`.
951    /// At most 64 bits can be returned.
952    ///
953    /// This function is always inlined, because it gains a lot from loop optimization and
954    /// can utilize the processor pre-fetcher better if it is.
955    ///
956    /// # Errors
957    /// If the length of the query is larger than 64, unpredictable data will be returned.
958    /// Use [`get_bits`] to avoid this.
959    ///
960    /// # Panics
961    /// If the position or interval is larger than the length of the vector,
962    /// the function will either return any valid results padded with unpredictable
963    /// data or panic.
964    ///
965    /// [`get_bits`]: BitVec::get_bits
966    #[must_use]
967    #[allow(clippy::inline_always)]
968    #[allow(clippy::comparison_chain)] // readability
969    #[inline(always)] // inline to gain loop optimization and pipeline advantages for elias fano
970    #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen
971    pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 {
972        debug_assert!(len <= WORD_SIZE);
973        let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE);
974        if pos % WORD_SIZE + len <= WORD_SIZE {
975            partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
976        } else {
977            (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE)))
978                & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
979        }
980    }
981
982    /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
983    /// `index`. The number of bits per encoded element is given by `n`.
984    ///
985    /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
986    /// and is equivalent to calling [`get_bits(index * n, n)`].
987    /// It is thus safe to use this method with any index and any size n <= 64.
988    ///
989    /// If the element is out of bounds, None is returned.
990    /// The element is returned as a u64 value.
991    ///
992    /// # Example
993    /// ```rust
994    /// use vers_vecs::BitVec;
995    ///
996    /// let sequence = [10, 100, 124, 45, 223];
997    /// let bv = BitVec::pack_sequence_u64(&sequence, 8);
998    ///
999    /// assert_eq!(bv.unpack_element(0, 8), Some(10));
1000    /// assert_eq!(bv.unpack_element(2, 8), Some(124));
1001    /// ```
1002    ///
1003    /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1004    /// [`get_bits(index * n, n)`]: BitVec::get_bits
1005    #[must_use]
1006    #[allow(clippy::inline_always)]
1007    #[inline(always)] // to gain optimization if n is constant
1008    pub fn unpack_element(&self, index: usize, n: usize) -> Option<u64> {
1009        self.get_bits(index * n, n)
1010    }
1011
1012    /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
1013    /// `index`. The number of bits per encoded element is given by `n`.
1014    ///
1015    /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
1016    /// and is equivalent to calling [`get_bits_unchecked(index * n, n)`].
1017    /// It is thus safe to use this method with any index where `index * n + n` is in-bounds,
1018    /// and any size n <= 64.
1019    ///
1020    /// # Panics
1021    /// If the element is out of bounds, the function will either return unpredictable data or panic.
1022    /// Use [`unpack_element`] for a checked version of this function.
1023    ///
1024    /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1025    /// [`get_bits_unchecked(index * n, n)`]: BitVec::get_bits_unchecked
1026    /// [`unpack_element`]: BitVec::unpack_element
1027    #[must_use]
1028    #[allow(clippy::inline_always)]
1029    #[inline(always)] // to gain optimization if n is constant
1030    pub fn unpack_element_unchecked(&self, index: usize, n: usize) -> u64 {
1031        self.get_bits_unchecked(index * n, n)
1032    }
1033
1034    /// Return the number of ones in the bit vector. Since the bit vector doesn't store additional
1035    /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1036    ///
1037    /// [`RsVec`]: crate::RsVec
1038    #[must_use]
1039    #[allow(clippy::missing_panics_doc)] // can't panic because of manual bounds check
1040    pub fn count_ones(&self) -> u64 {
1041        let mut ones: u64 = self.data[0..self.len / WORD_SIZE]
1042            .iter()
1043            .map(|limb| u64::from(limb.count_ones()))
1044            .sum();
1045        if self.len % WORD_SIZE > 0 {
1046            ones += u64::from(
1047                (self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones(),
1048            );
1049        }
1050        ones
1051    }
1052
1053    /// Return the number of zeros in the bit vector. Since the bit vector doesn't store additional
1054    /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1055    /// This method calls [`count_ones`].
1056    ///
1057    /// [`RsVec`]: crate::RsVec
1058    /// [`count_ones`]: BitVec::count_ones
1059    #[must_use]
1060    pub fn count_zeros(&self) -> u64 {
1061        self.len as u64 - self.count_ones()
1062    }
1063
1064    /// Mask this bit vector with another bitvector using bitwise or. The mask is applied lazily
1065    /// whenever an operation on the resulting vector is performed.
1066    ///
1067    /// # Errors
1068    /// Returns an error if the length of the vector doesn't match the mask length.
1069    #[inline]
1070    pub fn mask_or<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1071        MaskedBitVec::new(self, mask, |a, b| a | b)
1072    }
1073
1074    /// Mask this bit vector with another bitvector using bitwise or.
1075    /// The mask is applied immediately, unlike in [`mask_or`].
1076    ///
1077    /// # Errors
1078    /// Returns an error if the length of the vector doesn't match the mask length.
1079    ///
1080    /// [`mask_or`]: BitVec::mask_or
1081    pub fn apply_mask_or(&mut self, mask: &BitVec) -> Result<(), String> {
1082        if self.len != mask.len {
1083            return Err(String::from(
1084                "mask cannot have different length than vector",
1085            ));
1086        }
1087
1088        for i in 0..self.data.len() {
1089            self.data[i] |= mask.data[i];
1090        }
1091
1092        Ok(())
1093    }
1094
1095    /// Mask this bit vector with another bitvector using bitwise and. The mask is applied lazily
1096    /// whenever an operation on the resulting vector is performed.
1097    ///
1098    /// # Errors
1099    /// Returns an error if the length of the vector doesn't match the mask length.
1100    #[inline]
1101    pub fn mask_and<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1102        MaskedBitVec::new(self, mask, |a, b| a & b)
1103    }
1104
1105    /// Mask this bit vector with another bitvector using bitwise and.
1106    /// The mask is applied immediately, unlike in [`mask_and`].
1107    ///
1108    /// # Errors
1109    /// Returns an error if the length of the vector doesn't match the mask length.
1110    ///
1111    /// [`mask_and`]: BitVec::mask_and
1112    pub fn apply_mask_and(&mut self, mask: &BitVec) -> Result<(), String> {
1113        if self.len != mask.len {
1114            return Err(String::from(
1115                "mask cannot have different length than vector",
1116            ));
1117        }
1118
1119        for i in 0..self.data.len() {
1120            self.data[i] &= mask.data[i];
1121        }
1122
1123        Ok(())
1124    }
1125
1126    /// Mask this bit vector with another bitvector using bitwise xor. The mask is applied lazily
1127    /// whenever an operation on the resulting vector is performed.
1128    ///
1129    /// # Errors
1130    /// Returns an error if the length of the vector doesn't match the mask length.
1131    #[inline]
1132    pub fn mask_xor<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1133        MaskedBitVec::new(self, mask, |a, b| a ^ b)
1134    }
1135
1136    /// Mask this bit vector with another bitvector using bitwise xor.
1137    /// The mask is applied immediately, unlike in [`mask_xor`].
1138    ///
1139    /// # Errors
1140    /// Returns an error if the length of the vector doesn't match the mask length.
1141    ///
1142    /// [`mask_xor`]: BitVec::mask_xor
1143    pub fn apply_mask_xor(&mut self, mask: &BitVec) -> Result<(), String> {
1144        if self.len != mask.len {
1145            return Err(String::from(
1146                "mask cannot have different length than vector",
1147            ));
1148        }
1149
1150        for i in 0..self.data.len() {
1151            self.data[i] ^= mask.data[i];
1152        }
1153
1154        Ok(())
1155    }
1156
1157    /// Mask this bit vector with another bitvector using a custom masking operation. The mask is
1158    /// applied lazily whenever an operation on the resulting vector is performed.
1159    ///
1160    /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1161    /// The last block of a bit vector might contain fewer bits, and will be padded with
1162    /// unpredictable data. Implementations may choose to modify those padding bits without
1163    /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1164    /// within the vector is unspecified.
1165    ///
1166    /// # Errors
1167    /// Returns an error if the length of the vector doesn't match the mask length.
1168    #[inline]
1169    pub fn mask_custom<'s, 'b, F>(
1170        &'s self,
1171        mask: &'b BitVec,
1172        mask_op: F,
1173    ) -> Result<MaskedBitVec<'s, 'b, F>, String>
1174    where
1175        F: Fn(u64, u64) -> u64,
1176    {
1177        MaskedBitVec::new(self, mask, mask_op)
1178    }
1179
1180    /// Mask this bit vector with another bitvector using a custom masking operation.
1181    /// The mask is applied immediately, unlike in [`mask_custom`].
1182    ///
1183    /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1184    /// The last block of a bit vector might contain fewer bits, and will be padded with
1185    /// unpredictable data. Implementations may choose to modify those padding bits without
1186    /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1187    /// within the vector is unspecified.
1188    ///
1189    /// # Errors
1190    /// Returns an error if the length of the vector doesn't match the mask length.
1191    ///
1192    /// [`mask_custom`]: BitVec::mask_custom
1193    #[inline]
1194    pub fn apply_mask_custom(
1195        &mut self,
1196        mask: &BitVec,
1197        mask_op: fn(u64, u64) -> u64,
1198    ) -> Result<(), String> {
1199        if self.len != mask.len {
1200            return Err(String::from(
1201                "mask cannot have different length than vector",
1202            ));
1203        }
1204
1205        for i in 0..self.data.len() {
1206            self.data[i] = mask_op(self.data[i], mask.data[i]);
1207        }
1208
1209        Ok(())
1210    }
1211
1212    /// Returns the number of bytes on the heap for this vector.
1213    /// Does not include allocated memory that isn't used.
1214    #[must_use]
1215    pub fn heap_size(&self) -> usize {
1216        self.data.len() * size_of::<u64>()
1217    }
1218
1219    /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1220    /// right half the remaining bits `at..`. If the split index is larger than the length of the
1221    /// vector, the vector is returned unmodified in an `Err` variant.
1222    ///
1223    /// # Errors
1224    /// If the index is out of bounds, the function will return an error
1225    /// containing the original vector.
1226    ///
1227    /// See also: [`split_at_unchecked`]
1228    pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> {
1229        if at > self.len {
1230            Err(self)
1231        } else {
1232            Ok(self.split_at_unchecked(at))
1233        }
1234    }
1235
1236    /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1237    /// right half the remaining bits `at..`.
1238    ///
1239    /// # Panics
1240    /// If the index is larger than the length of the vector the function will panic or run
1241    /// out of memory.
1242    /// Use [`split_at`] to properly handle this case.
1243    #[must_use]
1244    pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) {
1245        let other_len = self.len - at;
1246        let mut other = Self::with_capacity(other_len);
1247
1248        if other_len == 0 {
1249            return (self, other);
1250        }
1251
1252        let first_limb = at / WORD_SIZE;
1253        let last_limb = self.len / WORD_SIZE;
1254
1255        // First, we figure out the number of bits from the first limb to retain in this vector:
1256        let leading_partial = at % WORD_SIZE;
1257
1258        // If the split point is in the last limb, and the vector ends before the last bit, first_limb
1259        // and last_limb will be equal, and the other half is simply other_len bits off the limb
1260        // right shifted by the number of bits to retain in this vector.
1261        if first_limb == last_limb {
1262            other.append_bits_unchecked(self.data[first_limb] >> leading_partial, other_len);
1263        } else {
1264            // Otherwise, some range n..last_limb should be copied in their entirety to the other half,
1265            // with n=first_limb+1 if the split point is inside the first limb (leading_partial > 0), or
1266            // n=first_limb if the entire first limb belongs in the other half.
1267            let full_limbs = if leading_partial > 0 {
1268                // If the split point is inside the first limb, we also have to remember to copy over
1269                // the trailing bits to the new vector.
1270                other.append_bits_unchecked(
1271                    self.data[first_limb] >> leading_partial,
1272                    WORD_SIZE - leading_partial,
1273                );
1274                first_limb + 1..last_limb
1275            } else {
1276                first_limb..last_limb
1277            };
1278
1279            // Copy over any full limbs.
1280            for i in full_limbs {
1281                other.append_bits_unchecked(self.data[i], WORD_SIZE);
1282            }
1283
1284            // Finally, if the vector has a partially filled last limb, we need to put those bits
1285            // in the other half.
1286            let trailing_partial = self.len % WORD_SIZE;
1287            if trailing_partial > 0 {
1288                other.append_bits_unchecked(self.data[last_limb], trailing_partial);
1289            }
1290        }
1291
1292        // remove the copied bits from the original vector
1293        self.drop_last(other_len);
1294
1295        (self, other)
1296    }
1297}
1298
1299impl_vector_iterator! { BitVec, BitVecIter, BitVecRefIter }
1300
1301/// Create a new bit vector from a slice of u64 values.
1302/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1303/// The function will append the bits of each element to the bit vector in the order they are
1304/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1305impl From<&[u64]> for BitVec {
1306    fn from(data: &[u64]) -> Self {
1307        BitVec::from_limbs(data)
1308    }
1309}
1310
1311/// Create a new bit vector from a slice of u64 values.
1312/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1313/// The function will append the bits of each element to the bit vector in the order they are
1314/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1315impl From<Vec<u64>> for BitVec {
1316    fn from(data: Vec<u64>) -> Self {
1317        BitVec::from_limbs(&data)
1318    }
1319}
1320
1321impl Extend<BitVec> for BitVec {
1322    fn extend<T: IntoIterator<Item = BitVec>>(&mut self, iter: T) {
1323        for v in iter {
1324            self.extend_bitvec(&v)
1325        }
1326    }
1327}
1328
1329impl<'t> Extend<&'t BitVec> for BitVec {
1330    fn extend<T: IntoIterator<Item = &'t BitVec>>(&mut self, iter: T) {
1331        for v in iter {
1332            self.extend_bitvec(v)
1333        }
1334    }
1335}
1336
1337/// Create a new bit vector from u64 values.
1338/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1339/// The function will append the bits of each element to the bit vector in the order they are
1340/// given in the iterator (i.e. the first element takes bits `0..64` of the vector).
1341impl FromIterator<u64> for BitVec {
1342    fn from_iter<T: IntoIterator<Item = u64>>(iter: T) -> Self {
1343        BitVec::from_limbs_iter(iter)
1344    }
1345}
1346
1347#[cfg(test)]
1348mod tests;
vers_vecs/bit_vec/mod.rs

vers_vecs/bit_vec/
mod.rs