vers_vecs/bit_vec/mod.rs
1//! This module contains a simple [bit vector][BitVec] implementation with no overhead and a fast succinct
2//! bit vector implementation with [rank and select queries][fast_rs_vec::RsVec].
3
4use crate::bit_vec::mask::MaskedBitVec;
5use crate::util::impl_vector_iterator;
6use std::cmp::min;
7use std::hash::{Hash, Hasher};
8use std::mem::size_of;
9
10pub mod fast_rs_vec;
11
12pub mod sparse;
13
14pub mod mask;
15
16/// Size of a word in bitvectors. All vectors operate on 64-bit words.
17const WORD_SIZE: usize = 64;
18
19/// Type alias for masked bitvectors that implement a simple bitwise binary operation.
20/// The first lifetime is for the bit vector that is being masked, the second lifetime is for the
21/// mask.
22pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>;
23
24/// A simple bit vector that does not support rank and select queries.
25/// Bits are stored in little-endian order, i.e. the least significant bit is stored first.
26/// The bit vector is stored as a sequence of 64 bit limbs.
27/// The last limb may be partially filled.
28///
29/// The bit vector has a wide range of constructors that allow for easy creation from various
30/// sources.
31/// Among them are constructors for creating an empty vector ([`BitVec::new`]),
32/// creating one from single bits of various integer types ([`BitVec::from_bits`] and variations),
33/// creating limbs from u64 values directly ([`BitVec::from_limbs`] and variations),
34/// or packing a sequence of numerical values into a dense bit sequence
35/// ([`BitVec::pack_sequence_u64`] and variations).
36///
37/// The bit vector can be modified after creation
38/// (e.g. by appending [bits](BitVec::append_bits)
39/// or [words](BitVec::append_word),
40/// [flipping](BitVec::flip_bit),
41/// or [setting](BitVec::set) bits).
42/// Bits can be [accessed](BitVec::get) by position,
43/// and [multiple bits](BitVec::get_bits) can be accessed at once.
44/// Bits can be [dropped](BitVec::drop_last) from the end.
45///
46/// # Example
47/// ```rust
48/// use vers_vecs::{BitVec, RsVec};
49///
50/// let mut bit_vec = BitVec::new();
51/// bit_vec.append_bit(0u64);
52/// bit_vec.append_bit_u32(1u32);
53/// bit_vec.append_word(0b1010_1010_1010_1010u64); // appends exactly 64 bits
54///
55/// assert_eq!(bit_vec.len(), 66);
56/// assert_eq!(bit_vec.get(0), Some(0u64));
57/// assert_eq!(bit_vec.get(1), Some(1u64));
58/// ```
59#[derive(Clone, Debug, Default)]
60#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
61pub struct BitVec {
62 data: Vec<u64>,
63 len: usize,
64}
65
66impl BitVec {
67 /// Create a new empty bit vector.
68 #[must_use]
69 pub fn new() -> Self {
70 Self::default()
71 }
72
73 /// Create a new empty bit vector with the given capacity.
74 /// The capacity is measured in bits.
75 /// The bit vector will be able to hold at least `capacity` bits without reallocating.
76 /// More memory may be allocated according to the underlying allocation strategy.
77 #[must_use]
78 pub fn with_capacity(capacity: usize) -> Self {
79 Self {
80 data: Vec::with_capacity(capacity / WORD_SIZE + 1),
81 len: 0,
82 }
83 }
84
85 /// Create a new bit vector with all zeros and the given length.
86 /// The length is measured in bits.
87 #[must_use]
88 pub fn from_zeros(len: usize) -> Self {
89 let mut data = vec![0; len / WORD_SIZE];
90 if !len.is_multiple_of(WORD_SIZE) {
91 data.push(0);
92 }
93 Self { data, len }
94 }
95
96 /// Create a new bit vector with all ones and the given length.
97 /// The length is measured in bits.
98 #[must_use]
99 pub fn from_ones(len: usize) -> Self {
100 let mut data = vec![u64::MAX; len / WORD_SIZE];
101 if !len.is_multiple_of(WORD_SIZE) {
102 data.push((1 << (len % WORD_SIZE)) - 1);
103 }
104 Self { data, len }
105 }
106
107 /// Construct a bit vector from a set of bits given as distinct u8 values.
108 /// The constructor will take the least significant bit from each value and append it to a
109 /// bit vector.
110 /// All other bits are ignored.
111 ///
112 /// See also: [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
113 ///
114 /// # Example
115 /// ```rust
116 /// use vers_vecs::BitVec;
117 ///
118 /// let bits: &[u8] = &[1, 0, 1, 1, 1, 1];
119 /// let bv = BitVec::from_bits(&bits);
120 ///
121 /// assert_eq!(bv.len(), 6);
122 /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
123 /// ```
124 ///
125 /// [`from_bits_u16`]: BitVec::from_bits_u16
126 /// [`from_bits_u32`]: BitVec::from_bits_u32
127 /// [`from_bits_u64`]: BitVec::from_bits_u64
128 /// [`from_bits_iter`]: BitVec::from_bits_iter
129 #[must_use]
130 pub fn from_bits(bits: &[u8]) -> Self {
131 let mut bv = Self::with_capacity(bits.len());
132 bits.iter().for_each(|&b| bv.append_bit(b.into()));
133 bv
134 }
135
136 /// Construct a bit vector from a set of bits given as distinct u16 values.
137 /// The constructor will take the least significant bit from each value and append it to a
138 /// bit vector.
139 /// All other bits are ignored.
140 ///
141 /// See also: [`from_bits`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
142 ///
143 /// [`from_bits`]: BitVec::from_bits
144 /// [`from_bits_u32`]: BitVec::from_bits_u32
145 /// [`from_bits_u64`]: BitVec::from_bits_u64
146 /// [`from_bits_iter`]: BitVec::from_bits_iter
147 #[must_use]
148 pub fn from_bits_u16(bits: &[u16]) -> Self {
149 let mut bv = Self::with_capacity(bits.len());
150 bits.iter().for_each(|&b| bv.append_bit_u16(b));
151 bv
152 }
153
154 /// Construct a bit vector from a set of bits given as distinct u32 values.
155 /// The constructor will take the least significant bit from each value and append it to a
156 /// bit vector.
157 /// All other bits are ignored.
158 ///
159 /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u64`], [`from_bits_iter`]
160 ///
161 /// [`from_bits`]: BitVec::from_bits
162 /// [`from_bits_u16`]: BitVec::from_bits_u16
163 /// [`from_bits_u64`]: BitVec::from_bits_u64
164 /// [`from_bits_iter`]: BitVec::from_bits_iter
165 #[must_use]
166 pub fn from_bits_u32(bits: &[u32]) -> Self {
167 let mut bv = Self::with_capacity(bits.len());
168 bits.iter().for_each(|&b| bv.append_bit_u32(b));
169 bv
170 }
171
172 /// Construct a bit vector from a set of bits given as distinct u64 values.
173 /// The constructor will take the least significant bit from each value and append it to a
174 /// bit vector.
175 /// All other bits are ignored.
176 ///
177 /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_iter`]
178 ///
179 /// [`from_bits`]: BitVec::from_bits
180 /// [`from_bits_u16`]: BitVec::from_bits_u16
181 /// [`from_bits_u32`]: BitVec::from_bits_u32
182 /// [`from_bits_iter`]: BitVec::from_bits_iter
183 #[must_use]
184 pub fn from_bits_u64(bits: &[u64]) -> Self {
185 let mut bv = Self::with_capacity(bits.len());
186 bits.iter().for_each(|&b| bv.append_bit(b));
187 bv
188 }
189
190 /// Construct a bit vector from an iterator of bits.
191 /// The constructor will take the least significant bit from each value and append it to a
192 /// bit vector.
193 /// All other bits are ignored.
194 /// The iterator must yield values that can be converted into u64 values.
195 ///
196 /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`]
197 ///
198 /// # Example
199 /// ```rust
200 /// use vers_vecs::BitVec;
201 ///
202 /// let bits = [true, false, true, true, true, true];
203 /// let bv = BitVec::from_bits_iter(bits.iter().copied());
204 ///
205 /// let bits = [0b1u8, 0b0, 0b1, 0b1, 0b1, 0b1];
206 /// let bv2 = BitVec::from_bits_iter(bits.iter().copied());
207 ///
208 /// assert_eq!(bv.len(), 6);
209 /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
210 /// assert_eq!(bv, bv2);
211 /// ```
212 ///
213 /// [`from_bits`]: BitVec::from_bits
214 /// [`from_bits_u16`]: BitVec::from_bits_u16
215 /// [`from_bits_u32`]: BitVec::from_bits_u32
216 /// [`from_bits_u64`]: BitVec::from_bits_u64
217 #[must_use]
218 pub fn from_bits_iter<I, E>(iter: I) -> Self
219 where
220 E: Into<u64>,
221 I: IntoIterator<Item = E>,
222 {
223 let iter = iter.into_iter();
224 let mut bv = Self::with_capacity(iter.size_hint().0);
225 for bit in iter {
226 bv.append_bit(bit.into());
227 }
228 bv
229 }
230
231 /// Construct a bit vector from a slice of u64 quad words.
232 /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
233 /// 64 bits to the bit vector).
234 /// Since the data is only cloned without any masking or transformation,
235 /// this is one of the fastest ways to create a bit vector.
236 ///
237 /// See also: [`from_vec`], [`from_limbs_iter`]
238 ///
239 /// # Example
240 /// ```rust
241 /// use vers_vecs::BitVec;
242 ///
243 /// let words = [0, 256, u64::MAX];
244 /// let bv = BitVec::from_limbs(&words);
245 ///
246 /// assert_eq!(bv.len(), 192);
247 /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
248 /// assert_eq!(bv.get(72), Some(1));
249 /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
250 /// ```
251 ///
252 /// [`from_vec`]: BitVec::from_vec
253 /// [`from_limbs_iter`]: BitVec::from_limbs_iter
254 #[must_use]
255 pub fn from_limbs(words: &[u64]) -> Self {
256 let len = words.len() * WORD_SIZE;
257 Self {
258 data: words.to_vec(),
259 len,
260 }
261 }
262
263 /// Construct a bit vector from an iterator of u64 quad words.
264 /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
265 /// 64 bits to the bit vector).
266 /// Since the data is only cloned without any masking or transformation,
267 /// this is one of the fastest ways to create a bit vector.
268 ///
269 /// See also: [`from_limbs`], [`from_vec`]
270 ///
271 /// # Example
272 /// ```rust
273 /// use std::iter::repeat;
274 /// use vers_vecs::BitVec;
275 ///
276 /// let zeros = repeat(0xaaaaaaaaaaaaaaaau64).take(10);
277 /// let bv = BitVec::from_limbs_iter(zeros);
278 ///
279 /// assert_eq!(bv.len(), 640);
280 /// for i in 0..640 {
281 /// assert_eq!(bv.get(i), Some((i % 2 == 1) as u64));
282 /// }
283 /// ```
284 ///
285 /// [`from_limbs`]: BitVec::from_limbs
286 /// [`from_vec`]: BitVec::from_vec
287 pub fn from_limbs_iter<I, E>(iter: I) -> Self
288 where
289 E: Into<u64>,
290 I: IntoIterator<Item = E>,
291 {
292 let vec = iter.into_iter().map(Into::into).collect();
293 Self::from_vec(vec)
294 }
295
296 /// Construct a bit vector from a vector of u64 quad words.
297 /// The quad words are interpreted as limbs of the bit vector
298 /// (i.e. each quad word contributes 64 bits to the bit vector).
299 /// Since the data is moved without any masking or transformation, this is one of the fastest ways
300 /// to create a bit vector.
301 ///
302 /// See also: [`from_limbs`], [`from_limbs_iter`]
303 ///
304 /// # Example
305 /// ```rust
306 /// use vers_vecs::BitVec;
307 ///
308 /// let words = vec![0, 256, u64::MAX];
309 /// let bv = BitVec::from_vec(words);
310 ///
311 /// assert_eq!(bv.len(), 192);
312 /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
313 /// assert_eq!(bv.get(72), Some(1));
314 /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
315 /// ```
316 ///
317 /// [`from_limbs`]: BitVec::from_limbs
318 /// [`from_limbs_iter`]: BitVec::from_limbs_iter
319 #[must_use]
320 pub fn from_vec(data: Vec<u64>) -> Self {
321 let len = data.len() * WORD_SIZE;
322 Self { data, len }
323 }
324
325 fn pack_bits<T, const MAX_BITS: usize>(sequence: &[T], bits_per_element: usize) -> Self
326 where
327 T: Into<u64> + Copy,
328 {
329 let mut bv = Self::with_capacity(sequence.len() * bits_per_element);
330 for &word in sequence {
331 if bits_per_element <= MAX_BITS {
332 bv.append_bits(word.into(), bits_per_element);
333 } else {
334 bv.append_bits(word.into(), MAX_BITS);
335 let mut rest = bits_per_element - MAX_BITS;
336 while rest > 0 {
337 bv.append_bits(0, min(rest, MAX_BITS));
338 rest = rest.saturating_sub(MAX_BITS);
339 }
340 }
341 }
342 bv
343 }
344
345 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
346 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
347 /// The number of bits per element is given by `bits_per_element`.
348 /// The sequence is given as a slice of u64 values.
349 /// If the number of bits per element is smaller than 64, the function takes the
350 /// least significant bits of each element, and discards the rest.
351 /// If the number of bits per element is larger than 64, the function will pad the elements
352 /// with zeros.
353 /// The function will append the bits of each element to the bit vector in the order they are
354 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
355 ///
356 /// See also: [`pack_sequence_u32`], [`pack_sequence_u16`], [`pack_sequence_u8`]
357 ///
358 /// # Example
359 /// ```rust
360 /// use vers_vecs::BitVec;
361 ///
362 /// let sequence = [0b1010u64, 0b1100u64, 0b1111u64];
363 /// let bv = BitVec::pack_sequence_u64(&sequence, 4);
364 ///
365 /// assert_eq!(bv.len(), 12);
366 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
367 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
368 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
369 /// ```
370 ///
371 /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
372 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
373 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
374 #[must_use]
375 pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: usize) -> Self {
376 Self::pack_bits::<_, 64>(sequence, bits_per_element)
377 }
378
379 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
380 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
381 /// The number of bits per element is given by `bits_per_element`.
382 /// The sequence is given as a slice of u32 values.
383 /// If the number of bits per element is smaller than 32, the function takes the
384 /// least significant bits of each element, and discards the rest.
385 /// If the number of bits per element is larger than 32, the function will pad the elements
386 /// with zeros.
387 /// The function will append the bits of each element to the bit vector in the order they are
388 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
389 ///
390 /// See also: [`pack_sequence_u64`], [`pack_sequence_u16`], [`pack_sequence_u8`]
391 ///
392 /// # Example
393 /// ```rust
394 /// use vers_vecs::BitVec;
395 ///
396 /// let sequence = [0b1010u32, 0b1100u32, 0b1111u32];
397 /// let bv = BitVec::pack_sequence_u32(&sequence, 4);
398 ///
399 /// assert_eq!(bv.len(), 12);
400 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
401 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
402 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
403 /// ```
404 ///
405 /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
406 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
407 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
408 #[must_use]
409 pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: usize) -> Self {
410 Self::pack_bits::<_, 32>(sequence, bits_per_element)
411 }
412
413 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
414 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
415 /// The number of bits per element is given by `bits_per_element`.
416 /// The sequence is given as a slice of u16 values.
417 /// If the number of bits per element is smaller than 16, the function takes the
418 /// least significant bits of each element, and discards the rest.
419 /// If the number of bits per element is larger than 16, the function will pad the elements
420 /// with zeros.
421 /// The function will append the bits of each element to the bit vector in the order they are
422 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
423 ///
424 /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u8`]
425 ///
426 /// # Example
427 /// ```rust
428 /// use vers_vecs::BitVec;
429 ///
430 /// let sequence = [0b1010u16, 0b1100u16, 0b1111u16];
431 /// let bv = BitVec::pack_sequence_u16(&sequence, 4);
432 ///
433 /// assert_eq!(bv.len(), 12);
434 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
435 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
436 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
437 /// ```
438 ///
439 /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
440 /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
441 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
442 #[must_use]
443 pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: usize) -> Self {
444 Self::pack_bits::<_, 16>(sequence, bits_per_element)
445 }
446
447 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
448 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
449 /// The number of bits per element is given by `bits_per_element`.
450 /// The sequence is given as a slice of u8 values.
451 /// If the number of bits per element is smaller than 8, the function takes the
452 /// least significant bits of each element, and discards the rest.
453 /// If the number of bits per element is larger than 8, the function will pad the elements
454 /// with zeros.
455 /// The function will append the bits of each element to the bit vector in the order they are
456 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
457 ///
458 /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u16`]
459 ///
460 /// # Example
461 /// ```rust
462 /// use vers_vecs::BitVec;
463 ///
464 /// let sequence = [0b1010u8, 0b1100u8, 0b1111u8];
465 /// let bv = BitVec::pack_sequence_u8(&sequence, 4);
466 ///
467 /// assert_eq!(bv.len(), 12);
468 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
469 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
470 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
471 /// ```
472 ///
473 /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
474 /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
475 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
476 #[must_use]
477 pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: usize) -> Self {
478 Self::pack_bits::<_, 8>(sequence, bits_per_element)
479 }
480
481 /// Append a bit encoded as a `bool` to the bit vector, where `true` means 1 and `false` means 0.
482 ///
483 /// See also: [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
484 ///
485 /// # Example
486 ///
487 /// ```rust
488 /// use vers_vecs::BitVec;
489 ///
490 /// let mut bv = BitVec::new();
491 /// bv.append(true);
492 ///
493 /// assert_eq!(bv.len(), 1);
494 /// assert_eq!(bv.get(0), Some(1));
495 /// ```
496 ///
497 /// [`append_bit`]: BitVec::append_bit
498 /// [`append_bit_u32`]: BitVec::append_bit_u32
499 /// [`append_bit_u16`]: BitVec::append_bit_u16
500 /// [`append_bit_u8`]: BitVec::append_bit_u8
501 /// [`append_word`]: BitVec::append_word
502 pub fn append(&mut self, bit: bool) {
503 if self.len.is_multiple_of(WORD_SIZE) {
504 self.data.push(0);
505 }
506 if bit {
507 self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
508 } else {
509 self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
510 }
511 self.len += 1;
512 }
513
514 /// Drop the last n bits from the bit vector. If more bits are dropped than the bit vector
515 /// contains, the bit vector is cleared.
516 ///
517 /// # Example
518 ///
519 /// ```rust
520 /// use vers_vecs::BitVec;
521 ///
522 /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
523 /// bv.drop_last(3);
524 ///
525 /// assert_eq!(bv.len(), 3);
526 /// assert_eq!(bv.get_bits(0, 3), Some(0b101u64));
527 ///
528 /// bv.drop_last(4);
529 ///
530 /// assert!(bv.is_empty());
531 /// ```
532 pub fn drop_last(&mut self, n: usize) {
533 if n > self.len {
534 self.data.clear();
535 self.len = 0;
536 return;
537 }
538
539 let new_limb_count = (self.len - n).div_ceil(WORD_SIZE);
540
541 // cut off limbs that we no longer need
542 if new_limb_count < self.data.len() {
543 self.data.truncate(new_limb_count);
544 }
545
546 // update bit vector length
547 self.len -= n;
548 }
549
550 /// Append a bit encoded in a u64.
551 /// The least significant bit is appended to the bit vector.
552 /// All other bits are ignored.
553 ///
554 /// See also: [`append`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
555 ///
556 /// # Example
557 ///
558 /// ```rust
559 /// use vers_vecs::BitVec;
560 ///
561 /// let mut bv = BitVec::new();
562 ///
563 /// bv.append_bit(1);
564 /// bv.append_bit(0);
565 ///
566 /// assert_eq!(bv.len(), 2);
567 /// assert_eq!(bv.get(0), Some(1));
568 /// assert_eq!(bv.get(1), Some(0));
569 /// ```
570 ///
571 /// [`append`]: BitVec::append
572 /// [`append_bit_u32`]: BitVec::append_bit_u32
573 /// [`append_bit_u16`]: BitVec::append_bit_u16
574 /// [`append_bit_u8`]: BitVec::append_bit_u8
575 /// [`append_word`]: BitVec::append_word
576 pub fn append_bit(&mut self, bit: u64) {
577 if self.len.is_multiple_of(WORD_SIZE) {
578 self.data.push(0);
579 }
580 if bit % 2 == 1 {
581 self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
582 } else {
583 self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
584 }
585
586 self.len += 1;
587 }
588
589 /// Append a bit from a u32. The least significant bit is appended to the bit vector.
590 /// All other bits are ignored.
591 ///
592 /// See also: [`append`], [`append_bit`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
593 ///
594 /// [`append`]: BitVec::append
595 /// [`append_bit`]: BitVec::append_bit
596 /// [`append_bit_u16`]: BitVec::append_bit_u16
597 /// [`append_bit_u8`]: BitVec::append_bit_u8
598 /// [`append_word`]: BitVec::append_word
599 pub fn append_bit_u32(&mut self, bit: u32) {
600 self.append_bit(u64::from(bit));
601 }
602
603 /// Append a bit from a u16. The least significant bit is appended to the bit vector.
604 /// All other bits are ignored.
605 ///
606 /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u8`], [`append_word`]
607 ///
608 /// [`append`]: BitVec::append
609 /// [`append_bit`]: BitVec::append_bit
610 /// [`append_bit_u32`]: BitVec::append_bit_u32
611 /// [`append_bit_u8`]: BitVec::append_bit_u8
612 /// [`append_word`]: BitVec::append_word
613 pub fn append_bit_u16(&mut self, bit: u16) {
614 self.append_bit(u64::from(bit));
615 }
616
617 /// Append a bit from a u8. The least significant bit is appended to the bit vector.
618 /// All other bits are ignored.
619 ///
620 /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_word`]
621 ///
622 /// [`append`]: BitVec::append
623 /// [`append_bit`]: BitVec::append_bit
624 /// [`append_bit_u32`]: BitVec::append_bit_u32
625 /// [`append_bit_u16`]: BitVec::append_bit_u16
626 /// [`append_word`]: BitVec::append_word
627 pub fn append_bit_u8(&mut self, bit: u8) {
628 self.append_bit(u64::from(bit));
629 }
630
631 /// Append a word to the bit vector. The bits are appended in little endian order (i.e. the first
632 /// bit of the word is appended first).
633 ///
634 /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`]
635 ///
636 /// # Example
637 ///
638 /// ```rust
639 /// use vers_vecs::BitVec;
640 ///
641 /// let mut bv = BitVec::new();
642 /// bv.append_word(0b1010_1010_1010_1010u64);
643 ///
644 /// assert_eq!(bv.len(), 64);
645 /// for i in 0..64 {
646 /// assert_eq!(bv.get(i), Some((0b1010_1010_1010_1010u64 >> i) & 1));
647 /// }
648 /// ```
649 ///
650 /// [`append`]: BitVec::append
651 /// [`append_bit`]: BitVec::append_bit
652 /// [`append_bit_u32`]: BitVec::append_bit_u32
653 /// [`append_bit_u16`]: BitVec::append_bit_u16
654 /// [`append_bit_u8`]: BitVec::append_bit_u8
655 pub fn append_word(&mut self, word: u64) {
656 if self.len.is_multiple_of(WORD_SIZE) {
657 self.data.push(word);
658 } else {
659 // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
660 self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
661 self.data[self.len / WORD_SIZE] |= word << (self.len % WORD_SIZE);
662
663 self.data.push(word >> (WORD_SIZE - self.len % WORD_SIZE));
664 }
665 self.len += WORD_SIZE;
666 }
667
668 /// Append multiple bits to the bit vector.
669 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
670 /// The number of bits to append is given by `len`. The bits are taken from the least
671 /// significant bits of `bits`.
672 /// All other bits are ignored.
673 ///
674 /// # Example
675 ///
676 /// ```rust
677 /// use vers_vecs::BitVec;
678 ///
679 /// let mut bv = BitVec::new();
680 /// bv.append_bits(0b1010_1010_1010_1010u64, 16);
681 ///
682 /// assert_eq!(bv.len(), 16);
683 /// assert_eq!(bv.get_bits(0, 16), Some(0b1010_1010_1010_1010u64));
684 /// ```
685 ///
686 /// # Panics
687 /// Panics if `len` is larger than 64.
688 pub fn append_bits(&mut self, bits: u64, len: usize) {
689 assert!(len <= 64, "Cannot append more than 64 bits");
690
691 if self.len.is_multiple_of(WORD_SIZE) {
692 self.data.push(bits);
693 } else {
694 // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
695 self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
696 self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
697
698 if self.len % WORD_SIZE + len > WORD_SIZE {
699 self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
700 }
701 }
702 self.len += len;
703 }
704
705 /// Append multiple bits to the bit vector.
706 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
707 /// The number of bits to append is given by `len`. The bits are taken from the least
708 /// significant bits of `bits`.
709 ///
710 /// This function does not check if `len` is larger than 64.
711 ///
712 /// Furthermore, if the bit-vector has trailing bits that are not zero
713 /// (i.e. the length is not a multiple of 64, and those bits are partially set),
714 /// the function will OR the new data with the trailing bits, destroying the appended data.
715 /// This can happen, if a call `append_bits[_unchecked](word, len)` appends a word which has
716 /// set bits beyond the `len - 1`-th bit,
717 /// or if bits have been dropped from the bit vector using [`drop_last`].
718 ///
719 /// See [`append_bits`] for a checked version of this function.
720 ///
721 /// # Panics
722 /// If `len` is larger than 64, the behavior is platform-dependent, and a processor
723 /// exception might be triggered.
724 ///
725 /// [`append_bits`]: BitVec::append_bits
726 /// [`drop_last`]: BitVec::drop_last
727 pub fn append_bits_unchecked(&mut self, bits: u64, len: usize) {
728 if self.len.is_multiple_of(WORD_SIZE) {
729 self.data.push(bits);
730 } else {
731 self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
732
733 if self.len % WORD_SIZE + len > WORD_SIZE {
734 self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
735 }
736 }
737 self.len += len;
738 }
739
740 /// Append the bits of another bit vector to the end of this vector.
741 /// If this vector does not contain a multiple of 64 bits, the appended limbs need to be
742 /// shifted to the left.
743 /// This function is guaranteed to reallocate the underlying vector at most once.
744 pub fn extend_bitvec(&mut self, other: &Self) {
745 // reserve space for the new bits, ensuring at most one re-allocation
746 self.data
747 .reserve((self.len + other.len).div_ceil(WORD_SIZE) - self.data.len());
748
749 let full_limbs = other.len() / WORD_SIZE;
750 for i in 0..full_limbs {
751 self.append_bits(other.data[i], WORD_SIZE);
752 }
753
754 let partial_bits = other.len % WORD_SIZE;
755 if partial_bits > 0 {
756 self.append_bits(other.data[full_limbs], partial_bits);
757 }
758 }
759
760 /// Return the length of the bit vector. The length is measured in bits.
761 #[must_use]
762 pub fn len(&self) -> usize {
763 self.len
764 }
765
766 /// Return whether the bit vector is empty (contains no bits).
767 #[must_use]
768 pub fn is_empty(&self) -> bool {
769 self.len == 0
770 }
771
772 /// Flip the bit at the given position.
773 ///
774 /// # Example
775 ///
776 /// ```rust
777 /// use vers_vecs::BitVec;
778 ///
779 /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
780 /// bv.flip_bit(1);
781 ///
782 /// assert_eq!(bv.len(), 6);
783 /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
784 /// ```
785 ///
786 /// # Panics
787 /// If the position is larger than the length of the vector, the function panics.
788 pub fn flip_bit(&mut self, pos: usize) {
789 assert!(pos < self.len, "Index out of bounds");
790 self.flip_bit_unchecked(pos);
791 }
792
793 /// Flip the bit at the given position.
794 ///
795 /// See also: [`flip_bit`]
796 ///
797 /// # Panics
798 /// If the position is larger than the length of the
799 /// vector, the function will either modify unused memory or panic.
800 /// This will not corrupt memory.
801 ///
802 /// [`flip_bit`]: BitVec::flip_bit
803 pub fn flip_bit_unchecked(&mut self, pos: usize) {
804 self.data[pos / WORD_SIZE] ^= 1 << (pos % WORD_SIZE);
805 }
806
807 /// Return the bit at the given position.
808 /// The bit is encoded in the least significant bit of a u64 value.
809 /// If the position is larger than the length of the vector, None is returned.
810 ///
811 /// See also: [`get_unchecked`]
812 ///
813 /// # Example
814 ///
815 /// ```rust
816 /// use vers_vecs::BitVec;
817 ///
818 /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
819 ///
820 /// assert_eq!(bv.get(1), Some(0));
821 /// assert_eq!(bv.get(2), Some(1));
822 /// ```
823 ///
824 /// [`get_unchecked`]: Self::get_unchecked
825 #[must_use]
826 pub fn get(&self, pos: usize) -> Option<u64> {
827 if pos >= self.len {
828 None
829 } else {
830 Some(self.get_unchecked(pos))
831 }
832 }
833
834 /// Return the bit at the given position.
835 /// The bit is encoded in the least significant bit of a u64 value.
836 ///
837 /// # Panics
838 /// If the position is larger than the length of the vector,
839 /// the function will either return unpredictable data, or panic.
840 /// Use [`get`] to properly handle this case with an `Option`.
841 ///
842 /// [`get`]: BitVec::get
843 #[must_use]
844 pub fn get_unchecked(&self, pos: usize) -> u64 {
845 (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1
846 }
847
848 /// Set the bit at the given position.
849 /// The bit is encoded in the least significant bit of a u64 value.
850 ///
851 /// See also: [`set_unchecked`]
852 ///
853 /// # Example
854 ///
855 /// ```rust
856 /// use vers_vecs::BitVec;
857 ///
858 /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
859 /// bv.set(1, 1).unwrap();
860 ///
861 /// assert_eq!(bv.len(), 6);
862 /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
863 /// ```
864 ///
865 /// # Errors
866 /// If the position is out of range, the function will return `Err` with an error message,
867 /// otherwise it will return an empty `Ok`.
868 ///
869 /// [`set_unchecked`]: BitVec::set_unchecked
870 pub fn set(&mut self, pos: usize, value: u64) -> Result<(), &str> {
871 if pos >= self.len {
872 Err("out of range")
873 } else {
874 self.set_unchecked(pos, value);
875 Ok(())
876 }
877 }
878
879 /// Set the bit at the given position.
880 /// The bit is encoded in the least significant bit of a u64 value.
881 ///
882 /// # Panics
883 /// If the position is larger than the length of the vector,
884 /// the function will either do nothing, or panic.
885 /// Use [`set`] to properly handle this case with a `Result`.
886 ///
887 /// [`set`]: BitVec::set
888 pub fn set_unchecked(&mut self, pos: usize, value: u64) {
889 self.data[pos / WORD_SIZE] = (self.data[pos / WORD_SIZE] & !(0x1 << (pos % WORD_SIZE)))
890 | ((value & 0x1) << (pos % WORD_SIZE));
891 }
892
893 /// Return whether the bit at the given position is set.
894 /// If the position is larger than the length of the vector, None is returned.
895 ///
896 /// See also: [`is_bit_set_unchecked`]
897 ///
898 /// # Example
899 ///
900 /// ```rust
901 /// use vers_vecs::BitVec;
902 ///
903 /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
904 ///
905 /// assert!(!bv.is_bit_set(1).unwrap());
906 /// assert!(bv.is_bit_set(2).unwrap());
907 /// ```
908 ///
909 /// [`is_bit_set_unchecked`]: BitVec::is_bit_set_unchecked
910 #[must_use]
911 pub fn is_bit_set(&self, pos: usize) -> Option<bool> {
912 if pos >= self.len {
913 None
914 } else {
915 Some(self.is_bit_set_unchecked(pos))
916 }
917 }
918
919 /// Return whether the bit at the given position is set.
920 ///
921 /// # Panics
922 /// If the position is larger than the length of the vector,
923 /// the function will either return unpredictable data, or panic.
924 /// Use [`is_bit_set`] to properly handle this case with an `Option`.
925 ///
926 /// [`is_bit_set`]: BitVec::is_bit_set
927 #[must_use]
928 pub fn is_bit_set_unchecked(&self, pos: usize) -> bool {
929 self.get_unchecked(pos) != 0
930 }
931
932 /// Return multiple bits at the given position.
933 /// The number of bits to return is given by `len`.
934 /// At most 64 bits can be returned.
935 /// If the position at the end of the query is larger than the length of the vector,
936 /// None is returned (even if the query partially overlaps with the vector).
937 /// If the length of the query is larger than 64, None is returned.
938 ///
939 /// The first bit at `pos` is the most significant bit of the return value
940 /// limited to `len` bits.
941 #[must_use]
942 pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> {
943 if len > WORD_SIZE || len == 0 {
944 return None;
945 }
946 if pos + len > self.len {
947 None
948 } else {
949 Some(self.get_bits_unchecked(pos, len))
950 }
951 }
952
953 /// Return multiple bits at the given position. The number of bits to return is given by `len`.
954 /// At most 64 bits can be returned.
955 ///
956 /// This function is always inlined, because it gains a lot from loop optimization and
957 /// can utilize the processor pre-fetcher better if it is.
958 ///
959 /// # Errors
960 /// If the length of the query is larger than 64, unpredictable data will be returned.
961 /// Use [`get_bits`] to avoid this.
962 ///
963 /// # Panics
964 /// If the position or interval is larger than the length of the vector,
965 /// the function will either return any valid results padded with unpredictable
966 /// data or panic.
967 ///
968 /// [`get_bits`]: BitVec::get_bits
969 #[must_use]
970 #[allow(clippy::inline_always)]
971 #[allow(clippy::comparison_chain)] // readability
972 #[inline(always)] // inline to gain loop optimization and pipeline advantages for elias fano
973 #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen
974 pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 {
975 debug_assert!(len <= WORD_SIZE);
976 let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE);
977 if pos % WORD_SIZE + len <= WORD_SIZE {
978 partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
979 } else {
980 (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE)))
981 & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
982 }
983 }
984
985 /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
986 /// `index`. The number of bits per encoded element is given by `n`.
987 ///
988 /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
989 /// and is equivalent to calling [`get_bits(index * n, n)`].
990 /// It is thus safe to use this method with any index and any size n <= 64.
991 ///
992 /// If the element is out of bounds, None is returned.
993 /// The element is returned as a u64 value.
994 ///
995 /// # Example
996 /// ```rust
997 /// use vers_vecs::BitVec;
998 ///
999 /// let sequence = [10, 100, 124, 45, 223];
1000 /// let bv = BitVec::pack_sequence_u64(&sequence, 8);
1001 ///
1002 /// assert_eq!(bv.unpack_element(0, 8), Some(10));
1003 /// assert_eq!(bv.unpack_element(2, 8), Some(124));
1004 /// ```
1005 ///
1006 /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1007 /// [`get_bits(index * n, n)`]: BitVec::get_bits
1008 #[must_use]
1009 #[allow(clippy::inline_always)]
1010 #[inline(always)] // to gain optimization if n is constant
1011 pub fn unpack_element(&self, index: usize, n: usize) -> Option<u64> {
1012 self.get_bits(index * n, n)
1013 }
1014
1015 /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
1016 /// `index`. The number of bits per encoded element is given by `n`.
1017 ///
1018 /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
1019 /// and is equivalent to calling [`get_bits_unchecked(index * n, n)`].
1020 /// It is thus safe to use this method with any index where `index * n + n` is in-bounds,
1021 /// and any size n <= 64.
1022 ///
1023 /// # Panics
1024 /// If the element is out of bounds, the function will either return unpredictable data or panic.
1025 /// Use [`unpack_element`] for a checked version of this function.
1026 ///
1027 /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1028 /// [`get_bits_unchecked(index * n, n)`]: BitVec::get_bits_unchecked
1029 /// [`unpack_element`]: BitVec::unpack_element
1030 #[must_use]
1031 #[allow(clippy::inline_always)]
1032 #[inline(always)] // to gain optimization if n is constant
1033 pub fn unpack_element_unchecked(&self, index: usize, n: usize) -> u64 {
1034 self.get_bits_unchecked(index * n, n)
1035 }
1036
1037 /// Return the number of ones in the bit vector. Since the bit vector doesn't store additional
1038 /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1039 ///
1040 /// [`RsVec`]: crate::RsVec
1041 #[must_use]
1042 #[allow(clippy::missing_panics_doc)] // can't panic because of manual bounds check
1043 pub fn count_ones(&self) -> u64 {
1044 let mut ones: u64 = self.data[0..self.len / WORD_SIZE]
1045 .iter()
1046 .map(|limb| u64::from(limb.count_ones()))
1047 .sum();
1048 if !self.len.is_multiple_of(WORD_SIZE) {
1049 ones += u64::from(
1050 (self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones(),
1051 );
1052 }
1053 ones
1054 }
1055
1056 /// Return the number of zeros in the bit vector. Since the bit vector doesn't store additional
1057 /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1058 /// This method calls [`count_ones`].
1059 ///
1060 /// [`RsVec`]: crate::RsVec
1061 /// [`count_ones`]: BitVec::count_ones
1062 #[must_use]
1063 pub fn count_zeros(&self) -> u64 {
1064 self.len as u64 - self.count_ones()
1065 }
1066
1067 /// Mask this bit vector with another bitvector using bitwise or. The mask is applied lazily
1068 /// whenever an operation on the resulting vector is performed.
1069 ///
1070 /// # Errors
1071 /// Returns an error if the length of the vector doesn't match the mask length.
1072 #[inline]
1073 pub fn mask_or<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1074 MaskedBitVec::new(self, mask, |a, b| a | b)
1075 }
1076
1077 /// Mask this bit vector with another bitvector using bitwise or.
1078 /// The mask is applied immediately, unlike in [`mask_or`].
1079 ///
1080 /// # Errors
1081 /// Returns an error if the length of the vector doesn't match the mask length.
1082 ///
1083 /// [`mask_or`]: BitVec::mask_or
1084 pub fn apply_mask_or(&mut self, mask: &BitVec) -> Result<(), String> {
1085 if self.len != mask.len {
1086 return Err(String::from(
1087 "mask cannot have different length than vector",
1088 ));
1089 }
1090
1091 for i in 0..self.data.len() {
1092 self.data[i] |= mask.data[i];
1093 }
1094
1095 Ok(())
1096 }
1097
1098 /// Mask this bit vector with another bitvector using bitwise and. The mask is applied lazily
1099 /// whenever an operation on the resulting vector is performed.
1100 ///
1101 /// # Errors
1102 /// Returns an error if the length of the vector doesn't match the mask length.
1103 #[inline]
1104 pub fn mask_and<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1105 MaskedBitVec::new(self, mask, |a, b| a & b)
1106 }
1107
1108 /// Mask this bit vector with another bitvector using bitwise and.
1109 /// The mask is applied immediately, unlike in [`mask_and`].
1110 ///
1111 /// # Errors
1112 /// Returns an error if the length of the vector doesn't match the mask length.
1113 ///
1114 /// [`mask_and`]: BitVec::mask_and
1115 pub fn apply_mask_and(&mut self, mask: &BitVec) -> Result<(), String> {
1116 if self.len != mask.len {
1117 return Err(String::from(
1118 "mask cannot have different length than vector",
1119 ));
1120 }
1121
1122 for i in 0..self.data.len() {
1123 self.data[i] &= mask.data[i];
1124 }
1125
1126 Ok(())
1127 }
1128
1129 /// Mask this bit vector with another bitvector using bitwise xor. The mask is applied lazily
1130 /// whenever an operation on the resulting vector is performed.
1131 ///
1132 /// # Errors
1133 /// Returns an error if the length of the vector doesn't match the mask length.
1134 #[inline]
1135 pub fn mask_xor<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1136 MaskedBitVec::new(self, mask, |a, b| a ^ b)
1137 }
1138
1139 /// Mask this bit vector with another bitvector using bitwise xor.
1140 /// The mask is applied immediately, unlike in [`mask_xor`].
1141 ///
1142 /// # Errors
1143 /// Returns an error if the length of the vector doesn't match the mask length.
1144 ///
1145 /// [`mask_xor`]: BitVec::mask_xor
1146 pub fn apply_mask_xor(&mut self, mask: &BitVec) -> Result<(), String> {
1147 if self.len != mask.len {
1148 return Err(String::from(
1149 "mask cannot have different length than vector",
1150 ));
1151 }
1152
1153 for i in 0..self.data.len() {
1154 self.data[i] ^= mask.data[i];
1155 }
1156
1157 Ok(())
1158 }
1159
1160 /// Mask this bit vector with another bitvector using a custom masking operation. The mask is
1161 /// applied lazily whenever an operation on the resulting vector is performed.
1162 ///
1163 /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1164 /// The last block of a bit vector might contain fewer bits, and will be padded with
1165 /// unpredictable data. Implementations may choose to modify those padding bits without
1166 /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1167 /// within the vector is unspecified.
1168 ///
1169 /// # Errors
1170 /// Returns an error if the length of the vector doesn't match the mask length.
1171 #[inline]
1172 pub fn mask_custom<'s, 'b, F>(
1173 &'s self,
1174 mask: &'b BitVec,
1175 mask_op: F,
1176 ) -> Result<MaskedBitVec<'s, 'b, F>, String>
1177 where
1178 F: Fn(u64, u64) -> u64,
1179 {
1180 MaskedBitVec::new(self, mask, mask_op)
1181 }
1182
1183 /// Mask this bit vector with another bitvector using a custom masking operation.
1184 /// The mask is applied immediately, unlike in [`mask_custom`].
1185 ///
1186 /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1187 /// The last block of a bit vector might contain fewer bits, and will be padded with
1188 /// unpredictable data. Implementations may choose to modify those padding bits without
1189 /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1190 /// within the vector is unspecified.
1191 ///
1192 /// # Errors
1193 /// Returns an error if the length of the vector doesn't match the mask length.
1194 ///
1195 /// [`mask_custom`]: BitVec::mask_custom
1196 #[inline]
1197 pub fn apply_mask_custom(
1198 &mut self,
1199 mask: &BitVec,
1200 mask_op: fn(u64, u64) -> u64,
1201 ) -> Result<(), String> {
1202 if self.len != mask.len {
1203 return Err(String::from(
1204 "mask cannot have different length than vector",
1205 ));
1206 }
1207
1208 for i in 0..self.data.len() {
1209 self.data[i] = mask_op(self.data[i], mask.data[i]);
1210 }
1211
1212 Ok(())
1213 }
1214
1215 /// Returns the number of bytes on the heap for this vector.
1216 /// Does not include allocated memory that isn't used.
1217 #[must_use]
1218 pub fn heap_size(&self) -> usize {
1219 self.data.len() * size_of::<u64>()
1220 }
1221
1222 /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1223 /// right half the remaining bits `at..`. If the split index is larger than the length of the
1224 /// vector, the vector is returned unmodified in an `Err` variant.
1225 ///
1226 /// # Errors
1227 /// If the index is out of bounds, the function will return an error
1228 /// containing the original vector.
1229 ///
1230 /// See also: [`split_at_unchecked`]
1231 ///
1232 /// [`split_at_unchecked`]: Self::split_at_unchecked
1233 pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> {
1234 if at > self.len {
1235 Err(self)
1236 } else {
1237 Ok(self.split_at_unchecked(at))
1238 }
1239 }
1240
1241 /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1242 /// right half the remaining bits `at..`.
1243 ///
1244 /// # Panics
1245 /// If the index is larger than the length of the vector the function will panic or run
1246 /// out of memory.
1247 /// Use [`split_at`] to properly handle this case.
1248 ///
1249 /// [`split_at`]: Self::split_at
1250 #[must_use]
1251 pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) {
1252 let other_len = self.len - at;
1253 let mut other = Self::with_capacity(other_len);
1254
1255 if other_len == 0 {
1256 return (self, other);
1257 }
1258
1259 let first_limb = at / WORD_SIZE;
1260 let last_limb = self.len / WORD_SIZE;
1261
1262 // First, we figure out the number of bits from the first limb to retain in this vector:
1263 let leading_partial = at % WORD_SIZE;
1264
1265 // If the split point is in the last limb, and the vector ends before the last bit, first_limb
1266 // and last_limb will be equal, and the other half is simply other_len bits off the limb
1267 // right shifted by the number of bits to retain in this vector.
1268 if first_limb == last_limb {
1269 other.append_bits_unchecked(self.data[first_limb] >> leading_partial, other_len);
1270 } else {
1271 // Otherwise, some range n..last_limb should be copied in their entirety to the other half,
1272 // with n=first_limb+1 if the split point is inside the first limb (leading_partial > 0), or
1273 // n=first_limb if the entire first limb belongs in the other half.
1274 let full_limbs = if leading_partial > 0 {
1275 // If the split point is inside the first limb, we also have to remember to copy over
1276 // the trailing bits to the new vector.
1277 other.append_bits_unchecked(
1278 self.data[first_limb] >> leading_partial,
1279 WORD_SIZE - leading_partial,
1280 );
1281 first_limb + 1..last_limb
1282 } else {
1283 first_limb..last_limb
1284 };
1285
1286 // Copy over any full limbs.
1287 for i in full_limbs {
1288 other.append_bits_unchecked(self.data[i], WORD_SIZE);
1289 }
1290
1291 // Finally, if the vector has a partially filled last limb, we need to put those bits
1292 // in the other half.
1293 let trailing_partial = self.len % WORD_SIZE;
1294 if trailing_partial > 0 {
1295 other.append_bits_unchecked(self.data[last_limb], trailing_partial);
1296 }
1297 }
1298
1299 // remove the copied bits from the original vector
1300 self.drop_last(other_len);
1301
1302 (self, other)
1303 }
1304}
1305
1306impl_vector_iterator! { BitVec, BitVecIter, BitVecRefIter }
1307
1308/// Create a new bit vector from a slice of u64 values.
1309/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1310/// The function will append the bits of each element to the bit vector in the order they are
1311/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1312impl From<&[u64]> for BitVec {
1313 fn from(data: &[u64]) -> Self {
1314 BitVec::from_limbs(data)
1315 }
1316}
1317
1318/// Create a new bit vector from a slice of u64 values.
1319/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1320/// The function will append the bits of each element to the bit vector in the order they are
1321/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1322impl From<Vec<u64>> for BitVec {
1323 fn from(data: Vec<u64>) -> Self {
1324 BitVec::from_limbs(&data)
1325 }
1326}
1327
1328impl Extend<BitVec> for BitVec {
1329 fn extend<T: IntoIterator<Item = BitVec>>(&mut self, iter: T) {
1330 for v in iter {
1331 self.extend_bitvec(&v)
1332 }
1333 }
1334}
1335
1336impl<'t> Extend<&'t BitVec> for BitVec {
1337 fn extend<T: IntoIterator<Item = &'t BitVec>>(&mut self, iter: T) {
1338 for v in iter {
1339 self.extend_bitvec(v)
1340 }
1341 }
1342}
1343
1344/// Create a new bit vector from u64 values.
1345/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1346/// The function will append the bits of each element to the bit vector in the order they are
1347/// given in the iterator (i.e. the first element takes bits `0..64` of the vector).
1348impl FromIterator<u64> for BitVec {
1349 fn from_iter<T: IntoIterator<Item = u64>>(iter: T) -> Self {
1350 BitVec::from_limbs_iter(iter)
1351 }
1352}
1353
1354impl PartialEq for BitVec {
1355 // unlike the auto-derived implementation, this custom implementation ignores junk data at
1356 // the end of the bit vector
1357 fn eq(&self, other: &Self) -> bool {
1358 if self.len != other.len {
1359 return false;
1360 }
1361
1362 if self.len == 0 {
1363 return true;
1364 }
1365
1366 for i in 0..self.data.len() - 1 {
1367 if self.data[i] != other.data[i] {
1368 return false;
1369 }
1370 }
1371
1372 // in last limb, ignore junk data
1373 let mask = (1 << (self.len % WORD_SIZE)) - 1;
1374 if self.data[self.data.len() - 1] & mask != other.data[self.data.len() - 1] & mask {
1375 return false;
1376 }
1377
1378 true
1379 }
1380}
1381
1382impl Eq for BitVec {}
1383
1384impl Hash for BitVec {
1385 fn hash<H: Hasher>(&self, state: &mut H) {
1386 state.write_usize(self.len);
1387 if self.len > 0 {
1388 self.data[0..self.data.len() - 1]
1389 .iter()
1390 .for_each(|x| state.write_u64(*x));
1391 let masked_last_limb = self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1);
1392 state.write_u64(masked_last_limb);
1393 }
1394 }
1395}
1396
1397#[cfg(test)]
1398mod tests;