vers_vecs/bit_vec/mod.rs
1//! This module contains a simple [bit vector][BitVec] implementation with no overhead and a fast succinct
2//! bit vector implementation with [rank and select queries][fast_rs_vec::RsVec].
3
4use crate::bit_vec::mask::MaskedBitVec;
5use crate::util::impl_vector_iterator;
6use std::cmp::min;
7use std::mem::size_of;
8
9pub mod fast_rs_vec;
10
11pub mod sparse;
12
13pub mod mask;
14
15/// Size of a word in bitvectors. All vectors operate on 64-bit words.
16const WORD_SIZE: usize = 64;
17
18/// Type alias for masked bitvectors that implement a simple bitwise binary operation.
19/// The first lifetime is for the bit vector that is being masked, the second lifetime is for the
20/// mask.
21pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>;
22
23/// A simple bit vector that does not support rank and select queries.
24/// Bits are stored in little-endian order, i.e. the least significant bit is stored first.
25/// The bit vector is stored as a sequence of 64 bit limbs.
26/// The last limb may be partially filled.
27///
28/// The bit vector has a wide range of constructors that allow for easy creation from various
29/// sources.
30/// Among them are constructors for creating an empty vector ([`BitVec::new`]),
31/// creating one from single bits of various integer types ([`BitVec::from_bits`] and variations),
32/// creating limbs from u64 values directly ([`BitVec::from_limbs`] and variations),
33/// or packing a sequence of numerical values into a dense bit sequence
34/// ([`BitVec::pack_sequence_u64`] and variations).
35///
36/// The bit vector can be modified after creation
37/// (e.g. by appending [bits](BitVec::append_bits)
38/// or [words](BitVec::append_word),
39/// [flipping](BitVec::flip_bit),
40/// or [setting](BitVec::set) bits).
41/// Bits can be [accessed](BitVec::get) by position,
42/// and [multiple bits](BitVec::get_bits) can be accessed at once.
43/// Bits can be [dropped](BitVec::drop_last) from the end.
44///
45/// # Example
46/// ```rust
47/// use vers_vecs::{BitVec, RsVec};
48///
49/// let mut bit_vec = BitVec::new();
50/// bit_vec.append_bit(0u64);
51/// bit_vec.append_bit_u32(1u32);
52/// bit_vec.append_word(0b1010_1010_1010_1010u64); // appends exactly 64 bits
53///
54/// assert_eq!(bit_vec.len(), 66);
55/// assert_eq!(bit_vec.get(0), Some(0u64));
56/// assert_eq!(bit_vec.get(1), Some(1u64));
57/// ```
58#[derive(Clone, Debug, Default, Eq, PartialEq, Hash)]
59#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
60pub struct BitVec {
61 data: Vec<u64>,
62 len: usize,
63}
64
65impl BitVec {
66 /// Create a new empty bit vector.
67 #[must_use]
68 pub fn new() -> Self {
69 Self::default()
70 }
71
72 /// Create a new empty bit vector with the given capacity.
73 /// The capacity is measured in bits.
74 /// The bit vector will be able to hold at least `capacity` bits without reallocating.
75 /// More memory may be allocated according to the underlying allocation strategy.
76 #[must_use]
77 pub fn with_capacity(capacity: usize) -> Self {
78 Self {
79 data: Vec::with_capacity(capacity / WORD_SIZE + 1),
80 len: 0,
81 }
82 }
83
84 /// Create a new bit vector with all zeros and the given length.
85 /// The length is measured in bits.
86 #[must_use]
87 pub fn from_zeros(len: usize) -> Self {
88 let mut data = vec![0; len / WORD_SIZE];
89 if len % WORD_SIZE != 0 {
90 data.push(0);
91 }
92 Self { data, len }
93 }
94
95 /// Create a new bit vector with all ones and the given length.
96 /// The length is measured in bits.
97 #[must_use]
98 pub fn from_ones(len: usize) -> Self {
99 let mut data = vec![u64::MAX; len / WORD_SIZE];
100 if len % WORD_SIZE != 0 {
101 data.push((1 << (len % WORD_SIZE)) - 1);
102 }
103 Self { data, len }
104 }
105
106 /// Construct a bit vector from a set of bits given as distinct u8 values.
107 /// The constructor will take the least significant bit from each value and append it to a
108 /// bit vector.
109 /// All other bits are ignored.
110 ///
111 /// See also: [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
112 ///
113 /// # Example
114 /// ```rust
115 /// use vers_vecs::BitVec;
116 ///
117 /// let bits: &[u8] = &[1, 0, 1, 1, 1, 1];
118 /// let bv = BitVec::from_bits(&bits);
119 ///
120 /// assert_eq!(bv.len(), 6);
121 /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
122 /// ```
123 ///
124 /// [`from_bits_u16`]: BitVec::from_bits_u16
125 /// [`from_bits_u32`]: BitVec::from_bits_u32
126 /// [`from_bits_u64`]: BitVec::from_bits_u64
127 /// [`from_bits_iter`]: BitVec::from_bits_iter
128 #[must_use]
129 pub fn from_bits(bits: &[u8]) -> Self {
130 let mut bv = Self::with_capacity(bits.len());
131 bits.iter().for_each(|&b| bv.append_bit(b.into()));
132 bv
133 }
134
135 /// Construct a bit vector from a set of bits given as distinct u16 values.
136 /// The constructor will take the least significant bit from each value and append it to a
137 /// bit vector.
138 /// All other bits are ignored.
139 ///
140 /// See also: [`from_bits`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
141 ///
142 /// [`from_bits`]: BitVec::from_bits
143 /// [`from_bits_u32`]: BitVec::from_bits_u32
144 /// [`from_bits_u64`]: BitVec::from_bits_u64
145 /// [`from_bits_iter`]: BitVec::from_bits_iter
146 #[must_use]
147 pub fn from_bits_u16(bits: &[u16]) -> Self {
148 let mut bv = Self::with_capacity(bits.len());
149 bits.iter().for_each(|&b| bv.append_bit_u16(b));
150 bv
151 }
152
153 /// Construct a bit vector from a set of bits given as distinct u32 values.
154 /// The constructor will take the least significant bit from each value and append it to a
155 /// bit vector.
156 /// All other bits are ignored.
157 ///
158 /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u64`], [`from_bits_iter`]
159 ///
160 /// [`from_bits`]: BitVec::from_bits
161 /// [`from_bits_u16`]: BitVec::from_bits_u16
162 /// [`from_bits_u64`]: BitVec::from_bits_u64
163 /// [`from_bits_iter`]: BitVec::from_bits_iter
164 #[must_use]
165 pub fn from_bits_u32(bits: &[u32]) -> Self {
166 let mut bv = Self::with_capacity(bits.len());
167 bits.iter().for_each(|&b| bv.append_bit_u32(b));
168 bv
169 }
170
171 /// Construct a bit vector from a set of bits given as distinct u64 values.
172 /// The constructor will take the least significant bit from each value and append it to a
173 /// bit vector.
174 /// All other bits are ignored.
175 ///
176 /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_iter`]
177 ///
178 /// [`from_bits`]: BitVec::from_bits
179 /// [`from_bits_u16`]: BitVec::from_bits_u16
180 /// [`from_bits_u32`]: BitVec::from_bits_u32
181 /// [`from_bits_iter`]: BitVec::from_bits_iter
182 #[must_use]
183 pub fn from_bits_u64(bits: &[u64]) -> Self {
184 let mut bv = Self::with_capacity(bits.len());
185 bits.iter().for_each(|&b| bv.append_bit(b));
186 bv
187 }
188
189 /// Construct a bit vector from an iterator of bits.
190 /// The constructor will take the least significant bit from each value and append it to a
191 /// bit vector.
192 /// All other bits are ignored.
193 /// The iterator must yield values that can be converted into u64 values.
194 ///
195 /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`]
196 ///
197 /// # Example
198 /// ```rust
199 /// use vers_vecs::BitVec;
200 ///
201 /// let bits = [true, false, true, true, true, true];
202 /// let bv = BitVec::from_bits_iter(bits.iter().copied());
203 ///
204 /// let bits = [0b1u8, 0b0, 0b1, 0b1, 0b1, 0b1];
205 /// let bv2 = BitVec::from_bits_iter(bits.iter().copied());
206 ///
207 /// assert_eq!(bv.len(), 6);
208 /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
209 /// assert_eq!(bv, bv2);
210 /// ```
211 ///
212 /// [`from_bits`]: BitVec::from_bits
213 /// [`from_bits_u16`]: BitVec::from_bits_u16
214 /// [`from_bits_u32`]: BitVec::from_bits_u32
215 /// [`from_bits_u64`]: BitVec::from_bits_u64
216 #[must_use]
217 pub fn from_bits_iter<I, E>(iter: I) -> Self
218 where
219 E: Into<u64>,
220 I: IntoIterator<Item = E>,
221 {
222 let iter = iter.into_iter();
223 let mut bv = Self::with_capacity(iter.size_hint().0);
224 for bit in iter {
225 bv.append_bit(bit.into());
226 }
227 bv
228 }
229
230 /// Construct a bit vector from a slice of u64 quad words.
231 /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
232 /// 64 bits to the bit vector).
233 /// Since the data is only cloned without any masking or transformation,
234 /// this is one of the fastest ways to create a bit vector.
235 ///
236 /// See also: [`from_vec`], [`from_limbs_iter`]
237 ///
238 /// # Example
239 /// ```rust
240 /// use vers_vecs::BitVec;
241 ///
242 /// let words = [0, 256, u64::MAX];
243 /// let bv = BitVec::from_limbs(&words);
244 ///
245 /// assert_eq!(bv.len(), 192);
246 /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
247 /// assert_eq!(bv.get(72), Some(1));
248 /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
249 /// ```
250 ///
251 /// [`from_vec`]: BitVec::from_vec
252 /// [`from_limbs_iter`]: BitVec::from_limbs_iter
253 #[must_use]
254 pub fn from_limbs(words: &[u64]) -> Self {
255 let len = words.len() * WORD_SIZE;
256 Self {
257 data: words.to_vec(),
258 len,
259 }
260 }
261
262 /// Construct a bit vector from an iterator of u64 quad words.
263 /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
264 /// 64 bits to the bit vector).
265 /// Since the data is only cloned without any masking or transformation,
266 /// this is one of the fastest ways to create a bit vector.
267 ///
268 /// See also: [`from_limbs`], [`from_vec`]
269 ///
270 /// # Example
271 /// ```rust
272 /// use std::iter::repeat;
273 /// use vers_vecs::BitVec;
274 ///
275 /// let zeros = repeat(0xaaaaaaaaaaaaaaaau64).take(10);
276 /// let bv = BitVec::from_limbs_iter(zeros);
277 ///
278 /// assert_eq!(bv.len(), 640);
279 /// for i in 0..640 {
280 /// assert_eq!(bv.get(i), Some((i % 2 == 1) as u64));
281 /// }
282 /// ```
283 ///
284 /// [`from_limbs`]: BitVec::from_limbs
285 /// [`from_vec`]: BitVec::from_vec
286 pub fn from_limbs_iter<I, E>(iter: I) -> Self
287 where
288 E: Into<u64>,
289 I: IntoIterator<Item = E>,
290 {
291 let vec = iter.into_iter().map(Into::into).collect();
292 Self::from_vec(vec)
293 }
294
295 /// Construct a bit vector from a vector of u64 quad words.
296 /// The quad words are interpreted as limbs of the bit vector
297 /// (i.e. each quad word contributes 64 bits to the bit vector).
298 /// Since the data is moved without any masking or transformation, this is one of the fastest ways
299 /// to create a bit vector.
300 ///
301 /// See also: [`from_limbs`], [`from_limbs_iter`]
302 ///
303 /// # Example
304 /// ```rust
305 /// use vers_vecs::BitVec;
306 ///
307 /// let words = vec![0, 256, u64::MAX];
308 /// let bv = BitVec::from_vec(words);
309 ///
310 /// assert_eq!(bv.len(), 192);
311 /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
312 /// assert_eq!(bv.get(72), Some(1));
313 /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
314 /// ```
315 ///
316 /// [`from_limbs`]: BitVec::from_limbs
317 /// [`from_limbs_iter`]: BitVec::from_limbs_iter
318 #[must_use]
319 pub fn from_vec(data: Vec<u64>) -> Self {
320 let len = data.len() * WORD_SIZE;
321 Self { data, len }
322 }
323
324 fn pack_bits<T, const MAX_BITS: usize>(sequence: &[T], bits_per_element: usize) -> Self
325 where
326 T: Into<u64> + Copy,
327 {
328 let mut bv = Self::with_capacity(sequence.len() * bits_per_element);
329 for &word in sequence {
330 if bits_per_element <= MAX_BITS {
331 bv.append_bits(word.into(), bits_per_element);
332 } else {
333 bv.append_bits(word.into(), MAX_BITS);
334 let mut rest = bits_per_element - MAX_BITS;
335 while rest > 0 {
336 bv.append_bits(0, min(rest, MAX_BITS));
337 rest = rest.saturating_sub(MAX_BITS);
338 }
339 }
340 }
341 bv
342 }
343
344 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
345 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
346 /// The number of bits per element is given by `bits_per_element`.
347 /// The sequence is given as a slice of u64 values.
348 /// If the number of bits per element is smaller than 64, the function takes the
349 /// least significant bits of each element, and discards the rest.
350 /// If the number of bits per element is larger than 64, the function will pad the elements
351 /// with zeros.
352 /// The function will append the bits of each element to the bit vector in the order they are
353 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
354 ///
355 /// See also: [`pack_sequence_u32`], [`pack_sequence_u16`], [`pack_sequence_u8`]
356 ///
357 /// # Example
358 /// ```rust
359 /// use vers_vecs::BitVec;
360 ///
361 /// let sequence = [0b1010u64, 0b1100u64, 0b1111u64];
362 /// let bv = BitVec::pack_sequence_u64(&sequence, 4);
363 ///
364 /// assert_eq!(bv.len(), 12);
365 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
366 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
367 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
368 /// ```
369 ///
370 /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
371 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
372 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
373 #[must_use]
374 pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: usize) -> Self {
375 Self::pack_bits::<_, 64>(sequence, bits_per_element)
376 }
377
378 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
379 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
380 /// The number of bits per element is given by `bits_per_element`.
381 /// The sequence is given as a slice of u32 values.
382 /// If the number of bits per element is smaller than 32, the function takes the
383 /// least significant bits of each element, and discards the rest.
384 /// If the number of bits per element is larger than 32, the function will pad the elements
385 /// with zeros.
386 /// The function will append the bits of each element to the bit vector in the order they are
387 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
388 ///
389 /// See also: [`pack_sequence_u64`], [`pack_sequence_u16`], [`pack_sequence_u8`]
390 ///
391 /// # Example
392 /// ```rust
393 /// use vers_vecs::BitVec;
394 ///
395 /// let sequence = [0b1010u32, 0b1100u32, 0b1111u32];
396 /// let bv = BitVec::pack_sequence_u32(&sequence, 4);
397 ///
398 /// assert_eq!(bv.len(), 12);
399 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
400 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
401 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
402 /// ```
403 ///
404 /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
405 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
406 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
407 #[must_use]
408 pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: usize) -> Self {
409 Self::pack_bits::<_, 32>(sequence, bits_per_element)
410 }
411
412 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
413 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
414 /// The number of bits per element is given by `bits_per_element`.
415 /// The sequence is given as a slice of u16 values.
416 /// If the number of bits per element is smaller than 16, the function takes the
417 /// least significant bits of each element, and discards the rest.
418 /// If the number of bits per element is larger than 16, the function will pad the elements
419 /// with zeros.
420 /// The function will append the bits of each element to the bit vector in the order they are
421 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
422 ///
423 /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u8`]
424 ///
425 /// # Example
426 /// ```rust
427 /// use vers_vecs::BitVec;
428 ///
429 /// let sequence = [0b1010u16, 0b1100u16, 0b1111u16];
430 /// let bv = BitVec::pack_sequence_u16(&sequence, 4);
431 ///
432 /// assert_eq!(bv.len(), 12);
433 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
434 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
435 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
436 /// ```
437 ///
438 /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
439 /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
440 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
441 #[must_use]
442 pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: usize) -> Self {
443 Self::pack_bits::<_, 16>(sequence, bits_per_element)
444 }
445
446 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
447 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
448 /// The number of bits per element is given by `bits_per_element`.
449 /// The sequence is given as a slice of u8 values.
450 /// If the number of bits per element is smaller than 8, the function takes the
451 /// least significant bits of each element, and discards the rest.
452 /// If the number of bits per element is larger than 8, the function will pad the elements
453 /// with zeros.
454 /// The function will append the bits of each element to the bit vector in the order they are
455 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
456 ///
457 /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u16`]
458 ///
459 /// # Example
460 /// ```rust
461 /// use vers_vecs::BitVec;
462 ///
463 /// let sequence = [0b1010u8, 0b1100u8, 0b1111u8];
464 /// let bv = BitVec::pack_sequence_u8(&sequence, 4);
465 ///
466 /// assert_eq!(bv.len(), 12);
467 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
468 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
469 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
470 /// ```
471 ///
472 /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
473 /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
474 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
475 #[must_use]
476 pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: usize) -> Self {
477 Self::pack_bits::<_, 8>(sequence, bits_per_element)
478 }
479
480 /// Append a bit encoded as a `bool` to the bit vector, where `true` means 1 and `false` means 0.
481 ///
482 /// See also: [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
483 ///
484 /// # Example
485 ///
486 /// ```rust
487 /// use vers_vecs::BitVec;
488 ///
489 /// let mut bv = BitVec::new();
490 /// bv.append(true);
491 ///
492 /// assert_eq!(bv.len(), 1);
493 /// assert_eq!(bv.get(0), Some(1));
494 /// ```
495 ///
496 /// [`append_bit`]: BitVec::append_bit
497 /// [`append_bit_u32`]: BitVec::append_bit_u32
498 /// [`append_bit_u16`]: BitVec::append_bit_u16
499 /// [`append_bit_u8`]: BitVec::append_bit_u8
500 /// [`append_word`]: BitVec::append_word
501 pub fn append(&mut self, bit: bool) {
502 if self.len % WORD_SIZE == 0 {
503 self.data.push(0);
504 }
505 if bit {
506 self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
507 } else {
508 self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
509 }
510 self.len += 1;
511 }
512
513 /// Drop the last n bits from the bit vector. If more bits are dropped than the bit vector
514 /// contains, the bit vector is cleared.
515 ///
516 /// # Example
517 ///
518 /// ```rust
519 /// use vers_vecs::BitVec;
520 ///
521 /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
522 /// bv.drop_last(3);
523 ///
524 /// assert_eq!(bv.len(), 3);
525 /// assert_eq!(bv.get_bits(0, 3), Some(0b101u64));
526 ///
527 /// bv.drop_last(4);
528 ///
529 /// assert!(bv.is_empty());
530 /// ```
531 pub fn drop_last(&mut self, n: usize) {
532 if n > self.len {
533 self.data.clear();
534 self.len = 0;
535 return;
536 }
537
538 let new_limb_count = (self.len - n).div_ceil(WORD_SIZE);
539
540 // cut off limbs that we no longer need
541 if new_limb_count < self.data.len() {
542 self.data.truncate(new_limb_count);
543 }
544
545 // update bit vector length
546 self.len -= n;
547 }
548
549 /// Append a bit encoded in a u64.
550 /// The least significant bit is appended to the bit vector.
551 /// All other bits are ignored.
552 ///
553 /// See also: [`append`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
554 ///
555 /// # Example
556 ///
557 /// ```rust
558 /// use vers_vecs::BitVec;
559 ///
560 /// let mut bv = BitVec::new();
561 ///
562 /// bv.append_bit(1);
563 /// bv.append_bit(0);
564 ///
565 /// assert_eq!(bv.len(), 2);
566 /// assert_eq!(bv.get(0), Some(1));
567 /// assert_eq!(bv.get(1), Some(0));
568 /// ```
569 ///
570 /// [`append`]: BitVec::append
571 /// [`append_bit_u32`]: BitVec::append_bit_u32
572 /// [`append_bit_u16`]: BitVec::append_bit_u16
573 /// [`append_bit_u8`]: BitVec::append_bit_u8
574 /// [`append_word`]: BitVec::append_word
575 pub fn append_bit(&mut self, bit: u64) {
576 if self.len % WORD_SIZE == 0 {
577 self.data.push(0);
578 }
579 if bit % 2 == 1 {
580 self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
581 } else {
582 self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
583 }
584
585 self.len += 1;
586 }
587
588 /// Append a bit from a u32. The least significant bit is appended to the bit vector.
589 /// All other bits are ignored.
590 ///
591 /// See also: [`append`], [`append_bit`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
592 ///
593 /// [`append`]: BitVec::append
594 /// [`append_bit`]: BitVec::append_bit
595 /// [`append_bit_u16`]: BitVec::append_bit_u16
596 /// [`append_bit_u8`]: BitVec::append_bit_u8
597 /// [`append_word`]: BitVec::append_word
598 pub fn append_bit_u32(&mut self, bit: u32) {
599 self.append_bit(u64::from(bit));
600 }
601
602 /// Append a bit from a u16. The least significant bit is appended to the bit vector.
603 /// All other bits are ignored.
604 ///
605 /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u8`], [`append_word`]
606 ///
607 /// [`append`]: BitVec::append
608 /// [`append_bit`]: BitVec::append_bit
609 /// [`append_bit_u32`]: BitVec::append_bit_u32
610 /// [`append_bit_u8`]: BitVec::append_bit_u8
611 /// [`append_word`]: BitVec::append_word
612 pub fn append_bit_u16(&mut self, bit: u16) {
613 self.append_bit(u64::from(bit));
614 }
615
616 /// Append a bit from a u8. The least significant bit is appended to the bit vector.
617 /// All other bits are ignored.
618 ///
619 /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_word`]
620 ///
621 /// [`append`]: BitVec::append
622 /// [`append_bit`]: BitVec::append_bit
623 /// [`append_bit_u32`]: BitVec::append_bit_u32
624 /// [`append_bit_u16`]: BitVec::append_bit_u16
625 /// [`append_word`]: BitVec::append_word
626 pub fn append_bit_u8(&mut self, bit: u8) {
627 self.append_bit(u64::from(bit));
628 }
629
630 /// Append a word to the bit vector. The bits are appended in little endian order (i.e. the first
631 /// bit of the word is appended first).
632 ///
633 /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`]
634 ///
635 /// # Example
636 ///
637 /// ```rust
638 /// use vers_vecs::BitVec;
639 ///
640 /// let mut bv = BitVec::new();
641 /// bv.append_word(0b1010_1010_1010_1010u64);
642 ///
643 /// assert_eq!(bv.len(), 64);
644 /// for i in 0..64 {
645 /// assert_eq!(bv.get(i), Some((0b1010_1010_1010_1010u64 >> i) & 1));
646 /// }
647 /// ```
648 ///
649 /// [`append`]: BitVec::append
650 /// [`append_bit`]: BitVec::append_bit
651 /// [`append_bit_u32`]: BitVec::append_bit_u32
652 /// [`append_bit_u16`]: BitVec::append_bit_u16
653 /// [`append_bit_u8`]: BitVec::append_bit_u8
654 pub fn append_word(&mut self, word: u64) {
655 if self.len % WORD_SIZE == 0 {
656 self.data.push(word);
657 } else {
658 // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
659 self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
660 self.data[self.len / WORD_SIZE] |= word << (self.len % WORD_SIZE);
661
662 self.data.push(word >> (WORD_SIZE - self.len % WORD_SIZE));
663 }
664 self.len += WORD_SIZE;
665 }
666
667 /// Append multiple bits to the bit vector.
668 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
669 /// The number of bits to append is given by `len`. The bits are taken from the least
670 /// significant bits of `bits`.
671 /// All other bits are ignored.
672 ///
673 /// # Example
674 ///
675 /// ```rust
676 /// use vers_vecs::BitVec;
677 ///
678 /// let mut bv = BitVec::new();
679 /// bv.append_bits(0b1010_1010_1010_1010u64, 16);
680 ///
681 /// assert_eq!(bv.len(), 16);
682 /// assert_eq!(bv.get_bits(0, 16), Some(0b1010_1010_1010_1010u64));
683 /// ```
684 ///
685 /// # Panics
686 /// Panics if `len` is larger than 64.
687 pub fn append_bits(&mut self, bits: u64, len: usize) {
688 assert!(len <= 64, "Cannot append more than 64 bits");
689
690 if self.len % WORD_SIZE == 0 {
691 self.data.push(bits);
692 } else {
693 // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
694 self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
695 self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
696
697 if self.len % WORD_SIZE + len > WORD_SIZE {
698 self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
699 }
700 }
701 self.len += len;
702 }
703
704 /// Append multiple bits to the bit vector.
705 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
706 /// The number of bits to append is given by `len`. The bits are taken from the least
707 /// significant bits of `bits`.
708 ///
709 /// This function does not check if `len` is larger than 64.
710 ///
711 /// Furthermore, if the bit-vector has trailing bits that are not zero
712 /// (i.e. the length is not a multiple of 64, and those bits are partially set),
713 /// the function will OR the new data with the trailing bits, destroying the appended data.
714 /// This can happen, if a call `append_bits[_unchecked](word, len)` appends a word which has
715 /// set bits beyond the `len - 1`-th bit,
716 /// or if bits have been dropped from the bit vector using [`drop_last`].
717 ///
718 /// See [`append_bits`] for a checked version of this function.
719 ///
720 /// # Panics
721 /// If `len` is larger than 64, the behavior is platform-dependent, and a processor
722 /// exception might be triggered.
723 ///
724 /// [`append_bits`]: BitVec::append_bits
725 /// [`drop_last`]: BitVec::drop_last
726 pub fn append_bits_unchecked(&mut self, bits: u64, len: usize) {
727 if self.len % WORD_SIZE == 0 {
728 self.data.push(bits);
729 } else {
730 self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
731
732 if self.len % WORD_SIZE + len > WORD_SIZE {
733 self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
734 }
735 }
736 self.len += len;
737 }
738
739 /// Append the bits of another bit vector to the end of this vector.
740 /// If this vector does not contain a multiple of 64 bits, the appended limbs need to be
741 /// shifted to the left.
742 /// This function is guaranteed to reallocate the underlying vector at most once.
743 pub fn extend_bitvec(&mut self, other: &Self) {
744 // reserve space for the new bits, ensuring at most one re-allocation
745 self.data
746 .reserve((self.len + other.len).div_ceil(WORD_SIZE) - self.data.len());
747
748 let full_limbs = other.len() / WORD_SIZE;
749 for i in 0..full_limbs {
750 self.append_bits(other.data[i], WORD_SIZE);
751 }
752
753 let partial_bits = other.len % WORD_SIZE;
754 if partial_bits > 0 {
755 self.append_bits(other.data[full_limbs], partial_bits);
756 }
757 }
758
759 /// Return the length of the bit vector. The length is measured in bits.
760 #[must_use]
761 pub fn len(&self) -> usize {
762 self.len
763 }
764
765 /// Return whether the bit vector is empty (contains no bits).
766 #[must_use]
767 pub fn is_empty(&self) -> bool {
768 self.len == 0
769 }
770
771 /// Flip the bit at the given position.
772 ///
773 /// # Example
774 ///
775 /// ```rust
776 /// use vers_vecs::BitVec;
777 ///
778 /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
779 /// bv.flip_bit(1);
780 ///
781 /// assert_eq!(bv.len(), 6);
782 /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
783 /// ```
784 ///
785 /// # Panics
786 /// If the position is larger than the length of the vector, the function panics.
787 pub fn flip_bit(&mut self, pos: usize) {
788 assert!(pos < self.len, "Index out of bounds");
789 self.flip_bit_unchecked(pos);
790 }
791
792 /// Flip the bit at the given position.
793 ///
794 /// See also: [`flip_bit`]
795 ///
796 /// # Panics
797 /// If the position is larger than the length of the
798 /// vector, the function will either modify unused memory or panic.
799 /// This will not corrupt memory.
800 ///
801 /// [`flip_bit`]: BitVec::flip_bit
802 pub fn flip_bit_unchecked(&mut self, pos: usize) {
803 self.data[pos / WORD_SIZE] ^= 1 << (pos % WORD_SIZE);
804 }
805
806 /// Return the bit at the given position.
807 /// The bit is encoded in the least significant bit of a u64 value.
808 /// If the position is larger than the length of the vector, None is returned.
809 ///
810 /// See also: [`get_unchecked`]
811 ///
812 /// # Example
813 ///
814 /// ```rust
815 /// use vers_vecs::BitVec;
816 ///
817 /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
818 ///
819 /// assert_eq!(bv.get(1), Some(0));
820 /// assert_eq!(bv.get(2), Some(1));
821 /// ```
822 #[must_use]
823 pub fn get(&self, pos: usize) -> Option<u64> {
824 if pos >= self.len {
825 None
826 } else {
827 Some(self.get_unchecked(pos))
828 }
829 }
830
831 /// Return the bit at the given position.
832 /// The bit is encoded in the least significant bit of a u64 value.
833 ///
834 /// # Panics
835 /// If the position is larger than the length of the vector,
836 /// the function will either return unpredictable data, or panic.
837 /// Use [`get`] to properly handle this case with an `Option`.
838 ///
839 /// [`get`]: BitVec::get
840 #[must_use]
841 pub fn get_unchecked(&self, pos: usize) -> u64 {
842 (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1
843 }
844
845 /// Set the bit at the given position.
846 /// The bit is encoded in the least significant bit of a u64 value.
847 ///
848 /// See also: [`set_unchecked`]
849 ///
850 /// # Example
851 ///
852 /// ```rust
853 /// use vers_vecs::BitVec;
854 ///
855 /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
856 /// bv.set(1, 1).unwrap();
857 ///
858 /// assert_eq!(bv.len(), 6);
859 /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
860 /// ```
861 ///
862 /// # Errors
863 /// If the position is out of range, the function will return `Err` with an error message,
864 /// otherwise it will return an empty `Ok`.
865 ///
866 /// [`set_unchecked`]: BitVec::set_unchecked
867 pub fn set(&mut self, pos: usize, value: u64) -> Result<(), &str> {
868 if pos >= self.len {
869 Err("out of range")
870 } else {
871 self.set_unchecked(pos, value);
872 Ok(())
873 }
874 }
875
876 /// Set the bit at the given position.
877 /// The bit is encoded in the least significant bit of a u64 value.
878 ///
879 /// # Panics
880 /// If the position is larger than the length of the vector,
881 /// the function will either do nothing, or panic.
882 /// Use [`set`] to properly handle this case with a `Result`.
883 ///
884 /// [`set`]: BitVec::set
885 pub fn set_unchecked(&mut self, pos: usize, value: u64) {
886 self.data[pos / WORD_SIZE] = (self.data[pos / WORD_SIZE] & !(0x1 << (pos % WORD_SIZE)))
887 | ((value & 0x1) << (pos % WORD_SIZE));
888 }
889
890 /// Return whether the bit at the given position is set.
891 /// If the position is larger than the length of the vector, None is returned.
892 ///
893 /// See also: [`is_bit_set_unchecked`]
894 ///
895 /// # Example
896 ///
897 /// ```rust
898 /// use vers_vecs::BitVec;
899 ///
900 /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
901 ///
902 /// assert!(!bv.is_bit_set(1).unwrap());
903 /// assert!(bv.is_bit_set(2).unwrap());
904 /// ```
905 ///
906 /// [`is_bit_set_unchecked`]: BitVec::is_bit_set_unchecked
907 #[must_use]
908 pub fn is_bit_set(&self, pos: usize) -> Option<bool> {
909 if pos >= self.len {
910 None
911 } else {
912 Some(self.is_bit_set_unchecked(pos))
913 }
914 }
915
916 /// Return whether the bit at the given position is set.
917 ///
918 /// # Panics
919 /// If the position is larger than the length of the vector,
920 /// the function will either return unpredictable data, or panic.
921 /// Use [`is_bit_set`] to properly handle this case with an `Option`.
922 ///
923 /// [`is_bit_set`]: BitVec::is_bit_set
924 #[must_use]
925 pub fn is_bit_set_unchecked(&self, pos: usize) -> bool {
926 self.get_unchecked(pos) != 0
927 }
928
929 /// Return multiple bits at the given position.
930 /// The number of bits to return is given by `len`.
931 /// At most 64 bits can be returned.
932 /// If the position at the end of the query is larger than the length of the vector,
933 /// None is returned (even if the query partially overlaps with the vector).
934 /// If the length of the query is larger than 64, None is returned.
935 ///
936 /// The first bit at `pos` is the most significant bit of the return value
937 /// limited to `len` bits.
938 #[must_use]
939 pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> {
940 if len > WORD_SIZE || len == 0 {
941 return None;
942 }
943 if pos + len > self.len {
944 None
945 } else {
946 Some(self.get_bits_unchecked(pos, len))
947 }
948 }
949
950 /// Return multiple bits at the given position. The number of bits to return is given by `len`.
951 /// At most 64 bits can be returned.
952 ///
953 /// This function is always inlined, because it gains a lot from loop optimization and
954 /// can utilize the processor pre-fetcher better if it is.
955 ///
956 /// # Errors
957 /// If the length of the query is larger than 64, unpredictable data will be returned.
958 /// Use [`get_bits`] to avoid this.
959 ///
960 /// # Panics
961 /// If the position or interval is larger than the length of the vector,
962 /// the function will either return any valid results padded with unpredictable
963 /// data or panic.
964 ///
965 /// [`get_bits`]: BitVec::get_bits
966 #[must_use]
967 #[allow(clippy::inline_always)]
968 #[allow(clippy::comparison_chain)] // readability
969 #[inline(always)] // inline to gain loop optimization and pipeline advantages for elias fano
970 #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen
971 pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 {
972 debug_assert!(len <= WORD_SIZE);
973 let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE);
974 if pos % WORD_SIZE + len <= WORD_SIZE {
975 partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
976 } else {
977 (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE)))
978 & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
979 }
980 }
981
982 /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
983 /// `index`. The number of bits per encoded element is given by `n`.
984 ///
985 /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
986 /// and is equivalent to calling [`get_bits(index * n, n)`].
987 /// It is thus safe to use this method with any index and any size n <= 64.
988 ///
989 /// If the element is out of bounds, None is returned.
990 /// The element is returned as a u64 value.
991 ///
992 /// # Example
993 /// ```rust
994 /// use vers_vecs::BitVec;
995 ///
996 /// let sequence = [10, 100, 124, 45, 223];
997 /// let bv = BitVec::pack_sequence_u64(&sequence, 8);
998 ///
999 /// assert_eq!(bv.unpack_element(0, 8), Some(10));
1000 /// assert_eq!(bv.unpack_element(2, 8), Some(124));
1001 /// ```
1002 ///
1003 /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1004 /// [`get_bits(index * n, n)`]: BitVec::get_bits
1005 #[must_use]
1006 #[allow(clippy::inline_always)]
1007 #[inline(always)] // to gain optimization if n is constant
1008 pub fn unpack_element(&self, index: usize, n: usize) -> Option<u64> {
1009 self.get_bits(index * n, n)
1010 }
1011
1012 /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
1013 /// `index`. The number of bits per encoded element is given by `n`.
1014 ///
1015 /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
1016 /// and is equivalent to calling [`get_bits_unchecked(index * n, n)`].
1017 /// It is thus safe to use this method with any index where `index * n + n` is in-bounds,
1018 /// and any size n <= 64.
1019 ///
1020 /// # Panics
1021 /// If the element is out of bounds, the function will either return unpredictable data or panic.
1022 /// Use [`unpack_element`] for a checked version of this function.
1023 ///
1024 /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1025 /// [`get_bits_unchecked(index * n, n)`]: BitVec::get_bits_unchecked
1026 /// [`unpack_element`]: BitVec::unpack_element
1027 #[must_use]
1028 #[allow(clippy::inline_always)]
1029 #[inline(always)] // to gain optimization if n is constant
1030 pub fn unpack_element_unchecked(&self, index: usize, n: usize) -> u64 {
1031 self.get_bits_unchecked(index * n, n)
1032 }
1033
1034 /// Return the number of ones in the bit vector. Since the bit vector doesn't store additional
1035 /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1036 ///
1037 /// [`RsVec`]: crate::RsVec
1038 #[must_use]
1039 #[allow(clippy::missing_panics_doc)] // can't panic because of manual bounds check
1040 pub fn count_ones(&self) -> u64 {
1041 let mut ones: u64 = self.data[0..self.len / WORD_SIZE]
1042 .iter()
1043 .map(|limb| u64::from(limb.count_ones()))
1044 .sum();
1045 if self.len % WORD_SIZE > 0 {
1046 ones += u64::from(
1047 (self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones(),
1048 );
1049 }
1050 ones
1051 }
1052
1053 /// Return the number of zeros in the bit vector. Since the bit vector doesn't store additional
1054 /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1055 /// This method calls [`count_ones`].
1056 ///
1057 /// [`RsVec`]: crate::RsVec
1058 /// [`count_ones`]: BitVec::count_ones
1059 #[must_use]
1060 pub fn count_zeros(&self) -> u64 {
1061 self.len as u64 - self.count_ones()
1062 }
1063
1064 /// Mask this bit vector with another bitvector using bitwise or. The mask is applied lazily
1065 /// whenever an operation on the resulting vector is performed.
1066 ///
1067 /// # Errors
1068 /// Returns an error if the length of the vector doesn't match the mask length.
1069 #[inline]
1070 pub fn mask_or<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1071 MaskedBitVec::new(self, mask, |a, b| a | b)
1072 }
1073
1074 /// Mask this bit vector with another bitvector using bitwise or.
1075 /// The mask is applied immediately, unlike in [`mask_or`].
1076 ///
1077 /// # Errors
1078 /// Returns an error if the length of the vector doesn't match the mask length.
1079 ///
1080 /// [`mask_or`]: BitVec::mask_or
1081 pub fn apply_mask_or(&mut self, mask: &BitVec) -> Result<(), String> {
1082 if self.len != mask.len {
1083 return Err(String::from(
1084 "mask cannot have different length than vector",
1085 ));
1086 }
1087
1088 for i in 0..self.data.len() {
1089 self.data[i] |= mask.data[i];
1090 }
1091
1092 Ok(())
1093 }
1094
1095 /// Mask this bit vector with another bitvector using bitwise and. The mask is applied lazily
1096 /// whenever an operation on the resulting vector is performed.
1097 ///
1098 /// # Errors
1099 /// Returns an error if the length of the vector doesn't match the mask length.
1100 #[inline]
1101 pub fn mask_and<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1102 MaskedBitVec::new(self, mask, |a, b| a & b)
1103 }
1104
1105 /// Mask this bit vector with another bitvector using bitwise and.
1106 /// The mask is applied immediately, unlike in [`mask_and`].
1107 ///
1108 /// # Errors
1109 /// Returns an error if the length of the vector doesn't match the mask length.
1110 ///
1111 /// [`mask_and`]: BitVec::mask_and
1112 pub fn apply_mask_and(&mut self, mask: &BitVec) -> Result<(), String> {
1113 if self.len != mask.len {
1114 return Err(String::from(
1115 "mask cannot have different length than vector",
1116 ));
1117 }
1118
1119 for i in 0..self.data.len() {
1120 self.data[i] &= mask.data[i];
1121 }
1122
1123 Ok(())
1124 }
1125
1126 /// Mask this bit vector with another bitvector using bitwise xor. The mask is applied lazily
1127 /// whenever an operation on the resulting vector is performed.
1128 ///
1129 /// # Errors
1130 /// Returns an error if the length of the vector doesn't match the mask length.
1131 #[inline]
1132 pub fn mask_xor<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1133 MaskedBitVec::new(self, mask, |a, b| a ^ b)
1134 }
1135
1136 /// Mask this bit vector with another bitvector using bitwise xor.
1137 /// The mask is applied immediately, unlike in [`mask_xor`].
1138 ///
1139 /// # Errors
1140 /// Returns an error if the length of the vector doesn't match the mask length.
1141 ///
1142 /// [`mask_xor`]: BitVec::mask_xor
1143 pub fn apply_mask_xor(&mut self, mask: &BitVec) -> Result<(), String> {
1144 if self.len != mask.len {
1145 return Err(String::from(
1146 "mask cannot have different length than vector",
1147 ));
1148 }
1149
1150 for i in 0..self.data.len() {
1151 self.data[i] ^= mask.data[i];
1152 }
1153
1154 Ok(())
1155 }
1156
1157 /// Mask this bit vector with another bitvector using a custom masking operation. The mask is
1158 /// applied lazily whenever an operation on the resulting vector is performed.
1159 ///
1160 /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1161 /// The last block of a bit vector might contain fewer bits, and will be padded with
1162 /// unpredictable data. Implementations may choose to modify those padding bits without
1163 /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1164 /// within the vector is unspecified.
1165 ///
1166 /// # Errors
1167 /// Returns an error if the length of the vector doesn't match the mask length.
1168 #[inline]
1169 pub fn mask_custom<'s, 'b, F>(
1170 &'s self,
1171 mask: &'b BitVec,
1172 mask_op: F,
1173 ) -> Result<MaskedBitVec<'s, 'b, F>, String>
1174 where
1175 F: Fn(u64, u64) -> u64,
1176 {
1177 MaskedBitVec::new(self, mask, mask_op)
1178 }
1179
1180 /// Mask this bit vector with another bitvector using a custom masking operation.
1181 /// The mask is applied immediately, unlike in [`mask_custom`].
1182 ///
1183 /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1184 /// The last block of a bit vector might contain fewer bits, and will be padded with
1185 /// unpredictable data. Implementations may choose to modify those padding bits without
1186 /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1187 /// within the vector is unspecified.
1188 ///
1189 /// # Errors
1190 /// Returns an error if the length of the vector doesn't match the mask length.
1191 ///
1192 /// [`mask_custom`]: BitVec::mask_custom
1193 #[inline]
1194 pub fn apply_mask_custom(
1195 &mut self,
1196 mask: &BitVec,
1197 mask_op: fn(u64, u64) -> u64,
1198 ) -> Result<(), String> {
1199 if self.len != mask.len {
1200 return Err(String::from(
1201 "mask cannot have different length than vector",
1202 ));
1203 }
1204
1205 for i in 0..self.data.len() {
1206 self.data[i] = mask_op(self.data[i], mask.data[i]);
1207 }
1208
1209 Ok(())
1210 }
1211
1212 /// Returns the number of bytes on the heap for this vector.
1213 /// Does not include allocated memory that isn't used.
1214 #[must_use]
1215 pub fn heap_size(&self) -> usize {
1216 self.data.len() * size_of::<u64>()
1217 }
1218
1219 /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1220 /// right half the remaining bits `at..`. If the split index is larger than the length of the
1221 /// vector, the vector is returned unmodified in an `Err` variant.
1222 ///
1223 /// # Errors
1224 /// If the index is out of bounds, the function will return an error
1225 /// containing the original vector.
1226 ///
1227 /// See also: [`split_at_unchecked`]
1228 pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> {
1229 if at > self.len {
1230 Err(self)
1231 } else {
1232 Ok(self.split_at_unchecked(at))
1233 }
1234 }
1235
1236 /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1237 /// right half the remaining bits `at..`.
1238 ///
1239 /// # Panics
1240 /// If the index is larger than the length of the vector the function will panic or run
1241 /// out of memory.
1242 /// Use [`split_at`] to properly handle this case.
1243 #[must_use]
1244 pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) {
1245 let other_len = self.len - at;
1246 let mut other = Self::with_capacity(other_len);
1247
1248 if other_len == 0 {
1249 return (self, other);
1250 }
1251
1252 let first_limb = at / WORD_SIZE;
1253 let last_limb = self.len / WORD_SIZE;
1254
1255 // First, we figure out the number of bits from the first limb to retain in this vector:
1256 let leading_partial = at % WORD_SIZE;
1257
1258 // If the split point is in the last limb, and the vector ends before the last bit, first_limb
1259 // and last_limb will be equal, and the other half is simply other_len bits off the limb
1260 // right shifted by the number of bits to retain in this vector.
1261 if first_limb == last_limb {
1262 other.append_bits_unchecked(self.data[first_limb] >> leading_partial, other_len);
1263 } else {
1264 // Otherwise, some range n..last_limb should be copied in their entirety to the other half,
1265 // with n=first_limb+1 if the split point is inside the first limb (leading_partial > 0), or
1266 // n=first_limb if the entire first limb belongs in the other half.
1267 let full_limbs = if leading_partial > 0 {
1268 // If the split point is inside the first limb, we also have to remember to copy over
1269 // the trailing bits to the new vector.
1270 other.append_bits_unchecked(
1271 self.data[first_limb] >> leading_partial,
1272 WORD_SIZE - leading_partial,
1273 );
1274 first_limb + 1..last_limb
1275 } else {
1276 first_limb..last_limb
1277 };
1278
1279 // Copy over any full limbs.
1280 for i in full_limbs {
1281 other.append_bits_unchecked(self.data[i], WORD_SIZE);
1282 }
1283
1284 // Finally, if the vector has a partially filled last limb, we need to put those bits
1285 // in the other half.
1286 let trailing_partial = self.len % WORD_SIZE;
1287 if trailing_partial > 0 {
1288 other.append_bits_unchecked(self.data[last_limb], trailing_partial);
1289 }
1290 }
1291
1292 // remove the copied bits from the original vector
1293 self.drop_last(other_len);
1294
1295 (self, other)
1296 }
1297}
1298
1299impl_vector_iterator! { BitVec, BitVecIter, BitVecRefIter }
1300
1301/// Create a new bit vector from a slice of u64 values.
1302/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1303/// The function will append the bits of each element to the bit vector in the order they are
1304/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1305impl From<&[u64]> for BitVec {
1306 fn from(data: &[u64]) -> Self {
1307 BitVec::from_limbs(data)
1308 }
1309}
1310
1311/// Create a new bit vector from a slice of u64 values.
1312/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1313/// The function will append the bits of each element to the bit vector in the order they are
1314/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1315impl From<Vec<u64>> for BitVec {
1316 fn from(data: Vec<u64>) -> Self {
1317 BitVec::from_limbs(&data)
1318 }
1319}
1320
1321impl Extend<BitVec> for BitVec {
1322 fn extend<T: IntoIterator<Item = BitVec>>(&mut self, iter: T) {
1323 for v in iter {
1324 self.extend_bitvec(&v)
1325 }
1326 }
1327}
1328
1329impl<'t> Extend<&'t BitVec> for BitVec {
1330 fn extend<T: IntoIterator<Item = &'t BitVec>>(&mut self, iter: T) {
1331 for v in iter {
1332 self.extend_bitvec(v)
1333 }
1334 }
1335}
1336
1337/// Create a new bit vector from u64 values.
1338/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1339/// The function will append the bits of each element to the bit vector in the order they are
1340/// given in the iterator (i.e. the first element takes bits `0..64` of the vector).
1341impl FromIterator<u64> for BitVec {
1342 fn from_iter<T: IntoIterator<Item = u64>>(iter: T) -> Self {
1343 BitVec::from_limbs_iter(iter)
1344 }
1345}
1346
1347#[cfg(test)]
1348mod tests;