vers_vecs/bit_vec/mod.rs
1//! This module contains a simple [bit vector][BitVec] implementation with no overhead and a fast succinct
2//! bit vector implementation with [rank and select queries][fast_rs_vec::RsVec].
3
4use crate::bit_vec::mask::MaskedBitVec;
5use crate::util::impl_vector_iterator;
6use std::cmp::min;
7use std::hash::{Hash, Hasher};
8use std::mem::size_of;
9
10pub mod fast_rs_vec;
11
12pub mod sparse;
13
14pub mod mask;
15
16/// Size of a word in bitvectors. All vectors operate on 64-bit words.
17const WORD_SIZE: usize = 64;
18
19/// Type alias for masked bitvectors that implement a simple bitwise binary operation.
20/// The first lifetime is for the bit vector that is being masked, the second lifetime is for the
21/// mask.
22pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>;
23
24/// A simple bit vector that does not support rank and select queries.
25/// Bits are stored in little-endian order, i.e. the least significant bit is stored first.
26/// The bit vector is stored as a sequence of 64 bit limbs.
27/// The last limb may be partially filled.
28///
29/// The bit vector has a wide range of constructors that allow for easy creation from various
30/// sources.
31/// Among them are constructors for creating an empty vector ([`BitVec::new`]),
32/// creating one from single bits of various integer types ([`BitVec::from_bits`] and variations),
33/// creating limbs from u64 values directly ([`BitVec::from_limbs`] and variations),
34/// or packing a sequence of numerical values into a dense bit sequence
35/// ([`BitVec::pack_sequence_u64`] and variations).
36///
37/// The bit vector can be modified after creation
38/// (e.g. by appending [bits](BitVec::append_bits)
39/// or [words](BitVec::append_word),
40/// [flipping](BitVec::flip_bit),
41/// or [setting](BitVec::set) bits).
42/// Bits can be [accessed](BitVec::get) by position,
43/// and [multiple bits](BitVec::get_bits) can be accessed at once.
44/// Bits can be [dropped](BitVec::drop_last) from the end.
45///
46/// # Example
47/// ```rust
48/// use vers_vecs::{BitVec, RsVec};
49///
50/// let mut bit_vec = BitVec::new();
51/// bit_vec.append_bit(0u64);
52/// bit_vec.append_bit_u32(1u32);
53/// bit_vec.append_word(0b1010_1010_1010_1010u64); // appends exactly 64 bits
54///
55/// assert_eq!(bit_vec.len(), 66);
56/// assert_eq!(bit_vec.get(0), Some(0u64));
57/// assert_eq!(bit_vec.get(1), Some(1u64));
58/// ```
59#[derive(Clone, Debug, Default)]
60#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
61#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
62pub struct BitVec {
63 data: Vec<u64>,
64 len: usize,
65}
66
67impl BitVec {
68 /// Create a new empty bit vector.
69 #[must_use]
70 pub fn new() -> Self {
71 Self::default()
72 }
73
74 /// Create a new empty bit vector with the given capacity.
75 /// The capacity is measured in bits.
76 /// The bit vector will be able to hold at least `capacity` bits without reallocating.
77 /// More memory may be allocated according to the underlying allocation strategy.
78 #[must_use]
79 pub fn with_capacity(capacity: usize) -> Self {
80 Self {
81 data: Vec::with_capacity(capacity / WORD_SIZE + 1),
82 len: 0,
83 }
84 }
85
86 /// Create a new bit vector with all zeros and the given length.
87 /// The length is measured in bits.
88 #[must_use]
89 pub fn from_zeros(len: usize) -> Self {
90 let mut data = vec![0; len / WORD_SIZE];
91 if !len.is_multiple_of(WORD_SIZE) {
92 data.push(0);
93 }
94 Self { data, len }
95 }
96
97 /// Create a new bit vector with all ones and the given length.
98 /// The length is measured in bits.
99 #[must_use]
100 pub fn from_ones(len: usize) -> Self {
101 let mut data = vec![u64::MAX; len / WORD_SIZE];
102 if !len.is_multiple_of(WORD_SIZE) {
103 data.push((1 << (len % WORD_SIZE)) - 1);
104 }
105 Self { data, len }
106 }
107
108 /// Construct a bit vector from a set of bits given as distinct u8 values.
109 /// The constructor will take the least significant bit from each value and append it to a
110 /// bit vector.
111 /// All other bits are ignored.
112 ///
113 /// See also: [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
114 ///
115 /// # Example
116 /// ```rust
117 /// use vers_vecs::BitVec;
118 ///
119 /// let bits: &[u8] = &[1, 0, 1, 1, 1, 1];
120 /// let bv = BitVec::from_bits(&bits);
121 ///
122 /// assert_eq!(bv.len(), 6);
123 /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
124 /// ```
125 ///
126 /// [`from_bits_u16`]: BitVec::from_bits_u16
127 /// [`from_bits_u32`]: BitVec::from_bits_u32
128 /// [`from_bits_u64`]: BitVec::from_bits_u64
129 /// [`from_bits_iter`]: BitVec::from_bits_iter
130 #[must_use]
131 pub fn from_bits(bits: &[u8]) -> Self {
132 let mut bv = Self::with_capacity(bits.len());
133 bits.iter().for_each(|&b| bv.append_bit(b.into()));
134 bv
135 }
136
137 /// Construct a bit vector from a set of bits given as distinct u16 values.
138 /// The constructor will take the least significant bit from each value and append it to a
139 /// bit vector.
140 /// All other bits are ignored.
141 ///
142 /// See also: [`from_bits`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
143 ///
144 /// [`from_bits`]: BitVec::from_bits
145 /// [`from_bits_u32`]: BitVec::from_bits_u32
146 /// [`from_bits_u64`]: BitVec::from_bits_u64
147 /// [`from_bits_iter`]: BitVec::from_bits_iter
148 #[must_use]
149 pub fn from_bits_u16(bits: &[u16]) -> Self {
150 let mut bv = Self::with_capacity(bits.len());
151 bits.iter().for_each(|&b| bv.append_bit_u16(b));
152 bv
153 }
154
155 /// Construct a bit vector from a set of bits given as distinct u32 values.
156 /// The constructor will take the least significant bit from each value and append it to a
157 /// bit vector.
158 /// All other bits are ignored.
159 ///
160 /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u64`], [`from_bits_iter`]
161 ///
162 /// [`from_bits`]: BitVec::from_bits
163 /// [`from_bits_u16`]: BitVec::from_bits_u16
164 /// [`from_bits_u64`]: BitVec::from_bits_u64
165 /// [`from_bits_iter`]: BitVec::from_bits_iter
166 #[must_use]
167 pub fn from_bits_u32(bits: &[u32]) -> Self {
168 let mut bv = Self::with_capacity(bits.len());
169 bits.iter().for_each(|&b| bv.append_bit_u32(b));
170 bv
171 }
172
173 /// Construct a bit vector from a set of bits given as distinct u64 values.
174 /// The constructor will take the least significant bit from each value and append it to a
175 /// bit vector.
176 /// All other bits are ignored.
177 ///
178 /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_iter`]
179 ///
180 /// [`from_bits`]: BitVec::from_bits
181 /// [`from_bits_u16`]: BitVec::from_bits_u16
182 /// [`from_bits_u32`]: BitVec::from_bits_u32
183 /// [`from_bits_iter`]: BitVec::from_bits_iter
184 #[must_use]
185 pub fn from_bits_u64(bits: &[u64]) -> Self {
186 let mut bv = Self::with_capacity(bits.len());
187 bits.iter().for_each(|&b| bv.append_bit(b));
188 bv
189 }
190
191 /// Construct a bit vector from an iterator of bits.
192 /// The constructor will take the least significant bit from each value and append it to a
193 /// bit vector.
194 /// All other bits are ignored.
195 /// The iterator must yield values that can be converted into u64 values.
196 ///
197 /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`]
198 ///
199 /// # Example
200 /// ```rust
201 /// use vers_vecs::BitVec;
202 ///
203 /// let bits = [true, false, true, true, true, true];
204 /// let bv = BitVec::from_bits_iter(bits.iter().copied());
205 ///
206 /// let bits = [0b1u8, 0b0, 0b1, 0b1, 0b1, 0b1];
207 /// let bv2 = BitVec::from_bits_iter(bits.iter().copied());
208 ///
209 /// assert_eq!(bv.len(), 6);
210 /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
211 /// assert_eq!(bv, bv2);
212 /// ```
213 ///
214 /// [`from_bits`]: BitVec::from_bits
215 /// [`from_bits_u16`]: BitVec::from_bits_u16
216 /// [`from_bits_u32`]: BitVec::from_bits_u32
217 /// [`from_bits_u64`]: BitVec::from_bits_u64
218 #[must_use]
219 pub fn from_bits_iter<I, E>(iter: I) -> Self
220 where
221 E: Into<u64>,
222 I: IntoIterator<Item = E>,
223 {
224 let iter = iter.into_iter();
225 let mut bv = Self::with_capacity(iter.size_hint().0);
226 for bit in iter {
227 bv.append_bit(bit.into());
228 }
229 bv
230 }
231
232 /// Construct a bit vector from a slice of u64 quad words.
233 /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
234 /// 64 bits to the bit vector).
235 /// Since the data is only cloned without any masking or transformation,
236 /// this is one of the fastest ways to create a bit vector.
237 ///
238 /// See also: [`from_vec`], [`from_limbs_iter`]
239 ///
240 /// # Example
241 /// ```rust
242 /// use vers_vecs::BitVec;
243 ///
244 /// let words = [0, 256, u64::MAX];
245 /// let bv = BitVec::from_limbs(&words);
246 ///
247 /// assert_eq!(bv.len(), 192);
248 /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
249 /// assert_eq!(bv.get(72), Some(1));
250 /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
251 /// ```
252 ///
253 /// [`from_vec`]: BitVec::from_vec
254 /// [`from_limbs_iter`]: BitVec::from_limbs_iter
255 #[must_use]
256 pub fn from_limbs(words: &[u64]) -> Self {
257 let len = words.len() * WORD_SIZE;
258 Self {
259 data: words.to_vec(),
260 len,
261 }
262 }
263
264 /// Construct a bit vector from an iterator of u64 quad words.
265 /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
266 /// 64 bits to the bit vector).
267 /// Since the data is only cloned without any masking or transformation,
268 /// this is one of the fastest ways to create a bit vector.
269 ///
270 /// See also: [`from_limbs`], [`from_vec`]
271 ///
272 /// # Example
273 /// ```rust
274 /// use std::iter::repeat;
275 /// use vers_vecs::BitVec;
276 ///
277 /// let zeros = repeat(0xaaaaaaaaaaaaaaaau64).take(10);
278 /// let bv = BitVec::from_limbs_iter(zeros);
279 ///
280 /// assert_eq!(bv.len(), 640);
281 /// for i in 0..640 {
282 /// assert_eq!(bv.get(i), Some((i % 2 == 1) as u64));
283 /// }
284 /// ```
285 ///
286 /// [`from_limbs`]: BitVec::from_limbs
287 /// [`from_vec`]: BitVec::from_vec
288 pub fn from_limbs_iter<I, E>(iter: I) -> Self
289 where
290 E: Into<u64>,
291 I: IntoIterator<Item = E>,
292 {
293 let vec = iter.into_iter().map(Into::into).collect();
294 Self::from_vec(vec)
295 }
296
297 /// Construct a bit vector from a vector of u64 quad words.
298 /// The quad words are interpreted as limbs of the bit vector
299 /// (i.e. each quad word contributes 64 bits to the bit vector).
300 /// Since the data is moved without any masking or transformation, this is one of the fastest ways
301 /// to create a bit vector.
302 ///
303 /// See also: [`from_limbs`], [`from_limbs_iter`]
304 ///
305 /// # Example
306 /// ```rust
307 /// use vers_vecs::BitVec;
308 ///
309 /// let words = vec![0, 256, u64::MAX];
310 /// let bv = BitVec::from_vec(words);
311 ///
312 /// assert_eq!(bv.len(), 192);
313 /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
314 /// assert_eq!(bv.get(72), Some(1));
315 /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
316 /// ```
317 ///
318 /// [`from_limbs`]: BitVec::from_limbs
319 /// [`from_limbs_iter`]: BitVec::from_limbs_iter
320 #[must_use]
321 pub fn from_vec(data: Vec<u64>) -> Self {
322 let len = data.len() * WORD_SIZE;
323 Self { data, len }
324 }
325
326 /// Helper function for packing constructors to pack a slice of elements into a bit-vector,
327 /// taking the least significant `bits_per_element` per element.
328 fn pack_bits<T, const MAX_BITS: usize>(sequence: &[T], bits_per_element: usize) -> Self
329 where
330 T: Into<u64> + Copy,
331 {
332 let mut bv = Self::with_capacity(sequence.len() * bits_per_element);
333 for &word in sequence {
334 Self::pack_word_into_vector::<MAX_BITS>(&mut bv, word.into(), bits_per_element)
335 }
336 bv
337 }
338
339 /// Helper function for packing constructors to pack elements from an iterator into a bit-vector,
340 /// taking the least significant `bits_per_element` per element.
341 fn pack_bits_iter<T, I: IntoIterator<Item = T>, const MAX_BITS: usize>(
342 iter: I,
343 bits_per_element: usize,
344 ) -> Self
345 where
346 T: Into<u64> + Copy,
347 {
348 let mut bv = Self::new();
349
350 for word in iter {
351 Self::pack_word_into_vector::<MAX_BITS>(&mut bv, word.into(), bits_per_element)
352 }
353
354 bv
355 }
356
357 /// Helper function for packing constructors to pack `num_bits` of a given word into the given
358 /// vector.
359 #[inline(always)]
360 fn pack_word_into_vector<const MAX_BITS: usize>(bv: &mut BitVec, word: u64, num_bits: usize) {
361 if num_bits <= MAX_BITS {
362 bv.append_bits(word.into(), num_bits);
363 } else {
364 bv.append_bits(word.into(), MAX_BITS);
365 let mut rest = num_bits - MAX_BITS;
366 while rest > 0 {
367 bv.append_bits(0, min(rest, MAX_BITS));
368 rest = rest.saturating_sub(MAX_BITS);
369 }
370 }
371 }
372
373 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
374 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
375 /// The number of bits per element is given by `bits_per_element`.
376 /// The sequence is given as a slice of u64 values.
377 /// If the number of bits per element is smaller than 64, the function takes the
378 /// least significant bits of each element, and discards the rest.
379 /// If the number of bits per element is larger than 64, the function will pad the elements
380 /// with zeros.
381 /// The function will append the bits of each element to the bit vector in the order they are
382 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
383 ///
384 /// See also: [`pack_sequence_u32`], [`pack_sequence_u16`], [`pack_sequence_u8`]
385 ///
386 /// # Example
387 /// ```rust
388 /// use vers_vecs::BitVec;
389 ///
390 /// let sequence = [0b1010u64, 0b1100u64, 0b1111u64];
391 /// let bv = BitVec::pack_sequence_u64(&sequence, 4);
392 ///
393 /// assert_eq!(bv.len(), 12);
394 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
395 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
396 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
397 /// ```
398 ///
399 /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
400 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
401 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
402 #[must_use]
403 pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: usize) -> Self {
404 Self::pack_bits::<_, 64>(sequence, bits_per_element)
405 }
406
407 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
408 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
409 /// The number of bits per element is given by `bits_per_element`.
410 /// The sequence is given as a slice of u32 values.
411 /// If the number of bits per element is smaller than 32, the function takes the
412 /// least significant bits of each element, and discards the rest.
413 /// If the number of bits per element is larger than 32, the function will pad the elements
414 /// with zeros.
415 /// The function will append the bits of each element to the bit vector in the order they are
416 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
417 ///
418 /// See also: [`pack_sequence_u64`], [`pack_sequence_u16`], [`pack_sequence_u8`]
419 ///
420 /// # Example
421 /// ```rust
422 /// use vers_vecs::BitVec;
423 ///
424 /// let sequence = [0b1010u32, 0b1100u32, 0b1111u32];
425 /// let bv = BitVec::pack_sequence_u32(&sequence, 4);
426 ///
427 /// assert_eq!(bv.len(), 12);
428 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
429 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
430 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
431 /// ```
432 ///
433 /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
434 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
435 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
436 #[must_use]
437 pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: usize) -> Self {
438 Self::pack_bits::<_, 32>(sequence, bits_per_element)
439 }
440
441 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
442 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
443 /// The number of bits per element is given by `bits_per_element`.
444 /// The sequence is given as a slice of u16 values.
445 /// If the number of bits per element is smaller than 16, the function takes the
446 /// least significant bits of each element, and discards the rest.
447 /// If the number of bits per element is larger than 16, the function will pad the elements
448 /// with zeros.
449 /// The function will append the bits of each element to the bit vector in the order they are
450 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
451 ///
452 /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u8`]
453 ///
454 /// # Example
455 /// ```rust
456 /// use vers_vecs::BitVec;
457 ///
458 /// let sequence = [0b1010u16, 0b1100u16, 0b1111u16];
459 /// let bv = BitVec::pack_sequence_u16(&sequence, 4);
460 ///
461 /// assert_eq!(bv.len(), 12);
462 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
463 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
464 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
465 /// ```
466 ///
467 /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
468 /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
469 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
470 #[must_use]
471 pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: usize) -> Self {
472 Self::pack_bits::<_, 16>(sequence, bits_per_element)
473 }
474
475 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
476 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
477 /// The number of bits per element is given by `bits_per_element`.
478 /// The sequence is given as a slice of u8 values.
479 /// If the number of bits per element is smaller than 8, the function takes the
480 /// least significant bits of each element, and discards the rest.
481 /// If the number of bits per element is larger than 8, the function will pad the elements
482 /// with zeros.
483 /// The function will append the bits of each element to the bit vector in the order they are
484 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
485 ///
486 /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u16`]
487 ///
488 /// # Example
489 /// ```rust
490 /// use vers_vecs::BitVec;
491 ///
492 /// let sequence = [0b1010u8, 0b1100u8, 0b1111u8];
493 /// let bv = BitVec::pack_sequence_u8(&sequence, 4);
494 ///
495 /// assert_eq!(bv.len(), 12);
496 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
497 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
498 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
499 /// ```
500 ///
501 /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
502 /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
503 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
504 #[must_use]
505 pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: usize) -> Self {
506 Self::pack_bits::<_, 8>(sequence, bits_per_element)
507 }
508
509 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
510 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
511 /// The number of bits per element is given by `bits_per_element`.
512 /// The sequence is given as an iterator of u64 values.
513 /// If the number of bits per element is smaller than 64, the function takes the
514 /// least significant bits of each element, and discards the rest.
515 /// If the number of bits per element is larger than 64, the function will pad the elements
516 /// with zeros.
517 /// The function will append the bits of each element to the bit vector in the order they are
518 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
519 ///
520 /// See also: [`pack_from_iter_u32`], [`pack_from_iter_u16`], [`pack_from_iter_u8`], or the
521 /// functions to pack from a slice: [`pack_sequence_u64`]
522 ///
523 /// # Example
524 /// ```rust
525 /// use vers_vecs::BitVec;
526 ///
527 /// let sequence = [0b1010u64, 0b1100u64, 0b1111u64];
528 /// let bv = BitVec::pack_from_iter_u64(sequence.into_iter(), 4);
529 ///
530 /// assert_eq!(bv.len(), 12);
531 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
532 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
533 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
534 /// ```
535 ///
536 /// [`pack_from_iter_u32`]: BitVec::pack_from_iter_u32
537 /// [`pack_from_iter_u16`]: BitVec::pack_from_iter_u16
538 /// [`pack_from_iter_u8`]: BitVec::pack_from_iter_u8
539 /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
540 pub fn pack_from_iter_u64<I: IntoIterator<Item = u64>>(
541 iter: I,
542 bits_per_element: usize,
543 ) -> Self {
544 Self::pack_bits_iter::<_, _, 64>(iter, bits_per_element)
545 }
546
547 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
548 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
549 /// The number of bits per element is given by `bits_per_element`.
550 /// The sequence is given as an iterator of u32 values.
551 /// If the number of bits per element is smaller than 32, the function takes the
552 /// least significant bits of each element, and discards the rest.
553 /// If the number of bits per element is larger than 32, the function will pad the elements
554 /// with zeros.
555 /// The function will append the bits of each element to the bit vector in the order they are
556 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
557 ///
558 /// See also: [`pack_from_iter_u64`], [`pack_from_iter_u16`], [`pack_from_iter_u8`], or the
559 /// functions to pack from a slice: [`pack_sequence_u32`]
560 ///
561 /// # Example
562 /// ```rust
563 /// use vers_vecs::BitVec;
564 ///
565 /// let sequence = [0b1010u32, 0b1100u32, 0b1111u32];
566 /// let bv = BitVec::pack_from_iter_u32(sequence.into_iter(), 4);
567 ///
568 /// assert_eq!(bv.len(), 12);
569 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
570 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
571 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
572 /// ```
573 ///
574 /// [`pack_from_iter_u64`]: BitVec::pack_from_iter_u64
575 /// [`pack_from_iter_u16`]: BitVec::pack_from_iter_u16
576 /// [`pack_from_iter_u8`]: BitVec::pack_from_iter_u8
577 /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
578 pub fn pack_from_iter_u32<I: IntoIterator<Item = u32>>(
579 iter: I,
580 bits_per_element: usize,
581 ) -> Self {
582 Self::pack_bits_iter::<_, _, 32>(iter, bits_per_element)
583 }
584
585 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
586 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
587 /// The number of bits per element is given by `bits_per_element`.
588 /// The sequence is given as an iterator of u16 values.
589 /// If the number of bits per element is smaller than 16, the function takes the
590 /// least significant bits of each element, and discards the rest.
591 /// If the number of bits per element is larger than 16, the function will pad the elements
592 /// with zeros.
593 /// The function will append the bits of each element to the bit vector in the order they are
594 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
595 ///
596 /// See also: [`pack_from_iter_u64`], [`pack_from_iter_u32`], [`pack_from_iter_u8`], or the
597 /// functions to pack from a slice: [`pack_sequence_u16`]
598 ///
599 /// # Example
600 /// ```rust
601 /// use vers_vecs::BitVec;
602 ///
603 /// let sequence = [0b1010u16, 0b1100u16, 0b1111u16];
604 /// let bv = BitVec::pack_from_iter_u16(sequence.into_iter(), 4);
605 ///
606 /// assert_eq!(bv.len(), 12);
607 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
608 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
609 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
610 /// ```
611 ///
612 /// [`pack_from_iter_u64`]: BitVec::pack_from_iter_u64
613 /// [`pack_from_iter_u32`]: BitVec::pack_from_iter_u32
614 /// [`pack_from_iter_u8`]: BitVec::pack_from_iter_u8
615 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
616 pub fn pack_from_iter_u16<I: IntoIterator<Item = u16>>(
617 iter: I,
618 bits_per_element: usize,
619 ) -> Self {
620 Self::pack_bits_iter::<_, _, 16>(iter, bits_per_element)
621 }
622
623 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
624 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
625 /// The number of bits per element is given by `bits_per_element`.
626 /// The sequence is given as an iterator of u8 values.
627 /// If the number of bits per element is smaller than 8, the function takes the
628 /// least significant bits of each element, and discards the rest.
629 /// If the number of bits per element is larger than 8, the function will pad the elements
630 /// with zeros.
631 /// The function will append the bits of each element to the bit vector in the order they are
632 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
633 ///
634 /// See also: [`pack_from_iter_u64`], [`pack_from_iter_u32`], [`pack_from_iter_u16`], or the
635 /// functions to pack from a slice: [`pack_sequence_u8`]
636 ///
637 /// # Example
638 /// ```rust
639 /// use vers_vecs::BitVec;
640 ///
641 /// let sequence = [0b1010u8, 0b1100u8, 0b1111u8];
642 /// let bv = BitVec::pack_from_iter_u8(sequence.into_iter(), 4);
643 ///
644 /// assert_eq!(bv.len(), 12);
645 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
646 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
647 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
648 /// ```
649 ///
650 /// [`pack_from_iter_u64`]: BitVec::pack_from_iter_u64
651 /// [`pack_from_iter_u32`]: BitVec::pack_from_iter_u32
652 /// [`pack_from_iter_u16`]: BitVec::pack_from_iter_u16
653 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
654 pub fn pack_from_iter_u8<I: IntoIterator<Item = u8>>(iter: I, bits_per_element: usize) -> Self {
655 Self::pack_bits_iter::<_, _, 8>(iter, bits_per_element)
656 }
657
658 /// Construct a bit vector from bits given as boolean values in a slice.
659 /// The function will append the bits to the bit vector in the order they are
660 /// given in the sequence (i.e. the first bool is the first bit of the vector).
661 ///
662 /// # Example
663 /// ```rust
664 /// use vers_vecs::BitVec;
665 ///
666 /// let sequence = [true, false, true, true];
667 /// let bv = BitVec::from_bools(&sequence);
668 ///
669 /// assert_eq!(bv.len(), 4);
670 /// assert_eq!(bv.is_bit_set(0), Some(true));
671 /// assert_eq!(bv.is_bit_set(1), Some(false));
672 /// assert_eq!(bv.is_bit_set(2), Some(true));
673 /// assert_eq!(bv.is_bit_set(3), Some(true));
674 /// ```
675 pub fn from_bools(bools: &[bool]) -> Self {
676 let mut bv = BitVec::with_capacity(bools.len());
677 bools.iter().for_each(|&b| bv.append(b));
678 bv
679 }
680
681 /// Construct a bit vector from bits given as boolean values from an iterator.
682 /// The function will append the bits to the bit vector in the order they are
683 /// given in the sequence (i.e. the first bool is the first bit of the vector).
684 ///
685 /// # Example
686 /// ```rust
687 /// use vers_vecs::BitVec;
688 ///
689 /// let sequence = [true, false, true, true];
690 /// let bv = BitVec::from_bool_iter(sequence.into_iter());
691 ///
692 /// assert_eq!(bv.len(), 4);
693 /// assert_eq!(bv.is_bit_set(0), Some(true));
694 /// assert_eq!(bv.is_bit_set(1), Some(false));
695 /// assert_eq!(bv.is_bit_set(2), Some(true));
696 /// assert_eq!(bv.is_bit_set(3), Some(true));
697 /// ```
698 pub fn from_bool_iter<I: IntoIterator<Item = bool>>(iter: I) -> Self {
699 let mut bv = BitVec::new();
700 iter.into_iter().for_each(|b| bv.append(b));
701 bv
702 }
703
704 /// Append a bit encoded as a `bool` to the bit vector, where `true` means 1 and `false` means 0.
705 ///
706 /// See also: [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
707 ///
708 /// # Example
709 ///
710 /// ```rust
711 /// use vers_vecs::BitVec;
712 ///
713 /// let mut bv = BitVec::new();
714 /// bv.append(true);
715 ///
716 /// assert_eq!(bv.len(), 1);
717 /// assert_eq!(bv.get(0), Some(1));
718 /// ```
719 ///
720 /// [`append_bit`]: BitVec::append_bit
721 /// [`append_bit_u32`]: BitVec::append_bit_u32
722 /// [`append_bit_u16`]: BitVec::append_bit_u16
723 /// [`append_bit_u8`]: BitVec::append_bit_u8
724 /// [`append_word`]: BitVec::append_word
725 pub fn append(&mut self, bit: bool) {
726 if self.len.is_multiple_of(WORD_SIZE) {
727 self.data.push(0);
728 }
729 if bit {
730 self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
731 } else {
732 self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
733 }
734 self.len += 1;
735 }
736
737 /// Drop the last n bits from the bit vector. If more bits are dropped than the bit vector
738 /// contains, the bit vector is cleared.
739 ///
740 /// # Example
741 ///
742 /// ```rust
743 /// use vers_vecs::BitVec;
744 ///
745 /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
746 /// bv.drop_last(3);
747 ///
748 /// assert_eq!(bv.len(), 3);
749 /// assert_eq!(bv.get_bits(0, 3), Some(0b101u64));
750 ///
751 /// bv.drop_last(4);
752 ///
753 /// assert!(bv.is_empty());
754 /// ```
755 pub fn drop_last(&mut self, n: usize) {
756 if n > self.len {
757 self.data.clear();
758 self.len = 0;
759 return;
760 }
761
762 let new_limb_count = (self.len - n).div_ceil(WORD_SIZE);
763
764 // cut off limbs that we no longer need
765 if new_limb_count < self.data.len() {
766 self.data.truncate(new_limb_count);
767 }
768
769 // update bit vector length
770 self.len -= n;
771 }
772
773 /// Append a bit encoded in a u64.
774 /// The least significant bit is appended to the bit vector.
775 /// All other bits are ignored.
776 ///
777 /// See also: [`append`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
778 ///
779 /// # Example
780 ///
781 /// ```rust
782 /// use vers_vecs::BitVec;
783 ///
784 /// let mut bv = BitVec::new();
785 ///
786 /// bv.append_bit(1);
787 /// bv.append_bit(0);
788 ///
789 /// assert_eq!(bv.len(), 2);
790 /// assert_eq!(bv.get(0), Some(1));
791 /// assert_eq!(bv.get(1), Some(0));
792 /// ```
793 ///
794 /// [`append`]: BitVec::append
795 /// [`append_bit_u32`]: BitVec::append_bit_u32
796 /// [`append_bit_u16`]: BitVec::append_bit_u16
797 /// [`append_bit_u8`]: BitVec::append_bit_u8
798 /// [`append_word`]: BitVec::append_word
799 pub fn append_bit(&mut self, bit: u64) {
800 if self.len.is_multiple_of(WORD_SIZE) {
801 self.data.push(0);
802 }
803 if bit % 2 == 1 {
804 self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
805 } else {
806 self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
807 }
808
809 self.len += 1;
810 }
811
812 /// Append a bit from a u32. The least significant bit is appended to the bit vector.
813 /// All other bits are ignored.
814 ///
815 /// See also: [`append`], [`append_bit`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
816 ///
817 /// [`append`]: BitVec::append
818 /// [`append_bit`]: BitVec::append_bit
819 /// [`append_bit_u16`]: BitVec::append_bit_u16
820 /// [`append_bit_u8`]: BitVec::append_bit_u8
821 /// [`append_word`]: BitVec::append_word
822 pub fn append_bit_u32(&mut self, bit: u32) {
823 self.append_bit(u64::from(bit));
824 }
825
826 /// Append a bit from a u16. The least significant bit is appended to the bit vector.
827 /// All other bits are ignored.
828 ///
829 /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u8`], [`append_word`]
830 ///
831 /// [`append`]: BitVec::append
832 /// [`append_bit`]: BitVec::append_bit
833 /// [`append_bit_u32`]: BitVec::append_bit_u32
834 /// [`append_bit_u8`]: BitVec::append_bit_u8
835 /// [`append_word`]: BitVec::append_word
836 pub fn append_bit_u16(&mut self, bit: u16) {
837 self.append_bit(u64::from(bit));
838 }
839
840 /// Append a bit from a u8. The least significant bit is appended to the bit vector.
841 /// All other bits are ignored.
842 ///
843 /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_word`]
844 ///
845 /// [`append`]: BitVec::append
846 /// [`append_bit`]: BitVec::append_bit
847 /// [`append_bit_u32`]: BitVec::append_bit_u32
848 /// [`append_bit_u16`]: BitVec::append_bit_u16
849 /// [`append_word`]: BitVec::append_word
850 pub fn append_bit_u8(&mut self, bit: u8) {
851 self.append_bit(u64::from(bit));
852 }
853
854 /// Append a word to the bit vector. The bits are appended in little endian order (i.e. the first
855 /// bit of the word is appended first).
856 ///
857 /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`]
858 ///
859 /// # Example
860 ///
861 /// ```rust
862 /// use vers_vecs::BitVec;
863 ///
864 /// let mut bv = BitVec::new();
865 /// bv.append_word(0b1010_1010_1010_1010u64);
866 ///
867 /// assert_eq!(bv.len(), 64);
868 /// for i in 0..64 {
869 /// assert_eq!(bv.get(i), Some((0b1010_1010_1010_1010u64 >> i) & 1));
870 /// }
871 /// ```
872 ///
873 /// [`append`]: BitVec::append
874 /// [`append_bit`]: BitVec::append_bit
875 /// [`append_bit_u32`]: BitVec::append_bit_u32
876 /// [`append_bit_u16`]: BitVec::append_bit_u16
877 /// [`append_bit_u8`]: BitVec::append_bit_u8
878 pub fn append_word(&mut self, word: u64) {
879 if self.len.is_multiple_of(WORD_SIZE) {
880 self.data.push(word);
881 } else {
882 // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
883 self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
884 self.data[self.len / WORD_SIZE] |= word << (self.len % WORD_SIZE);
885
886 self.data.push(word >> (WORD_SIZE - self.len % WORD_SIZE));
887 }
888 self.len += WORD_SIZE;
889 }
890
891 /// Append multiple bits to the bit vector.
892 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
893 /// The number of bits to append is given by `len`. The bits are taken from the least
894 /// significant bits of `bits`.
895 /// All other bits are ignored.
896 ///
897 /// # Example
898 ///
899 /// ```rust
900 /// use vers_vecs::BitVec;
901 ///
902 /// let mut bv = BitVec::new();
903 /// bv.append_bits(0b1010_1010_1010_1010u64, 16);
904 ///
905 /// assert_eq!(bv.len(), 16);
906 /// assert_eq!(bv.get_bits(0, 16), Some(0b1010_1010_1010_1010u64));
907 /// ```
908 ///
909 /// # Panics
910 /// Panics if `len` is larger than 64.
911 pub fn append_bits(&mut self, bits: u64, len: usize) {
912 assert!(len <= 64, "Cannot append more than 64 bits");
913
914 if self.len.is_multiple_of(WORD_SIZE) {
915 self.data.push(bits);
916 } else {
917 // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
918 self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
919 self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
920
921 if self.len % WORD_SIZE + len > WORD_SIZE {
922 self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
923 }
924 }
925 self.len += len;
926 }
927
928 /// Append multiple bits to the bit vector.
929 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
930 /// The number of bits to append is given by `len`. The bits are taken from the least
931 /// significant bits of `bits`.
932 ///
933 /// This function does not check if `len` is larger than 64.
934 ///
935 /// Furthermore, if the bit-vector has trailing bits that are not zero
936 /// (i.e. the length is not a multiple of 64, and those bits are partially set),
937 /// the function will OR the new data with the trailing bits, destroying the appended data.
938 /// This can happen, if a call `append_bits[_unchecked](word, len)` appends a word which has
939 /// set bits beyond the `len - 1`-th bit,
940 /// or if bits have been dropped from the bit vector using [`drop_last`].
941 ///
942 /// See [`append_bits`] for a checked version of this function.
943 ///
944 /// # Panics
945 /// If `len` is larger than 64, the behavior is platform-dependent, and a processor
946 /// exception might be triggered.
947 ///
948 /// [`append_bits`]: BitVec::append_bits
949 /// [`drop_last`]: BitVec::drop_last
950 pub fn append_bits_unchecked(&mut self, bits: u64, len: usize) {
951 if self.len.is_multiple_of(WORD_SIZE) {
952 self.data.push(bits);
953 } else {
954 self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
955
956 if self.len % WORD_SIZE + len > WORD_SIZE {
957 self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
958 }
959 }
960 self.len += len;
961 }
962
963 /// Append the bits of another bit vector to the end of this vector.
964 /// If this vector does not contain a multiple of 64 bits, the appended limbs need to be
965 /// shifted to the left.
966 /// This function is guaranteed to reallocate the underlying vector at most once.
967 pub fn extend_bitvec(&mut self, other: &Self) {
968 // reserve space for the new bits, ensuring at most one re-allocation
969 self.data
970 .reserve((self.len + other.len).div_ceil(WORD_SIZE) - self.data.len());
971
972 let full_limbs = other.len() / WORD_SIZE;
973 for i in 0..full_limbs {
974 self.append_bits(other.data[i], WORD_SIZE);
975 }
976
977 let partial_bits = other.len % WORD_SIZE;
978 if partial_bits > 0 {
979 self.append_bits(other.data[full_limbs], partial_bits);
980 }
981 }
982
983 /// Return the length of the bit vector. The length is measured in bits.
984 #[must_use]
985 pub fn len(&self) -> usize {
986 self.len
987 }
988
989 /// Return whether the bit vector is empty (contains no bits).
990 #[must_use]
991 pub fn is_empty(&self) -> bool {
992 self.len == 0
993 }
994
995 /// Flip the bit at the given position.
996 ///
997 /// # Example
998 ///
999 /// ```rust
1000 /// use vers_vecs::BitVec;
1001 ///
1002 /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
1003 /// bv.flip_bit(1);
1004 ///
1005 /// assert_eq!(bv.len(), 6);
1006 /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
1007 /// ```
1008 ///
1009 /// # Panics
1010 /// If the position is larger than the length of the vector, the function panics.
1011 pub fn flip_bit(&mut self, pos: usize) {
1012 assert!(pos < self.len, "Index out of bounds");
1013 self.flip_bit_unchecked(pos);
1014 }
1015
1016 /// Flip the bit at the given position.
1017 ///
1018 /// See also: [`flip_bit`]
1019 ///
1020 /// # Panics
1021 /// If the position is larger than the length of the
1022 /// vector, the function will either modify unused memory or panic.
1023 /// This will not corrupt memory.
1024 ///
1025 /// [`flip_bit`]: BitVec::flip_bit
1026 pub fn flip_bit_unchecked(&mut self, pos: usize) {
1027 self.data[pos / WORD_SIZE] ^= 1 << (pos % WORD_SIZE);
1028 }
1029
1030 /// Return the bit at the given position.
1031 /// The bit is encoded in the least significant bit of a u64 value.
1032 /// If the position is larger than the length of the vector, None is returned.
1033 ///
1034 /// See also: [`get_unchecked`]
1035 ///
1036 /// # Example
1037 ///
1038 /// ```rust
1039 /// use vers_vecs::BitVec;
1040 ///
1041 /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
1042 ///
1043 /// assert_eq!(bv.get(1), Some(0));
1044 /// assert_eq!(bv.get(2), Some(1));
1045 /// ```
1046 ///
1047 /// [`get_unchecked`]: Self::get_unchecked
1048 #[must_use]
1049 pub fn get(&self, pos: usize) -> Option<u64> {
1050 if pos >= self.len {
1051 None
1052 } else {
1053 Some(self.get_unchecked(pos))
1054 }
1055 }
1056
1057 /// Return the bit at the given position.
1058 /// The bit is encoded in the least significant bit of a u64 value.
1059 ///
1060 /// # Panics
1061 /// If the position is larger than the length of the vector,
1062 /// the function will either return unpredictable data, or panic.
1063 /// Use [`get`] to properly handle this case with an `Option`.
1064 ///
1065 /// [`get`]: BitVec::get
1066 #[must_use]
1067 pub fn get_unchecked(&self, pos: usize) -> u64 {
1068 (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1
1069 }
1070
1071 /// Set the bit at the given position.
1072 /// The bit is encoded in the least significant bit of a u64 value.
1073 ///
1074 /// See also: [`set_unchecked`]
1075 ///
1076 /// # Example
1077 ///
1078 /// ```rust
1079 /// use vers_vecs::BitVec;
1080 ///
1081 /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
1082 /// bv.set(1, 1).unwrap();
1083 ///
1084 /// assert_eq!(bv.len(), 6);
1085 /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
1086 /// ```
1087 ///
1088 /// # Errors
1089 /// If the position is out of range, the function will return `Err` with an error message,
1090 /// otherwise it will return an empty `Ok`.
1091 ///
1092 /// [`set_unchecked`]: BitVec::set_unchecked
1093 pub fn set(&mut self, pos: usize, value: u64) -> Result<(), &str> {
1094 if pos >= self.len {
1095 Err("out of range")
1096 } else {
1097 self.set_unchecked(pos, value);
1098 Ok(())
1099 }
1100 }
1101
1102 /// Set the bit at the given position.
1103 /// The bit is encoded in the least significant bit of a u64 value.
1104 ///
1105 /// # Panics
1106 /// If the position is larger than the length of the vector,
1107 /// the function will either do nothing, or panic.
1108 /// Use [`set`] to properly handle this case with a `Result`.
1109 ///
1110 /// [`set`]: BitVec::set
1111 pub fn set_unchecked(&mut self, pos: usize, value: u64) {
1112 self.data[pos / WORD_SIZE] = (self.data[pos / WORD_SIZE] & !(0x1 << (pos % WORD_SIZE)))
1113 | ((value & 0x1) << (pos % WORD_SIZE));
1114 }
1115
1116 /// Return whether the bit at the given position is set.
1117 /// If the position is larger than the length of the vector, None is returned.
1118 ///
1119 /// See also: [`is_bit_set_unchecked`]
1120 ///
1121 /// # Example
1122 ///
1123 /// ```rust
1124 /// use vers_vecs::BitVec;
1125 ///
1126 /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
1127 ///
1128 /// assert!(!bv.is_bit_set(1).unwrap());
1129 /// assert!(bv.is_bit_set(2).unwrap());
1130 /// ```
1131 ///
1132 /// [`is_bit_set_unchecked`]: BitVec::is_bit_set_unchecked
1133 #[must_use]
1134 pub fn is_bit_set(&self, pos: usize) -> Option<bool> {
1135 if pos >= self.len {
1136 None
1137 } else {
1138 Some(self.is_bit_set_unchecked(pos))
1139 }
1140 }
1141
1142 /// Return whether the bit at the given position is set.
1143 ///
1144 /// # Panics
1145 /// If the position is larger than the length of the vector,
1146 /// the function will either return unpredictable data, or panic.
1147 /// Use [`is_bit_set`] to properly handle this case with an `Option`.
1148 ///
1149 /// [`is_bit_set`]: BitVec::is_bit_set
1150 #[must_use]
1151 pub fn is_bit_set_unchecked(&self, pos: usize) -> bool {
1152 self.get_unchecked(pos) != 0
1153 }
1154
1155 /// Return multiple bits at the given position.
1156 /// The number of bits to return is given by `len`.
1157 /// At most 64 bits can be returned.
1158 /// If the position at the end of the query is larger than the length of the vector,
1159 /// None is returned (even if the query partially overlaps with the vector).
1160 /// If the length of the query is larger than 64, None is returned.
1161 ///
1162 /// The first bit at `pos` is the most significant bit of the return value
1163 /// limited to `len` bits.
1164 #[must_use]
1165 pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> {
1166 if len > WORD_SIZE || len == 0 {
1167 return None;
1168 }
1169 if pos + len > self.len {
1170 None
1171 } else {
1172 Some(self.get_bits_unchecked(pos, len))
1173 }
1174 }
1175
1176 /// Return multiple bits at the given position. The number of bits to return is given by `len`.
1177 /// At most 64 bits can be returned.
1178 ///
1179 /// This function is always inlined, because it gains a lot from loop optimization and
1180 /// can utilize the processor pre-fetcher better if it is.
1181 ///
1182 /// # Errors
1183 /// If the length of the query is larger than 64, unpredictable data will be returned.
1184 /// Use [`get_bits`] to avoid this.
1185 ///
1186 /// # Panics
1187 /// If the position or interval is larger than the length of the vector,
1188 /// the function will either return any valid results padded with unpredictable
1189 /// data or panic.
1190 ///
1191 /// [`get_bits`]: BitVec::get_bits
1192 #[must_use]
1193 #[allow(clippy::inline_always)]
1194 #[allow(clippy::comparison_chain)] // readability
1195 #[inline(always)] // inline to gain loop optimization and pipeline advantages for elias fano
1196 #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen
1197 pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 {
1198 debug_assert!(len <= WORD_SIZE);
1199 let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE);
1200 if pos % WORD_SIZE + len <= WORD_SIZE {
1201 partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
1202 } else {
1203 (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE)))
1204 & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
1205 }
1206 }
1207
1208 /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
1209 /// `index`. The number of bits per encoded element is given by `n`.
1210 ///
1211 /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
1212 /// and is equivalent to calling [`get_bits(index * n, n)`].
1213 /// It is thus safe to use this method with any index and any size n <= 64.
1214 ///
1215 /// If the element is out of bounds, None is returned.
1216 /// The element is returned as a u64 value.
1217 ///
1218 /// # Example
1219 /// ```rust
1220 /// use vers_vecs::BitVec;
1221 ///
1222 /// let sequence = [10, 100, 124, 45, 223];
1223 /// let bv = BitVec::pack_sequence_u64(&sequence, 8);
1224 ///
1225 /// assert_eq!(bv.unpack_element(0, 8), Some(10));
1226 /// assert_eq!(bv.unpack_element(2, 8), Some(124));
1227 /// ```
1228 ///
1229 /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1230 /// [`get_bits(index * n, n)`]: BitVec::get_bits
1231 #[must_use]
1232 #[allow(clippy::inline_always)]
1233 #[inline(always)] // to gain optimization if n is constant
1234 pub fn unpack_element(&self, index: usize, n: usize) -> Option<u64> {
1235 self.get_bits(index * n, n)
1236 }
1237
1238 /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
1239 /// `index`. The number of bits per encoded element is given by `n`.
1240 ///
1241 /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
1242 /// and is equivalent to calling [`get_bits_unchecked(index * n, n)`].
1243 /// It is thus safe to use this method with any index where `index * n + n` is in-bounds,
1244 /// and any size n <= 64.
1245 ///
1246 /// # Panics
1247 /// If the element is out of bounds, the function will either return unpredictable data or panic.
1248 /// Use [`unpack_element`] for a checked version of this function.
1249 ///
1250 /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1251 /// [`get_bits_unchecked(index * n, n)`]: BitVec::get_bits_unchecked
1252 /// [`unpack_element`]: BitVec::unpack_element
1253 #[must_use]
1254 #[allow(clippy::inline_always)]
1255 #[inline(always)] // to gain optimization if n is constant
1256 pub fn unpack_element_unchecked(&self, index: usize, n: usize) -> u64 {
1257 self.get_bits_unchecked(index * n, n)
1258 }
1259
1260 /// Return the number of ones in the bit vector. Since the bit vector doesn't store additional
1261 /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1262 ///
1263 /// [`RsVec`]: crate::RsVec
1264 #[must_use]
1265 #[allow(clippy::missing_panics_doc)] // can't panic because of manual bounds check
1266 pub fn count_ones(&self) -> u64 {
1267 let mut ones: u64 = self.data[0..self.len / WORD_SIZE]
1268 .iter()
1269 .map(|limb| u64::from(limb.count_ones()))
1270 .sum();
1271 if !self.len.is_multiple_of(WORD_SIZE) {
1272 ones += u64::from(
1273 (self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones(),
1274 );
1275 }
1276 ones
1277 }
1278
1279 /// Return the number of zeros in the bit vector. Since the bit vector doesn't store additional
1280 /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1281 /// This method calls [`count_ones`].
1282 ///
1283 /// [`RsVec`]: crate::RsVec
1284 /// [`count_ones`]: BitVec::count_ones
1285 #[must_use]
1286 pub fn count_zeros(&self) -> u64 {
1287 self.len as u64 - self.count_ones()
1288 }
1289
1290 /// Mask this bit vector with another bitvector using bitwise or. The mask is applied lazily
1291 /// whenever an operation on the resulting vector is performed.
1292 ///
1293 /// # Errors
1294 /// Returns an error if the length of the vector doesn't match the mask length.
1295 #[inline]
1296 pub fn mask_or<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1297 MaskedBitVec::new(self, mask, |a, b| a | b)
1298 }
1299
1300 /// Mask this bit vector with another bitvector using bitwise or.
1301 /// The mask is applied immediately, unlike in [`mask_or`].
1302 ///
1303 /// # Errors
1304 /// Returns an error if the length of the vector doesn't match the mask length.
1305 ///
1306 /// [`mask_or`]: BitVec::mask_or
1307 pub fn apply_mask_or(&mut self, mask: &BitVec) -> Result<(), String> {
1308 if self.len != mask.len {
1309 return Err(String::from(
1310 "mask cannot have different length than vector",
1311 ));
1312 }
1313
1314 for i in 0..self.data.len() {
1315 self.data[i] |= mask.data[i];
1316 }
1317
1318 Ok(())
1319 }
1320
1321 /// Mask this bit vector with another bitvector using bitwise and. The mask is applied lazily
1322 /// whenever an operation on the resulting vector is performed.
1323 ///
1324 /// # Errors
1325 /// Returns an error if the length of the vector doesn't match the mask length.
1326 #[inline]
1327 pub fn mask_and<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1328 MaskedBitVec::new(self, mask, |a, b| a & b)
1329 }
1330
1331 /// Mask this bit vector with another bitvector using bitwise and.
1332 /// The mask is applied immediately, unlike in [`mask_and`].
1333 ///
1334 /// # Errors
1335 /// Returns an error if the length of the vector doesn't match the mask length.
1336 ///
1337 /// [`mask_and`]: BitVec::mask_and
1338 pub fn apply_mask_and(&mut self, mask: &BitVec) -> Result<(), String> {
1339 if self.len != mask.len {
1340 return Err(String::from(
1341 "mask cannot have different length than vector",
1342 ));
1343 }
1344
1345 for i in 0..self.data.len() {
1346 self.data[i] &= mask.data[i];
1347 }
1348
1349 Ok(())
1350 }
1351
1352 /// Mask this bit vector with another bitvector using bitwise xor. The mask is applied lazily
1353 /// whenever an operation on the resulting vector is performed.
1354 ///
1355 /// # Errors
1356 /// Returns an error if the length of the vector doesn't match the mask length.
1357 #[inline]
1358 pub fn mask_xor<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1359 MaskedBitVec::new(self, mask, |a, b| a ^ b)
1360 }
1361
1362 /// Mask this bit vector with another bitvector using bitwise xor.
1363 /// The mask is applied immediately, unlike in [`mask_xor`].
1364 ///
1365 /// # Errors
1366 /// Returns an error if the length of the vector doesn't match the mask length.
1367 ///
1368 /// [`mask_xor`]: BitVec::mask_xor
1369 pub fn apply_mask_xor(&mut self, mask: &BitVec) -> Result<(), String> {
1370 if self.len != mask.len {
1371 return Err(String::from(
1372 "mask cannot have different length than vector",
1373 ));
1374 }
1375
1376 for i in 0..self.data.len() {
1377 self.data[i] ^= mask.data[i];
1378 }
1379
1380 Ok(())
1381 }
1382
1383 /// Mask this bit vector with another bitvector using a custom masking operation. The mask is
1384 /// applied lazily whenever an operation on the resulting vector is performed.
1385 ///
1386 /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1387 /// The last block of a bit vector might contain fewer bits, and will be padded with
1388 /// unpredictable data. Implementations may choose to modify those padding bits without
1389 /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1390 /// within the vector is unspecified.
1391 ///
1392 /// # Errors
1393 /// Returns an error if the length of the vector doesn't match the mask length.
1394 #[inline]
1395 pub fn mask_custom<'s, 'b, F>(
1396 &'s self,
1397 mask: &'b BitVec,
1398 mask_op: F,
1399 ) -> Result<MaskedBitVec<'s, 'b, F>, String>
1400 where
1401 F: Fn(u64, u64) -> u64,
1402 {
1403 MaskedBitVec::new(self, mask, mask_op)
1404 }
1405
1406 /// Mask this bit vector with another bitvector using a custom masking operation.
1407 /// The mask is applied immediately, unlike in [`mask_custom`].
1408 ///
1409 /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1410 /// The last block of a bit vector might contain fewer bits, and will be padded with
1411 /// unpredictable data. Implementations may choose to modify those padding bits without
1412 /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1413 /// within the vector is unspecified.
1414 ///
1415 /// # Errors
1416 /// Returns an error if the length of the vector doesn't match the mask length.
1417 ///
1418 /// [`mask_custom`]: BitVec::mask_custom
1419 #[inline]
1420 pub fn apply_mask_custom(
1421 &mut self,
1422 mask: &BitVec,
1423 mask_op: fn(u64, u64) -> u64,
1424 ) -> Result<(), String> {
1425 if self.len != mask.len {
1426 return Err(String::from(
1427 "mask cannot have different length than vector",
1428 ));
1429 }
1430
1431 for i in 0..self.data.len() {
1432 self.data[i] = mask_op(self.data[i], mask.data[i]);
1433 }
1434
1435 Ok(())
1436 }
1437
1438 /// Returns the number of bytes on the heap for this vector.
1439 /// Does not include allocated memory that isn't used.
1440 #[must_use]
1441 pub fn heap_size(&self) -> usize {
1442 self.data.len() * size_of::<u64>()
1443 }
1444
1445 /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1446 /// right half the remaining bits `at..`. If the split index is larger than the length of the
1447 /// vector, the vector is returned unmodified in an `Err` variant.
1448 ///
1449 /// # Errors
1450 /// If the index is out of bounds, the function will return an error
1451 /// containing the original vector.
1452 ///
1453 /// See also: [`split_at_unchecked`]
1454 ///
1455 /// [`split_at_unchecked`]: Self::split_at_unchecked
1456 pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> {
1457 if at > self.len {
1458 Err(self)
1459 } else {
1460 Ok(self.split_at_unchecked(at))
1461 }
1462 }
1463
1464 /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1465 /// right half the remaining bits `at..`.
1466 ///
1467 /// # Panics
1468 /// If the index is larger than the length of the vector the function will panic or run
1469 /// out of memory.
1470 /// Use [`split_at`] to properly handle this case.
1471 ///
1472 /// [`split_at`]: Self::split_at
1473 #[must_use]
1474 pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) {
1475 let other_len = self.len - at;
1476 let mut other = Self::with_capacity(other_len);
1477
1478 if other_len == 0 {
1479 return (self, other);
1480 }
1481
1482 let first_limb = at / WORD_SIZE;
1483 let last_limb = self.len / WORD_SIZE;
1484
1485 // First, we figure out the number of bits from the first limb to retain in this vector:
1486 let leading_partial = at % WORD_SIZE;
1487
1488 // If the split point is in the last limb, and the vector ends before the last bit, first_limb
1489 // and last_limb will be equal, and the other half is simply other_len bits off the limb
1490 // right shifted by the number of bits to retain in this vector.
1491 if first_limb == last_limb {
1492 other.append_bits_unchecked(self.data[first_limb] >> leading_partial, other_len);
1493 } else {
1494 // Otherwise, some range n..last_limb should be copied in their entirety to the other half,
1495 // with n=first_limb+1 if the split point is inside the first limb (leading_partial > 0), or
1496 // n=first_limb if the entire first limb belongs in the other half.
1497 let full_limbs = if leading_partial > 0 {
1498 // If the split point is inside the first limb, we also have to remember to copy over
1499 // the trailing bits to the new vector.
1500 other.append_bits_unchecked(
1501 self.data[first_limb] >> leading_partial,
1502 WORD_SIZE - leading_partial,
1503 );
1504 first_limb + 1..last_limb
1505 } else {
1506 first_limb..last_limb
1507 };
1508
1509 // Copy over any full limbs.
1510 for i in full_limbs {
1511 other.append_bits_unchecked(self.data[i], WORD_SIZE);
1512 }
1513
1514 // Finally, if the vector has a partially filled last limb, we need to put those bits
1515 // in the other half.
1516 let trailing_partial = self.len % WORD_SIZE;
1517 if trailing_partial > 0 {
1518 other.append_bits_unchecked(self.data[last_limb], trailing_partial);
1519 }
1520 }
1521
1522 // remove the copied bits from the original vector
1523 self.drop_last(other_len);
1524
1525 (self, other)
1526 }
1527
1528 /// Iterate through the u64 limbs of the bitvector.
1529 /// The limbs are encoded starting at the least significant bit (i.e., the first bit in the bit
1530 /// vector is the least significant bit in the first limb).
1531 /// Note that the last limb may be incomplete, if the number of bits in the vector is not
1532 /// divisible by 64.
1533 /// In this case, the least significant `self.len() % 64` bits are the correct bits.
1534 /// The remaining bits of the last limb are in an unspecified state and code should not rely
1535 /// on them being set or unset.
1536 #[inline]
1537 pub fn iter_limbs(&self) -> impl Iterator<Item = u64> + use<'_> {
1538 self.data.iter().copied()
1539 }
1540}
1541
1542impl_vector_iterator! { BitVec, BitVecIter, BitVecRefIter }
1543
1544/// Create a new bit vector from a slice of u64 values.
1545/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1546/// The function will append the bits of each element to the bit vector in the order they are
1547/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1548impl From<&[u64]> for BitVec {
1549 fn from(data: &[u64]) -> Self {
1550 BitVec::from_limbs(data)
1551 }
1552}
1553
1554/// Create a new bit vector from a slice of u64 values.
1555/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1556/// The function will append the bits of each element to the bit vector in the order they are
1557/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1558impl From<Vec<u64>> for BitVec {
1559 fn from(data: Vec<u64>) -> Self {
1560 BitVec::from_limbs(&data)
1561 }
1562}
1563
1564impl Extend<BitVec> for BitVec {
1565 fn extend<T: IntoIterator<Item = BitVec>>(&mut self, iter: T) {
1566 for v in iter {
1567 self.extend_bitvec(&v)
1568 }
1569 }
1570}
1571
1572impl<'t> Extend<&'t BitVec> for BitVec {
1573 fn extend<T: IntoIterator<Item = &'t BitVec>>(&mut self, iter: T) {
1574 for v in iter {
1575 self.extend_bitvec(v)
1576 }
1577 }
1578}
1579
1580/// Create a new bit vector from u64 values.
1581/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1582/// The function will append the bits of each element to the bit vector in the order they are
1583/// given in the iterator (i.e. the first element takes bits `0..64` of the vector).
1584impl FromIterator<u64> for BitVec {
1585 fn from_iter<T: IntoIterator<Item = u64>>(iter: T) -> Self {
1586 BitVec::from_limbs_iter(iter)
1587 }
1588}
1589
1590impl PartialEq for BitVec {
1591 // unlike the auto-derived implementation, this custom implementation ignores junk data at
1592 // the end of the bit vector
1593 fn eq(&self, other: &Self) -> bool {
1594 if self.len != other.len {
1595 return false;
1596 }
1597
1598 if self.len == 0 {
1599 return true;
1600 }
1601
1602 for i in 0..self.data.len() - 1 {
1603 if self.data[i] != other.data[i] {
1604 return false;
1605 }
1606 }
1607
1608 // in last limb, ignore junk data
1609 let mask = (1 << (self.len % WORD_SIZE)) - 1;
1610 if self.data[self.data.len() - 1] & mask != other.data[self.data.len() - 1] & mask {
1611 return false;
1612 }
1613
1614 true
1615 }
1616}
1617
1618impl Eq for BitVec {}
1619
1620impl Hash for BitVec {
1621 fn hash<H: Hasher>(&self, state: &mut H) {
1622 state.write_usize(self.len);
1623 if self.len > 0 {
1624 self.data[0..self.data.len() - 1]
1625 .iter()
1626 .for_each(|x| state.write_u64(*x));
1627 let masked_last_limb = self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1);
1628 state.write_u64(masked_last_limb);
1629 }
1630 }
1631}
1632
1633#[cfg(test)]
1634mod tests;