vers_vecs/bit_vec/mod.rs
1//! This module contains a simple [bit vector][BitVec] implementation with no overhead and a fast succinct
2//! bit vector implementation with [rank and select queries][fast_rs_vec::RsVec].
3
4use crate::bit_vec::mask::MaskedBitVec;
5use crate::util::impl_vector_iterator;
6use std::cmp::min;
7use std::hash::{Hash, Hasher};
8use std::mem::size_of;
9
10pub mod fast_rs_vec;
11
12pub mod sparse;
13
14pub mod mask;
15
16/// Size of a word in bitvectors. All vectors operate on 64-bit words.
17const WORD_SIZE: usize = 64;
18
19/// Type alias for masked bitvectors that implement a simple bitwise binary operation.
20/// The first lifetime is for the bit vector that is being masked, the second lifetime is for the
21/// mask.
22pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>;
23
24/// A simple bit vector that does not support rank and select queries.
25/// Bits are stored in little-endian order, i.e. the least significant bit is stored first.
26/// The bit vector is stored as a sequence of 64 bit limbs.
27/// The last limb may be partially filled.
28///
29/// The bit vector has a wide range of constructors that allow for easy creation from various
30/// sources.
31/// Among them are constructors for creating an empty vector ([`BitVec::new`]),
32/// creating one from single bits of various integer types ([`BitVec::from_bits`] and variations),
33/// creating limbs from u64 values directly ([`BitVec::from_limbs`] and variations),
34/// or packing a sequence of numerical values into a dense bit sequence
35/// ([`BitVec::pack_sequence_u64`] and variations).
36///
37/// The bit vector can be modified after creation
38/// (e.g. by appending [bits](BitVec::append_bits)
39/// or [words](BitVec::append_word),
40/// [flipping](BitVec::flip_bit),
41/// or [setting](BitVec::set) bits).
42/// Bits can be [accessed](BitVec::get) by position,
43/// and [multiple bits](BitVec::get_bits) can be accessed at once.
44/// Bits can be [dropped](BitVec::drop_last) from the end.
45///
46/// # Example
47/// ```rust
48/// use vers_vecs::{BitVec, RsVec};
49///
50/// let mut bit_vec = BitVec::new();
51/// bit_vec.append_bit(0u64);
52/// bit_vec.append_bit_u32(1u32);
53/// bit_vec.append_word(0b1010_1010_1010_1010u64); // appends exactly 64 bits
54///
55/// assert_eq!(bit_vec.len(), 66);
56/// assert_eq!(bit_vec.get(0), Some(0u64));
57/// assert_eq!(bit_vec.get(1), Some(1u64));
58/// ```
59#[derive(Clone, Debug, Default)]
60#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
61#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
62pub struct BitVec {
63 data: Vec<u64>,
64 len: usize,
65}
66
67impl BitVec {
68 /// Create a new empty bit vector.
69 #[must_use]
70 pub fn new() -> Self {
71 Self::default()
72 }
73
74 /// Create a new empty bit vector with the given capacity.
75 /// The capacity is measured in bits.
76 /// The bit vector will be able to hold at least `capacity` bits without reallocating.
77 /// More memory may be allocated according to the underlying allocation strategy.
78 #[must_use]
79 pub fn with_capacity(capacity: usize) -> Self {
80 Self {
81 data: Vec::with_capacity(capacity / WORD_SIZE + 1),
82 len: 0,
83 }
84 }
85
86 /// Create a new bit vector with all zeros and the given length.
87 /// The length is measured in bits.
88 #[must_use]
89 pub fn from_zeros(len: usize) -> Self {
90 let mut data = vec![0; len / WORD_SIZE];
91 if !len.is_multiple_of(WORD_SIZE) {
92 data.push(0);
93 }
94 Self { data, len }
95 }
96
97 /// Create a new bit vector with all ones and the given length.
98 /// The length is measured in bits.
99 #[must_use]
100 pub fn from_ones(len: usize) -> Self {
101 let mut data = vec![u64::MAX; len / WORD_SIZE];
102 if !len.is_multiple_of(WORD_SIZE) {
103 data.push((1 << (len % WORD_SIZE)) - 1);
104 }
105 Self { data, len }
106 }
107
108 /// Construct a bit vector from a set of bits given as distinct u8 values.
109 /// The constructor will take the least significant bit from each value and append it to a
110 /// bit vector.
111 /// All other bits are ignored.
112 ///
113 /// See also: [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
114 ///
115 /// # Example
116 /// ```rust
117 /// use vers_vecs::BitVec;
118 ///
119 /// let bits: &[u8] = &[1, 0, 1, 1, 1, 1];
120 /// let bv = BitVec::from_bits(&bits);
121 ///
122 /// assert_eq!(bv.len(), 6);
123 /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
124 /// ```
125 ///
126 /// [`from_bits_u16`]: BitVec::from_bits_u16
127 /// [`from_bits_u32`]: BitVec::from_bits_u32
128 /// [`from_bits_u64`]: BitVec::from_bits_u64
129 /// [`from_bits_iter`]: BitVec::from_bits_iter
130 #[must_use]
131 pub fn from_bits(bits: &[u8]) -> Self {
132 let mut bv = Self::with_capacity(bits.len());
133 bits.iter().for_each(|&b| bv.append_bit(b.into()));
134 bv
135 }
136
137 /// Construct a bit vector from a set of bits given as distinct u16 values.
138 /// The constructor will take the least significant bit from each value and append it to a
139 /// bit vector.
140 /// All other bits are ignored.
141 ///
142 /// See also: [`from_bits`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`]
143 ///
144 /// [`from_bits`]: BitVec::from_bits
145 /// [`from_bits_u32`]: BitVec::from_bits_u32
146 /// [`from_bits_u64`]: BitVec::from_bits_u64
147 /// [`from_bits_iter`]: BitVec::from_bits_iter
148 #[must_use]
149 pub fn from_bits_u16(bits: &[u16]) -> Self {
150 let mut bv = Self::with_capacity(bits.len());
151 bits.iter().for_each(|&b| bv.append_bit_u16(b));
152 bv
153 }
154
155 /// Construct a bit vector from a set of bits given as distinct u32 values.
156 /// The constructor will take the least significant bit from each value and append it to a
157 /// bit vector.
158 /// All other bits are ignored.
159 ///
160 /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u64`], [`from_bits_iter`]
161 ///
162 /// [`from_bits`]: BitVec::from_bits
163 /// [`from_bits_u16`]: BitVec::from_bits_u16
164 /// [`from_bits_u64`]: BitVec::from_bits_u64
165 /// [`from_bits_iter`]: BitVec::from_bits_iter
166 #[must_use]
167 pub fn from_bits_u32(bits: &[u32]) -> Self {
168 let mut bv = Self::with_capacity(bits.len());
169 bits.iter().for_each(|&b| bv.append_bit_u32(b));
170 bv
171 }
172
173 /// Construct a bit vector from a set of bits given as distinct u64 values.
174 /// The constructor will take the least significant bit from each value and append it to a
175 /// bit vector.
176 /// All other bits are ignored.
177 ///
178 /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_iter`]
179 ///
180 /// [`from_bits`]: BitVec::from_bits
181 /// [`from_bits_u16`]: BitVec::from_bits_u16
182 /// [`from_bits_u32`]: BitVec::from_bits_u32
183 /// [`from_bits_iter`]: BitVec::from_bits_iter
184 #[must_use]
185 pub fn from_bits_u64(bits: &[u64]) -> Self {
186 let mut bv = Self::with_capacity(bits.len());
187 bits.iter().for_each(|&b| bv.append_bit(b));
188 bv
189 }
190
191 /// Construct a bit vector from an iterator of bits.
192 /// The constructor will take the least significant bit from each value and append it to a
193 /// bit vector.
194 /// All other bits are ignored.
195 /// The iterator must yield values that can be converted into u64 values.
196 ///
197 /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`]
198 ///
199 /// # Example
200 /// ```rust
201 /// use vers_vecs::BitVec;
202 ///
203 /// let bits = [true, false, true, true, true, true];
204 /// let bv = BitVec::from_bits_iter(bits.iter().copied());
205 ///
206 /// let bits = [0b1u8, 0b0, 0b1, 0b1, 0b1, 0b1];
207 /// let bv2 = BitVec::from_bits_iter(bits.iter().copied());
208 ///
209 /// assert_eq!(bv.len(), 6);
210 /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64));
211 /// assert_eq!(bv, bv2);
212 /// ```
213 ///
214 /// [`from_bits`]: BitVec::from_bits
215 /// [`from_bits_u16`]: BitVec::from_bits_u16
216 /// [`from_bits_u32`]: BitVec::from_bits_u32
217 /// [`from_bits_u64`]: BitVec::from_bits_u64
218 #[must_use]
219 pub fn from_bits_iter<I, E>(iter: I) -> Self
220 where
221 E: Into<u64>,
222 I: IntoIterator<Item = E>,
223 {
224 let iter = iter.into_iter();
225 let mut bv = Self::with_capacity(iter.size_hint().0);
226 for bit in iter {
227 bv.append_bit(bit.into());
228 }
229 bv
230 }
231
232 /// Construct a bit vector from a slice of u64 quad words.
233 /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
234 /// 64 bits to the bit vector).
235 /// Since the data is only cloned without any masking or transformation,
236 /// this is one of the fastest ways to create a bit vector.
237 ///
238 /// See also: [`from_vec`], [`from_limbs_iter`]
239 ///
240 /// # Example
241 /// ```rust
242 /// use vers_vecs::BitVec;
243 ///
244 /// let words = [0, 256, u64::MAX];
245 /// let bv = BitVec::from_limbs(&words);
246 ///
247 /// assert_eq!(bv.len(), 192);
248 /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
249 /// assert_eq!(bv.get(72), Some(1));
250 /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
251 /// ```
252 ///
253 /// [`from_vec`]: BitVec::from_vec
254 /// [`from_limbs_iter`]: BitVec::from_limbs_iter
255 #[must_use]
256 pub fn from_limbs(words: &[u64]) -> Self {
257 let len = words.len() * WORD_SIZE;
258 Self {
259 data: words.to_vec(),
260 len,
261 }
262 }
263
264 /// Construct a bit vector from an iterator of u64 quad words.
265 /// The quad words are interpreted as limbs of the bit vector (i.e. each quad word contributes
266 /// 64 bits to the bit vector).
267 /// Since the data is only cloned without any masking or transformation,
268 /// this is one of the fastest ways to create a bit vector.
269 ///
270 /// See also: [`from_limbs`], [`from_vec`]
271 ///
272 /// # Example
273 /// ```rust
274 /// use std::iter::repeat;
275 /// use vers_vecs::BitVec;
276 ///
277 /// let zeros = repeat(0xaaaaaaaaaaaaaaaau64).take(10);
278 /// let bv = BitVec::from_limbs_iter(zeros);
279 ///
280 /// assert_eq!(bv.len(), 640);
281 /// for i in 0..640 {
282 /// assert_eq!(bv.get(i), Some((i % 2 == 1) as u64));
283 /// }
284 /// ```
285 ///
286 /// [`from_limbs`]: BitVec::from_limbs
287 /// [`from_vec`]: BitVec::from_vec
288 pub fn from_limbs_iter<I, E>(iter: I) -> Self
289 where
290 E: Into<u64>,
291 I: IntoIterator<Item = E>,
292 {
293 let vec = iter.into_iter().map(Into::into).collect();
294 Self::from_vec(vec)
295 }
296
297 /// Construct a bit vector from a vector of u64 quad words.
298 /// The quad words are interpreted as limbs of the bit vector
299 /// (i.e. each quad word contributes 64 bits to the bit vector).
300 /// Since the data is moved without any masking or transformation, this is one of the fastest ways
301 /// to create a bit vector.
302 ///
303 /// See also: [`from_limbs`], [`from_limbs_iter`]
304 ///
305 /// # Example
306 /// ```rust
307 /// use vers_vecs::BitVec;
308 ///
309 /// let words = vec![0, 256, u64::MAX];
310 /// let bv = BitVec::from_vec(words);
311 ///
312 /// assert_eq!(bv.len(), 192);
313 /// assert_eq!(bv.get_bits(0, 64), Some(0u64));
314 /// assert_eq!(bv.get(72), Some(1));
315 /// assert_eq!(bv.get_bits(128, 64), Some(u64::MAX));
316 /// ```
317 ///
318 /// [`from_limbs`]: BitVec::from_limbs
319 /// [`from_limbs_iter`]: BitVec::from_limbs_iter
320 #[must_use]
321 pub fn from_vec(data: Vec<u64>) -> Self {
322 let len = data.len() * WORD_SIZE;
323 Self { data, len }
324 }
325
326 /// Helper function for packing constructors to pack a slice of elements into a bit-vector,
327 /// taking the least significant `bits_per_element` per element.
328 fn pack_bits<T, const MAX_BITS: usize>(sequence: &[T], bits_per_element: usize) -> Self
329 where
330 T: Into<u64> + Copy,
331 {
332 let mut bv = Self::with_capacity(sequence.len() * bits_per_element);
333 for &word in sequence {
334 Self::pack_word_into_vector::<MAX_BITS>(&mut bv, word.into(), bits_per_element)
335 }
336 bv
337 }
338
339 /// Helper function for packing constructors to pack elements from an iterator into a bit-vector,
340 /// taking the least significant `bits_per_element` per element.
341 fn pack_bits_iter<T, I: IntoIterator<Item=T>, const MAX_BITS: usize>(iter: I, bits_per_element: usize) -> Self
342 where T: Into<u64> + Copy
343 {
344 let mut bv = Self::new();
345
346 for word in iter {
347 Self::pack_word_into_vector::<MAX_BITS>(&mut bv, word.into(), bits_per_element)
348 }
349
350 bv
351 }
352
353 /// Helper function for packing constructors to pack `num_bits` of a given word into the given
354 /// vector.
355 #[inline(always)]
356 fn pack_word_into_vector<const MAX_BITS: usize>(bv: &mut BitVec, word: u64, num_bits: usize) {
357 if num_bits <= MAX_BITS {
358 bv.append_bits(word.into(), num_bits);
359 } else {
360 bv.append_bits(word.into(), MAX_BITS);
361 let mut rest = num_bits - MAX_BITS;
362 while rest > 0 {
363 bv.append_bits(0, min(rest, MAX_BITS));
364 rest = rest.saturating_sub(MAX_BITS);
365 }
366 }
367 }
368
369
370 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
371 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
372 /// The number of bits per element is given by `bits_per_element`.
373 /// The sequence is given as a slice of u64 values.
374 /// If the number of bits per element is smaller than 64, the function takes the
375 /// least significant bits of each element, and discards the rest.
376 /// If the number of bits per element is larger than 64, the function will pad the elements
377 /// with zeros.
378 /// The function will append the bits of each element to the bit vector in the order they are
379 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
380 ///
381 /// See also: [`pack_sequence_u32`], [`pack_sequence_u16`], [`pack_sequence_u8`]
382 ///
383 /// # Example
384 /// ```rust
385 /// use vers_vecs::BitVec;
386 ///
387 /// let sequence = [0b1010u64, 0b1100u64, 0b1111u64];
388 /// let bv = BitVec::pack_sequence_u64(&sequence, 4);
389 ///
390 /// assert_eq!(bv.len(), 12);
391 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
392 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
393 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
394 /// ```
395 ///
396 /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
397 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
398 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
399 #[must_use]
400 pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: usize) -> Self {
401 Self::pack_bits::<_, 64>(sequence, bits_per_element)
402 }
403
404 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
405 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
406 /// The number of bits per element is given by `bits_per_element`.
407 /// The sequence is given as a slice of u32 values.
408 /// If the number of bits per element is smaller than 32, the function takes the
409 /// least significant bits of each element, and discards the rest.
410 /// If the number of bits per element is larger than 32, the function will pad the elements
411 /// with zeros.
412 /// The function will append the bits of each element to the bit vector in the order they are
413 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
414 ///
415 /// See also: [`pack_sequence_u64`], [`pack_sequence_u16`], [`pack_sequence_u8`]
416 ///
417 /// # Example
418 /// ```rust
419 /// use vers_vecs::BitVec;
420 ///
421 /// let sequence = [0b1010u32, 0b1100u32, 0b1111u32];
422 /// let bv = BitVec::pack_sequence_u32(&sequence, 4);
423 ///
424 /// assert_eq!(bv.len(), 12);
425 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
426 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
427 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
428 /// ```
429 ///
430 /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
431 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
432 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
433 #[must_use]
434 pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: usize) -> Self {
435 Self::pack_bits::<_, 32>(sequence, bits_per_element)
436 }
437
438 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
439 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
440 /// The number of bits per element is given by `bits_per_element`.
441 /// The sequence is given as a slice of u16 values.
442 /// If the number of bits per element is smaller than 16, the function takes the
443 /// least significant bits of each element, and discards the rest.
444 /// If the number of bits per element is larger than 16, the function will pad the elements
445 /// with zeros.
446 /// The function will append the bits of each element to the bit vector in the order they are
447 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
448 ///
449 /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u8`]
450 ///
451 /// # Example
452 /// ```rust
453 /// use vers_vecs::BitVec;
454 ///
455 /// let sequence = [0b1010u16, 0b1100u16, 0b1111u16];
456 /// let bv = BitVec::pack_sequence_u16(&sequence, 4);
457 ///
458 /// assert_eq!(bv.len(), 12);
459 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
460 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
461 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
462 /// ```
463 ///
464 /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
465 /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
466 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
467 #[must_use]
468 pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: usize) -> Self {
469 Self::pack_bits::<_, 16>(sequence, bits_per_element)
470 }
471
472 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
473 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
474 /// The number of bits per element is given by `bits_per_element`.
475 /// The sequence is given as a slice of u8 values.
476 /// If the number of bits per element is smaller than 8, the function takes the
477 /// least significant bits of each element, and discards the rest.
478 /// If the number of bits per element is larger than 8, the function will pad the elements
479 /// with zeros.
480 /// The function will append the bits of each element to the bit vector in the order they are
481 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
482 ///
483 /// See also: [`pack_sequence_u64`], [`pack_sequence_u32`], [`pack_sequence_u16`]
484 ///
485 /// # Example
486 /// ```rust
487 /// use vers_vecs::BitVec;
488 ///
489 /// let sequence = [0b1010u8, 0b1100u8, 0b1111u8];
490 /// let bv = BitVec::pack_sequence_u8(&sequence, 4);
491 ///
492 /// assert_eq!(bv.len(), 12);
493 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
494 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
495 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
496 /// ```
497 ///
498 /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
499 /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
500 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
501 #[must_use]
502 pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: usize) -> Self {
503 Self::pack_bits::<_, 8>(sequence, bits_per_element)
504 }
505
506 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
507 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
508 /// The number of bits per element is given by `bits_per_element`.
509 /// The sequence is given as an iterator of u64 values.
510 /// If the number of bits per element is smaller than 64, the function takes the
511 /// least significant bits of each element, and discards the rest.
512 /// If the number of bits per element is larger than 64, the function will pad the elements
513 /// with zeros.
514 /// The function will append the bits of each element to the bit vector in the order they are
515 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
516 ///
517 /// See also: [`pack_from_iter_u32`], [`pack_from_iter_u16`], [`pack_from_iter_u8`], or the
518 /// functions to pack from a slice: [`pack_sequence_u64`]
519 ///
520 /// # Example
521 /// ```rust
522 /// use vers_vecs::BitVec;
523 ///
524 /// let sequence = [0b1010u64, 0b1100u64, 0b1111u64];
525 /// let bv = BitVec::pack_from_iter_u64(sequence.into_iter(), 4);
526 ///
527 /// assert_eq!(bv.len(), 12);
528 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
529 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
530 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
531 /// ```
532 ///
533 /// [`pack_from_iter_u32`]: BitVec::pack_from_iter_u32
534 /// [`pack_from_iter_u16`]: BitVec::pack_from_iter_u16
535 /// [`pack_from_iter_u8`]: BitVec::pack_from_iter_u8
536 /// [`pack_sequence_u64`]: BitVec::pack_sequence_u64
537 pub fn pack_from_iter_u64<I: IntoIterator<Item=u64>>(iter: I, bits_per_element: usize) -> Self {
538 Self::pack_bits_iter::<_, _, 64>(iter, bits_per_element)
539 }
540
541 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
542 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
543 /// The number of bits per element is given by `bits_per_element`.
544 /// The sequence is given as an iterator of u32 values.
545 /// If the number of bits per element is smaller than 32, the function takes the
546 /// least significant bits of each element, and discards the rest.
547 /// If the number of bits per element is larger than 32, the function will pad the elements
548 /// with zeros.
549 /// The function will append the bits of each element to the bit vector in the order they are
550 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
551 ///
552 /// See also: [`pack_from_iter_u64`], [`pack_from_iter_u16`], [`pack_from_iter_u8`], or the
553 /// functions to pack from a slice: [`pack_sequence_u32`]
554 ///
555 /// # Example
556 /// ```rust
557 /// use vers_vecs::BitVec;
558 ///
559 /// let sequence = [0b1010u32, 0b1100u32, 0b1111u32];
560 /// let bv = BitVec::pack_from_iter_u32(sequence.into_iter(), 4);
561 ///
562 /// assert_eq!(bv.len(), 12);
563 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
564 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
565 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
566 /// ```
567 ///
568 /// [`pack_from_iter_u64`]: BitVec::pack_from_iter_u64
569 /// [`pack_from_iter_u16`]: BitVec::pack_from_iter_u16
570 /// [`pack_from_iter_u8`]: BitVec::pack_from_iter_u8
571 /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32
572 pub fn pack_from_iter_u32<I: IntoIterator<Item=u32>>(iter: I, bits_per_element: usize) -> Self {
573 Self::pack_bits_iter::<_, _, 32>(iter, bits_per_element)
574 }
575
576 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
577 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
578 /// The number of bits per element is given by `bits_per_element`.
579 /// The sequence is given as an iterator of u16 values.
580 /// If the number of bits per element is smaller than 16, the function takes the
581 /// least significant bits of each element, and discards the rest.
582 /// If the number of bits per element is larger than 16, the function will pad the elements
583 /// with zeros.
584 /// The function will append the bits of each element to the bit vector in the order they are
585 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
586 ///
587 /// See also: [`pack_from_iter_u64`], [`pack_from_iter_u32`], [`pack_from_iter_u8`], or the
588 /// functions to pack from a slice: [`pack_sequence_u16`]
589 ///
590 /// # Example
591 /// ```rust
592 /// use vers_vecs::BitVec;
593 ///
594 /// let sequence = [0b1010u16, 0b1100u16, 0b1111u16];
595 /// let bv = BitVec::pack_from_iter_u16(sequence.into_iter(), 4);
596 ///
597 /// assert_eq!(bv.len(), 12);
598 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
599 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
600 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
601 /// ```
602 ///
603 /// [`pack_from_iter_u64`]: BitVec::pack_from_iter_u64
604 /// [`pack_from_iter_u32`]: BitVec::pack_from_iter_u32
605 /// [`pack_from_iter_u8`]: BitVec::pack_from_iter_u8
606 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16
607 pub fn pack_from_iter_u16<I: IntoIterator<Item=u16>>(iter: I, bits_per_element: usize) -> Self {
608 Self::pack_bits_iter::<_, _, 16>(iter, bits_per_element)
609 }
610
611 /// Construct a bit vector by packing a sequence of numerical values into a dense sequence.
612 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
613 /// The number of bits per element is given by `bits_per_element`.
614 /// The sequence is given as an iterator of u8 values.
615 /// If the number of bits per element is smaller than 8, the function takes the
616 /// least significant bits of each element, and discards the rest.
617 /// If the number of bits per element is larger than 8, the function will pad the elements
618 /// with zeros.
619 /// The function will append the bits of each element to the bit vector in the order they are
620 /// given in the sequence (i.e. the first element takes bits `0..bits_per_element` of the vector).
621 ///
622 /// See also: [`pack_from_iter_u64`], [`pack_from_iter_u32`], [`pack_from_iter_u16`], or the
623 /// functions to pack from a slice: [`pack_sequence_u8`]
624 ///
625 /// # Example
626 /// ```rust
627 /// use vers_vecs::BitVec;
628 ///
629 /// let sequence = [0b1010u8, 0b1100u8, 0b1111u8];
630 /// let bv = BitVec::pack_from_iter_u8(sequence.into_iter(), 4);
631 ///
632 /// assert_eq!(bv.len(), 12);
633 /// assert_eq!(bv.get_bits(0, 4), Some(0b1010u64));
634 /// assert_eq!(bv.get_bits(4, 4), Some(0b1100u64));
635 /// assert_eq!(bv.get_bits(8, 4), Some(0b1111u64));
636 /// ```
637 ///
638 /// [`pack_from_iter_u64`]: BitVec::pack_from_iter_u64
639 /// [`pack_from_iter_u32`]: BitVec::pack_from_iter_u32
640 /// [`pack_from_iter_u16`]: BitVec::pack_from_iter_u16
641 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8
642 pub fn pack_from_iter_u8<I: IntoIterator<Item=u8>>(iter: I, bits_per_element: usize) -> Self {
643 Self::pack_bits_iter::<_, _, 8>(iter, bits_per_element)
644 }
645
646 /// Construct a bit vector from bits given as boolean values in a slice.
647 /// The function will append the bits to the bit vector in the order they are
648 /// given in the sequence (i.e. the first bool is the first bit of the vector).
649 ///
650 /// # Example
651 /// ```rust
652 /// use vers_vecs::BitVec;
653 ///
654 /// let sequence = [true, false, true, true];
655 /// let bv = BitVec::from_bools(&sequence);
656 ///
657 /// assert_eq!(bv.len(), 4);
658 /// assert_eq!(bv.is_bit_set(0), Some(true));
659 /// assert_eq!(bv.is_bit_set(1), Some(false));
660 /// assert_eq!(bv.is_bit_set(2), Some(true));
661 /// assert_eq!(bv.is_bit_set(3), Some(true));
662 /// ```
663 pub fn from_bools(bools: &[bool]) -> Self {
664 let mut bv = BitVec::with_capacity(bools.len());
665 bools.iter().for_each(|&b| bv.append(b));
666 bv
667 }
668
669 /// Construct a bit vector from bits given as boolean values from an iterator.
670 /// The function will append the bits to the bit vector in the order they are
671 /// given in the sequence (i.e. the first bool is the first bit of the vector).
672 ///
673 /// # Example
674 /// ```rust
675 /// use vers_vecs::BitVec;
676 ///
677 /// let sequence = [true, false, true, true];
678 /// let bv = BitVec::from_bool_iter(sequence.into_iter());
679 ///
680 /// assert_eq!(bv.len(), 4);
681 /// assert_eq!(bv.is_bit_set(0), Some(true));
682 /// assert_eq!(bv.is_bit_set(1), Some(false));
683 /// assert_eq!(bv.is_bit_set(2), Some(true));
684 /// assert_eq!(bv.is_bit_set(3), Some(true));
685 /// ```
686 pub fn from_bool_iter<I: IntoIterator<Item=bool>>(iter: I) -> Self {
687 let mut bv = BitVec::new();
688 iter.into_iter().for_each(|b| bv.append(b));
689 bv
690 }
691
692 /// Append a bit encoded as a `bool` to the bit vector, where `true` means 1 and `false` means 0.
693 ///
694 /// See also: [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
695 ///
696 /// # Example
697 ///
698 /// ```rust
699 /// use vers_vecs::BitVec;
700 ///
701 /// let mut bv = BitVec::new();
702 /// bv.append(true);
703 ///
704 /// assert_eq!(bv.len(), 1);
705 /// assert_eq!(bv.get(0), Some(1));
706 /// ```
707 ///
708 /// [`append_bit`]: BitVec::append_bit
709 /// [`append_bit_u32`]: BitVec::append_bit_u32
710 /// [`append_bit_u16`]: BitVec::append_bit_u16
711 /// [`append_bit_u8`]: BitVec::append_bit_u8
712 /// [`append_word`]: BitVec::append_word
713 pub fn append(&mut self, bit: bool) {
714 if self.len.is_multiple_of(WORD_SIZE) {
715 self.data.push(0);
716 }
717 if bit {
718 self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
719 } else {
720 self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
721 }
722 self.len += 1;
723 }
724
725 /// Drop the last n bits from the bit vector. If more bits are dropped than the bit vector
726 /// contains, the bit vector is cleared.
727 ///
728 /// # Example
729 ///
730 /// ```rust
731 /// use vers_vecs::BitVec;
732 ///
733 /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
734 /// bv.drop_last(3);
735 ///
736 /// assert_eq!(bv.len(), 3);
737 /// assert_eq!(bv.get_bits(0, 3), Some(0b101u64));
738 ///
739 /// bv.drop_last(4);
740 ///
741 /// assert!(bv.is_empty());
742 /// ```
743 pub fn drop_last(&mut self, n: usize) {
744 if n > self.len {
745 self.data.clear();
746 self.len = 0;
747 return;
748 }
749
750 let new_limb_count = (self.len - n).div_ceil(WORD_SIZE);
751
752 // cut off limbs that we no longer need
753 if new_limb_count < self.data.len() {
754 self.data.truncate(new_limb_count);
755 }
756
757 // update bit vector length
758 self.len -= n;
759 }
760
761 /// Append a bit encoded in a u64.
762 /// The least significant bit is appended to the bit vector.
763 /// All other bits are ignored.
764 ///
765 /// See also: [`append`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
766 ///
767 /// # Example
768 ///
769 /// ```rust
770 /// use vers_vecs::BitVec;
771 ///
772 /// let mut bv = BitVec::new();
773 ///
774 /// bv.append_bit(1);
775 /// bv.append_bit(0);
776 ///
777 /// assert_eq!(bv.len(), 2);
778 /// assert_eq!(bv.get(0), Some(1));
779 /// assert_eq!(bv.get(1), Some(0));
780 /// ```
781 ///
782 /// [`append`]: BitVec::append
783 /// [`append_bit_u32`]: BitVec::append_bit_u32
784 /// [`append_bit_u16`]: BitVec::append_bit_u16
785 /// [`append_bit_u8`]: BitVec::append_bit_u8
786 /// [`append_word`]: BitVec::append_word
787 pub fn append_bit(&mut self, bit: u64) {
788 if self.len.is_multiple_of(WORD_SIZE) {
789 self.data.push(0);
790 }
791 if bit % 2 == 1 {
792 self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
793 } else {
794 self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
795 }
796
797 self.len += 1;
798 }
799
800 /// Append a bit from a u32. The least significant bit is appended to the bit vector.
801 /// All other bits are ignored.
802 ///
803 /// See also: [`append`], [`append_bit`], [`append_bit_u16`], [`append_bit_u8`], [`append_word`]
804 ///
805 /// [`append`]: BitVec::append
806 /// [`append_bit`]: BitVec::append_bit
807 /// [`append_bit_u16`]: BitVec::append_bit_u16
808 /// [`append_bit_u8`]: BitVec::append_bit_u8
809 /// [`append_word`]: BitVec::append_word
810 pub fn append_bit_u32(&mut self, bit: u32) {
811 self.append_bit(u64::from(bit));
812 }
813
814 /// Append a bit from a u16. The least significant bit is appended to the bit vector.
815 /// All other bits are ignored.
816 ///
817 /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u8`], [`append_word`]
818 ///
819 /// [`append`]: BitVec::append
820 /// [`append_bit`]: BitVec::append_bit
821 /// [`append_bit_u32`]: BitVec::append_bit_u32
822 /// [`append_bit_u8`]: BitVec::append_bit_u8
823 /// [`append_word`]: BitVec::append_word
824 pub fn append_bit_u16(&mut self, bit: u16) {
825 self.append_bit(u64::from(bit));
826 }
827
828 /// Append a bit from a u8. The least significant bit is appended to the bit vector.
829 /// All other bits are ignored.
830 ///
831 /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_word`]
832 ///
833 /// [`append`]: BitVec::append
834 /// [`append_bit`]: BitVec::append_bit
835 /// [`append_bit_u32`]: BitVec::append_bit_u32
836 /// [`append_bit_u16`]: BitVec::append_bit_u16
837 /// [`append_word`]: BitVec::append_word
838 pub fn append_bit_u8(&mut self, bit: u8) {
839 self.append_bit(u64::from(bit));
840 }
841
842 /// Append a word to the bit vector. The bits are appended in little endian order (i.e. the first
843 /// bit of the word is appended first).
844 ///
845 /// See also: [`append`], [`append_bit`], [`append_bit_u32`], [`append_bit_u16`], [`append_bit_u8`]
846 ///
847 /// # Example
848 ///
849 /// ```rust
850 /// use vers_vecs::BitVec;
851 ///
852 /// let mut bv = BitVec::new();
853 /// bv.append_word(0b1010_1010_1010_1010u64);
854 ///
855 /// assert_eq!(bv.len(), 64);
856 /// for i in 0..64 {
857 /// assert_eq!(bv.get(i), Some((0b1010_1010_1010_1010u64 >> i) & 1));
858 /// }
859 /// ```
860 ///
861 /// [`append`]: BitVec::append
862 /// [`append_bit`]: BitVec::append_bit
863 /// [`append_bit_u32`]: BitVec::append_bit_u32
864 /// [`append_bit_u16`]: BitVec::append_bit_u16
865 /// [`append_bit_u8`]: BitVec::append_bit_u8
866 pub fn append_word(&mut self, word: u64) {
867 if self.len.is_multiple_of(WORD_SIZE) {
868 self.data.push(word);
869 } else {
870 // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
871 self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
872 self.data[self.len / WORD_SIZE] |= word << (self.len % WORD_SIZE);
873
874 self.data.push(word >> (WORD_SIZE - self.len % WORD_SIZE));
875 }
876 self.len += WORD_SIZE;
877 }
878
879 /// Append multiple bits to the bit vector.
880 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
881 /// The number of bits to append is given by `len`. The bits are taken from the least
882 /// significant bits of `bits`.
883 /// All other bits are ignored.
884 ///
885 /// # Example
886 ///
887 /// ```rust
888 /// use vers_vecs::BitVec;
889 ///
890 /// let mut bv = BitVec::new();
891 /// bv.append_bits(0b1010_1010_1010_1010u64, 16);
892 ///
893 /// assert_eq!(bv.len(), 16);
894 /// assert_eq!(bv.get_bits(0, 16), Some(0b1010_1010_1010_1010u64));
895 /// ```
896 ///
897 /// # Panics
898 /// Panics if `len` is larger than 64.
899 pub fn append_bits(&mut self, bits: u64, len: usize) {
900 assert!(len <= 64, "Cannot append more than 64 bits");
901
902 if self.len.is_multiple_of(WORD_SIZE) {
903 self.data.push(bits);
904 } else {
905 // zero out the unused bits before or-ing the new one, to ensure no garbage data remains
906 self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
907 self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
908
909 if self.len % WORD_SIZE + len > WORD_SIZE {
910 self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
911 }
912 }
913 self.len += len;
914 }
915
916 /// Append multiple bits to the bit vector.
917 /// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
918 /// The number of bits to append is given by `len`. The bits are taken from the least
919 /// significant bits of `bits`.
920 ///
921 /// This function does not check if `len` is larger than 64.
922 ///
923 /// Furthermore, if the bit-vector has trailing bits that are not zero
924 /// (i.e. the length is not a multiple of 64, and those bits are partially set),
925 /// the function will OR the new data with the trailing bits, destroying the appended data.
926 /// This can happen, if a call `append_bits[_unchecked](word, len)` appends a word which has
927 /// set bits beyond the `len - 1`-th bit,
928 /// or if bits have been dropped from the bit vector using [`drop_last`].
929 ///
930 /// See [`append_bits`] for a checked version of this function.
931 ///
932 /// # Panics
933 /// If `len` is larger than 64, the behavior is platform-dependent, and a processor
934 /// exception might be triggered.
935 ///
936 /// [`append_bits`]: BitVec::append_bits
937 /// [`drop_last`]: BitVec::drop_last
938 pub fn append_bits_unchecked(&mut self, bits: u64, len: usize) {
939 if self.len.is_multiple_of(WORD_SIZE) {
940 self.data.push(bits);
941 } else {
942 self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
943
944 if self.len % WORD_SIZE + len > WORD_SIZE {
945 self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
946 }
947 }
948 self.len += len;
949 }
950
951 /// Append the bits of another bit vector to the end of this vector.
952 /// If this vector does not contain a multiple of 64 bits, the appended limbs need to be
953 /// shifted to the left.
954 /// This function is guaranteed to reallocate the underlying vector at most once.
955 pub fn extend_bitvec(&mut self, other: &Self) {
956 // reserve space for the new bits, ensuring at most one re-allocation
957 self.data
958 .reserve((self.len + other.len).div_ceil(WORD_SIZE) - self.data.len());
959
960 let full_limbs = other.len() / WORD_SIZE;
961 for i in 0..full_limbs {
962 self.append_bits(other.data[i], WORD_SIZE);
963 }
964
965 let partial_bits = other.len % WORD_SIZE;
966 if partial_bits > 0 {
967 self.append_bits(other.data[full_limbs], partial_bits);
968 }
969 }
970
971 /// Return the length of the bit vector. The length is measured in bits.
972 #[must_use]
973 pub fn len(&self) -> usize {
974 self.len
975 }
976
977 /// Return whether the bit vector is empty (contains no bits).
978 #[must_use]
979 pub fn is_empty(&self) -> bool {
980 self.len == 0
981 }
982
983 /// Flip the bit at the given position.
984 ///
985 /// # Example
986 ///
987 /// ```rust
988 /// use vers_vecs::BitVec;
989 ///
990 /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
991 /// bv.flip_bit(1);
992 ///
993 /// assert_eq!(bv.len(), 6);
994 /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
995 /// ```
996 ///
997 /// # Panics
998 /// If the position is larger than the length of the vector, the function panics.
999 pub fn flip_bit(&mut self, pos: usize) {
1000 assert!(pos < self.len, "Index out of bounds");
1001 self.flip_bit_unchecked(pos);
1002 }
1003
1004 /// Flip the bit at the given position.
1005 ///
1006 /// See also: [`flip_bit`]
1007 ///
1008 /// # Panics
1009 /// If the position is larger than the length of the
1010 /// vector, the function will either modify unused memory or panic.
1011 /// This will not corrupt memory.
1012 ///
1013 /// [`flip_bit`]: BitVec::flip_bit
1014 pub fn flip_bit_unchecked(&mut self, pos: usize) {
1015 self.data[pos / WORD_SIZE] ^= 1 << (pos % WORD_SIZE);
1016 }
1017
1018 /// Return the bit at the given position.
1019 /// The bit is encoded in the least significant bit of a u64 value.
1020 /// If the position is larger than the length of the vector, None is returned.
1021 ///
1022 /// See also: [`get_unchecked`]
1023 ///
1024 /// # Example
1025 ///
1026 /// ```rust
1027 /// use vers_vecs::BitVec;
1028 ///
1029 /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
1030 ///
1031 /// assert_eq!(bv.get(1), Some(0));
1032 /// assert_eq!(bv.get(2), Some(1));
1033 /// ```
1034 ///
1035 /// [`get_unchecked`]: Self::get_unchecked
1036 #[must_use]
1037 pub fn get(&self, pos: usize) -> Option<u64> {
1038 if pos >= self.len {
1039 None
1040 } else {
1041 Some(self.get_unchecked(pos))
1042 }
1043 }
1044
1045 /// Return the bit at the given position.
1046 /// The bit is encoded in the least significant bit of a u64 value.
1047 ///
1048 /// # Panics
1049 /// If the position is larger than the length of the vector,
1050 /// the function will either return unpredictable data, or panic.
1051 /// Use [`get`] to properly handle this case with an `Option`.
1052 ///
1053 /// [`get`]: BitVec::get
1054 #[must_use]
1055 pub fn get_unchecked(&self, pos: usize) -> u64 {
1056 (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1
1057 }
1058
1059 /// Set the bit at the given position.
1060 /// The bit is encoded in the least significant bit of a u64 value.
1061 ///
1062 /// See also: [`set_unchecked`]
1063 ///
1064 /// # Example
1065 ///
1066 /// ```rust
1067 /// use vers_vecs::BitVec;
1068 ///
1069 /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
1070 /// bv.set(1, 1).unwrap();
1071 ///
1072 /// assert_eq!(bv.len(), 6);
1073 /// assert_eq!(bv.get_bits(0, 6), Some(0b111111u64));
1074 /// ```
1075 ///
1076 /// # Errors
1077 /// If the position is out of range, the function will return `Err` with an error message,
1078 /// otherwise it will return an empty `Ok`.
1079 ///
1080 /// [`set_unchecked`]: BitVec::set_unchecked
1081 pub fn set(&mut self, pos: usize, value: u64) -> Result<(), &str> {
1082 if pos >= self.len {
1083 Err("out of range")
1084 } else {
1085 self.set_unchecked(pos, value);
1086 Ok(())
1087 }
1088 }
1089
1090 /// Set the bit at the given position.
1091 /// The bit is encoded in the least significant bit of a u64 value.
1092 ///
1093 /// # Panics
1094 /// If the position is larger than the length of the vector,
1095 /// the function will either do nothing, or panic.
1096 /// Use [`set`] to properly handle this case with a `Result`.
1097 ///
1098 /// [`set`]: BitVec::set
1099 pub fn set_unchecked(&mut self, pos: usize, value: u64) {
1100 self.data[pos / WORD_SIZE] = (self.data[pos / WORD_SIZE] & !(0x1 << (pos % WORD_SIZE)))
1101 | ((value & 0x1) << (pos % WORD_SIZE));
1102 }
1103
1104 /// Return whether the bit at the given position is set.
1105 /// If the position is larger than the length of the vector, None is returned.
1106 ///
1107 /// See also: [`is_bit_set_unchecked`]
1108 ///
1109 /// # Example
1110 ///
1111 /// ```rust
1112 /// use vers_vecs::BitVec;
1113 ///
1114 /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]);
1115 ///
1116 /// assert!(!bv.is_bit_set(1).unwrap());
1117 /// assert!(bv.is_bit_set(2).unwrap());
1118 /// ```
1119 ///
1120 /// [`is_bit_set_unchecked`]: BitVec::is_bit_set_unchecked
1121 #[must_use]
1122 pub fn is_bit_set(&self, pos: usize) -> Option<bool> {
1123 if pos >= self.len {
1124 None
1125 } else {
1126 Some(self.is_bit_set_unchecked(pos))
1127 }
1128 }
1129
1130 /// Return whether the bit at the given position is set.
1131 ///
1132 /// # Panics
1133 /// If the position is larger than the length of the vector,
1134 /// the function will either return unpredictable data, or panic.
1135 /// Use [`is_bit_set`] to properly handle this case with an `Option`.
1136 ///
1137 /// [`is_bit_set`]: BitVec::is_bit_set
1138 #[must_use]
1139 pub fn is_bit_set_unchecked(&self, pos: usize) -> bool {
1140 self.get_unchecked(pos) != 0
1141 }
1142
1143 /// Return multiple bits at the given position.
1144 /// The number of bits to return is given by `len`.
1145 /// At most 64 bits can be returned.
1146 /// If the position at the end of the query is larger than the length of the vector,
1147 /// None is returned (even if the query partially overlaps with the vector).
1148 /// If the length of the query is larger than 64, None is returned.
1149 ///
1150 /// The first bit at `pos` is the most significant bit of the return value
1151 /// limited to `len` bits.
1152 #[must_use]
1153 pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> {
1154 if len > WORD_SIZE || len == 0 {
1155 return None;
1156 }
1157 if pos + len > self.len {
1158 None
1159 } else {
1160 Some(self.get_bits_unchecked(pos, len))
1161 }
1162 }
1163
1164 /// Return multiple bits at the given position. The number of bits to return is given by `len`.
1165 /// At most 64 bits can be returned.
1166 ///
1167 /// This function is always inlined, because it gains a lot from loop optimization and
1168 /// can utilize the processor pre-fetcher better if it is.
1169 ///
1170 /// # Errors
1171 /// If the length of the query is larger than 64, unpredictable data will be returned.
1172 /// Use [`get_bits`] to avoid this.
1173 ///
1174 /// # Panics
1175 /// If the position or interval is larger than the length of the vector,
1176 /// the function will either return any valid results padded with unpredictable
1177 /// data or panic.
1178 ///
1179 /// [`get_bits`]: BitVec::get_bits
1180 #[must_use]
1181 #[allow(clippy::inline_always)]
1182 #[allow(clippy::comparison_chain)] // readability
1183 #[inline(always)] // inline to gain loop optimization and pipeline advantages for elias fano
1184 #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen
1185 pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 {
1186 debug_assert!(len <= WORD_SIZE);
1187 let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE);
1188 if pos % WORD_SIZE + len <= WORD_SIZE {
1189 partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
1190 } else {
1191 (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE)))
1192 & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
1193 }
1194 }
1195
1196 /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
1197 /// `index`. The number of bits per encoded element is given by `n`.
1198 ///
1199 /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
1200 /// and is equivalent to calling [`get_bits(index * n, n)`].
1201 /// It is thus safe to use this method with any index and any size n <= 64.
1202 ///
1203 /// If the element is out of bounds, None is returned.
1204 /// The element is returned as a u64 value.
1205 ///
1206 /// # Example
1207 /// ```rust
1208 /// use vers_vecs::BitVec;
1209 ///
1210 /// let sequence = [10, 100, 124, 45, 223];
1211 /// let bv = BitVec::pack_sequence_u64(&sequence, 8);
1212 ///
1213 /// assert_eq!(bv.unpack_element(0, 8), Some(10));
1214 /// assert_eq!(bv.unpack_element(2, 8), Some(124));
1215 /// ```
1216 ///
1217 /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1218 /// [`get_bits(index * n, n)`]: BitVec::get_bits
1219 #[must_use]
1220 #[allow(clippy::inline_always)]
1221 #[inline(always)] // to gain optimization if n is constant
1222 pub fn unpack_element(&self, index: usize, n: usize) -> Option<u64> {
1223 self.get_bits(index * n, n)
1224 }
1225
1226 /// Extract a packed element from a bit vector. The element is encoded in the bits at the given
1227 /// `index`. The number of bits per encoded element is given by `n`.
1228 ///
1229 /// This is a convenience method to access elements previously packed using the [`pack_sequence_*`] methods,
1230 /// and is equivalent to calling [`get_bits_unchecked(index * n, n)`].
1231 /// It is thus safe to use this method with any index where `index * n + n` is in-bounds,
1232 /// and any size n <= 64.
1233 ///
1234 /// # Panics
1235 /// If the element is out of bounds, the function will either return unpredictable data or panic.
1236 /// Use [`unpack_element`] for a checked version of this function.
1237 ///
1238 /// [`pack_sequence_*`]: BitVec::pack_sequence_u64
1239 /// [`get_bits_unchecked(index * n, n)`]: BitVec::get_bits_unchecked
1240 /// [`unpack_element`]: BitVec::unpack_element
1241 #[must_use]
1242 #[allow(clippy::inline_always)]
1243 #[inline(always)] // to gain optimization if n is constant
1244 pub fn unpack_element_unchecked(&self, index: usize, n: usize) -> u64 {
1245 self.get_bits_unchecked(index * n, n)
1246 }
1247
1248 /// Return the number of ones in the bit vector. Since the bit vector doesn't store additional
1249 /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1250 ///
1251 /// [`RsVec`]: crate::RsVec
1252 #[must_use]
1253 #[allow(clippy::missing_panics_doc)] // can't panic because of manual bounds check
1254 pub fn count_ones(&self) -> u64 {
1255 let mut ones: u64 = self.data[0..self.len / WORD_SIZE]
1256 .iter()
1257 .map(|limb| u64::from(limb.count_ones()))
1258 .sum();
1259 if !self.len.is_multiple_of(WORD_SIZE) {
1260 ones += u64::from(
1261 (self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones(),
1262 );
1263 }
1264 ones
1265 }
1266
1267 /// Return the number of zeros in the bit vector. Since the bit vector doesn't store additional
1268 /// metadata, this value is calculated. Use [`RsVec`] for constant-time rank operations.
1269 /// This method calls [`count_ones`].
1270 ///
1271 /// [`RsVec`]: crate::RsVec
1272 /// [`count_ones`]: BitVec::count_ones
1273 #[must_use]
1274 pub fn count_zeros(&self) -> u64 {
1275 self.len as u64 - self.count_ones()
1276 }
1277
1278 /// Mask this bit vector with another bitvector using bitwise or. The mask is applied lazily
1279 /// whenever an operation on the resulting vector is performed.
1280 ///
1281 /// # Errors
1282 /// Returns an error if the length of the vector doesn't match the mask length.
1283 #[inline]
1284 pub fn mask_or<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1285 MaskedBitVec::new(self, mask, |a, b| a | b)
1286 }
1287
1288 /// Mask this bit vector with another bitvector using bitwise or.
1289 /// The mask is applied immediately, unlike in [`mask_or`].
1290 ///
1291 /// # Errors
1292 /// Returns an error if the length of the vector doesn't match the mask length.
1293 ///
1294 /// [`mask_or`]: BitVec::mask_or
1295 pub fn apply_mask_or(&mut self, mask: &BitVec) -> Result<(), String> {
1296 if self.len != mask.len {
1297 return Err(String::from(
1298 "mask cannot have different length than vector",
1299 ));
1300 }
1301
1302 for i in 0..self.data.len() {
1303 self.data[i] |= mask.data[i];
1304 }
1305
1306 Ok(())
1307 }
1308
1309 /// Mask this bit vector with another bitvector using bitwise and. The mask is applied lazily
1310 /// whenever an operation on the resulting vector is performed.
1311 ///
1312 /// # Errors
1313 /// Returns an error if the length of the vector doesn't match the mask length.
1314 #[inline]
1315 pub fn mask_and<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1316 MaskedBitVec::new(self, mask, |a, b| a & b)
1317 }
1318
1319 /// Mask this bit vector with another bitvector using bitwise and.
1320 /// The mask is applied immediately, unlike in [`mask_and`].
1321 ///
1322 /// # Errors
1323 /// Returns an error if the length of the vector doesn't match the mask length.
1324 ///
1325 /// [`mask_and`]: BitVec::mask_and
1326 pub fn apply_mask_and(&mut self, mask: &BitVec) -> Result<(), String> {
1327 if self.len != mask.len {
1328 return Err(String::from(
1329 "mask cannot have different length than vector",
1330 ));
1331 }
1332
1333 for i in 0..self.data.len() {
1334 self.data[i] &= mask.data[i];
1335 }
1336
1337 Ok(())
1338 }
1339
1340 /// Mask this bit vector with another bitvector using bitwise xor. The mask is applied lazily
1341 /// whenever an operation on the resulting vector is performed.
1342 ///
1343 /// # Errors
1344 /// Returns an error if the length of the vector doesn't match the mask length.
1345 #[inline]
1346 pub fn mask_xor<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
1347 MaskedBitVec::new(self, mask, |a, b| a ^ b)
1348 }
1349
1350 /// Mask this bit vector with another bitvector using bitwise xor.
1351 /// The mask is applied immediately, unlike in [`mask_xor`].
1352 ///
1353 /// # Errors
1354 /// Returns an error if the length of the vector doesn't match the mask length.
1355 ///
1356 /// [`mask_xor`]: BitVec::mask_xor
1357 pub fn apply_mask_xor(&mut self, mask: &BitVec) -> Result<(), String> {
1358 if self.len != mask.len {
1359 return Err(String::from(
1360 "mask cannot have different length than vector",
1361 ));
1362 }
1363
1364 for i in 0..self.data.len() {
1365 self.data[i] ^= mask.data[i];
1366 }
1367
1368 Ok(())
1369 }
1370
1371 /// Mask this bit vector with another bitvector using a custom masking operation. The mask is
1372 /// applied lazily whenever an operation on the resulting vector is performed.
1373 ///
1374 /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1375 /// The last block of a bit vector might contain fewer bits, and will be padded with
1376 /// unpredictable data. Implementations may choose to modify those padding bits without
1377 /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1378 /// within the vector is unspecified.
1379 ///
1380 /// # Errors
1381 /// Returns an error if the length of the vector doesn't match the mask length.
1382 #[inline]
1383 pub fn mask_custom<'s, 'b, F>(
1384 &'s self,
1385 mask: &'b BitVec,
1386 mask_op: F,
1387 ) -> Result<MaskedBitVec<'s, 'b, F>, String>
1388 where
1389 F: Fn(u64, u64) -> u64,
1390 {
1391 MaskedBitVec::new(self, mask, mask_op)
1392 }
1393
1394 /// Mask this bit vector with another bitvector using a custom masking operation.
1395 /// The mask is applied immediately, unlike in [`mask_custom`].
1396 ///
1397 /// The masking operation takes two 64 bit values which contain blocks of 64 bits each.
1398 /// The last block of a bit vector might contain fewer bits, and will be padded with
1399 /// unpredictable data. Implementations may choose to modify those padding bits without
1400 /// repercussions. Implementations shouldn't use operations like bit shift, because the bit order
1401 /// within the vector is unspecified.
1402 ///
1403 /// # Errors
1404 /// Returns an error if the length of the vector doesn't match the mask length.
1405 ///
1406 /// [`mask_custom`]: BitVec::mask_custom
1407 #[inline]
1408 pub fn apply_mask_custom(
1409 &mut self,
1410 mask: &BitVec,
1411 mask_op: fn(u64, u64) -> u64,
1412 ) -> Result<(), String> {
1413 if self.len != mask.len {
1414 return Err(String::from(
1415 "mask cannot have different length than vector",
1416 ));
1417 }
1418
1419 for i in 0..self.data.len() {
1420 self.data[i] = mask_op(self.data[i], mask.data[i]);
1421 }
1422
1423 Ok(())
1424 }
1425
1426 /// Returns the number of bytes on the heap for this vector.
1427 /// Does not include allocated memory that isn't used.
1428 #[must_use]
1429 pub fn heap_size(&self) -> usize {
1430 self.data.len() * size_of::<u64>()
1431 }
1432
1433 /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1434 /// right half the remaining bits `at..`. If the split index is larger than the length of the
1435 /// vector, the vector is returned unmodified in an `Err` variant.
1436 ///
1437 /// # Errors
1438 /// If the index is out of bounds, the function will return an error
1439 /// containing the original vector.
1440 ///
1441 /// See also: [`split_at_unchecked`]
1442 ///
1443 /// [`split_at_unchecked`]: Self::split_at_unchecked
1444 pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> {
1445 if at > self.len {
1446 Err(self)
1447 } else {
1448 Ok(self.split_at_unchecked(at))
1449 }
1450 }
1451
1452 /// Split the vector in two at the specified index. The left half contains bits `0..at` and the
1453 /// right half the remaining bits `at..`.
1454 ///
1455 /// # Panics
1456 /// If the index is larger than the length of the vector the function will panic or run
1457 /// out of memory.
1458 /// Use [`split_at`] to properly handle this case.
1459 ///
1460 /// [`split_at`]: Self::split_at
1461 #[must_use]
1462 pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) {
1463 let other_len = self.len - at;
1464 let mut other = Self::with_capacity(other_len);
1465
1466 if other_len == 0 {
1467 return (self, other);
1468 }
1469
1470 let first_limb = at / WORD_SIZE;
1471 let last_limb = self.len / WORD_SIZE;
1472
1473 // First, we figure out the number of bits from the first limb to retain in this vector:
1474 let leading_partial = at % WORD_SIZE;
1475
1476 // If the split point is in the last limb, and the vector ends before the last bit, first_limb
1477 // and last_limb will be equal, and the other half is simply other_len bits off the limb
1478 // right shifted by the number of bits to retain in this vector.
1479 if first_limb == last_limb {
1480 other.append_bits_unchecked(self.data[first_limb] >> leading_partial, other_len);
1481 } else {
1482 // Otherwise, some range n..last_limb should be copied in their entirety to the other half,
1483 // with n=first_limb+1 if the split point is inside the first limb (leading_partial > 0), or
1484 // n=first_limb if the entire first limb belongs in the other half.
1485 let full_limbs = if leading_partial > 0 {
1486 // If the split point is inside the first limb, we also have to remember to copy over
1487 // the trailing bits to the new vector.
1488 other.append_bits_unchecked(
1489 self.data[first_limb] >> leading_partial,
1490 WORD_SIZE - leading_partial,
1491 );
1492 first_limb + 1..last_limb
1493 } else {
1494 first_limb..last_limb
1495 };
1496
1497 // Copy over any full limbs.
1498 for i in full_limbs {
1499 other.append_bits_unchecked(self.data[i], WORD_SIZE);
1500 }
1501
1502 // Finally, if the vector has a partially filled last limb, we need to put those bits
1503 // in the other half.
1504 let trailing_partial = self.len % WORD_SIZE;
1505 if trailing_partial > 0 {
1506 other.append_bits_unchecked(self.data[last_limb], trailing_partial);
1507 }
1508 }
1509
1510 // remove the copied bits from the original vector
1511 self.drop_last(other_len);
1512
1513 (self, other)
1514 }
1515
1516 /// Iterate through the u64 limbs of the bitvector.
1517 /// The limbs are encoded starting at the least significant bit (i.e., the first bit in the bit
1518 /// vector is the least significant bit in the first limb).
1519 /// Note that the last limb may be incomplete, if the number of bits in the vector is not
1520 /// divisible by 64.
1521 /// In this case, the least significant `self.len() % 64` bits are the correct bits.
1522 /// The remaining bits of the last limb are in an unspecified state and code should not rely
1523 /// on them being set or unset.
1524 #[inline]
1525 pub fn iter_limbs(&self) -> impl Iterator<Item = u64> + use<'_> {
1526 self.data.iter().copied()
1527 }
1528}
1529
1530impl_vector_iterator! { BitVec, BitVecIter, BitVecRefIter }
1531
1532/// Create a new bit vector from a slice of u64 values.
1533/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1534/// The function will append the bits of each element to the bit vector in the order they are
1535/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1536impl From<&[u64]> for BitVec {
1537 fn from(data: &[u64]) -> Self {
1538 BitVec::from_limbs(data)
1539 }
1540}
1541
1542/// Create a new bit vector from a slice of u64 values.
1543/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1544/// The function will append the bits of each element to the bit vector in the order they are
1545/// given in the slice (i.e. the first element takes bits `0..64` of the vector).
1546impl From<Vec<u64>> for BitVec {
1547 fn from(data: Vec<u64>) -> Self {
1548 BitVec::from_limbs(&data)
1549 }
1550}
1551
1552impl Extend<BitVec> for BitVec {
1553 fn extend<T: IntoIterator<Item = BitVec>>(&mut self, iter: T) {
1554 for v in iter {
1555 self.extend_bitvec(&v)
1556 }
1557 }
1558}
1559
1560impl<'t> Extend<&'t BitVec> for BitVec {
1561 fn extend<T: IntoIterator<Item = &'t BitVec>>(&mut self, iter: T) {
1562 for v in iter {
1563 self.extend_bitvec(v)
1564 }
1565 }
1566}
1567
1568/// Create a new bit vector from u64 values.
1569/// The bits are appended in little-endian order (i.e. the least significant bit is appended first).
1570/// The function will append the bits of each element to the bit vector in the order they are
1571/// given in the iterator (i.e. the first element takes bits `0..64` of the vector).
1572impl FromIterator<u64> for BitVec {
1573 fn from_iter<T: IntoIterator<Item = u64>>(iter: T) -> Self {
1574 BitVec::from_limbs_iter(iter)
1575 }
1576}
1577
1578impl PartialEq for BitVec {
1579 // unlike the auto-derived implementation, this custom implementation ignores junk data at
1580 // the end of the bit vector
1581 fn eq(&self, other: &Self) -> bool {
1582 if self.len != other.len {
1583 return false;
1584 }
1585
1586 if self.len == 0 {
1587 return true;
1588 }
1589
1590 for i in 0..self.data.len() - 1 {
1591 if self.data[i] != other.data[i] {
1592 return false;
1593 }
1594 }
1595
1596 // in last limb, ignore junk data
1597 let mask = (1 << (self.len % WORD_SIZE)) - 1;
1598 if self.data[self.data.len() - 1] & mask != other.data[self.data.len() - 1] & mask {
1599 return false;
1600 }
1601
1602 true
1603 }
1604}
1605
1606impl Eq for BitVec {}
1607
1608impl Hash for BitVec {
1609 fn hash<H: Hasher>(&self, state: &mut H) {
1610 state.write_usize(self.len);
1611 if self.len > 0 {
1612 self.data[0..self.data.len() - 1]
1613 .iter()
1614 .for_each(|x| state.write_u64(*x));
1615 let masked_last_limb = self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1);
1616 state.write_u64(masked_last_limb);
1617 }
1618 }
1619}
1620
1621#[cfg(test)]
1622mod tests;