vers_vecs/bit_vec/fast_rs_vec/mod.rs
1//! A fast succinct bit vector implementation with rank and select queries. Rank computes in
2//! constant-time, select on average in constant-time, with a logarithmic worst case.
3
4use std::mem::size_of;
5
6#[cfg(all(
7 feature = "simd",
8 target_arch = "x86_64",
9 target_feature = "avx",
10 target_feature = "avx2",
11 target_feature = "avx512f",
12 target_feature = "avx512bw",
13))]
14pub use bitset::*;
15pub use iter::*;
16
17use crate::util::impl_vector_iterator;
18use crate::BitVec;
19
20use super::WORD_SIZE;
21
/// Size of a block in the bitvector. One `BlockDescriptor` is stored per block.
const BLOCK_SIZE: usize = 512;

/// Size of a super block in the bitvector. Super-blocks exist to decrease the memory overhead
/// of block descriptors.
/// Increasing or decreasing the super block size has negligible effect on performance of rank
/// instruction. This means we want to make the super block size as large as possible, as long as
/// the zero-counter in normal blocks still fits in a reasonable amount of bits. However, this has
/// impact on the performance of select queries. The larger the super block size, the deeper will
/// a binary search be. We found 2^13 to be a good compromise between memory overhead and
/// performance.
const SUPER_BLOCK_SIZE: usize = 1 << 13;

/// Size of a select block. The select block is used to speed up select queries. The select block
/// contains the indices of every `SELECT_BLOCK_SIZE`'th 1-bit and 0-bit in the bitvector.
/// The smaller this block-size, the faster are select queries, but the more memory is used.
const SELECT_BLOCK_SIZE: usize = 1 << 13;
39
/// Meta-data for a block. The `zeros` field stores the number of zeros up to the block,
/// beginning from the last super-block boundary. This means the first block in a super-block
/// always stores the number zero, which serves as a sentinel value to avoid special-casing the
/// first block in a super-block (which would be a performance hit due branch prediction failures).
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct BlockDescriptor {
    // number of zeros since the enclosing super-block's start; a u16 suffices because a
    // super-block spans only SUPER_BLOCK_SIZE = 2^13 bits, well below 2^16
    zeros: u16,
}
49
/// Meta-data for a super-block. The `zeros` field stores the number of zeros up to this super-block.
/// This allows the `BlockDescriptor` to store the number of zeros in a much smaller
/// space. The `zeros` field is the number of zeros up to the super-block.
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct SuperBlockDescriptor {
    // absolute count of 0-bits in the vector before this super-block
    zeros: usize,
}
58
/// Meta-data for the select query. Each entry i in the select vector contains the indices to find
/// the i * `SELECT_BLOCK_SIZE`'th 0- and 1-bit in the bitvector. Those indices may be very far apart.
/// The indices do not point into the bit-vector, but into the super-block vector.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct SelectSuperBlockDescriptor {
    // super-block index containing the (i * SELECT_BLOCK_SIZE)'th 0-bit
    index_0: usize,
    // super-block index containing the (i * SELECT_BLOCK_SIZE)'th 1-bit
    index_1: usize,
}
68
69/// A bitvector that supports constant-time rank and select queries and is optimized for fast queries.
70/// The bitvector is stored as a vector of `u64`s. The bit-vector stores meta-data for constant-time
71/// rank and select queries, which takes sub-linear additional space. The space overhead is
72/// 28 bits per 512 bits of user data (~5.47%).
73///
74/// # Example
75/// ```rust
76/// use vers_vecs::{BitVec, RsVec};
77///
78/// let mut bit_vec = BitVec::new();
79/// bit_vec.append_word(u64::MAX);
80///
81/// let rs_vec = RsVec::from_bit_vec(bit_vec);
82/// assert_eq!(rs_vec.rank1(64), 64);
83/// assert_eq!(rs_vec.select1(64), 64);
84///```
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct RsVec {
    // bit data in 64-bit limbs; padded to BLOCK_SIZE alignment during construction
    data: Vec<u64>,
    // number of bits in the vector (padding limbs are not counted)
    len: usize,
    // per-block zero counts, relative to the enclosing super-block
    blocks: Vec<BlockDescriptor>,
    // per-super-block absolute zero counts
    super_blocks: Vec<SuperBlockDescriptor>,
    // sampled super-block indices used to narrow down select queries
    select_blocks: Vec<SelectSuperBlockDescriptor>,
    // total number of 0-bits in the vector
    pub(crate) rank0: usize,
    // total number of 1-bits in the vector
    pub(crate) rank1: usize,
}
96
97impl RsVec {
    /// Build an [`RsVec`] from a [`BitVec`]. This will consume the [`BitVec`]. Since [`RsVec`]s are
    /// immutable, this is the only way to construct an [`RsVec`].
    ///
    /// # Example
    /// See the example for [`RsVec`].
    ///
    /// [`BitVec`]: ../struct.BitVec.html
    /// [`RsVec`]: struct.RsVec.html
    #[must_use]
    pub fn from_bit_vec(mut vec: BitVec) -> RsVec {
        // Construct the block descriptor meta data. Each block descriptor contains the number of
        // zeros in the super-block, up to but excluding the block.
        let mut blocks = Vec::with_capacity(vec.len() / BLOCK_SIZE + 1);
        let mut super_blocks = Vec::with_capacity(vec.len() / SUPER_BLOCK_SIZE + 1);
        let mut select_blocks = Vec::new();

        // sentinel value
        select_blocks.push(SelectSuperBlockDescriptor {
            index_0: 0,
            index_1: 0,
        });

        // total_zeros counts zeros before the current super-block; current_zeros counts zeros
        // inside it. The last_*_select_block counters track the highest select-block entry
        // written so far for each bit kind (used below to append the dummy entries).
        let mut total_zeros: usize = 0;
        let mut current_zeros: usize = 0;
        let mut last_zero_select_block: usize = 0;
        let mut last_one_select_block: usize = 0;

        for (idx, &word) in vec.data.iter().enumerate() {
            // if we moved past a block boundary, append the block information for the previous
            // block and reset the counter if we moved past a super-block boundary.
            if idx % (BLOCK_SIZE / WORD_SIZE) == 0 {
                if idx % (SUPER_BLOCK_SIZE / WORD_SIZE) == 0 {
                    total_zeros += current_zeros;
                    current_zeros = 0;
                    super_blocks.push(SuperBlockDescriptor { zeros: total_zeros });
                }

                // this cannot overflow because a super block isn't 2^16 bits long
                #[allow(clippy::cast_possible_truncation)]
                blocks.push(BlockDescriptor {
                    zeros: current_zeros as u16,
                });
            }

            // count the zeros in the current word and add them to the counter
            // the last word may contain padding zeros, which should not be counted,
            // but since we do not append the last block descriptor, this is not a problem
            let mut new_zeros = word.count_zeros() as usize;

            // in the last block, remove remaining zeros of limb that aren't part of the vector
            if idx == vec.data.len() - 1 && vec.len % WORD_SIZE > 0 {
                let mask = (1 << (vec.len % WORD_SIZE)) - 1;
                new_zeros -= (word | mask).count_zeros() as usize;
            }

            // if this word crosses a multiple of SELECT_BLOCK_SIZE zeros, record the
            // super-block in which that 0-bit lies. A word contributes at most 64 zeros,
            // far fewer than SELECT_BLOCK_SIZE, so at most one boundary is crossed here.
            let all_zeros = total_zeros + current_zeros + new_zeros;
            if all_zeros / SELECT_BLOCK_SIZE > (total_zeros + current_zeros) / SELECT_BLOCK_SIZE {
                if all_zeros / SELECT_BLOCK_SIZE == select_blocks.len() {
                    select_blocks.push(SelectSuperBlockDescriptor {
                        index_0: super_blocks.len() - 1,
                        index_1: 0,
                    });
                } else {
                    // the entry was already created by the 1-bit branch; fill in the 0-side
                    select_blocks[all_zeros / SELECT_BLOCK_SIZE].index_0 = super_blocks.len() - 1;
                }

                last_zero_select_block += 1;
            }

            // same bookkeeping for 1-bits, derived as total bits minus total zeros
            let total_bits = (idx + 1) * WORD_SIZE;
            let all_ones = total_bits - all_zeros;
            if all_ones / SELECT_BLOCK_SIZE
                > (idx * WORD_SIZE - total_zeros - current_zeros) / SELECT_BLOCK_SIZE
            {
                if all_ones / SELECT_BLOCK_SIZE == select_blocks.len() {
                    select_blocks.push(SelectSuperBlockDescriptor {
                        index_0: 0,
                        index_1: super_blocks.len() - 1,
                    });
                } else {
                    select_blocks[all_ones / SELECT_BLOCK_SIZE].index_1 = super_blocks.len() - 1;
                }

                last_one_select_block += 1;
            }

            current_zeros += new_zeros;
        }

        // insert dummy select blocks at the end that just report the same index like the last real
        // block, so the bound check for binary search doesn't overflow
        // this is technically the incorrect value, but since all valid queries will be smaller,
        // this will only tell select to stay in the current super block, which is correct.
        // we cannot use a real value here, because this would change the size of the super-block
        if last_zero_select_block == select_blocks.len() - 1 {
            select_blocks.push(SelectSuperBlockDescriptor {
                index_0: select_blocks[last_zero_select_block].index_0,
                index_1: 0,
            });
        } else {
            debug_assert!(select_blocks[last_zero_select_block + 1].index_0 == 0);
            select_blocks[last_zero_select_block + 1].index_0 =
                select_blocks[last_zero_select_block].index_0;
        }
        if last_one_select_block == select_blocks.len() - 1 {
            select_blocks.push(SelectSuperBlockDescriptor {
                index_0: 0,
                index_1: select_blocks[last_one_select_block].index_1,
            });
        } else {
            debug_assert!(select_blocks[last_one_select_block + 1].index_1 == 0);
            select_blocks[last_one_select_block + 1].index_1 =
                select_blocks[last_one_select_block].index_1;
        }

        // pad the internal vector to be block-aligned, so SIMD operations don't try to read
        // past the end of the vector. Note that this does not affect the content of the vector,
        // because those bits are not considered part of the vector.
        // Note further, that currently no SIMD implementation exists.
        while vec.data.len() % (BLOCK_SIZE / WORD_SIZE) != 0 {
            vec.data.push(0);
        }

        // flush zeros of the final (possibly partial) super-block into the grand total
        total_zeros += current_zeros;

        RsVec {
            data: vec.data,
            len: vec.len,
            blocks,
            super_blocks,
            select_blocks,
            rank0: total_zeros,
            rank1: vec.len - total_zeros,
        }
    }
233
    /// Return the 0-rank of the bit at the given position. The 0-rank is the number of
    /// 0-bits in the vector up to but excluding the bit at the given position. Calling this
    /// function with an index larger than the length of the bit-vector will report the total
    /// number of 0-bits in the bit-vector.
    ///
    /// # Parameters
    /// - `pos`: The position of the bit to return the rank of.
    #[must_use]
    pub fn rank0(&self, pos: usize) -> usize {
        // `zero == true` selects the 0-counting branches of the shared rank implementation
        self.rank(true, pos)
    }
245
    /// Return the 1-rank of the bit at the given position. The 1-rank is the number of
    /// 1-bits in the vector up to but excluding the bit at the given position. Calling this
    /// function with an index larger than the length of the bit-vector will report the total
    /// number of 1-bits in the bit-vector.
    ///
    /// # Parameters
    /// - `pos`: The position of the bit to return the rank of.
    #[must_use]
    pub fn rank1(&self, pos: usize) -> usize {
        // `zero == false` selects the 1-counting branches of the shared rank implementation
        self.rank(false, pos)
    }
257
    // Shared implementation for rank0/rank1. The `zero` flag is a compile-time-known constant
    // at every call site, so inlining lets the optimizer delete the untaken branches.
    // I measured 5-10% improvement with this. I don't know why it's not inlined by default, the
    // branch elimination profits alone should make it worth it.
    #[allow(clippy::inline_always)]
    #[inline(always)]
    fn rank(&self, zero: bool, pos: usize) -> usize {
        #[allow(clippy::collapsible_else_if)]
        // readability and more obvious where dead branch elimination happens
        if zero {
            if pos >= self.len() {
                // out-of-range queries report the total count, per the public docs
                return self.rank0;
            }
        } else {
            if pos >= self.len() {
                return self.rank1;
            }
        }

        let index = pos / WORD_SIZE;
        let block_index = pos / BLOCK_SIZE;
        let super_block_index = pos / SUPER_BLOCK_SIZE;
        let mut rank = 0;

        // at first add the number of zeros/ones before the current super block
        rank += if zero {
            self.super_blocks[super_block_index].zeros
        } else {
            // ones = total bits before the super-block minus zeros before it
            (super_block_index * SUPER_BLOCK_SIZE) - self.super_blocks[super_block_index].zeros
        };

        // then add the number of zeros/ones before the current block
        rank += if zero {
            self.blocks[block_index].zeros as usize
        } else {
            // block zeros are relative to the super-block start, so the bit offset must be too
            ((block_index % (SUPER_BLOCK_SIZE / BLOCK_SIZE)) * BLOCK_SIZE)
                - self.blocks[block_index].zeros as usize
        };

        // naive popcount of the full words between the block boundary and pos
        for &i in &self.data[(block_index * BLOCK_SIZE) / WORD_SIZE..index] {
            rank += if zero {
                i.count_zeros() as usize
            } else {
                i.count_ones() as usize
            };
        }

        // finally count the bits in the partial word before pos;
        // the mask selects the pos % WORD_SIZE lowest bits (zero when pos is word-aligned)
        rank += if zero {
            (!self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as usize
        } else {
            (self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as usize
        };

        rank
    }
312
    /// Return the length of the vector, i.e. the number of bits it contains.
    /// Padding limbs appended during construction are not counted.
    #[must_use]
    pub fn len(&self) -> usize {
        self.len
    }
318
319 /// Return whether the vector is empty.
320 #[must_use]
321 pub fn is_empty(&self) -> bool {
322 self.len() == 0
323 }
324
325 /// Return the bit at the given position. The bit takes the least significant
326 /// bit of the returned u64 word.
327 /// If the position is larger than the length of the vector, `None` is returned.
328 #[must_use]
329 pub fn get(&self, pos: usize) -> Option<u64> {
330 if pos >= self.len() {
331 None
332 } else {
333 Some(self.get_unchecked(pos))
334 }
335 }
336
337 /// Return the bit at the given position. The bit takes the least significant
338 /// bit of the returned u64 word.
339 ///
340 /// # Panics
341 /// This function may panic if `pos >= self.len()` (alternatively, it may return garbage).
342 #[must_use]
343 pub fn get_unchecked(&self, pos: usize) -> u64 {
344 (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1
345 }
346
347 /// Return multiple bits at the given position. The number of bits to return is given by `len`.
348 /// At most 64 bits can be returned.
349 /// If the position at the end of the query is larger than the length of the vector,
350 /// None is returned (even if the query partially overlaps with the vector).
351 /// If the length of the query is larger than 64, None is returned.
352 #[must_use]
353 pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> {
354 if len > WORD_SIZE {
355 return None;
356 }
357 if pos + len > self.len {
358 None
359 } else {
360 Some(self.get_bits_unchecked(pos, len))
361 }
362 }
363
364 /// Return multiple bits at the given position. The number of bits to return is given by `len`.
365 /// At most 64 bits can be returned.
366 ///
367 /// This function is always inlined, because it gains a lot from loop optimization and
368 /// can utilize the processor pre-fetcher better if it is.
369 ///
370 /// # Errors
371 /// If the length of the query is larger than 64, unpredictable data will be returned.
372 /// Use [`get_bits`] to properly handle this case with an `Option`.
373 ///
374 /// # Panics
375 /// If the position or interval is larger than the length of the vector,
376 /// the function will either return unpredictable data, or panic.
377 ///
378 /// [`get_bits`]: #method.get_bits
379 #[must_use]
380 #[allow(clippy::comparison_chain)] // readability
381 #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen
382 pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 {
383 debug_assert!(len <= WORD_SIZE);
384 let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE);
385 if pos % WORD_SIZE + len == WORD_SIZE {
386 partial_word
387 } else if pos % WORD_SIZE + len < WORD_SIZE {
388 partial_word & ((1 << (len % WORD_SIZE)) - 1)
389 } else {
390 (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE)))
391 & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
392 }
393 }
394
395 /// Check if two `RsVec`s are equal. For sparse vectors (either sparsely filled with 1-bits or
396 /// 0-bits), this is faster than comparing the vectors bit by bit.
397 /// Choose the value of `ZERO` depending on which bits are more sparse.
398 ///
399 /// This method is faster than [`full_equals`] for sparse vectors beginning at roughly 1
400 /// million bits. Above 4 million bits, this method becomes faster than full equality in general.
401 ///
402 /// # Parameters
403 /// - `other`: The other `RsVec` to compare to.
404 /// - `ZERO`: Whether to compare the sparse 0-bits (true) or the sparse 1-bits (false).
405 ///
406 /// # Returns
407 /// `true` if the vectors' contents are equal, `false` otherwise.
408 ///
409 /// [`full_equals`]: RsVec::full_equals
410 #[must_use]
411 pub fn sparse_equals<const ZERO: bool>(&self, other: &Self) -> bool {
412 if self.len() != other.len() {
413 return false;
414 }
415
416 if self.rank0 != other.rank0 || self.rank1 != other.rank1 {
417 return false;
418 }
419
420 let iter: SelectIter<ZERO> = self.select_iter();
421
422 for (rank, bit_index) in iter.enumerate() {
423 // since rank is inlined, we get dead code elimination depending on ZERO
424 if (other.get_unchecked(bit_index) == 0) != ZERO || other.rank(ZERO, bit_index) != rank
425 {
426 return false;
427 }
428 }
429
430 true
431 }
432
433 /// Check if two `RsVec`s are equal. This compares limb by limb. This is usually faster than a
434 /// [`sparse_equals`] call for small vectors.
435 ///
436 /// # Parameters
437 /// - `other`: The other `RsVec` to compare to.
438 ///
439 /// # Returns
440 /// `true` if the vectors' contents are equal, `false` otherwise.
441 ///
442 /// [`sparse_equals`]: RsVec::sparse_equals
443 #[must_use]
444 pub fn full_equals(&self, other: &Self) -> bool {
445 if self.len() != other.len() {
446 return false;
447 }
448
449 if self.rank0 != other.rank0 || self.rank1 != other.rank1 {
450 return false;
451 }
452
453 if self.data[..self.len / 64]
454 .iter()
455 .zip(other.data[..other.len / 64].iter())
456 .any(|(a, b)| a != b)
457 {
458 return false;
459 }
460
461 // if last incomplete block exists, test it without junk data
462 if self.len % 64 > 0
463 && self.data[self.len / 64] & ((1 << (self.len % 64)) - 1)
464 != other.data[self.len / 64] & ((1 << (other.len % 64)) - 1)
465 {
466 return false;
467 }
468
469 true
470 }
471
472 /// Returns the number of bytes used on the heap for this vector. This does not include
473 /// allocated space that is not used (e.g. by the allocation behavior of `Vec`).
474 #[must_use]
475 pub fn heap_size(&self) -> usize {
476 self.data.len() * size_of::<u64>()
477 + self.blocks.len() * size_of::<BlockDescriptor>()
478 + self.super_blocks.len() * size_of::<SuperBlockDescriptor>()
479 + self.select_blocks.len() * size_of::<SelectSuperBlockDescriptor>()
480 }
481}
482
483impl_vector_iterator! { RsVec, RsVecIter, RsVecRefIter }
484
485impl PartialEq for RsVec {
486 /// Check if two `RsVec`s are equal. This method calls [`sparse_equals`] if the vector has more
487 /// than 4'000'000 bits, and [`full_equals`] otherwise.
488 ///
489 /// This was determined with benchmarks on an `x86_64` machine,
490 /// on which [`sparse_equals`] outperforms [`full_equals`] consistently above this threshold.
491 ///
492 /// # Parameters
493 /// - `other`: The other `RsVec` to compare to.
494 ///
495 /// # Returns
496 /// `true` if the vectors' contents are equal, `false` otherwise.
497 ///
498 /// [`sparse_equals`]: RsVec::sparse_equals
499 /// [`full_equals`]: RsVec::full_equals
500 fn eq(&self, other: &Self) -> bool {
501 if self.len > 4_000_000 {
502 if self.rank1 > self.rank0 {
503 self.sparse_equals::<true>(other)
504 } else {
505 self.sparse_equals::<false>(other)
506 }
507 } else {
508 self.full_equals(other)
509 }
510 }
511}
512
513impl From<BitVec> for RsVec {
514 /// Build an [`RsVec`] from a [`BitVec`]. This will consume the [`BitVec`]. Since [`RsVec`]s are
515 /// immutable, this is the only way to construct an [`RsVec`].
516 ///
517 /// # Example
518 /// See the example for [`RsVec`].
519 ///
520 /// [`BitVec`]: BitVec
521 /// [`RsVec`]: RsVec
522 fn from(vec: BitVec) -> Self {
523 RsVec::from_bit_vec(vec)
524 }
525}
526
527// iter code in here to keep it more organized
528mod iter;
529// select code in here to keep it more organized
530mod select;
531
532#[cfg(all(
533 feature = "simd",
534 target_arch = "x86_64",
535 target_feature = "avx",
536 target_feature = "avx2",
537 target_feature = "avx512f",
538 target_feature = "avx512bw",
539))]
540mod bitset;
541
542#[cfg(test)]
543mod tests;