vers_vecs/bit_vec/fast_rs_vec/
mod.rs

1//! A fast succinct bit vector implementation with rank and select queries. Rank computes in
2//! constant-time, select on average in constant-time, with a logarithmic worst case.
3
4use std::mem::size_of;
5
6#[cfg(all(
7    feature = "simd",
8    target_arch = "x86_64",
9    target_feature = "avx",
10    target_feature = "avx2",
11    target_feature = "avx512f",
12    target_feature = "avx512bw",
13))]
14pub use bitset::*;
15pub use iter::*;
16
17use crate::util::impl_vector_iterator;
18use crate::BitVec;
19
20use super::WORD_SIZE;
21
/// Number of bits covered by a single block (2^9).
const BLOCK_SIZE: usize = 1 << 9;

/// Number of bits covered by a single super-block (2^13). Super-blocks exist so that the
/// per-block descriptors can be small, which reduces the memory overhead of the index.
///
/// The super-block size has a negligible effect on the performance of rank queries, so for
/// rank alone it should be as large as possible while the zero-counter of a regular block
/// still fits into a reasonable number of bits. Select queries, however, binary-search deeper
/// the larger a super-block is. 2^13 was found to be a good compromise between memory
/// overhead and performance.
const SUPER_BLOCK_SIZE: usize = 8192;

/// Sampling distance of the select index (2^13). The index records where every
/// `SELECT_BLOCK_SIZE`'th 0-bit and 1-bit of the bitvector is found.
/// A smaller value makes select queries faster at the cost of more memory.
const SELECT_BLOCK_SIZE: usize = 8192;
39
/// Per-block meta-data. `zeros` is the number of 0-bits between the start of the enclosing
/// super-block and the start of this block (the block itself is not included).
///
/// As a consequence the first block of every super-block stores zero. That value acts as a
/// sentinel: the first block needs no special treatment, which would otherwise cost
/// performance through branch mispredictions.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct BlockDescriptor {
    zeros: u16,
}
49
/// Per-super-block meta-data. `zeros` is the number of 0-bits in the vector before the start
/// of this super-block. Keeping this running total here is what allows a `BlockDescriptor`
/// to store only a small count relative to its super-block.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct SuperBlockDescriptor {
    zeros: usize,
}
58
/// Meta-data for select queries. Entry `i` of the select vector tells where to find the
/// `i * SELECT_BLOCK_SIZE`'th 0-bit (`index_0`) and 1-bit (`index_1`) of the bitvector.
/// Both values are indices into the super-block vector, not bit positions, and the two
/// may lie very far apart.
///
/// The type is `Copy`, like the other descriptor types in this module: it is only a pair of
/// plain indices.
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct SelectSuperBlockDescriptor {
    index_0: usize,
    index_1: usize,
}
68
69/// A bitvector that supports constant-time rank and select queries and is optimized for fast queries.
70/// The bitvector is stored as a vector of `u64`s. The bit-vector stores meta-data for constant-time
71/// rank and select queries, which takes sub-linear additional space. The space overhead is
72/// 28 bits per 512 bits of user data (~5.47%).
73///
74/// # Example
75/// ```rust
76/// use vers_vecs::{BitVec, RsVec};
77///
78/// let mut bit_vec = BitVec::new();
79/// bit_vec.append_word(u64::MAX);
80///
81/// let rs_vec = RsVec::from_bit_vec(bit_vec);
82/// assert_eq!(rs_vec.rank1(64), 64);
83/// assert_eq!(rs_vec.select1(64), 64);
84///```
85#[derive(Clone, Debug)]
86#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
87pub struct RsVec {
88    data: Vec<u64>,
89    len: usize,
90    blocks: Vec<BlockDescriptor>,
91    super_blocks: Vec<SuperBlockDescriptor>,
92    select_blocks: Vec<SelectSuperBlockDescriptor>,
93    pub(crate) rank0: usize,
94    pub(crate) rank1: usize,
95}
96
97impl RsVec {
98    /// Build an `RsVec` from a [`BitVec`]. This will consume the `BitVec`. Since `RsVec`s are
99    /// immutable, this is the only way to construct an `RsVec`.
100    ///
101    /// # Example
102    /// See the example for `RsVec`.
103    ///
104    /// [`BitVec`]: BitVec
105    #[must_use]
106    pub fn from_bit_vec(vec: BitVec) -> RsVec {
107        // Construct the block descriptor meta data. Each block descriptor contains the number of
108        // zeros in the super-block, up to but excluding the block.
109        let mut blocks = Vec::with_capacity(vec.len() / BLOCK_SIZE + 1);
110        let mut super_blocks = Vec::with_capacity(vec.len() / SUPER_BLOCK_SIZE + 1);
111        let mut select_blocks = Vec::new();
112
113        // sentinel value
114        select_blocks.push(SelectSuperBlockDescriptor {
115            index_0: 0,
116            index_1: 0,
117        });
118
119        let mut total_zeros: usize = 0;
120        let mut current_zeros: usize = 0;
121        let mut last_zero_select_block: usize = 0;
122        let mut last_one_select_block: usize = 0;
123
124        for (idx, &word) in vec.data.iter().enumerate() {
125            // if we moved past a block boundary, append the block information for the previous
126            // block and reset the counter if we moved past a super-block boundary.
127            if idx % (BLOCK_SIZE / WORD_SIZE) == 0 {
128                if idx % (SUPER_BLOCK_SIZE / WORD_SIZE) == 0 {
129                    total_zeros += current_zeros;
130                    current_zeros = 0;
131                    super_blocks.push(SuperBlockDescriptor { zeros: total_zeros });
132                }
133
134                // this cannot overflow because a super block isn't 2^16 bits long
135                #[allow(clippy::cast_possible_truncation)]
136                blocks.push(BlockDescriptor {
137                    zeros: current_zeros as u16,
138                });
139            }
140
141            // count the zeros in the current word and add them to the counter
142            // the last word may contain padding zeros, which should not be counted,
143            // but since we do not append the last block descriptor, this is not a problem
144            let mut new_zeros = word.count_zeros() as usize;
145
146            // in the last block, remove remaining zeros of limb that aren't part of the vector
147            if idx == vec.data.len() - 1 && !vec.len.is_multiple_of(WORD_SIZE) {
148                let mask = (1 << (vec.len % WORD_SIZE)) - 1;
149                new_zeros -= (word | mask).count_zeros() as usize;
150            }
151
152            let all_zeros = total_zeros + current_zeros + new_zeros;
153            if all_zeros / SELECT_BLOCK_SIZE > (total_zeros + current_zeros) / SELECT_BLOCK_SIZE {
154                if all_zeros / SELECT_BLOCK_SIZE == select_blocks.len() {
155                    select_blocks.push(SelectSuperBlockDescriptor {
156                        index_0: super_blocks.len() - 1,
157                        index_1: 0,
158                    });
159                } else {
160                    select_blocks[all_zeros / SELECT_BLOCK_SIZE].index_0 = super_blocks.len() - 1;
161                }
162
163                last_zero_select_block += 1;
164            }
165
166            let total_bits = (idx + 1) * WORD_SIZE;
167            let all_ones = total_bits - all_zeros;
168            if all_ones / SELECT_BLOCK_SIZE
169                > (idx * WORD_SIZE - total_zeros - current_zeros) / SELECT_BLOCK_SIZE
170            {
171                if all_ones / SELECT_BLOCK_SIZE == select_blocks.len() {
172                    select_blocks.push(SelectSuperBlockDescriptor {
173                        index_0: 0,
174                        index_1: super_blocks.len() - 1,
175                    });
176                } else {
177                    select_blocks[all_ones / SELECT_BLOCK_SIZE].index_1 = super_blocks.len() - 1;
178                }
179
180                last_one_select_block += 1;
181            }
182
183            current_zeros += new_zeros;
184        }
185
186        // insert dummy select blocks at the end that just report the same index like the last real
187        // block, so the bound check for binary search doesn't overflow
188        // this is technically the incorrect value, but since all valid queries will be smaller,
189        // this will only tell select to stay in the current super block, which is correct.
190        // we cannot use a real value here, because this would change the size of the super-block
191        if last_zero_select_block == select_blocks.len() - 1 {
192            select_blocks.push(SelectSuperBlockDescriptor {
193                index_0: select_blocks[last_zero_select_block].index_0,
194                index_1: 0,
195            });
196        } else {
197            debug_assert!(select_blocks[last_zero_select_block + 1].index_0 == 0);
198            select_blocks[last_zero_select_block + 1].index_0 =
199                select_blocks[last_zero_select_block].index_0;
200        }
201        if last_one_select_block == select_blocks.len() - 1 {
202            select_blocks.push(SelectSuperBlockDescriptor {
203                index_0: 0,
204                index_1: select_blocks[last_one_select_block].index_1,
205            });
206        } else {
207            debug_assert!(select_blocks[last_one_select_block + 1].index_1 == 0);
208            select_blocks[last_one_select_block + 1].index_1 =
209                select_blocks[last_one_select_block].index_1;
210        }
211
212        total_zeros += current_zeros;
213
214        RsVec {
215            data: vec.data,
216            len: vec.len,
217            blocks,
218            super_blocks,
219            select_blocks,
220            rank0: total_zeros,
221            rank1: vec.len - total_zeros,
222        }
223    }
224
225    /// Return the 0-rank of the bit at the given position. The 0-rank is the number of
226    /// 0-bits in the vector up to but excluding the bit at the given position. Calling this
227    /// function with an index larger than the length of the bit-vector will report the total
228    /// number of 0-bits in the bit-vector.
229    ///
230    /// # Parameters
231    /// - `pos`: The position of the bit to return the rank of.
232    #[must_use]
233    pub fn rank0(&self, pos: usize) -> usize {
234        self.rank(true, pos)
235    }
236
237    /// Return the 1-rank of the bit at the given position. The 1-rank is the number of
238    /// 1-bits in the vector up to but excluding the bit at the given position. Calling this
239    /// function with an index larger than the length of the bit-vector will report the total
240    /// number of 1-bits in the bit-vector.
241    ///
242    /// # Parameters
243    /// - `pos`: The position of the bit to return the rank of.
244    #[must_use]
245    pub fn rank1(&self, pos: usize) -> usize {
246        self.rank(false, pos)
247    }
248
249    // I measured 5-10% improvement with this. I don't know why it's not inlined by default, the
250    // branch elimination profits alone should make it worth it.
251    #[allow(clippy::inline_always)]
252    #[inline(always)]
253    fn rank(&self, zero: bool, pos: usize) -> usize {
254        #[allow(clippy::collapsible_else_if)]
255        // readability and more obvious where dead branch elimination happens
256        if zero {
257            if pos >= self.len() {
258                return self.rank0;
259            }
260        } else {
261            if pos >= self.len() {
262                return self.rank1;
263            }
264        }
265
266        let index = pos / WORD_SIZE;
267        let block_index = pos / BLOCK_SIZE;
268        let super_block_index = pos / SUPER_BLOCK_SIZE;
269        let mut rank = 0;
270
271        // at first add the number of zeros/ones before the current super block
272        rank += if zero {
273            self.super_blocks[super_block_index].zeros
274        } else {
275            (super_block_index * SUPER_BLOCK_SIZE) - self.super_blocks[super_block_index].zeros
276        };
277
278        // then add the number of zeros/ones before the current block
279        rank += if zero {
280            self.blocks[block_index].zeros as usize
281        } else {
282            ((block_index % (SUPER_BLOCK_SIZE / BLOCK_SIZE)) * BLOCK_SIZE)
283                - self.blocks[block_index].zeros as usize
284        };
285
286        // naive popcount of blocks
287        for &i in &self.data[(block_index * BLOCK_SIZE) / WORD_SIZE..index] {
288            rank += if zero {
289                i.count_zeros() as usize
290            } else {
291                i.count_ones() as usize
292            };
293        }
294
295        rank += if zero {
296            (!self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as usize
297        } else {
298            (self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as usize
299        };
300
301        rank
302    }
303
    /// Return the length of the vector, i.e. the number of bits it contains.
    ///
    /// The length counts bits, not the `u64` words that back the vector.
    #[must_use]
    pub fn len(&self) -> usize {
        self.len
    }
309
310    /// Return whether the vector is empty.
311    #[must_use]
312    pub fn is_empty(&self) -> bool {
313        self.len() == 0
314    }
315
316    /// Return the bit at the given position. The bit takes the least significant
317    /// bit of the returned u64 word.
318    /// If the position is larger than the length of the vector, `None` is returned.
319    #[must_use]
320    pub fn get(&self, pos: usize) -> Option<u64> {
321        if pos >= self.len() {
322            None
323        } else {
324            Some(self.get_unchecked(pos))
325        }
326    }
327
328    /// Return the bit at the given position. The bit takes the least significant
329    /// bit of the returned u64 word.
330    ///
331    /// # Panics
332    /// This function may panic if `pos >= self.len()` (alternatively, it may return garbage).
333    #[must_use]
334    pub fn get_unchecked(&self, pos: usize) -> u64 {
335        (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1
336    }
337
338    /// Return multiple bits at the given position. The number of bits to return is given by `len`.
339    /// At most 64 bits can be returned.
340    /// If the position at the end of the query is larger than the length of the vector,
341    /// None is returned (even if the query partially overlaps with the vector).
342    /// If the length of the query is larger than 64, None is returned.
343    #[must_use]
344    pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> {
345        if len > WORD_SIZE {
346            return None;
347        }
348        if pos + len > self.len {
349            None
350        } else {
351            Some(self.get_bits_unchecked(pos, len))
352        }
353    }
354
355    /// Return multiple bits at the given position. The number of bits to return is given by `len`.
356    /// At most 64 bits can be returned.
357    ///
358    /// This function is always inlined, because it gains a lot from loop optimization and
359    /// can utilize the processor pre-fetcher better if it is.
360    ///
361    /// # Errors
362    /// If the length of the query is larger than 64, unpredictable data will be returned.
363    /// Use [`get_bits`] to properly handle this case with an `Option`.
364    ///
365    /// # Panics
366    /// If the position or interval is larger than the length of the vector,
367    /// the function will either return unpredictable data, or panic.
368    ///
369    /// [`get_bits`]: #method.get_bits
370    #[must_use]
371    #[allow(clippy::comparison_chain)] // readability
372    #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen
373    pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 {
374        debug_assert!(len <= WORD_SIZE);
375        let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE);
376        if pos % WORD_SIZE + len <= WORD_SIZE {
377            partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
378        } else {
379            (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE)))
380                & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
381        }
382    }
383
384    /// Convert the `RsVec` into a [`BitVec`].
385    /// This consumes the `RsVec`, and discards all meta-data.
386    /// Since [`RsVec`]s are innately immutable, this conversion is the only way to modify the
387    /// underlying data.
388    ///
389    /// # Example
390    /// ```rust
391    /// use vers_vecs::{BitVec, RsVec};
392    ///
393    /// let mut bit_vec = BitVec::new();
394    /// bit_vec.append_word(u64::MAX);
395    ///
396    /// let rs_vec = RsVec::from_bit_vec(bit_vec);
397    /// assert_eq!(rs_vec.rank1(64), 64);
398    ///
399    /// let mut bit_vec = rs_vec.into_bit_vec();
400    /// bit_vec.flip_bit(32);
401    /// let rs_vec = RsVec::from_bit_vec(bit_vec);
402    /// assert_eq!(rs_vec.rank1(64), 63);
403    /// assert_eq!(rs_vec.select0(0), 32);
404    /// ```
405    #[must_use]
406    pub fn into_bit_vec(self) -> BitVec {
407        BitVec {
408            data: self.data,
409            len: self.len,
410        }
411    }
412
413    /// Check if two `RsVec`s are equal. For sparse vectors (either sparsely filled with 1-bits or
414    /// 0-bits), this is faster than comparing the vectors bit by bit.
415    /// Choose the value of `ZERO` depending on which bits are more sparse.
416    ///
417    /// This method is faster than [`full_equals`] for sparse vectors beginning at roughly 1
418    /// million bits. Above 4 million bits, this method becomes faster than full equality in general.
419    ///
420    /// # Parameters
421    /// - `other`: The other `RsVec` to compare to.
422    /// - `ZERO`: Whether to compare the sparse 0-bits (true) or the sparse 1-bits (false).
423    ///
424    /// # Returns
425    /// `true` if the vectors' contents are equal, `false` otherwise.
426    ///
427    /// [`full_equals`]: RsVec::full_equals
428    #[must_use]
429    pub fn sparse_equals<const ZERO: bool>(&self, other: &Self) -> bool {
430        if self.len() != other.len() {
431            return false;
432        }
433
434        if self.rank0 != other.rank0 || self.rank1 != other.rank1 {
435            return false;
436        }
437
438        let iter: SelectIter<ZERO> = self.select_iter();
439
440        for (rank, bit_index) in iter.enumerate() {
441            // since rank is inlined, we get dead code elimination depending on ZERO
442            if (other.get_unchecked(bit_index) == 0) != ZERO || other.rank(ZERO, bit_index) != rank
443            {
444                return false;
445            }
446        }
447
448        true
449    }
450
451    /// Check if two `RsVec`s are equal. This compares limb by limb. This is usually faster than a
452    /// [`sparse_equals`] call for small vectors.
453    ///
454    /// # Parameters
455    /// - `other`: The other `RsVec` to compare to.
456    ///
457    /// # Returns
458    /// `true` if the vectors' contents are equal, `false` otherwise.
459    ///
460    /// [`sparse_equals`]: RsVec::sparse_equals
461    #[must_use]
462    pub fn full_equals(&self, other: &Self) -> bool {
463        if self.len() != other.len() {
464            return false;
465        }
466
467        if self.rank0 != other.rank0 || self.rank1 != other.rank1 {
468            return false;
469        }
470
471        if self.data[..self.len / 64]
472            .iter()
473            .zip(other.data[..other.len / 64].iter())
474            .any(|(a, b)| a != b)
475        {
476            return false;
477        }
478
479        // if last incomplete block exists, test it without junk data
480        if !self.len.is_multiple_of(WORD_SIZE)
481            && self.data[self.len / WORD_SIZE] & ((1 << (self.len % WORD_SIZE)) - 1)
482                != other.data[self.len / WORD_SIZE] & ((1 << (other.len % WORD_SIZE)) - 1)
483        {
484            return false;
485        }
486
487        true
488    }
489
490    /// Returns the number of bytes used on the heap for this vector. This does not include
491    /// allocated space that is not used (e.g. by the allocation behavior of `Vec`).
492    #[must_use]
493    pub fn heap_size(&self) -> usize {
494        self.data.len() * size_of::<u64>()
495            + self.blocks.len() * size_of::<BlockDescriptor>()
496            + self.super_blocks.len() * size_of::<SuperBlockDescriptor>()
497            + self.select_blocks.len() * size_of::<SelectSuperBlockDescriptor>()
498    }
499}
500
// Presumably generates the iterator types `RsVecIter` and `RsVecRefIter` for `RsVec`; the
// macro is defined in `crate::util` and is not visible here, so confirm the details there.
impl_vector_iterator! { RsVec, RsVecIter, RsVecRefIter }
502
503impl PartialEq for RsVec {
504    /// Check if two `RsVec`s are equal. This method calls [`sparse_equals`] if the vector has more
505    /// than 4'000'000 bits, and [`full_equals`] otherwise.
506    ///
507    /// This was determined with benchmarks on an `x86_64` machine,
508    /// on which [`sparse_equals`] outperforms [`full_equals`] consistently above this threshold.
509    ///
510    /// # Parameters
511    /// - `other`: The other `RsVec` to compare to.
512    ///
513    /// # Returns
514    /// `true` if the vectors' contents are equal, `false` otherwise.
515    ///
516    /// [`sparse_equals`]: RsVec::sparse_equals
517    /// [`full_equals`]: RsVec::full_equals
518    fn eq(&self, other: &Self) -> bool {
519        if self.len > 4_000_000 {
520            if self.rank1 > self.rank0 {
521                self.sparse_equals::<true>(other)
522            } else {
523                self.sparse_equals::<false>(other)
524            }
525        } else {
526            self.full_equals(other)
527        }
528    }
529}
530
531impl From<BitVec> for RsVec {
532    /// Build an [`RsVec`] from a [`BitVec`]. This will consume the [`BitVec`]. Since [`RsVec`]s are
533    /// immutable, this is the only way to construct an [`RsVec`].
534    ///
535    /// # Example
536    /// See the example for [`RsVec`].
537    ///
538    /// [`BitVec`]: BitVec
539    /// [`RsVec`]: RsVec
540    fn from(vec: BitVec) -> Self {
541        RsVec::from_bit_vec(vec)
542    }
543}
544
545impl From<RsVec> for BitVec {
546    fn from(value: RsVec) -> Self {
547        value.into_bit_vec()
548    }
549}
550
551// iter code in here to keep it more organized
552mod iter;
553// select code in here to keep it more organized
554mod select;
555
556#[cfg(all(
557    feature = "simd",
558    target_arch = "x86_64",
559    target_feature = "avx",
560    target_feature = "avx2",
561    target_feature = "avx512f",
562    target_feature = "avx512bw",
563))]
564mod bitset;
565
566#[cfg(test)]
567mod tests;