vers_vecs/bit_vec/
sparse.rs

1//! A sparse bit vector with `rank1`, `rank0` and `select1` support.
2//! The vector requires `O(n log u/n) + 2n + o(n)` bits of space, where `n` is the number of bits in the vector
3//! and `u` is the number of 1-bits.
4//! The vector is constructed from a sorted list of indices of 1-bits, or from an existing
5//! [`BitVec`].
6
7use crate::{BitVec, EliasFanoVec};
8
9/// A succinct representation of a sparse vector with rank and select support.
10/// It is a thin wrapper around an [`EliasFanoVec`] that compresses the indices of 1-bits.
11///
12/// Therefore, no `select0` function is provided.
13/// However, the constructor [`from_bitvec_inverted`] can be used to cheaply invert the input `BitVec`,
14/// reversing the roles of 1-bits and 0-bits.
15///
16/// # Examples
17/// ```
18/// use vers_vecs::SparseRSVec;
19///
20/// let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
21/// assert_eq!(sparse.get(5), Some(1));
22/// assert_eq!(sparse.get(11), Some(0));
23/// assert_eq!(sparse.get(12), None);
24///
25/// assert_eq!(sparse.rank1(5), 2);
26/// assert_eq!(sparse.select1(2), 5);
27/// ```
28///
29/// It cn also be constructed from a `BitVec` directly:
30/// ```
31/// use vers_vecs::SparseRSVec;
32/// use vers_vecs::BitVec;
33///
34/// let mut bv = BitVec::from_zeros(12);
35/// bv.flip_bit(6);
36/// bv.flip_bit(7);
37///
38/// let sparse = SparseRSVec::from_bitvec(&bv);
39/// assert_eq!(sparse.rank1(5), 0);
40/// assert_eq!(sparse.select1(0), 6);
41/// ```
42///
43/// [`EliasFanoVec`]: struct.EliasFanoVec.html
44/// [`from_bitvec_inverted`]: #method.from_bitvec_inverted
45#[derive(Debug, Clone)]
46#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
47pub struct SparseRSVec {
48    vec: EliasFanoVec,
49    len: u64,
50}
51
52impl SparseRSVec {
53    /// Creates a new `SparseRSVec` from a sequence of set bits represented as indices.
54    /// The input must be sorted in ascending order and free of duplicates.
55    ///
56    /// The length of the vector must be passed as well, as it cannot be inferred from the input,
57    /// if the last bit in the vector is not set.
58    ///
59    /// # Parameters
60    /// - `input`: The positions of set bits, or unset bits if the sparse vector should compress
61    ///   zeros.
62    /// - `len`: The length of the vector, which is needed if the last bit is not in the input slice.
63    #[must_use]
64    pub fn new(input: &[u64], len: u64) -> Self {
65        debug_assert!(input.is_sorted(), "input must be sorted");
66        debug_assert!(
67            input.windows(2).all(|w| w[0] != w[1]),
68            "input must be free of duplicates"
69        );
70
71        Self {
72            vec: EliasFanoVec::from_slice(input),
73            len,
74        }
75    }
76
77    /// Creates a new `SparseRSVec` from a `BitVec`, by compressing the sparse 1-bits.
78    ///
79    /// # Parameters
80    /// - `input`: The input `BitVec` to compress.
81    #[must_use]
82    pub fn from_bitvec(input: &BitVec) -> Self {
83        let len = input.len() as u64;
84        Self::new(
85            input
86                .iter()
87                .enumerate()
88                .filter(|&(_, bit)| bit == 1)
89                .map(|(i, _)| i as u64)
90                .collect::<Vec<_>>()
91                .as_slice(),
92            len,
93        )
94    }
95
96    /// Creates a new `SparseRSVec` from a `BitVec`.
97    /// However, before compressing the 1-bits, the input is inverted.
98    /// This means that the sparse vector will compress the 0-bits instead of the 1-bits,
99    /// and the [`rank1`] and [`select1`] functions will return the number of 0-bits and the position of 0-bits.
100    ///
101    /// This is a convenience function to allow for easy creation of sparse vectors that compress
102    /// zeros, despite the lack of a `select0` function.
103    ///
104    /// However, do note that [`get`] will return the inverted value of the bit at position `i` from
105    /// the original `BitVec`.
106    ///
107    /// # Parameters
108    /// - `input`: The input `BitVec` to compress.
109    ///
110    /// # Example
111    /// ```
112    /// use vers_vecs::SparseRSVec;
113    /// use vers_vecs::BitVec;
114    ///
115    /// let mut bv = BitVec::from_ones(12);
116    /// // set 6 and 7 to 0
117    /// bv.flip_bit(6);
118    /// bv.flip_bit(7);
119    ///
120    /// let sparse = SparseRSVec::from_bitvec_inverted(&bv);
121    /// // now select1 gives the position of 0-bits
122    /// assert_eq!(sparse.select1(1), 7);
123    /// ```
124    ///
125    /// [`rank1`]: #method.rank1
126    /// [`select1`]: #method.select1
127    /// [`get`]: #method.get
128    #[must_use]
129    pub fn from_bitvec_inverted(input: &BitVec) -> Self {
130        let len = input.len() as u64;
131        Self::new(
132            input
133                .iter()
134                .enumerate()
135                .filter(|&(_, bit)| bit == 0)
136                .map(|(i, _)| i as u64)
137                .collect::<Vec<_>>()
138                .as_slice(),
139            len,
140        )
141    }
142
143    /// Returns true if the bit at position `i` is set.
144    ///
145    /// If `i` is out of bounds the function produces incorrect results.
146    /// Use [`is_set`] for a checked version.
147    ///
148    /// [`is_set`]: #method.is_set
149    #[must_use]
150    pub fn is_set_unchecked(&self, i: u64) -> bool {
151        self.vec.predecessor_unchecked(i) == i
152    }
153
154    /// Returns true if the bit at position `i` is set.
155    ///
156    /// Returns `None` if `i` is out of bounds.
157    #[must_use]
158    pub fn is_set(&self, i: u64) -> Option<bool> {
159        if i >= self.len {
160            None
161        } else {
162            // if the predecessor is None, the bit is left of the first 1-bit
163            Some(self.vec.predecessor(i).is_some_and(|p| p == i))
164        }
165    }
166
167    /// Gets the bit at position `i`.
168    /// Returns 1 if the bit is set, 0 if it is not set.
169    ///
170    /// # Panics
171    /// If `i` is out of bounds the function might panic or produce incorrect results.
172    /// Use [`get`] for a checked version.
173    ///
174    /// [`get`]: Self::get
175    #[must_use]
176    pub fn get_unchecked(&self, i: u64) -> u64 {
177        self.is_set_unchecked(i).into()
178    }
179
180    /// Gets the bit at position `i`.
181    /// Returns `Some(1)` if the bit is set, `Some(0)` if it is not set, and `None` if `i` is out of bounds.
182    #[must_use]
183    pub fn get(&self, i: u64) -> Option<u64> {
184        self.is_set(i).map(std::convert::Into::into)
185    }
186
187    /// Return the position of the 1-bit with the given rank.
188    /// The following holds for all `pos` with 1-bits:
189    /// ``select1(rank1(pos)) == pos``
190    ///
191    /// If the rank is larger than the number of sparse bits in the vector, the vector length is returned.
192    #[must_use]
193    pub fn select1(&self, i: usize) -> u64 {
194        self.vec.get(i).unwrap_or(self.len)
195    }
196
197    /// Returns the number of 1-bits in the vector up to position `i`.
198    ///
199    /// If `i` is out of bounds, the number of 1-bits in the vector is returned.
200    #[must_use]
201    pub fn rank1(&self, i: u64) -> u64 {
202        self.vec.rank(i)
203    }
204
205    /// Returns the number of 0-bits in the vector up to position `i`.
206    ///
207    /// If `i` is out of bounds, the number of 0-bits in the vector is returned.
208    #[must_use]
209    pub fn rank0(&self, i: u64) -> u64 {
210        if i >= self.len {
211            self.len - self.vec.rank(self.len)
212        } else {
213            i - self.vec.rank(i)
214        }
215    }
216
217    /// Returns an iterator over the 1-bits in the vector.
218    /// The iterator yields the positions of the 1-bits in ascending order.
219    pub fn iter1(&self) -> impl Iterator<Item = u64> + '_ {
220        self.vec.iter()
221    }
222
223    /// Returns the length of the bit vector if it was uncompressed.
224    #[must_use]
225    pub fn len(&self) -> u64 {
226        self.len
227    }
228
229    /// Returns true if the vector is empty.
230    #[must_use]
231    pub fn is_empty(&self) -> bool {
232        self.len == 0
233    }
234
235    /// Returns the number of bytes used by the vector on the heap.
236    ///  Does not include allocated memory that isn't used.
237    #[must_use]
238    pub fn heap_size(&self) -> usize {
239        self.vec.heap_size()
240    }
241}
242
243impl From<BitVec> for SparseRSVec {
244    fn from(input: BitVec) -> Self {
245        Self::from_bitvec_inverted(&input)
246    }
247}
248
249impl<'a> From<&'a BitVec> for SparseRSVec {
250    fn from(input: &'a BitVec) -> Self {
251        Self::from_bitvec_inverted(input)
252    }
253}
254
255#[cfg(test)]
256mod tests {
257    use super::SparseRSVec;
258    use crate::BitVec;
259    use rand::prelude::StdRng;
260    use rand::{Rng, SeedableRng};
261
262    #[test]
263    fn test_sparse_rank() {
264        let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
265        assert_eq!(sparse.rank1(0), 0);
266        assert_eq!(sparse.rank1(1), 0);
267        assert_eq!(sparse.rank1(2), 1);
268        assert_eq!(sparse.rank1(3), 1);
269        assert_eq!(sparse.rank1(4), 2);
270        assert_eq!(sparse.rank1(5), 2);
271        assert_eq!(sparse.rank1(6), 3);
272        assert_eq!(sparse.rank1(7), 3);
273        assert_eq!(sparse.rank1(8), 4);
274        assert_eq!(sparse.rank1(9), 4);
275        assert_eq!(sparse.rank1(10), 5);
276        assert_eq!(sparse.rank1(11), 5);
277        assert_eq!(sparse.rank1(12), 5);
278        assert_eq!(sparse.rank1(999), 5);
279    }
280
281    #[test]
282    fn test_sparse_select() {
283        let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
284        assert_eq!(sparse.select1(0), 1);
285        assert_eq!(sparse.select1(1), 3);
286        assert_eq!(sparse.select1(2), 5);
287        assert_eq!(sparse.select1(3), 7);
288        assert_eq!(sparse.select1(4), 9);
289        assert_eq!(sparse.select1(5), 12);
290        assert_eq!(sparse.select1(6), 12);
291    }
292
293    #[test]
294    fn test_sparse_rank0() {
295        let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
296        assert_eq!(sparse.rank0(0), 0);
297        assert_eq!(sparse.rank0(1), 1);
298        assert_eq!(sparse.rank0(2), 1);
299        assert_eq!(sparse.rank0(3), 2);
300        assert_eq!(sparse.rank0(4), 2);
301        assert_eq!(sparse.rank0(5), 3);
302        assert_eq!(sparse.rank0(6), 3);
303        assert_eq!(sparse.rank0(7), 4);
304        assert_eq!(sparse.rank0(8), 4);
305        assert_eq!(sparse.rank0(9), 5);
306        assert_eq!(sparse.rank0(10), 5);
307        assert_eq!(sparse.rank0(11), 6);
308        assert_eq!(sparse.rank0(12), 7);
309        assert_eq!(sparse.rank0(999), 7);
310    }
311
312    #[test]
313    fn test_empty_sparse() {
314        let sparse = SparseRSVec::new(&[], 0);
315        assert_eq!(sparse.rank1(0), 0);
316        assert_eq!(sparse.rank1(1), 0);
317        assert_eq!(sparse.rank1(999), 0);
318        assert_eq!(sparse.select1(0), 0);
319        assert_eq!(sparse.select1(1), 0);
320        assert_eq!(sparse.select1(999), 0);
321        assert_eq!(sparse.rank0(0), 0);
322        assert_eq!(sparse.rank0(1), 0);
323        assert_eq!(sparse.rank0(999), 0);
324        assert!(sparse.is_empty());
325        assert_eq!(sparse.len(), 0);
326    }
327
328    #[test]
329    fn test_sparse_get() {
330        let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
331        assert_eq!(sparse.get(0), Some(0));
332        assert_eq!(sparse.get(1), Some(1));
333        assert_eq!(sparse.get(2), Some(0));
334        assert_eq!(sparse.get(3), Some(1));
335        assert_eq!(sparse.get(4), Some(0));
336        assert_eq!(sparse.get(5), Some(1));
337        assert_eq!(sparse.get(6), Some(0));
338        assert_eq!(sparse.get(7), Some(1));
339        assert_eq!(sparse.get(8), Some(0));
340        assert_eq!(sparse.get(9), Some(1));
341        assert_eq!(sparse.get(10), Some(0));
342        assert_eq!(sparse.get(11), Some(0));
343        assert_eq!(sparse.get(12), None);
344        assert_eq!(sparse.get(999), None);
345    }
346
347    #[test]
348    fn test_from_bitvector() {
349        let mut bv = BitVec::from_ones(12);
350        bv.flip_bit(6);
351        bv.flip_bit(7);
352
353        let sparse = SparseRSVec::from_bitvec(&bv);
354        assert_eq!(sparse.rank1(0), 0);
355        assert_eq!(sparse.rank1(1), 1);
356        assert_eq!(sparse.rank1(2), 2);
357        assert_eq!(sparse.rank1(7), 6);
358        assert_eq!(sparse.rank1(8), 6);
359        assert_eq!(sparse.rank1(9), 7);
360        assert_eq!(sparse.rank1(12), 10);
361
362        let sparse = SparseRSVec::from_bitvec_inverted(&bv);
363        assert_eq!(sparse.rank1(0), 0);
364        assert_eq!(sparse.rank1(1), 0);
365        assert_eq!(sparse.rank1(2), 0);
366        assert_eq!(sparse.rank1(7), 1);
367        assert_eq!(sparse.rank1(8), 2);
368        assert_eq!(sparse.rank1(9), 2);
369        assert_eq!(sparse.rank1(12), 2);
370    }
371
372    #[test]
373    fn test_large_block() {
374        // test that the implementation works correctly if the search triggers a binary search
375        let sparse = SparseRSVec::new(
376            &[
377                1, 100_000, 100_001, 100_002, 100_003, 100_004, 100_005, 100_006, 100_007, 100_008,
378                100_009, 100_010, 1_000_000,
379            ],
380            2_000_000,
381        );
382        assert_eq!(sparse.rank1(100_008), 9);
383        assert_eq!(sparse.rank1(100_012), 12);
384    }
385
386    #[test]
387    fn test_fuzzy() {
388        const L: usize = 100_000;
389        let mut bv = BitVec::from_zeros(L);
390        let mut rng = StdRng::from_seed([0; 32]);
391
392        for _ in 0..L / 4 {
393            bv.flip_bit(rng.gen_range(0..L));
394        }
395
396        let sparse = SparseRSVec::from_bitvec(&bv);
397
398        let mut ones = 0;
399        for i in 0..L {
400            assert_eq!(bv.get(i), sparse.get(i as u64));
401            assert_eq!(ones, sparse.rank1(i as u64));
402            assert_eq!(i as u64 - ones, sparse.rank0(i as u64));
403            if bv.get(i) == Some(1) {
404                assert_eq!(i, sparse.select1(ones as usize).try_into().unwrap());
405                ones += 1;
406            }
407        }
408    }
409
410    #[test]
411    fn test_from_padded_bitvec() {
412        // test no garbage is added to the sparse vec when the bit vector contains trailing data
413        let mut bv = BitVec::new();
414        bv.append_bit(1);
415        bv.append_bit(0);
416        bv.append_bits(u64::MAX, 10);
417        bv.drop_last(10);
418        bv.append_bit(0);
419        bv.drop_last(1);
420
421        let sparse = SparseRSVec::from_bitvec(&bv);
422        assert_eq!(sparse.len(), 2);
423        assert_eq!(sparse.get(0), Some(1));
424        assert_eq!(sparse.get(1), Some(0));
425        assert_eq!(sparse.iter1().collect::<Vec<_>>(), vec![0]);
426    }
427}