vers_vecs/bit_vec/
sparse.rs

1//! A sparse bit vector with `rank1`, `rank0` and `select1` support.
2//! The vector requires `O(n log u/n) + 2n + o(n)` bits of space, where `n` is the number of bits in the vector
3//! and `u` is the number of 1-bits.
4//! The vector is constructed from a sorted list of indices of 1-bits, or from an existing
5//! [`BitVec`](crate::BitVec).
6
7use crate::{BitVec, EliasFanoVec};
8
9/// A succinct representation of a sparse vector with rank and select support.
10/// It is a thin wrapper around an [`EliasFanoVec`] that compresses the indices of 1-bits.
11///
12/// Therefore, no `select0` function is provided.
13/// However, the constructor [`from_bitvec_inverted`] can be used to cheaply invert the input `BitVec`,
14/// reversing the roles of 1-bits and 0-bits.
15///
16/// # Examples
17/// ```
18/// use vers_vecs::SparseRSVec;
19///
20/// let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
21/// assert_eq!(sparse.get(5), Some(1));
22/// assert_eq!(sparse.get(11), Some(0));
23/// assert_eq!(sparse.get(12), None);
24///
25/// assert_eq!(sparse.rank1(5), 2);
26/// assert_eq!(sparse.select1(2), 5);
27/// ```
28///
29/// It cn also be constructed from a `BitVec` directly:
30/// ```
31/// use vers_vecs::SparseRSVec;
32/// use vers_vecs::BitVec;
33///
34/// let mut bv = BitVec::from_zeros(12);
35/// bv.flip_bit(6);
36/// bv.flip_bit(7);
37///
38/// let sparse = SparseRSVec::from_bitvec(&bv);
39/// assert_eq!(sparse.rank1(5), 0);
40/// assert_eq!(sparse.select1(0), 6);
41/// ```
42///
43/// [`EliasFanoVec`]: struct.EliasFanoVec.html
44/// [`from_bitvec_inverted`]: #method.from_bitvec_inverted
45#[derive(Debug, Clone)]
46#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
47pub struct SparseRSVec {
48    vec: EliasFanoVec,
49    len: u64,
50}
51
52impl SparseRSVec {
53    /// Creates a new `SparseRSVec` from a sequence of set bits represented as indices.
54    /// The input must be sorted in ascending order and free of duplicates.
55    ///
56    /// The length of the vector must be passed as well, as it cannot be inferred from the input,
57    /// if the last bit in the vector is not set.
58    ///
59    /// # Parameters
60    /// - `input`: The positions of set bits, or unset bits if the sparse vector should compress
61    ///   zeros.
62    /// - `len`: The length of the vector, which is needed if the last bit is not in the input slice.
63    #[must_use]
64    pub fn new(input: &[u64], len: u64) -> Self {
65        debug_assert!(input.is_sorted(), "input must be sorted");
66        debug_assert!(
67            input.windows(2).all(|w| w[0] != w[1]),
68            "input must be free of duplicates"
69        );
70
71        Self {
72            vec: EliasFanoVec::from_slice(input),
73            len,
74        }
75    }
76
77    /// Creates a new `SparseRSVec` from a `BitVec`, by compressing the sparse 1-bits.
78    ///
79    /// # Parameters
80    /// - `input`: The input `BitVec` to compress.
81    #[must_use]
82    pub fn from_bitvec(input: &BitVec) -> Self {
83        let len = input.len() as u64;
84        Self::new(
85            input
86                .iter()
87                .enumerate()
88                .filter(|&(_, bit)| bit == 1)
89                .map(|(i, _)| i as u64)
90                .collect::<Vec<_>>()
91                .as_slice(),
92            len,
93        )
94    }
95
96    /// Creates a new `SparseRSVec` from a `BitVec`.
97    /// However, before compressing the 1-bits, the input is inverted.
98    /// This means that the sparse vector will compress the 0-bits instead of the 1-bits,
99    /// and the [`rank1`] and [`select1`] functions will return the number of 0-bits and the position of 0-bits.
100    ///
101    /// This is a convenience function to allow for easy creation of sparse vectors that compress
102    /// zeros, despite the lack of a `select0` function.
103    ///
104    /// However, do note that [`get`] will return the inverted value of the bit at position `i` from
105    /// the original `BitVec`.
106    ///
107    /// # Parameters
108    /// - `input`: The input `BitVec` to compress.
109    ///
110    /// # Example
111    /// ```
112    /// use vers_vecs::SparseRSVec;
113    /// use vers_vecs::BitVec;
114    ///
115    /// let mut bv = BitVec::from_ones(12);
116    /// // set 6 and 7 to 0
117    /// bv.flip_bit(6);
118    /// bv.flip_bit(7);
119    ///
120    /// let sparse = SparseRSVec::from_bitvec_inverted(&bv);
121    /// // now select1 gives the position of 0-bits
122    /// assert_eq!(sparse.select1(1), 7);
123    /// ```
124    ///
125    /// [`rank1`]: #method.rank1
126    /// [`select1`]: #method.select1
127    /// [`get`]: #method.get
128    #[must_use]
129    pub fn from_bitvec_inverted(input: &BitVec) -> Self {
130        let len = input.len() as u64;
131        Self::new(
132            input
133                .iter()
134                .enumerate()
135                .filter(|&(_, bit)| bit == 0)
136                .map(|(i, _)| i as u64)
137                .collect::<Vec<_>>()
138                .as_slice(),
139            len,
140        )
141    }
142
143    /// Returns true if the bit at position `i` is set.
144    ///
145    /// If `i` is out of bounds the function produces incorrect results.
146    /// Use [`is_set`] for a checked version.
147    ///
148    /// [`is_set`]: #method.is_set
149    #[must_use]
150    pub fn is_set_unchecked(&self, i: u64) -> bool {
151        self.vec.predecessor_unchecked(i) == i
152    }
153
154    /// Returns true if the bit at position `i` is set.
155    ///
156    /// Returns `None` if `i` is out of bounds.
157    #[must_use]
158    pub fn is_set(&self, i: u64) -> Option<bool> {
159        if i >= self.len {
160            None
161        } else {
162            // if the predecessor is None, the bit is left of the first 1-bit
163            Some(self.vec.predecessor(i).is_some_and(|p| p == i))
164        }
165    }
166
167    /// Gets the bit at position `i`.
168    /// Returns 1 if the bit is set, 0 if it is not set.
169    ///
170    /// # Panics
171    /// If `i` is out of bounds the function might panic or produce incorrect results.
172    /// Use [`get`] for a checked version.
173    #[must_use]
174    pub fn get_unchecked(&self, i: u64) -> u64 {
175        self.is_set_unchecked(i).into()
176    }
177
178    /// Gets the bit at position `i`.
179    /// Returns `Some(1)` if the bit is set, `Some(0)` if it is not set, and `None` if `i` is out of bounds.
180    #[must_use]
181    pub fn get(&self, i: u64) -> Option<u64> {
182        self.is_set(i).map(std::convert::Into::into)
183    }
184
185    /// Return the position of the 1-bit with the given rank.
186    /// The following holds for all `pos` with 1-bits:
187    /// ``select1(rank1(pos)) == pos``
188    ///
189    /// If the rank is larger than the number of sparse bits in the vector, the vector length is returned.
190    #[must_use]
191    pub fn select1(&self, i: usize) -> u64 {
192        self.vec.get(i).unwrap_or(self.len)
193    }
194
195    /// Returns the number of 1-bits in the vector up to position `i`.
196    ///
197    /// If `i` is out of bounds, the number of 1-bits in the vector is returned.
198    #[must_use]
199    pub fn rank1(&self, i: u64) -> u64 {
200        self.vec.rank(i)
201    }
202
203    /// Returns the number of 0-bits in the vector up to position `i`.
204    ///
205    /// If `i` is out of bounds, the number of 0-bits in the vector is returned.
206    #[must_use]
207    pub fn rank0(&self, i: u64) -> u64 {
208        if i >= self.len {
209            self.len - self.vec.rank(self.len)
210        } else {
211            i - self.vec.rank(i)
212        }
213    }
214
215    /// Returns an iterator over the 1-bits in the vector.
216    /// The iterator yields the positions of the 1-bits in ascending order.
217    pub fn iter1(&self) -> impl Iterator<Item = u64> + '_ {
218        self.vec.iter()
219    }
220
221    /// Returns the length of the bit vector if it was uncompressed.
222    #[must_use]
223    pub fn len(&self) -> u64 {
224        self.len
225    }
226
227    /// Returns true if the vector is empty.
228    #[must_use]
229    pub fn is_empty(&self) -> bool {
230        self.len == 0
231    }
232
233    /// Returns the number of bytes used by the vector on the heap.
234    ///  Does not include allocated memory that isn't used.
235    #[must_use]
236    pub fn heap_size(&self) -> usize {
237        self.vec.heap_size()
238    }
239}
240
241impl From<BitVec> for SparseRSVec {
242    fn from(input: BitVec) -> Self {
243        Self::from_bitvec_inverted(&input)
244    }
245}
246
247impl<'a> From<&'a BitVec> for SparseRSVec {
248    fn from(input: &'a BitVec) -> Self {
249        Self::from_bitvec_inverted(input)
250    }
251}
252
253#[cfg(test)]
254mod tests {
255    use super::SparseRSVec;
256    use crate::BitVec;
257    use rand::prelude::StdRng;
258    use rand::{Rng, SeedableRng};
259
260    #[test]
261    fn test_sparse_rank() {
262        let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
263        assert_eq!(sparse.rank1(0), 0);
264        assert_eq!(sparse.rank1(1), 0);
265        assert_eq!(sparse.rank1(2), 1);
266        assert_eq!(sparse.rank1(3), 1);
267        assert_eq!(sparse.rank1(4), 2);
268        assert_eq!(sparse.rank1(5), 2);
269        assert_eq!(sparse.rank1(6), 3);
270        assert_eq!(sparse.rank1(7), 3);
271        assert_eq!(sparse.rank1(8), 4);
272        assert_eq!(sparse.rank1(9), 4);
273        assert_eq!(sparse.rank1(10), 5);
274        assert_eq!(sparse.rank1(11), 5);
275        assert_eq!(sparse.rank1(12), 5);
276        assert_eq!(sparse.rank1(999), 5);
277    }
278
279    #[test]
280    fn test_sparse_select() {
281        let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
282        assert_eq!(sparse.select1(0), 1);
283        assert_eq!(sparse.select1(1), 3);
284        assert_eq!(sparse.select1(2), 5);
285        assert_eq!(sparse.select1(3), 7);
286        assert_eq!(sparse.select1(4), 9);
287        assert_eq!(sparse.select1(5), 12);
288        assert_eq!(sparse.select1(6), 12);
289    }
290
291    #[test]
292    fn test_sparse_rank0() {
293        let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
294        assert_eq!(sparse.rank0(0), 0);
295        assert_eq!(sparse.rank0(1), 1);
296        assert_eq!(sparse.rank0(2), 1);
297        assert_eq!(sparse.rank0(3), 2);
298        assert_eq!(sparse.rank0(4), 2);
299        assert_eq!(sparse.rank0(5), 3);
300        assert_eq!(sparse.rank0(6), 3);
301        assert_eq!(sparse.rank0(7), 4);
302        assert_eq!(sparse.rank0(8), 4);
303        assert_eq!(sparse.rank0(9), 5);
304        assert_eq!(sparse.rank0(10), 5);
305        assert_eq!(sparse.rank0(11), 6);
306        assert_eq!(sparse.rank0(12), 7);
307        assert_eq!(sparse.rank0(999), 7);
308    }
309
310    #[test]
311    fn test_empty_sparse() {
312        let sparse = SparseRSVec::new(&[], 0);
313        assert_eq!(sparse.rank1(0), 0);
314        assert_eq!(sparse.rank1(1), 0);
315        assert_eq!(sparse.rank1(999), 0);
316        assert_eq!(sparse.select1(0), 0);
317        assert_eq!(sparse.select1(1), 0);
318        assert_eq!(sparse.select1(999), 0);
319        assert_eq!(sparse.rank0(0), 0);
320        assert_eq!(sparse.rank0(1), 0);
321        assert_eq!(sparse.rank0(999), 0);
322        assert!(sparse.is_empty());
323        assert_eq!(sparse.len(), 0);
324    }
325
326    #[test]
327    fn test_sparse_get() {
328        let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
329        assert_eq!(sparse.get(0), Some(0));
330        assert_eq!(sparse.get(1), Some(1));
331        assert_eq!(sparse.get(2), Some(0));
332        assert_eq!(sparse.get(3), Some(1));
333        assert_eq!(sparse.get(4), Some(0));
334        assert_eq!(sparse.get(5), Some(1));
335        assert_eq!(sparse.get(6), Some(0));
336        assert_eq!(sparse.get(7), Some(1));
337        assert_eq!(sparse.get(8), Some(0));
338        assert_eq!(sparse.get(9), Some(1));
339        assert_eq!(sparse.get(10), Some(0));
340        assert_eq!(sparse.get(11), Some(0));
341        assert_eq!(sparse.get(12), None);
342        assert_eq!(sparse.get(999), None);
343    }
344
345    #[test]
346    fn test_from_bitvector() {
347        let mut bv = BitVec::from_ones(12);
348        bv.flip_bit(6);
349        bv.flip_bit(7);
350
351        let sparse = SparseRSVec::from_bitvec(&bv);
352        assert_eq!(sparse.rank1(0), 0);
353        assert_eq!(sparse.rank1(1), 1);
354        assert_eq!(sparse.rank1(2), 2);
355        assert_eq!(sparse.rank1(7), 6);
356        assert_eq!(sparse.rank1(8), 6);
357        assert_eq!(sparse.rank1(9), 7);
358        assert_eq!(sparse.rank1(12), 10);
359
360        let sparse = SparseRSVec::from_bitvec_inverted(&bv);
361        assert_eq!(sparse.rank1(0), 0);
362        assert_eq!(sparse.rank1(1), 0);
363        assert_eq!(sparse.rank1(2), 0);
364        assert_eq!(sparse.rank1(7), 1);
365        assert_eq!(sparse.rank1(8), 2);
366        assert_eq!(sparse.rank1(9), 2);
367        assert_eq!(sparse.rank1(12), 2);
368    }
369
370    #[test]
371    fn test_large_block() {
372        // test that the implementation works correctly if the search triggers a binary search
373        let sparse = SparseRSVec::new(
374            &[
375                1, 100_000, 100_001, 100_002, 100_003, 100_004, 100_005, 100_006, 100_007, 100_008,
376                100_009, 100_010, 1_000_000,
377            ],
378            2_000_000,
379        );
380        assert_eq!(sparse.rank1(100_008), 9);
381        assert_eq!(sparse.rank1(100_012), 12);
382    }
383
384    #[test]
385    fn test_fuzzy() {
386        const L: usize = 100_000;
387        let mut bv = BitVec::from_zeros(L);
388        let mut rng = StdRng::from_seed([0; 32]);
389
390        for _ in 0..L / 4 {
391            bv.flip_bit(rng.gen_range(0..L));
392        }
393
394        let sparse = SparseRSVec::from_bitvec(&bv);
395
396        let mut ones = 0;
397        for i in 0..L {
398            assert_eq!(bv.get(i), sparse.get(i as u64));
399            assert_eq!(ones, sparse.rank1(i as u64));
400            assert_eq!(i as u64 - ones, sparse.rank0(i as u64));
401            if bv.get(i) == Some(1) {
402                assert_eq!(i, sparse.select1(ones as usize).try_into().unwrap());
403                ones += 1;
404            }
405        }
406    }
407
408    #[test]
409    fn test_from_padded_bitvec() {
410        // test no garbage is added to the sparse vec when the bit vector contains trailing data
411        let mut bv = BitVec::new();
412        bv.append_bit(1);
413        bv.append_bit(0);
414        bv.append_bits(u64::MAX, 10);
415        bv.drop_last(10);
416        bv.append_bit(0);
417        bv.drop_last(1);
418
419        let sparse = SparseRSVec::from_bitvec(&bv);
420        assert_eq!(sparse.len(), 2);
421        assert_eq!(sparse.get(0), Some(1));
422        assert_eq!(sparse.get(1), Some(0));
423        assert_eq!(sparse.iter1().collect::<Vec<_>>(), vec![0]);
424    }
425}