Skip to main content

vers_vecs/bit_vec/
sparse.rs

1//! A sparse bit vector with `rank1`, `rank0` and `select1` support.
2//! The vector requires `O(n log u/n) + 2n + o(n)` bits of space, where `n` is the number of bits in the vector
3//! and `u` is the number of 1-bits.
4//! The vector is constructed from a sorted list of indices of 1-bits, or from an existing
5//! [`BitVec`].
6
7use crate::{BitVec, EliasFanoVec};
8
9/// A succinct representation of a sparse vector with rank and select support.
10/// It is a thin wrapper around an [`EliasFanoVec`] that compresses the indices of 1-bits.
11///
12/// Therefore, no `select0` function is provided.
13/// However, the constructor [`from_bitvec_inverted`] can be used to cheaply invert the input `BitVec`,
14/// reversing the roles of 1-bits and 0-bits.
15///
16/// # Examples
17/// ```
18/// use vers_vecs::SparseRSVec;
19///
20/// let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
21/// assert_eq!(sparse.get(5), Some(1));
22/// assert_eq!(sparse.get(11), Some(0));
23/// assert_eq!(sparse.get(12), None);
24///
25/// assert_eq!(sparse.rank1(5), 2);
26/// assert_eq!(sparse.select1(2), 5);
27/// ```
28///
29/// It cn also be constructed from a `BitVec` directly:
30/// ```
31/// use vers_vecs::SparseRSVec;
32/// use vers_vecs::BitVec;
33///
34/// let mut bv = BitVec::from_zeros(12);
35/// bv.flip_bit(6);
36/// bv.flip_bit(7);
37///
38/// let sparse = SparseRSVec::from_bitvec(&bv);
39/// assert_eq!(sparse.rank1(5), 0);
40/// assert_eq!(sparse.select1(0), 6);
41/// ```
42///
43/// [`EliasFanoVec`]: struct.EliasFanoVec.html
44/// [`from_bitvec_inverted`]: #method.from_bitvec_inverted
45#[derive(Debug, Clone)]
46#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
47#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
48pub struct SparseRSVec {
49    vec: EliasFanoVec,
50    len: u64,
51}
52
53impl SparseRSVec {
54    /// Creates a new `SparseRSVec` from a sequence of set bits represented as indices.
55    /// The input must be sorted in ascending order and free of duplicates.
56    ///
57    /// The length of the vector must be passed as well, as it cannot be inferred from the input,
58    /// if the last bit in the vector is not set.
59    ///
60    /// # Parameters
61    /// - `input`: The positions of set bits, or unset bits if the sparse vector should compress
62    ///   zeros.
63    /// - `len`: The length of the vector, which is needed if the last bit is not in the input slice.
64    #[must_use]
65    pub fn new(input: &[u64], len: u64) -> Self {
66        debug_assert!(input.is_sorted(), "input must be sorted");
67        debug_assert!(
68            input.windows(2).all(|w| w[0] != w[1]),
69            "input must be free of duplicates"
70        );
71
72        Self {
73            vec: EliasFanoVec::from_slice(input),
74            len,
75        }
76    }
77
78    /// Creates a new `SparseRSVec` from a `BitVec`, by compressing the sparse 1-bits.
79    ///
80    /// # Parameters
81    /// - `input`: The input `BitVec` to compress.
82    #[must_use]
83    pub fn from_bitvec(input: &BitVec) -> Self {
84        let len = input.len() as u64;
85        Self::new(
86            input
87                .iter()
88                .enumerate()
89                .filter(|&(_, bit)| bit == 1)
90                .map(|(i, _)| i as u64)
91                .collect::<Vec<_>>()
92                .as_slice(),
93            len,
94        )
95    }
96
97    /// Creates a new `SparseRSVec` from a `BitVec`.
98    /// However, before compressing the 1-bits, the input is inverted.
99    /// This means that the sparse vector will compress the 0-bits instead of the 1-bits,
100    /// and the [`rank1`] and [`select1`] functions will return the number of 0-bits and the position of 0-bits.
101    ///
102    /// This is a convenience function to allow for easy creation of sparse vectors that compress
103    /// zeros, despite the lack of a `select0` function.
104    ///
105    /// However, do note that [`get`] will return the inverted value of the bit at position `i` from
106    /// the original `BitVec`.
107    ///
108    /// # Parameters
109    /// - `input`: The input `BitVec` to compress.
110    ///
111    /// # Example
112    /// ```
113    /// use vers_vecs::SparseRSVec;
114    /// use vers_vecs::BitVec;
115    ///
116    /// let mut bv = BitVec::from_ones(12);
117    /// // set 6 and 7 to 0
118    /// bv.flip_bit(6);
119    /// bv.flip_bit(7);
120    ///
121    /// let sparse = SparseRSVec::from_bitvec_inverted(&bv);
122    /// // now select1 gives the position of 0-bits
123    /// assert_eq!(sparse.select1(1), 7);
124    /// ```
125    ///
126    /// [`rank1`]: #method.rank1
127    /// [`select1`]: #method.select1
128    /// [`get`]: #method.get
129    #[must_use]
130    pub fn from_bitvec_inverted(input: &BitVec) -> Self {
131        let len = input.len() as u64;
132        Self::new(
133            input
134                .iter()
135                .enumerate()
136                .filter(|&(_, bit)| bit == 0)
137                .map(|(i, _)| i as u64)
138                .collect::<Vec<_>>()
139                .as_slice(),
140            len,
141        )
142    }
143
144    /// Returns true if the bit at position `i` is set.
145    ///
146    /// If `i` is out of bounds the function produces incorrect results.
147    /// Use [`is_set`] for a checked version.
148    ///
149    /// [`is_set`]: #method.is_set
150    #[must_use]
151    pub fn is_set_unchecked(&self, i: u64) -> bool {
152        self.vec.predecessor_unchecked(i) == i
153    }
154
155    /// Returns true if the bit at position `i` is set.
156    ///
157    /// Returns `None` if `i` is out of bounds.
158    #[must_use]
159    pub fn is_set(&self, i: u64) -> Option<bool> {
160        if i >= self.len {
161            None
162        } else {
163            // if the predecessor is None, the bit is left of the first 1-bit
164            Some(self.vec.predecessor(i).is_some_and(|p| p == i))
165        }
166    }
167
168    /// Gets the bit at position `i`.
169    /// Returns 1 if the bit is set, 0 if it is not set.
170    ///
171    /// # Panics
172    /// If `i` is out of bounds the function might panic or produce incorrect results.
173    /// Use [`get`] for a checked version.
174    ///
175    /// [`get`]: Self::get
176    #[must_use]
177    pub fn get_unchecked(&self, i: u64) -> u64 {
178        self.is_set_unchecked(i).into()
179    }
180
181    /// Gets the bit at position `i`.
182    /// Returns `Some(1)` if the bit is set, `Some(0)` if it is not set, and `None` if `i` is out of bounds.
183    #[must_use]
184    pub fn get(&self, i: u64) -> Option<u64> {
185        self.is_set(i).map(std::convert::Into::into)
186    }
187
188    /// Return the position of the 1-bit with the given rank.
189    /// The following holds for all `pos` with 1-bits:
190    /// ``select1(rank1(pos)) == pos``
191    ///
192    /// If the rank is larger than the number of sparse bits in the vector, the vector length is returned.
193    #[must_use]
194    pub fn select1(&self, i: usize) -> u64 {
195        self.vec.get(i).unwrap_or(self.len)
196    }
197
198    /// Returns the number of 1-bits in the vector up to position `i`.
199    ///
200    /// If `i` is out of bounds, the number of 1-bits in the vector is returned.
201    #[must_use]
202    pub fn rank1(&self, i: u64) -> u64 {
203        self.vec.rank(i)
204    }
205
206    /// Returns the number of 0-bits in the vector up to position `i`.
207    ///
208    /// If `i` is out of bounds, the number of 0-bits in the vector is returned.
209    #[must_use]
210    pub fn rank0(&self, i: u64) -> u64 {
211        if i >= self.len {
212            self.len - self.vec.rank(self.len)
213        } else {
214            i - self.vec.rank(i)
215        }
216    }
217
218    /// Returns an iterator over the 1-bits in the vector.
219    /// The iterator yields the positions of the 1-bits in ascending order.
220    pub fn iter1(&self) -> impl Iterator<Item = u64> + '_ {
221        self.vec.iter()
222    }
223
224    /// Returns the length of the bit vector if it was uncompressed.
225    #[must_use]
226    pub fn len(&self) -> u64 {
227        self.len
228    }
229
230    /// Returns true if the vector is empty.
231    #[must_use]
232    pub fn is_empty(&self) -> bool {
233        self.len == 0
234    }
235
236    /// Returns the number of bytes used by the vector on the heap.
237    ///  Does not include allocated memory that isn't used.
238    #[must_use]
239    pub fn heap_size(&self) -> usize {
240        self.vec.heap_size()
241    }
242}
243
244impl From<BitVec> for SparseRSVec {
245    fn from(input: BitVec) -> Self {
246        Self::from_bitvec_inverted(&input)
247    }
248}
249
250impl<'a> From<&'a BitVec> for SparseRSVec {
251    fn from(input: &'a BitVec) -> Self {
252        Self::from_bitvec_inverted(input)
253    }
254}
255
256#[cfg(test)]
257mod tests {
258    use super::SparseRSVec;
259    use crate::BitVec;
260    use rand::prelude::StdRng;
261    use rand::{Rng, SeedableRng};
262
263    #[test]
264    fn test_sparse_rank() {
265        let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
266        assert_eq!(sparse.rank1(0), 0);
267        assert_eq!(sparse.rank1(1), 0);
268        assert_eq!(sparse.rank1(2), 1);
269        assert_eq!(sparse.rank1(3), 1);
270        assert_eq!(sparse.rank1(4), 2);
271        assert_eq!(sparse.rank1(5), 2);
272        assert_eq!(sparse.rank1(6), 3);
273        assert_eq!(sparse.rank1(7), 3);
274        assert_eq!(sparse.rank1(8), 4);
275        assert_eq!(sparse.rank1(9), 4);
276        assert_eq!(sparse.rank1(10), 5);
277        assert_eq!(sparse.rank1(11), 5);
278        assert_eq!(sparse.rank1(12), 5);
279        assert_eq!(sparse.rank1(999), 5);
280    }
281
282    #[test]
283    fn test_sparse_select() {
284        let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
285        assert_eq!(sparse.select1(0), 1);
286        assert_eq!(sparse.select1(1), 3);
287        assert_eq!(sparse.select1(2), 5);
288        assert_eq!(sparse.select1(3), 7);
289        assert_eq!(sparse.select1(4), 9);
290        assert_eq!(sparse.select1(5), 12);
291        assert_eq!(sparse.select1(6), 12);
292    }
293
294    #[test]
295    fn test_sparse_rank0() {
296        let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
297        assert_eq!(sparse.rank0(0), 0);
298        assert_eq!(sparse.rank0(1), 1);
299        assert_eq!(sparse.rank0(2), 1);
300        assert_eq!(sparse.rank0(3), 2);
301        assert_eq!(sparse.rank0(4), 2);
302        assert_eq!(sparse.rank0(5), 3);
303        assert_eq!(sparse.rank0(6), 3);
304        assert_eq!(sparse.rank0(7), 4);
305        assert_eq!(sparse.rank0(8), 4);
306        assert_eq!(sparse.rank0(9), 5);
307        assert_eq!(sparse.rank0(10), 5);
308        assert_eq!(sparse.rank0(11), 6);
309        assert_eq!(sparse.rank0(12), 7);
310        assert_eq!(sparse.rank0(999), 7);
311    }
312
313    #[test]
314    fn test_empty_sparse() {
315        let sparse = SparseRSVec::new(&[], 0);
316        assert_eq!(sparse.rank1(0), 0);
317        assert_eq!(sparse.rank1(1), 0);
318        assert_eq!(sparse.rank1(999), 0);
319        assert_eq!(sparse.select1(0), 0);
320        assert_eq!(sparse.select1(1), 0);
321        assert_eq!(sparse.select1(999), 0);
322        assert_eq!(sparse.rank0(0), 0);
323        assert_eq!(sparse.rank0(1), 0);
324        assert_eq!(sparse.rank0(999), 0);
325        assert!(sparse.is_empty());
326        assert_eq!(sparse.len(), 0);
327    }
328
329    #[test]
330    fn test_sparse_get() {
331        let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
332        assert_eq!(sparse.get(0), Some(0));
333        assert_eq!(sparse.get(1), Some(1));
334        assert_eq!(sparse.get(2), Some(0));
335        assert_eq!(sparse.get(3), Some(1));
336        assert_eq!(sparse.get(4), Some(0));
337        assert_eq!(sparse.get(5), Some(1));
338        assert_eq!(sparse.get(6), Some(0));
339        assert_eq!(sparse.get(7), Some(1));
340        assert_eq!(sparse.get(8), Some(0));
341        assert_eq!(sparse.get(9), Some(1));
342        assert_eq!(sparse.get(10), Some(0));
343        assert_eq!(sparse.get(11), Some(0));
344        assert_eq!(sparse.get(12), None);
345        assert_eq!(sparse.get(999), None);
346    }
347
348    #[test]
349    fn test_from_bitvector() {
350        let mut bv = BitVec::from_ones(12);
351        bv.flip_bit(6);
352        bv.flip_bit(7);
353
354        let sparse = SparseRSVec::from_bitvec(&bv);
355        assert_eq!(sparse.rank1(0), 0);
356        assert_eq!(sparse.rank1(1), 1);
357        assert_eq!(sparse.rank1(2), 2);
358        assert_eq!(sparse.rank1(7), 6);
359        assert_eq!(sparse.rank1(8), 6);
360        assert_eq!(sparse.rank1(9), 7);
361        assert_eq!(sparse.rank1(12), 10);
362
363        let sparse = SparseRSVec::from_bitvec_inverted(&bv);
364        assert_eq!(sparse.rank1(0), 0);
365        assert_eq!(sparse.rank1(1), 0);
366        assert_eq!(sparse.rank1(2), 0);
367        assert_eq!(sparse.rank1(7), 1);
368        assert_eq!(sparse.rank1(8), 2);
369        assert_eq!(sparse.rank1(9), 2);
370        assert_eq!(sparse.rank1(12), 2);
371    }
372
373    #[test]
374    fn test_large_block() {
375        // test that the implementation works correctly if the search triggers a binary search
376        let sparse = SparseRSVec::new(
377            &[
378                1, 100_000, 100_001, 100_002, 100_003, 100_004, 100_005, 100_006, 100_007, 100_008,
379                100_009, 100_010, 1_000_000,
380            ],
381            2_000_000,
382        );
383        assert_eq!(sparse.rank1(100_008), 9);
384        assert_eq!(sparse.rank1(100_012), 12);
385    }
386
387    #[test]
388    fn test_fuzzy() {
389        const L: usize = 100_000;
390        let mut bv = BitVec::from_zeros(L);
391        let mut rng = StdRng::from_seed([0; 32]);
392
393        for _ in 0..L / 4 {
394            bv.flip_bit(rng.gen_range(0..L));
395        }
396
397        let sparse = SparseRSVec::from_bitvec(&bv);
398
399        let mut ones = 0;
400        for i in 0..L {
401            assert_eq!(bv.get(i), sparse.get(i as u64));
402            assert_eq!(ones, sparse.rank1(i as u64));
403            assert_eq!(i as u64 - ones, sparse.rank0(i as u64));
404            if bv.get(i) == Some(1) {
405                assert_eq!(i, sparse.select1(ones as usize).try_into().unwrap());
406                ones += 1;
407            }
408        }
409    }
410
411    #[test]
412    fn test_from_padded_bitvec() {
413        // test no garbage is added to the sparse vec when the bit vector contains trailing data
414        let mut bv = BitVec::new();
415        bv.append_bit(1);
416        bv.append_bit(0);
417        bv.append_bits(u64::MAX, 10);
418        bv.drop_last(10);
419        bv.append_bit(0);
420        bv.drop_last(1);
421
422        let sparse = SparseRSVec::from_bitvec(&bv);
423        assert_eq!(sparse.len(), 2);
424        assert_eq!(sparse.get(0), Some(1));
425        assert_eq!(sparse.get(1), Some(0));
426        assert_eq!(sparse.iter1().collect::<Vec<_>>(), vec![0]);
427    }
428}