fmlrc/
indexed_bit_vec.rs

1use math::round;
2
3/*
4TODO: when rust allows loops in const functions, this will be preferred IMO
5const fn build_low_shift() -> [u64; 65] {
6    let mut ret: [u64; 65] = [0; 65];
7    for set_count in 1..64 {
8        ret[set_count] = 0xFFFF_FFFF_FFFF_FFFF >> (64 - set_count);
9    }
10    ret
11}
12const LOW_SET_FLAGS: [u64; 65] = build_low_shift();
13*/
14
15//interim hard set the low-set bits array, replace with above once fixed
16const LOW_SET_FLAGS: [u64; 65] = [
17    0x0000_0000_0000_0000,
18    0x0000_0000_0000_0001, 0x0000_0000_0000_0003, 0x0000_0000_0000_0007, 0x0000_0000_0000_000F,
19    0x0000_0000_0000_001F, 0x0000_0000_0000_003F, 0x0000_0000_0000_007F, 0x0000_0000_0000_00FF,
20    0x0000_0000_0000_01FF, 0x0000_0000_0000_03FF, 0x0000_0000_0000_07FF, 0x0000_0000_0000_0FFF,
21    0x0000_0000_0000_1FFF, 0x0000_0000_0000_3FFF, 0x0000_0000_0000_7FFF, 0x0000_0000_0000_FFFF,
22    0x0000_0000_0001_FFFF, 0x0000_0000_0003_FFFF, 0x0000_0000_0007_FFFF, 0x0000_0000_000F_FFFF,
23    0x0000_0000_001F_FFFF, 0x0000_0000_003F_FFFF, 0x0000_0000_007F_FFFF, 0x0000_0000_00FF_FFFF,
24    0x0000_0000_01FF_FFFF, 0x0000_0000_03FF_FFFF, 0x0000_0000_07FF_FFFF, 0x0000_0000_0FFF_FFFF,
25    0x0000_0000_1FFF_FFFF, 0x0000_0000_3FFF_FFFF, 0x0000_0000_7FFF_FFFF, 0x0000_0000_FFFF_FFFF,
26    0x0000_0001_FFFF_FFFF, 0x0000_0003_FFFF_FFFF, 0x0000_0007_FFFF_FFFF, 0x0000_000F_FFFF_FFFF,
27    0x0000_001F_FFFF_FFFF, 0x0000_003F_FFFF_FFFF, 0x0000_007F_FFFF_FFFF, 0x0000_00FF_FFFF_FFFF,
28    0x0000_01FF_FFFF_FFFF, 0x0000_03FF_FFFF_FFFF, 0x0000_07FF_FFFF_FFFF, 0x0000_0FFF_FFFF_FFFF,
29    0x0000_1FFF_FFFF_FFFF, 0x0000_3FFF_FFFF_FFFF, 0x0000_7FFF_FFFF_FFFF, 0x0000_FFFF_FFFF_FFFF,
30    0x0001_FFFF_FFFF_FFFF, 0x0003_FFFF_FFFF_FFFF, 0x0007_FFFF_FFFF_FFFF, 0x000F_FFFF_FFFF_FFFF,
31    0x001F_FFFF_FFFF_FFFF, 0x003F_FFFF_FFFF_FFFF, 0x007F_FFFF_FFFF_FFFF, 0x00FF_FFFF_FFFF_FFFF,
32    0x01FF_FFFF_FFFF_FFFF, 0x03FF_FFFF_FFFF_FFFF, 0x07FF_FFFF_FFFF_FFFF, 0x0FFF_FFFF_FFFF_FFFF,
33    0x1FFF_FFFF_FFFF_FFFF, 0x3FFF_FFFF_FFFF_FFFF, 0x7FFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF_FFFF_FFFF,
34];
35
36/// the number of bits to shift out when finding the index
37const INDEX_SHIFT: usize = 6;
38/// the flag for selecting
39const INDEX_LOWER_MASK: u64 = LOW_SET_FLAGS[INDEX_SHIFT];
40
41/// A block containing both the bits in the block and the rank before the block
42#[derive(Clone,Default)]
43pub struct BitVecBlock {
44    pub bits: u64,
45    pub index: u64
46}
47
48/// Represents a bit vector with an very fast index on top of it.
49/// For a vector of length N, the index takes up O(N) bits.
50pub struct IndexedBitVec {
51    index_size: usize,
52    data_vec: Vec<BitVecBlock>
53}
54
55impl IndexedBitVec {
56    /// Returns an IndexedBitVec with pre-allocated bit-vector and index space.  Note that both the bit-array and index are empty.
57    /// # Arguments
58    /// * `size` - the number of bits in the bit-vector
59    /// # Examples
60    /// ```rust
61    /// use fmlrc::indexed_bit_vec::IndexedBitVec;
62    /// let mut ibv = IndexedBitVec::with_capacity(128);
63    /// ```
64    #[inline]
65	pub fn with_capacity(size: usize) -> Self {
66        let index_size: usize = (round::ceil((size as f64) / 64.0, 0) as usize)+1;
67        Self {
68            index_size,
69            data_vec: vec![Default::default(); index_size]
70        }
71    }
72
73    /// Sets a bit in the array. This MUST be used prior to indexing.
74    /// # Arguments
75    /// * `pos` - the index of the bit to set
76    /// # Examples
77    /// ```rust
78    /// # use fmlrc::indexed_bit_vec::IndexedBitVec;
79    /// # let mut ibv = IndexedBitVec::with_capacity(128);
80    /// ibv.set_bit(64);
81    /// ```
82    #[inline]
83    pub fn set_bit(&mut self, pos: usize) {
84        self.data_vec[pos >> INDEX_SHIFT].bits |= 0x1 << (pos & INDEX_LOWER_MASK as usize);
85    }
86
87    /// Currently, this function is strictly for testing
88    #[inline]
89    pub fn get_bit(&mut self, pos: usize) -> bool {
90        ((self.data_vec[pos >> INDEX_SHIFT].bits >> (pos & INDEX_LOWER_MASK as usize)) & 0x1) != 0
91    }
92
93    /// Builds an index for the array to perform rank queries.
94    /// # Arguments
95    /// * `initial_rank` - the base rank at bit 0, set to 0 for traditional ranking
96    /// # Examples
97    /// ```rust
98    /// # use fmlrc::indexed_bit_vec::IndexedBitVec;
99    /// # let mut ibv = IndexedBitVec::with_capacity(128);
100    /// # ibv.set_bit(64);
101    /// let initial_rank=0;
102    /// ibv.build_index(initial_rank);
103    /// ```
104    pub fn build_index(&mut self, initial_rank: u64) {
105        let mut current_rank = initial_rank;
106        //iterate through each block, set the rank and then add the bit count
107        for bv_val in self.data_vec.iter_mut().take(self.index_size-1) {
108            bv_val.index = current_rank;
109            current_rank += bv_val.bits.count_ones() as u64;
110        }
111        //set the final one to the total count
112        self.data_vec[self.index_size-1].index = current_rank;
113    }
114
115    /// Performs a rank-1 query, returned the number of set bits up to but NOT including `pos`
116    /// # Arguments
117    /// * `pos` - the position to use for ranking
118    /// # Examples
119    /// ```rust
120    /// # use fmlrc::indexed_bit_vec::IndexedBitVec;
121    /// # let mut ibv = IndexedBitVec::with_capacity(128);
122    /// # ibv.set_bit(64);
123    /// # let initial_rank=0;
124    /// # ibv.build_index(initial_rank);
125    /// assert_eq!(ibv.rank(64), initial_rank);
126    /// assert_eq!(ibv.rank(65), initial_rank+1);
127    /// ```
128    #[inline]
129    pub fn rank(&self, pos: usize) -> u64 {
130        let offset: usize = pos >> INDEX_SHIFT;
131        //rank until block + rank up until particular bit
132        self.data_vec[offset].index + 
133            (self.data_vec[offset].bits & LOW_SET_FLAGS[pos & INDEX_LOWER_MASK as usize]).count_ones() as u64
134    }
135
136    /// Performs a rank-1 query without bounds checking, returned the number of set bits up to but NOT including `pos`
137    /// # Arguments
138    /// * `pos` - the position to use for ranking
139    /// # Safety
140    /// If `pos` is outside the length of the bit vector, this will have undefined behavior.
141    /// # Examples
142    /// ```rust
143    /// # use fmlrc::indexed_bit_vec::IndexedBitVec;
144    /// # let mut ibv = IndexedBitVec::with_capacity(128);
145    /// # ibv.set_bit(64);
146    /// # let initial_rank=0;
147    /// # ibv.build_index(initial_rank);
148    /// unsafe {
149    ///   assert_eq!(ibv.rank_unchecked(64), initial_rank);
150    ///   assert_eq!(ibv.rank_unchecked(65), initial_rank+1);
151    /// }
152    /// ```
153    #[inline]
154    pub unsafe fn rank_unchecked(&self, pos:usize) -> u64 {
155        let vec_offset: usize = pos >> INDEX_SHIFT;
156        //rank until block + rank up until particular bit
157        self.data_vec.get_unchecked(vec_offset).index + 
158            (self.data_vec.get_unchecked(vec_offset).bits & LOW_SET_FLAGS.get_unchecked(pos & INDEX_LOWER_MASK as usize)).count_ones() as u64
159    }
160}
161
162#[cfg(test)]
163mod tests {
164    use super::*;
165    
166    #[test]
167    fn test_allocation() {
168        //just test index allocation at the corners
169        let ibv = IndexedBitVec::with_capacity(63);
170        assert_eq!(ibv.data_vec.len(), 2);
171        let ibv = IndexedBitVec::with_capacity(64);
172        assert_eq!(ibv.data_vec.len(), 2);
173        let ibv = IndexedBitVec::with_capacity(65);
174        assert_eq!(ibv.data_vec.len(), 3);
175    }
176    
177    #[test]
178    fn test_set_bit() {
179        //array with 128 "1"s, then 128 "0"s, then 1 "1"
180        let mut ibv = IndexedBitVec::with_capacity(257);
181        for i in 0..128 {
182            ibv.set_bit(i);
183        }
184        ibv.set_bit(256);
185
186        //check the sets
187        for i in 0..128 {
188            assert_eq!(ibv.get_bit(i), true);
189        }
190        for i in 128..256 {
191            assert_eq!(ibv.get_bit(i), false);
192        }
193        assert_eq!(ibv.get_bit(256), true);
194
195        //check that our slicing is correct also
196        let slice: &[BitVecBlock] = ibv.data_vec.as_slice();
197        assert_eq!(slice.len(), 6);
198        for i in 0..2 {
199            assert_eq!(slice[i].bits, 0xffffffffffffffff);
200        }
201        for i in 2..4 {
202            assert_eq!(slice[i].bits, 0x0);
203        }
204        assert_eq!(slice[4].bits, 0x1)
205    }
206    
207    #[test]
208    fn test_indexing() {
209        //array with 128 "1"s, then 128 "0"s, then 1 "1"
210        let mut ibv = IndexedBitVec::with_capacity(257);
211        for i in 0..128 {
212            ibv.set_bit(i);
213        }
214        ibv.set_bit(256);
215
216        ibv.build_index(0);
217        let slice: &[BitVecBlock] = ibv.data_vec.as_slice();
218        assert_eq!(slice.len(), 6);
219        let counts = vec![0, 64, 128, 128, 128, 129];
220        for i in 0..slice.len() {
221            assert_eq!(counts[i], slice[i].index);
222        }
223
224        //progressively adding bits
225        let mut ibv = IndexedBitVec::with_capacity(64);
226        for i in 0..64 {
227            ibv.set_bit(i);
228            ibv.build_index(0);
229            let slice: &[BitVecBlock] = ibv.data_vec.as_slice();
230            assert_eq!(slice[1].index, (i+1) as u64);
231        }
232    }
233    
234    #[test]
235    fn test_offset() {
236        //progressively adding bits
237        let mut ibv = IndexedBitVec::with_capacity(64);
238        for i in 0..64 {
239            ibv.set_bit(i);
240        }
241        ibv.build_index(100);
242        let slice: &[BitVecBlock] = ibv.data_vec.as_slice();
243        assert_eq!(slice[1].index, 164);
244    }
245
246    #[test]
247    fn test_rank() {
248        //array with 128 "1"s, then 128 "0"s, then 1 "1"
249        let mut ibv = IndexedBitVec::with_capacity(257);
250        for i in 0..128 {
251            ibv.set_bit(i);
252        }
253        ibv.set_bit(256);
254        ibv.build_index(0);
255
256        for i in 0..128 {
257            assert_eq!(ibv.rank(i), i as u64);
258        }
259        for i in 128..257 {
260            assert_eq!(ibv.rank(i), 128);
261        }
262        assert_eq!(ibv.rank(257), 129);
263    }
264}