simple_sds_sbwt/bit_vector/
rank_support.rs1use crate::bit_vector::BitVector;
25use crate::ops::BitVec;
26use crate::raw_vector::AccessRaw;
27use crate::serialize::Serialize;
28use crate::bits;
29
30use std::{cmp, io};
31
32#[derive(Clone, Debug, PartialEq, Eq)]
39pub struct RankSupport {
40 samples: Vec<(u64, u64)>,
42}
43
44impl RankSupport {
45 pub const BLOCK_SIZE: usize = 512;
47
48 const RELATIVE_RANK_BITS: usize = 9;
49 const RELATIVE_RANK_MASK: usize = 0x1FF;
50 const WORDS_PER_BLOCK: usize = 8;
51 const WORD_MASK: usize = 0x7;
52
53 pub fn blocks(&self) -> usize {
55 self.samples.len()
56 }
57
58 pub fn new(parent: &BitVector) -> RankSupport {
72 let words = bits::bits_to_words(parent.len());
73 let blocks = (parent.len() + Self::BLOCK_SIZE - 1) / Self::BLOCK_SIZE;
74 let mut samples: Vec<(u64, u64)> = Vec::with_capacity(blocks);
75
76 let mut ones: usize = 0;
77 for block in 0..blocks {
78 let mut block_ones: usize = 0;
79 let mut relative_ranks: u64 = 0;
80 let block_words = cmp::min(Self::WORDS_PER_BLOCK, words - block * Self::WORDS_PER_BLOCK);
81 for word in 0..block_words {
82 block_ones += parent.data.word(block * Self::WORDS_PER_BLOCK + word).count_ones() as usize;
83 relative_ranks |= ((block_ones as u64) << (word as u64 * Self::RELATIVE_RANK_BITS as u64)) as u64;
84 }
85 relative_ranks &= bits::low_set((Self::WORDS_PER_BLOCK - 1) * Self::RELATIVE_RANK_BITS);
87 samples.push((ones as u64, relative_ranks));
88 ones += block_ones;
89 }
90
91 RankSupport {
92 samples,
93 }
94 }
95
96 pub fn rank(&self, parent: &BitVector, index: usize) -> usize {
122 let block = index / Self::BLOCK_SIZE;
123 let (word, offset) = bits::split_offset(index);
124
125 let (block_start, relative_ranks) = self.samples[block];
127
128 let relative = (word + Self::WORDS_PER_BLOCK - 1) & Self::WORD_MASK;
132
133 let word_start = (relative_ranks >> (relative * Self::RELATIVE_RANK_BITS)) as usize & Self::RELATIVE_RANK_MASK;
135
136 let within_word = (parent.data.word(word) & unsafe { bits::low_set_unchecked(offset) }).count_ones() as usize;
138
139 block_start as usize + word_start + within_word
140 }
141
142 pub unsafe fn rank_unchecked(&self, parent: &BitVector, index: usize) -> usize {
148 let block = index / Self::BLOCK_SIZE;
149 let (word, offset) = bits::split_offset(index);
150
151 let (block_start, relative_ranks) = *self.samples.get_unchecked(block);
153
154 let relative = ((word & Self::WORD_MASK) + Self::WORDS_PER_BLOCK - 1) & Self::WORD_MASK;
158
159 let word_start = (relative_ranks >> (relative * Self::RELATIVE_RANK_BITS)) as usize & Self::RELATIVE_RANK_MASK;
161
162 let within_word = (parent.data.word_unchecked(word) & bits::low_set_unchecked(offset)).count_ones() as usize;
164
165 block_start as usize + word_start + within_word
166 }
167}
168
169impl Serialize for RankSupport {
172 fn serialize_header<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
173 self.samples.serialize_header(writer)?;
174 Ok(())
175 }
176
177 fn serialize_body<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
178 self.samples.serialize_body(writer)?;
179 Ok(())
180 }
181
182 fn load<T: io::Read>(reader: &mut T) -> io::Result<Self> {
183 let samples = Vec::<(u64, u64)>::load(reader)?;
184 Ok(RankSupport {
185 samples,
186 })
187 }
188
189 fn size_in_elements(&self) -> usize {
190 self.samples.size_in_elements()
191 }
192}
193
194#[cfg(test)]
197mod tests {
198 use super::*;
199 use crate::bit_vector::BitVector;
200 use crate::ops::BitVec;
201 use crate::raw_vector::{RawVector, PushRaw};
202 use crate::serialize;
203 use rand::Rng;
204
205 #[test]
206 fn empty_vector() {
207 let bv = BitVector::from(RawVector::new());
208 let rs = RankSupport::new(&bv);
209 assert_eq!(rs.blocks(), 0, "Non-zero rank blocks for empty vector");
210 }
211
212 fn raw_vector(len: usize) -> RawVector {
213 let mut data = RawVector::with_capacity(len);
214 let mut rng = rand::thread_rng();
215 while data.len() < len {
216 let value: u64 = rng.gen();
217 let bits = cmp::min(bits::WORD_BITS, len - data.len());
218 unsafe { data.push_int(value, bits); }
219 }
220 assert_eq!(data.len(), len, "Invalid length for random RawVector");
221 data
222 }
223
224 fn test_vector(len: usize, blocks: usize) {
225 let data = raw_vector(len);
226 let bv = BitVector::from(data.clone());
227 let rs = RankSupport::new(&bv);
228 assert_eq!(bv.len(), len, "Invalid bitvector length at {}", len);
229 assert_eq!(rs.blocks(), blocks, "Invalid number of rank blocks at {}", len);
230
231 let mut count: usize = 0;
232 for i in 0..bv.len() {
233 assert_eq!(rs.rank(&bv, i), count, "Invalid rank({}) at {}", i, len);
234 count += bv.get(i) as usize;
235 }
236
237 unsafe {
238 let mut count: usize = 0;
239 for i in 0..bv.len() {
240 assert_eq!(rs.rank_unchecked(&bv, i), count, "Invalid rank_unchecked({}) at {}", i, len);
241 count += bv.get(i) as usize;
242 }
243 }
244 }
245
246 #[test]
247 fn non_empty_vector() {
248 test_vector(4095, 8);
249 test_vector(4096, 8);
250 test_vector(4097, 9);
251 }
252
253 #[test]
254 fn serialize() {
255 let data = raw_vector(5187);
256 let bv = BitVector::from(data);
257 let original = RankSupport::new(&bv);
258 let _ = serialize::test(&original, "rank-support", None, true);
259 }
260}
261
262