simple_sds_sbwt/bit_vector/
select_support.rs1use crate::bit_vector::{BitVector, Transformation};
30use crate::int_vector::IntVector;
31use crate::ops::{Vector, Resize, Pack, Access, Push, BitVec};
32use crate::serialize::Serialize;
33use crate::bits;
34
35use std::io::{Error, ErrorKind};
36use std::{io, marker};
37
38#[derive(Clone, Debug, PartialEq, Eq)]
47pub struct SelectSupport<T: Transformation> {
48 samples: IntVector,
50
51 long: IntVector,
54
55 short: IntVector,
58
59 _marker: marker::PhantomData<T>,
61}
62
63impl<T: Transformation> SelectSupport<T> {
64 pub const SUPERBLOCK_SIZE: usize = 4096;
66
67 const SUPERBLOCK_MASK: usize = 0xFFF;
68 const BLOCKS_IN_SUPERBLOCK: usize = 64;
69 const BLOCK_SIZE: usize = 64;
70 const BLOCK_MASK: usize = 0x3F;
71
72 pub fn superblocks(&self) -> usize {
74 self.samples.len() / 2
75 }
76
77 pub fn long_superblocks(&self) -> usize {
79 (self.long.len() + Self::SUPERBLOCK_SIZE - 1) / Self::SUPERBLOCK_SIZE
80 }
81
82 pub fn short_superblocks(&self) -> usize {
84 (self.short.len() + Self::BLOCKS_IN_SUPERBLOCK - 1) / Self::BLOCKS_IN_SUPERBLOCK
85 }
86
87 pub fn new(parent: &BitVector) -> SelectSupport<T> {
103 let superblocks = (T::count_ones(parent) + Self::SUPERBLOCK_SIZE - 1) / Self::SUPERBLOCK_SIZE;
104 let log4 = bits::bit_len(parent.len() as u64);
105 let log4 = log4 * log4;
106 let log4 = log4 * log4;
107
108 let mut result = SelectSupport {
109 samples: IntVector::default(),
110 long: IntVector::default(),
111 short: IntVector::default(),
112 _marker: marker::PhantomData,
113 };
114 result.samples.reserve(superblocks * 2);
115
116 let mut sample_iter = T::one_iter(parent);
118 let mut sample = sample_iter.next();
119
120 let mut iter = T::one_iter(parent);
122 let mut value = iter.next();
123
124 while sample != None {
125 let start = sample.unwrap();
126 let next_sample = sample_iter.nth(Self::SUPERBLOCK_SIZE - 1);
127 let limit = match next_sample {
128 Some(v) => v,
129 None => (T::count_ones(parent), parent.len()),
130 };
131 result.samples.push(start.1 as u64);
132 if limit.1 - start.1 >= log4 {
134 result.samples.push((2 * result.long.len()) as u64);
135 let values = limit.0 - start.0;
136 for _ in 0..values {
137 result.long.push((value.unwrap().1 - start.1) as u64);
138 value = iter.next();
139 }
140 }
141 else {
143 result.samples.push((2 * result.short.len() + 1) as u64);
144 let blocks = ((limit.0 - start.0) + Self::BLOCK_SIZE - 1) / Self::BLOCK_SIZE;
145 for _ in 0..blocks {
146 result.short.push((value.unwrap().1 - start.1) as u64);
147 value = iter.nth(Self::BLOCK_SIZE - 1);
148 }
149 }
150 sample = next_sample;
151 }
152
153 result.samples.pack();
154 result.long.pack();
155 result.short.pack();
156 result
157 }
158
159 pub fn select(&self, parent: &BitVector, rank: usize) -> usize {
184 let (superblock, offset) = (rank / Self::SUPERBLOCK_SIZE, rank & Self::SUPERBLOCK_MASK);
185 let mut result: usize = self.samples.get(2 * superblock) as usize;
186 if offset == 0 {
187 return result;
188 }
189
190 let ptr = self.samples.get(2 * superblock + 1) as usize;
191 let (ptr, is_short) = (ptr / 2, ptr & 1);
192 if is_short == 0 {
193 result += self.long.get(ptr + offset) as usize;
194 } else {
195 let (block, mut relative_rank) = (offset / Self::BLOCK_SIZE, offset & Self::BLOCK_MASK);
196 result += self.short.get(ptr + block) as usize;
197 if relative_rank > 0 {
200 let (mut word, word_offset) = bits::split_offset(result);
201 let mut value: u64 = T::word(parent, word) & unsafe { !bits::low_set_unchecked(word_offset) };
202 loop {
203 let ones = value.count_ones() as usize;
204 if ones > relative_rank {
205 result = bits::bit_offset(word, unsafe { bits::select(value, relative_rank) });
206 break;
207 }
208 relative_rank -= ones;
209 word += 1;
210 value = T::word(parent, word);
211 }
212 }
213 }
214
215 result
216 }
217
218 pub unsafe fn select_unchecked(&self, parent: &BitVector, rank: usize) -> usize {
224 let (superblock, offset) = (rank / Self::SUPERBLOCK_SIZE, rank & Self::SUPERBLOCK_MASK);
225 let mut result: usize = self.samples.get(2 * superblock) as usize;
226 if offset == 0 {
227 return result;
228 }
229
230 let ptr = self.samples.get(2 * superblock + 1) as usize;
231 let (ptr, is_short) = (ptr / 2, ptr & 1);
232 if is_short == 0 {
233 result += self.long.get(ptr + offset) as usize;
234 } else {
235 let (block, mut relative_rank) = (offset / Self::BLOCK_SIZE, offset & Self::BLOCK_MASK);
236 result += self.short.get(ptr + block) as usize;
237 if relative_rank > 0 {
240 let (mut word, word_offset) = bits::split_offset(result);
241 let mut value: u64 = T::word_unchecked(parent, word) & !bits::low_set_unchecked(word_offset);
242 loop {
243 let ones = value.count_ones() as usize;
244 if ones > relative_rank {
245 result = bits::bit_offset(word, bits::select(value, relative_rank));
246 break;
247 }
248 relative_rank -= ones;
249 word += 1;
250 value = T::word_unchecked(parent, word);
251 }
252 }
253 }
254
255 result
256 }
257}
258
259impl<T: Transformation> Serialize for SelectSupport<T> {
262 fn serialize_header<W: io::Write>(&self, _: &mut W) -> io::Result<()> {
263 Ok(())
264 }
265
266 fn serialize_body<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
267 self.samples.serialize(writer)?;
268 self.long.serialize(writer)?;
269 self.short.serialize(writer)?;
270 Ok(())
271 }
272
273 fn load<W: io::Read>(reader: &mut W) -> io::Result<Self> {
274 let samples = IntVector::load(reader)?;
275 let long = IntVector::load(reader)?;
276 let short = IntVector::load(reader)?;
277 let result = SelectSupport {
278 samples,
279 long,
280 short,
281 _marker: marker::PhantomData,
282 };
283 if result.superblocks() != result.long_superblocks() + result.short_superblocks() {
284 Err(Error::new(ErrorKind::InvalidData, "Invalid long/short superblock counts"))
285 }
286 else {
287 Ok(result)
288 }
289 }
290
291 fn size_in_elements(&self) -> usize {
292 self.samples.size_in_elements() + self.long.size_in_elements() + self.short.size_in_elements()
293 }
294}
295
296#[cfg(test)]
299mod tests {
300 use super::*;
301 use crate::bit_vector::{BitVector, Identity};
302 use crate::ops::BitVec;
303 use crate::raw_vector::{RawVector, PushRaw};
304 use crate::serialize;
305 use rand::distributions::{Bernoulli, Distribution};
306
307 #[test]
308 fn empty_vector() {
309 let bv = BitVector::from(RawVector::new());
310 let ss = SelectSupport::<Identity>::new(&bv);
311 assert_eq!(ss.superblocks(), 0, "Non-zero select superblocks for empty vector");
312 assert_eq!(ss.long_superblocks(), 0, "Non-zero long superblocks for empty vector");
313 assert_eq!(ss.short_superblocks(), 0, "Non-zero short superblocks for empty vector");
314 }
315
316 fn with_density(len: usize, density: f64) -> RawVector {
317 let mut data = RawVector::with_capacity(len);
318 let mut rng = rand::thread_rng();
319 let dist = Bernoulli::new(density).unwrap();
320 let mut iter = dist.sample_iter(&mut rng);
321 while data.len() < len {
322 data.push_bit(iter.next().unwrap());
323 }
324 assert_eq!(data.len(), len, "Invalid length for random RawVector");
325 data
326 }
327
328 fn test_vector(len: usize, density: f64) {
329 let data = with_density(len, density);
330 let bv = BitVector::from(data.clone());
331 let ss = SelectSupport::<Identity>::new(&bv);
332 assert_eq!(bv.len(), len, "test_vector({}, {}): invalid bitvector length", len, density);
333
334 let superblocks = ss.superblocks();
335 let long = ss.long_superblocks();
336 let short = ss.short_superblocks();
337 assert_eq!(superblocks, long + short, "test_vector({}, {}): block counts do not match", len, density);
338
339 let ones: f64 = bv.count_ones() as f64;
341 let expected: f64 = len as f64 * density;
342 let stdev: f64 = (len as f64 * density * (1.0 - density)).sqrt();
343 assert!(ones >= expected - 6.0 * stdev && ones <= expected + 6.0 * stdev,
344 "test_vector({}, {}): unexpected number of ones: {}", len, density, ones);
345
346 let mut next: usize = 0;
347 for i in 0..bv.count_ones() {
348 let value = ss.select(&bv, i);
349 assert!(value >= next, "test_vector({}, {}): select({}) == {}, expected at least {}", len, density, i, value, next);
350 assert!(bv.get(value), "test_vector({}, {}): select({}) == {} is not set", len, density, i, value);
351 next = value + 1;
352 }
353
354 unsafe {
355 let mut next: usize = 0;
356 for i in 0..bv.count_ones() {
357 let value = ss.select_unchecked(&bv, i);
358 assert!(value >= next, "test_vector({}, {}): select_unchecked({}) == {}, expected at least {}", len, density, i, value, next);
359 assert!(bv.get(value), "test_vector({}, {}): select_unchecked({}) == {} is not set", len, density, i, value);
360 next = value + 1;
361 }
362 }
363 }
364
365 #[test]
366 fn non_empty_vector() {
367 test_vector(8131, 0.1);
368 test_vector(8192, 0.5);
369 test_vector(8266, 0.9);
370 }
371
372 #[test]
373 fn serialize() {
374 let data = with_density(5187, 0.5);
375 let bv = BitVector::from(data);
376 let original = SelectSupport::<Identity>::new(&bv);
377 let _ = serialize::test(&original, "select-support", None, true);
378 }
379}
380
381