simple_sds_sbwt/wavelet_matrix/
wm_core.rs1use crate::bit_vector::BitVector;
24use crate::ops::{BitVec, Rank, Select, SelectZero, PredSucc};
25use crate::raw_vector::{RawVector, PushRaw};
26use crate::serialize::Serialize;
27use crate::bits;
28
29use std::io::{Error, ErrorKind};
30use std::{cmp, io};
31
32#[derive(Clone, Debug, PartialEq, Eq)]
74pub struct WMCore {
75 levels: Vec<BitVector>,
76}
77
78impl WMCore {
79 pub fn len(&self) -> usize {
81 self.levels[0].len()
82 }
83
84 pub fn width(&self) -> usize {
86 self.levels.len()
87 }
88
89 pub fn map_down(&self, index: usize) -> Option<(usize, u64)> {
93 if index >= self.len() {
94 return None;
95 }
96
97 let mut index = index;
98 let mut value = 0;
99 for level in 0..self.width() {
100 if self.levels[level].get(index) {
101 index = self.map_down_one(index, level);
102 value += self.bit_value(level);
103 } else {
104 index = self.map_down_zero(index, level);
105 }
106 }
107
108 Some((index, value))
109 }
110
111 pub fn map_down_with(&self, index: usize, value: u64) -> usize {
121 let mut index = cmp::min(index, self.len());
122 for level in 0..self.width() {
123 if value & self.bit_value(level) != 0 {
124 index = self.map_down_one(index, level);
125 } else {
126 index = self.map_down_zero(index, level);
127 }
128 }
129
130 index
131 }
132
133 pub fn map_down_with_two_positions(&self, first: usize, second: usize, value: u64) -> (usize, usize) {
144 let mut first = cmp::min(first, self.len());
145 let mut second = cmp::min(second, self.len());
146 for level in 0..self.width() {
147 if value & self.bit_value(level) != 0 {
148 first = self.map_down_one(first, level);
149 second = self.map_down_one(second, level);
150 } else {
151 first = self.map_down_zero(first, level);
152 second = self.map_down_zero(second, level);
153 }
154 }
155
156 (first, second)
157 }
158
159 pub fn map_up_with(&self, index: usize, value: u64) -> Option<usize> {
169 let mut index = index;
170 for level in (0..self.width()).rev() {
171 if value & self.bit_value(level) != 0 {
172 index = self.map_up_one(index, level)?;
173 } else {
174 index = self.map_up_zero(index, level)?;
175 }
176 }
177
178 Some(index)
179 }
180
181 fn bit_value(&self, level: usize) -> u64 {
183 1 << (self.width() - 1 - level)
184 }
185
186 fn map_down_one(&self, index: usize, level: usize) -> usize {
188 self.levels[level].count_zeros() + self.levels[level].rank(index)
189 }
190
191 fn map_down_zero(&self, index: usize, level: usize) -> usize {
193 self.levels[level].rank_zero(index)
194 }
195
196 fn map_up_one(&self, index: usize, level: usize) -> Option<usize> {
198 self.levels[level].select(index - self.levels[level].count_zeros())
199 }
200
201 fn map_up_zero(&self, index: usize, level: usize) -> Option<usize> {
203 self.levels[level].select_zero(index)
204 }
205
206 fn init_support(&mut self) {
208 for bv in self.levels.iter_mut() {
209 bv.enable_rank();
210 bv.enable_select();
211 bv.enable_select_zero();
212 bv.enable_pred_succ();
213 }
214 }
215}
216
217macro_rules! wm_core_from {
220 ($t:ident) => {
221 impl From<Vec<$t>> for WMCore {
222 fn from(source: Vec<$t>) -> Self {
223 let mut source = source;
224 let max_value = source.iter().cloned().max().unwrap_or(0);
225 let width = bits::bit_len(max_value as u64);
226
227 let mut levels: Vec<BitVector> = Vec::new();
228 for level in 0..width {
229 let bit_value: $t = 1 << (width - 1 - level);
230 let mut zeros: Vec<$t> = Vec::new();
231 let mut ones: Vec<$t> = Vec::new();
232 let mut raw_data = RawVector::with_capacity(source.len());
233
234 for value in source.iter() {
236 if value & bit_value != 0 {
237 ones.push(*value);
238 raw_data.push_bit(true);
239 } else {
240 zeros.push(*value);
241 raw_data.push_bit(false);
242 }
243 }
244
245 source.clear();
247 source.extend(zeros);
248 source.extend(ones);
249
250 levels.push(BitVector::from(raw_data));
252 }
253
254 let mut result = WMCore { levels, };
255 result.init_support();
256 result
257 }
258 }
259 }
260}
261
262wm_core_from!(u8);
263wm_core_from!(u16);
264wm_core_from!(u32);
265wm_core_from!(u64);
266wm_core_from!(usize);
267
268impl Serialize for WMCore {
271 fn serialize_header<T: io::Write>(&self, _: &mut T) -> io::Result<()> {
272 Ok(())
273 }
274
275 fn serialize_body<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
276 let width = self.width();
277 width.serialize(writer)?;
278 for bv in self.levels.iter() {
279 bv.serialize(writer)?;
280 }
281 Ok(())
282 }
283
284 fn load<T: io::Read>(reader: &mut T) -> io::Result<Self> {
285 let width = usize::load(reader)?;
286 if width == 0 || width > bits::WORD_BITS {
287 return Err(Error::new(ErrorKind::InvalidData, "Invalid width"));
288 }
289
290 let mut len: Option<usize> = None;
291 let mut levels: Vec<BitVector> = Vec::with_capacity(width);
292 for _ in 0..width {
293 let bv = BitVector::load(reader)?;
294 match len {
295 Some(len) => {
296 if bv.len() != len {
297 return Err(Error::new(ErrorKind::InvalidData, "Invalid bitvector length"));
298 }
299 },
300 None => {
301 len = Some(bv.len());
302 },
303 }
304 levels.push(bv);
305 }
306 let mut result = WMCore { levels, };
307 result.init_support();
308 Ok(result)
309 }
310
311 fn size_in_elements(&self) -> usize {
312 let mut result = 1; for bv in self.levels.iter() {
314 result += bv.size_in_elements();
315 }
316 result
317 }
318}
319
320#[cfg(test)]
323mod tests {
324 use super::*;
325
326 use crate::{internal, serialize};
327
328 fn reordered_vector(original: &[u64]) -> Vec<u64> {
329 let mut result = Vec::from(original);
330 result.sort_unstable_by_key(|x| x.reverse_bits());
331 result
332 }
333
334 fn check_core(core: &WMCore, original: &[u64], reordered: &[u64]) {
335 assert_eq!(core.len(), original.len(), "Invalid WMCore length");
336
337 for i in 0..core.len() {
338 let mapped = core.map_down_with(i, original[i]);
339 assert_eq!(reordered[mapped], original[i], "Invalid value at the mapped down position from {}", i);
340 assert_eq!(core.map_down(i), Some((mapped, original[i])), "The map down functions are inconsistent at {}", i);
341 }
342
343 for i in 0..core.len() {
344 let mapped = core.map_up_with(i, reordered[i]).unwrap();
345 assert_eq!(original[mapped], reordered[i], "Invalid value at the mapped up position from {}", i);
346 assert_eq!(core.map_down_with(mapped, original[mapped]), i, "Did not get the original position after mapping up and down from {}", i);
347 }
348
349 let first = internal::random_queries(core.len(), core.len());
350 let second = internal::random_queries(core.len(), core.len());
351 let values = internal::random_vector(core.len(), core.width());
352 for i in 0..core.len() {
353 let mapped = core.map_down_with_two_positions(first[i], second[i], values[i]);
354 assert_eq!(mapped.0, core.map_down_with(first[i], values[i]), "Invalid first mapped position at {}, value {}", first[i], values[i]);
355 assert_eq!(mapped.1, core.map_down_with(second[i], values[i]), "Invalid second mapped position at {}, value {}", second[i], values[i]);
356 }
357 }
358
359 #[test]
360 fn empty_core() {
361 let original: Vec<u64> = Vec::new();
362 let reordered = reordered_vector(&original);
363 let core = WMCore::from(original.clone());
364 check_core(&core, &original, &reordered);
365 }
366
367 #[test]
368 fn non_empty_core() {
369 let original = internal::random_vector(322, 7);
370 let reordered = reordered_vector(&original);
371 let core = WMCore::from(original.clone());
372 check_core(&core, &original, &reordered);
373 }
374
375 #[test]
376 fn serialize_core() {
377 let original = internal::random_vector(286, 6);
378 let core = WMCore::from(original);
379 serialize::test(&core, "wm-core", None, true);
380 }
381}
382
383