wavelet_matrix/wavelet_matrix.rs
1// use std;
2use std::ops::Range;
3use std::collections::BinaryHeap;
4use std::cmp::Ordering;
5use succinct::SpaceUsage;
6use rsdic_simple::*;
7use node_range::*;
8
9/// WaveletMatrix supports various near-O(1) queries on the sequence of integers.
10#[derive(Debug)]
11pub struct WaveletMatrix {
12 layers: Vec<RsDic>,
13 dim: u64, // = max vals + 1
14 num: usize, // = layers[0].len()
15 bit_len: u8, // = layers.len()
16}
17
18impl WaveletMatrix {
19 /// Create a new WaveletMatrix struct from a Vec<u64> `vals`.
20 ///
21 /// The values contained in `vals` should be less than `u64::MAX`.
22 ///
23 /// ## Panics
24 ///
25 /// It panics when `vals` include `u64::MAX`.
26 pub fn new(vals: &Vec<u64>) -> WaveletMatrix {
27 // let dim = get_dim(&vals);
28 let max = *vals.iter().max().unwrap();
29 if max == ::std::u64::MAX {
30 panic!("WaveletMatrix::new(): Can't store u64::MAX");
31 }
32 let dim = max + 1;
33 let bit_len = get_bit_len(dim);
34 let num = vals.len();
35 let mut zeros: Vec<u64> = vals.clone();
36 let mut ones: Vec<u64> = Vec::new();
37 let mut layers: Vec<RsDic> = Vec::new();
38
39 for depth in 0..bit_len {
40 let mut next_zeros: Vec<u64> = Vec::with_capacity(vals.len());
41 let mut next_ones: Vec<u64> = Vec::with_capacity(vals.len());
42 let mut rsd_ = RsDicBuilder::new();
43 Self::filter(&zeros,
44 bit_len - depth - 1,
45 &mut next_zeros,
46 &mut next_ones,
47 &mut rsd_);
48 Self::filter(&ones,
49 bit_len - depth - 1,
50 &mut next_zeros,
51 &mut next_ones,
52 &mut rsd_);
53 zeros = next_zeros;
54 ones = next_ones;
55 layers.push(rsd_.build());
56 }
57
58 WaveletMatrix {
59 layers: layers,
60 dim: dim,
61 num: num,
62 bit_len: bit_len,
63 }
64 }
65
66 fn filter(vals: &Vec<u64>,
67 shift: u8,
68 next_zeros: &mut Vec<u64>,
69 next_ones: &mut Vec<u64>,
70 rsd: &mut RsDicBuilder) {
71 for val in vals {
72 let bit = get_bit_lsb(*val, shift);
73 rsd.push_bit(bit);
74 if bit {
75 next_ones.push(*val);
76 } else {
77 next_zeros.push(*val);
78 }
79 }
80 }
81
82 /// Returns the length of `T`
83 #[inline]
84 pub fn len(&self) -> usize {
85 self.num
86 }
87
88 /// Returns the value at the position `pos`, i.e. `T[pos]`.
89 ///
90 /// ## Panics
91 ///
92 /// It panics when index `pos` is out of range.
93 pub fn lookup(&self, pos: usize) -> u64 {
94 let mut val: u64 = 0;
95 let mut pos: usize = pos;
96
97 self.validate_pos(pos);
98
99 for depth in 0..self.bit_len as usize {
100 let rsd = &self.layers[depth];
101 let bit = rsd.access(pos);
102 pos = rsd.rank(pos, bit);
103 val <<= 1;
104 if bit {
105 pos += rsd.zero_num();
106 val |= 1;
107 }
108 }
109 val
110 }
111
112 /// Returns the maximum value + 1.
113 ///
114 /// Useful to get the upper bound of value range.
115 #[inline]
116 pub fn dim(&self) -> u64 {
117 self.dim
118 }
119
120 /// Returns the bit length stored internally
121 #[inline]
122 pub fn bit_len(&self) -> u8 {
123 self.bit_len
124 }
125
126 #[inline]
127 fn validate_pos(&self, pos: usize) {
128 if pos >= self.len() {
129 panic!("pos: out of range");
130 }
131 }
132
133 #[inline]
134 fn validate_pos_range(&self, pos_range: &Range<usize>) {
135 if pos_range.start >= self.len() {
136 panic!("pos_range.start: out of range");
137 } else if pos_range.end > self.len() {
138 panic!("pos_range.end: out of range");
139 }
140 }
141
142 /// Returns the number of the element `e` which satisfies `e == value` included in T[pos_range]
143 ///
144 /// ## Panics
145 ///
146 /// It panics when `pos_range` is out of range.
147 pub fn count(&self, pos_range: Range<usize>, value: u64) -> usize {
148 self.prefix_rank_op(pos_range, value, 0, Ordering::Equal)
149 }
150
151 /// Returns the number of the element `e` which satisfies `e < value` included in T[pos_range]
152 ///
153 /// ## Panics
154 ///
155 /// It panics when `pos_range` is out of range.
156 pub fn count_lt(&self, pos_range: Range<usize>, value: u64) -> usize {
157 self.prefix_rank_op(pos_range, value, 0, Ordering::Less)
158 }
159
160 /// Returns the number of the element `e` which satisfies `e > value` included in T[pos_range]
161 ///
162 /// ## Panics
163 ///
164 /// It panics when `pos_range` is out of range.
165 pub fn count_gt(&self, pos_range: Range<usize>, value: u64) -> usize {
166 self.prefix_rank_op(pos_range, value, 0, Ordering::Greater)
167 }
168
169 /// Returns the number of the element `e` which satisfies `(e >> ignore_bit) == (value >> ignore_bit)` included in T[pos_range]
170 ///
171 /// ## Panics
172 ///
173 /// It panics when `pos_range` is out of range.
174 pub fn count_prefix(&self, pos_range: Range<usize>, value: u64, ignore_bit: u8) -> usize {
175 self.prefix_rank_op(pos_range, value, ignore_bit, Ordering::Equal)
176 }
177
178 /// Returns the number of the element `e` which satisfies `val_range.start <= e < val_range.end` included in T[pos_range]
179 ///
180 /// ## Panics
181 ///
182 /// It panics when `pos_range` is out of range.
183 pub fn count_range(&self, pos_range: Range<usize>, val_range: Range<u64>) -> usize {
184 self.count_lt(pos_range.clone(), val_range.end) - self.count_lt(pos_range, val_range.start)
185 }
186
187 /// Returns the iterator that generates indices that satisfy the condition `e == value`.
188 ///
189 /// ## Panics
190 ///
191 /// It panics when `pos_range` is out of range.
192 pub fn search(&self, pos_range: Range<usize>, value: u64) -> WaveletMatrixSearch {
193 self.validate_pos_range(&pos_range);
194 self.search_prefix(pos_range, value, 0)
195 }
196
197 /// Returns the iterator that generates indices that satisfy the condition `e >> ignore_bit == value >> ignore_bit`.
198 ///
199 /// ## Panics
200 ///
201 /// It panics when `pos_range` is out of range.
202 pub fn search_prefix(&self,
203 pos_range: Range<usize>,
204 value: u64,
205 ignore_bit: u8)
206 -> WaveletMatrixSearch {
207 let rank = self.count_prefix(0..pos_range.start, value, ignore_bit);
208 WaveletMatrixSearch {
209 inner: &self,
210 range: pos_range,
211 rank: rank,
212 value: value,
213 ignore_bit: ignore_bit,
214 }
215 }
216
217 /// Returns the number of val found in `T[0..pos]`.
218 ///
219 /// The range specified is half open, i.e. `[0, pos)`. When `pos` is 0, the range includes no value.
220 ///
221 /// ## Panics
222 ///
223 /// It panics when `pos` is greater than len().
224 pub fn rank(&self, pos: usize, value: u64) -> usize {
225 self.prefix_rank_op(0..pos, value, 0, Ordering::Equal)
226 }
227
228 /// `.rank()` with:
229 /// - range support bpos..epos
230 /// - prefix search support (`ignore_bit`)
231 /// - operator support
232 ///
233 /// ## Panics
234 ///
235 /// It panics when `pos_range` is out of range.
236 #[inline]
237 fn prefix_rank_op(&self,
238 pos_range: Range<usize>,
239 val: u64,
240 ignore_bit: u8,
241 operator: Ordering)
242 -> usize {
243 self.validate_pos_range(&pos_range);
244
245 let mut bpos = pos_range.start;
246 let mut epos = pos_range.end;
247 let mut rank = 0;
248
249 if self.bit_len > ignore_bit {
250 for depth in 0..self.bit_len - ignore_bit {
251 let rsd = &self.layers[depth as usize];
252 let bit = get_bit_msb(val, depth, self.bit_len);
253 if bit {
254 if let Ordering::Less = operator {
255 rank += rsd.rank(epos, !bit) - rsd.rank(bpos, !bit);
256 }
257 let zero_num = rsd.zero_num();
258 bpos = rsd.rank(bpos, bit) + zero_num;
259 epos = rsd.rank(epos, bit) + zero_num;
260 } else {
261 if let Ordering::Greater = operator {
262 rank += rsd.rank(epos, !bit) - rsd.rank(bpos, !bit);
263 }
264 bpos = rsd.rank(bpos, bit);
265 epos = rsd.rank(epos, bit);
266 }
267 }
268 }
269 match operator {
270 Ordering::Equal => epos - bpos,
271 _ => rank,
272 }
273 }
274
275 /// Return the position of (rank+1)-th val in T.
276 ///
277 /// If no match has been found, it returns the length of self.
278 pub fn select(&self, rank: usize, val: u64) -> usize {
279 self.select_helper(rank, val, 0, 0, 0)
280 }
281
282 /// ignore_bit: experimental support for prefix search
283 fn select_helper(&self, rank: usize, val: u64, pos: usize, depth: u8, ignore_bit: u8) -> usize {
284 if self.bit_len < ignore_bit || depth == self.bit_len - ignore_bit {
285 return ::std::cmp::min(pos + rank, self.len());
286 }
287 let mut pos = pos;
288 let mut rank = rank;
289
290 let bit = get_bit_msb(val, depth, self.bit_len);
291 let rsd = &self.layers[depth as usize];
292 if bit {
293 let zero_num = rsd.zero_num();
294 pos = rsd.rank(pos, bit) + zero_num;
295 rank = self.select_helper(rank, val, pos, depth + 1, ignore_bit) - zero_num;
296 } else {
297 pos = rsd.rank(pos, bit);
298 rank = self.select_helper(rank, val, pos, depth + 1, ignore_bit);
299 }
300 rsd.select(rank, bit)
301 }
302
303 /// list the `(value, count)` pairs in most-frequent-one-first order.
304 /// values are constrained to the `val_range`.
305 ///
306 /// ## Panics
307 ///
308 /// It panics when `pos_range` is out of range.
309 pub fn top_k(&self,
310 pos_range: Range<usize>,
311 val_range: Range<u64>,
312 k: usize)
313 -> Vec<(u64, usize)> {
314 self.values::<NodeRangeByFrequency>(pos_range, val_range, k)
315 }
316
317 /// list the `(range, count)` pairs in most-frequent-one-first order.
318 /// values are constrained to the `val_range`.
319 ///
320 /// ## Panics
321 ///
322 /// It panics when `pos_range` is out of range.
323 ///
324 /// ## Example
325 ///
326 /// ```
327 /// use wavelet_matrix::WaveletMatrix;
328 ///
329 /// let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
330 /// // 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
331 ///
332 /// let wm = WaveletMatrix::new(&vec);
333 /// assert_eq!(wm.top_k_ranges(0..wm.len(), 0..wm.dim(), 3), vec![(0..4, 7), (4..8, 4), (8..16, 1)]);
334 /// // You can drill down into any specific range.
335 /// assert_eq!(wm.top_k_ranges(0..wm.len(), 0..4, 3), vec![(2..4, 3), (1..2, 2), (0..1, 2)]);
336 ///
337 /// assert_eq!(wm.top_k_ranges(0..wm.len(), 0..wm.dim(), 4), vec![(0..2, 4), (4..8, 4), (2..4, 3), (8..16, 1)]);
338 /// // You can drill down into any specific range.
339 /// assert_eq!(wm.top_k_ranges(0..wm.len(), 4..8, 4), vec![(4..5, 2), (5..6, 1), (6..7, 1)]);
340 /// assert_eq!(wm.top_k_ranges(0..wm.len(), 2..9, 4), vec![(2..4, 3), (4..6, 3), (6..8, 1), (8..16, 1)]);
341 /// ```
342 pub fn top_k_ranges(&self,
343 pos_range: Range<usize>,
344 val_range: Range<u64>,
345 k: usize)
346 -> Vec<(Range<u64>, usize)> {
347 self.value_ranges::<NodeRangeByFrequency>(pos_range, val_range, k)
348 }
349
350 /// list the `(value, count)` pairs in descending order.
351 /// values are constrained to the `val_range`.
352 ///
353 /// ## Panics
354 ///
355 /// It panics when `pos_range` is out of range.
356 pub fn max_k(&self,
357 pos_range: Range<usize>,
358 val_range: Range<u64>,
359 k: usize)
360 -> Vec<(u64, usize)> {
361 self.values::<NodeRangeDescending>(pos_range, val_range, k)
362 }
363
364 /// list the `(value, count)` pairs in ascending order.
365 /// values are constrained to the `val_range`.
366 ///
367 /// ## Panics
368 ///
369 /// It panics when `pos_range` is out of range.
370 pub fn min_k(&self,
371 pos_range: Range<usize>,
372 val_range: Range<u64>,
373 k: usize)
374 -> Vec<(u64, usize)> {
375 self.values::<NodeRangeAscending>(pos_range, val_range, k)
376 }
377
378 /// ## Panics
379 ///
380 /// It panics when `pos_range` is out of range.
381 fn values<N>(&self,
382 pos_range: Range<usize>,
383 val_range: Range<u64>,
384 k: usize)
385 -> Vec<(u64, usize)>
386 where N: NodeRange
387 {
388
389 let mut res = Vec::new();
390
391 self.validate_pos_range(&pos_range);
392 // if pos_range.start > self.len() || pos_range.start >= pos_range.end {
393 // return res;
394 // }
395
396 let mut qons: BinaryHeap<N> = BinaryHeap::new();
397
398 qons.push(NodeRange::new(pos_range, 0, 0, self.bit_len()));
399
400 while res.len() < k && !qons.is_empty() {
401 let qon = qons.pop().unwrap();
402 let qon = qon.inner();
403 if qon.depth == self.bit_len {
404 res.push((qon.prefix_char, qon.pos_end - qon.pos_start));
405 } else {
406 let next = self.expand_node(val_range.clone(), &qon);
407 for i in 0..next.len() {
408 qons.push(NodeRange::from(&next[i]));
409 }
410 }
411 }
412 res
413 }
414
415 /// ## Panics
416 ///
417 /// It panics when `pos_range` is out of range.
418 fn value_ranges<N>(&self,
419 pos_range: Range<usize>,
420 val_range: Range<u64>,
421 k: usize)
422 -> Vec<(Range<u64>, usize)>
423 where N: NodeRange
424 {
425
426 let mut res = Vec::new();
427
428 self.validate_pos_range(&pos_range);
429 // if pos_range.start > self.len() || pos_range.start >= pos_range.end {
430 // return res; // return the empty vector
431 // }
432
433 let mut qons: BinaryHeap<N> = BinaryHeap::new();
434
435 qons.push(NodeRange::new(pos_range, 0, 0, self.bit_len()));
436
437 while res.len() < k && !qons.is_empty() && qons.len() + res.len() < k {
438 let qon = qons.pop().unwrap();
439 let qon = qon.inner();
440 if qon.depth == self.bit_len {
441 res.push((qon.prefix_char..qon.prefix_char + 1, qon.pos_end - qon.pos_start));
442 } else {
443 let next = self.expand_node(val_range.clone(), &qon);
444 for i in 0..next.len() {
445 qons.push(NodeRange::from(&next[i]));
446 }
447 }
448 }
449 while let Some(qon) = qons.pop() {
450 let qon = qon.inner();
451 let shift = self.bit_len - qon.depth;
452 res.push((qon.prefix_char << shift..qon.prefix_char + 1 << shift,
453 qon.pos_end - qon.pos_start));
454 }
455 res
456 }
457
458 fn expand_node(&self, val_range: Range<u64>, qon: &QueryOnNode) -> Vec<QueryOnNode> {
459 let ba = &self.layers[qon.depth as usize];
460 let mut next = Vec::new();
461
462 let bpos_zero = ba.rank(qon.pos_start, false);
463 let epos_zero = ba.rank(qon.pos_end, false);
464
465 let boundary = ba.zero_num();
466 let bpos_one = ba.rank(qon.pos_start, true) + boundary;
467 let epos_one = ba.rank(qon.pos_end, true) + boundary;
468
469 if epos_zero > bpos_zero {
470 // child for zero
471 let next_prefix = qon.prefix_char << 1;
472 if self.check_prefix(next_prefix, qon.depth + 1, val_range.clone()) {
473 next.push(QueryOnNode::new(bpos_zero..epos_zero,
474 next_prefix,
475 qon.depth + 1,
476 self.bit_len()));
477 }
478 }
479 if epos_one > bpos_one {
480 // child for one
481 let next_prefix = (qon.prefix_char << 1) + 1;
482 if self.check_prefix(next_prefix, qon.depth + 1, val_range) {
483 next.push(QueryOnNode::new(bpos_one..epos_one,
484 next_prefix,
485 qon.depth + 1,
486 self.bit_len()));
487 }
488 }
489 next
490 }
491
492 fn check_prefix(&self, prefix: u64, depth: u8, val_range: Range<u64>) -> bool {
493 Self::prefix_code(val_range.start, depth, self.bit_len) <= prefix &&
494 prefix <= Self::prefix_code(val_range.end - 1, depth, self.bit_len)
495 }
496
497 fn prefix_code(x: u64, len: u8, bit_num: u8) -> u64 {
498 x >> (bit_num - len)
499 }
500
501 /// Approximately calculates the sum of `T[pos_range]` using up to k wavelet tree nodes and
502 /// returns `(sum of value, sum of count)` tuple.
503 ///
504 /// It enumerates the top-k most relevant node ranges using `.top_k_ranges()` function.
505 ///
506 /// To get the exact result, specfy k = m + 1 where m is the number of values which are unique.
507 /// E.g. If you have 7 unique values in the sequence T, specify k = 8 to get the exact result.
508 ///
509 /// For calculation, this method uses u64 for summing.
510 /// To avoid overflow during calculation, we recommend to use this function for < 32 bits values assuming the number of elements is ~ 32 bits.
511 ///
512 /// ## Panics
513 ///
514 /// It panics when `pos_range` is out of range.
515 ///
516 /// ## Example
517 ///
518 /// ```
519 /// use wavelet_matrix::WaveletMatrix;
520 ///
521 /// let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
522 /// // 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
523 ///
524 /// let wm = WaveletMatrix::new(&vec);
525 /// // assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 0), (0, 0)); // useless
526 /// assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 1), (96, 12));
527 /// assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 2), (56, 12));
528 /// assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 3), (50, 12));
529 /// assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 4), (49, 12));
530 /// assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 5), (47, 12));
531 /// assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 6), (45, 12));
532 /// assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 7), (40, 12));
533 /// assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 8), (36, 12)); // got the exact sum
534 /// assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 12), (36, 12));
535 ///
536 /// assert_eq!(wm.sum_experiment1(0..wm.len(), 6..7, 12), (6, 1));
537 /// assert_eq!(wm.sum_experiment1(3..8, 4..7, 12), (15, 3));
538 /// ```
539 pub fn sum_experiment1(&self,
540 pos_range: Range<usize>,
541 val_range: Range<u64>,
542 k: usize)
543 -> (u64, usize) {
544 let ranges = self.top_k_ranges(pos_range, val_range, k);
545
546 let sum: u64 = ranges.iter()
547 .map(|&(ref r, count)| {
548 (r.start + r.end) / 2 // expected value
549 * (count as u64)
550 })
551 .sum();
552
553 let count: usize = ranges.iter()
554 .map(|&(ref _r, count)| count)
555 .sum();
556
557 (sum, count)
558 }
559
560 /// Improved version of `.sum_experiment1()`.
561 ///
562 /// It enumerates the top-k most relevant node ranges using custom node expander instead of `.top_k_ranges()`.
563 ///
564 /// ## Panics
565 ///
566 /// It panics when `pos_range` is out of range.
567 ///
568 /// ## Example
569 ///
570 /// ```
571 /// use wavelet_matrix::WaveletMatrix;
572 ///
573 /// let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
574 /// // 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
575 ///
576 /// let wm = WaveletMatrix::new(&vec);
577 /// // assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 0), (0, 0)); // useless
578 /// assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 1), (96, 12));
579 /// assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 2), (56, 12));
580 /// assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 3), (50, 12));
581 /// assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 4), (49, 12));
582 /// assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 5), (47, 12));
583 /// assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 6), (43, 12)); // improved since experiment 1 (45->43)
584 /// assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 7), (38, 12)); // improved since experiment 1 (40->38)
585 /// assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 8), (36, 12)); // got the exact sum
586 /// assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 12), (36, 12));
587 ///
588 /// assert_eq!(wm.sum_experiment2(0..wm.len(), 6..7, 12), (6, 1));
589 /// assert_eq!(wm.sum_experiment2(3..8, 4..7, 12), (15, 3));
590 /// ```
591 pub fn sum_experiment2(&self,
592 pos_range: Range<usize>,
593 val_range: Range<u64>,
594 k: usize)
595 -> (u64, usize) {
596 let ranges = self.value_ranges::<NodeRangeBySumError>(pos_range, val_range, k);
597
598 let sum: u64 = ranges.iter()
599 .map(|&(ref r, count)| {
600 (r.start + r.end) / 2 // expected value
601 * (count as u64)
602 })
603 .sum();
604
605 let count: usize = ranges.iter()
606 .map(|&(ref _r, count)| count)
607 .sum();
608
609 (sum, count)
610 }
611
612 /// Improved version of `.sum_experiment2()`.
613 ///
614 /// It returns `Range<u64>` value to tell how accurate the computed value is.
615 ///
616 /// ## Panics
617 ///
618 /// It panics when `pos_range` is out of range.
619 ///
620 /// ## Example
621 ///
622 /// ```
623 /// use wavelet_matrix::WaveletMatrix;
624 ///
625 /// let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
626 /// // 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
627 ///
628 /// let wm = WaveletMatrix::new(&vec);
629 /// // assert_eq!(wm.sum_experimen3(0..wm.len(), 0..wm.dim(), 0), (0, 0)); // useless
630 /// assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 1), (0..181, 12)); // 181 / 2 = 90
631 /// assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 2), (8..93, 12)); // 101 / 2 = 50
632 /// assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 3), (24..65, 12)); // 89 / 2 = 44
633 /// assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 4), (30..57, 12)); // 87 / 2 = 43
634 /// assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 5), (32..51, 12)); // 83 / 2 = 41
635 /// assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 6), (34..45, 12)); // 79 / 2 = 38
636 /// assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 7), (35..40, 12)); // 75 / 2 = 37
637 /// assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 8), (36..37, 12)); // got the exact sum = 36
638 /// assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 12), (36..37, 12));
639 ///
640 /// assert_eq!(wm.sum_experiment3(0..wm.len(), 6..7, 12), (6..7, 1));
641 /// assert_eq!(wm.sum_experiment3(3..8, 4..7, 12), (15..16, 3));
642 /// ```
643 pub fn sum_experiment3(&self,
644 pos_range: Range<usize>,
645 val_range: Range<u64>,
646 k: usize)
647 -> (Range<u64>, usize) {
648 let ranges = self.value_ranges::<NodeRangeBySumError>(pos_range, val_range, k);
649
650 let sum_min: u64 = ranges.iter()
651 .map(|&(ref r, count)| r.start * (count as u64))
652 .sum();
653
654 let sum_max: u64 = ranges.iter()
655 .map(|&(ref r, count)| (r.end - 1) * (count as u64))
656 .sum();
657
658 let count: usize = ranges.iter()
659 .map(|&(ref _r, count)| count)
660 .sum();
661
662 (sum_min..sum_max + 1, count)
663 }
664
665 /// Approximately calculates the average of `T[pos_range]` using up to k wavelet tree nodes.
666 ///
667 /// It enumerates the top-k most relevant node ranges using `.top_k_ranges()` function.
668 ///
669 /// To get the exact result, specfy k = m + 1 where m is the number of values which are unique.
670 /// E.g. If you have 256 unique values in the sequence T, specify k = 257 to get the exact result.
671 ///
672 /// For calculation, this method uses u64 for summing.
673 /// To avoid overflow during calculation, we recommend to use this function for < 32 bits values assuming the number of elements is ~ 32 bits.
674 ///
675 /// ## Panics
676 ///
677 /// It panics when `pos_range` is out of range.
678 ///
679 /// ## Example
680 ///
681 /// ```
682 /// use wavelet_matrix::WaveletMatrix;
683 ///
684 /// let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
685 /// // 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
686 ///
687 /// let wm = WaveletMatrix::new(&vec);
688 /// assert_eq!(wm.mean_experiment1(0..wm.len(), 0..wm.dim(), 1), 8);
689 /// assert_eq!(wm.mean_experiment1(0..wm.len(), 0..wm.dim(), 2), 4);
690 /// assert_eq!(wm.mean_experiment1(0..wm.len(), 0..wm.dim(), 3), 4);
691 /// assert_eq!(wm.mean_experiment1(0..wm.len(), 0..wm.dim(), 4), 4);
692 /// assert_eq!(wm.mean_experiment1(0..wm.len(), 0..wm.dim(), 5), 3); // got the actual average
693 /// assert_eq!(wm.mean_experiment1(0..wm.len(), 0..wm.dim(), 12), 3);
694 /// ```
695 pub fn mean_experiment1(&self,
696 pos_range: Range<usize>,
697 val_range: Range<u64>,
698 k: usize)
699 -> u64 {
700 let (sum, count) = self.sum_experiment1(pos_range, val_range, k);
701
702 sum / (count as u64)
703 }
704
705 /// Approximately calculates the variance of `T[pos_range]` using up to k wavelet tree nodes.
706 ///
707 /// It enumerates the top-k most relevant node ranges using `.top_k_ranges()` function.
708 ///
709 /// To get the exact result, specfy k = m + 1 where m is the number of values which are unique.
710 /// E.g. If you have 256 unique values in the sequence T, specify k = 257 to get the exact result.
711 ///
712 /// For calculation, this method uses u64 for summing.
713 /// To avoid overflow during calculation, we recommend to use this function for < 16 bits values assuming the number of elements is ~ 32 bits.
714 ///
715 /// ## Panics
716 ///
717 /// It panics when `pos_range` is out of range.
718 ///
719 /// ## Example
720 ///
721 /// ```
722 /// use wavelet_matrix::WaveletMatrix;
723 ///
724 /// let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
725 /// // 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
726 ///
727 /// let wm = WaveletMatrix::new(&vec);
728 /// let mean = wm.mean_experiment1(0..wm.len(), 0..wm.dim(), 8);
729 /// assert_eq!(mean, 3); // got the actual average
730 /// assert_eq!(wm.variance_experiment1(0..wm.len(), 0..wm.dim(), 8), 6);
731 /// assert_eq!(vec.iter()
732 /// .map(|&v| {
733 /// let diff = if v > mean {v - mean} else {mean - v};
734 /// diff * diff
735 /// })
736 /// .sum::<u64>() / (vec.len() as u64), 6);
737 /// ```
738 pub fn variance_experiment1(&self,
739 pos_range: Range<usize>,
740 val_range: Range<u64>,
741 k: usize)
742 -> u64 {
743 let ranges = self.top_k_ranges(pos_range, val_range, k);
744
745 let sum: u64 = ranges.iter()
746 .map(|&(ref r, count)| {
747 (r.start + r.end) / 2 // expected value
748 * (count as u64)
749 })
750 .sum();
751
752 let count: usize = ranges.iter()
753 .map(|&(ref _r, count)| count)
754 .sum();
755
756 let mean = sum / count as u64;
757
758 let variance: u64 = ranges.iter()
759 .map(|&(ref r, count)| {
760 let expected = (r.start + r.end) / 2;
761 let diff = if expected > mean {
762 expected - mean
763 } else {
764 mean - expected
765 };
766 diff * diff * (count as u64)
767 })
768 .sum::<u64>() / count as u64;
769
770 variance
771 }
772
773 /// returns the median value from ``T[pos_range]``.
774 ///
775 /// same as `.quantile(start..end, start + (end - start) / 2)`.
776 ///
777 /// ## Panics
778 ///
779 /// It panics when `pos_range` is out of range.
780 ///
781 /// ## Example
782 ///
783 /// ```
784 /// use wavelet_matrix::WaveletMatrix;
785 ///
786 /// let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
787 /// // 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
788 ///
789 /// let wm = WaveletMatrix::new(&vec);
790 /// assert_eq!(wm.median(0..wm.len()), 2); // median
791 /// assert_eq!(wm.median(6..wm.len()), 4); // median
792 ///
793 /// // Let's compare with the sorted vector version
794 /// let mut sorted = vec.clone();
795 /// sorted.sort(); // O(n log n)
796 /// assert_eq!(wm.median(0..wm.len()), sorted[wm.len() / 2]);
797 /// ```
798 pub fn median(&self, pos_range: Range<usize>) -> u64 {
799 self.quantile(pos_range.clone(), (pos_range.end - pos_range.start) / 2)
800 }
801
802 /// Returns the (k+1)th smallest value from `T[pos_range]`.
803 ///
804 /// ## Panics
805 ///
806 /// It panics when `pos_range` is out of range.
807 ///
808 /// ## Example
809 ///
810 /// ```
811 /// use wavelet_matrix::WaveletMatrix;
812 ///
813 /// let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
814 /// // 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
815 ///
816 /// let wm = WaveletMatrix::new(&vec);
817 /// assert_eq!(wm.quantile(0..wm.len(), 0), 0); // min
818 /// assert_eq!(wm.quantile(0..wm.len(), 3), 1);
819 /// assert_eq!(wm.quantile(0..wm.len(), 6), 2); // median
820 /// assert_eq!(wm.quantile(0..wm.len(), 11), 9); // max
821 /// // assert_eq!(wm.quantile(0..wm.len(), 12), 9); // out of range
822 ///
823 /// // Let's compare with the sorted vector
824 /// let mut sorted = vec.clone();
825 /// sorted.sort(); // O(n log n)
826 /// for i in 0..sorted.len() {
827 /// assert_eq!(wm.quantile(0..wm.len(), i), sorted[i]);
828 /// }
829 /// ```
830 pub fn quantile(&self, pos_range: Range<usize>, k: usize) -> u64 {
831 self.validate_pos_range(&pos_range);
832
833 let mut bpos = pos_range.start;
834 let mut epos = pos_range.end;
835 let mut value: u64 = 0;
836 let mut k = k;
837
838 for depth in 0..self.bit_len {
839 let rsd = &self.layers[depth as usize];
840 value = value << 1;
841
842 let zeros_bpos = rsd.rank(bpos, false);
843 let zeros_epos = rsd.rank(epos, false);
844 let zeros_rank = zeros_epos - zeros_bpos;
845
846 if k < zeros_rank {
847 // the value is included in zero's node.
848 // take zero's node in the next loop
849 bpos = zeros_bpos;
850 epos = zeros_epos;
851 } else {
852 // the value is included in one's node.
853 k -= zeros_rank; // the count of zeros are consumed
854 value |= 1;
855
856 // take one's node in the next loop
857 bpos = (bpos - zeros_bpos) + rsd.zero_num();
858 // ^^^^^^^^^^^^^^^^^^^ rsd.rank(bpos, true)
859 epos = (epos - zeros_epos) + rsd.zero_num();
860 // ^^^^^^^^^^^^^^^^^^^ rsd.rank(epos, true)
861 }
862 }
863 value
864 }
865}
866
867fn get_bit_msb(x: u64, pos: u8, blen: u8) -> bool {
868 ((x >> (blen - pos - 1)) & 1) == 1
869}
870
871fn get_bit_lsb(x: u64, pos: u8) -> bool {
872 ((x >> pos) & 1) == 1
873}
874
875impl SpaceUsage for WaveletMatrix {
876 fn is_stack_only() -> bool {
877 false
878 }
879
880 fn heap_bytes(&self) -> usize {
881 self.layers.iter().map(|x| x.heap_bytes()).sum()
882 }
883}
884
885
886/// Thin builder that builds WaveletMatrix
887#[derive(Debug)]
888pub struct WaveletMatrixBuilder {
889 vals: Vec<u64>,
890}
891
892impl WaveletMatrixBuilder {
893 /// Create builder.
894 pub fn new() -> WaveletMatrixBuilder {
895 WaveletMatrixBuilder { vals: Vec::new() }
896 }
897
898 /// append to the internal Vec.
899 pub fn push(&mut self, val: u64) {
900 self.vals.push(val);
901 }
902
903 /// Build creates WaveletMatrix from this builder.
904 /// It takes self, so the original builder won't be accessible later.
905 pub fn build(self) -> WaveletMatrix {
906 WaveletMatrix::new(&self.vals)
907 }
908}
909
910/// Iterator struct used by the WaveletMatrix::search()
911#[derive(Debug)]
912pub struct WaveletMatrixSearch<'a> {
913 inner: &'a WaveletMatrix, // underlying Wavelet Matrix
914 range: Range<usize>, // index range to be searched
915 rank: usize, // the next rank
916 value: u64, // value to be searched
917 ignore_bit: u8, // used in prefix search
918}
919
920impl<'a> Iterator for WaveletMatrixSearch<'a> {
921 type Item = usize;
922 fn next(&mut self) -> Option<Self::Item> {
923 let pos = self.inner.select_helper(self.rank, self.value, 0, 0, self.ignore_bit);
924 self.rank += 1;
925 if pos < self.range.end {
926 Some(pos)
927 } else {
928 None
929 }
930 }
931}
932
933// Note:
934// If max of vals is 0xffff_ffff_ffff_ffff (max u64),
935// return value will overflow u64.
936// fn get_dim(vals: &[u64]) -> u64 {
937// let mut dim: u64 = 0;
938// for val in vals.iter() {
939// if *val >= dim {
940// dim = *val + 1;
941// }
942// }
943// dim
944// }
945
946fn get_bit_len(val: u64) -> u8 {
947 let mut blen: u8 = 0;
948 let mut val = val;
949 while val > 0 {
950 val >>= 1;
951 blen += 1;
952 }
953 blen
954}
955
956
957#[cfg(test)]
958mod tests {
959 extern crate rand;
960 extern crate itertools;
961
962 use super::*;
963 use wavelet_matrix::tests::rand::Rng;
964
965 #[test]
966 fn example() {
967 let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
968 // 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
969 let wm = WaveletMatrix::new(&vec);
970
971 assert_eq!(wm.len(), 12);
972 assert_eq!(wm.lookup(7), 6);
973
974 // Counting
975 assert_eq!(wm.count(0..wm.len(), 2), 3);
976 assert_eq!(wm.count(0..wm.len(), 4), 2);
977 assert_eq!(wm.count(0..wm.len(), 5), 1);
978 assert_eq!(wm.count(0..wm.len(), 7), 0);
979 assert_eq!(wm.count(0..wm.len(), 39), 0);
980
981 assert_eq!(wm.count_prefix(0..wm.len(), 8, 3), 1);
982 assert_eq!(wm.count_prefix(0..wm.len(), 6, 1), 1);
983 assert_eq!(wm.count_prefix(0..wm.len(), 0, 1), 4);
984 assert_eq!(wm.count_prefix(0..wm.len(), 0, 2), 7);
985
986 assert_eq!(wm.count_lt(0..wm.len(), 2), 4);
987 assert_eq!(wm.count_lt(0..wm.len(), 7), 11);
988
989 assert_eq!(wm.count_gt(0..wm.len(), 2), 5);
990 assert_eq!(wm.count_gt(0..wm.len(), 7), 1);
991
992 assert_eq!(wm.count_range(0..wm.len(), 0..10), 12);
993 assert_eq!(wm.count_range(0..wm.len(), 4..6), 3);
994
995 // Searching
996 assert_eq!(wm.search(0..wm.len(), 4).collect::<Vec<usize>>(),
997 vec![2, 6]);
998 assert_eq!(wm.search(3..wm.len(), 4).collect::<Vec<usize>>(), vec![6]);
999 assert_eq!(wm.search(0..wm.len(), 7).collect::<Vec<usize>>(), vec![]);
1000
1001 // Ranking
1002 assert_eq!(wm.top_k(0..wm.len(), 0..10, 12),
1003 vec![(2, 3), (1, 2), (4, 2), (0, 2), (5, 1), (6, 1), (9, 1)]);
1004 assert_eq!(wm.top_k(0..wm.len(), 0..10, 4),
1005 vec![(2, 3), (1, 2), (4, 2), (0, 2)]);
1006 assert_eq!(wm.top_k(0..wm.len(), 2..9, 12),
1007 vec![(2, 3), (4, 2), (5, 1), (6, 1)]);
1008
1009 assert_eq!(wm.max_k(0..wm.len(), 0..10, 12),
1010 vec![(9, 1), (6, 1), (5, 1), (4, 2), (2, 3), (1, 2), (0, 2)]);
1011 assert_eq!(wm.max_k(0..wm.len(), 0..10, 4),
1012 vec![(9, 1), (6, 1), (5, 1), (4, 2)]);
1013 assert_eq!(wm.max_k(0..wm.len(), 2..9, 12),
1014 vec![(6, 1), (5, 1), (4, 2), (2, 3)]);
1015
1016 assert_eq!(wm.min_k(0..wm.len(), 0..10, 12),
1017 vec![(0, 2), (1, 2), (2, 3), (4, 2), (5, 1), (6, 1), (9, 1)]);
1018 assert_eq!(wm.min_k(0..wm.len(), 0..10, 4),
1019 vec![(0, 2), (1, 2), (2, 3), (4, 2)]);
1020 assert_eq!(wm.min_k(0..wm.len(), 2..9, 12),
1021 vec![(2, 3), (4, 2), (5, 1), (6, 1)]);
1022
1023 // classic .rank()/.select() API
1024 assert_eq!(wm.rank(5, 1), 2);
1025 assert_eq!(wm.select(2, 2), 10);
1026 }
1027
1028 #[test]
1029 fn wavelet_matrix_sanity() {
1030 let mut wmb = WaveletMatrixBuilder::new();
1031 wmb.push(1);
1032 wmb.push(31);
1033 wmb.push(11);
1034 wmb.push(10);
1035 wmb.push(11);
1036
1037 let wm = wmb.build();
1038 assert_eq!(wm.lookup(0), 1);
1039 assert_eq!(wm.lookup(1), 31);
1040 assert_eq!(wm.lookup(2), 11);
1041 assert_eq!(wm.lookup(3), 10);
1042 assert_eq!(wm.lookup(4), 11);
1043 // assert_eq!(wm.lookup(5), 11);
1044
1045 assert_eq!(wm.count(0..wm.len(), 11), 2);
1046 assert_eq!(wm.count(0..wm.len(), 10), 1);
1047 assert_eq!(wm.count(0..wm.len(), 5), 0);
1048
1049 assert_eq!(wm.rank(0, 1), 0);
1050 assert_eq!(wm.rank(1, 1), 1);
1051 assert_eq!(wm.rank(4, 1), 1);
1052
1053 assert_eq!(wm.rank(0, 31), 0);
1054 assert_eq!(wm.rank(1, 31), 0);
1055 assert_eq!(wm.rank(2, 31), 1);
1056 assert_eq!(wm.rank(3, 31), 1);
1057 assert_eq!(wm.rank(4, 31), 1);
1058
1059 assert_eq!(wm.select(0, 1), 0);
1060 assert_eq!(wm.select(0, 31), 1);
1061 assert_eq!(wm.select(0, 11), 2);
1062 assert_eq!(wm.select(1, 11), 4);
1063 assert_eq!(wm.select(2, 11), 5);
1064 assert_eq!(wm.select(3, 11), 5);
1065
1066 // assert_eq!(wm.total_bytes(), 336); // Only true for now with x64
1067 }
1068
1069 fn random_upto(max: u64) -> u64 {
1070 let mut rng = rand::weak_rng();
1071 if max != 0 {
1072 rng.gen_range(0, max)
1073 } else {
1074 0
1075 }
1076 }
1077
1078 fn test_count_all(wm: &WaveletMatrix,
1079 vec: &Vec<u64>,
1080 val: u64,
1081 ignore_bit: u8,
1082 range: Range<usize>) {
1083
1084 assert_eq!(wm.count(range.clone(), val),
1085 vec[range.clone()].iter().filter(|x| **x == val).count());
1086
1087 assert_eq!(wm.count_prefix(range.clone(), val, ignore_bit),
1088 vec[range.clone()]
1089 .iter()
1090 .filter(|x| (**x >> ignore_bit) == (val >> ignore_bit))
1091 .count());
1092
1093 assert_eq!(wm.count_lt(range.clone(), val),
1094 vec[range.clone()].iter().filter(|x| **x < val).count());
1095
1096 assert_eq!(wm.count_gt(range.clone(), val),
1097 vec[range.clone()].iter().filter(|x| **x > val).count());
1098 }
1099
1100 fn test_search_all(wm: &WaveletMatrix,
1101 vec: &Vec<u64>,
1102 val: u64,
1103 ignore_bit: u8,
1104 range: Range<usize>) {
1105
1106 assert_eq!(wm.search(range.clone(), val).collect::<Vec<usize>>(),
1107 vec[range.clone()]
1108 .iter()
1109 .enumerate()
1110 .filter(|x| *x.1 == val)
1111 .map(|x| x.0 + range.start)
1112 .collect::<Vec<usize>>());
1113
1114 assert_eq!(wm.search_prefix(range.clone(), val, ignore_bit).collect::<Vec<usize>>(),
1115 vec[range.clone()]
1116 .iter()
1117 .enumerate()
1118 .filter(|x| *x.1 >> ignore_bit == val >> ignore_bit)
1119 .map(|x| x.0 + range.start)
1120 .collect::<Vec<usize>>());
1121 }
1122
1123 fn test_statistics_all(wm: &WaveletMatrix,
1124 vec: &Vec<u64>,
1125 range: Range<usize>) {
1126
1127 let mut sorted: Vec<u64> = vec[range.clone()].iter().map(|x| *x).collect();
1128 sorted.sort();
1129
1130 if sorted.len() > 0 {
1131 let k = random_upto(sorted.len() as u64) as usize;
1132
1133 assert_eq!(wm.quantile(range.clone(), k), sorted[k]);
1134 assert_eq!(wm.median(range.clone()), sorted[sorted.len() / 2]);
1135 }
1136 }
1137
1138 use std::collections::BTreeMap;
1139 fn value_count(vec: &[u64]) -> BTreeMap<u64, (usize, usize)> {
1140 let mut map = BTreeMap::new();
1141 for pos in 0..vec.len() {
1142 let entry = map.entry(vec[pos]).or_insert((0, pos));
1143 entry.0 = entry.0 + 1; // count += 1
1144 }
1145 map // value => (count, 1st pos)
1146 }
1147
1148 use self::itertools::Itertools;
1149 fn test_ranking(wm: &WaveletMatrix,
1150 vec: &Vec<u64>,
1151 vals: Range<u64>,
1152 range: Range<usize>,
1153 len: usize) {
1154
1155 assert_eq!(wm.min_k(range.clone(), vals.clone(), len),
1156 value_count(&vec[range.clone()]).iter()
1157 .into_iter()
1158 .filter(|&(value, _count)| vals.start <= *value && *value < vals.end)
1159 .take(len)
1160 .map(|k| (*k.0, (k.1).0)) // value, count
1161 .collect::<Vec<_>>());
1162
1163 assert_eq!(wm.max_k(range.clone(), vals.clone(), len),
1164 value_count(&vec[range.clone()]).iter()
1165 .into_iter()
1166 .filter(|&(value, _count)| vals.start <= *value && *value < vals.end)
1167 .rev()
1168 .take(len)
1169 .map(|k| (*k.0, (k.1).0)) // (value, count)
1170 .collect::<Vec<_>>());
1171
1172 assert_eq!(wm.top_k(range.clone(), vals.clone(), len)
1173 .iter()
1174 .map(|&(_value, count)| count) // Note: currently only count is tested because value order is not predictable
1175 .collect::<Vec<_>>(),
1176 value_count(&vec[range.clone()]).iter()
1177 .into_iter()
1178 .filter(|&(value, _count)| vals.start <= *value && *value < vals.end)
1179 .sorted_by(|a, b| a.1.cmp(&b.1).reverse() ) // ordered by (count, first pos)
1180 .iter()
1181 .take(len)
1182 .map(|k| (*k.0, (k.1).0)) // (value, count)
1183 .collect::<Vec<_>>()
1184 .iter()
1185 .map(|&(_value, count)| count) // Note: currently only count is tested because value order is not predictable
1186 .collect::<Vec<_>>());
1187 }
1188
1189 fn random_test(len: usize, val_max: u64) {
1190 let mut vec: Vec<u64> = Vec::new();
1191 for _ in 0..len {
1192 vec.push(random_upto(val_max));
1193 }
1194 let wm = WaveletMatrix::new(&vec);
1195
1196 assert_eq!(wm.dim, *vec.iter().max().unwrap() + 1);
1197 assert_eq!(wm.num, len);
1198 assert_eq!(wm.len(), len);
1199
1200 for i in 0..100 {
1201 let idx = random_upto(wm.len() as u64) as usize;
1202 assert_eq!(wm.lookup(idx), vec[idx]);
1203
1204 let val = vec[idx];
1205 let ignore_bit = random_upto(wm.bit_len as u64) as u8;
1206 let a = random_upto(wm.len() as u64) as usize;
1207 let b = random_upto(wm.len() as u64) as usize;
1208 let range = ::std::cmp::min(a, b)..::std::cmp::max(a, b);
1209
1210 test_count_all(&wm, &vec, val, ignore_bit, range.clone());
1211 test_count_all(&wm, &vec, val + 1, ignore_bit, range.clone());
1212
1213 test_statistics_all(&wm, &vec, range.clone());
1214
1215 if i == 0 {
1216 test_search_all(&wm, &vec, val, ignore_bit, range.clone());
1217 test_search_all(&wm, &vec, val + 1, ignore_bit, range.clone());
1218
1219 let a = random_upto(wm.dim as u64) as u64;
1220 let b = random_upto(wm.dim as u64) as u64;
1221 let val_range = ::std::cmp::min(a, b)..::std::cmp::max(a, b);
1222 test_ranking(&wm, &vec, val_range.clone(), range.clone(), 10);
1223 test_ranking(&wm, &vec, val_range.clone(), range.clone(), 10000);
1224 }
1225 }
1226 }
1227
1228 #[test]
1229 fn layers_64() {
1230 random_test(1024, ::std::u64::MAX);
1231 random_test(1023, ::std::u64::MAX - 1);
1232 }
1233 #[test]
1234 fn layers_7() {
1235 random_test(1024, 128);
1236 random_test(1023, 127);
1237 }
1238 #[test]
1239 fn layers_4() {
1240 random_test(9, 16);
1241 random_test(10240, 16);
1242 random_test(10231, 15);
1243 }
1244 #[test]
1245 #[should_panic]
1246 fn new_panic() {
1247 let vals: Vec<u64> = vec![::std::u64::MAX];
1248 let _wm = WaveletMatrix::new(&vals);
1249 }
1250 #[test]
1251 #[should_panic]
1252 fn lookup_panic() {
1253 let vals: Vec<u64> = vec![0,1,2];
1254 let wm = WaveletMatrix::new(&vals);
1255 let _ = wm.lookup(3);
1256 }
1257 #[test]
1258 #[should_panic]
1259 fn count_panic_start() {
1260 let vals: Vec<u64> = vec![0,1,2];
1261 let wm = WaveletMatrix::new(&vals);
1262 let _ = wm.count(3..3, 1);
1263 }
1264 #[test]
1265 #[should_panic]
1266 fn count_panic_end() {
1267 let vals: Vec<u64> = vec![0,1,2];
1268 let wm = WaveletMatrix::new(&vals);
1269 let _ = wm.count(0..4, 1);
1270 }
1271}