pub struct WaveletMatrix { /* private fields */ }
Expand description
WaveletMatrix supports various near-O(1) queries on the sequence of integers.
Implementations§
Source§impl WaveletMatrix
impl WaveletMatrix
Sourcepub fn new(vals: &Vec<u64>) -> WaveletMatrix
pub fn new(vals: &Vec<u64>) -> WaveletMatrix
Create a new WaveletMatrix struct from a Vecvals
.
The values contained in vals
should be less than u64::MAX
.
§Panics
It panics when vals
include u64::MAX
.
Sourcepub fn lookup(&self, pos: usize) -> u64
pub fn lookup(&self, pos: usize) -> u64
Returns the value at the position pos
, i.e. T[pos]
.
§Panics
It panics when index pos
is out of range.
Sourcepub fn dim(&self) -> u64
pub fn dim(&self) -> u64
Returns the maximum value + 1.
Useful to get the upper bound of value range.
Sourcepub fn count(&self, pos_range: Range<usize>, value: u64) -> usize
pub fn count(&self, pos_range: Range<usize>, value: u64) -> usize
Returns the number of the element e
which satisfies e == value
included in T[pos_range]
§Panics
It panics when pos_range
is out of range.
Sourcepub fn count_lt(&self, pos_range: Range<usize>, value: u64) -> usize
pub fn count_lt(&self, pos_range: Range<usize>, value: u64) -> usize
Returns the number of the element e
which satisfies e < value
included in T[pos_range]
§Panics
It panics when pos_range
is out of range.
Sourcepub fn count_gt(&self, pos_range: Range<usize>, value: u64) -> usize
pub fn count_gt(&self, pos_range: Range<usize>, value: u64) -> usize
Returns the number of the element e
which satisfies e > value
included in T[pos_range]
§Panics
It panics when pos_range
is out of range.
Sourcepub fn count_prefix(
&self,
pos_range: Range<usize>,
value: u64,
ignore_bit: u8,
) -> usize
pub fn count_prefix( &self, pos_range: Range<usize>, value: u64, ignore_bit: u8, ) -> usize
Returns the number of the element e
which satisfies (e >> ignore_bit) == (value >> ignore_bit)
included in T[pos_range]
§Panics
It panics when pos_range
is out of range.
Sourcepub fn count_range(
&self,
pos_range: Range<usize>,
val_range: Range<u64>,
) -> usize
pub fn count_range( &self, pos_range: Range<usize>, val_range: Range<u64>, ) -> usize
Returns the number of the element e
which satisfies val_range.start <= e < val_range.end
included in T[pos_range]
§Panics
It panics when pos_range
is out of range.
Sourcepub fn search(
&self,
pos_range: Range<usize>,
value: u64,
) -> WaveletMatrixSearch<'_> ⓘ
pub fn search( &self, pos_range: Range<usize>, value: u64, ) -> WaveletMatrixSearch<'_> ⓘ
Returns the iterator that generates indices that satisfy the condition e == value
.
§Panics
It panics when pos_range
is out of range.
Sourcepub fn search_prefix(
&self,
pos_range: Range<usize>,
value: u64,
ignore_bit: u8,
) -> WaveletMatrixSearch<'_> ⓘ
pub fn search_prefix( &self, pos_range: Range<usize>, value: u64, ignore_bit: u8, ) -> WaveletMatrixSearch<'_> ⓘ
Returns the iterator that generates indices that satisfy the condition e >> ignore_bit == value >> ignore_bit
.
§Panics
It panics when pos_range
is out of range.
Sourcepub fn rank(&self, pos: usize, value: u64) -> usize
pub fn rank(&self, pos: usize, value: u64) -> usize
Returns the number of val found in T[0..pos]
.
The range specified is half open, i.e. [0, pos)
. When pos
is 0, the range includes no value.
§Panics
It panics when pos
is greater than len().
Sourcepub fn select(&self, rank: usize, val: u64) -> usize
pub fn select(&self, rank: usize, val: u64) -> usize
Return the position of (rank+1)-th val in T.
If no match has been found, it returns the length of self.
Sourcepub fn top_k(
&self,
pos_range: Range<usize>,
val_range: Range<u64>,
k: usize,
) -> Vec<(u64, usize)>
pub fn top_k( &self, pos_range: Range<usize>, val_range: Range<u64>, k: usize, ) -> Vec<(u64, usize)>
list the (value, count)
pairs in most-frequent-one-first order.
values are constrained to the val_range
.
§Panics
It panics when pos_range
is out of range.
Sourcepub fn top_k_ranges(
&self,
pos_range: Range<usize>,
val_range: Range<u64>,
k: usize,
) -> Vec<(Range<u64>, usize)>
pub fn top_k_ranges( &self, pos_range: Range<usize>, val_range: Range<u64>, k: usize, ) -> Vec<(Range<u64>, usize)>
list the (range, count)
pairs in most-frequent-one-first order.
values are constrained to the val_range
.
§Panics
It panics when pos_range
is out of range.
§Example
use wavelet_matrix::WaveletMatrix;
let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
// 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
let wm = WaveletMatrix::new(&vec);
assert_eq!(wm.top_k_ranges(0..wm.len(), 0..wm.dim(), 3), vec![(0..4, 7), (4..8, 4), (8..16, 1)]);
// You can drill down into any specific range.
assert_eq!(wm.top_k_ranges(0..wm.len(), 0..4, 3), vec![(2..4, 3), (1..2, 2), (0..1, 2)]);
assert_eq!(wm.top_k_ranges(0..wm.len(), 0..wm.dim(), 4), vec![(0..2, 4), (4..8, 4), (2..4, 3), (8..16, 1)]);
// You can drill down into any specific range.
assert_eq!(wm.top_k_ranges(0..wm.len(), 4..8, 4), vec![(4..5, 2), (5..6, 1), (6..7, 1)]);
assert_eq!(wm.top_k_ranges(0..wm.len(), 2..9, 4), vec![(2..4, 3), (4..6, 3), (6..8, 1), (8..16, 1)]);
Sourcepub fn max_k(
&self,
pos_range: Range<usize>,
val_range: Range<u64>,
k: usize,
) -> Vec<(u64, usize)>
pub fn max_k( &self, pos_range: Range<usize>, val_range: Range<u64>, k: usize, ) -> Vec<(u64, usize)>
list the (value, count)
pairs in descending order.
values are constrained to the val_range
.
§Panics
It panics when pos_range
is out of range.
Sourcepub fn min_k(
&self,
pos_range: Range<usize>,
val_range: Range<u64>,
k: usize,
) -> Vec<(u64, usize)>
pub fn min_k( &self, pos_range: Range<usize>, val_range: Range<u64>, k: usize, ) -> Vec<(u64, usize)>
list the (value, count)
pairs in ascending order.
values are constrained to the val_range
.
§Panics
It panics when pos_range
is out of range.
Sourcepub fn sum_experiment1(
&self,
pos_range: Range<usize>,
val_range: Range<u64>,
k: usize,
) -> (u64, usize)
pub fn sum_experiment1( &self, pos_range: Range<usize>, val_range: Range<u64>, k: usize, ) -> (u64, usize)
Approximately calculates the sum of T[pos_range]
using up to k wavelet tree nodes and
returns (sum of value, sum of count)
tuple.
It enumerates the top-k most relevant node ranges using .top_k_ranges()
function.
To get the exact result, specfy k = m + 1 where m is the number of values which are unique. E.g. If you have 7 unique values in the sequence T, specify k = 8 to get the exact result.
For calculation, this method uses u64 for summing. To avoid overflow during calculation, we recommend to use this function for < 32 bits values assuming the number of elements is ~ 32 bits.
§Panics
It panics when pos_range
is out of range.
§Example
use wavelet_matrix::WaveletMatrix;
let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
// 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
let wm = WaveletMatrix::new(&vec);
// assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 0), (0, 0)); // useless
assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 1), (96, 12));
assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 2), (56, 12));
assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 3), (50, 12));
assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 4), (49, 12));
assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 5), (47, 12));
assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 6), (45, 12));
assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 7), (40, 12));
assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 8), (36, 12)); // got the exact sum
assert_eq!(wm.sum_experiment1(0..wm.len(), 0..wm.dim(), 12), (36, 12));
assert_eq!(wm.sum_experiment1(0..wm.len(), 6..7, 12), (6, 1));
assert_eq!(wm.sum_experiment1(3..8, 4..7, 12), (15, 3));
Sourcepub fn sum_experiment2(
&self,
pos_range: Range<usize>,
val_range: Range<u64>,
k: usize,
) -> (u64, usize)
pub fn sum_experiment2( &self, pos_range: Range<usize>, val_range: Range<u64>, k: usize, ) -> (u64, usize)
Improved version of .sum_experiment1()
.
It enumerates the top-k most relevant node ranges using custom node expander instead of .top_k_ranges()
.
§Panics
It panics when pos_range
is out of range.
§Example
use wavelet_matrix::WaveletMatrix;
let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
// 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
let wm = WaveletMatrix::new(&vec);
// assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 0), (0, 0)); // useless
assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 1), (96, 12));
assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 2), (56, 12));
assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 3), (50, 12));
assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 4), (49, 12));
assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 5), (47, 12));
assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 6), (43, 12)); // improved since experiment 1 (45->43)
assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 7), (38, 12)); // improved since experiment 1 (40->38)
assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 8), (36, 12)); // got the exact sum
assert_eq!(wm.sum_experiment2(0..wm.len(), 0..wm.dim(), 12), (36, 12));
assert_eq!(wm.sum_experiment2(0..wm.len(), 6..7, 12), (6, 1));
assert_eq!(wm.sum_experiment2(3..8, 4..7, 12), (15, 3));
Sourcepub fn sum_experiment3(
&self,
pos_range: Range<usize>,
val_range: Range<u64>,
k: usize,
) -> (Range<u64>, usize)
pub fn sum_experiment3( &self, pos_range: Range<usize>, val_range: Range<u64>, k: usize, ) -> (Range<u64>, usize)
Improved version of .sum_experiment2()
.
It returns Range<u64>
value to tell how accurate the computed value is.
§Panics
It panics when pos_range
is out of range.
§Example
use wavelet_matrix::WaveletMatrix;
let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
// 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
let wm = WaveletMatrix::new(&vec);
// assert_eq!(wm.sum_experimen3(0..wm.len(), 0..wm.dim(), 0), (0, 0)); // useless
assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 1), (0..181, 12)); // 181 / 2 = 90
assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 2), (8..93, 12)); // 101 / 2 = 50
assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 3), (24..65, 12)); // 89 / 2 = 44
assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 4), (30..57, 12)); // 87 / 2 = 43
assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 5), (32..51, 12)); // 83 / 2 = 41
assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 6), (34..45, 12)); // 79 / 2 = 38
assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 7), (35..40, 12)); // 75 / 2 = 37
assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 8), (36..37, 12)); // got the exact sum = 36
assert_eq!(wm.sum_experiment3(0..wm.len(), 0..wm.dim(), 12), (36..37, 12));
assert_eq!(wm.sum_experiment3(0..wm.len(), 6..7, 12), (6..7, 1));
assert_eq!(wm.sum_experiment3(3..8, 4..7, 12), (15..16, 3));
Sourcepub fn mean_experiment1(
&self,
pos_range: Range<usize>,
val_range: Range<u64>,
k: usize,
) -> u64
pub fn mean_experiment1( &self, pos_range: Range<usize>, val_range: Range<u64>, k: usize, ) -> u64
Approximately calculates the average of T[pos_range]
using up to k wavelet tree nodes.
It enumerates the top-k most relevant node ranges using .top_k_ranges()
function.
To get the exact result, specfy k = m + 1 where m is the number of values which are unique. E.g. If you have 256 unique values in the sequence T, specify k = 257 to get the exact result.
For calculation, this method uses u64 for summing. To avoid overflow during calculation, we recommend to use this function for < 32 bits values assuming the number of elements is ~ 32 bits.
§Panics
It panics when pos_range
is out of range.
§Example
use wavelet_matrix::WaveletMatrix;
let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
// 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
let wm = WaveletMatrix::new(&vec);
assert_eq!(wm.mean_experiment1(0..wm.len(), 0..wm.dim(), 1), 8);
assert_eq!(wm.mean_experiment1(0..wm.len(), 0..wm.dim(), 2), 4);
assert_eq!(wm.mean_experiment1(0..wm.len(), 0..wm.dim(), 3), 4);
assert_eq!(wm.mean_experiment1(0..wm.len(), 0..wm.dim(), 4), 4);
assert_eq!(wm.mean_experiment1(0..wm.len(), 0..wm.dim(), 5), 3); // got the actual average
assert_eq!(wm.mean_experiment1(0..wm.len(), 0..wm.dim(), 12), 3);
Sourcepub fn variance_experiment1(
&self,
pos_range: Range<usize>,
val_range: Range<u64>,
k: usize,
) -> u64
pub fn variance_experiment1( &self, pos_range: Range<usize>, val_range: Range<u64>, k: usize, ) -> u64
Approximately calculates the variance of T[pos_range]
using up to k wavelet tree nodes.
It enumerates the top-k most relevant node ranges using .top_k_ranges()
function.
To get the exact result, specfy k = m + 1 where m is the number of values which are unique. E.g. If you have 256 unique values in the sequence T, specify k = 257 to get the exact result.
For calculation, this method uses u64 for summing. To avoid overflow during calculation, we recommend to use this function for < 16 bits values assuming the number of elements is ~ 32 bits.
§Panics
It panics when pos_range
is out of range.
§Example
use wavelet_matrix::WaveletMatrix;
let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
// 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
let wm = WaveletMatrix::new(&vec);
let mean = wm.mean_experiment1(0..wm.len(), 0..wm.dim(), 8);
assert_eq!(mean, 3); // got the actual average
assert_eq!(wm.variance_experiment1(0..wm.len(), 0..wm.dim(), 8), 6);
assert_eq!(vec.iter()
.map(|&v| {
let diff = if v > mean {v - mean} else {mean - v};
diff * diff
})
.sum::<u64>() / (vec.len() as u64), 6);
Sourcepub fn median(&self, pos_range: Range<usize>) -> u64
pub fn median(&self, pos_range: Range<usize>) -> u64
returns the median value from T[pos_range]
.
same as .quantile(start..end, start + (end - start) / 2)
.
§Panics
It panics when pos_range
is out of range.
§Example
use wavelet_matrix::WaveletMatrix;
let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
// 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
let wm = WaveletMatrix::new(&vec);
assert_eq!(wm.median(0..wm.len()), 2); // median
assert_eq!(wm.median(6..wm.len()), 4); // median
// Let's compare with the sorted vector version
let mut sorted = vec.clone();
sorted.sort(); // O(n log n)
assert_eq!(wm.median(0..wm.len()), sorted[wm.len() / 2]);
Sourcepub fn quantile(&self, pos_range: Range<usize>, k: usize) -> u64
pub fn quantile(&self, pos_range: Range<usize>, k: usize) -> u64
Returns the (k+1)th smallest value from T[pos_range]
.
§Panics
It panics when pos_range
is out of range.
§Example
use wavelet_matrix::WaveletMatrix;
let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
// 0 1 2 3 4 5 6 7 8 9 10 11 (length = 12)
let wm = WaveletMatrix::new(&vec);
assert_eq!(wm.quantile(0..wm.len(), 0), 0); // min
assert_eq!(wm.quantile(0..wm.len(), 3), 1);
assert_eq!(wm.quantile(0..wm.len(), 6), 2); // median
assert_eq!(wm.quantile(0..wm.len(), 11), 9); // max
// assert_eq!(wm.quantile(0..wm.len(), 12), 9); // out of range
// Let's compare with the sorted vector
let mut sorted = vec.clone();
sorted.sort(); // O(n log n)
for i in 0..sorted.len() {
assert_eq!(wm.quantile(0..wm.len(), i), sorted[i]);
}