Crate wavelet_matrix [] [src]

This crate provides state-of-the-art O(1) (or near-O(1)) queries on a large number of unsigned integers.

The typical memory usage of this data structure can be estimated as follows.

  • bit_len * num_of_elements / 8 * 1.25 [bytes]

In other words, it consumes roughly 25% more space than the original data packed at bit_len bits per element.

Example

use wavelet_matrix::WaveletMatrix;

let vec: Vec<u64> = vec![1, 2, 4, 5, 1, 0, 4, 6, 2, 9, 2, 0];
//                       0  1  2  3  4  5  6  7  8  9 10 11 (length = 12)
let wm = WaveletMatrix::new(&vec);

// Basic access
assert_eq!(wm.len(), vec.len());
assert_eq!(wm.lookup(7), vec[7]);
assert_eq!(wm.dim(), 10); // max value + 1
assert_eq!(wm.bit_len(), 4); // bit length stored internally

// Counting
assert_eq!(wm.count(0..wm.len(), 2), 3);
assert_eq!(wm.count(0..wm.len(), 4), 2);
assert_eq!(wm.count(0..wm.len(), 5), 1);
assert_eq!(wm.count(0..wm.len(), 7), 0);
assert_eq!(wm.count(0..wm.len(), 39), 0);

assert_eq!(wm.count_prefix(0..wm.len(), 8, 3), 1);
assert_eq!(wm.count_prefix(0..wm.len(), 6, 1), 1);
assert_eq!(wm.count_prefix(0..wm.len(), 0, 1), 4);
assert_eq!(wm.count_prefix(0..wm.len(), 0, 2), 7);

assert_eq!(wm.count_lt(0..wm.len(), 2), 4);
assert_eq!(wm.count_lt(0..wm.len(), 7), 11);

assert_eq!(wm.count_gt(0..wm.len(), 2), 5);
assert_eq!(wm.count_gt(0..wm.len(), 7), 1);

assert_eq!(wm.count_range(0..wm.len(), 0..wm.dim()), 12);
assert_eq!(wm.count_range(0..wm.len(), 4..6), 3);

// Searching
assert_eq!(wm.search(0..wm.len(), 4).collect::<Vec<usize>>(), vec![2, 6]);
assert_eq!(wm.search(3..wm.len(), 4).collect::<Vec<usize>>(), vec![6]);
assert_eq!(wm.search(0..wm.len(), 7).collect::<Vec<usize>>(), vec![]);

// Ranking: (value, count), frequent values first
assert_eq!(wm.top_k(0..wm.len(), 0..wm.dim(), 12),
           vec![(2, 3), (1, 2), (4, 2), (0, 2), (5, 1), (6, 1), (9, 1)]);
assert_eq!(wm.top_k(0..wm.len(), 0..wm.dim(), 4),
           vec![(2, 3), (1, 2), (4, 2), (0, 2)]);
assert_eq!(wm.top_k(0..wm.len(), 2..9, 12),
           vec![(2, 3), (4, 2), (5, 1), (6, 1)]);

// Ranking: (value, count), max values first
assert_eq!(wm.max_k(0..wm.len(), 0..wm.dim(), 12),
           vec![(9, 1), (6, 1), (5, 1), (4, 2), (2, 3), (1, 2), (0, 2)]);
assert_eq!(wm.max_k(0..wm.len(), 0..wm.dim(), 4),
           vec![(9, 1), (6, 1), (5, 1), (4, 2)]);
assert_eq!(wm.max_k(0..wm.len(), 2..9, 12),
           vec![(6, 1), (5, 1), (4, 2), (2, 3)]);

// Ranking: (value, count), min values first
assert_eq!(wm.min_k(0..wm.len(), 0..wm.dim(), 12),
           vec![(0, 2), (1, 2), (2, 3), (4, 2), (5, 1), (6, 1), (9, 1)]);
assert_eq!(wm.min_k(0..wm.len(), 0..wm.dim(), 4),
           vec![(0, 2), (1, 2), (2, 3), (4, 2)]);
assert_eq!(wm.min_k(0..wm.len(), 2..9, 12),
           vec![(2, 3), (4, 2), (5, 1), (6, 1)]);

// Statistics
assert_eq!(wm.quantile(0..wm.len(), 0), 0);
assert_eq!(wm.quantile(0..wm.len(), 8), 4);
assert_eq!(wm.quantile(0..wm.len(), 11), 9);

Structs

WaveletMatrix

WaveletMatrix supports various near-O(1) queries on a sequence of integers.

WaveletMatrixBuilder

A thin builder for constructing a WaveletMatrix.

WaveletMatrixSearch

Iterator struct used by WaveletMatrix::search().