use crate::int_vector::IntVector;
use crate::ops::{Vector, Access, AccessIter, VectorIndex, Pack};
use crate::serialize::Serialize;
use crate::wavelet_matrix::wm_core::WMCore;
use std::io::{Error, ErrorKind};
use std::iter::FusedIterator;
use std::io;
pub mod wm_core;
#[cfg(test)]
mod tests;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct WaveletMatrix {
len: usize,
data: WMCore,
first: IntVector,
}
impl WaveletMatrix {
fn start(&self, value: <Self as Vector>::Item) -> usize {
self.first.get(value as usize) as usize
}
fn start_offsets<Iter: Iterator<Item = u64>>(iter: Iter, len: usize, max_value: u64) -> IntVector {
let mut counts: Vec<(u64, usize)> = Vec::with_capacity((max_value + 1) as usize);
for i in 0..=max_value {
counts.push((i, 0));
}
for value in iter {
counts[value as usize].1 += 1;
}
counts.sort_unstable_by_key(|(value, _)| value.reverse_bits());
let mut cumulative = 0;
for (_, count) in counts.iter_mut() {
if *count == 0 {
*count = len;
} else {
let increment = *count;
*count = cumulative;
cumulative += increment;
}
}
counts.sort_unstable_by_key(|(value, _)| *value);
let mut result: IntVector = counts.into_iter().map(|(_, offset)| offset).collect();
result.pack();
result
}
}
macro_rules! wavelet_matrix_from {
($t:ident) => {
impl From<Vec<$t>> for WaveletMatrix {
fn from(source: Vec<$t>) -> Self {
let len = source.len();
let max_value = source.iter().cloned().max().unwrap_or(0);
let first = Self::start_offsets(source.iter().map(|x| *x as u64), source.len(), max_value as u64);
let data = WMCore::from(source);
WaveletMatrix { len, data, first, }
}
}
}
}
wavelet_matrix_from!(u8);
wavelet_matrix_from!(u16);
wavelet_matrix_from!(u32);
wavelet_matrix_from!(u64);
wavelet_matrix_from!(usize);
impl Vector for WaveletMatrix {
type Item = u64;
#[inline]
fn len(&self) -> usize {
self.len
}
#[inline]
fn width(&self) -> usize {
self.data.width()
}
#[inline]
fn max_len(&self) -> usize {
usize::MAX
}
}
impl<'a> Access<'a> for WaveletMatrix {
type Iter = AccessIter<'a, Self>;
fn get(&self, index: usize) -> <Self as Vector>::Item {
self.inverse_select(index).unwrap().1
}
fn iter(&'a self) -> Self::Iter {
Self::Iter::new(self)
}
}
impl<'a> VectorIndex<'a> for WaveletMatrix {
type ValueIter = ValueIter<'a>;
fn contains(&self, value: <Self as Vector>::Item) -> bool {
(value as usize) < self.first.len() && self.start(value) < self.len()
}
fn rank(&self, index: usize, value: <Self as Vector>::Item) -> usize {
if !self.contains(value) {
return 0;
}
self.data.map_down_with(index, value) - self.start(value)
}
fn inverse_select(&self, index: usize) -> Option<(usize, <Self as Vector>::Item)> {
self.data.map_down(index).map(|(index, value)| (index - self.start(value), value))
}
fn value_iter(&'a self, value: <Self as Vector>::Item) -> Self::ValueIter {
Self::ValueIter {
parent: self,
value,
rank: 0,
}
}
fn value_of(iter: &Self::ValueIter) -> <Self as Vector>::Item {
iter.value
}
fn select(&self, rank: usize, value: <Self as Vector>::Item) -> Option<usize> {
if !self.contains(value) {
return None;
}
self.data.map_up_with(self.start(value) + rank, value)
}
fn select_iter(&'a self, rank: usize, value: <Self as Vector>::Item) -> Self::ValueIter {
Self::ValueIter {
parent: self,
value,
rank,
}
}
}
impl Serialize for WaveletMatrix {
fn serialize_header<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
self.len.serialize(writer)
}
fn serialize_body<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
self.data.serialize(writer)?;
self.first.serialize(writer)?;
Ok(())
}
fn load<T: io::Read>(reader: &mut T) -> io::Result<Self> {
let len = usize::load(reader)?;
let data = WMCore::load(reader)?;
if data.len() != len {
return Err(Error::new(ErrorKind::InvalidData, "Core length does not match wavelet matrix length"));
}
let first = IntVector::load(reader)?;
Ok(WaveletMatrix { len, data, first, })
}
fn size_in_elements(&self) -> usize {
let mut result = self.len.size_in_elements();
result += self.data.size_in_elements();
result += self.first.size_in_elements();
result
}
}
#[derive(Clone, Debug)]
pub struct ValueIter<'a> {
parent: &'a WaveletMatrix,
value: <WaveletMatrix as Vector>::Item,
rank: usize,
}
impl<'a> Iterator for ValueIter<'a> {
type Item = (usize, usize);
fn next(&mut self) -> Option<Self::Item> {
if self.rank >= self.parent.len() {
return None;
}
if let Some(index) = self.parent.select(self.rank, self.value) {
let result = Some((self.rank, index));
self.rank += 1;
result
} else {
self.rank = self.parent.len();
None
}
}
}
impl<'a> FusedIterator for ValueIter<'a> {}
#[derive(Clone, Debug)]
pub struct IntoIter {
parent: WaveletMatrix,
index: usize,
}
impl Iterator for IntoIter {
type Item = <WaveletMatrix as Vector>::Item;
fn next(&mut self) -> Option<Self::Item> {
if self.index >= self.parent.len() {
None
} else {
let result = Some(self.parent.get(self.index));
self.index += 1;
result
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.parent.len() - self.index;
(remaining, Some(remaining))
}
}
impl ExactSizeIterator for IntoIter {}
impl FusedIterator for IntoIter {}
impl IntoIterator for WaveletMatrix {
type Item = <Self as Vector>::Item;
type IntoIter = IntoIter;
fn into_iter(self) -> Self::IntoIter {
IntoIter {
parent: self,
index: 0,
}
}
}