use crate::bit_vector::BitVector;
use crate::int_vector::IntVector;
use crate::ops::{Vector, Access, BitVec, Rank, Select, PredSucc, SelectZero};
use crate::raw_vector::{RawVector, AccessRaw};
use crate::serialize::Serialize;
use crate::bits;
use std::convert::TryFrom;
use std::io::{Error, ErrorKind};
use std::iter::FusedIterator;
use std::{cmp, io};
#[cfg(test)]
mod tests;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SparseVector {
len: usize,
high: BitVector,
low: IntVector,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Pos {
high: usize,
low: usize,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Parts {
high: usize,
low: usize,
}
impl SparseVector {
const BINARY_SEARCH_THRESHOLD: usize = 16;
pub fn copy_bit_vec<'a, T: BitVec<'a> + Select<'a>>(source: &'a T) -> Self {
let mut builder = SparseBuilder::new(source.len(), source.count_ones()).unwrap();
for (_, index) in source.one_iter() {
unsafe { builder.set_unchecked(index); }
}
SparseVector::try_from(builder).unwrap()
}
pub fn try_from_iter<T: Iterator<Item = usize> + DoubleEndedIterator + ExactSizeIterator>(iter: T) -> Result<SparseVector, &'static str> {
let mut iter = iter;
let (ones, _) = iter.size_hint();
let universe = if let Some(pos) = iter.next_back() { pos + 1 } else { 0 };
let mut builder = SparseBuilder::multiset(universe, ones);
for pos in iter {
builder.try_set(pos)?;
}
if universe > 0 {
builder.try_set(universe - 1)?;
}
SparseVector::try_from(builder)
}
pub fn is_multiset(&self) -> bool {
let mut prev = self.len();
for (_, value) in self.one_iter() {
if value == prev {
return true;
}
prev = value;
}
false
}
fn split(&self, index: usize) -> Parts {
Parts {
high: index >> self.low.width(),
low: index & unsafe { bits::low_set_unchecked(self.low.width()) as usize },
}
}
fn combine(&self, pos: Pos) -> (usize, usize) {
(pos.low, ((pos.high - pos.low) << self.low.width()) + (self.low.get(pos.low) as usize))
}
fn pos(&self, rank: usize) -> Pos {
Pos {
high: self.high.select(rank).unwrap(),
low: rank,
}
}
fn lower_bound(&self, high_part: usize) -> Pos {
if high_part == 0 {
Pos { high: 0, low: 0, }
} else {
let high_offset = self.high.select_zero(high_part - 1).unwrap() + 1;
Pos { high: high_offset, low: high_offset - high_part, }
}
}
fn upper_bound(&self, high_part: usize) -> Pos {
let high_offset = self.high.select_zero(high_part).unwrap();
Pos { high: high_offset, low: high_offset - high_part, }
}
fn find_zero_run(&self, rank: usize) -> (usize, OneIter) {
let mut low = 0;
let mut high = self.count_ones();
let mut result = (0, self.one_iter());
while high - low > Self::BINARY_SEARCH_THRESHOLD {
let mid = low + (high - low) / 2;
let mut iter = self.select_iter(mid);
let (_, mid_pos) = iter.next().unwrap();
if mid_pos - mid <= rank {
result = (mid + 1, iter);
low = mid + 1;
} else {
high = mid;
}
}
let mut iter = result.1.clone();
while let Some((mid, mid_pos)) = iter.next() {
if mid_pos - mid <= rank {
result = (mid + 1, iter.clone());
} else {
break;
}
}
result
}
}
#[derive(Clone, Debug)]
pub struct SparseBuilder {
data: SparseVector,
high: RawVector,
len: usize,
next: usize,
increment: usize,
}
impl SparseBuilder {
pub fn new(universe: usize, ones: usize) -> Result<SparseBuilder, &'static str> {
if ones > universe {
return Err("Number of set bits is greater than universe size");
}
let (width, high_len) = Self::get_params(universe, ones);
let low = IntVector::with_len(ones, width, 0).unwrap();
let data = SparseVector {
len: universe,
high: BitVector::from(RawVector::new()),
low,
};
let high = RawVector::with_len(high_len, false);
Ok(SparseBuilder {
data,
high,
len: 0,
next: 0,
increment: 1,
})
}
pub fn multiset(universe: usize, ones: usize) -> SparseBuilder {
let (width, high_len) = Self::get_params(universe, ones);
let low = IntVector::with_len(ones, width, 0).unwrap();
let data = SparseVector {
len: universe,
high: BitVector::from(RawVector::new()),
low,
};
let high = RawVector::with_len(high_len, false);
SparseBuilder {
data,
high,
len: 0,
next: 0,
increment: 0,
}
}
pub fn is_multiset(&self) -> bool {
self.increment == 0
}
fn get_params(universe: usize, ones: usize) -> (usize, usize) {
let mut low_width: usize = 1;
if ones > 0 && ones <= universe {
let ideal_width = ((universe as f64 * 2.0_f64.ln()) / (ones as f64)).log2();
low_width = ideal_width.max(1.0).round() as usize;
}
let buckets = Self::get_buckets(universe, low_width);
(low_width, ones + buckets)
}
fn get_buckets(universe: usize, low_width: usize) -> usize {
let mut buckets = if low_width < bits::WORD_BITS { universe >> low_width } else { 0 };
if universe & (bits::low_set(low_width) as usize) != 0 {
buckets += 1;
}
buckets
}
pub fn len(&self) -> usize {
self.len
}
pub fn capacity(&self) -> usize {
self.data.count_ones()
}
pub fn universe(&self) -> usize {
self.data.len()
}
pub fn next_index(&self) -> usize {
self.next
}
pub fn is_full(&self) -> bool {
self.len() == self.capacity()
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn set(&mut self, index: usize) {
self.try_set(index).unwrap();
}
pub unsafe fn set_unchecked(&mut self, index: usize) {
let parts = self.data.split(index);
self.high.set_bit(parts.high + self.len, true);
self.data.low.set(self.len, parts.low as u64);
self.len += 1; self.next = index + self.increment;
}
pub fn try_set(&mut self, index: usize) -> Result<(), &'static str> {
if self.is_full() {
return Err("The builder is full");
}
if index < self.next_index() {
if self.increment == 0 {
return Err("Index must be >= previous set position");
} else {
return Err("Index must be > previous set position");
}
}
if index >= self.universe() {
return Err("Index is larger than universe size");
}
unsafe { self.set_unchecked(index); }
Ok(())
}
}
impl Extend<usize> for SparseBuilder {
fn extend<T: IntoIterator<Item = usize>>(&mut self, iter: T) {
for index in iter {
self.set(index);
}
}
}
impl TryFrom<SparseBuilder> for SparseVector {
type Error = &'static str;
fn try_from(builder: SparseBuilder) -> Result<Self, Self::Error> {
let mut builder = builder;
if !builder.is_full() {
return Err("The builder is not full");
}
builder.data.high = BitVector::from(builder.high);
builder.data.high.enable_select();
builder.data.high.enable_select_zero();
Ok(builder.data)
}
}
#[derive(Clone, Debug)]
pub struct Iter<'a> {
parent: OneIter<'a>,
next: usize,
next_set: Option<usize>,
limit: usize,
last_set: Option<usize>,
}
impl<'a> Iterator for Iter<'a> {
type Item = bool;
fn next(&mut self) -> Option<Self::Item> {
if self.next >= self.limit {
return None;
}
match self.next_set {
Some(value) => {
if value == self.next {
self.next_set = self.last_set;
for (_, index) in self.parent.by_ref() {
if index > self.next {
self.next_set = Some(index);
break;
}
}
self.next += 1;
Some(true)
} else {
self.next += 1;
Some(false)
}
},
None => {
self.next += 1;
Some(false)
},
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.limit - self.next;
(remaining, Some(remaining))
}
}
impl<'a> DoubleEndedIterator for Iter<'a> {
fn next_back(&mut self) -> Option<Self::Item> {
if self.next >= self.limit {
return None;
}
self.limit -= 1;
match self.last_set {
Some(value) => {
if value == self.limit {
self.last_set = self.next_set;
while let Some((_, index)) = self.parent.next_back() {
if index < self.limit {
self.last_set = Some(index);
break;
}
}
Some(true)
} else {
Some(false)
}
},
None => Some(false),
}
}
}
impl<'a> ExactSizeIterator for Iter<'a> {}
impl<'a> FusedIterator for Iter<'a> {}
impl<'a> BitVec<'a> for SparseVector {
type Iter = Iter<'a>;
#[inline]
fn len(&self) -> usize {
self.len
}
#[inline]
fn count_ones(&self) -> usize {
self.low.len()
}
#[inline]
fn count_zeros(&self) -> usize {
if self.count_ones() >= self.len() {
0
} else {
self.len() - self.count_ones()
}
}
fn get(&self, index: usize) -> bool {
let parts = self.split(index);
let mut pos = self.lower_bound(parts.high);
while pos.high < self.high.len() && self.high.get(pos.high) {
let low = self.low.get(pos.low) as usize;
if low >= parts.low {
return low == parts.low;
}
pos.high += 1; pos.low += 1;
}
false
}
fn iter(&'a self) -> Self::Iter {
let mut one_iter = self.one_iter();
let next_set = if let Some((_, index)) = one_iter.next() {
Some(index)
} else {
None
};
let last_set = if let Some((_, index)) = one_iter.next_back() {
Some(index)
} else {
next_set
};
Self::Iter {
parent: one_iter,
next: 0,
next_set,
limit: self.len(),
last_set,
}
}
}
impl<'a> Rank<'a> for SparseVector {
fn supports_rank(&self) -> bool {
true
}
fn enable_rank(&mut self) {}
fn rank(&self, index: usize) -> usize {
if index >= self.len() {
return self.count_ones();
}
let parts = self.split(index);
let mut pos = self.upper_bound(parts.high);
if pos.low == 0 {
return 0;
}
pos.high -= 1; pos.low -= 1;
while self.high.get(pos.high) && (self.low.get(pos.low) as usize) >= parts.low {
if pos.low == 0 {
return 0;
}
pos.high -= 1; pos.low -= 1;
}
pos.low + 1
}
}
#[derive(Clone, Debug)]
pub struct OneIter<'a> {
parent: &'a SparseVector,
next: Pos,
limit: Pos,
}
impl<'a> OneIter<'a> {
fn empty_iter(parent: &'a SparseVector) -> OneIter<'a> {
OneIter {
parent,
next: Pos { high: parent.high.len(), low: parent.low.len(), },
limit: Pos { high: parent.high.len(), low: parent.low.len(), },
}
}
}
impl<'a> Iterator for OneIter<'a> {
type Item = (usize, usize);
fn next(&mut self) -> Option<Self::Item> {
if self.next.low >= self.limit.low {
None
} else {
while !self.parent.high.get(self.next.high) {
self.next.high += 1;
}
let result = self.parent.combine(self.next);
self.next.high += 1; self.next.low += 1;
Some(result)
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.limit.low - self.next.low;
(remaining, Some(remaining))
}
}
impl<'a> DoubleEndedIterator for OneIter<'a> {
fn next_back(&mut self) -> Option<Self::Item> {
if self.next.low >= self.limit.low {
None
} else {
self.limit.high -= 1; self.limit.low -= 1;
while !self.parent.high.get(self.limit.high) {
self.limit.high -= 1;
}
Some(self.parent.combine(self.limit))
}
}
}
impl<'a> ExactSizeIterator for OneIter<'a> {}
impl<'a> FusedIterator for OneIter<'a> {}
#[derive(Clone, Debug)]
pub struct ZeroIter<'a> {
iter: OneIter<'a>,
one_pos: usize,
next: (usize, usize),
limit: (usize, usize),
}
impl<'a> ZeroIter<'a> {
fn empty_iter(parent: &'a SparseVector) -> ZeroIter<'a> {
ZeroIter {
iter: OneIter::empty_iter(parent),
one_pos: 0,
next: (0, 0),
limit: (0, 0),
}
}
fn next_run(&mut self) {
while self.next.1 >= self.one_pos {
self.next.1 = self.one_pos + 1;
self.one_pos = if let Some((_, pos)) = self.iter.next() { pos } else { self.limit.1 };
}
}
}
impl<'a> Iterator for ZeroIter<'a> {
type Item = (usize, usize);
fn next(&mut self) -> Option<Self::Item> {
if self.next.0 >= self.limit.0 {
None
} else {
self.next_run();
let result = self.next;
self.next.0 += 1;
self.next.1 += 1;
Some(result)
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.limit.0 - self.next.0;
(remaining, Some(remaining))
}
}
impl<'a> ExactSizeIterator for ZeroIter<'a> {}
impl<'a> FusedIterator for ZeroIter<'a> {}
impl<'a> Select<'a> for SparseVector {
type OneIter = OneIter<'a>;
fn supports_select(&self) -> bool {
true
}
fn enable_select(&mut self) {}
fn one_iter(&'a self) -> Self::OneIter {
Self::OneIter {
parent: self,
next: Pos { high: 0, low: 0, },
limit: Pos { high: self.high.len(), low: self.low.len(), },
}
}
fn select(&'a self, rank: usize) -> Option<usize> {
if rank >= self.count_ones() {
None
} else {
Some(self.combine(self.pos(rank)).1)
}
}
fn select_iter(&'a self, rank: usize) -> Self::OneIter {
if rank >= self.count_ones() {
Self::OneIter::empty_iter(self)
} else {
Self::OneIter {
parent: self,
next: self.pos(rank),
limit: Pos { high: self.high.len(), low: self.low.len(), },
}
}
}
}
impl<'a> SelectZero<'a> for SparseVector {
type ZeroIter = ZeroIter<'a>;
fn supports_select_zero(&self) -> bool {
true
}
fn enable_select_zero(&mut self) {}
fn zero_iter(&'a self) -> Self::ZeroIter {
let mut iter = self.one_iter();
let one_pos = if let Some((_, pos)) = iter.next() { pos } else { self.len() };
ZeroIter {
iter,
one_pos,
next: (0, 0),
limit: (self.count_zeros(), self.len()),
}
}
fn select_zero(&'a self, rank: usize) -> Option<usize> {
if rank >= self.count_zeros() {
return None;
}
let (run_rank, _) = self.find_zero_run(rank);
Some(run_rank + rank)
}
fn select_zero_iter(&'a self, rank: usize) -> Self::ZeroIter {
if rank >= self.count_zeros() {
return Self::ZeroIter::empty_iter(self);
}
let (run_rank, mut iter) = self.find_zero_run(rank);
let one_pos = if let Some((_, pos)) = iter.next() { pos } else { self.len() };
ZeroIter {
iter,
one_pos,
next: (rank, run_rank + rank),
limit: (self.count_zeros(), self.len()),
}
}
}
impl<'a> PredSucc<'a> for SparseVector {
type OneIter = OneIter<'a>;
fn supports_pred_succ(&self) -> bool {
true
}
fn enable_pred_succ(&mut self) {}
fn predecessor(&'a self, value: usize) -> Self::OneIter {
if self.is_empty() {
return Self::OneIter::empty_iter(self);
}
let parts = self.split(cmp::min(value, self.len() - 1));
let mut pos = self.upper_bound(parts.high);
if pos.low == 0 {
return Self::OneIter::empty_iter(self);
}
pos.high -= 1; pos.low -= 1;
while self.high.get(pos.high) && (self.low.get(pos.low) as usize) > parts.low {
if pos.low == 0 {
return Self::OneIter::empty_iter(self);
}
pos.high -= 1; pos.low -= 1;
}
while !self.high.get(pos.high) {
pos.high -= 1;
}
Self::OneIter {
parent: self,
next: pos,
limit: Pos { high: self.high.len(), low: self.low.len(), },
}
}
fn successor(&'a self, value: usize) -> Self::OneIter {
if value >= self.len() {
return Self::OneIter::empty_iter(self);
}
let parts = self.split(value);
let mut pos = self.lower_bound(parts.high);
while pos.high < self.high.len() && self.high.get(pos.high) {
if (self.low.get(pos.low) as usize) >= parts.low {
return Self::OneIter {
parent: self,
next: pos,
limit: Pos { high: self.high.len(), low: self.low.len(), },
};
}
pos.high += 1; pos.low += 1;
}
while pos.high < self.high.len() {
if self.high.get(pos.high) {
return Self::OneIter {
parent: self,
next: pos,
limit: Pos { high: self.high.len(), low: self.low.len(), },
};
}
pos.high += 1;
}
Self::OneIter::empty_iter(self)
}
}
impl Serialize for SparseVector {
fn serialize_header<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
self.len.serialize(writer)?;
Ok(())
}
fn serialize_body<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
self.high.serialize(writer)?;
self.low.serialize(writer)?;
Ok(())
}
fn load<T: io::Read>(reader: &mut T) -> io::Result<Self> {
let len = usize::load(reader)?;
let mut high = BitVector::load(reader)?;
let low = IntVector::load(reader)?;
high.enable_select();
high.enable_select_zero();
if low.len() != high.count_ones() {
return Err(Error::new(ErrorKind::InvalidData, "Inconsistent number of set bits"));
}
if high.len() != low.len() + SparseBuilder::get_buckets(len, low.width()){
return Err(Error::new(ErrorKind::InvalidData, "Invalid number of buckets"));
}
let result = SparseVector {
len, high, low,
};
Ok(result)
}
fn size_in_elements(&self) -> usize {
self.len.size_in_elements() +
self.high.size_in_elements() +
self.low.size_in_elements()
}
}