use crate::int_vector::IntVector;
use crate::ops::{Vector, Access, Push, Resize, BitVec, Rank, Select, SelectZero, PredSucc};
use crate::rl_vector::index::SampleIndex;
use crate::serialize::Serialize;
use crate::bits;
use std::io::{Error, ErrorKind};
use std::iter::FusedIterator;
use std::{cmp, io};
pub mod index;
#[cfg(test)]
mod tests;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RLVector {
len: usize,
ones: usize,
rank_index: SampleIndex,
select_index: SampleIndex,
select_zero_index: SampleIndex,
samples: IntVector,
data: IntVector,
}
impl RLVector {
pub const CODE_SIZE: usize = 4;
const CODE_SHIFT: usize = Self::CODE_SIZE - 1;
const CODE_FLAG: u64 = 1 << Self::CODE_SHIFT;
const CODE_MASK: u64 = (1 << Self::CODE_SHIFT) - 1;
pub const BLOCK_SIZE: usize = 64;
pub fn copy_bit_vec<'a, T: BitVec<'a> + Select<'a>>(source: &'a T) -> Self {
let mut builder = RLBuilder::new();
for (_, index) in source.one_iter() {
unsafe { builder.set_bit_unchecked(index); }
}
builder.set_len(source.len());
RLVector::from(builder)
}
pub fn run_iter(&self) -> RunIter<'_> {
RunIter {
parent: self,
offset: 0,
pos: (0, 0),
limit: self.ones_after(0),
}
}
fn blocks(&self) -> usize {
self.samples.len() / 2
}
fn ones_after(&self, block: usize) -> usize {
if block + 1 < self.blocks() {
self.samples.get(2 * (block + 1)) as usize
} else {
self.count_ones()
}
}
fn decode(&self, offset: usize) -> (usize, usize) {
let mut value = 0;
let mut offset = offset;
let mut shift = 0;
loop {
let code = self.data.get(offset);
offset += 1;
value += ((code & Self::CODE_MASK) << shift) as usize;
shift += Self::CODE_SHIFT;
if code & Self::CODE_FLAG == 0 {
return (value, offset);
}
}
}
fn block_for<F: Fn(usize) -> usize>(low: usize, high: usize, value: usize, f: F) -> usize {
let mut low = low;
let mut high = high;
while high - low > 1 {
let mid = low + (high - low) / 2;
let candidate = f(mid);
if candidate <= value {
low = mid;
} else {
high = mid;
}
}
low
}
fn iter_for_block(&self, block: usize) -> RunIter<'_> {
let pos = if self.samples.is_empty() { (0, 0) } else { (self.samples.get(2 * block) as usize, self.samples.get(2 * block + 1) as usize) };
RunIter {
parent: self,
offset: block * Self::BLOCK_SIZE,
pos,
limit: self.ones_after(block),
}
}
fn iter_for_bit(&self, index: usize) -> RunIter<'_> {
if index >= self.len() {
return RunIter::empty_iter(self);
}
let range = self.rank_index.range(index);
let block = Self::block_for(range.start, range.end, index, |i| self.samples.get(2 * i + 1) as usize);
self.iter_for_block(block)
}
fn iter_for_one(&self, rank: usize) -> RunIter<'_> {
if rank >= self.count_ones() {
return RunIter::empty_iter(self);
}
let range = self.select_index.range(rank);
let block = Self::block_for(range.start, range.end, rank, |i| self.samples.get(2 * i) as usize);
self.iter_for_block(block)
}
fn iter_for_zero(&self, rank: usize) -> RunIter<'_> {
if rank >= self.count_zeros() {
return RunIter::empty_iter(self);
}
let range = self.select_zero_index.range(rank);
let block = Self::block_for(range.start, range.end, rank, |i| (self.samples.get(2 * i + 1) - self.samples.get(2 * i)) as usize);
self.iter_for_block(block)
}
}
#[derive(Clone, Debug)]
pub struct RLBuilder {
len: usize,
ones: usize,
tail: usize,
run: (usize, usize),
samples: Vec<(usize, usize)>,
data: IntVector,
}
impl RLBuilder {
pub fn new() -> Self {
RLBuilder::default()
}
pub fn len(&self) -> usize {
self.len
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn count_ones(&self) -> usize {
self.ones
}
pub fn count_zeros(&self) -> usize {
self.len() - self.count_ones()
}
fn tail(&self) -> usize {
self.tail
}
fn blocks(&self) -> usize {
self.samples.len()
}
pub fn try_set(&mut self, start: usize, len: usize) -> Result<(), String> {
if start < self.len() {
return Err(format!("RLBuilder: Cannot set bit {} when length is {}", start, self.len()));
}
if usize::MAX - len < start {
return Err(format!("RLBuilder: A run of length {} starting at {} exceeds the maximum length of a bitvector", start, len));
}
unsafe { self.set_run_unchecked(start, len); }
Ok(())
}
pub unsafe fn set_bit_unchecked(&mut self, index: usize) {
self.set_run_unchecked(index, 1);
}
pub unsafe fn set_run_unchecked(&mut self, start: usize, len: usize) {
if len == 0 {
return;
}
if start == self.len() {
self.len += len;
self.ones += len;
self.run.1 += len;
} else {
self.flush();
self.len = start + len;
self.ones += len;
self.run = (start, len);
}
}
pub fn set_len(&mut self, len: usize) {
if len > self.len() {
self.flush();
self.len = len;
}
}
fn flush(&mut self) {
if self.run.1 == 0 {
return;
}
let units_needed = Self::code_len(self.run.0 - self.tail()) + Self::code_len(self.run.1 - 1);
if self.data.len() + units_needed > self.blocks() * RLVector::BLOCK_SIZE {
self.data.resize(self.blocks() * RLVector::BLOCK_SIZE, 0);
let sample = (self.ones - self.run.1, self.tail);
self.samples.push(sample);
}
self.encode(self.run.0 - self.tail());
self.encode(self.run.1 - 1);
self.tail = self.run.0 + self.run.1;
self.run = (self.len(), 0);
}
fn encode(&mut self, value: usize) {
let mut value = value as u64;
while value > RLVector::CODE_MASK {
self.data.push((value & RLVector::CODE_MASK) | RLVector::CODE_FLAG);
value = value >> RLVector::CODE_SHIFT;
}
self.data.push(value);
}
fn code_len(value: usize) -> usize {
bits::div_round_up(bits::bit_len(value as u64), RLVector::CODE_SHIFT)
}
}
impl Default for RLBuilder {
fn default() -> Self {
RLBuilder {
len: 0,
ones: 0,
tail: 0,
run: (0, 0),
samples: Vec::new(),
data: IntVector::new(RLVector::CODE_SIZE).unwrap(),
}
}
}
impl From<RLBuilder> for RLVector {
fn from(builder: RLBuilder) -> Self {
let mut builder = builder;
builder.flush();
let rank_index = SampleIndex::new(builder.samples.iter().map(|(_, bits)| *bits), builder.len());
let select_index = SampleIndex::new(builder.samples.iter().map(|(ones, _)| *ones), builder.count_ones());
let select_zero_index = SampleIndex::new(builder.samples.iter().map(|(ones, bits)| bits - ones), builder.count_zeros());
let max_value = builder.samples.last().unwrap_or(&(0, 0)).1;
let mut samples = IntVector::with_capacity(2 * builder.blocks(), bits::bit_len(max_value as u64)).unwrap();
for (ones, bits) in builder.samples.iter() {
samples.push(*ones as u64);
samples.push(*bits as u64);
}
RLVector {
len: builder.len(),
ones: builder.count_ones(),
rank_index,
select_index,
select_zero_index,
samples: samples,
data: builder.data,
}
}
}
#[derive(Clone, Debug)]
pub struct RunIter<'a> {
parent: &'a RLVector,
offset: usize,
pos: (usize, usize),
limit: usize,
}
impl<'a> RunIter<'a> {
pub fn offset(&self) -> usize {
self.pos.1
}
pub fn rank(&self) -> usize {
self.pos.0
}
pub fn rank_zero(&self) -> usize {
self.offset() - self.rank()
}
fn empty_iter(parent: &'a RLVector) -> Self {
RunIter {
parent,
offset: parent.data.len(),
pos: (parent.count_ones(), parent.len()),
limit: parent.count_ones(),
}
}
fn offset_for(&self, rank: usize) -> usize {
self.offset() - (self.rank() - rank)
}
fn rank_at(&self, index: usize) -> usize {
self.rank() - (self.offset() - index)
}
fn advance_if<F: FnMut(Option<<Self as Iterator>::Item>) -> bool>(&mut self, mut advance: F) -> Option<<Self as Iterator>::Item> {
if self.offset >= self.parent.data.len() {
let _ = advance(None);
return None;
}
let mut offset = self.offset;
let mut limit = self.limit;
if self.rank() >= self.limit {
let block = bits::div_round_up(offset, RLVector::BLOCK_SIZE);
offset = block * RLVector::BLOCK_SIZE;
if block >= self.parent.blocks() {
if advance(None) {
self.offset = offset;
}
return None;
}
limit = self.parent.ones_after(block);
}
let (gap, offset) = self.parent.decode(offset);
let start = self.offset() + gap;
let (len, offset) = self.parent.decode(offset);
let result = Some((start, len + 1));
if advance(result) {
self.offset = offset;
self.limit = limit;
self.pos.0 += len + 1;
self.pos.1 = start + len + 1;
}
result
}
}
impl<'a> Iterator for RunIter<'a> {
type Item = (usize, usize);
fn next(&mut self) -> Option<Self::Item> {
self.advance_if(|_| true)
}
}
impl<'a> FusedIterator for RunIter<'a> {}
#[derive(Clone, Debug)]
pub struct Iter<'a> {
iter: RunIter<'a>,
run: Option<(usize, usize)>,
pos: usize,
}
impl<'a> Iterator for Iter<'a> {
type Item = bool;
fn next(&mut self) -> Option<Self::Item> {
if let Some((start, len)) = self.run {
if self.pos >= start + len {
self.run = self.iter.next();
}
}
match self.run {
Some((start, _)) => {
self.pos += 1;
Some(self.pos > start)
},
None => {
if self.pos >= self.iter.parent.len() {
None
} else {
self.pos += 1;
Some(false)
}
},
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.iter.parent.len() - self.pos;
(remaining, Some(remaining))
}
}
impl<'a> ExactSizeIterator for Iter<'a> {}
impl<'a> FusedIterator for Iter<'a> {}
impl<'a> BitVec<'a> for RLVector {
type Iter = Iter<'a>;
#[inline]
fn len(&self) -> usize {
self.len
}
#[inline]
fn count_ones(&self) -> usize {
self.ones
}
fn get(&self, index: usize) -> bool {
let mut iter = self.iter_for_bit(index);
while let Some((start, _)) = iter.next() {
if start > index {
return false;
}
if index < iter.offset() {
return true;
}
}
false
}
fn iter(&'a self) -> Self::Iter {
Self::Iter {
iter: self.run_iter(),
run: Some((0, 0)),
pos: 0,
}
}
}
impl<'a> Rank<'a> for RLVector {
fn supports_rank(&self) -> bool {
true
}
fn enable_rank(&mut self) {}
fn rank(&self, index: usize) -> usize {
let mut iter = self.iter_for_bit(index);
while let Some((start, len)) = iter.next() {
if start >= index {
return iter.rank() - len;
}
if iter.offset() >= index {
return iter.rank_at(index);
}
}
iter.rank()
}
}
#[derive(Clone, Debug)]
pub struct OneIter<'a> {
iter: RunIter<'a>,
got_none: bool,
rank: usize,
}
impl<'a> OneIter<'a> {
fn empty_iter(parent: &'a RLVector) -> Self {
OneIter {
iter: RunIter::empty_iter(parent),
got_none: true,
rank: parent.count_ones(),
}
}
}
impl<'a> Iterator for OneIter<'a> {
type Item = (usize, usize);
fn next(&mut self) -> Option<Self::Item> {
if !self.got_none && self.rank >= self.iter.rank() {
self.got_none = self.iter.next().is_none();
}
if self.got_none {
None
} else {
let result = (self.rank, self.iter.offset_for(self.rank));
self.rank += 1;
Some(result)
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.iter.parent.count_ones() - self.rank;
(remaining, Some(remaining))
}
}
impl<'a> ExactSizeIterator for OneIter<'a> {}
impl<'a> FusedIterator for OneIter<'a> {}
#[derive(Clone, Debug)]
pub struct ZeroIter<'a> {
iter: RunIter<'a>,
got_none: bool,
pos: (usize, usize),
}
impl<'a> Iterator for ZeroIter<'a> {
type Item = (usize, usize);
fn next(&mut self) -> Option<Self::Item> {
if !self.got_none && self.pos.0 >= self.iter.rank_zero() {
self.pos.1 = self.iter.offset();
self.got_none = self.iter.next().is_none();
}
if self.pos.0 >= self.iter.parent.count_zeros() {
None
} else {
let result = self.pos;
self.pos.0 += 1; self.pos.1 += 1;
Some(result)
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.iter.parent.count_zeros() - self.pos.0;
(remaining, Some(remaining))
}
}
impl<'a> ExactSizeIterator for ZeroIter<'a> {}
impl<'a> FusedIterator for ZeroIter<'a> {}
impl<'a> Select<'a> for RLVector {
type OneIter = OneIter<'a>;
fn supports_select(&self) -> bool {
true
}
fn enable_select(&mut self) {}
fn one_iter(&'a self) -> Self::OneIter {
Self::OneIter {
iter: self.run_iter(),
got_none: false,
rank: 0,
}
}
fn select(&'a self, rank: usize) -> Option<usize> {
if rank >= self.count_ones() {
return None;
}
let mut iter = self.iter_for_one(rank);
while iter.rank() <= rank {
let _ = iter.next();
}
Some(iter.offset_for(rank))
}
fn select_iter(&'a self, rank: usize) -> Self::OneIter {
if rank >= self.count_ones() {
return Self::OneIter::empty_iter(self);
}
let mut iter = self.iter_for_one(rank);
while iter.rank() < rank {
let _ = iter.next();
}
Self::OneIter {
iter,
got_none: false,
rank,
}
}
}
impl<'a> SelectZero<'a> for RLVector {
type ZeroIter = ZeroIter<'a>;
fn supports_select_zero(&self) -> bool {
true
}
fn enable_select_zero(&mut self) {}
fn zero_iter(&'a self) -> Self::ZeroIter {
let mut iter = self.run_iter();
let got_none = iter.next().is_none();
Self::ZeroIter {
iter,
got_none,
pos: (0, 0),
}
}
fn select_zero(&'a self, rank: usize) -> Option<usize> {
if rank >= self.count_zeros() {
return None;
}
let mut iter = self.iter_for_zero(rank);
let mut ones = iter.rank();
loop {
match iter.next() {
Some(_) => {
if iter.rank_zero() > rank {
return Some(rank + ones);
}
ones = iter.rank();
},
None => {
return Some(rank + ones);
},
}
}
}
fn select_zero_iter(&'a self, rank: usize) -> Self::ZeroIter {
if rank >= self.count_zeros() {
return Self::ZeroIter {
iter: RunIter::empty_iter(self),
got_none: true,
pos: (self.count_zeros(), self.len()),
}
}
let mut iter = self.iter_for_zero(rank);
let mut ones = iter.rank();
loop {
match iter.next() {
Some(_) => {
if iter.rank_zero() > rank {
return Self::ZeroIter {
iter,
got_none: false,
pos: (rank, rank + ones),
};
}
ones = iter.rank();
},
None => {
return Self::ZeroIter {
iter,
got_none: true,
pos: (rank, rank + ones),
}
},
}
}
}
}
impl<'a> PredSucc<'a> for RLVector {
type OneIter = OneIter<'a>;
fn supports_pred_succ(&self) -> bool {
true
}
fn enable_pred_succ(&mut self) {}
fn predecessor(&'a self, value: usize) -> Self::OneIter {
if self.is_empty() {
return Self::OneIter::empty_iter(self);
}
let value = cmp::min(value, self.len() - 1);
let mut iter = self.iter_for_bit(value);
let mut iterate = true;
while iterate {
let _ = iter.advance_if(|next| {
if next.is_none() {
iterate = false;
return false;
}
let (start, _) = next.unwrap();
iterate = start <= value;
return iterate;
});
}
if iter.rank() == 0 {
return Self::OneIter::empty_iter(self);
}
let rank = if iter.offset() > value { iter.rank_at(value) } else { iter.rank() - 1 };
Self::OneIter {
iter,
got_none: false,
rank,
}
}
fn successor(&'a self, value: usize) -> Self::OneIter {
if value >= self.len() {
return Self::OneIter::empty_iter(self);
}
let mut iter = self.iter_for_bit(value);
let rank;
loop {
let result = iter.next();
if result.is_none() {
return Self::OneIter::empty_iter(self);
}
let (start, len) = result.unwrap();
if start > value {
rank = iter.rank() - len;
break;
}
if iter.offset() > value {
rank = iter.rank_at(value);
break;
}
}
Self::OneIter {
iter,
got_none: false,
rank,
}
}
}
impl Serialize for RLVector {
fn serialize_header<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
self.len.serialize(writer)?;
self.ones.serialize(writer)?;
Ok(())
}
fn serialize_body<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
self.samples.serialize(writer)?;
self.data.serialize(writer)?;
Ok(())
}
fn load<T: io::Read>(reader: &mut T) -> io::Result<Self> {
let len = usize::load(reader)?;
let ones = usize::load(reader)?;
let samples = IntVector::load(reader)?;
let data = IntVector::load(reader)?;
let sample_blocks = samples.len() / 2;
let data_blocks = bits::div_round_up(data.len(), Self::BLOCK_SIZE);
if sample_blocks != data_blocks {
return Err(Error::new(ErrorKind::InvalidData, "Mismatch between number of blocks and samples"));
}
let rank_index = SampleIndex::new((0..sample_blocks).map(|block| samples.get(2 * block + 1) as usize), len);
let select_index = SampleIndex::new((0..sample_blocks).map(|block| samples.get(2 * block) as usize), ones);
let select_zero_index = SampleIndex::new((0..sample_blocks).map(|block| (samples.get(2 * block + 1) - samples.get(2 * block)) as usize), len - ones);
let result = RLVector {
len, ones, rank_index, select_index, select_zero_index, samples, data,
};
Ok(result)
}
fn size_in_elements(&self) -> usize {
self.len.size_in_elements() +
self.ones.size_in_elements() +
self.samples.size_in_elements() +
self.data.size_in_elements()
}
}