#![cfg(target_pointer_width = "64")]
pub mod iter;
use std::io::{Read, Write};
use std::ops::Range;
use anyhow::{anyhow, Result};
use crate::bit_vectors::{Access, BitVector, DArray, NumBits, Select};
use crate::broadword;
use crate::Serializable;
use iter::Iter;
const LINEAR_SCAN_THRESHOLD: usize = 64;
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct EliasFano {
high_bits: DArray,
low_bits: BitVector,
low_len: usize,
universe: usize,
}
impl EliasFano {
pub fn from_bits<I>(bits: I) -> Result<Self>
where
I: IntoIterator<Item = bool>,
{
let bv = BitVector::from_bits(bits);
if bv.num_bits() == 0 {
return Err(anyhow!("bits must not be empty."));
}
let n = bv.num_bits();
let m = (0..bv.num_words()).fold(0, |acc, i| acc + broadword::popcount(bv.words()[i]));
if m == 0 {
return Err(anyhow!("bits must contains one set bit at least."));
}
let mut b = EliasFanoBuilder::new(n, m)?;
for i in 0..n {
if bv.access(i).unwrap() {
b.push(i)?;
}
}
Ok(b.build())
}
#[must_use]
pub fn enable_rank(mut self) -> Self {
self.high_bits = self.high_bits.enable_select0();
self
}
#[inline(always)]
pub const fn has_rank(&self) -> bool {
self.high_bits.has_select0()
}
#[inline(always)]
pub fn delta(&self, k: usize) -> Option<usize> {
if self.len() <= k {
return None;
}
let high_val = self.high_bits.select1(k).unwrap();
let low_val = self
.low_bits
.get_bits(k * self.low_len, self.low_len)
.unwrap();
let x = if k != 0 {
((high_val
- self
.high_bits
.bit_vector()
.predecessor1(high_val - 1)
.unwrap()
- 1)
<< self.low_len)
+ low_val
- self
.low_bits
.get_bits((k - 1) * self.low_len, self.low_len)
.unwrap()
} else {
((high_val - k) << self.low_len) | low_val
};
Some(x)
}
#[inline(always)]
pub fn binsearch(&self, val: usize) -> Option<usize> {
self.binsearch_range(0..self.len(), val)
}
#[inline(always)]
pub fn binsearch_range(&self, range: Range<usize>, val: usize) -> Option<usize> {
if range.is_empty() {
return None;
}
let (mut lo, mut hi) = (range.start, range.end);
while hi - lo > LINEAR_SCAN_THRESHOLD {
let mi = (lo + hi) / 2;
let x = self.select(mi).unwrap();
if val == x {
return Some(mi);
}
if val < x {
hi = mi;
} else {
lo = mi + 1;
}
}
let mut it = self.iter(lo);
for i in lo..hi {
let x = it.next().unwrap();
if val == x {
return Some(i);
}
}
None
}
pub fn rank(&self, pos: usize) -> Option<usize> {
if self.universe() < pos {
return None;
}
if self.universe() == pos {
return Some(self.len());
}
let h_rank = pos >> self.low_len;
let mut h_pos = self.high_bits.select0(h_rank).unwrap();
let mut rank = h_pos - h_rank;
let l_pos = pos & ((1 << self.low_len) - 1);
while h_pos > 0
&& self.high_bits.access(h_pos - 1).unwrap()
&& self
.low_bits
.get_bits((rank - 1) * self.low_len, self.low_len)
.unwrap()
>= l_pos
{
rank -= 1;
h_pos -= 1;
}
Some(rank)
}
pub fn select(&self, k: usize) -> Option<usize> {
if self.len() <= k {
None
} else {
Some(
((self.high_bits.select1(k).unwrap() - k) << self.low_len)
| self
.low_bits
.get_bits(k * self.low_len, self.low_len)
.unwrap(),
)
}
}
pub fn predecessor(&self, pos: usize) -> Option<usize> {
if self.universe() <= pos {
None
} else {
Some(self.rank(pos + 1).unwrap())
.filter(|&i| i > 0)
.map(|i| self.select(i - 1).unwrap())
}
}
pub fn successor(&self, pos: usize) -> Option<usize> {
if self.universe() <= pos {
None
} else {
Some(self.rank(pos).unwrap())
.filter(|&i| i < self.len())
.map(|i| self.select(i).unwrap())
}
}
pub fn iter(&self, k: usize) -> Iter {
Iter::new(self, k)
}
#[inline(always)]
pub fn len(&self) -> usize {
self.high_bits.num_ones()
}
#[inline(always)]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline(always)]
pub const fn universe(&self) -> usize {
self.universe
}
}
impl Serializable for EliasFano {
fn serialize_into<W: Write>(&self, mut writer: W) -> Result<usize> {
let mut mem = 0;
mem += self.high_bits.serialize_into(&mut writer)?;
mem += self.low_bits.serialize_into(&mut writer)?;
mem += self.low_len.serialize_into(&mut writer)?;
mem += self.universe.serialize_into(&mut writer)?;
Ok(mem)
}
fn deserialize_from<R: Read>(mut reader: R) -> Result<Self> {
let high_bits = DArray::deserialize_from(&mut reader)?;
let low_bits = BitVector::deserialize_from(&mut reader)?;
let low_len = usize::deserialize_from(&mut reader)?;
let universe = usize::deserialize_from(&mut reader)?;
Ok(Self {
high_bits,
low_bits,
low_len,
universe,
})
}
fn size_in_bytes(&self) -> usize {
self.high_bits.size_in_bytes()
+ self.low_bits.size_in_bytes()
+ usize::size_of().unwrap() * 2
}
}
pub struct EliasFanoBuilder {
high_bits: BitVector,
low_bits: BitVector,
universe: usize,
num_vals: usize,
pos: usize,
last: usize,
low_len: usize,
}
impl EliasFanoBuilder {
pub fn new(universe: usize, num_vals: usize) -> Result<Self> {
if num_vals == 0 {
return Err(anyhow!("num_vals must not be zero."));
}
let low_len = broadword::msb(universe / num_vals).unwrap_or(0);
Ok(Self {
high_bits: BitVector::from_bit(false, (num_vals + 1) + (universe >> low_len) + 1),
low_bits: BitVector::new(),
universe,
num_vals,
pos: 0,
last: 0,
low_len,
})
}
pub fn push(&mut self, val: usize) -> Result<()> {
if val < self.last {
return Err(anyhow!(
"val must be no less than the last one {}, but got {val}.",
self.last
));
}
if self.universe <= val {
return Err(anyhow!(
"val must be less than self.universe()={}, but got {val}.",
self.universe
));
}
if self.num_vals <= self.pos {
return Err(anyhow!(
"The number of pushed integers must not exceed self.num_vals()={}.",
self.num_vals
));
}
self.last = val;
let low_mask = (1 << self.low_len) - 1;
if self.low_len != 0 {
self.low_bits
.push_bits(val & low_mask, self.low_len)
.unwrap();
}
self.high_bits
.set_bit((val >> self.low_len) + self.pos, true)
.unwrap();
self.pos += 1;
Ok(())
}
pub fn extend<I>(&mut self, vals: I) -> Result<()>
where
I: IntoIterator<Item = usize>,
{
for x in vals {
self.push(x)?;
}
Ok(())
}
pub fn build(self) -> EliasFano {
EliasFano {
high_bits: DArray::from_bits(self.high_bits.iter()),
low_bits: self.low_bits,
low_len: self.low_len,
universe: self.universe,
}
}
#[inline(always)]
pub const fn universe(&self) -> usize {
self.universe
}
#[inline(always)]
pub const fn num_vals(&self) -> usize {
self.num_vals
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_from_bits_empty() {
let e = EliasFano::from_bits([]);
assert_eq!(
e.err().map(|x| x.to_string()),
Some("bits must not be empty.".to_string())
);
}
#[test]
fn test_from_bits_unset() {
let e = EliasFano::from_bits([false, false, false]);
assert_eq!(
e.err().map(|x| x.to_string()),
Some("bits must contains one set bit at least.".to_string())
);
}
#[test]
fn test_serialize() {
let mut bytes = vec![];
let ef = EliasFano::from_bits([false, true, true, true, false, true])
.unwrap()
.enable_rank();
let size = ef.serialize_into(&mut bytes).unwrap();
let other = EliasFano::deserialize_from(&bytes[..]).unwrap();
assert_eq!(ef, other);
assert_eq!(size, bytes.len());
assert_eq!(size, ef.size_in_bytes());
}
#[test]
fn test_builder_new_zero_size() {
let e = EliasFanoBuilder::new(3, 0);
assert_eq!(
e.err().map(|x| x.to_string()),
Some("num_vals must not be zero.".to_string())
);
}
#[test]
fn test_builder_push_decrease() {
let mut b = EliasFanoBuilder::new(3, 2).unwrap();
b.push(2).unwrap();
let e = b.push(1);
assert_eq!(
e.err().map(|x| x.to_string()),
Some("val must be no less than the last one 2, but got 1.".to_string())
);
}
#[test]
fn test_builder_overflow_universe() {
let mut b = EliasFanoBuilder::new(3, 2).unwrap();
let e = b.push(3);
assert_eq!(
e.err().map(|x| x.to_string()),
Some("val must be less than self.universe()=3, but got 3.".to_string())
);
}
#[test]
fn test_builder_overflow_num_vals() {
let mut b = EliasFanoBuilder::new(3, 1).unwrap();
b.push(1).unwrap();
let e = b.push(2);
assert_eq!(
e.err().map(|x| x.to_string()),
Some("The number of pushed integers must not exceed self.num_vals()=1.".to_string())
);
}
}