use bitvec::prelude::*;
use sucds::bit_vectors::{BitVector, DArray, Select};
#[derive(Default, Debug, Clone, PartialEq)]
pub struct EliasFano {
universe: usize,
element_count: usize,
upper_bits: DArray,
lower_bits: BitVector,
lower_bit_count: usize,
}
impl EliasFano {
pub fn index(&self, idx: usize) -> usize {
self.get(idx).expect("index out of bounds")
}
pub fn get(&self, index: usize) -> Option<usize> {
if index >= self.element_count {
None
} else {
Some(
((self.upper_bits.select1(index).unwrap() - index) << self.lower_bit_count)
| self
.lower_bits
.get_bits(index * self.lower_bit_count, self.lower_bit_count)
.unwrap(),
)
}
}
pub fn size(&self) -> usize {
self.element_count
}
pub fn universe(&self) -> usize {
self.universe
}
pub fn is_empty(&self) -> bool {
self.element_count == 0
}
pub fn iter(&self) -> EliasFanoIter<'_> {
EliasFanoIter { ef: self, index: 0 }
}
}
impl<T> From<&[T]> for EliasFano
where
T: TryInto<usize> + Copy + PartialOrd,
<T as TryInto<usize>>::Error: std::fmt::Debug,
{
fn from(values: &[T]) -> Self {
if values.is_empty() {
return Self::default();
}
assert!(
values.windows(2).all(|w| w[0] <= w[1]),
"Input sequence must be sorted in ascending order"
);
let last_value: usize = (*values.last().unwrap()).try_into().unwrap();
let mut builder = EliasFanoBuilder::new(last_value + 1, values.len());
for &value in values {
let value: usize = value.try_into().unwrap();
builder.add(value);
}
builder.finalize()
}
}
impl<T> From<Vec<T>> for EliasFano
where
T: TryInto<usize> + Copy + PartialOrd,
<T as TryInto<usize>>::Error: std::fmt::Debug,
{
fn from(values: Vec<T>) -> Self {
Self::from(values.as_slice())
}
}
impl<T> From<&Vec<T>> for EliasFano
where
T: TryInto<usize> + Copy + PartialOrd,
<T as TryInto<usize>>::Error: std::fmt::Debug,
{
fn from(values: &Vec<T>) -> Self {
Self::from(values.as_slice())
}
}
impl<T, const N: usize> From<&[T; N]> for EliasFano
where
T: TryInto<usize> + Copy + PartialOrd,
<T as TryInto<usize>>::Error: std::fmt::Debug,
{
fn from(values: &[T; N]) -> Self {
Self::from(values.as_slice())
}
}
impl<T, const N: usize> From<[T; N]> for EliasFano
where
T: TryInto<usize> + Copy + PartialOrd,
<T as TryInto<usize>>::Error: std::fmt::Debug,
{
fn from(values: [T; N]) -> Self {
Self::from(values.as_slice())
}
}
pub struct EliasFanoBuilder {
upper_bits: BitVector,
lower_bits: BitVector,
universe: usize,
element_count: usize,
current_index: usize,
previous_value: usize,
lower_bit_count: usize,
}
impl EliasFanoBuilder {
pub fn new(universe: usize, count: usize) -> Self {
assert!(count > 0, "Element count must be greater than zero");
let lower_bit_count = if count > 0 {
((universe as f64) / (count as f64)).log2().floor() as usize
} else {
0
};
let upper_size = count + (universe >> lower_bit_count) + 1;
Self {
upper_bits: BitVector::from_bits(bitvec![u64, Lsb0; 0; upper_size]),
lower_bits: BitVector::new(),
universe,
element_count: count,
current_index: 0,
previous_value: 0,
lower_bit_count,
}
}
pub fn add(&mut self, value: usize) {
assert!(
self.previous_value <= value,
"Values must be in ascending order"
);
assert!(
value < self.universe,
"Value {} exceeds maximum allowed value {}",
value,
self.universe
);
assert!(
self.current_index < self.element_count,
"Sequence is already full"
);
self.previous_value = value;
if self.lower_bit_count > 0 {
let lower_mask = (1 << self.lower_bit_count) - 1;
let lower_value = value & lower_mask;
for i in 0..self.lower_bit_count {
self.lower_bits.push_bit((lower_value & (1 << i)) != 0);
}
}
let upper_value = value >> self.lower_bit_count;
_ = self
.upper_bits
.set_bit(upper_value + self.current_index, true);
self.current_index += 1;
}
pub fn add_all<'a, I>(&mut self, values: I)
where
I: IntoIterator<Item = &'a usize>,
{
for &value in values {
self.add(value);
}
}
pub fn finalize(self) -> EliasFano {
EliasFano {
upper_bits: DArray::from_bits(self.upper_bits.iter()).enable_select0(),
lower_bits: self.lower_bits,
lower_bit_count: self.lower_bit_count,
universe: self.universe,
element_count: self.element_count,
}
}
}
pub struct EliasFanoIter<'a> {
ef: &'a EliasFano,
index: usize,
}
impl<'a> EliasFanoIter<'a> {
pub fn lookup(&mut self, value: usize) -> Option<usize> {
if value >= self.ef.universe {
return None;
}
let pos = self.lookup_next(value);
self.index = pos;
if pos < self.ef.element_count {
if let Some(current) = self.ef.get(pos) {
if current == value {
return Some(pos);
}
}
}
None
}
pub fn lookup_next(&self, value: usize) -> usize {
if value >= self.ef.universe {
return self.ef.element_count - 1;
}
if let Some(current) = self.ef.get(self.index) {
if value == current {
return self.index;
}
if value > current {
const LINEAR_SCAN_THRESHOLD: usize = 8;
let high_value = value >> self.ef.lower_bit_count;
let high_current = current >> self.ef.lower_bit_count;
let high_diff = high_value.saturating_sub(high_current);
if high_diff <= LINEAR_SCAN_THRESHOLD {
let mut pos = self.index + 1;
while pos < self.ef.element_count {
if let Some(val) = self.ef.get(pos) {
if val >= value {
return pos;
}
pos += 1;
} else {
break;
}
}
return self.ef.element_count - 1;
}
}
}
let mut left = self.index;
let mut right = self.ef.element_count;
while left < right {
let mid = left + (right - left) / 2;
if let Some(val) = self.ef.get(mid) {
if val < value {
left = mid + 1;
} else {
right = mid;
}
} else {
break;
}
}
if left < self.ef.element_count {
if let Some(val) = self.ef.get(left) {
if val >= value {
return left;
}
}
}
self.ef.element_count - 1
}
}
impl<'a> Iterator for EliasFanoIter<'a> {
type Item = usize;
fn next(&mut self) -> Option<Self::Item> {
let value = self.ef.get(self.index)?;
self.index += 1;
Some(value)
}
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.ef.size().saturating_sub(self.index);
(remaining, Some(remaining))
}
}
impl<'a> ExactSizeIterator for EliasFanoIter<'a> {}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_basic_operations() {
let values = vec![2, 5, 5, 9];
let seq = EliasFano::from(&values);
assert_eq!(seq.size(), 4);
assert_eq!(seq.universe(), 10);
assert!(!seq.is_empty());
assert_eq!(seq.get(0), Some(2));
assert_eq!(seq.get(1), Some(5));
assert_eq!(seq.get(2), Some(5));
assert_eq!(seq.get(3), Some(9));
assert_eq!(seq.get(4), None);
}
#[test]
fn test_indexing() {
let values = vec![2, 5, 5, 9];
let seq = EliasFano::from(&values);
assert_eq!(seq.index(0), 2);
assert_eq!(seq.index(1), 5);
assert_eq!(seq.index(2), 5);
assert_eq!(seq.index(3), 9);
}
#[test]
#[should_panic(expected = "index out of bounds")]
fn test_index_out_of_bounds() {
let values = vec![2, 5, 5, 9];
let seq = EliasFano::from(&values);
let _ = seq.index(4);
}
#[test]
fn test_single_value() {
let values = vec![2];
let seq = EliasFano::from(&values);
assert_eq!(seq.get(0), Some(2));
}
#[test]
fn test_consecutive_values() {
let values = vec![1, 2, 3, 4];
let seq = EliasFano::from(&values);
assert_eq!(seq.get(0), Some(1));
assert_eq!(seq.get(1), Some(2));
assert_eq!(seq.get(2), Some(3));
assert_eq!(seq.get(3), Some(4));
}
#[test]
fn test_empty_sequence() {
let seq = EliasFano::from(&[] as &[usize]);
assert!(seq.is_empty());
assert_eq!(seq.size(), 0);
}
#[test]
#[should_panic(expected = "Input sequence must be sorted")]
fn test_unsorted_input() {
let values: &[usize] = &[5, 2, 7];
let _ = EliasFano::from(values);
}
#[test]
fn test_iter() {
let values = vec![2, 5, 5, 9];
let seq = EliasFano::from(&values);
let collected: Vec<_> = seq.iter().collect();
assert_eq!(collected, values);
let mut iter = seq.iter();
assert_eq!(iter.size_hint(), (4, Some(4)));
iter.next();
assert_eq!(iter.size_hint(), (3, Some(3)));
}
#[test]
fn test_array_implementation() {
let values = [2usize, 5, 5, 9];
let seq = EliasFano::from(&values);
assert_eq!(seq.size(), 4);
assert_eq!(seq.get(0), Some(2));
assert_eq!(seq.get(1), Some(5));
assert_eq!(seq.get(2), Some(5));
assert_eq!(seq.get(3), Some(9));
}
#[test]
fn test_iter_lookup() {
let values = vec![2, 5, 5, 9];
let seq = EliasFano::from(&values);
let mut iter = seq.iter();
assert_eq!(iter.lookup(5), Some(1));
assert_eq!(iter.lookup(7), None);
assert_eq!(iter.index, 3);
iter.index = 0;
iter.next(); assert_eq!(iter.lookup(5), Some(1));
iter.next(); assert_eq!(iter.lookup(5), Some(2));
assert_eq!(iter.lookup(9), Some(3));
assert_eq!(iter.lookup(7), None);
iter.index = 0;
assert_eq!(iter.lookup(10), None);
assert_eq!(iter.index, 0);
}
#[test]
fn test_iter_lookup_next() {
let values = vec![2, 5, 5, 9];
let seq = EliasFano::from(&values);
let mut iter = seq.iter();
assert_eq!(iter.lookup_next(4), 1); assert_eq!(iter.lookup_next(6), 3); assert_eq!(iter.lookup_next(10), 3);
iter.next(); assert_eq!(iter.lookup_next(6), 3); assert_eq!(iter.lookup_next(8), 3);
iter.index = 0;
assert_eq!(iter.lookup_next(1), 0);
}
}