use crate::bitvec::BitVector;
use crate::error::{Error, Result};
use alloc::format;
use alloc::vec;
use alloc::vec::Vec;
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct EliasFano {
universe_size: u64,
upper_bits: BitVector,
lower_bits: Vec<u64>,
l: usize,
n: usize,
}
impl EliasFano {
pub fn new(values: &[u64], universe_size: u64) -> Self {
for i in 0..values.len() {
assert!(
values[i] < universe_size,
"EliasFano: value {} at index {} >= universe_size {}",
values[i],
i,
universe_size
);
if i > 0 {
assert!(
values[i] >= values[i - 1],
"EliasFano: values not sorted at index {} ({} < {})",
i,
values[i],
values[i - 1]
);
}
}
let n = values.len();
if n == 0 {
return Self {
universe_size,
upper_bits: BitVector::new(&[], 0),
lower_bits: Vec::new(),
l: 0,
n: 0,
};
}
let ratio = universe_size / n as u64;
let l = if ratio > 0 {
(63 - ratio.leading_zeros()) as usize
} else {
0
};
let mut lower_bits = Vec::with_capacity(n.saturating_mul(l).div_ceil(64));
let mut current_word = 0u64;
let mut bit_offset = 0;
for &v in values {
let low = v & ((1u64 << l) - 1);
if bit_offset + l <= 64 {
current_word |= low << bit_offset;
bit_offset += l;
if bit_offset == 64 {
lower_bits.push(current_word);
current_word = 0;
bit_offset = 0;
}
} else {
let bits_in_this = 64 - bit_offset;
current_word |= (low & ((1 << bits_in_this) - 1)) << bit_offset;
lower_bits.push(current_word);
current_word = low >> bits_in_this;
bit_offset = l - bits_in_this;
}
}
if bit_offset > 0 {
lower_bits.push(current_word);
}
let num_upper_vals = (universe_size >> l) as usize + 1;
let upper_bv_len = n + num_upper_vals;
let mut upper_data = vec![0u64; upper_bv_len.div_ceil(64)];
for (i, &v) in values.iter().enumerate() {
let high = (v >> l) as usize;
let pos = high + i;
upper_data[pos / 64] |= 1 << (pos % 64);
}
Self {
universe_size,
upper_bits: BitVector::new(&upper_data, upper_bv_len),
lower_bits,
l,
n,
}
}
pub fn universe_size(&self) -> u64 {
self.universe_size
}
pub fn len(&self) -> usize {
self.n
}
pub fn is_empty(&self) -> bool {
self.n == 0
}
pub fn get(&self, i: usize) -> Result<u64> {
if i >= self.n {
return Err(Error::IndexOutOfBounds(i));
}
let pos = self
.upper_bits
.select1(i)
.ok_or(Error::InvalidSelection(i))?;
let high = (pos - i) as u64;
let low: u64 = if self.l == 0 {
0
} else {
let start_bit = i * self.l;
let word_idx = start_bit / 64;
let bit_offset = start_bit % 64;
let mut low = self.lower_bits[word_idx] >> bit_offset;
if bit_offset + self.l > 64 {
let bits_from_next = bit_offset + self.l - 64;
low |= (self.lower_bits[word_idx + 1] & ((1 << bits_from_next) - 1))
<< (self.l - bits_from_next);
}
low &= (1 << self.l) - 1;
low
};
Ok((high << self.l) | low)
}
pub fn successor(&self, target: u64) -> Option<u64> {
if self.n == 0 {
return None;
}
let high = (target >> self.l) as usize;
let (bucket_start, bucket_end) = self.bucket_range(high);
if bucket_start < bucket_end {
let mut lo = bucket_start;
let mut hi = bucket_end;
while lo < hi {
let mid = lo + (hi - lo) / 2;
if self.get(mid).unwrap() < target {
lo = mid + 1;
} else {
hi = mid;
}
}
if lo < bucket_end {
return Some(self.get(lo).unwrap());
}
}
let next_start = bucket_end;
if next_start < self.n {
Some(self.get(next_start).unwrap())
} else {
None
}
}
pub fn predecessor(&self, target: u64) -> Option<u64> {
if self.n == 0 {
return None;
}
let high = (target >> self.l) as usize;
let (bucket_start, bucket_end) = self.bucket_range(high);
if bucket_start < bucket_end {
let mut lo = bucket_start;
let mut hi = bucket_end;
while lo < hi {
let mid = lo + (hi - lo) / 2;
if self.get(mid).unwrap() <= target {
lo = mid + 1;
} else {
hi = mid;
}
}
if lo > bucket_start {
return Some(self.get(lo - 1).unwrap());
}
}
if bucket_start > 0 {
Some(self.get(bucket_start - 1).unwrap())
} else {
None
}
}
fn bucket_range(&self, high: usize) -> (usize, usize) {
let start = if high == 0 {
0
} else {
match self.upper_bits.select0(high - 1) {
Some(pos) => self.upper_bits.rank1(pos + 1),
None => self.n,
}
};
let end = match self.upper_bits.select0(high) {
Some(pos) => self.upper_bits.rank1(pos),
None => self.n,
};
(start, end)
}
pub fn next_geq(&self, target: u64) -> Option<u64> {
self.successor(target)
}
pub fn rank(&self, target: u64) -> usize {
if self.n == 0 {
return 0;
}
let high = (target >> self.l) as usize;
let (bucket_start, bucket_end) = self.bucket_range(high);
if bucket_start >= bucket_end {
return bucket_start;
}
let mut lo = bucket_start;
let mut hi = bucket_end;
while lo < hi {
let mid = lo + (hi - lo) / 2;
if self.get(mid).unwrap() < target {
lo = mid + 1;
} else {
hi = mid;
}
}
lo
}
pub fn contains(&self, target: u64) -> bool {
self.successor(target) == Some(target)
}
pub fn iter(&self) -> EliasFanoIter<'_> {
EliasFanoIter { ef: self, idx: 0 }
}
pub fn heap_bytes(&self) -> usize {
self.upper_bits.heap_bytes() + self.lower_bits.len() * 8
}
pub fn to_bytes(&self) -> Vec<u8> {
let mut out = Vec::new();
out.extend_from_slice(b"SBITEF02");
out.extend_from_slice(&self.universe_size.to_le_bytes());
out.extend_from_slice(&(self.l as u32).to_le_bytes());
out.extend_from_slice(&(self.n as u64).to_le_bytes());
out.extend_from_slice(&(self.lower_bits.len() as u64).to_le_bytes());
for &w in &self.lower_bits {
out.extend_from_slice(&w.to_le_bytes());
}
let upper = self.upper_bits.to_bytes();
out.extend_from_slice(&(upper.len() as u64).to_le_bytes());
out.extend_from_slice(&upper);
out
}
pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
use crate::error::ByteReader;
let mut r = ByteReader::new(bytes);
r.read_magic(b"SBITEF02", "EliasFano")?;
let universe_size = r.read_u64()?;
let l = r.read_u32()? as usize;
if l > 63 {
return Err(Error::InvalidEncoding(format!(
"EliasFano l={l} exceeds maximum (63) for u64 values"
)));
}
let n = r.read_u64()? as usize;
let lower_len = r.read_u64()? as usize;
let lower_bits = r.read_u64_vec(lower_len)?;
let upper_len = r.read_u64()? as usize;
let upper_bytes = r.take(upper_len)?;
let upper_bits = BitVector::from_bytes(upper_bytes)?;
r.expect_eof("EliasFano")?;
Ok(Self {
universe_size,
upper_bits,
lower_bits,
l,
n,
})
}
}
pub struct EliasFanoIter<'a> {
ef: &'a EliasFano,
idx: usize,
}
impl Iterator for EliasFanoIter<'_> {
type Item = u64;
fn next(&mut self) -> Option<u64> {
if self.idx >= self.ef.n {
return None;
}
let val = self.ef.get(self.idx).unwrap();
self.idx += 1;
Some(val)
}
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.ef.n - self.idx;
(remaining, Some(remaining))
}
}
impl ExactSizeIterator for EliasFanoIter<'_> {}
impl<'a> IntoIterator for &'a EliasFano {
type Item = u64;
type IntoIter = EliasFanoIter<'a>;
fn into_iter(self) -> EliasFanoIter<'a> {
self.iter()
}
}
#[cfg(test)]
mod tests {
use super::*;
use alloc::vec;
#[test]
fn test_elias_fano_basic() {
let values = vec![10u64, 20, 30, 100, 1000];
let ef = EliasFano::new(&values, 2000);
assert_eq!(ef.len(), 5);
assert_eq!(ef.get(0).unwrap(), 10);
assert_eq!(ef.get(1).unwrap(), 20);
assert_eq!(ef.get(2).unwrap(), 30);
assert_eq!(ef.get(3).unwrap(), 100);
assert_eq!(ef.get(4).unwrap(), 1000);
}
#[test]
fn test_elias_fano_l_equals_zero() {
let values = vec![0u64, 1, 2, 3];
let ef = EliasFano::new(&values, 4);
assert_eq!(ef.len(), 4);
for (i, &v) in values.iter().enumerate() {
assert_eq!(ef.get(i).unwrap(), v, "mismatch at index {i}");
}
}
#[test]
fn test_elias_fano_serialization_roundtrip() {
let values = vec![10u64, 20, 30, 100, 1000];
let ef = EliasFano::new(&values, 2000);
let bytes = ef.to_bytes();
let ef2 = EliasFano::from_bytes(&bytes).unwrap();
assert_eq!(ef2.len(), values.len());
for (i, &v) in values.iter().enumerate() {
assert_eq!(ef2.get(i).unwrap(), v);
}
}
#[test]
fn test_elias_fano_l0_serialization_roundtrip() {
let values = vec![0u64, 1, 2, 3];
let ef = EliasFano::new(&values, 4);
let bytes = ef.to_bytes();
let ef2 = EliasFano::from_bytes(&bytes).unwrap();
for (i, &v) in values.iter().enumerate() {
assert_eq!(ef2.get(i).unwrap(), v);
}
}
#[test]
fn test_elias_fano_rejects_bad_l() {
let ef = EliasFano::new(&[10u64], 100);
let mut bytes = ef.to_bytes();
bytes[16] = 64;
bytes[17] = 0;
bytes[18] = 0;
bytes[19] = 0;
assert!(EliasFano::from_bytes(&bytes).is_err());
}
#[test]
fn test_elias_fano_empty() {
let ef = EliasFano::new(&[], 100);
assert!(ef.is_empty());
assert!(ef.get(0).is_err());
let bytes = ef.to_bytes();
let ef2 = EliasFano::from_bytes(&bytes).unwrap();
assert!(ef2.is_empty());
}
}