use super::super::BitVector;
use super::rank_select::SuccinctBitVector;
#[derive(Debug, Clone)]
pub struct EliasFano {
n: usize,
universe: u64,
lower_bits: usize,
lower: BitVector,
upper: SuccinctBitVector,
}
impl EliasFano {
#[must_use]
pub fn new(values: &[u64]) -> Self {
if values.is_empty() {
return Self {
n: 0,
universe: 0,
lower_bits: 0,
lower: BitVector::new(),
upper: SuccinctBitVector::default(),
};
}
for i in 1..values.len() {
assert!(
values[i] > values[i - 1],
"Values must be strictly increasing: values[{}]={} <= values[{}]={}",
i,
values[i],
i - 1,
values[i - 1]
);
}
let n = values.len();
let universe = values[n - 1] + 1;
let lower_bits = if universe <= n as u64 {
0
} else {
(64 - (universe / n as u64).leading_zeros()) as usize
};
let lower_mask = if lower_bits == 0 {
0
} else if lower_bits >= 64 {
u64::MAX
} else {
(1u64 << lower_bits) - 1
};
let mut lower = BitVector::with_capacity(n * lower_bits);
for &val in values {
let low = val & lower_mask;
for bit_idx in 0..lower_bits {
lower.push((low >> bit_idx) & 1 == 1);
}
}
let max_high = values[n - 1] >> lower_bits;
#[allow(clippy::cast_possible_truncation)]
let upper_len = n + max_high as usize;
let mut upper_bits = BitVector::zeros(upper_len);
for (i, &val) in values.iter().enumerate() {
let high = val >> lower_bits;
#[allow(clippy::cast_possible_truncation)]
let pos = high as usize + i;
if pos < upper_len {
upper_bits.set(pos, true);
}
}
let upper = SuccinctBitVector::from_bitvec(upper_bits);
Self {
n,
universe,
lower_bits,
lower,
upper,
}
}
#[must_use]
pub fn len(&self) -> usize {
self.n
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.n == 0
}
#[must_use]
pub fn universe(&self) -> u64 {
self.universe
}
#[must_use]
pub fn get(&self, index: usize) -> u64 {
assert!(
index < self.n,
"Index {} out of bounds (len={})",
index,
self.n
);
let upper_pos = self.upper.select1(index).expect("index within bounds");
let high = (upper_pos - index) as u64;
let low = self.get_lower(index);
(high << self.lower_bits) | low
}
fn get_lower(&self, index: usize) -> u64 {
if self.lower_bits == 0 {
return 0;
}
let bit_start = index * self.lower_bits;
let mut low = 0u64;
for bit_idx in 0..self.lower_bits {
if self.lower.get(bit_start + bit_idx) == Some(true) {
low |= 1 << bit_idx;
}
}
low
}
#[must_use]
pub fn contains(&self, value: u64) -> bool {
if self.is_empty() || value >= self.universe {
return false;
}
match self.predecessor(value) {
Some(idx) => self.get(idx) == value,
None => false,
}
}
#[must_use]
pub fn predecessor(&self, value: u64) -> Option<usize> {
if self.is_empty() {
return None;
}
let mut lo = 0;
let mut hi = self.n;
while lo < hi {
let mid = lo + (hi - lo) / 2;
if self.get(mid) <= value {
lo = mid + 1;
} else {
hi = mid;
}
}
if lo > 0 {
Some(lo - 1)
} else if self.get(0) <= value {
Some(0)
} else {
None
}
}
#[must_use]
pub fn successor(&self, value: u64) -> Option<usize> {
if self.is_empty() {
return None;
}
let mut lo = 0;
let mut hi = self.n;
while lo < hi {
let mid = lo + (hi - lo) / 2;
if self.get(mid) < value {
lo = mid + 1;
} else {
hi = mid;
}
}
if lo < self.n { Some(lo) } else { None }
}
pub fn iter(&self) -> impl Iterator<Item = u64> + '_ {
(0..self.n).map(move |i| self.get(i))
}
#[must_use]
pub fn size_bytes(&self) -> usize {
let base = std::mem::size_of::<Self>();
let lower_bytes = self.lower.data().len() * 8;
let upper_bytes = self.upper.size_bytes();
base + lower_bytes + upper_bytes
}
#[must_use]
pub fn compression_ratio(&self) -> f64 {
if self.n == 0 {
return 1.0;
}
let original_bytes = self.n * 8; let compressed_bytes = self.size_bytes();
if compressed_bytes == 0 {
return 1.0;
}
original_bytes as f64 / compressed_bytes as f64
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_empty() {
let ef = EliasFano::new(&[]);
assert!(ef.is_empty());
assert_eq!(ef.len(), 0);
assert!(!ef.contains(0));
assert_eq!(ef.predecessor(100), None);
}
#[test]
fn test_single() {
let ef = EliasFano::new(&[42]);
assert_eq!(ef.len(), 1);
assert_eq!(ef.get(0), 42);
assert!(ef.contains(42));
assert!(!ef.contains(0));
assert!(!ef.contains(100));
}
#[test]
fn test_small() {
let values = vec![2, 3, 5, 7, 11, 13, 17, 19, 23];
let ef = EliasFano::new(&values);
assert_eq!(ef.len(), values.len());
for (i, &v) in values.iter().enumerate() {
assert_eq!(ef.get(i), v, "get({}) failed", i);
assert!(ef.contains(v), "contains({}) failed", v);
}
assert!(!ef.contains(0));
assert!(!ef.contains(4));
assert!(!ef.contains(100));
}
#[test]
fn test_sparse() {
let values: Vec<u64> = vec![100, 1_000, 10_000, 100_000, 1_000_000];
let ef = EliasFano::new(&values);
for (i, &v) in values.iter().enumerate() {
assert_eq!(ef.get(i), v);
assert!(ef.contains(v));
}
assert_eq!(ef.len(), 5);
}
#[test]
fn test_dense() {
let values: Vec<u64> = (0..100).collect();
let ef = EliasFano::new(&values);
for (i, &v) in values.iter().enumerate() {
assert_eq!(ef.get(i), v);
}
}
#[test]
fn test_predecessor() {
let values = vec![10, 20, 30, 40, 50];
let ef = EliasFano::new(&values);
assert_eq!(ef.predecessor(5), None);
assert_eq!(ef.predecessor(10), Some(0));
assert_eq!(ef.predecessor(15), Some(0));
assert_eq!(ef.predecessor(20), Some(1));
assert_eq!(ef.predecessor(25), Some(1));
assert_eq!(ef.predecessor(50), Some(4));
assert_eq!(ef.predecessor(100), Some(4));
}
#[test]
fn test_successor() {
let values = vec![10, 20, 30, 40, 50];
let ef = EliasFano::new(&values);
assert_eq!(ef.successor(5), Some(0));
assert_eq!(ef.successor(10), Some(0));
assert_eq!(ef.successor(15), Some(1));
assert_eq!(ef.successor(20), Some(1));
assert_eq!(ef.successor(50), Some(4));
assert_eq!(ef.successor(51), None);
}
#[test]
fn test_iter() {
let values = vec![1, 2, 4, 8, 16, 32];
let ef = EliasFano::new(&values);
let collected: Vec<u64> = ef.iter().collect();
assert_eq!(collected, values);
}
#[test]
fn test_large_universe() {
let values: Vec<u64> = vec![
1_000_000,
10_000_000,
100_000_000,
1_000_000_000,
10_000_000_000,
];
let ef = EliasFano::new(&values);
for (i, &v) in values.iter().enumerate() {
assert_eq!(ef.get(i), v, "Failed at index {}", i);
}
}
#[test]
#[should_panic(expected = "strictly increasing")]
fn test_unsorted_panics() {
let _ = EliasFano::new(&[5, 3, 7]);
}
#[test]
#[should_panic(expected = "strictly increasing")]
fn test_duplicate_panics() {
let _ = EliasFano::new(&[1, 2, 2, 3]);
}
#[test]
fn test_compression_large() {
let values: Vec<u64> = (0..1000).map(|i| i * 10_000).collect();
let ef = EliasFano::new(&values);
assert_eq!(ef.len(), 1000);
assert_eq!(ef.get(0), 0);
assert_eq!(ef.get(999), 9_990_000);
let size = ef.size_bytes();
assert!(size < 10000, "Size {} bytes is too large", size);
}
}