#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct EliasFano {
low_bits: Vec<u64>,
high_bits: Vec<u64>,
len: usize,
low_bit_width: u32,
universe: u64,
high_len_bits: usize,
}
impl EliasFano {
pub fn from_sorted(values: &[u32]) -> Self {
Self::from_sorted_u64(&values.iter().map(|&v| v as u64).collect::<Vec<_>>())
}
pub fn from_sorted_u64(values: &[u64]) -> Self {
if values.is_empty() {
return Self {
low_bits: Vec::new(),
high_bits: Vec::new(),
len: 0,
low_bit_width: 0,
universe: 0,
high_len_bits: 0,
};
}
let n = values.len();
let universe = values[n - 1] + 1;
let low_bit_width = if n >= universe as usize {
0
} else {
(64 - (universe / n as u64).leading_zeros()).saturating_sub(1)
};
let low_mask = if low_bit_width == 0 { 0u64 } else { (1u64 << low_bit_width) - 1 };
let total_low_bits = n as u64 * low_bit_width as u64;
let low_words = ((total_low_bits + 63) / 64) as usize;
let mut low_bits = vec![0u64; low_words];
for (i, &val) in values.iter().enumerate() {
if low_bit_width > 0 {
let low_val = val & low_mask;
let bit_pos = i as u64 * low_bit_width as u64;
let word_idx = (bit_pos / 64) as usize;
let bit_idx = (bit_pos % 64) as u32;
low_bits[word_idx] |= low_val << bit_idx;
if bit_idx + low_bit_width > 64 && word_idx + 1 < low_words {
low_bits[word_idx + 1] |= low_val >> (64 - bit_idx);
}
}
}
let max_high = values[n - 1] >> low_bit_width;
let high_len_bits = n + max_high as usize + 1;
let high_words = (high_len_bits + 63) / 64;
let mut high_bits = vec![0u64; high_words];
let mut pos = 0usize;
let mut prev_high = 0u64;
for &val in values {
let high = val >> low_bit_width;
pos += (high - prev_high) as usize;
let word_idx = pos / 64;
let bit_idx = pos % 64;
if word_idx < high_words {
high_bits[word_idx] |= 1u64 << bit_idx;
}
pos += 1;
prev_high = high;
}
Self {
low_bits,
high_bits,
len: n,
low_bit_width,
universe,
high_len_bits,
}
}
#[inline]
pub fn len(&self) -> usize { self.len }
#[inline]
pub fn is_empty(&self) -> bool { self.len == 0 }
#[inline]
pub fn size_bytes(&self) -> usize {
self.low_bits.len() * 8 + self.high_bits.len() * 8 + 32 }
#[inline]
pub fn bits_per_element(&self) -> f64 {
if self.len == 0 { return 0.0; }
(self.size_bytes() * 8) as f64 / self.len as f64
}
pub fn get(&self, index: usize) -> Option<u64> {
if index >= self.len { return None; }
let low = self.get_low(index);
let high_pos = self.select1(index)?;
let high_val = high_pos.checked_sub(index)? as u64;
Some((high_val << self.low_bit_width) | low)
}
pub fn next_geq(&self, target: u64) -> Option<(usize, u64)> {
if self.len == 0 || target >= self.universe { return None; }
let target_high = target >> self.low_bit_width;
let target_low = if self.low_bit_width > 0 {
target & ((1u64 << self.low_bit_width) - 1)
} else {
0
};
let bucket_start_pos = if target_high == 0 {
0
} else {
match self.select0(target_high as usize - 1) {
Some(p) => p + 1,
None => return None,
}
};
let elem_before = self.rank1(bucket_start_pos);
for idx in elem_before..self.len {
if let Some(val) = self.get(idx) {
if val >= target {
return Some((idx, val));
}
}
}
None
}
pub fn iter(&self) -> EliasFanoIter<'_> {
EliasFanoIter { ef: self, pos: 0, high_pos: 0 }
}
#[inline]
fn get_low(&self, index: usize) -> u64 {
if self.low_bit_width == 0 { return 0; }
let bit_pos = index as u64 * self.low_bit_width as u64;
let word_idx = (bit_pos / 64) as usize;
let bit_idx = (bit_pos % 64) as u32;
let mut val = self.low_bits[word_idx] >> bit_idx;
if bit_idx + self.low_bit_width > 64 && word_idx + 1 < self.low_bits.len() {
val |= self.low_bits[word_idx + 1] << (64 - bit_idx);
}
val & ((1u64 << self.low_bit_width) - 1)
}
fn select1(&self, rank: usize) -> Option<usize> {
let mut remaining = rank;
for (word_idx, &word) in self.high_bits.iter().enumerate() {
let ones = word.count_ones() as usize;
if remaining < ones {
let mut w = word;
for _ in 0..remaining {
w &= w - 1; }
let bit_pos = w.trailing_zeros() as usize;
return Some(word_idx * 64 + bit_pos);
}
remaining -= ones;
}
None
}
fn select0(&self, rank: usize) -> Option<usize> {
let mut remaining = rank;
for (word_idx, &word) in self.high_bits.iter().enumerate() {
let zeros = word.count_zeros() as usize;
let valid_bits = if (word_idx + 1) * 64 <= self.high_len_bits {
64
} else {
self.high_len_bits - word_idx * 64
};
let actual_zeros = valid_bits - word.count_ones() as usize;
if remaining < actual_zeros {
let inverted = !word;
let mut w = inverted;
for _ in 0..remaining {
w &= w - 1;
}
let bit_pos = w.trailing_zeros() as usize;
return Some(word_idx * 64 + bit_pos);
}
remaining -= actual_zeros;
}
None
}
fn rank1(&self, pos: usize) -> usize {
let full_words = pos / 64;
let remaining_bits = pos % 64;
let mut count = 0usize;
for i in 0..full_words {
count += self.high_bits[i].count_ones() as usize;
}
if remaining_bits > 0 && full_words < self.high_bits.len() {
let mask = (1u64 << remaining_bits) - 1;
count += (self.high_bits[full_words] & mask).count_ones() as usize;
}
count
}
}
pub struct EliasFanoIter<'a> {
ef: &'a EliasFano,
pos: usize,
high_pos: usize,
}
impl<'a> Iterator for EliasFanoIter<'a> {
type Item = u64;
fn next(&mut self) -> Option<Self::Item> {
if self.pos >= self.ef.len { return None; }
while self.high_pos < self.ef.high_len_bits {
let word_idx = self.high_pos / 64;
let bit_idx = self.high_pos % 64;
if word_idx >= self.ef.high_bits.len() { return None; }
if (self.ef.high_bits[word_idx] >> bit_idx) & 1 == 1 {
let high_val = (self.high_pos - self.pos) as u64;
let low = self.ef.get_low(self.pos);
let val = (high_val << self.ef.low_bit_width) | low;
self.pos += 1;
self.high_pos += 1;
return Some(val);
}
self.high_pos += 1; }
None
}
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.ef.len - self.pos;
(remaining, Some(remaining))
}
}
impl<'a> ExactSizeIterator for EliasFanoIter<'a> {}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_empty() {
let ef = EliasFano::from_sorted(&[]);
assert_eq!(ef.len(), 0);
assert!(ef.is_empty());
assert_eq!(ef.get(0), None);
assert_eq!(ef.next_geq(0), None);
}
#[test]
fn test_single() {
let ef = EliasFano::from_sorted(&[42]);
assert_eq!(ef.len(), 1);
assert_eq!(ef.get(0), Some(42));
assert_eq!(ef.next_geq(0), Some((0, 42)));
assert_eq!(ef.next_geq(42), Some((0, 42)));
assert_eq!(ef.next_geq(43), None);
}
#[test]
fn test_basic() {
let docs = vec![3, 5, 11, 27, 31, 42, 58, 63];
let ef = EliasFano::from_sorted(&docs);
assert_eq!(ef.len(), 8);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(ef.get(i), Some(v as u64), "get({}) failed", i);
}
assert_eq!(ef.get(8), None);
}
#[test]
fn test_next_geq() {
let docs = vec![3, 5, 11, 27, 31, 42, 58, 63];
let ef = EliasFano::from_sorted(&docs);
assert_eq!(ef.next_geq(3), Some((0, 3)));
assert_eq!(ef.next_geq(42), Some((5, 42)));
assert_eq!(ef.next_geq(63), Some((7, 63)));
assert_eq!(ef.next_geq(0), Some((0, 3)));
assert_eq!(ef.next_geq(4), Some((1, 5)));
assert_eq!(ef.next_geq(10), Some((2, 11)));
assert_eq!(ef.next_geq(28), Some((4, 31)));
assert_eq!(ef.next_geq(59), Some((7, 63)));
assert_eq!(ef.next_geq(64), None);
assert_eq!(ef.next_geq(1000), None);
}
#[test]
fn test_iterator() {
let docs = vec![3, 5, 11, 27, 31, 42, 58, 63];
let ef = EliasFano::from_sorted(&docs);
let collected: Vec<u64> = ef.iter().collect();
let expected: Vec<u64> = docs.iter().map(|&v| v as u64).collect();
assert_eq!(collected, expected);
}
#[test]
fn test_consecutive() {
let docs: Vec<u32> = (0..100).collect();
let ef = EliasFano::from_sorted(&docs);
assert_eq!(ef.len(), 100);
for i in 0..100 {
assert_eq!(ef.get(i), Some(i as u64));
}
assert_eq!(ef.next_geq(50), Some((50, 50)));
}
#[test]
fn test_sparse() {
let docs: Vec<u32> = (0..100).map(|i| i * 1000).collect();
let ef = EliasFano::from_sorted(&docs);
assert_eq!(ef.len(), 100);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(ef.get(i), Some(v as u64), "get({}) failed", i);
}
assert_eq!(ef.next_geq(500), Some((1, 1000)));
assert_eq!(ef.next_geq(1000), Some((1, 1000)));
assert_eq!(ef.next_geq(1001), Some((2, 2000)));
}
#[test]
fn test_large_posting_list() {
let docs: Vec<u32> = (0..10000).map(|i| i * 100 + i % 7).collect();
let ef = EliasFano::from_sorted(&docs);
assert_eq!(ef.len(), 10000);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(ef.get(i), Some(v as u64), "get({}) = {:?}, expected {}", i, ef.get(i), v);
}
let from_iter: Vec<u64> = ef.iter().collect();
assert_eq!(from_iter.len(), 10000);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(from_iter[i], v as u64);
}
let bits_per_elem = ef.bits_per_element();
assert!(bits_per_elem < 32.0, "Should be much less than 32 bits/elem, got {:.1}", bits_per_elem);
}
#[test]
fn test_next_geq_scan() {
let docs: Vec<u32> = (0..1000).map(|i| i * 10).collect();
let ef = EliasFano::from_sorted(&docs);
let mut target = 0u64;
let mut found = Vec::new();
while let Some((_, val)) = ef.next_geq(target) {
if val % 30 == 0 {
found.push(val as u32);
}
target = val + 1;
}
let expected: Vec<u32> = (0..1000).map(|i| i * 10).filter(|v| v % 30 == 0).collect();
assert_eq!(found, expected);
}
#[test]
fn test_space_efficiency() {
let docs: Vec<u32> = (0..10000).map(|i| i * 100).collect();
let ef = EliasFano::from_sorted(&docs);
let bpe = ef.bits_per_element();
eprintln!("Elias-Fano: {} elements, {:.1} bits/elem, {} bytes total",
ef.len(), bpe, ef.size_bytes());
assert!(bpe < 20.0, "Too many bits per element: {:.1}", bpe);
}
#[test]
fn test_max_values() {
let docs = vec![0, u32::MAX / 2, u32::MAX];
let ef = EliasFano::from_sorted(&docs);
assert_eq!(ef.get(0), Some(0));
assert_eq!(ef.get(1), Some(u32::MAX as u64 / 2));
assert_eq!(ef.get(2), Some(u32::MAX as u64));
}
#[test]
fn test_performance_next_geq() {
let docs: Vec<u32> = (0..100000).map(|i| i * 10).collect();
let ef = EliasFano::from_sorted(&docs);
let targets: Vec<u64> = (0..10000).map(|i| (i * 100) as u64).collect();
let start = std::time::Instant::now();
let mut found = 0usize;
for _ in 0..100 {
for &t in &targets {
if ef.next_geq(t).is_some() { found += 1; }
}
}
let elapsed = start.elapsed();
#[cfg(not(debug_assertions))]
{
let per_call = elapsed / (100 * targets.len() as u32);
eprintln!("Elias-Fano next_geq: {:?}/call, {} found", per_call, found);
}
}
}