#[cfg(not(test))]
use alloc::vec;
#[cfg(not(test))]
use alloc::vec::Vec;
use crate::util::broadword::select_in_word;
const SELECT_SAMPLE_RATE: usize = 256;
#[derive(Clone, Debug)]
pub struct EliasFano {
low_bits: Vec<u64>,
low_width: usize,
high_bits: Vec<u64>,
len: usize,
universe: u64,
select_samples: Vec<u32>,
}
#[derive(Clone, Debug)]
pub struct EliasFanoCursor<'a> {
ef: &'a EliasFano,
idx: usize,
high_pos: usize,
word_idx: usize,
remaining_bits: u64,
}
impl EliasFano {
pub fn build(values: &[u32]) -> Self {
if values.is_empty() {
return Self {
low_bits: Vec::new(),
low_width: 0,
high_bits: Vec::new(),
len: 0,
universe: 0,
select_samples: Vec::new(),
};
}
debug_assert!(
values.windows(2).all(|w| w[0] <= w[1]),
"EliasFano::build requires monotonically increasing values"
);
let n = values.len();
let max_value = *values.last().unwrap() as u64;
let universe = max_value + 1;
let low_width = if n == 0 || universe <= n as u64 {
0
} else {
(64 - (universe / n as u64).leading_zeros() as usize).saturating_sub(1)
};
let low_mask = if low_width == 0 {
0
} else {
(1u64 << low_width) - 1
};
let low_bits_total = n * low_width;
let low_words = low_bits_total.div_ceil(64);
let mut low_bits = vec![0u64; low_words];
let high_bits_len = n + (max_value >> low_width) as usize + 1;
let high_words = high_bits_len.div_ceil(64);
let mut high_bits = vec![0u64; high_words];
for (i, &value) in values.iter().enumerate() {
let v = value as u64;
if low_width > 0 {
let low_value = v & low_mask;
let bit_pos = i * low_width;
let word_idx = bit_pos / 64;
let bit_offset = bit_pos % 64;
low_bits[word_idx] |= low_value << bit_offset;
if bit_offset + low_width > 64 && word_idx + 1 < low_bits.len() {
low_bits[word_idx + 1] |= low_value >> (64 - bit_offset);
}
}
let high_value = v >> low_width;
let high_pos = high_value as usize + i;
let word_idx = high_pos / 64;
let bit_offset = high_pos % 64;
high_bits[word_idx] |= 1u64 << bit_offset;
}
let num_samples = n.div_ceil(SELECT_SAMPLE_RATE);
let mut select_samples = Vec::with_capacity(num_samples);
let mut ones_seen = 0usize;
let mut sample_target = 0usize;
'outer: for (word_idx, &word) in high_bits.iter().enumerate() {
if word == 0 {
continue;
}
let word_ones = word.count_ones() as usize;
while sample_target < ones_seen + word_ones {
let local_rank = sample_target - ones_seen;
let bit_pos = select_in_word(word, local_rank as u32) as usize;
let global_pos = word_idx * 64 + bit_pos;
select_samples.push(global_pos as u32);
sample_target += SELECT_SAMPLE_RATE;
if select_samples.len() >= num_samples {
break 'outer;
}
}
ones_seen += word_ones;
}
Self {
low_bits,
low_width,
high_bits,
len: n,
universe,
select_samples,
}
}
#[inline]
pub fn len(&self) -> usize {
self.len
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len == 0
}
#[inline]
pub fn universe(&self) -> u64 {
self.universe
}
#[inline]
pub fn get(&self, i: usize) -> Option<u32> {
if i >= self.len {
return None;
}
let high_pos = self.select1(i);
let high_value = high_pos - i;
let low_value = self.read_low_bits(i);
let value = ((high_value as u64) << self.low_width) | low_value;
Some(value as u32)
}
#[inline]
pub fn cursor(&self) -> EliasFanoCursor<'_> {
if self.is_empty() {
return EliasFanoCursor {
ef: self,
idx: 0,
high_pos: 0,
word_idx: 0,
remaining_bits: 0,
};
}
let mut word_idx = 0;
while word_idx < self.high_bits.len() && self.high_bits[word_idx] == 0 {
word_idx += 1;
}
let word = if word_idx < self.high_bits.len() {
self.high_bits[word_idx]
} else {
0
};
let bit_pos = word.trailing_zeros() as usize;
let high_pos = word_idx * 64 + bit_pos;
EliasFanoCursor {
ef: self,
idx: 0,
high_pos,
word_idx,
remaining_bits: word,
}
}
#[inline]
pub fn cursor_from(&self, idx: usize) -> EliasFanoCursor<'_> {
if idx >= self.len {
return EliasFanoCursor {
ef: self,
idx: self.len,
high_pos: 0,
word_idx: 0,
remaining_bits: 0,
};
}
if idx == 0 {
return self.cursor();
}
let high_pos = self.select1(idx);
let word_idx = high_pos / 64;
let bit_offset = high_pos % 64;
let word = self.high_bits[word_idx];
let remaining_bits = word & !((1u64 << bit_offset) - 1);
EliasFanoCursor {
ef: self,
idx,
high_pos,
word_idx,
remaining_bits,
}
}
fn select1(&self, k: usize) -> usize {
debug_assert!(k < self.len, "select1 out of bounds");
let sample_idx = k / SELECT_SAMPLE_RATE;
let (start_word, mut remaining) = if sample_idx < self.select_samples.len() {
let sample_pos = self.select_samples[sample_idx] as usize;
let skip_ones = sample_idx * SELECT_SAMPLE_RATE;
(sample_pos / 64, k - skip_ones)
} else {
(0, k)
};
for word_idx in start_word..self.high_bits.len() {
let word = if word_idx == start_word && sample_idx < self.select_samples.len() {
let sample_pos = self.select_samples[sample_idx] as usize;
let bit_offset = sample_pos % 64;
self.high_bits[word_idx] & !((1u64 << bit_offset) - 1)
} else {
self.high_bits[word_idx]
};
let ones = word.count_ones() as usize;
if ones > remaining {
let bit_pos = select_in_word(word, remaining as u32) as usize;
return word_idx * 64 + bit_pos;
}
remaining -= ones;
}
unreachable!("select1: not enough 1-bits");
}
#[inline]
fn read_low_bits(&self, i: usize) -> u64 {
if self.low_width == 0 {
return 0;
}
let bit_pos = i * self.low_width;
let word_idx = bit_pos / 64;
let bit_offset = bit_pos % 64;
let low_mask = (1u64 << self.low_width) - 1;
let mut value = (self.low_bits[word_idx] >> bit_offset) & low_mask;
if bit_offset + self.low_width > 64 && word_idx + 1 < self.low_bits.len() {
let overflow_bits = bit_offset + self.low_width - 64;
let overflow_mask = (1u64 << overflow_bits) - 1;
value |=
(self.low_bits[word_idx + 1] & overflow_mask) << (self.low_width - overflow_bits);
}
value
}
pub fn heap_size(&self) -> usize {
self.low_bits.len() * 8 + self.high_bits.len() * 8 + self.select_samples.len() * 4
}
}
impl<'a> EliasFanoCursor<'a> {
#[inline]
pub fn current(&self) -> Option<u32> {
if self.idx >= self.ef.len {
return None;
}
let high_value = self.high_pos - self.idx;
let low_value = self.ef.read_low_bits(self.idx);
let value = ((high_value as u64) << self.ef.low_width) | low_value;
Some(value as u32)
}
#[inline]
pub fn index(&self) -> usize {
self.idx
}
#[inline]
pub fn is_exhausted(&self) -> bool {
self.idx >= self.ef.len
}
#[inline]
pub fn advance_one(&mut self) -> Option<u32> {
if self.idx + 1 >= self.ef.len {
self.idx = self.ef.len;
return None;
}
self.remaining_bits &= self.remaining_bits.wrapping_sub(1);
if self.remaining_bits != 0 {
let bit_pos = self.remaining_bits.trailing_zeros() as usize;
self.high_pos = self.word_idx * 64 + bit_pos;
} else {
self.word_idx += 1;
while self.word_idx < self.ef.high_bits.len() && self.ef.high_bits[self.word_idx] == 0 {
self.word_idx += 1;
}
if self.word_idx >= self.ef.high_bits.len() {
self.idx = self.ef.len;
return None;
}
self.remaining_bits = self.ef.high_bits[self.word_idx];
let bit_pos = self.remaining_bits.trailing_zeros() as usize;
self.high_pos = self.word_idx * 64 + bit_pos;
}
self.idx += 1;
self.current()
}
#[inline]
pub fn advance_by(&mut self, k: usize) -> Option<u32> {
if k == 0 {
return self.current();
}
if k == 1 {
return self.advance_one();
}
let target_idx = self.idx + k;
if target_idx >= self.ef.len {
self.idx = self.ef.len;
return None;
}
if k > 64 {
return self.seek(target_idx);
}
let mut remaining = k;
let word_after_current = self.remaining_bits & self.remaining_bits.wrapping_sub(1);
let ones_in_current = word_after_current.count_ones() as usize;
if ones_in_current >= remaining {
self.remaining_bits = word_after_current;
for _ in 0..remaining - 1 {
self.remaining_bits &= self.remaining_bits.wrapping_sub(1);
}
let bit_pos = self.remaining_bits.trailing_zeros() as usize;
self.high_pos = self.word_idx * 64 + bit_pos;
self.idx = target_idx;
return self.current();
}
remaining -= ones_in_current;
self.word_idx += 1;
while self.word_idx < self.ef.high_bits.len() {
let word = self.ef.high_bits[self.word_idx];
let ones = word.count_ones() as usize;
if ones >= remaining {
self.remaining_bits = word;
for _ in 0..remaining - 1 {
self.remaining_bits &= self.remaining_bits.wrapping_sub(1);
}
let bit_pos = self.remaining_bits.trailing_zeros() as usize;
self.high_pos = self.word_idx * 64 + bit_pos;
self.idx = target_idx;
return self.current();
}
remaining -= ones;
self.word_idx += 1;
}
self.idx = self.ef.len;
None
}
#[inline]
pub fn seek(&mut self, idx: usize) -> Option<u32> {
if idx >= self.ef.len {
self.idx = self.ef.len;
return None;
}
let high_pos = self.ef.select1(idx);
let word_idx = high_pos / 64;
let bit_offset = high_pos % 64;
self.idx = idx;
self.high_pos = high_pos;
self.word_idx = word_idx;
self.remaining_bits = self.ef.high_bits[word_idx] & !((1u64 << bit_offset) - 1);
self.current()
}
}
impl<'a> IntoIterator for &'a EliasFano {
type Item = u32;
type IntoIter = EliasFanoIter<'a>;
fn into_iter(self) -> Self::IntoIter {
EliasFanoIter {
cursor: self.cursor(),
started: false,
}
}
}
pub struct EliasFanoIter<'a> {
cursor: EliasFanoCursor<'a>,
started: bool,
}
impl<'a> Iterator for EliasFanoIter<'a> {
type Item = u32;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
if !self.started {
self.started = true;
self.cursor.current()
} else {
self.cursor.advance_one()
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.cursor.ef.len - self.cursor.idx;
(remaining, Some(remaining))
}
}
impl<'a> ExactSizeIterator for EliasFanoIter<'a> {}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_empty() {
let ef = EliasFano::build(&[]);
assert!(ef.is_empty());
assert_eq!(ef.len(), 0);
assert_eq!(ef.get(0), None);
let cursor = ef.cursor();
assert!(cursor.is_exhausted());
assert_eq!(cursor.current(), None);
}
#[test]
fn test_single_element() {
let ef = EliasFano::build(&[42]);
assert_eq!(ef.len(), 1);
assert_eq!(ef.get(0), Some(42));
assert_eq!(ef.get(1), None);
let mut cursor = ef.cursor();
assert_eq!(cursor.current(), Some(42));
assert_eq!(cursor.advance_one(), None);
assert!(cursor.is_exhausted());
}
#[test]
fn test_sequential_values() {
let values: Vec<u32> = (0..100).collect();
let ef = EliasFano::build(&values);
assert_eq!(ef.len(), 100);
for (i, &v) in values.iter().enumerate() {
assert_eq!(ef.get(i), Some(v), "get({}) failed", i);
}
let mut cursor = ef.cursor();
for &expected in &values {
assert_eq!(cursor.current(), Some(expected));
cursor.advance_one();
}
assert!(cursor.is_exhausted());
}
#[test]
fn test_sparse_values() {
let values = vec![0, 15, 23, 45, 52, 78, 120, 256, 1000, 5000];
let ef = EliasFano::build(&values);
assert_eq!(ef.len(), values.len());
for (i, &v) in values.iter().enumerate() {
assert_eq!(ef.get(i), Some(v), "get({}) failed", i);
}
let collected: Vec<u32> = ef.into_iter().collect();
assert_eq!(collected, values);
}
#[test]
fn test_advance_by() {
let values: Vec<u32> = (0..1000).map(|i| i * 10).collect();
let ef = EliasFano::build(&values);
let mut cursor = ef.cursor();
assert_eq!(cursor.current(), Some(0));
assert_eq!(cursor.advance_by(5), Some(50));
assert_eq!(cursor.index(), 5);
assert_eq!(cursor.advance_by(10), Some(150));
assert_eq!(cursor.index(), 15);
assert_eq!(cursor.advance_by(100), Some(1150));
assert_eq!(cursor.index(), 115);
}
#[test]
fn test_seek() {
let values: Vec<u32> = (0..500).map(|i| i * 7).collect();
let ef = EliasFano::build(&values);
let mut cursor = ef.cursor();
assert_eq!(cursor.seek(100), Some(700));
assert_eq!(cursor.index(), 100);
assert_eq!(cursor.seek(50), Some(350));
assert_eq!(cursor.index(), 50);
assert_eq!(cursor.seek(499), Some(3493));
assert_eq!(cursor.index(), 499);
assert_eq!(cursor.seek(500), None);
assert!(cursor.is_exhausted());
}
#[test]
fn test_cursor_from() {
let values: Vec<u32> = (0..100).map(|i| i * 3).collect();
let ef = EliasFano::build(&values);
let cursor = ef.cursor_from(50);
assert_eq!(cursor.index(), 50);
assert_eq!(cursor.current(), Some(150));
let cursor_end = ef.cursor_from(100);
assert!(cursor_end.is_exhausted());
}
#[test]
fn test_cursor_from_comprehensive() {
let mut values = Vec::with_capacity(1000);
let mut pos = 0u32;
for i in 0..1000 {
values.push(pos);
pos += if i % 10 == 0 {
50 + (i as u32 % 100)
} else {
10 + (i as u32 % 20)
};
}
let ef = EliasFano::build(&values);
let cursor0 = ef.cursor_from(0);
let cursor = ef.cursor();
assert_eq!(cursor0.index(), cursor.index());
assert_eq!(cursor0.current(), cursor.current());
for &idx in &[0, 1, 10, 50, 100, 255, 256, 257, 500, 512, 513, 999] {
let cursor = ef.cursor_from(idx);
assert_eq!(cursor.index(), idx, "cursor_from({}).index()", idx);
assert_eq!(
cursor.current(),
Some(values[idx]),
"cursor_from({}).current()",
idx
);
}
assert!(ef.cursor_from(1000).is_exhausted());
assert!(ef.cursor_from(1001).is_exhausted());
assert!(ef.cursor_from(usize::MAX).is_exhausted());
}
#[test]
fn test_cursor_from_then_iterate() {
let values: Vec<u32> = (0..500).map(|i| i * 7).collect();
let ef = EliasFano::build(&values);
let mut cursor = ef.cursor_from(250);
let mut collected = Vec::new();
while let Some(v) = cursor.current() {
collected.push(v);
cursor.advance_one();
}
assert_eq!(collected, values[250..].to_vec());
let mut cursor = ef.cursor_from(495);
let mut collected = Vec::new();
while let Some(v) = cursor.current() {
collected.push(v);
cursor.advance_one();
}
assert_eq!(collected, values[495..].to_vec());
}
#[test]
fn test_cursor_from_then_advance_by() {
let values: Vec<u32> = (0..1000).map(|i| i * 5).collect();
let ef = EliasFano::build(&values);
let mut cursor = ef.cursor_from(100);
assert_eq!(cursor.current(), Some(500));
assert_eq!(cursor.advance_by(50), Some(750)); assert_eq!(cursor.index(), 150);
assert_eq!(cursor.advance_by(100), Some(1250)); assert_eq!(cursor.index(), 250);
assert_eq!(cursor.advance_by(740), Some(4950)); assert_eq!(cursor.index(), 990);
assert_eq!(cursor.advance_by(100), None);
assert!(cursor.is_exhausted());
}
#[test]
fn test_cursor_from_at_sample_boundaries() {
let values: Vec<u32> = (0..1000).map(|i| i * 5).collect();
let ef = EliasFano::build(&values);
for boundary in [256i32, 512, 768] {
for offset in [-2, -1, 0, 1, 2] {
let idx = (boundary + offset) as usize;
if idx < values.len() {
let cursor = ef.cursor_from(idx);
assert_eq!(
cursor.current(),
Some(values[idx]),
"cursor_from({}) near boundary {}",
idx,
boundary
);
}
}
}
}
#[test]
fn test_large_values() {
let values = vec![
0,
1_000_000,
100_000_000,
1_000_000_000,
u32::MAX - 1000,
u32::MAX,
];
let ef = EliasFano::build(&values);
for (i, &v) in values.iter().enumerate() {
assert_eq!(ef.get(i), Some(v), "get({}) failed", i);
}
let collected: Vec<u32> = ef.into_iter().collect();
assert_eq!(collected, values);
}
#[test]
fn test_duplicate_values() {
let values = vec![0, 0, 5, 5, 5, 10, 10, 20];
let ef = EliasFano::build(&values);
let collected: Vec<u32> = ef.into_iter().collect();
assert_eq!(collected, values);
}
#[test]
fn test_heap_size() {
let values: Vec<u32> = (0..10000).map(|i| i * 10).collect();
let ef = EliasFano::build(&values);
let vec_size = values.len() * 4; let ef_size = ef.heap_size();
assert!(
ef_size < vec_size,
"EF size {} should be less than Vec size {}",
ef_size,
vec_size
);
let ratio = vec_size as f64 / ef_size as f64;
eprintln!(
"Compression: {} bytes -> {} bytes ({:.2}x)",
vec_size, ef_size, ratio
);
}
#[test]
fn test_iterator() {
let values: Vec<u32> = (0..50).map(|i| i * i).collect();
let ef = EliasFano::build(&values);
let collected: Vec<u32> = ef.into_iter().collect();
assert_eq!(collected, values);
let iter = ef.into_iter();
assert_eq!(iter.size_hint(), (50, Some(50)));
}
#[test]
fn test_many_elements_with_samples() {
let values: Vec<u32> = (0..1000).map(|i| i * 5).collect();
let ef = EliasFano::build(&values);
assert_eq!(ef.get(0), Some(0));
assert_eq!(ef.get(256), Some(1280)); assert_eq!(ef.get(512), Some(2560)); assert_eq!(ef.get(999), Some(4995));
let mut cursor = ef.cursor();
assert_eq!(cursor.seek(300), Some(1500));
assert_eq!(cursor.seek(600), Some(3000));
}
#[test]
#[ignore]
fn benchmark_access_patterns() {
use std::time::Instant;
let n = 100_000;
let mut values = Vec::with_capacity(n);
let mut pos = 0u32;
for i in 0..n {
values.push(pos);
pos += if i % 10 == 0 {
50 + (i as u32 % 100)
} else {
10 + (i as u32 % 20)
};
}
let ef = EliasFano::build(&values);
eprintln!("\n=== EliasFano Benchmark ===");
eprintln!("Elements: {}", n);
eprintln!("Universe: {}", values.last().unwrap());
eprintln!(
"Size: Vec<u32>={} bytes, EliasFano={} bytes ({:.2}x compression)",
n * 4,
ef.heap_size(),
(n * 4) as f64 / ef.heap_size() as f64
);
let iterations = 10;
let start = Instant::now();
for _ in 0..iterations {
let mut cursor = ef.cursor();
let mut sum = 0u64;
while let Some(v) = cursor.current() {
sum += v as u64;
cursor.advance_one();
}
std::hint::black_box(sum);
}
let ef_seq_time = start.elapsed();
let start = Instant::now();
for _ in 0..iterations {
let mut sum = 0u64;
for &v in &values {
sum += v as u64;
}
std::hint::black_box(sum);
}
let vec_seq_time = start.elapsed();
eprintln!(
"\nSequential iteration ({} elements × {} iterations):",
n, iterations
);
eprintln!(
" Vec<u32>: {:?} ({:.1} ns/elem)",
vec_seq_time,
vec_seq_time.as_nanos() as f64 / (n * iterations) as f64
);
eprintln!(
" EliasFano: {:?} ({:.1} ns/elem)",
ef_seq_time,
ef_seq_time.as_nanos() as f64 / (n * iterations) as f64
);
eprintln!(
" Ratio: {:.2}x",
ef_seq_time.as_nanos() as f64 / vec_seq_time.as_nanos() as f64
);
let indices: Vec<usize> = (0..10000).map(|i| (i * 7) % n).collect();
let start = Instant::now();
for _ in 0..iterations {
let mut sum = 0u64;
for &i in &indices {
sum += ef.get(i).unwrap() as u64;
}
std::hint::black_box(sum);
}
let ef_rand_time = start.elapsed();
let start = Instant::now();
for _ in 0..iterations {
let mut sum = 0u64;
for &i in &indices {
sum += values[i] as u64;
}
std::hint::black_box(sum);
}
let vec_rand_time = start.elapsed();
eprintln!(
"\nRandom access ({} accesses × {} iterations):",
indices.len(),
iterations
);
eprintln!(
" Vec<u32>: {:?} ({:.1} ns/access)",
vec_rand_time,
vec_rand_time.as_nanos() as f64 / (indices.len() * iterations) as f64
);
eprintln!(
" EliasFano: {:?} ({:.1} ns/access)",
ef_rand_time,
ef_rand_time.as_nanos() as f64 / (indices.len() * iterations) as f64
);
eprintln!(
" Ratio: {:.2}x",
ef_rand_time.as_nanos() as f64 / vec_rand_time.as_nanos() as f64
);
let start = Instant::now();
for _ in 0..iterations {
let mut cursor = ef.cursor();
let mut sum = 0u64;
let mut skip = 2;
while let Some(v) = cursor.current() {
sum += v as u64;
cursor.advance_by(skip);
skip = (skip % 7) + 2; }
std::hint::black_box(sum);
}
let ef_skip_time = start.elapsed();
let start = Instant::now();
for _ in 0..iterations {
let mut sum = 0u64;
let mut i = 0;
let mut skip = 2;
while i < values.len() {
sum += values[i] as u64;
i += skip;
skip = (skip % 7) + 2;
}
std::hint::black_box(sum);
}
let vec_skip_time = start.elapsed();
eprintln!(
"\nSkip-by-small (advance_by 2-8 × {} iterations):",
iterations
);
eprintln!(" Vec<u32>: {:?}", vec_skip_time);
eprintln!(" EliasFano: {:?}", ef_skip_time);
eprintln!(
" Ratio: {:.2}x",
ef_skip_time.as_nanos() as f64 / vec_skip_time.as_nanos() as f64
);
}
}