const SUPERBLOCK_WORDS: usize = 8;
#[derive(Debug, Clone)]
pub struct EliasFano {
low_bits: Vec<u64>,
high_bits: Vec<u64>,
rank_superblocks: Vec<u64>,
n: usize,
low_width: u32,
universe: u64,
}
impl EliasFano {
pub fn encode(values: &[u64]) -> Self {
let n = values.len();
if n == 0 {
return Self {
low_bits: Vec::new(),
high_bits: Vec::new(),
rank_superblocks: vec![0],
n: 0,
low_width: 0,
universe: 0,
};
}
for i in 1..n {
assert!(
values[i] >= values[i - 1],
"sequence must be non-decreasing: values[{}]={} < values[{}]={}",
i,
values[i],
i - 1,
values[i - 1]
);
}
let universe = values[n - 1];
let low_width = if universe == 0 || n <= 1 {
0
} else {
let ratio = universe / n as u64;
if ratio == 0 { 0 } else { ratio.ilog2() }
};
let low_mask = if low_width == 0 {
0
} else if low_width >= 64 {
u64::MAX
} else {
(1u64 << low_width) - 1
};
let total_low_bits = n as u64 * low_width as u64;
let low_words = div_ceil_u64(total_low_bits, 64) as usize;
let mut low_bits = vec![0u64; low_words];
let max_high = universe >> low_width;
let high_bit_len = n as u64 + max_high + 1;
let high_words = div_ceil_u64(high_bit_len, 64) as usize;
let mut high_bits = vec![0u64; high_words];
for (i, &v) in values.iter().enumerate() {
if low_width > 0 {
let low = v & low_mask;
set_bits(&mut low_bits, i as u64 * low_width as u64, low_width, low);
}
let high = v >> low_width;
let bit_pos = high + i as u64;
let word_idx = (bit_pos / 64) as usize;
let bit_offset = bit_pos % 64;
high_bits[word_idx] |= 1u64 << bit_offset;
}
let num_superblocks = div_ceil(high_words, SUPERBLOCK_WORDS);
let mut rank_superblocks = Vec::with_capacity(num_superblocks + 1);
rank_superblocks.push(0u64);
let mut cumulative = 0u64;
for chunk in high_bits.chunks(SUPERBLOCK_WORDS) {
for &word in chunk {
cumulative += word.count_ones() as u64;
}
rank_superblocks.push(cumulative);
}
Self {
low_bits,
high_bits,
rank_superblocks,
n,
low_width,
universe,
}
}
pub fn access(&self, index: usize) -> u64 {
assert!(
index < self.n,
"index {index} out of bounds (len={})",
self.n
);
let low = if self.low_width > 0 {
get_bits(
&self.low_bits,
index as u64 * self.low_width as u64,
self.low_width,
)
} else {
0
};
let pos = self.select1(index);
let high = pos as u64 - index as u64;
(high << self.low_width) | low
}
pub fn rank(&self, value: u64) -> usize {
if self.n == 0 || value < self.access(0) {
return 0;
}
if value >= self.universe {
return self.n;
}
let high = value >> self.low_width;
let low = if self.low_width > 0 {
value & ((1u64 << self.low_width) - 1)
} else {
0
};
let bucket_start_pos = if high == 0 {
0
} else {
self.select0(high as usize - 1) + 1
};
let bucket_end_pos = if (high as usize) < self.high_bits.len() * 64 {
match self.try_select0(high as usize) {
Some(pos) => pos,
None => self.high_bits.len() * 64,
}
} else {
self.high_bits.len() * 64
};
let base_rank = self.rank1(bucket_start_pos);
let mut count = base_rank;
let mut pos = bucket_start_pos;
while pos < bucket_end_pos {
let word_idx = pos / 64;
if word_idx >= self.high_bits.len() {
break;
}
let bit = pos % 64;
let word = self.high_bits[word_idx] >> bit;
if word & 1 == 1 {
let elem_idx = count;
if elem_idx >= self.n {
break;
}
let elem_low = if self.low_width > 0 {
get_bits(
&self.low_bits,
elem_idx as u64 * self.low_width as u64,
self.low_width,
)
} else {
0
};
if elem_low <= low {
count += 1;
} else {
break;
}
} else {
break;
}
pos += 1;
}
count
}
pub fn select(&self, rank: usize) -> u64 {
self.access(rank)
}
pub fn next_geq(&self, value: u64) -> Option<(usize, u64)> {
if self.n == 0 {
return None;
}
if value == 0 {
return Some((0, self.access(0)));
}
if value > self.universe {
return None;
}
let idx = self.rank(value - 1);
if idx >= self.n {
return None;
}
Some((idx, self.access(idx)))
}
#[inline]
pub fn len(&self) -> usize {
self.n
}
#[inline]
pub fn is_empty(&self) -> bool {
self.n == 0
}
pub fn size_in_bytes(&self) -> usize {
self.low_bits.len() * 8 + self.high_bits.len() * 8 + self.rank_superblocks.len() * 8
}
pub fn optimal_size_in_bytes(&self) -> usize {
if self.n == 0 {
return 0;
}
let bits_per_elem = if self.universe > 0 && self.n > 1 {
let ratio = self.universe as f64 / self.n as f64;
ratio.log2().max(0.0) + 2.0
} else {
2.0
};
((self.n as f64 * bits_per_elem) / 8.0).ceil() as usize
}
fn rank1(&self, pos: usize) -> usize {
if pos == 0 {
return 0;
}
let word_idx = pos / 64;
let bit_idx = pos % 64;
let sb_idx = word_idx / SUPERBLOCK_WORDS;
let mut count = self.rank_superblocks[sb_idx] as usize;
let sb_start = sb_idx * SUPERBLOCK_WORDS;
for i in sb_start..word_idx.min(self.high_bits.len()) {
count += self.high_bits[i].count_ones() as usize;
}
if bit_idx > 0 && word_idx < self.high_bits.len() {
let mask = (1u64 << bit_idx) - 1;
count += (self.high_bits[word_idx] & mask).count_ones() as usize;
}
count
}
fn select1(&self, k: usize) -> usize {
assert!(k < self.n, "select1({k}) out of bounds (n={})", self.n);
let target = k as u64;
let mut lo = 0usize;
let mut hi = self.rank_superblocks.len() - 1;
while lo < hi {
let mid = lo + (hi - lo) / 2;
if self.rank_superblocks[mid + 1] <= target {
lo = mid + 1;
} else {
hi = mid;
}
}
let sb = lo;
let mut remaining = k - self.rank_superblocks[sb] as usize;
let word_start = sb * SUPERBLOCK_WORDS;
for w in word_start..self.high_bits.len() {
let ones = self.high_bits[w].count_ones() as usize;
if remaining < ones {
return w * 64 + select_in_word(self.high_bits[w], remaining);
}
remaining -= ones;
}
unreachable!("select1({k}): not enough 1-bits in high_bits")
}
fn select0(&self, k: usize) -> usize {
self.try_select0(k)
.unwrap_or_else(|| panic!("select0({k}): not enough 0-bits in high_bits"))
}
fn try_select0(&self, k: usize) -> Option<usize> {
let mut lo = 0usize;
let mut hi = self.rank_superblocks.len() - 1;
while lo < hi {
let mid = lo + (hi - lo) / 2;
let total_bits = (mid + 1) * SUPERBLOCK_WORDS * 64;
let ones = self.rank_superblocks[mid + 1] as usize;
let zeros = total_bits - ones;
if zeros <= k {
lo = mid + 1;
} else {
hi = mid;
}
}
let sb = lo;
let sb_total_bits = sb * SUPERBLOCK_WORDS * 64;
let sb_ones = self.rank_superblocks[sb] as usize;
let mut remaining = k - (sb_total_bits - sb_ones);
let word_start = sb * SUPERBLOCK_WORDS;
for w in word_start..self.high_bits.len() {
let zeros = self.high_bits[w].count_zeros() as usize;
if remaining < zeros {
return Some(w * 64 + select0_in_word(self.high_bits[w], remaining));
}
remaining -= zeros;
}
None
}
}
fn get_bits(words: &[u64], bit_pos: u64, width: u32) -> u64 {
if width == 0 {
return 0;
}
let word_idx = (bit_pos / 64) as usize;
let bit_offset = (bit_pos % 64) as u32;
let mask = if width >= 64 {
u64::MAX
} else {
(1u64 << width) - 1
};
if bit_offset + width <= 64 {
(words[word_idx] >> bit_offset) & mask
} else {
let lo = words[word_idx] >> bit_offset;
let hi = words[word_idx + 1] << (64 - bit_offset);
(lo | hi) & mask
}
}
fn set_bits(words: &mut [u64], bit_pos: u64, width: u32, value: u64) {
if width == 0 {
return;
}
let word_idx = (bit_pos / 64) as usize;
let bit_offset = (bit_pos % 64) as u32;
let mask = if width >= 64 {
u64::MAX
} else {
(1u64 << width) - 1
};
let value = value & mask;
words[word_idx] &= !(mask << bit_offset);
words[word_idx] |= value << bit_offset;
if bit_offset + width > 64 {
let overflow = bit_offset + width - 64;
let overflow_mask = if overflow >= 64 {
u64::MAX
} else {
(1u64 << overflow) - 1
};
words[word_idx + 1] &= !overflow_mask;
words[word_idx + 1] |= value >> (64 - bit_offset);
}
}
fn select_in_word(word: u64, k: usize) -> usize {
let mut remaining = k;
let mut w = word;
for bit in 0..64 {
if w & 1 == 1 {
if remaining == 0 {
return bit;
}
remaining -= 1;
}
w >>= 1;
if w == 0 {
break;
}
}
unreachable!("select_in_word: not enough 1-bits")
}
fn select0_in_word(word: u64, k: usize) -> usize {
let mut remaining = k;
let inverted = !word;
let mut w = inverted;
for bit in 0..64 {
if w & 1 == 1 {
if remaining == 0 {
return bit;
}
remaining -= 1;
}
w >>= 1;
}
unreachable!("select0_in_word: not enough 0-bits")
}
fn div_ceil(a: usize, b: usize) -> usize {
a.div_ceil(b)
}
fn div_ceil_u64(a: u64, b: u64) -> u64 {
a.div_ceil(b)
}
#[cfg(test)]
mod tests {
use super::*;
use proptest::prelude::*;
#[test]
fn empty_sequence() {
let ef = EliasFano::encode(&[]);
assert_eq!(ef.len(), 0);
assert!(ef.is_empty());
assert_eq!(ef.rank(0), 0);
assert_eq!(ef.rank(100), 0);
assert_eq!(ef.next_geq(0), None);
}
#[test]
fn single_element() {
let ef = EliasFano::encode(&[42]);
assert_eq!(ef.len(), 1);
assert_eq!(ef.access(0), 42);
assert_eq!(ef.select(0), 42);
assert_eq!(ef.rank(41), 0);
assert_eq!(ef.rank(42), 1);
assert_eq!(ef.rank(100), 1);
assert_eq!(ef.next_geq(0), Some((0, 42)));
assert_eq!(ef.next_geq(42), Some((0, 42)));
assert_eq!(ef.next_geq(43), None);
}
#[test]
fn all_zeros() {
let ef = EliasFano::encode(&[0, 0, 0, 0]);
assert_eq!(ef.len(), 4);
for i in 0..4 {
assert_eq!(ef.access(i), 0);
}
assert_eq!(ef.rank(0), 4);
assert_eq!(ef.next_geq(0), Some((0, 0)));
assert_eq!(ef.next_geq(1), None);
}
#[test]
fn consecutive_values() {
let values: Vec<u64> = (0..100).collect();
let ef = EliasFano::encode(&values);
assert_eq!(ef.len(), 100);
for (i, &v) in values.iter().enumerate() {
assert_eq!(ef.access(i), v, "access({i})");
}
}
#[test]
fn height_prefix_sums() {
let sums = [20u64, 45, 75, 100, 130, 180];
let ef = EliasFano::encode(&sums);
for (i, &v) in sums.iter().enumerate() {
assert_eq!(ef.access(i), v, "access({i})");
}
assert_eq!(ef.rank(50), 2);
assert_eq!(ef.rank(75), 3);
assert_eq!(ef.next_geq(50), Some((2, 75)));
}
#[test]
fn large_values() {
let values = [1_000_000u64, 2_000_000, 3_000_000, 4_000_000];
let ef = EliasFano::encode(&values);
for (i, &v) in values.iter().enumerate() {
assert_eq!(ef.access(i), v);
}
}
#[test]
fn duplicate_values() {
let values = [5u64, 5, 10, 10, 10, 20];
let ef = EliasFano::encode(&values);
for (i, &v) in values.iter().enumerate() {
assert_eq!(ef.access(i), v, "access({i})");
}
assert_eq!(ef.rank(5), 2);
assert_eq!(ef.rank(10), 5);
assert_eq!(ef.rank(9), 2);
}
#[test]
fn rank_boundary_cases() {
let values = [10u64, 20, 30, 40, 50];
let ef = EliasFano::encode(&values);
assert_eq!(ef.rank(0), 0);
assert_eq!(ef.rank(9), 0);
assert_eq!(ef.rank(10), 1);
assert_eq!(ef.rank(15), 1);
assert_eq!(ef.rank(50), 5);
assert_eq!(ef.rank(100), 5);
}
#[test]
fn next_geq_exhaustive() {
let values = [10u64, 20, 30, 40, 50];
let ef = EliasFano::encode(&values);
assert_eq!(ef.next_geq(0), Some((0, 10)));
assert_eq!(ef.next_geq(10), Some((0, 10)));
assert_eq!(ef.next_geq(11), Some((1, 20)));
assert_eq!(ef.next_geq(50), Some((4, 50)));
assert_eq!(ef.next_geq(51), None);
}
#[test]
fn select_matches_access() {
let values = [3u64, 7, 15, 31, 63, 127, 255];
let ef = EliasFano::encode(&values);
for i in 0..values.len() {
assert_eq!(ef.select(i), ef.access(i));
}
}
#[test]
fn space_efficiency() {
let values: Vec<u64> = (0..10_000).map(|i| i * 100).collect();
let ef = EliasFano::encode(&values);
let dense_size = values.len() * 8; let ef_size = ef.size_in_bytes();
assert!(
ef_size < dense_size,
"Elias-Fano ({ef_size} bytes) should be smaller than dense ({dense_size} bytes)"
);
}
#[test]
fn size_in_bytes_non_zero_for_non_empty() {
let ef = EliasFano::encode(&[1, 2, 3]);
assert!(ef.size_in_bytes() > 0);
}
#[test]
#[should_panic(expected = "non-decreasing")]
fn rejects_non_monotone() {
EliasFano::encode(&[10, 5, 20]);
}
#[test]
#[should_panic(expected = "out of bounds")]
fn access_out_of_bounds() {
let ef = EliasFano::encode(&[1, 2, 3]);
ef.access(3);
}
#[test]
fn get_set_bits_within_word() {
let mut words = vec![0u64; 2];
set_bits(&mut words, 0, 8, 0xAB);
assert_eq!(get_bits(&words, 0, 8), 0xAB);
set_bits(&mut words, 16, 12, 0xFFF);
assert_eq!(get_bits(&words, 16, 12), 0xFFF);
}
#[test]
fn get_set_bits_crossing_boundary() {
let mut words = vec![0u64; 2];
set_bits(&mut words, 60, 8, 0xFF);
assert_eq!(get_bits(&words, 60, 8), 0xFF);
}
#[test]
fn select_in_word_various() {
assert_eq!(select_in_word(0b1010_1010, 0), 1);
assert_eq!(select_in_word(0b1010_1010, 1), 3);
assert_eq!(select_in_word(0b1010_1010, 2), 5);
assert_eq!(select_in_word(0b1010_1010, 3), 7);
assert_eq!(select_in_word(1, 0), 0);
assert_eq!(select_in_word(u64::MAX, 63), 63);
}
fn sorted_values_strategy(max_len: usize, max_val: u64) -> impl Strategy<Value = Vec<u64>> {
prop::collection::vec(0u64..=max_val, 0..=max_len).prop_map(|mut v| {
v.sort();
v
})
}
proptest! {
#[test]
fn access_matches_original(values in sorted_values_strategy(200, 10_000)) {
let ef = EliasFano::encode(&values);
prop_assert_eq!(ef.len(), values.len());
for (i, &v) in values.iter().enumerate() {
prop_assert_eq!(ef.access(i), v, "access({}) mismatch", i);
}
}
#[test]
fn rank_matches_naive(values in sorted_values_strategy(100, 1_000)) {
let ef = EliasFano::encode(&values);
let mut test_points: Vec<u64> = values.clone();
for &v in &values {
if v > 0 { test_points.push(v - 1); }
test_points.push(v + 1);
}
test_points.push(0);
test_points.sort();
test_points.dedup();
for &q in &test_points {
let naive_rank = values.iter().filter(|&&v| v <= q).count();
prop_assert_eq!(
ef.rank(q), naive_rank,
"rank({}) mismatch: ef={}, naive={}",
q, ef.rank(q), naive_rank
);
}
}
#[test]
fn next_geq_matches_naive(values in sorted_values_strategy(100, 1_000)) {
let ef = EliasFano::encode(&values);
let mut test_points: Vec<u64> = values.clone();
test_points.push(0);
if let Some(&max) = values.last() {
test_points.push(max + 1);
}
test_points.sort();
test_points.dedup();
for &q in &test_points {
let naive = values.iter().enumerate()
.find(|&(_, v)| *v >= q)
.map(|(i, &v)| (i, v));
let ef_result = ef.next_geq(q);
prop_assert_eq!(
ef_result, naive,
"next_geq({}) mismatch: ef={:?}, naive={:?}",
q, ef_result, naive
);
}
}
#[test]
fn select_equals_access(values in sorted_values_strategy(100, 1_000)) {
let ef = EliasFano::encode(&values);
for i in 0..values.len() {
prop_assert_eq!(ef.select(i), ef.access(i));
}
}
#[test]
fn space_within_10x_of_optimal(values in sorted_values_strategy(500, 100_000)) {
if values.len() < 2 { return Ok(()); }
let ef = EliasFano::encode(&values);
let actual = ef.size_in_bytes();
let optimal = ef.optimal_size_in_bytes().max(1);
prop_assert!(
actual <= optimal * 10,
"space: actual={actual}, optimal={optimal}, ratio={}",
actual as f64 / optimal as f64
);
}
}
fn make_prefix_sums(n: usize, avg_height: u64) -> Vec<u64> {
let mut sums = Vec::with_capacity(n);
let mut acc = 0u64;
for i in 0..n {
acc += avg_height + (i as u64 % 5); sums.push(acc);
}
sums
}
#[test]
fn memory_comparison_1k() {
let sums = make_prefix_sums(1_000, 20);
let ef = EliasFano::encode(&sums);
let dense = sums.len() * 8;
let ef_size = ef.size_in_bytes();
assert!(ef_size < dense, "1K: EF={ef_size}B < dense={dense}B");
}
#[test]
fn memory_comparison_10k() {
let sums = make_prefix_sums(10_000, 20);
let ef = EliasFano::encode(&sums);
let dense = sums.len() * 8;
let ef_size = ef.size_in_bytes();
assert!(ef_size < dense, "10K: EF={ef_size}B < dense={dense}B");
}
#[test]
fn memory_comparison_100k() {
let sums = make_prefix_sums(100_000, 20);
let ef = EliasFano::encode(&sums);
let dense = sums.len() * 8;
let ef_size = ef.size_in_bytes();
assert!(ef_size < dense, "100K: EF={ef_size}B < dense={dense}B");
}
#[test]
fn memory_comparison_1m() {
let sums = make_prefix_sums(1_000_000, 20);
let ef = EliasFano::encode(&sums);
let dense = sums.len() * 8;
let ef_size = ef.size_in_bytes();
assert!(ef_size < dense, "1M: EF={ef_size}B < dense={dense}B");
}
#[test]
fn query_correctness_at_scale() {
let sums = make_prefix_sums(100_000, 20);
let ef = EliasFano::encode(&sums);
assert_eq!(ef.access(0), sums[0]);
assert_eq!(ef.access(50_000), sums[50_000]);
assert_eq!(ef.access(99_999), sums[99_999]);
let mid_val = sums[50_000];
let naive_rank = sums.iter().filter(|&&v| v <= mid_val).count();
assert_eq!(ef.rank(mid_val), naive_rank);
let target = sums[75_000] - 1;
let result = ef.next_geq(target);
assert!(result.is_some());
let (idx, val) = result.unwrap();
assert!(val >= target);
assert_eq!(val, sums[idx]);
}
}