// One absolute rank sample is stored per this many 64-bit words of the
// high-bit array; bounds the linear scan in rank1/select1/select0.
const RANK_SAMPLE_RATE: usize = 8;
// A select1 sample (word-aligned bit position) is stored for every 256th set bit.
const SELECT1_SAMPLE_RATE: usize = 256;
// A select0 sample (word-aligned bit position) is stored for every 64th zero bit.
const SELECT0_SAMPLE_RATE: usize = 64;
use crate::algorithms::bit_ops::select_in_word;
/// Elias-Fano encoding of a monotone (non-decreasing) integer sequence.
///
/// Each value is split into a `low_bit_width`-bit low part, stored packed in
/// `low_bits`, and a high part stored in unary in `high_bits` (one set bit per
/// element, one zero bit per high-bucket increment). Sampled rank/select
/// structures accelerate access and search queries.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct EliasFano {
    low_bits: Vec<u64>,        // packed low parts, plus one spare trailing word
    high_bits: Vec<u64>,       // unary-coded high parts
    len: usize,                // number of elements
    low_bit_width: u32,        // bits per low part (0 when the sequence is dense)
    universe: u64,             // exclusive upper bound on values (last + 1)
    high_len_bits: usize,      // logical bit length of `high_bits`
    rank_samples: Vec<u32>,    // ones before every RANK_SAMPLE_RATE-th word (+ final total)
    select1_samples: Vec<u32>, // word-aligned position of every SELECT1_SAMPLE_RATE-th one
    select0_samples: Vec<u32>, // word-aligned position of every SELECT0_SAMPLE_RATE-th zero
}
impl EliasFano {
/// Builds the structure from a sorted slice of `u32` values
/// (duplicates allowed).
pub fn from_sorted(values: &[u32]) -> Self {
    match values.last() {
        // Empty input: delegate to the u64 builder's empty case.
        None => Self::from_sorted_u64(&[]),
        Some(&last) => {
            Self::from_sorted_impl(values.len(), last as u64 + 1, |i| values[i] as u64)
        }
    }
}
/// Builds the structure from a sorted slice of `u64` values
/// (duplicates allowed).
pub fn from_sorted_u64(values: &[u64]) -> Self {
    match values.last() {
        // Empty sequence: all-empty arrays, zero universe.
        None => Self {
            low_bits: Vec::new(),
            high_bits: Vec::new(),
            len: 0,
            low_bit_width: 0,
            universe: 0,
            high_len_bits: 0,
            rank_samples: Vec::new(),
            select1_samples: Vec::new(),
            select0_samples: Vec::new(),
        },
        Some(&last) => Self::from_sorted_impl(values.len(), last + 1, |i| values[i]),
    }
}
/// Core builder: packs `n` sorted values (fetched via `get_val`) drawn from
/// `[0, universe)` into the low/high bit arrays and builds the samples.
/// Requires `n >= 1`.
fn from_sorted_impl(n: usize, universe: u64, get_val: impl Fn(usize) -> u64) -> Self {
    // Low width ~ floor(log2(universe / n)); 0 when the sequence is dense.
    let low_bit_width = if n >= universe as usize {
        0
    } else {
        (64 - (universe / n as u64).leading_zeros()).saturating_sub(1)
    };
    let low_mask = if low_bit_width == 0 { 0u64 } else { (1u64 << low_bit_width) - 1 };
    let total_low_bits = n as u64 * low_bit_width as u64;
    let low_words = ((total_low_bits + 63) / 64) as usize;
    // One spare word so get_low() can always read word_idx + 1 unconditionally.
    let mut low_bits = vec![0u64; low_words + 1];
    for i in 0..n {
        if low_bit_width > 0 {
            let low_val = get_val(i) & low_mask;
            let bit_pos = i as u64 * low_bit_width as u64;
            let word_idx = (bit_pos / 64) as usize;
            let bit_idx = (bit_pos % 64) as u32;
            low_bits[word_idx] |= low_val << bit_idx;
            // Spill the bits that cross the 64-bit word boundary.
            if bit_idx + low_bit_width > 64 && word_idx + 1 < low_words {
                low_bits[word_idx + 1] |= low_val >> (64 - bit_idx);
            }
        }
    }
    let last_val = get_val(n - 1);
    let max_high = last_val >> low_bit_width;
    // Unary stream: n one-bits plus max_high zero-bits plus one slack bit.
    let high_len_bits = n + max_high as usize + 1;
    let high_words = (high_len_bits + 63) / 64;
    let mut high_bits = vec![0u64; high_words];
    let mut pos = 0usize;
    let mut prev_high = 0u64;
    for i in 0..n {
        let high = get_val(i) >> low_bit_width;
        // Emit one zero per bucket advanced, then a one bit for the element.
        pos += (high - prev_high) as usize;
        let word_idx = pos / 64;
        let bit_idx = pos % 64;
        if word_idx < high_words {
            high_bits[word_idx] |= 1u64 << bit_idx;
        }
        pos += 1;
        prev_high = high;
    }
    let (rank_samples, select1_samples, select0_samples) =
        Self::build_samples(&high_bits, high_len_bits);
    Self {
        low_bits,
        high_bits,
        len: n,
        low_bit_width,
        universe,
        high_len_bits,
        rank_samples,
        select1_samples,
        select0_samples,
    }
}
/// Builds acceleration samples over the high-bit array:
/// - `rank_samples[k]`: ones before word `k * RANK_SAMPLE_RATE`
///   (plus one final entry holding the total),
/// - `select1_samples[k]`: word-aligned bit position of the word containing
///   the `k * SELECT1_SAMPLE_RATE`-th one bit,
/// - `select0_samples[k]`: likewise for zero bits, every SELECT0_SAMPLE_RATE.
fn build_samples(high_bits: &[u64], high_len_bits: usize) -> (Vec<u32>, Vec<u32>, Vec<u32>) {
    let mut rank_samples = Vec::new();
    let mut select1_samples = Vec::new();
    let mut select0_samples = Vec::new();
    let mut cumul_ones: u32 = 0;
    let mut cumul_zeros: u32 = 0;
    let mut next_sel1: u32 = 0;
    let mut next_sel0: u32 = 0;
    for (word_idx, &word) in high_bits.iter().enumerate() {
        if word_idx % RANK_SAMPLE_RATE == 0 {
            rank_samples.push(cumul_ones);
        }
        let ones = word.count_ones();
        // Bits past `high_len_bits` in the last word are not part of the bit
        // vector; only positions below it may count as zeros.
        let valid = if (word_idx + 1) * 64 <= high_len_bits {
            64u32
        } else {
            (high_len_bits - word_idx * 64) as u32
        };
        let zeros = valid - ones;
        let after_ones = cumul_ones + ones;
        // Record a sample for every threshold crossed inside this word.
        while next_sel1 < after_ones {
            select1_samples.push((word_idx * 64) as u32);
            next_sel1 += SELECT1_SAMPLE_RATE as u32;
        }
        let after_zeros = cumul_zeros + zeros;
        while next_sel0 < after_zeros {
            select0_samples.push((word_idx * 64) as u32);
            next_sel0 += SELECT0_SAMPLE_RATE as u32;
        }
        cumul_ones = after_ones;
        cumul_zeros = after_zeros;
    }
    // Final entry: total number of ones (lets rank1 index one past the end).
    rank_samples.push(cumul_ones);
    (rank_samples, select1_samples, select0_samples)
}
/// Number of encoded elements.
#[inline]
pub fn len(&self) -> usize { self.len }
/// True when no elements are stored.
#[inline]
pub fn is_empty(&self) -> bool { self.len == 0 }
/// Approximate heap footprint in bytes (bit arrays + samples + fixed header).
#[inline]
pub fn size_bytes(&self) -> usize {
    self.low_bits.len() * 8
        + self.high_bits.len() * 8
        + self.rank_samples.len() * 4
        + self.select1_samples.len() * 4
        + self.select0_samples.len() * 4
        + 32 // scalar struct fields
}
/// Average storage cost per element, in bits.
#[inline]
pub fn bits_per_element(&self) -> f64 {
    if self.len == 0 { return 0.0; }
    (self.size_bytes() * 8) as f64 / self.len as f64
}
/// Random access: returns the value at `index`, or `None` when out of range.
pub fn get(&self, index: usize) -> Option<u64> {
    if index >= self.len {
        return None;
    }
    // The high part equals the number of zeros before this element's one bit,
    // i.e. its bit position minus its rank.
    let high_pos = self.select1(index)?;
    let high_val = high_pos.checked_sub(index)? as u64;
    Some((high_val << self.low_bit_width) | self.get_low(index))
}
/// Returns the first `(index, value)` with `value >= target`, or `None`
/// when all elements are smaller (or the sequence is empty).
#[inline]
pub fn next_geq(&self, target: u64) -> Option<(usize, u64)> {
    if self.len == 0 || target >= self.universe { return None; }
    let target_high = target >> self.low_bit_width;
    // The bucket of `target_high` starts just after the (target_high - 1)-th
    // zero in the unary high-bit stream.
    let bucket_start_pos = if target_high == 0 {
        0
    } else {
        match self.select0(target_high as usize - 1) {
            Some(p) => p + 1,
            None => return None,
        }
    };
    // Every element whose one bit lies before the bucket start is < target.
    let elem_before = self.rank1(bucket_start_pos);
    let mut idx = elem_before;
    let mut word_idx = bucket_start_pos / 64;
    if word_idx >= self.high_bits.len() { return None; }
    let start_bit = bucket_start_pos % 64;
    let mut word = self.high_bits[word_idx] >> start_bit;
    let mut bit_pos = bucket_start_pos;
    // Scan set bits forward, decoding each candidate until one reaches target.
    while idx < self.len {
        if word == 0 {
            word_idx += 1;
            if word_idx >= self.high_bits.len() { return None; }
            word = self.high_bits[word_idx];
            bit_pos = word_idx * 64;
            continue;
        }
        let tz = word.trailing_zeros() as usize;
        bit_pos += tz;
        word >>= tz;
        // High part of element idx = zeros before its one bit = bit_pos - idx.
        let high_val = (bit_pos - idx) as u64;
        let low = self.get_low(idx);
        let val = (high_val << self.low_bit_width) | low;
        if val >= target {
            return Some((idx, val));
        }
        idx += 1;
        bit_pos += 1;
        word >>= 1;
    }
    None
}
/// In-order iterator over all values.
pub fn iter(&self) -> EliasFanoIter<'_> {
    EliasFanoIter { ef: self, pos: 0, high_pos: 0 }
}
/// Extracts the `low_bit_width`-bit low part of element `index`.
#[inline]
fn get_low(&self, index: usize) -> u64 {
    if self.low_bit_width == 0 { return 0; }
    let bit_pos = index as u64 * self.low_bit_width as u64;
    let word_idx = (bit_pos / 64) as usize;
    let bit_idx = (bit_pos % 64) as u32;
    // `low_bits` always holds one spare trailing word, so reading
    // word_idx + 1 is in bounds; the u128 shift handles straddling reads.
    let w0 = self.low_bits[word_idx];
    let w1 = self.low_bits[word_idx + 1];
    let combined = w0 as u128 | ((w1 as u128) << 64);
    (combined >> bit_idx) as u64 & ((1u64 << self.low_bit_width) - 1)
}
/// Bit position of the `rank`-th (0-based) one bit in the high-bit array,
/// i.e. the unary position of element `rank`.
#[inline]
fn select1(&self, rank: usize) -> Option<usize> {
    if rank >= self.len { return None; }
    // Jump near the answer via the sparse select samples...
    let sample_idx = rank / SELECT1_SAMPLE_RATE;
    let start_word = if sample_idx < self.select1_samples.len() {
        self.select1_samples[sample_idx] as usize / 64
    } else {
        0
    };
    // ...then recover the exact ones-count at `start_word` from rank samples.
    let rank_block = start_word / RANK_SAMPLE_RATE;
    let mut cumul = self.rank_samples[rank_block] as usize;
    for w in (rank_block * RANK_SAMPLE_RATE)..start_word {
        cumul += self.high_bits[w].count_ones() as usize;
    }
    let mut remaining = rank - cumul;
    // Word-by-word scan to the word containing the target one bit.
    for word_idx in start_word..self.high_bits.len() {
        let word = self.high_bits[word_idx];
        let ones = word.count_ones() as usize;
        if remaining < ones {
            return Some(word_idx * 64 + select_in_word(word, remaining));
        }
        remaining -= ones;
    }
    None
}
/// Bit position of the `rank`-th (0-based) zero bit in the high-bit array,
/// considering only the first `high_len_bits` positions.
#[inline]
fn select0(&self, rank: usize) -> Option<usize> {
    let sample_idx = rank / SELECT0_SAMPLE_RATE;
    let start_word = if sample_idx < self.select0_samples.len() {
        self.select0_samples[sample_idx] as usize / 64
    } else {
        0
    };
    // Reconstruct the number of ones — and from it zeros — before start_word.
    let rank_block = start_word / RANK_SAMPLE_RATE;
    let cumul_ones = {
        let mut c = self.rank_samples[rank_block] as usize;
        for w in (rank_block * RANK_SAMPLE_RATE)..start_word {
            c += self.high_bits[w].count_ones() as usize;
        }
        c
    };
    let total_bits_before = start_word * 64;
    let cumul_zeros = total_bits_before - cumul_ones;
    let mut remaining = rank - cumul_zeros;
    for word_idx in start_word..self.high_bits.len() {
        let word = self.high_bits[word_idx];
        // The last word may be only partially valid; bits past high_len_bits
        // must not be counted as zeros.
        let valid_bits = if (word_idx + 1) * 64 <= self.high_len_bits {
            64
        } else {
            self.high_len_bits - word_idx * 64
        };
        let actual_zeros = valid_bits - word.count_ones() as usize;
        if remaining < actual_zeros {
            // Select within the inverted word to find the zero's position.
            let inverted = !word;
            return Some(word_idx * 64 + select_in_word(inverted, remaining));
        }
        remaining -= actual_zeros;
    }
    None
}
/// Number of one bits strictly before bit position `pos`.
#[inline]
fn rank1(&self, pos: usize) -> usize {
    let full_words = pos / 64;
    let remaining_bits = pos % 64;
    // Start from the nearest rank sample, then count complete words.
    let sample_idx = full_words / RANK_SAMPLE_RATE;
    let mut count = self.rank_samples[sample_idx] as usize;
    let scan_start = sample_idx * RANK_SAMPLE_RATE;
    for i in scan_start..full_words {
        count += self.high_bits[i].count_ones() as usize;
    }
    // Partial word: mask off bits at and above `pos`.
    if remaining_bits > 0 && full_words < self.high_bits.len() {
        let mask = (1u64 << remaining_bits) - 1;
        count += (self.high_bits[full_words] & mask).count_ones() as usize;
    }
    count
}
}
/// In-order iterator over an [`EliasFano`] sequence.
pub struct EliasFanoIter<'a> {
    ef: &'a EliasFano,
    pos: usize,      // index of the next element to yield
    high_pos: usize, // bit position to resume the high-bit scan from
}
impl<'a> Iterator for EliasFanoIter<'a> {
    type Item = u64;
    fn next(&mut self) -> Option<Self::Item> {
        if self.pos >= self.ef.len { return None; }
        // Locate the next set bit at or after `high_pos`.
        let next_pos = self.high_pos;
        let mut word_idx = next_pos / 64;
        if word_idx >= self.ef.high_bits.len() { return None; }
        let bit_offset = next_pos % 64;
        let mut word = self.ef.high_bits[word_idx] >> bit_offset;
        if word != 0 {
            let tz = word.trailing_zeros() as usize;
            self.high_pos = word_idx * 64 + bit_offset + tz;
        } else {
            // Current word exhausted: skip whole zero words.
            loop {
                word_idx += 1;
                if word_idx >= self.ef.high_bits.len() { return None; }
                word = self.ef.high_bits[word_idx];
                if word != 0 {
                    self.high_pos = word_idx * 64 + word.trailing_zeros() as usize;
                    break;
                }
            }
        }
        // High part = zeros before this one bit = high_pos - pos.
        let high_val = (self.high_pos - self.pos) as u64;
        let low = self.ef.get_low(self.pos);
        let val = (high_val << self.ef.low_bit_width) | low;
        self.pos += 1;
        self.high_pos += 1;
        Some(val)
    }
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.ef.len - self.pos;
        (remaining, Some(remaining))
    }
}
impl<'a> ExactSizeIterator for EliasFanoIter<'a> {}
/// Seekable cursor over an [`EliasFano`] sequence supporting `advance`,
/// `advance_to_geq`, and repositioning via `advance_to_index`.
pub struct EliasFanoCursor<'a> {
    ef: &'a EliasFano,
    index: usize,    // current element index
    high_pos: usize, // bit position of the current element's one bit
}
impl<'a> EliasFanoCursor<'a> {
    /// Creates a cursor positioned on element 0 (its one bit located by a
    /// linear scan over the high words).
    fn new(ef: &'a EliasFano) -> Self {
        if ef.is_empty() {
            return Self { ef, index: 0, high_pos: 0 };
        }
        let mut high_pos = 0;
        for (word_idx, &word) in ef.high_bits.iter().enumerate() {
            if word != 0 {
                high_pos = word_idx * 64 + word.trailing_zeros() as usize;
                break;
            }
        }
        Self { ef, index: 0, high_pos }
    }
    /// Value under the cursor, or `None` once exhausted.
    #[inline]
    pub fn current(&self) -> Option<u64> {
        if self.index >= self.ef.len { return None; }
        // High part = zeros before the current one bit = high_pos - index.
        let high_val = (self.high_pos - self.index) as u64;
        let low = self.ef.get_low(self.index);
        Some((high_val << self.ef.low_bit_width) | low)
    }
    /// Index of the current element.
    #[inline]
    pub fn index(&self) -> usize { self.index }
    /// True once the cursor has moved past the last element.
    #[inline]
    pub fn is_exhausted(&self) -> bool { self.index >= self.ef.len }
    /// Moves to the next element; returns false when exhausted.
    #[inline]
    pub fn advance(&mut self) -> bool {
        self.index += 1;
        if self.index >= self.ef.len { return false; }
        // Find the next set bit strictly after the current one.
        let next_pos = self.high_pos + 1;
        let mut word_idx = next_pos / 64;
        if word_idx >= self.ef.high_bits.len() { return false; }
        let bit_in_word = next_pos % 64;
        let mut word = self.ef.high_bits[word_idx] >> bit_in_word;
        if word != 0 {
            self.high_pos = word_idx * 64 + bit_in_word + word.trailing_zeros() as usize;
            return true;
        }
        loop {
            word_idx += 1;
            if word_idx >= self.ef.high_bits.len() { return false; }
            word = self.ef.high_bits[word_idx];
            if word != 0 {
                self.high_pos = word_idx * 64 + word.trailing_zeros() as usize;
                return true;
            }
        }
    }
    /// Advances to the first element >= `target` (never moves backwards).
    /// Exhausts the cursor and returns false when no such element exists.
    #[inline]
    pub fn advance_to_geq(&mut self, target: u64) -> bool {
        if let Some(val) = self.current() {
            if val >= target { return true; }
        } else {
            return false;
        }
        if target >= self.ef.universe { self.index = self.ef.len; return false; }
        let target_high = target >> self.ef.low_bit_width;
        // Jump to the start of target's high bucket via select0.
        let bucket_start_pos = if target_high == 0 {
            0
        } else {
            match self.ef.select0(target_high as usize - 1) {
                Some(p) => p + 1,
                None => { self.index = self.ef.len; return false; }
            }
        };
        let elem_before = self.ef.rank1(bucket_start_pos);
        let mut idx = elem_before;
        let mut word_idx = bucket_start_pos / 64;
        if word_idx >= self.ef.high_bits.len() { self.index = self.ef.len; return false; }
        let start_bit = bucket_start_pos % 64;
        let mut word = self.ef.high_bits[word_idx] >> start_bit;
        let mut bit_pos = bucket_start_pos;
        // Decode candidates forward until one reaches the target.
        while idx < self.ef.len {
            if word == 0 {
                word_idx += 1;
                if word_idx >= self.ef.high_bits.len() { break; }
                word = self.ef.high_bits[word_idx];
                bit_pos = word_idx * 64;
                continue;
            }
            let tz = word.trailing_zeros() as usize;
            bit_pos += tz;
            word >>= tz;
            let high_val = (bit_pos - idx) as u64;
            let low = self.ef.get_low(idx);
            let val = (high_val << self.ef.low_bit_width) | low;
            if val >= target {
                self.index = idx;
                self.high_pos = bit_pos;
                return true;
            }
            idx += 1;
            bit_pos += 1;
            word >>= 1;
        }
        self.index = self.ef.len;
        false
    }
    /// Repositions the cursor on an arbitrary element index (any direction).
    #[inline]
    pub fn advance_to_index(&mut self, idx: usize) -> bool {
        if idx >= self.ef.len { return false; }
        if idx == self.index { return true; }
        if let Some(pos) = self.ef.select1(idx) {
            self.index = idx;
            self.high_pos = pos;
            true
        } else {
            false
        }
    }
    /// Rewinds the cursor back to element 0.
    pub fn reset(&mut self) {
        *self = Self::new(self.ef);
    }
}
impl EliasFano {
    /// Streaming cursor positioned at the first element.
    #[inline]
    pub fn cursor(&self) -> EliasFanoCursor<'_> {
        EliasFanoCursor::new(self)
    }
}
// Number of elements encoded per partitioned-Elias-Fano chunk.
const PEF_CHUNK_SIZE: usize = 128;
/// Per-chunk layout descriptor: where the chunk's bits live inside the
/// shared arrays and how its local Elias-Fano encoding is parameterized.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct PefChunkMeta {
    min_value: u64,     // smallest value in the chunk; deltas are relative to it
    low_offset: u32,    // start word of the chunk's low bits in `all_low_bits`
    high_offset: u32,   // start word of the chunk's high bits in `all_high_bits`
    count: u16,         // number of elements (<= PEF_CHUNK_SIZE)
    high_len_bits: u16, // logical bit length of the chunk's high stream
    high_words: u16,    // number of high words used
    low_words: u16,     // number of low words used
    low_bit_width: u8,  // bits per low part within this chunk
}
/// Borrowed, decoded view of a single chunk's storage.
struct ChunkView<'a> {
    low_bits: &'a [u64],  // chunk low words plus one extra readable word
    high_bits: &'a [u64], // chunk high words
    low_bit_width: u32,
    count: usize,         // number of elements in the chunk
    min_value: u64,       // deltas are relative to this value
    high_len_bits: usize, // logical length of the chunk's high-bit stream
}
/// Partitioned Elias-Fano: the sequence is cut into fixed-size chunks, each
/// encoded independently relative to its own minimum, which compresses
/// clustered data better than a single global encoding. All chunk bit
/// arrays are concatenated into shared buffers.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct PartitionedEliasFano {
    all_low_bits: Vec<u64>,       // concatenated per-chunk low bits (+ spare word)
    all_high_bits: Vec<u64>,      // concatenated per-chunk high bits
    meta: Vec<PefChunkMeta>,      // one layout descriptor per chunk
    chunk_upper_bounds: Vec<u64>, // max value of each chunk (non-decreasing)
    len: usize,                   // total number of elements
    universe: u64,                // exclusive upper bound on values (last + 1)
}
impl PartitionedEliasFano {
/// Builds the structure from a sorted slice of `u32` values
/// (duplicates allowed).
pub fn from_sorted(values: &[u32]) -> Self {
    match values.last() {
        // Empty input: all-empty arrays, zero universe.
        None => Self {
            all_low_bits: Vec::new(),
            all_high_bits: Vec::new(),
            meta: Vec::new(),
            chunk_upper_bounds: Vec::new(),
            len: 0,
            universe: 0,
        },
        Some(&last) => {
            Self::from_sorted_impl(values.len(), last as u64 + 1, |i| values[i] as u64)
        }
    }
}
/// Builds the structure from a sorted slice of `u64` values
/// (duplicates allowed).
pub fn from_sorted_u64(values: &[u64]) -> Self {
    match values.last() {
        // Empty input: all-empty arrays, zero universe.
        None => Self {
            all_low_bits: Vec::new(),
            all_high_bits: Vec::new(),
            meta: Vec::new(),
            chunk_upper_bounds: Vec::new(),
            len: 0,
            universe: 0,
        },
        Some(&last) => Self::from_sorted_impl(values.len(), last + 1, |i| values[i]),
    }
}
/// Core builder: splits the input into PEF_CHUNK_SIZE-element chunks and
/// encodes each chunk's deltas (value - chunk minimum) with its own
/// Elias-Fano layout appended to the shared bit arrays. Requires `n >= 1`.
fn from_sorted_impl(n: usize, universe: u64, get_val: impl Fn(usize) -> u64) -> Self {
    let num_chunks = (n + PEF_CHUNK_SIZE - 1) / PEF_CHUNK_SIZE;
    let mut all_low_bits = Vec::new();
    let mut all_high_bits = Vec::new();
    let mut meta = Vec::with_capacity(num_chunks);
    let mut chunk_upper_bounds = Vec::with_capacity(num_chunks);
    for chunk_idx in 0..num_chunks {
        let start = chunk_idx * PEF_CHUNK_SIZE;
        let end = (start + PEF_CHUNK_SIZE).min(n);
        let count = end - start;
        let min_val = get_val(start);
        let max_val = get_val(end - 1);
        // Chunk-local universe of deltas picks the per-chunk low width.
        let local_universe = max_val - min_val + 1;
        let low_bit_width = if count as u64 >= local_universe {
            0
        } else {
            (64 - (local_universe / count as u64).leading_zeros()).saturating_sub(1)
        };
        let low_mask = if low_bit_width == 0 { 0u64 } else { (1u64 << low_bit_width) - 1 };
        let total_low_bits = count as u64 * low_bit_width as u64;
        let low_words = ((total_low_bits + 63) / 64) as usize;
        let low_offset = all_low_bits.len();
        all_low_bits.resize(low_offset + low_words, 0);
        for i in 0..count {
            if low_bit_width > 0 {
                let delta = get_val(start + i) - min_val;
                let low_val = delta & low_mask;
                let bit_pos = i as u64 * low_bit_width as u64;
                let word_idx = (bit_pos / 64) as usize;
                let bit_idx = (bit_pos % 64) as u32;
                all_low_bits[low_offset + word_idx] |= low_val << bit_idx;
                // Spill bits crossing a 64-bit word boundary into the next word.
                if bit_idx + low_bit_width > 64 && word_idx + 1 < low_words {
                    all_low_bits[low_offset + word_idx + 1] |= low_val >> (64 - bit_idx);
                }
            }
        }
        let last_delta = max_val - min_val;
        let max_high = last_delta >> low_bit_width;
        // Unary stream: count one-bits + max_high zero-bits + one slack bit.
        let high_len_bits = count + max_high as usize + 1;
        let high_words = (high_len_bits + 63) / 64;
        let high_offset = all_high_bits.len();
        all_high_bits.resize(high_offset + high_words, 0);
        let mut pos = 0usize;
        let mut prev_high = 0u64;
        for i in 0..count {
            let delta = get_val(start + i) - min_val;
            let high = delta >> low_bit_width;
            // One zero per bucket advanced, then a one bit for the element.
            pos += (high - prev_high) as usize;
            let word_idx = pos / 64;
            let bit_idx = pos % 64;
            if word_idx < high_words {
                all_high_bits[high_offset + word_idx] |= 1u64 << bit_idx;
            }
            pos += 1;
            prev_high = high;
        }
        meta.push(PefChunkMeta {
            min_value: min_val,
            low_offset: low_offset as u32,
            high_offset: high_offset as u32,
            count: count as u16,
            high_len_bits: high_len_bits as u16,
            high_words: high_words as u16,
            low_words: low_words as u16,
            low_bit_width: low_bit_width as u8,
        });
        chunk_upper_bounds.push(max_val);
    }
    // Spare trailing word so chunk_get_low can read word_idx + 1 for the
    // last chunk without bounds checks.
    all_low_bits.push(0);
    Self { all_low_bits, all_high_bits, meta, chunk_upper_bounds, len: n, universe }
}
/// Number of encoded elements.
#[inline]
pub fn len(&self) -> usize { self.len }
/// True when no elements are stored.
#[inline]
pub fn is_empty(&self) -> bool { self.len == 0 }
/// Approximate heap footprint in bytes (bit arrays + metadata + header).
pub fn size_bytes(&self) -> usize {
    self.all_low_bits.len() * 8
        + self.all_high_bits.len() * 8
        + self.meta.len() * std::mem::size_of::<PefChunkMeta>()
        + self.chunk_upper_bounds.len() * 8
        + 48 // scalar struct fields
}
/// Average storage cost per element, in bits.
#[inline]
pub fn bits_per_element(&self) -> f64 {
    if self.len == 0 { return 0.0; }
    (self.size_bytes() * 8) as f64 / self.len as f64
}
/// Random access: returns the value at `index`, or `None` when out of range.
pub fn get(&self, index: usize) -> Option<u64> {
    if index >= self.len {
        return None;
    }
    // Locate the containing chunk, then decode the local delta within it.
    let view = self.chunk_view(index / PEF_CHUNK_SIZE);
    let delta = chunk_get_delta(&view, index % PEF_CHUNK_SIZE);
    Some(view.min_value + delta)
}
/// Returns the first `(index, value)` with `value >= target`, or `None`
/// when all elements are smaller (or the sequence is empty).
#[inline]
pub fn next_geq(&self, target: u64) -> Option<(usize, u64)> {
    if self.len == 0 || target >= self.universe { return None; }
    // Find the FIRST chunk whose maximum is >= target. `binary_search` is
    // unsuitable here: when several chunks share the same upper bound
    // (duplicate values straddling chunk boundaries) it may return any of
    // the matching positions, skipping equal elements at lower indices.
    // `partition_point` always yields the leftmost candidate chunk, matching
    // the cursor's `advance_to_geq` semantics.
    let chunk_idx = self.chunk_upper_bounds.partition_point(|&ub| ub < target);
    if chunk_idx >= self.meta.len() { return None; }
    let view = self.chunk_view(chunk_idx);
    let global_offset = chunk_idx * PEF_CHUNK_SIZE;
    // Whole chunk is >= target: its first element is the answer.
    if target <= view.min_value {
        let delta = chunk_get_delta(&view, 0);
        return Some((global_offset, view.min_value + delta));
    }
    let target_delta = target - view.min_value;
    let target_high = (target_delta >> view.low_bit_width) as usize;
    // Skip to target's high bucket, then scan forward for the first match.
    let (start_idx, start_pos) = chunk_skip_to_high(&view, target_high);
    if let Some((local_idx, delta, _)) = chunk_scan_geq(&view, target_delta, start_idx, start_pos) {
        return Some((global_offset + local_idx, view.min_value + delta));
    }
    // Defensive fallback: take the first element of the next chunk.
    let next_chunk = chunk_idx + 1;
    if next_chunk < self.meta.len() {
        let nv = self.chunk_view(next_chunk);
        let delta = chunk_get_delta(&nv, 0);
        Some((next_chunk * PEF_CHUNK_SIZE, nv.min_value + delta))
    } else {
        None
    }
}
/// In-order iterator over all values, caching the first chunk's layout
/// up front to avoid per-element metadata lookups.
pub fn iter(&self) -> PartitionedEliasFanoIter<'_> {
    if self.is_empty() {
        return PartitionedEliasFanoIter {
            pef: self,
            chunk_idx: 0,
            local_idx: 0,
            local_high_pos: 0,
            cached_high_bits: &[],
            cached_low_bits: &[],
            cached_low_bit_width: 0,
            cached_count: 0,
            cached_min_value: 0,
        };
    }
    let view = self.chunk_view(0);
    PartitionedEliasFanoIter {
        pef: self,
        chunk_idx: 0,
        local_idx: 0,
        local_high_pos: 0,
        cached_high_bits: view.high_bits,
        cached_low_bits: view.low_bits,
        cached_low_bit_width: view.low_bit_width,
        cached_count: view.count,
        cached_min_value: view.min_value,
    }
}
/// Streaming cursor positioned at the first element.
#[inline]
pub fn cursor(&self) -> PartitionedEliasFanoCursor<'_> {
    PartitionedEliasFanoCursor::new(self)
}
/// Borrowed view of chunk `idx` decoded from its metadata entry.
#[inline]
fn chunk_view(&self, idx: usize) -> ChunkView<'_> {
    let m = &self.meta[idx];
    let low_start = m.low_offset as usize;
    // Expose one extra word (capped at the array end) so low-bit reads may
    // touch word_idx + 1 unconditionally; the builder appends a spare word.
    let low_end = (low_start + m.low_words as usize + 1).min(self.all_low_bits.len());
    let high_start = m.high_offset as usize;
    ChunkView {
        low_bits: &self.all_low_bits[low_start..low_end],
        high_bits: &self.all_high_bits[high_start..high_start + m.high_words as usize],
        low_bit_width: m.low_bit_width as u32,
        count: m.count as usize,
        min_value: m.min_value,
        high_len_bits: m.high_len_bits as usize,
    }
}
}
/// Delta (value minus chunk minimum) of element `local_idx` within a chunk.
#[inline]
fn chunk_get_delta(chunk: &ChunkView<'_>, local_idx: usize) -> u64 {
    let high_pos = chunk_select1(chunk, local_idx);
    // High part = zeros before the element's one bit = position - rank.
    let high_val = (high_pos - local_idx) as u64;
    let low = chunk_get_low(chunk, local_idx);
    (high_val << chunk.low_bit_width) | low
}
/// Bit position of the `rank`-th (0-based) one bit in a chunk's high bits;
/// returns `high_len_bits` when the chunk has fewer ones (out-of-range rank).
#[inline]
fn chunk_select1(chunk: &ChunkView<'_>, rank: usize) -> usize {
    let mut remaining = rank;
    for (word_idx, &word) in chunk.high_bits.iter().enumerate() {
        let ones = word.count_ones() as usize;
        if remaining < ones {
            return word_idx * 64 + select_in_word(word, remaining);
        }
        remaining -= ones;
    }
    chunk.high_len_bits
}
/// Low-bit part of element `index` inside a chunk. Relies on the chunk view
/// exposing one readable word past `low_words` (see `chunk_view`).
#[inline]
fn chunk_get_low(chunk: &ChunkView<'_>, index: usize) -> u64 {
    if chunk.low_bit_width == 0 { return 0; }
    let bit_pos = index as u64 * chunk.low_bit_width as u64;
    let word_idx = (bit_pos / 64) as usize;
    let bit_idx = (bit_pos % 64) as u32;
    // u128 window over two adjacent words handles straddling reads.
    let w0 = chunk.low_bits[word_idx];
    let w1 = chunk.low_bits[word_idx + 1];
    let combined = w0 as u128 | ((w1 as u128) << 64);
    (combined >> bit_idx) as u64 & ((1u64 << chunk.low_bit_width) - 1)
}
/// Skips to the start of high bucket `target_high` inside a chunk.
/// Returns `(number of one bits before that point, bit position just after
/// the target_high-th zero)`; when the chunk has fewer zeros, returns
/// `(count, high_len_bits)`, i.e. "past the end".
#[inline]
fn chunk_skip_to_high(chunk: &ChunkView<'_>, target_high: usize) -> (usize, usize) {
    if target_high == 0 {
        return (0, 0);
    }
    let mut zeros_remaining = target_high;
    let mut ones_count = 0usize;
    let last_wi = chunk.high_bits.len().saturating_sub(1);
    for (wi, &w) in chunk.high_bits.iter().enumerate() {
        // The final word may only be partially valid.
        let valid_bits = if wi == last_wi {
            let rem = chunk.high_len_bits % 64;
            if rem == 0 && chunk.high_len_bits > 0 { 64 } else { rem }
        } else {
            64
        };
        let valid_mask = if valid_bits >= 64 { u64::MAX } else { (1u64 << valid_bits) - 1 };
        let masked = w & valid_mask;
        let ones = masked.count_ones() as usize;
        let zeros = valid_bits - ones;
        if zeros_remaining <= zeros {
            // The target zero lies in this word: select it among the
            // inverted (zero) bits, then count the ones up to that position.
            let inverted = (!w) & valid_mask;
            let zero_pos_in_word = select_in_word(inverted, zeros_remaining - 1);
            let abs_pos = wi * 64 + zero_pos_in_word;
            let bits_up_to = zero_pos_in_word + 1;
            let partial_mask = if bits_up_to >= 64 { u64::MAX } else { (1u64 << bits_up_to) - 1 };
            ones_count += (w & partial_mask).count_ones() as usize;
            return (ones_count, abs_pos + 1);
        }
        zeros_remaining -= zeros;
        ones_count += ones;
    }
    (chunk.count, chunk.high_len_bits)
}
/// Bit position of the first one bit in a chunk's high bits (0 when none).
#[inline]
fn chunk_first_one(chunk: &ChunkView<'_>) -> usize {
    chunk_first_one_cached(chunk.high_bits)
}
/// Bit index of the first set bit across the words; 0 when no bit is set
/// (or the slice is empty).
#[inline]
fn chunk_first_one_cached(high_bits: &[u64]) -> usize {
    match high_bits.iter().position(|&w| w != 0) {
        Some(wi) => wi * 64 + high_bits[wi].trailing_zeros() as usize,
        None => 0,
    }
}
/// Scans a chunk's high bits from `(start_idx, start_pos)` for the first
/// element whose delta (value - chunk min) is >= `target_delta`.
/// Returns `(local index, delta, bit position of its one bit)`.
#[inline]
fn chunk_scan_geq(
    chunk: &ChunkView<'_>,
    target_delta: u64,
    start_idx: usize,
    start_pos: usize,
) -> Option<(usize, u64, usize)> {
    let mut idx = start_idx;
    let mut bit_pos = start_pos;
    let mut word_idx = bit_pos / 64;
    if word_idx >= chunk.high_bits.len() {
        return None;
    }
    let start_bit = bit_pos % 64;
    let mut word = chunk.high_bits[word_idx] >> start_bit;
    bit_pos = word_idx * 64 + start_bit;
    while idx < chunk.count {
        if word == 0 {
            // Current word exhausted: move on to the next one.
            word_idx += 1;
            if word_idx >= chunk.high_bits.len() { break; }
            word = chunk.high_bits[word_idx];
            bit_pos = word_idx * 64;
            continue;
        }
        let tz = word.trailing_zeros() as usize;
        bit_pos += tz;
        word >>= tz;
        // High part of element idx = zeros before its one bit = bit_pos - idx.
        let high_val = (bit_pos - idx) as u64;
        let low = chunk_get_low(chunk, idx);
        let delta = (high_val << chunk.low_bit_width) | low;
        if delta >= target_delta {
            return Some((idx, delta, bit_pos));
        }
        idx += 1;
        bit_pos += 1;
        word >>= 1;
    }
    None
}
/// In-order iterator over a [`PartitionedEliasFano`], caching the current
/// chunk's layout to avoid re-resolving metadata on every element.
pub struct PartitionedEliasFanoIter<'a> {
    pef: &'a PartitionedEliasFano,
    chunk_idx: usize,
    local_idx: usize,      // index within the current chunk
    local_high_pos: usize, // high-bit scan position within the chunk
    cached_high_bits: &'a [u64],
    cached_low_bits: &'a [u64],
    cached_low_bit_width: u32,
    cached_count: usize,
    cached_min_value: u64,
}
impl<'a> PartitionedEliasFanoIter<'a> {
    /// Reloads the cached chunk fields after `chunk_idx` changed.
    #[inline]
    fn refresh_chunk_cache(&mut self) {
        let view = self.pef.chunk_view(self.chunk_idx);
        self.cached_high_bits = view.high_bits;
        self.cached_low_bits = view.low_bits;
        self.cached_low_bit_width = view.low_bit_width;
        self.cached_count = view.count;
        self.cached_min_value = view.min_value;
    }
}
impl<'a> Iterator for PartitionedEliasFanoIter<'a> {
    type Item = u64;
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.chunk_idx >= self.pef.meta.len() { return None; }
        if self.local_idx >= self.cached_count {
            // Current chunk drained: step into the next one.
            self.chunk_idx += 1;
            self.local_idx = 0;
            self.local_high_pos = 0;
            if self.chunk_idx >= self.pef.meta.len() { return None; }
            self.refresh_chunk_cache();
            // Recursion depth is at most 1: every chunk holds >= 1 element.
            return self.next();
        }
        // Locate the next set bit at or after `local_high_pos`.
        let next_pos = self.local_high_pos;
        let mut word_idx = next_pos / 64;
        if word_idx >= self.cached_high_bits.len() { return None; }
        let bit_offset = next_pos % 64;
        let mut word = self.cached_high_bits[word_idx] >> bit_offset;
        if word != 0 {
            let tz = word.trailing_zeros() as usize;
            self.local_high_pos = word_idx * 64 + bit_offset + tz;
        } else {
            loop {
                word_idx += 1;
                if word_idx >= self.cached_high_bits.len() { return None; }
                word = self.cached_high_bits[word_idx];
                if word != 0 {
                    self.local_high_pos = word_idx * 64 + word.trailing_zeros() as usize;
                    break;
                }
            }
        }
        // High part = zeros before this one bit = local_high_pos - local_idx.
        let high_val = (self.local_high_pos - self.local_idx) as u64;
        // Inlined low-bit read against the cached chunk layout (the view
        // exposes a spare trailing word, so w_idx + 1 is always readable).
        let low = if self.cached_low_bit_width == 0 {
            0
        } else {
            let lbw = self.cached_low_bit_width as u64;
            let bit_pos = self.local_idx as u64 * lbw;
            let w_idx = (bit_pos / 64) as usize;
            let b_idx = (bit_pos % 64) as u32;
            let w0 = self.cached_low_bits[w_idx];
            let w1 = self.cached_low_bits[w_idx + 1];
            let combined = w0 as u128 | ((w1 as u128) << 64);
            (combined >> b_idx) as u64 & ((1u64 << lbw) - 1)
        };
        let delta = (high_val << self.cached_low_bit_width) | low;
        let val = self.cached_min_value + delta;
        self.local_idx += 1;
        self.local_high_pos += 1;
        Some(val)
    }
    fn size_hint(&self) -> (usize, Option<usize>) {
        let consumed = self.chunk_idx * PEF_CHUNK_SIZE + self.local_idx;
        let remaining = self.pef.len.saturating_sub(consumed);
        (remaining, Some(remaining))
    }
}
impl<'a> ExactSizeIterator for PartitionedEliasFanoIter<'a> {}
/// Seekable cursor over a [`PartitionedEliasFano`] with the current chunk's
/// layout and the current value cached for cheap repeated reads.
pub struct PartitionedEliasFanoCursor<'a> {
    pef: &'a PartitionedEliasFano,
    chunk_idx: usize,
    local_idx: usize,      // index within the current chunk
    local_high_pos: usize, // bit position of the current element's one bit
    global_idx: usize,     // overall element index
    cached_value: u64,     // decoded value at global_idx (valid while in range)
    cached_high_bits: &'a [u64],
    cached_low_bits: &'a [u64],
    cached_low_bit_width: u32,
    cached_count: usize,
    cached_min_value: u64,
}
impl<'a> PartitionedEliasFanoCursor<'a> {
    /// Creates a cursor positioned on element 0 with chunk 0's layout cached.
    fn new(pef: &'a PartitionedEliasFano) -> Self {
        if pef.is_empty() {
            return Self {
                pef, chunk_idx: 0, local_idx: 0, local_high_pos: 0, global_idx: 0,
                cached_value: 0,
                cached_high_bits: &[], cached_low_bits: &[],
                cached_low_bit_width: 0, cached_count: 0, cached_min_value: 0,
            };
        }
        let view = pef.chunk_view(0);
        // Locate the one bit of chunk 0's first element.
        let mut high_pos = 0;
        for (word_idx, &word) in view.high_bits.iter().enumerate() {
            if word != 0 {
                high_pos = word_idx * 64 + word.trailing_zeros() as usize;
                break;
            }
        }
        // For element 0 the high part equals its bit position (no ones before).
        let high_val = high_pos as u64;
        let low = chunk_get_low(&view, 0);
        let initial_val = view.min_value + (high_val << view.low_bit_width) + low;
        Self {
            pef, chunk_idx: 0, local_idx: 0, local_high_pos: high_pos, global_idx: 0,
            cached_value: initial_val,
            cached_high_bits: view.high_bits, cached_low_bits: view.low_bits,
            cached_low_bit_width: view.low_bit_width, cached_count: view.count,
            cached_min_value: view.min_value,
        }
    }
    /// Reloads the cached chunk fields after `chunk_idx` changed.
    #[inline]
    fn refresh_chunk_cache(&mut self) {
        let view = self.pef.chunk_view(self.chunk_idx);
        self.cached_high_bits = view.high_bits;
        self.cached_low_bits = view.low_bits;
        self.cached_low_bit_width = view.low_bit_width;
        self.cached_count = view.count;
        self.cached_min_value = view.min_value;
    }
    /// Low-bit part of `local_idx` read through the cached chunk layout.
    #[inline]
    fn get_low_cached(&self, local_idx: usize) -> u64 {
        if self.cached_low_bit_width == 0 { return 0; }
        let lbw = self.cached_low_bit_width as u64;
        let bit_pos = local_idx as u64 * lbw;
        let w_idx = (bit_pos / 64) as usize;
        let b_idx = (bit_pos % 64) as u32;
        // The chunk view always exposes one spare trailing word.
        let w0 = self.cached_low_bits[w_idx];
        let w1 = self.cached_low_bits[w_idx + 1];
        let combined = w0 as u128 | ((w1 as u128) << 64);
        (combined >> b_idx) as u64 & ((1u64 << lbw) - 1)
    }
    /// Recomputes `cached_value` from the current position fields.
    #[inline]
    fn recompute_value(&mut self) {
        // High part = zeros before the one bit = local_high_pos - local_idx.
        let high_val = (self.local_high_pos - self.local_idx) as u64;
        let low = self.get_low_cached(self.local_idx);
        self.cached_value = self.cached_min_value + (high_val << self.cached_low_bit_width) + low;
    }
    /// Value under the cursor, or `None` once exhausted.
    #[inline]
    pub fn current(&self) -> Option<u64> {
        if self.global_idx >= self.pef.len { None } else { Some(self.cached_value) }
    }
    /// Global index of the current element.
    #[inline]
    pub fn index(&self) -> usize { self.global_idx }
    /// True once the cursor has moved past the last element.
    #[inline]
    pub fn is_exhausted(&self) -> bool { self.global_idx >= self.pef.len }
    /// Moves to the next element, crossing chunk boundaries as needed.
    #[inline]
    pub fn advance(&mut self) -> bool {
        self.global_idx += 1;
        if self.global_idx >= self.pef.len { return false; }
        self.local_idx += 1;
        if self.local_idx >= self.cached_count {
            // Step into the next chunk; position on its first one bit.
            self.chunk_idx += 1;
            self.local_idx = 0;
            if self.chunk_idx >= self.pef.meta.len() { return false; }
            self.refresh_chunk_cache();
            self.local_high_pos = chunk_first_one_cached(self.cached_high_bits);
            self.recompute_value();
            return true;
        }
        // Find the next set bit strictly after the current one in this chunk.
        let next_pos = self.local_high_pos + 1;
        let mut word_idx = next_pos / 64;
        if word_idx >= self.cached_high_bits.len() { return false; }
        let bit_in_word = next_pos % 64;
        let mut word = self.cached_high_bits[word_idx] >> bit_in_word;
        if word != 0 {
            self.local_high_pos = word_idx * 64 + bit_in_word + word.trailing_zeros() as usize;
        } else {
            loop {
                word_idx += 1;
                if word_idx >= self.cached_high_bits.len() { return false; }
                word = self.cached_high_bits[word_idx];
                if word != 0 {
                    self.local_high_pos = word_idx * 64 + word.trailing_zeros() as usize;
                    break;
                }
            }
        }
        self.recompute_value();
        true
    }
    /// Advances to the first element >= `target` (never moves backwards).
    /// Tries an in-chunk scan first, then gallops over the chunk upper
    /// bounds and binary-searches the final window for long jumps.
    #[inline]
    pub fn advance_to_geq(&mut self, target: u64) -> bool {
        if self.global_idx >= self.pef.len { return false; }
        if self.cached_value >= target { return true; }
        let num_chunks = self.pef.meta.len();
        // Fast path: the answer may still lie inside the current chunk.
        if target <= self.pef.chunk_upper_bounds[self.chunk_idx] {
            let target_delta = target - self.cached_min_value;
            let view = ChunkView {
                low_bits: self.cached_low_bits,
                high_bits: self.cached_high_bits,
                low_bit_width: self.cached_low_bit_width,
                count: self.cached_count,
                min_value: self.cached_min_value,
                high_len_bits: self.pef.meta[self.chunk_idx].high_len_bits as usize,
            };
            if let Some((local_idx, delta, hp)) = chunk_scan_geq(&view, target_delta, self.local_idx, self.local_high_pos) {
                self.local_idx = local_idx;
                self.global_idx = self.chunk_idx * PEF_CHUNK_SIZE + local_idx;
                self.local_high_pos = hp;
                self.cached_value = self.cached_min_value + delta;
                return true;
            }
        }
        // Gallop forward with doubling step, then binary-search the last
        // window for the first chunk whose upper bound reaches the target.
        let mut lo = self.chunk_idx;
        let mut step = 1usize;
        let target_chunk = loop {
            let hi = (lo + step).min(num_chunks - 1);
            if self.pef.chunk_upper_bounds[hi] >= target {
                let s = lo + 1;
                if s > hi {
                    break hi;
                }
                break s + self.pef.chunk_upper_bounds[s..=hi]
                    .partition_point(|&x| x < target);
            }
            if hi == num_chunks - 1 {
                // Even the last chunk tops out below target: exhausted.
                self.global_idx = self.pef.len;
                return false;
            }
            lo = hi;
            step *= 2;
        };
        self.chunk_idx = target_chunk;
        self.refresh_chunk_cache();
        let global_offset = target_chunk * PEF_CHUNK_SIZE;
        // Whole chunk is >= target: its first element is the answer.
        if target <= self.cached_min_value {
            self.local_idx = 0;
            self.global_idx = global_offset;
            self.local_high_pos = chunk_first_one_cached(self.cached_high_bits);
            self.recompute_value();
            return true;
        }
        let view = ChunkView {
            low_bits: self.cached_low_bits, high_bits: self.cached_high_bits,
            low_bit_width: self.cached_low_bit_width, count: self.cached_count,
            min_value: self.cached_min_value,
            high_len_bits: self.pef.meta[self.chunk_idx].high_len_bits as usize,
        };
        let target_delta = target - self.cached_min_value;
        let target_high = (target_delta >> view.low_bit_width) as usize;
        // Skip to target's high bucket, then scan forward inside the chunk.
        let (si, sp) = chunk_skip_to_high(&view, target_high);
        if let Some((local_idx, delta, hp)) = chunk_scan_geq(&view, target_delta, si, sp) {
            self.local_idx = local_idx;
            self.global_idx = global_offset + local_idx;
            self.local_high_pos = hp;
            self.cached_value = self.cached_min_value + delta;
            return true;
        }
        // Defensive fallback: take the first element of the next chunk.
        let next = target_chunk + 1;
        if next >= num_chunks {
            self.global_idx = self.pef.len;
            return false;
        }
        self.chunk_idx = next;
        self.refresh_chunk_cache();
        self.local_idx = 0;
        self.global_idx = next * PEF_CHUNK_SIZE;
        self.local_high_pos = chunk_first_one_cached(self.cached_high_bits);
        self.recompute_value();
        true
    }
    /// Repositions the cursor on an arbitrary global index (any direction).
    #[inline]
    pub fn advance_to_index(&mut self, idx: usize) -> bool {
        if idx >= self.pef.len { return false; }
        if idx == self.global_idx { return true; }
        let chunk_idx = idx / PEF_CHUNK_SIZE;
        let local_idx = idx % PEF_CHUNK_SIZE;
        if chunk_idx != self.chunk_idx {
            self.chunk_idx = chunk_idx;
            self.refresh_chunk_cache();
        }
        self.local_idx = local_idx;
        self.global_idx = idx;
        self.local_high_pos = Self::find_nth_one(self.cached_high_bits, local_idx);
        self.recompute_value();
        true
    }
    /// Position of the `n`-th (0-based) set bit; returns 0 when fewer than
    /// `n + 1` bits are set (unreachable for a valid local index).
    #[inline]
    fn find_nth_one(high_bits: &[u64], n: usize) -> usize {
        let mut remaining = n;
        for (word_idx, &word) in high_bits.iter().enumerate() {
            let ones = word.count_ones() as usize;
            if remaining < ones {
                return word_idx * 64 + select_in_word(word, remaining);
            }
            remaining -= ones;
        }
        0
    }
    /// Rewinds the cursor back to element 0.
    pub fn reset(&mut self) {
        *self = Self::new(self.pef);
    }
}
// Number of values decoded per `refill` call by the batch cursors below.
const BATCH_SIZE: usize = 8;
/// Buffered forward cursor over an [`EliasFano`] list: decodes values in
/// batches of `BATCH_SIZE` to amortize the bit-scanning state setup.
pub struct EliasFanoBatchCursor<'a> {
    ef: &'a EliasFano,
    buffer: [u64; BATCH_SIZE], // decoded values of the current batch
    buf_pos: usize,            // read position within `buffer`
    buf_len: usize,            // number of valid entries in `buffer`
    next_elem: usize,          // index of the first element NOT yet decoded
    high_pos: usize,           // bit position in `high_bits` to resume from
}
impl<'a> EliasFanoBatchCursor<'a> {
/// Build a cursor and pre-decode the first batch (no-op for empty lists).
fn new(ef: &'a EliasFano) -> Self {
    let mut cursor = Self {
        ef,
        buffer: [0u64; BATCH_SIZE],
        buf_pos: 0,
        buf_len: 0,
        next_elem: 0,
        high_pos: 0,
    };
    if ef.is_empty() {
        return cursor;
    }
    cursor.refill();
    cursor
}
/// Value under the cursor, or `None` once the list is exhausted.
#[inline]
pub fn current(&self) -> Option<u64> {
    (self.buf_pos < self.buf_len).then(|| self.buffer[self.buf_pos])
}
/// Absolute index of the element `current()` refers to.
#[inline]
pub fn index(&self) -> usize {
    self.buf_pos + self.next_elem - self.buf_len
}
/// True when every element has been consumed.
#[inline]
pub fn is_exhausted(&self) -> bool {
    self.next_elem >= self.ef.len && self.buf_pos >= self.buf_len
}
/// Step to the next element, refilling the buffer when it runs dry.
/// Returns `false` once the list is exhausted.
#[inline]
pub fn advance(&mut self) -> bool {
    self.buf_pos += 1;
    if self.buf_pos >= self.buf_len {
        if self.next_elem >= self.ef.len {
            return false;
        }
        self.refill();
        return self.buf_len > 0;
    }
    true
}
// Decode the next batch of up to BATCH_SIZE values into `buffer`.
// Streams the unary-coded high bits and the fixed-width low bits in
// lockstep, resuming from the saved `high_pos` / element index.
fn refill(&mut self) {
    let count = (self.ef.len - self.next_elem).min(BATCH_SIZE);
    if count == 0 {
        self.buf_len = 0;
        return;
    }
    let start_elem = self.next_elem;
    let lbw = self.ef.low_bit_width;
    // High-bits scan state: absolute bit position split into a word index
    // and a pre-shifted word holding the remaining bits.
    let mut bit_pos = self.high_pos;
    let mut word_idx = bit_pos / 64;
    let start_bit = bit_pos % 64;
    let mut word = if word_idx < self.ef.high_bits.len() {
        self.ef.high_bits[word_idx] >> start_bit
    } else {
        0
    };
    bit_pos = word_idx * 64 + start_bit;
    let low_mask = if lbw == 0 { 0u64 } else { (1u64 << lbw) - 1 };
    // Low-bits window: keep the current and next words so a field that
    // straddles a 64-bit boundary can be stitched without re-reads.
    let mut low_bit_pos = start_elem as u64 * lbw as u64;
    let mut low_word_idx = (low_bit_pos / 64) as usize;
    let mut low_bit_idx = (low_bit_pos % 64) as u32;
    let mut low_word = if lbw > 0 && low_word_idx < self.ef.low_bits.len() {
        self.ef.low_bits[low_word_idx]
    } else { 0 };
    let mut low_word_next = if lbw > 0 && low_word_idx + 1 < self.ef.low_bits.len() {
        self.ef.low_bits[low_word_idx + 1]
    } else { 0 };
    for k in 0..count {
        let idx = start_elem + k;
        // Skip zero words until the word containing the next set bit.
        // NOTE(review): unlike the partitioned refills below, this loop has
        // no bounds check on `word_idx`; it relies on `count` never
        // exceeding the remaining set bits — confirm that invariant holds.
        while word == 0 {
            word_idx += 1;
            word = self.ef.high_bits[word_idx];
            bit_pos = word_idx * 64;
        }
        let tz = word.trailing_zeros() as usize;
        bit_pos += tz;
        word >>= tz;
        // The i-th set bit sits at position high(i) + i, so subtracting the
        // element index recovers the high part of the value.
        let high_val = (bit_pos - idx) as u64;
        let low = if lbw == 0 {
            0u64
        } else {
            let mut val = low_word >> low_bit_idx;
            if low_bit_idx + lbw > 64 {
                val |= low_word_next << (64 - low_bit_idx);
            }
            val & low_mask
        };
        self.buffer[k] = (high_val << lbw) | low;
        bit_pos += 1;
        word >>= 1;
        if lbw > 0 {
            // Advance the low-bits window, rolling to the next word pair
            // when the read position crosses a word boundary.
            low_bit_idx += lbw;
            if low_bit_idx >= 64 {
                low_bit_idx -= 64;
                low_word_idx += 1;
                low_word = low_word_next;
                low_word_next = if low_word_idx + 1 < self.ef.low_bits.len() {
                    self.ef.low_bits[low_word_idx + 1]
                } else { 0 };
            }
        }
    }
    self.buf_pos = 0;
    self.buf_len = count;
    self.next_elem = start_elem + count;
    self.high_pos = bit_pos;
}
// Rewind to the first element by rebuilding the cursor state.
pub fn reset(&mut self) {
    *self = Self::new(self.ef);
}
}
impl EliasFano {
    /// Create a batch-decoding cursor positioned at the first element.
    #[inline]
    pub fn batch_cursor(&self) -> EliasFanoBatchCursor<'_> {
        EliasFanoBatchCursor::new(self)
    }
}
/// Batch-decoding cursor over a [`PartitionedEliasFano`].
pub struct PartitionedEliasFanoBatchCursor<'a> {
    pef: &'a PartitionedEliasFano,
    buffer: [u64; BATCH_SIZE], // decoded values of the current batch
    buf_pos: usize,            // read position within `buffer`
    buf_len: usize,            // number of valid entries in `buffer`
    next_elem: usize,          // first element index not yet decoded
    chunk_idx: usize,          // chunk the decoder will resume in
    local_idx: usize,          // element index within that chunk
    local_high_pos: usize,     // chunk-local high-bit resume position
}
impl<'a> PartitionedEliasFanoBatchCursor<'a> {
/// Build a cursor and pre-decode the first batch (no-op for empty lists).
fn new(pef: &'a PartitionedEliasFano) -> Self {
    let mut cursor = Self {
        pef,
        buffer: [0u64; BATCH_SIZE],
        buf_pos: 0,
        buf_len: 0,
        next_elem: 0,
        chunk_idx: 0,
        local_idx: 0,
        local_high_pos: 0,
    };
    if pef.is_empty() {
        return cursor;
    }
    cursor.refill();
    cursor
}
/// Value under the cursor, or `None` once the list is exhausted.
#[inline]
pub fn current(&self) -> Option<u64> {
    (self.buf_pos < self.buf_len).then(|| self.buffer[self.buf_pos])
}
/// Absolute index of the element `current()` refers to.
#[inline]
pub fn index(&self) -> usize {
    self.buf_pos + self.next_elem - self.buf_len
}
/// True when every element has been consumed.
#[inline]
pub fn is_exhausted(&self) -> bool {
    self.next_elem >= self.pef.len && self.buf_pos >= self.buf_len
}
/// Step to the next element, refilling the buffer when it runs dry.
/// Returns `false` once the list is exhausted.
#[inline]
pub fn advance(&mut self) -> bool {
    self.buf_pos += 1;
    if self.buf_pos >= self.buf_len {
        if self.next_elem >= self.pef.len {
            return false;
        }
        self.refill();
        return self.buf_len > 0;
    }
    true
}
// Decode up to BATCH_SIZE values, possibly spanning several chunks.
// Each chunk is decoded with chunk-local bit-reader state; crossing a
// chunk boundary resets the high-bit scan (positions restart per chunk).
fn refill(&mut self) {
    let remaining = self.pef.len - self.next_elem;
    if remaining == 0 {
        self.buf_len = 0;
        return;
    }
    let count = remaining.min(BATCH_SIZE);
    let mut filled = 0;
    while filled < count {
        if self.chunk_idx >= self.pef.meta.len() {
            break;
        }
        let view = self.pef.chunk_view(self.chunk_idx);
        // Entering a fresh chunk: locate its first set high bit.
        if self.local_idx == 0 {
            self.local_high_pos = 0;
            for (wi, &w) in view.high_bits.iter().enumerate() {
                if w != 0 {
                    self.local_high_pos = wi * 64 + w.trailing_zeros() as usize;
                    break;
                }
            }
        }
        let chunk_remaining = view.count - self.local_idx;
        let to_decode = (count - filled).min(chunk_remaining);
        let lbw = view.low_bit_width;
        let low_mask = if lbw == 0 { 0u64 } else { (1u64 << lbw) - 1 };
        // High-bits scan state (chunk-local bit positions).
        let mut bit_pos = self.local_high_pos;
        let mut word_idx = bit_pos / 64;
        let start_bit = bit_pos % 64;
        let mut word = if word_idx < view.high_bits.len() {
            view.high_bits[word_idx] >> start_bit
        } else {
            0
        };
        bit_pos = word_idx * 64 + start_bit;
        // Low-bits window: current and next word cover straddling fields.
        let mut low_bit_pos = self.local_idx as u64 * lbw as u64;
        let mut low_word_idx = (low_bit_pos / 64) as usize;
        let mut low_bit_idx = (low_bit_pos % 64) as u32;
        let mut low_word = if lbw > 0 && low_word_idx < view.low_bits.len() {
            view.low_bits[low_word_idx]
        } else { 0 };
        let mut low_word_next = if lbw > 0 && low_word_idx + 1 < view.low_bits.len() {
            view.low_bits[low_word_idx + 1]
        } else { 0 };
        for k in 0..to_decode {
            let li = self.local_idx + k;
            while word == 0 {
                word_idx += 1;
                if word_idx >= view.high_bits.len() { break; }
                word = view.high_bits[word_idx];
                bit_pos = word_idx * 64;
            }
            let tz = word.trailing_zeros() as usize;
            bit_pos += tz;
            word >>= tz;
            // The i-th set bit sits at high(i) + i: recover the high part.
            let high_val = (bit_pos - li) as u64;
            let low = if lbw == 0 {
                0u64
            } else {
                let mut val = low_word >> low_bit_idx;
                if low_bit_idx + lbw > 64 {
                    val |= low_word_next << (64 - low_bit_idx);
                }
                val & low_mask
            };
            let delta = (high_val << lbw) | low;
            // Values are stored as deltas from the chunk minimum.
            self.buffer[filled + k] = view.min_value + delta;
            bit_pos += 1;
            word >>= 1;
            if lbw > 0 {
                low_bit_idx += lbw;
                if low_bit_idx >= 64 {
                    low_bit_idx -= 64;
                    low_word_idx += 1;
                    low_word = low_word_next;
                    low_word_next = if low_word_idx + 1 < view.low_bits.len() {
                        view.low_bits[low_word_idx + 1]
                    } else { 0 };
                }
            }
        }
        self.local_idx += to_decode;
        self.local_high_pos = bit_pos;
        filled += to_decode;
        // Chunk exhausted: move on and reset chunk-local state.
        if self.local_idx >= view.count {
            self.chunk_idx += 1;
            self.local_idx = 0;
            self.local_high_pos = 0;
        }
    }
    self.buf_pos = 0;
    self.buf_len = filled;
    self.next_elem += filled;
}
// Rewind to the first element by rebuilding the cursor state.
pub fn reset(&mut self) {
    *self = Self::new(self.pef);
}
}
impl PartitionedEliasFano {
    /// Create a batch-decoding cursor positioned at the first element.
    #[inline]
    pub fn batch_cursor(&self) -> PartitionedEliasFanoBatchCursor<'_> {
        PartitionedEliasFanoBatchCursor::new(self)
    }
}
// Bounds for the optimal-partitioning DP below: every chunk (except the
// final one) holds between MIN_CHUNK_SIZE and MAX_CHUNK_SIZE elements, and
// each chunk is charged CHUNK_OVERHEAD_BITS of metadata in the cost model.
const MIN_CHUNK_SIZE: usize = 32;
const MAX_CHUNK_SIZE: usize = 512;
const CHUNK_OVERHEAD_BITS: usize = 256;
/// Partitioned Elias-Fano list whose chunk boundaries are chosen by a
/// dynamic program minimizing the total encoded size (variable-length
/// chunks, unlike the fixed-size `PartitionedEliasFano`).
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct OptimalPartitionedEliasFano {
    all_low_bits: Vec<u64>,       // concatenated low-bit words of all chunks (+1 sentinel word)
    all_high_bits: Vec<u64>,      // concatenated high-bit words of all chunks
    meta: Vec<PefChunkMeta>,      // per-chunk offsets / widths / counts
    chunk_starts: Vec<usize>,     // first global element index of each chunk
    chunk_upper_bounds: Vec<u64>, // maximum value stored in each chunk
    len: usize,                   // total number of elements
    universe: u64,                // exclusive upper bound on stored values
}
impl OptimalPartitionedEliasFano {
/// Build from a sorted `u32` slice; an empty input yields an empty structure.
pub fn from_sorted(values: &[u32]) -> Self {
    match values.last() {
        None => Self {
            all_low_bits: Vec::new(),
            all_high_bits: Vec::new(),
            meta: Vec::new(),
            chunk_starts: Vec::new(),
            chunk_upper_bounds: Vec::new(),
            len: 0,
            universe: 0,
        },
        // Universe is one past the largest (i.e. last) value.
        Some(&max) => Self::from_sorted_impl(values.len(), max as u64 + 1, |i| values[i] as u64),
    }
}
/// Build from a sorted `u64` slice; an empty input yields an empty structure.
pub fn from_sorted_u64(values: &[u64]) -> Self {
    match values.last() {
        None => Self {
            all_low_bits: Vec::new(),
            all_high_bits: Vec::new(),
            meta: Vec::new(),
            chunk_starts: Vec::new(),
            chunk_upper_bounds: Vec::new(),
            len: 0,
            universe: 0,
        },
        // Universe is one past the largest (i.e. last) value.
        Some(&max) => Self::from_sorted_impl(values.len(), max + 1, |i| values[i]),
    }
}
/// Cost model for the partitioning DP: encoded size in bits of a chunk of
/// `n` elements spanning `local_universe` distinct delta values, plus a
/// fixed per-chunk metadata charge.
#[inline]
fn chunk_cost(n: usize, local_universe: u64) -> usize {
    if n == 0 {
        return 0;
    }
    let n64 = n as u64;
    // Classic EF parameter: floor(log2(universe / n)), clamped at 0.
    let low_bit_width = if n64 >= local_universe {
        0u32
    } else {
        (64 - (local_universe / n64).leading_zeros()).saturating_sub(1)
    };
    // Largest possible high part; with width 0 the shift is a no-op, which
    // matches the unshifted maximum delta.
    let max_high = (local_universe.saturating_sub(1) >> low_bit_width) as usize;
    let high_bits = n + max_high + 1;
    n * low_bit_width as usize + high_bits + CHUNK_OVERHEAD_BITS
}
// Build the structure: first choose chunk boundaries with a dynamic
// program over prefix costs, then Elias-Fano-encode each chosen chunk.
fn from_sorted_impl(n: usize, universe: u64, get_val: impl Fn(usize) -> u64) -> Self {
    // dp[i] = minimal encoded cost (bits) of the first i elements;
    // back[i] = start index of the last chunk in that optimum.
    let mut dp = vec![usize::MAX; n + 1];
    let mut back = vec![0usize; n + 1];
    dp[0] = 0;
    for i in 1..=n {
        let max_j_start = if i > MAX_CHUNK_SIZE { i - MAX_CHUNK_SIZE } else { 0 };
        let min_j_end = if i >= MIN_CHUNK_SIZE { i - MIN_CHUNK_SIZE + 1 } else { 0 };
        let j_start = max_j_start;
        // Only the final chunk (i == n) may be shorter than MIN_CHUNK_SIZE.
        let j_end = if i == n {
            i
        } else {
            min_j_end
        };
        for j in j_start..j_end {
            let chunk_n = i - j;
            if chunk_n < 1 { continue; }
            if i < n && chunk_n < MIN_CHUNK_SIZE { continue; }
            if chunk_n > MAX_CHUNK_SIZE { continue; }
            // Unreachable prefix: no valid partition of the first j elements.
            if dp[j] == usize::MAX { continue; }
            let min_val = get_val(j);
            let max_val = get_val(i - 1);
            let local_u = max_val - min_val + 1;
            let cost = dp[j] + Self::chunk_cost(chunk_n, local_u);
            if cost < dp[i] {
                dp[i] = cost;
                back[i] = j;
            }
        }
    }
    // Recover the chunk end positions by walking the back-pointers.
    let mut partition_points = Vec::new();
    let mut pos = n;
    while pos > 0 {
        partition_points.push(pos);
        pos = back[pos];
    }
    partition_points.reverse();
    let num_chunks = partition_points.len();
    let mut all_low_bits = Vec::new();
    let mut all_high_bits = Vec::new();
    let mut meta = Vec::with_capacity(num_chunks);
    let mut chunk_starts = Vec::with_capacity(num_chunks);
    let mut chunk_upper_bounds = Vec::with_capacity(num_chunks);
    let mut start = 0;
    for &end in &partition_points {
        let count = end - start;
        let min_val = get_val(start);
        let max_val = get_val(end - 1);
        let local_universe = max_val - min_val + 1;
        // Same parameter choice as chunk_cost: floor(log2(universe / n)).
        let low_bit_width = if count as u64 >= local_universe {
            0
        } else {
            (64 - (local_universe / count as u64).leading_zeros()).saturating_sub(1)
        };
        let low_mask = if low_bit_width == 0 { 0u64 } else { (1u64 << low_bit_width) - 1 };
        let total_low_bits = count as u64 * low_bit_width as u64;
        let low_words = ((total_low_bits + 63) / 64) as usize;
        let low_offset = all_low_bits.len();
        all_low_bits.resize(low_offset + low_words, 0);
        // Pack the low `low_bit_width` bits of each delta contiguously.
        for i in 0..count {
            if low_bit_width > 0 {
                let delta = get_val(start + i) - min_val;
                let low_val = delta & low_mask;
                let bit_pos = i as u64 * low_bit_width as u64;
                let word_idx = (bit_pos / 64) as usize;
                let bit_idx = (bit_pos % 64) as u32;
                all_low_bits[low_offset + word_idx] |= low_val << bit_idx;
                // Spill the part of a field that straddles a word boundary.
                if bit_idx + low_bit_width > 64 && word_idx + 1 < low_words {
                    all_low_bits[low_offset + word_idx + 1] |= low_val >> (64 - bit_idx);
                }
            }
        }
        let last_delta = max_val - min_val;
        let max_high = last_delta >> low_bit_width;
        let high_len_bits = count + max_high as usize + 1;
        let high_words = (high_len_bits + 63) / 64;
        let high_offset = all_high_bits.len();
        all_high_bits.resize(high_offset + high_words, 0);
        // Unary-encode the high parts: the i-th set bit lands at high(i) + i.
        let mut hpos = 0usize;
        let mut prev_high = 0u64;
        for i in 0..count {
            let delta = get_val(start + i) - min_val;
            let high = delta >> low_bit_width;
            hpos += (high - prev_high) as usize;
            let word_idx = hpos / 64;
            let bit_idx = hpos % 64;
            if word_idx < high_words {
                all_high_bits[high_offset + word_idx] |= 1u64 << bit_idx;
            }
            hpos += 1;
            prev_high = high;
        }
        chunk_starts.push(start);
        chunk_upper_bounds.push(max_val);
        meta.push(PefChunkMeta {
            min_value: min_val,
            low_offset: low_offset as u32,
            high_offset: high_offset as u32,
            count: count as u16,
            high_len_bits: high_len_bits as u16,
            high_words: high_words as u16,
            low_words: low_words as u16,
            low_bit_width: low_bit_width as u8,
        });
        start = end;
    }
    // Sentinel word so low-bit readers may always fetch `w_idx + 1`.
    all_low_bits.push(0);
    Self { all_low_bits, all_high_bits, meta, chunk_starts, chunk_upper_bounds, len: n, universe }
}
/// Number of stored elements.
#[inline]
pub fn len(&self) -> usize {
    self.len
}
/// True when the list holds no elements.
#[inline]
pub fn is_empty(&self) -> bool {
    self.len == 0
}
/// Approximate heap + inline footprint in bytes.
pub fn size_bytes(&self) -> usize {
    let word_count = self.all_low_bits.len()
        + self.all_high_bits.len()
        + self.chunk_starts.len()
        + self.chunk_upper_bounds.len();
    word_count * 8
        + self.meta.len() * std::mem::size_of::<PefChunkMeta>()
        + 56 // fixed per-struct overhead (Vec headers, len, universe)
}
/// Average storage cost per element, in bits (0.0 for an empty list).
#[inline]
pub fn bits_per_element(&self) -> f64 {
    match self.len {
        0 => 0.0,
        n => (self.size_bytes() * 8) as f64 / n as f64,
    }
}
/// Random access: value at `index`, or `None` when out of range.
pub fn get(&self, index: usize) -> Option<u64> {
    if index >= self.len {
        return None;
    }
    // Last chunk whose start is <= index. chunk_starts is strictly
    // increasing with chunk_starts[0] == 0, so the subtraction is safe.
    let chunk_idx = self.chunk_starts.partition_point(|&s| s <= index) - 1;
    let local_idx = index - self.chunk_starts[chunk_idx];
    let view = self.chunk_view(chunk_idx);
    Some(view.min_value + chunk_get_delta(&view, local_idx))
}
/// First element >= `target`, returned as `(global_index, value)`.
/// Returns `None` when no such element exists.
#[inline]
pub fn next_geq(&self, target: u64) -> Option<(usize, u64)> {
    if self.len == 0 || target >= self.universe { return None; }
    // Find the FIRST chunk whose upper bound is >= target. The previous
    // `binary_search` could land on any chunk among equal upper bounds
    // (possible when a duplicated value spans a chunk boundary), skipping
    // earlier occurrences and reporting a larger index than the first match.
    let chunk_idx = self.chunk_upper_bounds.partition_point(|&ub| ub < target);
    if chunk_idx >= self.meta.len() {
        return None;
    }
    let view = self.chunk_view(chunk_idx);
    let global_offset = self.chunk_starts[chunk_idx];
    // Target precedes this chunk: its first element is the answer.
    if target <= view.min_value {
        let delta = chunk_get_delta(&view, 0);
        return Some((global_offset, view.min_value + delta));
    }
    let target_delta = target - view.min_value;
    let target_high = (target_delta >> view.low_bit_width) as usize;
    // Jump near the first candidate via the high-bits skip index, then
    // scan forward for the first delta >= target_delta.
    let (start_idx, start_pos) = chunk_skip_to_high(&view, target_high);
    if let Some((local_idx, delta, _)) = chunk_scan_geq(&view, target_delta, start_idx, start_pos) {
        return Some((global_offset + local_idx, view.min_value + delta));
    }
    // Not in this chunk: the head of the next chunk (if any) is the answer.
    let next = chunk_idx + 1;
    if next < self.meta.len() {
        let nv = self.chunk_view(next);
        let delta = chunk_get_delta(&nv, 0);
        Some((self.chunk_starts[next], nv.min_value + delta))
    } else {
        None
    }
}
/// Sequential iterator over all values, cheapest way to walk the list.
pub fn iter(&self) -> OptimalPefIter<'_> {
    // Start with an empty cache; seed it from chunk 0 only when there is one.
    let mut it = OptimalPefIter {
        opef: self,
        chunk_idx: 0,
        local_idx: 0,
        local_high_pos: 0,
        cached_high_bits: &[],
        cached_low_bits: &[],
        cached_low_bit_width: 0,
        cached_count: 0,
        cached_min_value: 0,
    };
    if !self.is_empty() {
        it.refresh_chunk_cache();
    }
    it
}
// Create a value-at-a-time seekable cursor positioned at the first element.
#[inline]
pub fn cursor(&self) -> OptimalPefCursor<'_> {
    OptimalPefCursor::new(self)
}
// Create a batch-decoding cursor positioned at the first element.
#[inline]
pub fn batch_cursor(&self) -> OptimalPefBatchCursor<'_> {
    OptimalPefBatchCursor::new(self)
}
// Build a borrowed view of chunk `idx`. The low-bits slice extends one
// word past `low_words` (clamped to the vec length) so readers may always
// fetch `w_idx + 1`; the trailing zero word pushed by `from_sorted_impl`
// guarantees this for the final chunk.
#[inline]
fn chunk_view(&self, idx: usize) -> ChunkView<'_> {
    let m = &self.meta[idx];
    let low_start = m.low_offset as usize;
    let low_end = (low_start + m.low_words as usize + 1).min(self.all_low_bits.len());
    let high_start = m.high_offset as usize;
    ChunkView {
        low_bits: &self.all_low_bits[low_start..low_end],
        high_bits: &self.all_high_bits[high_start..high_start + m.high_words as usize],
        low_bit_width: m.low_bit_width as u32,
        count: m.count as usize,
        min_value: m.min_value,
        high_len_bits: m.high_len_bits as usize,
    }
}
}
/// Chunk-caching forward iterator over an [`OptimalPartitionedEliasFano`].
pub struct OptimalPefIter<'a> {
    opef: &'a OptimalPartitionedEliasFano,
    chunk_idx: usize,            // chunk currently being decoded
    local_idx: usize,            // element index within that chunk
    local_high_pos: usize,       // next high-bit position to inspect
    cached_high_bits: &'a [u64], // current chunk's high-bit words
    cached_low_bits: &'a [u64],  // current chunk's low-bit words (+1 pad word)
    cached_low_bit_width: u32,
    cached_count: usize,
    cached_min_value: u64,
}
impl<'a> OptimalPefIter<'a> {
#[inline]
fn refresh_chunk_cache(&mut self) {
let view = self.opef.chunk_view(self.chunk_idx);
self.cached_high_bits = view.high_bits;
self.cached_low_bits = view.low_bits;
self.cached_low_bit_width = view.low_bit_width;
self.cached_count = view.count;
self.cached_min_value = view.min_value;
}
}
impl<'a> Iterator for OptimalPefIter<'a> {
    type Item = u64;
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.chunk_idx >= self.opef.meta.len() { return None; }
        // Current chunk exhausted: hop to the next one and retry (depth-1
        // recursion, since every encoded chunk holds at least one element).
        if self.local_idx >= self.cached_count {
            self.chunk_idx += 1;
            self.local_idx = 0;
            self.local_high_pos = 0;
            if self.chunk_idx >= self.opef.meta.len() { return None; }
            self.refresh_chunk_cache();
            return self.next();
        }
        // Find the set high bit at or after the saved position.
        let next_pos = self.local_high_pos;
        let mut word_idx = next_pos / 64;
        if word_idx >= self.cached_high_bits.len() { return None; }
        let bit_offset = next_pos % 64;
        let mut word = self.cached_high_bits[word_idx] >> bit_offset;
        if word != 0 {
            let tz = word.trailing_zeros() as usize;
            self.local_high_pos = word_idx * 64 + bit_offset + tz;
        } else {
            loop {
                word_idx += 1;
                if word_idx >= self.cached_high_bits.len() { return None; }
                word = self.cached_high_bits[word_idx];
                if word != 0 {
                    self.local_high_pos = word_idx * 64 + word.trailing_zeros() as usize;
                    break;
                }
            }
        }
        // Unary EF layout: high(i) = bit position - element index.
        let high_val = (self.local_high_pos - self.local_idx) as u64;
        let low = if self.cached_low_bit_width == 0 {
            0
        } else {
            let lbw = self.cached_low_bit_width as u64;
            let bit_pos = self.local_idx as u64 * lbw;
            let w_idx = (bit_pos / 64) as usize;
            let b_idx = (bit_pos % 64) as u32;
            let w0 = self.cached_low_bits[w_idx];
            // `w_idx + 1` is in bounds: chunk_view extends the low slice by
            // one word (sentinel-backed for the final chunk).
            let w1 = self.cached_low_bits[w_idx + 1];
            // Stitch two words so a straddling field reads contiguously.
            let combined = w0 as u128 | ((w1 as u128) << 64);
            (combined >> b_idx) as u64 & ((1u64 << lbw) - 1)
        };
        let delta = (high_val << self.cached_low_bit_width) | low;
        let val = self.cached_min_value + delta;
        self.local_idx += 1;
        self.local_high_pos += 1;
        Some(val)
    }
    // Exact remaining count: consumed = start of current chunk + position
    // within it; everything after is still pending.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let consumed: usize = if self.chunk_idx < self.opef.meta.len() {
            self.opef.chunk_starts[self.chunk_idx] + self.local_idx
        } else {
            self.opef.len
        };
        let remaining = self.opef.len.saturating_sub(consumed);
        (remaining, Some(remaining))
    }
}
impl<'a> ExactSizeIterator for OptimalPefIter<'a> {}
/// Seekable cursor over an [`OptimalPartitionedEliasFano`] with the current
/// chunk's descriptors cached inline to avoid re-slicing on every step.
pub struct OptimalPefCursor<'a> {
    opef: &'a OptimalPartitionedEliasFano,
    chunk_idx: usize,
    local_idx: usize,
    local_high_pos: usize, // bit position of the current element's high bit
    global_idx: usize,     // absolute element index (== len once exhausted)
    cached_value: u64,     // value at global_idx, kept up to date on moves
    cached_high_bits: &'a [u64],
    cached_low_bits: &'a [u64],
    cached_low_bit_width: u32,
    cached_count: usize,
    cached_min_value: u64,
}
impl<'a> OptimalPefCursor<'a> {
// Build a cursor positioned on element 0, with chunk 0's descriptors and
// the first value pre-computed (exhausted state for an empty list).
fn new(opef: &'a OptimalPartitionedEliasFano) -> Self {
    if opef.is_empty() {
        return Self {
            opef, chunk_idx: 0, local_idx: 0, local_high_pos: 0, global_idx: 0,
            cached_value: 0,
            cached_high_bits: &[], cached_low_bits: &[],
            cached_low_bit_width: 0, cached_count: 0, cached_min_value: 0,
        };
    }
    let view = opef.chunk_view(0);
    // Locate the first set high bit of chunk 0 to seed the cursor.
    let mut high_pos = 0;
    for (word_idx, &word) in view.high_bits.iter().enumerate() {
        if word != 0 {
            high_pos = word_idx * 64 + word.trailing_zeros() as usize;
            break;
        }
    }
    // For element 0 the high part equals its bit position (no index offset).
    let high_val = high_pos as u64;
    let low = chunk_get_low(&view, 0);
    let initial_val = view.min_value + (high_val << view.low_bit_width) + low;
    Self {
        opef, chunk_idx: 0, local_idx: 0, local_high_pos: high_pos, global_idx: 0,
        cached_value: initial_val,
        cached_high_bits: view.high_bits, cached_low_bits: view.low_bits,
        cached_low_bit_width: view.low_bit_width, cached_count: view.count,
        cached_min_value: view.min_value,
    }
}
#[inline]
fn refresh_chunk_cache(&mut self) {
let view = self.opef.chunk_view(self.chunk_idx);
self.cached_high_bits = view.high_bits;
self.cached_low_bits = view.low_bits;
self.cached_low_bit_width = view.low_bit_width;
self.cached_count = view.count;
self.cached_min_value = view.min_value;
}
// Extract the low bits of element `local_idx` from the cached chunk.
// Reading `w_idx + 1` is always in bounds because chunk_view extends the
// low slice by one word (sentinel-backed for the final chunk).
#[inline]
fn get_low_cached(&self, local_idx: usize) -> u64 {
    if self.cached_low_bit_width == 0 { return 0; }
    let lbw = self.cached_low_bit_width as u64;
    let bit_pos = local_idx as u64 * lbw;
    let w_idx = (bit_pos / 64) as usize;
    let b_idx = (bit_pos % 64) as u32;
    let w0 = self.cached_low_bits[w_idx];
    let w1 = self.cached_low_bits[w_idx + 1];
    // Stitch the two words so a field straddling the boundary is contiguous.
    let combined = w0 as u128 | ((w1 as u128) << 64);
    (combined >> b_idx) as u64 & ((1u64 << lbw) - 1)
}
// Recompute `cached_value` from the current position: the i-th set bit
// sits at high(i) + i, so high(i) = local_high_pos - local_idx.
#[inline]
fn recompute_value(&mut self) {
    let high_val = (self.local_high_pos - self.local_idx) as u64;
    let low = self.get_low_cached(self.local_idx);
    self.cached_value = self.cached_min_value + (high_val << self.cached_low_bit_width) + low;
}
// Materialize a ChunkView from the cached fields (avoids re-slicing).
#[inline]
fn cached_view(&self) -> ChunkView<'a> {
    ChunkView {
        low_bits: self.cached_low_bits,
        high_bits: self.cached_high_bits,
        low_bit_width: self.cached_low_bit_width,
        count: self.cached_count,
        min_value: self.cached_min_value,
        high_len_bits: self.opef.meta[self.chunk_idx].high_len_bits as usize,
    }
}
/// Value under the cursor, or `None` once the cursor is exhausted.
#[inline]
pub fn current(&self) -> Option<u64> {
    (self.global_idx < self.opef.len).then(|| self.cached_value)
}
/// Absolute index of the element the cursor points at.
#[inline]
pub fn index(&self) -> usize {
    self.global_idx
}
/// True once the cursor has moved past the last element.
#[inline]
pub fn is_exhausted(&self) -> bool {
    self.global_idx >= self.opef.len
}
// Step to the next element. Returns false once the cursor moves past the
// last element; `current()` then yields None.
#[inline]
pub fn advance(&mut self) -> bool {
    self.global_idx += 1;
    if self.global_idx >= self.opef.len { return false; }
    self.local_idx += 1;
    // Crossing into the next chunk: re-seed the cache and bit position.
    if self.local_idx >= self.cached_count {
        self.chunk_idx += 1;
        self.local_idx = 0;
        if self.chunk_idx >= self.opef.meta.len() { return false; }
        self.refresh_chunk_cache();
        self.local_high_pos = chunk_first_one_cached(self.cached_high_bits);
        self.recompute_value();
        return true;
    }
    // Find the next set high bit strictly after the current position.
    let next_pos = self.local_high_pos + 1;
    let mut word_idx = next_pos / 64;
    if word_idx >= self.cached_high_bits.len() { return false; }
    let bit_in_word = next_pos % 64;
    let mut word = self.cached_high_bits[word_idx] >> bit_in_word;
    if word != 0 {
        self.local_high_pos = word_idx * 64 + bit_in_word + word.trailing_zeros() as usize;
    } else {
        loop {
            word_idx += 1;
            if word_idx >= self.cached_high_bits.len() { return false; }
            word = self.cached_high_bits[word_idx];
            if word != 0 {
                self.local_high_pos = word_idx * 64 + word.trailing_zeros() as usize;
                break;
            }
        }
    }
    self.recompute_value();
    true
}
// Advance to the first element >= target (never moves backwards).
// Strategy: try the current chunk first, then gallop (exponential search)
// over chunk upper bounds, finishing with a partition_point within the
// bracketed range.
#[inline]
pub fn advance_to_geq(&mut self, target: u64) -> bool {
    if self.global_idx >= self.opef.len { return false; }
    if self.cached_value >= target { return true; }
    let num_chunks = self.opef.meta.len();
    // Fast path: the answer may still be inside the current chunk.
    if target <= self.opef.chunk_upper_bounds[self.chunk_idx] {
        let target_delta = target - self.cached_min_value;
        let view = self.cached_view();
        if let Some((local_idx, delta, hp)) = chunk_scan_geq(&view, target_delta, self.local_idx, self.local_high_pos) {
            self.local_idx = local_idx;
            self.global_idx = self.opef.chunk_starts[self.chunk_idx] + local_idx;
            self.local_high_pos = hp;
            self.cached_value = self.cached_min_value + delta;
            return true;
        }
    }
    // Gallop: double the step until a chunk with upper bound >= target is
    // bracketed, then binary-search inside the bracket (lo, hi].
    let mut lo = self.chunk_idx;
    let mut step = 1usize;
    let target_chunk = loop {
        let hi = (lo + step).min(num_chunks - 1);
        if self.opef.chunk_upper_bounds[hi] >= target {
            let s = lo + 1;
            if s > hi {
                break hi;
            }
            break s + self.opef.chunk_upper_bounds[s..=hi]
                .partition_point(|&x| x < target);
        }
        if hi == num_chunks - 1 {
            // No chunk can contain the target: exhaust the cursor.
            self.global_idx = self.opef.len;
            return false;
        }
        lo = hi;
        step *= 2;
    };
    self.chunk_idx = target_chunk;
    self.refresh_chunk_cache();
    let global_offset = self.opef.chunk_starts[target_chunk];
    // Target precedes the chunk: its first element is the answer.
    if target <= self.cached_min_value {
        self.local_idx = 0;
        self.global_idx = global_offset;
        self.local_high_pos = chunk_first_one_cached(self.cached_high_bits);
        self.recompute_value();
        return true;
    }
    let view = self.cached_view();
    let target_delta = target - self.cached_min_value;
    let target_high = (target_delta >> view.low_bit_width) as usize;
    // Skip near the candidate via the high-bit index, then scan forward.
    let (si, sp) = chunk_skip_to_high(&view, target_high);
    if let Some((local_idx, delta, hp)) = chunk_scan_geq(&view, target_delta, si, sp) {
        self.local_idx = local_idx;
        self.global_idx = global_offset + local_idx;
        self.local_high_pos = hp;
        self.cached_value = self.cached_min_value + delta;
        return true;
    }
    // Not in the bracketed chunk: fall through to the next chunk's head.
    let next = target_chunk + 1;
    if next >= num_chunks {
        self.global_idx = self.opef.len;
        return false;
    }
    self.chunk_idx = next;
    self.refresh_chunk_cache();
    self.local_idx = 0;
    self.global_idx = self.opef.chunk_starts[next];
    self.local_high_pos = chunk_first_one_cached(self.cached_high_bits);
    self.recompute_value();
    true
}
/// Random-access seek: position the cursor exactly at element `idx`.
/// Returns `false` (leaving the cursor untouched) when `idx` is out of range.
#[inline]
pub fn advance_to_index(&mut self, idx: usize) -> bool {
    if idx >= self.opef.len {
        return false;
    }
    if self.global_idx == idx {
        return true;
    }
    // Last chunk whose start is <= idx (starts are strictly increasing
    // and begin at 0, so the subtraction never underflows).
    let target_chunk = self.opef.chunk_starts.partition_point(|&s| s <= idx) - 1;
    if target_chunk != self.chunk_idx {
        self.chunk_idx = target_chunk;
        self.refresh_chunk_cache();
    }
    let within = idx - self.opef.chunk_starts[target_chunk];
    self.local_idx = within;
    self.global_idx = idx;
    // Re-derive the high-bit position by selecting the `within`-th set bit.
    self.local_high_pos = Self::find_nth_one(self.cached_high_bits, within);
    self.recompute_value();
    true
}
/// Bit position of the `n`-th (0-based) set bit in `high_bits`.
/// Falls back to 0 when fewer than `n + 1` bits are set; callers are
/// expected to keep `n` within the chunk's element count.
#[inline]
fn find_nth_one(high_bits: &[u64], n: usize) -> usize {
    let mut left = n;
    let mut base = 0usize;
    for &word in high_bits {
        let pop = word.count_ones() as usize;
        if left < pop {
            return base + select_in_word(word, left);
        }
        left -= pop;
        base += 64;
    }
    0
}
// Rewind the cursor to the first element.
pub fn reset(&mut self) {
    *self = Self::new(self.opef);
}
}
/// Batch-decoding cursor over an [`OptimalPartitionedEliasFano`].
pub struct OptimalPefBatchCursor<'a> {
    opef: &'a OptimalPartitionedEliasFano,
    buffer: [u64; BATCH_SIZE], // decoded values of the current batch
    buf_pos: usize,            // read position within `buffer`
    buf_len: usize,            // number of valid entries in `buffer`
    next_elem: usize,          // first element index not yet decoded
    chunk_idx: usize,          // chunk the decoder will resume in
    local_idx: usize,          // element index within that chunk
    local_high_pos: usize,     // chunk-local high-bit resume position
}
impl<'a> OptimalPefBatchCursor<'a> {
/// Build a cursor and pre-decode the first batch (no-op for empty lists).
fn new(opef: &'a OptimalPartitionedEliasFano) -> Self {
    let mut cursor = Self {
        opef,
        buffer: [0u64; BATCH_SIZE],
        buf_pos: 0,
        buf_len: 0,
        next_elem: 0,
        chunk_idx: 0,
        local_idx: 0,
        local_high_pos: 0,
    };
    if opef.is_empty() {
        return cursor;
    }
    cursor.refill();
    cursor
}
/// Value under the cursor, or `None` once the list is exhausted.
#[inline]
pub fn current(&self) -> Option<u64> {
    (self.buf_pos < self.buf_len).then(|| self.buffer[self.buf_pos])
}
/// Absolute index of the element `current()` refers to.
#[inline]
pub fn index(&self) -> usize {
    self.buf_pos + self.next_elem - self.buf_len
}
/// True when every element has been consumed.
#[inline]
pub fn is_exhausted(&self) -> bool {
    self.next_elem >= self.opef.len && self.buf_pos >= self.buf_len
}
/// Step to the next element, refilling the buffer when it runs dry.
/// Returns `false` once the list is exhausted.
#[inline]
pub fn advance(&mut self) -> bool {
    self.buf_pos += 1;
    if self.buf_pos >= self.buf_len {
        if self.next_elem >= self.opef.len {
            return false;
        }
        self.refill();
        return self.buf_len > 0;
    }
    true
}
// Decode up to BATCH_SIZE values, possibly crossing chunk boundaries.
// Mirrors PartitionedEliasFanoBatchCursor::refill but over DP-chosen
// variable-length chunks.
fn refill(&mut self) {
    let remaining = self.opef.len - self.next_elem;
    if remaining == 0 {
        self.buf_len = 0;
        return;
    }
    let count = remaining.min(BATCH_SIZE);
    let mut filled = 0;
    while filled < count {
        if self.chunk_idx >= self.opef.meta.len() { break; }
        let view = self.opef.chunk_view(self.chunk_idx);
        // Entering a fresh chunk: locate its first set high bit.
        if self.local_idx == 0 {
            self.local_high_pos = 0;
            for (wi, &w) in view.high_bits.iter().enumerate() {
                if w != 0 {
                    self.local_high_pos = wi * 64 + w.trailing_zeros() as usize;
                    break;
                }
            }
        }
        let chunk_remaining = view.count - self.local_idx;
        let to_decode = (count - filled).min(chunk_remaining);
        let lbw = view.low_bit_width;
        let low_mask = if lbw == 0 { 0u64 } else { (1u64 << lbw) - 1 };
        // High-bits scan state (chunk-local bit positions).
        let mut bit_pos = self.local_high_pos;
        let mut word_idx = bit_pos / 64;
        let start_bit = bit_pos % 64;
        let mut word = if word_idx < view.high_bits.len() {
            view.high_bits[word_idx] >> start_bit
        } else {
            0
        };
        bit_pos = word_idx * 64 + start_bit;
        // Low-bits window: current and next word cover straddling fields.
        let mut low_bit_pos = self.local_idx as u64 * lbw as u64;
        let mut low_word_idx = (low_bit_pos / 64) as usize;
        let mut low_bit_idx = (low_bit_pos % 64) as u32;
        let mut low_word = if lbw > 0 && low_word_idx < view.low_bits.len() {
            view.low_bits[low_word_idx]
        } else { 0 };
        let mut low_word_next = if lbw > 0 && low_word_idx + 1 < view.low_bits.len() {
            view.low_bits[low_word_idx + 1]
        } else { 0 };
        for k in 0..to_decode {
            let li = self.local_idx + k;
            while word == 0 {
                word_idx += 1;
                if word_idx >= view.high_bits.len() { break; }
                word = view.high_bits[word_idx];
                bit_pos = word_idx * 64;
            }
            let tz = word.trailing_zeros() as usize;
            bit_pos += tz;
            word >>= tz;
            // The i-th set bit sits at high(i) + i: recover the high part.
            let high_val = (bit_pos - li) as u64;
            let low = if lbw == 0 {
                0u64
            } else {
                let mut val = low_word >> low_bit_idx;
                if low_bit_idx + lbw > 64 {
                    val |= low_word_next << (64 - low_bit_idx);
                }
                val & low_mask
            };
            let delta = (high_val << lbw) | low;
            // Values are stored as deltas from the chunk minimum.
            self.buffer[filled + k] = view.min_value + delta;
            bit_pos += 1;
            word >>= 1;
            if lbw > 0 {
                low_bit_idx += lbw;
                if low_bit_idx >= 64 {
                    low_bit_idx -= 64;
                    low_word_idx += 1;
                    low_word = low_word_next;
                    low_word_next = if low_word_idx + 1 < view.low_bits.len() {
                        view.low_bits[low_word_idx + 1]
                    } else { 0 };
                }
            }
        }
        self.local_idx += to_decode;
        self.local_high_pos = bit_pos;
        filled += to_decode;
        // Chunk exhausted: advance and reset chunk-local state.
        if self.local_idx >= view.count {
            self.chunk_idx += 1;
            self.local_idx = 0;
            self.local_high_pos = 0;
        }
    }
    self.buf_pos = 0;
    self.buf_len = filled;
    self.next_elem += filled;
}
// Rewind to the first element by rebuilding the cursor state.
pub fn reset(&mut self) {
    *self = Self::new(self.opef);
}
}
// Posting-list size thresholds used by HybridPostingList::from_sorted* to
// pick an encoding: tiny lists stay as plain arrays, mid-size lists use a
// single EF, larger ones fixed-size partitioning, the largest the
// DP-optimized partitioning.
const DENSE_THRESHOLD: usize = 64;
const PARTITION_THRESHOLD: usize = 256;
const OPTIMAL_THRESHOLD: usize = 4096;
/// Posting-list container that picks a compression scheme by list size:
/// raw `u32`s for tiny lists, plain Elias-Fano, fixed-size partitioned EF,
/// or the DP-optimized partitioned EF for the largest lists.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum HybridPostingList {
    Dense(Vec<u32>),
    EliasFano(EliasFano),
    Partitioned(PartitionedEliasFano),
    Optimal(OptimalPartitionedEliasFano),
}
impl HybridPostingList {
/// Build from a sorted `u32` slice, choosing the encoding by list length.
pub fn from_sorted(values: &[u32]) -> Self {
    match values.len() {
        n if n <= DENSE_THRESHOLD => Self::Dense(values.to_vec()),
        n if n <= PARTITION_THRESHOLD => Self::EliasFano(EliasFano::from_sorted(values)),
        n if n <= OPTIMAL_THRESHOLD => Self::Partitioned(PartitionedEliasFano::from_sorted(values)),
        _ => Self::Optimal(OptimalPartitionedEliasFano::from_sorted(values)),
    }
}
/// Build from a sorted `u64` slice, choosing the encoding by list length.
pub fn from_sorted_u64(values: &[u64]) -> Self {
    let n = values.len();
    // Dense stores u32: only safe when every value fits in u32. Since the
    // input is sorted, checking the last element suffices. Previously the
    // unconditional `as u32` cast silently truncated larger values,
    // corrupting the list; such inputs now fall through to an EF variant
    // that handles the full u64 range.
    let fits_u32 = values.last().map_or(true, |&v| v <= u32::MAX as u64);
    if n <= DENSE_THRESHOLD && fits_u32 {
        Self::Dense(values.iter().map(|&v| v as u32).collect())
    } else if n <= PARTITION_THRESHOLD {
        Self::EliasFano(EliasFano::from_sorted_u64(values))
    } else if n <= OPTIMAL_THRESHOLD {
        Self::Partitioned(PartitionedEliasFano::from_sorted_u64(values))
    } else {
        Self::Optimal(OptimalPartitionedEliasFano::from_sorted_u64(values))
    }
}
pub fn with_encoding(values: &[u32], encoding: PostingEncoding) -> Self {
match encoding {
PostingEncoding::Dense => Self::Dense(values.to_vec()),
PostingEncoding::EliasFano => Self::EliasFano(EliasFano::from_sorted(values)),
PostingEncoding::Partitioned => Self::Partitioned(PartitionedEliasFano::from_sorted(values)),
PostingEncoding::Optimal => Self::Optimal(OptimalPartitionedEliasFano::from_sorted(values)),
}
}
/// Number of stored postings, regardless of encoding.
#[inline]
pub fn len(&self) -> usize {
    match self {
        Self::Dense(v) => v.len(),
        Self::EliasFano(inner) => inner.len(),
        Self::Partitioned(inner) => inner.len(),
        Self::Optimal(inner) => inner.len(),
    }
}
/// True when the list holds no postings.
#[inline]
pub fn is_empty(&self) -> bool {
    self.len() == 0
}
/// Approximate footprint in bytes of the active variant.
pub fn size_bytes(&self) -> usize {
    let payload = match self {
        Self::Dense(v) => v.len() * 4,
        Self::EliasFano(inner) => inner.size_bytes(),
        Self::Partitioned(inner) => inner.size_bytes(),
        Self::Optimal(inner) => inner.size_bytes(),
    };
    payload + 8 // enum discriminant / alignment overhead
}
/// Average storage cost per posting, in bits (0.0 for an empty list).
#[inline]
pub fn bits_per_element(&self) -> f64 {
    match self.len() {
        0 => 0.0,
        n => (self.size_bytes() * 8) as f64 / n as f64,
    }
}
/// Random access: value at `index`, or `None` when out of range.
pub fn get(&self, index: usize) -> Option<u64> {
    match self {
        Self::Dense(v) => v.get(index).copied().map(u64::from),
        Self::EliasFano(inner) => inner.get(index),
        Self::Partitioned(inner) => inner.get(index),
        Self::Optimal(inner) => inner.get(index),
    }
}
/// First posting >= `target`, returned as `(index, value)`; `None` when no
/// such posting exists.
#[inline]
pub fn next_geq(&self, target: u64) -> Option<(usize, u64)> {
    match self {
        Self::Dense(v) => {
            // Dense holds u32 values, so a target above u32::MAX can never
            // be matched. The old `target as u32` cast truncated such
            // targets and could return an element SMALLER than `target`.
            if target > u32::MAX as u64 {
                return None;
            }
            // partition_point yields the FIRST index with value >= target,
            // which is also correct in the presence of duplicates
            // (binary_search may land on any of several equal elements).
            let i = v.partition_point(|&x| (x as u64) < target);
            v.get(i).map(|&x| (i, x as u64))
        }
        Self::EliasFano(ef) => ef.next_geq(target),
        Self::Partitioned(pef) => pef.next_geq(target),
        Self::Optimal(opef) => opef.next_geq(target),
    }
}
// Report which encoding variant this list uses.
pub fn encoding(&self) -> PostingEncoding {
    match self {
        Self::Dense(_) => PostingEncoding::Dense,
        Self::EliasFano(_) => PostingEncoding::EliasFano,
        Self::Partitioned(_) => PostingEncoding::Partitioned,
        Self::Optimal(_) => PostingEncoding::Optimal,
    }
}
}
/// Tag identifying which encoding a [`HybridPostingList`] uses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum PostingEncoding {
    Dense,
    EliasFano,
    Partitioned,
    Optimal,
}
impl std::fmt::Display for PostingEncoding {
    /// Human-readable encoding name (used in stats / debug output).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Dense => "Dense",
            Self::EliasFano => "EliasFano",
            Self::Partitioned => "PartitionedEF",
            Self::Optimal => "OptimalPEF",
        };
        f.write_str(label)
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_empty() {
let ef = EliasFano::from_sorted(&[]);
assert_eq!(ef.len(), 0);
assert!(ef.is_empty());
assert_eq!(ef.get(0), None);
assert_eq!(ef.next_geq(0), None);
}
#[test]
fn test_single() {
let ef = EliasFano::from_sorted(&[42]);
assert_eq!(ef.len(), 1);
assert_eq!(ef.get(0), Some(42));
assert_eq!(ef.next_geq(0), Some((0, 42)));
assert_eq!(ef.next_geq(42), Some((0, 42)));
assert_eq!(ef.next_geq(43), None);
}
#[test]
fn test_basic() {
let docs = vec![3, 5, 11, 27, 31, 42, 58, 63];
let ef = EliasFano::from_sorted(&docs);
assert_eq!(ef.len(), 8);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(ef.get(i), Some(v as u64), "get({}) failed", i);
}
assert_eq!(ef.get(8), None);
}
// next_geq on exact hits, in-between targets, and past-the-end targets.
#[test]
fn test_next_geq() {
let docs = vec![3, 5, 11, 27, 31, 42, 58, 63];
let ef = EliasFano::from_sorted(&docs);
assert_eq!(ef.next_geq(3), Some((0, 3)));
assert_eq!(ef.next_geq(42), Some((5, 42)));
assert_eq!(ef.next_geq(63), Some((7, 63)));
assert_eq!(ef.next_geq(0), Some((0, 3)));
assert_eq!(ef.next_geq(4), Some((1, 5)));
assert_eq!(ef.next_geq(10), Some((2, 11)));
assert_eq!(ef.next_geq(28), Some((4, 31)));
assert_eq!(ef.next_geq(59), Some((7, 63)));
assert_eq!(ef.next_geq(64), None);
assert_eq!(ef.next_geq(1000), None);
}
// iter() yields the original values in order.
#[test]
fn test_iterator() {
let docs = vec![3, 5, 11, 27, 31, 42, 58, 63];
let ef = EliasFano::from_sorted(&docs);
let collected: Vec<u64> = ef.iter().collect();
let expected: Vec<u64> = docs.iter().map(|&v| v as u64).collect();
assert_eq!(collected, expected);
}
// Dense consecutive values 0..100 (low-bit width degenerates to 0).
#[test]
fn test_consecutive() {
let docs: Vec<u32> = (0..100).collect();
let ef = EliasFano::from_sorted(&docs);
assert_eq!(ef.len(), 100);
for i in 0..100 {
assert_eq!(ef.get(i), Some(i as u64));
}
assert_eq!(ef.next_geq(50), Some((50, 50)));
}
// Sparse values (step 1000): get round-trip and next_geq around a gap.
#[test]
fn test_sparse() {
let docs: Vec<u32> = (0..100).map(|i| i * 1000).collect();
let ef = EliasFano::from_sorted(&docs);
assert_eq!(ef.len(), 100);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(ef.get(i), Some(v as u64), "get({}) failed", i);
}
assert_eq!(ef.next_geq(500), Some((1, 1000)));
assert_eq!(ef.next_geq(1000), Some((1, 1000)));
assert_eq!(ef.next_geq(1001), Some((2, 2000)));
}
// 10K non-uniform values: get/iter round-trip and compression below 32 bits/elem.
#[test]
fn test_large_posting_list() {
let docs: Vec<u32> = (0..10000).map(|i| i * 100 + i % 7).collect();
let ef = EliasFano::from_sorted(&docs);
assert_eq!(ef.len(), 10000);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(ef.get(i), Some(v as u64), "get({}) = {:?}, expected {}", i, ef.get(i), v);
}
let from_iter: Vec<u64> = ef.iter().collect();
assert_eq!(from_iter.len(), 10000);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(from_iter[i], v as u64);
}
let bits_per_elem = ef.bits_per_element();
assert!(bits_per_elem < 32.0, "Should be much less than 32 bits/elem, got {:.1}", bits_per_elem);
}
// Full scan via repeated next_geq(target = last + 1) visits every value once.
#[test]
fn test_next_geq_scan() {
let docs: Vec<u32> = (0..1000).map(|i| i * 10).collect();
let ef = EliasFano::from_sorted(&docs);
let mut target = 0u64;
let mut found = Vec::new();
while let Some((_, val)) = ef.next_geq(target) {
if val % 30 == 0 {
found.push(val as u32);
}
target = val + 1;
}
let expected: Vec<u32> = (0..1000).map(|i| i * 10).filter(|v| v % 30 == 0).collect();
assert_eq!(found, expected);
}
// Compression sanity bound: step-100 values must stay under 20 bits/elem.
#[test]
fn test_space_efficiency() {
let docs: Vec<u32> = (0..10000).map(|i| i * 100).collect();
let ef = EliasFano::from_sorted(&docs);
let bpe = ef.bits_per_element();
eprintln!("Elias-Fano: {} elements, {:.1} bits/elem, {} bytes total",
ef.len(), bpe, ef.size_bytes());
assert!(bpe < 20.0, "Too many bits per element: {:.1}", bpe);
}
// Boundary values up to u32::MAX survive encode/decode.
#[test]
fn test_max_values() {
let docs = vec![0, u32::MAX / 2, u32::MAX];
let ef = EliasFano::from_sorted(&docs);
assert_eq!(ef.get(0), Some(0));
assert_eq!(ef.get(1), Some(u32::MAX as u64 / 2));
assert_eq!(ef.get(2), Some(u32::MAX as u64));
}
// Timing-only smoke test for next_geq; reports ns/call in release builds.
#[test]
fn test_performance_next_geq() {
let docs: Vec<u32> = (0..100000).map(|i| i * 10).collect();
let ef = EliasFano::from_sorted(&docs);
let targets: Vec<u64> = (0..10000).map(|i| (i * 100) as u64).collect();
let start = std::time::Instant::now();
let mut found = 0usize;
for _ in 0..100 {
for &t in &targets {
if ef.next_geq(t).is_some() { found += 1; }
}
}
let elapsed = start.elapsed();
// Only report under --release; debug timings are not meaningful.
#[cfg(not(debug_assertions))]
{
let per_call = elapsed / (100 * targets.len() as u32);
eprintln!("Elias-Fano next_geq: {:?}/call, {} found", per_call, found);
}
}
// Walks the EF cursor across every element, checking index, value and
// exhaustion state at each step.
#[test]
fn test_cursor_basic() {
    let docs = vec![3, 5, 11, 27, 31, 42, 58, 63];
    let ef = EliasFano::from_sorted(&docs);
    let mut cursor = ef.cursor();
    let last = docs.len() - 1;
    for (pos, &expected) in docs.iter().enumerate() {
        assert!(!cursor.is_exhausted());
        assert_eq!(cursor.index(), pos);
        assert_eq!(cursor.current(), Some(expected as u64), "cursor at {} failed", pos);
        if pos < last {
            assert!(cursor.advance());
        }
    }
    assert!(!cursor.advance());
    assert!(cursor.is_exhausted());
}
// Cursor advance_to_geq: between values, exact hit, then past the end.
#[test]
fn test_cursor_advance_to_geq() {
let docs = vec![3, 5, 11, 27, 31, 42, 58, 63];
let ef = EliasFano::from_sorted(&docs);
let mut cursor = ef.cursor();
assert!(cursor.advance_to_geq(30));
assert_eq!(cursor.current(), Some(31));
assert!(cursor.advance_to_geq(42));
assert_eq!(cursor.current(), Some(42));
assert!(!cursor.advance_to_geq(100));
assert!(cursor.is_exhausted());
}
// Full cursor traversal yields exactly the same sequence as iter().
#[test]
fn test_cursor_matches_iterator() {
let docs: Vec<u32> = (0..10000).map(|i| i * 10 + i % 7).collect();
let ef = EliasFano::from_sorted(&docs);
let from_iter: Vec<u64> = ef.iter().collect();
let mut from_cursor = Vec::new();
let mut cursor = ef.cursor();
if let Some(v) = cursor.current() {
from_cursor.push(v);
while cursor.advance() {
from_cursor.push(cursor.current().unwrap());
}
}
assert_eq!(from_cursor.len(), from_iter.len());
for (i, (&a, &b)) in from_cursor.iter().zip(from_iter.iter()).enumerate() {
assert_eq!(a, b, "mismatch at index {}", i);
}
}
// reset() returns the cursor to index 0 / first value.
#[test]
fn test_cursor_reset() {
let docs = vec![10, 20, 30];
let ef = EliasFano::from_sorted(&docs);
let mut cursor = ef.cursor();
cursor.advance();
cursor.advance();
assert_eq!(cursor.current(), Some(30));
cursor.reset();
assert_eq!(cursor.current(), Some(10));
assert_eq!(cursor.index(), 0);
}
// Release-only benchmark: sequential cursor traversal must be >= 2x faster
// than repeated random-access get(), with identical sums as a cross-check.
#[test]
fn test_cursor_performance() {
let docs: Vec<u32> = (0..100000).map(|i| i * 10).collect();
let ef = EliasFano::from_sorted(&docs);
#[cfg(not(debug_assertions))]
{
let start = std::time::Instant::now();
let mut sum1 = 0u64;
for _ in 0..10 {
for i in 0..ef.len() {
sum1 += ef.get(i).unwrap();
}
}
let get_time = start.elapsed();
let start = std::time::Instant::now();
let mut sum2 = 0u64;
for _ in 0..10 {
let mut cursor = ef.cursor();
sum2 += cursor.current().unwrap();
while cursor.advance() {
sum2 += cursor.current().unwrap();
}
}
let cursor_time = start.elapsed();
assert_eq!(sum1, sum2, "cursor and get must produce same sum");
let speedup = get_time.as_nanos() as f64 / cursor_time.as_nanos() as f64;
eprintln!("Sequential 100K: get={:?}, cursor={:?}, speedup={:.1}×",
get_time, cursor_time, speedup);
assert!(speedup > 2.0,
"cursor should be at least 2× faster than get(), got {:.1}×", speedup);
}
}
// Empty PartitionedEliasFano behaves like empty EliasFano.
#[test]
fn test_pef_empty() {
let pef = PartitionedEliasFano::from_sorted(&[]);
assert_eq!(pef.len(), 0);
assert!(pef.is_empty());
assert_eq!(pef.get(0), None);
assert_eq!(pef.next_geq(0), None);
}
// Single-element PEF: get and next_geq at/below/above the one value.
#[test]
fn test_pef_single() {
let pef = PartitionedEliasFano::from_sorted(&[42]);
assert_eq!(pef.len(), 1);
assert_eq!(pef.get(0), Some(42));
assert_eq!(pef.next_geq(0), Some((0, 42)));
assert_eq!(pef.next_geq(42), Some((0, 42)));
assert_eq!(pef.next_geq(43), None);
}
// PEF get(i) round-trips every value; out-of-range index gives None.
#[test]
fn test_pef_basic() {
let docs = vec![3, 5, 11, 27, 31, 42, 58, 63];
let pef = PartitionedEliasFano::from_sorted(&docs);
assert_eq!(pef.len(), 8);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(pef.get(i), Some(v as u64), "get({}) failed", i);
}
assert_eq!(pef.get(8), None);
}
// PEF next_geq: exact hits, in-between targets, past-the-end targets.
#[test]
fn test_pef_next_geq() {
let docs = vec![3, 5, 11, 27, 31, 42, 58, 63];
let pef = PartitionedEliasFano::from_sorted(&docs);
assert_eq!(pef.next_geq(3), Some((0, 3)));
assert_eq!(pef.next_geq(42), Some((5, 42)));
assert_eq!(pef.next_geq(63), Some((7, 63)));
assert_eq!(pef.next_geq(0), Some((0, 3)));
assert_eq!(pef.next_geq(4), Some((1, 5)));
assert_eq!(pef.next_geq(10), Some((2, 11)));
assert_eq!(pef.next_geq(28), Some((4, 31)));
assert_eq!(pef.next_geq(59), Some((7, 63)));
assert_eq!(pef.next_geq(64), None);
assert_eq!(pef.next_geq(1000), None);
}
// PEF iter() yields the original values in order.
#[test]
fn test_pef_iterator() {
let docs = vec![3, 5, 11, 27, 31, 42, 58, 63];
let pef = PartitionedEliasFano::from_sorted(&docs);
let collected: Vec<u64> = pef.iter().collect();
let expected: Vec<u64> = docs.iter().map(|&v| v as u64).collect();
assert_eq!(collected, expected);
}
// PEF over consecutive values 0..100.
#[test]
fn test_pef_consecutive() {
let docs: Vec<u32> = (0..100).collect();
let pef = PartitionedEliasFano::from_sorted(&docs);
assert_eq!(pef.len(), 100);
for i in 0..100 {
assert_eq!(pef.get(i), Some(i as u64));
}
assert_eq!(pef.next_geq(50), Some((50, 50)));
}
// PEF over sparse values (step 1000): round-trip and next_geq around a gap.
#[test]
fn test_pef_sparse() {
let docs: Vec<u32> = (0..100).map(|i| i * 1000).collect();
let pef = PartitionedEliasFano::from_sorted(&docs);
assert_eq!(pef.len(), 100);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(pef.get(i), Some(v as u64), "get({}) failed", i);
}
assert_eq!(pef.next_geq(500), Some((1, 1000)));
assert_eq!(pef.next_geq(1000), Some((1, 1000)));
assert_eq!(pef.next_geq(1001), Some((2, 2000)));
}
// 300 values spanning multiple PEF chunks: get, iter, and next_geq on each
// stored value all agree.
#[test]
fn test_pef_multi_chunk() {
let docs: Vec<u32> = (0..300).map(|i| i * 10 + i % 7).collect();
let pef = PartitionedEliasFano::from_sorted(&docs);
assert_eq!(pef.len(), 300);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(pef.get(i), Some(v as u64), "get({}) failed", i);
}
let from_iter: Vec<u64> = pef.iter().collect();
assert_eq!(from_iter.len(), 300);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(from_iter[i], v as u64, "iter mismatch at {}", i);
}
for &v in &docs {
let (idx, found) = pef.next_geq(v as u64).unwrap();
assert_eq!(found, v as u64, "next_geq({}) returned {}", v, found);
assert_eq!(pef.get(idx), Some(found));
}
}
// Differential test: PEF must match plain EF on get, next_geq and iter.
#[test]
fn test_pef_matches_ef() {
let docs: Vec<u32> = (0..500).map(|i| i * 10 + i % 7).collect();
let ef = EliasFano::from_sorted(&docs);
let pef = PartitionedEliasFano::from_sorted(&docs);
assert_eq!(ef.len(), pef.len());
for i in 0..docs.len() {
assert_eq!(ef.get(i), pef.get(i), "get({}) mismatch", i);
}
for target in (0..5010).step_by(7) {
assert_eq!(
ef.next_geq(target), pef.next_geq(target),
"next_geq({}) mismatch", target
);
}
let ef_iter: Vec<u64> = ef.iter().collect();
let pef_iter: Vec<u64> = pef.iter().collect();
assert_eq!(ef_iter, pef_iter);
}
// 10K values: sampled get() checks plus a full iter round-trip.
#[test]
fn test_pef_large_posting_list() {
let docs: Vec<u32> = (0..10000).map(|i| i * 100 + i % 7).collect();
let pef = PartitionedEliasFano::from_sorted(&docs);
assert_eq!(pef.len(), 10000);
for i in (0..10000).step_by(100) {
assert_eq!(pef.get(i), Some(docs[i] as u64), "get({}) failed", i);
}
let from_iter: Vec<u64> = pef.iter().collect();
assert_eq!(from_iter.len(), 10000);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(from_iter[i], v as u64, "iter[{}] mismatch", i);
}
}
// PEF cursor walks all elements, checking index/value/exhaustion at each step.
#[test]
fn test_pef_cursor_basic() {
let docs = vec![3, 5, 11, 27, 31, 42, 58, 63];
let pef = PartitionedEliasFano::from_sorted(&docs);
let mut cursor = pef.cursor();
for (i, &v) in docs.iter().enumerate() {
assert!(!cursor.is_exhausted());
assert_eq!(cursor.index(), i);
assert_eq!(cursor.current(), Some(v as u64), "cursor at {} failed", i);
if i < docs.len() - 1 {
assert!(cursor.advance());
}
}
assert!(!cursor.advance());
assert!(cursor.is_exhausted());
}
// PEF cursor traversal across chunk boundaries yields all 300 values in order.
#[test]
fn test_pef_cursor_multi_chunk() {
let docs: Vec<u32> = (0..300).map(|i| i * 10).collect();
let pef = PartitionedEliasFano::from_sorted(&docs);
let mut cursor = pef.cursor();
let mut collected = Vec::new();
if let Some(v) = cursor.current() {
collected.push(v);
while cursor.advance() {
collected.push(cursor.current().unwrap());
}
}
let expected: Vec<u64> = docs.iter().map(|&v| v as u64).collect();
assert_eq!(collected, expected);
}
// PEF cursor advance_to_geq: exact hit, in-between target, past the end.
#[test]
fn test_pef_cursor_advance_to_geq() {
let docs: Vec<u32> = (0..500).map(|i| i * 10).collect();
let pef = PartitionedEliasFano::from_sorted(&docs);
let mut cursor = pef.cursor();
assert!(cursor.advance_to_geq(1500));
assert_eq!(cursor.current(), Some(1500));
assert!(cursor.advance_to_geq(3005));
assert_eq!(cursor.current(), Some(3010));
assert!(!cursor.advance_to_geq(5000));
assert!(cursor.is_exhausted());
}
// PEF cursor reset() returns to index 0 / first value.
#[test]
fn test_pef_cursor_reset() {
let docs = vec![10, 20, 30];
let pef = PartitionedEliasFano::from_sorted(&docs);
let mut cursor = pef.cursor();
cursor.advance();
cursor.advance();
assert_eq!(cursor.current(), Some(30));
cursor.reset();
assert_eq!(cursor.current(), Some(10));
assert_eq!(cursor.index(), 0);
}
// PEF space overhead must stay within 2.5x of plain EF for this input.
#[test]
fn test_pef_space_efficiency() {
let docs: Vec<u32> = (0..10000).map(|i| i * 100).collect();
let ef = EliasFano::from_sorted(&docs);
let pef = PartitionedEliasFano::from_sorted(&docs);
eprintln!("EF: {} elements, {:.1} bits/elem, {} bytes",
ef.len(), ef.bits_per_element(), ef.size_bytes());
eprintln!("PEF: {} elements, {:.1} bits/elem, {} bytes",
pef.len(), pef.bits_per_element(), pef.size_bytes());
assert!(pef.bits_per_element() < ef.bits_per_element() * 2.5,
"PEF too large: {:.1} vs EF {:.1}", pef.bits_per_element(), ef.bits_per_element());
}
// PEF boundary values up to u32::MAX survive encode/decode.
#[test]
fn test_pef_max_values() {
let docs = vec![0, u32::MAX / 2, u32::MAX];
let pef = PartitionedEliasFano::from_sorted(&docs);
assert_eq!(pef.get(0), Some(0));
assert_eq!(pef.get(1), Some(u32::MAX as u64 / 2));
assert_eq!(pef.get(2), Some(u32::MAX as u64));
}
// Full scan of PEF via repeated next_geq visits every value exactly once.
#[test]
fn test_pef_next_geq_scan() {
let docs: Vec<u32> = (0..1000).map(|i| i * 10).collect();
let pef = PartitionedEliasFano::from_sorted(&docs);
let mut target = 0u64;
let mut found = Vec::new();
while let Some((_, val)) = pef.next_geq(target) {
if val % 30 == 0 {
found.push(val as u32);
}
target = val + 1;
}
let expected: Vec<u32> = (0..1000).map(|i| i * 10).filter(|v| v % 30 == 0).collect();
assert_eq!(found, expected);
}
// Release-only benchmark comparing EF vs PEF next_geq latency; the warm-up
// pass and `sink` keep the optimizer from discarding the calls.
#[test]
fn test_pef_performance_next_geq() {
let docs: Vec<u32> = (0..100000).map(|i| i * 10).collect();
let ef = EliasFano::from_sorted(&docs);
let pef = PartitionedEliasFano::from_sorted(&docs);
let targets: Vec<u64> = (0..10000).map(|i| (i * 100) as u64).collect();
#[cfg(not(debug_assertions))]
{
let mut sink = 0usize;
for &t in &targets {
if ef.next_geq(t).is_some() { sink += 1; }
if pef.next_geq(t).is_some() { sink += 1; }
}
let iterations = 100;
let start = std::time::Instant::now();
for _ in 0..iterations {
for &t in &targets {
if ef.next_geq(t).is_some() { sink += 1; }
}
}
let ef_time = start.elapsed();
let start = std::time::Instant::now();
for _ in 0..iterations {
for &t in &targets {
if pef.next_geq(t).is_some() { sink += 1; }
}
}
let pef_time = start.elapsed();
let ef_ns = ef_time.as_nanos() as f64 / (iterations as f64 * targets.len() as f64);
let pef_ns = pef_time.as_nanos() as f64 / (iterations as f64 * targets.len() as f64);
let ratio = ef_ns / pef_ns;
eprintln!(
"next_geq 100K elements: EF={ef_ns:.1}ns, PEF={pef_ns:.1}ns, \
PEF is {ratio:.2}× (>1 = PEF faster) [sink={sink}]"
);
}
}
// Release-only benchmark: full cursor traversal of EF vs PEF; equal sums
// double as a correctness cross-check.
#[test]
fn test_pef_performance_cursor() {
let docs: Vec<u32> = (0..100000).map(|i| i * 10).collect();
let ef = EliasFano::from_sorted(&docs);
let pef = PartitionedEliasFano::from_sorted(&docs);
#[cfg(not(debug_assertions))]
{
let iterations = 10;
let start = std::time::Instant::now();
let mut sum1 = 0u64;
for _ in 0..iterations {
let mut cursor = ef.cursor();
sum1 += cursor.current().unwrap();
while cursor.advance() {
sum1 += cursor.current().unwrap();
}
}
let ef_time = start.elapsed();
let start = std::time::Instant::now();
let mut sum2 = 0u64;
for _ in 0..iterations {
let mut cursor = pef.cursor();
sum2 += cursor.current().unwrap();
while cursor.advance() {
sum2 += cursor.current().unwrap();
}
}
let pef_time = start.elapsed();
assert_eq!(sum1, sum2, "cursor sums must match");
let ratio = ef_time.as_nanos() as f64 / pef_time.as_nanos() as f64;
eprintln!(
"Cursor 100K: EF={:?}, PEF={:?}, PEF is {ratio:.2}×",
ef_time, pef_time
);
}
}
// EF batch cursor yields all values in order on a small list.
#[test]
fn test_batch_cursor_ef_basic() {
let docs = vec![3, 5, 11, 27, 31, 42, 58, 63];
let ef = EliasFano::from_sorted(&docs);
let mut bc = ef.batch_cursor();
let mut collected = Vec::new();
if let Some(v) = bc.current() {
collected.push(v);
while bc.advance() {
collected.push(bc.current().unwrap());
}
}
let expected: Vec<u64> = docs.iter().map(|&v| v as u64).collect();
assert_eq!(collected, expected);
}
// EF batch cursor traversal equals iter() on 10K elements.
#[test]
fn test_batch_cursor_ef_matches_iter() {
let docs: Vec<u32> = (0..10000).map(|i| i * 10 + i % 7).collect();
let ef = EliasFano::from_sorted(&docs);
let from_iter: Vec<u64> = ef.iter().collect();
let mut from_batch = Vec::new();
let mut bc = ef.batch_cursor();
if let Some(v) = bc.current() {
from_batch.push(v);
while bc.advance() {
from_batch.push(bc.current().unwrap());
}
}
assert_eq!(from_batch.len(), from_iter.len());
assert_eq!(from_batch, from_iter);
}
// Batch cursor index() tracks the element position at every step.
#[test]
fn test_batch_cursor_ef_index() {
let docs: Vec<u32> = (0..100).map(|i| i * 5).collect();
let ef = EliasFano::from_sorted(&docs);
let mut bc = ef.batch_cursor();
for i in 0..100 {
assert_eq!(bc.index(), i, "index mismatch at {}", i);
assert_eq!(bc.current(), Some(docs[i] as u64));
if i < 99 { bc.advance(); }
}
}
// Batch cursor reset() returns to index 0 / first value.
#[test]
fn test_batch_cursor_ef_reset() {
let docs = vec![10, 20, 30, 40, 50];
let ef = EliasFano::from_sorted(&docs);
let mut bc = ef.batch_cursor();
bc.advance();
bc.advance();
assert_eq!(bc.current(), Some(30));
bc.reset();
assert_eq!(bc.current(), Some(10));
assert_eq!(bc.index(), 0);
}
// PEF batch cursor traversal equals iter().
#[test]
fn test_batch_cursor_pef_matches_iter() {
let docs: Vec<u32> = (0..500).map(|i| i * 10 + i % 7).collect();
let pef = PartitionedEliasFano::from_sorted(&docs);
let from_iter: Vec<u64> = pef.iter().collect();
let mut from_batch = Vec::new();
let mut bc = pef.batch_cursor();
if let Some(v) = bc.current() {
from_batch.push(v);
while bc.advance() {
from_batch.push(bc.current().unwrap());
}
}
assert_eq!(from_batch.len(), from_iter.len());
assert_eq!(from_batch, from_iter);
}
// PEF batch cursor crosses chunk boundaries and visits all 300 elements.
#[test]
fn test_batch_cursor_pef_cross_chunk() {
let docs: Vec<u32> = (0..300).map(|i| i * 10).collect();
let pef = PartitionedEliasFano::from_sorted(&docs);
let mut bc = pef.batch_cursor();
let mut count = 0;
if bc.current().is_some() {
count += 1;
while bc.advance() { count += 1; }
}
assert_eq!(count, 300);
}
// Release-only benchmark: plain cursor vs batch cursor over 100K elements;
// equal sums double as a correctness cross-check.
#[test]
fn test_batch_cursor_performance() {
let docs: Vec<u32> = (0..100000).map(|i| i * 10).collect();
let ef = EliasFano::from_sorted(&docs);
#[cfg(not(debug_assertions))]
{
let iterations = 10;
let start = std::time::Instant::now();
let mut sum1 = 0u64;
for _ in 0..iterations {
let mut cursor = ef.cursor();
sum1 += cursor.current().unwrap();
while cursor.advance() {
sum1 += cursor.current().unwrap();
}
}
let cursor_time = start.elapsed();
let start = std::time::Instant::now();
let mut sum2 = 0u64;
for _ in 0..iterations {
let mut bc = ef.batch_cursor();
sum2 += bc.current().unwrap();
while bc.advance() {
sum2 += bc.current().unwrap();
}
}
let batch_time = start.elapsed();
assert_eq!(sum1, sum2, "batch cursor and cursor must produce same sum");
eprintln!(
"Sequential 100K: cursor={:?}, batch={:?}, ratio={:.2}×",
cursor_time, batch_time,
cursor_time.as_nanos() as f64 / batch_time.as_nanos() as f64
);
}
}
// Empty OptimalPartitionedEliasFano answers all queries with None.
#[test]
fn test_opef_empty() {
let opef = OptimalPartitionedEliasFano::from_sorted(&[]);
assert_eq!(opef.len(), 0);
assert!(opef.is_empty());
assert_eq!(opef.get(0), None);
assert_eq!(opef.next_geq(0), None);
}
// OPEF get(i) round-trips a tiny list.
#[test]
fn test_opef_small() {
let docs = vec![3, 5, 11, 27, 31];
let opef = OptimalPartitionedEliasFano::from_sorted(&docs);
assert_eq!(opef.len(), 5);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(opef.get(i), Some(v as u64), "get({}) failed", i);
}
}
// Differential test: OPEF must match plain EF on get, next_geq and iter.
#[test]
fn test_opef_matches_ef() {
let docs: Vec<u32> = (0..500).map(|i| i * 10 + i % 7).collect();
let ef = EliasFano::from_sorted(&docs);
let opef = OptimalPartitionedEliasFano::from_sorted(&docs);
assert_eq!(ef.len(), opef.len());
for i in 0..docs.len() {
assert_eq!(ef.get(i), opef.get(i), "get({}) mismatch: ef={:?} opef={:?}",
i, ef.get(i), opef.get(i));
}
for target in (0..5010).step_by(7) {
assert_eq!(
ef.next_geq(target), opef.next_geq(target),
"next_geq({}) mismatch", target
);
}
let ef_vals: Vec<u64> = ef.iter().collect();
let opef_vals: Vec<u64> = opef.iter().collect();
assert_eq!(ef_vals, opef_vals);
}
// 10K values: sampled get() checks plus a full iter round-trip for OPEF.
#[test]
fn test_opef_large() {
let docs: Vec<u32> = (0..10000).map(|i| i * 100 + i % 7).collect();
let opef = OptimalPartitionedEliasFano::from_sorted(&docs);
assert_eq!(opef.len(), 10000);
for i in (0..10000).step_by(100) {
assert_eq!(opef.get(i), Some(docs[i] as u64), "get({}) failed", i);
}
let from_iter: Vec<u64> = opef.iter().collect();
assert_eq!(from_iter.len(), 10000);
for (i, &v) in docs.iter().enumerate() {
assert_eq!(from_iter[i], v as u64, "iter[{}] mismatch", i);
}
}
// OPEF must not exceed 1.5x the size of uniform PEF on this input.
#[test]
fn test_opef_space_vs_uniform() {
let docs: Vec<u32> = (0..10000).map(|i| i * 100).collect();
let pef = PartitionedEliasFano::from_sorted(&docs);
let opef = OptimalPartitionedEliasFano::from_sorted(&docs);
eprintln!("Uniform PEF: {:.1} bits/elem, {} bytes",
pef.bits_per_element(), pef.size_bytes());
eprintln!("Optimal PEF: {:.1} bits/elem, {} bytes",
opef.bits_per_element(), opef.size_bytes());
assert!(opef.bits_per_element() < pef.bits_per_element() * 1.5,
"Optimal PEF too large: {:.1} vs uniform {:.1}",
opef.bits_per_element(), pef.bits_per_element());
}
// OPEF next_geq at the start, mid-gap, last value, and past the end.
#[test]
fn test_opef_next_geq() {
let docs: Vec<u32> = (0..1000).map(|i| i * 10).collect();
let opef = OptimalPartitionedEliasFano::from_sorted(&docs);
assert_eq!(opef.next_geq(0), Some((0, 0)));
assert_eq!(opef.next_geq(55), Some((6, 60)));
assert_eq!(opef.next_geq(9990), Some((999, 9990)));
assert_eq!(opef.next_geq(9991), None);
}
// OPEF cursor traversal equals iter().
#[test]
fn test_opef_cursor_sequential() {
let docs: Vec<u32> = (0..500).map(|i| i * 10 + i % 7).collect();
let opef = OptimalPartitionedEliasFano::from_sorted(&docs);
let from_iter: Vec<u64> = opef.iter().collect();
let mut from_cursor = Vec::new();
let mut cursor = opef.cursor();
if let Some(v) = cursor.current() {
from_cursor.push(v);
while cursor.advance() {
from_cursor.push(cursor.current().unwrap());
}
}
assert_eq!(from_cursor, from_iter);
}
// OPEF cursor advance_to_geq: exact hits, a repeated target (no move),
// long jumps, and a past-the-end target.
#[test]
fn test_opef_cursor_advance_to_geq() {
let docs: Vec<u32> = (0..1000).map(|i| i * 10).collect();
let opef = OptimalPartitionedEliasFano::from_sorted(&docs);
let mut cursor = opef.cursor();
assert!(cursor.advance_to_geq(50));
assert_eq!(cursor.current(), Some(50));
assert!(cursor.advance_to_geq(100));
assert_eq!(cursor.current(), Some(100));
assert!(cursor.advance_to_geq(100));
assert_eq!(cursor.current(), Some(100));
assert!(cursor.advance_to_geq(5000));
assert_eq!(cursor.current(), Some(5000));
assert!(cursor.advance_to_geq(9990));
assert_eq!(cursor.current(), Some(9990));
assert!(!cursor.advance_to_geq(10000));
}
// Stateful cursor advance_to_geq over ascending targets must agree with the
// stateless next_geq: same hit count, and each cursor hit is >= the
// stateless answer (the cursor only moves forward).
#[test]
fn test_opef_cursor_advance_to_geq_sorted_targets() {
    let docs: Vec<u32> = (0..1000).map(|i| i * 7 + i % 3).collect();
    let opef = OptimalPartitionedEliasFano::from_sorted(&docs);
    let targets: Vec<u64> = (0..200).map(|i| (i * 35) as u64).collect();
    let mut cursor = opef.cursor();
    let mut cursor_results = Vec::new();
    for &t in &targets {
        if cursor.advance_to_geq(t) {
            cursor_results.push(cursor.current().unwrap());
        }
    }
    let mut stateless_results = Vec::new();
    for &t in &targets {
        // `t` is already u64; the previous `t as u64` cast was a no-op.
        if let Some((_, v)) = opef.next_geq(t) {
            stateless_results.push(v);
        }
    }
    assert_eq!(cursor_results.len(), stateless_results.len());
    for (c, s) in cursor_results.iter().zip(stateless_results.iter()) {
        assert!(*c >= *s, "cursor {} should be >= stateless {}", c, s);
    }
}
// OPEF batch cursor traversal equals iter().
#[test]
fn test_opef_batch_cursor() {
let docs: Vec<u32> = (0..500).map(|i| i * 10 + i % 7).collect();
let opef = OptimalPartitionedEliasFano::from_sorted(&docs);
let from_iter: Vec<u64> = opef.iter().collect();
let mut from_batch = Vec::new();
let mut bc = opef.batch_cursor();
if let Some(v) = bc.current() {
from_batch.push(v);
while bc.advance() {
from_batch.push(bc.current().unwrap());
}
}
assert_eq!(from_batch, from_iter);
}
// A 5-element list selects the Dense encoding and answers lookups correctly.
#[test]
fn test_hybrid_dense() {
let docs: Vec<u32> = vec![1, 5, 10, 20, 50];
let h = HybridPostingList::from_sorted(&docs);
assert_eq!(h.encoding(), PostingEncoding::Dense);
assert_eq!(h.len(), 5);
assert_eq!(h.get(0), Some(1));
assert_eq!(h.get(4), Some(50));
assert_eq!(h.next_geq(6), Some((2, 10)));
}
// A 100-element list selects the EliasFano encoding.
#[test]
fn test_hybrid_ef() {
let docs: Vec<u32> = (0..100).map(|i| i * 10).collect();
let h = HybridPostingList::from_sorted(&docs);
assert_eq!(h.encoding(), PostingEncoding::EliasFano);
assert_eq!(h.len(), 100);
assert_eq!(h.get(50), Some(500));
assert_eq!(h.next_geq(55), Some((6, 60)));
}
// A 500-element list selects the Partitioned encoding.
#[test]
fn test_hybrid_partitioned() {
let docs: Vec<u32> = (0..500).map(|i| i * 10).collect();
let h = HybridPostingList::from_sorted(&docs);
assert_eq!(h.encoding(), PostingEncoding::Partitioned);
assert_eq!(h.len(), 500);
assert_eq!(h.get(0), Some(0));
assert_eq!(h.next_geq(4990), Some((499, 4990)));
assert_eq!(h.next_geq(4991), None);
}
// A 5000-element list selects the Optimal encoding; results are checked
// differentially against a plain EliasFano over the same values.
#[test]
fn test_hybrid_optimal() {
let docs: Vec<u32> = (0..5000).map(|i| i * 10).collect();
let h = HybridPostingList::from_sorted(&docs);
assert_eq!(h.encoding(), PostingEncoding::Optimal);
assert_eq!(h.len(), 5000);
let ef = EliasFano::from_sorted(&docs);
for i in (0..5000).step_by(100) {
assert_eq!(h.get(i), ef.get(i), "get({}) mismatch", i);
}
for t in (0..50000).step_by(77) {
assert_eq!(h.next_geq(t), ef.next_geq(t), "next_geq({}) mismatch", t);
}
}
// with_encoding overrides auto-selection; both encodings agree on get().
#[test]
fn test_hybrid_force_encoding() {
let docs: Vec<u32> = (0..100).map(|i| i * 10).collect();
let dense = HybridPostingList::with_encoding(&docs, PostingEncoding::Dense);
assert_eq!(dense.encoding(), PostingEncoding::Dense);
let ef = HybridPostingList::with_encoding(&docs, PostingEncoding::EliasFano);
assert_eq!(ef.encoding(), PostingEncoding::EliasFano);
for i in 0..100 {
assert_eq!(dense.get(i), ef.get(i));
}
}
// Empty hybrid list falls back to Dense and answers all queries with None.
#[test]
fn test_hybrid_empty() {
let h = HybridPostingList::from_sorted(&[]);
assert_eq!(h.encoding(), PostingEncoding::Dense);
assert_eq!(h.len(), 0);
assert_eq!(h.get(0), None);
assert_eq!(h.next_geq(0), None);
}
// Release-only benchmark printing next_geq latency and bits/elem for all
// four encodings over the same 100K-element input.
#[test]
fn test_hybrid_performance_comparison() {
let docs: Vec<u32> = (0..100000).map(|i| i * 10).collect();
let targets: Vec<u64> = (0..10000).map(|i| (i * 100) as u64).collect();
let dense = HybridPostingList::with_encoding(&docs, PostingEncoding::Dense);
let ef = HybridPostingList::with_encoding(&docs, PostingEncoding::EliasFano);
let pef = HybridPostingList::with_encoding(&docs, PostingEncoding::Partitioned);
let opef = HybridPostingList::with_encoding(&docs, PostingEncoding::Optimal);
#[cfg(not(debug_assertions))]
{
let iters = 50;
for (name, h) in [("Dense", &dense), ("EF", &ef), ("PEF", &pef), ("OPEF", &opef)] {
let start = std::time::Instant::now();
let mut sink = 0usize;
for _ in 0..iters {
for &t in &targets {
if h.next_geq(t).is_some() { sink += 1; }
}
}
let elapsed = start.elapsed();
let per_call = elapsed.as_nanos() as f64 / (iters as f64 * targets.len() as f64);
eprintln!("{name}: {per_call:.1}ns/call, {:.1} bits/elem [sink={sink}]",
h.bits_per_element());
}
}
}
// advance_to_index at first/last position; an out-of-range index returns
// false and leaves the cursor position unchanged.
#[test]
fn test_cursor_advance_to_index_basic() {
let vals = vec![3, 5, 11, 27, 31, 42, 58, 63];
let ef = EliasFano::from_sorted(&vals);
let mut cursor = ef.cursor();
assert!(cursor.advance_to_index(0));
assert_eq!(cursor.current(), Some(3));
assert_eq!(cursor.index(), 0);
assert!(cursor.advance_to_index(7));
assert_eq!(cursor.current(), Some(63));
assert_eq!(cursor.index(), 7);
assert!(!cursor.advance_to_index(8));
assert_eq!(cursor.index(), 7);
assert_eq!(cursor.current(), Some(63));
}
// advance() continues correctly after an index jump.
#[test]
fn test_cursor_advance_to_index_then_advance() {
let vals = vec![3, 5, 11, 27, 31, 42, 58, 63];
let ef = EliasFano::from_sorted(&vals);
let mut cursor = ef.cursor();
assert!(cursor.advance_to_index(5));
assert_eq!(cursor.current(), Some(42));
assert!(cursor.advance());
assert_eq!(cursor.current(), Some(58));
assert_eq!(cursor.index(), 6);
}
// advance_to_geq works correctly after an index jump.
#[test]
fn test_cursor_advance_to_index_then_geq() {
let vals = vec![3, 5, 11, 27, 31, 42, 58, 63];
let ef = EliasFano::from_sorted(&vals);
let mut cursor = ef.cursor();
assert!(cursor.advance_to_index(2));
assert_eq!(cursor.current(), Some(11));
assert!(cursor.advance_to_geq(40));
assert_eq!(cursor.current(), Some(42));
assert_eq!(cursor.index(), 5);
}
// advance_to_index supports moving backward to an earlier index.
#[test]
fn test_cursor_advance_to_index_backward() {
let vals = vec![3, 5, 11, 27, 31, 42, 58, 63];
let ef = EliasFano::from_sorted(&vals);
let mut cursor = ef.cursor();
assert!(cursor.advance_to_index(5));
assert_eq!(cursor.current(), Some(42));
assert!(cursor.advance_to_index(2));
assert_eq!(cursor.current(), Some(11));
assert_eq!(cursor.index(), 2);
}
// advance_to_index(i) agrees with get(i) for every index.
#[test]
fn test_cursor_advance_to_index_roundtrip() {
let vals = vec![3, 5, 11, 27, 31, 42, 58, 63];
let ef = EliasFano::from_sorted(&vals);
let mut cursor = ef.cursor();
for i in 0..vals.len() {
assert!(cursor.advance_to_index(i));
assert_eq!(cursor.current(), ef.get(i));
assert_eq!(cursor.index(), i);
}
}
// Jumping to the current index is a no-op that still returns true.
#[test]
fn test_cursor_advance_to_index_same_position() {
let vals = vec![10, 20, 30];
let ef = EliasFano::from_sorted(&vals);
let mut cursor = ef.cursor();
assert!(cursor.advance_to_index(1));
assert_eq!(cursor.current(), Some(20));
assert!(cursor.advance_to_index(1));
assert_eq!(cursor.current(), Some(20));
}
// advance_to_index on an empty list always returns false.
#[test]
fn test_cursor_advance_to_index_empty() {
let ef = EliasFano::from_sorted(&[]);
let mut cursor = ef.cursor();
assert!(!cursor.advance_to_index(0));
}
// PEF advance_to_index across chunk boundaries (127/128/129, 255/256),
// past-the-end rejection, and a backward jump afterwards.
#[test]
fn test_pef_cursor_advance_to_index() {
let vals: Vec<u32> = (0..500).map(|i| i * 3 + 1).collect();
let pef = PartitionedEliasFano::from_sorted(&vals);
let mut cursor = pef.cursor();
for &idx in &[0, 1, 127, 128, 129, 255, 256, 300, 499] {
assert!(cursor.advance_to_index(idx), "advance_to_index({idx}) failed");
assert_eq!(cursor.current(), Some(vals[idx] as u64), "wrong value at {idx}");
assert_eq!(cursor.index(), idx);
}
assert!(!cursor.advance_to_index(500));
assert!(cursor.advance_to_index(300));
assert!(cursor.advance_to_index(50));
assert_eq!(cursor.current(), Some(vals[50] as u64));
}
// OPEF advance_to_index on scattered indices, past-the-end rejection, and
// a backward jump afterwards.
#[test]
fn test_opef_cursor_advance_to_index() {
let vals: Vec<u32> = (0..500).map(|i| i * 3 + 1).collect();
let opef = OptimalPartitionedEliasFano::from_sorted(&vals);
let mut cursor = opef.cursor();
for &idx in &[0, 1, 50, 100, 200, 300, 499] {
assert!(cursor.advance_to_index(idx), "advance_to_index({idx}) failed");
assert_eq!(cursor.current(), Some(vals[idx] as u64), "wrong value at {idx}");
assert_eq!(cursor.index(), idx);
}
assert!(!cursor.advance_to_index(500));
assert!(cursor.advance_to_index(400));
assert!(cursor.advance_to_index(100));
assert_eq!(cursor.current(), Some(vals[100] as u64));
}
// advance_to_index with values up to u32::MAX.
#[test]
fn test_cursor_advance_to_index_large_values() {
let vals = vec![0, 1000, u32::MAX / 2, u32::MAX - 1000, u32::MAX];
let ef = EliasFano::from_sorted(&vals);
let mut cursor = ef.cursor();
for i in 0..vals.len() {
assert!(cursor.advance_to_index(i));
assert_eq!(cursor.current(), Some(vals[i] as u64));
assert_eq!(cursor.index(), i);
}
}
// 1500-element list: strided forward jumps, then backward, last, and first.
#[test]
fn test_cursor_advance_to_index_large_sequence() {
let vals: Vec<u32> = (0..1500).map(|i| i * 7 + 3).collect();
let ef = EliasFano::from_sorted(&vals);
let mut cursor = ef.cursor();
for i in (0..1500).step_by(10) {
assert!(cursor.advance_to_index(i));
assert_eq!(cursor.current(), Some(vals[i] as u64));
assert_eq!(cursor.index(), i);
}
assert!(cursor.advance_to_index(1000));
assert!(cursor.advance_to_index(500));
assert_eq!(cursor.current(), Some(vals[500] as u64));
assert!(cursor.advance_to_index(1499));
assert_eq!(cursor.current(), Some(vals[1499] as u64));
assert!(cursor.advance_to_index(0));
assert_eq!(cursor.current(), Some(vals[0] as u64));
}
// Alternating forward/backward index jumps, then a plain advance().
#[test]
fn test_cursor_advance_to_index_multiple_backward_jumps() {
let vals: Vec<u32> = (0..200).map(|i| i * 5).collect();
let ef = EliasFano::from_sorted(&vals);
let mut cursor = ef.cursor();
assert!(cursor.advance_to_index(100));
assert_eq!(cursor.current(), Some(500));
assert!(cursor.advance_to_index(50));
assert_eq!(cursor.current(), Some(250));
assert!(cursor.advance_to_index(150));
assert_eq!(cursor.current(), Some(750));
assert!(cursor.advance_to_index(25));
assert_eq!(cursor.current(), Some(125));
assert!(cursor.advance());
assert_eq!(cursor.index(), 26);
assert_eq!(cursor.current(), Some(130));
}
// PEF advance_to_index on an empty list returns false.
#[test]
fn test_empty_pef_advance_to_index() {
let pef = PartitionedEliasFano::from_sorted(&[]);
let mut cursor = pef.cursor();
assert!(!cursor.advance_to_index(0));
}
// OPEF advance_to_index on an empty list returns false.
#[test]
fn test_empty_opef_advance_to_index() {
let opef = OptimalPartitionedEliasFano::from_sorted(&[]);
let mut cursor = opef.cursor();
assert!(!cursor.advance_to_index(0));
}
// PEF index jumps within a chunk, backward, across the 127/128 boundary,
// then a plain advance() past it.
#[test]
fn test_pef_cursor_within_chunk_jumps() {
let vals: Vec<u32> = (0..500).map(|i| i * 3).collect();
let pef = PartitionedEliasFano::from_sorted(&vals);
let mut cursor = pef.cursor();
assert!(cursor.advance_to_index(10));
assert_eq!(cursor.current(), Some(30));
assert!(cursor.advance_to_index(20));
assert_eq!(cursor.current(), Some(60));
assert!(cursor.advance_to_index(5));
assert_eq!(cursor.current(), Some(15));
assert!(cursor.advance_to_index(127));
assert_eq!(cursor.current(), Some(vals[127] as u64));
assert_eq!(cursor.index(), 127);
assert!(cursor.advance_to_index(128));
assert_eq!(cursor.current(), Some(vals[128] as u64));
assert_eq!(cursor.index(), 128);
assert!(cursor.advance());
assert_eq!(cursor.index(), 129);
assert_eq!(cursor.current(), Some(vals[129] as u64));
}
#[test]
fn test_pef_cursor_chunk_boundary_roundtrip() {
    let vals: Vec<u32> = (0..500).map(|i| i * 3).collect();
    let pef = PartitionedEliasFano::from_sorted(&vals);
    let mut cursor = pef.cursor();
    // Walk index-by-index through the 125..132 window straddling the
    // presumed 127/128 chunk boundary; each jump must land exactly on target.
    for (i, &expected) in vals.iter().enumerate().take(132).skip(125) {
        assert!(cursor.advance_to_index(i));
        assert_eq!(cursor.current(), Some(expected as u64));
        assert_eq!(cursor.index(), i);
    }
}
#[test]
fn test_pef_cursor_advance_to_index_large_values() {
    // Values up to u32::MAX exercise wide gaps in the high-bits array.
    let vals = vec![0, 1000, u32::MAX / 2, u32::MAX - 1000, u32::MAX];
    let pef = PartitionedEliasFano::from_sorted(&vals);
    let mut cursor = pef.cursor();
    for i in 0..vals.len() {
        assert!(cursor.advance_to_index(i));
        assert_eq!(cursor.current(), Some(vals[i] as u64));
        // Pin the reported index as well (previously unchecked).
        assert_eq!(cursor.index(), i);
    }
}
#[test]
fn test_opef_cursor_advance_to_index_within_chunk() {
    // 1000 evenly spaced values, so vals[i] == i * 5.
    let vals: Vec<u32> = (0..1000).map(|i| i * 5).collect();
    let opef = OptimalPartitionedEliasFano::from_sorted(&vals);
    let mut cursor = opef.cursor();
    // Short forward/backward hops near the start, then a long forward jump
    // and a long backward jump; every landing value must be vals[target].
    for &target in &[10usize, 20, 5, 900, 50] {
        assert!(cursor.advance_to_index(target));
        assert_eq!(cursor.current(), Some(vals[target] as u64));
    }
}
#[test]
fn test_opef_cursor_advance_to_index_then_advance() {
    let vals: Vec<u32> = (0..500).map(|i| i * 3 + 1).collect();
    let opef = OptimalPartitionedEliasFano::from_sorted(&vals);
    let mut cursor = opef.cursor();
    // Random-access jump, then a plain sequential advance from there.
    assert!(cursor.advance_to_index(250));
    assert_eq!(cursor.current(), Some(vals[250] as u64));
    assert!(cursor.advance());
    assert_eq!(cursor.index(), 251);
    assert_eq!(cursor.current(), Some(vals[251] as u64));
    // Backward jump by index, then a value-based seek forward.
    assert!(cursor.advance_to_index(100));
    assert_eq!(cursor.current(), Some(vals[100] as u64));
    let want = vals[200] as u64;
    assert!(cursor.advance_to_geq(want));
    assert_eq!(cursor.index(), 200);
}
}