use crate::util::impl_ef_iterator;
use crate::BitVec;
use crate::RsVec;
use std::cmp::max;
const BIN_SEARCH_THRESHOLD: usize = 4;
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
pub struct EliasFanoVec {
upper_vec: RsVec,
lower_vec: BitVec,
universe_zero: u64,
universe_max: u64,
lower_len: usize,
len: usize,
}
impl EliasFanoVec {
#[must_use]
#[allow(clippy::cast_possible_truncation)]
#[allow(clippy::cast_sign_loss)] #[allow(clippy::cast_precision_loss)] pub fn from_slice(data: &[u64]) -> Self {
if data.is_empty() {
return Self {
upper_vec: RsVec::from_bit_vec(BitVec::new()),
lower_vec: BitVec::new(),
universe_zero: 0,
universe_max: 0,
lower_len: 0,
len: 0,
};
}
debug_assert!(
data.windows(2).all(|w| w[0] <= w[1]),
"Data must be sorted in ascending order"
);
let universe_zero = data[0];
let universe_bound = data[data.len() - 1] - universe_zero;
let log_n = ((data.len() + 2) as f64).log2().ceil() as usize;
let bits_per_number = (max(universe_bound, 2) as f64).log2().ceil() as usize;
let bits_for_upper_values = (max(data.len(), 2) as f64).log2().ceil() as usize;
let lower_width = max(bits_per_number, log_n) - bits_for_upper_values;
assert!(lower_width < 64);
let mut upper_vec =
BitVec::from_zeros(2 + data.len() + (universe_bound >> lower_width) as usize);
let mut lower_vec = BitVec::with_capacity(data.len() * lower_width);
for (i, &word) in data.iter().enumerate() {
let word = word - universe_zero;
let upper = (word >> lower_width) as usize;
let lower = word & ((1 << lower_width) - 1);
upper_vec.flip_bit_unchecked(upper + i + 1);
lower_vec.append_bits_unchecked(lower, lower_width);
}
Self {
upper_vec: RsVec::from_bit_vec(upper_vec),
lower_vec,
universe_zero,
universe_max: data[data.len() - 1],
lower_len: lower_width,
len: data.len(),
}
}
#[must_use]
pub fn len(&self) -> usize {
self.len
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[must_use]
pub fn get(&self, index: usize) -> Option<u64> {
if index >= self.len() {
return None;
}
Some(self.get_unchecked(index))
}
#[must_use]
pub fn select(&self, rank: usize) -> Option<u64> {
self.get(rank)
}
#[must_use]
#[allow(clippy::cast_possible_truncation)]
pub fn get_unchecked(&self, index: usize) -> u64 {
let upper = self.upper_vec.select1(index) - index - 1;
let lower = self
.lower_vec
.get_bits_unchecked(index * self.lower_len, self.lower_len);
((upper << self.lower_len) as u64 | lower) + self.universe_zero
}
#[must_use]
pub fn predecessor(&self, n: u64) -> Option<u64> {
if n < self.universe_zero || self.is_empty() {
None
} else {
Some(self.predecessor_unchecked(n))
}
}
#[inline(always)]
#[allow(clippy::too_many_lines)] #[allow(clippy::cast_sign_loss)] #[allow(clippy::cast_possible_wrap)] #[allow(clippy::cast_possible_truncation)] fn search_element_in_block<const INDEX: bool, const UPWARD: bool>(
&self,
start_index_upper: usize,
start_index_lower: usize,
query: u64,
query_upper: u64,
query_lower: u64,
query_masked_upper: u64,
) -> u64 {
let direction: isize = if UPWARD { 1 } else { -1 };
#[allow(clippy::collapsible_else_if)] let not_yet_enough: fn(_, _) -> bool = if INDEX {
if UPWARD {
PartialOrd::lt
} else {
PartialOrd::gt
}
} else {
if UPWARD {
PartialOrd::le
} else {
PartialOrd::ge
}
};
if self
.upper_vec
.get_unchecked((start_index_upper as isize + direction) as usize)
> 0
{
let mut lower_candidate = self.lower_vec.get_bits_unchecked(
(start_index_lower as isize) as usize * self.lower_len,
self.lower_len,
);
let candidate = query_masked_upper | lower_candidate;
if not_yet_enough(&candidate, &query) {
let mut cursor = direction;
while self
.upper_vec
.get_unchecked((start_index_upper as isize + cursor + direction) as usize)
> 0
{
let next_candidate = self.lower_vec.get_bits_unchecked(
(start_index_lower as isize + cursor) as usize * self.lower_len,
self.lower_len,
);
if (UPWARD && next_candidate > query_lower)
|| (!UPWARD && next_candidate < query_lower)
{
return if INDEX {
start_index_lower as u64 + cursor as u64
} else {
(query_masked_upper | lower_candidate) + self.universe_zero
};
} else if next_candidate == query_lower {
return if INDEX {
start_index_lower as u64 + cursor as u64
} else {
(query_masked_upper | next_candidate) + self.universe_zero
};
}
lower_candidate = next_candidate;
cursor += direction;
#[allow(clippy::comparison_chain)] if cursor.unsigned_abs() == BIN_SEARCH_THRESHOLD {
let block_end = if UPWARD {
self.upper_vec.select0((query_upper as isize + 1) as usize)
- query_upper as usize
- 2
} else {
self.upper_vec.select0((query_upper as isize) as usize)
- query_upper as usize
};
let mut upper_bound;
let mut lower_bound;
if UPWARD {
upper_bound = block_end;
lower_bound =
(start_index_lower as isize + cursor - direction) as usize;
} else {
upper_bound =
(start_index_lower as isize + cursor - direction) as usize;
lower_bound = block_end;
}
while lower_bound < upper_bound - 1 {
let middle = lower_bound + ((upper_bound - lower_bound) >> 1);
let middle_candidate = self
.lower_vec
.get_bits_unchecked(middle * self.lower_len, self.lower_len);
if middle_candidate > query_lower {
if !UPWARD {
lower_candidate = middle_candidate;
}
upper_bound = middle;
} else if middle_candidate == query_lower {
return if INDEX {
cursor = middle as isize;
while self.lower_vec.get_bits_unchecked(
(cursor - direction) as usize * self.lower_len,
self.lower_len,
) == query_lower
{
cursor -= direction;
}
cursor as u64
} else {
(query_masked_upper | middle_candidate) + self.universe_zero
};
} else {
if UPWARD {
lower_candidate = middle_candidate;
}
lower_bound = middle;
}
}
let final_bound = if UPWARD { lower_bound } else { upper_bound };
if (UPWARD && final_bound < block_end)
|| (!UPWARD && final_bound > block_end)
{
let check_candidate = self.lower_vec.get_bits_unchecked(
(final_bound as isize + direction) as usize * self.lower_len,
self.lower_len,
);
if not_yet_enough(&check_candidate, &query_lower) {
return if INDEX {
(final_bound as isize + direction + 1) as u64
} else {
(query_masked_upper | check_candidate) + self.universe_zero
};
}
}
if INDEX {
cursor = final_bound as isize + direction - start_index_lower as isize;
}
break;
}
}
return if INDEX {
start_index_lower as u64 + cursor as u64
} else {
(query_masked_upper | lower_candidate) + self.universe_zero
};
}
}
if INDEX {
start_index_lower as u64
} else {
self.get_unchecked((start_index_lower as isize - direction) as usize)
}
}
#[must_use]
#[allow(clippy::cast_possible_truncation)] pub fn predecessor_unchecked(&self, n: u64) -> u64 {
if n > self.universe_max {
return self.get_unchecked(self.len() - 1);
}
let n = n - self.universe_zero;
let upper_query = (n >> self.lower_len) as usize;
let lower_query = n & ((1 << self.lower_len) - 1);
let lower_bound_upper_index = self.upper_vec.select0(upper_query);
let lower_bound_lower_index = lower_bound_upper_index - upper_query;
let result_upper = (upper_query << self.lower_len) as u64;
self.search_element_in_block::<false, true>(
lower_bound_upper_index,
lower_bound_lower_index,
n,
upper_query as u64,
lower_query,
result_upper,
)
}
#[must_use]
pub fn successor(&self, n: u64) -> Option<u64> {
if n > self.universe_max || self.len == 0 {
None
} else {
Some(self.successor_unchecked(n))
}
}
#[must_use]
#[allow(clippy::cast_possible_truncation)] pub fn successor_unchecked(&self, n: u64) -> u64 {
if n < self.universe_zero {
return self.get_unchecked(0);
}
let n = n - self.universe_zero;
let upper_query = (n >> self.lower_len) as usize;
let lower_query = n & ((1 << self.lower_len) - 1);
let upper_bound_upper_index = self.upper_vec.select0(upper_query + 1);
let upper_bound_lower_index = upper_bound_upper_index - upper_query - 2;
let result_upper = (upper_query << self.lower_len) as u64;
self.search_element_in_block::<false, false>(
upper_bound_upper_index,
upper_bound_lower_index,
n,
upper_query as u64,
lower_query,
result_upper,
)
}
#[must_use]
pub fn delta(&self, index: usize) -> Option<u64> {
if index >= self.len() {
return None;
}
let upper_index = self.upper_vec.select1(index);
if self.upper_vec.get_unchecked(upper_index - 1) == 1 {
Some(
self.lower_vec
.get_bits_unchecked(index * self.lower_len, self.lower_len)
- self
.lower_vec
.get_bits_unchecked((index - 1) * self.lower_len, self.lower_len),
)
} else {
let query_upper_part = (upper_index - index - 1) << self.lower_len;
let query_number = query_upper_part as u64
| self
.lower_vec
.get_bits_unchecked(index * self.lower_len, self.lower_len);
if index == 0 {
Some(query_number + self.universe_zero)
} else {
let lower_element_upper_index = self.upper_vec.select1(index - 1);
let lower_element_upper = lower_element_upper_index - (index - 1) - 1;
let lower_elem = ((lower_element_upper as u64) << self.lower_len as u64)
| self
.lower_vec
.get_bits_unchecked((index - 1) * self.lower_len, self.lower_len);
Some(query_number - lower_elem)
}
}
}
#[must_use]
#[allow(clippy::cast_possible_truncation)] pub fn rank(&self, value: u64) -> u64 {
if value > self.universe_max || self.is_empty() {
return self.len() as u64;
}
if value < self.universe_zero {
return 0;
}
let value = value - self.universe_zero;
let upper = value >> self.lower_len;
let lower = value & ((1 << self.lower_len) - 1);
let query_begin = self.upper_vec.select0(upper as usize);
let lower_index = query_begin as u64 - upper;
self.search_element_in_block::<true, true>(
query_begin,
lower_index as usize,
value,
upper,
lower,
upper << self.lower_len,
)
}
#[must_use]
pub fn heap_size(&self) -> usize {
self.upper_vec.heap_size() + self.lower_vec.heap_size()
}
}
impl_ef_iterator! {
EliasFanoIter, EliasFanoRefIter
}
#[cfg(test)]
mod tests;