use crate::PairwiseIndependentHasher;
use std::ops::RangeBounds;
use vers_vecs::EliasFanoVec;
#[derive(Debug, Clone)]
pub struct RangeFilter {
pub ef: EliasFanoVec,
pub hasher: PairwiseIndependentHasher,
}
impl RangeFilter {
pub fn new<I>(values: I, hasher: PairwiseIndependentHasher) -> Self
where
I: Iterator<Item = u64>,
{
let mut hashed_values: Vec<u64> = values.map(|x| hasher.local_hash(x)).collect();
assert!(
!hashed_values.is_empty(),
"Cannot create a range filter with no elements"
);
hashed_values.sort_unstable();
hashed_values.dedup();
assert!(
hashed_values.last().expect("We checked emptyness above")
< &hasher.reduced_universe_size(),
"A hashed value was greater than the size of the reduced universe {}",
hasher.reduced_universe_size()
);
Self {
hasher,
ef: EliasFanoVec::from_slice(&hashed_values),
}
}
pub fn query<R>(&self, range: R) -> bool
where
R: RangeBounds<u64>,
{
let start = match range.start_bound() {
std::ops::Bound::Included(&i) => i,
std::ops::Bound::Excluded(_) => unreachable!("Somehow had an exclusive start bound"),
std::ops::Bound::Unbounded => 0,
};
let end = match range.end_bound() {
std::ops::Bound::Included(&i) => i,
std::ops::Bound::Excluded(&e) => e - 1,
std::ops::Bound::Unbounded => u64::MAX,
};
let start_hash = self.hasher.local_hash(start);
let end_hash = self.hasher.local_hash(end);
if start_hash > end_hash {
return self.min_hash() <= end_hash || start_hash <= self.max_hash();
}
self.ef
.predecessor(end_hash)
.is_some_and(|predecessor| predecessor >= start_hash)
}
fn min_hash(&self) -> u64 {
self.ef.get_unchecked(0)
}
fn max_hash(&self) -> u64 {
self.ef.get_unchecked(self.ef.len() - 1)
}
}