use crate::SipHasherBuilder;
#[cfg(feature = "serde")]
use serde_crate::{Deserialize, Serialize};
use std::borrow::Borrow;
use std::cmp::Ordering;
use std::f64::consts;
use std::hash::{BuildHasher, Hash, Hasher};
use std::marker::PhantomData;
const SHIFTED_MASK: u64 = 0b001;
const CONTINUATION_MASK: u64 = 0b010;
const OCCUPIED_MASK: u64 = 0b100;
const METADATA_MASK: u64 = 0b111;
const METADATA_BITS: u8 = 3;
#[cfg_attr(
feature = "serde",
derive(Deserialize, Serialize),
serde(crate = "serde_crate")
)]
pub struct QuotientFilter<T, B = SipHasherBuilder> {
quotient_bits: u8,
remainder_bits: u8,
slot_bits: u8,
quotient_mask: u64,
remainder_mask: u64,
slot_mask: u64,
table: Vec<u64>,
hash_builder: B,
len: usize,
_marker: PhantomData<T>,
}
impl<T> QuotientFilter<T> {
pub fn new(quotient_bits: u8, remainder_bits: u8) -> Self {
Self::with_hasher(
quotient_bits,
remainder_bits,
SipHasherBuilder::from_entropy(),
)
}
pub fn from_fpp(capacity: usize, fpp: f64) -> Self {
Self::from_fpp_with_hasher(capacity, fpp, SipHasherBuilder::from_entropy())
}
}
impl<T, B> QuotientFilter<T, B>
where
B: BuildHasher,
{
fn get_hash<U>(&self, item: &U) -> u64
where
T: Borrow<U>,
U: Hash + ?Sized,
{
let mut hasher = self.hash_builder.build_hasher();
item.hash(&mut hasher);
hasher.finish()
}
fn get_mask(size: u8) -> u64 {
(1u64 << size) - 1
}
fn get_quotient_and_remainder(&self, hash: u64) -> (usize, u64) {
(
((hash >> self.remainder_bits) & self.quotient_mask) as usize,
hash & self.remainder_mask,
)
}
fn increment_index(&self, index: &mut usize) {
if *index == self.capacity() - 1 {
*index = 0;
} else {
*index += 1;
}
}
fn decrement_index(&self, index: &mut usize) {
if *index == 0 {
*index = self.capacity() - 1;
} else {
*index -= 1;
}
}
fn get_slot(&self, index: usize) -> u64 {
let mut slot = 0;
let bit_offset = self.slot_bits as usize * index;
let table_index = bit_offset / 64;
let bit_index = bit_offset % 64;
let bits_left = self.slot_bits as isize - (64 - bit_index as isize);
slot |= (self.table[table_index] >> bit_index) & self.slot_mask;
if bits_left > 0 {
let offset = self.slot_bits - bits_left as u8;
slot |= (self.table[table_index + 1] & Self::get_mask(bits_left as u8)) << offset;
}
slot
}
fn set_slot(&mut self, index: usize, slot: u64) {
let bit_offset = self.slot_bits as usize * index;
let table_index = bit_offset / 64;
let bit_index = bit_offset % 64;
let bits_left = self.slot_bits as isize - (64 - bit_index as isize);
self.table[table_index] &= !(self.slot_mask << bit_index);
self.table[table_index] |= slot << bit_index;
if bits_left > 0 {
let offset = self.slot_bits - bits_left as u8;
self.table[table_index + 1] &= !Self::get_mask(bits_left as u8);
self.table[table_index + 1] |= slot >> offset;
}
}
fn get_run_start(&self, mut index: usize) -> (usize, usize, usize) {
let mut occupied_count = 0;
loop {
let slot = self.get_slot(index);
if slot & OCCUPIED_MASK != 0 {
occupied_count += 1;
}
if slot & SHIFTED_MASK == 0 {
break;
}
self.decrement_index(&mut index);
}
let mut runs_count = 0;
let mut total_occupied_count = 0;
loop {
let slot = self.get_slot(index);
if slot & OCCUPIED_MASK != 0 {
total_occupied_count += 1;
}
if slot & CONTINUATION_MASK == 0 {
runs_count += 1;
}
if occupied_count == runs_count {
break;
}
self.increment_index(&mut index);
}
(index, runs_count, total_occupied_count)
}
fn insert_and_shift_right(&mut self, mut index: usize, slot: u64) {
let mut curr_slot = slot;
loop {
let mut next_slot = self.get_slot(index);
let is_empty_slot = next_slot & METADATA_MASK == 0;
if next_slot & OCCUPIED_MASK != 0 {
next_slot &= !OCCUPIED_MASK;
curr_slot |= OCCUPIED_MASK;
}
self.set_slot(index, curr_slot);
curr_slot = next_slot;
self.increment_index(&mut index);
if is_empty_slot {
break;
}
curr_slot |= SHIFTED_MASK;
}
}
pub fn with_hasher(quotient_bits: u8, remainder_bits: u8, hash_builder: B) -> Self {
assert!(quotient_bits > 0);
assert!(remainder_bits > 0);
assert!(quotient_bits + remainder_bits <= 64);
let slot_bits = remainder_bits + METADATA_BITS;
let table_len = ((u64::from(slot_bits) * (1u64 << quotient_bits)) as usize + 63) / 64;
QuotientFilter {
quotient_bits,
remainder_bits,
slot_bits,
quotient_mask: Self::get_mask(quotient_bits),
remainder_mask: Self::get_mask(remainder_bits),
slot_mask: Self::get_mask(slot_bits),
table: vec![0; table_len],
hash_builder,
len: 0,
_marker: PhantomData,
}
}
pub fn from_fpp_with_hasher(capacity: usize, fpp: f64, hash_builder: B) -> Self {
let quotient_bits = ((capacity * 2) as f64).log2().ceil() as u8;
let remainder_bits = (1.0 / -2.0 / (1.0 - fpp).ln()).log2().ceil() as u8;
Self::with_hasher(quotient_bits, remainder_bits, hash_builder)
}
pub fn insert<U>(&mut self, item: &U)
where
T: Borrow<U>,
U: Hash + ?Sized,
{
let (quotient, remainder) = self.get_quotient_and_remainder(self.get_hash(item));
let slot = self.get_slot(quotient);
if slot & METADATA_MASK == 0 {
self.set_slot(quotient, (remainder << METADATA_BITS) | OCCUPIED_MASK);
self.len += 1;
return;
}
if self.contains(item) {
return;
}
assert!(self.len() < self.capacity());
let new_run = {
if slot & OCCUPIED_MASK == 0 {
self.set_slot(quotient, slot | OCCUPIED_MASK);
true
} else {
false
}
};
let (mut index, ..) = self.get_run_start(quotient);
let run_start = index;
let mut new_slot = remainder << METADATA_BITS;
let mut slot = self.get_slot(index);
if !new_run {
loop {
if remainder < slot >> METADATA_BITS {
break;
}
self.increment_index(&mut index);
slot = self.get_slot(index);
if slot & CONTINUATION_MASK == 0 {
break;
}
}
if index == run_start {
let mut run_start_slot = self.get_slot(run_start);
run_start_slot |= CONTINUATION_MASK;
self.set_slot(run_start, run_start_slot);
} else {
new_slot |= CONTINUATION_MASK;
}
}
if index != quotient {
new_slot |= SHIFTED_MASK;
}
self.len += 1;
self.insert_and_shift_right(index, new_slot);
}
pub fn contains<U>(&self, item: &U) -> bool
where
T: Borrow<U>,
U: Hash + ?Sized,
{
let (quotient, remainder) = self.get_quotient_and_remainder(self.get_hash(item));
let slot = self.get_slot(quotient);
if slot & OCCUPIED_MASK == 0 {
return false;
}
if slot >> METADATA_BITS == remainder
&& slot & CONTINUATION_MASK == 0
&& slot & SHIFTED_MASK == 0
{
return true;
}
let (mut index, ..) = self.get_run_start(quotient);
let mut slot = self.get_slot(index);
loop {
match (slot >> METADATA_BITS).cmp(&remainder) {
Ordering::Equal => return true,
Ordering::Greater => return false,
Ordering::Less => {
self.increment_index(&mut index);
slot = self.get_slot(index);
if slot & CONTINUATION_MASK == 0 {
return false;
}
}
}
}
}
pub fn remove<U>(&mut self, item: &U)
where
T: Borrow<U>,
U: Hash + ?Sized,
{
let (quotient, remainder) = self.get_quotient_and_remainder(self.get_hash(item));
if self.get_slot(quotient) & METADATA_MASK == 0 {
return;
}
let (mut index, mut runs_count, mut occupied_count) = self.get_run_start(quotient);
let mut slot = self.get_slot(index);
loop {
match (slot >> METADATA_BITS).cmp(&remainder) {
Ordering::Equal => break,
Ordering::Greater => return,
Ordering::Less => {
self.increment_index(&mut index);
slot = self.get_slot(index);
if slot & OCCUPIED_MASK != 0 {
occupied_count += 1;
}
if slot & CONTINUATION_MASK == 0 {
return;
}
}
}
}
let mut is_run_start = slot & CONTINUATION_MASK == 0;
slot &= OCCUPIED_MASK;
self.set_slot(index, 0);
let mut next_index = index;
self.increment_index(&mut next_index);
let mut next_slot = self.get_slot(next_index);
while next_slot & CONTINUATION_MASK != 0 || next_slot & SHIFTED_MASK != 0 {
self.set_slot(next_index, 0);
if next_slot & CONTINUATION_MASK == 0 {
runs_count += 1;
if is_run_start {
let canonical_slot = self.get_slot(quotient) & !OCCUPIED_MASK;
self.set_slot(quotient, canonical_slot);
}
}
else if !is_run_start {
slot |= CONTINUATION_MASK;
}
is_run_start = false;
if slot & OCCUPIED_MASK == 0 || occupied_count != runs_count {
slot |= SHIFTED_MASK;
}
slot |= next_slot & !METADATA_MASK;
self.set_slot(index, slot);
if next_slot & OCCUPIED_MASK != 0 {
occupied_count += 1;
}
slot = next_slot & OCCUPIED_MASK;
index = next_index;
self.increment_index(&mut next_index);
next_slot = self.get_slot(next_index);
}
self.len -= 1;
}
pub fn clear(&mut self) {
for value in &mut self.table {
*value = 0;
}
self.len = 0;
}
pub fn len(&self) -> usize {
self.len
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn capacity(&self) -> usize {
1 << self.quotient_bits
}
pub fn quotient_bits(&self) -> u8 {
self.quotient_bits
}
pub fn remainder_bits(&self) -> u8 {
self.remainder_bits
}
pub fn estimated_fpp(&self) -> f64 {
let fill_ratio = self.len() as f64 / self.capacity() as f64;
1.0 - consts::E.powf(-fill_ratio / 2.0f64.powf(f64::from(self.remainder_bits)))
}
pub fn hasher(&self) -> &B {
&self.hash_builder
}
}
use std::fmt;
impl<T> fmt::Debug for QuotientFilter<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for i in 0..self.capacity() {
let slot = self.get_slot(i);
write!(f, "{}|{}:{:03b} ", i, slot >> 3, slot & METADATA_MASK)?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::QuotientFilter;
use rand::{seq::SliceRandom, Rng, SeedableRng};
#[test]
fn test_new() {
let mut filter = QuotientFilter::<usize>::new(8, 4);
assert_eq!(filter.capacity(), 256);
assert_eq!(filter.quotient_bits(), 8);
assert_eq!(filter.remainder_bits(), 4);
assert!(filter.is_empty());
for i in 0..128 {
filter.insert(&i);
}
assert!(filter.estimated_fpp() < 0.05);
}
#[test]
fn test_from_fpp() {
let mut filter = QuotientFilter::<usize>::from_fpp(100, 0.05);
assert_eq!(filter.capacity(), 256);
assert_eq!(filter.quotient_bits(), 8);
assert_eq!(filter.remainder_bits(), 4);
assert!(filter.is_empty());
for i in 0..128 {
filter.insert(&i);
}
assert!(filter.estimated_fpp() < 0.05);
}
#[test]
fn test_insert() {
let mut filter = QuotientFilter::<String>::new(8, 4);
filter.insert("foo");
assert_eq!(filter.len(), 1);
assert!(!filter.is_empty());
assert!(filter.contains("foo"));
}
#[test]
fn test_insert_existing_item() {
let mut filter = QuotientFilter::<String>::new(8, 4);
filter.insert("foo");
filter.insert("foo");
assert_eq!(filter.len(), 1);
assert!(!filter.is_empty());
assert!(filter.contains("foo"));
}
#[test]
fn test_remove() {
let mut filter = QuotientFilter::<String>::new(8, 4);
filter.insert("foo");
filter.remove("foo");
assert_eq!(filter.len(), 0);
assert!(filter.is_empty());
assert!(!filter.contains("foo"));
}
#[test]
fn test_clear() {
let mut filter = QuotientFilter::<String>::new(8, 4);
filter.insert("foobar");
filter.insert("barfoo");
filter.insert("baz");
filter.insert("qux");
filter.clear();
assert!(!filter.contains("baz"));
assert!(!filter.contains("qux"));
assert!(!filter.contains("foobar"));
assert!(!filter.contains("barfoo"));
}
#[test]
fn test_stress() {
let mut rng = rand_xorshift::XorShiftRng::from_entropy();
let quotient_bits = 16;
let remainder_bits = 48;
let n = 18;
let mut filter = QuotientFilter::<u64>::new(quotient_bits, remainder_bits);
assert!(filter.is_empty());
assert_eq!(filter.quotient_bits(), quotient_bits);
assert_eq!(filter.remainder_bits(), remainder_bits);
let mut items = Vec::new();
for _ in 0..1 << quotient_bits {
let mut item = rng.gen_range(1 << n, 1 << 63);
while filter.contains(&item) {
item = rng.gen_range(1 << n, 1 << 63);
}
filter.insert(&item);
filter.insert(&item);
items.push(item);
assert_eq!(filter.len(), items.len());
}
items.shuffle(&mut rng);
for item in items {
assert!(filter.contains(&item));
filter.remove(&item);
assert!(!filter.contains(&item));
}
}
#[cfg(feature = "serde")]
#[test]
fn test_ser_de() {
let mut filter = QuotientFilter::<String>::new(8, 4);
filter.insert("foo");
let serialized_filter = bincode::serialize(&filter).unwrap();
let de_filter: QuotientFilter<String> = bincode::deserialize(&serialized_filter).unwrap();
assert!(de_filter.contains("foo"));
assert_eq!(filter.quotient_bits(), de_filter.quotient_bits());
assert_eq!(filter.remainder_bits(), de_filter.remainder_bits());
assert_eq!(filter.slot_bits, de_filter.slot_bits);
assert_eq!(filter.quotient_mask, de_filter.quotient_mask);
assert_eq!(filter.remainder_mask, de_filter.remainder_mask);
assert_eq!(filter.slot_mask, de_filter.slot_mask);
assert_eq!(filter.table, de_filter.table);
assert_eq!(filter.len(), de_filter.len());
assert_eq!(filter.hasher(), de_filter.hasher());
}
}