#![cfg_attr(feature = "dev", feature(plugin))]
#![cfg_attr(feature = "dev", plugin(clippy))]
mod bucket;
mod util;
use crate::bucket::{Bucket, Fingerprint, BUCKET_SIZE, FINGERPRINT_SIZE};
use crate::util::{get_alt_index, get_fai, FaI};
use std::cmp;
use std::collections::hash_map::DefaultHasher;
use std::error::Error as StdError;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::iter::repeat;
use std::marker::PhantomData;
use std::mem;
use rand::Rng;
#[cfg(feature = "serde_support")]
use serde_derive::{Deserialize, Serialize};
pub const MAX_REBUCKET: u32 = 500;
pub const DEFAULT_CAPACITY: usize = (1 << 20) - 1;
#[derive(Debug)]
pub enum CuckooError {
NotEnoughSpace,
}
impl fmt::Display for CuckooError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str("NotEnoughSpace")
}
}
impl StdError for CuckooError {
fn description(&self) -> &str {
"Not enough space to store this item, rebucketing failed."
}
}
pub struct CuckooFilter<H> {
buckets: Box<[Bucket]>,
len: usize,
_hasher: std::marker::PhantomData<H>,
}
impl Default for CuckooFilter<DefaultHasher> {
fn default() -> Self {
Self::new()
}
}
impl CuckooFilter<DefaultHasher> {
pub fn new() -> Self {
Self::with_capacity(DEFAULT_CAPACITY)
}
}
impl<H> CuckooFilter<H>
where
H: Hasher + Default,
{
pub fn with_capacity(cap: usize) -> Self {
let capacity = cmp::max(1, cap.next_power_of_two() / BUCKET_SIZE);
Self {
buckets: repeat(Bucket::new())
.take(capacity)
.collect::<Vec<_>>()
.into_boxed_slice(),
len: 0,
_hasher: PhantomData,
}
}
pub fn contains<T: ?Sized + Hash>(&self, data: &T) -> bool {
let FaI { fp, i1, i2 } = get_fai::<T, H>(data);
let len = self.buckets.len();
self.buckets[i1 % len]
.get_fingerprint_index(fp)
.or_else(|| self.buckets[i2 % len].get_fingerprint_index(fp))
.is_some()
}
pub fn add<T: ?Sized + Hash>(&mut self, data: &T) -> Result<(), CuckooError> {
let fai = get_fai::<T, H>(data);
if self.put(fai.fp, fai.i1) || self.put(fai.fp, fai.i2) {
return Ok(());
}
let len = self.buckets.len();
let mut rng = rand::thread_rng();
let mut i = fai.random_index(&mut rng);
let mut fp = fai.fp;
for _ in 0..MAX_REBUCKET {
let other_fp;
{
let loc = &mut self.buckets[i % len].buffer[rng.gen_range(0, BUCKET_SIZE)];
other_fp = *loc;
*loc = fp;
i = get_alt_index::<H>(other_fp, i);
}
if self.put(other_fp, i) {
return Ok(());
}
fp = other_fp;
}
Err(CuckooError::NotEnoughSpace)
}
pub fn test_and_add<T: ?Sized + Hash>(&mut self, data: &T) -> Result<bool, CuckooError> {
if self.contains(data) {
Ok(false)
} else {
self.add(data).map(|_| true)
}
}
pub fn len(&self) -> usize {
self.len
}
pub fn export(&self) -> ExportedCuckooFilter {
self.into()
}
pub fn memory_usage(&self) -> usize {
mem::size_of_val(self) + self.buckets.len() * mem::size_of::<Bucket>()
}
pub fn is_empty(&self) -> bool {
self.len == 0
}
pub fn delete<T: ?Sized + Hash>(&mut self, data: &T) -> bool {
let FaI { fp, i1, i2 } = get_fai::<T, H>(data);
self.remove(fp, i1) || self.remove(fp, i2)
}
fn values(&self) -> Vec<u8> {
self.buckets
.iter()
.flat_map(|b| b.get_fingerprint_data().into_iter())
.collect()
}
fn remove(&mut self, fp: Fingerprint, i: usize) -> bool {
let len = self.buckets.len();
if self.buckets[i % len].delete(fp) {
self.len -= 1;
true
} else {
false
}
}
fn put(&mut self, fp: Fingerprint, i: usize) -> bool {
let len = self.buckets.len();
if self.buckets[i % len].insert(fp) {
self.len += 1;
true
} else {
false
}
}
}
#[derive(Debug)]
#[cfg_attr(feature = "serde_support", derive(Deserialize, Serialize))]
pub struct ExportedCuckooFilter {
#[cfg_attr(feature = "serde_support", serde(with = "serde_bytes"))]
pub values: Vec<u8>,
pub length: usize,
}
impl<H> From<ExportedCuckooFilter> for CuckooFilter<H> {
fn from(exported: ExportedCuckooFilter) -> Self {
Self {
buckets: exported
.values
.chunks(BUCKET_SIZE * FINGERPRINT_SIZE)
.map(Bucket::from)
.collect::<Vec<_>>()
.into_boxed_slice(),
len: exported.length,
_hasher: PhantomData,
}
}
}
impl<H> From<&CuckooFilter<H>> for ExportedCuckooFilter
where
H: Hasher + Default,
{
fn from(cuckoo: &CuckooFilter<H>) -> Self {
Self {
values: cuckoo.values(),
length: cuckoo.len(),
}
}
}