#![forbid(unsafe_code)]
#![doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/README.md"))]
use std::hash::{Hash, BuildHasher, RandomState};
use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
const USIZE_BITS: usize = usize::BITS as usize;
const INDEX_1_SHIFT: u32 = usize::ilog2(USIZE_BITS);
const INDEX_2_MASK: usize = (1 << INDEX_1_SHIFT) - 1;
#[derive(Debug)]
pub struct Filter<H = RandomState> {
bits: Box<[AtomicUsize]>,
num_hashes: usize,
hasher: H,
}
impl<H> Filter<H> {
pub fn with_hasher(capacity: usize, false_positive_rate: f64, hasher: H) -> Self {
assert!(0.0 < false_positive_rate && false_positive_rate < 1.0);
let log2_eps = f64::log2(false_positive_rate);
let num_hashes = 1usize.max(f64::round(-log2_eps) as usize);
let min_num_bits = f64::round(capacity as f64 * -log2_eps / std::f64::consts::LN_2) as usize;
let num_bits = min_num_bits.next_power_of_two();
let num_slots = 1usize.max(num_bits / USIZE_BITS);
let bits: Box<[_]> = (0..num_slots).map(|_| AtomicUsize::new(0)).collect();
Filter { bits, num_hashes, hasher }
}
#[inline]
pub const fn as_bits(&self) -> &[AtomicUsize] {
&self.bits
}
#[inline]
pub const fn as_mut_bits(&mut self) -> &mut [AtomicUsize] {
&mut self.bits
}
#[inline]
pub fn into_bits(self) -> Box<[AtomicUsize]> {
self.bits
}
#[inline]
pub const fn num_slots(&self) -> usize {
self.bits.len()
}
#[inline]
pub const fn num_bits(&self) -> usize {
self.bits.len() * USIZE_BITS
}
#[inline]
pub const fn num_hashes(&self) -> usize {
self.num_hashes
}
#[inline]
const fn slot_mask(&self) -> usize {
self.num_slots() - 1
}
pub fn insert_hash(&self, mut hash: u64) -> bool {
let slot_mask = self.slot_mask();
let aux = aux_hash(hash);
let mut exists = true;
for _ in 0..self.num_hashes {
let h = hash as usize;
let slot_idx = (h >> INDEX_1_SHIFT) & slot_mask;
let bit_idx = h & INDEX_2_MASK;
let bit_mask = 1 << bit_idx;
let slot_bits = self.bits[slot_idx].fetch_or(bit_mask, Relaxed);
hash = next_hash(hash, aux);
exists &= (slot_bits & bit_mask) != 0;
}
exists
}
pub fn contains_hash(&self, mut hash: u64) -> bool {
let slot_mask = self.slot_mask();
let aux = aux_hash(hash);
(0..self.num_hashes).all(|_| {
let h = hash as usize;
let slot_idx = (h >> INDEX_1_SHIFT) & slot_mask;
let bit_idx = h & INDEX_2_MASK;
let bit_mask = 1 << bit_idx;
let slot_bits = self.bits[slot_idx].load(Relaxed);
hash = next_hash(hash, aux);
slot_bits & bit_mask != 0
})
}
pub fn clear(&self) {
for slot in &self.bits {
slot.store(0, Relaxed);
}
}
}
impl<H: BuildHasher> Filter<H> {
pub fn insert<T: Hash>(&self, element: T) -> bool {
let hash = self.hasher.hash_one(element);
self.insert_hash(hash)
}
pub fn contains<T: Hash>(&self, element: T) -> bool {
let hash = self.hasher.hash_one(element);
self.contains_hash(hash)
}
#[inline]
pub fn hash_item<T: Hash>(&self, element: T) -> u64 {
self.hasher.hash_one(element)
}
}
impl<H: Default> Filter<H> {
#[inline]
pub fn with_default_hasher(capacity: usize, false_positive_rate: f64) -> Self {
Self::with_hasher(capacity, false_positive_rate, H::default())
}
}
impl Filter<RandomState> {
#[inline]
pub fn new(capacity: usize, false_positive_rate: f64) -> Self {
Self::with_default_hasher(capacity, false_positive_rate)
}
}
impl<H: Clone> Clone for Filter<H> {
fn clone(&self) -> Self {
Filter {
bits: self.bits.iter().map(|slot| AtomicUsize::new(slot.load(Relaxed))).collect(),
num_hashes: self.num_hashes,
hasher: self.hasher.clone(),
}
}
}
impl<H: PartialEq> PartialEq for Filter<H> {
fn eq(&self, other: &Self) -> bool {
self.num_hashes == other.num_hashes && self.hasher == other.hasher && self.bits.len() == other.bits.len()
&& self.bits.iter().map(|x| x.load(Relaxed)).eq(other.bits.iter().map(|y| y.load(Relaxed)))
}
}
impl<H: Eq> Eq for Filter<H> {}
impl<H> From<Filter<H>> for Box<[AtomicUsize]> {
fn from(filter: Filter<H>) -> Self {
filter.into_bits()
}
}
impl<H> AsRef<[AtomicUsize]> for Filter<H> {
fn as_ref(&self) -> &[AtomicUsize] {
self.as_bits()
}
}
impl<H> AsMut<[AtomicUsize]> for Filter<H> {
fn as_mut(&mut self) -> &mut [AtomicUsize] {
self.as_mut_bits()
}
}
impl<H, T> Extend<T> for &Filter<H>
where
H: BuildHasher,
T: Hash,
{
fn extend<I>(&mut self, iter: I)
where
I: IntoIterator<Item = T>
{
for item in iter {
self.insert(item);
}
}
}
impl<H, T> Extend<T> for Filter<H>
where
H: BuildHasher,
T: Hash,
{
fn extend<I>(&mut self, iter: I)
where
I: IntoIterator<Item = T>
{
let mut this: &Self = &*self;
this.extend(iter);
}
}
#[inline]
fn aux_hash(hash: u64) -> u64 {
hash.wrapping_shr(32)
.wrapping_mul(0x517c_c1b7_2722_0a95) }
#[inline]
fn next_hash(hash: u64, aux: u64) -> u64 {
hash.wrapping_add(aux).rotate_left(5)
}
#[cfg(test)]
mod tests {
use super::*;
use std::thread::{self, JoinHandle};
use ahash::RandomState as AHashRandomState;
fn generic_hasher<H>(desired_fpr: f64) -> JoinHandle<()>
where
H: Default + BuildHasher
{
thread::spawn(move || generic_hasher_impl::<H>(desired_fpr))
}
fn generic_hasher_impl<H>(desired_fpr: f64)
where
H: Default + BuildHasher
{
for log_cap in [0, 4, 8, 12, 16, 20, 24, 26] {
let cap = 1 << log_cap;
let len = cap;
let filter = Filter::<H>::with_default_hasher(cap, desired_fpr);
let mut fp = 0;
dbg!(log_cap, cap, len, filter.num_slots(), filter.num_bits(), filter.num_hashes());
for i in 0..len {
if i % 2 == 0 {
fp += u64::from(filter.insert(i));
}
}
for i in 0..len {
if i % 2 == 0 {
assert!(filter.contains(i));
} else {
fp += u64::from(filter.contains(i));
}
}
let actual_fpr = fp as f64 / (len as f64 / 2.0);
dbg!(std::any::type_name::<H>(), actual_fpr, desired_fpr);
assert!(actual_fpr <= 3.0 * desired_fpr);
}
}
#[test]
fn works_with_default_hasher() {
let handles: Vec<_> = (1..=9)
.map(|neg_log_fpr| {
generic_hasher::<RandomState>(f64::powi(10.0, -neg_log_fpr))
})
.collect();
for h in handles {
h.join().unwrap();
}
}
#[test]
fn works_with_3rd_party_hasher() {
let handles: Vec<_> = (1..=9)
.map(|neg_log_fpr| {
generic_hasher::<AHashRandomState>(f64::powi(10.0, -neg_log_fpr))
})
.collect();
for h in handles {
h.join().unwrap();
}
}
#[test]
fn multi_threaded() {
let num_threads = thread::available_parallelism().unwrap().get();
let items_per_thread = 1_000_000;
let desired_fpr = 1.0e-6;
let filter = Filter::<AHashRandomState>::with_default_hasher(num_threads * items_per_thread, desired_fpr);
thread::scope(|scope| {
let mut handles = Vec::with_capacity(num_threads);
for i in 0..num_threads {
let filter = &filter;
let h = scope.spawn(move || {
let start_idx = i * items_per_thread;
let end_idx = start_idx + items_per_thread;
let mut fp = 0;
for j in start_idx..end_idx {
if j % 3 == 1 {
fp += usize::from(filter.insert(j));
assert!(filter.contains(j));
}
}
for j in start_idx..end_idx {
if j % 3 == 1 {
assert!(filter.contains(j));
} else {
fp += usize::from(filter.contains(j));
}
}
let actual_fpr = fp as f64 / (items_per_thread as f64 * 2.0 / 3.0);
dbg!(i, actual_fpr);
assert!(actual_fpr < 3.0 * desired_fpr);
});
handles.push(h);
}
for h in handles {
h.join().unwrap();
}
});
}
}