1#![forbid(unsafe_code)]
2#![doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/README.md"))]
3
4use std::hash::{Hash, BuildHasher, RandomState};
5use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
6
7
/// Number of bits in one `usize` word (one filter slot).
const USIZE_BITS: usize = usize::BITS as usize;
/// Right-shift that extracts the slot index from a hash: `log2(USIZE_BITS)`.
const INDEX_1_SHIFT: u32 = USIZE_BITS.ilog2();
/// Mask that extracts the bit-within-slot index from a hash.
const INDEX_2_MASK: usize = USIZE_BITS - 1;
11
/// A Bloom filter whose bit array is stored as atomic words, so insertion
/// and lookup work through a shared reference (`&self`) and can be used
/// concurrently from multiple threads.
#[derive(Debug)]
pub struct Filter<H = RandomState> {
    // Backing bit array; `with_hasher` always allocates a power-of-two
    // number of slots so indexing can use masking (see `slot_mask`).
    bits: Box<[AtomicUsize]>,
    // Number of bits probed per element (the Bloom parameter `k`).
    num_hashes: usize,
    // Produces the initial 64-bit hash for each element.
    hasher: H,
}
19
20impl<H> Filter<H> {
21 pub fn with_hasher(capacity: usize, false_positive_rate: f64, hasher: H) -> Self {
28 assert!(0.0 < false_positive_rate && false_positive_rate < 1.0);
29
30 let log2_eps = f64::log2(false_positive_rate);
32 let num_hashes = 1usize.max(f64::round(-log2_eps) as usize);
33 let min_num_bits = f64::round(capacity as f64 * -log2_eps / std::f64::consts::LN_2) as usize;
34 let num_bits = min_num_bits.next_power_of_two();
35 let num_slots = 1usize.max(num_bits / USIZE_BITS);
36 let bits: Box<[_]> = (0..num_slots).map(|_| AtomicUsize::new(0)).collect();
37
38 Filter { bits, num_hashes, hasher }
39 }
40
41 #[inline]
42 pub const fn as_bits(&self) -> &[AtomicUsize] {
43 &self.bits
44 }
45
46 #[inline]
47 pub const fn as_mut_bits(&mut self) -> &mut [AtomicUsize] {
48 &mut self.bits
49 }
50
51 #[inline]
52 pub fn into_bits(self) -> Box<[AtomicUsize]> {
53 self.bits
54 }
55
56 #[inline]
57 pub const fn num_slots(&self) -> usize {
58 self.bits.len()
59 }
60
61 #[inline]
62 pub const fn num_bits(&self) -> usize {
63 self.bits.len() * USIZE_BITS
64 }
65
66 #[inline]
67 pub const fn num_hashes(&self) -> usize {
68 self.num_hashes
69 }
70
71 #[inline]
72 const fn slot_mask(&self) -> usize {
73 self.num_slots() - 1
74 }
75
76 pub fn insert_hash(&self, mut hash: u64) -> bool {
80 let slot_mask = self.slot_mask();
81 let aux = aux_hash(hash);
82 let mut exists = true;
83
84 for _ in 0..self.num_hashes {
85 let h = hash as usize;
86 let slot_idx = (h >> INDEX_1_SHIFT) & slot_mask;
87 let bit_idx = h & INDEX_2_MASK;
88 let bit_mask = 1 << bit_idx;
89 let slot_bits = self.bits[slot_idx].fetch_or(bit_mask, Relaxed);
90
91 hash = next_hash(hash, aux);
92 exists &= (slot_bits & bit_mask) != 0;
93 }
94
95 exists
96 }
97
98 pub fn contains_hash(&self, mut hash: u64) -> bool {
101 let slot_mask = self.slot_mask();
102 let aux = aux_hash(hash);
103
104 (0..self.num_hashes).all(|_| {
105 let h = hash as usize;
106 let slot_idx = (h >> INDEX_1_SHIFT) & slot_mask;
107 let bit_idx = h & INDEX_2_MASK;
108 let bit_mask = 1 << bit_idx;
109 let slot_bits = self.bits[slot_idx].load(Relaxed);
110
111 hash = next_hash(hash, aux);
112 slot_bits & bit_mask != 0
113 })
114 }
115
116 pub fn clear(&self) {
139 for slot in &self.bits {
140 slot.store(0, Relaxed);
141 }
142 }
143}
144
145impl<H: BuildHasher> Filter<H> {
146 pub fn insert<T: Hash>(&self, element: T) -> bool {
170 let hash = self.hasher.hash_one(element);
171 self.insert_hash(hash)
172 }
173
174 pub fn contains<T: Hash>(&self, element: T) -> bool {
175 let hash = self.hasher.hash_one(element);
176 self.contains_hash(hash)
177 }
178
179 #[inline]
182 pub fn hash_item<T: Hash>(&self, element: T) -> u64 {
183 self.hasher.hash_one(element)
184 }
185}
186
187impl<H: Default> Filter<H> {
188 #[inline]
193 pub fn with_default_hasher(capacity: usize, false_positive_rate: f64) -> Self {
194 Self::with_hasher(capacity, false_positive_rate, H::default())
195 }
196}
197
198impl Filter<RandomState> {
199 #[inline]
205 pub fn new(capacity: usize, false_positive_rate: f64) -> Self {
206 Self::with_default_hasher(capacity, false_positive_rate)
207 }
208}
209
210impl<H: Clone> Clone for Filter<H> {
211 fn clone(&self) -> Self {
212 Filter {
213 bits: self.bits.iter().map(|slot| AtomicUsize::new(slot.load(Relaxed))).collect(),
214 num_hashes: self.num_hashes,
215 hasher: self.hasher.clone(),
216 }
217 }
218}
219
220impl<H: PartialEq> PartialEq for Filter<H> {
221 fn eq(&self, other: &Self) -> bool {
222 self.num_hashes == other.num_hashes && self.hasher == other.hasher && self.bits.len() == other.bits.len()
225 && self.bits.iter().map(|x| x.load(Relaxed)).eq(other.bits.iter().map(|y| y.load(Relaxed)))
226 }
227}
228
// Equality is an equivalence relation here whenever `H: Eq`, since the
// remaining fields are compared as plain integers.
impl<H: Eq> Eq for Filter<H> {}
230
231impl<H> From<Filter<H>> for Box<[AtomicUsize]> {
232 fn from(filter: Filter<H>) -> Self {
233 filter.into_bits()
234 }
235}
236
237impl<H> AsRef<[AtomicUsize]> for Filter<H> {
238 fn as_ref(&self) -> &[AtomicUsize] {
239 self.as_bits()
240 }
241}
242
243impl<H> AsMut<[AtomicUsize]> for Filter<H> {
244 fn as_mut(&mut self) -> &mut [AtomicUsize] {
245 self.as_mut_bits()
246 }
247}
248
249impl<H, T> Extend<T> for &Filter<H>
253where
254 H: BuildHasher,
255 T: Hash,
256{
257 fn extend<I>(&mut self, iter: I)
279 where
280 I: IntoIterator<Item = T>
281 {
282 for item in iter {
283 self.insert(item);
284 }
285 }
286}
287
288impl<H, T> Extend<T> for Filter<H>
290where
291 H: BuildHasher,
292 T: Hash,
293{
294 fn extend<I>(&mut self, iter: I)
315 where
316 I: IntoIterator<Item = T>
317 {
318 let mut this: &Self = &*self;
319 this.extend(iter);
320 }
321}
322
/// Derives the per-probe increment from the upper 32 bits of the initial
/// hash, spread by a multiplicative constant.
#[inline]
fn aux_hash(hash: u64) -> u64 {
    (hash >> 32).wrapping_mul(0x517c_c1b7_2722_0a95)
}
329
/// Advances the probe sequence: add the per-element increment, then rotate
/// so that fresh bits reach the low (index) positions.
#[inline]
fn next_hash(hash: u64, aux: u64) -> u64 {
    u64::rotate_left(hash.wrapping_add(aux), 5)
}
335
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread::{self, JoinHandle};
    use ahash::RandomState as AHashRandomState;

    // Runs the generic FPR check on its own thread so several target rates
    // can be exercised concurrently from one #[test].
    fn generic_hasher<H>(desired_fpr: f64) -> JoinHandle<()>
    where
        H: Default + BuildHasher
    {
        thread::spawn(move || generic_hasher_impl::<H>(desired_fpr))
    }

    // Inserts every even key, then queries all keys: evens must be found
    // (no false negatives), while hits on odd keys count as false positives.
    fn generic_hasher_impl<H>(desired_fpr: f64)
    where
        H: Default + BuildHasher
    {
        for log_cap in [0, 4, 8, 12, 16, 20, 24, 26] {
            let cap = 1 << log_cap;
            let len = cap;
            let filter = Filter::<H>::with_default_hasher(cap, desired_fpr);
            let mut fp = 0;

            dbg!(log_cap, cap, len, filter.num_slots(), filter.num_bits(), filter.num_hashes());

            for i in 0..len {
                if i % 2 == 0 {
                    // `insert` returning true for a fresh key is itself a
                    // false positive, so it is counted too.
                    fp += u64::from(filter.insert(i));
                }
            }

            for i in 0..len {
                if i % 2 == 0 {
                    assert!(filter.contains(i));
                } else {
                    fp += u64::from(filter.contains(i));
                }
            }

            // len/2 odd keys were never inserted; allow 3x slack over the
            // requested rate to keep the statistical test stable.
            let actual_fpr = fp as f64 / (len as f64 / 2.0); dbg!(std::any::type_name::<H>(), actual_fpr, desired_fpr);
            assert!(actual_fpr <= 3.0 * desired_fpr);
        }
    }

    // Covers target rates 1e-1 .. 1e-9 with the std SipHash-based hasher.
    #[test]
    fn works_with_default_hasher() {
        let handles: Vec<_> = (1..=9)
            .map(|neg_log_fpr| {
                generic_hasher::<RandomState>(f64::powi(10.0, -neg_log_fpr))
            })
            .collect();

        for h in handles {
            h.join().unwrap();
        }
    }

    // Same sweep with a third-party hasher to check the generic bound.
    #[test]
    fn works_with_3rd_party_hasher() {
        let handles: Vec<_> = (1..=9)
            .map(|neg_log_fpr| {
                generic_hasher::<AHashRandomState>(f64::powi(10.0, -neg_log_fpr))
            })
            .collect();

        for h in handles {
            h.join().unwrap();
        }
    }

    // Concurrent inserts/lookups through a shared `&Filter`: each thread
    // owns a disjoint key range, inserts keys ≡ 1 (mod 3), and tallies
    // false positives on the rest.
    #[test]
    fn multi_threaded() {
        let num_threads = thread::available_parallelism().unwrap().get();
        let items_per_thread = 1_000_000;
        let desired_fpr = 1.0e-6;
        let filter = Filter::<AHashRandomState>::with_default_hasher(num_threads * items_per_thread, desired_fpr);

        thread::scope(|scope| {
            let mut handles = Vec::with_capacity(num_threads);

            for i in 0..num_threads {
                let filter = &filter;
                let h = scope.spawn(move || {
                    let start_idx = i * items_per_thread;
                    let end_idx = start_idx + items_per_thread;
                    let mut fp = 0;

                    for j in start_idx..end_idx {
                        if j % 3 == 1 {
                            fp += usize::from(filter.insert(j));
                            // An inserted key must be visible immediately.
                            assert!(filter.contains(j));
                        }
                    }

                    for j in start_idx..end_idx {
                        if j % 3 == 1 {
                            assert!(filter.contains(j));
                        } else {
                            fp += usize::from(filter.contains(j));
                        }
                    }

                    // Two thirds of each range was never inserted.
                    let actual_fpr = fp as f64 / (items_per_thread as f64 * 2.0 / 3.0);

                    dbg!(i, actual_fpr);
                    assert!(actual_fpr < 3.0 * desired_fpr);
                });
                handles.push(h);
            }

            for h in handles {
                h.join().unwrap();
            }
        });
    }
}