#![warn(missing_docs)]
#![forbid(unsafe_code)]
mod concurrent;
mod config;
mod hashing;
mod scalable;
mod storage;
use bitvec::prelude::*;
use std::hash::Hash;
use std::path::{Path, PathBuf};
use hashing::HashGenerator;
pub use concurrent::ConcurrentBloomFilter;
pub use config::BloomConfig;
pub use scalable::ScalableBloomFilter;
pub type BloomBitVec = BitVec<u8, Lsb0>;
#[derive(Debug, Clone)]
pub struct BloomFilter {
pub(crate) bits: BloomBitVec,
pub(crate) size: usize,
pub(crate) hashes: usize,
pub(crate) items: usize,
pub(crate) target_path: Option<PathBuf>,
pub(crate) needs_save: bool,
}
impl BloomFilter {
pub fn new(size: usize, hashes: usize) -> Self {
assert!(size > 0, "Bloom filter size must be > 0");
assert!(hashes > 0, "Number of hash functions must be > 0");
Self {
bits: BloomBitVec::repeat(false, size),
size,
hashes,
items: 0,
target_path: None,
needs_save: true,
}
}
pub fn with_persistence<P: Into<PathBuf>>(mut self, path: P) -> Self {
self.target_path = Some(path.into());
self
}
pub fn load_or_new<P: AsRef<Path>>(path: P, size: usize, hashes: usize) -> Self {
if let Some(mut existing) = storage::load(path.as_ref()) {
existing.target_path = Some(path.as_ref().to_path_buf());
existing
} else {
Self::new(size, hashes).with_persistence(path.as_ref().to_path_buf())
}
}
pub fn fill_ratio(&self) -> f64 {
self.bits.count_ones() as f64 / self.size as f64
}
#[inline]
pub fn insert<T: Hash>(&mut self, item: &T) {
let gen = HashGenerator::new(item);
for i in 0..self.hashes {
let idx = (gen.nth(i) % self.size as u64) as usize;
self.bits.set(idx, true);
}
self.items += 1;
self.needs_save = true;
}
#[inline]
pub fn contains<T: Hash>(&self, item: &T) -> bool {
let gen = HashGenerator::new(item);
for i in 0..self.hashes {
let idx = (gen.nth(i) % self.size as u64) as usize;
if !self.bits[idx] {
return false;
}
}
true
}
pub fn save(&mut self) -> std::io::Result<()> {
if let Some(ref path) = self.target_path {
if self.needs_save {
storage::save(self, path)?;
self.needs_save = false;
}
}
Ok(())
}
#[inline]
pub fn len(&self) -> usize {
self.items
}
#[inline]
pub fn is_empty(&self) -> bool {
self.items == 0
}
}
impl Drop for BloomFilter {
fn drop(&mut self) {
if self.needs_save && self.target_path.is_some() {
let _ = self.save();
}
}
}