#![warn(missing_docs)]
#![forbid(unsafe_code)]
mod concurrent;
mod config;
mod hashing;
mod scalable;
mod storage;
use bitvec::prelude::*;
use std::hash::Hash;
use std::path::{Path, PathBuf};
use hashing::HashGenerator;
pub use concurrent::{AtomicBloomFilter, ConcurrentBloomFilter};
pub use config::BloomConfig;
pub use scalable::ScalableBloomFilter;
pub type BloomBitVec = BitVec<u8, Lsb0>;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BloomMode {
#[default]
Standard,
Blocked,
}
const BLOCK_BITS: usize = 512;
#[derive(Debug, Clone)]
pub struct BloomFilter {
pub(crate) bits: BloomBitVec,
pub(crate) size: usize,
pub(crate) hashes: usize,
pub(crate) items: usize,
pub(crate) mode: BloomMode,
pub(crate) target_path: Option<PathBuf>,
pub(crate) needs_save: bool,
}
impl BloomFilter {
pub fn new(size: usize, hashes: usize) -> Self {
Self::with_mode(size, hashes, BloomMode::Standard)
}
pub fn with_mode(size: usize, hashes: usize, mode: BloomMode) -> Self {
assert!(size > 0, "Bloom filter size must be > 0");
assert!(hashes > 0, "Number of hash functions must be > 0");
let actual_size = if mode == BloomMode::Blocked {
size.div_ceil(BLOCK_BITS) * BLOCK_BITS
} else {
size
};
Self {
bits: BloomBitVec::repeat(false, actual_size),
size: actual_size,
hashes,
items: 0,
mode,
target_path: None,
needs_save: true,
}
}
pub fn with_persistence<P: Into<PathBuf>>(mut self, path: P) -> Self {
self.target_path = Some(path.into());
self
}
pub fn load_or_new<P: AsRef<Path>>(path: P, size: usize, hashes: usize) -> Self {
if let Some(mut existing) = storage::load(path.as_ref()) {
existing.target_path = Some(path.as_ref().to_path_buf());
existing
} else {
Self::new(size, hashes).with_persistence(path.as_ref().to_path_buf())
}
}
pub fn fill_ratio(&self) -> f64 {
self.bits.count_ones() as f64 / self.size as f64
}
pub fn mode(&self) -> BloomMode {
self.mode
}
#[inline]
pub fn insert<T: Hash>(&mut self, item: &T) {
let gen = HashGenerator::new(item);
match self.mode {
BloomMode::Standard => {
for i in 0..self.hashes {
let idx = (gen.nth(i) % self.size as u64) as usize;
self.bits.set(idx, true);
}
}
BloomMode::Blocked => {
let num_blocks = self.size / BLOCK_BITS;
let block = (gen.nth(0) % num_blocks as u64) as usize;
let block_start = block * BLOCK_BITS;
for i in 1..=self.hashes {
let bit_within = (gen.nth(i) % BLOCK_BITS as u64) as usize;
self.bits.set(block_start + bit_within, true);
}
}
}
self.items += 1;
self.needs_save = true;
}
#[inline]
pub fn contains<T: Hash>(&self, item: &T) -> bool {
let gen = HashGenerator::new(item);
match self.mode {
BloomMode::Standard => {
for i in 0..self.hashes {
let idx = (gen.nth(i) % self.size as u64) as usize;
if !self.bits[idx] {
return false;
}
}
}
BloomMode::Blocked => {
let num_blocks = self.size / BLOCK_BITS;
let block = (gen.nth(0) % num_blocks as u64) as usize;
let block_start = block * BLOCK_BITS;
for i in 1..=self.hashes {
let bit_within = (gen.nth(i) % BLOCK_BITS as u64) as usize;
if !self.bits[block_start + bit_within] {
return false;
}
}
}
}
true
}
pub fn save(&mut self) -> std::io::Result<()> {
if let Some(ref path) = self.target_path {
if self.needs_save {
storage::save(self, path)?;
self.needs_save = false;
}
}
Ok(())
}
#[inline]
pub fn len(&self) -> usize {
self.items
}
#[inline]
pub fn is_empty(&self) -> bool {
self.items == 0
}
}
impl Drop for BloomFilter {
fn drop(&mut self) {
if self.needs_save && self.target_path.is_some() {
let _ = self.save();
}
}
}