use std::hash::Hash;
use std::path::{Path, PathBuf};
use crate::config::BloomConfig;
use crate::BloomFilter;
const DEFAULT_TIGHTENING_RATIO: f64 = 0.9;
const DEFAULT_GROWTH_FACTOR: usize = 2;
#[derive(Debug, Clone)]
pub struct ScalableBloomFilter {
pub(crate) filters: Vec<BloomFilter>,
pub(crate) config: BloomConfig,
pub(crate) tightening_ratio: f64,
pub(crate) growth_factor: usize,
pub(crate) target_path: Option<PathBuf>,
pub(crate) needs_save: bool,
}
impl ScalableBloomFilter {
pub fn new(config: BloomConfig) -> Self {
Self::with_parameters(config, DEFAULT_TIGHTENING_RATIO, DEFAULT_GROWTH_FACTOR)
}
pub fn with_parameters(
config: BloomConfig,
tightening_ratio: f64,
growth_factor: usize,
) -> Self {
assert!(
0.0 < tightening_ratio && tightening_ratio < 1.0,
"tightening_ratio must be between 0 and 1"
);
assert!(growth_factor >= 2, "growth_factor must be >= 2");
let (size, hashes) = config.parameters();
let filter = BloomFilter::new(size, hashes);
Self {
filters: vec![filter],
config,
tightening_ratio,
growth_factor,
target_path: None,
needs_save: true,
}
}
pub fn with_persistence<P: Into<PathBuf>>(mut self, path: P) -> Self {
self.target_path = Some(path.into());
self
}
pub fn load_or_new<P: AsRef<Path>>(path: P, config: BloomConfig) -> Self {
if let Some(mut existing) = crate::storage::load_scalable(path.as_ref()) {
existing.target_path = Some(path.as_ref().to_path_buf());
existing
} else {
Self::new(config).with_persistence(path.as_ref().to_path_buf())
}
}
pub fn insert<T: Hash>(&mut self, item: &T) {
let last = self.filters.last_mut().unwrap();
if last.fill_ratio() >= 0.5 {
self.grow();
}
self.filters.last_mut().unwrap().insert(item);
self.needs_save = true;
}
pub fn contains<T: Hash>(&self, item: &T) -> bool {
for filter in self.filters.iter().rev() {
if filter.contains(item) {
return true;
}
}
false
}
pub fn layers(&self) -> usize {
self.filters.len()
}
pub fn len(&self) -> usize {
self.filters.iter().map(|f| f.len()).sum()
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn save(&mut self) -> std::io::Result<()> {
if let Some(ref path) = self.target_path {
if self.needs_save {
crate::storage::save_scalable(self, path)?;
self.needs_save = false;
for f in &mut self.filters {
f.needs_save = false;
}
}
}
Ok(())
}
fn grow(&mut self) {
let layer = self.filters.len();
let new_expected = (self.config.expected_items as f64
* (self.growth_factor as f64).powi(layer as i32)) as usize;
let new_fpp = self.config.false_positive_rate * self.tightening_ratio.powi(layer as i32);
let cfg = BloomConfig::new(new_expected, new_fpp);
let (size, hashes) = cfg.parameters();
let filter = BloomFilter::new(size, hashes);
self.filters.push(filter);
}
}
impl Drop for ScalableBloomFilter {
fn drop(&mut self) {
if self.needs_save && self.target_path.is_some() {
let _ = self.save();
}
}
}