use crate::fnv1a64;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct PartitionedBloomFilter {
slice_bits: u32,
k: u32,
slices: Vec<Vec<u64>>,
}
impl PartitionedBloomFilter {
pub fn new(expected_entries: usize) -> Self {
let bit_count = expected_entries.saturating_mul(10).max(64) as u32;
let k = 7u32;
let slice_bits = bit_count.div_ceil(k);
let words = (slice_bits as usize).div_ceil(64);
Self {
slice_bits,
k,
slices: (0..k as usize).map(|_| vec![0u64; words]).collect(),
}
}
pub fn slice_bits(&self) -> u32 {
self.slice_bits
}
pub fn k(&self) -> u32 {
self.k
}
pub fn bit_count(&self) -> u32 {
self.slice_bits * self.k
}
pub fn add(&mut self, key: &str) {
let h = fnv1a64(key);
let h1 = h as u32;
let h2 = ((h >> 32) as u32) | 1;
for i in 0..self.k {
let idx = h1.wrapping_add(i.wrapping_mul(h2)) % self.slice_bits;
self.slices[i as usize][(idx / 64) as usize] |= 1u64 << (idx % 64);
}
}
pub fn might_contain(&self, key: &str) -> bool {
let h = fnv1a64(key);
let h1 = h as u32;
let h2 = ((h >> 32) as u32) | 1;
for i in 0..self.k {
let idx = h1.wrapping_add(i.wrapping_mul(h2)) % self.slice_bits;
if self.slices[i as usize][(idx / 64) as usize] & (1u64 << (idx % 64)) == 0 {
return false;
}
}
true
}
pub fn add_to_slice(&mut self, key: &str, slice: usize) {
assert!(slice < self.k as usize, "slice index out of range");
let h = fnv1a64(key);
let h1 = h as u32;
let h2 = ((h >> 32) as u32) | 1;
let idx = h1.wrapping_add((slice as u32).wrapping_mul(h2)) % self.slice_bits;
self.slices[slice][(idx / 64) as usize] |= 1u64 << (idx % 64);
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn added_keys_always_match() {
let mut pb = PartitionedBloomFilter::new(1000);
for i in 0..1000 {
pb.add(&format!("k{i}"));
}
for i in 0..1000 {
assert!(pb.might_contain(&format!("k{i}")));
}
}
#[test]
fn empty_filter_rejects_everything() {
let pb = PartitionedBloomFilter::new(100);
assert!(!pb.might_contain("any"));
}
#[test]
fn fpr_target_holds_with_default_sizing() {
let mut pb = PartitionedBloomFilter::new(10_000);
for i in 0..10_000 {
pb.add(&format!("present{i}"));
}
let probes = 100_000;
let mut fp = 0;
for i in 0..probes {
if pb.might_contain(&format!("absent{i}")) {
fp += 1;
}
}
let fpr = fp as f64 / probes as f64;
assert!(fpr < 0.05, "FPR {fpr:.4} exceeded 5%");
}
#[test]
fn slice_count_equals_k() {
let pb = PartitionedBloomFilter::new(100);
assert_eq!(pb.slices.len(), pb.k() as usize);
}
#[test]
fn add_to_slice_independent_of_full_add() {
let mut pb = PartitionedBloomFilter::new(100);
pb.add_to_slice("partial", 0);
assert!(!pb.might_contain("partial"));
for i in 1..pb.k() as usize {
pb.add_to_slice("partial", i);
}
assert!(pb.might_contain("partial"));
}
#[test]
#[should_panic(expected = "slice index out of range")]
fn add_to_slice_rejects_bad_index() {
let mut pb = PartitionedBloomFilter::new(100);
pb.add_to_slice("k", 999);
}
}