use std::io::{self, Write};
const FNV_OFFSET: u64 = 0xcbf29ce484222325;
const FNV_PRIME: u64 = 0x100000001b3;
pub struct BloomFilter {
bit_count: u32,
k: u32,
bits: Vec<u64>,
}
impl BloomFilter {
pub fn new(expected_entries: usize) -> Self {
let bit_count = expected_entries.saturating_mul(10).max(64) as u32;
let words = ((bit_count as usize) + 63) / 64;
Self {
bit_count,
k: 7,
bits: vec![0u64; words],
}
}
pub fn add(&mut self, key: &str) {
let h = fnv1a64(key);
let h1 = h as u32;
let h2 = ((h >> 32) as u32) | 1;
for i in 0..self.k {
let idx = h1.wrapping_add(i.wrapping_mul(h2)) % self.bit_count;
self.bits[(idx / 64) as usize] |= 1u64 << (idx % 64);
}
}
pub fn might_contain(&self, key: &str) -> bool {
let h = fnv1a64(key);
let h1 = h as u32;
let h2 = ((h >> 32) as u32) | 1;
for i in 0..self.k {
let idx = h1.wrapping_add(i.wrapping_mul(h2)) % self.bit_count;
if self.bits[(idx / 64) as usize] & (1u64 << (idx % 64)) == 0 {
return false;
}
}
true
}
pub fn bit_count(&self) -> u32 {
self.bit_count
}
pub fn k(&self) -> u32 {
self.k
}
pub fn write_to<W: Write>(&self, out: &mut W) -> io::Result<()> {
out.write_all(&self.bit_count.to_be_bytes())?;
out.write_all(&self.k.to_be_bytes())?;
out.write_all(&(self.bits.len() as u32).to_be_bytes())?;
for w in &self.bits {
out.write_all(&w.to_be_bytes())?;
}
Ok(())
}
pub fn parse(buf: &[u8]) -> io::Result<Self> {
if buf.len() < 12 {
return Err(io::Error::new(io::ErrorKind::InvalidData, "bloom section too short"));
}
let bit_count = u32::from_be_bytes(buf[0..4].try_into().unwrap());
let k = u32::from_be_bytes(buf[4..8].try_into().unwrap());
let words = u32::from_be_bytes(buf[8..12].try_into().unwrap()) as usize;
if buf.len() < 12 + words * 8 {
return Err(io::Error::new(io::ErrorKind::InvalidData, "bloom section truncated"));
}
let mut bits = Vec::with_capacity(words);
for i in 0..words {
let off = 12 + i * 8;
bits.push(u64::from_be_bytes(buf[off..off + 8].try_into().unwrap()));
}
Ok(Self { bit_count, k, bits })
}
}
fn fnv1a64(key: &str) -> u64 {
let mut h = FNV_OFFSET;
for &b in key.as_bytes() {
h ^= b as u64;
h = h.wrapping_mul(FNV_PRIME);
}
h
}