pub(crate) const BLOOM_BITS_PER_KEY: u8 = 8;
const MIN_BLOOM_BYTES: usize = 32;
#[inline]
#[must_use]
fn probe_count(bits_per_key: u8) -> u32 {
let k = (u32::from(bits_per_key) * 693 + 500) / 1000;
k.clamp(1, 30)
}
#[must_use]
pub(crate) fn bloom_byte_len(num_keys: usize, bits_per_key: u8) -> usize {
let raw_bits = num_keys.saturating_mul(usize::from(bits_per_key));
let raw_bytes = raw_bits.div_ceil(8);
raw_bytes.next_multiple_of(8).max(MIN_BLOOM_BYTES)
}
#[inline]
#[must_use]
fn key_hash(key: &[u8]) -> (u32, u32) {
let mut acc: u64 = 0xcbf2_9ce4_8422_2325; let mut chunks = key.chunks_exact(8);
for c in &mut chunks {
let v = u64::from_le_bytes(c.try_into().unwrap());
acc = (acc ^ v).wrapping_mul(0x9E37_79B9_7F4A_7C15);
}
let rem = chunks.remainder();
if rem.is_empty() {
acc ^= key.len() as u64;
} else {
let mut buf = [0u8; 8];
buf[..rem.len()].copy_from_slice(rem);
acc = (acc ^ u64::from_le_bytes(buf) ^ (key.len() as u64))
.wrapping_mul(0x9E37_79B9_7F4A_7C15);
}
let mut z = acc.wrapping_add(0x9E37_79B9_7F4A_7C15);
z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
z ^= z >> 31;
let h1 = z as u32;
let h2 = ((z >> 32) as u32) | 1;
(h1, h2)
}
pub(crate) struct BloomBuilder {
bytes: Vec<u8>,
m_bits: u32,
k: u32,
}
impl BloomBuilder {
#[must_use]
pub(crate) fn new(num_keys: usize, bits_per_key: u8) -> Self {
let len = bloom_byte_len(num_keys, bits_per_key);
Self {
bytes: vec![0u8; len],
m_bits: (len * 8) as u32,
k: probe_count(bits_per_key),
}
}
pub(crate) fn add(&mut self, key: &[u8]) {
let (h1, h2) = key_hash(key);
let mut h = h1;
for _ in 0..self.k {
let bit = h % self.m_bits;
self.bytes[(bit / 8) as usize] |= 1u8 << (bit % 8);
h = h.wrapping_add(h2);
}
}
#[must_use]
pub(crate) fn into_bytes(self) -> Vec<u8> {
self.bytes
}
}
#[must_use]
pub(crate) fn bloom_contains(filter_bytes: &[u8], bits_per_key: u8, key: &[u8]) -> bool {
if filter_bytes.is_empty() {
return true; }
let m_bits = (filter_bytes.len() * 8) as u32;
let k = probe_count(bits_per_key);
let (h1, h2) = key_hash(key);
let mut h = h1;
for _ in 0..k {
let bit = h % m_bits;
if filter_bytes[(bit / 8) as usize] & (1u8 << (bit % 8)) == 0 {
return false; }
h = h.wrapping_add(h2);
}
true
}
#[cfg(test)]
mod tests {
use super::*;
fn make_key(n: u64) -> Vec<u8> {
let mut k = b"obj/bucket-7/".to_vec();
k.extend_from_slice(&n.to_le_bytes());
k
}
#[test]
fn probe_count_is_optimal_and_clamped() {
assert_eq!(probe_count(8), 6); assert_eq!(probe_count(1), 1); assert_eq!(probe_count(0), 1); assert_eq!(probe_count(255), 30); }
#[test]
fn byte_len_rounds_and_floors() {
assert_eq!(bloom_byte_len(0, 8), MIN_BLOOM_BYTES);
assert_eq!(bloom_byte_len(1, 8), MIN_BLOOM_BYTES); assert_eq!(bloom_byte_len(1000, 8), 1000);
assert_eq!(bloom_byte_len(123, 8) % 8, 0);
}
#[test]
fn no_false_negatives_every_present_key_is_maybe() {
for &n in &[1usize, 10, 100, 1000, 5000] {
for &bpk in &[4u8, 8, 12, 16] {
let mut b = BloomBuilder::new(n, bpk);
let keys: Vec<_> = (0..n as u64).map(make_key).collect();
for k in &keys {
b.add(k);
}
let bytes = b.into_bytes();
for k in &keys {
assert!(
bloom_contains(&bytes, bpk, k),
"false negative: present key missed (n={n}, bpk={bpk})"
);
}
}
}
}
#[test]
fn false_positive_rate_is_near_target() {
let n = 5000usize;
let bpk = 8u8;
let mut b = BloomBuilder::new(n, bpk);
for i in 0..n as u64 {
b.add(&make_key(i));
}
let bytes = b.into_bytes();
let probes = 20_000u64;
let mut fp = 0u64;
for i in 0..probes {
let mut k = b"absent/".to_vec();
k.extend_from_slice(&(i ^ 0xDEAD_BEEF).to_le_bytes());
if bloom_contains(&bytes, bpk, &k) {
fp += 1;
}
}
assert!(
fp * 20 < probes,
"FPR too high: {fp}/{probes} (expected ~0.02)"
);
}
#[test]
fn empty_or_malformed_filter_is_maybe() {
assert!(bloom_contains(&[], 8, b"anything"));
}
#[test]
fn distinct_filters_are_independent() {
let mut a = BloomBuilder::new(100, 8);
a.add(b"only-in-a");
let abytes = a.into_bytes();
let bbytes = BloomBuilder::new(100, 8).into_bytes();
assert!(bloom_contains(&abytes, 8, b"only-in-a"));
assert!(!bloom_contains(&bbytes, 8, b"only-in-a"));
}
}