const SALTS: [u32; 8] = [
0x47b6137b, 0x44974d91, 0x8824ad5b, 0xa2b7289d, 0x705495c7, 0x2df1424b, 0x9efc4947, 0x5c6bfb31,
];
#[repr(align(32))]
#[derive(Clone, Default)]
pub struct Block {
words: [u32; 8],
}
impl std::fmt::Debug for Block {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Block({:08x?})", &self.words)
}
}
#[derive(Debug)]
pub struct SplitBlockBloom {
blocks: Vec<Block>,
mask: usize,
}
impl SplitBlockBloom {
pub fn with_capacity(n: usize) -> Self {
let bits_needed = (n * 10).max(256);
let blocks_needed = bits_needed.div_ceil(256);
let num_blocks = blocks_needed.next_power_of_two();
Self {
blocks: vec![Block::default(); num_blocks],
mask: num_blocks - 1,
}
}
#[inline]
pub fn insert(&mut self, key: u32) {
let block_idx = (key as usize) & self.mask;
let block = &mut self.blocks[block_idx];
for (i, &salt) in SALTS.iter().enumerate() {
let bit = key.wrapping_mul(salt) >> 27; block.words[i] |= 1u32 << bit;
}
}
#[inline]
pub fn probe(&self, key: u32) -> bool {
let block_idx = (key as usize) & self.mask;
let block = &self.blocks[block_idx];
for (i, &salt) in SALTS.iter().enumerate() {
let bit = key.wrapping_mul(salt) >> 27;
if block.words[i] & (1u32 << bit) == 0 {
return false;
}
}
true
}
pub fn num_blocks(&self) -> usize {
self.blocks.len()
}
}
pub fn hash_value_u32(v: &crate::storage::schema::Value) -> u32 {
use std::hash::{Hash, Hasher};
let mut h = std::collections::hash_map::DefaultHasher::new();
v.hash(&mut h);
let bits = h.finish();
(bits ^ (bits >> 32)) as u32
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_insert_then_probe() {
let mut bloom = SplitBlockBloom::with_capacity(100);
for i in 0u32..100 {
bloom.insert(i);
}
for i in 0u32..100 {
assert!(bloom.probe(i), "false negative for key {i}");
}
}
#[test]
fn test_absent_key_may_return_false() {
let mut bloom = SplitBlockBloom::with_capacity(1000);
for i in 0u32..1000 {
bloom.insert(i * 2); }
for i in 0u32..1000 {
assert!(bloom.probe(i * 2), "false negative for key {}", i * 2);
}
}
#[test]
fn test_false_positive_rate_approximately_one_percent() {
const N: usize = 10_000;
let mut bloom = SplitBlockBloom::with_capacity(N);
for i in 0u32..N as u32 {
bloom.insert(i);
}
let mut fp = 0usize;
let probes = 10_000usize;
for i in N as u32..(N as u32 + probes as u32) {
if bloom.probe(i) {
fp += 1;
}
}
let fpr = fp as f64 / probes as f64;
assert!(fpr < 0.05, "FPR too high: {fpr:.3}");
}
}