use std::hash::{Hash, Hasher};
#[derive(Debug)]
pub struct BloomFilter {
bits: Vec<u64>,
num_bits: usize,
num_hashes: usize,
}
impl BloomFilter {
pub fn new(expected_items: usize, false_positive_rate: f64) -> Self {
let num_bits = Self::optimal_num_bits(expected_items, false_positive_rate);
let num_hashes = Self::optimal_num_hashes(expected_items, num_bits);
let num_words = num_bits.div_ceil(64);
let bits = vec![0u64; num_words];
Self {
bits,
num_bits,
num_hashes,
}
}
fn optimal_num_bits(n: usize, p: f64) -> usize {
let m = -(n as f64 * p.ln()) / (2.0_f64.ln().powi(2));
m.ceil() as usize
}
fn optimal_num_hashes(n: usize, m: usize) -> usize {
let k = (m as f64 / n as f64) * 2.0_f64.ln();
(k.ceil() as usize).max(1)
}
pub fn insert<T: Hash>(&mut self, item: &T) {
for i in 0..self.num_hashes {
let hash = Self::hash(item, i);
let bit_index = hash % self.num_bits;
let word_index = bit_index / 64;
let bit_offset = bit_index % 64;
debug_assert!(
word_index < self.bits.len(),
"Bloom filter index out of bounds: {} >= {}",
word_index,
self.bits.len()
);
self.bits[word_index] |= 1u64 << bit_offset;
}
}
pub fn contains<T: Hash>(&self, item: &T) -> bool {
for i in 0..self.num_hashes {
let hash = Self::hash(item, i);
let bit_index = hash % self.num_bits;
let word_index = bit_index / 64;
let bit_offset = bit_index % 64;
debug_assert!(
word_index < self.bits.len(),
"Bloom filter index out of bounds: {} >= {}",
word_index,
self.bits.len()
);
if (self.bits[word_index] & (1u64 << bit_offset)) == 0 {
return false;
}
}
true
}
fn hash_h1<T: Hash>(item: &T) -> usize {
use std::collections::hash_map::DefaultHasher;
let mut hasher = blake3::Hasher::new();
let mut std_hasher = DefaultHasher::new();
item.hash(&mut std_hasher);
let hash_value = std_hasher.finish();
hasher.update(&hash_value.to_le_bytes());
let hash = hasher.finalize();
let bytes = hash.as_bytes();
u64::from_le_bytes(bytes[0..8].try_into().unwrap()) as usize
}
fn hash_h2<T: Hash>(item: &T) -> usize {
use std::collections::hash_map::DefaultHasher;
let mut hasher = blake3::Hasher::new();
hasher.update(b"BloomFilter::h2");
let mut std_hasher = DefaultHasher::new();
item.hash(&mut std_hasher);
let hash_value = std_hasher.finish();
hasher.update(&hash_value.to_le_bytes());
let hash = hasher.finalize();
u64::from_le_bytes(hash.as_bytes()[0..8].try_into().unwrap()) as usize
}
fn hash<T: Hash>(item: &T, index: usize) -> usize {
if index == 0 {
Self::hash_h1(item)
} else {
let h1 = Self::hash_h1(item);
let h2 = Self::hash_h2(item);
h1.wrapping_add(index.wrapping_mul(h2))
}
}
pub fn to_bytes(&self) -> Vec<u8> {
use byteorder::{LittleEndian, WriteBytesExt};
let mut buf = Vec::new();
buf.extend_from_slice(&[0x42, 0x4C, 0x4D, 0x02]);
buf.write_u64::<LittleEndian>(self.num_bits as u64).unwrap();
buf.write_u64::<LittleEndian>(self.num_hashes as u64)
.unwrap();
buf.write_u64::<LittleEndian>(self.bits.len() as u64)
.unwrap();
for &word in &self.bits {
buf.write_u64::<LittleEndian>(word).unwrap();
}
let checksum = blake3::hash(&buf);
buf.extend_from_slice(checksum.as_bytes());
buf
}
const MAGIC_V2: [u8; 4] = [0x42, 0x4C, 0x4D, 0x02];
pub fn from_bytes(bytes: &[u8]) -> std::io::Result<Self> {
use std::io::{Error, ErrorKind};
const CHECKSUM_SIZE: usize = 32; const MAGIC_SIZE: usize = 4;
const HEADER_SIZE: usize = 24; const MIN_SIZE_V2: usize = MAGIC_SIZE + HEADER_SIZE + CHECKSUM_SIZE; const MIN_SIZE_V1: usize = HEADER_SIZE + CHECKSUM_SIZE;
if bytes.len() >= MIN_SIZE_V2 && bytes[..4] == Self::MAGIC_V2 {
return Self::from_bytes_v2(bytes);
}
if bytes.len() >= MIN_SIZE_V1 {
return Self::from_bytes_v1(bytes);
}
Err(Error::new(
ErrorKind::InvalidData,
format!(
"Bloom filter data too small: {} bytes (minimum {})",
bytes.len(),
MIN_SIZE_V1
),
))
}
fn from_bytes_v2(bytes: &[u8]) -> std::io::Result<Self> {
use std::io::{Cursor, Error, ErrorKind};
const CHECKSUM_SIZE: usize = 32;
const MAGIC_SIZE: usize = 4;
let data_len = bytes.len() - CHECKSUM_SIZE;
let (data, stored_checksum) = bytes.split_at(data_len);
let computed_checksum = blake3::hash(data);
if computed_checksum.as_bytes() != stored_checksum {
return Err(Error::new(
ErrorKind::InvalidData,
format!(
"Bloom filter v2 corruption detected: checksum mismatch. \
Expected {:?}, got {:?}.",
hex::encode(stored_checksum),
hex::encode(computed_checksum.as_bytes())
),
));
}
let mut cursor = Cursor::new(&data[MAGIC_SIZE..]);
Self::parse_header_and_data(&mut cursor)
}
fn from_bytes_v1(bytes: &[u8]) -> std::io::Result<Self> {
use std::io::{Cursor, Error, ErrorKind};
const CHECKSUM_SIZE: usize = 32;
let data_len = bytes.len() - CHECKSUM_SIZE;
let (data, stored_checksum) = bytes.split_at(data_len);
let computed_checksum = blake3::hash(data);
if computed_checksum.as_bytes() != stored_checksum {
return Err(Error::new(
ErrorKind::InvalidData,
format!(
"Bloom filter v1 corruption detected: checksum mismatch. \
Expected {:?}, got {:?}.",
hex::encode(stored_checksum),
hex::encode(computed_checksum.as_bytes())
),
));
}
let mut cursor = Cursor::new(data);
Self::parse_header_and_data(&mut cursor)
}
fn parse_header_and_data(cursor: &mut std::io::Cursor<&[u8]>) -> std::io::Result<Self> {
use byteorder::{LittleEndian, ReadBytesExt};
use std::io::{Error, ErrorKind};
let num_bits = cursor.read_u64::<LittleEndian>()? as usize;
let num_hashes = cursor.read_u64::<LittleEndian>()? as usize;
let num_words = cursor.read_u64::<LittleEndian>()? as usize;
const MAX_BITS: usize = 1_000_000_000; const MAX_HASHES: usize = 32; const MAX_WORDS: usize = MAX_BITS / 64;
if num_bits > MAX_BITS {
return Err(Error::new(
ErrorKind::InvalidData,
format!(
"Bloom filter num_bits too large: {} > {}",
num_bits, MAX_BITS
),
));
}
if num_hashes > MAX_HASHES {
return Err(Error::new(
ErrorKind::InvalidData,
format!(
"Bloom filter num_hashes too large: {} > {}",
num_hashes, MAX_HASHES
),
));
}
if num_words > MAX_WORDS {
return Err(Error::new(
ErrorKind::InvalidData,
format!(
"Bloom filter num_words too large: {} > {}",
num_words, MAX_WORDS
),
));
}
let expected_words = num_bits.div_ceil(64);
if num_words != expected_words {
return Err(Error::new(
ErrorKind::InvalidData,
format!(
"Bloom filter size mismatch: num_words={} but expected={} for num_bits={}",
num_words, expected_words, num_bits
),
));
}
let mut bits = Vec::with_capacity(num_words);
for _ in 0..num_words {
bits.push(cursor.read_u64::<LittleEndian>()?);
}
Ok(Self {
bits,
num_bits,
num_hashes,
})
}
pub fn from_bytes_legacy(bytes: &[u8]) -> std::io::Result<Self> {
use byteorder::{LittleEndian, ReadBytesExt};
use std::io::{Cursor, Error, ErrorKind};
let mut cursor = Cursor::new(bytes);
let num_bits = cursor.read_u64::<LittleEndian>()? as usize;
let num_hashes = cursor.read_u64::<LittleEndian>()? as usize;
let num_words = cursor.read_u64::<LittleEndian>()? as usize;
const MAX_BITS: usize = 1_000_000_000;
const MAX_HASHES: usize = 32;
const MAX_WORDS: usize = MAX_BITS / 64;
if num_bits > MAX_BITS || num_hashes > MAX_HASHES || num_words > MAX_WORDS {
return Err(Error::new(
ErrorKind::InvalidData,
"Bloom filter parameters exceed limits",
));
}
let expected_words = num_bits.div_ceil(64);
if num_words != expected_words {
return Err(Error::new(
ErrorKind::InvalidData,
"Bloom filter size mismatch",
));
}
let mut bits = Vec::with_capacity(num_words);
for _ in 0..num_words {
bits.push(cursor.read_u64::<LittleEndian>()?);
}
Ok(Self {
bits,
num_bits,
num_hashes,
})
}
pub fn size_bytes(&self) -> usize {
24 + self.bits.len() * 8 }
pub fn memory_size(&self) -> usize {
self.size_bytes()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_bloom_filter_basic() {
let mut bloom = BloomFilter::new(1000, 0.01);
for i in 0..100 {
bloom.insert(&i);
}
for i in 0..100 {
assert!(bloom.contains(&i));
}
let mut false_positives = 0;
for i in 100..1000 {
if bloom.contains(&i) {
false_positives += 1;
}
}
let fp_rate = false_positives as f64 / 900.0;
assert!(fp_rate < 0.03, "False positive rate too high: {}", fp_rate);
}
#[test]
fn test_bloom_filter_serialization() {
let mut bloom = BloomFilter::new(100, 0.01);
for i in 0..50 {
bloom.insert(&i);
}
let bytes = bloom.to_bytes();
let restored = BloomFilter::from_bytes(&bytes).unwrap();
for i in 0..50 {
assert!(restored.contains(&i));
}
}
#[test]
fn test_bloom_filter_strings() {
let mut bloom = BloomFilter::new(1000, 0.01);
let items = vec!["hello", "world", "foo", "bar", "baz"];
for item in &items {
bloom.insert(item);
}
for item in &items {
assert!(bloom.contains(item));
}
let _ = bloom.contains(&"not_there");
}
}
const CACHE_LINE_SIZE: usize = 64;
const BLOCK_BITS: usize = CACHE_LINE_SIZE * 8;
#[derive(Debug, Clone, Copy)]
pub struct LevelAdaptiveFPR {
pub l0: f64,
pub l1: f64,
pub l2_plus: f64,
}
impl Default for LevelAdaptiveFPR {
fn default() -> Self {
Self {
l0: 0.001, l1: 0.005, l2_plus: 0.01, }
}
}
impl LevelAdaptiveFPR {
pub fn for_level(&self, level: usize) -> f64 {
match level {
0 => self.l0,
1 => self.l1,
_ => self.l2_plus,
}
}
}
#[derive(Debug)]
pub struct BloomFilterCascade {
filters: Vec<BlockedBloomFilter>,
level_fpr: Vec<f64>,
expected_items: usize,
}
impl BloomFilterCascade {
pub fn new(expected_items: usize, level_fprs: Vec<f64>) -> Self {
let filters = level_fprs
.iter()
.map(|&fpr| BlockedBloomFilter::new(expected_items, fpr))
.collect();
Self {
filters,
level_fpr: level_fprs,
expected_items,
}
}
pub fn default_cascade(expected_items: usize) -> Self {
Self::new(expected_items, vec![0.01, 0.001, 0.0001])
}
pub fn compact(expected_items: usize) -> Self {
Self::new(expected_items, vec![0.01, 0.0001])
}
pub fn insert<T: Hash>(&mut self, item: &T) {
for filter in &mut self.filters {
filter.insert(item);
}
}
pub fn contains<T: Hash>(&self, item: &T) -> bool {
for filter in &self.filters {
if !filter.contains(item) {
return false;
}
}
true
}
pub fn contains_with_level<T: Hash>(&self, item: &T) -> (bool, usize) {
for (level, filter) in self.filters.iter().enumerate() {
if !filter.contains(item) {
return (false, level);
}
}
(true, self.filters.len())
}
pub fn combined_fpr(&self) -> f64 {
self.level_fpr.iter().product()
}
pub fn num_levels(&self) -> usize {
self.filters.len()
}
pub fn memory_usage(&self) -> usize {
self.filters.iter().map(|f| f.memory_usage()).sum()
}
pub fn to_bytes(&self) -> Vec<u8> {
use byteorder::{LittleEndian, WriteBytesExt};
let mut buf = Vec::new();
buf.extend_from_slice(&[0x42, 0x43, 0x46, 0x01]);
buf.write_u64::<LittleEndian>(self.expected_items as u64)
.unwrap();
buf.write_u64::<LittleEndian>(self.filters.len() as u64)
.unwrap();
for &fpr in &self.level_fpr {
buf.write_u64::<LittleEndian>(fpr.to_bits()).unwrap();
}
for filter in &self.filters {
let filter_bytes = filter.to_bytes();
buf.write_u64::<LittleEndian>(filter_bytes.len() as u64)
.unwrap();
buf.extend_from_slice(&filter_bytes);
}
let checksum = blake3::hash(&buf);
buf.extend_from_slice(checksum.as_bytes());
buf
}
pub fn from_bytes(bytes: &[u8]) -> std::io::Result<Self> {
use byteorder::{LittleEndian, ReadBytesExt};
use std::io::{Cursor, Error, ErrorKind};
const CHECKSUM_SIZE: usize = 32;
const MIN_SIZE: usize = 4 + 16 + CHECKSUM_SIZE;
if bytes.len() < MIN_SIZE {
return Err(Error::new(
ErrorKind::InvalidData,
"Bloom cascade data too small",
));
}
if bytes[..4] != [0x42, 0x43, 0x46, 0x01] {
return Err(Error::new(
ErrorKind::InvalidData,
"Invalid bloom cascade magic number",
));
}
let data_len = bytes.len() - CHECKSUM_SIZE;
let (data, stored_checksum) = bytes.split_at(data_len);
let computed_checksum = blake3::hash(data);
if computed_checksum.as_bytes() != stored_checksum {
return Err(Error::new(
ErrorKind::InvalidData,
"Bloom cascade checksum mismatch",
));
}
let mut cursor = Cursor::new(&bytes[4..]);
let expected_items = cursor.read_u64::<LittleEndian>()? as usize;
let num_levels = cursor.read_u64::<LittleEndian>()? as usize;
let mut level_fpr = Vec::with_capacity(num_levels);
for _ in 0..num_levels {
let bits = cursor.read_u64::<LittleEndian>()?;
level_fpr.push(f64::from_bits(bits));
}
let mut filters = Vec::with_capacity(num_levels);
for _ in 0..num_levels {
let filter_len = cursor.read_u64::<LittleEndian>()? as usize;
let pos = cursor.position() as usize;
let filter_bytes = &bytes[4 + pos..4 + pos + filter_len];
cursor.set_position((pos + filter_len) as u64);
let filter = BlockedBloomFilter::from_bytes(filter_bytes)?;
filters.push(filter);
}
Ok(Self {
filters,
level_fpr,
expected_items,
})
}
}
const BLOOM_MAGIC_V2: [u8; 4] = [0x42, 0x4C, 0x4D, 0x02]; const BLOCKED_BLOOM_MAGIC: [u8; 4] = [0x42, 0x42, 0x46, 0x01];
#[derive(Debug)]
pub enum UnifiedBloomFilter {
Standard(BloomFilter),
Blocked(BlockedBloomFilter),
}
impl UnifiedBloomFilter {
pub fn contains<T: std::hash::Hash>(&self, item: &T) -> bool {
match self {
UnifiedBloomFilter::Standard(bf) => bf.contains(item),
UnifiedBloomFilter::Blocked(bbf) => bbf.contains(item),
}
}
pub fn from_bytes(bytes: &[u8]) -> std::io::Result<Self> {
use std::io::{Error, ErrorKind};
if bytes.len() < 4 {
return Err(Error::new(
ErrorKind::InvalidData,
"Bloom filter data too small to detect format",
));
}
if bytes[..4] == BLOCKED_BLOOM_MAGIC {
BlockedBloomFilter::from_bytes(bytes).map(UnifiedBloomFilter::Blocked)
} else if bytes[..4] == BLOOM_MAGIC_V2 || bytes.len() >= 56 {
BloomFilter::from_bytes(bytes).map(UnifiedBloomFilter::Standard)
} else {
Err(Error::new(
ErrorKind::InvalidData,
"Unknown bloom filter format",
))
}
}
pub fn memory_size(&self) -> usize {
match self {
UnifiedBloomFilter::Standard(bf) => bf.memory_size(),
UnifiedBloomFilter::Blocked(bbf) => bbf.memory_usage(),
}
}
}
#[derive(Debug)]
pub struct BlockedBloomFilter {
blocks: Vec<[u64; 8]>,
num_blocks: usize,
num_hashes: usize,
expected_items: usize,
}
impl BlockedBloomFilter {
pub fn new(expected_items: usize, false_positive_rate: f64) -> Self {
let bits_per_key = (-1.44 * false_positive_rate.ln()).ceil() as usize;
let bits_per_key = bits_per_key.max(4);
let total_bits = expected_items * bits_per_key;
let num_blocks = total_bits.div_ceil(BLOCK_BITS);
let num_blocks = num_blocks.max(1);
let num_hashes = ((0.693 * bits_per_key as f64).ceil() as usize).clamp(1, 8);
let blocks = vec![[0u64; 8]; num_blocks];
Self {
blocks,
num_blocks,
num_hashes,
expected_items,
}
}
pub fn for_level(expected_items: usize, level: usize) -> Self {
let fpr = LevelAdaptiveFPR::default().for_level(level);
Self::new(expected_items, fpr)
}
pub fn insert<T: Hash>(&mut self, item: &T) {
let (block_idx, h2) = self.compute_hashes(item);
let block = &mut self.blocks[block_idx];
for i in 0..self.num_hashes {
let bit_pos = (h2.wrapping_add(i * 31)) % BLOCK_BITS;
let word_idx = bit_pos / 64;
let bit_offset = bit_pos % 64;
block[word_idx] |= 1u64 << bit_offset;
}
}
pub fn contains<T: Hash>(&self, item: &T) -> bool {
let (block_idx, h2) = self.compute_hashes(item);
let block = &self.blocks[block_idx];
for i in 0..self.num_hashes {
let bit_pos = (h2.wrapping_add(i * 31)) % BLOCK_BITS;
let word_idx = bit_pos / 64;
let bit_offset = bit_pos % 64;
if (block[word_idx] & (1u64 << bit_offset)) == 0 {
return false;
}
}
true
}
fn compute_hashes<T: Hash>(&self, item: &T) -> (usize, usize) {
use std::collections::hash_map::DefaultHasher;
let mut hasher = DefaultHasher::new();
item.hash(&mut hasher);
let h1 = hasher.finish();
let block_idx = (h1 as usize) % self.num_blocks;
let h2 = ((h1 >> 32) as usize) | ((h1 as usize) << 32);
(block_idx, h2)
}
pub fn to_bytes(&self) -> Vec<u8> {
use byteorder::{LittleEndian, WriteBytesExt};
let mut buf = Vec::new();
buf.extend_from_slice(&[0x42, 0x42, 0x46, 0x01]);
buf.write_u64::<LittleEndian>(self.num_blocks as u64)
.unwrap();
buf.write_u64::<LittleEndian>(self.num_hashes as u64)
.unwrap();
buf.write_u64::<LittleEndian>(self.expected_items as u64)
.unwrap();
for block in &self.blocks {
for &word in block {
buf.write_u64::<LittleEndian>(word).unwrap();
}
}
let checksum = blake3::hash(&buf);
buf.extend_from_slice(checksum.as_bytes());
buf
}
pub fn from_bytes(bytes: &[u8]) -> std::io::Result<Self> {
use byteorder::{LittleEndian, ReadBytesExt};
use std::io::{Cursor, Error, ErrorKind};
const MAGIC: [u8; 4] = [0x42, 0x42, 0x46, 0x01]; const CHECKSUM_SIZE: usize = 32;
const HEADER_SIZE: usize = 4 + 24;
if bytes.len() < HEADER_SIZE + CHECKSUM_SIZE {
return Err(Error::new(
ErrorKind::InvalidData,
"Blocked bloom filter data too small",
));
}
if bytes[..4] != MAGIC {
return Err(Error::new(
ErrorKind::InvalidData,
"Invalid blocked bloom filter magic number",
));
}
let data_len = bytes.len() - CHECKSUM_SIZE;
let (data, stored_checksum) = bytes.split_at(data_len);
let computed_checksum = blake3::hash(data);
if computed_checksum.as_bytes() != stored_checksum {
return Err(Error::new(
ErrorKind::InvalidData,
"Blocked bloom filter checksum mismatch",
));
}
let mut cursor = Cursor::new(&bytes[4..]);
let num_blocks = cursor.read_u64::<LittleEndian>()? as usize;
let num_hashes = cursor.read_u64::<LittleEndian>()? as usize;
let expected_items = cursor.read_u64::<LittleEndian>()? as usize;
let expected_data_size = HEADER_SIZE + num_blocks * 64 + CHECKSUM_SIZE;
if bytes.len() != expected_data_size {
return Err(Error::new(
ErrorKind::InvalidData,
format!(
"Blocked bloom filter size mismatch: {} vs {}",
bytes.len(),
expected_data_size
),
));
}
let mut blocks = Vec::with_capacity(num_blocks);
for _ in 0..num_blocks {
let mut block = [0u64; 8];
for word in &mut block {
*word = cursor.read_u64::<LittleEndian>()?;
}
blocks.push(block);
}
Ok(Self {
blocks,
num_blocks,
num_hashes,
expected_items,
})
}
pub fn size_bytes(&self) -> usize {
4 + 24 + self.num_blocks * 64 + 32 }
pub fn memory_usage(&self) -> usize {
std::mem::size_of::<Self>() + self.num_blocks * 64
}
pub fn fill_ratio(&self) -> f64 {
let set_bits: usize = self
.blocks
.iter()
.flat_map(|b| b.iter())
.map(|w| w.count_ones() as usize)
.sum();
let total_bits = self.num_blocks * BLOCK_BITS;
set_bits as f64 / total_bits as f64
}
}
#[cfg(test)]
mod blocked_bloom_tests {
use super::*;
#[test]
fn test_blocked_bloom_basic() {
let mut bloom = BlockedBloomFilter::new(1000, 0.01);
for i in 0..100 {
bloom.insert(&i);
}
for i in 0..100 {
assert!(bloom.contains(&i), "Missing inserted item {}", i);
}
let mut false_positives = 0;
for i in 100..1000 {
if bloom.contains(&i) {
false_positives += 1;
}
}
let fp_rate = false_positives as f64 / 900.0;
assert!(fp_rate < 0.05, "False positive rate too high: {}", fp_rate);
}
#[test]
fn test_blocked_bloom_serialization() {
let mut bloom = BlockedBloomFilter::new(100, 0.01);
for i in 0..50 {
bloom.insert(&i);
}
let bytes = bloom.to_bytes();
let restored = BlockedBloomFilter::from_bytes(&bytes).unwrap();
for i in 0..50 {
assert!(
restored.contains(&i),
"Missing item {} after deserialize",
i
);
}
}
#[test]
fn test_blocked_bloom_level_adaptive() {
let l0_bloom = BlockedBloomFilter::for_level(1000, 0);
let l2_bloom = BlockedBloomFilter::for_level(1000, 2);
assert!(
l0_bloom.memory_usage() >= l2_bloom.memory_usage(),
"L0 bloom should use >= memory than L2"
);
}
#[test]
fn test_blocked_bloom_u128_keys() {
let mut bloom = BlockedBloomFilter::new(1000, 0.01);
let edge_ids: Vec<u128> = (0..100).map(|i| i as u128 * 12345678901234567890).collect();
for id in &edge_ids {
bloom.insert(id);
}
for id in &edge_ids {
assert!(bloom.contains(id));
}
}
#[test]
fn test_blocked_bloom_checksum_corruption() {
let mut bloom = BlockedBloomFilter::new(100, 0.01);
bloom.insert(&42);
let mut bytes = bloom.to_bytes();
if bytes.len() > 10 {
bytes[10] ^= 0xFF;
}
assert!(BlockedBloomFilter::from_bytes(&bytes).is_err());
}
#[test]
fn test_blocked_bloom_fill_ratio() {
let mut bloom = BlockedBloomFilter::new(1000, 0.01);
assert_eq!(bloom.fill_ratio(), 0.0);
for i in 0..500 {
bloom.insert(&i);
}
let ratio = bloom.fill_ratio();
assert!(ratio > 0.0 && ratio < 1.0);
}
#[test]
fn test_level_adaptive_fpr() {
let fpr = LevelAdaptiveFPR::default();
assert_eq!(fpr.for_level(0), 0.001);
assert_eq!(fpr.for_level(1), 0.005);
assert_eq!(fpr.for_level(2), 0.01);
assert_eq!(fpr.for_level(5), 0.01);
}
#[test]
fn test_cascade_basic() {
let mut cascade = BloomFilterCascade::default_cascade(1000);
for i in 0..100 {
cascade.insert(&i);
}
for i in 0..100 {
assert!(cascade.contains(&i), "Item {} should be found", i);
}
assert_eq!(cascade.num_levels(), 3);
assert!(cascade.combined_fpr() < 0.0001);
}
#[test]
fn test_cascade_early_termination() {
let mut cascade = BloomFilterCascade::new(1000, vec![0.5, 0.5, 0.5]);
cascade.insert(&42);
let (found, level) = cascade.contains_with_level(&42);
assert!(found);
assert_eq!(level, 3);
let (found, level) = cascade.contains_with_level(&999999);
if !found {
assert!(level < 3, "Should fail before reaching all levels");
}
}
#[test]
fn test_cascade_serialization() {
let mut cascade = BloomFilterCascade::default_cascade(500);
for i in 0..100 {
cascade.insert(&i);
}
let bytes = cascade.to_bytes();
let restored = BloomFilterCascade::from_bytes(&bytes).unwrap();
assert_eq!(restored.num_levels(), cascade.num_levels());
assert_eq!(restored.expected_items, cascade.expected_items);
for i in 0..100 {
assert!(restored.contains(&i));
}
}
#[test]
fn test_cascade_memory() {
let cascade = BloomFilterCascade::default_cascade(10000);
let mem = cascade.memory_usage();
assert!(mem > 0);
let combined_fpr = cascade.combined_fpr();
assert!(
combined_fpr < 0.000001,
"Combined FPR should be < 1 in million"
);
}
#[test]
fn test_cascade_compact() {
let compact = BloomFilterCascade::compact(1000);
let full = BloomFilterCascade::default_cascade(1000);
assert_eq!(compact.num_levels(), 2);
assert_eq!(full.num_levels(), 3);
assert!(compact.combined_fpr() < 0.0001);
}
}