subms_bloom_filter/
lib.rs1#[cfg(feature = "harness")]
18pub mod recipe;
19
20#[cfg(any(feature = "counting", feature = "scalable", feature = "partitioned"))]
27pub mod features;
28
29#[cfg(feature = "counting")]
30pub use features::counting::CountingBloomFilter;
31#[cfg(feature = "partitioned")]
32pub use features::partitioned::PartitionedBloomFilter;
33#[cfg(feature = "scalable")]
34pub use features::scalable::ScalableBloomFilter;
35
36use std::io::{self, Write};
37
38pub(crate) const FNV_OFFSET: u64 = 0xcbf29ce484222325;
39pub(crate) const FNV_PRIME: u64 = 0x100000001b3;
40
41pub struct BloomFilter {
42 bit_count: u32,
43 k: u32,
44 bits: Vec<u64>,
45}
46
47impl BloomFilter {
48 pub fn new(expected_entries: usize) -> Self {
51 let bit_count = expected_entries.saturating_mul(10).max(64) as u32;
52 let words = (bit_count as usize).div_ceil(64);
53 Self {
54 bit_count,
55 k: 7,
56 bits: vec![0u64; words],
57 }
58 }
59
60 pub fn add(&mut self, key: &str) {
61 let h = fnv1a64(key);
62 let h1 = h as u32;
63 let h2 = ((h >> 32) as u32) | 1;
64 for i in 0..self.k {
65 let idx = h1.wrapping_add(i.wrapping_mul(h2)) % self.bit_count;
66 self.bits[(idx / 64) as usize] |= 1u64 << (idx % 64);
67 }
68 }
69
70 pub fn might_contain(&self, key: &str) -> bool {
71 let h = fnv1a64(key);
72 let h1 = h as u32;
73 let h2 = ((h >> 32) as u32) | 1;
74 for i in 0..self.k {
75 let idx = h1.wrapping_add(i.wrapping_mul(h2)) % self.bit_count;
76 if self.bits[(idx / 64) as usize] & (1u64 << (idx % 64)) == 0 {
77 return false;
78 }
79 }
80 true
81 }
82
83 pub fn bit_count(&self) -> u32 {
84 self.bit_count
85 }
86
87 pub fn k(&self) -> u32 {
88 self.k
89 }
90
91 pub fn write_to<W: Write>(&self, out: &mut W) -> io::Result<()> {
92 out.write_all(&self.bit_count.to_be_bytes())?;
93 out.write_all(&self.k.to_be_bytes())?;
94 out.write_all(&(self.bits.len() as u32).to_be_bytes())?;
95 for w in &self.bits {
96 out.write_all(&w.to_be_bytes())?;
97 }
98 Ok(())
99 }
100
101 pub fn parse(buf: &[u8]) -> io::Result<Self> {
104 if buf.len() < 12 {
105 return Err(io::Error::new(
106 io::ErrorKind::InvalidData,
107 "bloom section too short",
108 ));
109 }
110 let bit_count = u32::from_be_bytes(buf[0..4].try_into().unwrap());
111 let k = u32::from_be_bytes(buf[4..8].try_into().unwrap());
112 let words = u32::from_be_bytes(buf[8..12].try_into().unwrap()) as usize;
113 if buf.len() < 12 + words * 8 {
114 return Err(io::Error::new(
115 io::ErrorKind::InvalidData,
116 "bloom section truncated",
117 ));
118 }
119 let mut bits = Vec::with_capacity(words);
120 for i in 0..words {
121 let off = 12 + i * 8;
122 bits.push(u64::from_be_bytes(buf[off..off + 8].try_into().unwrap()));
123 }
124 Ok(Self { bit_count, k, bits })
125 }
126}
127
128pub(crate) fn fnv1a64(key: &str) -> u64 {
129 let mut h = FNV_OFFSET;
130 for &b in key.as_bytes() {
131 h ^= b as u64;
132 h = h.wrapping_mul(FNV_PRIME);
133 }
134 h
135}