aura_core/effects/
bloom.rs1use crate::AuraError;
24use serde::{Deserialize, Serialize};
25
26pub const MAX_BLOOM_BITS_BYTES: usize = 1_048_576;
27
28pub type BloomError = AuraError;
30
31#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
33pub struct BloomConfig {
34 pub expected_elements: u64,
36 pub false_positive_rate: f64,
38 pub num_hash_functions: u32,
40 pub bit_vector_size: u64,
42}
43
44impl BloomConfig {
45 pub fn optimal(expected_elements: u64, false_positive_rate: f64) -> Self {
50 let n = expected_elements as f64;
52 let p = false_positive_rate.clamp(0.00001, 0.99999); let m = (-n * p.ln() / (2.0_f64.ln().powi(2))).ceil() as u64;
54
55 let k = ((m as f64 / n) * 2.0_f64.ln()).round() as u32;
57
58 Self {
59 expected_elements,
60 false_positive_rate,
61 num_hash_functions: k.clamp(1, 32), bit_vector_size: m.max(64), }
64 }
65
66 pub fn for_sync(expected_ops: u64) -> Self {
68 Self::optimal(expected_ops, 0.01)
70 }
71
72 pub fn for_small_set(expected_elements: u64) -> Self {
74 Self::optimal(expected_elements, 0.001)
76 }
77
78 pub fn validate(&self) -> Result<(), BloomError> {
80 if self.expected_elements == 0 {
81 return Err(BloomError::invalid("Expected elements must be > 0"));
82 }
83 if self.false_positive_rate <= 0.0 || self.false_positive_rate >= 1.0 {
84 return Err(BloomError::invalid(
85 "False positive rate must be between 0.0 and 1.0",
86 ));
87 }
88 if self.num_hash_functions == 0 || self.num_hash_functions > 32 {
89 return Err(BloomError::invalid(
90 "Number of hash functions must be between 1 and 32",
91 ));
92 }
93 if self.bit_vector_size < 64 {
94 return Err(BloomError::invalid("Bit vector size must be at least 64"));
95 }
96 Ok(())
97 }
98}
99
100#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
102pub struct BloomFilter {
103 pub bits: Vec<u8>,
105 pub config: BloomConfig,
107 pub element_count: u64,
109}
110
111impl BloomFilter {
112 pub fn new(config: BloomConfig) -> Result<Self, BloomError> {
114 config.validate()?;
115
116 let byte_size = config.bit_vector_size.div_ceil(8); Ok(Self {
118 bits: vec![0u8; byte_size as usize],
119 config,
120 element_count: 0,
121 })
122 }
123
124 pub fn bit_size(&self) -> u64 {
126 self.config.bit_vector_size
127 }
128
129 pub fn byte_size(&self) -> u64 {
131 self.bits.len() as u64
132 }
133
134 pub fn is_empty(&self) -> bool {
136 self.element_count == 0
137 }
138
139 pub fn current_false_positive_rate(&self) -> f64 {
141 if self.element_count == 0 {
142 return 0.0;
143 }
144
145 let k = self.config.num_hash_functions as f64;
147 let n = self.element_count as f64;
148 let m = self.config.bit_vector_size as f64;
149
150 (1.0 - (-k * n / m).exp()).powf(k)
151 }
152
153 pub fn should_rebuild(&self) -> bool {
155 self.element_count > self.config.expected_elements * 2
156 || self.current_false_positive_rate() > self.config.false_positive_rate * 2.0
157 }
158}
159
160impl BloomConfig {
162 pub fn oplog_sync() -> Self {
164 Self::optimal(1000, 0.01) }
166
167 pub fn peer_discovery() -> Self {
169 Self::optimal(100, 0.001) }
171
172 pub fn chunk_dedup() -> Self {
174 Self::optimal(10000, 0.005) }
176}
177
178impl BloomFilter {
179 pub fn for_oplog_sync() -> Result<Self, BloomError> {
181 Self::new(BloomConfig::oplog_sync())
182 }
183
184 pub fn for_peer_discovery() -> Result<Self, BloomError> {
186 Self::new(BloomConfig::peer_discovery())
187 }
188
189 pub fn for_chunk_dedup() -> Result<Self, BloomError> {
191 Self::new(BloomConfig::chunk_dedup())
192 }
193}
194
195#[cfg(test)]
196mod tests {
197 use super::*;
198
199 #[test]
200 fn test_bloom_config_optimal() {
201 let config = BloomConfig::optimal(1000, 0.01);
202 assert!(config.validate().is_ok());
203 assert_eq!(config.expected_elements, 1000);
204 assert!((config.false_positive_rate - 0.01).abs() < f64::EPSILON);
205 assert!(config.num_hash_functions > 0);
206 assert!(config.bit_vector_size >= 64);
207 }
208
209 #[test]
210 fn test_bloom_config_validation() {
211 let valid_config = BloomConfig {
213 expected_elements: 100,
214 false_positive_rate: 0.01,
215 num_hash_functions: 7,
216 bit_vector_size: 1000,
217 };
218 assert!(valid_config.validate().is_ok());
219
220 let invalid_config = BloomConfig {
222 expected_elements: 0,
223 ..valid_config
224 };
225 assert!(invalid_config.validate().is_err());
226
227 let invalid_config = BloomConfig {
229 false_positive_rate: 0.0,
230 ..valid_config
231 };
232 assert!(invalid_config.validate().is_err());
233
234 let invalid_config = BloomConfig {
235 false_positive_rate: 1.0,
236 ..valid_config
237 };
238 assert!(invalid_config.validate().is_err());
239 }
240
241 #[test]
242 fn test_bloom_filter_creation() {
243 let config = BloomConfig::optimal(100, 0.01);
244 let filter = BloomFilter::new(config.clone()).unwrap();
245
246 assert_eq!(filter.config, config);
247 assert_eq!(filter.element_count, 0);
248 assert!(filter.is_empty());
249 assert!(filter.bit_size() >= 64);
250 }
251
252 #[test]
253 fn test_bloom_config_presets() {
254 let sync_config = BloomConfig::oplog_sync();
255 assert!(sync_config.validate().is_ok());
256 assert_eq!(sync_config.expected_elements, 1000);
257
258 let peer_config = BloomConfig::peer_discovery();
259 assert!(peer_config.validate().is_ok());
260 assert_eq!(peer_config.expected_elements, 100);
261
262 let chunk_config = BloomConfig::chunk_dedup();
263 assert!(chunk_config.validate().is_ok());
264 assert_eq!(chunk_config.expected_elements, 10000);
265 }
266
267 #[test]
268 fn test_filter_presets() {
269 let oplog_filter = BloomFilter::for_oplog_sync().unwrap();
270 assert!(oplog_filter.is_empty());
271
272 let peer_filter = BloomFilter::for_peer_discovery().unwrap();
273 assert!(peer_filter.is_empty());
274
275 let chunk_filter = BloomFilter::for_chunk_dedup().unwrap();
276 assert!(chunk_filter.is_empty());
277 }
278
279 #[test]
280 fn test_false_positive_rate_estimation() {
281 let config = BloomConfig::optimal(100, 0.01);
282 let mut filter = BloomFilter::new(config).unwrap();
283
284 assert_eq!(filter.current_false_positive_rate(), 0.0);
286
287 filter.element_count = 50;
289 let rate_50 = filter.current_false_positive_rate();
290
291 filter.element_count = 100;
292 let rate_100 = filter.current_false_positive_rate();
293
294 assert!(rate_100 > rate_50);
295 }
296
297 #[test]
298 fn test_should_rebuild() {
299 let config = BloomConfig::optimal(100, 0.01);
300 let mut filter = BloomFilter::new(config).unwrap();
301
302 assert!(!filter.should_rebuild());
304
305 filter.element_count = 100;
307 assert!(!filter.should_rebuild());
308
309 filter.element_count = 300;
311 assert!(filter.should_rebuild());
312 }
313}