rustywallet_bloom/
counting.rs1use crate::hash::{double_hash, fnv1a_64, fnv1a_64_seeded};
7
8#[derive(Debug, Clone, PartialEq, Eq)]
10pub enum CountingBloomError {
11 CounterUnderflow,
13 CounterOverflow,
15}
16
17impl std::fmt::Display for CountingBloomError {
18 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
19 match self {
20 CountingBloomError::CounterUnderflow => {
21 write!(f, "Counter underflow: item not in filter or already removed")
22 }
23 CountingBloomError::CounterOverflow => {
24 write!(f, "Counter overflow: too many insertions of same item")
25 }
26 }
27 }
28}
29
30impl std::error::Error for CountingBloomError {}
31
32pub struct CountingBloomFilter {
54 counters: Vec<u8>,
56 num_counters: usize,
58 num_hashes: usize,
60 count: usize,
62}
63
64impl CountingBloomFilter {
65 pub fn new(expected_items: usize, false_positive_rate: f64) -> Self {
82 let fpr = false_positive_rate.clamp(1e-10, 0.5);
83 let n = expected_items.max(1) as f64;
84
85 let ln2_sq = std::f64::consts::LN_2 * std::f64::consts::LN_2;
87 let num_counters = ((-n * fpr.ln()) / ln2_sq).ceil() as usize;
88 let num_counters = num_counters.max(64);
89
90 let num_hashes = ((num_counters as f64 / n) * std::f64::consts::LN_2).ceil() as usize;
92 let num_hashes = num_hashes.clamp(3, 20);
93
94 let num_bytes = num_counters.div_ceil(2);
96
97 Self {
98 counters: vec![0u8; num_bytes],
99 num_counters,
100 num_hashes,
101 count: 0,
102 }
103 }
104
105 pub fn with_params(num_counters: usize, num_hashes: usize) -> Self {
112 let num_counters = num_counters.max(64);
113 let num_hashes = num_hashes.clamp(1, 20);
114 let num_bytes = num_counters.div_ceil(2);
115
116 Self {
117 counters: vec![0u8; num_bytes],
118 num_counters,
119 num_hashes,
120 count: 0,
121 }
122 }
123
124 pub fn insert<T: AsRef<[u8]>>(&mut self, item: T) {
139 let data = item.as_ref();
140 let h1 = fnv1a_64(data);
141 let h2 = fnv1a_64_seeded(data, 0x9e3779b97f4a7c15);
142
143 for i in 0..self.num_hashes {
144 let pos = double_hash(h1, h2, i, self.num_counters);
145 self.increment_counter(pos);
146 }
147
148 self.count += 1;
149 }
150
151 pub fn try_insert<T: AsRef<[u8]>>(&mut self, item: T) -> Result<(), CountingBloomError> {
156 let data = item.as_ref();
157 let h1 = fnv1a_64(data);
158 let h2 = fnv1a_64_seeded(data, 0x9e3779b97f4a7c15);
159
160 for i in 0..self.num_hashes {
162 let pos = double_hash(h1, h2, i, self.num_counters);
163 if self.get_counter(pos) >= 15 {
164 return Err(CountingBloomError::CounterOverflow);
165 }
166 }
167
168 for i in 0..self.num_hashes {
170 let pos = double_hash(h1, h2, i, self.num_counters);
171 self.increment_counter(pos);
172 }
173
174 self.count += 1;
175 Ok(())
176 }
177
178 pub fn remove<T: AsRef<[u8]>>(&mut self, item: T) -> Result<(), CountingBloomError> {
201 let data = item.as_ref();
202 let h1 = fnv1a_64(data);
203 let h2 = fnv1a_64_seeded(data, 0x9e3779b97f4a7c15);
204
205 for i in 0..self.num_hashes {
207 let pos = double_hash(h1, h2, i, self.num_counters);
208 if self.get_counter(pos) == 0 {
209 return Err(CountingBloomError::CounterUnderflow);
210 }
211 }
212
213 for i in 0..self.num_hashes {
215 let pos = double_hash(h1, h2, i, self.num_counters);
216 self.decrement_counter(pos);
217 }
218
219 self.count = self.count.saturating_sub(1);
220 Ok(())
221 }
222
223 #[inline]
228 pub fn contains<T: AsRef<[u8]>>(&self, item: T) -> bool {
229 let data = item.as_ref();
230 let h1 = fnv1a_64(data);
231 let h2 = fnv1a_64_seeded(data, 0x9e3779b97f4a7c15);
232
233 for i in 0..self.num_hashes {
234 let pos = double_hash(h1, h2, i, self.num_counters);
235 if self.get_counter(pos) == 0 {
236 return false;
237 }
238 }
239
240 true
241 }
242
243 pub fn count_estimate<T: AsRef<[u8]>>(&self, item: T) -> u8 {
248 let data = item.as_ref();
249 let h1 = fnv1a_64(data);
250 let h2 = fnv1a_64_seeded(data, 0x9e3779b97f4a7c15);
251
252 let mut min_count = u8::MAX;
253 for i in 0..self.num_hashes {
254 let pos = double_hash(h1, h2, i, self.num_counters);
255 min_count = min_count.min(self.get_counter(pos));
256 }
257
258 min_count
259 }
260
261 pub fn len(&self) -> usize {
263 self.count
264 }
265
266 pub fn is_empty(&self) -> bool {
268 self.count == 0
269 }
270
271 pub fn memory_usage(&self) -> usize {
273 self.counters.len()
274 }
275
276 pub fn num_counters(&self) -> usize {
278 self.num_counters
279 }
280
281 pub fn num_hashes(&self) -> usize {
283 self.num_hashes
284 }
285
286 pub fn clear(&mut self) {
288 self.counters.fill(0);
289 self.count = 0;
290 }
291
292 #[inline]
294 fn get_counter(&self, pos: usize) -> u8 {
295 let byte_idx = pos / 2;
296 let nibble = pos % 2;
297
298 if nibble == 0 {
299 self.counters[byte_idx] & 0x0F
300 } else {
301 (self.counters[byte_idx] >> 4) & 0x0F
302 }
303 }
304
305 #[inline]
307 fn increment_counter(&mut self, pos: usize) {
308 let byte_idx = pos / 2;
309 let nibble = pos % 2;
310
311 let current = self.get_counter(pos);
312 if current < 15 {
313 if nibble == 0 {
314 self.counters[byte_idx] = (self.counters[byte_idx] & 0xF0) | (current + 1);
315 } else {
316 self.counters[byte_idx] = (self.counters[byte_idx] & 0x0F) | ((current + 1) << 4);
317 }
318 }
319 }
320
321 #[inline]
323 fn decrement_counter(&mut self, pos: usize) {
324 let byte_idx = pos / 2;
325 let nibble = pos % 2;
326
327 let current = self.get_counter(pos);
328 if current > 0 {
329 if nibble == 0 {
330 self.counters[byte_idx] = (self.counters[byte_idx] & 0xF0) | (current - 1);
331 } else {
332 self.counters[byte_idx] = (self.counters[byte_idx] & 0x0F) | ((current - 1) << 4);
333 }
334 }
335 }
336}
337
338impl std::fmt::Debug for CountingBloomFilter {
339 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
340 f.debug_struct("CountingBloomFilter")
341 .field("items", &self.count)
342 .field("counters", &self.num_counters)
343 .field("hashes", &self.num_hashes)
344 .field("memory_kb", &(self.memory_usage() / 1024))
345 .finish()
346 }
347}
348
349#[cfg(test)]
350mod tests {
351 use super::*;
352
353 #[test]
354 fn test_insert_and_contains() {
355 let mut filter = CountingBloomFilter::new(100, 0.01);
356
357 filter.insert("test1");
358 filter.insert("test2");
359
360 assert!(filter.contains("test1"));
361 assert!(filter.contains("test2"));
362 assert!(!filter.contains("test3"));
363 assert_eq!(filter.len(), 2);
364 }
365
366 #[test]
367 fn test_remove() {
368 let mut filter = CountingBloomFilter::new(100, 0.01);
369
370 filter.insert("test1");
371 filter.insert("test2");
372 assert!(filter.contains("test1"));
373
374 filter.remove("test1").unwrap();
375 assert!(!filter.contains("test1"));
376 assert!(filter.contains("test2"));
377 assert_eq!(filter.len(), 1);
378 }
379
380 #[test]
381 fn test_remove_underflow() {
382 let mut filter = CountingBloomFilter::new(100, 0.01);
383
384 let result = filter.remove("nonexistent");
386 assert_eq!(result, Err(CountingBloomError::CounterUnderflow));
387 }
388
389 #[test]
390 fn test_multiple_insert_remove() {
391 let mut filter = CountingBloomFilter::new(100, 0.01);
392
393 filter.insert("test");
395 filter.insert("test");
396 filter.insert("test");
397
398 assert!(filter.contains("test"));
399 assert!(filter.count_estimate("test") >= 3);
400
401 filter.remove("test").unwrap();
403 assert!(filter.contains("test"));
404
405 filter.remove("test").unwrap();
407 assert!(filter.contains("test"));
408
409 filter.remove("test").unwrap();
411 assert!(!filter.contains("test"));
412 }
413
414 #[test]
415 fn test_clear() {
416 let mut filter = CountingBloomFilter::new(100, 0.01);
417
418 filter.insert("test1");
419 filter.insert("test2");
420 assert!(filter.contains("test1"));
421
422 filter.clear();
423 assert!(!filter.contains("test1"));
424 assert!(!filter.contains("test2"));
425 assert_eq!(filter.len(), 0);
426 }
427
428 #[test]
429 fn test_with_params() {
430 let filter = CountingBloomFilter::with_params(10_000, 5);
431 assert_eq!(filter.num_counters(), 10_000);
432 assert_eq!(filter.num_hashes(), 5);
433 }
434
435 #[test]
436 fn test_memory_usage() {
437 let filter = CountingBloomFilter::new(10_000, 0.01);
438 let bytes = filter.memory_usage();
441 assert!(bytes > 0);
442 }
443
444 #[test]
445 fn test_counter_nibbles() {
446 let mut filter = CountingBloomFilter::with_params(100, 1);
447
448 for i in 0..16 {
450 filter.increment_counter(0);
451 }
452 assert_eq!(filter.get_counter(0), 15);
454
455 for i in 0..10 {
457 filter.increment_counter(1);
458 }
459 assert_eq!(filter.get_counter(1), 10);
460 assert_eq!(filter.get_counter(0), 15); }
462}