1const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325u64;
16const FNV_PRIME: u64 = 0x00000100000001b3u64;
17
18fn fnv1a_64_seeded(data: &[u8], seed: u64) -> u64 {
23 let mut hash = seed;
24 for &byte in data {
25 hash ^= u64::from(byte);
26 hash = hash.wrapping_mul(FNV_PRIME);
27 }
28 hash
29}
30
31#[inline]
33fn h1(data: &[u8]) -> u64 {
34 fnv1a_64_seeded(data, FNV_OFFSET_BASIS)
35}
36
37#[inline]
42fn h2(data: &[u8]) -> u64 {
43 let seed = FNV_OFFSET_BASIS ^ 0xdeadbeefcafe1337u64;
45 fnv1a_64_seeded(data, seed) | 1
48}
49
50#[inline]
54fn double_hash_position(h1_val: u64, h2_val: u64, i: u64, num_bits: usize) -> usize {
55 let nb = num_bits as u64;
56 (h1_val.wrapping_add(i.wrapping_mul(h2_val)) % nb) as usize
58}
59
60fn optimal_num_bits(expected_items: usize, false_positive_rate: f64) -> usize {
66 let n = expected_items as f64;
67 let p = false_positive_rate.clamp(1e-15, 1.0 - f64::EPSILON);
68 let ln2_sq = std::f64::consts::LN_2 * std::f64::consts::LN_2;
69 let m = (-n * p.ln() / ln2_sq).ceil() as usize;
70 m.max(8)
72}
73
74fn optimal_num_hash_functions(num_bits: usize, expected_items: usize) -> u8 {
78 if expected_items == 0 {
79 return 1;
80 }
81 let m = num_bits as f64;
82 let n = expected_items as f64;
83 let k = ((m / n) * std::f64::consts::LN_2).round() as u64;
84 k.clamp(1, 255) as u8
86}
87
88#[derive(Debug, Clone)]
96pub struct BloomFilter {
97 bit_array: Vec<u8>,
99 num_bits: usize,
102 num_hash_functions: u8,
104 num_items: u64,
106}
107
108impl BloomFilter {
109 pub fn new(expected_items: usize, false_positive_rate: f64) -> Self {
115 assert!(expected_items > 0, "expected_items must be > 0");
116 assert!(
117 false_positive_rate > 0.0 && false_positive_rate < 1.0,
118 "false_positive_rate must be in (0, 1)"
119 );
120 let num_bits = optimal_num_bits(expected_items, false_positive_rate);
121 let num_hash_functions = optimal_num_hash_functions(num_bits, expected_items);
122 let byte_count = (num_bits + 7) / 8;
123 Self {
124 bit_array: vec![0u8; byte_count],
125 num_bits,
126 num_hash_functions,
127 num_items: 0,
128 }
129 }
130
131 fn set_bit(&mut self, pos: usize) {
135 let byte_idx = pos / 8;
136 let bit_idx = pos % 8;
137 if let Some(byte) = self.bit_array.get_mut(byte_idx) {
138 *byte |= 1u8 << bit_idx;
139 }
140 }
141
142 fn get_bit(&self, pos: usize) -> bool {
144 let byte_idx = pos / 8;
145 let bit_idx = pos % 8;
146 self.bit_array
147 .get(byte_idx)
148 .map(|byte| (byte >> bit_idx) & 1 == 1)
149 .unwrap_or(false)
150 }
151
152 pub fn insert(&mut self, item: &[u8]) {
157 let h1_val = h1(item);
158 let h2_val = h2(item);
159 for i in 0..self.num_hash_functions as u64 {
160 let pos = double_hash_position(h1_val, h2_val, i, self.num_bits);
161 self.set_bit(pos);
162 }
163 self.num_items += 1;
164 }
165
166 pub fn contains(&self, item: &[u8]) -> bool {
169 let h1_val = h1(item);
170 let h2_val = h2(item);
171 for i in 0..self.num_hash_functions as u64 {
172 let pos = double_hash_position(h1_val, h2_val, i, self.num_bits);
173 if !self.get_bit(pos) {
174 return false;
175 }
176 }
177 true
178 }
179
180 pub fn estimate_false_positive_rate(&self) -> f64 {
185 let k = self.num_hash_functions as f64;
186 let n = self.num_items as f64;
187 let m = self.num_bits as f64;
188 if m == 0.0 {
189 return 1.0;
190 }
191 (1.0_f64 - (-k * n / m).exp()).powf(k)
192 }
193
194 pub fn item_count(&self) -> u64 {
196 self.num_items
197 }
198
199 pub fn num_bits(&self) -> usize {
201 self.num_bits
202 }
203
204 pub fn num_hash_functions(&self) -> u8 {
206 self.num_hash_functions
207 }
208}
209
210#[derive(Debug, Clone)]
219pub struct CountingBloomFilter {
220 counts: Vec<u8>,
222 num_counters: usize,
224 num_hash_functions: u8,
226 num_items: u64,
228}
229
230impl CountingBloomFilter {
231 pub fn new(expected_items: usize, false_positive_rate: f64) -> Self {
233 assert!(expected_items > 0, "expected_items must be > 0");
234 assert!(
235 false_positive_rate > 0.0 && false_positive_rate < 1.0,
236 "false_positive_rate must be in (0, 1)"
237 );
238 let num_bits = optimal_num_bits(expected_items, false_positive_rate);
239 let num_hash_functions = optimal_num_hash_functions(num_bits, expected_items);
240 let byte_count = (num_bits + 1) / 2;
242 Self {
243 counts: vec![0u8; byte_count],
244 num_counters: num_bits,
245 num_hash_functions,
246 num_items: 0,
247 }
248 }
249
250 fn get_nibble(&self, pos: usize) -> u8 {
253 let byte_idx = pos / 2;
254 let nibble_shift = (pos % 2) * 4;
255 self.counts
256 .get(byte_idx)
257 .map(|b| (b >> nibble_shift) & 0x0F)
258 .unwrap_or(0)
259 }
260
261 fn increment_nibble(&mut self, pos: usize) {
262 let byte_idx = pos / 2;
263 let nibble_shift = (pos % 2) * 4;
264 if let Some(byte) = self.counts.get_mut(byte_idx) {
265 let nibble = (*byte >> nibble_shift) & 0x0F;
266 if nibble < 0x0F {
267 *byte += 1u8 << nibble_shift;
269 }
270 }
272 }
273
274 fn decrement_nibble(&mut self, pos: usize) -> bool {
275 let byte_idx = pos / 2;
276 let nibble_shift = (pos % 2) * 4;
277 if let Some(byte) = self.counts.get_mut(byte_idx) {
278 let nibble = (*byte >> nibble_shift) & 0x0F;
279 if nibble == 0x0F {
280 return false;
282 }
283 if nibble > 0 {
284 *byte -= 1u8 << nibble_shift;
285 return true;
286 }
287 }
288 false
289 }
290
291 pub fn insert(&mut self, item: &[u8]) {
295 let h1_val = h1(item);
296 let h2_val = h2(item);
297 for i in 0..self.num_hash_functions as u64 {
298 let pos = double_hash_position(h1_val, h2_val, i, self.num_counters);
299 self.increment_nibble(pos);
300 }
301 self.num_items += 1;
302 }
303
304 pub fn contains(&self, item: &[u8]) -> bool {
306 let h1_val = h1(item);
307 let h2_val = h2(item);
308 for i in 0..self.num_hash_functions as u64 {
309 let pos = double_hash_position(h1_val, h2_val, i, self.num_counters);
310 if self.get_nibble(pos) == 0 {
311 return false;
312 }
313 }
314 true
315 }
316
317 pub fn remove(&mut self, item: &[u8]) -> bool {
325 if !self.contains(item) {
327 return false;
328 }
329 let h1_val = h1(item);
330 let h2_val = h2(item);
331 let positions: Vec<usize> = (0..self.num_hash_functions as u64)
333 .map(|i| double_hash_position(h1_val, h2_val, i, self.num_counters))
334 .collect();
335 for &pos in &positions {
337 let nibble = self.get_nibble(pos);
338 if nibble == 0 || nibble == 0x0F {
339 return false;
340 }
341 }
342 for &pos in &positions {
344 self.decrement_nibble(pos);
345 }
346 if self.num_items > 0 {
347 self.num_items -= 1;
348 }
349 true
350 }
351
352 pub fn item_count(&self) -> u64 {
354 self.num_items
355 }
356
357 pub fn estimate_false_positive_rate(&self) -> f64 {
359 let k = self.num_hash_functions as f64;
360 let n = self.num_items as f64;
361 let m = self.num_counters as f64;
362 if m == 0.0 {
363 return 1.0;
364 }
365 (1.0_f64 - (-k * n / m).exp()).powf(k)
366 }
367}
368
369#[derive(Debug, Clone)]
384pub struct ScalableBloomFilter {
385 layers: Vec<BloomFilter>,
387 initial_capacity: usize,
389 max_fpr_per_layer: f64,
391 growth_factor: f64,
393 tightening_ratio: f64,
396 total_items: u64,
398}
399
400impl ScalableBloomFilter {
401 pub fn new(initial_capacity: usize, target_fpr: f64, growth_factor: f64) -> Self {
408 assert!(initial_capacity > 0, "initial_capacity must be > 0");
409 assert!(
410 target_fpr > 0.0 && target_fpr < 1.0,
411 "target_fpr must be in (0, 1)"
412 );
413 let gf = if growth_factor > 1.0 {
414 growth_factor
415 } else {
416 2.0
417 };
418 let first_layer = BloomFilter::new(initial_capacity, target_fpr);
419 Self {
420 layers: vec![first_layer],
421 initial_capacity,
422 max_fpr_per_layer: target_fpr,
423 growth_factor: gf,
424 tightening_ratio: 0.8,
425 total_items: 0,
426 }
427 }
428
429 pub fn insert(&mut self, item: &[u8]) {
434 if let Some(active) = self.layers.last() {
436 if active.estimate_false_positive_rate() > self.max_fpr_per_layer {
437 self.add_layer();
438 }
439 }
440 if let Some(active) = self.layers.last_mut() {
441 active.insert(item);
442 }
443 self.total_items += 1;
444 }
445
446 pub fn contains(&self, item: &[u8]) -> bool {
449 self.layers.iter().any(|layer| layer.contains(item))
450 }
451
452 pub fn estimate_false_positive_rate(&self) -> f64 {
457 let product: f64 = self
458 .layers
459 .iter()
460 .map(|l| 1.0 - l.estimate_false_positive_rate())
461 .product();
462 1.0 - product
463 }
464
465 pub fn total_item_count(&self) -> u64 {
467 self.total_items
468 }
469
470 pub fn layer_count(&self) -> usize {
472 self.layers.len()
473 }
474
475 fn add_layer(&mut self) {
477 let layer_idx = self.layers.len();
478 let capacity =
479 (self.initial_capacity as f64 * self.growth_factor.powi(layer_idx as i32)) as usize;
480 let capacity = capacity.max(1);
481 let fpr = self.max_fpr_per_layer * self.tightening_ratio.powi(layer_idx as i32);
482 let fpr = fpr.clamp(1e-15, 1.0 - f64::EPSILON);
483 self.layers.push(BloomFilter::new(capacity, fpr));
484 }
485
486 pub fn set_tightening_ratio(&mut self, ratio: f64) {
492 if ratio > 0.0 && ratio < 1.0 {
493 self.tightening_ratio = ratio;
494 }
495 }
496
497 pub fn tightening_ratio(&self) -> f64 {
499 self.tightening_ratio
500 }
501
502 pub fn growth_factor(&self) -> f64 {
504 self.growth_factor
505 }
506
507 pub fn layer_stats(&self) -> Vec<(u64, usize, f64)> {
509 self.layers
510 .iter()
511 .map(|l| {
512 (
513 l.item_count(),
514 l.num_bits(),
515 l.estimate_false_positive_rate(),
516 )
517 })
518 .collect()
519 }
520
521 pub fn estimated_capacity_remaining(&self) -> usize {
528 let active = match self.layers.last() {
529 Some(l) => l,
530 None => return 0,
531 };
532 let current_fpr = active.estimate_false_positive_rate();
533 if current_fpr >= self.max_fpr_per_layer {
534 return 0;
535 }
536 let k = active.num_hash_functions() as f64;
540 let m = active.num_bits() as f64;
541 let n = active.item_count() as f64;
542 let theoretical_max = (m / k) * std::f64::consts::LN_2;
543 let remaining = (theoretical_max - n).max(0.0);
544 remaining as usize
545 }
546
547 pub fn clear(&mut self) {
549 self.layers.clear();
550 self.total_items = 0;
551 let first_layer = BloomFilter::new(self.initial_capacity, self.max_fpr_per_layer);
552 self.layers.push(first_layer);
553 }
554
555 pub fn total_bits(&self) -> usize {
557 self.layers.iter().map(|l| l.num_bits()).sum()
558 }
559}
560
561#[cfg(test)]
564mod tests {
565 use super::*;
566
567 #[test]
570 fn test_fnv1a_deterministic() {
571 let a = h1(b"hello");
572 let b = h1(b"hello");
573 assert_eq!(a, b);
574 }
575
576 #[test]
577 fn test_h1_h2_differ() {
578 let v = h1(b"test");
579 let v2 = h2(b"test");
580 assert_ne!(v, v2, "h1 and h2 should produce different hashes");
581 }
582
583 #[test]
584 fn test_h2_always_odd() {
585 for seed in [b"a".as_ref(), b"hello", b"oximedia", b"\x00\xff"] {
586 assert_eq!(h2(seed) & 1, 1, "h2 must be odd for {seed:?}");
587 }
588 }
589
590 #[test]
593 fn test_new_bloom_filter() {
594 let bf = BloomFilter::new(1000, 0.01);
595 assert!(bf.num_bits() > 0);
596 assert!(bf.num_hash_functions() > 0);
597 assert_eq!(bf.item_count(), 0);
598 }
599
600 #[test]
601 fn test_optimal_num_bits_reasonable() {
602 let m = optimal_num_bits(10_000, 0.01);
604 assert!(m > 90_000 && m < 110_000, "unexpected m={m}");
605 }
606
607 #[test]
608 fn test_optimal_k_reasonable() {
609 let m = optimal_num_bits(10_000, 0.01);
610 let k = optimal_num_hash_functions(m, 10_000);
611 assert!(k >= 6 && k <= 8, "unexpected k={k}");
613 }
614
615 #[test]
618 fn test_insert_then_contains() {
619 let mut bf = BloomFilter::new(100, 0.01);
620 bf.insert(b"key1");
621 assert!(bf.contains(b"key1"));
622 }
623
624 #[test]
625 fn test_contains_absent_item() {
626 let bf = BloomFilter::new(100, 0.01);
627 assert!(!bf.contains(b"ghost"));
630 }
631
632 #[test]
633 fn test_no_false_negatives() {
634 let mut bf = BloomFilter::new(500, 0.01);
635 let items: Vec<Vec<u8>> = (0u32..200).map(|i| i.to_le_bytes().to_vec()).collect();
636 for item in &items {
637 bf.insert(item);
638 }
639 for item in &items {
640 assert!(bf.contains(item), "false negative detected for {:?}", item);
641 }
642 }
643
644 #[test]
645 fn test_item_count() {
646 let mut bf = BloomFilter::new(100, 0.05);
647 bf.insert(b"a");
648 bf.insert(b"b");
649 bf.insert(b"c");
650 assert_eq!(bf.item_count(), 3);
651 }
652
653 #[test]
656 fn test_estimate_fpr_empty() {
657 let bf = BloomFilter::new(1000, 0.01);
658 assert_eq!(bf.estimate_false_positive_rate(), 0.0);
659 }
660
661 #[test]
662 fn test_estimate_fpr_increases_with_fill() {
663 let mut bf = BloomFilter::new(100, 0.01);
664 let fpr_empty = bf.estimate_false_positive_rate();
665 for i in 0u32..50 {
666 bf.insert(&i.to_le_bytes());
667 }
668 let fpr_half = bf.estimate_false_positive_rate();
669 assert!(fpr_half > fpr_empty, "FPR should increase as filter fills");
670 }
671
672 #[test]
677 fn test_empirical_fpr_at_n10000_p001() {
678 let n = 10_000usize;
679 let p = 0.01_f64;
680 let mut bf = BloomFilter::new(n, p);
681
682 for i in 0u32..n as u32 {
683 let key = format!("inserted_{i}");
684 bf.insert(key.as_bytes());
685 }
686
687 let mut false_positives = 0usize;
688 let probes = 10_000usize;
689 for i in 0u32..probes as u32 {
690 let key = format!("absent_{i}");
691 if bf.contains(key.as_bytes()) {
692 false_positives += 1;
693 }
694 }
695
696 let observed_fpr = false_positives as f64 / probes as f64;
697 assert!(
699 observed_fpr <= p * 3.0,
700 "observed FPR {observed_fpr:.4} exceeded 3× target ({:.4})",
701 p * 3.0
702 );
703 }
704
705 #[test]
708 fn test_counting_bf_insert_contains() {
709 let mut cbf = CountingBloomFilter::new(200, 0.01);
710 cbf.insert(b"alpha");
711 assert!(cbf.contains(b"alpha"));
712 }
713
714 #[test]
715 fn test_counting_bf_remove() {
716 let mut cbf = CountingBloomFilter::new(200, 0.01);
717 cbf.insert(b"remove_me");
718 assert!(cbf.contains(b"remove_me"));
719 let removed = cbf.remove(b"remove_me");
720 assert!(removed, "remove should succeed");
721 assert!(!cbf.contains(b"remove_me"));
722 }
723
724 #[test]
725 fn test_counting_bf_remove_absent() {
726 let mut cbf = CountingBloomFilter::new(200, 0.01);
727 let removed = cbf.remove(b"never_inserted");
728 assert!(!removed, "cannot remove item that was never inserted");
729 }
730
731 #[test]
732 fn test_counting_bf_item_count() {
733 let mut cbf = CountingBloomFilter::new(100, 0.05);
734 cbf.insert(b"x");
735 cbf.insert(b"y");
736 assert_eq!(cbf.item_count(), 2);
737 cbf.remove(b"x");
738 assert_eq!(cbf.item_count(), 1);
739 }
740
741 #[test]
742 fn test_counting_bf_no_false_negatives() {
743 let mut cbf = CountingBloomFilter::new(300, 0.01);
744 let items: Vec<Vec<u8>> = (0u32..100).map(|i| i.to_le_bytes().to_vec()).collect();
745 for item in &items {
746 cbf.insert(item);
747 }
748 for item in &items {
749 assert!(cbf.contains(item), "false negative for {item:?}");
750 }
751 }
752
753 #[test]
754 fn test_counting_bf_multiple_inserts_then_single_remove() {
755 let mut cbf = CountingBloomFilter::new(200, 0.01);
756 cbf.insert(b"double");
758 cbf.insert(b"double");
759 cbf.remove(b"double");
760 assert!(cbf.contains(b"double"));
762 }
763
764 #[test]
765 fn test_double_hash_position_range() {
766 let item = b"test_item";
767 let h1_val = h1(item);
768 let h2_val = h2(item);
769 let num_bits = 1024;
770 for i in 0..10u64 {
771 let pos = double_hash_position(h1_val, h2_val, i, num_bits);
772 assert!(pos < num_bits, "position {pos} out of range");
773 }
774 }
775
776 #[test]
777 fn test_bloom_filter_clone() {
778 let mut bf = BloomFilter::new(100, 0.01);
779 bf.insert(b"cloned");
780 let bf2 = bf.clone();
781 assert!(bf2.contains(b"cloned"));
782 assert_eq!(bf2.item_count(), bf.item_count());
783 }
784
785 #[test]
788 fn test_scalable_bf_insert_contains() {
789 let mut sbf = ScalableBloomFilter::new(50, 0.01, 2.0);
790 sbf.insert(b"hello");
791 assert!(sbf.contains(b"hello"));
792 }
793
794 #[test]
795 fn test_scalable_bf_absent_item() {
796 let sbf = ScalableBloomFilter::new(50, 0.01, 2.0);
797 assert!(!sbf.contains(b"missing"));
798 }
799
800 #[test]
801 fn test_scalable_bf_no_false_negatives() {
802 let mut sbf = ScalableBloomFilter::new(50, 0.05, 2.0);
803 let items: Vec<Vec<u8>> = (0u32..200).map(|i| i.to_le_bytes().to_vec()).collect();
804 for item in &items {
805 sbf.insert(item);
806 }
807 for item in &items {
808 assert!(sbf.contains(item), "false negative for {item:?}");
809 }
810 }
811
812 #[test]
813 fn test_scalable_bf_grows_layers() {
814 let mut sbf = ScalableBloomFilter::new(10, 0.1, 2.0);
816 for i in 0u32..500 {
817 sbf.insert(&i.to_le_bytes());
818 }
819 assert!(
820 sbf.layer_count() > 1,
821 "should have grown beyond 1 layer, got {}",
822 sbf.layer_count()
823 );
824 }
825
826 #[test]
827 fn test_scalable_bf_total_item_count() {
828 let mut sbf = ScalableBloomFilter::new(100, 0.01, 2.0);
829 sbf.insert(b"a");
830 sbf.insert(b"b");
831 sbf.insert(b"c");
832 assert_eq!(sbf.total_item_count(), 3);
833 }
834
835 #[test]
836 fn test_scalable_bf_fpr_bounded() {
837 let mut sbf = ScalableBloomFilter::new(1000, 0.01, 2.0);
838 for i in 0u32..1000 {
839 sbf.insert(&i.to_le_bytes());
840 }
841 let fpr = sbf.estimate_false_positive_rate();
842 assert!(fpr < 0.10, "aggregate FPR {fpr:.4} is too high");
844 }
845
846 #[test]
847 fn test_scalable_bf_clone() {
848 let mut sbf = ScalableBloomFilter::new(100, 0.01, 2.0);
849 sbf.insert(b"test");
850 let sbf2 = sbf.clone();
851 assert!(sbf2.contains(b"test"));
852 assert_eq!(sbf2.total_item_count(), 1);
853 assert_eq!(sbf2.layer_count(), sbf.layer_count());
854 }
855
856 #[test]
857 fn test_scalable_bf_empty_fpr() {
858 let sbf = ScalableBloomFilter::new(100, 0.01, 2.0);
859 assert_eq!(sbf.estimate_false_positive_rate(), 0.0);
860 }
861
862 #[test]
865 fn test_scalable_bf_set_tightening_ratio() {
866 let mut sbf = ScalableBloomFilter::new(100, 0.01, 2.0);
867 sbf.set_tightening_ratio(0.5);
868 assert!((sbf.tightening_ratio() - 0.5).abs() < f64::EPSILON);
869 }
870
871 #[test]
872 fn test_scalable_bf_invalid_tightening_ratio_ignored() {
873 let mut sbf = ScalableBloomFilter::new(100, 0.01, 2.0);
874 let original = sbf.tightening_ratio();
875 sbf.set_tightening_ratio(0.0); assert!((sbf.tightening_ratio() - original).abs() < f64::EPSILON);
877 sbf.set_tightening_ratio(1.0); assert!((sbf.tightening_ratio() - original).abs() < f64::EPSILON);
879 sbf.set_tightening_ratio(-0.5); assert!((sbf.tightening_ratio() - original).abs() < f64::EPSILON);
881 }
882
883 #[test]
884 fn test_scalable_bf_growth_factor() {
885 let sbf = ScalableBloomFilter::new(100, 0.01, 3.0);
886 assert!((sbf.growth_factor() - 3.0).abs() < f64::EPSILON);
887 }
888
889 #[test]
890 fn test_scalable_bf_growth_factor_default_when_invalid() {
891 let sbf = ScalableBloomFilter::new(100, 0.01, 0.5);
893 assert!((sbf.growth_factor() - 2.0).abs() < f64::EPSILON);
894 }
895
896 #[test]
897 fn test_scalable_bf_layer_stats() {
898 let mut sbf = ScalableBloomFilter::new(10, 0.1, 2.0);
899 for i in 0u32..200 {
900 sbf.insert(&i.to_le_bytes());
901 }
902 let stats = sbf.layer_stats();
903 assert!(!stats.is_empty());
904 assert!(stats[0].0 > 0, "first layer should have items");
906 for (_, bits, _) in &stats {
908 assert!(*bits > 0);
909 }
910 }
911
912 #[test]
913 fn test_scalable_bf_estimated_capacity_remaining() {
914 let sbf = ScalableBloomFilter::new(1000, 0.01, 2.0);
915 let remaining = sbf.estimated_capacity_remaining();
916 assert!(remaining > 0, "fresh filter should have remaining capacity");
918 }
919
920 #[test]
921 fn test_scalable_bf_estimated_capacity_decreases() {
922 let mut sbf = ScalableBloomFilter::new(100, 0.01, 2.0);
923 let before = sbf.estimated_capacity_remaining();
924 for i in 0u32..50 {
925 sbf.insert(&i.to_le_bytes());
926 }
927 let after = sbf.estimated_capacity_remaining();
928 assert!(
929 after < before,
930 "remaining capacity should decrease after inserts"
931 );
932 }
933
934 #[test]
935 fn test_scalable_bf_clear() {
936 let mut sbf = ScalableBloomFilter::new(100, 0.01, 2.0);
937 for i in 0u32..50 {
938 sbf.insert(&i.to_le_bytes());
939 }
940 sbf.clear();
941 assert_eq!(sbf.total_item_count(), 0);
942 assert_eq!(sbf.layer_count(), 1);
943 assert!(!sbf.contains(&0u32.to_le_bytes()));
945 }
946
947 #[test]
948 fn test_scalable_bf_total_bits() {
949 let mut sbf = ScalableBloomFilter::new(10, 0.1, 2.0);
950 let bits_single = sbf.total_bits();
951 for i in 0u32..500 {
952 sbf.insert(&i.to_le_bytes());
953 }
954 let bits_multi = sbf.total_bits();
955 assert!(
956 bits_multi > bits_single,
957 "total bits should increase with layers"
958 );
959 }
960
961 #[test]
962 fn test_scalable_bf_tighter_ratio_more_layers() {
963 let mut sbf_tight = ScalableBloomFilter::new(10, 0.1, 2.0);
966 sbf_tight.set_tightening_ratio(0.5);
967 let mut sbf_loose = ScalableBloomFilter::new(10, 0.1, 2.0);
968 sbf_loose.set_tightening_ratio(0.9);
969
970 for i in 0u32..200 {
971 sbf_tight.insert(&i.to_le_bytes());
972 sbf_loose.insert(&i.to_le_bytes());
973 }
974 for i in 0u32..200 {
976 assert!(sbf_tight.contains(&i.to_le_bytes()));
977 assert!(sbf_loose.contains(&i.to_le_bytes()));
978 }
979 }
980
981 #[test]
982 fn test_scalable_bf_empirical_fpr() {
983 let mut sbf = ScalableBloomFilter::new(1000, 0.05, 2.0);
984 for i in 0u32..1000 {
985 sbf.insert(&i.to_le_bytes());
986 }
987 let mut fps = 0usize;
989 for i in 10000u32..20000 {
990 if sbf.contains(&i.to_le_bytes()) {
991 fps += 1;
992 }
993 }
994 let observed_fpr = fps as f64 / 10000.0;
995 assert!(
997 observed_fpr < 0.20,
998 "observed FPR {observed_fpr:.4} is too high"
999 );
1000 }
1001
1002 #[test]
1003 fn test_scalable_bf_stress_many_inserts() {
1004 let mut sbf = ScalableBloomFilter::new(50, 0.01, 2.0);
1005 for i in 0u32..10000 {
1006 sbf.insert(&i.to_le_bytes());
1007 }
1008 assert_eq!(sbf.total_item_count(), 10000);
1009 for i in [0u32, 999, 5000, 9999] {
1011 assert!(sbf.contains(&i.to_le_bytes()), "false negative for {i}");
1012 }
1013 }
1014}