rustywallet_bloom/lib.rs
1//! # rustywallet-bloom
2//!
3//! Fast and memory-efficient Bloom Filter implementations optimized for
4//! large datasets like cryptocurrency address lookups.
5//!
6//! ## Features
7//!
8//! - **Standard Bloom Filter**: Memory efficient (~1.2 bytes per item at 1% FPR)
9//! - **Counting Bloom Filter**: Supports removal of items (4x memory of standard)
10//! - **Fast**: Uses FNV-1a hash with double hashing technique
11//! - **No dependencies**: Pure Rust implementation
12//! - **Streaming insert**: Load millions of items efficiently
13//!
14//! ## Standard Bloom Filter
15//!
16//! ```rust
17//! use rustywallet_bloom::BloomFilter;
18//!
19//! // Create filter for 1 million items with 1% false positive rate
20//! let mut bloom = BloomFilter::new(1_000_000, 0.01);
21//!
22//! bloom.insert("1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa");
23//! bloom.insert("bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4");
24//!
25//! assert!(bloom.contains("1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa"));
26//! assert!(!bloom.contains("not_in_filter")); // probably false
27//! ```
28//!
29//! ## Counting Bloom Filter
30//!
31//! Supports removal of items:
32//!
33//! ```rust
34//! use rustywallet_bloom::CountingBloomFilter;
35//!
36//! let mut filter = CountingBloomFilter::new(100_000, 0.01);
37//!
38//! filter.insert("address1");
39//! filter.insert("address2");
40//! assert!(filter.contains("address1"));
41//!
42//! // Remove an item
43//! filter.remove("address1").unwrap();
44//! assert!(!filter.contains("address1"));
45//! assert!(filter.contains("address2"));
46//! ```
47
48mod bloom;
49mod counting;
50mod hash;
51
52pub use bloom::BloomFilter;
53pub use counting::{CountingBloomError, CountingBloomFilter};
54
55/// Prelude module for convenient imports
56pub mod prelude {
57 pub use crate::BloomFilter;
58 pub use crate::counting::{CountingBloomError, CountingBloomFilter};
59}
60
61#[cfg(test)]
62mod tests {
63 use super::*;
64
65 #[test]
66 fn test_basic_operations() {
67 let mut bloom = BloomFilter::new(1000, 0.01);
68
69 bloom.insert("hello");
70 bloom.insert("world");
71
72 assert!(bloom.contains("hello"));
73 assert!(bloom.contains("world"));
74 }
75
76 #[test]
77 fn test_false_positive_rate() {
78 let items = 10_000usize;
79 let fpr = 0.01;
80 let mut bloom = BloomFilter::new(items, fpr);
81
82 // Insert items
83 for i in 0..items {
84 bloom.insert(&format!("item_{}", i));
85 }
86
87 // Check inserted items - should all be found
88 for i in 0..items {
89 assert!(bloom.contains(&format!("item_{}", i)));
90 }
91
92 // Check non-existent items - count false positives
93 let test_count = 10_000;
94 let mut false_positives = 0;
95 for i in 0..test_count {
96 if bloom.contains(&format!("nonexistent_{}", i)) {
97 false_positives += 1;
98 }
99 }
100
101 let actual_fpr = false_positives as f64 / test_count as f64;
102 // Allow 3x tolerance for statistical variance
103 assert!(actual_fpr < fpr * 3.0,
104 "FPR too high: {} (expected < {})", actual_fpr, fpr * 3.0);
105 }
106
107 #[test]
108 fn test_memory_estimate() {
109 let bloom = BloomFilter::new(1_000_000, 0.01);
110 let bytes = bloom.memory_usage();
111
112 // Should be around 1.2MB for 1M items at 1% FPR
113 assert!(bytes > 1_000_000, "Too small: {} bytes", bytes);
114 assert!(bytes < 2_000_000, "Too large: {} bytes", bytes);
115 }
116
117 #[test]
118 fn test_counting_bloom_insert_remove() {
119 let mut filter = CountingBloomFilter::new(1000, 0.01);
120
121 filter.insert("test1");
122 filter.insert("test2");
123 assert!(filter.contains("test1"));
124 assert!(filter.contains("test2"));
125
126 filter.remove("test1").unwrap();
127 assert!(!filter.contains("test1"));
128 assert!(filter.contains("test2"));
129 }
130}