benchmark/
benchmark.rs

1use bloomlib::BloomFilter;
2use std::time::{Duration, Instant};
3
4/// A simple Xorshift pseudo-random number generator.
5/// Used to generate deterministic test data without external dependencies.
6/// Also see https://docs.rs/xorshift/latest/xorshift
7struct Random {
8    state: u64,
9}
10
11impl Random {
12    fn new(seed: u64) -> Self {
13        Self { state: seed }
14    }
15
16    fn next_u64(&mut self) -> u64 {
17        let mut state = self.state;
18        state ^= state << 13;
19        state ^= state >> 7;
20        state ^= state << 17;
21        self.state = state;
22        state
23    }
24}
25
26fn main() {
27    // Run a few benchmark configurations
28    run_benchmark(1_000_000, 0.000_1);
29    run_benchmark(1_000_000, 0.000_000_000_1);
30    // run_benchmark(100_000_000, 0.000_000_000_000_1);
31}
32
33fn run_benchmark(n: usize, fp_rate: f64) {
34    // --- Setup ---
35    let mut bf = BloomFilter::new(n, fp_rate);
36    let mut rng = Random::new(12345);
37
38    println!("\n---------- Bloom Filter Performance Benchmark -----------\n");
39    println!("{:<22}{}", "Items", n);
40    println!("{:<22}{}", "Target FP Rate", fp_rate);
41    println!("{:<22}{}", "Hash Count", bf.hash_count());
42    println!("\n---------------------------------------------------------\n");
43
44    // Generate data
45    let mut dataset = Vec::with_capacity(n);
46    for _ in 0..n {
47        dataset.push(rng.next_u64());
48    }
49
50    // --- Metric 1: Memory Usage ---
51    let ki = 1024.0;
52    let memory_bytes = bf.memory_usage_bytes();
53    let memory_mb = memory_bytes as f64 / ki / ki;
54    println!("[Memory Usage]");
55    println!(
56        "{:<22}{:.2} MB ({} bytes)",
57        "Bit Vector Size", memory_mb, memory_bytes
58    );
59    println!(
60        "{:<22}{:.2} bits",
61        "Bits per item",
62        (memory_bytes * 8) as f64 / n as f64
63    );
64
65    // --- Metric 2: Insert Performance ---
66    println!("\n[Insertion Performance]");
67    let start_insert = Instant::now();
68    for item in &dataset {
69        bf.insert(item);
70    }
71    let duration_insert = start_insert.elapsed();
72    print_timing(duration_insert, n);
73
74    // --- Metric 3: Worst Case Lookup (Seen Items) ---
75    // The worst case for Bloom Filter is when the item has been seen before.
76    // The algorithm MUST compute all k hashes and check k bits.
77    println!("\n[Lookup Performance - Worst Case (Seen Items)]");
78    let start_worst = Instant::now();
79    for item in &dataset {
80        // Assign the result to a volatile variable to prevent compiler optimization
81        let _ = std::hint::black_box(bf.contains(item));
82    }
83    let duration_worst = start_worst.elapsed();
84    print_timing(duration_worst, n);
85
86    // --- Metric 4: Average Case Lookup (Unseen Items) ---
87    // In a well-tuned Bloom Filter (50% bits set), an unseen item
88    // often fails on the 1st or 2nd bit check.
89    println!("\n[Lookup Performance - Average Case (Unseen Items)]");
90
91    // Generate new random items that are likely NOT in the set
92    let mut unseen_dataset = Vec::with_capacity(n);
93    for _ in 0..n {
94        unseen_dataset.push(rng.next_u64());
95    }
96
97    let start_avg = Instant::now();
98    for item in &unseen_dataset {
99        let _ = std::hint::black_box(bf.contains(item));
100    }
101    let duration_avg = start_avg.elapsed();
102    print_timing(duration_avg, n);
103
104    // --- Metric 5: Best Case Lookup (Empty Filter) ---
105    // The best case is checking against an empty filter; fails on the 1st bit.
106    println!("\n[Lookup Performance - Best Case (Empty Filter)]");
107    let empty_bf: BloomFilter<u64> = BloomFilter::new(n, fp_rate);
108
109    let start_best = Instant::now();
110    for item in &dataset {
111        let _ = std::hint::black_box(empty_bf.contains(item));
112    }
113    let duration_best = start_best.elapsed();
114    print_timing(duration_best, n);
115
116    println!("\n=========================================================\n");
117}
118
119fn print_timing(total_duration: Duration, iterations: usize) {
120    let total_ns = total_duration.as_nanos() as f64;
121    let ns_per_op = total_ns / iterations as f64;
122    let ops_per_sec = 1_000_000_000.0 / ns_per_op;
123
124    println!(
125        "                      {:.2} ns/op | {:.2} million ops/sec",
126        ns_per_op,
127        ops_per_sec / 1_000_000.0
128    );
129}