benchmark/
benchmark.rs

1//! Benchmark example comparing Marisa trie performance
2//!
3//! This example demonstrates the performance characteristics of the Marisa trie
4//! by building a large dictionary and measuring lookup times.
5
6use marisa_ffi::{Agent, Keyset, Trie};
7use std::time::Instant;
8
9fn generate_words(count: usize) -> Vec<String> {
10    let mut words = Vec::new();
11    let prefixes = [
12        "apple", "banana", "cherry", "dog", "elephant", "fish", "grape", "house",
13    ];
14    let suffixes = ["s", "ing", "ed", "er", "est", "ly", "ness", "tion"];
15
16    for i in 0..count {
17        let prefix = prefixes[i % prefixes.len()];
18        let suffix = suffixes[i % suffixes.len()];
19        let word = format!("{}{}", prefix, suffix);
20        words.push(word);
21    }
22
23    words
24}
25
26fn benchmark_lookup(trie: &Trie, words: &[String], iterations: usize) -> f64 {
27    let start = Instant::now();
28
29    for _ in 0..iterations {
30        for word in words {
31            let _ = trie.lookup(word);
32        }
33    }
34
35    let duration = start.elapsed();
36    duration.as_secs_f64()
37}
38
39fn benchmark_predictive_search(trie: &Trie, prefixes: &[String], iterations: usize) -> f64 {
40    let start = Instant::now();
41
42    for _ in 0..iterations {
43        for prefix in prefixes {
44            let mut agent = Agent::new().unwrap();
45            let _ = trie.predictive_search(prefix, &mut agent);
46        }
47    }
48
49    let duration = start.elapsed();
50    duration.as_secs_f64()
51}
52
53fn main() -> Result<(), Box<dyn std::error::Error>> {
54    println!("Marisa FFI Benchmark");
55    println!("===================");
56    println!();
57
58    // Generate test data
59    let word_count = 10000;
60    let words = generate_words(word_count);
61    let prefixes = vec![
62        "app".to_string(),
63        "ban".to_string(),
64        "che".to_string(),
65        "dog".to_string(),
66        "ele".to_string(),
67        "fis".to_string(),
68        "gra".to_string(),
69        "hou".to_string(),
70    ];
71
72    println!("Generated {} test words", word_count);
73    println!("Test prefixes: {:?}", prefixes);
74    println!();
75
76    // Build the trie
77    println!("Building trie...");
78    let build_start = Instant::now();
79
80    let mut keyset = Keyset::new()?;
81    for word in &words {
82        keyset.push(word)?;
83    }
84
85    let trie = Trie::build(&keyset)?;
86    let build_time = build_start.elapsed();
87
88    println!("Trie built in {:.3} seconds", build_time.as_secs_f64());
89    println!();
90
91    // Benchmark exact lookup
92    println!("=== Exact Lookup Benchmark ===");
93    let lookup_iterations = 100;
94    let lookup_time = benchmark_lookup(&trie, &words, lookup_iterations);
95    let total_lookups = words.len() * lookup_iterations;
96    let lookups_per_sec = total_lookups as f64 / lookup_time;
97
98    println!(
99        "Performed {} lookups in {:.3} seconds",
100        total_lookups, lookup_time
101    );
102    println!("Lookups per second: {:.0}", lookups_per_sec);
103    println!(
104        "Average lookup time: {:.3} microseconds",
105        (lookup_time * 1_000_000.0) / total_lookups as f64
106    );
107    println!();
108
109    // Benchmark predictive search
110    println!("=== Predictive Search Benchmark ===");
111    let search_iterations = 50;
112    let search_time = benchmark_predictive_search(&trie, &prefixes, search_iterations);
113    let total_searches = prefixes.len() * search_iterations;
114    let searches_per_sec = total_searches as f64 / search_time;
115
116    println!(
117        "Performed {} predictive searches in {:.3} seconds",
118        total_searches, search_time
119    );
120    println!("Searches per second: {:.0}", searches_per_sec);
121    println!(
122        "Average search time: {:.3} microseconds",
123        (search_time * 1_000_000.0) / total_searches as f64
124    );
125    println!();
126
127    // Memory usage estimation
128    println!("=== Memory Usage ===");
129    println!("Number of keys: {}", words.len());
130    println!(
131        "Average key length: {:.1} characters",
132        words.iter().map(|w| w.len()).sum::<usize>() as f64 / words.len() as f64
133    );
134
135    // Save trie to file to measure size
136    let filename = "benchmark_trie.marisa";
137    trie.save(filename)?;
138
139    if let Ok(metadata) = std::fs::metadata(filename) {
140        let file_size = metadata.len();
141        let bytes_per_key = file_size as f64 / words.len() as f64;
142
143        println!(
144            "Trie file size: {} bytes ({:.2} KB)",
145            file_size,
146            file_size as f64 / 1024.0
147        );
148        println!("Bytes per key: {:.2}", bytes_per_key);
149        println!("Compression ratio: {:.1}%", (bytes_per_key / 10.0) * 100.0); // Assuming ~10 bytes per key on average
150    }
151
152    // Clean up
153    std::fs::remove_file(filename)?;
154    println!();
155
156    // Sample some results
157    println!("=== Sample Results ===");
158    let sample_words = vec!["apples", "bananas", "cherries", "dogs", "elephants"];
159
160    for word in sample_words {
161        match trie.lookup(word) {
162            Some(id) => println!("'{}' -> ID: {}", word, id),
163            None => println!("'{}' -> Not found", word),
164        }
165    }
166
167    println!();
168    println!("Predictive search for 'app':");
169    let mut agent = Agent::new()?;
170    if trie.predictive_search("app", &mut agent)? {
171        // Note: agent.next() is not implemented yet
172        // For now, we'll show only the first result
173        let key = agent.key()?;
174        println!("  - {}", key);
175        println!("  Total: 1 key (iteration not implemented)");
176    }
177
178    Ok(())
179}