use std::time::{Duration, Instant, SystemTime};
use std::path::Path;
use std::fs;
use crate::{SiftDB, Snapshot};
use crate::ingest::{Ingester, IngestOptions};
use serde::{Serialize, Deserialize};

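/// Metrics captured for a single benchmark run; the optional fields are only
/// filled in by the benchmark types they apply to (searches report
/// queries-per-second, imports report throughput).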
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkResults {
    pub name: String,
    pub duration: Duration,
    pub files_processed: u64,
    pub bytes_processed: u64,
    pub queries_per_second: Option<f64>,
    pub throughput_mbps: Option<f64>,
}

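/// A complete benchmark run, serialized to JSON so results can be compared
/// across versions and commits.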
#[derive(Serialize, Deserialize)]
pub struct BenchmarkSuite {
    pub version: String,
    pub timestamp: String,
    pub git_commit: Option<String>,
    pub test_environment: TestEnvironment,
    pub benchmarks: Vec<BenchmarkResults>,
}

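/// Basic description of the machine the benchmarks were run on.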
#[derive(Serialize, Deserialize)]
pub struct TestEnvironment {
    pub os: String,
    pub cpu: String,
    pub memory: String,
}

impl BenchmarkResults {
    pub fn print(&self) {
        println!("=== {} ===", self.name);
        println!("Duration: {:.2}s", self.duration.as_secs_f64());
        if self.files_processed > 0 {
            println!("Files processed: {}", self.files_processed);
            println!("Files/sec: {:.1}", self.files_processed as f64 / self.duration.as_secs_f64());
        }
        if self.bytes_processed > 0 {
            let mb = self.bytes_processed as f64 / (1024.0 * 1024.0);
            println!("Data processed: {:.2} MB", mb);
            if let Some(throughput) = self.throughput_mbps {
                println!("Throughput: {:.2} MB/s", throughput);
            }
        }
        if let Some(qps) = self.queries_per_second {
            println!("Queries/sec: {:.1}", qps);
        }
        println!();
    }
}

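/// Drives the benchmark suite against a SiftDB collection: initialization,
/// import of `source_path`, and a set of representative searches.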
pub struct SiftDBBenchmark {
    collection_path: std::path::PathBuf,
    source_path: std::path::PathBuf,
}

impl SiftDBBenchmark {
    pub fn new<P1: AsRef<Path>, P2: AsRef<Path>>(collection_path: P1, source_path: P2) -> Self {
        Self {
            collection_path: collection_path.as_ref().to_path_buf(),
            source_path: source_path.as_ref().to_path_buf(),
        }
    }

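    /// Runs the full suite with progress output, prints a summary, and
    /// persists the results as JSON.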
    pub fn run_all(&mut self) -> Vec<BenchmarkResults> {
        println!("🚀 SiftDB Performance Benchmark");
        println!("================================");
        println!();

        let mut results = Vec::new();

        results.push(self.bench_init());

        results.push(self.bench_import());

        results.extend(self.bench_searches());

        self.print_summary(&results);

        if let Err(e) = self.save_results(&results) {
            eprintln!("Warning: Failed to save benchmark results: {}", e);
        }

        results
    }

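    /// Runs the same benchmarks without console output, for callers that only
    /// need the returned results.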
    pub fn run_all_quiet(&mut self) -> Vec<BenchmarkResults> {
        let mut results = Vec::new();

        results.push(self.bench_init_quiet());

        results.push(self.bench_import_quiet());

        results.extend(self.bench_searches_quiet());

        results
    }

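    /// Times collection initialization without printing anything.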
    fn bench_init_quiet(&self) -> BenchmarkResults {
        let start = Instant::now();
        SiftDB::init(&self.collection_path).expect("Failed to initialize collection");
        let duration = start.elapsed();

        BenchmarkResults {
            name: "Collection Initialization".to_string(),
            duration,
            files_processed: 0,
            bytes_processed: 0,
            queries_per_second: None,
            throughput_mbps: None,
        }
    }

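    /// Times a full import of the source tree without printing anything.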
    fn bench_import_quiet(&mut self) -> BenchmarkResults {
        let start = Instant::now();
        let _db = SiftDB::open(&self.collection_path).expect("Failed to open collection");

        let mut options = IngestOptions::default();
        options.include_patterns = vec![
            "**/*.rs".to_string(),
            "**/*.md".to_string(),
            "**/*.toml".to_string(),
            "**/*.json".to_string(),
        ];

        let mut ingester = Ingester::new(self.collection_path.clone(), options);
        let (_, source_bytes) = self.count_source_files();
        let stats = ingester.ingest_from_fs(&self.source_path).expect("Failed to ingest");
        let duration = start.elapsed();

        BenchmarkResults {
            name: "File Import".to_string(),
            duration,
            files_processed: stats.ingested,
            bytes_processed: source_bytes,
            queries_per_second: None,
            throughput_mbps: Some(source_bytes as f64 / (1024.0 * 1024.0) / duration.as_secs_f64()),
        }
    }

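    /// Runs the standard query set against a fresh snapshot without printing anything.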
    fn bench_searches_quiet(&self) -> Vec<BenchmarkResults> {
        let mut results = Vec::new();
        let db = SiftDB::open(&self.collection_path).expect("Failed to open collection");
        let mut snapshot = db.snapshot().expect("Failed to create snapshot");

        let queries = vec![
            ("fn", "Function definitions"),
            ("println", "Print statements"),
            ("use", "Import statements"),
            ("struct", "Struct definitions"),
            ("impl", "Implementation blocks"),
            ("pub", "Public items"),
            ("let", "Variable declarations"),
            ("match", "Pattern matching"),
            ("async", "Async code"),
            ("Result", "Result types"),
        ];

        for (query, description) in queries {
            results.push(self.bench_single_search_quiet(&mut snapshot, query, description));
        }

        results
    }

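    /// Times a single query: one warm-up call, then the average over ten timed iterations.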
    fn bench_single_search_quiet(&self, snapshot: &mut Snapshot, query: &str, description: &str) -> BenchmarkResults {
        let iterations = 10;
        let mut total_duration = Duration::new(0, 0);
        let mut total_hits = 0;

        // Warm-up query; the result is intentionally discarded.
        snapshot.find(query, None, Some(1000)).ok();

        for _ in 0..iterations {
            let start = Instant::now();
            if let Ok(hits) = snapshot.find(query, None, Some(1000)) {
                total_hits = hits.len();
            }
            total_duration += start.elapsed();
        }

        let avg_duration = total_duration / iterations as u32;
        let qps = if avg_duration.as_secs_f64() > 0.0 {
            1.0 / avg_duration.as_secs_f64()
        } else {
            f64::INFINITY
        };

        BenchmarkResults {
            name: format!("Search: '{}' ({})", query, description),
            duration: avg_duration,
            files_processed: total_hits as u64,
            bytes_processed: 0,
            queries_per_second: Some(qps),
            throughput_mbps: None,
        }
    }

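    /// Times how long `SiftDB::init` takes on the collection path.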
    fn bench_init(&self) -> BenchmarkResults {
        let start = Instant::now();

        SiftDB::init(&self.collection_path).expect("Failed to initialize collection");

        let duration = start.elapsed();

        BenchmarkResults {
            name: "Collection Initialization".to_string(),
            duration,
            files_processed: 0,
            bytes_processed: 0,
            queries_per_second: None,
            throughput_mbps: None,
        }
    }

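    /// Imports the source tree into the collection, printing file counts,
    /// storage size, and compression ratio along the way.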
    fn bench_import(&mut self) -> BenchmarkResults {
        println!("📁 Starting import benchmark...");

        let start = Instant::now();

        let _db = SiftDB::open(&self.collection_path).expect("Failed to open collection");

        let mut options = IngestOptions::default();
        options.include_patterns = vec![
            "**/*.rs".to_string(),
            "**/*.md".to_string(),
            "**/*.toml".to_string(),
            "**/*.json".to_string(),
        ];

        let mut ingester = Ingester::new(self.collection_path.clone(), options);

        let (source_files, source_bytes) = self.count_source_files();
        println!(" Source files found: {}", source_files);
        println!(" Source data: {:.2} MB", source_bytes as f64 / (1024.0 * 1024.0));

        let stats = ingester.ingest_from_fs(&self.source_path).expect("Failed to ingest");

        let duration = start.elapsed();

        let storage_bytes = self.calculate_total_bytes();
        let compression_ratio = if source_bytes > 0 {
            storage_bytes as f64 / source_bytes as f64
        } else {
            1.0
        };

        println!(" ✅ Import completed in {:.2}s", duration.as_secs_f64());
        println!(" 📊 Files ingested: {} ({} skipped, {} errors)",
            stats.ingested, stats.skipped, stats.errors);
        println!(" 💾 Storage size: {:.2} MB (ratio: {:.2}x)",
            storage_bytes as f64 / (1024.0 * 1024.0), compression_ratio);

        BenchmarkResults {
            name: "File Import".to_string(),
            duration,
            files_processed: stats.ingested,
            bytes_processed: source_bytes,
            queries_per_second: None,
            throughput_mbps: Some(source_bytes as f64 / (1024.0 * 1024.0) / duration.as_secs_f64()),
        }
    }

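    /// Runs the standard query set against a fresh snapshot, one result per query.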
    fn bench_searches(&self) -> Vec<BenchmarkResults> {
        let mut results = Vec::new();

        let db = SiftDB::open(&self.collection_path).expect("Failed to open collection");
        let mut snapshot = db.snapshot().expect("Failed to create snapshot");

        let queries = vec![
            ("fn", "Function definitions"),
            ("println", "Print statements"),
            ("use", "Import statements"),
            ("struct", "Struct definitions"),
            ("impl", "Implementation blocks"),
            ("pub", "Public items"),
            ("let", "Variable declarations"),
            ("match", "Pattern matching"),
            ("async", "Async code"),
            ("Result", "Result types"),
        ];

        for (query, description) in queries {
            results.push(self.bench_single_search(&mut snapshot, query, description));
        }

        results
    }

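    /// Times a single query: one warm-up call, then ten timed iterations averaged into one result.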
    fn bench_single_search(&self, snapshot: &mut Snapshot, query: &str, description: &str) -> BenchmarkResults {
        let iterations = 10;
        let mut total_duration = Duration::new(0, 0);
        let mut total_hits = 0;

        // Warm-up query; the result is intentionally discarded.
        snapshot.find(query, None, Some(1000)).ok();

        for _ in 0..iterations {
            let start = Instant::now();
            if let Ok(hits) = snapshot.find(query, None, Some(1000)) {
                total_hits = hits.len();
            }
            total_duration += start.elapsed();
        }

        let avg_duration = total_duration / iterations as u32;
        let qps = iterations as f64 / total_duration.as_secs_f64();

        BenchmarkResults {
            name: format!("Search: '{}' ({})", query, description),
            duration: avg_duration,
            files_processed: total_hits as u64,
            bytes_processed: 0,
            queries_per_second: Some(qps),
            throughput_mbps: None,
        }
    }

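    /// Walks the source tree (respecting .gitignore) and returns the number of
    /// matching source files and their combined size in bytes.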
    fn count_source_files(&self) -> (u64, u64) {
        let mut file_count = 0;
        let mut byte_count = 0;

        let walker = ignore::WalkBuilder::new(&self.source_path)
            .hidden(false)
            .git_ignore(true)
            .build();

        for entry in walker {
            if let Ok(entry) = entry {
                let path = entry.path();
                if path.is_file() {
                    let path_str = path.to_string_lossy();
                    if path_str.ends_with(".rs") || path_str.ends_with(".md") ||
                        path_str.ends_with(".toml") || path_str.ends_with(".json") {
                        file_count += 1;
                        if let Ok(metadata) = path.metadata() {
                            byte_count += metadata.len();
                        }
                    }
                }
            }
        }

        (file_count, byte_count)
    }

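    /// Sums the size of every file directly under the collection's `store` directory.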
    fn calculate_total_bytes(&self) -> u64 {
        let mut total = 0;

        if let Ok(entries) = fs::read_dir(self.collection_path.join("store")) {
            for entry in entries.flatten() {
                if let Ok(metadata) = entry.metadata() {
                    total += metadata.len();
                }
            }
        }

        total
    }

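    /// Serializes the results to a timestamped JSON file under
    /// `benchmarks/results`, next to the collection directory.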
    fn save_results(&self, results: &[BenchmarkResults]) -> Result<(), Box<dyn std::error::Error>> {
        let suite = BenchmarkSuite {
            version: env!("CARGO_PKG_VERSION").to_string(),
            timestamp: SystemTime::now()
                .duration_since(SystemTime::UNIX_EPOCH)?
                .as_secs()
                .to_string(),
            git_commit: self.get_git_commit(),
            test_environment: TestEnvironment {
                os: std::env::consts::OS.to_string(),
                cpu: "unknown".to_string(),
                memory: "unknown".to_string(),
            },
            benchmarks: results.to_vec(),
        };

        let benchmarks_dir = self.collection_path.parent()
            .unwrap_or(&self.collection_path)
            .join("benchmarks/results");

        std::fs::create_dir_all(&benchmarks_dir)?;

        let filename = format!("benchmark-{}.json", suite.timestamp);
        let filepath = benchmarks_dir.join(filename);

        let json = serde_json::to_string_pretty(&suite)?;
        std::fs::write(filepath, json)?;

        Ok(())
    }

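    /// Returns the short HEAD commit hash, if `git` is available and the
    /// collection's parent directory is inside a repository.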
    fn get_git_commit(&self) -> Option<String> {
        std::process::Command::new("git")
            .arg("rev-parse")
            .arg("--short")
            .arg("HEAD")
            .current_dir(self.collection_path.parent().unwrap_or(&self.collection_path))
            .output()
            .ok()
            .and_then(|output| {
                if output.status.success() {
                    String::from_utf8(output.stdout)
                        .ok()
                        .map(|s| s.trim().to_string())
                } else {
                    None
                }
            })
    }

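    /// Prints every result followed by aggregate import and search metrics.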
    fn print_summary(&self, results: &[BenchmarkResults]) {
        println!("📊 Benchmark Summary");
        println!("===================");

        for result in results {
            result.print();
        }

        let import_result = results.iter().find(|r| r.name.contains("Import"));

        if let Some(import) = import_result {
            println!("🎯 Key Performance Metrics:");
            println!("- Import Rate: {:.0} files/sec", import.files_processed as f64 / import.duration.as_secs_f64());
            if let Some(throughput) = import.throughput_mbps {
                println!("- Import Throughput: {:.1} MB/s", throughput);
            }

            let search_results: Vec<_> = results.iter()
                .filter(|r| r.name.contains("Search"))
                .collect();

            if !search_results.is_empty() {
                let avg_qps: f64 = search_results.iter()
                    .filter_map(|r| r.queries_per_second)
                    .sum::<f64>() / search_results.len() as f64;
                println!("- Average Search Rate: {:.1} queries/sec", avg_qps);
            }
        }

        println!("✅ Benchmark completed successfully!");
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // Runs the full benchmark suite against the current directory; ignored by
    // default, run with `cargo test -- --ignored`.
    #[test]
    #[ignore]
    fn bench_test() {
        let temp_dir = TempDir::new().unwrap();
        let collection_path = temp_dir.path().join("test-bench.sift");

        let mut benchmark = SiftDBBenchmark::new(&collection_path, ".");
        let _results = benchmark.run_all();
    }
}