use std::time::{Duration, Instant, SystemTime};
use std::path::{Path, PathBuf};
use std::fs;
use crate::SiftDB;
use crate::locking::SWMRLockManager;
use crate::compaction::CollectionCompactor;
use crate::incremental::IncrementalUpdater;
use crate::ingest::{Ingester, IngestOptions};
use anyhow::Result;
use serde::{Deserialize, Serialize};

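/// Metrics captured for a single benchmark run.
///
/// A minimal construction sketch; the field values below are illustrative
/// assumptions, not output from a real run:
///
/// ```ignore
/// use std::time::Duration;
///
/// let result = BenchmarkResults {
///     name: "Example".to_string(),
///     duration: Duration::from_millis(250),
///     files_processed: 100,
///     bytes_processed: 1024 * 1024,
///     queries_per_second: None,
///     throughput_mbps: Some(4.0),
/// };
/// result.print(); // human-readable summary on stdout
/// ```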
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkResults {
    pub name: String,
    pub duration: Duration,
    pub files_processed: u64,
    pub bytes_processed: u64,
    pub queries_per_second: Option<f64>,
    pub throughput_mbps: Option<f64>,
}

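/// A complete benchmark run as persisted to disk by `save_results`.
///
/// A sketch of reading a saved suite back; the file name follows the
/// `benchmark-{unix_timestamp}.json` pattern used below, and the exact path
/// is an assumption:
///
/// ```ignore
/// let json = std::fs::read_to_string("benchmarks/results/benchmark-1700000000.json")?;
/// let suite: BenchmarkSuite = serde_json::from_str(&json)?;
/// println!("{} benchmarks recorded", suite.benchmarks.len());
/// ```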
#[derive(Serialize, Deserialize)]
pub struct BenchmarkSuite {
    pub version: String,
    pub timestamp: String,
    pub git_commit: Option<String>,
    pub test_environment: TestEnvironment,
    pub benchmarks: Vec<BenchmarkResults>,
}

#[derive(Serialize, Deserialize)]
pub struct TestEnvironment {
    pub os: String,
    pub cpu: String,
    pub memory: String,
}

impl BenchmarkResults {
    pub fn print(&self) {
        println!("=== {} ===", self.name);
        println!("Duration: {:.2}s", self.duration.as_secs_f64());
        if self.files_processed > 0 {
            println!("Files processed: {}", self.files_processed);
            println!("Files/sec: {:.1}", self.files_processed as f64 / self.duration.as_secs_f64());
        }
        if self.bytes_processed > 0 {
            let mb = self.bytes_processed as f64 / (1024.0 * 1024.0);
            println!("Data processed: {:.2} MB", mb);
            if let Some(throughput) = self.throughput_mbps {
                println!("Throughput: {:.2} MB/s", throughput);
            }
        }
        if let Some(qps) = self.queries_per_second {
            println!("Queries/sec: {:.1}", qps);
        }
        println!();
    }
}

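/// Drives the standard benchmark suite (init, import, searches) against a
/// collection built from `source_path`.
///
/// A minimal usage sketch; the paths are illustrative:
///
/// ```ignore
/// let mut bench = SiftDBBenchmark::new("/tmp/bench.sift", ".");
/// let results = bench.run_all(); // prints progress and a summary
/// assert!(!results.is_empty());
/// ```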
pub struct SiftDBBenchmark {
    collection_path: PathBuf,
    source_path: PathBuf,
    // Currently held but not exercised directly by the benchmarks.
    #[allow(dead_code)]
    lock_manager: SWMRLockManager,
}

impl SiftDBBenchmark {
    pub fn new<P1: AsRef<Path>, P2: AsRef<Path>>(collection_path: P1, source_path: P2) -> Self {
        let collection_path = collection_path.as_ref().to_path_buf();
        let lock_manager = SWMRLockManager::new(&collection_path);
        Self {
            collection_path,
            source_path: source_path.as_ref().to_path_buf(),
            lock_manager,
        }
    }

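    /// Runs the full suite (init, import, searches), prints a summary, and
    /// persists the results as JSON. A failure to save is reported as a
    /// warning rather than aborting the run.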
    pub fn run_all(&mut self) -> Vec<BenchmarkResults> {
        println!("🚀 SiftDB Performance Benchmark");
        println!("================================");
        println!();

        let mut results = Vec::new();

        results.push(self.bench_init());

        results.push(self.bench_import());

        results.extend(self.bench_searches());

        self.print_summary(&results);

        if let Err(e) = self.save_results(&results) {
            eprintln!("Warning: Failed to save benchmark results: {}", e);
        }

        results
    }

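    /// Same phases as `run_all`, but with no progress output and no result
    /// persistence; useful when the caller only wants the raw numbers.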
    pub fn run_all_quiet(&mut self) -> Vec<BenchmarkResults> {
        let mut results = Vec::new();

        results.push(self.bench_init_quiet());

        results.push(self.bench_import_quiet());

        results.extend(self.bench_searches_quiet());

        results
    }

    fn bench_init_quiet(&self) -> BenchmarkResults {
        let start = Instant::now();
        SiftDB::init(&self.collection_path).expect("Failed to initialize collection");
        let duration = start.elapsed();

        BenchmarkResults {
            name: "Collection Initialization".to_string(),
            duration,
            files_processed: 0,
            bytes_processed: 0,
            queries_per_second: None,
            throughput_mbps: None,
        }
    }

    fn bench_import_quiet(&mut self) -> BenchmarkResults {
        let start = Instant::now();
        let _db = SiftDB::open(&self.collection_path).expect("Failed to open collection");

        let mut options = IngestOptions::default();
        options.include_patterns = vec![
            "**/*.rs".to_string(),
            "**/*.md".to_string(),
            "**/*.toml".to_string(),
            "**/*.json".to_string(),
        ];

        let mut ingester = Ingester::new(self.collection_path.clone(), options);
        // Only the byte count is needed here; the file count is used by the
        // verbose variant for progress output.
        let (_, source_bytes) = self.count_source_files();
        let stats = ingester.ingest_from_fs(&self.source_path).expect("Failed to ingest");
        let duration = start.elapsed();

        BenchmarkResults {
            name: "File Import".to_string(),
            duration,
            files_processed: stats.ingested,
            bytes_processed: source_bytes,
            queries_per_second: None,
            throughput_mbps: Some(source_bytes as f64 / (1024.0 * 1024.0) / duration.as_secs_f64()),
        }
    }

    fn bench_searches_quiet(&self) -> Vec<BenchmarkResults> {
        let mut results = Vec::new();
        let db = SiftDB::open(&self.collection_path).expect("Failed to open collection");
        let mut snapshot = db.snapshot().expect("Failed to create snapshot");

        let queries = vec![
            ("fn", "Function definitions"),
            ("println", "Print statements"),
            ("use", "Import statements"),
            ("struct", "Struct definitions"),
            ("impl", "Implementation blocks"),
            ("pub", "Public items"),
            ("let", "Variable declarations"),
            ("match", "Pattern matching"),
            ("async", "Async code"),
            ("Result", "Result types"),
        ];

        for (query, description) in queries {
            results.push(self.bench_single_search_quiet(&mut snapshot, query, description));
        }

        results
    }

    fn bench_single_search_quiet(&self, snapshot: &mut crate::Snapshot, query: &str, description: &str) -> BenchmarkResults {
        let iterations = 10;
        let mut total_duration = Duration::new(0, 0);
        let mut total_hits = 0;

        // Warm-up query so cold-cache effects don't skew the timed runs.
        snapshot.find(query, None, Some(1000)).ok();

        for _ in 0..iterations {
            let start = Instant::now();
            if let Ok(hits) = snapshot.find(query, None, Some(1000)) {
                total_hits = hits.len();
            }
            total_duration += start.elapsed();
        }

        let avg_duration = total_duration / iterations as u32;
        let qps = if avg_duration.as_secs_f64() > 0.0 {
            1.0 / avg_duration.as_secs_f64()
        } else {
            f64::INFINITY
        };

        BenchmarkResults {
            name: format!("Search: '{}' ({})", query, description),
            duration: avg_duration,
            files_processed: total_hits as u64,
            bytes_processed: 0,
            queries_per_second: Some(qps),
            throughput_mbps: None,
        }
    }

    fn bench_init(&self) -> BenchmarkResults {
        let start = Instant::now();

        SiftDB::init(&self.collection_path).expect("Failed to initialize collection");

        let duration = start.elapsed();

        BenchmarkResults {
            name: "Collection Initialization".to_string(),
            duration,
            files_processed: 0,
            bytes_processed: 0,
            queries_per_second: None,
            throughput_mbps: None,
        }
    }

    fn bench_import(&mut self) -> BenchmarkResults {
        println!("📁 Starting import benchmark...");

        let start = Instant::now();

        let _db = SiftDB::open(&self.collection_path).expect("Failed to open collection");

        let mut options = IngestOptions::default();
        options.include_patterns = vec![
            "**/*.rs".to_string(),
            "**/*.md".to_string(),
            "**/*.toml".to_string(),
            "**/*.json".to_string(),
        ];

        let mut ingester = Ingester::new(self.collection_path.clone(), options);

        let (source_files, source_bytes) = self.count_source_files();
        println!(" Source files found: {}", source_files);
        println!(" Source data: {:.2} MB", source_bytes as f64 / (1024.0 * 1024.0));

        let stats = ingester.ingest_from_fs(&self.source_path).expect("Failed to ingest");

        let duration = start.elapsed();

        // Ratio of on-disk storage to raw source bytes; values below 1.0
        // mean the store is smaller than the source it ingested.
        let storage_bytes = self.calculate_total_bytes();
        let compression_ratio = if source_bytes > 0 {
            storage_bytes as f64 / source_bytes as f64
        } else {
            1.0
        };

        println!(" ✅ Import completed in {:.2}s", duration.as_secs_f64());
        println!(" 📊 Files ingested: {} ({} skipped, {} errors)",
            stats.ingested, stats.skipped, stats.errors);
        println!(" 💾 Storage size: {:.2} MB (ratio: {:.2}x)",
            storage_bytes as f64 / (1024.0 * 1024.0), compression_ratio);

        BenchmarkResults {
            name: "File Import".to_string(),
            duration,
            files_processed: stats.ingested,
            bytes_processed: source_bytes,
            queries_per_second: None,
            throughput_mbps: Some(source_bytes as f64 / (1024.0 * 1024.0) / duration.as_secs_f64()),
        }
    }

    fn bench_searches(&self) -> Vec<BenchmarkResults> {
        let mut results = Vec::new();

        let db = SiftDB::open(&self.collection_path).expect("Failed to open collection");
        let mut snapshot = db.snapshot().expect("Failed to create snapshot");

        let queries = vec![
            ("fn", "Function definitions"),
            ("println", "Print statements"),
            ("use", "Import statements"),
            ("struct", "Struct definitions"),
            ("impl", "Implementation blocks"),
            ("pub", "Public items"),
            ("let", "Variable declarations"),
            ("match", "Pattern matching"),
            ("async", "Async code"),
            ("Result", "Result types"),
        ];

        for (query, description) in queries {
            results.push(self.bench_single_search(&mut snapshot, query, description));
        }

        results
    }

    fn bench_single_search(&self, snapshot: &mut crate::Snapshot, query: &str, description: &str) -> BenchmarkResults {
        let iterations = 10;
        let mut total_duration = Duration::new(0, 0);
        let mut total_hits = 0;

        // Warm-up query so cold-cache effects don't skew the timed runs.
        snapshot.find(query, None, Some(1000)).ok();

        for _ in 0..iterations {
            let start = Instant::now();
            if let Ok(hits) = snapshot.find(query, None, Some(1000)) {
                total_hits = hits.len();
            }
            total_duration += start.elapsed();
        }

        let avg_duration = total_duration / iterations as u32;
        // Equivalent to 1 / avg_duration.
        let qps = iterations as f64 / total_duration.as_secs_f64();

        BenchmarkResults {
            name: format!("Search: '{}' ({})", query, description),
            duration: avg_duration,
            files_processed: total_hits as u64,
            bytes_processed: 0,
            queries_per_second: Some(qps),
            throughput_mbps: None,
        }
    }

    fn count_source_files(&self) -> (u64, u64) {
        let mut file_count = 0;
        let mut byte_count = 0;

        let walker = ignore::WalkBuilder::new(&self.source_path)
            .hidden(false)
            .git_ignore(true)
            .build();

        for entry in walker.flatten() {
            let path = entry.path();
            if path.is_file() {
                let path_str = path.to_string_lossy();
                if path_str.ends_with(".rs") || path_str.ends_with(".md") ||
                   path_str.ends_with(".toml") || path_str.ends_with(".json") {
                    file_count += 1;
                    if let Ok(metadata) = path.metadata() {
                        byte_count += metadata.len();
                    }
                }
            }
        }

        (file_count, byte_count)
    }

    fn calculate_total_bytes(&self) -> u64 {
        let mut total = 0;

        if let Ok(entries) = fs::read_dir(self.collection_path.join("store")) {
            for entry in entries.flatten() {
                if let Ok(metadata) = entry.metadata() {
                    total += metadata.len();
                }
            }
        }

        total
    }

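    /// Serializes the suite to `benchmarks/results/benchmark-{unix_ts}.json`
    /// next to the collection directory. CPU and memory are recorded as
    /// "unknown" since no hardware detection is performed here.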
    fn save_results(&self, results: &[BenchmarkResults]) -> Result<()> {
        let suite = BenchmarkSuite {
            version: env!("CARGO_PKG_VERSION").to_string(),
            timestamp: SystemTime::now()
                .duration_since(SystemTime::UNIX_EPOCH)?
                .as_secs()
                .to_string(),
            git_commit: self.get_git_commit(),
            test_environment: TestEnvironment {
                os: std::env::consts::OS.to_string(),
                cpu: "unknown".to_string(),
                memory: "unknown".to_string(),
            },
            benchmarks: results.to_vec(),
        };

        let benchmarks_dir = self.collection_path.parent()
            .unwrap_or(&self.collection_path)
            .join("benchmarks/results");

        fs::create_dir_all(&benchmarks_dir)?;

        let filename = format!("benchmark-{}.json", suite.timestamp);
        let filepath = benchmarks_dir.join(filename);

        let json = serde_json::to_string_pretty(&suite)?;
        fs::write(filepath, json)?;

        Ok(())
    }

    fn get_git_commit(&self) -> Option<String> {
        std::process::Command::new("git")
            .arg("rev-parse")
            .arg("--short")
            .arg("HEAD")
            .current_dir(self.collection_path.parent().unwrap_or(&self.collection_path))
            .output()
            .ok()
            .and_then(|output| {
                if output.status.success() {
                    String::from_utf8(output.stdout)
                        .ok()
                        .map(|s| s.trim().to_string())
                } else {
                    None
                }
            })
    }

    fn print_summary(&self, results: &[BenchmarkResults]) {
        println!("📊 Benchmark Summary");
        println!("===================");

        for result in results {
            result.print();
        }

        let import_result = results.iter().find(|r| r.name.contains("Import"));

        if let Some(import) = import_result {
            println!("🎯 Key Performance Metrics:");
            println!("- Import Rate: {:.0} files/sec", import.files_processed as f64 / import.duration.as_secs_f64());
            if let Some(throughput) = import.throughput_mbps {
                println!("- Import Throughput: {:.1} MB/s", throughput);
            }

            // Average only over search results that actually report a QPS value.
            let qps_values: Vec<f64> = results.iter()
                .filter(|r| r.name.contains("Search"))
                .filter_map(|r| r.queries_per_second)
                .collect();

            if !qps_values.is_empty() {
                let avg_qps: f64 = qps_values.iter().sum::<f64>() / qps_values.len() as f64;
                println!("- Average Search Rate: {:.1} queries/sec", avg_qps);
            }
        }

        println!("✅ Benchmark completed successfully!");
    }
}

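/// Benchmarks for the incremental-update and compaction paths.
///
/// A minimal usage sketch; the paths are illustrative:
///
/// ```ignore
/// use std::path::Path;
///
/// let mut bench = AdvancedBenchmark::new(Path::new("/tmp/adv.sift"), Path::new("."));
/// let results = bench.run_all()?;
/// println!("{:.2} changes/sec", results.incremental_update.changes_per_second);
/// ```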
pub struct AdvancedBenchmark {
    collection_path: PathBuf,
    source_path: PathBuf,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct AdvancedBenchmarkResults {
    pub incremental_update: IncrementalUpdateBenchmark,
    pub compaction: CompactionBenchmark,
    pub overall_stats: OverallAdvancedStats,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct IncrementalUpdateBenchmark {
    pub initial_import_time_ms: u64,
    pub file_change_detection_time_ms: u64,
    pub delta_application_time_ms: u64,
    pub total_update_time_ms: u64,
    pub files_changed: usize,
    pub files_added: usize,
    pub files_removed: usize,
    pub changes_per_second: f64,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct CompactionBenchmark {
    pub tombstone_analysis_time_ms: u64,
    pub compaction_time_ms: u64,
    pub total_time_ms: u64,
    pub tombstones_removed: usize,
    pub segments_compacted: usize,
    pub space_reclaimed_bytes: u64,
    pub compaction_throughput_mb_per_sec: f64,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OverallAdvancedStats {
    pub total_benchmark_time_ms: u64,
    pub collection_size_bytes: u64,
    pub source_files_count: usize,
    pub features_tested: Vec<String>,
}

impl AdvancedBenchmark {
    pub fn new(collection_path: &Path, source_path: &Path) -> Self {
        Self {
            collection_path: collection_path.to_path_buf(),
            source_path: source_path.to_path_buf(),
        }
    }

    pub fn run_all(&mut self) -> Result<AdvancedBenchmarkResults> {
        let start_time = Instant::now();

        println!("📋 Phase 1: Incremental Update Benchmark");
        let incremental_results = self.benchmark_incremental_updates()?;
        println!(" ✅ Completed in {}ms", incremental_results.total_update_time_ms);
        println!();

        println!("📋 Phase 2: Compaction Benchmark");
        let compaction_results = self.benchmark_compaction()?;
        println!(" ✅ Completed in {}ms", compaction_results.total_time_ms);
        println!();

        let total_time = start_time.elapsed().as_millis() as u64;
        let collection_size = self.calculate_collection_size()?;
        let source_files = self.count_source_files()?;

        let overall_stats = OverallAdvancedStats {
            total_benchmark_time_ms: total_time,
            collection_size_bytes: collection_size,
            source_files_count: source_files,
            features_tested: vec![
                "incremental_updates".to_string(),
                "compaction".to_string(),
                "delta_manifests".to_string(),
                "file_timestamp_tracking".to_string(),
            ],
        };

        println!("🎯 Benchmark Summary");
        println!(" Total time: {}ms", total_time);
        println!(" Collection size: {} bytes", collection_size);
        println!(" Source files: {}", source_files);
        println!(" Incremental update performance: {:.2} changes/sec", incremental_results.changes_per_second);
        println!(" Compaction throughput: {:.2} MB/sec", compaction_results.compaction_throughput_mb_per_sec);

        Ok(AdvancedBenchmarkResults {
            incremental_update: incremental_results,
            compaction: compaction_results,
            overall_stats,
        })
    }

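    /// Runs the full advanced suite, substituting zeroed results if any phase
    /// fails. Note that `run_all` still prints its phase banners, so "quiet"
    /// here refers to error handling rather than silenced output.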
    pub fn run_all_quiet(&mut self) -> AdvancedBenchmarkResults {
        self.run_all().unwrap_or_else(|_| AdvancedBenchmarkResults {
            incremental_update: IncrementalUpdateBenchmark {
                initial_import_time_ms: 0,
                file_change_detection_time_ms: 0,
                delta_application_time_ms: 0,
                total_update_time_ms: 0,
                files_changed: 0,
                files_added: 0,
                files_removed: 0,
                changes_per_second: 0.0,
            },
            compaction: CompactionBenchmark {
                tombstone_analysis_time_ms: 0,
                compaction_time_ms: 0,
                total_time_ms: 0,
                tombstones_removed: 0,
                segments_compacted: 0,
                space_reclaimed_bytes: 0,
                compaction_throughput_mb_per_sec: 0.0,
            },
            overall_stats: OverallAdvancedStats {
                total_benchmark_time_ms: 0,
                collection_size_bytes: 0,
                source_files_count: 0,
                features_tested: vec![],
            },
        })
    }

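    /// Measures the incremental-update path in three timed phases: a fresh
    /// initial import, change detection over the source tree, and delta
    /// application. Temporary files are written under the source tree to
    /// guarantee detectable changes, then removed afterwards.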
    fn benchmark_incremental_updates(&self) -> Result<IncrementalUpdateBenchmark> {
        // Start from a clean slate so the initial import is measured fairly.
        if self.collection_path.exists() {
            fs::remove_dir_all(&self.collection_path).ok();
        }

        // Phase 1: fresh initial import.
        let initial_start = Instant::now();
        let _db = SiftDB::init(&self.collection_path)?;
        let mut options = IngestOptions::default();
        options.include_patterns = vec!["**/*.rs".to_string(), "**/*.md".to_string()];
        let mut ingester = Ingester::new(self.collection_path.clone(), options);
        ingester.ingest_from_fs(&self.source_path)?;
        let initial_import_time = initial_start.elapsed().as_millis() as u64;

        // Create a handful of new files so the change scan has work to do.
        let temp_dir = self.source_path.join("temp_changes");
        fs::create_dir_all(&temp_dir).ok();

        for i in 0..5 {
            let content = format!("New test file {} with timestamp", i);
            fs::write(temp_dir.join(format!("new_file_{}.txt", i)), content)?;
        }

        // Phase 2: change detection.
        let detection_start = Instant::now();
        let updater = IncrementalUpdater::new(&self.collection_path);
        let changes = updater.scan_for_changes(&self.source_path, &[], &[])?;
        let detection_time = detection_start.elapsed().as_millis() as u64;

        // Phase 3: delta application.
        let application_start = Instant::now();
        let _delta_manifest = updater.apply_changes(changes.clone(), &self.source_path)?;
        let application_time = application_start.elapsed().as_millis() as u64;

        let total_time = detection_time + application_time;
        let total_changes = changes.len();
        let changes_per_second = if total_time > 0 {
            (total_changes as f64) / (total_time as f64 / 1000.0)
        } else {
            0.0
        };

        // Clean up the temporary files created above.
        fs::remove_dir_all(&temp_dir).ok();

        Ok(IncrementalUpdateBenchmark {
            initial_import_time_ms: initial_import_time,
            file_change_detection_time_ms: detection_time,
            delta_application_time_ms: application_time,
            total_update_time_ms: total_time,
            files_changed: changes.iter().filter(|c| matches!(c.change_type, crate::incremental::ChangeType::Modified)).count(),
            files_added: changes.iter().filter(|c| matches!(c.change_type, crate::incremental::ChangeType::Added)).count(),
            files_removed: changes.iter().filter(|c| matches!(c.change_type, crate::incremental::ChangeType::Deleted)).count(),
            changes_per_second,
        })
    }

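    /// Measures tombstone analysis and compaction. Synthetic tombstones are
    /// seeded first so the compactor has deletions to reclaim.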
    fn benchmark_compaction(&self) -> Result<CompactionBenchmark> {
        let _db = SiftDB::open(&self.collection_path)?;

        let tombstone_manager = crate::tombstone::TombstoneManager::new(&self.collection_path);

        // Seed a few tombstones so the compactor has something to reclaim.
        for i in 0..5 {
            tombstone_manager.mark_file_deleted(
                i as u32,
                PathBuf::from(format!("test_file_{}.txt", i)),
                1,
                0,
                0,
            )?;
        }

        // Phase 1: tombstone analysis.
        let analysis_start = Instant::now();
        let compactor = CollectionCompactor::new(&self.collection_path);
        let needs_compaction = compactor.needs_compaction()?;
        let analysis_time = analysis_start.elapsed().as_millis() as u64;

        // Not yet over the compaction threshold: add more tombstones before compacting.
        if !needs_compaction {
            for i in 5..15 {
                tombstone_manager.mark_file_deleted(
                    i as u32,
                    PathBuf::from(format!("test_file_{}.txt", i)),
                    1,
                    0,
                    0,
                )?;
            }
        }

        // Phase 2: compaction itself.
        let compaction_start = Instant::now();
        let stats = compactor.compact()?;
        let compaction_time = compaction_start.elapsed().as_millis() as u64;

        let total_time = analysis_time + compaction_time;
        let throughput_mb_per_sec = if compaction_time > 0 {
            (stats.space_reclaimed_bytes as f64) / (1024.0 * 1024.0) / (compaction_time as f64 / 1000.0)
        } else {
            0.0
        };

        Ok(CompactionBenchmark {
            tombstone_analysis_time_ms: analysis_time,
            compaction_time_ms: compaction_time,
            total_time_ms: total_time,
            tombstones_removed: stats.tombstones_removed,
            segments_compacted: stats.segments_compacted,
            space_reclaimed_bytes: stats.space_reclaimed_bytes,
            compaction_throughput_mb_per_sec: throughput_mb_per_sec,
        })
    }

    fn calculate_collection_size(&self) -> Result<u64> {
        let mut total_size = 0;
        if self.collection_path.exists() {
            // Non-recursive: counts only files at the top level of the collection.
            for entry in fs::read_dir(&self.collection_path)? {
                let entry = entry?;
                if entry.path().is_file() {
                    total_size += entry.metadata()?.len();
                }
            }
        }
        Ok(total_size)
    }

    fn count_source_files(&self) -> Result<usize> {
        let mut count = 0;
        if self.source_path.exists() {
            // Non-recursive: counts only files directly under the source root.
            for entry in fs::read_dir(&self.source_path)? {
                let entry = entry?;
                if entry.path().is_file() {
                    count += 1;
                }
            }
        }
        Ok(count)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    #[test]
    #[ignore] // expensive; run explicitly with `cargo test -- --ignored`
    fn bench_test() {
        let temp_dir = TempDir::new().unwrap();
        let collection_path = temp_dir.path().join("test-bench.sift");

        let mut benchmark = SiftDBBenchmark::new(&collection_path, ".");
        let _results = benchmark.run_all();
    }
}