//! Benchmark harness for SiftDB (`siftdb_core/bench.rs`): timings for
//! collection initialization, filesystem import, search queries, and the
//! advanced (incremental update / compaction) feature set.

1use std::time::{Duration, Instant, SystemTime};
2use std::path::{Path, PathBuf};
3use std::fs;
4use crate::SiftDB;
5use crate::locking::SWMRLockManager;
6use crate::compaction::CollectionCompactor;
7use crate::incremental::IncrementalUpdater;
8use crate::ingest::{Ingester, IngestOptions};
9use anyhow::Result;
10use serde::{Deserialize, Serialize};
11
/// Timing and throughput figures for a single benchmark run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkResults {
    /// Human-readable benchmark name (e.g. "File Import", "Search: 'fn' (…)").
    pub name: String,
    /// Wall-clock time taken; for search benchmarks this is the average per query.
    pub duration: Duration,
    /// Files handled; search benchmarks store the hit count here instead.
    pub files_processed: u64,
    /// Total input bytes handled, when a byte volume was measured.
    pub bytes_processed: u64,
    /// Derived queries/sec rate; `None` for non-search benchmarks.
    pub queries_per_second: Option<f64>,
    /// Derived MB/sec rate; `None` when no byte volume was measured.
    pub throughput_mbps: Option<f64>,
}
21
/// One complete benchmark run, serialized to JSON by `save_results`.
#[derive(Serialize, Deserialize)]
pub struct BenchmarkSuite {
    /// Crate version (`CARGO_PKG_VERSION`) that produced this run.
    pub version: String,
    /// Unix-epoch seconds at save time, stored as a string.
    pub timestamp: String,
    /// Short git commit hash, when `git rev-parse` succeeds.
    pub git_commit: Option<String>,
    /// Host details the run was captured on.
    pub test_environment: TestEnvironment,
    /// Individual benchmark results, in execution order.
    pub benchmarks: Vec<BenchmarkResults>,
}
30
/// Host environment metadata recorded alongside benchmark results.
#[derive(Serialize, Deserialize)]
pub struct TestEnvironment {
    /// Operating system name (from `std::env::consts::OS`).
    pub os: String,
    /// CPU description; currently always populated with "unknown".
    pub cpu: String,
    /// Memory description; currently always populated with "unknown".
    pub memory: String,
}
37
38impl BenchmarkResults {
39    pub fn print(&self) {
40        println!("=== {} ===", self.name);
41        println!("Duration: {:.2}s", self.duration.as_secs_f64());
42        if self.files_processed > 0 {
43            println!("Files processed: {}", self.files_processed);
44            println!("Files/sec: {:.1}", self.files_processed as f64 / self.duration.as_secs_f64());
45        }
46        if self.bytes_processed > 0 {
47            let mb = self.bytes_processed as f64 / (1024.0 * 1024.0);
48            println!("Data processed: {:.2} MB", mb);
49            if let Some(throughput) = self.throughput_mbps {
50                println!("Throughput: {:.2} MB/s", throughput);
51            }
52        }
53        if let Some(qps) = self.queries_per_second {
54            println!("Queries/sec: {:.1}", qps);
55        }
56        println!();
57    }
58}
59
60pub struct SiftDBBenchmark {
61    collection_path: std::path::PathBuf,
62    source_path: std::path::PathBuf,
63    lock_manager: SWMRLockManager,
64}
65
66impl SiftDBBenchmark {
67    pub fn new<P1: AsRef<Path>, P2: AsRef<Path>>(collection_path: P1, source_path: P2) -> Self {
68        let collection_path = collection_path.as_ref().to_path_buf();
69        let lock_manager = SWMRLockManager::new(&collection_path);
70        Self {
71            collection_path,
72            source_path: source_path.as_ref().to_path_buf(),
73            lock_manager,
74        }
75    }
76
77    pub fn run_all(&mut self) -> Vec<BenchmarkResults> {
78        println!("🚀 SiftDB Performance Benchmark");
79        println!("================================");
80        println!();
81        
82        let mut results = Vec::new();
83        
84        // Initialize collection
85        results.push(self.bench_init());
86        
87        // Import benchmark
88        results.push(self.bench_import());
89        
90        // Search benchmarks
91        results.extend(self.bench_searches());
92        
93        self.print_summary(&results);
94        
95        // Save results to file
96        if let Err(e) = self.save_results(&results) {
97            eprintln!("Warning: Failed to save benchmark results: {}", e);
98        }
99        
100        results
101    }
102
103    pub fn run_all_quiet(&mut self) -> Vec<BenchmarkResults> {
104        let mut results = Vec::new();
105        
106        // Initialize collection
107        results.push(self.bench_init_quiet());
108        
109        // Import benchmark
110        results.push(self.bench_import_quiet());
111        
112        // Search benchmarks
113        results.extend(self.bench_searches_quiet());
114        
115        results
116    }
117
118    fn bench_init_quiet(&self) -> BenchmarkResults {
119        let start = Instant::now();
120        SiftDB::init(&self.collection_path).expect("Failed to initialize collection");
121        let duration = start.elapsed();
122        
123        BenchmarkResults {
124            name: "Collection Initialization".to_string(),
125            duration,
126            files_processed: 0,
127            bytes_processed: 0,
128            queries_per_second: None,
129            throughput_mbps: None,
130        }
131    }
132
133    fn bench_import_quiet(&mut self) -> BenchmarkResults {
134        let start = Instant::now();
135        let _db = SiftDB::open(&self.collection_path).expect("Failed to open collection");
136        
137        let mut options = IngestOptions::default();
138        options.include_patterns = vec!["**/*.rs".to_string(), "**/*.md".to_string(), "**/*.toml".to_string(), "**/*.json".to_string()];
139        
140        let mut ingester = Ingester::new(self.collection_path.clone(), options);
141        let (source_files, source_bytes) = self.count_source_files();
142        let stats = ingester.ingest_from_fs(&self.source_path).expect("Failed to ingest");
143        let duration = start.elapsed();
144        
145        BenchmarkResults {
146            name: "File Import".to_string(),
147            duration,
148            files_processed: stats.ingested,
149            bytes_processed: source_bytes,
150            queries_per_second: None,
151            throughput_mbps: Some(source_bytes as f64 / (1024.0 * 1024.0) / duration.as_secs_f64()),
152        }
153    }
154
155    fn bench_searches_quiet(&self) -> Vec<BenchmarkResults> {
156        let mut results = Vec::new();
157        let db = SiftDB::open(&self.collection_path).expect("Failed to open collection");
158        let mut snapshot = db.snapshot().expect("Failed to create snapshot");
159        
160        let queries = vec![
161            ("fn", "Function definitions"),
162            ("println", "Print statements"),
163            ("use", "Import statements"),
164            ("struct", "Struct definitions"),
165            ("impl", "Implementation blocks"),
166            ("pub", "Public items"),
167            ("let", "Variable declarations"),
168            ("match", "Pattern matching"),
169            ("async", "Async code"),
170            ("Result", "Result types"),
171        ];
172
173        for (query, description) in queries {
174            results.push(self.bench_single_search_quiet(&mut snapshot, query, description));
175        }
176
177        results
178    }
179
180    fn bench_single_search_quiet(&self, snapshot: &mut crate::Snapshot, query: &str, description: &str) -> BenchmarkResults {
181        let iterations = 10;
182        let mut total_duration = Duration::new(0, 0);
183        let mut total_hits = 0;
184
185        // Warm up
186        snapshot.find(query, None, Some(1000)).ok();
187
188        // Run benchmark iterations
189        for _ in 0..iterations {
190            let start = Instant::now();
191            if let Ok(hits) = snapshot.find(query, None, Some(1000)) {
192                total_hits = hits.len();
193            }
194            total_duration += start.elapsed();
195        }
196
197        let avg_duration = total_duration / iterations as u32;
198        let qps = if avg_duration.as_secs_f64() > 0.0 {
199            1.0 / avg_duration.as_secs_f64()
200        } else {
201            f64::INFINITY
202        };
203
204        BenchmarkResults {
205            name: format!("Search: '{}' ({})", query, description),
206            duration: avg_duration,
207            files_processed: total_hits as u64,
208            bytes_processed: 0,
209            queries_per_second: Some(qps),
210            throughput_mbps: None,
211        }
212    }
213
214    fn bench_init(&self) -> BenchmarkResults {
215        let start = Instant::now();
216        
217        SiftDB::init(&self.collection_path).expect("Failed to initialize collection");
218        
219        let duration = start.elapsed();
220        
221        BenchmarkResults {
222            name: "Collection Initialization".to_string(),
223            duration,
224            files_processed: 0,
225            bytes_processed: 0,
226            queries_per_second: None,
227            throughput_mbps: None,
228        }
229    }
230
231    fn bench_import(&mut self) -> BenchmarkResults {
232        println!("📁 Starting import benchmark...");
233        
234        let start = Instant::now();
235        
236        let _db = SiftDB::open(&self.collection_path).expect("Failed to open collection");
237        
238        let mut options = IngestOptions::default();
239        options.include_patterns = vec!["**/*.rs".to_string(), "**/*.md".to_string(), "**/*.toml".to_string(), "**/*.json".to_string()];
240        
241        let mut ingester = Ingester::new(self.collection_path.clone(), options);
242        
243        // Count source files and bytes before import
244        let (source_files, source_bytes) = self.count_source_files();
245        println!("  Source files found: {}", source_files);  
246        println!("  Source data: {:.2} MB", source_bytes as f64 / (1024.0 * 1024.0));
247        
248        let stats = ingester.ingest_from_fs(&self.source_path).expect("Failed to ingest");
249        
250        let duration = start.elapsed();
251        
252        // Calculate storage efficiency
253        let storage_bytes = self.calculate_total_bytes();
254        let compression_ratio = if source_bytes > 0 {
255            storage_bytes as f64 / source_bytes as f64
256        } else {
257            1.0
258        };
259        
260        println!("  ✅ Import completed in {:.2}s", duration.as_secs_f64());
261        println!("  📊 Files ingested: {} ({} skipped, {} errors)", 
262                 stats.ingested, stats.skipped, stats.errors);
263        println!("  💾 Storage size: {:.2} MB (ratio: {:.2}x)", 
264                 storage_bytes as f64 / (1024.0 * 1024.0), compression_ratio);
265        
266        BenchmarkResults {
267            name: "File Import".to_string(),
268            duration,
269            files_processed: stats.ingested,
270            bytes_processed: source_bytes,
271            queries_per_second: None,
272            throughput_mbps: Some(source_bytes as f64 / (1024.0 * 1024.0) / duration.as_secs_f64()),
273        }
274    }
275
276    fn bench_searches(&self) -> Vec<BenchmarkResults> {
277        let mut results = Vec::new();
278        
279        let db = SiftDB::open(&self.collection_path).expect("Failed to open collection");
280        let mut snapshot = db.snapshot().expect("Failed to create snapshot");
281        
282        // Common search patterns
283        let queries = vec![
284            ("fn", "Function definitions"),
285            ("println", "Print statements"),
286            ("use", "Import statements"),
287            ("struct", "Struct definitions"),
288            ("impl", "Implementation blocks"),
289            ("pub", "Public items"),
290            ("let", "Variable declarations"),
291            ("match", "Pattern matching"),
292            ("async", "Async code"),
293            ("Result", "Result types"),
294        ];
295
296        for (query, description) in queries {
297            results.push(self.bench_single_search(&mut snapshot, query, description));
298        }
299
300        results
301    }
302
303    fn bench_single_search(&self, snapshot: &mut crate::Snapshot, query: &str, description: &str) -> BenchmarkResults {
304        let iterations = 10;
305        let mut total_duration = Duration::new(0, 0);
306        let mut total_hits = 0;
307
308        // Warm up
309        snapshot.find(query, None, Some(1000)).ok();
310
311        // Run benchmark iterations
312        for _ in 0..iterations {
313            let start = Instant::now();
314            if let Ok(hits) = snapshot.find(query, None, Some(1000)) {
315                total_hits = hits.len();
316            }
317            total_duration += start.elapsed();
318        }
319
320        let avg_duration = total_duration / iterations as u32;
321        let qps = iterations as f64 / total_duration.as_secs_f64();
322
323        BenchmarkResults {
324            name: format!("Search: '{}' ({})", query, description),
325            duration: avg_duration,
326            files_processed: total_hits as u64,
327            bytes_processed: 0,
328            queries_per_second: Some(qps),
329            throughput_mbps: None,
330        }
331    }
332
333    fn count_source_files(&self) -> (u64, u64) {
334        let mut file_count = 0;
335        let mut byte_count = 0;
336        
337        let walker = ignore::WalkBuilder::new(&self.source_path)
338            .hidden(false)
339            .git_ignore(true)
340            .build();
341            
342        for entry in walker {
343            if let Ok(entry) = entry {
344                let path = entry.path();
345                if path.is_file() {
346                    // Check if file matches our patterns
347                    let path_str = path.to_string_lossy();
348                    if path_str.ends_with(".rs") || path_str.ends_with(".md") || 
349                       path_str.ends_with(".toml") || path_str.ends_with(".json") {
350                        file_count += 1;
351                        if let Ok(metadata) = path.metadata() {
352                            byte_count += metadata.len();
353                        }
354                    }
355                }
356            }
357        }
358        
359        (file_count, byte_count)
360    }
361
362    fn calculate_total_bytes(&self) -> u64 {
363        let mut total = 0;
364        
365        if let Ok(entries) = fs::read_dir(&self.collection_path.join("store")) {
366            for entry in entries.flatten() {
367                if let Ok(metadata) = entry.metadata() {
368                    total += metadata.len();
369                }
370            }
371        }
372        
373        total
374    }
375    
376    fn save_results(&self, results: &[BenchmarkResults]) -> Result<(), Box<dyn std::error::Error>> {
377        let suite = BenchmarkSuite {
378            version: env!("CARGO_PKG_VERSION").to_string(),
379            timestamp: SystemTime::now()
380                .duration_since(SystemTime::UNIX_EPOCH)?
381                .as_secs().to_string(),
382            git_commit: self.get_git_commit(),
383            test_environment: TestEnvironment {
384                os: std::env::consts::OS.to_string(),
385                cpu: "unknown".to_string(), // Could use sysinfo crate later
386                memory: "unknown".to_string(),
387            },
388            benchmarks: results.to_vec(),
389        };
390        
391        let benchmarks_dir = self.collection_path.parent()
392            .unwrap_or(&self.collection_path)
393            .join("benchmarks/results");
394            
395        std::fs::create_dir_all(&benchmarks_dir)?;
396        
397        let filename = format!("benchmark-{}.json", suite.timestamp);
398        let filepath = benchmarks_dir.join(filename);
399        
400        let json = serde_json::to_string_pretty(&suite)?;
401        std::fs::write(filepath, json)?;
402        
403        Ok(())
404    }
405    
406    fn get_git_commit(&self) -> Option<String> {
407        std::process::Command::new("git")
408            .arg("rev-parse")
409            .arg("--short")
410            .arg("HEAD")
411            .current_dir(self.collection_path.parent().unwrap_or(&self.collection_path))
412            .output()
413            .ok()
414            .and_then(|output| {
415                if output.status.success() {
416                    String::from_utf8(output.stdout).ok()
417                        .map(|s| s.trim().to_string())
418                } else {
419                    None
420                }
421            })
422    }
423
424    fn print_summary(&self, results: &[BenchmarkResults]) {
425        println!("📊 Benchmark Summary");
426        println!("===================");
427        
428        for result in results {
429            result.print();
430        }
431
432        // Overall stats
433        let import_result = results.iter().find(|r| r.name.contains("Import"));
434        
435        if let Some(import) = import_result {
436            println!("🎯 Key Performance Metrics:");
437            println!("- Import Rate: {:.0} files/sec", import.files_processed as f64 / import.duration.as_secs_f64());
438            if let Some(throughput) = import.throughput_mbps {
439                println!("- Import Throughput: {:.1} MB/s", throughput);
440            }
441            
442            let search_results: Vec<_> = results.iter()
443                .filter(|r| r.name.contains("Search"))
444                .collect();
445            
446            if !search_results.is_empty() {
447                let avg_qps: f64 = search_results.iter()
448                    .filter_map(|r| r.queries_per_second)
449                    .sum::<f64>() / search_results.len() as f64;
450                println!("- Average Search Rate: {:.1} queries/sec", avg_qps);
451            }
452        }
453        
454        println!("✅ Benchmark completed successfully!");
455    }
456}
457
/// Advanced benchmarking for Milestone 0.4 features (incremental updates
/// and compaction).
pub struct AdvancedBenchmark {
    /// Path of the SiftDB collection under test.
    collection_path: PathBuf,
    /// Directory used as the import/update source.
    source_path: PathBuf,
}
463
/// Combined results of the advanced benchmark phases.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct AdvancedBenchmarkResults {
    /// Phase 1: incremental update timings and change counts.
    pub incremental_update: IncrementalUpdateBenchmark,
    /// Phase 2: compaction timings and space statistics.
    pub compaction: CompactionBenchmark,
    /// Cross-phase totals and run metadata.
    pub overall_stats: OverallAdvancedStats,
}
470
/// Timings and change counts for the incremental-update phase.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct IncrementalUpdateBenchmark {
    /// Time to perform the initial full import, in milliseconds.
    pub initial_import_time_ms: u64,
    /// Time `scan_for_changes` took, in milliseconds.
    pub file_change_detection_time_ms: u64,
    /// Time `apply_changes` took, in milliseconds.
    pub delta_application_time_ms: u64,
    /// Detection + application time (excludes the initial import).
    pub total_update_time_ms: u64,
    /// Number of detected `Modified` changes.
    pub files_changed: usize,
    /// Number of detected `Added` changes.
    pub files_added: usize,
    /// Number of detected `Deleted` changes.
    pub files_removed: usize,
    /// Changes handled per second over the total update time (0.0 if the
    /// measured time was 0 ms).
    pub changes_per_second: f64,
}
482
/// Timings and space statistics for the compaction phase.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct CompactionBenchmark {
    /// Time the `needs_compaction` analysis took, in milliseconds.
    pub tombstone_analysis_time_ms: u64,
    /// Time the `compact` call took, in milliseconds.
    pub compaction_time_ms: u64,
    /// Analysis + compaction time, in milliseconds.
    pub total_time_ms: u64,
    /// Tombstones removed by compaction (from compactor stats).
    pub tombstones_removed: usize,
    /// Segments rewritten by compaction (from compactor stats).
    pub segments_compacted: usize,
    /// Bytes reclaimed by compaction (from compactor stats).
    pub space_reclaimed_bytes: u64,
    /// Reclaimed MB per second of compaction time (0.0 if time was 0 ms).
    pub compaction_throughput_mb_per_sec: f64,
}
493
/// Cross-phase totals for one advanced benchmark run.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OverallAdvancedStats {
    /// Wall-clock time for the whole run, in milliseconds.
    pub total_benchmark_time_ms: u64,
    /// Size of files directly inside the collection directory (non-recursive).
    pub collection_size_bytes: u64,
    /// Number of files directly inside the source directory (non-recursive).
    pub source_files_count: usize,
    /// Names of the features exercised by this run.
    pub features_tested: Vec<String>,
}
501
502impl AdvancedBenchmark {
503    pub fn new(collection_path: &Path, source_path: &Path) -> Self {
504        Self {
505            collection_path: collection_path.to_path_buf(),
506            source_path: source_path.to_path_buf(),
507        }
508    }
509    
510    pub fn run_all(&mut self) -> Result<AdvancedBenchmarkResults> {
511        let start_time = Instant::now();
512        
513        println!("📋 Phase 1: Incremental Update Benchmark");
514        let incremental_results = self.benchmark_incremental_updates()?;
515        println!("   ✅ Completed in {}ms", incremental_results.total_update_time_ms);
516        println!();
517        
518        println!("📋 Phase 2: Compaction Benchmark");
519        let compaction_results = self.benchmark_compaction()?;
520        println!("   ✅ Completed in {}ms", compaction_results.total_time_ms);
521        println!();
522        
523        let total_time = start_time.elapsed().as_millis() as u64;
524        let collection_size = self.calculate_collection_size()?;
525        let source_files = self.count_source_files()?;
526        
527        let overall_stats = OverallAdvancedStats {
528            total_benchmark_time_ms: total_time,
529            collection_size_bytes: collection_size,
530            source_files_count: source_files,
531            features_tested: vec![
532                "incremental_updates".to_string(),
533                "compaction".to_string(),
534                "delta_manifests".to_string(),
535                "file_timestamp_tracking".to_string(),
536            ],
537        };
538        
539        println!("🎯 Benchmark Summary");
540        println!("   Total time: {}ms", total_time);
541        println!("   Collection size: {} bytes", collection_size);
542        println!("   Source files: {}", source_files);
543        println!("   Incremental update performance: {:.2} changes/sec", incremental_results.changes_per_second);
544        println!("   Compaction throughput: {:.2} MB/sec", compaction_results.compaction_throughput_mb_per_sec);
545        
546        Ok(AdvancedBenchmarkResults {
547            incremental_update: incremental_results,
548            compaction: compaction_results,
549            overall_stats,
550        })
551    }
552    
553    pub fn run_all_quiet(&mut self) -> AdvancedBenchmarkResults {
554        self.run_all().unwrap_or_else(|_| AdvancedBenchmarkResults {
555            incremental_update: IncrementalUpdateBenchmark {
556                initial_import_time_ms: 0,
557                file_change_detection_time_ms: 0,
558                delta_application_time_ms: 0,
559                total_update_time_ms: 0,
560                files_changed: 0,
561                files_added: 0,
562                files_removed: 0,
563                changes_per_second: 0.0,
564            },
565            compaction: CompactionBenchmark {
566                tombstone_analysis_time_ms: 0,
567                compaction_time_ms: 0,
568                total_time_ms: 0,
569                tombstones_removed: 0,
570                segments_compacted: 0,
571                space_reclaimed_bytes: 0,
572                compaction_throughput_mb_per_sec: 0.0,
573            },
574            overall_stats: OverallAdvancedStats {
575                total_benchmark_time_ms: 0,
576                collection_size_bytes: 0,
577                source_files_count: 0,
578                features_tested: vec![],
579            },
580        })
581    }
582    
583    fn benchmark_incremental_updates(&self) -> Result<IncrementalUpdateBenchmark> {
584        // First, ensure we have a clean collection
585        if self.collection_path.exists() {
586            fs::remove_dir_all(&self.collection_path).ok();
587        }
588        
589        // Initial import timing
590        let initial_start = Instant::now();
591        let db = SiftDB::init(&self.collection_path)?;
592        let mut options = IngestOptions::default();
593        options.include_patterns = vec!["**/*.rs".to_string(), "**/*.md".to_string()];
594        let mut ingester = Ingester::new(self.collection_path.clone(), options);
595        ingester.ingest_from_fs(&self.source_path)?;
596        let initial_import_time = initial_start.elapsed().as_millis() as u64;
597        
598        // Create some file changes by copying and modifying a few files
599        let temp_dir = self.source_path.join("temp_changes");
600        fs::create_dir_all(&temp_dir).ok();
601        
602        // Add some files
603        for i in 0..5 {
604            let content = format!("New test file {} with timestamp", i);
605            fs::write(temp_dir.join(format!("new_file_{}.txt", i)), content)?;
606        }
607        
608        // File change detection timing
609        let detection_start = Instant::now();
610        let updater = IncrementalUpdater::new(&self.collection_path);
611        let changes = updater.scan_for_changes(&self.source_path, &[], &[])?;
612        let detection_time = detection_start.elapsed().as_millis() as u64;
613        
614        // Delta application timing
615        let application_start = Instant::now();
616        let _delta_manifest = updater.apply_changes(changes.clone(), &self.source_path)?;
617        let application_time = application_start.elapsed().as_millis() as u64;
618        
619        let total_time = detection_time + application_time;
620        let total_changes = changes.len();
621        let changes_per_second = if total_time > 0 {
622            (total_changes as f64) / (total_time as f64 / 1000.0)
623        } else {
624            0.0
625        };
626        
627        // Clean up temp changes
628        fs::remove_dir_all(&temp_dir).ok();
629        
630        Ok(IncrementalUpdateBenchmark {
631            initial_import_time_ms: initial_import_time,
632            file_change_detection_time_ms: detection_time,
633            delta_application_time_ms: application_time,
634            total_update_time_ms: total_time,
635            files_changed: changes.iter().filter(|c| matches!(c.change_type, crate::incremental::ChangeType::Modified)).count(),
636            files_added: changes.iter().filter(|c| matches!(c.change_type, crate::incremental::ChangeType::Added)).count(),
637            files_removed: changes.iter().filter(|c| matches!(c.change_type, crate::incremental::ChangeType::Deleted)).count(),
638            changes_per_second,
639        })
640    }
641    
642    fn benchmark_compaction(&self) -> Result<CompactionBenchmark> {
643        // Create some tombstones by removing files
644        let db = SiftDB::open(&self.collection_path)?;
645        
646        // Create tombstones by using the tombstone manager directly
647        let tombstone_manager = crate::tombstone::TombstoneManager::new(&self.collection_path);
648        
649        // Add some fake tombstones for benchmarking
650        for i in 0..5 {
651            tombstone_manager.mark_file_deleted(
652                i as u32,
653                PathBuf::from(format!("test_file_{}.txt", i)),
654                1,
655                0,
656                0
657            )?;
658        }
659        
660        // Analysis timing
661        let analysis_start = Instant::now();
662        let compactor = CollectionCompactor::new(&self.collection_path);
663        let needs_compaction = compactor.needs_compaction()?;
664        let analysis_time = analysis_start.elapsed().as_millis() as u64;
665        
666        if !needs_compaction {
667            // Force create some additional tombstones for benchmarking
668            for i in 5..15 {
669                tombstone_manager.mark_file_deleted(
670                    i as u32,
671                    PathBuf::from(format!("test_file_{}.txt", i)),
672                    1,
673                    0,
674                    0
675                )?;
676            }
677        }
678        
679        // Compaction timing
680        let compaction_start = Instant::now();
681        let stats = compactor.compact()?;
682        let compaction_time = compaction_start.elapsed().as_millis() as u64;
683        
684        let total_time = analysis_time + compaction_time;
685        let throughput_mb_per_sec = if compaction_time > 0 {
686            (stats.space_reclaimed_bytes as f64) / (1024.0 * 1024.0) / (compaction_time as f64 / 1000.0)
687        } else {
688            0.0
689        };
690        
691        Ok(CompactionBenchmark {
692            tombstone_analysis_time_ms: analysis_time,
693            compaction_time_ms: compaction_time,
694            total_time_ms: total_time,
695            tombstones_removed: stats.tombstones_removed,
696            segments_compacted: stats.segments_compacted,
697            space_reclaimed_bytes: stats.space_reclaimed_bytes,
698            compaction_throughput_mb_per_sec: throughput_mb_per_sec,
699        })
700    }
701    
702    fn calculate_collection_size(&self) -> Result<u64> {
703        let mut total_size = 0;
704        if self.collection_path.exists() {
705            for entry in fs::read_dir(&self.collection_path)? {
706                let entry = entry?;
707                if entry.path().is_file() {
708                    total_size += entry.metadata()?.len();
709                }
710            }
711        }
712        Ok(total_size)
713    }
714    
715    fn count_source_files(&self) -> Result<usize> {
716        let mut count = 0;
717        if self.source_path.exists() {
718            for entry in fs::read_dir(&self.source_path)? {
719                let entry = entry?;
720                if entry.path().is_file() {
721                    count += 1;
722                }
723            }
724        }
725        Ok(count)
726    }
727}
728
729#[cfg(test)]
730mod tests {
731    use super::*;
732    use tempfile::TempDir;
733
    #[test]
    #[ignore] // Run with: cargo test --release bench_test -- --ignored
    fn bench_test() {
        // Smoke test: runs the full (verbose) benchmark suite over the
        // current working directory into a throwaway temp collection.
        let temp_dir = TempDir::new().unwrap();
        let collection_path = temp_dir.path().join("test-bench.sift");
        
        let mut benchmark = SiftDBBenchmark::new(&collection_path, ".");
        // Results are discarded; this only verifies the suite runs end to end.
        let _results = benchmark.run_all();
    }
743}