use crate::{SiftDB, ingest::{Ingester, IngestOptions}};
use std::time::{Duration, Instant, SystemTime};
use std::path::Path;
use std::fs;
use anyhow::Result;
use serde::{Serialize, Deserialize};

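/// Timing and throughput figures collected for a single benchmark run.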
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct BenchmarkResults {
    pub name: String,
    pub duration: Duration,
    pub files_processed: u64,
    pub bytes_processed: u64,
    pub queries_per_second: Option<f64>,
    pub throughput_mbps: Option<f64>,
}

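/// A complete benchmark run plus the context it was collected in, serialized
/// to JSON by `save_results`.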
#[derive(Serialize, Deserialize)]
pub struct BenchmarkSuite {
    pub version: String,
    pub timestamp: String,
    pub git_commit: Option<String>,
    pub test_environment: TestEnvironment,
    pub benchmarks: Vec<BenchmarkResults>,
}

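/// Basic description of the machine the benchmarks ran on.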
#[derive(Serialize, Deserialize)]
pub struct TestEnvironment {
    pub os: String,
    pub cpu: String,
    pub memory: String,
}

impl BenchmarkResults {
    pub fn print(&self) {
        println!("=== {} ===", self.name);
        println!("Duration: {:.2}s", self.duration.as_secs_f64());
        if self.files_processed > 0 {
            println!("Files processed: {}", self.files_processed);
            println!("Files/sec: {:.1}", self.files_processed as f64 / self.duration.as_secs_f64());
        }
        if self.bytes_processed > 0 {
            let mb = self.bytes_processed as f64 / (1024.0 * 1024.0);
            println!("Data processed: {:.2} MB", mb);
            if let Some(throughput) = self.throughput_mbps {
                println!("Throughput: {:.2} MB/s", throughput);
            }
        }
        if let Some(qps) = self.queries_per_second {
            println!("Queries/sec: {:.1}", qps);
        }
        println!();
    }
}

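/// Drives the end-to-end benchmark: collection initialization, file import,
/// and a series of search queries against the resulting collection.
///
/// Minimal usage sketch (the paths below are hypothetical):
///
/// ```ignore
/// let mut bench = SiftDBBenchmark::new("bench.sift", ".");
/// let results = bench.run_all();
/// ```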
pub struct SiftDBBenchmark {
    collection_path: std::path::PathBuf,
    source_path: std::path::PathBuf,
}

impl SiftDBBenchmark {
    pub fn new<P1: AsRef<Path>, P2: AsRef<Path>>(collection_path: P1, source_path: P2) -> Self {
        Self {
            collection_path: collection_path.as_ref().to_path_buf(),
            source_path: source_path.as_ref().to_path_buf(),
        }
    }

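    /// Run every benchmark in order, print a summary, and persist the results
    /// to disk as JSON.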
    pub fn run_all(&mut self) -> Vec<BenchmarkResults> {
        println!("🚀 SiftDB Performance Benchmark");
        println!("================================");
        println!();

        let mut results = Vec::new();

        results.push(self.bench_init());

        results.push(self.bench_import());

        results.extend(self.bench_searches());

        self.print_summary(&results);

        if let Err(e) = self.save_results(&results) {
            eprintln!("Warning: Failed to save benchmark results: {}", e);
        }

        results
    }

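    /// Measure how long it takes to initialize an empty collection.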
    fn bench_init(&self) -> BenchmarkResults {
        let start = Instant::now();

        SiftDB::init(&self.collection_path).expect("Failed to initialize collection");

        let duration = start.elapsed();

        BenchmarkResults {
            name: "Collection Initialization".to_string(),
            duration,
            files_processed: 0,
            bytes_processed: 0,
            queries_per_second: None,
            throughput_mbps: None,
        }
    }

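    /// Measure end-to-end ingestion of the source tree, reporting file count,
    /// on-disk storage size, and import throughput.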
    fn bench_import(&mut self) -> BenchmarkResults {
        println!("📁 Starting import benchmark...");

        let start = Instant::now();

        let _db = SiftDB::open(&self.collection_path).expect("Failed to open collection");

        let mut options = IngestOptions::default();
        options.include_patterns = vec![
            "**/*.rs".to_string(),
            "**/*.md".to_string(),
            "**/*.toml".to_string(),
            "**/*.json".to_string(),
        ];

        let mut ingester = Ingester::new(self.collection_path.clone(), options);

        let (source_files, source_bytes) = self.count_source_files();
        println!(" Source files found: {}", source_files);
        println!(" Source data: {:.2} MB", source_bytes as f64 / (1024.0 * 1024.0));

        let stats = ingester.ingest_from_fs(&self.source_path).expect("Failed to ingest");

        let duration = start.elapsed();

        let storage_bytes = self.calculate_total_bytes();
        let compression_ratio = if source_bytes > 0 {
            storage_bytes as f64 / source_bytes as f64
        } else {
            1.0
        };

        println!(" ✅ Import completed in {:.2}s", duration.as_secs_f64());
        println!(" 📊 Files ingested: {} ({} skipped, {} errors)",
            stats.ingested, stats.skipped, stats.errors);
        println!(" 💾 Storage size: {:.2} MB (ratio: {:.2}x)",
            storage_bytes as f64 / (1024.0 * 1024.0), compression_ratio);

        BenchmarkResults {
            name: "File Import".to_string(),
            duration,
            files_processed: stats.ingested,
            bytes_processed: source_bytes,
            queries_per_second: None,
            throughput_mbps: Some(source_bytes as f64 / (1024.0 * 1024.0) / duration.as_secs_f64()),
        }
    }

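    /// Run a fixed set of representative search queries and collect per-query
    /// timing results.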
    fn bench_searches(&self) -> Vec<BenchmarkResults> {
        let mut results = Vec::new();

        let db = SiftDB::open(&self.collection_path).expect("Failed to open collection");
        let snapshot = db.snapshot().expect("Failed to create snapshot");

        let queries = vec![
            ("fn", "Function definitions"),
            ("println", "Print statements"),
            ("use", "Import statements"),
            ("struct", "Struct definitions"),
            ("impl", "Implementation blocks"),
            ("pub", "Public items"),
            ("let", "Variable declarations"),
            ("match", "Pattern matching"),
            ("async", "Async code"),
            ("Result", "Result types"),
        ];

        for (query, description) in queries {
            results.push(self.bench_single_search(&snapshot, query, description));
        }

        results
    }

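    /// Time a single query over several iterations and report the average
    /// latency and queries per second.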
    fn bench_single_search(&self, snapshot: &crate::Snapshot, query: &str, description: &str) -> BenchmarkResults {
        let iterations = 10;
        let mut total_duration = Duration::new(0, 0);
        let mut total_hits = 0;

        // Warm-up query; the result is intentionally discarded and not timed.
        snapshot.find(query, None, Some(1000)).ok();

        for _ in 0..iterations {
            let start = Instant::now();
            if let Ok(hits) = snapshot.find(query, None, Some(1000)) {
                // The same snapshot is queried each time, so the hit count does not change.
                total_hits = hits.len();
            }
            total_duration += start.elapsed();
        }

        let avg_duration = total_duration / iterations as u32;
        let qps = iterations as f64 / total_duration.as_secs_f64();

        BenchmarkResults {
            name: format!("Search: '{}' ({})", query, description),
            duration: avg_duration,
            files_processed: total_hits as u64,
            bytes_processed: 0,
            queries_per_second: Some(qps),
            throughput_mbps: None,
        }
    }

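    /// Walk the source tree (respecting .gitignore) and count the files and
    /// bytes matching the benchmarked file types (.rs, .md, .toml, .json).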
    fn count_source_files(&self) -> (u64, u64) {
        let mut file_count = 0;
        let mut byte_count = 0;

        let walker = ignore::WalkBuilder::new(&self.source_path)
            .hidden(false)
            .git_ignore(true)
            .build();

        for entry in walker.flatten() {
            let path = entry.path();
            if path.is_file() {
                let path_str = path.to_string_lossy();
                if path_str.ends_with(".rs") || path_str.ends_with(".md") ||
                    path_str.ends_with(".toml") || path_str.ends_with(".json") {
                    file_count += 1;
                    if let Ok(metadata) = path.metadata() {
                        byte_count += metadata.len();
                    }
                }
            }
        }

        (file_count, byte_count)
    }

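    /// Sum the on-disk size of the entries directly under the collection's
    /// `store` directory.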
    fn calculate_total_bytes(&self) -> u64 {
        let mut total = 0;

        if let Ok(entries) = fs::read_dir(&self.collection_path.join("store")) {
            for entry in entries.flatten() {
                if let Ok(metadata) = entry.metadata() {
                    total += metadata.len();
                }
            }
        }

        total
    }

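    /// Serialize the full benchmark suite (with environment metadata) to a
    /// timestamped JSON file under `benchmarks/results` next to the collection.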
    fn save_results(&self, results: &[BenchmarkResults]) -> Result<()> {
        let suite = BenchmarkSuite {
            version: env!("CARGO_PKG_VERSION").to_string(),
            timestamp: SystemTime::now()
                .duration_since(SystemTime::UNIX_EPOCH)?
                .as_secs()
                .to_string(),
            git_commit: self.get_git_commit(),
            test_environment: TestEnvironment {
                os: std::env::consts::OS.to_string(),
                cpu: "unknown".to_string(),
                memory: "unknown".to_string(),
            },
            benchmarks: results.to_vec(),
        };

        let benchmarks_dir = self.collection_path.parent()
            .unwrap_or(&self.collection_path)
            .join("benchmarks/results");

        std::fs::create_dir_all(&benchmarks_dir)?;

        let filename = format!("benchmark-{}.json", suite.timestamp);
        let filepath = benchmarks_dir.join(filename);

        let json = serde_json::to_string_pretty(&suite)?;
        std::fs::write(filepath, json)?;

        Ok(())
    }

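    /// Best-effort lookup of the current short git commit hash; returns `None`
    /// if git is unavailable or the command fails.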
    fn get_git_commit(&self) -> Option<String> {
        std::process::Command::new("git")
            .arg("rev-parse")
            .arg("--short")
            .arg("HEAD")
            .current_dir(self.collection_path.parent().unwrap_or(&self.collection_path))
            .output()
            .ok()
            .and_then(|output| {
                if output.status.success() {
                    String::from_utf8(output.stdout).ok()
                        .map(|s| s.trim().to_string())
                } else {
                    None
                }
            })
    }

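    /// Print every result followed by a short digest of the key import and
    /// search metrics.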
    fn print_summary(&self, results: &[BenchmarkResults]) {
        println!("📊 Benchmark Summary");
        println!("===================");

        for result in results {
            result.print();
        }

        let import_result = results.iter().find(|r| r.name.contains("Import"));

        if let Some(import) = import_result {
            println!("🎯 Key Performance Metrics:");
            println!("- Import Rate: {:.0} files/sec", import.files_processed as f64 / import.duration.as_secs_f64());
            if let Some(throughput) = import.throughput_mbps {
                println!("- Import Throughput: {:.1} MB/s", throughput);
            }

            let search_results: Vec<_> = results.iter()
                .filter(|r| r.name.contains("Search"))
                .collect();

            if !search_results.is_empty() {
                let avg_qps: f64 = search_results.iter()
                    .filter_map(|r| r.queries_per_second)
                    .sum::<f64>() / search_results.len() as f64;
                println!("- Average Search Rate: {:.1} queries/sec", avg_qps);
            }
        }

        println!("✅ Benchmark completed successfully!");
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

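    // Exercises the full pipeline, so it is ignored by default; run it
    // explicitly with `cargo test bench_test -- --ignored --nocapture`.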
    #[test]
    #[ignore]
    fn bench_test() {
        let temp_dir = TempDir::new().unwrap();
        let collection_path = temp_dir.path().join("test-bench.sift");

        let mut benchmark = SiftDBBenchmark::new(&collection_path, ".");
        let _results = benchmark.run_all();
    }
}
353}