// siftdb_core/compare.rs

use std::collections::HashMap;
use std::fs;
use std::path::Path;

use anyhow::Context;
use anyhow::Result;
use serde::{Serialize, Deserialize};

use crate::bench::BenchmarkResults;

8#[derive(Serialize, Deserialize, Debug)]
9pub struct BenchmarkComparison {
10    pub baseline: BenchmarkResults,
11    pub current: BenchmarkResults,
12    pub performance_delta: PerformanceDelta,
13}
14
15#[derive(Serialize, Deserialize, Debug)]
16pub struct PerformanceDelta {
17    pub duration_change_percent: f64,
18    pub throughput_change_percent: Option<f64>,
19    pub queries_per_second_change_percent: Option<f64>,
20    pub files_processed_change: i64,
21    pub bytes_processed_change: i64,
22}
23
24#[derive(Serialize, Deserialize, Debug)]
25pub struct RegressionReport {
26    pub comparisons: Vec<BenchmarkComparison>,
27    pub summary: RegressionSummary,
28}
29
30#[derive(Serialize, Deserialize, Debug)]
31pub struct RegressionSummary {
32    pub total_benchmarks: usize,
33    pub regressions: usize,
34    pub improvements: usize,
35    pub avg_performance_change: f64,
36}
37
38pub struct BenchmarkAnalyzer;
39
40impl BenchmarkAnalyzer {
41    /// Compare two benchmark results and calculate performance delta
42    pub fn compare_benchmarks(baseline: &BenchmarkResults, current: &BenchmarkResults) -> BenchmarkComparison {
43        let duration_change = ((current.duration.as_secs_f64() - baseline.duration.as_secs_f64()) 
44            / baseline.duration.as_secs_f64()) * 100.0;
45        
46        let throughput_change = match (baseline.throughput_mbps, current.throughput_mbps) {
47            (Some(base), Some(curr)) => Some(((curr - base) / base) * 100.0),
48            _ => None,
49        };
50        
51        let qps_change = match (baseline.queries_per_second, current.queries_per_second) {
52            (Some(base), Some(curr)) => Some(((curr - base) / base) * 100.0),
53            _ => None,
54        };
55        
56        BenchmarkComparison {
57            baseline: baseline.clone(),
58            current: current.clone(),
59            performance_delta: PerformanceDelta {
60                duration_change_percent: duration_change,
61                throughput_change_percent: throughput_change,
62                queries_per_second_change_percent: qps_change,
63                files_processed_change: current.files_processed as i64 - baseline.files_processed as i64,
64                bytes_processed_change: current.bytes_processed as i64 - baseline.bytes_processed as i64,
65            },
66        }
67    }
68    
69    /// Load baseline benchmarks from directory
70    pub fn load_baseline_benchmarks(benchmarks_dir: &Path) -> Result<HashMap<String, BenchmarkResults>> {
71        let baseline_path = benchmarks_dir.join("baseline-0.1.json");
72        if !baseline_path.exists() {
73            anyhow::bail!("Baseline benchmark file not found: {:?}", baseline_path);
74        }
75        
76        let content = fs::read_to_string(&baseline_path)?;
77        let baseline: BenchmarkResults = serde_json::from_str(&content)?;
78        
79        let mut baselines = HashMap::new();
80        baselines.insert(baseline.name.clone(), baseline);
81        Ok(baselines)
82    }
83    
84    /// Load current benchmark results from results directory
85    pub fn load_current_benchmarks(results_dir: &Path) -> Result<Vec<BenchmarkResults>> {
86        let mut results = Vec::new();
87        
88        if results_dir.exists() {
89            for entry in fs::read_dir(results_dir)? {
90                let entry = entry?;
91                let path = entry.path();
92                
93                if path.extension().map_or(false, |ext| ext == "json") {
94                    let content = fs::read_to_string(&path)?;
95                    if let Ok(result) = serde_json::from_str::<BenchmarkResults>(&content) {
96                        results.push(result);
97                    }
98                }
99            }
100        }
101        
102        Ok(results)
103    }
104    
105    /// Generate regression report comparing baseline vs current benchmarks
106    pub fn generate_regression_report(benchmarks_dir: &Path) -> Result<RegressionReport> {
107        let baselines = Self::load_baseline_benchmarks(benchmarks_dir)?;
108        let current_results = Self::load_current_benchmarks(&benchmarks_dir.join("results"))?;
109        
110        let mut comparisons = Vec::new();
111        let mut total_change = 0.0;
112        let mut regressions = 0;
113        let mut improvements = 0;
114        
115        for current in &current_results {
116            if let Some(baseline) = baselines.get(&current.name) {
117                let comparison = Self::compare_benchmarks(baseline, current);
118                
119                // Count regressions/improvements based on queries per second
120                if let Some(qps_change) = comparison.performance_delta.queries_per_second_change_percent {
121                    total_change += qps_change;
122                    if qps_change < -5.0 { // More than 5% slower is a regression
123                        regressions += 1;
124                    } else if qps_change > 5.0 { // More than 5% faster is an improvement
125                        improvements += 1;
126                    }
127                }
128                
129                comparisons.push(comparison);
130            }
131        }
132        
133        let avg_change = if comparisons.is_empty() { 0.0 } else { total_change / comparisons.len() as f64 };
134        
135        Ok(RegressionReport {
136            comparisons,
137            summary: RegressionSummary {
138                total_benchmarks: current_results.len(),
139                regressions,
140                improvements,
141                avg_performance_change: avg_change,
142            },
143        })
144    }
145    
146    /// Save regression report to file
147    pub fn save_regression_report(report: &RegressionReport, output_path: &Path) -> Result<()> {
148        let json_content = serde_json::to_string_pretty(report)?;
149        fs::write(output_path, json_content)?;
150        Ok(())
151    }
152    
153    /// Print regression report summary to console
154    pub fn print_regression_summary(report: &RegressionReport) {
155        println!("🔍 SiftDB Performance Regression Report");
156        println!("═══════════════════════════════════════");
157        println!("Total Benchmarks: {}", report.summary.total_benchmarks);
158        println!("Regressions (>5% slower): {}", report.summary.regressions);
159        println!("Improvements (>5% faster): {}", report.summary.improvements);
160        println!("Average Performance Change: {:.2}%", report.summary.avg_performance_change);
161        println!();
162        
163        for comparison in &report.comparisons {
164            println!("📊 Benchmark: {}", comparison.current.name);
165            println!("   Duration: {:.2}% change", comparison.performance_delta.duration_change_percent);
166            
167            if let Some(qps_change) = comparison.performance_delta.queries_per_second_change_percent {
168                let status = if qps_change < -5.0 {
169                    "🔴 REGRESSION"
170                } else if qps_change > 5.0 {
171                    "🟢 IMPROVEMENT"
172                } else {
173                    "🟡 STABLE"
174                };
175                println!("   Queries/sec: {:.2}% change ({})", qps_change, status);
176            }
177            
178            if let Some(throughput_change) = comparison.performance_delta.throughput_change_percent {
179                println!("   Throughput: {:.2}% change", throughput_change);
180            }
181            println!();
182        }
183    }
184}