1use crate::performance::benchmark::{BenchmarkResult, BenchmarkSuite};
4use anyhow::Result;
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use std::path::{Path, PathBuf};
8
/// A detected performance regression for a single metric of one benchmark,
/// produced by comparing a current run against a stored baseline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceRegression {
    /// Name of the benchmark the regression was found in.
    pub benchmark_name: String,
    /// Metric that regressed ("avg_latency", "throughput", or "memory").
    pub metric_name: String,
    /// Baseline (previous-run) value of the metric.
    pub previous_value: f64,
    /// Value measured in the current run.
    pub current_value: f64,
    /// Percent change, oriented so that a positive value always means "worse".
    pub regression_percent: f64,
    /// True when the change exceeds twice the configured threshold.
    pub is_significant: bool,
    /// Heuristic confidence (0.95 when significant, 0.5 otherwise).
    pub confidence: f64,
}
27
/// Configuration for continuous benchmarking runs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContinuousBenchmarkConfig {
    /// Directory where per-run JSON results and `history.json` are stored.
    pub results_dir: PathBuf,
    /// Commit being benchmarked; its first 8 characters appear in run ids.
    pub commit_sha: Option<String>,
    /// Branch name, used when selecting a baseline run for comparison.
    pub branch: Option<String>,
    /// Build configuration label (e.g. "release") used to match baselines.
    pub build_config: String,
    /// Percent change above which a metric is reported as a regression.
    pub regression_threshold: f64,
    /// Number of benchmark iterations performed per `run_and_check` call.
    pub num_runs: usize,
    /// Desired statistical confidence level.
    // NOTE(review): this field is not read anywhere in the code shown here;
    // confirm whether it is consumed elsewhere or is dead configuration.
    pub confidence_level: f64,
}
46
47impl Default for ContinuousBenchmarkConfig {
48 fn default() -> Self {
49 Self {
50 results_dir: PathBuf::from("benchmark_results"),
51 commit_sha: None,
52 branch: None,
53 build_config: "release".to_string(),
54 regression_threshold: 5.0, num_runs: 5,
56 confidence_level: 0.95,
57 }
58 }
59}
60
/// Orchestrates repeated benchmark runs, persistence of their results, and
/// regression checks against historical baselines.
pub struct ContinuousBenchmark {
    // Runner settings (paths, thresholds, run count).
    config: ContinuousBenchmarkConfig,
    // Previously recorded runs, loaded from `results_dir`.
    history: BenchmarkHistory,
}
66
67impl ContinuousBenchmark {
68 pub fn new(config: ContinuousBenchmarkConfig) -> Result<Self> {
70 std::fs::create_dir_all(&config.results_dir)?;
72
73 let history = BenchmarkHistory::load(&config.results_dir)?;
75
76 Ok(Self { config, history })
77 }
78
79 pub fn run_and_check(
81 &mut self,
82 suite: &mut BenchmarkSuite,
83 ) -> Result<Vec<PerformanceRegression>> {
84 let mut all_results = Vec::new();
86
87 for run in 0..self.config.num_runs {
88 println!(
89 "Running benchmark iteration {}/{}",
90 run + 1,
91 self.config.num_runs
92 );
93 all_results.extend(suite.results().to_vec());
96 }
97
98 let run_id = self.generate_run_id();
100 self.save_results(&run_id, &all_results)?;
101
102 let regressions = self.check_regressions(&all_results)?;
104
105 self.history.add_run(run_id, all_results);
107 self.history.save(&self.config.results_dir)?;
108
109 Ok(regressions)
110 }
111
112 fn check_regressions(
114 &self,
115 current_results: &[BenchmarkResult],
116 ) -> Result<Vec<PerformanceRegression>> {
117 let mut regressions = Vec::new();
118
119 let baseline = self.history.get_baseline(&self.config.branch, &self.config.build_config);
121
122 if let Some(baseline_results) = baseline {
123 for current in current_results {
124 if let Some(baseline) = baseline_results.iter().find(|b| b.name == current.name) {
125 let latency_regression = self.check_metric_regression(
127 ¤t.name,
128 "avg_latency",
129 baseline.avg_latency_ms,
130 current.avg_latency_ms,
131 true, );
133
134 if let Some(reg) = latency_regression {
135 regressions.push(reg);
136 }
137
138 let throughput_regression = self.check_metric_regression(
140 ¤t.name,
141 "throughput",
142 baseline.throughput_tokens_per_sec,
143 current.throughput_tokens_per_sec,
144 false, );
146
147 if let Some(reg) = throughput_regression {
148 regressions.push(reg);
149 }
150
151 if let (Some(baseline_mem), Some(current_mem)) =
153 (baseline.memory_bytes, current.memory_bytes)
154 {
155 let memory_regression = self.check_metric_regression(
156 ¤t.name,
157 "memory",
158 baseline_mem as f64,
159 current_mem as f64,
160 true, );
162
163 if let Some(reg) = memory_regression {
164 regressions.push(reg);
165 }
166 }
167 }
168 }
169 }
170
171 Ok(regressions)
172 }
173
174 fn check_metric_regression(
176 &self,
177 benchmark_name: &str,
178 metric_name: &str,
179 baseline_value: f64,
180 current_value: f64,
181 higher_is_worse: bool,
182 ) -> Option<PerformanceRegression> {
183 let change_percent = if higher_is_worse {
184 (current_value - baseline_value) / baseline_value * 100.0
185 } else {
186 (baseline_value - current_value) / baseline_value * 100.0
187 };
188
189 if change_percent > self.config.regression_threshold {
190 let is_significant = change_percent > self.config.regression_threshold * 2.0;
192
193 Some(PerformanceRegression {
194 benchmark_name: benchmark_name.to_string(),
195 metric_name: metric_name.to_string(),
196 previous_value: baseline_value,
197 current_value,
198 regression_percent: change_percent,
199 is_significant,
200 confidence: if is_significant { 0.95 } else { 0.5 },
201 })
202 } else {
203 None
204 }
205 }
206
207 fn generate_run_id(&self) -> String {
209 let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
210 let commit = self.config.commit_sha.as_ref().map(|s| &s[..8]).unwrap_or("unknown");
211 format!("{}_{}", timestamp, commit)
212 }
213
214 fn save_results(&self, run_id: &str, results: &[BenchmarkResult]) -> Result<()> {
216 let file_path = self.config.results_dir.join(format!("{}.json", run_id));
217 let json = serde_json::to_string_pretty(results)?;
218 std::fs::write(file_path, json)?;
219 Ok(())
220 }
221
222 pub fn generate_report(&self) -> Result<PerformanceReport> {
224 let trends = self.history.calculate_trends()?;
225 let summary = self.history.generate_summary()?;
226
227 Ok(PerformanceReport {
228 trends,
229 summary,
230 latest_regressions: Vec::new(),
231 })
232 }
233}
234
/// Persistent record of past benchmark runs, stored as `history.json` in
/// the results directory.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct BenchmarkHistory {
    /// Results of each run, keyed by run id.
    runs: HashMap<String, Vec<BenchmarkResult>>,
    /// Per-run metadata, keyed by the same run id.
    metadata: HashMap<String, RunMetadata>,
}
241
/// Metadata describing a single recorded benchmark run.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct RunMetadata {
    /// Identifier of the run (timestamp plus short commit).
    run_id: String,
    /// When the run was recorded.
    timestamp: chrono::DateTime<chrono::Utc>,
    /// Commit the run was built from, if known.
    commit_sha: Option<String>,
    /// Branch the run came from, if known.
    branch: Option<String>,
    /// Build configuration label (e.g. "release").
    build_config: String,
}
250
251impl BenchmarkHistory {
252 fn load(dir: &Path) -> Result<Self> {
254 let history_file = dir.join("history.json");
255
256 if history_file.exists() {
257 let json = std::fs::read_to_string(history_file)?;
258 Ok(serde_json::from_str(&json)?)
259 } else {
260 Ok(Self {
261 runs: HashMap::new(),
262 metadata: HashMap::new(),
263 })
264 }
265 }
266
267 fn save(&self, dir: &Path) -> Result<()> {
269 let history_file = dir.join("history.json");
270 let json = serde_json::to_string_pretty(self)?;
271 std::fs::write(history_file, json)?;
272 Ok(())
273 }
274
275 fn add_run(&mut self, run_id: String, results: Vec<BenchmarkResult>) {
277 let metadata = RunMetadata {
278 run_id: run_id.clone(),
279 timestamp: chrono::Utc::now(),
280 commit_sha: None, branch: None, build_config: "release".to_string(),
283 };
284
285 self.runs.insert(run_id.clone(), results);
286 self.metadata.insert(run_id, metadata);
287 }
288
289 fn get_baseline(
291 &self,
292 branch: &Option<String>,
293 build_config: &str,
294 ) -> Option<&Vec<BenchmarkResult>> {
295 let mut matching_runs: Vec<_> = self
297 .metadata
298 .iter()
299 .filter(|(_, meta)| {
300 meta.branch.as_ref() == branch.as_ref() && meta.build_config == build_config
301 })
302 .collect();
303
304 matching_runs.sort_by_key(|(_, meta)| meta.timestamp);
305
306 matching_runs.last().and_then(|(run_id, _)| self.runs.get(*run_id))
307 }
308
309 fn calculate_trends(&self) -> Result<HashMap<String, PerformanceTrend>> {
311 let mut trends = HashMap::new();
312
313 let mut by_benchmark: HashMap<String, Vec<(&String, &BenchmarkResult)>> = HashMap::new();
315
316 for (run_id, results) in &self.runs {
317 for result in results {
318 by_benchmark.entry(result.name.clone()).or_default().push((run_id, result));
319 }
320 }
321
322 for (benchmark_name, mut runs) in by_benchmark {
324 runs.sort_by_key(|(run_id, _)| {
326 self.metadata.get(*run_id).map(|m| m.timestamp).unwrap_or_default()
327 });
328
329 if runs.len() >= 2 {
330 let latencies: Vec<f64> = runs.iter().map(|(_, r)| r.avg_latency_ms).collect();
331 let throughputs: Vec<f64> =
332 runs.iter().map(|(_, r)| r.throughput_tokens_per_sec).collect();
333
334 trends.insert(
335 benchmark_name,
336 PerformanceTrend {
337 latency_trend: calculate_trend(&latencies),
338 throughput_trend: calculate_trend(&throughputs),
339 sample_count: runs.len(),
340 },
341 );
342 }
343 }
344
345 Ok(trends)
346 }
347
348 fn generate_summary(&self) -> Result<PerformanceSummary> {
350 let total_runs = self.runs.len();
351 let total_benchmarks = self
352 .runs
353 .values()
354 .flat_map(|results| results.iter().map(|r| &r.name))
355 .collect::<std::collections::HashSet<_>>()
356 .len();
357
358 let latest_run = self.metadata.values().max_by_key(|m| m.timestamp).map(|m| m.timestamp);
359
360 Ok(PerformanceSummary {
361 total_runs,
362 total_benchmarks,
363 latest_run,
364 earliest_run: self.metadata.values().min_by_key(|m| m.timestamp).map(|m| m.timestamp),
365 })
366 }
367}
368
/// Least-squares slope of a benchmark's metrics across its recorded runs
/// (run index as x; positive latency slope = getting slower).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceTrend {
    /// Slope of average latency (ms) per run.
    pub latency_trend: f64,
    /// Slope of throughput (tokens/sec) per run.
    pub throughput_trend: f64,
    /// Number of runs the fit was computed over.
    pub sample_count: usize,
}
379
/// Aggregated performance report built from the stored benchmark history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceReport {
    /// Per-benchmark trends, keyed by benchmark name.
    pub trends: HashMap<String, PerformanceTrend>,
    /// Overall run/benchmark counts and time range.
    pub summary: PerformanceSummary,
    /// Regressions from the latest run (always empty in `generate_report`).
    pub latest_regressions: Vec<PerformanceRegression>,
}
390
/// High-level statistics over the recorded benchmark history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceSummary {
    /// Total number of recorded runs.
    pub total_runs: usize,
    /// Number of distinct benchmark names across all runs.
    pub total_benchmarks: usize,
    /// Timestamp of the most recent run, if any exist.
    pub latest_run: Option<chrono::DateTime<chrono::Utc>>,
    /// Timestamp of the oldest run, if any exist.
    pub earliest_run: Option<chrono::DateTime<chrono::Utc>>,
}
399
/// Estimates the per-step slope of `values` via ordinary least squares,
/// using each element's index (0, 1, 2, …) as its x coordinate.
///
/// Returns 0.0 when fewer than two samples are given or the fit is
/// degenerate.
fn calculate_trend(values: &[f64]) -> f64 {
    if values.len() < 2 {
        return 0.0;
    }

    let n = values.len() as f64;
    let mean_x = (n - 1.0) / 2.0;
    let mean_y = values.iter().sum::<f64>() / n;

    // Accumulate covariance (numerator) and x-variance (denominator) in a
    // single pass over the samples.
    let (num, den) = values
        .iter()
        .enumerate()
        .fold((0.0, 0.0), |(num, den), (i, &y)| {
            let dx = i as f64 - mean_x;
            (num + dx * (y - mean_y), den + dx * dx)
        });

    if den > 0.0 {
        num / den
    } else {
        0.0
    }
}
425
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_regression_detection() {
        // Write into a unique temp directory instead of polluting the
        // working directory with `benchmark_results/` (the config default).
        let results_dir = std::env::temp_dir()
            .join(format!("continuous_bench_test_{}", std::process::id()));
        let config = ContinuousBenchmarkConfig {
            results_dir,
            ..ContinuousBenchmarkConfig::default()
        };
        let benchmark = ContinuousBenchmark::new(config).expect("operation failed in test");

        // 100 -> 110 with higher_is_worse is a 10% change, above the
        // default 5% threshold, so a regression must be reported.
        let regression = benchmark.check_metric_regression(
            "test_benchmark",
            "latency",
            100.0,
            110.0,
            true,
        );

        assert!(regression.is_some());
        let reg = regression.expect("operation failed in test");
        assert_eq!(reg.regression_percent, 10.0);
    }

    #[test]
    fn test_trend_calculation() {
        // Strictly increasing samples => positive slope.
        let values = vec![100.0, 102.0, 104.0, 106.0, 108.0];
        let trend = calculate_trend(&values);
        assert!(trend > 0.0);

        // Strictly decreasing samples => negative slope.
        let values = vec![100.0, 98.0, 96.0, 94.0, 92.0];
        let trend = calculate_trend(&values);
        assert!(trend < 0.0);
    }
}