use crate::performance::benchmark::{BenchmarkResult, BenchmarkSuite};
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
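/// A single detected regression: the baseline and current values of one metric, the
/// percentage change in the "worse" direction, and a coarse significance flag.
/// `confidence` is a heuristic score assigned by the checker, not a statistical
/// confidence interval.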
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceRegression {
pub benchmark_name: String,
pub metric_name: String,
pub previous_value: f64,
pub current_value: f64,
pub regression_percent: f64,
pub is_significant: bool,
pub confidence: f64,
}
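/// Configuration for continuous benchmarking. `regression_threshold` is a percentage
/// (the default 5.0 means a 5% change is flagged), `num_runs` is the number of
/// benchmark iterations aggregated per run, and `confidence_level` is stored but not
/// currently used by the regression check, which relies on a fixed heuristic instead.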
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContinuousBenchmarkConfig {
pub results_dir: PathBuf,
pub commit_sha: Option<String>,
pub branch: Option<String>,
pub build_config: String,
pub regression_threshold: f64,
pub num_runs: usize,
pub confidence_level: f64,
}
impl Default for ContinuousBenchmarkConfig {
fn default() -> Self {
Self {
results_dir: PathBuf::from("benchmark_results"),
commit_sha: None,
branch: None,
build_config: "release".to_string(),
regression_threshold: 5.0,
num_runs: 5,
confidence_level: 0.95,
}
}
}
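/// Drives continuous benchmarking: runs a suite repeatedly, persists the results to
/// disk, and flags regressions against the latest stored baseline for the configured
/// branch and build configuration.
///
/// A minimal usage sketch (how the `BenchmarkSuite` value `suite` is constructed is
/// assumed and not shown here):
///
/// ```ignore
/// let mut cb = ContinuousBenchmark::new(ContinuousBenchmarkConfig::default())?;
/// let regressions = cb.run_and_check(&mut suite)?;
/// for r in &regressions {
///     println!("{}: {} regressed {:.1}%", r.benchmark_name, r.metric_name, r.regression_percent);
/// }
/// ```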
pub struct ContinuousBenchmark {
config: ContinuousBenchmarkConfig,
history: BenchmarkHistory,
}
impl ContinuousBenchmark {
pub fn new(config: ContinuousBenchmarkConfig) -> Result<Self> {
std::fs::create_dir_all(&config.results_dir)?;
let history = BenchmarkHistory::load(&config.results_dir)?;
Ok(Self { config, history })
}
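/// Runs the suite for `num_runs` iterations, saves the aggregated results under a
/// timestamped run id, checks them against the latest matching baseline, records the
/// run in the history, and returns any detected regressions.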
pub fn run_and_check(
&mut self,
suite: &mut BenchmarkSuite,
) -> Result<Vec<PerformanceRegression>> {
let mut all_results = Vec::new();
for run in 0..self.config.num_runs {
println!(
"Running benchmark iteration {}/{}",
run + 1,
self.config.num_runs
);
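// Collect whatever results the suite currently exposes for this iteration;
// executing the benchmarks themselves is assumed to be the suite's responsibility.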
all_results.extend(suite.results().to_vec());
}
let run_id = self.generate_run_id();
self.save_results(&run_id, &all_results)?;
let regressions = self.check_regressions(&all_results)?;
self.history.add_run(run_id, all_results, &self.config);
self.history.save(&self.config.results_dir)?;
Ok(regressions)
}
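/// Compares each current result against the baseline result with the same name on
/// average latency, throughput, and (when both sides report it) memory usage.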
fn check_regressions(
&self,
current_results: &[BenchmarkResult],
) -> Result<Vec<PerformanceRegression>> {
let mut regressions = Vec::new();
let baseline = self.history.get_baseline(&self.config.branch, &self.config.build_config);
if let Some(baseline_results) = baseline {
for current in current_results {
if let Some(baseline) = baseline_results.iter().find(|b| b.name == current.name) {
let latency_regression = self.check_metric_regression(
&current.name,
"avg_latency",
baseline.avg_latency_ms,
current.avg_latency_ms,
true,
);
if let Some(reg) = latency_regression {
regressions.push(reg);
}
let throughput_regression = self.check_metric_regression(
&current.name,
"throughput",
baseline.throughput_tokens_per_sec,
current.throughput_tokens_per_sec,
false,
);
if let Some(reg) = throughput_regression {
regressions.push(reg);
}
if let (Some(baseline_mem), Some(current_mem)) =
(baseline.memory_bytes, current.memory_bytes)
{
let memory_regression = self.check_metric_regression(
&current.name,
"memory",
baseline_mem as f64,
current_mem as f64,
true,
);
if let Some(reg) = memory_regression {
regressions.push(reg);
}
}
}
}
}
Ok(regressions)
}
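/// Computes the percentage change in the "worse" direction: for higher-is-worse
/// metrics (latency, memory) this is `(current - baseline) / baseline * 100`; for
/// higher-is-better metrics (throughput) it is `(baseline - current) / baseline * 100`.
/// A change above `regression_threshold` is reported; for example, 100 ms -> 110 ms
/// is a +10% latency regression. The `confidence` value is a coarse heuristic
/// (0.95 when the change exceeds twice the threshold, 0.5 otherwise), not a
/// statistical confidence interval.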
fn check_metric_regression(
&self,
benchmark_name: &str,
metric_name: &str,
baseline_value: f64,
current_value: f64,
higher_is_worse: bool,
) -> Option<PerformanceRegression> {
let change_percent = if higher_is_worse {
(current_value - baseline_value) / baseline_value * 100.0
} else {
(baseline_value - current_value) / baseline_value * 100.0
};
if change_percent > self.config.regression_threshold {
let is_significant = change_percent > self.config.regression_threshold * 2.0;
Some(PerformanceRegression {
benchmark_name: benchmark_name.to_string(),
metric_name: metric_name.to_string(),
previous_value: baseline_value,
current_value,
regression_percent: change_percent,
is_significant,
confidence: if is_significant { 0.95 } else { 0.5 },
})
} else {
None
}
}
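/// Builds a run id from the current UTC timestamp and up to the first eight
/// characters of the commit SHA (or "unknown" when no SHA is configured).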
fn generate_run_id(&self) -> String {
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
// Avoid panicking when the commit SHA is shorter than eight characters.
let commit = self
.config
.commit_sha
.as_deref()
.map(|s| s.get(..8).unwrap_or(s))
.unwrap_or("unknown");
format!("{}_{}", timestamp, commit)
}
fn save_results(&self, run_id: &str, results: &[BenchmarkResult]) -> Result<()> {
let file_path = self.config.results_dir.join(format!("{}.json", run_id));
let json = serde_json::to_string_pretty(results)?;
std::fs::write(file_path, json)?;
Ok(())
}
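/// Builds a trend and summary report from the stored history. `latest_regressions`
/// is left empty here; callers can fill it with the output of `run_and_check`.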
pub fn generate_report(&self) -> Result<PerformanceReport> {
let trends = self.history.calculate_trends()?;
let summary = self.history.generate_summary()?;
Ok(PerformanceReport {
trends,
summary,
latest_regressions: Vec::new(),
})
}
}
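/// On-disk history of benchmark runs, keyed by run id and persisted as
/// `history.json` inside the results directory.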
#[derive(Debug, Clone, Serialize, Deserialize)]
struct BenchmarkHistory {
runs: HashMap<String, Vec<BenchmarkResult>>,
metadata: HashMap<String, RunMetadata>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
struct RunMetadata {
run_id: String,
timestamp: chrono::DateTime<chrono::Utc>,
commit_sha: Option<String>,
branch: Option<String>,
build_config: String,
}
impl BenchmarkHistory {
fn load(dir: &Path) -> Result<Self> {
let history_file = dir.join("history.json");
if history_file.exists() {
let json = std::fs::read_to_string(history_file)?;
Ok(serde_json::from_str(&json)?)
} else {
Ok(Self {
runs: HashMap::new(),
metadata: HashMap::new(),
})
}
}
fn save(&self, dir: &Path) -> Result<()> {
let history_file = dir.join("history.json");
let json = serde_json::to_string_pretty(self)?;
std::fs::write(history_file, json)?;
Ok(())
}
fn add_run(
&mut self,
run_id: String,
results: Vec<BenchmarkResult>,
config: &ContinuousBenchmarkConfig,
) {
let metadata = RunMetadata {
run_id: run_id.clone(),
timestamp: chrono::Utc::now(),
commit_sha: config.commit_sha.clone(),
branch: config.branch.clone(),
build_config: config.build_config.clone(),
};
self.runs.insert(run_id.clone(), results);
self.metadata.insert(run_id, metadata);
}
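/// Returns the results of the most recent run whose metadata matches the given
/// branch and build configuration, if any.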
fn get_baseline(
&self,
branch: &Option<String>,
build_config: &str,
) -> Option<&Vec<BenchmarkResult>> {
let mut matching_runs: Vec<_> = self
.metadata
.iter()
.filter(|(_, meta)| {
meta.branch.as_ref() == branch.as_ref() && meta.build_config == build_config
})
.collect();
matching_runs.sort_by_key(|(_, meta)| meta.timestamp);
matching_runs.last().and_then(|(run_id, _)| self.runs.get(*run_id))
}
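/// Groups results by benchmark name, orders each benchmark's runs by timestamp, and
/// fits a least-squares slope to its latency and throughput series (at least two
/// runs are required per benchmark).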
fn calculate_trends(&self) -> Result<HashMap<String, PerformanceTrend>> {
let mut trends = HashMap::new();
let mut by_benchmark: HashMap<String, Vec<(&String, &BenchmarkResult)>> = HashMap::new();
for (run_id, results) in &self.runs {
for result in results {
by_benchmark.entry(result.name.clone()).or_default().push((run_id, result));
}
}
for (benchmark_name, mut runs) in by_benchmark {
runs.sort_by_key(|(run_id, _)| {
self.metadata.get(*run_id).map(|m| m.timestamp).unwrap_or_default()
});
if runs.len() >= 2 {
let latencies: Vec<f64> = runs.iter().map(|(_, r)| r.avg_latency_ms).collect();
let throughputs: Vec<f64> =
runs.iter().map(|(_, r)| r.throughput_tokens_per_sec).collect();
trends.insert(
benchmark_name,
PerformanceTrend {
latency_trend: calculate_trend(&latencies),
throughput_trend: calculate_trend(&throughputs),
sample_count: runs.len(),
},
);
}
}
Ok(trends)
}
fn generate_summary(&self) -> Result<PerformanceSummary> {
let total_runs = self.runs.len();
let total_benchmarks = self
.runs
.values()
.flat_map(|results| results.iter().map(|r| &r.name))
.collect::<std::collections::HashSet<_>>()
.len();
let latest_run = self.metadata.values().max_by_key(|m| m.timestamp).map(|m| m.timestamp);
Ok(PerformanceSummary {
total_runs,
total_benchmarks,
latest_run,
earliest_run: self.metadata.values().min_by_key(|m| m.timestamp).map(|m| m.timestamp),
})
}
}
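/// Least-squares slopes of a benchmark's metrics across runs, in metric units per
/// run index: a positive `latency_trend` means latency is growing over time, while a
/// positive `throughput_trend` means throughput is improving.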
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceTrend {
pub latency_trend: f64,
pub throughput_trend: f64,
pub sample_count: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceReport {
pub trends: HashMap<String, PerformanceTrend>,
pub summary: PerformanceSummary,
pub latest_regressions: Vec<PerformanceRegression>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceSummary {
pub total_runs: usize,
pub total_benchmarks: usize,
pub latest_run: Option<chrono::DateTime<chrono::Utc>>,
pub earliest_run: Option<chrono::DateTime<chrono::Utc>>,
}
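/// Ordinary least-squares slope of `values` against their index:
/// `sum((x_i - x_mean) * (y_i - y_mean)) / sum((x_i - x_mean)^2)`.
/// Returns 0.0 when there are fewer than two points or the denominator is zero.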
fn calculate_trend(values: &[f64]) -> f64 {
if values.len() < 2 {
return 0.0;
}
let n = values.len() as f64;
let x_mean = (n - 1.0) / 2.0;
let y_mean = values.iter().sum::<f64>() / n;
let mut numerator = 0.0;
let mut denominator = 0.0;
for (i, &y) in values.iter().enumerate() {
let x = i as f64;
numerator += (x - x_mean) * (y - y_mean);
denominator += (x - x_mean) * (x - x_mean);
}
if denominator > 0.0 {
numerator / denominator
} else {
0.0
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_regression_detection() {
let config = ContinuousBenchmarkConfig::default();
let benchmark = ContinuousBenchmark::new(config).expect("failed to create ContinuousBenchmark");
let regression = benchmark.check_metric_regression(
"test_benchmark",
"latency",
100.0,
110.0,
true,
);
assert!(regression.is_some());
let reg = regression.expect("expected a regression to be reported");
assert!((reg.regression_percent - 10.0).abs() < 1e-9);
}
#[test]
fn test_trend_calculation() {
let values = vec![100.0, 102.0, 104.0, 106.0, 108.0];
let trend = calculate_trend(&values);
assert!(trend > 0.0);
let values = vec![100.0, 98.0, 96.0, 94.0, 92.0];
let trend = calculate_trend(&values);
assert!(trend < 0.0);
}
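// Additional sketch tests (values chosen here for illustration): the first checks
// that an improvement on a higher-is-better metric is not flagged as a regression,
// the second that a constant series yields a zero trend slope.
#[test]
fn test_improvement_not_flagged() {
let config = ContinuousBenchmarkConfig::default();
let benchmark = ContinuousBenchmark::new(config).expect("failed to create ContinuousBenchmark");
// Throughput rose from 100 to 110 tokens/sec; with higher_is_worse = false this
// is an improvement, so no regression should be reported.
let regression = benchmark.check_metric_regression(
"test_benchmark",
"throughput",
100.0,
110.0,
false,
);
assert!(regression.is_none());
}
#[test]
fn test_flat_trend_is_zero() {
// A constant series has a least-squares slope of exactly zero.
let values = vec![50.0, 50.0, 50.0, 50.0];
assert_eq!(calculate_trend(&values), 0.0);
}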
}