1use crate::config::ComparisonConfig;
2use crate::{BenchResult, CpuSnapshot, Percentiles};
3use serde::{Deserialize, Serialize};
4use sha2::{Digest, Sha256};
5use std::fs;
6use std::path::{Path, PathBuf};
7use std::time::Duration;
8
9fn get_primary_mac_address() -> Result<String, std::io::Error> {
14 let interface = default_net::get_default_interface().map_err(|e| {
15 std::io::Error::new(
16 std::io::ErrorKind::NotFound,
17 format!("Failed to get default network interface: {}", e),
18 )
19 })?;
20
21 let mac_addr = interface.mac_addr.ok_or_else(|| {
22 std::io::Error::new(
23 std::io::ErrorKind::NotFound,
24 "Default interface has no MAC address",
25 )
26 })?;
27
28 let mac_string = format!("{}", mac_addr).replace(':', "-").to_lowercase();
31
32 hash_mac_address(&mac_string)
34}
35
36fn hash_mac_address(mac: &str) -> Result<String, std::io::Error> {
40 let mut hasher = Sha256::new();
41 hasher.update(mac.as_bytes());
42 let result = hasher.finalize();
43
44 Ok(format!("{:x}", result)[..16].to_string())
46}
47
48#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct BaselineData {
51 pub benchmark_name: String,
52 pub module: String,
53 pub timestamp: String,
54 pub samples: Vec<u128>,
56 pub statistics: crate::Statistics,
58 #[serde(alias = "hostname")]
59 pub machine_id: String,
60
61 #[serde(default, skip_serializing_if = "Vec::is_empty")]
63 pub cpu_samples: Vec<CpuSnapshot>,
64
65 #[serde(skip_serializing_if = "Option::is_none")]
67 pub percentiles: Option<Percentiles>,
68
69 #[serde(default, skip_serializing_if = "is_false")]
71 pub was_regression: bool,
72}
73
/// Returns `true` when the referenced flag is `false`.
///
/// Used as a serde `skip_serializing_if` predicate, which is why the flag is
/// taken by reference.
fn is_false(b: &bool) -> bool {
    matches!(*b, false)
}
77
78impl BaselineData {
79 pub fn from_bench_result(
80 result: &BenchResult,
81 machine_id: String,
82 was_regression: bool,
83 ) -> Self {
84 let samples: Vec<u128> = result.all_timings.iter().map(|d| d.as_nanos()).collect();
86
87 let statistics = crate::calculate_statistics(&samples);
89
90 Self {
91 benchmark_name: result.name.clone(),
92 module: result.module.clone(),
93 timestamp: chrono::Utc::now().to_rfc3339(),
94 samples,
95 statistics,
96 machine_id,
97 cpu_samples: result.cpu_samples.clone(),
98 percentiles: Some(result.percentiles.clone()),
99 was_regression,
100 }
101 }
102
103 pub fn to_bench_result(&self) -> BenchResult {
104 let percentiles = if let Some(ref p) = self.percentiles {
106 p.clone()
107 } else {
108 Percentiles {
110 mean: Duration::from_nanos(self.statistics.mean as u64),
111 p50: Duration::from_nanos(self.statistics.median as u64),
112 p90: Duration::from_nanos(self.statistics.p90 as u64),
113 p99: Duration::from_nanos(self.statistics.p99 as u64),
114 }
115 };
116
117 let all_timings: Vec<Duration> = self
119 .samples
120 .iter()
121 .map(|&ns| Duration::from_nanos(ns as u64))
122 .collect();
123
124 BenchResult {
125 name: self.benchmark_name.clone(),
126 module: self.module.clone(),
127 percentiles,
128 samples: self.samples.len(),
129 all_timings,
130 cpu_samples: self.cpu_samples.clone(),
131 warmup_ms: None,
132 warmup_iterations: None,
133 }
134 }
135}
136
/// Stores and retrieves benchmark baselines on disk, grouped per machine.
#[derive(Debug)]
pub struct BaselineManager {
    /// Directory under which all baseline data is kept.
    root_dir: PathBuf,
    /// Identifier of the current machine; used as the sub-directory name
    /// under `root_dir`.
    machine_id: String,
}
143
144impl BaselineManager {
145 pub fn new() -> Result<Self, std::io::Error> {
149 let machine_id = get_primary_mac_address()?;
150
151 Ok(Self {
152 root_dir: PathBuf::from(".benches"),
153 machine_id,
154 })
155 }
156
157 pub fn with_root_dir<P: AsRef<Path>>(root_dir: P) -> Result<Self, std::io::Error> {
159 let machine_id = get_primary_mac_address()?;
160
161 Ok(Self {
162 root_dir: root_dir.as_ref().to_path_buf(),
163 machine_id,
164 })
165 }
166
167 fn machine_dir(&self) -> PathBuf {
169 self.root_dir.join(&self.machine_id)
170 }
171
172 fn benchmark_dir(&self, crate_name: &str, benchmark_name: &str) -> PathBuf {
174 let dir_name = format!("{}_{}", crate_name, benchmark_name);
175 self.machine_dir().join(dir_name)
176 }
177
178 fn legacy_baseline_path(&self, crate_name: &str, benchmark_name: &str) -> PathBuf {
180 let filename = format!("{}_{}.json", crate_name, benchmark_name);
181 self.machine_dir().join(filename)
182 }
183
184 fn get_run_path(&self, crate_name: &str, benchmark_name: &str) -> PathBuf {
186 let timestamp = chrono::Utc::now().format("%Y-%m-%dT%H-%M-%S");
187 let filename = format!("{}.json", timestamp);
188 self.benchmark_dir(crate_name, benchmark_name)
189 .join(filename)
190 }
191
192 fn ensure_dir_exists(
194 &self,
195 crate_name: &str,
196 benchmark_name: &str,
197 ) -> Result<(), std::io::Error> {
198 fs::create_dir_all(self.benchmark_dir(crate_name, benchmark_name))
199 }
200
201 pub fn save_baseline(
203 &self,
204 crate_name: &str,
205 result: &BenchResult,
206 was_regression: bool,
207 ) -> Result<(), std::io::Error> {
208 self.ensure_dir_exists(crate_name, &result.name)?;
209
210 let baseline =
211 BaselineData::from_bench_result(result, self.machine_id.clone(), was_regression);
212 let json = serde_json::to_string_pretty(&baseline)?;
213
214 let path = self.get_run_path(crate_name, &result.name);
215 fs::write(path, json)?;
216
217 Ok(())
218 }
219
220 pub fn load_baseline(
222 &self,
223 crate_name: &str,
224 benchmark_name: &str,
225 ) -> Result<Option<BaselineData>, std::io::Error> {
226 let bench_dir = self.benchmark_dir(crate_name, benchmark_name);
227
228 if bench_dir.exists() && bench_dir.is_dir() {
230 let mut runs: Vec<_> = fs::read_dir(&bench_dir)?
232 .filter_map(|e| e.ok())
233 .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
234 .collect();
235
236 if runs.is_empty() {
237 return Ok(None);
238 }
239
240 runs.sort_by_key(|e| e.file_name());
242 let latest = runs.last().unwrap();
243
244 let contents = fs::read_to_string(latest.path())?;
245 let baseline: BaselineData = serde_json::from_str(&contents)?;
246 return Ok(Some(baseline));
247 }
248
249 let legacy_path = self.legacy_baseline_path(crate_name, benchmark_name);
251 if legacy_path.exists() {
252 let contents = fs::read_to_string(legacy_path)?;
253 let baseline: BaselineData = serde_json::from_str(&contents)?;
254 return Ok(Some(baseline));
255 }
256
257 Ok(None)
258 }
259
260 pub fn has_baseline(&self, crate_name: &str, benchmark_name: &str) -> bool {
262 let bench_dir = self.benchmark_dir(crate_name, benchmark_name);
263 if bench_dir.exists() && bench_dir.is_dir() {
264 return true;
265 }
266 self.legacy_baseline_path(crate_name, benchmark_name)
267 .exists()
268 }
269
270 pub fn list_runs(
272 &self,
273 crate_name: &str,
274 benchmark_name: &str,
275 ) -> Result<Vec<String>, std::io::Error> {
276 let bench_dir = self.benchmark_dir(crate_name, benchmark_name);
277
278 if !bench_dir.exists() || !bench_dir.is_dir() {
279 return Ok(vec![]);
280 }
281
282 let mut runs: Vec<String> = fs::read_dir(&bench_dir)?
283 .filter_map(|e| e.ok())
284 .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
285 .filter_map(|e| {
286 e.file_name()
287 .to_string_lossy()
288 .strip_suffix(".json")
289 .map(|s| s.to_string())
290 })
291 .collect();
292
293 runs.sort();
294 Ok(runs)
295 }
296
297 pub fn load_run(
299 &self,
300 crate_name: &str,
301 benchmark_name: &str,
302 timestamp: &str,
303 ) -> Result<Option<BaselineData>, std::io::Error> {
304 let bench_dir = self.benchmark_dir(crate_name, benchmark_name);
305 let filename = format!("{}.json", timestamp);
306 let path = bench_dir.join(filename);
307
308 if !path.exists() {
309 return Ok(None);
310 }
311
312 let contents = fs::read_to_string(path)?;
313 let baseline: BaselineData = serde_json::from_str(&contents)?;
314 Ok(Some(baseline))
315 }
316
317 pub fn list_baselines(&self, crate_name: &str) -> Result<Vec<String>, std::io::Error> {
319 let machine_dir = self.machine_dir();
320
321 if !machine_dir.exists() {
322 return Ok(vec![]);
323 }
324
325 let prefix = format!("{}_", crate_name);
326 let mut baselines = Vec::new();
327
328 for entry in fs::read_dir(machine_dir)? {
329 let entry = entry?;
330 let name = entry.file_name().to_string_lossy().to_string();
331
332 if name.starts_with(&prefix) && entry.path().is_dir() {
334 let benchmark_name = name.strip_prefix(&prefix).unwrap_or(&name).to_string();
336 baselines.push(benchmark_name);
337 }
338 else if name.starts_with(&prefix) && name.ends_with(".json") {
340 let benchmark_name = name
341 .strip_prefix(&prefix)
342 .and_then(|s| s.strip_suffix(".json"))
343 .unwrap_or(&name)
344 .to_string();
345 baselines.push(benchmark_name);
346 }
347 }
348
349 Ok(baselines)
350 }
351
352 pub fn load_recent_baselines(
358 &self,
359 crate_name: &str,
360 benchmark_name: &str,
361 count: usize,
362 ) -> Result<Vec<BaselineData>, std::io::Error> {
363 let bench_dir = self.benchmark_dir(crate_name, benchmark_name);
364
365 if !bench_dir.exists() || !bench_dir.is_dir() {
366 return Ok(vec![]);
367 }
368
369 let mut runs: Vec<_> = fs::read_dir(&bench_dir)?
371 .filter_map(|e| e.ok())
372 .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
373 .collect();
374
375 if runs.is_empty() {
376 return Ok(vec![]);
377 }
378
379 runs.sort_by_key(|e| e.file_name());
381
382 let mut baselines = Vec::new();
384 for entry in runs.iter().rev() {
385 if baselines.len() >= count {
387 break;
388 }
389
390 let contents = fs::read_to_string(entry.path())?;
391 if let Ok(baseline) = serde_json::from_str::<BaselineData>(&contents) {
392 if !baseline.was_regression {
394 baselines.push(baseline);
395 }
396 }
397 }
398
399 baselines.reverse();
401
402 Ok(baselines)
403 }
404}
405
406impl Default for BaselineManager {
407 fn default() -> Self {
408 Self::new().expect("Failed to get primary MAC address")
409 }
410}
411
412#[derive(Debug, Clone)]
414pub struct ComparisonResult {
415 pub benchmark_name: String,
416 pub comparison: Option<crate::Comparison>,
417 pub is_regression: bool,
418}
419
420pub fn detect_regression_with_cpd(
429 current: &crate::BenchResult,
430 historical: &[BaselineData],
431 threshold: f64,
432 confidence_level: f64,
433 cp_threshold: f64,
434 hazard_rate: f64,
435) -> ComparisonResult {
436 if historical.is_empty() {
437 return ComparisonResult {
438 benchmark_name: current.name.clone(),
439 comparison: None,
440 is_regression: false,
441 };
442 }
443
444 let historical_means: Vec<f64> = historical
446 .iter()
447 .map(|b| b.statistics.mean as f64)
448 .collect();
449
450 let current_mean = current.percentiles.mean.as_nanos() as f64;
451
452 let hist_mean = crate::statistics::mean(&historical_means);
454 let hist_stddev = crate::statistics::standard_deviation(&historical_means);
455
456 let z_score_value = crate::statistics::z_score(current_mean, hist_mean, hist_stddev);
458
459 let z_critical = if (confidence_level - 0.90).abs() < 0.01 {
461 1.282 } else if (confidence_level - 0.95).abs() < 0.01 {
463 1.645 } else if (confidence_level - 0.99).abs() < 0.01 {
465 2.326 } else {
467 1.96 };
469
470 let upper_bound = hist_mean + (z_critical * hist_stddev);
471 let lower_bound = hist_mean - (z_critical * hist_stddev);
472
473 let statistically_significant = current_mean > upper_bound;
475
476 let change_probability = crate::changepoint::bayesian_change_point_probability(
478 current_mean,
479 &historical_means,
480 hazard_rate,
481 );
482
483 let percentage_change = ((current_mean - hist_mean) / hist_mean) * 100.0;
485 let practically_significant = percentage_change > threshold;
486
487 let is_regression = if z_score_value.abs() > 5.0 {
501 statistically_significant && practically_significant
503 } else if z_score_value.abs() > 2.0 {
504 statistically_significant && practically_significant && change_probability > cp_threshold
506 } else {
507 false
509 };
510
511 ComparisonResult {
512 benchmark_name: current.name.clone(),
513 comparison: Some(crate::Comparison {
514 current_mean: current.percentiles.mean,
515 baseline_mean: Duration::from_nanos(hist_mean as u64),
516 percentage_change,
517 baseline_count: historical.len(),
518 z_score: Some(z_score_value),
519 confidence_interval: Some((lower_bound, upper_bound)),
520 change_probability: Some(change_probability),
521 }),
522 is_regression,
523 }
524}
525
526pub fn process_with_baselines(
534 results: &[crate::BenchResult],
535 config: &ComparisonConfig,
536) -> Result<Vec<ComparisonResult>, std::io::Error> {
537 let baseline_manager = BaselineManager::new()?;
538 let mut comparisons = Vec::new();
539
540 for result in results {
541 let crate_name = result.module.split("::").next().unwrap_or("unknown");
543
544 let historical =
546 baseline_manager.load_recent_baselines(crate_name, &result.name, config.window_size)?;
547
548 let comparison_result = if !historical.is_empty() {
549 detect_regression_with_cpd(
551 result,
552 &historical,
553 config.threshold,
554 config.confidence_level,
555 config.cp_threshold,
556 config.hazard_rate,
557 )
558 } else {
559 ComparisonResult {
561 benchmark_name: result.name.clone(),
562 comparison: None,
563 is_regression: false,
564 }
565 };
566
567 let is_regression = comparison_result.is_regression;
568 comparisons.push(comparison_result);
569
570 baseline_manager.save_baseline(crate_name, result, is_regression)?;
572 }
573
574 Ok(comparisons)
575}
576
577pub fn check_regressions_and_exit(comparisons: &[ComparisonResult], config: &ComparisonConfig) {
579 if !config.ci_mode {
580 return;
581 }
582
583 let has_regression = comparisons.iter().any(|c| c.is_regression);
584
585 if has_regression {
586 use colored::Colorize;
587 eprintln!();
588 eprintln!(
589 "{}",
590 format!(
591 "FAILED: Performance regression detected (threshold: {}%)",
592 config.threshold
593 )
594 .red()
595 .bold()
596 );
597 std::process::exit(1);
598 }
599}
600
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;
    use tempfile::TempDir;

    /// Builds a fixed ten-sample result named `name` in module `test_module`.
    fn create_test_result(name: &str) -> BenchResult {
        let percentiles = Percentiles {
            p50: Duration::from_millis(5),
            p90: Duration::from_millis(10),
            p99: Duration::from_millis(15),
            mean: Duration::from_millis(8),
        };

        BenchResult {
            name: name.to_string(),
            module: "test_module".to_string(),
            samples: 10,
            percentiles,
            all_timings: vec![Duration::from_millis(5); 10],
            cpu_samples: vec![],
            ..Default::default()
        }
    }

    /// Creates a manager rooted in a fresh temporary directory. The directory
    /// guard is returned so it outlives the manager.
    fn manager_in_temp_dir() -> (TempDir, BaselineManager) {
        let temp_dir = TempDir::new().unwrap();
        let manager = BaselineManager::with_root_dir(temp_dir.path()).unwrap();
        (temp_dir, manager)
    }

    #[test]
    fn test_baseline_data_conversion() {
        let result = create_test_result("test_bench");
        let machine_id = "0123456789abcdef".to_string();
        let baseline = BaselineData::from_bench_result(&result, machine_id.clone(), false);

        assert_eq!(baseline.benchmark_name, "test_bench");
        assert_eq!(baseline.module, "test_module");
        assert_eq!(baseline.machine_id, machine_id);
        assert_eq!(baseline.statistics.sample_count, 10);
        assert_eq!(baseline.samples.len(), 10);

        // Converting back must preserve the identifying fields and percentiles.
        let converted = baseline.to_bench_result();
        assert_eq!(converted.name, result.name);
        assert_eq!(converted.module, result.module);
        assert_eq!(converted.percentiles.p90, result.percentiles.p90);
    }

    #[test]
    fn test_save_and_load_baseline() {
        let (_temp_dir, manager) = manager_in_temp_dir();
        let result = create_test_result("test_bench");

        manager.save_baseline("my_crate", &result, false).unwrap();

        let loaded = manager.load_baseline("my_crate", "test_bench").unwrap();
        assert!(loaded.is_some());

        let baseline = loaded.unwrap();
        assert_eq!(baseline.benchmark_name, "test_bench");
        assert_eq!(baseline.module, "test_module");
        assert!(baseline.percentiles.is_some());
        assert_eq!(baseline.percentiles.unwrap().p90, Duration::from_millis(10));
    }

    #[test]
    fn test_load_nonexistent_baseline() {
        let (_temp_dir, manager) = manager_in_temp_dir();

        let loaded = manager.load_baseline("my_crate", "nonexistent").unwrap();
        assert!(loaded.is_none());
    }

    #[test]
    fn test_has_baseline() {
        let (_temp_dir, manager) = manager_in_temp_dir();
        let result = create_test_result("test_bench");

        assert!(!manager.has_baseline("my_crate", "test_bench"));

        manager.save_baseline("my_crate", &result, false).unwrap();

        assert!(manager.has_baseline("my_crate", "test_bench"));
    }

    #[test]
    fn test_list_baselines() {
        let (_temp_dir, manager) = manager_in_temp_dir();

        for name in ["bench1", "bench2"] {
            let result = create_test_result(name);
            manager.save_baseline("my_crate", &result, false).unwrap();
        }

        let mut baselines = manager.list_baselines("my_crate").unwrap();
        baselines.sort();

        assert_eq!(baselines, vec!["bench1", "bench2"]);
    }

    #[test]
    fn test_get_primary_mac_address() {
        let result = get_primary_mac_address();

        assert!(result.is_ok(), "Failed to get machine ID: {:?}", result);

        let machine_id = result.unwrap();

        assert_eq!(
            machine_id.len(),
            16,
            "Machine ID should be 16 characters: {}",
            machine_id
        );

        assert_eq!(
            machine_id,
            machine_id.to_lowercase(),
            "Machine ID should be lowercase"
        );
        assert!(
            machine_id.chars().all(|c| c.is_ascii_hexdigit()),
            "Machine ID should contain only hex digits"
        );
    }

    #[test]
    fn test_mac_address_format() {
        let manager_result = BaselineManager::new();
        assert!(
            manager_result.is_ok(),
            "Failed to create BaselineManager: {:?}",
            manager_result
        );

        let manager = manager_result.unwrap();
        let machine_id = &manager.machine_id;

        assert_eq!(machine_id.len(), 16, "Machine ID should be 16 characters");
        assert_eq!(*machine_id, machine_id.to_lowercase());
        assert!(machine_id.chars().all(|c| c.is_ascii_hexdigit()));
    }
}