use crate::{ProfileEvent, TorshResult};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use torsh_core::TorshError;
/// Tunable parameters controlling how performance regressions are detected.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegressionConfig {
    /// Minimum number of baseline samples required before a metric is analyzed.
    pub min_baseline_samples: usize,
    /// Baselines not updated within this many days are skipped and can be cleaned up.
    pub max_baseline_age_days: u32,
    /// Percent slowdown above which a statistically significant change counts as a regression.
    pub regression_threshold_percent: f64,
    /// Percent speedup beyond which a statistically significant change counts as an improvement.
    pub improvement_threshold_percent: f64,
    /// p-value cutoff for the significance test (e.g. 0.05).
    pub significance_level: f64,
    /// When true, the regression threshold is widened in proportion to the
    /// baseline's coefficient of variation (noisier metrics need a larger change).
    pub adaptive_thresholds: bool,
    /// Maximum number of samples kept per baseline; oldest samples are dropped first.
    pub rolling_window_size: usize,
    /// When true, baseline samples outside the Tukey fences (1.5 * IQR) are removed.
    pub outlier_detection: bool,
}
impl Default for RegressionConfig {
fn default() -> Self {
Self {
min_baseline_samples: 10,
max_baseline_age_days: 30,
regression_threshold_percent: 5.0, improvement_threshold_percent: 5.0, significance_level: 0.05, adaptive_thresholds: true,
rolling_window_size: 100,
outlier_detection: true,
}
}
}
/// Rolling statistical baseline for one metric within one category.
///
/// Baselines are stored keyed as `"{category}::{metric_name}"` and compared
/// against freshly profiled values to classify changes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceBaseline {
    /// Metric identifier (e.g. "duration_us").
    pub metric_name: String,
    /// Category the metric belongs to.
    pub category: String,
    /// Raw samples currently inside the rolling window.
    pub samples: Vec<f64>,
    /// Arithmetic mean of `samples`.
    pub mean: f64,
    /// Population standard deviation of `samples`.
    pub std_dev: f64,
    /// Median of `samples`.
    pub median: f64,
    /// Approximate 95th percentile of `samples`.
    pub percentile_95: f64,
    /// Approximate 99th percentile of `samples`.
    pub percentile_99: f64,
    /// Timestamp of the last statistics refresh; drives staleness checks.
    pub last_updated: chrono::DateTime<chrono::Utc>,
    /// Number of samples the statistics were computed from.
    pub sample_count: usize,
}
impl PerformanceBaseline {
    /// Builds a baseline from an initial set of samples, computing all derived
    /// statistics immediately.
    ///
    /// Samples must not contain NaN (sorting panics otherwise). An empty
    /// sample set now yields zeroed statistics instead of NaN and an index
    /// panic, and a single sample no longer underflows the percentile index.
    pub fn new(metric_name: String, category: String, samples: Vec<f64>) -> Self {
        let mut baseline = Self {
            metric_name,
            category,
            samples,
            mean: 0.0,
            std_dev: 0.0,
            median: 0.0,
            percentile_95: 0.0,
            percentile_99: 0.0,
            last_updated: chrono::Utc::now(),
            sample_count: 0,
        };
        baseline.recompute_stats();
        baseline
    }

    /// Appends `new_samples`, trims the window to the newest
    /// `rolling_window_size` entries, and refreshes all derived statistics.
    pub fn update(&mut self, new_samples: Vec<f64>, rolling_window_size: usize) {
        self.samples.extend(new_samples);
        if self.samples.len() > rolling_window_size {
            // Drop the oldest samples so only the most recent window remains.
            let excess = self.samples.len() - rolling_window_size;
            self.samples.drain(0..excess);
        }
        self.recompute_stats();
    }

    /// Returns true when the baseline was last updated more than
    /// `max_age_days` days ago.
    pub fn is_stale(&self, max_age_days: u32) -> bool {
        let age = chrono::Utc::now().signed_duration_since(self.last_updated);
        age.num_days() > max_age_days as i64
    }

    /// Removes samples outside the Tukey fences (1.5 * IQR beyond Q1/Q3).
    ///
    /// No-op for fewer than 4 samples (quartiles are meaningless below that).
    /// Fix: the original left `mean`/`std_dev`/`median`/percentiles and
    /// `sample_count` stale after removal; the statistics are now refreshed so
    /// they stay consistent with the retained samples.
    pub fn remove_outliers(&mut self) {
        if self.samples.len() < 4 {
            return;
        }
        let mut sorted = self.samples.clone();
        sorted.sort_by(|a, b| {
            a.partial_cmp(b)
                .expect("samples should be comparable (no NaN values)")
        });
        let q1 = sorted[sorted.len() / 4];
        let q3 = sorted[(3 * sorted.len()) / 4];
        let iqr = q3 - q1;
        let lower_bound = q1 - 1.5 * iqr;
        let upper_bound = q3 + 1.5 * iqr;
        self.samples
            .retain(|&x| x >= lower_bound && x <= upper_bound);
        self.recompute_stats();
    }

    /// Recomputes mean, population standard deviation, median and percentiles
    /// from the current samples, and stamps `last_updated`/`sample_count`.
    /// Shared by `new`, `update` and `remove_outliers` (the original
    /// duplicated this whole computation in `new` and `update`).
    fn recompute_stats(&mut self) {
        self.last_updated = chrono::Utc::now();
        self.sample_count = self.samples.len();
        let n = self.samples.len();
        if n == 0 {
            // Avoid 0/0 = NaN and index underflow on an empty sample set.
            self.mean = 0.0;
            self.std_dev = 0.0;
            self.median = 0.0;
            self.percentile_95 = 0.0;
            self.percentile_99 = 0.0;
            return;
        }
        let mean = self.samples.iter().sum::<f64>() / n as f64;
        let variance =
            self.samples.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n as f64;
        let mut sorted = self.samples.clone();
        sorted.sort_by(|a, b| {
            a.partial_cmp(b)
                .expect("samples should be comparable (no NaN values)")
        });
        self.mean = mean;
        self.std_dev = variance.sqrt();
        self.median = if n % 2 == 0 {
            (sorted[n / 2 - 1] + sorted[n / 2]) / 2.0
        } else {
            sorted[n / 2]
        };
        self.percentile_95 = sorted[Self::percentile_index(n, 0.95)];
        self.percentile_99 = sorted[Self::percentile_index(n, 0.99)];
    }

    /// Index of the p-th percentile in a sorted slice of length `n` (n > 0).
    /// Keeps the original truncating formula `(n * p) as usize - 1` but
    /// saturates the subtraction and clamps to the slice, fixing the usize
    /// underflow panic the original hit for n == 1.
    fn percentile_index(n: usize, p: f64) -> usize {
        ((n as f64 * p) as usize).saturating_sub(1).min(n - 1)
    }
}
/// Outcome of comparing current measurements for one metric against its baseline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegressionResult {
    /// Metric identifier copied from the baseline.
    pub metric_name: String,
    /// Category copied from the baseline.
    pub category: String,
    /// Mean of the stored baseline samples.
    pub baseline_mean: f64,
    /// Mean of the newly observed values.
    pub current_mean: f64,
    /// Relative change in percent: positive = slower/worse, negative = faster/better.
    pub change_percent: f64,
    /// True when the change exceeds the (possibly adaptive) regression threshold
    /// and is statistically significant.
    pub is_regression: bool,
    /// True when the change is below the negative improvement threshold and significant.
    pub is_improvement: bool,
    /// Whether the approximate Welch's t-test deemed the change significant.
    pub is_significant: bool,
    /// Approximate p-value from the significance test.
    pub p_value: f64,
    /// Confidence interval (lower, upper) around `current_mean`.
    pub confidence_interval: (f64, f64),
    /// Severity bucket derived from `change_percent`.
    pub severity: RegressionSeverity,
    /// Human-readable guidance generated for this result.
    pub recommendation: String,
    /// When this analysis was performed.
    pub timestamp: chrono::DateTime<chrono::Utc>,
}
/// Severity bucket for a performance change, derived purely from the
/// percentage change (positive = degradation, negative = improvement).
/// Thresholds match `RegressionSeverity::from_change_percent`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum RegressionSeverity {
    /// Degradation of 50% or more.
    Critical,
    /// Degradation of 20% to 50%.
    High,
    /// Degradation of 10% to 20%.
    Medium,
    /// Degradation of 5% to 10%.
    Low,
    /// Change within +/-5%; considered stable.
    None,
    /// Improvement of more than 5% (change below -5%).
    Improvement,
}
impl RegressionSeverity {
pub fn from_change_percent(change_percent: f64) -> Self {
if change_percent < -5.0 {
Self::Improvement
} else if change_percent < 5.0 {
Self::None
} else if change_percent < 10.0 {
Self::Low
} else if change_percent < 20.0 {
Self::Medium
} else if change_percent < 50.0 {
Self::High
} else {
Self::Critical
}
}
}
/// Detects performance regressions by comparing profiled metrics against
/// stored statistical baselines.
pub struct RegressionDetector {
    /// Detection tuning parameters.
    config: RegressionConfig,
    /// Baselines keyed by `"{category}::{metric_name}"`.
    baselines: HashMap<String, PerformanceBaseline>,
}
impl RegressionDetector {
    /// Creates a detector with the supplied configuration and no baselines.
    pub fn new(config: RegressionConfig) -> Self {
        Self {
            config,
            baselines: HashMap::new(),
        }
    }

    /// Creates a detector with a ready-made configuration that is more lenient
    /// than `RegressionConfig::default()`: only 5 baseline samples required and
    /// a 10% (rather than 5%) regression threshold.
    pub fn with_defaults() -> Self {
        let config = RegressionConfig {
            min_baseline_samples: 5,
            max_baseline_age_days: 30,
            regression_threshold_percent: 10.0,
            improvement_threshold_percent: 5.0,
            significance_level: 0.05,
            adaptive_thresholds: true,
            rolling_window_size: 100,
            outlier_detection: true,
        };
        Self::new(config)
    }

    /// Folds `samples` into the baseline keyed `"{category}::{metric_name}"`,
    /// creating it on first use, and applies outlier removal when enabled.
    pub fn update_baseline(
        &mut self,
        metric_name: &str,
        category: &str,
        samples: Vec<f64>,
    ) -> TorshResult<()> {
        let key = format!("{category}::{metric_name}");
        match self.baselines.get_mut(&key) {
            Some(baseline) => {
                baseline.update(samples, self.config.rolling_window_size);
                if self.config.outlier_detection {
                    baseline.remove_outliers();
                }
            }
            None => {
                let mut baseline = PerformanceBaseline::new(
                    metric_name.to_string(),
                    category.to_string(),
                    samples,
                );
                if self.config.outlier_detection {
                    baseline.remove_outliers();
                }
                self.baselines.insert(key, baseline);
            }
        }
        Ok(())
    }

    /// Replaces the in-memory baselines with the JSON map stored in `filename`.
    ///
    /// # Errors
    /// Returns `InvalidArgument` when the file cannot be read or parsed.
    pub fn load_baselines(&mut self, filename: &str) -> TorshResult<()> {
        let data = std::fs::read_to_string(filename)
            .map_err(|e| TorshError::InvalidArgument(format!("Failed to read file: {e}")))?;
        let baselines: HashMap<String, PerformanceBaseline> = serde_json::from_str(&data)
            .map_err(|e| TorshError::InvalidArgument(format!("Failed to parse baselines: {e}")))?;
        self.baselines = baselines;
        Ok(())
    }

    /// Persists all baselines to `filename` as pretty-printed JSON.
    ///
    /// # Errors
    /// Returns `InvalidArgument` when serialization or the write fails.
    pub fn save_baselines(&self, filename: &str) -> TorshResult<()> {
        let data = serde_json::to_string_pretty(&self.baselines).map_err(|e| {
            TorshError::InvalidArgument(format!("Failed to serialize baselines: {e}"))
        })?;
        std::fs::write(filename, data)
            .map_err(|e| TorshError::InvalidArgument(format!("Failed to write file: {e}")))?;
        Ok(())
    }

    /// Aggregates per-category metrics (duration, flops, bytes transferred)
    /// from `events`, compares each against its stored baseline (if fresh and
    /// sufficiently populated), and returns results ordered Critical first,
    /// then High, then everything else, ties broken by descending change.
    pub fn detect_regressions(
        &self,
        events: &[ProfileEvent],
    ) -> TorshResult<Vec<RegressionResult>> {
        let mut results = Vec::new();
        let mut metrics: HashMap<String, Vec<f64>> = HashMap::new();
        for event in events {
            // Duration is always recorded; flops/bytes only when present and non-zero.
            let duration_key = format!("{}::duration_us", event.category);
            metrics
                .entry(duration_key)
                .or_default()
                .push(event.duration_us as f64);
            if let Some(flops) = event.flops {
                if flops > 0 {
                    let flops_key = format!("{}::flops", event.category);
                    metrics.entry(flops_key).or_default().push(flops as f64);
                }
            }
            if let Some(bytes_transferred) = event.bytes_transferred {
                if bytes_transferred > 0 {
                    let bytes_key = format!("{}::bytes_transferred", event.category);
                    metrics
                        .entry(bytes_key)
                        .or_default()
                        .push(bytes_transferred as f64);
                }
            }
        }
        for (metric_key, values) in metrics.iter() {
            if let Some(baseline) = self.baselines.get(metric_key) {
                if baseline.sample_count >= self.config.min_baseline_samples
                    && !baseline.is_stale(self.config.max_baseline_age_days)
                {
                    let result = self.analyze_metric(baseline, values)?;
                    results.push(result);
                }
            }
        }
        // Fix: the original comparator returned Ordering::Less for
        // (Critical, Critical), violating the total-order contract `sort_by`
        // requires (order becomes unspecified and newer std may panic).
        // Rank severities first, then break ties by descending change percent.
        results.sort_by(|a, b| {
            Self::severity_rank(a.severity)
                .cmp(&Self::severity_rank(b.severity))
                .then_with(|| {
                    b.change_percent
                        .partial_cmp(&a.change_percent)
                        .unwrap_or(std::cmp::Ordering::Equal)
                })
        });
        Ok(results)
    }

    /// Three-tier sort rank preserving the original ordering intent:
    /// Critical (0) before High (1) before everything else (2). Within a
    /// tier, callers break ties by change percentage.
    fn severity_rank(severity: RegressionSeverity) -> u8 {
        match severity {
            RegressionSeverity::Critical => 0,
            RegressionSeverity::High => 1,
            RegressionSeverity::Medium
            | RegressionSeverity::Low
            | RegressionSeverity::None
            | RegressionSeverity::Improvement => 2,
        }
    }

    /// Compares `current_values` against `baseline` and classifies the change.
    ///
    /// # Errors
    /// Returns `InvalidArgument` when `current_values` is empty.
    fn analyze_metric(
        &self,
        baseline: &PerformanceBaseline,
        current_values: &[f64],
    ) -> TorshResult<RegressionResult> {
        if current_values.is_empty() {
            return Err(TorshError::InvalidArgument(
                "No current values provided".to_string(),
            ));
        }
        let current_mean = current_values.iter().sum::<f64>() / current_values.len() as f64;
        // Fix: guard against a zero baseline mean. The original divided by it
        // unconditionally, producing NaN/inf that later poisoned the severity
        // sort's partial_cmp.
        let change_percent = if baseline.mean == 0.0 {
            0.0
        } else {
            ((current_mean - baseline.mean) / baseline.mean) * 100.0
        };
        let current_variance = current_values
            .iter()
            .map(|x| (x - current_mean).powi(2))
            .sum::<f64>()
            / current_values.len() as f64;
        let current_std_dev = current_variance.sqrt();
        let (is_significant, p_value) = self.welch_t_test(
            baseline.mean,
            baseline.std_dev,
            baseline.sample_count,
            current_mean,
            current_std_dev,
            current_values.len(),
        );
        // Adaptive thresholds widen the regression bar in proportion to the
        // baseline's coefficient of variation (noisier metric => higher bar).
        // The zero-mean guard also avoids a division by zero here.
        let threshold = if self.config.adaptive_thresholds && baseline.mean != 0.0 {
            let cv = baseline.std_dev / baseline.mean;
            self.config.regression_threshold_percent * (1.0 + cv)
        } else {
            self.config.regression_threshold_percent
        };
        let is_regression = change_percent > threshold && is_significant;
        let is_improvement =
            change_percent < -self.config.improvement_threshold_percent && is_significant;
        let severity = RegressionSeverity::from_change_percent(change_percent);
        let confidence_interval = self.calculate_confidence_interval(
            current_mean,
            current_std_dev,
            current_values.len(),
            1.0 - self.config.significance_level,
        );
        let recommendation =
            self.generate_recommendation(baseline, current_mean, change_percent, severity);
        Ok(RegressionResult {
            metric_name: baseline.metric_name.clone(),
            category: baseline.category.clone(),
            baseline_mean: baseline.mean,
            current_mean,
            change_percent,
            is_regression,
            is_improvement,
            is_significant,
            p_value,
            confidence_interval,
            severity,
            recommendation,
            timestamp: chrono::Utc::now(),
        })
    }

    /// Approximate Welch's t-test for two means, returning
    /// `(is_significant, approximate_p_value)`.
    ///
    /// The p-value comes from a coarse lookup against large-sample (z)
    /// critical values rather than the exact t-distribution, so the
    /// Welch-Satterthwaite degrees of freedom (computed but unused in the
    /// original) are not needed. Returns `(false, 1.0)` when either sample is
    /// too small (n < 2) or the standard error is zero.
    fn welch_t_test(
        &self,
        mean1: f64,
        std1: f64,
        n1: usize,
        mean2: f64,
        std2: f64,
        n2: usize,
    ) -> (bool, f64) {
        if n1 < 2 || n2 < 2 {
            return (false, 1.0);
        }
        let var1 = std1.powi(2);
        let var2 = std2.powi(2);
        let se = ((var1 / n1 as f64) + (var2 / n2 as f64)).sqrt();
        if se == 0.0 {
            return (false, 1.0);
        }
        let t_stat = (mean1 - mean2).abs() / se;
        let p_value = if t_stat > 2.0 {
            0.01
        } else if t_stat > 1.96 {
            0.05
        } else if t_stat > 1.0 {
            0.15
        } else {
            0.5
        };
        let is_significant = p_value < self.config.significance_level;
        (is_significant, p_value)
    }

    /// Symmetric confidence interval around `mean` using large-sample critical
    /// values (2.576 for >=99%, 1.96 for >=95%, otherwise 1.645). Degenerates
    /// to `(mean, mean)` when n < 2.
    fn calculate_confidence_interval(
        &self,
        mean: f64,
        std_dev: f64,
        n: usize,
        confidence: f64,
    ) -> (f64, f64) {
        if n < 2 {
            return (mean, mean);
        }
        let t_critical = if confidence >= 0.99 {
            2.576
        } else if confidence >= 0.95 {
            1.96
        } else {
            1.645
        };
        let margin_of_error = t_critical * (std_dev / (n as f64).sqrt());
        (mean - margin_of_error, mean + margin_of_error)
    }

    /// Produces a human-readable recommendation string for a classified change.
    fn generate_recommendation(
        &self,
        baseline: &PerformanceBaseline,
        current_mean: f64,
        change_percent: f64,
        severity: RegressionSeverity,
    ) -> String {
        match severity {
            RegressionSeverity::Critical => {
                format!(
                    "CRITICAL REGRESSION: {} performance degraded by {:.1}% (from {:.2} to {:.2}). Immediate investigation required. Consider reverting recent changes.",
                    baseline.metric_name, change_percent, baseline.mean, current_mean
                )
            }
            RegressionSeverity::High => {
                format!(
                    "HIGH REGRESSION: {} performance degraded by {:.1}% (from {:.2} to {:.2}). Review recent optimizations and profile hotspots.",
                    baseline.metric_name, change_percent, baseline.mean, current_mean
                )
            }
            RegressionSeverity::Medium => {
                format!(
                    "MEDIUM REGRESSION: {} performance degraded by {:.1}% (from {:.2} to {:.2}). Monitor trend and consider optimization.",
                    baseline.metric_name, change_percent, baseline.mean, current_mean
                )
            }
            RegressionSeverity::Low => {
                format!(
                    "MINOR REGRESSION: {} performance degraded by {:.1}% (from {:.2} to {:.2}). Monitor for trend continuation.",
                    baseline.metric_name, change_percent, baseline.mean, current_mean
                )
            }
            RegressionSeverity::Improvement => {
                format!(
                    "IMPROVEMENT: {} performance improved by {:.1}% (from {:.2} to {:.2}). Excellent work!",
                    baseline.metric_name, change_percent.abs(), baseline.mean, current_mean
                )
            }
            RegressionSeverity::None => {
                format!(
                    "NO CHANGE: {} performance is stable (change: {:.1}%). Continue monitoring.",
                    baseline.metric_name, change_percent
                )
            }
        }
    }

    /// Returns references to every stored baseline (arbitrary order).
    pub fn get_baseline_summary(&self) -> Vec<&PerformanceBaseline> {
        self.baselines.values().collect()
    }

    /// Drops baselines older than `max_baseline_age_days` and returns how many
    /// were removed.
    pub fn cleanup_stale_baselines(&mut self) -> usize {
        let initial_count = self.baselines.len();
        self.baselines
            .retain(|_, baseline| !baseline.is_stale(self.config.max_baseline_age_days));
        initial_count - self.baselines.len()
    }
}
/// Convenience constructor: a detector using `RegressionConfig::default()`.
pub fn create_regression_detector() -> RegressionDetector {
    RegressionDetector::new(RegressionConfig::default())
}
/// Convenience constructor: a detector using the supplied configuration.
pub fn create_regression_detector_with_config(config: RegressionConfig) -> RegressionDetector {
    RegressionDetector::new(config)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ProfileEvent;

    // Derived statistics (count, mean) are computed at construction time.
    #[test]
    fn test_baseline_creation() {
        let samples = vec![100.0, 105.0, 95.0, 110.0, 98.0];
        let baseline = PerformanceBaseline::new(
            "test_metric".to_string(),
            "test_category".to_string(),
            samples,
        );
        assert_eq!(baseline.metric_name, "test_metric");
        assert_eq!(baseline.sample_count, 5);
        // Mean of the five samples is 508 / 5 = 101.6.
        assert!((baseline.mean - 101.6).abs() < 0.1);
    }

    // Updating appends samples (window size 10 is not exceeded here).
    #[test]
    fn test_baseline_update() {
        let mut baseline = PerformanceBaseline::new(
            "test_metric".to_string(),
            "test_category".to_string(),
            vec![100.0, 105.0, 95.0],
        );
        baseline.update(vec![110.0, 98.0], 10);
        assert_eq!(baseline.sample_count, 5);
    }

    // The 1000.0 sample lies far outside the Tukey fences and must be dropped.
    #[test]
    fn test_outlier_removal() {
        let mut baseline = PerformanceBaseline::new(
            "test_metric".to_string(),
            "test_category".to_string(),
            vec![100.0, 102.0, 98.0, 101.0, 1000.0],
        );
        baseline.remove_outliers();
        assert!(baseline.samples.len() < 5);
        assert!(!baseline.samples.contains(&1000.0));
    }

    // Durations around 120-125 vs a ~100 baseline should flag a regression
    // above the 10% threshold.
    #[test]
    fn test_regression_detection() {
        let config = RegressionConfig {
            min_baseline_samples: 3,
            regression_threshold_percent: 10.0,
            ..Default::default()
        };
        let mut detector = RegressionDetector::new(config);
        detector
            .update_baseline("duration_us", "test_category", vec![100.0, 105.0, 95.0])
            .unwrap();
        let events = vec![
            ProfileEvent {
                name: "test_event".to_string(),
                category: "test_category".to_string(),
                start_us: 0,
                duration_us: 120,
                thread_id: 1,
                operation_count: Some(1),
                // Zero flops/bytes are deliberately skipped by detect_regressions.
                flops: Some(0),
                bytes_transferred: Some(0),
                stack_trace: Some("test trace".to_string()),
            },
            ProfileEvent {
                name: "test_event".to_string(),
                category: "test_category".to_string(),
                start_us: 0,
                duration_us: 125,
                thread_id: 1,
                operation_count: Some(1),
                flops: Some(0),
                bytes_transferred: Some(0),
                stack_trace: Some("test trace".to_string()),
            },
        ];
        let results = detector.detect_regressions(&events).unwrap();
        assert_eq!(results.len(), 1);
        assert!(results[0].is_regression);
        assert!(results[0].change_percent > 10.0);
    }

    // Durations around 80-85 vs a ~100 baseline should flag an improvement
    // beyond the 5% threshold.
    #[test]
    fn test_improvement_detection() {
        let config = RegressionConfig {
            min_baseline_samples: 3,
            improvement_threshold_percent: 5.0,
            ..Default::default()
        };
        let mut detector = RegressionDetector::new(config);
        detector
            .update_baseline("duration_us", "test_category", vec![100.0, 105.0, 95.0])
            .unwrap();
        let events = vec![
            ProfileEvent {
                name: "test_event".to_string(),
                category: "test_category".to_string(),
                start_us: 0,
                duration_us: 85,
                thread_id: 1,
                operation_count: Some(1),
                flops: Some(0),
                bytes_transferred: Some(0),
                stack_trace: Some("test trace".to_string()),
            },
            ProfileEvent {
                name: "test_event".to_string(),
                category: "test_category".to_string(),
                start_us: 0,
                duration_us: 80,
                thread_id: 1,
                operation_count: Some(1),
                flops: Some(0),
                bytes_transferred: Some(0),
                stack_trace: Some("test trace".to_string()),
            },
        ];
        let results = detector.detect_regressions(&events).unwrap();
        assert_eq!(results.len(), 1);
        assert!(results[0].is_improvement);
        assert!(results[0].change_percent < -5.0);
    }

    // Round-trips baselines through a temp JSON file and checks the key format.
    #[test]
    fn test_save_load_baselines() {
        let mut detector = create_regression_detector();
        detector
            .update_baseline("test_metric", "test_category", vec![100.0, 105.0, 95.0])
            .unwrap();
        let temp_file = std::env::temp_dir().join("test_baselines.json");
        let temp_str = temp_file.display().to_string();
        detector.save_baselines(&temp_str).unwrap();
        let mut new_detector = create_regression_detector();
        new_detector.load_baselines(&temp_str).unwrap();
        assert_eq!(new_detector.baselines.len(), 1);
        assert!(new_detector
            .baselines
            .contains_key("test_category::test_metric"));
        // Best-effort cleanup; failure to delete is not a test failure.
        let _ = std::fs::remove_file(&temp_file);
    }

    // Spot-checks each severity bucket boundary of from_change_percent.
    #[test]
    fn test_severity_classification() {
        assert!(matches!(
            RegressionSeverity::from_change_percent(25.0),
            RegressionSeverity::High
        ));
        assert!(matches!(
            RegressionSeverity::from_change_percent(7.0),
            RegressionSeverity::Low
        ));
        assert!(matches!(
            RegressionSeverity::from_change_percent(2.0),
            RegressionSeverity::None
        ));
        assert!(matches!(
            RegressionSeverity::from_change_percent(-10.0),
            RegressionSeverity::Improvement
        ));
        assert!(matches!(
            RegressionSeverity::from_change_percent(60.0),
            RegressionSeverity::Critical
        ));
    }
}