use super::types::{ModelPerformanceMetrics, PerformanceSummary};
/// Collects per-step model performance samples and derives summaries,
/// trends, anomalies and tuning recommendations from them.
#[derive(Debug)]
pub struct PerformanceAnalyzer {
/// Recorded samples, oldest first; new samples are pushed at the back.
performance_history: Vec<ModelPerformanceMetrics>,
/// Maximum number of samples retained; the oldest are dropped beyond this.
max_history_length: usize,
/// Limits consulted by the anomaly detectors and the recommendation logic.
thresholds: PerformanceThresholds,
}
/// Tunable limits used by `PerformanceAnalyzer` when flagging problems.
#[derive(Debug, Clone)]
pub struct PerformanceThresholds {
/// Peak memory (MB) above which a memory recommendation is emitted.
pub max_memory_mb: f64,
/// Throughput below which a low-throughput recommendation or anomaly is raised.
pub min_throughput: f64,
/// Largest tolerated increase (in percent) of the recent average loss over
/// the preceding window before a degradation anomaly is reported.
pub max_loss_increase_percent: f64,
/// Upper bound for recent loss volatility. NOTE(review): despite the name,
/// the analyzer compares this against std-dev / |mean|, not a variance.
pub max_loss_variance: f64,
}
impl Default for PerformanceThresholds {
fn default() -> Self {
Self {
max_memory_mb: 8192.0, min_throughput: 100.0,
max_loss_increase_percent: 10.0,
max_loss_variance: 0.1,
}
}
}
impl PerformanceAnalyzer {
pub fn new() -> Self {
Self {
performance_history: Vec::new(),
max_history_length: 1000,
thresholds: PerformanceThresholds::default(),
}
}
/// Creates an analyzer that uses the supplied `thresholds`, starting with
/// an empty history capped at 1000 samples.
pub fn with_thresholds(thresholds: PerformanceThresholds) -> Self {
    let performance_history = Vec::new();
    let max_history_length = 1000;
    Self {
        performance_history,
        max_history_length,
        thresholds,
    }
}
/// Sets the history cap to `length` and immediately discards the oldest
/// samples if more than `length` are currently stored.
pub fn set_max_history_length(&mut self, length: usize) {
    self.max_history_length = length;
    // Number of stored samples beyond the new cap (zero when within it).
    let excess = self.performance_history.len().saturating_sub(length);
    if excess > 0 {
        // Oldest samples sit at the front of the vector.
        self.performance_history.drain(..excess);
    }
}
/// Appends `metrics` to the history, evicting the oldest sample when the
/// history grows past the configured cap.
pub fn record_performance(&mut self, metrics: ModelPerformanceMetrics) {
    let history = &mut self.performance_history;
    history.push(metrics);
    let over_capacity = history.len() > self.max_history_length;
    if over_capacity {
        // Drop the front (oldest) entry.
        history.remove(0);
    }
}
pub fn record_metrics(&mut self, metrics: ModelPerformanceMetrics) {
self.record_performance(metrics);
}
/// Returns the recorded samples as a slice, oldest first.
pub fn get_performance_history(&self) -> &[ModelPerformanceMetrics] {
    self.performance_history.as_slice()
}
/// Aggregates the whole history into a [`PerformanceSummary`].
///
/// Returns `PerformanceSummary::default()` when nothing has been recorded.
/// Otherwise reports the sample count, the latest loss and throughput, the
/// minimum and mean loss, the mean throughput, and the peak and mean
/// memory usage.
pub fn generate_performance_summary(&self) -> PerformanceSummary {
    let history = &self.performance_history;
    let current_metrics = match history.last() {
        Some(latest) => latest,
        None => return PerformanceSummary::default(),
    };
    let total_steps = history.len();
    let sample_count = total_steps as f64;

    let loss_total: f64 = history.iter().map(|m| m.loss).sum();
    let throughput_total: f64 = history.iter().map(|m| m.throughput_samples_per_sec).sum();
    let memory_total: f64 = history.iter().map(|m| m.memory_usage_mb).sum();

    // Minimum loss, seeded with +infinity.
    let best_loss = history.iter().map(|m| m.loss).fold(f64::INFINITY, f64::min);
    // Maximum memory, seeded with zero.
    let peak_memory_mb = history.iter().map(|m| m.memory_usage_mb).fold(0.0f64, f64::max);

    PerformanceSummary {
        total_steps,
        current_loss: current_metrics.loss,
        best_loss,
        avg_loss: loss_total / sample_count,
        current_throughput: current_metrics.throughput_samples_per_sec,
        avg_throughput: throughput_total / sample_count,
        peak_memory_mb,
        avg_memory_mb: memory_total / sample_count,
    }
}
/// Computes trend slopes and volatilities over the whole history.
///
/// With fewer than 10 samples the default (all-zero) [`PerformanceTrends`]
/// is returned. The reported confidence is derived from the loss series.
pub fn analyze_performance_trends(&self) -> PerformanceTrends {
    let history = &self.performance_history;
    if history.len() < 10 {
        return PerformanceTrends::default();
    }

    // Split the samples into one series per tracked quantity.
    let mut losses = Vec::with_capacity(history.len());
    let mut throughputs = Vec::with_capacity(history.len());
    let mut memory_usages = Vec::with_capacity(history.len());
    for sample in history {
        losses.push(sample.loss);
        throughputs.push(sample.throughput_samples_per_sec);
        memory_usages.push(sample.memory_usage_mb);
    }

    PerformanceTrends {
        loss_trend: self.compute_trend(&losses),
        throughput_trend: self.compute_trend(&throughputs),
        memory_trend: self.compute_trend(&memory_usages),
        loss_volatility: self.compute_volatility(&losses),
        throughput_volatility: self.compute_volatility(&throughputs),
        trend_confidence: self.compute_trend_confidence(&losses),
    }
}
/// Runs every anomaly detector and returns the anomalies found.
///
/// Returns an empty list when fewer than 5 samples are recorded. Results
/// are ordered: memory leak, performance degradation, training
/// instability, throughput drop.
pub fn detect_performance_anomalies(&self) -> Vec<PerformanceAnomaly> {
    if self.performance_history.len() < 5 {
        return Vec::new();
    }
    // Each detector yields at most one anomaly; chain them in fixed order.
    self.detect_memory_leak()
        .into_iter()
        .chain(self.detect_performance_degradation())
        .chain(self.detect_training_instability())
        .chain(self.detect_throughput_drops())
        .collect()
}
/// Produces tuning recommendations from the summary and trend analysis.
///
/// A recommendation is emitted when peak memory exceeds
/// `thresholds.max_memory_mb`, when average throughput is below
/// `thresholds.min_throughput`, and when the loss trend is above `0.01`.
pub fn generate_optimization_recommendations(&self) -> Vec<OptimizationRecommendation> {
    let summary = self.generate_performance_summary();
    let trends = self.analyze_performance_trends();
    let limits = &self.thresholds;

    let memory_exceeded = summary.peak_memory_mb > limits.max_memory_mb;
    let throughput_low = summary.avg_throughput < limits.min_throughput;
    let loss_rising = trends.loss_trend > 0.01;

    let mut recommendations = Vec::new();
    if memory_exceeded {
        recommendations.push(OptimizationRecommendation {
            category: String::from("Memory"),
            priority: PerformanceRecommendationPriority::High,
            description: String::from("High memory usage detected"),
            suggestion: String::from(
                "Consider reducing batch size or using gradient checkpointing",
            ),
            expected_improvement: 0.3,
        });
    }
    if throughput_low {
        recommendations.push(OptimizationRecommendation {
            category: String::from("Throughput"),
            priority: PerformanceRecommendationPriority::Medium,
            description: String::from("Low throughput detected"),
            suggestion: String::from(
                "Consider increasing batch size or optimizing data loading",
            ),
            expected_improvement: 0.4,
        });
    }
    if loss_rising {
        recommendations.push(OptimizationRecommendation {
            category: String::from("Training"),
            priority: PerformanceRecommendationPriority::High,
            description: String::from("Loss is increasing"),
            suggestion: String::from(
                "Consider reducing learning rate or adding regularization",
            ),
            expected_improvement: 0.5,
        });
    }
    recommendations
}
/// Returns the least-squares slope of `values` against their indices
/// `0, 1, 2, …`.
///
/// Yields `0.0` for fewer than two values or when the index spread is zero.
fn compute_trend(&self, values: &[f64]) -> f64 {
    let count = values.len();
    if count < 2 {
        return 0.0;
    }
    let n = count as f64;
    let x_mean = (n - 1.0) / 2.0;
    let y_mean = values.iter().sum::<f64>() / n;

    // Accumulate Σ(dx·dy) and Σ(dx²) in a single pass, in index order.
    let (cross_sum, x_sq_sum) = values.iter().enumerate().fold(
        (0.0_f64, 0.0_f64),
        |(cross, squares), (index, &y)| {
            let x = index as f64;
            (
                cross + (x - x_mean) * (y - y_mean),
                squares + (x - x_mean).powi(2),
            )
        },
    );

    if x_sq_sum != 0.0 {
        cross_sum / x_sq_sum
    } else {
        0.0
    }
}
/// Returns the sample standard deviation of `values` divided by the
/// absolute value of their mean.
///
/// Yields `0.0` for fewer than two values or when the mean is exactly zero.
fn compute_volatility(&self, values: &[f64]) -> f64 {
    let count = values.len();
    if count < 2 {
        return 0.0;
    }
    let mean = values.iter().sum::<f64>() / count as f64;
    let squared_deviation_total: f64 = values.iter().map(|&v| (v - mean).powi(2)).sum();
    // Sample variance: divide by (n - 1).
    let std_dev = (squared_deviation_total / (count - 1) as f64).sqrt();
    if mean != 0.0 {
        std_dev / mean.abs()
    } else {
        0.0
    }
}
/// Scores how strongly `values` trend: `|slope| / (1 + volatility)`,
/// capped at `1.0`.
///
/// Yields `0.0` when fewer than 10 values are supplied.
fn compute_trend_confidence(&self, values: &[f64]) -> f64 {
    if values.len() < 10 {
        return 0.0;
    }
    let trend_strength = self.compute_trend(values).abs();
    let damping = 1.0 + self.compute_volatility(values);
    (trend_strength / damping).min(1.0)
}
/// Flags a memory leak when memory usage over the last 10 samples grows
/// by more than 10 MB per step (least-squares slope).
///
/// Returns `None` with fewer than 10 samples or when the slope is not
/// above the limit.
fn detect_memory_leak(&self) -> Option<PerformanceAnomaly> {
    let history = &self.performance_history;
    let sample_count = history.len();
    if sample_count < 10 {
        return None;
    }
    let memory_usages: Vec<f64> = history[sample_count - 10..]
        .iter()
        .map(|sample| sample.memory_usage_mb)
        .collect();
    let memory_trend = self.compute_trend(&memory_usages);

    // Written as a positive test so a NaN slope never counts as a leak.
    let is_leaking = memory_trend > 10.0;
    if !is_leaking {
        return None;
    }
    let latest = history
        .last()
        .expect("performance_history is non-empty after is_empty check");
    Some(PerformanceAnomaly {
        anomaly_type: AnomalyType::MemoryLeak,
        severity: AnomalySeverity::High,
        description: format!("Memory usage increasing at {:.1} MB/step", memory_trend),
        detected_at_step: latest.training_step,
        confidence: 0.8,
    })
}
/// Flags a degradation when the average loss of the last 10 samples is
/// more than `thresholds.max_loss_increase_percent` percent above the
/// average loss of the 10 samples before them.
///
/// The relative change is measured against the *magnitude* of the earlier
/// average, so a rising loss is reported (and a falling loss is not) even
/// when the losses are negative.
///
/// Returns `None` with fewer than 20 samples or when the increase stays
/// within the limit. If the earlier average is exactly zero the percentage
/// is infinite or NaN: NaN never exceeds the threshold, +infinity always
/// does.
fn detect_performance_degradation(&self) -> Option<PerformanceAnomaly> {
    const WINDOW: usize = 10;
    let history = &self.performance_history;
    if history.len() < 2 * WINDOW {
        return None;
    }
    let (earlier, recent_metrics) = history.split_at(history.len() - WINDOW);
    let previous_metrics = &earlier[earlier.len() - WINDOW..];

    let recent_avg_loss: f64 =
        recent_metrics.iter().map(|m| m.loss).sum::<f64>() / recent_metrics.len() as f64;
    let previous_avg_loss: f64 =
        previous_metrics.iter().map(|m| m.loss).sum::<f64>() / previous_metrics.len() as f64;

    // Divide by the baseline's magnitude: dividing by a negative baseline
    // would flip the sign and report improvements as degradations.
    let degradation_percent =
        ((recent_avg_loss - previous_avg_loss) / previous_avg_loss.abs()) * 100.0;

    if degradation_percent > self.thresholds.max_loss_increase_percent {
        // `recent_metrics` holds exactly WINDOW samples, so `last()` is `Some`.
        let latest = recent_metrics.last()?;
        Some(PerformanceAnomaly {
            anomaly_type: AnomalyType::PerformanceDegradation,
            severity: AnomalySeverity::High,
            description: format!("Performance degraded by {:.1}%", degradation_percent),
            detected_at_step: latest.training_step,
            confidence: 0.9,
        })
    } else {
        None
    }
}
/// Flags unstable training when the volatility (std-dev / |mean|) of the
/// last 10 losses exceeds `thresholds.max_loss_variance`.
///
/// Returns `None` with fewer than 10 samples or when volatility is within
/// the limit.
fn detect_training_instability(&self) -> Option<PerformanceAnomaly> {
    let total = self.performance_history.len();
    if total < 10 {
        return None;
    }
    let (_, recent_metrics) = self.performance_history.split_at(total - 10);
    let losses: Vec<f64> = recent_metrics.iter().map(|sample| sample.loss).collect();
    let volatility = self.compute_volatility(&losses);

    // Positive test keeps a NaN volatility from being reported.
    let unstable = volatility > self.thresholds.max_loss_variance;
    if !unstable {
        return None;
    }
    let latest = self
        .performance_history
        .last()
        .expect("performance_history is non-empty after is_empty check");
    Some(PerformanceAnomaly {
        anomaly_type: AnomalyType::TrainingInstability,
        severity: AnomalySeverity::Medium,
        description: format!("High loss volatility: {:.3}", volatility),
        detected_at_step: latest.training_step,
        confidence: 0.7,
    })
}
/// Flags a throughput drop when the mean throughput of the last 5 samples
/// is below `thresholds.min_throughput`.
///
/// Requires at least 10 recorded samples; returns `None` otherwise or when
/// the recent mean meets the threshold.
fn detect_throughput_drops(&self) -> Option<PerformanceAnomaly> {
    let history = self.performance_history.as_slice();
    if history.len() < 10 {
        return None;
    }
    let recent_metrics = &history[history.len() - 5..];
    let throughput_total: f64 = recent_metrics
        .iter()
        .map(|sample| sample.throughput_samples_per_sec)
        .sum();
    let avg_recent_throughput = throughput_total / recent_metrics.len() as f64;

    // Positive test keeps a NaN average from being reported.
    let too_slow = avg_recent_throughput < self.thresholds.min_throughput;
    if !too_slow {
        return None;
    }
    let latest = history
        .last()
        .expect("performance_history is non-empty after is_empty check");
    Some(PerformanceAnomaly {
        anomaly_type: AnomalyType::ThroughputDrop,
        severity: AnomalySeverity::Medium,
        description: format!("Low throughput: {:.1} samples/sec", avg_recent_throughput),
        detected_at_step: latest.training_step,
        confidence: 0.8,
    })
}
/// Discards every recorded sample; the history cap and thresholds are
/// left untouched.
pub fn clear(&mut self) {
    self.performance_history.truncate(0);
}
}
impl Default for PerformanceAnalyzer {
fn default() -> Self {
Self::new()
}
}
/// Trend slopes and volatilities derived from the recorded history.
///
/// The derived `Default` sets every field to `0.0`, which is also what the
/// analyzer reports when it has too few samples to analyze.
#[derive(Debug, Clone, Default)]
pub struct PerformanceTrends {
    /// Least-squares slope of the loss per recorded sample.
    pub loss_trend: f64,
    /// Least-squares slope of the throughput per recorded sample.
    pub throughput_trend: f64,
    /// Least-squares slope of the memory usage per recorded sample.
    pub memory_trend: f64,
    /// Loss standard deviation divided by the absolute mean loss.
    pub loss_volatility: f64,
    /// Throughput standard deviation divided by the absolute mean throughput.
    pub throughput_volatility: f64,
    /// Confidence score for the loss trend, at most `1.0`.
    pub trend_confidence: f64,
}
/// A single problem found by one of the analyzer's anomaly detectors.
#[derive(Debug, Clone)]
pub struct PerformanceAnomaly {
/// Which kind of problem was detected.
pub anomaly_type: AnomalyType,
/// How serious the detector considers the problem.
pub severity: AnomalySeverity,
/// Human-readable message including the measured value.
pub description: String,
/// `training_step` of the most recent sample at detection time.
pub detected_at_step: usize,
/// Detector-assigned confidence; the built-in detectors use fixed values
/// between 0.7 and 0.9.
pub confidence: f64,
}
/// Kinds of anomalies the analyzer can report.
///
/// Implements equality and hashing so values can be compared directly and
/// used as map or set keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum AnomalyType {
    /// Memory usage keeps growing over recent samples.
    MemoryLeak,
    /// Recent average loss is notably higher than the preceding window.
    PerformanceDegradation,
    /// Recent losses fluctuate more than the configured limit.
    TrainingInstability,
    /// Recent average throughput is below the configured minimum.
    ThroughputDrop,
}
/// Severity attached to a reported anomaly.
///
/// Variants are declared in ascending order, so the derived ordering gives
/// `Low < Medium < High < Critical`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum AnomalySeverity {
    /// Lowest severity.
    Low,
    /// Above `Low`, below `High`.
    Medium,
    /// Above `Medium`, below `Critical`.
    High,
    /// Highest severity.
    Critical,
}
/// A tuning suggestion produced by the analyzer's recommendation logic.
#[derive(Debug, Clone)]
pub struct OptimizationRecommendation {
/// Area the suggestion targets; the analyzer emits "Memory", "Throughput"
/// and "Training".
pub category: String,
/// How urgently the suggestion should be acted on.
pub priority: PerformanceRecommendationPriority,
/// Short statement of the detected problem.
pub description: String,
/// Proposed remedy.
pub suggestion: String,
/// Estimated benefit; the analyzer uses fixed values of 0.3–0.5.
/// NOTE(review): presumably a fraction (0.3 = 30 %) — confirm with consumers.
pub expected_improvement: f64,
}
/// Priority attached to an optimization recommendation.
///
/// Variants are declared in ascending order, so the derived ordering gives
/// `Low < Medium < High < Critical`. The comparison derives do not affect
/// the serde representation.
#[derive(
    Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, serde::Serialize, serde::Deserialize,
)]
pub enum PerformanceRecommendationPriority {
    /// Lowest priority.
    Low,
    /// Above `Low`, below `High`.
    Medium,
    /// Above `Medium`, below `Critical`.
    High,
    /// Highest priority.
    Critical,
}
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;

    /// Builds a metrics sample; fields not passed in get fixed placeholder
    /// values and the timestamp is the current time.
    fn create_test_metrics(
        step: usize,
        loss: f64,
        memory: f64,
        throughput: f64,
    ) -> ModelPerformanceMetrics {
        let timestamp = Utc::now();
        ModelPerformanceMetrics {
            training_step: step,
            loss,
            accuracy: Some(0.8),
            learning_rate: 0.001,
            batch_size: 32,
            throughput_samples_per_sec: throughput,
            memory_usage_mb: memory,
            gpu_utilization: Some(0.9),
            timestamp,
        }
    }

    /// A fresh analyzer has no samples and the default history cap.
    #[test]
    fn test_performance_analyzer_creation() {
        let analyzer = PerformanceAnalyzer::new();
        assert!(analyzer.performance_history.is_empty());
        assert_eq!(analyzer.max_history_length, 1000);
    }

    /// Recording one sample stores exactly one entry.
    #[test]
    fn test_record_performance() {
        let mut analyzer = PerformanceAnalyzer::new();
        analyzer.record_performance(create_test_metrics(1, 0.5, 1000.0, 100.0));
        assert_eq!(analyzer.performance_history.len(), 1);
    }

    /// With a strictly decreasing loss the best loss is below the average.
    #[test]
    fn test_performance_summary() {
        let mut analyzer = PerformanceAnalyzer::new();
        (1..=5)
            .map(|step| create_test_metrics(step, 1.0 / step as f64, 1000.0, 100.0))
            .for_each(|sample| analyzer.record_performance(sample));

        let summary = analyzer.generate_performance_summary();
        assert_eq!(summary.total_steps, 5);
        assert!(summary.best_loss < summary.avg_loss);
    }

    /// An increasing series has a positive slope.
    #[test]
    fn test_trend_computation() {
        let analyzer = PerformanceAnalyzer::new();
        let rising = [1.0, 2.0, 3.0, 4.0, 5.0];
        let slope = analyzer.compute_trend(&rising);
        assert!(slope > 0.0);
    }

    /// Memory growing by 50 MB per step is reported first, as a memory leak.
    #[test]
    fn test_memory_leak_detection() {
        let mut analyzer = PerformanceAnalyzer::new();
        for step in 1..=15 {
            let memory = 1000.0 + (step as f64 * 50.0);
            analyzer.record_performance(create_test_metrics(step, 0.5, memory, 100.0));
        }

        let anomalies = analyzer.detect_performance_anomalies();
        assert!(!anomalies.is_empty());
        let first = &anomalies[0];
        assert!(matches!(first.anomaly_type, AnomalyType::MemoryLeak));
    }
}