use crate::core::error::{Error, Result};
use crate::optimized::jit::cache::{CacheStats, FunctionId};
use crate::optimized::jit::config::{JITConfig, LoadBalancing, ParallelConfig, SIMDConfig};
use crate::{read_lock_safe, write_lock_safe};
use std::collections::{HashMap, VecDeque};
use std::sync::{Arc, RwLock};
use std::time::{Duration, Instant, SystemTime};
/// Rolling performance statistics for a single JIT-compiled function.
///
/// Updated incrementally by `record_execution`; all derived fields
/// (averages, std-dev, trend, suggestions) are refreshed on every sample.
#[derive(Debug, Clone)]
pub struct FunctionPerformanceMetrics {
    /// Identity of the function being measured.
    pub function_id: FunctionId,
    /// Total number of recorded executions.
    pub execution_count: u64,
    /// Sum of all recorded execution times, in nanoseconds.
    pub total_execution_time_ns: u64,
    /// Running mean execution time (total / count), in nanoseconds.
    pub avg_execution_time_ns: f64,
    /// Fastest recorded execution; starts at `u64::MAX` until the first sample.
    pub min_execution_time_ns: u64,
    /// Slowest recorded execution, in nanoseconds.
    pub max_execution_time_ns: u64,
    /// Sample standard deviation over the recent sliding window, in nanoseconds.
    pub std_dev_execution_time_ns: f64,
    /// Sliding window of the most recent execution times (capped at 100 samples).
    pub recent_execution_times: VecDeque<u64>,
    /// Incremental mean of per-execution memory usage, in bytes.
    pub avg_memory_usage_bytes: usize,
    /// Incremental mean of per-execution CPU-utilization samples.
    pub avg_cpu_utilization: f64,
    /// Cache hit rate. NOTE(review): never written anywhere in this file —
    /// presumably populated externally; confirm before relying on it.
    pub cache_hit_rate: f64,
    /// Derived throughput: 1e9 / `avg_execution_time_ns`.
    pub throughput_ops_per_sec: f64,
    /// Trend classification from comparing older vs. newer halves of the window.
    pub performance_trend: PerformanceTrend,
    /// Timestamp of the most recent `record_execution` call.
    pub last_updated: Instant,
    /// Suggestions regenerated from scratch on every recorded execution.
    pub optimization_suggestions: Vec<OptimizationSuggestion>,
}
impl FunctionPerformanceMetrics {
    /// Creates a zeroed metrics record for `function_id`.
    ///
    /// `min_execution_time_ns` starts at `u64::MAX` so the first recorded
    /// sample always becomes the minimum.
    pub fn new(function_id: FunctionId) -> Self {
        Self {
            function_id,
            execution_count: 0,
            total_execution_time_ns: 0,
            avg_execution_time_ns: 0.0,
            min_execution_time_ns: u64::MAX,
            max_execution_time_ns: 0,
            std_dev_execution_time_ns: 0.0,
            recent_execution_times: VecDeque::with_capacity(100),
            avg_memory_usage_bytes: 0,
            avg_cpu_utilization: 0.0,
            cache_hit_rate: 0.0,
            throughput_ops_per_sec: 0.0,
            performance_trend: PerformanceTrend::Stable,
            last_updated: Instant::now(),
            optimization_suggestions: Vec::new(),
        }
    }

    /// Records one execution sample and refreshes every derived statistic
    /// (mean, min/max, std-dev, trend, throughput, optimization suggestions).
    ///
    /// * `execution_time_ns` — wall-clock duration of this call, nanoseconds.
    /// * `memory_usage_bytes` — memory attributed to this execution.
    /// * `cpu_utilization` — CPU-utilization sample (treated as a fraction).
    pub fn record_execution(
        &mut self,
        execution_time_ns: u64,
        memory_usage_bytes: usize,
        cpu_utilization: f64,
    ) {
        self.execution_count += 1;
        self.total_execution_time_ns += execution_time_ns;
        self.avg_execution_time_ns =
            self.total_execution_time_ns as f64 / self.execution_count as f64;
        self.min_execution_time_ns = self.min_execution_time_ns.min(execution_time_ns);
        self.max_execution_time_ns = self.max_execution_time_ns.max(execution_time_ns);

        // Maintain the 100-sample sliding window used for variance/trend.
        self.recent_execution_times.push_back(execution_time_ns);
        if self.recent_execution_times.len() > 100 {
            self.recent_execution_times.pop_front();
        }

        // Incremental mean: avg_n = (avg_{n-1} * (n - 1) + sample) / n.
        // FIX: the previous code divided each term separately
        // (`avg*(n-1)/n + sample/n`), so integer truncation happened twice
        // per update and the running mean drifted low over time. Sum first,
        // then divide once.
        self.avg_memory_usage_bytes =
            ((self.avg_memory_usage_bytes as u64 * (self.execution_count - 1)
                + memory_usage_bytes as u64)
                / self.execution_count) as usize;
        self.avg_cpu_utilization = (self.avg_cpu_utilization * (self.execution_count - 1) as f64
            + cpu_utilization)
            / self.execution_count as f64;

        self.calculate_std_dev();
        // FIX: guard the degenerate all-zero-time case so throughput never
        // becomes infinite (1e9 / 0.0).
        self.throughput_ops_per_sec = if self.avg_execution_time_ns > 0.0 {
            1_000_000_000.0 / self.avg_execution_time_ns
        } else {
            0.0
        };
        self.update_performance_trend();
        self.update_optimization_suggestions();
        self.last_updated = Instant::now();
    }

    /// Recomputes the sample standard deviation over the recent window.
    /// Requires at least two samples (Bessel's correction divides by n - 1).
    fn calculate_std_dev(&mut self) {
        if self.recent_execution_times.len() < 2 {
            return;
        }
        let mean = self.recent_execution_times.iter().sum::<u64>() as f64
            / self.recent_execution_times.len() as f64;
        let variance = self
            .recent_execution_times
            .iter()
            .map(|&x| {
                let diff = x as f64 - mean;
                diff * diff
            })
            .sum::<f64>()
            / (self.recent_execution_times.len() - 1) as f64;
        self.std_dev_execution_time_ns = variance.sqrt();
    }

    /// Classifies the trend by comparing the mean of the older half of the
    /// window (front of the deque) against the mean of the newer half.
    /// A ratio above 1.1 means newer samples are faster (improving); below
    /// 0.9 means they are slower (degrading).
    fn update_performance_trend(&mut self) {
        if self.recent_execution_times.len() < 10 {
            return; // not enough data for a meaningful comparison
        }
        let recent_count = self.recent_execution_times.len();
        let split_point = recent_count / 2;
        let first_half: f64 = self
            .recent_execution_times
            .iter()
            .take(split_point)
            .map(|&x| x as f64)
            .sum::<f64>()
            / split_point as f64;
        let second_half: f64 = self
            .recent_execution_times
            .iter()
            .skip(split_point)
            .map(|&x| x as f64)
            .sum::<f64>()
            / (recent_count - split_point) as f64;
        // FIX: guard against a zero denominator (an all-zero newer half),
        // which previously produced an infinite ratio and a spurious
        // `Improving` classification.
        if second_half <= 0.0 {
            return;
        }
        let improvement_ratio = first_half / second_half;
        self.performance_trend = if improvement_ratio > 1.1 {
            PerformanceTrend::Improving
        } else if improvement_ratio < 0.9 {
            PerformanceTrend::Degrading
        } else {
            PerformanceTrend::Stable
        };
    }

    /// Rebuilds `optimization_suggestions` from scratch against fixed
    /// heuristic thresholds (variance > 20% of mean, memory > 1 MiB,
    /// CPU < 50% after 50 runs, mean > 10 ms, degrading trend).
    fn update_optimization_suggestions(&mut self) {
        self.optimization_suggestions.clear();
        if self.std_dev_execution_time_ns > self.avg_execution_time_ns * 0.2 {
            self.optimization_suggestions.push(OptimizationSuggestion {
                suggestion_type: OptimizationType::ReduceVariance,
                description: "High execution time variance detected. Consider function specialization or better memory management.".to_string(),
                priority: OptimizationPriority::Medium,
                estimated_improvement: 0.15,
            });
        }
        if self.avg_memory_usage_bytes > 1024 * 1024 {
            self.optimization_suggestions.push(OptimizationSuggestion {
                suggestion_type: OptimizationType::ReduceMemoryUsage,
                description: "High memory usage detected. Consider memory pooling or more efficient data structures.".to_string(),
                priority: OptimizationPriority::High,
                estimated_improvement: 0.25,
            });
        }
        if self.avg_cpu_utilization < 0.5 && self.execution_count > 50 {
            self.optimization_suggestions.push(OptimizationSuggestion {
                suggestion_type: OptimizationType::IncreaseCpuUtilization,
                description:
                    "Low CPU utilization detected. Consider vectorization or parallelization."
                        .to_string(),
                priority: OptimizationPriority::Medium,
                estimated_improvement: 0.30,
            });
        }
        if self.avg_execution_time_ns > 10_000_000.0 {
            self.optimization_suggestions.push(OptimizationSuggestion {
                suggestion_type: OptimizationType::AlgorithmicOptimization,
                description:
                    "Slow execution detected. Consider algorithmic improvements or caching."
                        .to_string(),
                priority: OptimizationPriority::High,
                estimated_improvement: 0.40,
            });
        }
        if matches!(self.performance_trend, PerformanceTrend::Degrading) {
            self.optimization_suggestions.push(OptimizationSuggestion {
                suggestion_type: OptimizationType::PerformanceRegression,
                description: "Performance degradation detected. Investigate recent changes or memory fragmentation.".to_string(),
                priority: OptimizationPriority::High,
                estimated_improvement: 0.20,
            });
        }
    }

    /// Composite score combining throughput (40%), consistency (30%),
    /// CPU utilization (20%) and trend (10%). Returns 0.0 before any
    /// execution has been recorded.
    pub fn get_performance_score(&self) -> f64 {
        if self.execution_count == 0 {
            return 0.0;
        }
        // Throughput saturates at 1M ops/sec.
        let throughput_score = (self.throughput_ops_per_sec / 1_000_000.0).min(1.0);
        // Lower relative variance -> higher consistency score.
        let consistency_score =
            1.0 - (self.std_dev_execution_time_ns / self.avg_execution_time_ns.max(1.0)).min(1.0);
        let cpu_score = self.avg_cpu_utilization;
        let trend_score = match self.performance_trend {
            PerformanceTrend::Improving => 1.0,
            PerformanceTrend::Stable => 0.8,
            PerformanceTrend::Degrading => 0.4,
        };
        throughput_score * 0.4 + consistency_score * 0.3 + cpu_score * 0.2 + trend_score * 0.1
    }
}
/// Direction of a function's recent execution-time trend, derived from the
/// sliding sample window in `FunctionPerformanceMetrics`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PerformanceTrend {
    /// Newer samples are measurably faster than older ones.
    Improving,
    /// No significant change between the older and newer window halves.
    Stable,
    /// Newer samples are measurably slower than older ones.
    Degrading,
}
/// Category of a per-function optimization suggestion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OptimizationType {
    /// High execution-time variance relative to the mean.
    ReduceVariance,
    /// Average memory usage above the 1 MiB heuristic threshold.
    ReduceMemoryUsage,
    /// Sustained low CPU utilization over many executions.
    IncreaseCpuUtilization,
    /// Mean execution time above the slow-function threshold.
    AlgorithmicOptimization,
    /// The performance trend has been classified as degrading.
    PerformanceRegression,
    // NOTE(review): the three variants below are never produced by this
    // file's heuristics — presumably emitted elsewhere; confirm usage.
    EnableSIMD,
    EnableParallelization,
    IncreaseCacheLocality,
}
/// Urgency of an optimization suggestion.
///
/// Variant order matters: the derived `Ord` makes `Low < Medium < High <
/// Critical`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum OptimizationPriority {
    Low,
    Medium,
    High,
    Critical,
}
/// A single actionable tuning recommendation for one function.
#[derive(Debug, Clone)]
pub struct OptimizationSuggestion {
    /// What kind of change is being suggested.
    pub suggestion_type: OptimizationType,
    /// Human-readable explanation of the finding and suggested remedy.
    pub description: String,
    /// Relative urgency of acting on this suggestion.
    pub priority: OptimizationPriority,
    /// Estimated improvement as a fraction (e.g. 0.15 ≈ 15%), as assigned
    /// by the heuristics in `update_optimization_suggestions`.
    pub estimated_improvement: f64,
}
/// Aggregate, system-wide view of JIT activity, maintained by
/// `JitPerformanceMonitor`.
#[derive(Debug, Clone)]
pub struct SystemPerformanceMetrics {
    /// Fraction of tracked functions that are "hot" (> 100 executions).
    pub jit_utilization: f64,
    /// Memory pressure. NOTE(review): never written in this file —
    /// presumably updated externally; confirm before relying on it.
    pub memory_pressure: f64,
    /// Mean of the per-function average CPU-utilization values.
    pub cpu_utilization: f64,
    /// Cache effectiveness. NOTE(review): never written in this file, but
    /// read by `suggest_config_optimizations` — confirm who populates it.
    pub cache_effectiveness: f64,
    /// Number of functions currently tracked in the metrics map.
    pub active_functions: usize,
    /// Time since the monitor was created; filled in on read by
    /// `get_system_metrics`.
    pub uptime: Duration,
    /// Total compilation attempts (successful and failed).
    pub total_compilations: u64,
    /// Subset of `total_compilations` that failed.
    pub failed_compilations: u64,
    /// Running mean compilation time over all attempts, in nanoseconds.
    pub avg_compilation_time_ns: f64,
}
/// Thread-safe monitor that aggregates per-function and system-wide JIT
/// performance data behind `RwLock`s.
pub struct JitPerformanceMonitor {
    /// Per-function statistics, keyed by function identity.
    function_metrics: RwLock<HashMap<FunctionId, FunctionPerformanceMetrics>>,
    /// Aggregate metrics, refreshed after every recorded execution.
    system_metrics: RwLock<SystemPerformanceMetrics>,
    /// Creation time; used to compute `uptime` on demand.
    start_time: Instant,
    /// Shared mutable JIT configuration, tuned by
    /// `apply_automatic_optimizations`.
    config: Arc<RwLock<JITConfig>>,
    /// (timestamp, jit_utilization) samples, capped at 1000 entries.
    performance_history: RwLock<VecDeque<(Instant, f64)>>,
}
impl JitPerformanceMonitor {
    /// Builds a monitor with zeroed system metrics and the given JIT config.
    pub fn new(config: JITConfig) -> Self {
        Self {
            function_metrics: RwLock::new(HashMap::new()),
            system_metrics: RwLock::new(SystemPerformanceMetrics {
                jit_utilization: 0.0,
                memory_pressure: 0.0,
                cpu_utilization: 0.0,
                cache_effectiveness: 0.0,
                active_functions: 0,
                uptime: Duration::new(0, 0),
                total_compilations: 0,
                failed_compilations: 0,
                avg_compilation_time_ns: 0.0,
            }),
            start_time: Instant::now(),
            config: Arc::new(RwLock::new(config)),
            performance_history: RwLock::new(VecDeque::with_capacity(1000)),
        }
    }

    /// Records one execution of `function_id`, creating its metrics entry on
    /// first use, then refreshes the aggregate system metrics.
    ///
    /// The per-function write lock is released (inner scope) before
    /// `update_system_metrics` re-acquires the map for reading, so the two
    /// acquisitions never overlap on this path.
    pub fn record_function_execution(
        &self,
        function_id: &FunctionId,
        execution_time_ns: u64,
        memory_usage_bytes: usize,
        cpu_utilization: f64,
    ) -> Result<()> {
        {
            let mut metrics = write_lock_safe!(
                self.function_metrics,
                "performance monitor function metrics write"
            )?;
            let function_metrics = metrics
                .entry(function_id.clone())
                .or_insert_with(|| FunctionPerformanceMetrics::new(function_id.clone()));
            function_metrics.record_execution(
                execution_time_ns,
                memory_usage_bytes,
                cpu_utilization,
            );
        }
        self.update_system_metrics()?;
        Ok(())
    }

    /// Records the outcome of one compilation attempt in the system-wide
    /// counters and the running average compilation time (the average
    /// includes failed attempts).
    ///
    /// FIX: `function_id` was never read, producing an unused-variable
    /// warning; prefixed with `_` (callers are unaffected). It is kept in
    /// the signature for interface stability and future per-function
    /// compilation statistics.
    pub fn record_compilation(
        &self,
        _function_id: &FunctionId,
        compilation_time_ns: u64,
        success: bool,
    ) -> Result<()> {
        let mut system_metrics = write_lock_safe!(
            self.system_metrics,
            "performance monitor system metrics write"
        )?;
        system_metrics.total_compilations += 1;
        if !success {
            system_metrics.failed_compilations += 1;
        }
        // Incremental mean: reconstruct the previous total, add the new
        // sample, divide by the new count.
        let total_time =
            system_metrics.avg_compilation_time_ns * (system_metrics.total_compilations - 1) as f64;
        system_metrics.avg_compilation_time_ns =
            (total_time + compilation_time_ns as f64) / system_metrics.total_compilations as f64;
        Ok(())
    }

    /// Returns a snapshot of the metrics for `function_id`, or `None` if the
    /// function is untracked or the lock is poisoned.
    pub fn get_function_metrics(
        &self,
        function_id: &FunctionId,
    ) -> Option<FunctionPerformanceMetrics> {
        read_lock_safe!(
            self.function_metrics,
            "performance monitor function metrics read"
        )
        .ok()?
        .get(function_id)
        .cloned()
    }

    /// Returns a snapshot of the system metrics with `uptime` recomputed
    /// from the monitor's start time.
    pub fn get_system_metrics(&self) -> Result<SystemPerformanceMetrics> {
        let mut metrics = read_lock_safe!(
            self.system_metrics,
            "performance monitor system metrics read"
        )?
        .clone();
        metrics.uptime = self.start_time.elapsed();
        Ok(metrics)
    }

    /// Returns up to `count` tracked functions sorted by descending
    /// performance score (NaN scores compare as equal).
    pub fn get_top_performing_functions(
        &self,
        count: usize,
    ) -> Result<Vec<FunctionPerformanceMetrics>> {
        let metrics = read_lock_safe!(
            self.function_metrics,
            "performance monitor function metrics read"
        )?;
        let mut functions: Vec<_> = metrics.values().cloned().collect();
        functions.sort_by(|a, b| {
            b.get_performance_score()
                .partial_cmp(&a.get_performance_score())
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        Ok(functions.into_iter().take(count).collect())
    }

    /// Lists every function that currently has at least one optimization
    /// suggestion, paired with its suggestions.
    pub fn get_functions_needing_optimization(
        &self,
    ) -> Result<Vec<(FunctionId, Vec<OptimizationSuggestion>)>> {
        let metrics = read_lock_safe!(
            self.function_metrics,
            "performance monitor function metrics read"
        )?;
        Ok(metrics
            .iter()
            .filter_map(|(id, metrics)| {
                if !metrics.optimization_suggestions.is_empty() {
                    Some((id.clone(), metrics.optimization_suggestions.clone()))
                } else {
                    None
                }
            })
            .collect())
    }

    /// Derives configuration-level tuning suggestions from the current
    /// system metrics (low CPU use, high JIT utilization, poor cache
    /// effectiveness).
    pub fn suggest_config_optimizations(&self) -> Result<Vec<ConfigOptimization>> {
        let system_metrics = self.get_system_metrics()?;
        let mut suggestions = Vec::new();
        if system_metrics.cpu_utilization < 0.5 {
            suggestions.push(ConfigOptimization {
                config_type: ConfigType::Parallel,
                description: "Low CPU utilization detected. Consider enabling more aggressive parallelization.".to_string(),
                recommended_change: "Reduce min_chunk_size and enable dynamic load balancing".to_string(),
                estimated_improvement: 0.25,
            });
        }
        if system_metrics.jit_utilization > 0.7 {
            suggestions.push(ConfigOptimization {
                config_type: ConfigType::SIMD,
                description: "High JIT utilization detected. SIMD operations could provide significant speedup.".to_string(),
                recommended_change: "Enable SIMD with lower min_simd_size threshold".to_string(),
                estimated_improvement: 0.30,
            });
        }
        if system_metrics.cache_effectiveness < 0.6 {
            suggestions.push(ConfigOptimization {
                config_type: ConfigType::Cache,
                description: "Low cache effectiveness detected. Consider increasing cache size or improving eviction policy.".to_string(),
                recommended_change: "Increase cache size and enable better caching heuristics".to_string(),
                estimated_improvement: 0.20,
            });
        }
        Ok(suggestions)
    }

    /// Applies the suggestions from `suggest_config_optimizations` directly
    /// to the shared config (floors keep min_chunk_size ≥ 100 and
    /// min_simd_size ≥ 32) and returns a description of each change made.
    pub fn apply_automatic_optimizations(&self) -> Result<Vec<String>> {
        let suggestions = self.suggest_config_optimizations()?;
        let mut applied_optimizations = Vec::new();
        let mut config = write_lock_safe!(self.config, "performance monitor config write")?;
        for suggestion in suggestions {
            match suggestion.config_type {
                ConfigType::Parallel => {
                    if suggestion.estimated_improvement > 0.2 {
                        config.parallel.min_chunk_size =
                            (config.parallel.min_chunk_size / 2).max(100);
                        config.parallel.load_balancing = LoadBalancing::Dynamic;
                        applied_optimizations
                            .push("Enabled more aggressive parallelization".to_string());
                    }
                }
                ConfigType::SIMD => {
                    if suggestion.estimated_improvement > 0.25 {
                        config.simd.min_simd_size = (config.simd.min_simd_size / 2).max(32);
                        config.simd.enabled = true;
                        applied_optimizations.push("Optimized SIMD configuration".to_string());
                    }
                }
                ConfigType::Cache => {
                    applied_optimizations.push("Triggered cache optimization".to_string());
                }
                ConfigType::Compilation => {
                    if config.optimization_level < 3 {
                        config.optimization_level += 1;
                        applied_optimizations
                            .push("Increased compilation optimization level".to_string());
                    }
                }
            }
        }
        Ok(applied_optimizations)
    }

    /// Recomputes the aggregate metrics from the per-function map and
    /// appends a utilization sample to the bounded history.
    ///
    /// Lock order here is: function_metrics (read) -> system_metrics (write)
    /// -> performance_history (write); callers must not hold any of these
    /// when invoking this method.
    fn update_system_metrics(&self) -> Result<()> {
        let function_metrics = read_lock_safe!(
            self.function_metrics,
            "performance monitor function metrics read"
        )?;
        let mut system_metrics = write_lock_safe!(
            self.system_metrics,
            "performance monitor system metrics write"
        )?;
        system_metrics.active_functions = function_metrics.len();
        if !function_metrics.is_empty() {
            let avg_cpu = function_metrics
                .values()
                .map(|m| m.avg_cpu_utilization)
                .sum::<f64>()
                / function_metrics.len() as f64;
            system_metrics.cpu_utilization = avg_cpu;
            // "Hot" functions drive the JIT-utilization ratio.
            let hot_functions = function_metrics
                .values()
                .filter(|m| m.execution_count > 100)
                .count();
            system_metrics.jit_utilization = hot_functions as f64 / function_metrics.len() as f64;
        }
        let mut history = write_lock_safe!(
            self.performance_history,
            "performance monitor performance history write"
        )?;
        history.push_back((Instant::now(), system_metrics.jit_utilization));
        if history.len() > 1000 {
            history.pop_front();
        }
        Ok(())
    }
}
/// A recommended change to the global JIT configuration.
#[derive(Debug, Clone)]
pub struct ConfigOptimization {
    /// Which part of the configuration the recommendation targets.
    pub config_type: ConfigType,
    /// Human-readable explanation of the observed condition.
    pub description: String,
    /// Human-readable description of the suggested configuration change.
    pub recommended_change: String,
    /// Estimated improvement as a fraction (e.g. 0.25 ≈ 25%), as assigned
    /// by `suggest_config_optimizations`.
    pub estimated_improvement: f64,
}
/// Configuration area targeted by a `ConfigOptimization`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfigType {
    /// Parallel-execution settings (chunk size, load balancing).
    Parallel,
    /// SIMD settings (enablement, minimum vector size).
    SIMD,
    /// Compiled-function cache settings.
    Cache,
    /// Compiler optimization level.
    Compilation,
}
/// Process-wide monitor instance; created lazily by `get_global_monitor`
/// or explicitly (at most once) by `init_global_monitor`.
static GLOBAL_MONITOR: std::sync::OnceLock<JitPerformanceMonitor> = std::sync::OnceLock::new();
/// Returns the process-wide performance monitor.
///
/// On first access the monitor is lazily created with a default
/// `JITConfig`; every subsequent call returns the same instance.
pub fn get_global_monitor() -> &'static JitPerformanceMonitor {
    GLOBAL_MONITOR.get_or_init(|| {
        let default_config = JITConfig::default();
        JitPerformanceMonitor::new(default_config)
    })
}
pub fn init_global_monitor(config: JITConfig) -> Result<()> {
GLOBAL_MONITOR
.set(JitPerformanceMonitor::new(config))
.map_err(|_| Error::InvalidOperation("Global monitor already initialized".to_string()))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two samples through `record_execution`: verifies the count, the
    /// exact running mean, and that a positive score is produced.
    #[test]
    fn test_function_performance_metrics() {
        let function_id = FunctionId::new("test", "f64", "f64", "test_op", 1);
        let mut metrics = FunctionPerformanceMetrics::new(function_id);
        metrics.record_execution(1_000_000, 1024, 0.8);
        metrics.record_execution(1_200_000, 1024, 0.9);
        assert_eq!(metrics.execution_count, 2);
        assert_eq!(metrics.avg_execution_time_ns, 1_100_000.0);
        assert!(metrics.get_performance_score() > 0.0);
    }

    /// End-to-end path through the monitor: record one execution, then
    /// read back both the per-function and the system-level metrics.
    #[test]
    fn test_performance_monitor() {
        let monitor = JitPerformanceMonitor::new(JITConfig::default());
        let function_id = FunctionId::new("test", "f64", "f64", "test_op", 1);
        monitor
            .record_function_execution(&function_id, 1_000_000, 1024, 0.8)
            .expect("operation should succeed");
        let metrics = monitor.get_function_metrics(&function_id);
        assert!(metrics.is_some());
        let system_metrics = monitor
            .get_system_metrics()
            .expect("operation should succeed");
        assert_eq!(system_metrics.active_functions, 1);
    }

    /// Feeds samples that are simultaneously slow (50 ms), memory-heavy
    /// (~2 MB) and CPU-light (0.3) and checks that at least one of the
    /// corresponding suggestion types is generated.
    #[test]
    fn test_optimization_suggestions() {
        let function_id = FunctionId::new("slow_function", "f64", "f64", "slow_op", 1);
        let mut metrics = FunctionPerformanceMetrics::new(function_id);
        for _ in 0..10 {
            metrics.record_execution(50_000_000, 2_000_000, 0.3);
        }
        assert!(!metrics.optimization_suggestions.is_empty());
        let suggestion_types: Vec<_> = metrics
            .optimization_suggestions
            .iter()
            .map(|s| s.suggestion_type)
            .collect();
        assert!(!suggestion_types.is_empty());
        let expected_optimizations = vec![
            OptimizationType::ReduceMemoryUsage,
            OptimizationType::IncreaseCpuUtilization,
            OptimizationType::AlgorithmicOptimization,
        ];
        let has_expected = expected_optimizations
            .iter()
            .any(|opt| suggestion_types.contains(opt));
        assert!(
            has_expected,
            "Expected at least one of {:?}, but got {:?}",
            expected_optimizations, suggestion_types
        );
    }
}