1use std::collections::HashMap;
9use std::sync::{Arc, Mutex};
10use std::time::{Duration, Instant};
11
/// Aggregated metrics collected over one profiled computation.
#[derive(Debug, Clone)]
pub struct PerformanceMetrics {
    /// Total wall-clock time from profiler creation to `finalize`.
    pub total_time: Duration,
    /// Accumulated wall-clock time per named phase.
    pub phase_times: HashMap<String, Duration>,
    /// Number of function evaluations recorded.
    pub function_evaluations: usize,
    /// Number of Jacobian evaluations recorded.
    pub jacobian_evaluations: usize,
    /// Number of linear-system solves recorded.
    pub linear_solves: usize,
    /// Coarse memory-usage counters.
    pub memory_stats: MemoryStatistics,
    /// Free-form named metrics (e.g. "convergence_rate", "avg_step_size").
    pub algorithmmetrics: HashMap<String, f64>,
    /// Residual norms in the order they were recorded.
    pub convergence_history: Vec<f64>,
    /// Step sizes in the order they were recorded.
    pub step_size_history: Vec<f64>,
    /// Error estimates in the order they were recorded.
    pub error_estimates: Vec<f64>,
    /// Cache/throughput statistics.
    pub cache_stats: CacheStatistics,
}
38
/// Coarse memory-usage counters, updated via `update_memory_stats`.
#[derive(Debug, Clone)]
pub struct MemoryStatistics {
    /// Highest memory footprint observed, in bytes (converted to MB elsewhere).
    pub peak_memory: usize,
    /// Most recently reported memory footprint, in bytes.
    pub current_memory: usize,
    /// Count of allocation events (incremented once per stats update).
    pub allocation_count: usize,
    /// Count of deallocation events (never written by this module — TODO confirm producer).
    pub deallocation_count: usize,
    /// Memory-bandwidth utilization, if measured (not set by this module's profiler).
    pub bandwidth_utilization: Option<f64>,
}
53
/// Cache and throughput statistics.
#[derive(Debug, Clone)]
pub struct CacheStatistics {
    /// Fraction of cache hits, if measured (not set by this module's profiler — TODO confirm producer).
    pub cache_hit_rate: Option<f64>,
    /// Access counts keyed by pattern name (not written by this module's profiler).
    pub access_patterns: HashMap<String, usize>,
    /// Estimated floating-point operations per second (set by `estimate_flops`).
    pub flops: Option<f64>,
}
64
/// Collects timing and counter data while a computation runs.
pub struct PerformanceProfiler {
    /// Instant the profiler was created; anchors `total_time`.
    start_time: Instant,
    /// Start instants of phases currently being timed, keyed by phase name.
    phase_timers: HashMap<String, Instant>,
    /// Accumulated metrics, behind a mutex so the handle can be shared.
    metrics: Arc<Mutex<PerformanceMetrics>>,
    /// When false, every recording call is a no-op.
    is_active: bool,
}
72
73impl Default for PerformanceMetrics {
74 fn default() -> Self {
75 Self {
76 total_time: Duration::ZERO,
77 phase_times: HashMap::new(),
78 function_evaluations: 0,
79 jacobian_evaluations: 0,
80 linear_solves: 0,
81 memory_stats: MemoryStatistics {
82 peak_memory: 0,
83 current_memory: 0,
84 allocation_count: 0,
85 deallocation_count: 0,
86 bandwidth_utilization: None,
87 },
88 algorithmmetrics: HashMap::new(),
89 convergence_history: Vec::new(),
90 step_size_history: Vec::new(),
91 error_estimates: Vec::new(),
92 cache_stats: CacheStatistics {
93 cache_hit_rate: None,
94 access_patterns: HashMap::new(),
95 flops: None,
96 },
97 }
98 }
99}
100
101impl PerformanceProfiler {
102 pub fn new() -> Self {
104 Self {
105 start_time: Instant::now(),
106 phase_timers: HashMap::new(),
107 metrics: Arc::new(Mutex::new(PerformanceMetrics::default())),
108 is_active: true,
109 }
110 }
111
112 pub fn start_phase(&mut self, phasename: &str) {
114 if !self.is_active {
115 return;
116 }
117 self.phase_timers
118 .insert(phasename.to_string(), Instant::now());
119 }
120
121 pub fn end_phase(&mut self, phasename: &str) {
123 if !self.is_active {
124 return;
125 }
126
127 if let Some(start_time) = self.phase_timers.remove(phasename) {
128 let duration = start_time.elapsed();
129 if let Ok(mut metrics) = self.metrics.lock() {
130 *metrics
131 .phase_times
132 .entry(phasename.to_string())
133 .or_insert(Duration::ZERO) += duration;
134 }
135 }
136 }
137
138 pub fn record_function_evaluation(&mut self) {
140 if !self.is_active {
141 return;
142 }
143
144 if let Ok(mut metrics) = self.metrics.lock() {
145 metrics.function_evaluations += 1;
146 }
147 }
148
149 pub fn record_jacobian_evaluation(&mut self) {
151 if !self.is_active {
152 return;
153 }
154
155 if let Ok(mut metrics) = self.metrics.lock() {
156 metrics.jacobian_evaluations += 1;
157 }
158 }
159
160 pub fn record_linear_solve(&mut self) {
162 if !self.is_active {
163 return;
164 }
165
166 if let Ok(mut metrics) = self.metrics.lock() {
167 metrics.linear_solves += 1;
168 }
169 }
170
171 pub fn record_convergence(&mut self, residualnorm: f64) {
173 if !self.is_active {
174 return;
175 }
176
177 if let Ok(mut metrics) = self.metrics.lock() {
178 metrics.convergence_history.push(residualnorm);
179 }
180 }
181
182 pub fn record_step_size(&mut self, stepsize: f64) {
184 if !self.is_active {
185 return;
186 }
187
188 if let Ok(mut metrics) = self.metrics.lock() {
189 metrics.step_size_history.push(stepsize);
190 }
191 }
192
193 pub fn record_error_estimate(&mut self, error: f64) {
195 if !self.is_active {
196 return;
197 }
198
199 if let Ok(mut metrics) = self.metrics.lock() {
200 metrics.error_estimates.push(error);
201 }
202 }
203
204 pub fn record_metric(&mut self, name: &str, value: f64) {
206 if !self.is_active {
207 return;
208 }
209
210 if let Ok(mut metrics) = self.metrics.lock() {
211 metrics.algorithmmetrics.insert(name.to_string(), value);
212 }
213 }
214
215 pub fn update_memory_stats(&mut self, current_memory: usize, peak_memory: usize) {
217 if !self.is_active {
218 return;
219 }
220
221 if let Ok(mut metrics) = self.metrics.lock() {
222 metrics.memory_stats.current_memory = current_memory;
223 if peak_memory > metrics.memory_stats.peak_memory {
224 metrics.memory_stats.peak_memory = peak_memory;
225 }
226 metrics.memory_stats.allocation_count += 1;
227 }
228 }
229
230 pub fn estimate_flops(&mut self, operations: usize, time: Duration) {
232 if !self.is_active || time.is_zero() {
233 return;
234 }
235
236 let flops = operations as f64 / time.as_secs_f64();
237 if let Ok(mut metrics) = self.metrics.lock() {
238 metrics.cache_stats.flops = Some(flops);
239 }
240 }
241
242 pub fn finalize(&self) -> PerformanceMetrics {
244 let total_time = self.start_time.elapsed();
245
246 if let Ok(mut metrics) = self.metrics.lock() {
247 metrics.total_time = total_time;
248
249 self.compute_efficiencymetrics(&mut metrics);
251
252 metrics.clone()
253 } else {
254 PerformanceMetrics::default()
255 }
256 }
257
258 fn compute_efficiencymetrics(&self, metrics: &mut PerformanceMetrics) {
260 if metrics.convergence_history.len() > 1 {
262 let rates: Vec<f64> = metrics
263 .convergence_history
264 .windows(2)
265 .map(|window| {
266 if window[0] > 0.0 && window[1] > 0.0 {
267 (window[1] / window[0]).log10()
268 } else {
269 0.0
270 }
271 })
272 .collect();
273
274 if !rates.is_empty() {
275 let avg_rate = rates.iter().sum::<f64>() / rates.len() as f64;
276 metrics
277 .algorithmmetrics
278 .insert("convergence_rate".to_string(), avg_rate);
279 }
280 }
281
282 if metrics.function_evaluations > 0 && !metrics.total_time.is_zero() {
284 let eval_rate = metrics.function_evaluations as f64 / metrics.total_time.as_secs_f64();
285 metrics
286 .algorithmmetrics
287 .insert("evaluations_per_second".to_string(), eval_rate);
288 }
289
290 if !metrics.step_size_history.is_empty() {
292 let min_step = metrics
293 .step_size_history
294 .iter()
295 .fold(f64::INFINITY, |a, &b| a.min(b));
296 let max_step = metrics
297 .step_size_history
298 .iter()
299 .fold(f64::NEG_INFINITY, |a, &b| a.max(b));
300 let avg_step = metrics.step_size_history.iter().sum::<f64>()
301 / metrics.step_size_history.len() as f64;
302
303 metrics
304 .algorithmmetrics
305 .insert("min_step_size".to_string(), min_step);
306 metrics
307 .algorithmmetrics
308 .insert("max_step_size".to_string(), max_step);
309 metrics
310 .algorithmmetrics
311 .insert("avg_step_size".to_string(), avg_step);
312 }
313
314 if metrics.memory_stats.peak_memory > 0 {
316 let memory_mb = metrics.memory_stats.peak_memory as f64 / (1024.0 * 1024.0);
317 metrics
318 .algorithmmetrics
319 .insert("peak_memory_mb".to_string(), memory_mb);
320 }
321 }
322
323 pub fn disable(&mut self) {
325 self.is_active = false;
326 }
327
328 pub fn enable(&mut self) {
330 self.is_active = true;
331 }
332
333 pub fn is_active(&self) -> bool {
335 self.is_active
336 }
337}
338
339pub struct PerformanceAnalyzer;
341
342impl PerformanceAnalyzer {
343 pub fn analyze_convergence(metrics: &PerformanceMetrics) -> ConvergenceAnalysis {
345 let mut analysis = ConvergenceAnalysis::default();
346
347 if metrics.convergence_history.len() >= 2 {
348 let log_residuals: Vec<f64> = metrics
350 .convergence_history
351 .iter()
352 .filter(|&&r| r > 0.0)
353 .map(|&r| r.log10())
354 .collect();
355
356 if log_residuals.len() >= 3 {
357 let n = log_residuals.len();
359 let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
360
361 let x_mean = x.iter().sum::<f64>() / n as f64;
363 let y_mean = log_residuals.iter().sum::<f64>() / n as f64;
364
365 let numerator: f64 = x
366 .iter()
367 .zip(&log_residuals)
368 .map(|(xi, yi)| (xi - x_mean) * (yi - y_mean))
369 .sum();
370 let denominator: f64 = x.iter().map(|xi| (xi - x_mean).powi(2)).sum();
371
372 if denominator.abs() > 1e-10 {
373 analysis.convergence_rate = Some(-numerator / denominator);
374 }
375 }
376
377 analysis.final_residual = metrics.convergence_history.last().copied();
378 analysis.initial_residual = metrics.convergence_history.first().copied();
379 }
380
381 analysis
382 }
383
384 pub fn identify_bottlenecks(metrics: &PerformanceMetrics) -> Vec<PerformanceBottleneck> {
386 let mut bottlenecks = Vec::new();
387
388 let total_phase_time: Duration = metrics.phase_times.values().sum();
390 if !total_phase_time.is_zero() {
391 for (phase, duration) in &metrics.phase_times {
392 let percentage = duration.as_secs_f64() / total_phase_time.as_secs_f64() * 100.0;
393 if percentage > 30.0 {
394 bottlenecks.push(PerformanceBottleneck {
395 category: BottleneckCategory::ComputationPhase,
396 description: format!(
397 "Phase '{phase}' takes {percentage:.1}% of computation time"
398 ),
399 severity: if percentage > 50.0 {
400 Severity::High
401 } else {
402 Severity::Medium
403 },
404 suggested_improvements: vec![
405 "Consider algorithm optimization".to_string(),
406 "Check for unnecessary computations".to_string(),
407 "Consider parallelization".to_string(),
408 ],
409 });
410 }
411 }
412 }
413
414 if metrics.memory_stats.peak_memory > 1024 * 1024 * 1024 {
416 bottlenecks.push(PerformanceBottleneck {
418 category: BottleneckCategory::Memory,
419 description: format!(
420 "High memory usage: {:.1} MB",
421 metrics.memory_stats.peak_memory as f64 / (1024.0 * 1024.0)
422 ),
423 severity: Severity::Medium,
424 suggested_improvements: vec![
425 "Consider using memory pooling".to_string(),
426 "Implement iterative refinement".to_string(),
427 "Use sparse data structures".to_string(),
428 ],
429 });
430 }
431
432 if metrics.function_evaluations > 0 && !metrics.total_time.is_zero() {
434 let eval_rate = metrics.function_evaluations as f64 / metrics.total_time.as_secs_f64();
435 if eval_rate < 100.0 {
436 bottlenecks.push(PerformanceBottleneck {
438 category: BottleneckCategory::FunctionEvaluation,
439 description: format!("Low function evaluation rate: {eval_rate:.1} evals/sec"),
440 severity: Severity::Low,
441 suggested_improvements: vec![
442 "Optimize function implementation".to_string(),
443 "Consider SIMD vectorization".to_string(),
444 "Cache expensive computations".to_string(),
445 ],
446 });
447 }
448 }
449
450 bottlenecks
451 }
452
453 pub fn generate_report(metrics: &PerformanceMetrics) -> PerformanceReport {
455 let convergence_analysis = Self::analyze_convergence(metrics);
456 let bottlenecks = Self::identify_bottlenecks(metrics);
457
458 PerformanceReport {
459 metrics: metrics.clone(),
460 convergence_analysis,
461 bottlenecks,
462 recommendations: Self::generate_recommendations(metrics),
463 }
464 }
465
466 fn generate_recommendations(metrics: &PerformanceMetrics) -> Vec<OptimizationRecommendation> {
468 let mut recommendations = Vec::new();
469
470 if !metrics.step_size_history.is_empty() {
472 let min_step = metrics
473 .step_size_history
474 .iter()
475 .fold(f64::INFINITY, |a, &b| a.min(b));
476 let max_step = metrics
477 .step_size_history
478 .iter()
479 .fold(f64::NEG_INFINITY, |a, &b| a.max(b));
480
481 if max_step / min_step > 1000.0 {
482 recommendations.push(OptimizationRecommendation {
483 category: "Step Size Control".to_string(),
484 description: "Large step size variations detected".to_string(),
485 suggestion: "Consider more aggressive step size adaptation or better initial step size estimation".to_string(),
486 expected_improvement: 15.0,
487 });
488 }
489 }
490
491 if let Some(rate) = metrics.algorithmmetrics.get("convergence_rate") {
493 if *rate < 1.0 {
494 recommendations.push(OptimizationRecommendation {
495 category: "Convergence".to_string(),
496 description: "Slow convergence detected".to_string(),
497 suggestion: "Consider using higher-order methods or better preconditioning"
498 .to_string(),
499 expected_improvement: 25.0,
500 });
501 }
502 }
503
504 recommendations
505 }
506}
507
/// Summary of convergence behavior derived from a residual history.
#[derive(Debug, Clone, Default)]
pub struct ConvergenceAnalysis {
    /// Negated least-squares slope of log10(residual) vs. iteration;
    /// `None` when too few positive residuals were recorded.
    pub convergence_rate: Option<f64>,
    /// First residual in the history, if any.
    pub initial_residual: Option<f64>,
    /// Last residual in the history, if any.
    pub final_residual: Option<f64>,
    /// Estimated order of convergence (not computed by this module — TODO confirm producer).
    pub convergence_order: Option<f64>,
}
516
/// One performance problem detected by `PerformanceAnalyzer::identify_bottlenecks`.
#[derive(Debug, Clone)]
pub struct PerformanceBottleneck {
    /// Which part of the computation the problem belongs to.
    pub category: BottleneckCategory,
    /// Human-readable description of the finding.
    pub description: String,
    /// How badly the finding impacts performance.
    pub severity: Severity,
    /// Suggested remedies, in no particular order.
    pub suggested_improvements: Vec<String>,
}
525
/// Broad classification of where a bottleneck occurs.
#[derive(Debug, Clone)]
pub enum BottleneckCategory {
    /// A named computation phase dominates total phase time.
    ComputationPhase,
    /// Excessive peak memory usage.
    Memory,
    /// Slow user-supplied function evaluation.
    FunctionEvaluation,
    /// Linear-solver cost (not emitted by this module's analyzer).
    LinearSolver,
    /// Step-size control issues (not emitted by this module's analyzer).
    StepSizeControl,
}
534
/// Impact level of a detected bottleneck.
#[derive(Debug, Clone)]
pub enum Severity {
    Low,
    Medium,
    High,
}
541
/// One tuning suggestion produced by the analyzer.
#[derive(Debug, Clone)]
pub struct OptimizationRecommendation {
    /// Topic of the recommendation (e.g. "Convergence").
    pub category: String,
    /// What was observed in the metrics.
    pub description: String,
    /// What the user should try.
    pub suggestion: String,
    /// Rough expected speedup, as a percentage.
    pub expected_improvement: f64,
}
550
/// Complete analysis output: raw metrics plus all derived findings.
#[derive(Debug, Clone)]
pub struct PerformanceReport {
    /// Snapshot of the collected metrics.
    pub metrics: PerformanceMetrics,
    /// Convergence-rate regression results.
    pub convergence_analysis: ConvergenceAnalysis,
    /// Detected performance problems.
    pub bottlenecks: Vec<PerformanceBottleneck>,
    /// Suggested tuning actions.
    pub recommendations: Vec<OptimizationRecommendation>,
}
559
560impl PerformanceReport {
561 pub fn print_summary(&self) {
563 println!("=== Performance Analysis Report ===");
564 println!(
565 "Total computation time: {:.3}s",
566 self.metrics.total_time.as_secs_f64()
567 );
568 println!(
569 "Function evaluations: {}",
570 self.metrics.function_evaluations
571 );
572
573 if let Some(rate) = self.metrics.algorithmmetrics.get("evaluations_per_second") {
574 println!("Evaluation rate: {rate:.1} evals/sec");
575 }
576
577 println!(
578 "Peak memory usage: {:.1} MB",
579 self.metrics.memory_stats.peak_memory as f64 / (1024.0 * 1024.0)
580 );
581
582 if !self.bottlenecks.is_empty() {
583 println!("\n=== Performance Bottlenecks ===");
584 for bottleneck in &self.bottlenecks {
585 println!("- {:?}: {}", bottleneck.category, bottleneck.description);
586 }
587 }
588
589 if !self.recommendations.is_empty() {
590 println!("\n=== Optimization Recommendations ===");
591 for rec in &self.recommendations {
592 println!(
593 "- {}: {} (Expected improvement: {:.1}%)",
594 rec.category, rec.suggestion, rec.expected_improvement
595 );
596 }
597 }
598 }
599}
600
/// Times a block of code as a named profiler phase.
///
/// Expands to `start_phase($phase)`, the block, then `end_phase($phase)`,
/// and evaluates to the block's result. Note: if `$code` panics or
/// early-returns, `end_phase` is never called and the phase timer is
/// left dangling.
#[macro_export]
macro_rules! profile_block {
    ($profiler:expr, $phase:expr, $code:block) => {{
        $profiler.start_phase($phase);
        let result = $code;
        $profiler.end_phase($phase);
        result
    }};
}
611
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;

    /// End-to-end smoke test of the profiler's recording and finalize path.
    #[test]
    fn test_performance_profiler() {
        let mut profiler = PerformanceProfiler::new();

        // Time a short dummy phase so phase accounting has something to see.
        profiler.start_phase("initialization");
        thread::sleep(Duration::from_millis(10));
        profiler.end_phase("initialization");

        profiler.record_function_evaluation();
        profiler.record_convergence(1e-3);
        profiler.record_step_size(0.01);

        let metrics = profiler.finalize();

        assert!(metrics.total_time > Duration::ZERO);
        assert_eq!(metrics.function_evaluations, 1);
        assert_eq!(metrics.convergence_history.len(), 1);
        assert_eq!(metrics.step_size_history.len(), 1);
    }

    /// Checks the analyzer produces a convergence rate and a coherent report.
    #[test]
    fn test_performance_analysis() {
        // Four decades of residual decay plus a shrinking step history.
        let metrics = PerformanceMetrics {
            convergence_history: vec![1e-1, 1e-2, 1e-3, 1e-4],
            step_size_history: vec![0.1, 0.05, 0.02, 0.01],
            ..PerformanceMetrics::default()
        };

        let analysis = PerformanceAnalyzer::analyze_convergence(&metrics);
        assert!(analysis.convergence_rate.is_some());

        let report = PerformanceAnalyzer::generate_report(&metrics);
        assert!(!report.recommendations.is_empty() || report.bottlenecks.is_empty());
    }
}