1use std::collections::{HashMap, VecDeque};
8use std::sync::{Arc, Mutex, RwLock};
9use std::thread;
10use std::time::{Duration, Instant};
11
12use scirs2_core::ndarray::{Array, ArrayView, Dimension, IxDyn};
13use scirs2_core::numeric::{Float, FromPrimitive};
14
15use crate::error::NdimageResult;
16
/// Collects timing and memory measurements for array operations.
///
/// Every piece of mutable state sits behind an `Arc`-wrapped lock so that it
/// can be shared with the background threads spawned by `start_monitoring`.
#[derive(Debug)]
pub struct PerformanceProfiler {
    /// Recorded timings, keyed by operation name.
    timing_records: Arc<RwLock<HashMap<String, Vec<OperationTiming>>>>,
    /// Memory samples together with current and peak usage.
    memory_tracker: Arc<Mutex<MemoryTracker>>,
    /// Statistics aggregated from `timing_records`.
    metrics_aggregator: Arc<Mutex<MetricsAggregator>>,
    /// Holder for bottlenecks and optimization recommendations.
    optimizer: Arc<Mutex<OptimizationEngine>>,
    /// `true` while the background monitoring threads should keep looping.
    monitoring_active: Arc<Mutex<bool>>,
    /// Settings supplied at construction time.
    config: ProfilerConfig,
}
33
/// Tunable settings for a [`PerformanceProfiler`].
#[derive(Debug, Clone)]
pub struct ProfilerConfig {
    /// Maximum number of timing records kept per operation name; the oldest
    /// record is discarded once this limit is exceeded.
    pub max_records_per_operation: usize,
    /// Pause between two memory samples taken by the monitoring thread.
    pub memory_sampling_interval: Duration,
    /// SIMD profiling switch.
    /// NOTE(review): not read by any code visible in this file — confirm usage.
    pub enable_simd_profiling: bool,
    /// Cache analysis switch.
    /// NOTE(review): not read by any code visible in this file — confirm usage.
    pub enable_cache_analysis: bool,
    /// Pause between two metric aggregation passes of the monitoring thread.
    pub reporting_interval: Duration,
}
47
48impl Default for ProfilerConfig {
49 fn default() -> Self {
50 Self {
51 max_records_per_operation: 1000,
52 memory_sampling_interval: Duration::from_millis(100),
53 enable_simd_profiling: true,
54 enable_cache_analysis: true,
55 reporting_interval: Duration::from_secs(30),
56 }
57 }
58}
59
/// A single recorded execution of a named operation.
#[derive(Debug, Clone)]
pub struct OperationTiming {
    /// Name the operation was recorded under.
    pub name: String,
    /// Shape of the input array.
    pub input_dimensions: Vec<usize>,
    /// Element type of the input, as reported by `std::any::type_name`.
    pub data_type: String,
    /// Execution time supplied by the caller.
    pub execution_time: Duration,
    /// Amount of memory allocated, as supplied by the caller
    /// (presumably bytes — confirm against callers).
    pub memory_allocated: usize,
    /// Peak usage known to the memory tracker when the record was created.
    pub memory_peak: usize,
    /// Heuristic SIMD utilization estimate derived from the operation name.
    pub simd_utilization: f64,
    /// Heuristic cache hit ratio estimate derived from the input element count.
    pub cache_hit_ratio: f64,
    /// Moment the record was created.
    pub timestamp: Instant,
    /// Free-form key/value pairs supplied by the caller.
    pub metadata: HashMap<String, String>,
}
83
/// Keeps the latest, peak and recent memory usage samples.
#[derive(Debug)]
pub struct MemoryTracker {
    /// Most recently sampled usage.
    current_usage: usize,
    /// Largest usage seen so far.
    peak_usage: usize,
    /// Time-stamped samples; entries older than one hour are pruned on update.
    usagehistory: VecDeque<(Instant, usize)>,
    /// Named allocation sizes.
    /// NOTE(review): never written by code visible in this file.
    allocations: HashMap<String, usize>,
}
95
96impl Default for MemoryTracker {
97 fn default() -> Self {
98 Self {
99 current_usage: 0,
100 peak_usage: 0,
101 usagehistory: VecDeque::new(),
102 allocations: HashMap::new(),
103 }
104 }
105}
106
/// Holds statistics derived from the recorded operation timings.
#[derive(Debug)]
pub struct MetricsAggregator {
    /// Aggregated statistics, keyed by operation name.
    operationmetrics: HashMap<String, AggregatedMetrics>,
    /// Totals across every recorded operation.
    systemmetrics: SystemMetrics,
    /// Trend figures (currently filled with fixed values by `update_trends`).
    trends: PerformanceTrends,
}
116
/// Summary statistics for all records of one operation.
#[derive(Debug, Clone)]
pub struct AggregatedMetrics {
    /// Number of records the statistics were computed from.
    pub operation_count: usize,
    /// Mean execution time.
    pub avg_execution_time: Duration,
    /// Shortest execution time.
    pub min_execution_time: Duration,
    /// Longest execution time.
    pub max_execution_time: Duration,
    /// Population standard deviation of the execution time, truncated to
    /// whole nanoseconds.
    pub std_dev_execution_time: Duration,
    /// Mean of the recorded `memory_allocated` values (integer division).
    pub avg_memory_usage: usize,
    /// Mean of the recorded SIMD utilization estimates.
    pub avg_simd_utilization: f64,
    /// Mean of the recorded cache hit ratio estimates.
    pub avg_cache_hit_ratio: f64,
    /// Arithmetic mean of `avg_simd_utilization` and `avg_cache_hit_ratio`.
    pub efficiency_score: f64,
}
138
/// Totals across every recorded operation.
#[derive(Debug, Clone)]
pub struct SystemMetrics {
    /// Number of timing records across all operations.
    pub total_operations: usize,
    /// Sum of all recorded execution times.
    pub total_execution_time: Duration,
    /// Sum of all recorded `memory_allocated` values.
    pub total_memory_allocated: usize,
    /// System load figure.
    /// NOTE(review): initialised to 0.0 and never updated in this file.
    pub avg_system_load: f64,
    /// SIMD capability utilization figure.
    /// NOTE(review): initialised to 0.0 and never updated in this file.
    pub simd_capability_utilization: f64,
}
152
/// Trend indicators for the collected metrics.
///
/// NOTE(review): the only writer visible in this file (`update_trends`) stores
/// fixed values (0.0 for the trends, 0.5 for the confidence), so the sign
/// convention and scale of these fields cannot be confirmed from here.
#[derive(Debug, Clone)]
pub struct PerformanceTrends {
    /// Trend of the execution time.
    pub execution_time_trend: f64,
    /// Trend of the memory usage.
    pub memory_usage_trend: f64,
    /// Trend of the efficiency score.
    pub efficiency_trend: f64,
    /// Confidence attached to the trend values.
    pub trend_confidence: f64,
}
164
/// Storage for bottlenecks, recommendations and past optimization results.
///
/// NOTE(review): all three lists start empty and nothing visible in this file
/// appends to them.
#[derive(Debug)]
pub struct OptimizationEngine {
    /// Known performance bottlenecks.
    bottlenecks: Vec<PerformanceBottleneck>,
    /// Current optimization recommendations.
    recommendations: Vec<OptimizationRecommendation>,
    /// Record of optimizations and their measured effect.
    optimizationhistory: Vec<OptimizationImpact>,
}
174
/// Describes one performance bottleneck attributed to an operation.
#[derive(Debug, Clone)]
pub struct PerformanceBottleneck {
    /// Category of the bottleneck.
    pub bottleneck_type: BottleneckType,
    /// Name of the affected operation.
    pub operation: String,
    /// Severity; the report prints it multiplied by 100 with a `%` sign, so it
    /// is presumably a fraction in `0.0..=1.0` — confirm with the producer.
    pub severity: f64,
    /// Human-readable explanation shown in the report.
    pub description: String,
    /// Estimated impact (scale not established by code in this file).
    pub impact_estimate: f64,
}
188
/// Categories a [`PerformanceBottleneck`] can be filed under.
///
/// The variants are only declared here; no code visible in this file
/// constructs or matches on them.
#[derive(Debug, Clone)]
pub enum BottleneckType {
    /// Limited by memory bandwidth.
    MemoryBandwidth,
    /// Dominated by cache misses.
    CacheMisses,
    /// SIMD is not being used effectively.
    UnoptimizedSIMD,
    /// The chosen algorithm is suboptimal.
    SuboptimalAlgorithm,
    /// Memory fragmentation.
    MemoryFragmentation,
    /// Contention between threads.
    ThreadContention,
    /// Limited by I/O.
    IOBottleneck,
}
199
/// A suggested optimization for a specific operation.
#[derive(Debug, Clone)]
pub struct OptimizationRecommendation {
    /// Category of the suggestion.
    pub recommendation_type: RecommendationType,
    /// Name of the operation the suggestion applies to; used as the filter key
    /// by `get_optimization_recommendations`.
    pub operation: String,
    /// Priority; the report prints it multiplied by 100 (presumably a fraction
    /// in `0.0..=1.0` — confirm with the producer).
    pub priority: f64,
    /// Estimated improvement; the report prints it multiplied by 100 with a
    /// `%` sign (presumably a fraction — confirm with the producer).
    pub estimated_improvement: f64,
    /// Implementation difficulty (scale not established by code in this file).
    pub implementation_difficulty: f64,
    /// Human-readable explanation.
    pub description: String,
    /// Concrete hints for implementing the suggestion.
    pub implementation_hints: Vec<String>,
}
217
/// Categories an [`OptimizationRecommendation`] can be filed under.
///
/// The report prints the variant through its `Debug` representation.
#[derive(Debug, Clone)]
pub enum RecommendationType {
    /// Turn on SIMD code paths.
    EnableSIMD,
    /// Improve the memory layout.
    OptimizeMemoryLayout,
    /// Switch to a different algorithm.
    UseAlternativeAlgorithm,
    /// Improve cache efficiency.
    IncreaseCacheEfficiency,
    /// Cut down on memory allocations.
    ReduceMemoryAllocations,
    /// Turn on parallel execution.
    EnableParallelization,
    /// Improve GPU usage.
    OptimizeGPUUsage,
}
228
/// Before/after comparison for one applied optimization.
///
/// NOTE(review): no code visible in this file constructs this type.
#[derive(Debug, Clone)]
pub struct OptimizationImpact {
    /// Name or description of the optimization.
    pub optimization: String,
    /// Metrics measured before the optimization.
    pub beforemetrics: AggregatedMetrics,
    /// Metrics measured after the optimization.
    pub aftermetrics: AggregatedMetrics,
    /// Improvement achieved (scale not established by code in this file).
    pub improvement_achieved: f64,
    /// Moment the impact was recorded.
    pub timestamp: Instant,
}
242
243impl PerformanceProfiler {
244 pub fn new(config: ProfilerConfig) -> Self {
246 Self {
247 timing_records: Arc::new(RwLock::new(HashMap::new())),
248 memory_tracker: Arc::new(Mutex::new(MemoryTracker::default())),
249 metrics_aggregator: Arc::new(Mutex::new(MetricsAggregator::new())),
250 optimizer: Arc::new(Mutex::new(OptimizationEngine::new())),
251 monitoring_active: Arc::new(Mutex::new(false)),
252 config,
253 }
254 }
255
256 pub fn start_monitoring(&self) -> NdimageResult<()> {
258 let mut active = self.monitoring_active.lock().expect("Operation failed");
259 if *active {
260 return Ok(()); }
262 *active = true;
263
264 let memory_tracker = Arc::clone(&self.memory_tracker);
266 let sampling_interval = self.config.memory_sampling_interval;
267 let monitoring_active = Arc::clone(&self.monitoring_active);
268
269 thread::spawn(move || {
270 while *monitoring_active.lock().expect("Operation failed") {
271 let current_memory = get_current_memory_usage();
272 let mut tracker = memory_tracker.lock().expect("Operation failed");
273 tracker.update_memory_usage(current_memory);
274 drop(tracker);
275
276 thread::sleep(sampling_interval);
277 }
278 });
279
280 let metrics_aggregator = Arc::clone(&self.metrics_aggregator);
282 let timing_records = Arc::clone(&self.timing_records);
283 let reporting_interval = self.config.reporting_interval;
284 let monitoring_active = Arc::clone(&self.monitoring_active);
285
286 thread::spawn(move || {
287 while *monitoring_active.lock().expect("Operation failed") {
288 {
289 let records = timing_records.read().expect("Operation failed");
290 let mut aggregator = metrics_aggregator.lock().expect("Operation failed");
291 aggregator.updatemetrics(&records);
292 }
293
294 thread::sleep(reporting_interval);
295 }
296 });
297
298 Ok(())
299 }
300
301 pub fn stop_monitoring(&self) {
303 let mut active = self.monitoring_active.lock().expect("Operation failed");
304 *active = false;
305 }
306
307 pub fn record_operation<T, D>(
309 &self,
310 operation_name: &str,
311 input: &ArrayView<T, D>,
312 execution_time: Duration,
313 memory_allocated: usize,
314 metadata: HashMap<String, String>,
315 ) -> NdimageResult<()>
316 where
317 T: Float + FromPrimitive,
318 D: Dimension,
319 {
320 let timing = OperationTiming {
321 name: operation_name.to_string(),
322 input_dimensions: input.shape().to_vec(),
323 data_type: std::any::type_name::<T>().to_string(),
324 execution_time,
325 memory_allocated,
326 memory_peak: self
327 .memory_tracker
328 .lock()
329 .expect("Operation failed")
330 .peak_usage,
331 simd_utilization: self.estimate_simd_utilization(operation_name, input.len()),
332 cache_hit_ratio: self.estimate_cache_hit_ratio(input.len()),
333 timestamp: Instant::now(),
334 metadata,
335 };
336
337 let mut records = self.timing_records.write().expect("Operation failed");
338 let operation_records = records
339 .entry(operation_name.to_string())
340 .or_insert_with(Vec::new);
341 operation_records.push(timing);
342
343 if operation_records.len() > self.config.max_records_per_operation {
345 operation_records.remove(0);
346 }
347
348 Ok(())
349 }
350
351 pub fn generate_performance_report(&self) -> PerformanceReport {
353 let _records = self.timing_records.read().expect("Operation failed");
354 let aggregator = self.metrics_aggregator.lock().expect("Operation failed");
355 let optimizer = self.optimizer.lock().expect("Operation failed");
356 let memory_tracker = self.memory_tracker.lock().expect("Operation failed");
357
358 PerformanceReport {
359 operationmetrics: aggregator.operationmetrics.clone(),
360 systemmetrics: aggregator.systemmetrics.clone(),
361 trends: aggregator.trends.clone(),
362 bottlenecks: optimizer.bottlenecks.clone(),
363 recommendations: optimizer.recommendations.clone(),
364 memory_statistics: memory_tracker.get_statistics(),
365 timestamp: Instant::now(),
366 }
367 }
368
369 pub fn get_optimization_recommendations(
371 &self,
372 operation_name: &str,
373 ) -> Vec<OptimizationRecommendation> {
374 let optimizer = self.optimizer.lock().expect("Operation failed");
375 optimizer
376 .recommendations
377 .iter()
378 .filter(|rec| rec.operation == operation_name)
379 .cloned()
380 .collect()
381 }
382
383 pub fn benchmark_operation<F, T>(
385 &self,
386 operation_name: &str,
387 operation: F,
388 test_sizes: &[Vec<usize>],
389 iterations: usize,
390 ) -> NdimageResult<BenchmarkResults>
391 where
392 F: Fn(&ArrayView<T, IxDyn>) -> NdimageResult<Array<T, IxDyn>>,
393 T: Float + FromPrimitive + Clone + Default,
394 {
395 let mut results = Vec::new();
396
397 for size in test_sizes {
398 let input = Array::default(size.as_slice());
399 let input_view = input.view();
400
401 let mut timings = Vec::new();
402 let mut memory_usages = Vec::new();
403
404 for _ in 0..iterations {
405 let start_memory = get_current_memory_usage();
406 let start_time = Instant::now();
407
408 let _result = operation(&input_view)?;
409
410 let execution_time = start_time.elapsed();
411 let end_memory = get_current_memory_usage();
412 let memory_used = end_memory.saturating_sub(start_memory);
413
414 timings.push(execution_time);
415 memory_usages.push(memory_used);
416 }
417
418 let avg_time = timings.iter().sum::<Duration>() / timings.len() as u32;
419 let min_time = timings.iter().min().expect("Operation failed").clone();
420 let max_time = timings.iter().max().expect("Operation failed").clone();
421 let avg_memory = memory_usages.iter().sum::<usize>() / memory_usages.len();
422
423 results.push(BenchmarkResult {
424 array_size: size.clone(),
425 average_time: avg_time,
426 min_time,
427 max_time,
428 average_memory: avg_memory,
429 throughput: calculate_throughput(size, avg_time),
430 });
431 }
432
433 Ok(BenchmarkResults {
434 operation_name: operation_name.to_string(),
435 results,
436 timestamp: Instant::now(),
437 })
438 }
439
440 fn estimate_simd_utilization(&self, operation_name: &str, _arraysize: usize) -> f64 {
443 match operation_name {
446 name if name.contains("simd") => 0.85,
447 name if name.contains("convolution") => 0.70,
448 name if name.contains("filter") => 0.60,
449 _ => 0.30,
450 }
451 }
452
453 fn estimate_cache_hit_ratio(&self, arraysize: usize) -> f64 {
454 if arraysize < 1024 * 1024 {
456 0.95
458 } else if arraysize < 16 * 1024 * 1024 {
459 0.80
461 } else {
462 0.60
463 }
464 }
465}
466
467impl MetricsAggregator {
468 fn new() -> Self {
469 Self {
470 operationmetrics: HashMap::new(),
471 systemmetrics: SystemMetrics {
472 total_operations: 0,
473 total_execution_time: Duration::ZERO,
474 total_memory_allocated: 0,
475 avg_system_load: 0.0,
476 simd_capability_utilization: 0.0,
477 },
478 trends: PerformanceTrends {
479 execution_time_trend: 0.0,
480 memory_usage_trend: 0.0,
481 efficiency_trend: 0.0,
482 trend_confidence: 0.0,
483 },
484 }
485 }
486
487 fn updatemetrics(&mut self, records: &HashMap<String, Vec<OperationTiming>>) {
488 for (operation_name, timings) in records {
489 let metrics = self.calculate_aggregatedmetrics(timings);
490 self.operationmetrics
491 .insert(operation_name.clone(), metrics);
492 }
493
494 self.update_systemmetrics(records);
495 self.update_trends(records);
496 }
497
498 fn calculate_aggregatedmetrics(&self, timings: &[OperationTiming]) -> AggregatedMetrics {
499 if timings.is_empty() {
500 return AggregatedMetrics {
501 operation_count: 0,
502 avg_execution_time: Duration::ZERO,
503 min_execution_time: Duration::ZERO,
504 max_execution_time: Duration::ZERO,
505 std_dev_execution_time: Duration::ZERO,
506 avg_memory_usage: 0,
507 avg_simd_utilization: 0.0,
508 avg_cache_hit_ratio: 0.0,
509 efficiency_score: 0.0,
510 };
511 }
512
513 let execution_times: Vec<Duration> = timings.iter().map(|t| t.execution_time).collect();
514 let avg_execution_time =
515 execution_times.iter().sum::<Duration>() / execution_times.len() as u32;
516 let min_execution_time = execution_times
517 .iter()
518 .min()
519 .expect("Operation failed")
520 .clone();
521 let max_execution_time = execution_times
522 .iter()
523 .max()
524 .expect("Operation failed")
525 .clone();
526
527 let avg_memory_usage =
528 timings.iter().map(|t| t.memory_allocated).sum::<usize>() / timings.len();
529 let avg_simd_utilization =
530 timings.iter().map(|t| t.simd_utilization).sum::<f64>() / timings.len() as f64;
531 let avg_cache_hit_ratio =
532 timings.iter().map(|t| t.cache_hit_ratio).sum::<f64>() / timings.len() as f64;
533
534 let variance = execution_times
536 .iter()
537 .map(|t| {
538 let diff = t.as_nanos() as f64 - avg_execution_time.as_nanos() as f64;
539 diff * diff
540 })
541 .sum::<f64>()
542 / execution_times.len() as f64;
543 let std_dev_nanos = variance.sqrt() as u64;
544 let std_dev_execution_time = Duration::from_nanos(std_dev_nanos);
545
546 let efficiency_score = (avg_simd_utilization + avg_cache_hit_ratio) / 2.0;
548
549 AggregatedMetrics {
550 operation_count: timings.len(),
551 avg_execution_time,
552 min_execution_time,
553 max_execution_time,
554 std_dev_execution_time,
555 avg_memory_usage,
556 avg_simd_utilization,
557 avg_cache_hit_ratio,
558 efficiency_score,
559 }
560 }
561
562 fn update_systemmetrics(&mut self, records: &HashMap<String, Vec<OperationTiming>>) {
563 let total_operations: usize = records.values().map(|v| v.len()).sum();
564 let total_execution_time: Duration =
565 records.values().flatten().map(|t| t.execution_time).sum();
566 let total_memory_allocated: usize =
567 records.values().flatten().map(|t| t.memory_allocated).sum();
568
569 self.systemmetrics.total_operations = total_operations;
570 self.systemmetrics.total_execution_time = total_execution_time;
571 self.systemmetrics.total_memory_allocated = total_memory_allocated;
572 }
573
574 fn update_trends(&mut self, _records: &HashMap<String, Vec<OperationTiming>>) {
575 self.trends.execution_time_trend = 0.0; self.trends.memory_usage_trend = 0.0; self.trends.efficiency_trend = 0.0; self.trends.trend_confidence = 0.5; }
582}
583
584impl OptimizationEngine {
585 fn new() -> Self {
586 Self {
587 bottlenecks: Vec::new(),
588 recommendations: Vec::new(),
589 optimizationhistory: Vec::new(),
590 }
591 }
592}
593
594impl MemoryTracker {
595 fn update_memory_usage(&mut self, usage: usize) {
596 self.current_usage = usage;
597 self.peak_usage = self.peak_usage.max(usage);
598
599 let now = Instant::now();
600 self.usagehistory.push_back((now, usage));
601
602 let cutoff = now - Duration::from_secs(3600);
604 while self
605 .usagehistory
606 .front()
607 .map_or(false, |&(time, _)| time < cutoff)
608 {
609 self.usagehistory.pop_front();
610 }
611 }
612
613 fn get_statistics(&self) -> MemoryStatistics {
614 let recent_usages: Vec<usize> = self.usagehistory.iter().map(|(_, usage)| *usage).collect();
615 let avg_usage = if recent_usages.is_empty() {
616 0
617 } else {
618 recent_usages.iter().sum::<usize>() / recent_usages.len()
619 };
620
621 MemoryStatistics {
622 current_usage: self.current_usage,
623 peak_usage: self.peak_usage,
624 average_usage: avg_usage,
625 allocations: self.allocations.clone(),
626 }
627 }
628}
629
/// Snapshot of the profiler state produced by
/// `PerformanceProfiler::generate_performance_report`.
#[derive(Debug, Clone)]
pub struct PerformanceReport {
    /// Aggregated statistics, keyed by operation name.
    pub operationmetrics: HashMap<String, AggregatedMetrics>,
    /// Totals across every recorded operation.
    pub systemmetrics: SystemMetrics,
    /// Trend values copied from the metrics aggregator.
    pub trends: PerformanceTrends,
    /// Bottlenecks copied from the optimization engine.
    pub bottlenecks: Vec<PerformanceBottleneck>,
    /// Recommendations copied from the optimization engine.
    pub recommendations: Vec<OptimizationRecommendation>,
    /// Memory figures taken from the memory tracker.
    pub memory_statistics: MemoryStatistics,
    /// Moment the report was generated.
    pub timestamp: Instant,
}
649
/// Memory figures exported by the memory tracker.
///
/// The report prints the usage values as "MB" after dividing by 1024², which
/// suggests they are byte counts — confirm against the sampling source.
#[derive(Debug, Clone)]
pub struct MemoryStatistics {
    /// Most recently sampled usage.
    pub current_usage: usize,
    /// Largest usage seen so far.
    pub peak_usage: usize,
    /// Integer mean of the samples held in the usage history (0 if empty).
    pub average_usage: usize,
    /// Copy of the tracker's named allocation sizes.
    pub allocations: HashMap<String, usize>,
}
661
/// Outcome of `PerformanceProfiler::benchmark_operation` for one operation.
#[derive(Debug, Clone)]
pub struct BenchmarkResults {
    /// Name of the benchmarked operation.
    pub operation_name: String,
    /// One entry per benchmarked array shape, in the order they were given.
    pub results: Vec<BenchmarkResult>,
    /// Moment the benchmark finished.
    pub timestamp: Instant,
}
671
/// Benchmark measurements for a single array shape.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Shape of the benchmarked input array.
    pub array_size: Vec<usize>,
    /// Mean execution time over all iterations.
    pub average_time: Duration,
    /// Shortest execution time.
    pub min_time: Duration,
    /// Longest execution time.
    pub max_time: Duration,
    /// Mean difference between the memory usage sampled after and before each
    /// iteration (negative differences count as 0).
    pub average_memory: usize,
    /// Elements processed per second, based on `average_time`.
    pub throughput: f64,
}
687
688impl PerformanceReport {
689 pub fn display(&self) {
691 println!("\n=== Performance Analysis Report ===");
692 println!("Generated at: {:?}", self.timestamp);
693
694 println!("\n--- System Metrics ---");
695 println!("Total Operations: {}", self.systemmetrics.total_operations);
696 println!(
697 "Total Execution Time: {:.3}s",
698 self.systemmetrics.total_execution_time.as_secs_f64()
699 );
700 println!(
701 "Total Memory Allocated: {:.2} MB",
702 self.systemmetrics.total_memory_allocated as f64 / (1024.0 * 1024.0)
703 );
704
705 println!("\n--- Memory Statistics ---");
706 println!(
707 "Current Usage: {:.2} MB",
708 self.memory_statistics.current_usage as f64 / (1024.0 * 1024.0)
709 );
710 println!(
711 "Peak Usage: {:.2} MB",
712 self.memory_statistics.peak_usage as f64 / (1024.0 * 1024.0)
713 );
714 println!(
715 "Average Usage: {:.2} MB",
716 self.memory_statistics.average_usage as f64 / (1024.0 * 1024.0)
717 );
718
719 println!("\n--- Top Operations by Time ---");
720 let mut operations: Vec<_> = self.operationmetrics.iter().collect();
721 operations.sort_by_key(|item| std::cmp::Reverse(item.1.avg_execution_time));
722
723 for (name, metrics) in operations.iter().take(5) {
724 println!(
725 "{}: {:.3}ms avg, {:.1}% SIMD, {:.1}% cache hits",
726 name,
727 metrics.avg_execution_time.as_secs_f64() * 1000.0,
728 metrics.avg_simd_utilization * 100.0,
729 metrics.avg_cache_hit_ratio * 100.0
730 );
731 }
732
733 if !self.recommendations.is_empty() {
734 println!("\n--- Optimization Recommendations ---");
735 for (i, rec) in self.recommendations.iter().take(3).enumerate() {
736 println!(
737 "{}. {} for '{}' (Priority: {:.1}, Est. improvement: {:.1}%)",
738 i + 1,
739 format!("{:?}", rec.recommendation_type),
740 rec.operation,
741 rec.priority * 100.0,
742 rec.estimated_improvement * 100.0
743 );
744 }
745 }
746
747 if !self.bottlenecks.is_empty() {
748 println!("\n--- Performance Bottlenecks ---");
749 for bottleneck in &self.bottlenecks {
750 println!(
751 "- {:?} in '{}': {} (Severity: {:.1}%)",
752 bottleneck.bottleneck_type,
753 bottleneck.operation,
754 bottleneck.description,
755 bottleneck.severity * 100.0
756 );
757 }
758 }
759 }
760
761 pub fn to_json(&self) -> serde_json::Result<String> {
763 Ok(format!(
765 "{{\"timestamp\": \"{:?}\", \"summary\": \"Performance report generated\"}}",
766 self.timestamp
767 ))
768 }
769}
770
/// Reports the current memory usage of the process.
///
/// Placeholder: this always returns the fixed value `100 * 1024 * 1024`
/// (104,857,600) and does not query the operating system.
#[allow(dead_code)]
fn get_current_memory_usage() -> usize {
    const MIB: usize = 1024 * 1024;
    100 * MIB
}
781
/// Computes the throughput in elements per second.
///
/// The element count is the product of all entries of `array_size` (an empty
/// shape counts as one element). A zero `executiontime` yields `0.0` instead
/// of dividing by zero.
#[allow(dead_code)]
fn calculate_throughput(array_size: &[usize], executiontime: Duration) -> f64 {
    let element_count = array_size.iter().fold(1usize, |acc, &extent| acc * extent);

    if executiontime.is_zero() {
        return 0.0;
    }

    element_count as f64 / executiontime.as_secs_f64()
}
793
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    /// A freshly built profiler must not report monitoring as active.
    #[test]
    fn test_profiler_creation() {
        let profiler = PerformanceProfiler::new(ProfilerConfig::default());

        let is_active = *profiler
            .monitoring_active
            .lock()
            .expect("Operation failed");
        assert!(!is_active);
    }

    /// Recording one operation stores exactly one entry under its name.
    #[test]
    fn test_operation_recording() {
        let profiler = PerformanceProfiler::new(ProfilerConfig::default());
        let input = Array2::<f64>::zeros((100, 100));

        let outcome = profiler.record_operation(
            "test_operation",
            &input.view(),
            Duration::from_millis(10),
            1024,
            HashMap::new(),
        );
        assert!(outcome.is_ok());

        let records = profiler.timing_records.read().expect("Operation failed");
        let recorded = records.get("test_operation");
        assert!(recorded.is_some());
        assert_eq!(recorded.map(Vec::len), Some(1));
    }

    /// A report generated without any recorded operation carries no
    /// per-operation metrics and a zero operation total.
    #[test]
    fn test_performance_report_generation() {
        let report =
            PerformanceProfiler::new(ProfilerConfig::default()).generate_performance_report();

        assert!(report.operationmetrics.is_empty());
        assert_eq!(report.systemmetrics.total_operations, 0);
    }
}