hive_gpu/monitoring/
performance_monitor.rs

1//! Performance Monitoring and Benchmarking
2//!
3//! This module provides performance monitoring, benchmarking, and statistics
4//! collection for GPU operations.
5
6use crate::error::Result;
7use std::time::{Duration, Instant};
8use std::collections::HashMap;
9
10/// Performance Monitor for tracking GPU operation performance
11pub struct PerformanceMonitor {
12    /// Operation statistics
13    stats: HashMap<String, OperationStats>,
14    /// Start time for current operation
15    current_operation: Option<(String, Instant)>,
16}
17
/// Aggregated timing figures for one named operation.
#[derive(Clone, Debug)]
pub struct OperationStats {
    /// Name the samples were recorded under.
    pub operation: String,
    /// How many samples have been recorded.
    pub count: usize,
    /// Sum of all recorded durations.
    pub total_duration: Duration,
    /// Mean duration per sample.
    pub avg_duration: Duration,
    /// Shortest recorded duration.
    pub min_duration: Duration,
    /// Longest recorded duration.
    pub max_duration: Duration,
    /// Throughput expressed as samples per second.
    pub ops_per_second: f64,
}
36
37/// Overall performance statistics
38#[derive(Debug, Clone)]
39pub struct PerformanceStats {
40    /// Total operations performed
41    pub total_operations: usize,
42    /// Total duration
43    pub total_duration: Duration,
44    /// Average operations per second
45    pub avg_ops_per_second: f64,
46    /// Operation breakdown
47    pub operation_stats: Vec<OperationStats>,
48}
49
/// Outcome of benchmarking a single operation.
///
/// This file only declares the type; nothing here constructs it.
#[derive(Clone, Debug)]
pub struct BenchmarkResult {
    /// Name of the benchmarked operation.
    pub operation: String,
    /// Measured duration, in milliseconds.
    pub duration_ms: f64,
    /// Throughput, in operations per second.
    pub throughput: f64,
    /// Memory usage, in bytes.
    pub memory_usage: usize,
    /// Success rate, from 0.0 to 1.0.
    pub success_rate: f32,
}
64
65impl PerformanceMonitor {
66    /// Create a new performance monitor
67    pub fn new() -> Self {
68        Self {
69            stats: HashMap::new(),
70            current_operation: None,
71        }
72    }
73
74    /// Start timing an operation
75    pub fn start_operation(&mut self, operation: String) {
76        self.current_operation = Some((operation, Instant::now()));
77    }
78
79    /// End timing the current operation
80    pub fn end_operation(&mut self) -> Result<Duration> {
81        if let Some((operation, start_time)) = self.current_operation.take() {
82            let duration = start_time.elapsed();
83            self.record_operation(operation, duration);
84            Ok(duration)
85        } else {
86            Err(crate::error::HiveGpuError::Other("No operation in progress".to_string()))
87        }
88    }
89
90    /// Record an operation with its duration
91    pub fn record_operation(&mut self, operation: String, duration: Duration) {
92        let stats = self.stats.entry(operation.clone()).or_insert(OperationStats {
93            operation: operation.clone(),
94            count: 0,
95            total_duration: Duration::ZERO,
96            avg_duration: Duration::ZERO,
97            min_duration: Duration::MAX,
98            max_duration: Duration::ZERO,
99            ops_per_second: 0.0,
100        });
101
102        stats.count += 1;
103        stats.total_duration += duration;
104        stats.avg_duration = stats.total_duration / stats.count as u32;
105        stats.min_duration = stats.min_duration.min(duration);
106        stats.max_duration = stats.max_duration.max(duration);
107        stats.ops_per_second = stats.count as f64 / stats.total_duration.as_secs_f64();
108    }
109
110    /// Get performance statistics
111    pub fn get_performance_stats(&self) -> PerformanceStats {
112        let total_operations: usize = self.stats.values().map(|s| s.count).sum();
113        let total_duration: Duration = self.stats.values().map(|s| s.total_duration).sum();
114        let avg_ops_per_second = if total_duration.as_secs_f64() > 0.0 {
115            total_operations as f64 / total_duration.as_secs_f64()
116        } else {
117            0.0
118        };
119
120        PerformanceStats {
121            total_operations,
122            total_duration,
123            avg_ops_per_second,
124            operation_stats: self.stats.values().cloned().collect(),
125        }
126    }
127
128    /// Get statistics for a specific operation
129    pub fn get_operation_stats(&self, operation: &str) -> Option<&OperationStats> {
130        self.stats.get(operation)
131    }
132
133    /// Clear all statistics
134    pub fn clear_stats(&mut self) {
135        self.stats.clear();
136    }
137
138    /// Generate performance report
139    pub fn generate_report(&self) -> String {
140        let stats = self.get_performance_stats();
141        let mut report = String::new();
142        
143        report.push_str("Performance Report:\n");
144        report.push_str(&format!("  Total Operations: {}\n", stats.total_operations));
145        report.push_str(&format!("  Total Duration: {:.2}s\n", stats.total_duration.as_secs_f64()));
146        report.push_str(&format!("  Average Ops/sec: {:.2}\n", stats.avg_ops_per_second));
147        
148        report.push_str("\nOperation Breakdown:\n");
149        for op_stats in &stats.operation_stats {
150            report.push_str(&format!("  {}:\n", op_stats.operation));
151            report.push_str(&format!("    Count: {}\n", op_stats.count));
152            report.push_str(&format!("    Avg Duration: {:.2}ms\n", op_stats.avg_duration.as_secs_f64() * 1000.0));
153            report.push_str(&format!("    Min Duration: {:.2}ms\n", op_stats.min_duration.as_secs_f64() * 1000.0));
154            report.push_str(&format!("    Max Duration: {:.2}ms\n", op_stats.max_duration.as_secs_f64() * 1000.0));
155            report.push_str(&format!("    Ops/sec: {:.2}\n", op_stats.ops_per_second));
156        }
157        
158        report
159    }
160}
161
162impl Default for PerformanceMonitor {
163    fn default() -> Self {
164        Self::new()
165    }
166}