1use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use std::time::{Duration, Instant};
10
/// Aggregated performance telemetry for gradient computation across layers.
///
/// Per-layer measurements are folded in via `record_layer_performance`;
/// the aggregate fields below are recomputed after every recording.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GradientPerformanceTracker {
    // Count of all gradient computations recorded, across every layer.
    pub total_gradient_computations: usize,
    // Mean of the per-layer average computation times.
    pub average_computation_time: Duration,
    // Sum of the per-layer average memory usages, in bytes.
    pub memory_usage_bytes: usize,
    // Inverse of `average_computation_time` in seconds (0.0 until measured).
    pub throughput_gradients_per_second: f64,
    // Layers whose mean time is a statistical outlier (see `identify_bottlenecks`).
    pub bottleneck_layers: Vec<String>,
    // Running per-layer statistics, keyed by layer name.
    pub layer_performance_map: HashMap<String, LayerPerformanceMetrics>,
    // System resource gauges; zeroed here, populated externally.
    pub resource_utilization: ResourceUtilization,
    // Chronological snapshots consumed by `get_performance_trends`.
    pub performance_history: Vec<PerformanceSnapshot>,
}
23
24impl Default for GradientPerformanceTracker {
25 fn default() -> Self {
26 Self {
27 total_gradient_computations: 0,
28 average_computation_time: Duration::from_millis(0),
29 memory_usage_bytes: 0,
30 throughput_gradients_per_second: 0.0,
31 bottleneck_layers: Vec::new(),
32 layer_performance_map: HashMap::new(),
33 resource_utilization: ResourceUtilization::default(),
34 performance_history: Vec::new(),
35 }
36 }
37}
38
39impl GradientPerformanceTracker {
40 pub fn new() -> Self {
41 Self::default()
42 }
43
44 pub fn start_timing(&mut self, layer_name: &str) -> PerformanceTimer {
45 PerformanceTimer::new(layer_name.to_string())
46 }
47
48 pub fn record_layer_performance(
49 &mut self,
50 layer_name: &str,
51 computation_time: Duration,
52 memory_used: usize,
53 ) {
54 let metrics = self
55 .layer_performance_map
56 .entry(layer_name.to_string())
57 .or_insert_with(|| LayerPerformanceMetrics::new(layer_name.to_string()));
58
59 metrics.update(computation_time, memory_used);
60 self.total_gradient_computations += 1;
61
62 self.update_overall_metrics();
64 self.identify_bottlenecks();
65 }
66
67 fn update_overall_metrics(&mut self) {
68 if self.layer_performance_map.is_empty() {
69 return;
70 }
71
72 let total_time: Duration =
73 self.layer_performance_map.values().map(|m| m.average_computation_time).sum();
74
75 let total_layers = self.layer_performance_map.len();
76 self.average_computation_time = total_time / total_layers as u32;
77
78 self.memory_usage_bytes =
79 self.layer_performance_map.values().map(|m| m.average_memory_usage).sum();
80
81 if self.average_computation_time.as_secs_f64() > 0.0 {
83 self.throughput_gradients_per_second =
84 1.0 / self.average_computation_time.as_secs_f64();
85 }
86 }
87
88 fn identify_bottlenecks(&mut self) {
89 self.bottleneck_layers.clear();
90
91 if self.layer_performance_map.len() < 2 {
92 return;
93 }
94
95 let times: Vec<f64> = self
97 .layer_performance_map
98 .values()
99 .map(|m| m.average_computation_time.as_secs_f64())
100 .collect();
101
102 let mean = times.iter().sum::<f64>() / times.len() as f64;
103 let variance = times.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / times.len() as f64;
104 let std_dev = variance.sqrt();
105
106 let threshold = mean + 1.5 * std_dev;
108
109 for (layer_name, metrics) in &self.layer_performance_map {
110 if metrics.average_computation_time.as_secs_f64() > threshold {
111 self.bottleneck_layers.push(layer_name.clone());
112 }
113 }
114 }
115
116 pub fn get_performance_trends(&self) -> PerformanceTrends {
117 if self.performance_history.len() < 2 {
118 return PerformanceTrends::default();
119 }
120
121 let recent_snapshots: Vec<&PerformanceSnapshot> =
122 self.performance_history.iter().rev().take(10).collect();
123
124 let older_snapshots: Vec<&PerformanceSnapshot> =
125 self.performance_history.iter().rev().skip(10).take(10).collect();
126
127 if older_snapshots.is_empty() {
128 return PerformanceTrends::default();
129 }
130
131 let recent_avg_throughput = recent_snapshots.iter().map(|s| s.throughput).sum::<f64>()
132 / recent_snapshots.len() as f64;
133
134 let older_avg_throughput = older_snapshots.iter().map(|s| s.throughput).sum::<f64>()
135 / older_snapshots.len() as f64;
136
137 let recent_avg_memory =
138 recent_snapshots.iter().map(|s| s.memory_usage).sum::<usize>() / recent_snapshots.len();
139
140 let older_avg_memory =
141 older_snapshots.iter().map(|s| s.memory_usage).sum::<usize>() / older_snapshots.len();
142
143 PerformanceTrends {
144 throughput_trend: Self::classify_trend(recent_avg_throughput, older_avg_throughput),
145 memory_trend: Self::classify_trend(recent_avg_memory as f64, older_avg_memory as f64),
146 bottleneck_stability: self
147 .analyze_bottleneck_stability(&recent_snapshots, &older_snapshots),
148 overall_performance_direction: self
149 .analyze_overall_direction(&recent_snapshots, &older_snapshots),
150 }
151 }
152
153 fn classify_trend(recent: f64, older: f64) -> TrendDirection {
154 let change_ratio = (recent - older) / older.max(1e-10);
155 let threshold = 0.05; if change_ratio > threshold {
158 TrendDirection::Improving
159 } else if change_ratio < -threshold {
160 TrendDirection::Degrading
161 } else {
162 TrendDirection::Stable
163 }
164 }
165
166 fn analyze_bottleneck_stability(
167 &self,
168 recent: &[&PerformanceSnapshot],
169 older: &[&PerformanceSnapshot],
170 ) -> BottleneckStability {
171 let recent_bottlenecks: std::collections::HashSet<&String> =
172 recent.iter().flat_map(|s| &s.active_bottlenecks).collect();
173
174 let older_bottlenecks: std::collections::HashSet<&String> =
175 older.iter().flat_map(|s| &s.active_bottlenecks).collect();
176
177 let intersection_size = recent_bottlenecks.intersection(&older_bottlenecks).count();
178 let union_size = recent_bottlenecks.union(&older_bottlenecks).count();
179
180 if union_size == 0 {
181 return BottleneckStability::Stable;
182 }
183
184 let stability_ratio = intersection_size as f64 / union_size as f64;
185
186 if stability_ratio > 0.8 {
187 BottleneckStability::Stable
188 } else if stability_ratio > 0.5 {
189 BottleneckStability::Moderate
190 } else {
191 BottleneckStability::Unstable
192 }
193 }
194
195 fn analyze_overall_direction(
196 &self,
197 recent: &[&PerformanceSnapshot],
198 older: &[&PerformanceSnapshot],
199 ) -> PerformanceDirection {
200 let recent_avg_time =
201 recent.iter().map(|s| s.average_time.as_secs_f64()).sum::<f64>() / recent.len() as f64;
202
203 let older_avg_time =
204 older.iter().map(|s| s.average_time.as_secs_f64()).sum::<f64>() / older.len() as f64;
205
206 if recent_avg_time < older_avg_time * 0.95 {
207 PerformanceDirection::Improving
208 } else if recent_avg_time > older_avg_time * 1.05 {
209 PerformanceDirection::Degrading
210 } else {
211 PerformanceDirection::Stable
212 }
213 }
214
215 pub fn generate_optimization_recommendations(&self) -> Vec<OptimizationRecommendation> {
216 let mut recommendations = Vec::new();
217
218 for layer_name in &self.bottleneck_layers {
220 if let Some(metrics) = self.layer_performance_map.get(layer_name) {
221 recommendations.push(OptimizationRecommendation {
222 layer_name: layer_name.clone(),
223 issue_type: OptimizationIssue::ComputationalBottleneck,
224 severity: self.calculate_bottleneck_severity(metrics),
225 recommendations: vec![
226 format!("Consider optimizing {} layer computation", layer_name),
227 "Check for inefficient operations or memory access patterns".to_string(),
228 "Consider layer-specific optimizations or hardware acceleration"
229 .to_string(),
230 ],
231 expected_improvement: self.estimate_improvement_potential(metrics),
232 });
233 }
234 }
235
236 if self.memory_usage_bytes > 1_000_000_000 {
238 recommendations.push(OptimizationRecommendation {
240 layer_name: "Global".to_string(),
241 issue_type: OptimizationIssue::HighMemoryUsage,
242 severity: OptimizationSeverity::High,
243 recommendations: vec![
244 "Consider gradient checkpointing to reduce memory usage".to_string(),
245 "Optimize batch size and sequence length".to_string(),
246 "Use memory-efficient attention mechanisms".to_string(),
247 ],
248 expected_improvement: 0.3,
249 });
250 }
251
252 if self.throughput_gradients_per_second < 1.0 {
254 recommendations.push(OptimizationRecommendation {
255 layer_name: "Global".to_string(),
256 issue_type: OptimizationIssue::LowThroughput,
257 severity: OptimizationSeverity::Medium,
258 recommendations: vec![
259 "Consider mixed precision training".to_string(),
260 "Optimize data loading and preprocessing pipelines".to_string(),
261 "Use gradient accumulation for larger effective batch sizes".to_string(),
262 ],
263 expected_improvement: 0.4,
264 });
265 }
266
267 recommendations
268 }
269
270 fn calculate_bottleneck_severity(
271 &self,
272 metrics: &LayerPerformanceMetrics,
273 ) -> OptimizationSeverity {
274 let relative_slowness = metrics.average_computation_time.as_secs_f64()
275 / self.average_computation_time.as_secs_f64();
276
277 if relative_slowness > 3.0 {
278 OptimizationSeverity::Critical
279 } else if relative_slowness > 2.0 {
280 OptimizationSeverity::High
281 } else if relative_slowness > 1.5 {
282 OptimizationSeverity::Medium
283 } else {
284 OptimizationSeverity::Low
285 }
286 }
287
288 fn estimate_improvement_potential(&self, metrics: &LayerPerformanceMetrics) -> f64 {
289 let relative_slowness = metrics.average_computation_time.as_secs_f64()
290 / self.average_computation_time.as_secs_f64();
291
292 (relative_slowness - 1.0).min(0.8).max(0.1)
294 }
295
296 pub fn start_monitoring(&mut self) {
298 self.total_gradient_computations = 0;
300 self.average_computation_time = Duration::from_millis(0);
301 self.memory_usage_bytes = 0;
302 self.throughput_gradients_per_second = 0.0;
303 self.bottleneck_layers.clear();
304 self.layer_performance_map.clear();
305
306 self.resource_utilization = ResourceUtilization {
308 cpu_usage_percent: 0.0,
309 memory_usage_percent: 0.0,
310 gpu_usage_percent: 0.0,
311 io_wait_percent: 0.0,
312 };
313 }
314
315 pub fn take_performance_snapshot(&self) -> PerformanceSnapshot {
317 PerformanceSnapshot {
318 timestamp: std::time::SystemTime::now(),
319 total_computations: self.total_gradient_computations,
320 average_time: self.average_computation_time,
321 memory_usage: self.memory_usage_bytes,
322 throughput: self.throughput_gradients_per_second,
323 active_bottlenecks: self.bottleneck_layers.clone(),
324 layer_count: self.layer_performance_map.len(),
325 }
326 }
327}
328
/// Running per-layer statistics: totals, means, extrema and variance of the
/// recorded gradient computation times plus memory usage.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LayerPerformanceMetrics {
    // Name of the layer these metrics describe.
    pub layer_name: String,
    // Number of measurements folded in via `update`.
    pub computation_count: usize,
    // Sum of all recorded computation times.
    pub total_computation_time: Duration,
    // `total_computation_time / computation_count`.
    pub average_computation_time: Duration,
    // Sum of all recorded memory usages, in bytes.
    pub total_memory_usage: usize,
    // `total_memory_usage / computation_count` (integer division).
    pub average_memory_usage: usize,
    // Smallest recorded time; starts at a huge sentinel until first update.
    pub min_computation_time: Duration,
    // Largest recorded time.
    pub max_computation_time: Duration,
    // Population variance of the computation times, in seconds squared.
    pub performance_variance: f64,
}
342
343impl LayerPerformanceMetrics {
344 pub fn new(layer_name: String) -> Self {
345 Self {
346 layer_name,
347 computation_count: 0,
348 total_computation_time: Duration::from_millis(0),
349 average_computation_time: Duration::from_millis(0),
350 total_memory_usage: 0,
351 average_memory_usage: 0,
352 min_computation_time: Duration::from_secs(u64::MAX),
353 max_computation_time: Duration::from_millis(0),
354 performance_variance: 0.0,
355 }
356 }
357
358 pub fn update(&mut self, computation_time: Duration, memory_used: usize) {
359 self.computation_count += 1;
360 self.total_computation_time += computation_time;
361 self.total_memory_usage += memory_used;
362
363 self.average_computation_time = self.total_computation_time / self.computation_count as u32;
364 self.average_memory_usage = self.total_memory_usage / self.computation_count;
365
366 if computation_time < self.min_computation_time {
367 self.min_computation_time = computation_time;
368 }
369 if computation_time > self.max_computation_time {
370 self.max_computation_time = computation_time;
371 }
372
373 self.update_variance(computation_time);
374 }
375
376 fn update_variance(&mut self, new_time: Duration) {
377 if self.computation_count < 2 {
378 self.performance_variance = 0.0;
379 return;
380 }
381
382 let mean = self.average_computation_time.as_secs_f64();
383 let new_value = new_time.as_secs_f64();
384
385 let old_variance = self.performance_variance;
387 let delta = new_value - mean;
388 self.performance_variance = ((self.computation_count - 1) as f64 * old_variance
389 + delta * delta)
390 / self.computation_count as f64;
391 }
392}
393
/// One-shot wall-clock timer for a single layer's gradient computation.
/// Created by `GradientPerformanceTracker::start_timing`; consumed by
/// `finish`, which yields the layer name and elapsed time.
#[derive(Debug)]
pub struct PerformanceTimer {
    // Layer being timed; returned to the caller by `finish`.
    layer_name: String,
    // Captured at construction; elapsed time is measured against this.
    start_time: Instant,
}
400
401impl PerformanceTimer {
402 pub fn new(layer_name: String) -> Self {
403 Self {
404 layer_name,
405 start_time: Instant::now(),
406 }
407 }
408
409 pub fn finish(self) -> (String, Duration) {
410 (self.layer_name, self.start_time.elapsed())
411 }
412}
413
/// System resource gauges, each expressed as a percentage.
/// Zero-initialized here; values are populated by external sampling
/// (not visible in this file).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResourceUtilization {
    pub cpu_usage_percent: f64,
    pub gpu_usage_percent: f64,
    pub memory_usage_percent: f64,
    pub io_wait_percent: f64,
}
422
423impl Default for ResourceUtilization {
424 fn default() -> Self {
425 Self {
426 cpu_usage_percent: 0.0,
427 gpu_usage_percent: 0.0,
428 memory_usage_percent: 0.0,
429 io_wait_percent: 0.0,
430 }
431 }
432}
433
/// Point-in-time capture of the tracker's aggregate state, produced by
/// `take_performance_snapshot` and compared pairwise by trend analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceSnapshot {
    // Wall-clock time at which the snapshot was taken.
    pub timestamp: std::time::SystemTime,
    pub total_computations: usize,
    pub average_time: Duration,
    // Aggregate memory usage in bytes at capture time.
    pub memory_usage: usize,
    // Gradients per second at capture time.
    pub throughput: f64,
    // Copy of the tracker's bottleneck layer list at capture time.
    pub active_bottlenecks: Vec<String>,
    // Number of distinct layers tracked at capture time.
    pub layer_count: usize,
}
445
/// Result of comparing recent snapshots against older ones
/// (see `GradientPerformanceTracker::get_performance_trends`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceTrends {
    pub throughput_trend: TrendDirection,
    pub memory_trend: TrendDirection,
    pub bottleneck_stability: BottleneckStability,
    pub overall_performance_direction: PerformanceDirection,
}
454
455impl Default for PerformanceTrends {
456 fn default() -> Self {
457 Self {
458 throughput_trend: TrendDirection::Stable,
459 memory_trend: TrendDirection::Stable,
460 bottleneck_stability: BottleneckStability::Stable,
461 overall_performance_direction: PerformanceDirection::Stable,
462 }
463 }
464}
465
/// Direction of a metric's change between two snapshot windows,
/// as classified by `GradientPerformanceTracker::classify_trend`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TrendDirection {
    Improving,
    Stable,
    Degrading,
}
472
/// How consistent the set of bottleneck layers is across snapshot windows
/// (Jaccard similarity buckets; see `analyze_bottleneck_stability`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum BottleneckStability {
    Stable,
    Moderate,
    Unstable,
}
479
/// Overall speed direction derived from mean snapshot times
/// (see `analyze_overall_direction`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PerformanceDirection {
    Improving,
    Stable,
    Degrading,
}
486
/// One actionable suggestion produced by
/// `GradientPerformanceTracker::generate_optimization_recommendations`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationRecommendation {
    // Affected layer, or the literal "Global" for tracker-wide issues.
    pub layer_name: String,
    pub issue_type: OptimizationIssue,
    pub severity: OptimizationSeverity,
    // Human-readable remediation suggestions.
    pub recommendations: Vec<String>,
    // Estimated recoverable fraction of time/resources (0.0-1.0 range).
    pub expected_improvement: f64,
}
496
/// Category of performance problem a recommendation addresses.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum OptimizationIssue {
    ComputationalBottleneck,
    HighMemoryUsage,
    LowThroughput,
    // NOTE(review): no code path in this file emits this variant yet.
    ResourceUnderutilization,
}
504
/// Severity ladder for recommendations, ordered least to most severe
/// (assigned from relative slowness in `calculate_bottleneck_severity`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum OptimizationSeverity {
    Low,
    Medium,
    High,
    Critical,
}