1use super::anomaly_detection::*;
8use super::conflict_analysis::*;
9use super::enhanced_analysis::*;
10use super::monitoring::*;
11use super::performance_tracking::*;
12use super::types::*;
13use super::visualization::*;
14use crate::DebugConfig;
15use anyhow::Result;
16use serde::{Deserialize, Serialize};
17use std::collections::HashMap;
18
/// Aggregated gradient-flow diagnostics across all monitored layers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FlowAnalysis {
    /// Per-layer flow diagnostics, keyed by layer name.
    pub layer_analyses: HashMap<String, LayerFlowAnalysis>,
}
24
/// Flow diagnostics for a single layer (produced by `generate_flow_analysis`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LayerFlowAnalysis {
    pub layer_name: String,
    /// Latest norm < 1e-8, or the last five recorded norms all < 1e-6.
    pub is_vanishing: bool,
    /// Latest norm > 100, or any of the last three recorded norms > 50.
    pub is_exploding: bool,
    /// Most recently recorded gradient norm (0.0 when none recorded).
    pub gradient_norm: f64,
    /// 1 / (1 + variance of recorded norms); 1.0 means perfectly steady.
    pub flow_consistency: f64,
}
34
/// Central gradient-debugging facade: records per-layer gradient flows and
/// fans them out to history, adaptive thresholding, real-time monitoring,
/// anomaly-detection, performance-tracking, conflict-analysis and
/// visualization sub-components.
#[derive(Debug)]
pub struct GradientDebugger {
    // Retained for future use; no field of the general debug config is read
    // by the methods visible in this file.
    #[allow(dead_code)]
    config: DebugConfig,
    /// Gradient-specific thresholds (vanishing/exploding/dead-neuron, …).
    gradient_config: GradientDebugConfig,
    /// Per-layer rolling histories of recorded gradient statistics.
    gradient_histories: HashMap<String, GradientHistory>,
    /// Training-step counter, advanced by `next_step`.
    current_step: usize,
    /// Accumulated alerts; trimmed to the most recent 100 in `next_step`.
    alerts: Vec<GradientAlert>,
    /// Consecutive steps each layer has shown (near-)zero gradients.
    layer_no_gradient_count: HashMap<String, usize>,

    /// Per-layer thresholds that adapt as norms are recorded.
    adaptive_thresholds: HashMap<String, AdaptiveThresholds>,
    real_time_monitors: HashMap<String, RealTimeGradientMonitor>,
    anomaly_detector: GradientAnomalyDetector,
    performance_tracker: GradientPerformanceTracker,
    conflict_analyzer: GradientConflictAnalyzer,
    flow_visualizer: GradientFlowVisualizer,
    enhanced_analyzer: EnhancedGradientAnalyzer,
}
55
56impl GradientDebugger {
57 pub fn new(config: DebugConfig) -> Self {
59 let gradient_config = GradientDebugConfig::default();
60
61 Self {
62 config,
63 gradient_config: gradient_config.clone(),
64 gradient_histories: HashMap::new(),
65 current_step: 0,
66 alerts: Vec::new(),
67 layer_no_gradient_count: HashMap::new(),
68 adaptive_thresholds: HashMap::new(),
69 real_time_monitors: HashMap::new(),
70 anomaly_detector: GradientAnomalyDetector::default(),
71 performance_tracker: GradientPerformanceTracker::default(),
72 conflict_analyzer: GradientConflictAnalyzer::default(),
73 flow_visualizer: GradientFlowVisualizer::default(),
74 enhanced_analyzer: EnhancedGradientAnalyzer::default(),
75 }
76 }
77
78 pub fn with_gradient_config(config: DebugConfig, gradient_config: GradientDebugConfig) -> Self {
80 Self {
81 config,
82 gradient_config: gradient_config.clone(),
83 gradient_histories: HashMap::new(),
84 current_step: 0,
85 alerts: Vec::new(),
86 layer_no_gradient_count: HashMap::new(),
87 adaptive_thresholds: HashMap::new(),
88 real_time_monitors: HashMap::new(),
89 anomaly_detector: GradientAnomalyDetector::default(),
90 performance_tracker: GradientPerformanceTracker::default(),
91 conflict_analyzer: GradientConflictAnalyzer::default(),
92 flow_visualizer: GradientFlowVisualizer::default(),
93 enhanced_analyzer: EnhancedGradientAnalyzer::default(),
94 }
95 }
96
97 pub fn record_gradient_flow(
99 &mut self,
100 layer_name: &str,
101 gradient_norm: f64,
102 gradient_mean: f64,
103 gradient_std: f64,
104 ) -> Result<()> {
105 let flow = GradientFlow {
106 layer_name: layer_name.to_string(),
107 step: self.current_step,
108 gradient_norm,
109 gradient_mean,
110 gradient_std,
111 gradient_max: gradient_mean + gradient_std,
112 gradient_min: gradient_mean - gradient_std,
113 dead_neurons_ratio: self.estimate_dead_neurons_ratio(gradient_norm),
114 active_neurons_ratio: 1.0 - self.estimate_dead_neurons_ratio(gradient_norm),
115 timestamp: chrono::Utc::now(),
116 };
117
118 {
120 let history = self
121 .gradient_histories
122 .entry(layer_name.to_string())
123 .or_insert_with(|| GradientHistory::new(layer_name.to_string(), 1000));
124 history.add_gradient_flow(&flow);
125 }
126
127 let thresholds =
129 self.adaptive_thresholds.entry(layer_name.to_string()).or_insert_with(|| {
130 AdaptiveThresholds::new(
131 layer_name.to_string(),
132 self.gradient_config.vanishing_threshold,
133 self.gradient_config.exploding_threshold,
134 )
135 });
136 thresholds.update_thresholds(gradient_norm);
137
138 let monitor = self
140 .real_time_monitors
141 .entry(layer_name.to_string())
142 .or_insert_with(|| RealTimeGradientMonitor::new(layer_name.to_string()));
143 monitor.update(gradient_norm);
144
145 self.check_gradient_alerts(layer_name, &flow)?;
147
148 let timer = self.performance_tracker.start_timing(layer_name);
150 let (_, computation_time) = timer.finish();
151 self.performance_tracker
152 .record_layer_performance(layer_name, computation_time, 0); let anomalies =
156 self.anomaly_detector
157 .detect_anomalies(layer_name, gradient_norm, self.current_step);
158 for anomaly in anomalies {
159 self.alerts.push(GradientAlert::GradientOscillation {
160 layer_name: anomaly.layer_name,
161 variance: anomaly.severity,
162 });
163 }
164
165 if let Some(history) = self.gradient_histories.get(layer_name) {
167 if history.gradient_norms.len() == 50 {
168 let gradient_values: Vec<f64> = history.gradient_norms.iter().cloned().collect();
169 self.anomaly_detector.establish_baseline(layer_name, &gradient_values);
170 }
171 }
172
173 Ok(())
174 }
175
176 pub fn get_status(&self) -> GradientDebugStatus {
178 let layer_statuses: HashMap<String, LayerGradientStatus> = self
179 .gradient_histories
180 .iter()
181 .map(|(layer_name, history)| {
182 let status = self.compute_layer_status(layer_name, history);
183 (layer_name.clone(), status)
184 })
185 .collect();
186
187 let overall_health = self.compute_overall_health(&layer_statuses);
188 let recent_alerts: Vec<GradientAlert> =
189 self.alerts.iter().rev().take(10).cloned().collect();
190
191 GradientDebugStatus {
192 current_step: self.current_step,
193 overall_health,
194 layer_statuses,
195 recent_alerts,
196 total_alerts: self.alerts.len(),
197 active_layers: self.gradient_histories.len(),
198 }
199 }
200
201 fn generate_flow_analysis(&self) -> FlowAnalysis {
203 let mut layer_analyses = HashMap::new();
204
205 for (layer_name, history) in &self.gradient_histories {
206 let latest_gradient = history.gradient_norms.back().cloned().unwrap_or(0.0);
207
208 let is_vanishing = latest_gradient < 1e-8
210 || (history.gradient_norms.len() > 5
211 && history.gradient_norms.iter().rev().take(5).all(|&g| g < 1e-6));
212
213 let is_exploding = latest_gradient > 100.0
214 || (history.gradient_norms.len() > 3
215 && history.gradient_norms.iter().rev().take(3).any(|&g| g > 50.0));
216
217 let flow_consistency = if history.gradient_norms.len() > 1 {
219 let mean = history.gradient_norms.iter().sum::<f64>()
220 / history.gradient_norms.len() as f64;
221 let variance =
222 history.gradient_norms.iter().map(|&x| (x - mean).powi(2)).sum::<f64>()
223 / history.gradient_norms.len() as f64;
224 1.0 / (1.0 + variance) } else {
226 1.0
227 };
228
229 layer_analyses.insert(
230 layer_name.clone(),
231 LayerFlowAnalysis {
232 layer_name: layer_name.clone(),
233 is_vanishing,
234 is_exploding,
235 gradient_norm: latest_gradient,
236 flow_consistency,
237 },
238 );
239 }
240
241 FlowAnalysis { layer_analyses }
242 }
243
244 pub fn generate_comprehensive_report(&self) -> Result<ComprehensiveGradientReport> {
246 let status = self.get_status();
247 let conflict_analysis = self.conflict_analyzer.analyze_conflicts(&self.gradient_histories);
248 let visualization = self
249 .flow_visualizer
250 .generate_visualization(&self.gradient_histories, self.current_step);
251 let enhanced_analysis =
252 self.enhanced_analyzer.generate_enhanced_analysis(&self.gradient_histories);
253 let performance_snapshot = self.performance_tracker.take_performance_snapshot();
254 let anomaly_summary = self.anomaly_detector.get_anomaly_summary(None);
255
256 let flow_analysis = self.generate_flow_analysis();
257
258 Ok(ComprehensiveGradientReport {
259 timestamp: chrono::Utc::now(),
260 status,
261 conflict_analysis,
262 visualization,
263 enhanced_analysis,
264 flow_analysis,
265 performance_snapshot,
266 anomaly_summary,
267 recommendations: self.generate_comprehensive_recommendations()?,
268 })
269 }
270
    /// Runs conflict analysis over all recorded layer histories.
    pub fn analyze_gradient_conflicts(&self) -> GradientConflictAnalysis {
        self.conflict_analyzer.analyze_conflicts(&self.gradient_histories)
    }
275
    /// Renders a flow visualization from all histories at the current step.
    pub fn generate_gradient_flow_visualization(&self) -> GradientFlowVisualization {
        self.flow_visualizer
            .generate_visualization(&self.gradient_histories, self.current_step)
    }
281
    /// Runs the enhanced per-layer analysis over all recorded histories.
    pub fn generate_enhanced_layer_analysis(&self) -> EnhancedLayerGradientAnalysis {
        self.enhanced_analyzer.generate_enhanced_analysis(&self.gradient_histories)
    }
286
287 pub fn get_performance_insights(&self) -> PerformanceInsights {
289 let trends = self.performance_tracker.get_performance_trends();
290 let recommendations = self.performance_tracker.generate_optimization_recommendations();
291 let bottlenecks = self.performance_tracker.bottleneck_layers.clone();
292
293 PerformanceInsights {
294 trends,
295 recommendations,
296 bottlenecks,
297 current_throughput: self.performance_tracker.throughput_gradients_per_second,
298 memory_usage: self.performance_tracker.memory_usage_bytes,
299 }
300 }
301
302 pub fn next_step(&mut self) {
304 self.current_step += 1;
305
306 if self.alerts.len() > 100 {
308 self.alerts.drain(0..self.alerts.len() - 100);
309 }
310
311 for (layer_name, history) in &self.gradient_histories {
313 if let Some(latest_norm) = history.gradient_norms.back() {
314 if *latest_norm < 1e-8 {
315 *self.layer_no_gradient_count.entry(layer_name.clone()).or_insert(0) += 1;
316 } else {
317 self.layer_no_gradient_count.insert(layer_name.clone(), 0);
318 }
319 }
320 }
321
322 for (layer_name, &count) in &self.layer_no_gradient_count {
324 if count >= self.gradient_config.no_gradient_steps_threshold {
325 self.alerts.push(GradientAlert::NoGradientFlow {
326 layer_name: layer_name.clone(),
327 steps_without_gradient: count,
328 });
329 }
330 }
331 }
332
333 pub fn reset(&mut self) {
335 self.gradient_histories.clear();
336 self.current_step = 0;
337 self.alerts.clear();
338 self.layer_no_gradient_count.clear();
339 self.adaptive_thresholds.clear();
340 self.real_time_monitors.clear();
341 self.anomaly_detector = GradientAnomalyDetector::default();
342 self.performance_tracker = GradientPerformanceTracker::default();
343 }
344
345 pub fn get_layer_alerts(&self, layer_name: &str) -> Vec<&GradientAlert> {
347 self.alerts
348 .iter()
349 .filter(|alert| match alert {
350 GradientAlert::VanishingGradients {
351 layer_name: name, ..
352 } => name == layer_name,
353 GradientAlert::ExplodingGradients {
354 layer_name: name, ..
355 } => name == layer_name,
356 GradientAlert::DeadNeurons {
357 layer_name: name, ..
358 } => name == layer_name,
359 GradientAlert::GradientOscillation {
360 layer_name: name, ..
361 } => name == layer_name,
362 GradientAlert::NoGradientFlow {
363 layer_name: name, ..
364 } => name == layer_name,
365 })
366 .collect()
367 }
368
    /// Returns the recorded gradient history for `layer_name`, if any.
    pub fn get_layer_history(&self, layer_name: &str) -> Option<&GradientHistory> {
        self.gradient_histories.get(layer_name)
    }
373
    /// Names of all layers that have at least one recorded gradient flow.
    pub fn get_monitored_layers(&self) -> Vec<&String> {
        self.gradient_histories.keys().collect()
    }
378
379 fn estimate_dead_neurons_ratio(&self, gradient_norm: f64) -> f64 {
382 if gradient_norm < 1e-6 {
384 0.9 } else if gradient_norm < 1e-4 {
386 0.3 } else {
388 0.05 }
390 }
391
392 fn check_gradient_alerts(&mut self, layer_name: &str, flow: &GradientFlow) -> Result<()> {
393 if let Some(thresholds) = self.adaptive_thresholds.get(layer_name) {
395 let threshold_alerts = thresholds.check_thresholds(flow.gradient_norm);
396 self.alerts.extend(threshold_alerts);
397 } else {
398 if flow.gradient_norm < self.gradient_config.vanishing_threshold {
400 self.alerts.push(GradientAlert::VanishingGradients {
401 layer_name: layer_name.to_string(),
402 norm: flow.gradient_norm,
403 threshold: self.gradient_config.vanishing_threshold,
404 });
405 }
406
407 if flow.gradient_norm > self.gradient_config.exploding_threshold {
408 self.alerts.push(GradientAlert::ExplodingGradients {
409 layer_name: layer_name.to_string(),
410 norm: flow.gradient_norm,
411 threshold: self.gradient_config.exploding_threshold,
412 });
413 }
414 }
415
416 if flow.dead_neurons_ratio > self.gradient_config.dead_neuron_threshold {
418 self.alerts.push(GradientAlert::DeadNeurons {
419 layer_name: layer_name.to_string(),
420 ratio: flow.dead_neurons_ratio,
421 threshold: self.gradient_config.dead_neuron_threshold,
422 });
423 }
424
425 if let Some(monitor) = self.real_time_monitors.get(layer_name) {
427 if monitor.is_oscillating() {
428 self.alerts.push(GradientAlert::GradientOscillation {
429 layer_name: layer_name.to_string(),
430 variance: monitor.get_stability_score(),
431 });
432 }
433 }
434
435 Ok(())
436 }
437
438 fn compute_layer_status(
439 &self,
440 layer_name: &str,
441 history: &GradientHistory,
442 ) -> LayerGradientStatus {
443 let latest_norm = history.gradient_norms.back().cloned().unwrap_or(0.0);
444 let health = self.classify_layer_health(layer_name, history);
445 let alerts = self.get_layer_alerts(layer_name).len();
446 let trend = history.get_trend_slope().unwrap_or(0.0);
447
448 LayerGradientStatus {
449 layer_name: layer_name.to_string(),
450 health,
451 latest_gradient_norm: latest_norm,
452 gradient_trend: trend,
453 alert_count: alerts,
454 steps_recorded: history.gradient_norms.len(),
455 }
456 }
457
458 fn classify_layer_health(&self, layer_name: &str, history: &GradientHistory) -> LayerHealth {
459 let latest_norm = history.gradient_norms.back().cloned().unwrap_or(0.0);
460 let alert_count = self.get_layer_alerts(layer_name).len();
461
462 if latest_norm < 1e-7 || latest_norm > 100.0 || alert_count > 3 {
463 LayerHealth::Critical
464 } else if latest_norm < 1e-5 || latest_norm > 10.0 || alert_count > 0 {
465 LayerHealth::Warning
466 } else {
467 LayerHealth::Healthy
468 }
469 }
470
471 fn compute_overall_health(
472 &self,
473 layer_statuses: &HashMap<String, LayerGradientStatus>,
474 ) -> LayerHealth {
475 if layer_statuses.is_empty() {
476 return LayerHealth::Healthy;
477 }
478
479 let critical_count =
480 layer_statuses.values().filter(|s| s.health == LayerHealth::Critical).count();
481 let warning_count =
482 layer_statuses.values().filter(|s| s.health == LayerHealth::Warning).count();
483 let total = layer_statuses.len();
484
485 if critical_count > 0 || warning_count as f64 / total as f64 > 0.5 {
486 LayerHealth::Critical
487 } else if warning_count > 0 {
488 LayerHealth::Warning
489 } else {
490 LayerHealth::Healthy
491 }
492 }
493
494 fn generate_comprehensive_recommendations(&self) -> Result<Vec<GradientRecommendation>> {
495 let mut recommendations = Vec::new();
496
497 let perf_recs = self.performance_tracker.generate_optimization_recommendations();
499 for rec in perf_recs {
500 recommendations.push(GradientRecommendation {
501 recommendation_type: RecommendationType::Performance,
502 title: rec.layer_name,
503 description: format!("{:?}: {}", rec.issue_type, rec.recommendations.join(", ")),
504 priority: match rec.severity {
505 OptimizationSeverity::Critical => GradientRecommendationPriority::High,
506 OptimizationSeverity::High => GradientRecommendationPriority::High,
507 OptimizationSeverity::Medium => GradientRecommendationPriority::Medium,
508 OptimizationSeverity::Low => GradientRecommendationPriority::Low,
509 },
510 expected_impact: rec.expected_improvement,
511 });
512 }
513
514 let conflict_analysis = self.conflict_analyzer.analyze_conflicts(&self.gradient_histories);
516 for strategy in conflict_analysis.mitigation_strategies {
517 recommendations.push(GradientRecommendation {
518 recommendation_type: RecommendationType::Conflict,
519 title: strategy.strategy_name,
520 description: strategy.description,
521 priority: match strategy.implementation_complexity {
522 MitigationComplexity::Simple => GradientRecommendationPriority::High,
523 MitigationComplexity::Moderate => GradientRecommendationPriority::Medium,
524 MitigationComplexity::Complex => GradientRecommendationPriority::Medium,
525 MitigationComplexity::RequiresArchitectureChange => {
526 GradientRecommendationPriority::Low
527 },
528 },
529 expected_impact: strategy.effectiveness,
530 });
531 }
532
533 let anomaly_summary = self.anomaly_detector.get_anomaly_summary(None);
535 for rec_text in anomaly_summary.recommendations {
536 recommendations.push(GradientRecommendation {
537 recommendation_type: RecommendationType::Anomaly,
538 title: "Anomaly Mitigation".to_string(),
539 description: rec_text,
540 priority: if anomaly_summary.average_severity > 0.7 {
541 GradientRecommendationPriority::High
542 } else {
543 GradientRecommendationPriority::Medium
544 },
545 expected_impact: 1.0 - anomaly_summary.average_severity,
546 });
547 }
548
549 recommendations.sort_by(|a, b| {
551 let priority_cmp = b.priority.cmp(&a.priority);
552 if priority_cmp == std::cmp::Ordering::Equal {
553 b.expected_impact.partial_cmp(&a.expected_impact).unwrap()
554 } else {
555 priority_cmp
556 }
557 });
558
559 Ok(recommendations)
560 }
561
    /// Public alias for `generate_comprehensive_recommendations`.
    pub fn generate_recommendations(&self) -> Result<Vec<GradientRecommendation>> {
        self.generate_comprehensive_recommendations()
    }
566
567 pub async fn start(&mut self) -> Result<()> {
569 self.performance_tracker.start_monitoring();
571
572 self.current_step = 0;
574 self.alerts.clear();
575
576 for (layer_name, history) in &self.gradient_histories {
578 if !history.gradient_norms.is_empty() {
579 let thresholds = AdaptiveThresholds::from_history(history);
580 self.adaptive_thresholds.insert(layer_name.clone(), thresholds);
581 }
582 }
583
584 Ok(())
585 }
586
    /// Async variant of report generation.
    ///
    /// NOTE(review): this path calls `create_visualization` /
    /// `analyze_gradients` while the sync `generate_comprehensive_report`
    /// calls `generate_visualization` / `generate_enhanced_analysis`, and it
    /// builds the status via `evaluate_overall_health` / `get_layer_statuses`
    /// rather than `get_status` — confirm the two report paths are meant to
    /// use different entry points.
    pub async fn generate_report(&self) -> Result<ComprehensiveGradientReport> {
        // Inline status snapshot (mirrors `get_status` with the async-path
        // helpers).
        let status = GradientDebugStatus {
            current_step: self.current_step,
            overall_health: self.evaluate_overall_health(),
            layer_statuses: self.get_layer_statuses(),
            recent_alerts: self.alerts.iter().rev().take(10).cloned().collect(),
            total_alerts: self.alerts.len(),
            active_layers: self.gradient_histories.len(),
        };

        let conflict_analysis = self.conflict_analyzer.analyze_conflicts(&self.gradient_histories);
        let visualization = self.flow_visualizer.create_visualization(&self.gradient_histories);
        let enhanced_analysis = self.enhanced_analyzer.analyze_gradients(&self.gradient_histories);
        let performance_snapshot = self.performance_tracker.take_performance_snapshot();
        let anomaly_summary = self.anomaly_detector.get_anomaly_summary(None);
        // Recommendation failures are swallowed here; the report is produced
        // with an empty list instead of propagating the error.
        let recommendations = self.generate_recommendations().unwrap_or_default();

        let flow_analysis = self.generate_flow_analysis();

        Ok(ComprehensiveGradientReport {
            timestamp: chrono::Utc::now(),
            status,
            conflict_analysis,
            visualization,
            enhanced_analysis,
            flow_analysis,
            performance_snapshot,
            anomaly_summary,
            recommendations,
        })
    }
619
620 pub async fn quick_analysis(&self) -> Result<GradientQuickAnalysis> {
622 let mut problematic_layers = Vec::new();
623 let mut total_gradients = 0f64;
624 let mut active_layers = 0;
625
626 for (layer_name, history) in &self.gradient_histories {
627 if !history.gradient_norms.is_empty() {
628 active_layers += 1;
629 let latest_norm = history.gradient_norms.back().unwrap();
630 total_gradients += latest_norm;
631
632 if *latest_norm < 1e-8 {
634 problematic_layers.push(format!("{}: Vanishing gradients", layer_name));
635 } else if *latest_norm > 100.0 {
636 problematic_layers.push(format!("{}: Exploding gradients", layer_name));
637 }
638 }
639 }
640
641 let average_gradient =
642 if active_layers > 0 { total_gradients / active_layers as f64 } else { 0.0 };
643
644 let health_score = self.calculate_quick_health_score();
645
646 Ok(GradientQuickAnalysis {
647 overall_health: if health_score > 0.8 {
648 LayerHealth::Healthy
649 } else if health_score > 0.5 {
650 LayerHealth::Warning
651 } else {
652 LayerHealth::Critical
653 },
654 active_layers,
655 problematic_layers,
656 average_gradient_norm: average_gradient,
657 recent_alerts_count: self.alerts.len(),
658 timestamp: chrono::Utc::now(),
659 })
660 }
661
662 fn evaluate_overall_health(&self) -> LayerHealth {
664 if self.gradient_histories.is_empty() {
665 return LayerHealth::Unknown;
666 }
667
668 let mut healthy_count = 0;
669 let mut warning_count = 0;
670 let mut critical_count = 0;
671
672 for history in self.gradient_histories.values() {
673 if let Some(latest_norm) = history.gradient_norms.back() {
674 if *latest_norm < 1e-8 || *latest_norm > 100.0 {
675 critical_count += 1;
676 } else if *latest_norm < 1e-6 || *latest_norm > 10.0 {
677 warning_count += 1;
678 } else {
679 healthy_count += 1;
680 }
681 }
682 }
683
684 let total = healthy_count + warning_count + critical_count;
685 let critical_ratio = critical_count as f64 / total as f64;
686 let warning_ratio = (warning_count + critical_count) as f64 / total as f64;
687
688 if critical_ratio > 0.3 {
689 LayerHealth::Critical
690 } else if warning_ratio > 0.5 {
691 LayerHealth::Warning
692 } else {
693 LayerHealth::Healthy
694 }
695 }
696
697 fn get_layer_statuses(&self) -> HashMap<String, LayerGradientStatus> {
699 let mut statuses = HashMap::new();
700
701 for (layer_name, history) in &self.gradient_histories {
702 let status = if let Some(latest_norm) = history.gradient_norms.back() {
703 LayerGradientStatus {
704 layer_name: layer_name.clone(),
705 latest_gradient_norm: *latest_norm,
706 gradient_trend: self.calculate_trend_value(history),
707 health: if *latest_norm < 1e-8 {
708 LayerHealth::Critical
709 } else if *latest_norm > 100.0 {
710 LayerHealth::Critical
711 } else if *latest_norm < 1e-6 || *latest_norm > 10.0 {
712 LayerHealth::Warning
713 } else {
714 LayerHealth::Healthy
715 },
716 alert_count: self.get_layer_alerts(layer_name).len(),
717 steps_recorded: history.gradient_norms.len(),
718 }
719 } else {
720 LayerGradientStatus {
721 layer_name: layer_name.clone(),
722 latest_gradient_norm: 0.0,
723 gradient_trend: 0.0,
724 health: LayerHealth::Unknown,
725 alert_count: 0,
726 steps_recorded: 0,
727 }
728 };
729
730 statuses.insert(layer_name.clone(), status);
731 }
732
733 statuses
734 }
735
736 #[allow(dead_code)]
738 fn calculate_trend(&self, history: &GradientHistory) -> GradientTrend {
739 if history.gradient_norms.len() < 3 {
740 return GradientTrend::Unknown;
741 }
742
743 let recent: Vec<f64> = history.gradient_norms.iter().rev().take(3).cloned().collect();
744
745 if recent[0] > recent[1] && recent[1] > recent[2] {
746 GradientTrend::Increasing
747 } else if recent[0] < recent[1] && recent[1] < recent[2] {
748 GradientTrend::Decreasing
749 } else {
750 GradientTrend::Stable
751 }
752 }
753
754 fn calculate_trend_value(&self, history: &GradientHistory) -> f64 {
756 if history.gradient_norms.len() < 2 {
757 return 0.0;
758 }
759
760 let recent: Vec<f64> = history.gradient_norms.iter().rev().take(10).cloned().collect();
761 if recent.len() < 2 {
762 return 0.0;
763 }
764
765 let n = recent.len() as f64;
767 let sum_x = (0..recent.len()).sum::<usize>() as f64;
768 let sum_y = recent.iter().sum::<f64>();
769 let sum_xy = recent.iter().enumerate().map(|(i, &y)| i as f64 * y).sum::<f64>();
770 let sum_x2 = (0..recent.len()).map(|i| (i * i) as f64).sum::<f64>();
771
772 let slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x * sum_x);
773 slope
774 }
775
776 fn calculate_quick_health_score(&self) -> f64 {
778 if self.gradient_histories.is_empty() {
779 return 0.0;
780 }
781
782 let mut score = 0.0;
783 let mut count = 0;
784
785 for history in self.gradient_histories.values() {
786 if let Some(latest_norm) = history.gradient_norms.back() {
787 let norm_score = if *latest_norm >= 1e-4 && *latest_norm <= 1.0 {
789 1.0
790 } else if *latest_norm >= 1e-6 && *latest_norm <= 10.0 {
791 0.7
792 } else if *latest_norm >= 1e-8 && *latest_norm <= 100.0 {
793 0.3
794 } else {
795 0.0
796 };
797
798 score += norm_score;
799 count += 1;
800 }
801 }
802
803 if count == 0 {
804 0.0
805 } else {
806 score / count as f64
807 }
808 }
809}
810
811#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
813pub struct GradientDebugStatus {
814 pub current_step: usize,
815 pub overall_health: LayerHealth,
816 pub layer_statuses: HashMap<String, LayerGradientStatus>,
817 pub recent_alerts: Vec<GradientAlert>,
818 pub total_alerts: usize,
819 pub active_layers: usize,
820}
821
822#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
824pub struct ComprehensiveGradientReport {
825 pub timestamp: chrono::DateTime<chrono::Utc>,
826 pub status: GradientDebugStatus,
827 pub conflict_analysis: GradientConflictAnalysis,
828 pub visualization: GradientFlowVisualization,
829 pub enhanced_analysis: EnhancedLayerGradientAnalysis,
830 pub flow_analysis: FlowAnalysis,
831 pub performance_snapshot: PerformanceSnapshot,
832 pub anomaly_summary: AnomalySummary,
833 pub recommendations: Vec<GradientRecommendation>,
834}
835
836impl ComprehensiveGradientReport {
837 pub fn has_vanishing_gradients(&self) -> bool {
839 for (_, layer_status) in &self.status.layer_statuses {
841 if layer_status.latest_gradient_norm < 1e-8 {
842 return true;
843 }
844 }
845
846 for anomaly in &self.anomaly_summary.anomalies {
848 if matches!(
849 anomaly.anomaly_type,
850 crate::anomaly_detector::AnomalyType::GradientVanishing
851 ) {
852 return true;
853 }
854 }
855
856 false
857 }
858
859 pub fn has_exploding_gradients(&self) -> bool {
861 for (_, layer_status) in &self.status.layer_statuses {
863 if layer_status.latest_gradient_norm > 100.0 {
864 return true;
865 }
866 }
867
868 for anomaly in &self.anomaly_summary.anomalies {
870 if matches!(
871 anomaly.anomaly_type,
872 crate::anomaly_detector::AnomalyType::GradientExplosion
873 | crate::anomaly_detector::AnomalyType::NumericalInstability
874 ) {
875 return true;
876 }
877 }
878
879 false
880 }
881}
882
/// Bundle of performance-tracker outputs returned by
/// `GradientDebugger::get_performance_insights`.
#[derive(Debug, Clone)]
pub struct PerformanceInsights {
    pub trends: PerformanceTrends,
    pub recommendations: Vec<OptimizationRecommendation>,
    /// Names of layers the tracker flagged as bottlenecks.
    pub bottlenecks: Vec<String>,
    /// Gradients processed per second, as reported by the tracker.
    pub current_throughput: f64,
    /// Tracker-reported memory usage, in bytes.
    pub memory_usage: usize,
}
892
893#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
895pub struct GradientRecommendation {
896 pub recommendation_type: RecommendationType,
897 pub title: String,
898 pub description: String,
899 pub priority: GradientRecommendationPriority,
900 pub expected_impact: f64,
901}
902
903#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
904pub enum RecommendationType {
905 Performance,
906 Conflict,
907 Anomaly,
908 Architecture,
909 Optimization,
910}
911
912#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, serde::Serialize, serde::Deserialize)]
913pub enum GradientRecommendationPriority {
914 Low,
915 Medium,
916 High,
917}
918
/// Lightweight health summary returned by `GradientDebugger::quick_analysis`.
#[derive(Debug, Clone)]
pub struct GradientQuickAnalysis {
    pub overall_health: LayerHealth,
    /// Layers with at least one recorded gradient norm.
    pub active_layers: usize,
    /// Human-readable "layer: problem" strings for flagged layers.
    pub problematic_layers: Vec<String>,
    /// Mean of the latest norms across active layers (0.0 when none).
    pub average_gradient_norm: f64,
    // NOTE(review): currently filled with the TOTAL retained alert count,
    // not just recent ones — confirm the name matches intent.
    pub recent_alerts_count: usize,
    pub timestamp: chrono::DateTime<chrono::Utc>,
}
929
930#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
932pub struct LayerGradientStatus {
933 pub layer_name: String,
934 pub health: LayerHealth,
935 pub latest_gradient_norm: f64,
936 pub gradient_trend: f64,
937 pub alert_count: usize,
938 pub steps_recorded: usize,
939}
940
/// Coarse direction of recent gradient norms (see `calculate_trend`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GradientTrend {
    /// Fewer than three samples recorded.
    Unknown,
    Increasing,
    Decreasing,
    Stable,
}