1use super::types::*;
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10
/// Top-level report produced by the analyzer: per-layer details plus
/// network-wide, hierarchical and prioritization views.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedLayerGradientAnalysis {
    /// Full analysis for each layer, keyed by layer name.
    pub layer_details: HashMap<String, LayerGradientDetails>,
    /// Aggregated analysis across the whole network.
    pub network_level_analysis: NetworkLevelAnalysis,
    /// Grouping of layers into groups and hierarchy levels.
    pub gradient_hierarchy: GradientHierarchy,
    /// Ranked list of optimization targets.
    pub optimization_priorities: Vec<OptimizationPriority>,
}
19
/// Per-layer gradient report: statistics, flow behavior, health metrics,
/// suggested optimizations, and a comparison against the other layers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LayerGradientDetails {
    pub layer_name: String,
    pub gradient_statistics: GradientStatistics,
    pub flow_characteristics: FlowCharacteristics,
    pub health_metrics: LayerHealthMetrics,
    pub optimization_suggestions: Vec<LayerOptimizationSuggestion>,
    pub comparative_analysis: ComparativeAnalysis,
}
30
/// Network-wide view aggregated from all per-layer reports.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkLevelAnalysis {
    /// Roll-up of per-layer health verdicts.
    pub overall_gradient_health: LayerHealth,
    /// Distribution of mean gradient norms across layers.
    pub gradient_distribution: GradientDistribution,
    /// Pairwise layer interaction scores.
    pub layer_interactions: Vec<LayerInteraction>,
    pub convergence_indicators: ConvergenceIndicators,
    pub training_dynamics: TrainingDynamics,
    pub stability_assessment: StabilityAssessment,
}
41
/// Distribution of per-layer mean gradient norms.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GradientDistribution {
    pub mean_gradient_norm: f64,
    /// Population variance of the per-layer means.
    pub gradient_variance: f64,
    /// Standardized third moment of the per-layer means.
    pub gradient_skewness: f64,
    /// Excess kurtosis (normal distribution = 0).
    pub gradient_kurtosis: f64,
    /// Each layer's mean norm divided by the network-wide mean.
    pub layer_gradient_ratios: HashMap<String, f64>,
    pub distribution_type: DistributionType,
}

/// Coarse shape classification of the gradient-norm distribution.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum DistributionType {
    Normal,
    Skewed,
    HeavyTailed,
    Multimodal,
    /// No data available (e.g. empty layer map).
    Degenerate,
}
61
/// Pairwise relationship between two layers' gradient behavior.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LayerInteraction {
    pub layer1: String,
    pub layer2: String,
    /// Similarity score in (0, 1]; 1.0 means identical behavior.
    pub interaction_strength: f64,
    pub interaction_type: InteractionType,
    pub impact_score: f64,
}

/// How two layers' training dynamics relate to each other.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum InteractionType {
    Cooperative,
    Competitive,
    Neutral,
    Disruptive,
}
79
/// Network-level convergence scores and a coarse trend classification.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConvergenceIndicators {
    /// Average of the per-layer convergence indicators.
    pub gradient_convergence_score: f64,
    pub parameter_convergence_score: f64,
    pub loss_convergence_score: f64,
    pub convergence_trend: ConvergenceTrend,
    /// Heuristic step estimate; `None` when convergence is not progressing.
    pub estimated_steps_to_convergence: Option<usize>,
}

/// Direction the training process appears to be heading.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ConvergenceTrend {
    Converging,
    Stable,
    Diverging,
    Oscillating,
    Unknown,
}
98
/// Characterization of the current training regime.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrainingDynamics {
    pub learning_phase: LearningPhase,
    pub gradient_momentum: f64,
    pub learning_velocity: f64,
    pub adaptation_rate: f64,
    pub plateau_detection: PlateauDetection,
}

/// Stage of training, ordered from early exploration to saturation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum LearningPhase {
    InitialLearning,
    RapidLearning,
    Refinement,
    Convergence,
    Plateau,
    Overfitting,
}

/// Whether training appears stalled and how badly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PlateauDetection {
    pub is_plateau: bool,
    /// Duration in steps; 0 when no plateau detected.
    pub plateau_duration: usize,
    pub plateau_severity: PlateauSeverity,
    /// Human-readable remediation hints (empty when not on a plateau).
    pub suggested_actions: Vec<String>,
}

/// Severity grading for a detected plateau.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PlateauSeverity {
    Mild,
    Moderate,
    Severe,
}
134
/// Network stability verdict: current score, trend, root causes and forecast.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StabilityAssessment {
    /// Average per-layer stability measure in [0, 1].
    pub overall_stability: f64,
    pub stability_trend: StabilityTrend,
    pub instability_sources: Vec<InstabilitySource>,
    pub stability_forecast: StabilityForecast,
}

/// Direction stability is moving in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum StabilityTrend {
    Improving,
    Stable,
    Degrading,
}

/// One identified cause of training instability.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InstabilitySource {
    pub source_type: InstabilityType,
    pub affected_layers: Vec<String>,
    /// Severity score; scale depends on the source type.
    pub severity: f64,
    pub description: String,
}

/// Category of instability symptom.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum InstabilityType {
    GradientExplosion,
    GradientVanishing,
    Oscillation,
    Stagnation,
    Chaos,
}

/// Short- and long-term stability outlook with a confidence level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StabilityForecast {
    pub short_term_outlook: StabilityOutlook,
    pub long_term_outlook: StabilityOutlook,
    pub confidence_level: f64,
    /// Monitoring steps recommended to track stability going forward.
    pub recommended_monitoring: Vec<String>,
}

/// Forecast classification for stability.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum StabilityOutlook {
    Stable,
    Improving,
    Deteriorating,
    Uncertain,
}
185
/// Hierarchical organization of layers into groups and levels.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GradientHierarchy {
    pub layer_groups: Vec<LayerGroup>,
    pub hierarchy_levels: Vec<HierarchyLevel>,
    pub cross_level_interactions: Vec<CrossLevelInteraction>,
}

/// A named set of layers with shared gradient behavior.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LayerGroup {
    pub group_name: String,
    /// Names of member layers.
    pub layers: Vec<String>,
    pub group_characteristics: GroupCharacteristics,
    /// How similarly the member layers behave, in [0, 1].
    pub internal_coherence: f64,
}

/// Aggregate properties of a layer group.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GroupCharacteristics {
    pub average_gradient_norm: f64,
    pub gradient_synchronization: f64,
    pub learning_rate_sensitivity: f64,
    pub optimization_difficulty: OptimizationDifficulty,
}

/// How hard a group is expected to be to optimize.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum OptimizationDifficulty {
    Easy,
    Moderate,
    Difficult,
    VeryDifficult,
}

/// One level in the layer-group hierarchy.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HierarchyLevel {
    pub level_id: usize,
    pub level_name: String,
    /// Names of layer groups assigned to this level.
    pub layer_groups: Vec<String>,
    pub level_importance: f64,
    pub optimization_impact: f64,
}

/// Influence flowing between two hierarchy levels.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossLevelInteraction {
    pub from_level: usize,
    pub to_level: usize,
    pub interaction_strength: f64,
    pub interaction_direction: InteractionDirection,
}

/// Direction of influence between hierarchy levels.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum InteractionDirection {
    TopDown,
    BottomUp,
    Bidirectional,
}
245
/// One ranked optimization target (layer, group, or whole network).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationPriority {
    pub target_name: String,
    pub target_type: OptimizationTarget,
    /// Higher score means higher priority.
    pub priority_score: f64,
    pub urgency_level: UrgencyLevel,
    /// Estimated room for improvement, in [0, 1].
    pub optimization_potential: f64,
    pub recommended_actions: Vec<PrioritizedAction>,
}

/// Granularity of an optimization target.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum OptimizationTarget {
    IndividualLayer,
    LayerGroup,
    NetworkLevel,
}

/// How urgently an optimization should be applied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UrgencyLevel {
    Low,
    Medium,
    High,
    Critical,
}

/// A concrete action recommended for a prioritized target.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PrioritizedAction {
    pub action_name: String,
    pub action_type: ActionType,
    /// Expected benefit if applied, in [0, 1].
    pub expected_impact: f64,
    pub implementation_effort: ImplementationEffort,
    /// Actions or conditions required before this one can be applied.
    pub prerequisites: Vec<String>,
}

/// Category of remediation action.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ActionType {
    ParameterAdjustment,
    ArchitecturalChange,
    OptimizationTechnique,
    RegularizationMethod,
    LearningRateScheduling,
}

/// Rough cost of implementing an action.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ImplementationEffort {
    Minimal,
    Low,
    Moderate,
    High,
    Extensive,
}
299
/// A single remediation suggestion for one layer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LayerOptimizationSuggestion {
    pub suggestion_type: SuggestionType,
    /// What to do.
    pub description: String,
    /// Why the analyzer suggests it.
    pub rationale: String,
    /// Estimated benefit if applied, in [0, 1].
    pub expected_improvement: f64,
    pub implementation_complexity: ImplementationComplexity,
    /// Known downsides of applying the suggestion.
    pub side_effects: Vec<String>,
}

/// Category of layer-level suggestion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SuggestionType {
    WeightInitialization,
    LearningRateAdjustment,
    RegularizationAdd,
    ArchitecturalModification,
    OptimizationAlgorithm,
    BatchNormalization,
    DropoutAdjustment,
}

/// How disruptive a suggestion is to implement.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ImplementationComplexity {
    Simple,
    Moderate,
    Complex,
    RequiresRetraining,
}
329
/// Analyzer that turns per-layer gradient histories into an
/// `EnhancedLayerGradientAnalysis` report.
#[derive(Debug)]
pub struct EnhancedGradientAnalyzer {
    // Not read by any method visible in this module yet (hence the allow);
    // presumably reserved for depth-dependent analysis — TODO confirm.
    #[allow(dead_code)]
    analysis_depth: AnalysisDepth,
    /// Number of most-recent samples considered when judging convergence.
    convergence_window: usize,
    /// Minimum average stability considered "stable" network-wide.
    stability_threshold: f64,
}

/// How thorough the analysis should be.
#[derive(Debug, Clone)]
pub enum AnalysisDepth {
    Basic,
    Standard,
    Comprehensive,
    Expert,
}
346
347impl Default for EnhancedGradientAnalyzer {
348 fn default() -> Self {
349 Self {
350 analysis_depth: AnalysisDepth::Standard,
351 convergence_window: 100,
352 stability_threshold: 0.8,
353 }
354 }
355}
356
357impl EnhancedGradientAnalyzer {
358 pub fn new(depth: AnalysisDepth, window: usize, threshold: f64) -> Self {
359 Self {
360 analysis_depth: depth,
361 convergence_window: window,
362 stability_threshold: threshold,
363 }
364 }
365
366 pub fn generate_enhanced_analysis(
367 &self,
368 gradient_histories: &HashMap<String, GradientHistory>,
369 ) -> EnhancedLayerGradientAnalysis {
370 let layer_details = self.generate_layer_details(gradient_histories);
371 let network_level_analysis = self.analyze_network_level_gradients(&layer_details);
372 let gradient_hierarchy = self.build_gradient_hierarchy(&layer_details);
373 let optimization_priorities =
374 self.rank_optimization_priorities(&layer_details, &network_level_analysis);
375
376 EnhancedLayerGradientAnalysis {
377 layer_details,
378 network_level_analysis,
379 gradient_hierarchy,
380 optimization_priorities,
381 }
382 }
383
384 fn generate_layer_details(
385 &self,
386 gradient_histories: &HashMap<String, GradientHistory>,
387 ) -> HashMap<String, LayerGradientDetails> {
388 let mut layer_details = HashMap::new();
389
390 for (layer_name, history) in gradient_histories {
391 let gradient_statistics = self.compute_detailed_gradient_stats(history);
392 let flow_characteristics = self.analyze_flow_characteristics(history);
393 let health_metrics = self.compute_layer_health_metrics(history);
394 let optimization_suggestions =
395 self.generate_layer_optimization_suggestions(layer_name, history);
396 let comparative_analysis =
397 self.compare_with_other_layers(layer_name, history, gradient_histories);
398
399 let analysis = LayerGradientDetails {
400 layer_name: layer_name.clone(),
401 gradient_statistics,
402 flow_characteristics,
403 health_metrics,
404 optimization_suggestions,
405 comparative_analysis,
406 };
407
408 layer_details.insert(layer_name.clone(), analysis);
409 }
410
411 layer_details
412 }
413
414 fn compute_detailed_gradient_stats(&self, history: &GradientHistory) -> GradientStatistics {
415 if history.gradient_norms.is_empty() {
416 return GradientStatistics {
417 mean: 0.0,
418 std: 0.0,
419 median: 0.0,
420 percentile_95: 0.0,
421 percentile_5: 0.0,
422 samples: 0,
423 variance: 0.0,
424 skewness: 0.0,
425 kurtosis: 0.0,
426 };
427 }
428
429 let values: Vec<f64> = history.gradient_norms.iter().cloned().collect();
430 let n = values.len() as f64;
431 let mean = values.iter().sum::<f64>() / n;
432 let variance = values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n;
433 let std = variance.sqrt();
434
435 let mut sorted_values = values.clone();
436 sorted_values.sort_by(|a, b| a.partial_cmp(b).unwrap());
437
438 let median_idx = values.len() / 2;
439 let median = if values.len() % 2 == 0 {
440 (sorted_values[median_idx - 1] + sorted_values[median_idx]) / 2.0
441 } else {
442 sorted_values[median_idx]
443 };
444
445 let percentile_5_idx = (values.len() as f64 * 0.05) as usize;
446 let percentile_95_idx = (values.len() as f64 * 0.95) as usize;
447 let percentile_5 = sorted_values[percentile_5_idx];
448 let percentile_95 = sorted_values[percentile_95_idx.min(sorted_values.len() - 1)];
449
450 let skewness = if std > 0.0 {
452 values.iter().map(|&x| ((x - mean) / std).powi(3)).sum::<f64>() / n
453 } else {
454 0.0
455 };
456
457 let kurtosis = if std > 0.0 {
458 values.iter().map(|&x| ((x - mean) / std).powi(4)).sum::<f64>() / n - 3.0
459 } else {
460 0.0
461 };
462
463 GradientStatistics {
464 mean,
465 std,
466 median,
467 percentile_95,
468 percentile_5,
469 samples: values.len(),
470 variance,
471 skewness,
472 kurtosis,
473 }
474 }
475
476 fn analyze_flow_characteristics(&self, history: &GradientHistory) -> FlowCharacteristics {
477 let consistency_score = self.compute_flow_consistency(history);
478 let smoothness_index = self.compute_smoothness_index(history);
479 let trend_strength = self.compute_trend_strength(history);
480 let oscillation_frequency = self.compute_oscillation_frequency(history);
481 let stability_measure = self.compute_stability_measure(history);
482
483 FlowCharacteristics {
484 consistency_score,
485 smoothness_index,
486 trend_strength,
487 oscillation_frequency,
488 stability_measure,
489 }
490 }
491
492 fn compute_flow_consistency(&self, history: &GradientHistory) -> f64 {
493 if history.gradient_norms.len() < 2 {
494 return 1.0;
495 }
496
497 let variations: Vec<f64> = history
498 .gradient_norms
499 .iter()
500 .collect::<Vec<&f64>>()
501 .windows(2)
502 .map(|pair| (*pair[1] - *pair[0]).abs() / (*pair[0] + 1e-8))
503 .collect();
504
505 let avg_variation = variations.iter().sum::<f64>() / variations.len() as f64;
506 (1.0_f64 / (1.0 + avg_variation)).min(1.0)
507 }
508
509 fn compute_smoothness_index(&self, history: &GradientHistory) -> f64 {
510 if history.gradient_norms.len() < 3 {
511 return 1.0;
512 }
513
514 let second_derivatives: Vec<f64> = history
516 .gradient_norms
517 .iter()
518 .collect::<Vec<&f64>>()
519 .windows(3)
520 .map(|window| *window[2] - 2.0 * *window[1] + *window[0])
521 .collect();
522
523 let avg_second_derivative = second_derivatives.iter().map(|&x| x.abs()).sum::<f64>()
524 / second_derivatives.len() as f64;
525 (1.0_f64 / (1.0 + avg_second_derivative)).min(1.0)
526 }
527
528 fn compute_trend_strength(&self, history: &GradientHistory) -> f64 {
529 history.get_trend_slope().map(|slope| slope.abs().min(1.0)).unwrap_or(0.0)
530 }
531
532 fn compute_oscillation_frequency(&self, history: &GradientHistory) -> f64 {
533 if history.gradient_norms.len() < 4 {
534 return 0.0;
535 }
536
537 let sign_changes = history
538 .gradient_norms
539 .iter()
540 .collect::<Vec<&f64>>()
541 .windows(2)
542 .map(|pair| *pair[1] - *pair[0])
543 .collect::<Vec<f64>>()
544 .windows(2)
545 .filter(|pair| pair[0] * pair[1] < 0.0)
546 .count();
547
548 sign_changes as f64 / history.gradient_norms.len() as f64
549 }
550
551 fn compute_stability_measure(&self, history: &GradientHistory) -> f64 {
552 if history.gradient_norms.is_empty() {
553 return 0.0;
554 }
555
556 let mean = history.gradient_norms.iter().sum::<f64>() / history.gradient_norms.len() as f64;
557 let variance = history.gradient_norms.iter().map(|&x| (x - mean).powi(2)).sum::<f64>()
558 / history.gradient_norms.len() as f64;
559
560 if mean == 0.0 {
561 return 0.0;
562 }
563
564 let coefficient_of_variation = variance.sqrt() / mean;
565 (1.0 / (1.0 + coefficient_of_variation)).min(1.0)
566 }
567
    /// Derive a layer's health report: stability, flow-rate and convergence
    /// metrics plus detected risk factors, rolled up into an overall
    /// Healthy / Warning / Critical verdict.
    fn compute_layer_health_metrics(&self, history: &GradientHistory) -> LayerHealthMetrics {
        let gradient_statistics = self.compute_detailed_gradient_stats(history);
        let flow_characteristics = self.analyze_flow_characteristics(history);

        let gradient_stability = flow_characteristics.stability_measure;
        let information_flow_rate = self.compute_information_flow_rate(history);
        let neuron_activity_ratio = self.estimate_neuron_activity_ratio(history);
        let convergence_indicator = self.compute_convergence_indicator(history);

        // Threshold-based risk screening on mean magnitude, stability and
        // oscillation frequency.
        let mut risk_factors = Vec::new();
        if gradient_statistics.mean < 1e-5 {
            risk_factors.push("Very low gradient magnitude".to_string());
        }
        if gradient_statistics.mean > 100.0 {
            risk_factors.push("Very high gradient magnitude".to_string());
        }
        if gradient_stability < 0.5 {
            risk_factors.push("High gradient instability".to_string());
        }
        if flow_characteristics.oscillation_frequency > 0.5 {
            risk_factors.push("High oscillation frequency".to_string());
        }

        // More than two risk factors escalates Warning to Critical.
        let overall_health = if !risk_factors.is_empty() {
            if risk_factors.len() > 2 {
                LayerHealth::Critical
            } else {
                LayerHealth::Warning
            }
        } else {
            LayerHealth::Healthy
        };

        LayerHealthMetrics {
            overall_health,
            gradient_stability,
            information_flow_rate,
            neuron_activity_ratio,
            convergence_indicator,
            risk_factors,
        }
    }
610
611 fn compute_information_flow_rate(&self, history: &GradientHistory) -> f64 {
612 if history.gradient_norms.len() < 2 {
613 return 0.0;
614 }
615
616 let total_change: f64 = history
617 .gradient_norms
618 .iter()
619 .collect::<Vec<&f64>>()
620 .windows(2)
621 .map(|pair| (*pair[1] - *pair[0]).abs())
622 .sum();
623
624 total_change / history.gradient_norms.len() as f64
625 }
626
627 fn estimate_neuron_activity_ratio(&self, history: &GradientHistory) -> f64 {
628 let mean_gradient =
630 history.gradient_norms.iter().sum::<f64>() / history.gradient_norms.len() as f64;
631 (mean_gradient / (mean_gradient + 1e-5)).min(1.0)
632 }
633
634 fn compute_convergence_indicator(&self, history: &GradientHistory) -> f64 {
635 if history.gradient_norms.len() < self.convergence_window {
636 return 0.5; }
638
639 let recent: Vec<f64> = history
640 .gradient_norms
641 .iter()
642 .rev()
643 .take(self.convergence_window)
644 .cloned()
645 .collect();
646 let trend_slope = self.compute_trend_for_values(&recent);
647
648 if trend_slope < 0.0 {
650 (-trend_slope).min(1.0)
651 } else {
652 0.0
653 }
654 }
655
656 fn compute_trend_for_values(&self, values: &[f64]) -> f64 {
657 if values.len() < 3 {
658 return 0.0;
659 }
660
661 let n = values.len() as f64;
662 let sum_x: f64 = (0..values.len()).map(|i| i as f64).sum();
663 let sum_y: f64 = values.iter().sum();
664 let sum_xy: f64 = values.iter().enumerate().map(|(i, &y)| i as f64 * y).sum();
665 let sum_x2: f64 = (0..values.len()).map(|i| (i as f64).powi(2)).sum();
666
667 (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x.powi(2))
668 }
669
    /// Suggest per-layer remediations based on simple thresholds over the
    /// layer's gradient statistics and flow characteristics.
    fn generate_layer_optimization_suggestions(
        &self,
        _layer_name: &str,
        history: &GradientHistory,
    ) -> Vec<LayerOptimizationSuggestion> {
        let mut suggestions = Vec::new();
        let stats = self.compute_detailed_gradient_stats(history);
        let flow = self.analyze_flow_characteristics(history);

        // Vanishing-gradient symptom: suggest re-initialization.
        if stats.mean < 1e-5 {
            suggestions.push(LayerOptimizationSuggestion {
                suggestion_type: SuggestionType::WeightInitialization,
                description: "Consider better weight initialization methods".to_string(),
                rationale: "Very low gradients may indicate poor initialization".to_string(),
                expected_improvement: 0.7,
                implementation_complexity: ImplementationComplexity::Simple,
                side_effects: vec!["May require retraining from scratch".to_string()],
            });
        }

        // Exploding-gradient symptom: suggest lowering the learning rate.
        if stats.mean > 10.0 {
            suggestions.push(LayerOptimizationSuggestion {
                suggestion_type: SuggestionType::LearningRateAdjustment,
                description: "Reduce learning rate for this layer".to_string(),
                rationale: "High gradients may indicate learning rate is too large".to_string(),
                expected_improvement: 0.6,
                implementation_complexity: ImplementationComplexity::Simple,
                side_effects: vec!["May slow down convergence".to_string()],
            });
        }

        // Oscillation symptom: suggest regularization.
        if flow.oscillation_frequency > 0.5 {
            suggestions.push(LayerOptimizationSuggestion {
                suggestion_type: SuggestionType::RegularizationAdd,
                description: "Add dropout or weight decay".to_string(),
                rationale: "High oscillation may indicate overfitting or instability".to_string(),
                expected_improvement: 0.5,
                implementation_complexity: ImplementationComplexity::Moderate,
                side_effects: vec!["May reduce model capacity".to_string()],
            });
        }

        suggestions
    }
717
718 fn compare_with_other_layers(
719 &self,
720 layer_name: &str,
721 history: &GradientHistory,
722 all_histories: &HashMap<String, GradientHistory>,
723 ) -> ComparativeAnalysis {
724 let current_stats = self.compute_detailed_gradient_stats(history);
725 let mut other_means = Vec::new();
726
727 for (other_name, other_history) in all_histories {
728 if other_name != layer_name {
729 let other_stats = self.compute_detailed_gradient_stats(other_history);
730 other_means.push(other_stats.mean);
731 }
732 }
733
734 if other_means.is_empty() {
735 return ComparativeAnalysis {
736 relative_performance: 1.0,
737 rank_among_layers: 1,
738 similar_layers: vec![],
739 performance_gap: 0.0,
740 optimization_potential: 0.5,
741 };
742 }
743
744 other_means.sort_by(|a, b| b.partial_cmp(a).unwrap());
745 let rank = other_means
746 .iter()
747 .position(|&x| x <= current_stats.mean)
748 .unwrap_or(other_means.len())
749 + 1;
750
751 let avg_other_mean = other_means.iter().sum::<f64>() / other_means.len() as f64;
752 let relative_performance =
753 if avg_other_mean > 0.0 { current_stats.mean / avg_other_mean } else { 1.0 };
754
755 let performance_gap = (current_stats.mean - avg_other_mean).abs();
756
757 let similar_layers: Vec<String> = all_histories
759 .iter()
760 .filter(|(other_name, other_history)| {
761 if *other_name == layer_name {
762 return false;
763 }
764 let other_stats = self.compute_detailed_gradient_stats(other_history);
765 let ratio = (current_stats.mean / (other_stats.mean + 1e-8))
766 .max(other_stats.mean / (current_stats.mean + 1e-8));
767 ratio <= 1.2
768 })
769 .map(|(name, _)| name.clone())
770 .collect();
771
772 let optimization_potential = if relative_performance < 0.5 {
773 0.8
774 } else if relative_performance < 0.8 {
775 0.6
776 } else {
777 0.3
778 };
779
780 ComparativeAnalysis {
781 relative_performance,
782 rank_among_layers: rank,
783 similar_layers,
784 performance_gap,
785 optimization_potential,
786 }
787 }
788
789 fn analyze_network_level_gradients(
790 &self,
791 layer_details: &HashMap<String, LayerGradientDetails>,
792 ) -> NetworkLevelAnalysis {
793 let overall_gradient_health = self.assess_overall_health(layer_details);
794 let gradient_distribution = self.analyze_gradient_distribution(layer_details);
795 let layer_interactions = self.analyze_layer_interactions(layer_details);
796 let convergence_indicators = self.analyze_convergence_indicators(layer_details);
797 let training_dynamics = self.analyze_training_dynamics(layer_details);
798 let stability_assessment = self.assess_network_stability(layer_details);
799
800 NetworkLevelAnalysis {
801 overall_gradient_health,
802 gradient_distribution,
803 layer_interactions,
804 convergence_indicators,
805 training_dynamics,
806 stability_assessment,
807 }
808 }
809
810 fn assess_overall_health(
811 &self,
812 layer_details: &HashMap<String, LayerGradientDetails>,
813 ) -> LayerHealth {
814 let health_counts = layer_details
815 .values()
816 .map(|details| &details.health_metrics.overall_health)
817 .fold([0, 0, 0], |mut acc, health| {
818 match health {
819 LayerHealth::Healthy => acc[0] += 1,
820 LayerHealth::Warning => acc[1] += 1,
821 LayerHealth::Critical => acc[2] += 1,
822 LayerHealth::Unknown => {}, }
824 acc
825 });
826
827 let total = health_counts.iter().sum::<usize>();
828 if total == 0 {
829 return LayerHealth::Healthy;
830 }
831
832 let critical_ratio = health_counts[2] as f64 / total as f64;
833 let warning_ratio = health_counts[1] as f64 / total as f64;
834
835 if critical_ratio > 0.3 {
836 LayerHealth::Critical
837 } else if critical_ratio > 0.1 || warning_ratio > 0.5 {
838 LayerHealth::Warning
839 } else {
840 LayerHealth::Healthy
841 }
842 }
843
    /// Summarize how per-layer mean gradient norms are distributed:
    /// moments, each layer's ratio to the network mean, and a coarse shape
    /// classification. Returns a Degenerate all-zero distribution when no
    /// layers are present.
    fn analyze_gradient_distribution(
        &self,
        layer_details: &HashMap<String, LayerGradientDetails>,
    ) -> GradientDistribution {
        let gradient_means: Vec<f64> =
            layer_details.values().map(|details| details.gradient_statistics.mean).collect();

        if gradient_means.is_empty() {
            return GradientDistribution {
                mean_gradient_norm: 0.0,
                gradient_variance: 0.0,
                gradient_skewness: 0.0,
                gradient_kurtosis: 0.0,
                layer_gradient_ratios: HashMap::new(),
                distribution_type: DistributionType::Degenerate,
            };
        }

        let n = gradient_means.len() as f64;
        let mean_gradient_norm = gradient_means.iter().sum::<f64>() / n;
        // Population variance over the per-layer means.
        let gradient_variance =
            gradient_means.iter().map(|&x| (x - mean_gradient_norm).powi(2)).sum::<f64>() / n;

        let std_dev = gradient_variance.sqrt();
        // Standardized third moment; 0.0 when there is no spread.
        let gradient_skewness = if std_dev > 0.0 {
            gradient_means
                .iter()
                .map(|&x| ((x - mean_gradient_norm) / std_dev).powi(3))
                .sum::<f64>()
                / n
        } else {
            0.0
        };

        // Excess kurtosis (normal = 0); 0.0 when there is no spread.
        let gradient_kurtosis = if std_dev > 0.0 {
            gradient_means
                .iter()
                .map(|&x| ((x - mean_gradient_norm) / std_dev).powi(4))
                .sum::<f64>()
                / n
                - 3.0
        } else {
            0.0
        };

        // Each layer's mean relative to the network mean; defaults to 1.0
        // when the network mean is zero to avoid division by zero.
        let mut layer_gradient_ratios = HashMap::new();
        for (layer_name, details) in layer_details {
            let ratio = if mean_gradient_norm > 0.0 {
                details.gradient_statistics.mean / mean_gradient_norm
            } else {
                1.0
            };
            layer_gradient_ratios.insert(layer_name.clone(), ratio);
        }

        let distribution_type =
            self.classify_distribution_type(gradient_skewness, gradient_kurtosis);

        GradientDistribution {
            mean_gradient_norm,
            gradient_variance,
            gradient_skewness,
            gradient_kurtosis,
            layer_gradient_ratios,
            distribution_type,
        }
    }
911
912 fn classify_distribution_type(&self, skewness: f64, kurtosis: f64) -> DistributionType {
913 if skewness.abs() > 2.0 {
914 DistributionType::Skewed
915 } else if kurtosis > 3.0 {
916 DistributionType::HeavyTailed
917 } else if kurtosis < -1.0 {
918 DistributionType::Multimodal
919 } else {
920 DistributionType::Normal
921 }
922 }
923
924 fn analyze_layer_interactions(
925 &self,
926 layer_details: &HashMap<String, LayerGradientDetails>,
927 ) -> Vec<LayerInteraction> {
928 let mut interactions = Vec::new();
929
930 let layer_names: Vec<String> = layer_details.keys().cloned().collect();
931 for i in 0..layer_names.len() {
932 for j in (i + 1)..layer_names.len() {
933 let layer1 = &layer_names[i];
934 let layer2 = &layer_names[j];
935
936 if let (Some(details1), Some(details2)) =
937 (layer_details.get(layer1), layer_details.get(layer2))
938 {
939 let interaction_strength =
940 self.compute_interaction_strength(details1, details2);
941 let interaction_type = self.classify_interaction_type(details1, details2);
942 let impact_score = interaction_strength * 0.5; interactions.push(LayerInteraction {
945 layer1: layer1.clone(),
946 layer2: layer2.clone(),
947 interaction_strength,
948 interaction_type,
949 impact_score,
950 });
951 }
952 }
953 }
954
955 interactions
956 }
957
958 fn compute_interaction_strength(
959 &self,
960 details1: &LayerGradientDetails,
961 details2: &LayerGradientDetails,
962 ) -> f64 {
963 let mean_diff =
964 (details1.gradient_statistics.mean - details2.gradient_statistics.mean).abs();
965 let stability_diff = (details1.flow_characteristics.stability_measure
966 - details2.flow_characteristics.stability_measure)
967 .abs();
968
969 let combined_diff = mean_diff + stability_diff;
971 1.0 / (1.0 + combined_diff)
972 }
973
974 fn classify_interaction_type(
975 &self,
976 details1: &LayerGradientDetails,
977 details2: &LayerGradientDetails,
978 ) -> InteractionType {
979 let convergence_diff = (details1.health_metrics.convergence_indicator
980 - details2.health_metrics.convergence_indicator)
981 .abs();
982
983 if convergence_diff < 0.1 {
984 InteractionType::Cooperative
985 } else if convergence_diff > 0.5 {
986 InteractionType::Competitive
987 } else {
988 InteractionType::Neutral
989 }
990 }
991
992 fn analyze_convergence_indicators(
993 &self,
994 layer_details: &HashMap<String, LayerGradientDetails>,
995 ) -> ConvergenceIndicators {
996 let convergence_scores: Vec<f64> = layer_details
997 .values()
998 .map(|details| details.health_metrics.convergence_indicator)
999 .collect();
1000
1001 let gradient_convergence_score =
1002 convergence_scores.iter().sum::<f64>() / convergence_scores.len().max(1) as f64;
1003 let parameter_convergence_score = gradient_convergence_score * 0.8; let loss_convergence_score = gradient_convergence_score * 0.9; let convergence_trend = if gradient_convergence_score > 0.8 {
1007 ConvergenceTrend::Converging
1008 } else if gradient_convergence_score > 0.6 {
1009 ConvergenceTrend::Stable
1010 } else if gradient_convergence_score < 0.3 {
1011 ConvergenceTrend::Diverging
1012 } else {
1013 ConvergenceTrend::Unknown
1014 };
1015
1016 let estimated_steps_to_convergence = if gradient_convergence_score > 0.1 {
1017 Some(((1.0 - gradient_convergence_score) * 1000.0) as usize)
1018 } else {
1019 None
1020 };
1021
1022 ConvergenceIndicators {
1023 gradient_convergence_score,
1024 parameter_convergence_score,
1025 loss_convergence_score,
1026 convergence_trend,
1027 estimated_steps_to_convergence,
1028 }
1029 }
1030
1031 fn analyze_training_dynamics(
1032 &self,
1033 layer_details: &HashMap<String, LayerGradientDetails>,
1034 ) -> TrainingDynamics {
1035 let avg_convergence = layer_details
1036 .values()
1037 .map(|details| details.health_metrics.convergence_indicator)
1038 .sum::<f64>()
1039 / layer_details.len().max(1) as f64;
1040
1041 let learning_phase = match avg_convergence {
1042 x if x < 0.2 => LearningPhase::InitialLearning,
1043 x if x < 0.4 => LearningPhase::RapidLearning,
1044 x if x < 0.6 => LearningPhase::Refinement,
1045 x if x < 0.8 => LearningPhase::Convergence,
1046 _ => LearningPhase::Plateau,
1047 };
1048
1049 let gradient_momentum = avg_convergence * 0.8; let learning_velocity = avg_convergence * 1.2; let adaptation_rate = 1.0 - avg_convergence; let plateau_detection = PlateauDetection {
1054 is_plateau: avg_convergence > 0.9,
1055 plateau_duration: if avg_convergence > 0.9 { 10 } else { 0 },
1056 plateau_severity: if avg_convergence > 0.95 {
1057 PlateauSeverity::Severe
1058 } else {
1059 PlateauSeverity::Mild
1060 },
1061 suggested_actions: if avg_convergence > 0.9 {
1062 vec![
1063 "Consider learning rate reduction".to_string(),
1064 "Add regularization".to_string(),
1065 ]
1066 } else {
1067 vec![]
1068 },
1069 };
1070
1071 TrainingDynamics {
1072 learning_phase,
1073 gradient_momentum,
1074 learning_velocity,
1075 adaptation_rate,
1076 plateau_detection,
1077 }
1078 }
1079
    /// Assess overall network stability from the per-layer stability
    /// measures, identify instability sources, and produce a short/long-term
    /// forecast.
    fn assess_network_stability(
        &self,
        layer_details: &HashMap<String, LayerGradientDetails>,
    ) -> StabilityAssessment {
        let stability_scores: Vec<f64> = layer_details
            .values()
            .map(|details| details.flow_characteristics.stability_measure)
            .collect();

        // max(1) guards the division when no layers are present (yields 0.0).
        let overall_stability =
            stability_scores.iter().sum::<f64>() / stability_scores.len().max(1) as f64;

        // Only two outcomes are derivable from a single snapshot; Improving
        // would require historical assessments that aren't tracked here.
        let stability_trend = if overall_stability > self.stability_threshold {
            StabilityTrend::Stable
        } else {
            StabilityTrend::Degrading
        };

        let instability_sources = self.identify_instability_sources(layer_details);

        let stability_forecast = StabilityForecast {
            short_term_outlook: if overall_stability > 0.7 {
                StabilityOutlook::Stable
            } else {
                StabilityOutlook::Deteriorating
            },
            // Long-term outlook uses a stricter threshold than short-term.
            long_term_outlook: if overall_stability > 0.8 {
                StabilityOutlook::Stable
            } else {
                StabilityOutlook::Uncertain
            },
            confidence_level: overall_stability,
            recommended_monitoring: vec![
                "Monitor gradient norms".to_string(),
                "Track convergence indicators".to_string(),
            ],
        };

        StabilityAssessment {
            overall_stability,
            stability_trend,
            instability_sources,
            stability_forecast,
        }
    }
1125
    /// Scan every layer for explosion, vanishing and oscillation symptoms,
    /// emitting one InstabilitySource per detected symptom per layer.
    fn identify_instability_sources(
        &self,
        layer_details: &HashMap<String, LayerGradientDetails>,
    ) -> Vec<InstabilitySource> {
        let mut sources = Vec::new();

        for (layer_name, details) in layer_details {
            // Mean norm above 100 treated as explosion; severity scales
            // linearly with the excess.
            if details.gradient_statistics.mean > 100.0 {
                sources.push(InstabilitySource {
                    source_type: InstabilityType::GradientExplosion,
                    affected_layers: vec![layer_name.clone()],
                    severity: details.gradient_statistics.mean / 100.0,
                    description: format!("High gradient magnitude in layer {}", layer_name),
                });
            }

            // Mean norm below 1e-5 treated as vanishing; severity approaches
            // 1.0 as the mean approaches zero.
            if details.gradient_statistics.mean < 1e-5 {
                sources.push(InstabilitySource {
                    source_type: InstabilityType::GradientVanishing,
                    affected_layers: vec![layer_name.clone()],
                    severity: 1.0 - (details.gradient_statistics.mean * 1e5),
                    description: format!("Very low gradient magnitude in layer {}", layer_name),
                });
            }

            // Frequent sign flips in the norm's direction of change.
            if details.flow_characteristics.oscillation_frequency > 0.5 {
                sources.push(InstabilitySource {
                    source_type: InstabilityType::Oscillation,
                    affected_layers: vec![layer_name.clone()],
                    severity: details.flow_characteristics.oscillation_frequency,
                    description: format!("High oscillation frequency in layer {}", layer_name),
                });
            }
        }

        sources
    }
1163
1164 fn build_gradient_hierarchy(
1165 &self,
1166 layer_details: &HashMap<String, LayerGradientDetails>,
1167 ) -> GradientHierarchy {
1168 let mut layer_groups = Vec::new();
1170 let mut hierarchy_levels = Vec::new();
1171 let cross_level_interactions = Vec::new(); let high_gradient_layers: Vec<String> = layer_details
1175 .iter()
1176 .filter(|(_, details)| details.gradient_statistics.mean > 1.0)
1177 .map(|(name, _)| name.clone())
1178 .collect();
1179
1180 let medium_gradient_layers: Vec<String> = layer_details
1181 .iter()
1182 .filter(|(_, details)| {
1183 details.gradient_statistics.mean >= 0.1 && details.gradient_statistics.mean <= 1.0
1184 })
1185 .map(|(name, _)| name.clone())
1186 .collect();
1187
1188 let low_gradient_layers: Vec<String> = layer_details
1189 .iter()
1190 .filter(|(_, details)| details.gradient_statistics.mean < 0.1)
1191 .map(|(name, _)| name.clone())
1192 .collect();
1193
1194 if !high_gradient_layers.is_empty() {
1195 layer_groups.push(LayerGroup {
1196 group_name: "High Gradient Layers".to_string(),
1197 layers: high_gradient_layers.clone(),
1198 group_characteristics: GroupCharacteristics {
1199 average_gradient_norm: 2.0, gradient_synchronization: 0.8,
1201 learning_rate_sensitivity: 0.9,
1202 optimization_difficulty: OptimizationDifficulty::Difficult,
1203 },
1204 internal_coherence: 0.7,
1205 });
1206
1207 hierarchy_levels.push(HierarchyLevel {
1208 level_id: 0,
1209 level_name: "High Gradient Level".to_string(),
1210 layer_groups: vec!["High Gradient Layers".to_string()],
1211 level_importance: 0.9,
1212 optimization_impact: 0.8,
1213 });
1214 }
1215
1216 if !medium_gradient_layers.is_empty() {
1217 layer_groups.push(LayerGroup {
1218 group_name: "Medium Gradient Layers".to_string(),
1219 layers: medium_gradient_layers,
1220 group_characteristics: GroupCharacteristics {
1221 average_gradient_norm: 0.5,
1222 gradient_synchronization: 0.6,
1223 learning_rate_sensitivity: 0.5,
1224 optimization_difficulty: OptimizationDifficulty::Moderate,
1225 },
1226 internal_coherence: 0.8,
1227 });
1228
1229 hierarchy_levels.push(HierarchyLevel {
1230 level_id: 1,
1231 level_name: "Medium Gradient Level".to_string(),
1232 layer_groups: vec!["Medium Gradient Layers".to_string()],
1233 level_importance: 0.7,
1234 optimization_impact: 0.6,
1235 });
1236 }
1237
1238 if !low_gradient_layers.is_empty() {
1239 layer_groups.push(LayerGroup {
1240 group_name: "Low Gradient Layers".to_string(),
1241 layers: low_gradient_layers,
1242 group_characteristics: GroupCharacteristics {
1243 average_gradient_norm: 0.05,
1244 gradient_synchronization: 0.4,
1245 learning_rate_sensitivity: 0.3,
1246 optimization_difficulty: OptimizationDifficulty::Easy,
1247 },
1248 internal_coherence: 0.5,
1249 });
1250
1251 hierarchy_levels.push(HierarchyLevel {
1252 level_id: 2,
1253 level_name: "Low Gradient Level".to_string(),
1254 layer_groups: vec!["Low Gradient Layers".to_string()],
1255 level_importance: 0.5,
1256 optimization_impact: 0.4,
1257 });
1258 }
1259
1260 GradientHierarchy {
1261 layer_groups,
1262 hierarchy_levels,
1263 cross_level_interactions,
1264 }
1265 }
1266
1267 fn rank_optimization_priorities(
1268 &self,
1269 layer_details: &HashMap<String, LayerGradientDetails>,
1270 network_analysis: &NetworkLevelAnalysis,
1271 ) -> Vec<OptimizationPriority> {
1272 let mut priorities = Vec::new();
1273
1274 for (layer_name, details) in layer_details {
1275 let priority_score = self.calculate_priority_score(details, network_analysis);
1276 let urgency_level = self.determine_urgency_level(details);
1277 let optimization_potential = details.comparative_analysis.optimization_potential;
1278 let recommended_actions = self.generate_prioritized_actions(details);
1279
1280 priorities.push(OptimizationPriority {
1281 target_name: layer_name.clone(),
1282 target_type: OptimizationTarget::IndividualLayer,
1283 priority_score,
1284 urgency_level,
1285 optimization_potential,
1286 recommended_actions,
1287 });
1288 }
1289
1290 priorities.sort_by(|a, b| b.priority_score.partial_cmp(&a.priority_score).unwrap());
1292
1293 priorities
1294 }
1295
1296 fn calculate_priority_score(
1297 &self,
1298 details: &LayerGradientDetails,
1299 network_analysis: &NetworkLevelAnalysis,
1300 ) -> f64 {
1301 let health_weight = match details.health_metrics.overall_health {
1302 LayerHealth::Critical => 1.0,
1303 LayerHealth::Warning => 0.7,
1304 LayerHealth::Healthy => 0.3,
1305 LayerHealth::Unknown => 0.5, };
1307
1308 let stability_weight = 1.0 - details.flow_characteristics.stability_measure;
1309 let optimization_weight = details.comparative_analysis.optimization_potential;
1310 let network_impact_weight = details.health_metrics.information_flow_rate
1311 / network_analysis.gradient_distribution.mean_gradient_norm.max(1e-8);
1312
1313 (health_weight * 0.4
1314 + stability_weight * 0.3
1315 + optimization_weight * 0.2
1316 + network_impact_weight * 0.1)
1317 .min(1.0)
1318 }
1319
1320 fn determine_urgency_level(&self, details: &LayerGradientDetails) -> UrgencyLevel {
1321 match details.health_metrics.overall_health {
1322 LayerHealth::Critical => UrgencyLevel::Critical,
1323 LayerHealth::Warning => {
1324 if details.flow_characteristics.stability_measure < 0.3 {
1325 UrgencyLevel::High
1326 } else {
1327 UrgencyLevel::Medium
1328 }
1329 },
1330 LayerHealth::Healthy => UrgencyLevel::Low,
1331 LayerHealth::Unknown => UrgencyLevel::Medium, }
1333 }
1334
1335 fn generate_prioritized_actions(
1336 &self,
1337 details: &LayerGradientDetails,
1338 ) -> Vec<PrioritizedAction> {
1339 let mut actions = Vec::new();
1340
1341 if details.gradient_statistics.mean < 1e-5 {
1342 actions.push(PrioritizedAction {
1343 action_name: "Weight Initialization Improvement".to_string(),
1344 action_type: ActionType::ParameterAdjustment,
1345 expected_impact: 0.8,
1346 implementation_effort: ImplementationEffort::Moderate,
1347 prerequisites: vec!["Model architecture review".to_string()],
1348 });
1349 }
1350
1351 if details.gradient_statistics.mean > 10.0 {
1352 actions.push(PrioritizedAction {
1353 action_name: "Learning Rate Reduction".to_string(),
1354 action_type: ActionType::LearningRateScheduling,
1355 expected_impact: 0.7,
1356 implementation_effort: ImplementationEffort::Minimal,
1357 prerequisites: vec![],
1358 });
1359 }
1360
1361 if details.flow_characteristics.stability_measure < 0.5 {
1362 actions.push(PrioritizedAction {
1363 action_name: "Gradient Clipping".to_string(),
1364 action_type: ActionType::OptimizationTechnique,
1365 expected_impact: 0.6,
1366 implementation_effort: ImplementationEffort::Low,
1367 prerequisites: vec!["Hyperparameter tuning".to_string()],
1368 });
1369 }
1370
1371 actions
1372 }
1373
    /// Public entry point: run the full enhanced gradient analysis over the
    /// collected per-layer gradient histories.
    ///
    /// Thin wrapper that delegates to `generate_enhanced_analysis` (defined
    /// elsewhere in this impl).
    pub fn analyze_gradients(
        &self,
        gradient_histories: &HashMap<String, GradientHistory>,
    ) -> EnhancedLayerGradientAnalysis {
        self.generate_enhanced_analysis(gradient_histories)
    }
1382}