use anyhow::Result;
use chrono::{DateTime, Utc};
use scirs2_core::ndarray::*; use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Configuration for the advanced ML debugger.
///
/// Each `enable_*` flag gates one analysis family; a disabled analysis
/// returns an error when invoked instead of producing empty results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdvancedMLDebuggingConfig {
    /// Gate for per-layer learning-rate analysis.
    pub enable_layer_wise_lr_analysis: bool,
    /// Gate for hyperparameter/architecture/data sensitivity analysis.
    pub enable_model_sensitivity_analysis: bool,
    // NOTE(review): the six flags below are carried in the config but are not
    // consulted by any method visible in this file — confirm their consumers.
    pub enable_gradient_flow_optimization: bool,
    pub enable_neural_architecture_debugging: bool,
    pub enable_activation_pattern_analysis: bool,
    pub enable_weight_distribution_analysis: bool,
    pub enable_training_dynamics_analysis: bool,
    pub enable_optimization_landscape_analysis: bool,
    /// Number of samples to draw for sensitivity estimation.
    pub sensitivity_samples: usize,
    /// Threshold that triggers learning-rate adaptation.
    pub lr_adaptation_threshold: f64,
    /// Upper bound on how many layers are analyzed.
    pub max_layers_to_analyze: usize,
}
impl Default for AdvancedMLDebuggingConfig {
    /// Default configuration: every analysis family switched on,
    /// 1000 sensitivity samples, a 0.1 adaptation threshold and at
    /// most 50 layers analyzed.
    fn default() -> Self {
        // All analysis toggles share the same default.
        let all_analyses_on = true;
        Self {
            enable_layer_wise_lr_analysis: all_analyses_on,
            enable_model_sensitivity_analysis: all_analyses_on,
            enable_gradient_flow_optimization: all_analyses_on,
            enable_neural_architecture_debugging: all_analyses_on,
            enable_activation_pattern_analysis: all_analyses_on,
            enable_weight_distribution_analysis: all_analyses_on,
            enable_training_dynamics_analysis: all_analyses_on,
            enable_optimization_landscape_analysis: all_analyses_on,
            sensitivity_samples: 1000,
            lr_adaptation_threshold: 0.1,
            max_layers_to_analyze: 50,
        }
    }
}
/// Result of one layer-wise learning-rate analysis pass.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LayerWiseLRAnalysisResult {
    /// When the analysis was performed.
    pub timestamp: DateTime<Utc>,
    /// Per-layer recommendation, keyed by layer id.
    pub layer_lr_recommendations: HashMap<String, LayerLRRecommendation>,
    pub global_lr_insights: GlobalLRInsights,
    pub adaptation_strategy: LRAdaptationStrategy,
    pub training_phase_recommendations: Vec<TrainingPhaseRecommendation>,
    pub lr_schedule_predictions: Vec<LRSchedulePrediction>,
}

/// Learning-rate recommendation for a single layer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LayerLRRecommendation {
    pub layer_id: String,
    /// Inferred category ("attention", "feedforward", "embedding" or "unknown").
    pub layer_type: String,
    pub current_lr: f64,
    pub recommended_lr: f64,
    /// Confidence in the recommendation.
    pub confidence: f64,
    /// Human-readable justification for the recommendation.
    pub reasoning: String,
    pub layer_metrics: LayerLRMetrics,
    /// Ratio-based measure of how far the recommended LR is from the current LR.
    pub lr_sensitivity: f64,
    pub urgency: AdaptationUrgency,
}

/// Summary statistics used to judge a layer's learning behavior.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LayerLRMetrics {
    /// Mean absolute gradient value for the layer.
    pub gradient_magnitude: f64,
    /// Approximate weight-update size (gradient magnitude * current LR).
    pub weight_update_magnitude: f64,
    /// Mean absolute weight value.
    pub parameter_norm: f64,
    pub loss_contribution: f64,
    /// 1 / (1 + gradient variance); higher means more stable.
    pub stability_score: f64,
    pub convergence_rate: f64,
    /// Gradient magnitude relative to parameter norm.
    pub learning_efficiency: f64,
}

/// How urgently a layer's learning rate should be adjusted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AdaptationUrgency {
    Low,
    Medium,
    High,
    Critical,
}

/// Model-wide learning-rate health derived from per-layer results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobalLRInsights {
    /// Mean learning efficiency across analyzed layers.
    pub overall_efficiency: f64,
    pub lr_distribution_health: f64,
    pub gradient_flow_quality: f64,
    pub training_stability: TrainingStability,
    pub global_adjustments: Vec<GlobalLRAdjustment>,
    /// Descriptions of layers flagged with `AdaptationUrgency::Critical`.
    pub critical_issues: Vec<String>,
}

/// Stability assessment of the training process.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrainingStability {
    pub stability_score: f64,
    pub instability_indicators: Vec<InstabilityIndicator>,
    pub stability_trends: Vec<StabilityTrendPoint>,
    /// Forecast of the stability score going forward.
    pub predicted_stability: f64,
}

/// One detected source of training instability.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InstabilityIndicator {
    pub instability_type: InstabilityType,
    pub severity: f64,
    pub affected_layers: Vec<String>,
    pub recommended_actions: Vec<String>,
}

/// Categories of training instability.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum InstabilityType {
    GradientExplosion,
    GradientVanishing,
    OscillatingLoss,
    SlowConvergence,
    WeightDivergence,
    NumericalInstability,
}

/// Stability measured at a single point during training.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StabilityTrendPoint {
    pub time_step: usize,
    pub stability_score: f64,
    /// Named factors and their weight in the score at this step.
    pub contributing_factors: HashMap<String, f64>,
}

/// A model-wide learning-rate adjustment suggestion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobalLRAdjustment {
    pub adjustment_type: GlobalAdjustmentType,
    pub magnitude: f64,
    pub expected_impact: f64,
    pub priority: AdjustmentPriority,
    /// Free-text guidance on how to apply the adjustment.
    pub instructions: String,
}

/// Kinds of model-wide learning-rate adjustments.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum GlobalAdjustmentType {
    UniformScaling,
    LayerTypeSpecific,
    DepthDependent,
    AdaptiveScheduling,
    WarmupAdjustment,
    DecayRateModification,
}

/// How soon an adjustment should be applied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AdjustmentPriority {
    Low,
    Medium,
    High,
    Immediate,
}
/// A concrete plan for adapting learning rates, with steps and risks.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LRAdaptationStrategy {
    pub strategy_name: String,
    pub description: String,
    pub implementation_steps: Vec<ImplementationStep>,
    pub expected_benefits: Vec<String>,
    pub potential_risks: Vec<String>,
    /// Metrics to watch to judge whether the strategy is working.
    pub success_metrics: Vec<String>,
    pub monitoring_requirements: Vec<String>,
}

/// One ordered step inside an [`LRAdaptationStrategy`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImplementationStep {
    pub step_number: usize,
    pub description: String,
    /// Code-level changes required by this step.
    pub code_changes: Vec<String>,
    /// Rough time estimate (free text, e.g. "1-2 days").
    pub timeline: String,
    pub dependencies: Vec<String>,
}

/// A recommended training phase (e.g. warmup) with its LR schedule.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrainingPhaseRecommendation {
    pub phase_name: String,
    pub duration_epochs: usize,
    pub lr_schedule: LRSchedule,
    pub objectives: Vec<String>,
    pub success_criteria: Vec<String>,
    /// Conditions that signal the phase can end.
    pub transition_conditions: Vec<String>,
}

/// A learning-rate schedule specification.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LRSchedule {
    pub schedule_type: LRScheduleType,
    pub initial_lr: f64,
    /// Schedule-specific parameters keyed by name.
    pub parameters: HashMap<String, f64>,
    /// Per-layer LR multipliers keyed by layer id.
    pub layer_multipliers: HashMap<String, f64>,
}

/// Supported learning-rate schedule families.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum LRScheduleType {
    Constant,
    LinearDecay,
    ExponentialDecay,
    CosineAnnealing,
    StepDecay,
    CyclicalLR,
    OneCycleLR,
    AdaptiveSchedule,
}

/// Predicted outcome of running a particular LR schedule.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LRSchedulePrediction {
    pub schedule: LRSchedule,
    pub predicted_accuracy: f64,
    pub predicted_convergence_epochs: usize,
    pub predicted_stability: f64,
    /// Confidence in this prediction.
    pub prediction_confidence: f64,
    pub risk_assessment: RiskAssessment,
}

/// Overall and itemized risk for a proposed schedule.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RiskAssessment {
    pub overall_risk: RiskLevel,
    pub specific_risks: Vec<SpecificRisk>,
    pub mitigation_strategies: Vec<String>,
}

/// Coarse risk buckets.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RiskLevel {
    VeryLow,
    Low,
    Medium,
    High,
    VeryHigh,
}

/// A single identified risk with probability and impact estimates.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpecificRisk {
    pub risk_type: String,
    pub probability: f64,
    pub impact: f64,
    pub description: String,
}
/// Result of one model sensitivity analysis pass.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelSensitivityAnalysisResult {
    /// When the analysis was performed.
    pub timestamp: DateTime<Utc>,
    pub hyperparameter_sensitivity: HyperparameterSensitivity,
    pub architecture_sensitivity: ArchitectureSensitivity,
    pub data_sensitivity: DataSensitivity,
    pub training_sensitivity: TrainingSensitivity,
    pub sensitivity_insights: SensitivityInsights,
}

/// Sensitivity of model performance to hyperparameter choices.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HyperparameterSensitivity {
    pub learning_rate_sensitivity: ParameterSensitivity,
    pub batch_size_sensitivity: ParameterSensitivity,
    pub regularization_sensitivity: ParameterSensitivity,
    /// Extra per-parameter sensitivities keyed by parameter name.
    pub architecture_param_sensitivity: HashMap<String, ParameterSensitivity>,
    pub most_sensitive_params: Vec<String>,
    pub least_sensitive_params: Vec<String>,
    pub interaction_effects: Vec<ParameterInteraction>,
}

/// Sensitivity profile for one scalar hyperparameter.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParameterSensitivity {
    pub parameter_name: String,
    pub current_value: f64,
    pub sensitivity_score: f64,
    /// (min, max) of the recommended operating range.
    pub optimal_range: (f64, f64),
    /// (parameter value, performance) sample points.
    pub impact_curve: Vec<(f64, f64)>,
    /// (min, max) of the region where training stays stable.
    pub stability_region: (f64, f64),
    /// Values beyond which behavior degrades sharply.
    pub critical_thresholds: Vec<f64>,
}

/// Pairwise interaction effect between two hyperparameters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParameterInteraction {
    pub param1: String,
    pub param2: String,
    pub interaction_strength: f64,
    pub interaction_type: InteractionType,
    /// Per-parameter (min, max) for the jointly optimal region.
    pub joint_optimal_region: HashMap<String, (f64, f64)>,
}

/// How two parameters interact.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum InteractionType {
    Synergistic,
    Antagonistic,
    Independent,
    Conditional,
}

/// Sensitivity of performance to architectural choices.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchitectureSensitivity {
    pub depth_sensitivity: ArchitecturalSensitivity,
    pub width_sensitivity: ArchitecturalSensitivity,
    pub attention_head_sensitivity: ArchitecturalSensitivity,
    pub skip_connection_sensitivity: ArchitecturalSensitivity,
    /// Relative importance of each architectural component.
    pub component_importance: HashMap<String, f64>,
    pub bottlenecks: Vec<ArchitecturalBottleneck>,
}

/// Sensitivity profile for one architectural component.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchitecturalSensitivity {
    pub component_name: String,
    pub change_sensitivity: f64,
    /// (configuration value, performance) sample points.
    pub degradation_curve: Vec<(f64, f64)>,
    pub min_viable_config: f64,
    pub optimal_config: f64,
    /// Configuration value past which gains flatten out.
    pub diminishing_returns_threshold: f64,
}

/// A located performance bottleneck in the architecture.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchitecturalBottleneck {
    pub location: String,
    pub bottleneck_type: BottleneckType,
    pub severity: f64,
    pub performance_impact: f64,
    pub resolution_recommendations: Vec<String>,
}

/// Categories of architectural bottleneck.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum BottleneckType {
    ComputationalBottleneck,
    MemoryBottleneck,
    InformationBottleneck,
    CapacityBottleneck,
    CommunicationBottleneck,
}

/// Sensitivity of performance to properties of the training data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataSensitivity {
    pub data_size_sensitivity: DataSizeSensitivity,
    pub data_quality_sensitivity: DataQualitySensitivity,
    pub distribution_sensitivity: DistributionSensitivity,
    pub feature_sensitivity: FeatureSensitivityAnalysis,
}

/// How performance scales with dataset size.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataSizeSensitivity {
    pub current_size: usize,
    pub minimum_effective_size: usize,
    /// (dataset size, performance) sample points.
    pub performance_curve: Vec<(usize, f64)>,
    pub data_efficiency: f64,
    pub diminishing_returns_point: usize,
}

/// How performance depends on data quality.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataQualitySensitivity {
    pub noise_tolerance: f64,
    pub label_quality_importance: f64,
    pub feature_quality_importance: f64,
    /// (quality level, performance) sample points.
    pub quality_impact_curve: Vec<(f64, f64)>,
}

/// How performance reacts to shifts in the data distribution.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistributionSensitivity {
    pub shift_sensitivity: f64,
    pub imbalance_sensitivity: f64,
    pub domain_adaptation_requirements: Vec<String>,
    pub distribution_robustness: f64,
}

/// Feature-level importance and stability findings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FeatureSensitivityAnalysis {
    pub most_important_features: Vec<String>,
    pub least_important_features: Vec<String>,
    /// Interaction strength for each feature pair.
    pub feature_interactions: HashMap<(String, String), f64>,
    pub feature_stability: HashMap<String, f64>,
}

/// Sensitivity of the outcome to training-procedure choices.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrainingSensitivity {
    pub initialization_sensitivity: InitializationSensitivity,
    pub optimization_sensitivity: OptimizationSensitivity,
    pub schedule_sensitivity: ScheduleSensitivity,
    pub regularization_sensitivity: RegularizationSensitivity,
}

/// Sensitivity to parameter initialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InitializationSensitivity {
    pub weight_init_sensitivity: f64,
    pub bias_init_sensitivity: f64,
    pub seed_sensitivity: f64,
    /// Importance of each initialization scheme keyed by name.
    pub scheme_importance: HashMap<String, f64>,
}

/// Sensitivity to optimizer choice and its moments.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationSensitivity {
    pub optimizer_sensitivity: f64,
    pub momentum_sensitivity: f64,
    pub second_moment_sensitivity: f64,
    /// Relative scores for candidate optimizers keyed by name.
    pub optimizer_comparison: HashMap<String, f64>,
}

/// Sensitivity to the learning-rate schedule and training duration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScheduleSensitivity {
    pub lr_schedule_sensitivity: f64,
    pub duration_sensitivity: f64,
    pub warmup_sensitivity: f64,
    pub schedule_param_importance: HashMap<String, f64>,
}

/// Sensitivity to regularization techniques.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegularizationSensitivity {
    pub dropout_sensitivity: f64,
    pub weight_decay_sensitivity: f64,
    pub batch_norm_sensitivity: f64,
    /// Relative scores for regularization methods keyed by name.
    pub method_comparison: HashMap<String, f64>,
}

/// High-level conclusions drawn from all sensitivity analyses.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SensitivityInsights {
    pub most_critical_factors: Vec<String>,
    pub least_critical_factors: Vec<String>,
    /// Findings that contradict common expectations.
    pub surprising_findings: Vec<String>,
    pub robustness_assessment: RobustnessAssessment,
    pub optimization_recommendations: Vec<String>,
}

/// Overall robustness verdict with per-category breakdown.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RobustnessAssessment {
    pub overall_robustness: f64,
    pub category_robustness: HashMap<String, f64>,
    pub vulnerabilities: Vec<Vulnerability>,
    pub strengths: Vec<String>,
}

/// A single identified robustness weakness.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Vulnerability {
    pub vulnerability_type: String,
    pub severity: f64,
    pub impact: String,
    pub mitigation_strategies: Vec<String>,
}
/// Stateful debugger that runs learning-rate and sensitivity analyses
/// and accumulates their results for later reporting.
#[derive(Debug)]
pub struct AdvancedMLDebugger {
    // Immutable after construction; gates which analyses may run.
    config: AdvancedMLDebuggingConfig,
    // Append-only history of LR analyses, oldest first.
    lr_analysis_results: Vec<LayerWiseLRAnalysisResult>,
    // Append-only history of sensitivity analyses, oldest first.
    sensitivity_analysis_results: Vec<ModelSensitivityAnalysisResult>,
}
impl AdvancedMLDebugger {
pub fn new(config: AdvancedMLDebuggingConfig) -> Self {
Self {
config,
lr_analysis_results: Vec::new(),
sensitivity_analysis_results: Vec::new(),
}
}
pub async fn analyze_layer_wise_learning_rates(
&mut self,
layer_gradients: &HashMap<String, ArrayD<f32>>,
layer_weights: &HashMap<String, ArrayD<f32>>,
current_lr: f64,
loss_history: &[f64],
) -> Result<LayerWiseLRAnalysisResult> {
if !self.config.enable_layer_wise_lr_analysis {
return Err(anyhow::anyhow!(
"Layer-wise learning rate analysis is disabled"
));
}
let mut layer_lr_recommendations = HashMap::new();
for (layer_id, gradients) in layer_gradients {
if let Some(weights) = layer_weights.get(layer_id) {
let recommendation = self.analyze_single_layer_lr(
layer_id,
gradients,
weights,
current_lr,
loss_history,
);
layer_lr_recommendations.insert(layer_id.clone(), recommendation);
}
}
let global_lr_insights =
self.generate_global_lr_insights(&layer_lr_recommendations, loss_history);
let adaptation_strategy =
self.create_lr_adaptation_strategy(&layer_lr_recommendations, &global_lr_insights);
let training_phase_recommendations =
self.generate_training_phase_recommendations(&adaptation_strategy);
let lr_schedule_predictions =
self.predict_lr_schedule_performance(&layer_lr_recommendations);
let result = LayerWiseLRAnalysisResult {
timestamp: Utc::now(),
layer_lr_recommendations,
global_lr_insights,
adaptation_strategy,
training_phase_recommendations,
lr_schedule_predictions,
};
self.lr_analysis_results.push(result.clone());
Ok(result)
}
pub async fn analyze_model_sensitivity(
&mut self,
model_params: &HashMap<String, f64>,
performance_metrics: &[f64],
architecture_config: &HashMap<String, f64>,
) -> Result<ModelSensitivityAnalysisResult> {
if !self.config.enable_model_sensitivity_analysis {
return Err(anyhow::anyhow!("Model sensitivity analysis is disabled"));
}
let hyperparameter_sensitivity =
self.analyze_hyperparameter_sensitivity(model_params, performance_metrics);
let architecture_sensitivity =
self.analyze_architecture_sensitivity(architecture_config, performance_metrics);
let data_sensitivity = self.analyze_data_sensitivity(performance_metrics);
let training_sensitivity =
self.analyze_training_sensitivity(model_params, performance_metrics);
let sensitivity_insights = self.generate_sensitivity_insights(
&hyperparameter_sensitivity,
&architecture_sensitivity,
&data_sensitivity,
&training_sensitivity,
);
let result = ModelSensitivityAnalysisResult {
timestamp: Utc::now(),
hyperparameter_sensitivity,
architecture_sensitivity,
data_sensitivity,
training_sensitivity,
sensitivity_insights,
};
self.sensitivity_analysis_results.push(result.clone());
Ok(result)
}
pub async fn generate_report(&self) -> Result<AdvancedMLDebuggingReport> {
Ok(AdvancedMLDebuggingReport {
timestamp: Utc::now(),
config: self.config.clone(),
lr_analysis_count: self.lr_analysis_results.len(),
sensitivity_analysis_count: self.sensitivity_analysis_results.len(),
recent_lr_analyses: self.lr_analysis_results.iter().rev().take(3).cloned().collect(),
recent_sensitivity_analyses: self
.sensitivity_analysis_results
.iter()
.rev()
.take(3)
.cloned()
.collect(),
advanced_insights: self.generate_advanced_insights(),
})
}
fn analyze_single_layer_lr(
&self,
layer_id: &str,
gradients: &ArrayD<f32>,
weights: &ArrayD<f32>,
current_lr: f64,
loss_history: &[f64],
) -> LayerLRRecommendation {
let gradient_magnitude =
gradients.iter().map(|&x| x.abs() as f64).sum::<f64>() / gradients.len() as f64;
let weight_magnitude =
weights.iter().map(|&x| x.abs() as f64).sum::<f64>() / weights.len() as f64;
let gradient_variance =
gradients.iter().map(|&x| (x as f64 - gradient_magnitude).powi(2)).sum::<f64>()
/ gradients.len() as f64;
let gradient_norm = gradient_magnitude;
let recommended_lr = if gradient_norm > 0.0 {
let base_lr = 0.001;
let adaptation_factor = (1.0 / (1.0 + gradient_variance)).sqrt();
let magnitude_factor = (1.0 / (1.0 + gradient_norm)).sqrt();
base_lr * adaptation_factor * magnitude_factor * 10.0
} else {
current_lr
};
let layer_metrics = LayerLRMetrics {
gradient_magnitude,
weight_update_magnitude: gradient_magnitude * current_lr,
parameter_norm: weight_magnitude,
loss_contribution: self.estimate_layer_loss_contribution(loss_history),
stability_score: self.calculate_layer_stability(gradients),
convergence_rate: self.estimate_convergence_rate(loss_history),
learning_efficiency: gradient_magnitude / (weight_magnitude + 1e-8),
};
let lr_ratio = recommended_lr / current_lr;
let urgency = if !(0.1..=10.0).contains(&lr_ratio) {
AdaptationUrgency::Critical
} else if !(0.33..=3.0).contains(&lr_ratio) {
AdaptationUrgency::High
} else if !(0.67..=1.5).contains(&lr_ratio) {
AdaptationUrgency::Medium
} else {
AdaptationUrgency::Low
};
let reasoning = if recommended_lr > current_lr * 1.2 {
"Layer shows slow learning with small gradients, increase learning rate".to_string()
} else if recommended_lr < current_lr * 0.8 {
"Layer shows instability or large gradients, decrease learning rate".to_string()
} else {
"Current learning rate appears appropriate for this layer".to_string()
};
LayerLRRecommendation {
layer_id: layer_id.to_string(),
layer_type: self.infer_layer_type(layer_id),
current_lr,
recommended_lr,
confidence: 0.8, reasoning,
layer_metrics,
lr_sensitivity: lr_ratio.abs(),
urgency,
}
}
fn generate_global_lr_insights(
&self,
layer_recommendations: &HashMap<String, LayerLRRecommendation>,
loss_history: &[f64],
) -> GlobalLRInsights {
let overall_efficiency = layer_recommendations
.values()
.map(|rec| rec.layer_metrics.learning_efficiency)
.sum::<f64>()
/ layer_recommendations.len() as f64;
let lr_distribution_health = self.calculate_lr_distribution_health(layer_recommendations);
let gradient_flow_quality = self.calculate_gradient_flow_quality(layer_recommendations);
let training_stability =
self.assess_training_stability(layer_recommendations, loss_history);
let global_adjustments = self.generate_global_adjustments(layer_recommendations);
let critical_issues = self.identify_critical_issues(layer_recommendations);
GlobalLRInsights {
overall_efficiency,
lr_distribution_health,
gradient_flow_quality,
training_stability,
global_adjustments,
critical_issues,
}
}
fn create_lr_adaptation_strategy(
&self,
_layer_recommendations: &HashMap<String, LayerLRRecommendation>,
global_insights: &GlobalLRInsights,
) -> LRAdaptationStrategy {
let strategy_name = if global_insights.overall_efficiency < 0.5 {
"Aggressive Learning Rate Adaptation".to_string()
} else {
"Conservative Learning Rate Tuning".to_string()
};
LRAdaptationStrategy {
strategy_name: strategy_name.clone(),
description: "Strategy to optimize learning rates based on current model state"
.to_string(),
implementation_steps: vec![ImplementationStep {
step_number: 1,
description: "Implement layer-wise learning rate multipliers".to_string(),
code_changes: vec!["Add lr_multipliers to optimizer config".to_string()],
timeline: "1-2 days".to_string(),
dependencies: vec!["Optimizer modification".to_string()],
}],
expected_benefits: vec![
"Improved convergence speed".to_string(),
"Better training stability".to_string(),
"Reduced overfitting risk".to_string(),
],
potential_risks: vec!["Initial instability during adaptation".to_string()],
success_metrics: vec![
"Faster loss reduction".to_string(),
"Improved validation accuracy".to_string(),
],
monitoring_requirements: vec!["Track per-layer gradient norms".to_string()],
}
}
fn generate_training_phase_recommendations(
&self,
_strategy: &LRAdaptationStrategy,
) -> Vec<TrainingPhaseRecommendation> {
vec![TrainingPhaseRecommendation {
phase_name: "Warmup Phase".to_string(),
duration_epochs: 5,
lr_schedule: LRSchedule {
schedule_type: LRScheduleType::LinearDecay,
initial_lr: 0.0001,
parameters: HashMap::new(),
layer_multipliers: HashMap::new(),
},
objectives: vec!["Stabilize training".to_string()],
success_criteria: vec!["Decreasing loss".to_string()],
transition_conditions: vec!["Stable gradient norms".to_string()],
}]
}
fn predict_lr_schedule_performance(
&self,
_layer_recommendations: &HashMap<String, LayerLRRecommendation>,
) -> Vec<LRSchedulePrediction> {
vec![LRSchedulePrediction {
schedule: LRSchedule {
schedule_type: LRScheduleType::ExponentialDecay,
initial_lr: 0.001,
parameters: HashMap::new(),
layer_multipliers: HashMap::new(),
},
predicted_accuracy: 0.92,
predicted_convergence_epochs: 50,
predicted_stability: 0.8,
prediction_confidence: 0.7,
risk_assessment: RiskAssessment {
overall_risk: RiskLevel::Medium,
specific_risks: vec![],
mitigation_strategies: vec![],
},
}]
}
fn analyze_hyperparameter_sensitivity(
&self,
params: &HashMap<String, f64>,
_metrics: &[f64],
) -> HyperparameterSensitivity {
let learning_rate_sensitivity = ParameterSensitivity {
parameter_name: "learning_rate".to_string(),
current_value: params.get("learning_rate").copied().unwrap_or(0.001),
sensitivity_score: 0.8,
optimal_range: (0.0001, 0.01),
impact_curve: vec![(0.0001, 0.7), (0.001, 0.9), (0.01, 0.85)],
stability_region: (0.0005, 0.005),
critical_thresholds: vec![0.0001, 0.1],
};
let batch_size_sensitivity = ParameterSensitivity {
parameter_name: "batch_size".to_string(),
current_value: params.get("batch_size").copied().unwrap_or(32.0),
sensitivity_score: 0.6,
optimal_range: (16.0, 128.0),
impact_curve: vec![(16.0, 0.85), (32.0, 0.9), (64.0, 0.88), (128.0, 0.82)],
stability_region: (16.0, 64.0),
critical_thresholds: vec![8.0, 256.0],
};
let regularization_sensitivity = ParameterSensitivity {
parameter_name: "weight_decay".to_string(),
current_value: params.get("weight_decay").copied().unwrap_or(0.01),
sensitivity_score: 0.4,
optimal_range: (0.001, 0.1),
impact_curve: vec![(0.001, 0.88), (0.01, 0.9), (0.1, 0.87)],
stability_region: (0.005, 0.05),
critical_thresholds: vec![0.0001, 1.0],
};
HyperparameterSensitivity {
learning_rate_sensitivity,
batch_size_sensitivity,
regularization_sensitivity,
architecture_param_sensitivity: HashMap::new(),
most_sensitive_params: vec!["learning_rate".to_string(), "batch_size".to_string()],
least_sensitive_params: vec!["weight_decay".to_string()],
interaction_effects: vec![],
}
}
fn analyze_architecture_sensitivity(
&self,
_config: &HashMap<String, f64>,
_metrics: &[f64],
) -> ArchitectureSensitivity {
ArchitectureSensitivity {
depth_sensitivity: ArchitecturalSensitivity {
component_name: "model_depth".to_string(),
change_sensitivity: 0.7,
degradation_curve: vec![(6.0, 0.85), (12.0, 0.9), (24.0, 0.88)],
min_viable_config: 6.0,
optimal_config: 12.0,
diminishing_returns_threshold: 18.0,
},
width_sensitivity: ArchitecturalSensitivity {
component_name: "hidden_size".to_string(),
change_sensitivity: 0.6,
degradation_curve: vec![(256.0, 0.82), (512.0, 0.9), (1024.0, 0.91)],
min_viable_config: 256.0,
optimal_config: 512.0,
diminishing_returns_threshold: 768.0,
},
attention_head_sensitivity: ArchitecturalSensitivity {
component_name: "num_attention_heads".to_string(),
change_sensitivity: 0.5,
degradation_curve: vec![(4.0, 0.87), (8.0, 0.9), (16.0, 0.89)],
min_viable_config: 4.0,
optimal_config: 8.0,
diminishing_returns_threshold: 12.0,
},
skip_connection_sensitivity: ArchitecturalSensitivity {
component_name: "skip_connections".to_string(),
change_sensitivity: 0.8,
degradation_curve: vec![(0.0, 0.75), (1.0, 0.9)],
min_viable_config: 1.0,
optimal_config: 1.0,
diminishing_returns_threshold: 1.0,
},
component_importance: HashMap::new(),
bottlenecks: vec![],
}
}
fn analyze_data_sensitivity(&self, _metrics: &[f64]) -> DataSensitivity {
DataSensitivity {
data_size_sensitivity: DataSizeSensitivity {
current_size: 10000,
minimum_effective_size: 1000,
performance_curve: vec![(1000, 0.7), (5000, 0.85), (10000, 0.9), (20000, 0.92)],
data_efficiency: 0.85,
diminishing_returns_point: 15000,
},
data_quality_sensitivity: DataQualitySensitivity {
noise_tolerance: 0.1,
label_quality_importance: 0.9,
feature_quality_importance: 0.7,
quality_impact_curve: vec![(0.9, 0.9), (0.8, 0.85), (0.7, 0.75)],
},
distribution_sensitivity: DistributionSensitivity {
shift_sensitivity: 0.6,
imbalance_sensitivity: 0.5,
domain_adaptation_requirements: vec!["Gradual domain adaptation".to_string()],
distribution_robustness: 0.7,
},
feature_sensitivity: FeatureSensitivityAnalysis {
most_important_features: vec!["feature_1".to_string(), "feature_2".to_string()],
least_important_features: vec!["feature_10".to_string()],
feature_interactions: HashMap::new(),
feature_stability: HashMap::new(),
},
}
}
fn analyze_training_sensitivity(
&self,
_params: &HashMap<String, f64>,
_metrics: &[f64],
) -> TrainingSensitivity {
TrainingSensitivity {
initialization_sensitivity: InitializationSensitivity {
weight_init_sensitivity: 0.6,
bias_init_sensitivity: 0.3,
seed_sensitivity: 0.2,
scheme_importance: HashMap::new(),
},
optimization_sensitivity: OptimizationSensitivity {
optimizer_sensitivity: 0.7,
momentum_sensitivity: 0.5,
second_moment_sensitivity: 0.4,
optimizer_comparison: HashMap::new(),
},
schedule_sensitivity: ScheduleSensitivity {
lr_schedule_sensitivity: 0.8,
duration_sensitivity: 0.6,
warmup_sensitivity: 0.4,
schedule_param_importance: HashMap::new(),
},
regularization_sensitivity: RegularizationSensitivity {
dropout_sensitivity: 0.5,
weight_decay_sensitivity: 0.4,
batch_norm_sensitivity: 0.6,
method_comparison: HashMap::new(),
},
}
}
fn generate_sensitivity_insights(
&self,
_hyper_sens: &HyperparameterSensitivity,
_arch_sens: &ArchitectureSensitivity,
_data_sens: &DataSensitivity,
_training_sens: &TrainingSensitivity,
) -> SensitivityInsights {
SensitivityInsights {
most_critical_factors: vec![
"learning_rate".to_string(),
"model_depth".to_string(),
"skip_connections".to_string(),
],
least_critical_factors: vec!["bias_initialization".to_string()],
surprising_findings: vec!["Batch size has higher than expected impact".to_string()],
robustness_assessment: RobustnessAssessment {
overall_robustness: 0.7,
category_robustness: HashMap::new(),
vulnerabilities: vec![],
strengths: vec!["Good hyperparameter stability".to_string()],
},
optimization_recommendations: vec![
"Focus on learning rate tuning first".to_string(),
"Consider architectural modifications second".to_string(),
],
}
}
fn estimate_layer_loss_contribution(&self, loss_history: &[f64]) -> f64 {
if loss_history.len() >= 2 {
(loss_history[loss_history.len() - 2] - loss_history[loss_history.len() - 1]).abs()
} else {
0.1
}
}
fn calculate_layer_stability(&self, gradients: &ArrayD<f32>) -> f64 {
let gradient_variance = gradients.iter().map(|&x| x as f64).collect::<Vec<_>>();
if gradient_variance.is_empty() {
return 0.5;
}
let mean = gradient_variance.iter().sum::<f64>() / gradient_variance.len() as f64;
let variance = gradient_variance.iter().map(|&x| (x - mean).powi(2)).sum::<f64>()
/ gradient_variance.len() as f64;
1.0 / (1.0 + variance) }
fn estimate_convergence_rate(&self, loss_history: &[f64]) -> f64 {
if loss_history.len() < 3 {
return 0.5;
}
let recent_improvement =
loss_history[loss_history.len() - 3] - loss_history[loss_history.len() - 1];
recent_improvement.abs()
}
fn infer_layer_type(&self, layer_id: &str) -> String {
if layer_id.contains("attention") {
"attention".to_string()
} else if layer_id.contains("feedforward") || layer_id.contains("mlp") {
"feedforward".to_string()
} else if layer_id.contains("embedding") {
"embedding".to_string()
} else {
"unknown".to_string()
}
}
fn calculate_lr_distribution_health(
&self,
recommendations: &HashMap<String, LayerLRRecommendation>,
) -> f64 {
let lr_ratios: Vec<f64> = recommendations
.values()
.map(|rec| rec.recommended_lr / rec.current_lr)
.collect();
if lr_ratios.is_empty() {
return 0.5;
}
let mean_ratio = lr_ratios.iter().sum::<f64>() / lr_ratios.len() as f64;
let variance = lr_ratios.iter().map(|&x| (x - mean_ratio).powi(2)).sum::<f64>()
/ lr_ratios.len() as f64;
1.0 / (1.0 + variance) }
fn calculate_gradient_flow_quality(
&self,
recommendations: &HashMap<String, LayerLRRecommendation>,
) -> f64 {
recommendations
.values()
.map(|rec| rec.layer_metrics.stability_score)
.sum::<f64>()
/ recommendations.len() as f64
}
fn assess_training_stability(
&self,
recommendations: &HashMap<String, LayerLRRecommendation>,
_loss_history: &[f64],
) -> TrainingStability {
let stability_score = recommendations
.values()
.map(|rec| rec.layer_metrics.stability_score)
.sum::<f64>()
/ recommendations.len() as f64;
TrainingStability {
stability_score,
instability_indicators: vec![],
stability_trends: vec![],
predicted_stability: stability_score * 0.9, }
}
fn generate_global_adjustments(
&self,
_recommendations: &HashMap<String, LayerLRRecommendation>,
) -> Vec<GlobalLRAdjustment> {
vec![GlobalLRAdjustment {
adjustment_type: GlobalAdjustmentType::LayerTypeSpecific,
magnitude: 1.5,
expected_impact: 0.1,
priority: AdjustmentPriority::Medium,
instructions: "Apply different learning rates to attention vs feedforward layers"
.to_string(),
}]
}
fn identify_critical_issues(
&self,
recommendations: &HashMap<String, LayerLRRecommendation>,
) -> Vec<String> {
let mut issues = Vec::new();
for recommendation in recommendations.values() {
if matches!(recommendation.urgency, AdaptationUrgency::Critical) {
issues.push(format!(
"Critical learning rate issue in layer {}",
recommendation.layer_id
));
}
}
issues
}
fn generate_advanced_insights(&self) -> HashMap<String, String> {
let mut insights = HashMap::new();
insights.insert(
"total_lr_analyses".to_string(),
self.lr_analysis_results.len().to_string(),
);
insights.insert(
"total_sensitivity_analyses".to_string(),
self.sensitivity_analysis_results.len().to_string(),
);
if let Some(latest_lr) = self.lr_analysis_results.last() {
insights.insert(
"latest_lr_efficiency".to_string(),
format!("{:.2}", latest_lr.global_lr_insights.overall_efficiency),
);
}
insights
}
}
/// Aggregated report over all analyses run by an `AdvancedMLDebugger`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdvancedMLDebuggingReport {
    /// When the report was generated.
    pub timestamp: DateTime<Utc>,
    /// Configuration active at report time.
    pub config: AdvancedMLDebuggingConfig,
    /// Total number of LR analyses performed so far.
    pub lr_analysis_count: usize,
    /// Total number of sensitivity analyses performed so far.
    pub sensitivity_analysis_count: usize,
    /// Up to the three most recent LR analyses, newest first.
    pub recent_lr_analyses: Vec<LayerWiseLRAnalysisResult>,
    /// Up to the three most recent sensitivity analyses, newest first.
    pub recent_sensitivity_analyses: Vec<ModelSensitivityAnalysisResult>,
    /// Key/value summary strings (counts, latest efficiency).
    pub advanced_insights: HashMap<String, String>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed debugger starts with an empty analysis history.
    #[tokio::test]
    async fn test_advanced_ml_debugger_creation() {
        let config = AdvancedMLDebuggingConfig::default();
        let debugger = AdvancedMLDebugger::new(config);
        assert_eq!(debugger.lr_analysis_results.len(), 0);
    }

    /// End-to-end LR analysis over a single synthetic 10x10 layer.
    #[tokio::test]
    async fn test_layer_wise_lr_analysis() {
        let config = AdvancedMLDebuggingConfig::default();
        let mut debugger = AdvancedMLDebugger::new(config);
        let mut layer_gradients = HashMap::new();
        let mut layer_weights = HashMap::new();
        // Synthetic tensors: 100 linearly spaced values each.
        let gradients =
            ArrayD::from_shape_vec(vec![10, 10], (0..100).map(|x| x as f32 * 0.01).collect())
                .expect("operation failed in test");
        let weights =
            ArrayD::from_shape_vec(vec![10, 10], (0..100).map(|x| x as f32 * 0.1).collect())
                .expect("operation failed in test");
        layer_gradients.insert("layer_0".to_string(), gradients);
        layer_weights.insert("layer_0".to_string(), weights);
        // Monotonically decreasing loss, i.e. a converging run.
        let loss_history = vec![1.0, 0.8, 0.6, 0.5];
        let result = debugger
            .analyze_layer_wise_learning_rates(
                &layer_gradients,
                &layer_weights,
                0.001,
                &loss_history,
            )
            .await;
        assert!(result.is_ok());
        let analysis = result.expect("operation failed in test");
        // Exactly one layer had both gradients and weights supplied.
        assert_eq!(analysis.layer_lr_recommendations.len(), 1);
        assert!(analysis.layer_lr_recommendations.contains_key("layer_0"));
    }
}