trustformers_debug/core/
session.rs

1//! Core debugging session and configuration management
2//!
3//! This module contains the fundamental components for TrustformeRS debugging including
4//! the main DebugSession coordinator, configuration structures, and session lifecycle management.
5
6use crate::*;
7use anyhow::Result;
8use serde::{Deserialize, Serialize};
9use std::fmt;
10use uuid::Uuid;
11
12/// Configuration for debugging session
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct DebugConfig {
15    /// Enable tensor inspection
16    pub enable_tensor_inspection: bool,
17    /// Enable gradient debugging
18    pub enable_gradient_debugging: bool,
19    /// Enable model diagnostics
20    pub enable_model_diagnostics: bool,
21    /// Enable visual debugging (requires display)
22    pub enable_visualization: bool,
23    /// Enable memory profiling
24    pub enable_memory_profiling: bool,
25    /// Enable computation graph analysis
26    pub enable_computation_graph_analysis: bool,
27    /// Maximum number of tensors to track
28    pub max_tracked_tensors: usize,
29    /// Maximum history length for gradients
30    pub max_gradient_history: usize,
31    /// Output directory for debug artifacts
32    pub output_dir: Option<String>,
33    /// Sampling rate for expensive operations (0.0 to 1.0)
34    pub sampling_rate: f32,
35    /// Memory profiling configuration
36    pub memory_profiling_config: MemoryProfilingConfig,
37    /// Computation graph analysis configuration
38    pub graph_analysis_config: GraphAnalysisConfig,
39    /// Architecture analysis configuration
40    pub architecture_analysis_config: architecture_analysis::ArchitectureAnalysisConfig,
41    /// Behavior analysis configuration
42    pub behavior_analysis_config: BehaviorAnalysisConfig,
43    /// Training dynamics analysis configuration
44    pub training_dynamics_config: TrainingDynamicsConfig,
45    /// Differential debugging configuration
46    pub differential_debugging_config: DifferentialDebuggingConfig,
47    /// Interpretability tools configuration
48    pub interpretability_config: InterpretabilityConfig,
49    /// Neural network debugging configuration
50    pub neural_network_debugging_config: Option<neural_network_debugging::TransformerDebugConfig>,
51    /// Advanced ML debugging configuration
52    pub advanced_ml_debugging_config: AdvancedMLDebuggingConfig,
53    /// Advanced GPU profiling configuration
54    pub advanced_gpu_profiling_config: AdvancedGpuProfilingConfig,
55    /// Kernel optimization configuration
56    pub kernel_optimization_config: KernelOptimizationConfig,
57    /// AI code analysis configuration
58    pub ai_code_analysis_config: AIAnalysisConfig,
59    /// Distributed debugging configuration
60    pub distributed_debugging_config: Option<DistributedDebugConfig>,
61    /// Environmental monitoring configuration
62    pub environmental_monitoring_config: EnvironmentalConfig,
63}
64
65impl Default for DebugConfig {
66    fn default() -> Self {
67        Self {
68            enable_tensor_inspection: true,
69            enable_gradient_debugging: true,
70            enable_model_diagnostics: true,
71            enable_visualization: false,
72            enable_memory_profiling: true,
73            enable_computation_graph_analysis: true,
74            max_tracked_tensors: 1000,
75            max_gradient_history: 100,
76            output_dir: None,
77            sampling_rate: 1.0,
78            memory_profiling_config: MemoryProfilingConfig::default(),
79            graph_analysis_config: GraphAnalysisConfig::default(),
80            architecture_analysis_config:
81                architecture_analysis::ArchitectureAnalysisConfig::default(),
82            behavior_analysis_config: BehaviorAnalysisConfig::default(),
83            training_dynamics_config: TrainingDynamicsConfig::default(),
84            differential_debugging_config: DifferentialDebuggingConfig::default(),
85            interpretability_config: InterpretabilityConfig::default(),
86            neural_network_debugging_config: None,
87            advanced_ml_debugging_config: AdvancedMLDebuggingConfig::default(),
88            advanced_gpu_profiling_config: AdvancedGpuProfilingConfig::default(),
89            kernel_optimization_config: KernelOptimizationConfig::default(),
90            ai_code_analysis_config: AIAnalysisConfig::default(),
91            distributed_debugging_config: None,
92            environmental_monitoring_config: EnvironmentalConfig::default(),
93        }
94    }
95}
96
97/// Main debugging session that coordinates all debugging tools
98#[derive(Debug)]
99pub struct DebugSession {
100    id: Uuid,
101    config: DebugConfig,
102    tensor_inspector: TensorInspector,
103    gradient_debugger: GradientDebugger,
104    model_diagnostics: ModelDiagnostics,
105    hooks: HookManager,
106    profiler: Profiler,
107    memory_profiler: Option<MemoryProfiler>,
108    interactive_debugger: InteractiveDebugger,
109    anomaly_detector: AnomalyDetector,
110    computation_graph_analyzer: ComputationGraphAnalyzer,
111    architecture_analyzer: architecture_analysis::ArchitectureAnalyzer,
112    behavior_analyzer: BehaviorAnalyzer,
113    training_dynamics_analyzer: TrainingDynamicsAnalyzer,
114    differential_debugger: DifferentialDebugger,
115    interpretability_analyzer: InterpretabilityAnalyzer,
116    health_checker: crate::health_checker::HealthChecker,
117    transformer_debugger: Option<neural_network_debugging::TransformerDebugger>,
118    advanced_ml_debugger: AdvancedMLDebugger,
119    advanced_gpu_profiler: Option<AdvancedGpuMemoryProfiler>,
120    #[allow(dead_code)]
121    kernel_optimizer: KernelOptimizationAnalyzer,
122    ai_code_analyzer: Option<AICodeAnalyzer>,
123    distributed_debugger: Option<DistributedDebugger>,
124    environmental_monitor: Option<EnvironmentalMonitor>,
125}
126
127impl DebugSession {
128    /// Create a new debugging session
129    pub fn new(config: DebugConfig) -> Self {
130        let id = Uuid::new_v4();
131
132        let memory_profiler = if config.enable_memory_profiling {
133            Some(MemoryProfiler::new(config.memory_profiling_config.clone()))
134        } else {
135            None
136        };
137
138        let transformer_debugger =
139            if let Some(ref neural_config) = config.neural_network_debugging_config {
140                Some(neural_network_debugging::TransformerDebugger::new(
141                    neural_config.clone(),
142                ))
143            } else {
144                None
145            };
146
147        let advanced_gpu_profiler = if config.advanced_gpu_profiling_config.enable_gpu_profiling {
148            AdvancedGpuMemoryProfiler::new(config.advanced_gpu_profiling_config.device_count).ok()
149        } else {
150            None
151        };
152
153        let ai_code_analyzer = if config.ai_code_analysis_config.enable_deep_analysis {
154            Some(AICodeAnalyzer::new(config.ai_code_analysis_config.clone()))
155        } else {
156            None
157        };
158
159        let distributed_debugger =
160            if let Some(ref dist_config) = config.distributed_debugging_config {
161                let node_id = NodeId::new(0, "debug-node".to_string());
162                Some(DistributedDebugger::new(dist_config.clone(), node_id))
163            } else {
164                None
165            };
166
167        let environmental_monitor = if config.environmental_monitoring_config.enable_carbon_tracking
168        {
169            Some(EnvironmentalMonitor::new(
170                config.environmental_monitoring_config.clone(),
171            ))
172        } else {
173            None
174        };
175
176        Self {
177            id,
178            tensor_inspector: TensorInspector::new(&config),
179            gradient_debugger: GradientDebugger::new(config.clone()),
180            model_diagnostics: ModelDiagnostics::new(&config),
181            hooks: HookManager::new(),
182            profiler: Profiler::new(&config),
183            memory_profiler,
184            interactive_debugger: InteractiveDebugger::new(&config),
185            anomaly_detector: AnomalyDetector::new(&config),
186            computation_graph_analyzer: ComputationGraphAnalyzer::new(
187                config.graph_analysis_config.clone(),
188            ),
189            architecture_analyzer: architecture_analysis::ArchitectureAnalyzer::new(
190                config.architecture_analysis_config.clone(),
191            ),
192            behavior_analyzer: BehaviorAnalyzer::new(config.behavior_analysis_config.clone()),
193            training_dynamics_analyzer: TrainingDynamicsAnalyzer::new(),
194            differential_debugger: DifferentialDebugger::new(
195                config.differential_debugging_config.clone(),
196            ),
197            interpretability_analyzer: InterpretabilityAnalyzer::new(
198                config.interpretability_config.clone(),
199            ),
200            health_checker: crate::health_checker::HealthChecker::new(&config),
201            transformer_debugger,
202            advanced_ml_debugger: AdvancedMLDebugger::new(
203                config.advanced_ml_debugging_config.clone(),
204            ),
205            advanced_gpu_profiler,
206            kernel_optimizer: match KernelOptimizationAnalyzer::new() {
207                Ok(analyzer) => analyzer,
208                Err(e) => {
209                    tracing::warn!(
210                        "Failed to initialize kernel optimizer: {}, using stub implementation",
211                        e
212                    );
213                    // Return a stub analyzer that won't crash but provides limited functionality
214                    KernelOptimizationAnalyzer::new_stub()
215                },
216            },
217            ai_code_analyzer,
218            distributed_debugger,
219            environmental_monitor,
220            config,
221        }
222    }
223
224    /// Get session ID
225    pub fn id(&self) -> Uuid {
226        self.id
227    }
228
229    /// Get debug configuration
230    pub fn config(&self) -> &DebugConfig {
231        &self.config
232    }
233
234    /// Get tensor inspector
235    pub fn tensor_inspector(&self) -> &TensorInspector {
236        &self.tensor_inspector
237    }
238
239    /// Get mutable tensor inspector
240    pub fn tensor_inspector_mut(&mut self) -> &mut TensorInspector {
241        &mut self.tensor_inspector
242    }
243
244    /// Get gradient debugger
245    pub fn gradient_debugger(&self) -> &GradientDebugger {
246        &self.gradient_debugger
247    }
248
249    /// Get mutable gradient debugger
250    pub fn gradient_debugger_mut(&mut self) -> &mut GradientDebugger {
251        &mut self.gradient_debugger
252    }
253
254    /// Get model diagnostics
255    pub fn model_diagnostics(&self) -> &ModelDiagnostics {
256        &self.model_diagnostics
257    }
258
259    /// Get mutable model diagnostics
260    pub fn model_diagnostics_mut(&mut self) -> &mut ModelDiagnostics {
261        &mut self.model_diagnostics
262    }
263
264    /// Get hook manager
265    pub fn hooks(&self) -> &HookManager {
266        &self.hooks
267    }
268
269    /// Get mutable hook manager
270    pub fn hooks_mut(&mut self) -> &mut HookManager {
271        &mut self.hooks
272    }
273
274    /// Get profiler
275    pub fn profiler(&self) -> &Profiler {
276        &self.profiler
277    }
278
279    /// Get mutable profiler
280    pub fn profiler_mut(&mut self) -> &mut Profiler {
281        &mut self.profiler
282    }
283
284    /// Get memory profiler
285    pub fn memory_profiler(&self) -> Option<&MemoryProfiler> {
286        self.memory_profiler.as_ref()
287    }
288
289    /// Get mutable memory profiler
290    pub fn memory_profiler_mut(&mut self) -> Option<&mut MemoryProfiler> {
291        self.memory_profiler.as_mut()
292    }
293
294    /// Get interactive debugger
295    pub fn interactive_debugger(&self) -> &InteractiveDebugger {
296        &self.interactive_debugger
297    }
298
299    /// Get mutable interactive debugger
300    pub fn interactive_debugger_mut(&mut self) -> &mut InteractiveDebugger {
301        &mut self.interactive_debugger
302    }
303
304    /// Get anomaly detector
305    pub fn anomaly_detector(&self) -> &AnomalyDetector {
306        &self.anomaly_detector
307    }
308
309    /// Get mutable anomaly detector
310    pub fn anomaly_detector_mut(&mut self) -> &mut AnomalyDetector {
311        &mut self.anomaly_detector
312    }
313
314    /// Get computation graph analyzer
315    pub fn computation_graph_analyzer(&self) -> &ComputationGraphAnalyzer {
316        &self.computation_graph_analyzer
317    }
318
319    /// Get mutable computation graph analyzer
320    pub fn computation_graph_analyzer_mut(&mut self) -> &mut ComputationGraphAnalyzer {
321        &mut self.computation_graph_analyzer
322    }
323
324    /// Get architecture analyzer
325    pub fn architecture_analyzer(&self) -> &architecture_analysis::ArchitectureAnalyzer {
326        &self.architecture_analyzer
327    }
328
329    /// Get mutable architecture analyzer
330    pub fn architecture_analyzer_mut(
331        &mut self,
332    ) -> &mut architecture_analysis::ArchitectureAnalyzer {
333        &mut self.architecture_analyzer
334    }
335
336    /// Get behavior analyzer
337    pub fn behavior_analyzer(&self) -> &BehaviorAnalyzer {
338        &self.behavior_analyzer
339    }
340
341    /// Get mutable behavior analyzer
342    pub fn behavior_analyzer_mut(&mut self) -> &mut BehaviorAnalyzer {
343        &mut self.behavior_analyzer
344    }
345
346    /// Get training dynamics analyzer
347    pub fn training_dynamics_analyzer(&self) -> &TrainingDynamicsAnalyzer {
348        &self.training_dynamics_analyzer
349    }
350
351    /// Get mutable training dynamics analyzer
352    pub fn training_dynamics_analyzer_mut(&mut self) -> &mut TrainingDynamicsAnalyzer {
353        &mut self.training_dynamics_analyzer
354    }
355
356    /// Get differential debugger
357    pub fn differential_debugger(&self) -> &DifferentialDebugger {
358        &self.differential_debugger
359    }
360
361    /// Get mutable differential debugger
362    pub fn differential_debugger_mut(&mut self) -> &mut DifferentialDebugger {
363        &mut self.differential_debugger
364    }
365
366    /// Get interpretability analyzer
367    pub fn interpretability_analyzer(&self) -> &InterpretabilityAnalyzer {
368        &self.interpretability_analyzer
369    }
370
371    /// Get mutable interpretability analyzer
372    pub fn interpretability_analyzer_mut(&mut self) -> &mut InterpretabilityAnalyzer {
373        &mut self.interpretability_analyzer
374    }
375
376    /// Get health checker
377    pub fn health_checker(&self) -> &crate::health_checker::HealthChecker {
378        &self.health_checker
379    }
380
381    /// Get mutable health checker
382    pub fn health_checker_mut(&mut self) -> &mut crate::health_checker::HealthChecker {
383        &mut self.health_checker
384    }
385
386    /// Get transformer debugger
387    pub fn transformer_debugger(&self) -> Option<&neural_network_debugging::TransformerDebugger> {
388        self.transformer_debugger.as_ref()
389    }
390
391    /// Get mutable transformer debugger
392    pub fn transformer_debugger_mut(
393        &mut self,
394    ) -> Option<&mut neural_network_debugging::TransformerDebugger> {
395        self.transformer_debugger.as_mut()
396    }
397
398    /// Get advanced ML debugger
399    pub fn advanced_ml_debugger(&self) -> &AdvancedMLDebugger {
400        &self.advanced_ml_debugger
401    }
402
403    /// Get mutable advanced ML debugger
404    pub fn advanced_ml_debugger_mut(&mut self) -> &mut AdvancedMLDebugger {
405        &mut self.advanced_ml_debugger
406    }
407
408    /// Get AI code analyzer
409    pub fn ai_code_analyzer(&self) -> Option<&AICodeAnalyzer> {
410        self.ai_code_analyzer.as_ref()
411    }
412
413    /// Get mutable AI code analyzer
414    pub fn ai_code_analyzer_mut(&mut self) -> Option<&mut AICodeAnalyzer> {
415        self.ai_code_analyzer.as_mut()
416    }
417
418    /// Get distributed debugger
419    pub fn distributed_debugger(&self) -> Option<&DistributedDebugger> {
420        self.distributed_debugger.as_ref()
421    }
422
423    /// Get mutable distributed debugger
424    pub fn distributed_debugger_mut(&mut self) -> Option<&mut DistributedDebugger> {
425        self.distributed_debugger.as_mut()
426    }
427
428    /// Get environmental monitor
429    pub fn environmental_monitor(&self) -> Option<&EnvironmentalMonitor> {
430        self.environmental_monitor.as_ref()
431    }
432
433    /// Get mutable environmental monitor
434    pub fn environmental_monitor_mut(&mut self) -> Option<&mut EnvironmentalMonitor> {
435        self.environmental_monitor.as_mut()
436    }
437
438    /// Start debugging session
439    pub async fn start(&mut self) -> Result<()> {
440        tracing::info!("Starting debug session {}", self.id);
441
442        if self.config.enable_tensor_inspection {
443            self.tensor_inspector.start().await?;
444        }
445
446        if self.config.enable_gradient_debugging {
447            self.gradient_debugger.start().await?;
448        }
449
450        if self.config.enable_model_diagnostics {
451            self.model_diagnostics.start().await?;
452        }
453
454        self.profiler.start().await?;
455
456        if let Some(ref mut memory_profiler) = self.memory_profiler {
457            memory_profiler.start().await?;
458        }
459
460        self.interactive_debugger.start().await?;
461        self.anomaly_detector.start().await?;
462
463        Ok(())
464    }
465
466    /// Stop debugging session and generate report
467    pub async fn stop(&mut self) -> Result<DebugReport> {
468        tracing::info!("Stopping debug session {}", self.id);
469
470        let tensor_report = if self.config.enable_tensor_inspection {
471            Some(self.tensor_inspector.generate_report().await?)
472        } else {
473            None
474        };
475
476        let gradient_report = if self.config.enable_gradient_debugging {
477            Some(self.gradient_debugger.generate_report().await?)
478        } else {
479            None
480        };
481
482        let diagnostics_report = if self.config.enable_model_diagnostics {
483            Some(self.model_diagnostics.generate_report().await?)
484        } else {
485            None
486        };
487
488        let profiler_report = self.profiler.generate_report().await?;
489
490        let memory_profiler_report = if let Some(ref mut memory_profiler) = self.memory_profiler {
491            Some(memory_profiler.stop().await?)
492        } else {
493            None
494        };
495
496        let interactive_debugger_report = self.interactive_debugger.generate_report().await?;
497        let anomaly_report = self.anomaly_detector.generate_report().await?;
498
499        // Get computation graph analysis results (if any graphs were analyzed)
500        let computation_graph_report = None; // Would be populated if graphs were analyzed
501
502        // Get new analyzer reports
503        let architecture_analysis_report =
504            Some(self.architecture_analyzer.generate_report().await?);
505        let behavior_analysis_report = Some(self.behavior_analyzer.generate_report().await?);
506        let training_dynamics_report =
507            Some(self.training_dynamics_analyzer.generate_report().await?);
508        let differential_debugging_report =
509            Some(self.differential_debugger.generate_report().await?);
510        let interpretability_report = Some(self.interpretability_analyzer.generate_report().await?);
511        let advanced_ml_debugging_report = Some(self.advanced_ml_debugger.generate_report().await?);
512
513        // Generate GPU profiling reports
514        let advanced_gpu_profiling_report = if let Some(ref profiler) = self.advanced_gpu_profiler {
515            Some(profiler.get_memory_analysis_report())
516        } else {
517            None
518        };
519
520        let kernel_optimization_report =
521            Some(self.generate_kernel_optimization_summary_report().await?);
522
523        Ok(DebugReport {
524            session_id: self.id,
525            tensor_report,
526            gradient_report,
527            diagnostics_report,
528            profiler_report,
529            memory_profiler_report,
530            interactive_debugger_report,
531            anomaly_report,
532            computation_graph_report,
533            architecture_analysis_report,
534            behavior_analysis_report,
535            training_dynamics_report,
536            differential_debugging_report,
537            interpretability_report,
538            advanced_ml_debugging_report,
539            advanced_gpu_profiling_report,
540            kernel_optimization_report,
541            config: self.config.clone(),
542        })
543    }
544
545    /// Export debug session to file
546    pub async fn export(&self, path: &str) -> Result<()> {
547        let report = self.generate_snapshot().await?;
548        let json = serde_json::to_string_pretty(&report)?;
549        tokio::fs::write(path, json).await?;
550        Ok(())
551    }
552
553    /// Generate a snapshot of current state
554    pub async fn generate_snapshot(&self) -> Result<DebugReport> {
555        let tensor_report = if self.config.enable_tensor_inspection {
556            Some(self.tensor_inspector.generate_report().await?)
557        } else {
558            None
559        };
560
561        let gradient_report = if self.config.enable_gradient_debugging {
562            Some(self.gradient_debugger.generate_report().await?)
563        } else {
564            None
565        };
566
567        let diagnostics_report = if self.config.enable_model_diagnostics {
568            Some(self.model_diagnostics.generate_report().await?)
569        } else {
570            None
571        };
572
573        let profiler_report = self.profiler.generate_report().await?;
574
575        let memory_profiler_report = if let Some(ref _memory_profiler) = self.memory_profiler {
576            // For snapshot, we don't stop the profiler, just get current state
577            None // Simplified for now
578        } else {
579            None
580        };
581
582        let interactive_debugger_report = self.interactive_debugger.generate_report().await?;
583        let anomaly_report = self.anomaly_detector.generate_report().await?;
584
585        // Get computation graph analysis results (if any graphs were analyzed)
586        let computation_graph_report = None; // Would be populated if graphs were analyzed
587
588        // Get new analyzer reports
589        let architecture_analysis_report =
590            Some(self.architecture_analyzer.generate_report().await?);
591        let behavior_analysis_report = Some(self.behavior_analyzer.generate_report().await?);
592        let training_dynamics_report =
593            Some(self.training_dynamics_analyzer.generate_report().await?);
594        let differential_debugging_report =
595            Some(self.differential_debugger.generate_report().await?);
596        let interpretability_report = Some(self.interpretability_analyzer.generate_report().await?);
597        let advanced_ml_debugging_report = Some(self.advanced_ml_debugger.generate_report().await?);
598
599        // Generate GPU profiling reports for snapshot
600        let advanced_gpu_profiling_report = if let Some(ref profiler) = self.advanced_gpu_profiler {
601            Some(profiler.get_memory_analysis_report())
602        } else {
603            None
604        };
605
606        let kernel_optimization_report =
607            Some(self.generate_kernel_optimization_summary_report().await?);
608
609        Ok(DebugReport {
610            session_id: self.id,
611            tensor_report,
612            gradient_report,
613            diagnostics_report,
614            profiler_report,
615            memory_profiler_report,
616            interactive_debugger_report,
617            anomaly_report,
618            computation_graph_report,
619            architecture_analysis_report,
620            behavior_analysis_report,
621            training_dynamics_report,
622            differential_debugging_report,
623            interpretability_report,
624            advanced_ml_debugging_report,
625            advanced_gpu_profiling_report,
626            kernel_optimization_report,
627            config: self.config.clone(),
628        })
629    }
630
631    /// Convenience method for debugging tensors (used by debug_tensor! macro)
632    pub fn debug_tensor<T>(&mut self, tensor: &ArrayD<T>, name: &str) -> Result<Uuid>
633    where
634        T: Clone + Into<f64> + fmt::Debug + 'static,
635    {
636        self.tensor_inspector.inspect_tensor(tensor, name, None, None)
637    }
638
639    /// Generate kernel optimization summary report
640    async fn generate_kernel_optimization_summary_report(
641        &self,
642    ) -> Result<KernelOptimizationSummaryReport> {
643        // In a real implementation, this would analyze all kernel profiles
644        // and generate comprehensive optimization recommendations
645        Ok(KernelOptimizationSummaryReport {
646            total_kernels_analyzed: 0,
647            optimization_opportunities_found: 0,
648            high_impact_optimizations: vec![],
649            fusion_opportunities: 0,
650            regression_alerts: 0,
651            overall_optimization_score: 85.0,
652            top_recommendations: vec!["No kernel analysis data available yet".to_string()],
653        })
654    }
655
656    /// Convenience method for debugging gradients (used by debug_gradient! macro)
657    pub fn debug_gradients<T>(&mut self, _layer_name: &str, gradients: &[T]) -> Result<()>
658    where
659        T: Clone + Into<f64> + fmt::Debug + 'static,
660    {
661        // Convert gradients vector to ndarray
662        use scirs2_core::ndarray::Array; // SciRS2 Integration Policy
663        let gradient_array = Array::from_vec(gradients.to_vec()).into_dyn();
664
665        // Create a dummy tensor ID for gradients (in real usage, this would be linked to an actual tensor)
666        let tensor_id = Uuid::new_v4();
667
668        self.tensor_inspector.inspect_gradients(tensor_id, &gradient_array)
669    }
670}
671
672/// Comprehensive debug report
673#[derive(Debug, Clone, Serialize, Deserialize)]
674pub struct DebugReport {
675    pub session_id: Uuid,
676    pub tensor_report: Option<TensorInspectionReport>,
677    pub gradient_report: Option<GradientDebugReport>,
678    pub diagnostics_report: Option<ModelDiagnosticsReport>,
679    pub profiler_report: ProfilerReport,
680    pub memory_profiler_report: Option<MemoryProfilingReport>,
681    pub interactive_debugger_report: InteractiveDebuggerReport,
682    pub anomaly_report: AnomalyDetectorReport,
683    pub computation_graph_report: Option<GraphAnalysisResult>,
684    pub architecture_analysis_report: Option<ArchitectureAnalysisReport>,
685    pub behavior_analysis_report: Option<BehaviorAnalysisReport>,
686    pub training_dynamics_report: Option<model_diagnostics::training::TrainingDynamicsReport>,
687    pub differential_debugging_report: Option<DifferentialDebuggingReport>,
688    pub interpretability_report: Option<InterpretabilityReport>,
689    pub advanced_ml_debugging_report: Option<AdvancedMLDebuggingReport>,
690    pub advanced_gpu_profiling_report: Option<MemoryAnalysisReport>,
691    pub kernel_optimization_report: Option<KernelOptimizationSummaryReport>,
692    pub config: DebugConfig,
693}
694
695impl DebugReport {
696    /// Get summary of key findings
697    pub fn summary(&self) -> DebugSummary {
698        let mut issues = Vec::new();
699        let mut recommendations = Vec::new();
700
701        // Analyze tensor issues
702        if let Some(ref tensor_report) = self.tensor_report {
703            if tensor_report.has_nan_values() {
704                issues.push("NaN values detected in tensors".to_string());
705                recommendations.push("Check input data and model initialization".to_string());
706            }
707
708            if tensor_report.has_inf_values() {
709                issues.push("Infinite values detected in tensors".to_string());
710                recommendations.push("Reduce learning rate or add gradient clipping".to_string());
711            }
712        }
713
714        // Analyze gradient issues
715        if let Some(ref gradient_report) = self.gradient_report {
716            if gradient_report.has_vanishing_gradients() {
717                issues.push("Vanishing gradients detected".to_string());
718                recommendations
719                    .push("Consider residual connections or gradient scaling".to_string());
720            }
721
722            if gradient_report.has_exploding_gradients() {
723                issues.push("Exploding gradients detected".to_string());
724                recommendations.push("Add gradient clipping".to_string());
725            }
726        }
727
728        DebugSummary {
729            session_id: self.session_id,
730            total_issues: issues.len(),
731            critical_issues: issues
732                .iter()
733                .filter(|i| i.contains("NaN") || i.contains("exploding"))
734                .count(),
735            issues,
736            recommendations,
737        }
738    }
739}
740
741/// High-level summary of debug findings
742#[derive(Debug, Serialize, Deserialize)]
743pub struct DebugSummary {
744    pub session_id: Uuid,
745    pub total_issues: usize,
746    pub critical_issues: usize,
747    pub issues: Vec<String>,
748    pub recommendations: Vec<String>,
749}
750
751/// Convenience function to create a debug session with default config
752pub fn debug_session() -> DebugSession {
753    DebugSession::new(DebugConfig::default())
754}
755
756/// Convenience function to create a debug session with custom config
757pub fn debug_session_with_config(config: DebugConfig) -> DebugSession {
758    DebugSession::new(config)
759}
760
761/// Convenience function to create a debug session with transformer debugging enabled
762pub fn debug_session_with_transformer() -> DebugSession {
763    let mut config = DebugConfig::default();
764    config.neural_network_debugging_config =
765        Some(neural_network_debugging::TransformerDebugConfig::default());
766    DebugSession::new(config)
767}