// Source: trustformers_debug/core/session.rs

//! Core debugging session and configuration management.
//!
//! This module contains the fundamental components for TrustformeRS debugging, including
//! the main `DebugSession` coordinator, configuration structures, and session lifecycle management.
5
6use crate::*;
7use anyhow::Result;
8use serde::{Deserialize, Serialize};
9use std::fmt;
10use uuid::Uuid;
11
12/// Configuration for debugging session
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct DebugConfig {
15    /// Enable tensor inspection
16    pub enable_tensor_inspection: bool,
17    /// Enable gradient debugging
18    pub enable_gradient_debugging: bool,
19    /// Enable model diagnostics
20    pub enable_model_diagnostics: bool,
21    /// Enable visual debugging (requires display)
22    pub enable_visualization: bool,
23    /// Enable memory profiling
24    pub enable_memory_profiling: bool,
25    /// Enable computation graph analysis
26    pub enable_computation_graph_analysis: bool,
27    /// Maximum number of tensors to track
28    pub max_tracked_tensors: usize,
29    /// Maximum history length for gradients
30    pub max_gradient_history: usize,
31    /// Output directory for debug artifacts
32    pub output_dir: Option<String>,
33    /// Sampling rate for expensive operations (0.0 to 1.0)
34    pub sampling_rate: f32,
35    /// Memory profiling configuration
36    pub memory_profiling_config: MemoryProfilingConfig,
37    /// Computation graph analysis configuration
38    pub graph_analysis_config: GraphAnalysisConfig,
39    /// Architecture analysis configuration
40    pub architecture_analysis_config: architecture_analysis::ArchitectureAnalysisConfig,
41    /// Behavior analysis configuration
42    pub behavior_analysis_config: BehaviorAnalysisConfig,
43    /// Training dynamics analysis configuration
44    pub training_dynamics_config: TrainingDynamicsConfig,
45    /// Differential debugging configuration
46    pub differential_debugging_config: DifferentialDebuggingConfig,
47    /// Interpretability tools configuration
48    pub interpretability_config: InterpretabilityConfig,
49    /// Neural network debugging configuration
50    pub neural_network_debugging_config: Option<neural_network_debugging::TransformerDebugConfig>,
51    /// Advanced ML debugging configuration
52    pub advanced_ml_debugging_config: AdvancedMLDebuggingConfig,
53    /// Advanced GPU profiling configuration
54    pub advanced_gpu_profiling_config: AdvancedGpuProfilingConfig,
55    /// Kernel optimization configuration
56    pub kernel_optimization_config: KernelOptimizationConfig,
57    /// AI code analysis configuration
58    pub ai_code_analysis_config: AIAnalysisConfig,
59    /// Distributed debugging configuration
60    pub distributed_debugging_config: Option<DistributedDebugConfig>,
61    /// Environmental monitoring configuration
62    pub environmental_monitoring_config: EnvironmentalConfig,
63}
64
65impl Default for DebugConfig {
66    fn default() -> Self {
67        Self {
68            enable_tensor_inspection: true,
69            enable_gradient_debugging: true,
70            enable_model_diagnostics: true,
71            enable_visualization: false,
72            enable_memory_profiling: true,
73            enable_computation_graph_analysis: true,
74            max_tracked_tensors: 1000,
75            max_gradient_history: 100,
76            output_dir: None,
77            sampling_rate: 1.0,
78            memory_profiling_config: MemoryProfilingConfig::default(),
79            graph_analysis_config: GraphAnalysisConfig::default(),
80            architecture_analysis_config:
81                architecture_analysis::ArchitectureAnalysisConfig::default(),
82            behavior_analysis_config: BehaviorAnalysisConfig::default(),
83            training_dynamics_config: TrainingDynamicsConfig::default(),
84            differential_debugging_config: DifferentialDebuggingConfig::default(),
85            interpretability_config: InterpretabilityConfig,
86            neural_network_debugging_config: None,
87            advanced_ml_debugging_config: AdvancedMLDebuggingConfig::default(),
88            advanced_gpu_profiling_config: AdvancedGpuProfilingConfig::default(),
89            kernel_optimization_config: KernelOptimizationConfig::default(),
90            ai_code_analysis_config: AIAnalysisConfig::default(),
91            distributed_debugging_config: None,
92            environmental_monitoring_config: EnvironmentalConfig::default(),
93        }
94    }
95}
96
97/// Main debugging session that coordinates all debugging tools
98#[derive(Debug)]
99pub struct DebugSession {
100    id: Uuid,
101    config: DebugConfig,
102    tensor_inspector: TensorInspector,
103    gradient_debugger: GradientDebugger,
104    model_diagnostics: ModelDiagnostics,
105    hooks: HookManager,
106    profiler: Profiler,
107    memory_profiler: Option<MemoryProfiler>,
108    interactive_debugger: InteractiveDebugger,
109    anomaly_detector: AnomalyDetector,
110    computation_graph_analyzer: ComputationGraphAnalyzer,
111    architecture_analyzer: architecture_analysis::ArchitectureAnalyzer,
112    behavior_analyzer: BehaviorAnalyzer,
113    training_dynamics_analyzer: TrainingDynamicsAnalyzer,
114    differential_debugger: DifferentialDebugger,
115    interpretability_analyzer: InterpretabilityAnalyzer,
116    health_checker: crate::health_checker::HealthChecker,
117    transformer_debugger: Option<neural_network_debugging::TransformerDebugger>,
118    advanced_ml_debugger: AdvancedMLDebugger,
119    advanced_gpu_profiler: Option<AdvancedGpuMemoryProfiler>,
120    #[allow(dead_code)]
121    kernel_optimizer: KernelOptimizationAnalyzer,
122    ai_code_analyzer: Option<AICodeAnalyzer>,
123    distributed_debugger: Option<DistributedDebugger>,
124    environmental_monitor: Option<EnvironmentalMonitor>,
125}
126
127impl DebugSession {
128    /// Create a new debugging session
129    pub fn new(config: DebugConfig) -> Self {
130        let id = Uuid::new_v4();
131
132        let memory_profiler = if config.enable_memory_profiling {
133            Some(MemoryProfiler::new(config.memory_profiling_config.clone()))
134        } else {
135            None
136        };
137
138        let transformer_debugger =
139            config.neural_network_debugging_config.as_ref().map(|neural_config| {
140                neural_network_debugging::TransformerDebugger::new(neural_config.clone())
141            });
142
143        let advanced_gpu_profiler = if config.advanced_gpu_profiling_config.enable_gpu_profiling {
144            AdvancedGpuMemoryProfiler::new(config.advanced_gpu_profiling_config.device_count).ok()
145        } else {
146            None
147        };
148
149        let ai_code_analyzer = if config.ai_code_analysis_config.enable_deep_analysis {
150            Some(AICodeAnalyzer::new(config.ai_code_analysis_config.clone()))
151        } else {
152            None
153        };
154
155        let distributed_debugger =
156            if let Some(ref dist_config) = config.distributed_debugging_config {
157                let node_id = NodeId::new(0, "debug-node".to_string());
158                Some(DistributedDebugger::new(dist_config.clone(), node_id))
159            } else {
160                None
161            };
162
163        let environmental_monitor = if config.environmental_monitoring_config.enable_carbon_tracking
164        {
165            Some(EnvironmentalMonitor::new(
166                config.environmental_monitoring_config.clone(),
167            ))
168        } else {
169            None
170        };
171
172        Self {
173            id,
174            tensor_inspector: TensorInspector::new(&config),
175            gradient_debugger: GradientDebugger::new(config.clone()),
176            model_diagnostics: ModelDiagnostics::new(&config),
177            hooks: HookManager::new(),
178            profiler: Profiler::new(&config),
179            memory_profiler,
180            interactive_debugger: InteractiveDebugger::new(&config),
181            anomaly_detector: AnomalyDetector::new(&config),
182            computation_graph_analyzer: ComputationGraphAnalyzer::new(
183                config.graph_analysis_config.clone(),
184            ),
185            architecture_analyzer: architecture_analysis::ArchitectureAnalyzer::new(
186                config.architecture_analysis_config.clone(),
187            ),
188            behavior_analyzer: BehaviorAnalyzer::new(config.behavior_analysis_config.clone()),
189            training_dynamics_analyzer: TrainingDynamicsAnalyzer::new(),
190            differential_debugger: DifferentialDebugger::new(
191                config.differential_debugging_config.clone(),
192            ),
193            interpretability_analyzer: InterpretabilityAnalyzer::new(
194                config.interpretability_config.clone(),
195            ),
196            health_checker: crate::health_checker::HealthChecker::new(&config),
197            transformer_debugger,
198            advanced_ml_debugger: AdvancedMLDebugger::new(
199                config.advanced_ml_debugging_config.clone(),
200            ),
201            advanced_gpu_profiler,
202            kernel_optimizer: match KernelOptimizationAnalyzer::new() {
203                Ok(analyzer) => analyzer,
204                Err(e) => {
205                    tracing::warn!(
206                        "Failed to initialize kernel optimizer: {}, using stub implementation",
207                        e
208                    );
209                    // Return a stub analyzer that won't crash but provides limited functionality
210                    KernelOptimizationAnalyzer::new_stub()
211                },
212            },
213            ai_code_analyzer,
214            distributed_debugger,
215            environmental_monitor,
216            config,
217        }
218    }
219
220    /// Get session ID
221    pub fn id(&self) -> Uuid {
222        self.id
223    }
224
225    /// Get debug configuration
226    pub fn config(&self) -> &DebugConfig {
227        &self.config
228    }
229
230    /// Get tensor inspector
231    pub fn tensor_inspector(&self) -> &TensorInspector {
232        &self.tensor_inspector
233    }
234
235    /// Get mutable tensor inspector
236    pub fn tensor_inspector_mut(&mut self) -> &mut TensorInspector {
237        &mut self.tensor_inspector
238    }
239
240    /// Get gradient debugger
241    pub fn gradient_debugger(&self) -> &GradientDebugger {
242        &self.gradient_debugger
243    }
244
245    /// Get mutable gradient debugger
246    pub fn gradient_debugger_mut(&mut self) -> &mut GradientDebugger {
247        &mut self.gradient_debugger
248    }
249
250    /// Get model diagnostics
251    pub fn model_diagnostics(&self) -> &ModelDiagnostics {
252        &self.model_diagnostics
253    }
254
255    /// Get mutable model diagnostics
256    pub fn model_diagnostics_mut(&mut self) -> &mut ModelDiagnostics {
257        &mut self.model_diagnostics
258    }
259
260    /// Get hook manager
261    pub fn hooks(&self) -> &HookManager {
262        &self.hooks
263    }
264
265    /// Get mutable hook manager
266    pub fn hooks_mut(&mut self) -> &mut HookManager {
267        &mut self.hooks
268    }
269
270    /// Get profiler
271    pub fn profiler(&self) -> &Profiler {
272        &self.profiler
273    }
274
275    /// Get mutable profiler
276    pub fn profiler_mut(&mut self) -> &mut Profiler {
277        &mut self.profiler
278    }
279
280    /// Get memory profiler
281    pub fn memory_profiler(&self) -> Option<&MemoryProfiler> {
282        self.memory_profiler.as_ref()
283    }
284
285    /// Get mutable memory profiler
286    pub fn memory_profiler_mut(&mut self) -> Option<&mut MemoryProfiler> {
287        self.memory_profiler.as_mut()
288    }
289
290    /// Get interactive debugger
291    pub fn interactive_debugger(&self) -> &InteractiveDebugger {
292        &self.interactive_debugger
293    }
294
295    /// Get mutable interactive debugger
296    pub fn interactive_debugger_mut(&mut self) -> &mut InteractiveDebugger {
297        &mut self.interactive_debugger
298    }
299
300    /// Get anomaly detector
301    pub fn anomaly_detector(&self) -> &AnomalyDetector {
302        &self.anomaly_detector
303    }
304
305    /// Get mutable anomaly detector
306    pub fn anomaly_detector_mut(&mut self) -> &mut AnomalyDetector {
307        &mut self.anomaly_detector
308    }
309
310    /// Get computation graph analyzer
311    pub fn computation_graph_analyzer(&self) -> &ComputationGraphAnalyzer {
312        &self.computation_graph_analyzer
313    }
314
315    /// Get mutable computation graph analyzer
316    pub fn computation_graph_analyzer_mut(&mut self) -> &mut ComputationGraphAnalyzer {
317        &mut self.computation_graph_analyzer
318    }
319
320    /// Get architecture analyzer
321    pub fn architecture_analyzer(&self) -> &architecture_analysis::ArchitectureAnalyzer {
322        &self.architecture_analyzer
323    }
324
325    /// Get mutable architecture analyzer
326    pub fn architecture_analyzer_mut(
327        &mut self,
328    ) -> &mut architecture_analysis::ArchitectureAnalyzer {
329        &mut self.architecture_analyzer
330    }
331
332    /// Get behavior analyzer
333    pub fn behavior_analyzer(&self) -> &BehaviorAnalyzer {
334        &self.behavior_analyzer
335    }
336
337    /// Get mutable behavior analyzer
338    pub fn behavior_analyzer_mut(&mut self) -> &mut BehaviorAnalyzer {
339        &mut self.behavior_analyzer
340    }
341
342    /// Get training dynamics analyzer
343    pub fn training_dynamics_analyzer(&self) -> &TrainingDynamicsAnalyzer {
344        &self.training_dynamics_analyzer
345    }
346
347    /// Get mutable training dynamics analyzer
348    pub fn training_dynamics_analyzer_mut(&mut self) -> &mut TrainingDynamicsAnalyzer {
349        &mut self.training_dynamics_analyzer
350    }
351
352    /// Get differential debugger
353    pub fn differential_debugger(&self) -> &DifferentialDebugger {
354        &self.differential_debugger
355    }
356
357    /// Get mutable differential debugger
358    pub fn differential_debugger_mut(&mut self) -> &mut DifferentialDebugger {
359        &mut self.differential_debugger
360    }
361
362    /// Get interpretability analyzer
363    pub fn interpretability_analyzer(&self) -> &InterpretabilityAnalyzer {
364        &self.interpretability_analyzer
365    }
366
367    /// Get mutable interpretability analyzer
368    pub fn interpretability_analyzer_mut(&mut self) -> &mut InterpretabilityAnalyzer {
369        &mut self.interpretability_analyzer
370    }
371
372    /// Get health checker
373    pub fn health_checker(&self) -> &crate::health_checker::HealthChecker {
374        &self.health_checker
375    }
376
377    /// Get mutable health checker
378    pub fn health_checker_mut(&mut self) -> &mut crate::health_checker::HealthChecker {
379        &mut self.health_checker
380    }
381
382    /// Get transformer debugger
383    pub fn transformer_debugger(&self) -> Option<&neural_network_debugging::TransformerDebugger> {
384        self.transformer_debugger.as_ref()
385    }
386
387    /// Get mutable transformer debugger
388    pub fn transformer_debugger_mut(
389        &mut self,
390    ) -> Option<&mut neural_network_debugging::TransformerDebugger> {
391        self.transformer_debugger.as_mut()
392    }
393
394    /// Get advanced ML debugger
395    pub fn advanced_ml_debugger(&self) -> &AdvancedMLDebugger {
396        &self.advanced_ml_debugger
397    }
398
399    /// Get mutable advanced ML debugger
400    pub fn advanced_ml_debugger_mut(&mut self) -> &mut AdvancedMLDebugger {
401        &mut self.advanced_ml_debugger
402    }
403
404    /// Get AI code analyzer
405    pub fn ai_code_analyzer(&self) -> Option<&AICodeAnalyzer> {
406        self.ai_code_analyzer.as_ref()
407    }
408
409    /// Get mutable AI code analyzer
410    pub fn ai_code_analyzer_mut(&mut self) -> Option<&mut AICodeAnalyzer> {
411        self.ai_code_analyzer.as_mut()
412    }
413
414    /// Get distributed debugger
415    pub fn distributed_debugger(&self) -> Option<&DistributedDebugger> {
416        self.distributed_debugger.as_ref()
417    }
418
419    /// Get mutable distributed debugger
420    pub fn distributed_debugger_mut(&mut self) -> Option<&mut DistributedDebugger> {
421        self.distributed_debugger.as_mut()
422    }
423
424    /// Get environmental monitor
425    pub fn environmental_monitor(&self) -> Option<&EnvironmentalMonitor> {
426        self.environmental_monitor.as_ref()
427    }
428
429    /// Get mutable environmental monitor
430    pub fn environmental_monitor_mut(&mut self) -> Option<&mut EnvironmentalMonitor> {
431        self.environmental_monitor.as_mut()
432    }
433
434    /// Start debugging session
435    pub async fn start(&mut self) -> Result<()> {
436        tracing::info!("Starting debug session {}", self.id);
437
438        if self.config.enable_tensor_inspection {
439            self.tensor_inspector.start().await?;
440        }
441
442        if self.config.enable_gradient_debugging {
443            self.gradient_debugger.start().await?;
444        }
445
446        if self.config.enable_model_diagnostics {
447            self.model_diagnostics.start().await?;
448        }
449
450        self.profiler.start().await?;
451
452        if let Some(ref mut memory_profiler) = self.memory_profiler {
453            memory_profiler.start().await?;
454        }
455
456        self.interactive_debugger.start().await?;
457        self.anomaly_detector.start().await?;
458
459        Ok(())
460    }
461
462    /// Stop debugging session and generate report
463    pub async fn stop(&mut self) -> Result<DebugReport> {
464        tracing::info!("Stopping debug session {}", self.id);
465
466        let tensor_report = if self.config.enable_tensor_inspection {
467            Some(self.tensor_inspector.generate_report().await?)
468        } else {
469            None
470        };
471
472        let gradient_report = if self.config.enable_gradient_debugging {
473            Some(self.gradient_debugger.generate_report().await?)
474        } else {
475            None
476        };
477
478        let diagnostics_report = if self.config.enable_model_diagnostics {
479            Some(self.model_diagnostics.generate_report().await?)
480        } else {
481            None
482        };
483
484        let profiler_report = self.profiler.generate_report().await?;
485
486        let memory_profiler_report = if let Some(ref mut memory_profiler) = self.memory_profiler {
487            Some(memory_profiler.stop().await?)
488        } else {
489            None
490        };
491
492        let interactive_debugger_report = self.interactive_debugger.generate_report().await?;
493        let anomaly_report = self.anomaly_detector.generate_report().await?;
494
495        // Get computation graph analysis results (if any graphs were analyzed)
496        let computation_graph_report = None; // Would be populated if graphs were analyzed
497
498        // Get new analyzer reports
499        let architecture_analysis_report =
500            Some(self.architecture_analyzer.generate_report().await?);
501        let behavior_analysis_report = Some(self.behavior_analyzer.generate_report().await?);
502        let training_dynamics_report =
503            Some(self.training_dynamics_analyzer.generate_report().await?);
504        let differential_debugging_report =
505            Some(self.differential_debugger.generate_report().await?);
506        let interpretability_report = Some(self.interpretability_analyzer.generate_report().await?);
507        let advanced_ml_debugging_report = Some(self.advanced_ml_debugger.generate_report().await?);
508
509        // Generate GPU profiling reports
510        let advanced_gpu_profiling_report = self
511            .advanced_gpu_profiler
512            .as_ref()
513            .map(|profiler| profiler.get_memory_analysis_report());
514
515        let kernel_optimization_report =
516            Some(self.generate_kernel_optimization_summary_report().await?);
517
518        Ok(DebugReport {
519            session_id: self.id,
520            tensor_report,
521            gradient_report,
522            diagnostics_report,
523            profiler_report,
524            memory_profiler_report,
525            interactive_debugger_report,
526            anomaly_report,
527            computation_graph_report,
528            architecture_analysis_report,
529            behavior_analysis_report,
530            training_dynamics_report,
531            differential_debugging_report,
532            interpretability_report,
533            advanced_ml_debugging_report,
534            advanced_gpu_profiling_report,
535            kernel_optimization_report,
536            config: self.config.clone(),
537        })
538    }
539
540    /// Export debug session to file
541    pub async fn export(&self, path: &str) -> Result<()> {
542        let report = self.generate_snapshot().await?;
543        let json = serde_json::to_string_pretty(&report)?;
544        tokio::fs::write(path, json).await?;
545        Ok(())
546    }
547
548    /// Generate a snapshot of current state
549    pub async fn generate_snapshot(&self) -> Result<DebugReport> {
550        let tensor_report = if self.config.enable_tensor_inspection {
551            Some(self.tensor_inspector.generate_report().await?)
552        } else {
553            None
554        };
555
556        let gradient_report = if self.config.enable_gradient_debugging {
557            Some(self.gradient_debugger.generate_report().await?)
558        } else {
559            None
560        };
561
562        let diagnostics_report = if self.config.enable_model_diagnostics {
563            Some(self.model_diagnostics.generate_report().await?)
564        } else {
565            None
566        };
567
568        let profiler_report = self.profiler.generate_report().await?;
569
570        let memory_profiler_report = if let Some(ref _memory_profiler) = self.memory_profiler {
571            // For snapshot, we don't stop the profiler, just get current state
572            None // Simplified for now
573        } else {
574            None
575        };
576
577        let interactive_debugger_report = self.interactive_debugger.generate_report().await?;
578        let anomaly_report = self.anomaly_detector.generate_report().await?;
579
580        // Get computation graph analysis results (if any graphs were analyzed)
581        let computation_graph_report = None; // Would be populated if graphs were analyzed
582
583        // Get new analyzer reports
584        let architecture_analysis_report =
585            Some(self.architecture_analyzer.generate_report().await?);
586        let behavior_analysis_report = Some(self.behavior_analyzer.generate_report().await?);
587        let training_dynamics_report =
588            Some(self.training_dynamics_analyzer.generate_report().await?);
589        let differential_debugging_report =
590            Some(self.differential_debugger.generate_report().await?);
591        let interpretability_report = Some(self.interpretability_analyzer.generate_report().await?);
592        let advanced_ml_debugging_report = Some(self.advanced_ml_debugger.generate_report().await?);
593
594        // Generate GPU profiling reports for snapshot
595        let advanced_gpu_profiling_report = self
596            .advanced_gpu_profiler
597            .as_ref()
598            .map(|profiler| profiler.get_memory_analysis_report());
599
600        let kernel_optimization_report =
601            Some(self.generate_kernel_optimization_summary_report().await?);
602
603        Ok(DebugReport {
604            session_id: self.id,
605            tensor_report,
606            gradient_report,
607            diagnostics_report,
608            profiler_report,
609            memory_profiler_report,
610            interactive_debugger_report,
611            anomaly_report,
612            computation_graph_report,
613            architecture_analysis_report,
614            behavior_analysis_report,
615            training_dynamics_report,
616            differential_debugging_report,
617            interpretability_report,
618            advanced_ml_debugging_report,
619            advanced_gpu_profiling_report,
620            kernel_optimization_report,
621            config: self.config.clone(),
622        })
623    }
624
625    /// Convenience method for debugging tensors (used by debug_tensor! macro)
626    pub fn debug_tensor<T>(&mut self, tensor: &ArrayD<T>, name: &str) -> Result<Uuid>
627    where
628        T: Clone + Into<f64> + fmt::Debug + 'static,
629    {
630        self.tensor_inspector.inspect_tensor(tensor, name, None, None)
631    }
632
633    /// Generate kernel optimization summary report
634    async fn generate_kernel_optimization_summary_report(
635        &self,
636    ) -> Result<KernelOptimizationSummaryReport> {
637        // In a real implementation, this would analyze all kernel profiles
638        // and generate comprehensive optimization recommendations
639        Ok(KernelOptimizationSummaryReport {
640            total_kernels_analyzed: 0,
641            optimization_opportunities_found: 0,
642            high_impact_optimizations: vec![],
643            fusion_opportunities: 0,
644            regression_alerts: 0,
645            overall_optimization_score: 85.0,
646            top_recommendations: vec!["No kernel analysis data available yet".to_string()],
647        })
648    }
649
650    /// Convenience method for debugging gradients (used by debug_gradient! macro)
651    pub fn debug_gradients<T>(&mut self, _layer_name: &str, gradients: &[T]) -> Result<()>
652    where
653        T: Clone + Into<f64> + fmt::Debug + 'static,
654    {
655        // Convert gradients vector to ndarray
656        use scirs2_core::ndarray::Array; // SciRS2 Integration Policy
657        let gradient_array = Array::from_vec(gradients.to_vec()).into_dyn();
658
659        // Create a dummy tensor ID for gradients (in real usage, this would be linked to an actual tensor)
660        let tensor_id = Uuid::new_v4();
661
662        self.tensor_inspector.inspect_gradients(tensor_id, &gradient_array)
663    }
664}
665
666/// Comprehensive debug report
667#[derive(Debug, Clone, Serialize, Deserialize)]
668pub struct DebugReport {
669    pub session_id: Uuid,
670    pub tensor_report: Option<TensorInspectionReport>,
671    pub gradient_report: Option<GradientDebugReport>,
672    pub diagnostics_report: Option<ModelDiagnosticsReport>,
673    pub profiler_report: ProfilerReport,
674    pub memory_profiler_report: Option<MemoryProfilingReport>,
675    pub interactive_debugger_report: InteractiveDebuggerReport,
676    pub anomaly_report: AnomalyDetectorReport,
677    pub computation_graph_report: Option<GraphAnalysisResult>,
678    pub architecture_analysis_report: Option<ArchitectureAnalysisReport>,
679    pub behavior_analysis_report: Option<BehaviorAnalysisReport>,
680    pub training_dynamics_report: Option<model_diagnostics::training::TrainingDynamicsReport>,
681    pub differential_debugging_report: Option<DifferentialDebuggingReport>,
682    pub interpretability_report: Option<InterpretabilityReport>,
683    pub advanced_ml_debugging_report: Option<AdvancedMLDebuggingReport>,
684    pub advanced_gpu_profiling_report: Option<MemoryAnalysisReport>,
685    pub kernel_optimization_report: Option<KernelOptimizationSummaryReport>,
686    pub config: DebugConfig,
687}
688
689impl DebugReport {
690    /// Get summary of key findings
691    pub fn summary(&self) -> DebugSummary {
692        let mut issues = Vec::new();
693        let mut recommendations = Vec::new();
694
695        // Analyze tensor issues
696        if let Some(ref tensor_report) = self.tensor_report {
697            if tensor_report.has_nan_values() {
698                issues.push("NaN values detected in tensors".to_string());
699                recommendations.push("Check input data and model initialization".to_string());
700            }
701
702            if tensor_report.has_inf_values() {
703                issues.push("Infinite values detected in tensors".to_string());
704                recommendations.push("Reduce learning rate or add gradient clipping".to_string());
705            }
706        }
707
708        // Analyze gradient issues
709        if let Some(ref gradient_report) = self.gradient_report {
710            if gradient_report.has_vanishing_gradients() {
711                issues.push("Vanishing gradients detected".to_string());
712                recommendations
713                    .push("Consider residual connections or gradient scaling".to_string());
714            }
715
716            if gradient_report.has_exploding_gradients() {
717                issues.push("Exploding gradients detected".to_string());
718                recommendations.push("Add gradient clipping".to_string());
719            }
720        }
721
722        DebugSummary {
723            session_id: self.session_id,
724            total_issues: issues.len(),
725            critical_issues: issues
726                .iter()
727                .filter(|i| i.contains("NaN") || i.contains("exploding"))
728                .count(),
729            issues,
730            recommendations,
731        }
732    }
733}
734
735/// High-level summary of debug findings
736#[derive(Debug, Serialize, Deserialize)]
737pub struct DebugSummary {
738    pub session_id: Uuid,
739    pub total_issues: usize,
740    pub critical_issues: usize,
741    pub issues: Vec<String>,
742    pub recommendations: Vec<String>,
743}
744
745/// Convenience function to create a debug session with default config
746pub fn debug_session() -> DebugSession {
747    DebugSession::new(DebugConfig::default())
748}
749
750/// Convenience function to create a debug session with custom config
751pub fn debug_session_with_config(config: DebugConfig) -> DebugSession {
752    DebugSession::new(config)
753}
754
755/// Convenience function to create a debug session with transformer debugging enabled
756pub fn debug_session_with_transformer() -> DebugSession {
757    let config = DebugConfig {
758        neural_network_debugging_config: Some(
759            neural_network_debugging::TransformerDebugConfig::default(),
760        ),
761        ..Default::default()
762    };
763    DebugSession::new(config)
764}