Skip to main content

trustformers_debug/guided/
debugger.rs

//! Guided debugging wizard for step-by-step model analysis.
//!
//! This module provides a structured debugging approach: a fixed, ordered sequence of
//! analysis steps with progress tracking, executed one step at a time against a debug session.
5
6use crate::core::session::{DebugConfig, DebugReport, DebugSession};
7use crate::interface::simple::{
8    QuickAnomalySummary, QuickArchitectureSummary, QuickGradientSummary, QuickHealthSummary,
9};
10use crate::{MemoryProfilingReport, ProfilerReport};
11use anyhow::Result;
12use serde::{Deserialize, Serialize};
13
14/// Guided debugging wizard
15pub struct GuidedDebugger {
16    session: DebugSession,
17    current_step: usize,
18    steps: Vec<DebugStep>,
19}
20
21/// Debug step in guided debugging
22#[derive(Debug)]
23pub struct DebugStep {
24    pub name: String,
25    pub description: String,
26    pub action: DebugAction,
27    pub expected_time: std::time::Duration,
28}
29
/// Kind of analysis a guided debugging step performs.
///
/// The enum carries no data, so it is cheap to copy and can be compared and hashed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DebugAction {
    /// Quick health check of the model.
    HealthCheck,
    /// Quick gradient analysis.
    GradientAnalysis,
    /// Quick architecture analysis.
    ArchitectureAnalysis,
    /// Memory profiling report (skipped when the session has no memory profiler).
    MemoryProfiling,
    /// Performance profiler report.
    PerformanceProfiling,
    /// Quick anomaly check.
    AnomalyDetection,
    /// Full snapshot report of the debug session.
    ComprehensiveAnalysis,
}
41
42impl Default for GuidedDebugger {
43    fn default() -> Self {
44        Self::new()
45    }
46}
47
48impl GuidedDebugger {
49    /// Create new guided debugger with automatic step detection
50    pub fn new() -> Self {
51        let config = DebugConfig::default();
52        let session = DebugSession::new(config);
53
54        let steps = vec![
55            DebugStep {
56                name: "Health Check".to_string(),
57                description: "Quick model health assessment".to_string(),
58                action: DebugAction::HealthCheck,
59                expected_time: std::time::Duration::from_secs(5),
60            },
61            DebugStep {
62                name: "Gradient Analysis".to_string(),
63                description: "Analyze gradient flow and stability".to_string(),
64                action: DebugAction::GradientAnalysis,
65                expected_time: std::time::Duration::from_secs(10),
66            },
67            DebugStep {
68                name: "Architecture Analysis".to_string(),
69                description: "Analyze model architecture and efficiency".to_string(),
70                action: DebugAction::ArchitectureAnalysis,
71                expected_time: std::time::Duration::from_secs(8),
72            },
73            DebugStep {
74                name: "Memory Profiling".to_string(),
75                description: "Profile memory usage and detect leaks".to_string(),
76                action: DebugAction::MemoryProfiling,
77                expected_time: std::time::Duration::from_secs(15),
78            },
79            DebugStep {
80                name: "Performance Profiling".to_string(),
81                description: "Analyze computational performance".to_string(),
82                action: DebugAction::PerformanceProfiling,
83                expected_time: std::time::Duration::from_secs(20),
84            },
85            DebugStep {
86                name: "Anomaly Detection".to_string(),
87                description: "Detect numerical anomalies and instabilities".to_string(),
88                action: DebugAction::AnomalyDetection,
89                expected_time: std::time::Duration::from_secs(12),
90            },
91        ];
92
93        Self {
94            session,
95            current_step: 0,
96            steps,
97        }
98    }
99
100    /// Get current step
101    pub fn current_step(&self) -> Option<&DebugStep> {
102        self.steps.get(self.current_step)
103    }
104
105    /// Get total number of steps
106    pub fn total_steps(&self) -> usize {
107        self.steps.len()
108    }
109
110    /// Get progress percentage
111    pub fn progress(&self) -> f64 {
112        (self.current_step as f64 / self.total_steps() as f64) * 100.0
113    }
114
115    /// Execute current step
116    pub async fn execute_current_step(&mut self) -> Result<StepResult> {
117        if let Some(step) = self.current_step() {
118            let start_time = std::time::Instant::now();
119
120            let result = match &step.action {
121                DebugAction::HealthCheck => {
122                    let summary = self.session.health_checker().quick_health_check().await?;
123                    StepResult::Health(summary)
124                },
125                DebugAction::GradientAnalysis => {
126                    let analysis = self.session.gradient_debugger().quick_analysis().await?;
127                    let summary =
128                        crate::interface::simple::QuickGradientSummary::from_analysis(&analysis);
129                    StepResult::Gradient(summary)
130                },
131                DebugAction::ArchitectureAnalysis => {
132                    let summary = self.session.architecture_analyzer().quick_analysis().await?;
133                    StepResult::Architecture(summary)
134                },
135                DebugAction::MemoryProfiling => {
136                    if let Some(profiler) = self.session.memory_profiler_mut() {
137                        let end_time = std::time::SystemTime::now();
138                        let duration_secs = 60.0; // Default duration
139                        let profiling_overhead_ms = 0.0; // Default overhead
140                        let report = profiler
141                            .generate_report(end_time, duration_secs, profiling_overhead_ms)
142                            .await?;
143                        StepResult::Memory(report)
144                    } else {
145                        StepResult::Skipped("Memory profiling not enabled".to_string())
146                    }
147                },
148                DebugAction::PerformanceProfiling => {
149                    let report = self.session.profiler().generate_report().await?;
150                    StepResult::Performance(report)
151                },
152                DebugAction::AnomalyDetection => {
153                    let summary = self.session.anomaly_detector().quick_check().await?;
154                    StepResult::Anomaly(summary)
155                },
156                DebugAction::ComprehensiveAnalysis => {
157                    let report = self.session.generate_snapshot().await?;
158                    StepResult::Comprehensive(report)
159                },
160            };
161
162            let _elapsed = start_time.elapsed();
163            self.current_step += 1;
164
165            Ok(result)
166        } else {
167            Err(anyhow::anyhow!("No more steps to execute"))
168        }
169    }
170
171    /// Skip current step
172    pub fn skip_current_step(&mut self) -> Result<()> {
173        if self.current_step < self.total_steps() {
174            self.current_step += 1;
175            Ok(())
176        } else {
177            Err(anyhow::anyhow!("No more steps to skip"))
178        }
179    }
180
181    /// Reset to beginning
182    pub fn reset(&mut self) {
183        self.current_step = 0;
184    }
185
186    /// Check if debugging is complete
187    pub fn is_complete(&self) -> bool {
188        self.current_step >= self.total_steps()
189    }
190}
191
192/// Result of a debug step
193#[derive(Debug, Serialize, Deserialize)]
194pub enum StepResult {
195    Health(QuickHealthSummary),
196    Gradient(QuickGradientSummary),
197    Architecture(QuickArchitectureSummary),
198    Memory(MemoryProfilingReport),
199    Performance(ProfilerReport),
200    Anomaly(QuickAnomalySummary),
201    Comprehensive(DebugReport),
202    Skipped(String),
203}