1use crate::error::{Result, SklearsComposeError};
12use scirs2_core::ndarray::{Array2, ArrayView2};
13use serde::{Deserialize, Serialize};
14use std::collections::{HashMap, VecDeque};
15use std::fmt;
16use std::sync::{Arc, Mutex, RwLock};
17use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
18
/// Entry point for pipeline debugging: owns the tracing, profiling, interactive-debugging,
/// data-inspection and error-tracking components plus the configuration that gates them.
///
/// Every component is wrapped in an `Arc` and a lock, which is why the debugger's
/// methods operate through `&self`.
#[derive(Debug)]
pub struct PipelineDebugger {
    /// Step history and aggregate execution counters.
    tracer: Arc<RwLock<ExecutionTracer>>,

    /// Open timing sessions and the performance metrics collected per step.
    profiler: Arc<RwLock<PerformanceProfiler>>,

    /// Breakpoints, watch expressions and the pending debug-command queue.
    interactive_debugger: Arc<Mutex<InteractiveDebugger>>,

    /// Data snapshots captured when steps start.
    data_inspector: Arc<RwLock<DataInspector>>,

    /// History of recorded errors together with their suggestions.
    error_manager: Arc<RwLock<ErrorContextManager>>,

    /// Feature switches consulted by the step-tracking methods.
    config: DebuggingConfig,
}
40
/// User-facing configuration of a [`PipelineDebugger`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DebuggingConfig {
    /// When `true`, steps are recorded in the execution tracer.
    pub enable_execution_tracing: bool,

    /// When `true`, each step is timed by the performance profiler.
    pub enable_performance_profiling: bool,

    /// When `true`, a data snapshot is captured at the start of every step.
    pub enable_data_inspection: bool,

    /// When `true`, `step_start` checks the registered breakpoints.
    pub enable_interactive_debugging: bool,

    /// Upper bound on the number of steps kept in the tracer's history.
    pub max_execution_history: usize,

    /// Multiplied by 1000 (and truncated) to derive `InspectionConfig::sample_size`
    /// in `PipelineDebugger::with_config`.
    pub data_inspection_sample_rate: f64,

    /// Verbose-logging switch.
    /// NOTE(review): not read by any code visible in this file.
    pub verbose_logging: bool,

    /// Preferred export format.
    /// NOTE(review): `export_debug_info` takes the format as an argument and does not
    /// consult this field.
    pub export_format: ExportFormat,
}
68
/// Output formats understood by `PipelineDebugger::export_debug_info`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ExportFormat {
    /// Pretty-printed JSON serialization of the whole report.
    Json,
    /// Stand-alone HTML page with the execution summary and recommendations.
    Html,
    /// Markdown document with summary, timing and recommendations.
    Markdown,
    /// CSV output; the current implementation emits only the header row.
    Csv,
}
81
/// Keeps a bounded history of executed steps together with aggregate statistics.
#[derive(Debug)]
pub struct ExecutionTracer {
    /// Recorded steps, oldest first; the front entry is evicted once the configured
    /// limit is reached.
    execution_history: VecDeque<ExecutionStep>,

    /// Context of the most recently started pipeline run, if any.
    current_context: Option<ExecutionContext>,

    /// Running counters returned by `get_execution_summary`.
    statistics: ExecutionStatistics,

    /// Limits and switches for this tracer.
    config: TracingConfig,
}
97
/// One recorded pipeline step, from start to completion or failure.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionStep {
    /// Identifier returned by `PipelineDebugger::step_start`; used to look the step up later.
    pub step_id: String,

    /// Name of the step as supplied by the caller.
    pub step_name: String,

    /// Kind of work the step performs.
    pub step_type: StepType,

    /// Wall-clock time at which the step was recorded.
    pub start_time: SystemTime,

    /// Elapsed time; `None` until the step is completed or failed.
    pub duration: Option<Duration>,

    /// Shape, dtype and size of the step's input.
    pub input_metadata: DataMetadata,

    /// Shape, dtype and size of the step's output; set on completion.
    pub output_metadata: Option<DataMetadata>,

    /// Performance metrics; starts as the default value and is replaced by the
    /// profiler's measurement when one is available.
    pub performance_metrics: PerformanceMetrics,

    /// Warnings attached to the step (empty when the step is created).
    pub warnings: Vec<String>,

    /// Current lifecycle status.
    pub status: StepStatus,

    /// Details of the failure, set when the step is marked as failed.
    pub error_info: Option<ErrorInfo>,
}
134
/// Category of a pipeline step.
///
/// The `Display` implementation renders each variant as its name and
/// `Custom(name)` for the custom variant.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum StepType {
    /// Used by `PipelineDebugger::debug_transform` for the steps it records.
    Transform,
    Fit,
    Predict,
    Validate,
    Preprocess,
    FeatureEngineering,
    ModelSelection,
    Ensemble,
    /// Caller-defined category identified by a free-form name.
    Custom(String),
}
157
/// Lifecycle status of an [`ExecutionStep`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum StepStatus {
    /// Initial status assigned by `step_start`.
    Running,
    /// Set by the tracer when the step completes.
    Completed,
    /// Set by the tracer when the step fails.
    Failed,
    /// NOTE(review): never assigned by code visible in this file.
    Skipped,
    /// NOTE(review): never assigned by code visible in this file.
    Warning,
}
172
/// Run-level context created by `PipelineDebugger::start_execution`.
#[derive(Debug, Clone)]
pub struct ExecutionContext {
    /// Name of the pipeline being executed.
    pub pipeline_name: String,

    /// Name of the current step; initialised to `"initialization"`.
    pub current_step: String,

    /// Monotonic instant at which the run was started.
    pub execution_start: Instant,

    /// Free-form variables attached to the run (empty on creation).
    pub context_variables: HashMap<String, ContextValue>,

    /// Coarse state of the run; starts as `ExecutionState::Initializing`.
    pub execution_state: ExecutionState,
}
191
/// Coarse state of a pipeline run.
#[derive(Debug, Clone)]
pub enum ExecutionState {
    /// State assigned when the run's context is created.
    Initializing,
    /// A step is executing; carries that step's name.
    Running { current_step: String },
    /// Execution is suspended; carries the reason.
    Paused { reason: String },
    /// The run finished.
    Completed,
    /// The run failed; carries the error text.
    Failed { error: String },
}
206
/// Dynamically typed value stored in context or session variable maps.
#[derive(Debug, Clone)]
pub enum ContextValue {
    /// Text value.
    String(String),
    /// Floating-point number.
    Number(f64),
    /// Boolean flag.
    Boolean(bool),
    /// Sequence of floating-point numbers.
    Array(Vec<f64>),
    /// String-to-string key/value map.
    Metadata(HashMap<String, String>),
}
221
/// Times steps and accumulates their performance metrics.
#[derive(Debug)]
pub struct PerformanceProfiler {
    /// Finished measurements, grouped by step name.
    step_measurements: HashMap<String, Vec<PerformanceMetrics>>,

    /// Measurements still in progress, keyed by step id.
    current_measurements: HashMap<String, MeasurementSession>,

    /// Profiling switches and intervals.
    config: ProfilingConfig,

    /// Optional bottleneck analysis; `None` on construction. Its suggestions are
    /// copied into report recommendations when present.
    bottleneck_analysis: Option<BottleneckAnalysis>,
}
237
/// Performance figures attached to a single step.
///
/// Only `execution_time_ms` is measured by the profiler in this file; the remaining
/// fields are currently always reported as zero/empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    /// Wall-clock execution time in milliseconds.
    pub execution_time_ms: f64,

    /// Memory usage in bytes.
    pub memory_usage_bytes: u64,

    /// CPU utilisation. NOTE(review): unit/range is not established by this file.
    pub cpu_utilization: f64,

    /// Size of the step's input.
    pub input_size: usize,

    /// Size of the step's output.
    pub output_size: usize,

    /// Cache hit/miss counters.
    pub cache_statistics: CacheStatistics,

    /// Additional named metrics.
    pub custom_metrics: HashMap<String, f64>,
}
262
/// Cache counters reported as part of [`PerformanceMetrics`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheStatistics {
    /// Number of cache hits.
    pub hits: u64,
    /// Number of cache misses.
    pub misses: u64,
    /// Hit ratio. NOTE(review): presumably hits / (hits + misses) — nothing in this file computes it.
    pub hit_ratio: f64,
}
270
/// In-progress measurement opened by `PerformanceProfiler::start_measurement`.
#[derive(Debug)]
pub struct MeasurementSession {
    /// Monotonic instant at which the measurement began.
    start_time: Instant,
    /// Memory baseline; the profiler currently always stores 0.
    initial_memory: u64,
    /// Name of the measured step; used to group the finished metrics.
    step_name: String,
}
278
/// State of the interactive debugger: breakpoints, watches, session and queued commands.
#[derive(Debug)]
pub struct InteractiveDebugger {
    /// Registered breakpoints. NOTE(review): the key is presumably the breakpoint id — confirm.
    breakpoints: HashMap<String, Breakpoint>,

    /// Expressions being watched.
    watch_expressions: Vec<WatchExpression>,

    /// Active debug session, if any (`None` on construction).
    current_session: Option<DebugSession>,

    /// Commands waiting to be processed.
    command_queue: VecDeque<DebugCommand>,

    /// Interactive-debugging settings.
    config: InteractiveConfig,
}
297
/// Breakpoint attached to a named step.
#[derive(Debug, Clone)]
pub struct Breakpoint {
    /// Identifier of the breakpoint.
    pub id: String,

    /// Name of the step the breakpoint applies to.
    pub step_name: String,

    /// Optional trigger condition.
    /// NOTE(review): `InteractiveDebugger::should_break` does not evaluate it.
    pub condition: Option<BreakpointCondition>,

    /// Disabled breakpoints are ignored by `should_break`.
    pub enabled: bool,

    /// Hit counter. NOTE(review): not updated by code visible in this file.
    pub hit_count: usize,
}
316
/// Condition describing when a breakpoint should trigger.
#[derive(Debug, Clone)]
pub enum BreakpointCondition {
    /// Unconditional.
    Always,
    /// Parameterised by an expected data shape.
    DataShape { expected_shape: Vec<usize> },
    /// Parameterised by a maximum execution time in milliseconds.
    PerformanceThreshold { max_time_ms: f64 },
    /// Tied to an error having occurred.
    ErrorOccurred,
    /// Caller-defined condition expressed as a string.
    Custom(String),
}
331
/// Expression whose value is tracked during a debug session.
#[derive(Debug, Clone)]
pub struct WatchExpression {
    /// Identifier of the watch.
    pub id: String,

    /// Expression text.
    pub expression: String,

    /// Most recent value, rendered as a string, if one has been recorded.
    pub current_value: Option<String>,

    /// Previously observed values paired with the time they were recorded.
    pub value_history: Vec<(SystemTime, String)>,
}
347
/// State of one interactive debug session.
#[derive(Debug)]
pub struct DebugSession {
    /// Identifier of the session.
    session_id: String,

    /// Name of the step the session is positioned at.
    current_step: String,

    /// Variables visible in the session.
    session_variables: HashMap<String, ContextValue>,

    /// Current stepping state.
    debug_state: DebugState,
}
363
/// Stepping state of a [`DebugSession`].
#[derive(Debug, Clone)]
pub enum DebugState {
    /// Execution is proceeding.
    Running,
    /// Execution is suspended; carries the reason.
    Paused { reason: String },
    /// A step-over was requested.
    StepOver,
    /// A step-into was requested.
    StepInto,
    /// A continue was requested.
    Continue,
}
378
/// Commands that can be queued on the [`InteractiveDebugger`].
#[derive(Debug, Clone)]
pub enum DebugCommand {
    /// Resume execution.
    Continue,
    /// Step over.
    StepOver,
    /// Step into.
    StepInto,
    /// Step out.
    StepOut,
    /// Register a breakpoint on `step_name`, optionally with a condition.
    SetBreakpoint {
        step_name: String,
        condition: Option<BreakpointCondition>,
    },
    /// Remove the breakpoint with the given id.
    RemoveBreakpoint { breakpoint_id: String },
    /// Start watching an expression.
    AddWatch { expression: String },
    /// Stop watching the expression with the given watch id.
    RemoveWatch { watch_id: String },
    /// Inspect a variable by name.
    InspectVariable { variable_name: String },
    /// Evaluate an expression.
    EvaluateExpression { expression: String },
}
406
/// Captures and summarises the data flowing through pipeline steps.
#[derive(Debug)]
pub struct DataInspector {
    /// Latest snapshot per step name.
    data_snapshots: HashMap<String, DataSnapshot>,

    /// Optional data-flow analysis (`None` on construction).
    data_flow_analysis: Option<DataFlowAnalysis>,

    /// Summaries of individual transformations (empty on construction).
    transformation_summaries: Vec<TransformationSummary>,

    /// Inspection settings.
    config: InspectionConfig,
}
422
/// Summary of a 2-D array captured at a step boundary.
#[derive(Debug, Clone)]
pub struct DataSnapshot {
    /// Name of the step the data belongs to.
    pub step_name: String,

    /// Array shape as reported by the array view.
    pub shape: Vec<usize>,

    /// Element type name; the inspector always records `"f64"`.
    pub dtype: String,

    /// Per-column statistics of the data.
    pub statistics: DataStatistics,

    /// Optional raw sample; the inspector currently always stores `None`.
    pub sample_data: Option<Vec<f64>>,

    /// Wall-clock time at which the snapshot was taken.
    pub timestamp: SystemTime,
}
444
/// Column-wise statistics of a 2-D `f64` array.
///
/// All vectors hold one entry per column (feature).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataStatistics {
    /// Number of rows.
    pub n_samples: usize,

    /// Number of columns.
    pub n_features: usize,

    /// Per-column mean.
    pub means: Vec<f64>,

    /// Per-column standard deviation (population form: divides by the value count).
    pub stds: Vec<f64>,

    /// Per-column minimum.
    pub mins: Vec<f64>,

    /// Per-column maximum.
    pub maxs: Vec<f64>,

    /// Per-column count of NaN entries.
    pub missing_counts: Vec<usize>,

    /// One minus the fraction of NaN cells over all cells.
    pub quality_score: f64,
}
472
/// Aggregated view of how data moves through the pipeline.
#[derive(Debug, Clone)]
pub struct DataFlowAnalysis {
    /// Lineage nodes, one per recorded data dependency.
    pub lineage: Vec<DataLineageNode>,

    /// Graph of transformations and the edges between them.
    pub transformation_graph: TransformationGraph,

    /// Quality metrics collected per step.
    pub quality_metrics: Vec<QualityMetric>,
}
485
/// Node in the data-lineage list of a [`DataFlowAnalysis`].
#[derive(Debug, Clone)]
pub struct DataLineageNode {
    /// Identifier of the node.
    pub node_id: String,

    /// Name of the step the node describes.
    pub step_name: String,

    /// Identifiers of the node's inputs.
    pub inputs: Vec<String>,

    /// Identifiers of the node's outputs.
    pub outputs: Vec<String>,

    /// Description of the transformation applied.
    pub transformation: String,
}
504
/// Records errors raised by pipeline steps and the suggestions generated for them.
#[derive(Debug)]
pub struct ErrorContextManager {
    /// Recorded errors, oldest first, bounded by `ErrorConfig::max_error_history`.
    error_history: Vec<ErrorContext>,

    /// Known error patterns mapped to suggestions.
    /// NOTE(review): never populated by code visible in this file.
    error_patterns: HashMap<String, Vec<ErrorSuggestion>>,

    /// Error-tracking settings.
    config: ErrorConfig,
}
517
/// Everything recorded about a single error occurrence.
#[derive(Debug, Clone)]
pub struct ErrorContext {
    /// Identifier built from the step id and a millisecond timestamp.
    pub error_id: String,

    /// `Display` rendering of the error.
    pub error_message: String,

    /// `Debug` rendering of the error.
    pub error_type: String,

    /// Step the error belongs to; `record_error` stores the step id here.
    pub step_name: String,

    /// Metadata of the step input; `record_error` stores an empty placeholder.
    pub input_context: DataMetadata,

    /// Textual execution state; `record_error` stores `"error"`.
    pub execution_state: String,

    /// Stack trace lines (empty when created by `record_error`).
    pub stack_trace: Vec<String>,

    /// Suggestions generated for this error.
    pub suggestions: Vec<ErrorSuggestion>,

    /// Wall-clock time at which the error was recorded.
    pub timestamp: SystemTime,
}
548
/// Suggested remedy for a recorded error.
#[derive(Debug, Clone)]
pub struct ErrorSuggestion {
    /// Human-readable description of the suggestion.
    pub description: String,

    /// Confidence attached to the suggestion (the built-in suggestions use 0.8 and 0.7).
    pub confidence: f64,

    /// Optional hint describing a fix.
    pub code_fix: Option<String>,

    /// Optional URL of related documentation.
    pub documentation_link: Option<String>,
}
564
/// Lightweight description of an array passed into or out of a step.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataMetadata {
    /// Array shape.
    pub shape: Vec<usize>,

    /// Element type name (`"f64"` for real data, `"unknown"` for placeholders).
    pub dtype: String,

    /// Size of the data in bytes (element count times element size).
    pub memory_usage: u64,

    /// Additional string key/value annotations.
    pub metadata: HashMap<String, String>,
}
580
/// Failure details stored on an [`ExecutionStep`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorInfo {
    /// `Display` rendering of the error.
    pub message: String,

    /// `Debug` rendering of the error.
    pub error_type: String,

    /// Stack trace lines (empty when created by `step_error`).
    pub stack_trace: Vec<String>,
}
593
/// Settings of the [`ExecutionTracer`].
#[derive(Debug, Clone)]
pub struct TracingConfig {
    /// Maximum number of steps kept in the history before the oldest is evicted.
    pub max_history_size: usize,
    /// Copied from `DebuggingConfig::enable_data_inspection`.
    /// NOTE(review): not read by code visible in this file.
    pub enable_metadata_collection: bool,
    /// Copied from `DebuggingConfig::enable_performance_profiling`.
    /// NOTE(review): not read by code visible in this file.
    pub enable_performance_tracking: bool,
}
601
/// Settings of the [`PerformanceProfiler`].
///
/// NOTE(review): none of these fields is read by code visible in this file.
#[derive(Debug, Clone)]
pub struct ProfilingConfig {
    /// Memory-tracking switch.
    pub enable_memory_tracking: bool,
    /// CPU-monitoring switch.
    pub enable_cpu_monitoring: bool,
    /// Measurement interval in milliseconds.
    pub measurement_interval_ms: u64,
}
608
/// Settings of the [`InteractiveDebugger`].
///
/// NOTE(review): none of these fields is read by code visible in this file.
#[derive(Debug, Clone)]
pub struct InteractiveConfig {
    /// Automatic-breakpoint switch.
    pub enable_auto_breakpoints: bool,
    /// Upper bound on the number of watch expressions.
    pub max_watch_expressions: usize,
    /// Command timeout in milliseconds.
    pub command_timeout_ms: u64,
}
615
/// Settings of the [`DataInspector`].
///
/// NOTE(review): none of these fields is read by code visible in this file.
#[derive(Debug, Clone)]
pub struct InspectionConfig {
    /// Sample size; derived from `DebuggingConfig::data_inspection_sample_rate * 1000`.
    pub sample_size: usize,
    /// Statistical-analysis switch.
    pub enable_statistical_analysis: bool,
    /// Quality-metrics switch.
    pub enable_quality_metrics: bool,
}
622
/// Settings of the [`ErrorContextManager`].
#[derive(Debug, Clone)]
pub struct ErrorConfig {
    /// Maximum number of errors kept in the history.
    pub max_error_history: usize,
    /// Pattern-recognition switch.
    /// NOTE(review): not read by code visible in this file.
    pub enable_pattern_recognition: bool,
    /// Switch for automatic suggestion generation.
    pub enable_auto_suggestions: bool,
}
629
/// Aggregate counters maintained by the [`ExecutionTracer`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionStatistics {
    /// Number of steps ever added to the tracer (not reduced by history eviction).
    pub total_steps: usize,
    /// Number of steps marked as completed.
    pub successful_steps: usize,
    /// Number of steps marked as failed.
    pub failed_steps: usize,
    /// Total execution time across steps; initialised to zero.
    pub total_execution_time: Duration,
    /// Average time per step; initialised to zero.
    pub average_step_time: Duration,
}
639
/// Result of a bottleneck analysis over profiled steps.
#[derive(Debug, Clone)]
pub struct BottleneckAnalysis {
    /// Step names paired with their durations.
    pub slowest_steps: Vec<(String, Duration)>,
    /// Step names paired with a memory figure. NOTE(review): presumably bytes — confirm.
    pub memory_intensive_steps: Vec<(String, u64)>,
    /// Suggestions that are copied into report recommendations.
    pub optimization_suggestions: Vec<String>,
}
646
/// Summary of how a single step changed the data.
#[derive(Debug, Clone)]
pub struct TransformationSummary {
    /// Name of the step.
    pub step_name: String,
    /// Shape of the data before the step.
    pub input_shape: Vec<usize>,
    /// Shape of the data after the step.
    pub output_shape: Vec<usize>,
    /// Textual label of the transformation kind.
    pub transformation_type: String,
    /// Effect on data quality. NOTE(review): scale and sign convention are not defined in this file.
    pub quality_impact: f64,
}
655
/// Graph of transformations: nodes plus the edges connecting them.
#[derive(Debug, Clone)]
pub struct TransformationGraph {
    /// Graph nodes.
    pub nodes: Vec<TransformationNode>,
    /// Edges between nodes.
    pub edges: Vec<TransformationEdge>,
}
661
/// Node of a [`TransformationGraph`].
#[derive(Debug, Clone)]
pub struct TransformationNode {
    /// Identifier of the node.
    pub id: String,
    /// Name of the step the node represents.
    pub step_name: String,
    /// Textual label of the node kind.
    pub node_type: String,
}
668
/// Edge of a [`TransformationGraph`].
#[derive(Debug, Clone)]
pub struct TransformationEdge {
    /// Source endpoint. NOTE(review): presumably a `TransformationNode::id` — confirm.
    pub from: String,
    /// Target endpoint. NOTE(review): presumably a `TransformationNode::id` — confirm.
    pub to: String,
    /// Description of the data carried along the edge.
    pub data_flow: String,
}
675
/// Named quality measurement attached to a step.
#[derive(Debug, Clone)]
pub struct QualityMetric {
    /// Name of the step the metric was measured for.
    pub step_name: String,
    /// Name of the metric.
    pub metric_name: String,
    /// Measured value.
    pub value: f64,
    /// Optional threshold the value can be compared against.
    pub threshold: Option<f64>,
}
683
684impl Default for DebuggingConfig {
685 fn default() -> Self {
686 Self {
687 enable_execution_tracing: true,
688 enable_performance_profiling: true,
689 enable_data_inspection: true,
690 enable_interactive_debugging: false,
691 max_execution_history: 1000,
692 data_inspection_sample_rate: 0.1,
693 verbose_logging: false,
694 export_format: ExportFormat::Json,
695 }
696 }
697}
698
699impl Default for PipelineDebugger {
700 fn default() -> Self {
701 Self::new()
702 }
703}
704
705impl PipelineDebugger {
706 #[must_use]
708 pub fn new() -> Self {
709 Self::with_config(DebuggingConfig::default())
710 }
711
712 #[must_use]
714 pub fn with_config(config: DebuggingConfig) -> Self {
715 let tracer_config = TracingConfig {
716 max_history_size: config.max_execution_history,
717 enable_metadata_collection: config.enable_data_inspection,
718 enable_performance_tracking: config.enable_performance_profiling,
719 };
720
721 let profiler_config = ProfilingConfig {
722 enable_memory_tracking: true,
723 enable_cpu_monitoring: true,
724 measurement_interval_ms: 100,
725 };
726
727 let interactive_config = InteractiveConfig {
728 enable_auto_breakpoints: false,
729 max_watch_expressions: 50,
730 command_timeout_ms: 5000,
731 };
732
733 let inspection_config = InspectionConfig {
734 sample_size: (config.data_inspection_sample_rate * 1000.0) as usize,
735 enable_statistical_analysis: true,
736 enable_quality_metrics: true,
737 };
738
739 let error_config = ErrorConfig {
740 max_error_history: 100,
741 enable_pattern_recognition: true,
742 enable_auto_suggestions: true,
743 };
744
745 Self {
746 tracer: Arc::new(RwLock::new(ExecutionTracer::new(tracer_config))),
747 profiler: Arc::new(RwLock::new(PerformanceProfiler::new(profiler_config))),
748 interactive_debugger: Arc::new(Mutex::new(InteractiveDebugger::new(
749 interactive_config,
750 ))),
751 data_inspector: Arc::new(RwLock::new(DataInspector::new(inspection_config))),
752 error_manager: Arc::new(RwLock::new(ErrorContextManager::new(error_config))),
753 config,
754 }
755 }
756
757 pub fn start_execution(&self, pipeline_name: &str) -> Result<String> {
759 if self.config.enable_execution_tracing {
760 let mut tracer = self.tracer.write().unwrap();
761 let context = ExecutionContext {
762 pipeline_name: pipeline_name.to_string(),
763 current_step: "initialization".to_string(),
764 execution_start: Instant::now(),
765 context_variables: HashMap::new(),
766 execution_state: ExecutionState::Initializing,
767 };
768 tracer.current_context = Some(context);
769 }
770
771 Ok(format!(
772 "debug_session_{}",
773 SystemTime::now()
774 .duration_since(UNIX_EPOCH)
775 .unwrap()
776 .as_secs()
777 ))
778 }
779
780 pub fn step_start(
782 &self,
783 step_name: &str,
784 step_type: StepType,
785 input_data: ArrayView2<f64>,
786 ) -> Result<String> {
787 let step_id = format!(
788 "{}_{}",
789 step_name,
790 SystemTime::now()
791 .duration_since(UNIX_EPOCH)
792 .unwrap()
793 .as_millis()
794 );
795
796 if self.config.enable_execution_tracing {
797 let mut tracer = self.tracer.write().unwrap();
798 let input_metadata = self.create_data_metadata(&input_data);
799
800 let step = ExecutionStep {
801 step_id: step_id.clone(),
802 step_name: step_name.to_string(),
803 step_type,
804 start_time: SystemTime::now(),
805 duration: None,
806 input_metadata,
807 output_metadata: None,
808 performance_metrics: PerformanceMetrics::default(),
809 warnings: Vec::new(),
810 status: StepStatus::Running,
811 error_info: None,
812 };
813
814 tracer.add_step(step);
815 }
816
817 if self.config.enable_performance_profiling {
818 let mut profiler = self.profiler.write().unwrap();
819 profiler.start_measurement(&step_id, step_name);
820 }
821
822 if self.config.enable_data_inspection {
823 let mut inspector = self.data_inspector.write().unwrap();
824 inspector.capture_data_snapshot(step_name, &input_data)?;
825 }
826
827 if self.config.enable_interactive_debugging {
829 let debugger = self.interactive_debugger.lock().unwrap();
830 if debugger.should_break(step_name) {
831 println!("Breakpoint hit at step: {step_name}");
833 }
834 }
835
836 Ok(step_id)
837 }
838
839 pub fn step_complete(&self, step_id: &str, output_data: ArrayView2<f64>) -> Result<()> {
841 if self.config.enable_execution_tracing {
842 let mut tracer = self.tracer.write().unwrap();
843 let output_metadata = self.create_data_metadata(&output_data);
844 tracer.complete_step(step_id, output_metadata)?;
845 }
846
847 if self.config.enable_performance_profiling {
848 let mut profiler = self.profiler.write().unwrap();
849 if let Some(metrics) = profiler.end_measurement(step_id) {
850 if let Ok(mut tracer) = self.tracer.write() {
852 tracer.update_step_metrics(step_id, metrics)?;
853 }
854 }
855 }
856
857 Ok(())
858 }
859
860 pub fn step_error(&self, step_id: &str, error: &SklearsComposeError) -> Result<()> {
862 if self.config.enable_execution_tracing {
863 let mut tracer = self.tracer.write().unwrap();
864 let error_info = ErrorInfo {
865 message: error.to_string(),
866 error_type: format!("{error:?}"),
867 stack_trace: vec![], };
869 tracer.fail_step(step_id, error_info)?;
870 }
871
872 let mut error_manager = self.error_manager.write().unwrap();
874 error_manager.record_error(step_id, error)?;
875
876 Ok(())
877 }
878
879 pub fn generate_report(&self) -> Result<DebugReport> {
881 let tracer = self.tracer.read().unwrap();
882 let profiler = self.profiler.read().unwrap();
883 let inspector = self.data_inspector.read().unwrap();
884 let error_manager = self.error_manager.read().unwrap();
885
886 let report = DebugReport {
887 execution_summary: tracer.get_execution_summary(),
888 performance_analysis: profiler.get_analysis(),
889 data_flow_summary: inspector.get_data_flow_summary(),
890 error_summary: error_manager.get_error_summary(),
891 recommendations: self.generate_recommendations()?,
892 timestamp: SystemTime::now(),
893 };
894
895 Ok(report)
896 }
897
898 pub fn export_debug_info(&self, format: ExportFormat) -> Result<String> {
900 let report = self.generate_report()?;
901
902 match format {
903 ExportFormat::Json => Ok(serde_json::to_string_pretty(&report).unwrap()),
904 ExportFormat::Html => Ok(self.generate_html_report(&report)),
905 ExportFormat::Markdown => Ok(self.generate_markdown_report(&report)),
906 ExportFormat::Csv => Ok(self.generate_csv_report(&report)),
907 }
908 }
909
910 fn create_data_metadata(&self, data: &ArrayView2<f64>) -> DataMetadata {
912 DataMetadata {
914 shape: data.shape().to_vec(),
915 dtype: "f64".to_string(),
916 memory_usage: (data.len() * std::mem::size_of::<f64>()) as u64,
917 metadata: HashMap::new(),
918 }
919 }
920
921 fn generate_recommendations(&self) -> Result<Vec<String>> {
922 let mut recommendations = Vec::new();
923
924 if let Ok(profiler) = self.profiler.read() {
926 if let Some(analysis) = &profiler.bottleneck_analysis {
927 recommendations.extend(analysis.optimization_suggestions.clone());
928 }
929 }
930
931 if let Ok(error_manager) = self.error_manager.read() {
933 recommendations
935 .push("Consider adding input validation to prevent common errors".to_string());
936 }
937
938 Ok(recommendations)
939 }
940
941 fn generate_html_report(&self, report: &DebugReport) -> String {
942 format!(
943 r#"<!DOCTYPE html>
944<html>
945<head>
946 <title>Pipeline Debug Report</title>
947 <style>
948 body {{ font-family: Arial, sans-serif; margin: 20px; }}
949 .section {{ margin-bottom: 30px; }}
950 .metric {{ margin: 5px 0; }}
951 table {{ border-collapse: collapse; width: 100%; }}
952 th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
953 th {{ background-color: #f2f2f2; }}
954 </style>
955</head>
956<body>
957 <h1>Pipeline Debug Report</h1>
958 <div class="section">
959 <h2>Execution Summary</h2>
960 <p>Total Steps: {}</p>
961 <p>Successful Steps: {}</p>
962 <p>Failed Steps: {}</p>
963 </div>
964 <div class="section">
965 <h2>Recommendations</h2>
966 <ul>{}</ul>
967 </div>
968</body>
969</html>"#,
970 report.execution_summary.total_steps,
971 report.execution_summary.successful_steps,
972 report.execution_summary.failed_steps,
973 report
974 .recommendations
975 .iter()
976 .map(|r| format!("<li>{r}</li>"))
977 .collect::<String>()
978 )
979 }
980
981 fn generate_markdown_report(&self, report: &DebugReport) -> String {
982 format!(
983 r"# Pipeline Debug Report
984
985## Execution Summary
986- Total Steps: {}
987- Successful Steps: {}
988- Failed Steps: {}
989
990## Performance Analysis
991- Average Step Time: {:?}
992- Total Execution Time: {:?}
993
994## Recommendations
995{}
996",
997 report.execution_summary.total_steps,
998 report.execution_summary.successful_steps,
999 report.execution_summary.failed_steps,
1000 report.execution_summary.average_step_time,
1001 report.execution_summary.total_execution_time,
1002 report
1003 .recommendations
1004 .iter()
1005 .map(|r| format!("- {r}"))
1006 .collect::<Vec<_>>()
1007 .join("\n")
1008 )
1009 }
1010
1011 fn generate_csv_report(&self, _report: &DebugReport) -> String {
1012 "step_name,duration_ms,status,memory_usage\n".to_string()
1014 }
1015}
1016
/// Snapshot report produced by `PipelineDebugger::generate_report`.
#[derive(Debug, Serialize, Deserialize)]
pub struct DebugReport {
    /// Aggregate step counters and timings from the tracer.
    pub execution_summary: ExecutionStatistics,
    /// One-line summary produced by the profiler.
    pub performance_analysis: String,
    /// One-line summary produced by the data inspector.
    pub data_flow_summary: String,
    /// One-line summary produced by the error manager.
    pub error_summary: String,
    /// Recommendations gathered from the profiler and error manager.
    pub recommendations: Vec<String>,
    /// Wall-clock time at which the report was generated.
    pub timestamp: SystemTime,
}
1027
1028impl ExecutionTracer {
1031 fn new(config: TracingConfig) -> Self {
1032 Self {
1033 execution_history: VecDeque::with_capacity(config.max_history_size),
1034 current_context: None,
1035 statistics: ExecutionStatistics {
1036 total_steps: 0,
1037 successful_steps: 0,
1038 failed_steps: 0,
1039 total_execution_time: Duration::from_secs(0),
1040 average_step_time: Duration::from_secs(0),
1041 },
1042 config,
1043 }
1044 }
1045
1046 fn add_step(&mut self, step: ExecutionStep) {
1047 if self.execution_history.len() >= self.config.max_history_size {
1048 self.execution_history.pop_front();
1049 }
1050 self.execution_history.push_back(step);
1051 self.statistics.total_steps += 1;
1052 }
1053
1054 fn complete_step(&mut self, step_id: &str, output_metadata: DataMetadata) -> Result<()> {
1055 if let Some(step) = self
1056 .execution_history
1057 .iter_mut()
1058 .find(|s| s.step_id == step_id)
1059 {
1060 step.status = StepStatus::Completed;
1061 step.output_metadata = Some(output_metadata);
1062 step.duration = Some(step.start_time.elapsed().unwrap_or(Duration::from_secs(0)));
1063 self.statistics.successful_steps += 1;
1064 }
1065 Ok(())
1066 }
1067
1068 fn fail_step(&mut self, step_id: &str, error_info: ErrorInfo) -> Result<()> {
1069 if let Some(step) = self
1070 .execution_history
1071 .iter_mut()
1072 .find(|s| s.step_id == step_id)
1073 {
1074 step.status = StepStatus::Failed;
1075 step.error_info = Some(error_info);
1076 step.duration = Some(step.start_time.elapsed().unwrap_or(Duration::from_secs(0)));
1077 self.statistics.failed_steps += 1;
1078 }
1079 Ok(())
1080 }
1081
1082 fn update_step_metrics(&mut self, step_id: &str, metrics: PerformanceMetrics) -> Result<()> {
1083 if let Some(step) = self
1084 .execution_history
1085 .iter_mut()
1086 .find(|s| s.step_id == step_id)
1087 {
1088 step.performance_metrics = metrics;
1089 }
1090 Ok(())
1091 }
1092
1093 fn get_execution_summary(&self) -> ExecutionStatistics {
1094 self.statistics.clone()
1095 }
1096}
1097
1098impl PerformanceProfiler {
1099 fn new(config: ProfilingConfig) -> Self {
1100 Self {
1101 step_measurements: HashMap::new(),
1102 current_measurements: HashMap::new(),
1103 config,
1104 bottleneck_analysis: None,
1105 }
1106 }
1107
1108 fn start_measurement(&mut self, step_id: &str, step_name: &str) {
1109 let session = MeasurementSession {
1110 start_time: Instant::now(),
1111 initial_memory: 0, step_name: step_name.to_string(),
1113 };
1114 self.current_measurements
1115 .insert(step_id.to_string(), session);
1116 }
1117
1118 fn end_measurement(&mut self, step_id: &str) -> Option<PerformanceMetrics> {
1119 if let Some(session) = self.current_measurements.remove(step_id) {
1120 let metrics = PerformanceMetrics {
1121 execution_time_ms: session.start_time.elapsed().as_millis() as f64,
1122 memory_usage_bytes: 0, cpu_utilization: 0.0, input_size: 0, output_size: 0, cache_statistics: CacheStatistics {
1127 hits: 0,
1128 misses: 0,
1129 hit_ratio: 0.0,
1130 },
1131 custom_metrics: HashMap::new(),
1132 };
1133
1134 self.step_measurements
1135 .entry(session.step_name)
1136 .or_default()
1137 .push(metrics.clone());
1138
1139 Some(metrics)
1140 } else {
1141 None
1142 }
1143 }
1144
1145 fn get_analysis(&self) -> String {
1146 format!(
1148 "Performance analysis for {} unique steps",
1149 self.step_measurements.len()
1150 )
1151 }
1152}
1153
1154impl InteractiveDebugger {
1155 fn new(config: InteractiveConfig) -> Self {
1156 Self {
1157 breakpoints: HashMap::new(),
1158 watch_expressions: Vec::new(),
1159 current_session: None,
1160 command_queue: VecDeque::new(),
1161 config,
1162 }
1163 }
1164
1165 fn should_break(&self, step_name: &str) -> bool {
1166 self.breakpoints
1167 .values()
1168 .any(|bp| bp.step_name == step_name && bp.enabled)
1169 }
1170}
1171
1172impl DataInspector {
1173 fn new(config: InspectionConfig) -> Self {
1174 Self {
1175 data_snapshots: HashMap::new(),
1176 data_flow_analysis: None,
1177 transformation_summaries: Vec::new(),
1178 config,
1179 }
1180 }
1181
1182 fn capture_data_snapshot(&mut self, step_name: &str, data: &ArrayView2<f64>) -> Result<()> {
1183 let statistics = self.compute_statistics(data);
1184
1185 let snapshot = DataSnapshot {
1186 step_name: step_name.to_string(),
1187 shape: data.shape().to_vec(),
1188 dtype: "f64".to_string(),
1189 statistics,
1190 sample_data: None, timestamp: SystemTime::now(),
1192 };
1193
1194 self.data_snapshots.insert(step_name.to_string(), snapshot);
1195 Ok(())
1196 }
1197
1198 fn compute_statistics(&self, data: &ArrayView2<f64>) -> DataStatistics {
1199 let (n_samples, n_features) = (data.nrows(), data.ncols());
1200
1201 let mut means = Vec::with_capacity(n_features);
1203 let mut stds = Vec::with_capacity(n_features);
1204 let mut mins = Vec::with_capacity(n_features);
1205 let mut maxs = Vec::with_capacity(n_features);
1206 let mut missing_counts = Vec::with_capacity(n_features);
1207
1208 for col in 0..n_features {
1209 let column_data: Vec<f64> = data.column(col).to_vec();
1210
1211 let mean = column_data.iter().sum::<f64>() / n_samples as f64;
1212 let variance =
1213 column_data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n_samples as f64;
1214 let std = variance.sqrt();
1215 let min = column_data.iter().copied().fold(f64::INFINITY, f64::min);
1216 let max = column_data
1217 .iter()
1218 .copied()
1219 .fold(f64::NEG_INFINITY, f64::max);
1220 let missing = column_data.iter().filter(|x| x.is_nan()).count();
1221
1222 means.push(mean);
1223 stds.push(std);
1224 mins.push(min);
1225 maxs.push(max);
1226 missing_counts.push(missing);
1227 }
1228
1229 let total_missing = missing_counts.iter().sum::<usize>();
1231 let quality_score = 1.0 - (total_missing as f64 / (n_samples * n_features) as f64);
1232
1233 DataStatistics {
1235 n_samples,
1236 n_features,
1237 means,
1238 stds,
1239 mins,
1240 maxs,
1241 missing_counts,
1242 quality_score,
1243 }
1244 }
1245
1246 fn get_data_flow_summary(&self) -> String {
1247 format!(
1248 "Data flow analysis for {} snapshots",
1249 self.data_snapshots.len()
1250 )
1251 }
1252}
1253
1254impl ErrorContextManager {
1255 fn new(config: ErrorConfig) -> Self {
1256 Self {
1257 error_history: Vec::new(),
1258 error_patterns: HashMap::new(),
1259 config,
1260 }
1261 }
1262
1263 fn record_error(&mut self, step_id: &str, error: &SklearsComposeError) -> Result<()> {
1264 let error_context = ErrorContext {
1265 error_id: format!(
1266 "err_{}_{}",
1267 step_id,
1268 SystemTime::now()
1269 .duration_since(UNIX_EPOCH)
1270 .unwrap()
1271 .as_millis()
1272 ),
1273 error_message: error.to_string(),
1274 error_type: format!("{error:?}"),
1275 step_name: step_id.to_string(),
1276 input_context: DataMetadata {
1277 shape: vec![],
1278 dtype: "unknown".to_string(),
1279 memory_usage: 0,
1280 metadata: HashMap::new(),
1281 },
1282 execution_state: "error".to_string(),
1283 stack_trace: vec![],
1284 suggestions: self.generate_suggestions(error),
1285 timestamp: SystemTime::now(),
1286 };
1287
1288 if self.error_history.len() >= self.config.max_error_history {
1289 self.error_history.remove(0);
1290 }
1291 self.error_history.push(error_context);
1292
1293 Ok(())
1294 }
1295
1296 fn generate_suggestions(&self, error: &SklearsComposeError) -> Vec<ErrorSuggestion> {
1297 let mut suggestions = Vec::new();
1298
1299 let error_str = error.to_string();
1301 if error_str.contains("shape") {
1302 suggestions.push(ErrorSuggestion {
1303 description: "Check input data shape compatibility with the model".to_string(),
1304 confidence: 0.8,
1305 code_fix: Some(
1306 "Verify that input dimensions match expected model input".to_string(),
1307 ),
1308 documentation_link: Some(
1309 "https://docs.rs/sklears-compose/pipeline-shapes".to_string(),
1310 ),
1311 });
1312 }
1313
1314 if error_str.contains("memory") {
1315 suggestions.push(ErrorSuggestion {
1316 description: "Consider reducing batch size or using streaming processing"
1317 .to_string(),
1318 confidence: 0.7,
1319 code_fix: Some(
1320 "Use smaller batches or enable memory-efficient processing".to_string(),
1321 ),
1322 documentation_link: Some(
1323 "https://docs.rs/sklears-compose/memory-optimization".to_string(),
1324 ),
1325 });
1326 }
1327
1328 suggestions
1329 }
1330
1331 fn get_error_summary(&self) -> String {
1332 format!(
1333 "Recorded {} errors with {} unique patterns",
1334 self.error_history.len(),
1335 self.error_patterns.len()
1336 )
1337 }
1338}
1339
1340impl Default for PerformanceMetrics {
1341 fn default() -> Self {
1342 Self {
1343 execution_time_ms: 0.0,
1344 memory_usage_bytes: 0,
1345 cpu_utilization: 0.0,
1346 input_size: 0,
1347 output_size: 0,
1348 cache_statistics: CacheStatistics {
1349 hits: 0,
1350 misses: 0,
1351 hit_ratio: 0.0,
1352 },
1353 custom_metrics: HashMap::new(),
1354 }
1355 }
1356}
1357
1358impl fmt::Display for StepType {
1359 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1360 match self {
1361 StepType::Transform => write!(f, "Transform"),
1362 StepType::Fit => write!(f, "Fit"),
1363 StepType::Predict => write!(f, "Predict"),
1364 StepType::Validate => write!(f, "Validate"),
1365 StepType::Preprocess => write!(f, "Preprocess"),
1366 StepType::FeatureEngineering => write!(f, "FeatureEngineering"),
1367 StepType::ModelSelection => write!(f, "ModelSelection"),
1368 StepType::Ensemble => write!(f, "Ensemble"),
1369 StepType::Custom(name) => write!(f, "Custom({name})"),
1370 }
1371 }
1372}
1373
1374impl PipelineDebugger {
1376 #[must_use]
1378 pub fn for_development() -> Self {
1379 let config = DebuggingConfig {
1380 enable_execution_tracing: true,
1381 enable_performance_profiling: true,
1382 enable_data_inspection: true,
1383 enable_interactive_debugging: true,
1384 max_execution_history: 500,
1385 data_inspection_sample_rate: 0.2,
1386 verbose_logging: true,
1387 export_format: ExportFormat::Html,
1388 };
1389 Self::with_config(config)
1390 }
1391
1392 #[must_use]
1394 pub fn for_production() -> Self {
1395 let config = DebuggingConfig {
1396 enable_execution_tracing: true,
1397 enable_performance_profiling: true,
1398 enable_data_inspection: false,
1399 enable_interactive_debugging: false,
1400 max_execution_history: 100,
1401 data_inspection_sample_rate: 0.01,
1402 verbose_logging: false,
1403 export_format: ExportFormat::Json,
1404 };
1405 Self::with_config(config)
1406 }
1407
1408 pub fn debug_transform<F>(
1410 &self,
1411 step_name: &str,
1412 input: ArrayView2<f64>,
1413 transform_fn: F,
1414 ) -> Result<Array2<f64>>
1415 where
1416 F: FnOnce(ArrayView2<f64>) -> Result<Array2<f64>>,
1417 {
1418 let step_id = self.step_start(step_name, StepType::Transform, input)?;
1419
1420 match transform_fn(input) {
1421 Ok(output) => {
1422 self.step_complete(&step_id, output.view())?;
1423 Ok(output)
1424 }
1425 Err(e) => {
1426 self.step_error(&step_id, &e)?;
1427 Err(e)
1428 }
1429 }
1430 }
1431}
1432
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    /// The default debugger has execution tracing enabled.
    #[test]
    fn test_debugger_creation() {
        let debugger = PipelineDebugger::new();
        assert!(debugger.config.enable_execution_tracing);
    }

    /// A started and completed step is counted as successful.
    #[test]
    fn test_step_tracking() {
        let debugger = PipelineDebugger::new();
        let data = Array2::zeros((10, 5));

        let session = debugger.start_execution("test_pipeline").unwrap();
        let step_id = debugger
            .step_start("test_step", StepType::Transform, data.view())
            .unwrap();
        let result = debugger.step_complete(&step_id, data.view());

        assert!(result.is_ok());
        assert!(!session.is_empty());

        let summary = debugger.generate_report().unwrap().execution_summary;
        assert_eq!(summary.total_steps, 1);
        assert_eq!(summary.successful_steps, 1);
        assert_eq!(summary.failed_steps, 0);
    }

    /// The development preset turns on interactive debugging and verbose logging.
    #[test]
    fn test_development_debugger() {
        let debugger = PipelineDebugger::for_development();
        assert!(debugger.config.enable_interactive_debugging);
        assert!(debugger.config.verbose_logging);
    }

    /// The production preset turns off interactive debugging and verbose logging.
    #[test]
    fn test_production_debugger() {
        let debugger = PipelineDebugger::for_production();
        assert!(!debugger.config.enable_interactive_debugging);
        assert!(!debugger.config.verbose_logging);
    }

    /// `debug_transform` returns the closure's output unchanged.
    #[test]
    fn test_debug_transform() {
        let debugger = PipelineDebugger::new();
        let input = Array2::ones((5, 3));

        let result =
            debugger.debug_transform("scale", input.view(), |data| Ok(data.to_owned() * 2.0));

        assert!(result.is_ok());
        let output = result.unwrap();
        assert_eq!(output[[0, 0]], 2.0);
    }

    /// A failing transform propagates its error and is counted as a failed step.
    #[test]
    fn test_error_handling() {
        let debugger = PipelineDebugger::new();
        let input = Array2::ones((5, 3));

        let result = debugger.debug_transform("failing_step", input.view(), |_| {
            Err(SklearsComposeError::InvalidConfiguration(
                "Test error".to_string(),
            ))
        });

        assert!(result.is_err());

        let summary = debugger.generate_report().unwrap().execution_summary;
        assert_eq!(summary.total_steps, 1);
        assert_eq!(summary.successful_steps, 0);
        assert_eq!(summary.failed_steps, 1);
    }

    /// A report can be generated for a debugger that has not seen any steps.
    #[test]
    fn test_report_generation() {
        let debugger = PipelineDebugger::new();
        let report = debugger.generate_report();
        assert!(report.is_ok());
        assert_eq!(report.unwrap().execution_summary.total_steps, 0);
    }

    /// Every export format, including CSV, renders successfully.
    #[test]
    fn test_export_formats() {
        let debugger = PipelineDebugger::new();

        let json_export = debugger.export_debug_info(ExportFormat::Json);
        assert!(json_export.is_ok());

        let html_export = debugger.export_debug_info(ExportFormat::Html);
        assert!(html_export.is_ok());
        assert!(html_export
            .unwrap()
            .contains("<h1>Pipeline Debug Report</h1>"));

        let markdown_export = debugger.export_debug_info(ExportFormat::Markdown);
        assert!(markdown_export.is_ok());
        assert!(markdown_export
            .unwrap()
            .starts_with("# Pipeline Debug Report"));

        let csv_export = debugger.export_debug_info(ExportFormat::Csv);
        assert_eq!(
            csv_export.unwrap(),
            "step_name,duration_ms,status,memory_usage\n"
        );
    }

    /// Default performance metrics are zeroed.
    #[test]
    fn test_performance_metrics() {
        let metrics = PerformanceMetrics::default();
        assert_eq!(metrics.execution_time_ms, 0.0);
        assert_eq!(metrics.memory_usage_bytes, 0);
    }

    /// Column statistics are computed per feature.
    #[test]
    fn test_data_statistics() {
        let debugger = PipelineDebugger::new();
        let data = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();

        let inspector = debugger.data_inspector.read().unwrap();
        let stats = inspector.compute_statistics(&data.view());

        assert_eq!(stats.n_samples, 3);
        assert_eq!(stats.n_features, 2);
        assert_eq!(stats.means, vec![3.0, 4.0]);
        assert_eq!(stats.mins, vec![1.0, 2.0]);
        assert_eq!(stats.maxs, vec![5.0, 6.0]);
        assert_eq!(stats.missing_counts, vec![0, 0]);
        assert_eq!(stats.quality_score, 1.0);
    }

    /// `StepType` renders as its variant name.
    #[test]
    fn test_step_types() {
        assert_eq!(StepType::Transform.to_string(), "Transform");
        assert_eq!(
            StepType::Custom("test".to_string()).to_string(),
            "Custom(test)"
        );
    }
}