scirs2_stats/
error_diagnostics.rs

//! Error diagnostics and monitoring system
//!
//! This module provides comprehensive error diagnostics, monitoring, and intelligent
//! recovery strategies for production statistical computing environments.
5
6use crate::error_handling_v2::ErrorCode;
7use std::collections::{HashMap, VecDeque};
8use std::sync::{
9    atomic::{AtomicUsize, Ordering},
10    Arc, Mutex,
11};
12use std::time::{Duration, Instant, SystemTime};
13
14/// Error pattern detection and analysis
15#[derive(Debug, Clone)]
16pub struct ErrorPattern {
17    /// Pattern identifier
18    pub id: String,
19    /// Error codes that form this pattern
20    pub error_codes: Vec<ErrorCode>,
21    /// Frequency threshold for detection
22    pub frequency_threshold: usize,
23    /// Time window for pattern detection
24    pub time_window: Duration,
25    /// Confidence score (0.0 - 1.0)
26    pub confidence: f64,
27    /// Description of what this pattern indicates
28    pub description: String,
29    /// Suggested mitigation strategy
30    pub mitigation: String,
31}
32
33impl ErrorPattern {
34    /// Create a new error pattern
35    pub fn new(
36        id: impl Into<String>,
37        error_codes: Vec<ErrorCode>,
38        frequency_threshold: usize,
39        time_window: Duration,
40        description: impl Into<String>,
41        mitigation: impl Into<String>,
42    ) -> Self {
43        Self {
44            id: id.into(),
45            error_codes,
46            frequency_threshold,
47            time_window,
48            confidence: 0.0,
49            description: description.into(),
50            mitigation: mitigation.into(),
51        }
52    }
53}
54
55/// Error occurrence record
56#[derive(Debug, Clone)]
57pub struct ErrorOccurrence {
58    /// Error code
59    pub code: ErrorCode,
60    /// When the error occurred
61    pub timestamp: Instant,
62    /// Operation context
63    pub operation: String,
64    /// Frequency count
65    pub count: usize,
66    /// Resolution status
67    pub resolved: bool,
68    /// Recovery action taken
69    pub recovery_action: Option<String>,
70}
71
72/// Comprehensive error monitoring and analytics
73pub struct ErrorMonitor {
74    /// Recent error occurrences
75    error_history: Arc<Mutex<VecDeque<ErrorOccurrence>>>,
76    /// Error frequency counters
77    error_counts: Arc<Mutex<HashMap<ErrorCode, AtomicUsize>>>,
78    /// Known error patterns
79    patterns: Vec<ErrorPattern>,
80    /// Maximum history size
81    max_historysize: usize,
82    /// Pattern detection enabled
83    pattern_detection_enabled: bool,
84    /// Error rate thresholds
85    error_rate_thresholds: HashMap<ErrorCode, f64>,
86    /// Monitoring start time
87    start_time: Instant,
88}
89
90impl ErrorMonitor {
91    /// Create a new error monitor
92    pub fn new() -> Self {
93        let mut monitor = Self {
94            error_history: Arc::new(Mutex::new(VecDeque::new())),
95            error_counts: Arc::new(Mutex::new(HashMap::new())),
96            patterns: Vec::new(),
97            max_historysize: 1000,
98            pattern_detection_enabled: true,
99            error_rate_thresholds: HashMap::new(),
100            start_time: Instant::now(),
101        };
102
103        monitor.initialize_default_patterns();
104        monitor.initialize_default_thresholds();
105        monitor
106    }
107
108    /// Initialize default error patterns
109    fn initialize_default_patterns(&mut self) {
110        // Memory pressure pattern
111        self.patterns.push(ErrorPattern::new(
112            "memory_pressure",
113            vec![ErrorCode::E5001, ErrorCode::E5002],
114            3,
115            Duration::from_secs(60),
116            "High memory allocation failures indicating memory pressure",
117            "Reduce data size, enable streaming processing, or increase available memory",
118        ));
119
120        // Numerical instability pattern
121        self.patterns.push(ErrorPattern::new(
122            "numerical_instability",
123            vec![
124                ErrorCode::E3001,
125                ErrorCode::E3002,
126                ErrorCode::E3005,
127                ErrorCode::E3006,
128            ],
129            5,
130            Duration::from_secs(30),
131            "Frequent numerical errors indicating data quality or algorithm issues",
132            "Check data preprocessing, scaling, and consider more stable algorithms",
133        ));
134
135        // Convergence issues pattern
136        self.patterns.push(ErrorPattern::new(
137            "convergence_issues",
138            vec![ErrorCode::E3003, ErrorCode::E4001, ErrorCode::E4002],
139            3,
140            Duration::from_secs(120),
141            "Repeated convergence failures in iterative algorithms",
142            "Adjust algorithm parameters, improve initial conditions, or use different methods",
143        ));
144
145        // Data quality pattern
146        self.patterns.push(ErrorPattern::new(
147            "data_quality_issues",
148            vec![
149                ErrorCode::E2003,
150                ErrorCode::E2004,
151                ErrorCode::E1001,
152                ErrorCode::E1002,
153            ],
154            4,
155            Duration::from_secs(60),
156            "Frequent data validation errors indicating poor data quality",
157            "Implement comprehensive data validation and cleaning pipeline",
158        ));
159    }
160
161    /// Initialize default error rate thresholds
162    fn initialize_default_thresholds(&mut self) {
163        self.error_rate_thresholds.insert(ErrorCode::E5001, 0.01); // Memory errors - very low tolerance
164        self.error_rate_thresholds.insert(ErrorCode::E3001, 0.05); // Overflow - low tolerance
165        self.error_rate_thresholds.insert(ErrorCode::E3005, 0.10); // NaN - moderate tolerance
166        self.error_rate_thresholds.insert(ErrorCode::E4001, 0.20); // Max iterations - higher tolerance
167    }
168
169    /// Record an error occurrence
170    pub fn record_error(&self, code: ErrorCode, operation: impl Into<String>) {
171        let occurrence = ErrorOccurrence {
172            code,
173            timestamp: Instant::now(),
174            operation: operation.into(),
175            count: 1,
176            resolved: false,
177            recovery_action: None,
178        };
179
180        // Update history
181        {
182            let mut history = self.error_history.lock().unwrap();
183            if history.len() >= self.max_historysize {
184                history.pop_front();
185            }
186            history.push_back(occurrence);
187        }
188
189        // Update counters
190        {
191            let mut counts = self.error_counts.lock().unwrap();
192            counts
193                .entry(code)
194                .or_insert_with(|| AtomicUsize::new(0))
195                .fetch_add(1, Ordering::Relaxed);
196        }
197
198        // Check for patterns if enabled
199        if self.pattern_detection_enabled {
200            self.check_patterns();
201        }
202    }
203
204    /// Check for error patterns in recent history
205    fn check_patterns(&self) {
206        let history = self.error_history.lock().unwrap();
207        let now = Instant::now();
208
209        for pattern in &self.patterns {
210            let relevant_errors: Vec<_> = history
211                .iter()
212                .filter(|err| {
213                    pattern.error_codes.contains(&err.code)
214                        && now.duration_since(err.timestamp) <= pattern.time_window
215                })
216                .collect();
217
218            if relevant_errors.len() >= pattern.frequency_threshold {
219                eprintln!(
220                    "āš ļø  ERROR PATTERN DETECTED: {} - {} ({})",
221                    pattern.id, pattern.description, pattern.mitigation
222                );
223            }
224        }
225    }
226
227    /// Get error statistics
228    pub fn get_statistics(&self) -> ErrorStatistics {
229        let counts = self.error_counts.lock().unwrap();
230        let history = self.error_history.lock().unwrap();
231
232        let total_errors: usize = counts
233            .values()
234            .map(|counter| counter.load(Ordering::Relaxed))
235            .sum();
236
237        let uptime = self.start_time.elapsed();
238        let error_rate = total_errors as f64 / uptime.as_secs_f64();
239
240        // Calculate error distribution
241        let mut error_distribution = HashMap::new();
242        for (code, counter) in counts.iter() {
243            let count = counter.load(Ordering::Relaxed);
244            if count > 0 {
245                error_distribution.insert(*code, count);
246            }
247        }
248
249        // Find most frequent errors
250        let mut frequent_errors: Vec<_> = error_distribution.clone().into_iter().collect();
251        frequent_errors.sort_by(|a, b| b.1.cmp(&a.1));
252        let top_errors: Vec<_> = frequent_errors.into_iter().take(5).collect();
253
254        // Calculate recent error rate (last hour)
255        let one_hour_ago = Instant::now() - Duration::from_secs(3600);
256        let recent_errors = history
257            .iter()
258            .filter(|err| err.timestamp > one_hour_ago)
259            .count();
260        let recent_error_rate = recent_errors as f64 / 3600.0;
261
262        ErrorStatistics {
263            total_errors,
264            error_rate,
265            recent_error_rate,
266            uptime,
267            error_distribution,
268            top_errors: top_errors.into_iter().collect(),
269            active_patterns: self.detect_active_patterns(),
270        }
271    }
272
273    /// Detect currently active error patterns
274    fn detect_active_patterns(&self) -> Vec<String> {
275        let history = self.error_history.lock().unwrap();
276        let now = Instant::now();
277        let mut active_patterns = Vec::new();
278
279        for pattern in &self.patterns {
280            let recent_errors: Vec<_> = history
281                .iter()
282                .filter(|err| {
283                    pattern.error_codes.contains(&err.code)
284                        && now.duration_since(err.timestamp) <= pattern.time_window
285                })
286                .collect();
287
288            if recent_errors.len() >= pattern.frequency_threshold {
289                active_patterns.push(pattern.id.clone());
290            }
291        }
292
293        active_patterns
294    }
295
296    /// Generate comprehensive health report
297    pub fn generate_health_report(&self) -> HealthReport {
298        let stats = self.get_statistics();
299        let history = self.error_history.lock().unwrap();
300
301        // Calculate health score (0-100)
302        let health_score = self.calculate_health_score(&stats);
303
304        // Identify critical issues
305        let critical_issues = self.identify_critical_issues(&stats);
306
307        // Generate recommendations
308        let recommendations = self.generate_recommendations(&stats, &critical_issues);
309
310        // Calculate trend information
311        let trend = self.calculate_error_trend(&history);
312
313        HealthReport {
314            health_score,
315            critical_issues,
316            recommendations,
317            statistics: stats,
318            trend,
319            timestamp: SystemTime::now(),
320        }
321    }
322
323    /// Calculate overall system health score
324    fn calculate_health_score(&self, stats: &ErrorStatistics) -> u8 {
325        let mut score = 100.0;
326
327        // Penalty for high error rates
328        if stats.error_rate > 1.0 {
329            score -= 30.0;
330        } else if stats.error_rate > 0.1 {
331            score -= 20.0;
332        } else if stats.error_rate > 0.01 {
333            score -= 10.0;
334        }
335
336        // Penalty for active patterns
337        score -= stats.active_patterns.len() as f64 * 15.0;
338
339        // Penalty for critical errors
340        for (code, count) in &stats.top_errors {
341            if code.severity() <= 2 {
342                score -= *count as f64 * 5.0;
343            }
344        }
345
346        // Penalty for recent error spike
347        if stats.recent_error_rate > stats.error_rate * 2.0 {
348            score -= 20.0;
349        }
350
351        score.max(0.0).min(100.0) as u8
352    }
353
354    /// Identify critical issues requiring immediate attention
355    fn identify_critical_issues(&self, stats: &ErrorStatistics) -> Vec<CriticalIssue> {
356        let mut issues = Vec::new();
357
358        // Check for severe error patterns
359        if stats
360            .active_patterns
361            .contains(&"memory_pressure".to_string())
362        {
363            issues.push(CriticalIssue {
364                severity: 1,
365                title: "Memory Pressure Detected".to_string(),
366                description: "High memory allocation failures indicate system memory pressure"
367                    .to_string(),
368                impact: "May cause application crashes or severe performance degradation"
369                    .to_string(),
370                action_required: "Immediate memory optimization or resource scaling required"
371                    .to_string(),
372            });
373        }
374
375        // Check for high critical error rates
376        for (code, count) in &stats.top_errors {
377            if code.severity() <= 2 && *count > 10 {
378                issues.push(CriticalIssue {
379                    severity: code.severity(),
380                    title: format!("High {} Error Rate", code),
381                    description: format!("Frequent {} errors detected", code.description()),
382                    impact: "May indicate fundamental data or algorithm issues".to_string(),
383                    action_required: "Investigate root cause and implement fixes".to_string(),
384                });
385            }
386        }
387
388        // Check for error rate spikes
389        if stats.recent_error_rate > stats.error_rate * 3.0 {
390            issues.push(CriticalIssue {
391                severity: 2,
392                title: "Error Rate Spike".to_string(),
393                description: "Recent error rate significantly higher than baseline".to_string(),
394                impact: "Indicates potential system instability or new issues".to_string(),
395                action_required: "Monitor closely and investigate recent changes".to_string(),
396            });
397        }
398
399        issues
400    }
401
402    /// Generate actionable recommendations
403    fn generate_recommendations(
404        &self,
405        stats: &ErrorStatistics,
406        issues: &[CriticalIssue],
407    ) -> Vec<Recommendation> {
408        let mut recommendations = Vec::new();
409
410        // Recommendations based on error patterns
411        if stats
412            .active_patterns
413            .contains(&"numerical_instability".to_string())
414        {
415            recommendations.push(Recommendation {
416                priority: 1,
417                category: "Data Quality".to_string(),
418                title: "Improve Numerical Stability".to_string(),
419                description: "Implement data preprocessing and normalization".to_string(),
420                steps: vec![
421                    "Check for extreme values in input data".to_string(),
422                    "Apply appropriate data scaling or normalization".to_string(),
423                    "Consider using more numerically stable algorithms".to_string(),
424                ],
425                expected_impact: "Reduce numerical errors by 70-90%".to_string(),
426            });
427        }
428
429        // Recommendations based on frequent errors
430        for (code, count) in &stats.top_errors {
431            match code {
432                ErrorCode::E3005 => {
433                    recommendations.push(Recommendation {
434                        priority: 2,
435                        category: "Data Validation".to_string(),
436                        title: "Handle NaN Values".to_string(),
437                        description: "Implement comprehensive NaN handling strategy".to_string(),
438                        steps: vec![
439                            "Add data validation checks before processing".to_string(),
440                            "Implement NaN filtering or imputation".to_string(),
441                            "Use statistical methods that handle missing data".to_string(),
442                        ],
443                        expected_impact: "Eliminate NaN-related errors".to_string(),
444                    });
445                }
446                ErrorCode::E3003 => {
447                    recommendations.push(Recommendation {
448                        priority: 2,
449                        category: "Algorithm Tuning".to_string(),
450                        title: "Optimize Convergence Parameters".to_string(),
451                        description: "Adjust algorithm parameters for better convergence"
452                            .to_string(),
453                        steps: vec![
454                            "Increase maximum iterations for iterative algorithms".to_string(),
455                            "Adjust convergence tolerance based on data characteristics"
456                                .to_string(),
457                            "Consider using different initialization strategies".to_string(),
458                        ],
459                        expected_impact: "Improve convergence rate by 50-80%".to_string(),
460                    });
461                }
462                _ => {}
463            }
464        }
465
466        // General recommendations based on health score
467        if stats.error_rate > 0.1 {
468            recommendations.push(Recommendation {
469                priority: 1,
470                category: "System Health".to_string(),
471                title: "Reduce Overall Error Rate".to_string(),
472                description: "Implement comprehensive error prevention strategy".to_string(),
473                steps: vec![
474                    "Add input validation at system boundaries".to_string(),
475                    "Implement data quality checks".to_string(),
476                    "Use defensive programming practices".to_string(),
477                ],
478                expected_impact: "Reduce overall error rate significantly".to_string(),
479            });
480        }
481
482        recommendations
483    }
484
485    /// Calculate error trend over time
486    fn calculate_error_trend(&self, history: &VecDeque<ErrorOccurrence>) -> ErrorTrend {
487        if history.len() < 10 {
488            return ErrorTrend {
489                direction: TrendDirection::Stable,
490                magnitude: 0.0,
491                confidence: 0.0,
492                description: "Insufficient data for trend analysis".to_string(),
493            };
494        }
495
496        let now = Instant::now();
497        let recent_window = Duration::from_secs(1800); // 30 minutes
498        let older_window = Duration::from_secs(3600); // 1 hour
499
500        let recent_errors = history
501            .iter()
502            .filter(|err| now.duration_since(err.timestamp) <= recent_window)
503            .count();
504
505        let older_errors = history
506            .iter()
507            .filter(|err| {
508                let age = now.duration_since(err.timestamp);
509                age > recent_window && age <= older_window
510            })
511            .count();
512
513        let recent_rate = recent_errors as f64 / recent_window.as_secs_f64();
514        let older_rate = older_errors as f64 / recent_window.as_secs_f64(); // Same window size for comparison
515
516        let change_ratio = if older_rate > 0.0 {
517            recent_rate / older_rate
518        } else if recent_rate > 0.0 {
519            2.0 // Arbitrary large value indicating increase from zero
520        } else {
521            1.0 // No change
522        };
523
524        let (direction, description) = if change_ratio > 1.5 {
525            (
526                TrendDirection::Increasing,
527                "Error rate is increasing significantly".to_string(),
528            )
529        } else if change_ratio < 0.5 {
530            (
531                TrendDirection::Decreasing,
532                "Error rate is decreasing significantly".to_string(),
533            )
534        } else {
535            (
536                TrendDirection::Stable,
537                "Error rate is relatively stable".to_string(),
538            )
539        };
540
541        let magnitude = (change_ratio - 1.0).abs();
542        let confidence = if history.len() > 50 { 0.8 } else { 0.5 };
543
544        ErrorTrend {
545            direction,
546            magnitude,
547            confidence,
548            description,
549        }
550    }
551}
552
553impl Default for ErrorMonitor {
554    fn default() -> Self {
555        Self::new()
556    }
557}
558
559/// Error statistics summary
560#[derive(Debug)]
561pub struct ErrorStatistics {
562    /// Total number of errors
563    pub total_errors: usize,
564    /// Overall error rate (errors per second)
565    pub error_rate: f64,
566    /// Recent error rate (last hour)
567    pub recent_error_rate: f64,
568    /// System uptime
569    pub uptime: Duration,
570    /// Error distribution by type
571    pub error_distribution: HashMap<ErrorCode, usize>,
572    /// Top 5 most frequent errors
573    pub top_errors: Vec<(ErrorCode, usize)>,
574    /// Currently active error patterns
575    pub active_patterns: Vec<String>,
576}
577
/// Critical issue requiring immediate attention
///
/// `Clone`, `PartialEq` and `Eq` are derived so issues can be copied out of
/// a report and compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CriticalIssue {
    /// Severity level (1 = most critical)
    pub severity: u8,
    /// Issue title
    pub title: String,
    /// Detailed description
    pub description: String,
    /// Potential impact
    pub impact: String,
    /// Required action
    pub action_required: String,
}
592
/// Actionable recommendation
///
/// `Clone`, `PartialEq` and `Eq` are derived so recommendations can be
/// copied out of a report and compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Recommendation {
    /// Priority level (1 = highest)
    pub priority: u8,
    /// Category of recommendation
    pub category: String,
    /// Recommendation title
    pub title: String,
    /// Description
    pub description: String,
    /// Step-by-step actions
    pub steps: Vec<String>,
    /// Expected impact
    pub expected_impact: String,
}
609
/// Error trend analysis
///
/// `Clone` and `PartialEq` are derived so trends can be stored and compared.
/// (`Eq` is not available because of the floating-point fields.)
#[derive(Debug, Clone, PartialEq)]
pub struct ErrorTrend {
    /// Trend direction
    pub direction: TrendDirection,
    /// Magnitude of change
    pub magnitude: f64,
    /// Confidence in the trend (0.0-1.0)
    pub confidence: f64,
    /// Trend description
    pub description: String,
}

/// Trend direction enumeration
///
/// A field-less enum, so it is cheap to copy and can be compared with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TrendDirection {
    /// The error rate is going up
    Increasing,
    /// The error rate is going down
    Decreasing,
    /// The error rate shows no significant change
    Stable,
}
630
631/// Comprehensive health report
632#[derive(Debug)]
633pub struct HealthReport {
634    /// Overall health score (0-100)
635    pub health_score: u8,
636    /// Critical issues requiring attention
637    pub critical_issues: Vec<CriticalIssue>,
638    /// Actionable recommendations
639    pub recommendations: Vec<Recommendation>,
640    /// Detailed statistics
641    pub statistics: ErrorStatistics,
642    /// Error trend analysis
643    pub trend: ErrorTrend,
644    /// Report generation timestamp
645    pub timestamp: SystemTime,
646}
647
648impl HealthReport {
649    /// Generate a formatted text report
650    pub fn to_formatted_string(&self) -> String {
651        let mut report = String::new();
652
653        report.push_str("=== STATISTICAL COMPUTING HEALTH REPORT ===\n\n");
654        report.push_str(&format!(
655            "šŸ“Š Overall Health Score: {}/100\n",
656            self.health_score
657        ));
658        report.push_str(&format!("ā±ļø  Report Generated: {:?}\n\n", self.timestamp));
659
660        // Health indicator
661        let health_indicator = match self.health_score {
662            90..=100 => "🟢 EXCELLENT",
663            70..=89 => "🟔 GOOD",
664            50..=69 => "🟠 FAIR",
665            30..=49 => "šŸ”“ POOR",
666            _ => "🚨 CRITICAL",
667        };
668        report.push_str(&format!("Status: {}\n\n", health_indicator));
669
670        // Critical Issues
671        if !self.critical_issues.is_empty() {
672            report.push_str("🚨 CRITICAL ISSUES:\n");
673            for (i, issue) in self.critical_issues.iter().enumerate() {
674                report.push_str(&format!(
675                    "{}. {} (Severity: {})\n   {}\n   Impact: {}\n   Action: {}\n\n",
676                    i + 1,
677                    issue.title,
678                    issue.severity,
679                    issue.description,
680                    issue.impact,
681                    issue.action_required
682                ));
683            }
684        }
685
686        // Statistics Summary
687        report.push_str("šŸ“ˆ STATISTICS SUMMARY:\n");
688        report.push_str(&format!(
689            "• Total Errors: {}\n",
690            self.statistics.total_errors
691        ));
692        report.push_str(&format!(
693            "• Error Rate: {:.4} errors/sec\n",
694            self.statistics.error_rate
695        ));
696        report.push_str(&format!(
697            "• Recent Rate: {:.4} errors/sec\n",
698            self.statistics.recent_error_rate
699        ));
700        report.push_str(&format!(
701            "• Uptime: {:.2} hours\n",
702            self.statistics.uptime.as_secs_f64() / 3600.0
703        ));
704
705        if !self.statistics.top_errors.is_empty() {
706            report.push_str("\nšŸ“‹ TOP ERRORS:\n");
707            for (i, (code, count)) in self.statistics.top_errors.iter().enumerate() {
708                report.push_str(&format!("   {}. {}: {} occurrences\n", i + 1, code, count));
709            }
710        }
711
712        // Trend Analysis
713        report.push_str(&format!("\nšŸ“Š TREND: {}\n", self.trend.description));
714
715        // Recommendations
716        if !self.recommendations.is_empty() {
717            report.push_str("\nšŸ’” RECOMMENDATIONS:\n");
718            for (i, rec) in self.recommendations.iter().enumerate() {
719                report.push_str(&format!(
720                    "{}. {} (Priority: {})\n   {}\n   Expected Impact: {}\n",
721                    i + 1,
722                    rec.title,
723                    rec.priority,
724                    rec.description,
725                    rec.expected_impact
726                ));
727                if !rec.steps.is_empty() {
728                    report.push_str("   Steps:\n");
729                    for step in &rec.steps {
730                        report.push_str(&format!("   • {}\n", step));
731                    }
732                }
733                report.push('\n');
734            }
735        }
736
737        report
738    }
739
740    /// Check if immediate action is required
741    pub fn requires_immediate_action(&self) -> bool {
742        self.health_score < 50 || self.critical_issues.iter().any(|issue| issue.severity <= 2)
743    }
744}
745
746/// Global error monitor instance
747static GLOBAL_MONITOR: std::sync::OnceLock<ErrorMonitor> = std::sync::OnceLock::new();
748
749/// Get the global error monitor instance
750#[allow(dead_code)]
751pub fn global_monitor() -> &'static ErrorMonitor {
752    GLOBAL_MONITOR.get_or_init(ErrorMonitor::new)
753}
754
755/// Convenience function to record an error globally
756#[allow(dead_code)]
757pub fn record_global_error(code: ErrorCode, operation: impl Into<String>) {
758    global_monitor().record_error(code, operation);
759}
760
761/// Convenience function to get global error statistics
762#[allow(dead_code)]
763pub fn get_global_statistics() -> ErrorStatistics {
764    global_monitor().get_statistics()
765}
766
767/// Convenience function to generate global health report
768#[allow(dead_code)]
769pub fn generate_global_health_report() -> HealthReport {
770    global_monitor().generate_health_report()
771}
772
#[cfg(test)]
mod tests {
    use super::*;

    /// A single recorded error shows up in the total and in the distribution.
    #[test]
    #[ignore = "timeout"]
    fn test_error_monitor_basic() {
        let monitor = ErrorMonitor::new();
        monitor.record_error(ErrorCode::E3005, "test_operation");

        let stats = monitor.get_statistics();
        assert_eq!(stats.total_errors, 1);
        assert!(stats.error_distribution.contains_key(&ErrorCode::E3005));
    }

    /// Five memory errors in a row exceed the `memory_pressure` threshold (3
    /// errors within 60 seconds), so the pattern must be reported as active.
    #[test]
    #[ignore = "timeout"]
    fn test_pattern_detection() {
        let monitor = ErrorMonitor::new();

        // Record multiple memory errors to trigger pattern
        for _ in 0..5 {
            monitor.record_error(ErrorCode::E5001, "memory_test");
        }

        let stats = monitor.get_statistics();
        assert_eq!(stats.total_errors, 5);
        assert!(stats
            .active_patterns
            .iter()
            .any(|pattern_id| pattern_id == "memory_pressure"));
    }

    /// A fresh monitor scores 100; recording errors lowers the score.
    #[test]
    #[ignore = "timeout"]
    fn test_health_score_calculation() {
        let monitor = ErrorMonitor::new();

        // Fresh monitor should have perfect health
        let health_report = monitor.generate_health_report();
        assert_eq!(health_report.health_score, 100);

        // Record some errors and check health degrades
        monitor.record_error(ErrorCode::E3001, "overflow_test");
        monitor.record_error(ErrorCode::E5001, "memory_test");

        let health_report = monitor.generate_health_report();
        assert!(health_report.health_score < 100);
    }
}