scirs2_stats/
error_standardization.rs

1//! Error message standardization for consistent error handling
2//!
3//! This module provides standardized error messages and recovery suggestions
4//! that are used consistently across all statistical modules.
5
6use crate::error::{StatsError, StatsResult};
7use std::collections::HashMap;
8
9/// Standardized error message templates
10pub struct ErrorMessages;
11
12impl ErrorMessages {
13    /// Standard dimension mismatch messages
14    pub fn dimension_mismatch(expected: &str, actual: &str) -> StatsError {
15        StatsError::dimension_mismatch(format!(
16            "Array dimension mismatch: _expected {}, got {}. {}",
17            expected,
18            actual,
19            "Ensure all input arrays have compatible dimensions for the operation."
20        ))
21    }
22
23    /// Standard array length mismatch messages
24    pub fn length_mismatch(
25        array1_name: &str,
26        len1: usize,
27        array2_name: &str,
28        len2: usize,
29    ) -> StatsError {
30        StatsError::dimension_mismatch(format!(
31            "Array length mismatch: {} has {} elements, {} has {} elements. {}",
32            array1_name,
33            len1,
34            array2_name,
35            len2,
36            "Both arrays must have the same number of elements."
37        ))
38    }
39
40    /// Standard empty array messages
41    pub fn empty_array(arrayname: &str) -> StatsError {
42        StatsError::invalid_argument(format!(
43            "Array '{}' cannot be empty. {}",
44            arrayname, "Provide an array with at least one element."
45        ))
46    }
47
48    /// Standard insufficient data messages
49    pub fn insufficientdata(operation: &str, required: usize, actual: usize) -> StatsError {
50        StatsError::invalid_argument(format!(
51            "Insufficient data for {}: requires at least {} elements, got {}. {}",
52            operation,
53            required,
54            actual,
55            if required == 2 {
56                "Statistical calculations typically require at least 2 data points."
57            } else {
58                "Increase the sample size or use a different method."
59            }
60        ))
61    }
62
63    /// Standard non-positive value messages
64    pub fn non_positive_value(parameter: &str, value: f64) -> StatsError {
65        StatsError::domain(format!(
66            "Parameter '{}' must be positive, got {}. {}",
67            parameter, value, "Ensure the value is greater than 0."
68        ))
69    }
70
71    /// Standard probability range messages
72    pub fn invalid_probability(parameter: &str, value: f64) -> StatsError {
73        StatsError::domain(format!(
74            "Parameter '{}' must be a valid probability between 0 and 1, got {}. {}",
75            parameter, value, "Probability values must be in the range [0, 1]."
76        ))
77    }
78
79    /// Standard NaN detection messages
80    pub fn nan_detected(context: &str) -> StatsError {
81        StatsError::invalid_argument(format!(
82            "NaN (Not a Number) values detected in {}. {}",
83            context, "Remove NaN values or use functions that handle missing data explicitly."
84        ))
85    }
86
87    /// Standard infinite value messages
88    pub fn infinite_value_detected(context: &str) -> StatsError {
89        StatsError::invalid_argument(format!(
90            "Infinite values detected in {}. {}",
91            context, "Check for overflow conditions or extreme values in your data."
92        ))
93    }
94
95    /// Standard matrix not positive definite messages
96    pub fn not_positive_definite(matrixname: &str) -> StatsError {
97        StatsError::computation(format!(
98            "Matrix '{}' is not positive definite. {}",
99            matrixname,
100            "Ensure the matrix is symmetric and all eigenvalues are positive, or use regularization."
101        ))
102    }
103
104    /// Standard singular matrix messages
105    pub fn singular_matrix(matrixname: &str) -> StatsError {
106        StatsError::computation(format!(
107            "Matrix '{}' is singular (non-invertible). {}",
108            matrixname, "Check for linear dependencies in your data or add regularization."
109        ))
110    }
111
112    /// Standard convergence failure messages
113    pub fn convergence_failure(algorithm: &str, iterations: usize) -> StatsError {
114        StatsError::ConvergenceError(format!(
115            "{} failed to converge after {} iterations. {}",
116            algorithm, iterations,
117            "Try increasing the maximum iterations, adjusting tolerance, or using different initial values."
118        ))
119    }
120
121    /// Standard numerical instability messages
122    pub fn numerical_instability(operation: &str, suggestion: &str) -> StatsError {
123        StatsError::computation(format!(
124            "Numerical instability detected in {}. {}",
125            operation, suggestion
126        ))
127    }
128
129    /// Standard unsupported operation messages
130    pub fn unsupported_operation(operation: &str, context: &str) -> StatsError {
131        StatsError::not_implemented(format!(
132            "Operation '{}' is not supported for {}. {}",
133            operation,
134            context,
135            "Check the documentation for supported operations or consider alternative methods."
136        ))
137    }
138}
139
140/// Context-aware error validation
141pub struct ErrorValidator;
142
143impl ErrorValidator {
144    /// Validate array for common issues
145    pub fn validate_array<T>(data: &[T], name: &str) -> StatsResult<()>
146    where
147        T: PartialOrd + Copy,
148    {
149        if data.is_empty() {
150            return Err(ErrorMessages::empty_array(name));
151        }
152        Ok(())
153    }
154
155    /// Validate array for finite values (for float types)
156    pub fn validate_finite_array(data: &[f64], name: &str) -> StatsResult<()> {
157        if data.is_empty() {
158            return Err(ErrorMessages::empty_array(name));
159        }
160
161        for (i, &value) in data.iter().enumerate() {
162            if value.is_nan() {
163                return Err(ErrorMessages::nan_detected(&format!("{}[{}]", name, i)));
164            }
165            if value.is_infinite() {
166                return Err(ErrorMessages::infinite_value_detected(&format!(
167                    "{}[{}]",
168                    name, i
169                )));
170            }
171        }
172        Ok(())
173    }
174
175    /// Validate probability value
176    pub fn validate_probability(value: f64, name: &str) -> StatsResult<()> {
177        if !(0.0..=1.0).contains(&value) {
178            return Err(ErrorMessages::invalid_probability(name, value));
179        }
180        if value.is_nan() {
181            return Err(ErrorMessages::nan_detected(name));
182        }
183        Ok(())
184    }
185
186    /// Validate positive value
187    pub fn validate_positive(value: f64, name: &str) -> StatsResult<()> {
188        if value <= 0.0 {
189            return Err(ErrorMessages::non_positive_value(name, value));
190        }
191        if value.is_nan() {
192            return Err(ErrorMessages::nan_detected(name));
193        }
194        if value.is_infinite() {
195            return Err(ErrorMessages::infinite_value_detected(name));
196        }
197        Ok(())
198    }
199
200    /// Validate arrays have same length
201    pub fn validate_same_length<T, U>(
202        arr1: &[T],
203        arr1_name: &str,
204        arr2: &[U],
205        arr2_name: &str,
206    ) -> StatsResult<()> {
207        if arr1.len() != arr2.len() {
208            return Err(ErrorMessages::length_mismatch(
209                arr1_name,
210                arr1.len(),
211                arr2_name,
212                arr2.len(),
213            ));
214        }
215        Ok(())
216    }
217
218    /// Validate minimum sample size
219    pub fn validate_samplesize(size: usize, minimum: usize, operation: &str) -> StatsResult<()> {
220        if size < minimum {
221            return Err(ErrorMessages::insufficientdata(operation, minimum, size));
222        }
223        Ok(())
224    }
225}
226
/// Performance impact assessment for error recovery
///
/// Attached to each recovery suggestion to indicate how costly applying it is
/// expected to be. The percentage bands are the documented intent of each
/// level; nothing in this type measures them.
///
/// Equality and hashing are derived so that suggestion lists can be compared
/// in assertions and impact levels can be used as map or set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PerformanceImpact {
    /// No performance impact
    None,
    /// Minimal performance impact (< 5%)
    Minimal,
    /// Moderate performance impact (5-20%)
    Moderate,
    /// Significant performance impact (> 20%)
    Significant,
}
239
240/// Standardized error recovery suggestions
241pub struct RecoverySuggestions;
242
243impl RecoverySuggestions {
244    /// Get recovery suggestions for common statistical errors
245    pub fn get_suggestions(error: &StatsError) -> Vec<(String, PerformanceImpact)> {
246        match error {
247            StatsError::DimensionMismatch(_) => vec![
248                (
249                    "Reshape arrays to have compatible dimensions".to_string(),
250                    PerformanceImpact::None,
251                ),
252                (
253                    "Use broadcasting-compatible operations".to_string(),
254                    PerformanceImpact::Minimal,
255                ),
256                (
257                    "Transpose matrices if needed".to_string(),
258                    PerformanceImpact::Minimal,
259                ),
260            ],
261            StatsError::InvalidArgument(msg) if msg.contains("empty") => vec![
262                (
263                    "Provide non-empty input arrays".to_string(),
264                    PerformanceImpact::None,
265                ),
266                (
267                    "Use default values for empty inputs".to_string(),
268                    PerformanceImpact::Minimal,
269                ),
270                (
271                    "Filter out empty arrays before processing".to_string(),
272                    PerformanceImpact::Minimal,
273                ),
274            ],
275            StatsError::InvalidArgument(msg) if msg.contains("NaN") => vec![
276                (
277                    "Remove NaN values using data.dropna()".to_string(),
278                    PerformanceImpact::Minimal,
279                ),
280                (
281                    "Use interpolation to fill NaN values".to_string(),
282                    PerformanceImpact::Moderate,
283                ),
284                (
285                    "Use statistical methods that handle NaN explicitly".to_string(),
286                    PerformanceImpact::Minimal,
287                ),
288            ],
289            StatsError::ComputationError(msg) if msg.contains("singular") => vec![
290                (
291                    "Add regularization (e.g., ridge regression)".to_string(),
292                    PerformanceImpact::Minimal,
293                ),
294                (
295                    "Use pseudo-inverse instead of inverse".to_string(),
296                    PerformanceImpact::Moderate,
297                ),
298                (
299                    "Check for multicollinearity in data".to_string(),
300                    PerformanceImpact::None,
301                ),
302            ],
303            StatsError::ConvergenceError(_) => vec![
304                (
305                    "Increase maximum iterations".to_string(),
306                    PerformanceImpact::Moderate,
307                ),
308                (
309                    "Adjust convergence tolerance".to_string(),
310                    PerformanceImpact::None,
311                ),
312                (
313                    "Use different initial values".to_string(),
314                    PerformanceImpact::None,
315                ),
316                (
317                    "Try a different optimization algorithm".to_string(),
318                    PerformanceImpact::Significant,
319                ),
320            ],
321            StatsError::DomainError(_) => vec![
322                (
323                    "Check parameter bounds and constraints".to_string(),
324                    PerformanceImpact::None,
325                ),
326                (
327                    "Scale or normalize input data".to_string(),
328                    PerformanceImpact::Minimal,
329                ),
330                (
331                    "Use robust statistical methods".to_string(),
332                    PerformanceImpact::Moderate,
333                ),
334            ],
335            _ => vec![
336                (
337                    "Check input data for validity".to_string(),
338                    PerformanceImpact::None,
339                ),
340                (
341                    "Refer to function documentation".to_string(),
342                    PerformanceImpact::None,
343                ),
344            ],
345        }
346    }
347
348    /// Get context-specific suggestions for statistical operations
349    pub fn get_context_suggestions(operation: &str) -> HashMap<String, Vec<String>> {
350        let mut suggestions = HashMap::new();
351
352        match operation {
353            "correlation" => {
354                suggestions.insert(
355                    "data_preparation".to_string(),
356                    vec![
357                        "Ensure data is numeric and finite".to_string(),
358                        "Consider outlier detection and removal".to_string(),
359                        "Check for missing values".to_string(),
360                    ],
361                );
362                suggestions.insert(
363                    "performance".to_string(),
364                    vec![
365                        "Use SIMD-optimized functions for large datasets".to_string(),
366                        "Consider parallel computation for correlation matrices".to_string(),
367                    ],
368                );
369            }
370            "regression" => {
371                suggestions.insert(
372                    "data_preparation".to_string(),
373                    vec![
374                        "Check for multicollinearity".to_string(),
375                        "Normalize features if needed".to_string(),
376                        "Consider feature selection".to_string(),
377                    ],
378                );
379                suggestions.insert(
380                    "model_selection".to_string(),
381                    vec![
382                        "Use regularization for high-dimensional data".to_string(),
383                        "Consider robust regression for outliers".to_string(),
384                    ],
385                );
386            }
387            "hypothesis_testing" => {
388                suggestions.insert(
389                    "assumptions".to_string(),
390                    vec![
391                        "Check normality assumptions".to_string(),
392                        "Verify independence of observations".to_string(),
393                        "Consider non-parametric alternatives".to_string(),
394                    ],
395                );
396                suggestions.insert(
397                    "interpretation".to_string(),
398                    vec![
399                        "Adjust for multiple comparisons if needed".to_string(),
400                        "Consider effect size in addition to p-values".to_string(),
401                    ],
402                );
403            }
404            _ => {
405                suggestions.insert(
406                    "general".to_string(),
407                    vec![
408                        "Validate input data quality".to_string(),
409                        "Check function prerequisites".to_string(),
410                    ],
411                );
412            }
413        }
414
415        suggestions
416    }
417}
418
419/// Comprehensive error reporting with standardized messages
420pub struct StandardizedErrorReporter;
421
422impl StandardizedErrorReporter {
423    /// Generate a comprehensive error report
424    pub fn generate_report(error: &StatsError, context: Option<&str>) -> String {
425        let mut report = String::new();
426
427        // Main _error message
428        report.push_str(&format!("❌ Error: {}\n\n", error));
429
430        // Context information
431        if let Some(ctx) = context {
432            report.push_str(&format!("📍 Context: {}\n\n", ctx));
433        }
434
435        // Recovery suggestions
436        let suggestions = RecoverySuggestions::get_suggestions(error);
437        if !suggestions.is_empty() {
438            report.push_str("💡 Suggested Solutions:\n");
439            for (i, (suggestion, impact)) in suggestions.iter().enumerate() {
440                let impact_icon = match impact {
441                    PerformanceImpact::None => "⚡",
442                    PerformanceImpact::Minimal => "🔋",
443                    PerformanceImpact::Moderate => "⏱️",
444                    PerformanceImpact::Significant => "⚠️",
445                };
446                report.push_str(&format!("   {}. {} {}\n", i + 1, impact_icon, suggestion));
447            }
448            report.push('\n');
449        }
450
451        // Performance impact legend
452        report.push_str("Legend: ⚡ No impact, 🔋 Minimal, ⏱️ Moderate, ⚠️ Significant\n");
453
454        report
455    }
456}
457
/// Enhanced error context for better debugging
///
/// Bundles where an error happened with diagnostics about the data and the
/// machine, plus the recovery actions proposed for it. In this file it is
/// stored next to each error in `BatchErrorHandler` and passed to
/// `AutoRecoverySystem::attempt_auto_recovery`.
#[derive(Debug, Clone)]
pub struct EnhancedErrorContext {
    /// The function where the error occurred
    pub function_name: String,
    /// The module where the error occurred
    /// (batch reports print it as `module_name::function_name`)
    pub module_name: String,
    /// Input data characteristics
    pub data_info: DataDiagnostics,
    /// System information
    pub system_info: SystemDiagnostics,
    /// Suggested recovery actions with priority
    pub recovery_actions: Vec<RecoveryAction>,
}
472
/// Data characteristics for error diagnosis
///
/// Produced in this file by `ErrorDiagnostics::diagnose_array_f64`.
#[derive(Debug, Clone)]
pub struct DataDiagnostics {
    /// Data shape information
    /// (`diagnose_array_f64` stores a single entry: the slice length)
    pub shape: Vec<usize>,
    /// Data type information
    /// (`diagnose_array_f64` sets this to "f64")
    pub data_type: String,
    /// Statistical summary
    pub summary: StatsSummary,
    /// Quality issues detected
    pub quality_issues: Vec<DataQualityIssue>,
}
485
/// Statistical summary for error context
///
/// The field notes below describe how `ErrorDiagnostics::diagnose_array_f64`
/// in this file populates the summary; other producers may differ.
#[derive(Debug, Clone)]
pub struct StatsSummary {
    /// Smallest finite value; `None` when there are no finite values.
    pub min: Option<f64>,
    /// Largest finite value; `None` when there are no finite values.
    pub max: Option<f64>,
    /// Arithmetic mean of the finite values; `None` when there are none.
    pub mean: Option<f64>,
    /// Standard deviation of the finite values, computed with divisor `n`
    /// (not `n - 1`); `None` when there are none.
    pub std: Option<f64>,
    /// Number of NaN values encountered.
    pub nan_count: usize,
    /// Number of infinite values encountered (either sign).
    pub inf_count: usize,
    /// Number of values that are neither NaN nor infinite.
    pub finite_count: usize,
}
497
/// Data quality issues
///
/// The per-variant notes describe when `ErrorDiagnostics::diagnose_array_f64`
/// in this file reports each issue.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq` (every payload is a
/// `usize`), so issues can be de-duplicated in a `HashSet` or used as map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum DataQualityIssue {
    /// At least one value is NaN.
    HasNaN,
    /// At least one value is infinite.
    HasInfinite,
    /// At least one finite value is below zero.
    HasNegative,
    /// At least one finite value equals zero.
    HasZeros,
    /// The finite values span a range narrower than `1e-15`.
    Constant,
    /// The data is highly skewed. Not produced by any code visible in this file.
    HighSkewness,
    /// Count of finite values lying more than three standard deviations from the mean.
    Outliers(usize),
    /// Fewer than two finite values are available; carries that count.
    SmallSample(usize),
}
510
/// System diagnostics for error context
///
/// Produced in this file by `ErrorDiagnostics::get_system_diagnostics`.
#[derive(Debug, Clone)]
pub struct SystemDiagnostics {
    /// Available memory (approximate)
    /// NOTE(review): the producer in this file always reports a fixed
    /// placeholder (8192) rather than a measured value.
    pub available_memory_mb: Option<usize>,
    /// CPU information
    /// (the producer in this file formats it as "Threads: <thread_count>")
    pub cpu_info: String,
    /// SIMD capabilities
    pub simd_available: bool,
    /// Thread count
    pub thread_count: usize,
}
523
/// Recovery action with metadata
///
/// Describes one concrete step that may resolve an error. Listed in
/// `EnhancedErrorContext::recovery_actions` and returned by
/// `AutoRecoverySystem::attempt_auto_recovery`.
#[derive(Debug, Clone)]
pub struct RecoveryAction {
    /// Description of the action
    pub description: String,
    /// Priority (1 = highest, 5 = lowest)
    /// (batch reports use distinct icons for 1, 2 and 3 and one shared icon
    /// for every other value)
    pub priority: u8,
    /// Expected performance impact
    pub performance_impact: PerformanceImpact,
    /// Code example (if applicable)
    pub code_example: Option<String>,
    /// Whether this action is automatic or manual
    pub automatic: bool,
}
538
539/// Batch error handler for operations on multiple datasets
540pub struct BatchErrorHandler {
541    errors: Vec<(usize, StatsError, EnhancedErrorContext)>,
542    warnings: Vec<(usize, String)>,
543}
544
545impl BatchErrorHandler {
546    pub fn new() -> Self {
547        Self {
548            errors: Vec::new(),
549            warnings: Vec::new(),
550        }
551    }
552
553    /// Add an error for a specific batch item
554    pub fn add_error(&mut self, index: usize, error: StatsError, context: EnhancedErrorContext) {
555        self.errors.push((index, error, context));
556    }
557
558    /// Add a warning for a specific batch item
559    pub fn add_warning(&mut self, index: usize, warning: String) {
560        self.warnings.push((index, warning));
561    }
562
563    /// Generate a comprehensive batch error report
564    pub fn generate_batch_report(&self) -> String {
565        let mut report = String::new();
566
567        if !self.errors.is_empty() {
568            report.push_str(&format!(
569                "🚨 Batch Processing Errors ({} errors):\n\n",
570                self.errors.len()
571            ));
572
573            for (i, (index, error, context)) in self.errors.iter().enumerate() {
574                report.push_str(&format!("Error {} (Item {}):\n", i + 1, index));
575                report.push_str(&format!("  ❌ {}\n", error));
576                report.push_str(&format!(
577                    "  📍 Function: {}::{}\n",
578                    context.module_name, context.function_name
579                ));
580                report.push_str(&format!("  📊 Data: {:?}\n", context.data_info.shape));
581
582                if !context.recovery_actions.is_empty() {
583                    report.push_str("  💡 Suggested Actions:\n");
584                    for action in &context.recovery_actions {
585                        let priority_icon = match action.priority {
586                            1 => "🔴",
587                            2 => "🟡",
588                            3 => "🟢",
589                            _ => "⚪",
590                        };
591                        report.push_str(&format!("    {} {}\n", priority_icon, action.description));
592                    }
593                }
594                report.push('\n');
595            }
596        }
597
598        if !self.warnings.is_empty() {
599            report.push_str(&format!(
600                "⚠️  Batch Processing Warnings ({} warnings):\n\n",
601                self.warnings.len()
602            ));
603
604            for (index, warning) in &self.warnings {
605                report.push_str(&format!("  Item {}: {}\n", index, warning));
606            }
607        }
608
609        report
610    }
611
612    /// Get summary statistics of errors
613    pub fn get_error_summary(&self) -> HashMap<String, usize> {
614        let mut summary = HashMap::new();
615
616        for (_, error_, _) in &self.errors {
617            let error_type = match error_ {
618                StatsError::ComputationError(_) => "Computation",
619                StatsError::DomainError(_) => "Domain",
620                StatsError::DimensionMismatch(_) => "Dimension",
621                StatsError::InvalidArgument(_) => "Invalid Argument",
622                StatsError::NotImplementedError(_) => "Not Implemented",
623                StatsError::ConvergenceError(_) => "Convergence",
624                StatsError::CoreError(_) => "Core",
625                StatsError::InsufficientData(_) => "Insufficient Data",
626                StatsError::InvalidInput(_) => "Invalid Input",
627                StatsError::NotImplemented(_) => "Not Implemented",
628                StatsError::DistributionError(_) => "Distribution",
629            };
630
631            *summary.entry(error_type.to_string()).or_insert(0) += 1;
632        }
633
634        summary
635    }
636}
637
638impl Default for BatchErrorHandler {
639    fn default() -> Self {
640        Self::new()
641    }
642}
643
644/// Enhanced error diagnostics
645pub struct ErrorDiagnostics;
646
647impl ErrorDiagnostics {
648    /// Generate comprehensive diagnostics for array data
649    pub fn diagnose_array_f64(data: &[f64], name: &str) -> DataDiagnostics {
650        let mut quality_issues = Vec::new();
651        let mut nan_count = 0;
652        let mut inf_count = 0;
653        let mut finite_values = Vec::new();
654        let mut has_negative = false;
655        let mut has_zeros = false;
656
657        for &value in data {
658            if value.is_nan() {
659                nan_count += 1;
660            } else if value.is_infinite() {
661                inf_count += 1;
662            } else {
663                finite_values.push(value);
664                if value < 0.0 {
665                    has_negative = true;
666                }
667                if value == 0.0 {
668                    has_zeros = true;
669                }
670            }
671        }
672
673        // Calculate basic statistics for finite values
674        let (min, max, mean, std) = if !finite_values.is_empty() {
675            let min = finite_values.iter().fold(f64::INFINITY, |a, &b| a.min(b));
676            let max = finite_values
677                .iter()
678                .fold(f64::NEG_INFINITY, |a, &b| a.max(b));
679            let mean = finite_values.iter().sum::<f64>() / finite_values.len() as f64;
680            let variance = finite_values
681                .iter()
682                .map(|&x| (x - mean).powi(2))
683                .sum::<f64>()
684                / finite_values.len() as f64;
685            let std = variance.sqrt();
686            (Some(min), Some(max), Some(mean), Some(std))
687        } else {
688            (None, None, None, None)
689        };
690
691        // Detect quality issues
692        if nan_count > 0 {
693            quality_issues.push(DataQualityIssue::HasNaN);
694        }
695        if inf_count > 0 {
696            quality_issues.push(DataQualityIssue::HasInfinite);
697        }
698        if has_negative {
699            quality_issues.push(DataQualityIssue::HasNegative);
700        }
701        if has_zeros {
702            quality_issues.push(DataQualityIssue::HasZeros);
703        }
704        if finite_values.len() < 2 {
705            quality_issues.push(DataQualityIssue::SmallSample(finite_values.len()));
706        }
707
708        // Check for constant data
709        if let (Some(min_val), Some(max_val)) = (min, max) {
710            if (max_val - min_val).abs() < 1e-15 {
711                quality_issues.push(DataQualityIssue::Constant);
712            }
713        }
714
715        // Simple outlier detection (values beyond 3 std devs)
716        if let (Some(mean_val), Some(std_val)) = (mean, std) {
717            if std_val > 0.0 {
718                let outlier_count = finite_values
719                    .iter()
720                    .filter(|&&x| (x - mean_val).abs() > 3.0 * std_val)
721                    .count();
722                if outlier_count > 0 {
723                    quality_issues.push(DataQualityIssue::Outliers(outlier_count));
724                }
725            }
726        }
727
728        DataDiagnostics {
729            shape: vec![data.len()],
730            data_type: "f64".to_string(),
731            summary: StatsSummary {
732                min,
733                max,
734                mean,
735                std,
736                nan_count,
737                inf_count,
738                finite_count: finite_values.len(),
739            },
740            quality_issues,
741        }
742    }
743
744    /// Generate system diagnostics
745    pub fn get_system_diagnostics() -> SystemDiagnostics {
746        use scirs2_core::simd_ops::PlatformCapabilities;
747
748        let capabilities = PlatformCapabilities::detect();
749        let thread_count = num_cpus::get();
750
751        SystemDiagnostics {
752            available_memory_mb: Self::get_available_memory_mb(),
753            cpu_info: format!("Threads: {}", thread_count),
754            simd_available: capabilities.simd_available,
755            thread_count,
756        }
757    }
758
759    fn get_available_memory_mb() -> Option<usize> {
760        // Simple approximation - would use system APIs in production
761        Some(8192) // Assume 8GB available
762    }
763}
764
765/// Inter-module error consistency checker
766pub struct InterModuleErrorChecker;
767
768impl InterModuleErrorChecker {
769    /// Check error consistency across modules
770    pub fn validate_error_consistency(
771        module_errors: &HashMap<String, Vec<StatsError>>,
772    ) -> Vec<String> {
773        let mut inconsistencies = Vec::new();
774
775        // Check for similar error patterns across modules
776        let mut error_patterns: HashMap<String, Vec<String>> = HashMap::new();
777
778        for (module, errors) in module_errors {
779            for error in errors {
780                let pattern = Self::extract_error_pattern(error);
781                error_patterns
782                    .entry(pattern)
783                    .or_default()
784                    .push(module.clone());
785            }
786        }
787
788        // Look for patterns that should be consistent but aren't
789        for (pattern, modules) in error_patterns {
790            if modules.len() > 1 {
791                let unique_modules: std::collections::HashSet<_> = modules.into_iter().collect();
792                if unique_modules.len() > 1 {
793                    inconsistencies.push(format!(
794                        "Error pattern '{}' appears inconsistently across modules: {:?}",
795                        pattern, unique_modules
796                    ));
797                }
798            }
799        }
800
801        inconsistencies
802    }
803
804    fn extract_error_pattern(error: &StatsError) -> String {
805        match error {
806            StatsError::DimensionMismatch(_) => "dimension_mismatch".to_string(),
807            StatsError::InvalidArgument(msg) if msg.contains("empty") => "empty_array".to_string(),
808            StatsError::InvalidArgument(msg) if msg.contains("NaN") => "nan_values".to_string(),
809            StatsError::DomainError(msg) if msg.contains("positive") => "non_positive".to_string(),
810            StatsError::DomainError(msg) if msg.contains("probability") => {
811                "invalid_probability".to_string()
812            }
813            StatsError::ConvergenceError(_) => "convergence_failure".to_string(),
814            StatsError::ComputationError(msg) if msg.contains("singular") => {
815                "singular_matrix".to_string()
816            }
817            _ => "other".to_string(),
818        }
819    }
820}
821
822/// Auto-recovery system for common errors
823pub struct AutoRecoverySystem;
824
825impl AutoRecoverySystem {
826    /// Attempt automatic recovery for common errors
827    pub fn attempt_auto_recovery(
828        error: &StatsError,
829        context: &EnhancedErrorContext,
830    ) -> Option<RecoveryAction> {
831        match error {
832            StatsError::InvalidArgument(msg) if msg.contains("NaN") => Some(RecoveryAction {
833                description: "Automatically remove NaN values".to_string(),
834                priority: 1,
835                performance_impact: PerformanceImpact::Minimal,
836                code_example: Some(
837                    "let cleandata = data.iter().filter(|x| x.is_finite()).collect();".to_string(),
838                ),
839                automatic: true,
840            }),
841            StatsError::DimensionMismatch(_) => {
842                Some(RecoveryAction {
843                    description: "Attempt automatic dimension alignment".to_string(),
844                    priority: 2,
845                    performance_impact: PerformanceImpact::Minimal,
846                    code_example: Some(
847                        "let aligneddata = data.broadcast_to(targetshape);".to_string(),
848                    ),
849                    automatic: false, // Usually requires user input
850                })
851            }
852            StatsError::ComputationError(msg) if msg.contains("singular") => Some(RecoveryAction {
853                description: "Add regularization to handle singularity".to_string(),
854                priority: 1,
855                performance_impact: PerformanceImpact::Minimal,
856                code_example: Some("let regularized = matrix + Array2::eye(n) * 1e-6;".to_string()),
857                automatic: true,
858            }),
859            _ => None,
860        }
861    }
862}
863
#[cfg(test)]
mod tests {
    use super::*;

    /// The length-mismatch template carries both the headline and the hint.
    #[test]
    fn test_error_messages() {
        let rendered = ErrorMessages::length_mismatch("x", 5, "y", 3).to_string();
        assert!(rendered.contains("Array length mismatch"));
        assert!(rendered.contains("same number of elements"));
    }

    /// Empty and NaN-containing inputs are rejected; finite input is accepted.
    #[test]
    fn test_error_validator() {
        let nothing: &[f64] = &[];
        assert!(ErrorValidator::validate_array(nothing, "test").is_err());

        let all_finite = [1.0, 2.0, 3.0];
        assert!(ErrorValidator::validate_finite_array(&all_finite, "test").is_ok());

        let with_nan = [1.0, f64::NAN, 3.0];
        assert!(ErrorValidator::validate_finite_array(&with_nan, "test").is_err());
    }

    /// An empty-array error always comes with at least one suggestion.
    #[test]
    fn test_recovery_suggestions() {
        let error = ErrorMessages::empty_array("data");
        let advice = RecoverySuggestions::get_suggestions(&error);
        assert!(!advice.is_empty());
    }

    /// Diagnostics count NaN and finite values and flag the NaN issue.
    #[test]
    fn test_enhanced_error_context() {
        let samples = [1.0, 2.0, f64::NAN, 4.0];
        let report = ErrorDiagnostics::diagnose_array_f64(&samples, "testdata");

        assert_eq!(report.shape, vec![4]);
        assert_eq!(report.summary.nan_count, 1);
        assert_eq!(report.summary.finite_count, 3);
        assert!(report.quality_issues.contains(&DataQualityIssue::HasNaN));
    }

    /// Errors and warnings both show up in the report and in the summary.
    #[test]
    fn test_batch_error_handler() {
        let mut batch = BatchErrorHandler::new();

        let context = EnhancedErrorContext {
            function_name: "test_function".to_string(),
            module_name: "test_module".to_string(),
            data_info: ErrorDiagnostics::diagnose_array_f64(&[], "empty"),
            system_info: ErrorDiagnostics::get_system_diagnostics(),
            recovery_actions: vec![],
        };
        batch.add_error(0, ErrorMessages::empty_array("test"), context);
        batch.add_warning(1, "This is a test warning".to_string());

        let rendered = batch.generate_batch_report();
        assert!(rendered.contains("Batch Processing Errors"));
        assert!(rendered.contains("Batch Processing Warnings"));

        let counts = batch.get_error_summary();
        assert_eq!(counts.get("Invalid Argument"), Some(&1));
    }

    /// A NaN error yields an automatic recovery proposal.
    #[test]
    fn test_auto_recovery_system() {
        let nan_error = ErrorMessages::nan_detected("test context");
        let context = EnhancedErrorContext {
            function_name: "test".to_string(),
            module_name: "test".to_string(),
            data_info: ErrorDiagnostics::diagnose_array_f64(&[f64::NAN], "test"),
            system_info: ErrorDiagnostics::get_system_diagnostics(),
            recovery_actions: vec![],
        };

        let proposal = AutoRecoverySystem::attempt_auto_recovery(&nan_error, &context);
        assert!(proposal.is_some());
        assert!(proposal.unwrap().automatic);
    }
}
scirs2_stats/error_standardization.rs

scirs2_stats/
error_standardization.rs