Skip to main content

torsh_data/
error.rs

//! Enhanced error handling for torsh-data.
//!
//! This module provides comprehensive error types with detailed context
//! and recovery suggestions for data loading operations.
5
6use scirs2_core::random::thread_rng; // SciRS2 POLICY compliant
7use std::fmt;
8
9/// Enhanced error types specific to data loading operations
10#[derive(Debug, Clone)]
11pub enum DataError {
12    /// Dataset-related errors
13    Dataset {
14        kind: DatasetErrorKind,
15        context: String,
16        suggestion: Option<String>,
17    },
18
19    /// Data loader errors
20    DataLoader {
21        kind: DataLoaderErrorKind,
22        context: String,
23        suggestion: Option<String>,
24    },
25
26    /// Transform operation errors
27    Transform {
28        kind: TransformErrorKind,
29        transform_name: String,
30        context: String,
31        suggestion: Option<String>,
32    },
33
34    /// Sampler-related errors
35    Sampler {
36        kind: SamplerErrorKind,
37        sampler_type: String,
38        context: String,
39        suggestion: Option<String>,
40    },
41
42    /// Collation errors
43    Collation {
44        kind: CollationErrorKind,
45        batch_info: BatchInfo,
46        context: String,
47        suggestion: Option<String>,
48    },
49
50    /// I/O and file system errors
51    Io {
52        kind: IoErrorKind,
53        path: Option<String>,
54        operation: String,
55        context: String,
56        suggestion: Option<String>,
57    },
58
59    /// Configuration and validation errors
60    Configuration {
61        kind: ConfigErrorKind,
62        parameter: String,
63        value: String,
64        context: String,
65        suggestion: Option<String>,
66    },
67
68    /// Memory and resource errors
69    Resource {
70        kind: ResourceErrorKind,
71        resource_type: String,
72        requested: Option<usize>,
73        available: Option<usize>,
74        context: String,
75        suggestion: Option<String>,
76    },
77
78    /// Privacy and differential privacy errors
79    Privacy {
80        kind: PrivacyErrorKind,
81        privacy_parameter: String,
82        context: String,
83        suggestion: Option<String>,
84    },
85
86    /// GPU acceleration and compute errors
87    GpuError(String),
88
89    /// Other errors
90    Other(String),
91}
92
/// Failure categories carried by [`DataError::Dataset`].
///
/// The enum is fieldless, so it is `Copy` and can be compared with `==` or
/// used as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DatasetErrorKind {
    IndexOutOfBounds,
    EmptyDataset,
    IncompatibleShapes,
    MissingData,
    CorruptedData,
    UnsupportedFormat,
    AccessDenied,
}
103
/// Failure categories carried by [`DataError::DataLoader`].
///
/// The enum is fieldless, so it is `Copy` and can be compared with `==` or
/// used as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DataLoaderErrorKind {
    WorkerPanic,
    ChannelClosed,
    Timeout,
    ConfigurationInvalid,
    BackendUnavailable,
    BatchGenerationFailed,
}
113
/// Failure categories carried by [`DataError::Transform`].
///
/// The enum is fieldless, so it is `Copy` and can be compared with `==` or
/// used as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TransformErrorKind {
    InvalidInput,
    IncompatibleDimensions,
    NumericalInstability,
    UnsupportedOperation,
    ConfigurationError,
    ResourceExhaustion,
}
123
/// Failure categories carried by [`DataError::Sampler`].
///
/// The enum is fieldless, so it is `Copy` and can be compared with `==` or
/// used as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SamplerErrorKind {
    InvalidWeights,
    EmptyPopulation,
    InvalidProbability,
    IndexOutOfRange,
    InsufficientData,
    ConfigurationConflict,
}
133
/// Failure categories carried by [`DataError::Collation`].
///
/// The enum is fieldless, so it is `Copy` and can be compared with `==` or
/// used as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CollationErrorKind {
    ShapeMismatch,
    TypeMismatch,
    BatchSizeExceeded,
    MemoryExhaustion,
    InvalidPadding,
    UnsupportedCollation,
}
143
/// Failure categories carried by [`DataError::Io`].
///
/// The enum is fieldless, so it is `Copy` and can be compared with `==` or
/// used as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IoErrorKind {
    FileNotFound,
    PermissionDenied,
    DiskFull,
    NetworkError,
    CorruptedFile,
    UnsupportedFormat,
    WriteError,
    ReadError,
}
155
/// Failure categories carried by [`DataError::Configuration`].
///
/// The enum is fieldless, so it is `Copy` and can be compared with `==` or
/// used as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ConfigErrorKind {
    InvalidValue,
    MissingRequired,
    ConflictingValues,
    OutOfRange,
    InvalidType,
    Deprecated,
}
165
/// Failure categories carried by [`DataError::Resource`].
///
/// The enum is fieldless, so it is `Copy` and can be compared with `==` or
/// used as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ResourceErrorKind {
    MemoryExhaustion,
    CpuOverload,
    GpuUnavailable,
    DiskSpaceExhaustion,
    ThreadPoolExhaustion,
    CacheOverflow,
}
175
/// Failure categories carried by [`DataError::Privacy`].
///
/// The enum is fieldless, so it is `Copy` and can be compared with `==` or
/// used as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PrivacyErrorKind {
    BudgetExceeded,
    InvalidPrivacyParameter,
    AccessLimitExceeded,
    AccessDenied,
    TensorCreationFailed,
    NoiseGenerationFailed,
    CompositionError,
}
186
/// Description of a batch, attached to collation errors.
#[derive(Debug, Clone)]
pub struct BatchInfo {
    /// Batch size given to [`BatchInfo::new`].
    pub batch_size: usize,
    /// Shapes appended through [`BatchInfo::with_shape`], in call order.
    pub item_shapes: Vec<Vec<usize>>,
    /// Type names appended through [`BatchInfo::with_type`], in call order.
    pub item_types: Vec<String>,
}

impl BatchInfo {
    /// Starts a description for a batch of `batch_size` items with no shapes
    /// or type names recorded yet.
    pub fn new(batch_size: usize) -> Self {
        let (item_shapes, item_types) = (Vec::new(), Vec::new());
        Self {
            batch_size,
            item_shapes,
            item_types,
        }
    }

    /// Appends one item shape and returns the updated description.
    pub fn with_shape(self, shape: Vec<usize>) -> Self {
        let mut info = self;
        info.item_shapes.push(shape);
        info
    }

    /// Appends one item type name and returns the updated description.
    pub fn with_type(self, type_name: String) -> Self {
        let mut info = self;
        info.item_types.push(type_name);
        info
    }
}
213
214impl DataError {
215    /// Create a dataset error with context
216    pub fn dataset(kind: DatasetErrorKind, context: impl Into<String>) -> Self {
217        Self::Dataset {
218            kind,
219            context: context.into(),
220            suggestion: None,
221        }
222    }
223
224    /// Create a data loader error with context
225    pub fn dataloader(kind: DataLoaderErrorKind, context: impl Into<String>) -> Self {
226        Self::DataLoader {
227            kind,
228            context: context.into(),
229            suggestion: None,
230        }
231    }
232
233    /// Create a transform error with context
234    pub fn transform(
235        kind: TransformErrorKind,
236        transform_name: impl Into<String>,
237        context: impl Into<String>,
238    ) -> Self {
239        Self::Transform {
240            kind,
241            transform_name: transform_name.into(),
242            context: context.into(),
243            suggestion: None,
244        }
245    }
246
247    /// Create a sampler error with context
248    pub fn sampler(
249        kind: SamplerErrorKind,
250        sampler_type: impl Into<String>,
251        context: impl Into<String>,
252    ) -> Self {
253        Self::Sampler {
254            kind,
255            sampler_type: sampler_type.into(),
256            context: context.into(),
257            suggestion: None,
258        }
259    }
260
261    /// Create a collation error with context
262    pub fn collation(
263        kind: CollationErrorKind,
264        batch_info: BatchInfo,
265        context: impl Into<String>,
266    ) -> Self {
267        Self::Collation {
268            kind,
269            batch_info,
270            context: context.into(),
271            suggestion: None,
272        }
273    }
274
275    /// Create an I/O error with context
276    pub fn io(kind: IoErrorKind, operation: impl Into<String>, context: impl Into<String>) -> Self {
277        Self::Io {
278            kind,
279            path: None,
280            operation: operation.into(),
281            context: context.into(),
282            suggestion: None,
283        }
284    }
285
286    /// Create a configuration error with context
287    pub fn config(
288        kind: ConfigErrorKind,
289        parameter: impl Into<String>,
290        value: impl Into<String>,
291        context: impl Into<String>,
292    ) -> Self {
293        Self::Configuration {
294            kind,
295            parameter: parameter.into(),
296            value: value.into(),
297            context: context.into(),
298            suggestion: None,
299        }
300    }
301
302    /// Create a resource error with context
303    pub fn resource(
304        kind: ResourceErrorKind,
305        resource_type: impl Into<String>,
306        context: impl Into<String>,
307    ) -> Self {
308        Self::Resource {
309            kind,
310            resource_type: resource_type.into(),
311            requested: None,
312            available: None,
313            context: context.into(),
314            suggestion: None,
315        }
316    }
317
318    /// Create a privacy error with context
319    pub fn privacy(
320        kind: PrivacyErrorKind,
321        privacy_parameter: impl Into<String>,
322        context: impl Into<String>,
323    ) -> Self {
324        Self::Privacy {
325            kind,
326            privacy_parameter: privacy_parameter.into(),
327            context: context.into(),
328            suggestion: None,
329        }
330    }
331
332    // Convenience methods for specific privacy errors
333
334    /// Create a privacy budget exceeded error
335    pub fn privacy_budget_exceeded(context: impl Into<String>) -> Self {
336        Self::privacy(PrivacyErrorKind::BudgetExceeded, "privacy_budget", context)
337    }
338
339    /// Create an invalid privacy parameter error
340    pub fn invalid_privacy_parameter(context: impl Into<String>) -> Self {
341        Self::privacy(
342            PrivacyErrorKind::InvalidPrivacyParameter,
343            "privacy_parameter",
344            context,
345        )
346    }
347
348    /// Create an access limit exceeded error
349    pub fn privacy_access_limit_exceeded(context: impl Into<String>) -> Self {
350        Self::privacy(
351            PrivacyErrorKind::AccessLimitExceeded,
352            "access_limit",
353            context,
354        )
355    }
356
357    /// Create an access denied error
358    pub fn privacy_access_denied(context: impl Into<String>) -> Self {
359        Self::privacy(PrivacyErrorKind::AccessDenied, "access_control", context)
360    }
361
362    /// Create a tensor creation failed error
363    pub fn tensor_creation_failed(context: impl Into<String>) -> Self {
364        Self::privacy(
365            PrivacyErrorKind::TensorCreationFailed,
366            "tensor_creation",
367            context,
368        )
369    }
370
371    /// Create a noise generation failed error
372    pub fn noise_generation_failed(context: impl Into<String>) -> Self {
373        Self::privacy(
374            PrivacyErrorKind::NoiseGenerationFailed,
375            "noise_generation",
376            context,
377        )
378    }
379
380    /// Create a composition error
381    pub fn privacy_composition_error(context: impl Into<String>) -> Self {
382        Self::privacy(
383            PrivacyErrorKind::CompositionError,
384            "privacy_composition",
385            context,
386        )
387    }
388
389    /// Add a suggestion for error recovery
390    pub fn with_suggestion(mut self, suggestion: impl Into<String>) -> Self {
391        match &mut self {
392            DataError::Dataset { suggestion: s, .. }
393            | DataError::DataLoader { suggestion: s, .. }
394            | DataError::Transform { suggestion: s, .. }
395            | DataError::Sampler { suggestion: s, .. }
396            | DataError::Collation { suggestion: s, .. }
397            | DataError::Io { suggestion: s, .. }
398            | DataError::Configuration { suggestion: s, .. }
399            | DataError::Resource { suggestion: s, .. }
400            | DataError::Privacy { suggestion: s, .. } => {
401                *s = Some(suggestion.into());
402            }
403            DataError::GpuError(_) | DataError::Other(_) => {
404                // These error types don't have suggestion fields
405            }
406        }
407        self
408    }
409
410    /// Add path information for I/O errors
411    pub fn with_path(mut self, path: impl Into<String>) -> Self {
412        if let DataError::Io { path: p, .. } = &mut self {
413            *p = Some(path.into());
414        }
415        self
416    }
417
418    /// Add resource information for resource errors
419    pub fn with_resource_info(mut self, requested: usize, available: Option<usize>) -> Self {
420        if let DataError::Resource {
421            requested: r,
422            available: a,
423            ..
424        } = &mut self
425        {
426            *r = Some(requested);
427            *a = available;
428        }
429        self
430    }
431
432    /// Check if the error is recoverable
433    pub fn is_recoverable(&self) -> bool {
434        match self {
435            DataError::Dataset { kind, .. } => match kind {
436                DatasetErrorKind::IndexOutOfBounds => true,
437                DatasetErrorKind::EmptyDataset => false,
438                DatasetErrorKind::IncompatibleShapes => false,
439                DatasetErrorKind::MissingData => true,
440                DatasetErrorKind::CorruptedData => false,
441                DatasetErrorKind::UnsupportedFormat => false,
442                DatasetErrorKind::AccessDenied => true,
443            },
444            DataError::DataLoader { kind, .. } => match kind {
445                DataLoaderErrorKind::WorkerPanic => true,
446                DataLoaderErrorKind::ChannelClosed => true,
447                DataLoaderErrorKind::Timeout => true,
448                DataLoaderErrorKind::ConfigurationInvalid => false,
449                DataLoaderErrorKind::BackendUnavailable => true,
450                DataLoaderErrorKind::BatchGenerationFailed => true,
451            },
452            DataError::Transform { kind, .. } => match kind {
453                TransformErrorKind::InvalidInput => false,
454                TransformErrorKind::IncompatibleDimensions => false,
455                TransformErrorKind::NumericalInstability => true,
456                TransformErrorKind::UnsupportedOperation => false,
457                TransformErrorKind::ConfigurationError => false,
458                TransformErrorKind::ResourceExhaustion => true,
459            },
460            DataError::Sampler { kind, .. } => match kind {
461                SamplerErrorKind::InvalidWeights => false,
462                SamplerErrorKind::EmptyPopulation => false,
463                SamplerErrorKind::InvalidProbability => false,
464                SamplerErrorKind::IndexOutOfRange => true,
465                SamplerErrorKind::InsufficientData => true,
466                SamplerErrorKind::ConfigurationConflict => false,
467            },
468            DataError::Collation { kind, .. } => match kind {
469                CollationErrorKind::ShapeMismatch => false,
470                CollationErrorKind::TypeMismatch => false,
471                CollationErrorKind::BatchSizeExceeded => true,
472                CollationErrorKind::MemoryExhaustion => true,
473                CollationErrorKind::InvalidPadding => false,
474                CollationErrorKind::UnsupportedCollation => false,
475            },
476            DataError::Io { kind, .. } => match kind {
477                IoErrorKind::FileNotFound => true,
478                IoErrorKind::PermissionDenied => true,
479                IoErrorKind::DiskFull => true,
480                IoErrorKind::NetworkError => true,
481                IoErrorKind::CorruptedFile => false,
482                IoErrorKind::UnsupportedFormat => false,
483                IoErrorKind::WriteError => true,
484                IoErrorKind::ReadError => true,
485            },
486            DataError::Configuration { .. } => false,
487            DataError::Resource { kind, .. } => match kind {
488                ResourceErrorKind::MemoryExhaustion => true,
489                ResourceErrorKind::CpuOverload => true,
490                ResourceErrorKind::GpuUnavailable => true,
491                ResourceErrorKind::DiskSpaceExhaustion => true,
492                ResourceErrorKind::ThreadPoolExhaustion => true,
493                ResourceErrorKind::CacheOverflow => true,
494            },
495            DataError::Privacy { kind, .. } => match kind {
496                PrivacyErrorKind::BudgetExceeded => false,
497                PrivacyErrorKind::InvalidPrivacyParameter => false,
498                PrivacyErrorKind::AccessLimitExceeded => false,
499                PrivacyErrorKind::AccessDenied => true,
500                PrivacyErrorKind::TensorCreationFailed => true,
501                PrivacyErrorKind::NoiseGenerationFailed => true,
502                PrivacyErrorKind::CompositionError => false,
503            },
504            DataError::GpuError(_) => true, // GPU errors are typically recoverable with CPU fallback
505            DataError::Other(_) => false,   // Generic errors are typically not recoverable
506        }
507    }
508
509    /// Get error severity level
510    pub fn severity(&self) -> ErrorSeverity {
511        match self {
512            DataError::Dataset { kind, .. } => match kind {
513                DatasetErrorKind::IndexOutOfBounds => ErrorSeverity::Warning,
514                DatasetErrorKind::EmptyDataset => ErrorSeverity::Error,
515                DatasetErrorKind::IncompatibleShapes => ErrorSeverity::Error,
516                DatasetErrorKind::MissingData => ErrorSeverity::Warning,
517                DatasetErrorKind::CorruptedData => ErrorSeverity::Critical,
518                DatasetErrorKind::UnsupportedFormat => ErrorSeverity::Error,
519                DatasetErrorKind::AccessDenied => ErrorSeverity::Error,
520            },
521            DataError::DataLoader { kind, .. } => match kind {
522                DataLoaderErrorKind::WorkerPanic => ErrorSeverity::Critical,
523                DataLoaderErrorKind::ChannelClosed => ErrorSeverity::Error,
524                DataLoaderErrorKind::Timeout => ErrorSeverity::Warning,
525                DataLoaderErrorKind::ConfigurationInvalid => ErrorSeverity::Error,
526                DataLoaderErrorKind::BackendUnavailable => ErrorSeverity::Error,
527                DataLoaderErrorKind::BatchGenerationFailed => ErrorSeverity::Warning,
528            },
529            DataError::Transform { .. } => ErrorSeverity::Warning,
530            DataError::Sampler { .. } => ErrorSeverity::Warning,
531            DataError::Collation { .. } => ErrorSeverity::Warning,
532            DataError::Io { kind, .. } => match kind {
533                IoErrorKind::CorruptedFile => ErrorSeverity::Critical,
534                _ => ErrorSeverity::Error,
535            },
536            DataError::Configuration { .. } => ErrorSeverity::Error,
537            DataError::Resource { .. } => ErrorSeverity::Warning,
538            DataError::Privacy { kind, .. } => match kind {
539                PrivacyErrorKind::BudgetExceeded => ErrorSeverity::Critical,
540                PrivacyErrorKind::InvalidPrivacyParameter => ErrorSeverity::Error,
541                PrivacyErrorKind::AccessLimitExceeded => ErrorSeverity::Error,
542                PrivacyErrorKind::AccessDenied => ErrorSeverity::Warning,
543                PrivacyErrorKind::TensorCreationFailed => ErrorSeverity::Error,
544                PrivacyErrorKind::NoiseGenerationFailed => ErrorSeverity::Error,
545                PrivacyErrorKind::CompositionError => ErrorSeverity::Critical,
546            },
547            DataError::GpuError(_) => ErrorSeverity::Warning,
548            DataError::Other(_) => ErrorSeverity::Error,
549        }
550    }
551}
552
/// Severity level reported by [`DataError::severity`].
///
/// Variants are declared from least to most severe, so the derived ordering
/// gives `Warning < Error < Critical` and thresholds can be written as
/// `severity >= ErrorSeverity::Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ErrorSeverity {
    Warning,
    Error,
    Critical,
}
559
560impl fmt::Display for DataError {
561    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
562        match self {
563            DataError::Dataset {
564                kind,
565                context,
566                suggestion,
567            } => {
568                write!(f, "Dataset error ({kind:?}): {context}")?;
569                if let Some(s) = suggestion {
570                    write!(f, " Suggestion: {s}")?;
571                }
572            }
573            DataError::DataLoader {
574                kind,
575                context,
576                suggestion,
577            } => {
578                write!(f, "DataLoader error ({kind:?}): {context}")?;
579                if let Some(s) = suggestion {
580                    write!(f, " Suggestion: {s}")?;
581                }
582            }
583            DataError::Transform {
584                kind,
585                transform_name,
586                context,
587                suggestion,
588            } => {
589                write!(
590                    f,
591                    "Transform error in '{transform_name}' ({kind:?}): {context}"
592                )?;
593                if let Some(s) = suggestion {
594                    write!(f, " Suggestion: {s}")?;
595                }
596            }
597            DataError::Sampler {
598                kind,
599                sampler_type,
600                context,
601                suggestion,
602            } => {
603                write!(f, "Sampler error in '{sampler_type}' ({kind:?}): {context}")?;
604                if let Some(s) = suggestion {
605                    write!(f, " Suggestion: {s}")?;
606                }
607            }
608            DataError::Collation {
609                kind,
610                batch_info,
611                context,
612                suggestion,
613            } => {
614                write!(
615                    f,
616                    "Collation error ({:?}) for batch size {}: {}",
617                    kind, batch_info.batch_size, context
618                )?;
619                if let Some(s) = suggestion {
620                    write!(f, " Suggestion: {s}")?;
621                }
622            }
623            DataError::Io {
624                kind,
625                path,
626                operation,
627                context,
628                suggestion,
629            } => {
630                write!(f, "I/O error ({kind:?}) during '{operation}': {context}")?;
631                if let Some(p) = path {
632                    write!(f, " Path: {p}")?;
633                }
634                if let Some(s) = suggestion {
635                    write!(f, " Suggestion: {s}")?;
636                }
637            }
638            DataError::Configuration {
639                kind,
640                parameter,
641                value,
642                context,
643                suggestion,
644            } => {
645                write!(f, "Configuration error ({kind:?}) for parameter '{parameter}' = '{value}': {context}")?;
646                if let Some(s) = suggestion {
647                    write!(f, " Suggestion: {s}")?;
648                }
649            }
650            DataError::Resource {
651                kind,
652                resource_type,
653                requested,
654                available,
655                context,
656                suggestion,
657            } => {
658                write!(
659                    f,
660                    "Resource error ({kind:?}) for {resource_type}: {context}"
661                )?;
662                if let (Some(req), Some(avail)) = (requested, available) {
663                    write!(f, " (requested: {req}, available: {avail})")?;
664                } else if let Some(req) = requested {
665                    write!(f, " (requested: {req})")?;
666                }
667                if let Some(s) = suggestion {
668                    write!(f, " Suggestion: {s}")?;
669                }
670            }
671            DataError::Privacy {
672                kind,
673                privacy_parameter,
674                context,
675                suggestion,
676            } => {
677                write!(
678                    f,
679                    "Privacy error ({kind:?}) for parameter '{privacy_parameter}': {context}"
680                )?;
681                if let Some(s) = suggestion {
682                    write!(f, " Suggestion: {s}")?;
683                }
684            }
685            DataError::GpuError(msg) => {
686                write!(f, "GPU error: {msg}")?;
687            }
688            DataError::Other(msg) => {
689                write!(f, "Data error: {msg}")?;
690            }
691        }
692        Ok(())
693    }
694}
695
696impl std::error::Error for DataError {}
697
698impl From<DataError> for torsh_core::TorshError {
699    fn from(err: DataError) -> Self {
700        torsh_core::TorshError::Other(format!("Data error: {err}"))
701    }
702}
703
704impl From<torsh_core::TorshError> for DataError {
705    fn from(err: torsh_core::TorshError) -> Self {
706        DataError::Other(format!("Torsh error: {err}"))
707    }
708}
709
710/// Result type for data operations
711pub type Result<T> = std::result::Result<T, DataError>;
712
/// Builder that collects an operation name plus any number of detail strings
/// and renders them into a single context message.
pub struct ErrorContext {
    /// Name of the operation being described.
    operation: String,
    /// Detail strings, in the order they were added.
    details: Vec<String>,
}

impl ErrorContext {
    /// Starts a context for `operation` with no details.
    pub fn new(operation: impl Into<String>) -> Self {
        let operation = operation.into();
        Self {
            operation,
            details: Vec::new(),
        }
    }

    /// Appends one detail string and returns the builder.
    pub fn with_detail(self, detail: impl Into<String>) -> Self {
        let mut ctx = self;
        ctx.details.push(detail.into());
        ctx
    }

    /// Renders the context as `operation` when there are no details, or
    /// `operation: detail1, detail2, …` otherwise.
    pub fn build_context(&self) -> String {
        if self.details.is_empty() {
            return self.operation.clone();
        }
        let joined = self.details.join(", ");
        format!("{}: {}", self.operation, joined)
    }
}
741
742/// Trait for adding context to errors
743pub trait WithContext<T> {
744    fn with_context<F>(self, f: F) -> Result<T>
745    where
746        F: FnOnce() -> ErrorContext;
747
748    fn with_simple_context(self, operation: &str) -> Result<T>;
749}
750
751impl<T, E> WithContext<T> for std::result::Result<T, E>
752where
753    E: std::error::Error + Send + Sync + 'static,
754{
755    fn with_context<F>(self, f: F) -> Result<T>
756    where
757        F: FnOnce() -> ErrorContext,
758    {
759        self.map_err(|e| {
760            let context = f();
761            DataError::io(
762                IoErrorKind::ReadError,
763                &context.operation,
764                format!("{}: {}", context.build_context(), e),
765            )
766        })
767    }
768
769    fn with_simple_context(self, operation: &str) -> Result<T> {
770        self.with_context(|| ErrorContext::new(operation))
771    }
772}
773
774/// Error recovery strategies and mechanisms
775pub mod recovery {
776    use super::*;
777    use std::time::{Duration, Instant};
778
779    /// Retry strategy configuration
780    #[derive(Debug, Clone)]
781    pub struct RetryStrategy {
782        pub max_attempts: usize,
783        pub base_delay: Duration,
784        pub max_delay: Duration,
785        pub backoff_multiplier: f64,
786        pub jitter: bool,
787    }
788
789    impl Default for RetryStrategy {
790        fn default() -> Self {
791            Self {
792                max_attempts: 3,
793                base_delay: Duration::from_millis(100),
794                max_delay: Duration::from_secs(30),
795                backoff_multiplier: 2.0,
796                jitter: true,
797            }
798        }
799    }
800
801    impl RetryStrategy {
802        /// Create a new retry strategy
803        pub fn new(max_attempts: usize) -> Self {
804            Self {
805                max_attempts,
806                ..Default::default()
807            }
808        }
809
810        /// Set base delay
811        pub fn with_base_delay(mut self, delay: Duration) -> Self {
812            self.base_delay = delay;
813            self
814        }
815
816        /// Set maximum delay
817        pub fn with_max_delay(mut self, delay: Duration) -> Self {
818            self.max_delay = delay;
819            self
820        }
821
822        /// Set backoff multiplier
823        pub fn with_backoff_multiplier(mut self, multiplier: f64) -> Self {
824            self.backoff_multiplier = multiplier;
825            self
826        }
827
828        /// Enable or disable jitter
829        pub fn with_jitter(mut self, jitter: bool) -> Self {
830            self.jitter = jitter;
831            self
832        }
833
834        /// Calculate delay for a given attempt
835        pub fn delay_for_attempt(&self, attempt: usize) -> Duration {
836            let delay = (self.base_delay.as_millis() as f64
837                * self.backoff_multiplier.powi(attempt as i32)) as u64;
838
839            let delay = Duration::from_millis(delay.min(self.max_delay.as_millis() as u64));
840
841            if self.jitter && attempt > 0 {
842                let jitter_ms =
843                    (delay.as_millis() as f64 * 0.1 * thread_rng().random::<f64>()) as u64;
844                delay + Duration::from_millis(jitter_ms)
845            } else {
846                delay
847            }
848        }
849    }
850
851    /// Error recovery context
852    #[derive(Debug)]
853    pub struct RecoveryContext {
854        pub original_error: DataError,
855        pub attempt: usize,
856        pub started_at: Instant,
857        pub last_attempt_at: Instant,
858    }
859
860    impl RecoveryContext {
861        /// Create a new recovery context
862        pub fn new(error: DataError) -> Self {
863            let now = Instant::now();
864            Self {
865                original_error: error,
866                attempt: 0,
867                started_at: now,
868                last_attempt_at: now,
869            }
870        }
871
872        /// Record a new attempt
873        pub fn next_attempt(&mut self) {
874            self.attempt += 1;
875            self.last_attempt_at = Instant::now();
876        }
877
878        /// Get total elapsed time
879        pub fn total_elapsed(&self) -> Duration {
880            self.started_at.elapsed()
881        }
882
883        /// Get time since last attempt
884        pub fn time_since_last_attempt(&self) -> Duration {
885            self.last_attempt_at.elapsed()
886        }
887    }
888
889    /// Automatic error recovery utility
890    pub fn retry_operation<T, F>(mut operation: F, strategy: &RetryStrategy) -> Result<T>
891    where
892        F: FnMut() -> Result<T>,
893    {
894        let mut last_error = None;
895
896        for attempt in 0..strategy.max_attempts {
897            match operation() {
898                Ok(result) => return Ok(result),
899                Err(error) => {
900                    if !error.is_recoverable() || attempt == strategy.max_attempts - 1 {
901                        return Err(error);
902                    }
903
904                    let delay = strategy.delay_for_attempt(attempt);
905                    std::thread::sleep(delay);
906                    last_error = Some(error);
907                }
908            }
909        }
910
911        Err(last_error.unwrap_or_else(|| {
912            DataError::Other("Retry operation failed without error".to_string())
913        }))
914    }
915
916    /// Async version of retry operation
917    #[cfg(feature = "async-support")]
918    pub async fn retry_operation_async<T, F, Fut>(
919        mut operation: F,
920        strategy: &RetryStrategy,
921    ) -> Result<T>
922    where
923        F: FnMut() -> Fut,
924        Fut: std::future::Future<Output = Result<T>>,
925    {
926        let mut last_error = None;
927
928        for attempt in 0..strategy.max_attempts {
929            match operation().await {
930                Ok(result) => return Ok(result),
931                Err(error) => {
932                    if !error.is_recoverable() || attempt == strategy.max_attempts - 1 {
933                        return Err(error);
934                    }
935
936                    let delay = strategy.delay_for_attempt(attempt);
937                    tokio::time::sleep(delay).await;
938                    last_error = Some(error);
939                }
940            }
941        }
942
943        Err(last_error.unwrap_or_else(|| {
944            DataError::Other("Async retry operation failed without error".to_string())
945        }))
946    }
947}
948
949/// Error diagnostics and debugging utilities
950pub mod diagnostics {
951    use super::*;
952    use std::collections::HashMap;
953
954    /// Error statistics collector
955    #[derive(Debug, Default)]
956    pub struct ErrorStatistics {
957        pub total_errors: usize,
958        pub error_counts: HashMap<String, usize>,
959        pub severity_counts: HashMap<ErrorSeverity, usize>,
960        pub recoverable_count: usize,
961        pub non_recoverable_count: usize,
962    }
963
964    impl ErrorStatistics {
965        /// Create a new error statistics collector
966        pub fn new() -> Self {
967            Self::default()
968        }
969
970        /// Record an error
971        pub fn record_error(&mut self, error: &DataError) {
972            self.total_errors += 1;
973
974            let error_type = match error {
975                DataError::Dataset { kind, .. } => format!("Dataset::{kind:?}"),
976                DataError::DataLoader { kind, .. } => format!("DataLoader::{kind:?}"),
977                DataError::Transform { kind, .. } => format!("Transform::{kind:?}"),
978                DataError::Sampler { kind, .. } => format!("Sampler::{kind:?}"),
979                DataError::Collation { kind, .. } => format!("Collation::{kind:?}"),
980                DataError::Io { kind, .. } => format!("Io::{kind:?}"),
981                DataError::Configuration { kind, .. } => format!("Configuration::{kind:?}"),
982                DataError::Resource { kind, .. } => format!("Resource::{kind:?}"),
983                DataError::Privacy { kind, .. } => format!("Privacy::{kind:?}"),
984                DataError::GpuError(_) => "GpuError".to_string(),
985                DataError::Other(_) => "Other".to_string(),
986            };
987
988            *self.error_counts.entry(error_type).or_insert(0) += 1;
989            *self.severity_counts.entry(error.severity()).or_insert(0) += 1;
990
991            if error.is_recoverable() {
992                self.recoverable_count += 1;
993            } else {
994                self.non_recoverable_count += 1;
995            }
996        }
997
998        /// Get the most common error type
999        pub fn most_common_error(&self) -> Option<(&String, &usize)> {
1000            self.error_counts.iter().max_by_key(|(_, count)| *count)
1001        }
1002
1003        /// Get error rate by severity
1004        pub fn error_rate_by_severity(&self, severity: ErrorSeverity) -> f64 {
1005            if self.total_errors == 0 {
1006                0.0
1007            } else {
1008                *self.severity_counts.get(&severity).unwrap_or(&0) as f64 / self.total_errors as f64
1009            }
1010        }
1011
1012        /// Get recovery rate
1013        pub fn recovery_rate(&self) -> f64 {
1014            if self.total_errors == 0 {
1015                0.0
1016            } else {
1017                self.recoverable_count as f64 / self.total_errors as f64
1018            }
1019        }
1020
1021        /// Generate a diagnostic report
1022        pub fn generate_report(&self) -> String {
1023            let mut report = "Error Statistics Report\n".to_string();
1024            report.push_str(&format!("Total Errors: {}\n", self.total_errors));
1025            report.push_str(&format!(
1026                "Recovery Rate: {:.2}%\n",
1027                self.recovery_rate() * 100.0
1028            ));
1029
1030            report.push_str("\nSeverity Breakdown:\n");
1031            for (severity, count) in &self.severity_counts {
1032                report.push_str(&format!(
1033                    "  {:?}: {} ({:.1}%)\n",
1034                    severity,
1035                    count,
1036                    (*count as f64 / self.total_errors as f64) * 100.0
1037                ));
1038            }
1039
1040            report.push_str("\nMost Common Errors:\n");
1041            let mut sorted_errors: Vec<_> = self.error_counts.iter().collect();
1042            sorted_errors.sort_by_key(|(_, count)| std::cmp::Reverse(**count));
1043
1044            for (error_type, count) in sorted_errors.iter().take(5) {
1045                report.push_str(&format!(
1046                    "  {}: {} ({:.1}%)\n",
1047                    error_type,
1048                    count,
1049                    (**count as f64 / self.total_errors as f64) * 100.0
1050                ));
1051            }
1052
1053            report
1054        }
1055    }
1056
1057    /// Error chain analyzer for debugging
1058    pub struct ErrorChainAnalyzer {
1059        errors: Vec<DataError>,
1060        max_chain_length: usize,
1061    }
1062
1063    impl ErrorChainAnalyzer {
1064        /// Create a new error chain analyzer
1065        pub fn new(max_chain_length: usize) -> Self {
1066            Self {
1067                errors: Vec::new(),
1068                max_chain_length,
1069            }
1070        }
1071
1072        /// Add an error to the chain
1073        pub fn add_error(&mut self, error: DataError) {
1074            if self.errors.len() >= self.max_chain_length {
1075                self.errors.remove(0);
1076            }
1077            self.errors.push(error);
1078        }
1079
1080        /// Analyze error patterns
1081        pub fn analyze_patterns(&self) -> Vec<String> {
1082            let mut patterns = Vec::new();
1083
1084            if self.errors.len() < 2 {
1085                return patterns;
1086            }
1087
1088            // Check for repeated error types
1089            let mut consecutive_same = 1;
1090            for i in 1..self.errors.len() {
1091                if std::mem::discriminant(&self.errors[i])
1092                    == std::mem::discriminant(&self.errors[i - 1])
1093                {
1094                    consecutive_same += 1;
1095                } else {
1096                    if consecutive_same > 2 {
1097                        patterns.push(format!("Repeated error type {consecutive_same} times"));
1098                    }
1099                    consecutive_same = 1;
1100                }
1101            }
1102
1103            // Check for patterns that continue until the end
1104            if consecutive_same > 2 {
1105                patterns.push(format!("Repeated error type {consecutive_same} times"));
1106            }
1107
1108            // Check for escalating severity
1109            let mut severity_escalating = true;
1110            for i in 1..self.errors.len() {
1111                let prev_severity = &self.errors[i - 1].severity();
1112                let curr_severity = &self.errors[i].severity();
1113
1114                match (prev_severity, curr_severity) {
1115                    (ErrorSeverity::Warning, ErrorSeverity::Error)
1116                    | (ErrorSeverity::Warning, ErrorSeverity::Critical)
1117                    | (ErrorSeverity::Error, ErrorSeverity::Critical) => {}
1118                    _ => {
1119                        severity_escalating = false;
1120                        break;
1121                    }
1122                }
1123            }
1124
1125            if severity_escalating && self.errors.len() > 2 {
1126                patterns.push("Error severity is escalating".to_string());
1127            }
1128
1129            patterns
1130        }
1131
1132        /// Get error chain summary
1133        pub fn chain_summary(&self) -> String {
1134            if self.errors.is_empty() {
1135                return "No errors in chain".to_string();
1136            }
1137
1138            let mut summary = format!("Error Chain ({} errors):\n", self.errors.len());
1139
1140            for (i, error) in self.errors.iter().enumerate() {
1141                summary.push_str(&format!(
1142                    "  {}. {:?} - {}\n",
1143                    i + 1,
1144                    error.severity(),
1145                    error
1146                ));
1147            }
1148
1149            let patterns = self.analyze_patterns();
1150            if !patterns.is_empty() {
1151                summary.push_str("\nPatterns Detected:\n");
1152                for pattern in patterns {
1153                    summary.push_str(&format!("  - {pattern}\n"));
1154                }
1155            }
1156
1157            summary
1158        }
1159    }
1160}
1161
1162/// Common error patterns and utilities
1163pub mod patterns {
1164    use super::*;
1165
1166    /// Create index out of bounds error with helpful suggestions
1167    pub fn index_out_of_bounds(index: usize, len: usize) -> DataError {
1168        DataError::dataset(
1169            DatasetErrorKind::IndexOutOfBounds,
1170            format!("Index {index} is out of bounds for dataset of length {len}"),
1171        )
1172        .with_suggestion(format!("Valid indices are 0 to {}", len.saturating_sub(1)))
1173    }
1174
1175    /// Create shape mismatch error with detailed information
1176    pub fn shape_mismatch(expected: &[usize], actual: &[usize], context: &str) -> DataError {
1177        DataError::transform(
1178            TransformErrorKind::IncompatibleDimensions,
1179            context,
1180            format!("Expected shape {expected:?}, got {actual:?}"),
1181        )
1182        .with_suggestion("Ensure input tensors have compatible shapes for the operation")
1183    }
1184
1185    /// Create configuration validation error
1186    pub fn invalid_config<T: fmt::Display>(param: &str, value: T, reason: &str) -> DataError {
1187        DataError::config(
1188            ConfigErrorKind::InvalidValue,
1189            param,
1190            value.to_string(),
1191            reason,
1192        )
1193    }
1194
1195    /// Create memory exhaustion error with resource information
1196    pub fn memory_exhausted(requested: usize, available: Option<usize>) -> DataError {
1197        let mut error = DataError::resource(
1198            ResourceErrorKind::MemoryExhaustion,
1199            "memory",
1200            format!("Requested {requested} bytes"),
1201        )
1202        .with_resource_info(requested, available);
1203
1204        if let Some(avail) = available {
1205            error = error.with_suggestion(format!(
1206                "Reduce batch size or dataset size. {avail} bytes available, {requested} bytes requested"
1207            ));
1208        } else {
1209            error = error.with_suggestion("Reduce batch size or dataset size");
1210        }
1211
1212        error
1213    }
1214
1215    /// Create file not found error with search suggestions
1216    pub fn file_not_found(path: &str, search_paths: &[String]) -> DataError {
1217        let mut error = DataError::io(
1218            IoErrorKind::FileNotFound,
1219            "file access",
1220            format!("File not found: {path}"),
1221        )
1222        .with_path(path);
1223
1224        if !search_paths.is_empty() {
1225            error = error.with_suggestion(format!(
1226                "Check file exists and path is correct. Searched in: {}",
1227                search_paths.join(", ")
1228            ));
1229        }
1230
1231        error
1232    }
1233}
1234
1235#[cfg(test)]
1236mod tests {
1237    use super::*;
1238    use std::time::Duration;
1239
1240    #[test]
1241    fn test_error_creation() {
1242        let error = DataError::dataset(
1243            DatasetErrorKind::IndexOutOfBounds,
1244            "Index 10 out of bounds for dataset of size 5",
1245        )
1246        .with_suggestion("Use valid index between 0-4");
1247
1248        assert!(error.is_recoverable());
1249        assert_eq!(error.severity(), ErrorSeverity::Warning);
1250
1251        let error_str = error.to_string();
1252        assert!(error_str.contains("IndexOutOfBounds"));
1253        assert!(error_str.contains("Suggestion"));
1254    }
1255
1256    #[test]
1257    fn test_patterns() {
1258        let error = patterns::index_out_of_bounds(10, 5);
1259        assert!(error.to_string().contains("Index 10 is out of bounds"));
1260
1261        let error = patterns::shape_mismatch(&[3, 224, 224], &[3, 256, 256], "resize");
1262        assert!(error.to_string().contains("Expected shape"));
1263
1264        let error = patterns::invalid_config("batch_size", -1, "Must be positive");
1265        assert!(error.to_string().contains("batch_size"));
1266    }
1267
1268    #[test]
1269    fn test_error_context() {
1270        let context = ErrorContext::new("loading dataset")
1271            .with_detail("path: /data/train.csv")
1272            .with_detail("format: CSV");
1273
1274        let context_str = context.build_context();
1275        assert!(context_str.contains("loading dataset"));
1276        assert!(context_str.contains("path: /data/train.csv"));
1277    }
1278
1279    #[test]
1280    fn test_batch_info() {
1281        let batch_info = BatchInfo::new(32)
1282            .with_shape(vec![3, 224, 224])
1283            .with_type("f32".to_string());
1284
1285        assert_eq!(batch_info.batch_size, 32);
1286        assert_eq!(batch_info.item_shapes.len(), 1);
1287        assert_eq!(batch_info.item_types.len(), 1);
1288    }
1289
1290    #[test]
1291    fn test_retry_strategy() {
1292        let strategy = recovery::RetryStrategy::new(3)
1293            .with_base_delay(Duration::from_millis(10))
1294            .with_backoff_multiplier(2.0);
1295
1296        assert_eq!(strategy.max_attempts, 3);
1297        assert_eq!(strategy.base_delay, Duration::from_millis(10));
1298
1299        let delay0 = strategy.delay_for_attempt(0);
1300        let delay1 = strategy.delay_for_attempt(1);
1301        assert!(delay1 >= delay0); // Should increase with backoff
1302    }
1303
1304    #[test]
1305    fn test_retry_operation() {
1306        let mut attempt = 0;
1307        let strategy = recovery::RetryStrategy::new(3).with_base_delay(Duration::from_millis(1));
1308
1309        // Test successful retry
1310        let result = recovery::retry_operation(
1311            || {
1312                attempt += 1;
1313                if attempt < 3 {
1314                    Err(DataError::dataloader(
1315                        DataLoaderErrorKind::Timeout,
1316                        "Connection timeout",
1317                    ))
1318                } else {
1319                    Ok(42)
1320                }
1321            },
1322            &strategy,
1323        );
1324
1325        assert_eq!(result.unwrap(), 42);
1326        assert_eq!(attempt, 3);
1327    }
1328
1329    #[test]
1330    fn test_error_statistics() {
1331        let mut stats = diagnostics::ErrorStatistics::new();
1332
1333        let error1 = DataError::dataset(DatasetErrorKind::IndexOutOfBounds, "test");
1334        let error2 = DataError::dataset(DatasetErrorKind::IndexOutOfBounds, "test");
1335        let error3 = DataError::dataloader(DataLoaderErrorKind::Timeout, "test");
1336
1337        stats.record_error(&error1);
1338        stats.record_error(&error2);
1339        stats.record_error(&error3);
1340
1341        assert_eq!(stats.total_errors, 3);
1342        assert_eq!(stats.recoverable_count, 3); // All these errors are recoverable
1343
1344        let report = stats.generate_report();
1345        assert!(report.contains("Total Errors: 3"));
1346        assert!(report.contains("Recovery Rate"));
1347    }
1348
1349    #[test]
1350    fn test_error_chain_analyzer() {
1351        let mut analyzer = diagnostics::ErrorChainAnalyzer::new(5);
1352
1353        let error1 = DataError::dataset(DatasetErrorKind::IndexOutOfBounds, "test");
1354        let error2 = DataError::dataset(DatasetErrorKind::IndexOutOfBounds, "test");
1355        let error3 = DataError::dataset(DatasetErrorKind::IndexOutOfBounds, "test");
1356        let error4 = DataError::dataset(DatasetErrorKind::CorruptedData, "test");
1357
1358        analyzer.add_error(error1);
1359        analyzer.add_error(error2);
1360        analyzer.add_error(error3);
1361        analyzer.add_error(error4);
1362
1363        let patterns = analyzer.analyze_patterns();
1364        assert!(!patterns.is_empty());
1365
1366        let summary = analyzer.chain_summary();
1367        assert!(summary.contains("Error Chain"));
1368    }
1369
1370    #[test]
1371    fn test_recovery_context() {
1372        let error = DataError::dataset(DatasetErrorKind::IndexOutOfBounds, "test");
1373        let mut context = recovery::RecoveryContext::new(error);
1374
1375        assert_eq!(context.attempt, 0);
1376
1377        context.next_attempt();
1378        assert_eq!(context.attempt, 1);
1379
1380        let elapsed = context.total_elapsed();
1381        assert!(elapsed.as_millis() > 0 || elapsed.as_millis() == 0); // Duration is always valid
1382    }
1383}