// memscope_rs/error/recovery.rs

1use super::{ErrorKind, ErrorSeverity, MemScopeError};
2use std::collections::HashMap;
3use std::time::{Duration, Instant};
4
/// Comprehensive recovery strategy system.
///
/// Maps [`ErrorKind`]s to [`RecoveryAction`]s and coordinates retries,
/// fallbacks, graceful degradation, and a circuit breaker so repeated
/// failures cannot cascade through the system.
pub struct RecoveryStrategy {
    /// Recovery actions keyed by error kind; consulted before the
    /// severity-based defaults in `get_recovery_action`.
    action_map: HashMap<ErrorKind, RecoveryAction>,
    /// Default retry parameters used when no per-kind action is registered.
    retry_config: RetryConfig,
    /// Named fallback mechanisms registered by callers.
    fallback_registry: FallbackRegistry,
    /// Circuit breaker for preventing cascade failures.
    circuit_breaker: CircuitBreaker,
}
16
/// Specific recovery actions for different error types.
#[derive(Debug, Clone)]
pub enum RecoveryAction {
    /// Retry the operation with exponential backoff.
    RetryWithBackoff {
        /// Maximum number of attempts before giving up.
        max_attempts: u32,
        /// Delay before the first retry.
        initial_delay: Duration,
        /// Upper bound on any single retry delay.
        max_delay: Duration,
        /// Factor by which the delay grows between attempts.
        backoff_multiplier: f64,
    },
    /// Switch to an alternative implementation.
    Fallback {
        /// Which fallback mechanism to use.
        strategy: FallbackStrategy,
        /// How long the fallback is allowed to run.
        timeout: Duration,
    },
    /// Gracefully degrade functionality.
    Degrade {
        /// How much functionality to shed.
        level: DegradationLevel,
        /// How long the degraded mode should last.
        duration: Duration,
    },
    /// Reset component state.
    Reset {
        /// Name of the component to reset (e.g. "cache", "configuration").
        component: String,
        /// Whether the component's data survives the reset.
        preserve_data: bool,
    },
    /// Skip the operation and continue.
    Skip,
    /// Terminate the operation safely.
    Terminate,
}
47
/// Types of fallback strategies.
#[derive(Debug, Clone)]
pub enum FallbackStrategy {
    /// Use cached data instead of live computation.
    UseCache,
    /// Use a simplified (cheaper, less precise) algorithm.
    SimplifiedAlgorithm,
    /// Use mock/default data.
    MockData,
    /// Delegate to a backup system.
    BackupSystem,
}
60
/// Levels of functionality degradation.
///
/// The derived `PartialOrd` orders variants by declaration order, so
/// `Minimal < Moderate < Significant < Severe` — comparisons rely on this.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum DegradationLevel {
    /// Minimal impact: reduce precision.
    Minimal,
    /// Moderate impact: disable non-essential features.
    Moderate,
    /// Significant impact: basic functionality only.
    Significant,
    /// Severe impact: emergency mode.
    Severe,
}
73
/// Retry configuration parameters used for severity-based default retries.
#[derive(Debug, Clone)]
pub struct RetryConfig {
    /// Default maximum retry attempts.
    pub default_max_attempts: u32,
    /// Default initial retry delay.
    pub default_initial_delay: Duration,
    /// Default maximum retry delay.
    pub default_max_delay: Duration,
    /// Default backoff multiplier.
    pub default_backoff_multiplier: f64,
    /// Whether to add jitter to retry delays.
    /// NOTE(review): not read anywhere in this file — confirm it is consumed
    /// by the code that schedules the actual retries.
    pub enable_jitter: bool,
}
88
/// Registry of fallback mechanisms.
pub struct FallbackRegistry {
    /// Available fallback strategies, keyed by caller-supplied name.
    /// Each strategy is a boxed closure so arbitrary captures are allowed.
    strategies: HashMap<String, Box<dyn Fn() -> Result<(), MemScopeError> + Send + Sync>>,
}
94
/// Circuit breaker pattern implementation.
///
/// Counts failures inside a rolling window; once `failure_threshold` is hit
/// the circuit opens and blocks execution until `timeout` elapses, after
/// which a half-open probe is allowed.
pub struct CircuitBreaker {
    /// Current state of the circuit breaker.
    state: CircuitState,
    /// Failure count in the current window.
    failure_count: u32,
    /// Number of failures that opens the circuit.
    failure_threshold: u32,
    /// Time when the circuit was opened (`Some` only while Open).
    opened_at: Option<Instant>,
    /// How long to wait after opening before allowing a probe.
    timeout: Duration,
    /// Length of the failure-counting window.
    window_duration: Duration,
    /// When the current window started.
    window_start: Instant,
}
112
/// Circuit breaker states.
#[derive(Debug, Clone, PartialEq)]
pub enum CircuitState {
    /// Normal operation; requests flow through.
    Closed,
    /// Failing; requests are blocked.
    Open,
    /// Probing whether the downstream service has recovered.
    HalfOpen,
}
123
124impl RecoveryStrategy {
125    /// Create new recovery strategy with default configuration
126    pub fn new() -> Self {
127        let mut strategy = Self {
128            action_map: HashMap::new(),
129            retry_config: RetryConfig::default(),
130            fallback_registry: FallbackRegistry::new(),
131            circuit_breaker: CircuitBreaker::new(),
132        };
133
134        strategy.setup_default_actions();
135        strategy
136    }
137
138    /// Execute recovery action for given error
139    pub fn recover(&mut self, error: &MemScopeError) -> RecoveryResult {
140        // Check circuit breaker first
141        if !self.circuit_breaker.can_execute() {
142            return RecoveryResult::CircuitOpen;
143        }
144
145        // Get appropriate recovery action
146        let action = self.get_recovery_action(error);
147
148        // Execute recovery action
149        let result = self.execute_action(action, error);
150
151        // Update circuit breaker based on result
152        match &result {
153            RecoveryResult::Success => self.circuit_breaker.record_success(),
154            RecoveryResult::Failed(_) => self.circuit_breaker.record_failure(),
155            _ => {} // Other results don't affect circuit breaker
156        }
157
158        result
159    }
160
161    /// Register custom recovery action for error kind
162    pub fn register_action(&mut self, kind: ErrorKind, action: RecoveryAction) {
163        self.action_map.insert(kind, action);
164    }
165
166    /// Register fallback strategy
167    pub fn register_fallback<F>(&mut self, name: String, strategy: F)
168    where
169        F: Fn() -> Result<(), MemScopeError> + Send + Sync + 'static,
170    {
171        self.fallback_registry.register(name, Box::new(strategy));
172    }
173
174    /// Get circuit breaker status
175    pub fn get_circuit_status(&self) -> CircuitState {
176        self.circuit_breaker.state.clone()
177    }
178
179    /// Force circuit breaker to reset
180    pub fn reset_circuit(&mut self) {
181        self.circuit_breaker.reset();
182    }
183
184    fn setup_default_actions(&mut self) {
185        // Memory errors: retry with backoff
186        self.action_map.insert(
187            ErrorKind::MemoryError,
188            RecoveryAction::RetryWithBackoff {
189                max_attempts: 3,
190                initial_delay: Duration::from_millis(100),
191                max_delay: Duration::from_secs(5),
192                backoff_multiplier: 2.0,
193            },
194        );
195
196        // Configuration errors: reset and retry
197        self.action_map.insert(
198            ErrorKind::ConfigurationError,
199            RecoveryAction::Reset {
200                component: "configuration".to_string(),
201                preserve_data: false,
202            },
203        );
204
205        // I/O errors: fallback to cache
206        self.action_map.insert(
207            ErrorKind::IoError,
208            RecoveryAction::Fallback {
209                strategy: FallbackStrategy::UseCache,
210                timeout: Duration::from_secs(30),
211            },
212        );
213
214        // Symbol resolution errors: degrade gracefully
215        self.action_map.insert(
216            ErrorKind::SymbolResolutionError,
217            RecoveryAction::Degrade {
218                level: DegradationLevel::Minimal,
219                duration: Duration::from_secs(60),
220            },
221        );
222
223        // Stack trace errors: skip and continue
224        self.action_map
225            .insert(ErrorKind::StackTraceError, RecoveryAction::Skip);
226
227        // Cache errors: reset cache
228        self.action_map.insert(
229            ErrorKind::CacheError,
230            RecoveryAction::Reset {
231                component: "cache".to_string(),
232                preserve_data: false,
233            },
234        );
235
236        // Fatal errors: terminate safely
237        self.action_map
238            .insert(ErrorKind::InternalError, RecoveryAction::Terminate);
239    }
240
241    fn get_recovery_action(&self, error: &MemScopeError) -> RecoveryAction {
242        // Check for registered action
243        if let Some(action) = self.action_map.get(&error.kind) {
244            return action.clone();
245        }
246
247        // Fallback based on severity
248        match error.severity {
249            ErrorSeverity::Warning => RecoveryAction::Skip,
250            ErrorSeverity::Error => RecoveryAction::RetryWithBackoff {
251                max_attempts: self.retry_config.default_max_attempts,
252                initial_delay: self.retry_config.default_initial_delay,
253                max_delay: self.retry_config.default_max_delay,
254                backoff_multiplier: self.retry_config.default_backoff_multiplier,
255            },
256            ErrorSeverity::Critical => RecoveryAction::Fallback {
257                strategy: FallbackStrategy::MockData,
258                timeout: Duration::from_secs(10),
259            },
260            ErrorSeverity::Fatal => RecoveryAction::Terminate,
261        }
262    }
263
264    fn execute_action(&mut self, action: RecoveryAction, error: &MemScopeError) -> RecoveryResult {
265        match action {
266            RecoveryAction::RetryWithBackoff { .. } => RecoveryResult::Retry {
267                action,
268                delay: self.calculate_retry_delay(error),
269            },
270            RecoveryAction::Fallback { strategy, .. } => {
271                if let Ok(()) = self.execute_fallback(&strategy) {
272                    RecoveryResult::Success
273                } else {
274                    RecoveryResult::Failed("Fallback strategy failed".to_string())
275                }
276            }
277            RecoveryAction::Degrade { level, duration } => {
278                RecoveryResult::Degraded { level, duration }
279            }
280            RecoveryAction::Reset {
281                component,
282                preserve_data,
283            } => RecoveryResult::Reset {
284                component,
285                preserve_data,
286            },
287            RecoveryAction::Skip => RecoveryResult::Skipped,
288            RecoveryAction::Terminate => RecoveryResult::Terminated,
289        }
290    }
291
292    fn calculate_retry_delay(&self, _error: &MemScopeError) -> Duration {
293        // Simple implementation - could be made more sophisticated
294        self.retry_config.default_initial_delay
295    }
296
297    fn execute_fallback(&self, strategy: &FallbackStrategy) -> Result<(), Box<MemScopeError>> {
298        match strategy {
299            FallbackStrategy::UseCache => {
300                // Implementation would check cache availability
301                Ok(())
302            }
303            FallbackStrategy::SimplifiedAlgorithm => {
304                // Implementation would switch to simpler algorithm
305                Ok(())
306            }
307            FallbackStrategy::MockData => {
308                // Implementation would return mock data
309                Ok(())
310            }
311            FallbackStrategy::BackupSystem => {
312                // Implementation would delegate to backup
313                Ok(())
314            }
315        }
316    }
317}
318
/// Result of recovery action execution.
#[derive(Debug, Clone)]
pub enum RecoveryResult {
    /// Recovery successful; continue normal operation.
    Success,
    /// Caller should retry with the given action after the given delay.
    Retry {
        /// The retry action (carries backoff configuration).
        action: RecoveryAction,
        /// How long to wait before the next attempt.
        delay: Duration,
    },
    /// Operation degraded to the specified level for the given duration.
    Degraded {
        level: DegradationLevel,
        duration: Duration,
    },
    /// A component was reset.
    Reset {
        /// Name of the component that was reset.
        component: String,
        /// Whether the component's data was preserved.
        preserve_data: bool,
    },
    /// Operation was skipped.
    Skipped,
    /// Operation was terminated.
    Terminated,
    /// Circuit breaker is open; operation blocked without attempting recovery.
    CircuitOpen,
    /// Recovery failed; payload is a human-readable reason.
    Failed(String),
}
348
349impl Default for CircuitBreaker {
350    fn default() -> Self {
351        Self::new()
352    }
353}
354
355impl CircuitBreaker {
356    /// Create new circuit breaker with default configuration
357    pub fn new() -> Self {
358        Self {
359            state: CircuitState::Closed,
360            failure_count: 0,
361            failure_threshold: 5,
362            opened_at: None,
363            timeout: Duration::from_secs(60),
364            window_duration: Duration::from_secs(60),
365            window_start: Instant::now(),
366        }
367    }
368
369    /// Check if operation can be executed
370    pub fn can_execute(&mut self) -> bool {
371        self.update_state();
372
373        match self.state {
374            CircuitState::Closed => true,
375            CircuitState::Open => false,
376            CircuitState::HalfOpen => true,
377        }
378    }
379
380    /// Record successful operation
381    pub fn record_success(&mut self) {
382        match self.state {
383            CircuitState::HalfOpen => {
384                self.state = CircuitState::Closed;
385                self.failure_count = 0;
386            }
387            CircuitState::Closed => {
388                // Reset failure count on success
389                self.failure_count = 0;
390            }
391            CircuitState::Open => {} // Shouldn't happen
392        }
393    }
394
395    /// Record failed operation
396    pub fn record_failure(&mut self) {
397        self.failure_count += 1;
398
399        if self.failure_count >= self.failure_threshold {
400            self.state = CircuitState::Open;
401            self.opened_at = Some(Instant::now());
402        }
403    }
404
405    /// Force reset to closed state
406    pub fn reset(&mut self) {
407        self.state = CircuitState::Closed;
408        self.failure_count = 0;
409        self.opened_at = None;
410        self.window_start = Instant::now();
411    }
412
413    fn update_state(&mut self) {
414        // Reset window if expired
415        if self.window_start.elapsed() > self.window_duration {
416            self.window_start = Instant::now();
417            self.failure_count = 0;
418        }
419
420        // Check if we should transition from Open to HalfOpen
421        if self.state == CircuitState::Open {
422            if let Some(opened_at) = self.opened_at {
423                if opened_at.elapsed() > self.timeout {
424                    self.state = CircuitState::HalfOpen;
425                }
426            }
427        }
428    }
429}
430
431impl Default for FallbackRegistry {
432    fn default() -> Self {
433        Self::new()
434    }
435}
436
437impl FallbackRegistry {
438    pub fn new() -> Self {
439        Self {
440            strategies: HashMap::new(),
441        }
442    }
443
444    pub fn register<F>(&mut self, name: String, strategy: F)
445    where
446        F: Fn() -> Result<(), MemScopeError> + Send + Sync + 'static,
447    {
448        self.strategies.insert(name, Box::new(strategy));
449    }
450
451    pub fn execute(&self, name: &str) -> Result<(), Box<MemScopeError>> {
452        if let Some(strategy) = self.strategies.get(name) {
453            strategy().map_err(Box::new)
454        } else {
455            Err(Box::new(MemScopeError::new(
456                ErrorKind::ConfigurationError,
457                &format!("Fallback strategy '{}' not found", name),
458            )))
459        }
460    }
461}
462
463impl Default for RetryConfig {
464    fn default() -> Self {
465        Self {
466            default_max_attempts: 3,
467            default_initial_delay: Duration::from_millis(100),
468            default_max_delay: Duration::from_secs(10),
469            default_backoff_multiplier: 2.0,
470            enable_jitter: true,
471        }
472    }
473}
474
475impl Default for RecoveryStrategy {
476    fn default() -> Self {
477        Self::new()
478    }
479}
480
#[cfg(test)]
mod tests {
    use super::*;

    // A freshly built strategy must come pre-wired with default actions for
    // the well-known error kinds.
    #[test]
    fn test_recovery_strategy_creation() {
        let strategy = RecoveryStrategy::new();
        assert!(strategy.action_map.contains_key(&ErrorKind::MemoryError));
        assert!(strategy
            .action_map
            .contains_key(&ErrorKind::ConfigurationError));
    }

    // Breaker starts Closed and opens once the failure threshold (5) is hit.
    #[test]
    fn test_circuit_breaker_basic() {
        let mut breaker = CircuitBreaker::new();

        // Should start closed
        assert!(breaker.can_execute());
        assert_eq!(breaker.state, CircuitState::Closed);

        // Record failures to open circuit
        for _ in 0..5 {
            breaker.record_failure();
        }

        assert!(!breaker.can_execute());
        assert_eq!(breaker.state, CircuitState::Open);
    }

    // Memory errors map to the default RetryWithBackoff action, which
    // surfaces as RecoveryResult::Retry.
    #[test]
    fn test_recovery_action_selection() {
        let mut strategy = RecoveryStrategy::new();

        let memory_error = MemScopeError::new(ErrorKind::MemoryError, "allocation failed");
        let result = strategy.recover(&memory_error);

        match result {
            RecoveryResult::Retry { .. } => {} // Expected
            _ => panic!("Expected retry for memory error"),
        }
    }

    // Registered strategies execute by name; unknown names are an error.
    #[test]
    fn test_fallback_registry() {
        let mut registry = FallbackRegistry::new();

        registry.register("test_fallback".to_string(), || Ok(()));

        assert!(registry.execute("test_fallback").is_ok());
        assert!(registry.execute("nonexistent").is_err());
    }

    // Derived PartialOrd orders variants by declaration order.
    #[test]
    fn test_degradation_levels() {
        let levels = [
            DegradationLevel::Minimal,
            DegradationLevel::Moderate,
            DegradationLevel::Significant,
            DegradationLevel::Severe,
        ];

        // Test ordering
        assert!(levels[0] < levels[1]);
        assert!(levels[1] < levels[2]);
        assert!(levels[2] < levels[3]);
    }
}