// mockforge_chaos/auto_remediation.rs

//! Auto-remediation engine for chaos recommendations
//!
//! Automatically applies low-risk chaos recommendations with safety checks,
//! rollback mechanisms, and approval workflows.
5
use crate::recommendations::{Recommendation, RecommendationCategory, RecommendationSeverity};
use chrono::{DateTime, Duration, Utc};
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, VecDeque};
use std::sync::Arc;
use std::time::Instant;
use uuid::Uuid;
13
14/// Auto-remediation configuration
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct RemediationConfig {
17    /// Enable auto-remediation
18    pub enabled: bool,
19    /// Auto-apply recommendations with severity at or below this level
20    pub max_auto_severity: RecommendationSeverity,
21    /// Require manual approval for these categories
22    pub require_approval_categories: Vec<RecommendationCategory>,
23    /// Maximum concurrent remediations
24    pub max_concurrent: usize,
25    /// Cooldown period between remediations (minutes)
26    pub cooldown_minutes: i64,
27    /// Auto-rollback on failure
28    pub auto_rollback: bool,
29    /// Dry-run mode (don't actually apply)
30    pub dry_run: bool,
31    /// Maximum retries on failure
32    pub max_retries: u32,
33}
34
35impl Default for RemediationConfig {
36    fn default() -> Self {
37        Self {
38            enabled: false, // Disabled by default for safety
39            max_auto_severity: RecommendationSeverity::Low,
40            require_approval_categories: vec![
41                RecommendationCategory::FaultInjection,
42                RecommendationCategory::CircuitBreaker,
43            ],
44            max_concurrent: 1,
45            cooldown_minutes: 30,
46            auto_rollback: true,
47            dry_run: false,
48            max_retries: 3,
49        }
50    }
51}
52
53/// Remediation action status
54#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
55#[serde(rename_all = "lowercase")]
56pub enum RemediationStatus {
57    Pending,
58    AwaitingApproval,
59    Approved,
60    Rejected,
61    Applying,
62    Applied,
63    Failed,
64    RolledBack,
65    Cancelled,
66}
67
68/// Applied remediation action
69#[derive(Debug, Clone, Serialize, Deserialize)]
70pub struct RemediationAction {
71    /// Unique action ID
72    pub id: String,
73    /// Source recommendation ID
74    pub recommendation_id: String,
75    /// Status
76    pub status: RemediationStatus,
77    /// Created at
78    pub created_at: DateTime<Utc>,
79    /// Applied at (if applied)
80    pub applied_at: Option<DateTime<Utc>>,
81    /// Completed at (success or failure)
82    pub completed_at: Option<DateTime<Utc>>,
83    /// Applied configuration changes
84    pub config_changes: HashMap<String, String>,
85    /// Rollback data (to restore previous state)
86    pub rollback_data: Option<RollbackData>,
87    /// Execution logs
88    pub logs: Vec<String>,
89    /// Success indicator
90    pub success: bool,
91    /// Error message (if failed)
92    pub error: Option<String>,
93    /// Retry count
94    pub retry_count: u32,
95    /// Approved by (if approval was required)
96    pub approved_by: Option<String>,
97    /// Approval timestamp
98    pub approved_at: Option<DateTime<Utc>>,
99}
100
101/// Rollback data to restore previous state
102#[derive(Debug, Clone, Serialize, Deserialize)]
103pub struct RollbackData {
104    pub previous_config: HashMap<String, String>,
105    pub restore_commands: Vec<String>,
106    pub created_at: DateTime<Utc>,
107}
108
109/// Remediation result
110#[derive(Debug, Clone, Serialize, Deserialize)]
111pub struct RemediationResult {
112    pub action_id: String,
113    pub success: bool,
114    pub message: String,
115    pub applied_changes: Vec<String>,
116    pub duration_ms: u64,
117}
118
119/// Remediation effectiveness metrics
120#[derive(Debug, Clone, Serialize, Deserialize)]
121pub struct EffectivenessMetrics {
122    pub recommendation_id: String,
123    pub action_id: String,
124    /// Metrics before remediation
125    pub before_metrics: SystemMetrics,
126    /// Metrics after remediation
127    pub after_metrics: SystemMetrics,
128    /// Improvement score (0.0 - 1.0, higher is better)
129    pub improvement_score: f64,
130    /// Measurement period
131    pub measurement_period_hours: i64,
132    pub measured_at: DateTime<Utc>,
133}
134
135/// System metrics for effectiveness comparison
136#[derive(Debug, Clone, Serialize, Deserialize)]
137pub struct SystemMetrics {
138    pub error_rate: f64,
139    pub avg_latency_ms: f64,
140    pub p95_latency_ms: f64,
141    pub p99_latency_ms: f64,
142    pub success_rate: f64,
143    pub chaos_impact: f64,
144    pub resilience_score: f64,
145}
146
147/// Approval request
148#[derive(Debug, Clone, Serialize, Deserialize)]
149pub struct ApprovalRequest {
150    pub action_id: String,
151    pub recommendation: Recommendation,
152    pub proposed_changes: HashMap<String, String>,
153    pub risk_assessment: RiskAssessment,
154    pub created_at: DateTime<Utc>,
155    pub expires_at: DateTime<Utc>,
156}
157
158/// Risk assessment for remediation
159#[derive(Debug, Clone, Serialize, Deserialize)]
160pub struct RiskAssessment {
161    pub risk_level: RiskLevel,
162    pub impact_scope: Vec<String>,
163    pub reversible: bool,
164    pub estimated_downtime_ms: u64,
165    pub safety_checks: Vec<SafetyCheck>,
166}
167
168/// Risk level
169#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
170#[serde(rename_all = "lowercase")]
171pub enum RiskLevel {
172    Minimal,
173    Low,
174    Medium,
175    High,
176    Critical,
177}
178
179/// Safety check
180#[derive(Debug, Clone, Serialize, Deserialize)]
181pub struct SafetyCheck {
182    pub name: String,
183    pub passed: bool,
184    pub message: String,
185}
186
187/// Auto-remediation engine
188pub struct RemediationEngine {
189    config: Arc<RwLock<RemediationConfig>>,
190    actions: Arc<RwLock<HashMap<String, RemediationAction>>>,
191    effectiveness_metrics: Arc<RwLock<HashMap<String, EffectivenessMetrics>>>,
192    approval_queue: Arc<RwLock<VecDeque<ApprovalRequest>>>,
193    action_history: Arc<RwLock<VecDeque<RemediationAction>>>,
194    max_history: usize,
195}
196
197impl RemediationEngine {
198    /// Create a new remediation engine
199    pub fn new() -> Self {
200        Self::with_config(RemediationConfig::default())
201    }
202
203    /// Create with custom configuration
204    pub fn with_config(config: RemediationConfig) -> Self {
205        Self {
206            config: Arc::new(RwLock::new(config)),
207            actions: Arc::new(RwLock::new(HashMap::new())),
208            effectiveness_metrics: Arc::new(RwLock::new(HashMap::new())),
209            approval_queue: Arc::new(RwLock::new(VecDeque::new())),
210            action_history: Arc::new(RwLock::new(VecDeque::new())),
211            max_history: 1000,
212        }
213    }
214
215    /// Update configuration
216    pub fn update_config(&self, config: RemediationConfig) {
217        let mut cfg = self.config.write();
218        *cfg = config;
219    }
220
221    /// Get current configuration
222    pub fn get_config(&self) -> RemediationConfig {
223        self.config.read().clone()
224    }
225
226    /// Process a recommendation for auto-remediation
227    pub fn process_recommendation(
228        &self,
229        recommendation: &Recommendation,
230    ) -> Result<String, String> {
231        let config = self.config.read().clone();
232
233        if !config.enabled {
234            return Err("Auto-remediation is disabled".to_string());
235        }
236
237        // Check cooldown
238        if !self.check_cooldown(&config) {
239            return Err("Cooldown period not elapsed".to_string());
240        }
241
242        // Check concurrent limit
243        if !self.check_concurrent_limit(&config) {
244            return Err("Maximum concurrent remediations reached".to_string());
245        }
246
247        // Assess risk
248        let risk_assessment = self.assess_risk(recommendation);
249
250        // Determine if approval is needed
251        let needs_approval = self.needs_approval(recommendation, &config, &risk_assessment);
252
253        // Create remediation action
254        let action = self.create_action(recommendation, risk_assessment.clone());
255        let action_id = action.id.clone();
256
257        // Store action
258        {
259            let mut actions = self.actions.write();
260            actions.insert(action_id.clone(), action.clone());
261        }
262
263        if needs_approval {
264            // Queue for approval
265            self.queue_for_approval(action_id.clone(), recommendation.clone(), risk_assessment);
266            self.update_action_status(&action_id, RemediationStatus::AwaitingApproval);
267            Ok(format!("Action {} queued for approval", action_id))
268        } else {
269            // Auto-apply
270            self.apply_action(&action_id)?;
271            Ok(format!("Action {} applied successfully", action_id))
272        }
273    }
274
275    /// Create a remediation action from recommendation
276    fn create_action(
277        &self,
278        recommendation: &Recommendation,
279        _risk_assessment: RiskAssessment,
280    ) -> RemediationAction {
281        let config_changes = self.extract_config_changes(recommendation);
282        let rollback_data = self.create_rollback_data(&config_changes);
283
284        RemediationAction {
285            id: format!("action-{}", Uuid::new_v4()),
286            recommendation_id: recommendation.id.clone(),
287            status: RemediationStatus::Pending,
288            created_at: Utc::now(),
289            applied_at: None,
290            completed_at: None,
291            config_changes,
292            rollback_data: Some(rollback_data),
293            logs: vec![format!(
294                "Action created from recommendation: {}",
295                recommendation.title
296            )],
297            success: false,
298            error: None,
299            retry_count: 0,
300            approved_by: None,
301            approved_at: None,
302        }
303    }
304
305    /// Extract configuration changes from recommendation
306    fn extract_config_changes(&self, recommendation: &Recommendation) -> HashMap<String, String> {
307        let mut changes = HashMap::new();
308
309        // Parse recommendation action to determine config changes
310        match recommendation.category {
311            RecommendationCategory::Latency => {
312                if let Some(ref example) = recommendation.example {
313                    if let Some(latency) = self.extract_latency_value(example) {
314                        changes.insert("chaos_latency_ms".to_string(), latency.to_string());
315                    }
316                }
317            }
318            RecommendationCategory::FaultInjection => {
319                changes.insert("chaos_fault_probability".to_string(), "0.3".to_string());
320            }
321            RecommendationCategory::RateLimit => {
322                changes.insert("chaos_rate_limit".to_string(), "100".to_string());
323            }
324            _ => {}
325        }
326
327        changes
328    }
329
330    /// Extract latency value from example command
331    fn extract_latency_value(&self, example: &str) -> Option<u64> {
332        // Parse: --chaos-latency-ms 1500
333        example
334            .split_whitespace()
335            .position(|s| s == "--chaos-latency-ms")
336            .and_then(|i| example.split_whitespace().nth(i + 1))
337            .and_then(|v| v.parse().ok())
338    }
339
340    /// Create rollback data
341    fn create_rollback_data(&self, config_changes: &HashMap<String, String>) -> RollbackData {
342        // In a real implementation, this would capture current config values
343        let mut previous_config = HashMap::new();
344        for key in config_changes.keys() {
345            previous_config.insert(key.clone(), "default".to_string());
346        }
347
348        RollbackData {
349            previous_config,
350            restore_commands: vec!["mockforge serve --reset-chaos".to_string()],
351            created_at: Utc::now(),
352        }
353    }
354
355    /// Assess risk of applying recommendation
356    fn assess_risk(&self, recommendation: &Recommendation) -> RiskAssessment {
357        let risk_level = match recommendation.severity {
358            RecommendationSeverity::Info => RiskLevel::Minimal,
359            RecommendationSeverity::Low => RiskLevel::Low,
360            RecommendationSeverity::Medium => RiskLevel::Medium,
361            RecommendationSeverity::High => RiskLevel::High,
362            RecommendationSeverity::Critical => RiskLevel::Critical,
363        };
364
365        let safety_checks = vec![
366            SafetyCheck {
367                name: "configuration_valid".to_string(),
368                passed: true,
369                message: "Configuration changes are valid".to_string(),
370            },
371            SafetyCheck {
372                name: "rollback_available".to_string(),
373                passed: true,
374                message: "Rollback mechanism available".to_string(),
375            },
376        ];
377
378        RiskAssessment {
379            risk_level,
380            impact_scope: recommendation.affected_endpoints.clone(),
381            reversible: true,
382            estimated_downtime_ms: 0,
383            safety_checks,
384        }
385    }
386
387    /// Check if recommendation needs approval
388    fn needs_approval(
389        &self,
390        recommendation: &Recommendation,
391        config: &RemediationConfig,
392        risk: &RiskAssessment,
393    ) -> bool {
394        // Require approval if severity is above threshold
395        if recommendation.severity > config.max_auto_severity {
396            return true;
397        }
398
399        // Require approval for specific categories
400        if config.require_approval_categories.contains(&recommendation.category) {
401            return true;
402        }
403
404        // Require approval if risk is high
405        if risk.risk_level >= RiskLevel::High {
406            return true;
407        }
408
409        // Require approval if not reversible
410        if !risk.reversible {
411            return true;
412        }
413
414        false
415    }
416
417    /// Queue action for approval
418    fn queue_for_approval(
419        &self,
420        action_id: String,
421        recommendation: Recommendation,
422        risk: RiskAssessment,
423    ) {
424        let mut changes = HashMap::new();
425        changes.insert("example".to_string(), recommendation.example.clone().unwrap_or_default());
426
427        let request = ApprovalRequest {
428            action_id,
429            recommendation,
430            proposed_changes: changes,
431            risk_assessment: risk,
432            created_at: Utc::now(),
433            expires_at: Utc::now() + Duration::hours(24),
434        };
435
436        let mut queue = self.approval_queue.write();
437        queue.push_back(request);
438    }
439
440    /// Get pending approval requests
441    pub fn get_approval_queue(&self) -> Vec<ApprovalRequest> {
442        let queue = self.approval_queue.read();
443        queue.iter().cloned().collect()
444    }
445
446    /// Approve a remediation action
447    pub fn approve_action(&self, action_id: &str, approver: &str) -> Result<(), String> {
448        // Remove from approval queue
449        {
450            let mut queue = self.approval_queue.write();
451            queue.retain(|req| req.action_id != action_id);
452        }
453
454        // Update action
455        {
456            let mut actions = self.actions.write();
457            if let Some(action) = actions.get_mut(action_id) {
458                action.status = RemediationStatus::Approved;
459                action.approved_by = Some(approver.to_string());
460                action.approved_at = Some(Utc::now());
461                action.logs.push(format!("Approved by {}", approver));
462            } else {
463                return Err("Action not found".to_string());
464            }
465        }
466
467        // Apply the action
468        self.apply_action(action_id)?;
469
470        Ok(())
471    }
472
473    /// Reject a remediation action
474    pub fn reject_action(&self, action_id: &str, reason: &str) -> Result<(), String> {
475        // Remove from approval queue
476        {
477            let mut queue = self.approval_queue.write();
478            queue.retain(|req| req.action_id != action_id);
479        }
480
481        self.update_action_status(action_id, RemediationStatus::Rejected);
482        self.add_action_log(action_id, &format!("Rejected: {}", reason));
483
484        Ok(())
485    }
486
487    /// Apply a remediation action
488    fn apply_action(&self, action_id: &str) -> Result<RemediationResult, String> {
489        let config = self.config.read().clone();
490        let start_time = Utc::now();
491
492        self.update_action_status(action_id, RemediationStatus::Applying);
493
494        // Get action
495        let action = {
496            let actions = self.actions.read();
497            actions.get(action_id).cloned().ok_or_else(|| "Action not found".to_string())?
498        };
499
500        if config.dry_run {
501            self.add_action_log(action_id, "Dry-run mode: changes not actually applied");
502            self.update_action_status(action_id, RemediationStatus::Applied);
503
504            return Ok(RemediationResult {
505                action_id: action_id.to_string(),
506                success: true,
507                message: "Dry-run completed successfully".to_string(),
508                applied_changes: action.config_changes.keys().cloned().collect(),
509                duration_ms: (Utc::now() - start_time).num_milliseconds() as u64,
510            });
511        }
512
513        // Apply changes (in real implementation, this would modify actual config)
514        let applied_changes: Vec<String> =
515            action.config_changes.iter().map(|(k, v)| format!("{} = {}", k, v)).collect();
516
517        self.add_action_log(action_id, &format!("Applied changes: {:?}", applied_changes));
518
519        // Update action
520        {
521            let mut actions = self.actions.write();
522            if let Some(action) = actions.get_mut(action_id) {
523                action.status = RemediationStatus::Applied;
524                action.success = true;
525                action.applied_at = Some(Utc::now());
526                action.completed_at = Some(Utc::now());
527            }
528        }
529
530        // Add to history
531        self.add_to_history(action);
532
533        Ok(RemediationResult {
534            action_id: action_id.to_string(),
535            success: true,
536            message: "Remediation applied successfully".to_string(),
537            applied_changes: applied_changes.to_vec(),
538            duration_ms: (Utc::now() - start_time).num_milliseconds() as u64,
539        })
540    }
541
542    /// Rollback a remediation action
543    pub fn rollback_action(&self, action_id: &str) -> Result<(), String> {
544        let action = {
545            let actions = self.actions.read();
546            actions.get(action_id).cloned().ok_or_else(|| "Action not found".to_string())?
547        };
548
549        if action.status != RemediationStatus::Applied {
550            return Err("Can only rollback applied actions".to_string());
551        }
552
553        let rollback_data =
554            action.rollback_data.ok_or_else(|| "No rollback data available".to_string())?;
555
556        self.add_action_log(action_id, "Rolling back changes");
557
558        // Apply rollback (in real implementation, this would restore config)
559        for cmd in &rollback_data.restore_commands {
560            self.add_action_log(action_id, &format!("Executing: {}", cmd));
561        }
562
563        self.update_action_status(action_id, RemediationStatus::RolledBack);
564        self.add_action_log(action_id, "Rollback completed");
565
566        Ok(())
567    }
568
569    /// Record effectiveness metrics
570    pub fn record_effectiveness(
571        &self,
572        recommendation_id: &str,
573        action_id: &str,
574        before: SystemMetrics,
575        after: SystemMetrics,
576        measurement_period_hours: i64,
577    ) {
578        let improvement_score = self.calculate_improvement_score(&before, &after);
579
580        let metrics = EffectivenessMetrics {
581            recommendation_id: recommendation_id.to_string(),
582            action_id: action_id.to_string(),
583            before_metrics: before,
584            after_metrics: after,
585            improvement_score,
586            measurement_period_hours,
587            measured_at: Utc::now(),
588        };
589
590        let mut effectiveness = self.effectiveness_metrics.write();
591        effectiveness.insert(action_id.to_string(), metrics);
592    }
593
594    /// Calculate improvement score
595    fn calculate_improvement_score(&self, before: &SystemMetrics, after: &SystemMetrics) -> f64 {
596        let mut score = 0.0;
597        let mut weight_total = 0.0;
598
599        // Error rate improvement (weight: 0.3)
600        if before.error_rate > 0.0 {
601            let error_improvement = (before.error_rate - after.error_rate) / before.error_rate;
602            score += error_improvement * 0.3;
603            weight_total += 0.3;
604        }
605
606        // Latency improvement (weight: 0.2)
607        if before.avg_latency_ms > 0.0 {
608            let latency_improvement =
609                (before.avg_latency_ms - after.avg_latency_ms) / before.avg_latency_ms;
610            score += latency_improvement * 0.2;
611            weight_total += 0.2;
612        }
613
614        // Success rate improvement (weight: 0.25)
615        let success_improvement = after.success_rate - before.success_rate;
616        score += success_improvement * 0.25;
617        weight_total += 0.25;
618
619        // Resilience improvement (weight: 0.25)
620        let resilience_improvement = after.resilience_score - before.resilience_score;
621        score += resilience_improvement * 0.25;
622        weight_total += 0.25;
623
624        if weight_total > 0.0 {
625            (score / weight_total).clamp(0.0, 1.0)
626        } else {
627            0.0
628        }
629    }
630
631    /// Get effectiveness metrics for an action
632    pub fn get_effectiveness(&self, action_id: &str) -> Option<EffectivenessMetrics> {
633        let metrics = self.effectiveness_metrics.read();
634        metrics.get(action_id).cloned()
635    }
636
637    /// Get all effectiveness metrics
638    pub fn get_all_effectiveness(&self) -> Vec<EffectivenessMetrics> {
639        let metrics = self.effectiveness_metrics.read();
640        metrics.values().cloned().collect()
641    }
642
643    /// Get action by ID
644    pub fn get_action(&self, action_id: &str) -> Option<RemediationAction> {
645        let actions = self.actions.read();
646        actions.get(action_id).cloned()
647    }
648
649    /// Get all active actions
650    pub fn get_active_actions(&self) -> Vec<RemediationAction> {
651        let actions = self.actions.read();
652        actions
653            .values()
654            .filter(|a| {
655                matches!(
656                    a.status,
657                    RemediationStatus::Pending
658                        | RemediationStatus::Applying
659                        | RemediationStatus::Applied
660                )
661            })
662            .cloned()
663            .collect()
664    }
665
666    /// Get action history
667    pub fn get_history(&self, limit: usize) -> Vec<RemediationAction> {
668        let history = self.action_history.read();
669        history.iter().take(limit).cloned().collect()
670    }
671
672    /// Get statistics
673    pub fn get_stats(&self) -> RemediationStats {
674        let actions = self.actions.read();
675        let history = self.action_history.read();
676
677        let total_actions = actions.len() + history.len();
678        let successful = actions.values().filter(|a| a.success).count()
679            + history.iter().filter(|a| a.success).count();
680        let failed = actions.values().filter(|a| a.status == RemediationStatus::Failed).count()
681            + history.iter().filter(|a| a.status == RemediationStatus::Failed).count();
682        let pending_approval = actions
683            .values()
684            .filter(|a| a.status == RemediationStatus::AwaitingApproval)
685            .count();
686        let rolled_back =
687            history.iter().filter(|a| a.status == RemediationStatus::RolledBack).count();
688
689        let effectiveness_metrics = self.effectiveness_metrics.read();
690        let avg_improvement = if effectiveness_metrics.is_empty() {
691            0.0
692        } else {
693            effectiveness_metrics.values().map(|m| m.improvement_score).sum::<f64>()
694                / effectiveness_metrics.len() as f64
695        };
696
697        RemediationStats {
698            total_actions,
699            successful_actions: successful,
700            failed_actions: failed,
701            pending_approval,
702            rolled_back,
703            avg_improvement_score: avg_improvement,
704            total_effectiveness_measurements: effectiveness_metrics.len(),
705        }
706    }
707
708    // Helper methods
709
710    fn check_cooldown(&self, config: &RemediationConfig) -> bool {
711        let actions = self.actions.read();
712        let cooldown_threshold = Utc::now() - Duration::minutes(config.cooldown_minutes);
713
714        !actions.values().any(|a| {
715            a.status == RemediationStatus::Applied
716                && a.completed_at.is_some_and(|t| t > cooldown_threshold)
717        })
718    }
719
720    fn check_concurrent_limit(&self, config: &RemediationConfig) -> bool {
721        let actions = self.actions.read();
722        let active_count = actions
723            .values()
724            .filter(|a| matches!(a.status, RemediationStatus::Applying))
725            .count();
726
727        active_count < config.max_concurrent
728    }
729
730    fn update_action_status(&self, action_id: &str, status: RemediationStatus) {
731        let mut actions = self.actions.write();
732        if let Some(action) = actions.get_mut(action_id) {
733            action.status = status;
734        }
735    }
736
737    fn add_action_log(&self, action_id: &str, message: &str) {
738        let mut actions = self.actions.write();
739        if let Some(action) = actions.get_mut(action_id) {
740            action
741                .logs
742                .push(format!("[{}] {}", Utc::now().format("%Y-%m-%d %H:%M:%S"), message));
743        }
744    }
745
746    fn add_to_history(&self, action: RemediationAction) {
747        let mut history = self.action_history.write();
748        history.push_front(action);
749        if history.len() > self.max_history {
750            history.pop_back();
751        }
752    }
753}
754
755impl Default for RemediationEngine {
756    fn default() -> Self {
757        Self::new()
758    }
759}
760
761/// Remediation statistics
762#[derive(Debug, Clone, Serialize, Deserialize)]
763pub struct RemediationStats {
764    pub total_actions: usize,
765    pub successful_actions: usize,
766    pub failed_actions: usize,
767    pub pending_approval: usize,
768    pub rolled_back: usize,
769    pub avg_improvement_score: f64,
770    pub total_effectiveness_measurements: usize,
771}
772
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a metrics sample; latency percentiles are derived from the
    /// average (p95 = 1.5x, p99 = 2x).
    fn sample_metrics(
        error_rate: f64,
        avg_latency_ms: f64,
        success_rate: f64,
        chaos_impact: f64,
        resilience_score: f64,
    ) -> SystemMetrics {
        SystemMetrics {
            error_rate,
            avg_latency_ms,
            p95_latency_ms: avg_latency_ms * 1.5,
            p99_latency_ms: avg_latency_ms * 2.0,
            success_rate,
            chaos_impact,
            resilience_score,
        }
    }

    /// A freshly created engine must start disabled.
    #[test]
    fn test_engine_creation() {
        let config = RemediationEngine::new().get_config();
        assert!(!config.enabled);
    }

    /// Replacing the configuration must be visible through `get_config`.
    #[test]
    fn test_config_update() {
        let engine = RemediationEngine::new();
        engine.update_config(RemediationConfig {
            enabled: true,
            ..Default::default()
        });
        assert!(engine.get_config().enabled);
    }

    /// An across-the-board improvement must score in `(0.0, 1.0]`.
    #[test]
    fn test_improvement_score_calculation() {
        let engine = RemediationEngine::new();
        let before = sample_metrics(0.5, 1000.0, 0.5, 0.8, 0.3);
        let after = sample_metrics(0.2, 500.0, 0.8, 0.4, 0.7);

        let score = engine.calculate_improvement_score(&before, &after);
        assert!(score > 0.0 && score <= 1.0);
    }
}
// mockforge_chaos/auto_remediation.rs