mockforge_chaos/
scenarios.rs

1//! Chaos engineering scenarios for orchestrating complex failure patterns
2
3use crate::config::{ChaosConfig, FaultInjectionConfig, LatencyConfig};
4use chrono::{DateTime, Utc};
5use parking_lot::RwLock;
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::sync::Arc;
9use tracing::{debug, info};
10
11/// A chaos engineering scenario
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct ChaosScenario {
14    /// Scenario name
15    pub name: String,
16    /// Scenario description
17    pub description: Option<String>,
18    /// Chaos configuration to apply
19    pub chaos_config: ChaosConfig,
20    /// Duration in seconds (0 = infinite)
21    pub duration_seconds: u64,
22    /// Start time (None = start immediately)
23    pub start_time: Option<DateTime<Utc>>,
24    /// End time (None = run indefinitely or until duration expires)
25    pub end_time: Option<DateTime<Utc>>,
26    /// Tags for organization
27    pub tags: Vec<String>,
28}
29
30impl ChaosScenario {
31    /// Create a new chaos scenario
32    pub fn new(name: impl Into<String>, chaos_config: ChaosConfig) -> Self {
33        Self {
34            name: name.into(),
35            description: None,
36            chaos_config,
37            duration_seconds: 0,
38            start_time: None,
39            end_time: None,
40            tags: Vec::new(),
41        }
42    }
43
44    /// Set description
45    pub fn with_description(mut self, description: impl Into<String>) -> Self {
46        self.description = Some(description.into());
47        self
48    }
49
50    /// Set duration
51    pub fn with_duration(mut self, seconds: u64) -> Self {
52        self.duration_seconds = seconds;
53        self
54    }
55
56    /// Set start time
57    pub fn with_start_time(mut self, start: DateTime<Utc>) -> Self {
58        self.start_time = Some(start);
59        self
60    }
61
62    /// Add tags
63    pub fn with_tags(mut self, tags: Vec<String>) -> Self {
64        self.tags = tags;
65        self
66    }
67
68    /// Check if scenario is currently active
69    pub fn is_active(&self) -> bool {
70        let now = Utc::now();
71
72        // Check start time
73        if let Some(start) = self.start_time {
74            if now < start {
75                return false;
76            }
77        }
78
79        // Check end time
80        if let Some(end) = self.end_time {
81            if now > end {
82                return false;
83            }
84        }
85
86        true
87    }
88}
89
90/// Predefined chaos scenarios
91pub struct PredefinedScenarios;
92
93impl PredefinedScenarios {
94    /// Network degradation scenario (high latency, packet loss)
95    pub fn network_degradation() -> ChaosScenario {
96        ChaosScenario::new(
97            "network_degradation",
98            ChaosConfig {
99                enabled: true,
100                latency: Some(LatencyConfig {
101                    enabled: true,
102                    fixed_delay_ms: Some(500),
103                    random_delay_range_ms: None,
104                    jitter_percent: 20.0,
105                    probability: 0.8,
106                }),
107                traffic_shaping: Some(crate::config::TrafficShapingConfig {
108                    enabled: true,
109                    packet_loss_percent: 5.0,
110                    bandwidth_limit_bps: 100_000, // 100KB/s
111                    ..Default::default()
112                }),
113                ..Default::default()
114            },
115        )
116        .with_description("Simulates degraded network conditions with high latency and packet loss")
117        .with_tags(vec!["network".to_string(), "latency".to_string()])
118    }
119
120    /// Service instability scenario (random errors)
121    pub fn service_instability() -> ChaosScenario {
122        ChaosScenario::new(
123            "service_instability",
124            ChaosConfig {
125                enabled: true,
126                fault_injection: Some(FaultInjectionConfig {
127                    enabled: true,
128                    http_errors: vec![500, 502, 503, 504],
129                    http_error_probability: 0.2,
130                    timeout_errors: true,
131                    timeout_probability: 0.1,
132                    ..Default::default()
133                }),
134                ..Default::default()
135            },
136        )
137        .with_description("Simulates an unstable service with random errors and timeouts")
138        .with_tags(vec!["service".to_string(), "errors".to_string()])
139    }
140
141    /// Cascading failure scenario (combined failures)
142    pub fn cascading_failure() -> ChaosScenario {
143        ChaosScenario::new(
144            "cascading_failure",
145            ChaosConfig {
146                enabled: true,
147                latency: Some(LatencyConfig {
148                    enabled: true,
149                    fixed_delay_ms: None,
150                    random_delay_range_ms: Some((1000, 5000)),
151                    jitter_percent: 30.0,
152                    probability: 0.7,
153                }),
154                fault_injection: Some(FaultInjectionConfig {
155                    enabled: true,
156                    http_errors: vec![500, 503],
157                    http_error_probability: 0.3,
158                    timeout_errors: true,
159                    timeout_probability: 0.2,
160                    connection_errors: true,
161                    connection_error_probability: 0.1,
162                    ..Default::default()
163                }),
164                rate_limit: Some(crate::config::RateLimitConfig {
165                    enabled: true,
166                    requests_per_second: 10,
167                    burst_size: 2,
168                    per_ip: true,
169                    per_endpoint: false,
170                }),
171                ..Default::default()
172            },
173        )
174        .with_description("Simulates a cascading failure with multiple simultaneous issues")
175        .with_tags(vec!["critical".to_string(), "cascading".to_string()])
176    }
177
178    /// Peak traffic scenario (rate limiting stress test)
179    pub fn peak_traffic() -> ChaosScenario {
180        ChaosScenario::new(
181            "peak_traffic",
182            ChaosConfig {
183                enabled: true,
184                rate_limit: Some(crate::config::RateLimitConfig {
185                    enabled: true,
186                    requests_per_second: 50,
187                    burst_size: 10,
188                    per_ip: false,
189                    per_endpoint: true,
190                }),
191                ..Default::default()
192            },
193        )
194        .with_description("Simulates peak traffic conditions with aggressive rate limiting")
195        .with_tags(vec!["traffic".to_string(), "load".to_string()])
196    }
197
198    /// Slow backend scenario (consistent high latency)
199    pub fn slow_backend() -> ChaosScenario {
200        ChaosScenario::new(
201            "slow_backend",
202            ChaosConfig {
203                enabled: true,
204                latency: Some(LatencyConfig {
205                    enabled: true,
206                    fixed_delay_ms: Some(2000),
207                    random_delay_range_ms: None,
208                    jitter_percent: 10.0,
209                    probability: 1.0,
210                }),
211                ..Default::default()
212            },
213        )
214        .with_description("Simulates a consistently slow backend service")
215        .with_tags(vec!["latency".to_string(), "performance".to_string()])
216    }
217}
218
219/// Scenario engine for managing active chaos scenarios
220pub struct ScenarioEngine {
221    active_scenarios: Arc<RwLock<HashMap<String, ChaosScenario>>>,
222}
223
224impl ScenarioEngine {
225    /// Create a new scenario engine
226    pub fn new() -> Self {
227        Self {
228            active_scenarios: Arc::new(RwLock::new(HashMap::new())),
229        }
230    }
231
232    /// Start a scenario
233    pub fn start_scenario(&self, scenario: ChaosScenario) {
234        let name = scenario.name.clone();
235        info!("Starting chaos scenario: {}", name);
236
237        let mut scenarios = self.active_scenarios.write();
238        scenarios.insert(name, scenario);
239    }
240
241    /// Stop a scenario by name
242    pub fn stop_scenario(&self, name: &str) -> bool {
243        info!("Stopping chaos scenario: {}", name);
244
245        let mut scenarios = self.active_scenarios.write();
246        scenarios.remove(name).is_some()
247    }
248
249    /// Stop all scenarios
250    pub fn stop_all_scenarios(&self) {
251        info!("Stopping all chaos scenarios");
252
253        let mut scenarios = self.active_scenarios.write();
254        scenarios.clear();
255    }
256
257    /// Get active scenarios
258    pub fn get_active_scenarios(&self) -> Vec<ChaosScenario> {
259        let scenarios = self.active_scenarios.read();
260        scenarios.values().cloned().collect()
261    }
262
263    /// Get a specific scenario
264    pub fn get_scenario(&self, name: &str) -> Option<ChaosScenario> {
265        let scenarios = self.active_scenarios.read();
266        scenarios.get(name).cloned()
267    }
268
269    /// Get merged chaos config from all active scenarios
270    pub fn get_merged_config(&self) -> Option<ChaosConfig> {
271        let scenarios = self.active_scenarios.read();
272
273        if scenarios.is_empty() {
274            return None;
275        }
276
277        // For simplicity, use the first active scenario's config
278        // In a more sophisticated implementation, you could merge configs
279        scenarios.values().find(|s| s.is_active()).map(|s| s.chaos_config.clone())
280    }
281
282    /// Clean up expired scenarios
283    pub fn cleanup_expired(&self) {
284        debug!("Cleaning up expired scenarios");
285
286        let mut scenarios = self.active_scenarios.write();
287        scenarios.retain(|name, scenario| {
288            let active = scenario.is_active();
289            if !active {
290                info!("Removing expired scenario: {}", name);
291            }
292            active
293        });
294    }
295}
296
297impl Default for ScenarioEngine {
298    fn default() -> Self {
299        Self::new()
300    }
301}
302
#[cfg(test)]
mod tests {
    use super::*;

    /// The network degradation preset has the expected name, is enabled and
    /// carries a latency config.
    #[test]
    fn test_scenario_creation() {
        let preset = PredefinedScenarios::network_degradation();

        assert_eq!(preset.name, "network_degradation");
        assert!(preset.chaos_config.enabled);
        assert!(preset.chaos_config.latency.is_some());
    }

    /// A started scenario is listed by the engine and can be stopped by name.
    #[test]
    fn test_scenario_engine() {
        let engine = ScenarioEngine::new();
        engine.start_scenario(PredefinedScenarios::service_instability());

        let running = engine.get_active_scenarios();
        assert_eq!(running.len(), 1);
        assert_eq!(running[0].name, "service_instability");

        let removed = engine.stop_scenario("service_instability");
        assert!(removed);
        assert!(engine.get_active_scenarios().is_empty());
    }

    /// Every preset has a non-empty name and an enabled chaos config.
    #[test]
    fn test_predefined_scenarios() {
        let presets = [
            PredefinedScenarios::network_degradation(),
            PredefinedScenarios::service_instability(),
            PredefinedScenarios::cascading_failure(),
            PredefinedScenarios::peak_traffic(),
            PredefinedScenarios::slow_backend(),
        ];

        for preset in &presets {
            assert!(!preset.name.is_empty());
            assert!(preset.chaos_config.enabled);
        }
    }
}