Skip to main content

certeza/
chaos.rs

1//! # Chaos Engineering Module
2//!
3//! Chaos engineering infrastructure adapted from **renacer v0.4.1**
4//! (<https://github.com/paiml/renacer>).
5//!
6//! This module provides chaos engineering capabilities for testing system resilience
7//! under adverse conditions. It integrates with the certeza testing framework to
8//! validate behavior under resource constraints and failure scenarios.
9//!
10//! ## Chaos Testing Philosophy
11//!
12//! Chaos engineering validates that systems behave correctly under stress:
13//! - Memory constraints (allocation pressure)
14//! - CPU limits (throttling)
15//! - Timeout conditions (deadlines)
16//! - Signal injection (interrupts)
17//!
18//! ## Tier Integration
19//!
20//! - **Tier 2 (ON-COMMIT)**: Basic chaos tests with gentle constraints
21//! - **Tier 3 (ON-MERGE/NIGHTLY)**: Aggressive chaos tests with extreme limits
22//!
23//! ## Example
24//!
25//! ```rust
26//! use certeza::chaos::{ChaosConfig, ChaosResult};
27//! use std::time::Duration;
28//!
29//! // Gentle chaos for development (renacer preset)
30//! let config = ChaosConfig::gentle();
31//!
32//! // Aggressive chaos for CI (renacer preset)
33//! let aggressive = ChaosConfig::aggressive();
34//!
35//! // Custom configuration (renacer builder pattern)
36//! let custom = ChaosConfig::new()
37//!     .with_memory_limit(128 * 1024 * 1024)  // 128MB
38//!     .with_cpu_limit(0.5)                   // 50% CPU
39//!     .with_timeout(Duration::from_secs(30))
40//!     .with_signal_injection(true)
41//!     .build();
42//! ```
43//!
44//! ## Source
45//!
46//! This implementation is based on the renacer project's Sprint 29 chaos engineering
47//! framework, adapted for certeza's tiered TDD-X approach.
48
49use std::time::Duration;
50
51/// Result type for chaos engineering operations.
52///
53/// Based on renacer's `ChaosResult<T>` pattern.
54///
55/// # Examples
56///
57/// ```rust
58/// use certeza::chaos::{ChaosResult, ChaosError};
59/// use std::time::Duration;
60///
61/// fn simulated_operation(limit: Duration) -> ChaosResult<String> {
62///     let elapsed = Duration::from_secs(5);
63///     if elapsed > limit {
64///         Err(ChaosError::Timeout { elapsed, limit })
65///     } else {
66///         Ok("success".to_string())
67///     }
68/// }
69/// ```
70pub type ChaosResult<T> = Result<T, ChaosError>;
71
/// Failure modes reported by chaos engineering scenarios.
///
/// The taxonomy follows renacer's chaos errors. Each variant describes a
/// failure that is injected on purpose during chaos testing and carries the
/// data needed to explain what was exceeded or what went wrong.
///
/// # Examples
///
/// ```rust
/// use certeza::chaos::ChaosError;
///
/// let error = ChaosError::MemoryLimitExceeded { limit: 1024, used: 2048 };
///
/// assert_eq!(
///     error.to_string(),
///     "Memory limit exceeded: 2048 > 1024 bytes"
/// );
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ChaosError {
    /// More memory was used than the configured limit allows.
    ///
    /// Models out-of-memory conditions under resource constraints.
    MemoryLimitExceeded {
        /// Configured memory limit, in bytes.
        limit: usize,
        /// Memory actually in use, in bytes.
        used: usize,
    },

    /// An operation ran past its configured deadline.
    ///
    /// Models deadline violations in time-constrained scenarios.
    Timeout {
        /// Time that actually elapsed.
        elapsed: Duration,
        /// Configured timeout limit.
        limit: Duration,
    },

    /// A simulated signal could not be delivered.
    ///
    /// Models failures in interrupt simulation (SIGINT, SIGTERM, etc.).
    SignalInjectionFailed {
        /// Signal number (e.g., 2 for SIGINT, 15 for SIGTERM).
        signal: i32,
        /// Human-readable explanation of the failure.
        reason: String,
    },
}
125
126impl std::fmt::Display for ChaosError {
127    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
128        match self {
129            Self::MemoryLimitExceeded { limit, used } => {
130                write!(f, "Memory limit exceeded: {used} > {limit} bytes")
131            }
132            Self::Timeout { elapsed, limit } => {
133                write!(f, "Timeout: {elapsed:?} > {limit:?}")
134            }
135            Self::SignalInjectionFailed { signal, reason } => {
136                write!(f, "Signal injection failed ({signal}): {reason}")
137            }
138        }
139    }
140}
141
// Registers `ChaosError` as a standard error type so it can be used with `?`
// and `Box<dyn std::error::Error>`. The body is intentionally empty: no
// variant wraps an underlying error, so the trait's provided methods are left
// at their defaults.
impl std::error::Error for ChaosError {}
143
/// Resource constraints and failure scenarios for a chaos experiment.
///
/// This type only describes the constraints a test wants applied. A value of
/// zero for [`memory_limit`](Self::memory_limit) or
/// [`cpu_limit`](Self::cpu_limit) means "unlimited". The default configuration
/// leaves both unlimited but still carries a 60-second
/// [`timeout`](Self::timeout).
///
/// # Examples
///
/// ```rust
/// use certeza::chaos::ChaosConfig;
/// use std::time::Duration;
///
/// // Default: no memory or CPU limit, 60s timeout
/// let default = ChaosConfig::default();
/// assert_eq!(default.memory_limit, 0);
/// assert_eq!(default.cpu_limit, 0.0);
/// assert_eq!(default.timeout, Duration::from_secs(60));
///
/// // Gentle constraints for local development
/// let gentle = ChaosConfig::gentle();
/// assert_eq!(gentle.memory_limit, 512 * 1024 * 1024);  // 512MB
///
/// // Aggressive constraints for CI/CD
/// let aggressive = ChaosConfig::aggressive();
/// assert_eq!(aggressive.memory_limit, 64 * 1024 * 1024);  // 64MB
/// assert!(aggressive.signal_injection);
/// ```
#[derive(Clone, Debug, PartialEq)]
pub struct ChaosConfig {
    /// Memory budget in bytes (0 = unlimited).
    ///
    /// When non-zero, operations exceeding this limit should fail gracefully.
    pub memory_limit: usize,

    /// Share of CPU available to the process, as a fraction
    /// (0.0-1.0, 0.0 = unlimited).
    ///
    /// [`ChaosConfig::with_cpu_limit`] clamps its argument into this range.
    /// Writing to this public field directly performs no validation.
    pub cpu_limit: f64,

    /// Deadline for an operation.
    ///
    /// Long-running operations should respect this deadline and fail
    /// gracefully if it is exceeded.
    pub timeout: Duration,

    /// Whether simulated signal interrupts should be injected.
    ///
    /// When enabled, tests may simulate SIGINT, SIGTERM, or other signals
    /// to validate interrupt handling.
    pub signal_injection: bool,
}
194
195impl Default for ChaosConfig {
196    /// Creates a configuration with no limits (passthrough mode).
197    ///
198    /// Useful for baseline testing without chaos constraints.
199    ///
200    /// # Examples
201    ///
202    /// ```rust
203    /// use certeza::chaos::ChaosConfig;
204    ///
205    /// let config = ChaosConfig::default();
206    /// assert_eq!(config.memory_limit, 0);
207    /// assert_eq!(config.cpu_limit, 0.0);
208    /// assert!(!config.signal_injection);
209    /// ```
210    fn default() -> Self {
211        Self {
212            memory_limit: 0,
213            cpu_limit: 0.0,
214            timeout: Duration::from_secs(60),
215            signal_injection: false,
216        }
217    }
218}
219
220impl ChaosConfig {
221    /// Creates a new chaos configuration with default (unlimited) settings.
222    ///
223    /// Use the builder pattern to customize constraints.
224    ///
225    /// # Examples
226    ///
227    /// ```rust
228    /// use certeza::chaos::ChaosConfig;
229    /// use std::time::Duration;
230    ///
231    /// let config = ChaosConfig::new()
232    ///     .with_memory_limit(256 * 1024 * 1024)
233    ///     .with_timeout(Duration::from_secs(120))
234    ///     .build();
235    ///
236    /// assert_eq!(config.memory_limit, 256 * 1024 * 1024);
237    /// ```
238    #[must_use]
239    pub fn new() -> Self {
240        Self::default()
241    }
242
243    /// Sets the memory limit in bytes.
244    ///
245    /// # Examples
246    ///
247    /// ```rust
248    /// use certeza::chaos::ChaosConfig;
249    ///
250    /// let config = ChaosConfig::new()
251    ///     .with_memory_limit(128 * 1024 * 1024);  // 128MB
252    ///
253    /// assert_eq!(config.memory_limit, 128 * 1024 * 1024);
254    /// ```
255    #[must_use]
256    pub const fn with_memory_limit(mut self, bytes: usize) -> Self {
257        self.memory_limit = bytes;
258        self
259    }
260
261    /// Sets the CPU utilization limit as a fraction (0.0-1.0).
262    ///
263    /// Values outside the valid range are automatically clamped.
264    ///
265    /// # Examples
266    ///
267    /// ```rust
268    /// use certeza::chaos::ChaosConfig;
269    ///
270    /// // Valid fraction
271    /// let config = ChaosConfig::new().with_cpu_limit(0.5);
272    /// assert_eq!(config.cpu_limit, 0.5);
273    ///
274    /// // Clamped to maximum
275    /// let clamped = ChaosConfig::new().with_cpu_limit(2.0);
276    /// assert_eq!(clamped.cpu_limit, 1.0);
277    ///
278    /// // Clamped to minimum
279    /// let negative = ChaosConfig::new().with_cpu_limit(-0.5);
280    /// assert_eq!(negative.cpu_limit, 0.0);
281    /// ```
282    #[must_use]
283    pub const fn with_cpu_limit(mut self, fraction: f64) -> Self {
284        self.cpu_limit = fraction.clamp(0.0, 1.0);
285        self
286    }
287
288    /// Sets the operation timeout.
289    ///
290    /// # Examples
291    ///
292    /// ```rust
293    /// use certeza::chaos::ChaosConfig;
294    /// use std::time::Duration;
295    ///
296    /// let config = ChaosConfig::new()
297    ///     .with_timeout(Duration::from_secs(30));
298    ///
299    /// assert_eq!(config.timeout, Duration::from_secs(30));
300    /// ```
301    #[must_use]
302    pub const fn with_timeout(mut self, timeout: Duration) -> Self {
303        self.timeout = timeout;
304        self
305    }
306
307    /// Enables or disables signal injection.
308    ///
309    /// # Examples
310    ///
311    /// ```rust
312    /// use certeza::chaos::ChaosConfig;
313    ///
314    /// let config = ChaosConfig::new().with_signal_injection(true);
315    /// assert!(config.signal_injection);
316    /// ```
317    #[must_use]
318    pub const fn with_signal_injection(mut self, enabled: bool) -> Self {
319        self.signal_injection = enabled;
320        self
321    }
322
323    /// Finalizes the configuration (terminal builder method).
324    ///
325    /// This is a no-op that exists for ergonomic builder pattern completion.
326    ///
327    /// # Examples
328    ///
329    /// ```rust
330    /// use certeza::chaos::ChaosConfig;
331    ///
332    /// let config = ChaosConfig::new()
333    ///     .with_memory_limit(256 * 1024 * 1024)
334    ///     .build();
335    /// ```
336    #[must_use]
337    pub const fn build(self) -> Self {
338        self
339    }
340
341    /// Creates a gentle chaos configuration for local development.
342    ///
343    /// Applies moderate constraints that catch obvious issues without
344    /// excessive developer friction.
345    ///
346    /// **Constraints:**
347    /// - Memory: 512MB
348    /// - CPU: 80% (0.8)
349    /// - Timeout: 120 seconds
350    /// - Signal injection: disabled
351    ///
352    /// # Examples
353    ///
354    /// ```rust
355    /// use certeza::chaos::ChaosConfig;
356    /// use std::time::Duration;
357    ///
358    /// let config = ChaosConfig::gentle();
359    /// assert_eq!(config.memory_limit, 512 * 1024 * 1024);
360    /// assert_eq!(config.cpu_limit, 0.8);
361    /// assert_eq!(config.timeout, Duration::from_secs(120));
362    /// assert!(!config.signal_injection);
363    /// ```
364    #[must_use]
365    pub fn gentle() -> Self {
366        Self::new()
367            .with_memory_limit(512 * 1024 * 1024) // 512MB
368            .with_cpu_limit(0.8) // 80% CPU
369            .with_timeout(Duration::from_secs(120))
370    }
371
372    /// Creates an aggressive chaos configuration for CI/CD and stress testing.
373    ///
374    /// Applies severe constraints to expose edge cases and failure modes
375    /// that might occur in production under load.
376    ///
377    /// **Constraints:**
378    /// - Memory: 64MB
379    /// - CPU: 25% (0.25)
380    /// - Timeout: 10 seconds
381    /// - Signal injection: enabled
382    ///
383    /// # Examples
384    ///
385    /// ```rust
386    /// use certeza::chaos::ChaosConfig;
387    /// use std::time::Duration;
388    ///
389    /// let config = ChaosConfig::aggressive();
390    /// assert_eq!(config.memory_limit, 64 * 1024 * 1024);
391    /// assert_eq!(config.cpu_limit, 0.25);
392    /// assert_eq!(config.timeout, Duration::from_secs(10));
393    /// assert!(config.signal_injection);
394    /// ```
395    #[must_use]
396    pub fn aggressive() -> Self {
397        Self::new()
398            .with_memory_limit(64 * 1024 * 1024) // 64MB
399            .with_cpu_limit(0.25) // 25% CPU
400            .with_timeout(Duration::from_secs(10))
401            .with_signal_injection(true)
402    }
403}
404
#[cfg(test)]
mod tests {
    use super::*;

    // ---- ChaosConfig ----

    /// `Default` leaves memory/CPU unlimited, disables signals, and uses a 60s timeout.
    #[test]
    #[allow(clippy::float_cmp)] // exact literals are stored and read back unchanged
    fn test_default_config() {
        let ChaosConfig { memory_limit, cpu_limit, timeout, signal_injection } =
            ChaosConfig::default();

        assert_eq!(memory_limit, 0);
        assert_eq!(cpu_limit, 0.0);
        assert_eq!(timeout, Duration::from_secs(60));
        assert!(!signal_injection);
    }

    /// Every `with_*` setter lands in the matching field.
    #[test]
    #[allow(clippy::float_cmp)] // exact literals are stored and read back unchanged
    fn test_builder_pattern() {
        let ChaosConfig { memory_limit, cpu_limit, timeout, signal_injection } =
            ChaosConfig::new()
                .with_memory_limit(1024)
                .with_cpu_limit(0.5)
                .with_timeout(Duration::from_secs(30))
                .with_signal_injection(true)
                .build();

        assert_eq!(memory_limit, 1024);
        assert_eq!(cpu_limit, 0.5);
        assert_eq!(timeout, Duration::from_secs(30));
        assert!(signal_injection);
    }

    /// Out-of-range CPU fractions are pulled back into [0.0, 1.0].
    #[test]
    #[allow(clippy::float_cmp)] // clamping yields the exact bound or the exact input
    fn test_cpu_limit_clamping() {
        // (requested fraction, fraction expected to be stored)
        let cases = [(2.0, 1.0), (-1.0, 0.0), (0.75, 0.75)];

        for (requested, stored) in cases {
            let config = ChaosConfig::new().with_cpu_limit(requested);
            assert_eq!(config.cpu_limit, stored);
        }
    }

    /// The gentle preset matches its documented constraints.
    #[test]
    #[allow(clippy::float_cmp)] // exact literals are stored and read back unchanged
    fn test_gentle_preset() {
        let ChaosConfig { memory_limit, cpu_limit, timeout, signal_injection } =
            ChaosConfig::gentle();

        assert_eq!(memory_limit, 512 * 1024 * 1024);
        assert_eq!(cpu_limit, 0.8);
        assert_eq!(timeout, Duration::from_secs(120));
        assert!(!signal_injection);
    }

    /// The aggressive preset matches its documented constraints.
    #[test]
    #[allow(clippy::float_cmp)] // exact literals are stored and read back unchanged
    fn test_aggressive_preset() {
        let ChaosConfig { memory_limit, cpu_limit, timeout, signal_injection } =
            ChaosConfig::aggressive();

        assert_eq!(memory_limit, 64 * 1024 * 1024);
        assert_eq!(cpu_limit, 0.25);
        assert_eq!(timeout, Duration::from_secs(10));
        assert!(signal_injection);
    }

    /// A cloned configuration compares equal to its source.
    #[test]
    #[allow(clippy::redundant_clone)] // the clone itself is what is under test
    fn test_clone() {
        let source = ChaosConfig::gentle();
        let duplicate = source.clone();
        assert_eq!(&source, &duplicate);
    }

    /// The derived `Debug` output names the type and its fields.
    #[test]
    fn test_debug_format() {
        let rendered = format!("{:?}", ChaosConfig::new());
        for needle in ["ChaosConfig", "memory_limit"] {
            assert!(rendered.contains(needle));
        }
    }

    // ---- ChaosError ----

    /// Memory errors print "used > limit" in bytes.
    #[test]
    fn test_memory_limit_exceeded_display() {
        let message = ChaosError::MemoryLimitExceeded { limit: 1024, used: 2048 }.to_string();
        assert_eq!(message, "Memory limit exceeded: 2048 > 1024 bytes");
    }

    /// Timeout errors mention both the elapsed time and the limit.
    #[test]
    fn test_timeout_display() {
        let elapsed = Duration::from_secs(5);
        let limit = Duration::from_secs(3);
        let message = ChaosError::Timeout { elapsed, limit }.to_string();

        for needle in ["Timeout", "5s", "3s"] {
            assert!(message.contains(needle));
        }
    }

    /// Signal errors print the signal number followed by the reason.
    #[test]
    fn test_signal_injection_failed_display() {
        let reason = "Process not found".to_string();
        let message = ChaosError::SignalInjectionFailed { signal: 2, reason }.to_string();
        assert_eq!(message, "Signal injection failed (2): Process not found");
    }

    /// A cloned error compares equal to its source.
    #[test]
    #[allow(clippy::redundant_clone)] // the clone itself is what is under test
    fn test_chaos_error_clone() {
        let source = ChaosError::MemoryLimitExceeded { limit: 100, used: 200 };
        let duplicate = source.clone();
        assert_eq!(&source, &duplicate);
    }

    /// The derived `Debug` output names the variant.
    #[test]
    fn test_chaos_error_debug() {
        let elapsed = Duration::from_secs(1);
        let limit = Duration::from_millis(500);
        let rendered = format!("{:?}", ChaosError::Timeout { elapsed, limit });
        assert!(rendered.contains("Timeout"));
    }

    // ---- ChaosResult ----

    /// The alias carries a success value like any `Result`.
    #[test]
    fn test_chaos_result_ok() {
        let outcome: ChaosResult<i32> = Ok(42);
        match outcome {
            Ok(value) => assert_eq!(value, 42),
            Err(_) => panic!("Expected Ok result"),
        }
    }

    /// The alias carries a `ChaosError` on failure.
    #[test]
    fn test_chaos_result_err() {
        let failure = ChaosError::MemoryLimitExceeded { limit: 100, used: 200 };
        let outcome: ChaosResult<i32> = Err(failure);
        assert!(outcome.is_err());
    }
}