1#![cfg_attr(docsrs, feature(doc_cfg))]
29#![warn(missing_docs)]
30#![warn(rust_2018_idioms)]
31
32use std::collections::HashSet;
33use std::sync::atomic::{AtomicUsize, Ordering};
34
35use dev_report::{CheckResult, Severity};
36
/// The kind of failure a [`FailureSchedule`] injects.
///
/// The value is a plain tag: it is copied into every [`InjectedFailure`]
/// produced by a schedule. It derives `Hash` in addition to the comparison
/// traits so it can be used directly as a `HashMap`/`HashSet` key (for
/// example to tally observed failures per mode).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FailureMode {
    /// A generic I/O error.
    IoError,
    /// A write that only partially completed.
    PartialWrite,
    /// A connection that was reset.
    ConnectionReset,
    /// An operation that timed out.
    Timeout,
    /// Corrupted data.
    Corruption,
    /// A permission-denied error.
    PermissionDenied,
}
53
54impl FailureMode {
55 pub fn as_str(&self) -> &'static str {
57 match self {
58 FailureMode::IoError => "io_error",
59 FailureMode::PartialWrite => "partial_write",
60 FailureMode::ConnectionReset => "connection_reset",
61 FailureMode::Timeout => "timeout",
62 FailureMode::Corruption => "corruption",
63 FailureMode::PermissionDenied => "permission_denied",
64 }
65 }
66}
67
68#[derive(Debug, Clone)]
70pub struct InjectedFailure {
71 pub mode: FailureMode,
73 pub attempt: usize,
75}
76
77impl std::fmt::Display for InjectedFailure {
78 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79 write!(
80 f,
81 "injected failure {} at attempt {}",
82 self.mode.as_str(),
83 self.attempt
84 )
85 }
86}
87
88impl std::error::Error for InjectedFailure {}
89
90pub struct FailureSchedule {
92 failing_attempts: HashSet<usize>,
93 mode: FailureMode,
94 invocations: AtomicUsize,
95}
96
97impl FailureSchedule {
98 pub fn on_attempts(attempts: &[usize], mode: FailureMode) -> Self {
101 Self {
102 failing_attempts: attempts.iter().copied().collect(),
103 mode,
104 invocations: AtomicUsize::new(0),
105 }
106 }
107
108 pub fn every_n(n: usize, mode: FailureMode) -> Self {
110 let mut s = HashSet::new();
111 for k in 1..=1024 {
116 if k % n == 0 {
117 s.insert(k);
118 }
119 }
120 Self {
121 failing_attempts: s,
122 mode,
123 invocations: AtomicUsize::new(0),
124 }
125 }
126
127 pub fn maybe_fail(&self, attempt: usize) -> Result<(), InjectedFailure> {
130 self.invocations.fetch_add(1, Ordering::Relaxed);
131 if self.failing_attempts.contains(&attempt) {
132 Err(InjectedFailure {
133 mode: self.mode,
134 attempt,
135 })
136 } else {
137 Ok(())
138 }
139 }
140
141 pub fn invocation_count(&self) -> usize {
143 self.invocations.load(Ordering::Relaxed)
144 }
145}
146
147pub fn assert_recovered(
153 name: impl Into<String>,
154 expected_failures: usize,
155 actual_failures: usize,
156 final_state_ok: bool,
157) -> CheckResult {
158 let name = format!("chaos::{}", name.into());
159 if !final_state_ok {
160 return CheckResult::fail(name, Severity::Critical).with_detail(format!(
161 "system did not recover. expected {expected_failures} injected failures, observed {actual_failures}, final state failed validation"
162 ));
163 }
164 if actual_failures < expected_failures {
165 return CheckResult::warn(name, Severity::Warning).with_detail(format!(
166 "fewer failures observed than scheduled (expected {expected_failures}, observed {actual_failures})"
167 ));
168 }
169 CheckResult::pass(name).with_detail(format!(
170 "recovered after {actual_failures} injected failure(s)"
171 ))
172}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Listed attempts fail, all others succeed, and every call is counted.
    #[test]
    fn schedule_fails_on_specified_attempts() {
        let schedule = FailureSchedule::on_attempts(&[2, 4], FailureMode::IoError);
        for (attempt, should_fail) in [(1, false), (2, true), (3, false), (4, true)] {
            assert_eq!(schedule.maybe_fail(attempt).is_err(), should_fail);
        }
        assert_eq!(schedule.invocation_count(), 4);
    }

    /// `every_n(3, ..)` fails on multiples of three only.
    #[test]
    fn every_n_pattern() {
        let schedule = FailureSchedule::every_n(3, FailureMode::Timeout);
        for attempt in [1, 2] {
            assert!(schedule.maybe_fail(attempt).is_ok());
        }
        for attempt in [3, 6, 9] {
            assert!(schedule.maybe_fail(attempt).is_err());
        }
    }

    /// Matching failure counts plus a valid final state yields a pass.
    #[test]
    fn recovery_check_pass() {
        let check = assert_recovered("write_log", 2, 2, true);
        assert!(matches!(check.verdict, dev_report::Verdict::Pass));
    }

    /// An invalid final state yields a fail even when the counts match.
    #[test]
    fn recovery_check_fail_when_state_invalid() {
        let check = assert_recovered("write_log", 2, 2, false);
        assert!(matches!(check.verdict, dev_report::Verdict::Fail));
    }
}