1#![cfg_attr(docsrs, feature(doc_cfg))]
44#![warn(missing_docs)]
45#![warn(rust_2018_idioms)]
46
47use std::collections::HashSet;
48use std::sync::atomic::{AtomicUsize, Ordering};
49
50use dev_report::{CheckResult, Evidence, Producer, Report, Severity};
51
52pub mod crash;
53pub mod io;
54pub mod latency;
55
56#[cfg(feature = "async-io")]
57#[cfg_attr(docsrs, doc(cfg(feature = "async-io")))]
58pub mod async_io;
59
/// The kind of failure that gets injected.
///
/// Every variant has a snake_case label (see [`FailureMode::as_str`]) and a
/// corresponding [`std::io::ErrorKind`] (see [`FailureMode::to_io_kind`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FailureMode {
    /// Generic I/O error: label `io_error`, kind [`std::io::ErrorKind::Other`].
    IoError,
    /// Partial write: label `partial_write`, kind [`std::io::ErrorKind::WriteZero`].
    PartialWrite,
    /// Connection reset: label `connection_reset`, kind
    /// [`std::io::ErrorKind::ConnectionReset`].
    ConnectionReset,
    /// Timeout: label `timeout`, kind [`std::io::ErrorKind::TimedOut`].
    Timeout,
    /// Corruption: label `corruption`, kind [`std::io::ErrorKind::InvalidData`].
    Corruption,
    /// Permission denied: label `permission_denied`, kind
    /// [`std::io::ErrorKind::PermissionDenied`].
    PermissionDenied,
}

impl FailureMode {
    /// Returns the snake_case label of this mode, e.g. `"partial_write"`.
    ///
    /// This is the text used in the `Display` output of [`InjectedFailure`].
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::IoError => "io_error",
            Self::PartialWrite => "partial_write",
            Self::ConnectionReset => "connection_reset",
            Self::Timeout => "timeout",
            Self::Corruption => "corruption",
            Self::PermissionDenied => "permission_denied",
        }
    }

    /// Returns the [`std::io::ErrorKind`] used when a failure of this mode is
    /// converted into a [`std::io::Error`].
    pub fn to_io_kind(&self) -> std::io::ErrorKind {
        use std::io::ErrorKind;

        match *self {
            Self::IoError => ErrorKind::Other,
            Self::PartialWrite => ErrorKind::WriteZero,
            Self::ConnectionReset => ErrorKind::ConnectionReset,
            Self::Timeout => ErrorKind::TimedOut,
            Self::Corruption => ErrorKind::InvalidData,
            Self::PermissionDenied => ErrorKind::PermissionDenied,
        }
    }
}
109
110#[derive(Debug, Clone)]
120pub struct InjectedFailure {
121 pub mode: FailureMode,
123 pub attempt: usize,
125}
126
127impl std::fmt::Display for InjectedFailure {
128 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
129 write!(
130 f,
131 "injected failure {} at attempt {}",
132 self.mode.as_str(),
133 self.attempt
134 )
135 }
136}
137
138impl std::error::Error for InjectedFailure {}
139
140impl From<InjectedFailure> for std::io::Error {
141 fn from(f: InjectedFailure) -> Self {
142 std::io::Error::new(f.mode.to_io_kind(), f.to_string())
143 }
144}
145
146pub struct FailureSchedule {
161 inner: ScheduleKind,
162 mode: FailureMode,
163 invocations: AtomicUsize,
164}
165
166enum ScheduleKind {
167 Explicit(HashSet<usize>),
168 EveryN(usize),
169 SeededRandom { seed: u64, prob_thousandths: u32 },
170}
171
172impl FailureSchedule {
173 pub fn on_attempts(attempts: &[usize], mode: FailureMode) -> Self {
185 Self {
186 inner: ScheduleKind::Explicit(attempts.iter().copied().collect()),
187 mode,
188 invocations: AtomicUsize::new(0),
189 }
190 }
191
192 pub fn every_n(n: usize, mode: FailureMode) -> Self {
203 let n = n.max(1);
204 Self {
205 inner: ScheduleKind::EveryN(n),
206 mode,
207 invocations: AtomicUsize::new(0),
208 }
209 }
210
211 pub fn seeded_random(seed: u64, probability: f64, mode: FailureMode) -> Self {
235 let p = probability.clamp(0.0, 1.0);
236 let prob_thousandths = (p * 1000.0).round() as u32;
237 Self {
238 inner: ScheduleKind::SeededRandom {
239 seed,
240 prob_thousandths,
241 },
242 mode,
243 invocations: AtomicUsize::new(0),
244 }
245 }
246
247 pub fn maybe_fail(&self, attempt: usize) -> Result<(), InjectedFailure> {
252 self.invocations.fetch_add(1, Ordering::Relaxed);
253 if self.fires(attempt) {
254 Err(InjectedFailure {
255 mode: self.mode,
256 attempt,
257 })
258 } else {
259 Ok(())
260 }
261 }
262
263 fn fires(&self, attempt: usize) -> bool {
264 match &self.inner {
265 ScheduleKind::Explicit(set) => set.contains(&attempt),
266 ScheduleKind::EveryN(n) => attempt % *n == 0,
267 ScheduleKind::SeededRandom {
268 seed,
269 prob_thousandths,
270 } => {
271 let mut x =
273 (*seed).wrapping_add((attempt as u64).wrapping_mul(0x9E37_79B9_7F4A_7C15));
274 x = (x ^ (x >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
275 x = (x ^ (x >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
276 x ^= x >> 31;
277 let bucket = (x % 1000) as u32;
278 bucket < *prob_thousandths
279 }
280 }
281 }
282
283 pub fn invocation_count(&self) -> usize {
285 self.invocations.load(Ordering::Relaxed)
286 }
287
288 pub fn mode(&self) -> FailureMode {
290 self.mode
291 }
292}
293
294pub fn assert_recovered(
315 name: impl Into<String>,
316 expected_failures: usize,
317 actual_failures: usize,
318 final_state_ok: bool,
319) -> CheckResult {
320 let check_name = format!("chaos::{}", name.into());
321 let evidence = vec![
322 Evidence::numeric("expected_failures", expected_failures as f64),
323 Evidence::numeric("actual_failures", actual_failures as f64),
324 Evidence::numeric("final_state_ok", if final_state_ok { 1.0 } else { 0.0 }),
325 ];
326
327 if !final_state_ok {
328 let mut tags = vec![
329 "chaos".to_string(),
330 "recovery".to_string(),
331 "regression".to_string(),
332 ];
333 tags.sort();
334 let mut c = CheckResult::fail(check_name, Severity::Critical).with_detail(format!(
335 "system did not recover. expected {expected_failures} injected failures, observed {actual_failures}, final state failed validation"
336 ));
337 c.tags = tags;
338 c.evidence = evidence;
339 return c;
340 }
341
342 if actual_failures < expected_failures {
343 let mut tags = vec!["chaos".to_string(), "recovery".to_string()];
344 tags.sort();
345 let mut c = CheckResult::warn(check_name, Severity::Warning).with_detail(format!(
346 "fewer failures observed than scheduled (expected {expected_failures}, observed {actual_failures})"
347 ));
348 c.tags = tags;
349 c.evidence = evidence;
350 return c;
351 }
352
353 let mut tags = vec!["chaos".to_string(), "recovery".to_string()];
354 tags.sort();
355 let mut c = CheckResult::pass(check_name).with_detail(format!(
356 "recovered after {actual_failures} injected failure(s)"
357 ));
358 c.tags = tags;
359 c.evidence = evidence;
360 c
361}
362
363pub struct ChaosProducer<F>
384where
385 F: Fn() -> Vec<CheckResult>,
386{
387 run: F,
388 subject: String,
389 subject_version: String,
390}
391
392impl<F> ChaosProducer<F>
393where
394 F: Fn() -> Vec<CheckResult>,
395{
396 pub fn new(run: F, subject: impl Into<String>, subject_version: impl Into<String>) -> Self {
398 Self {
399 run,
400 subject: subject.into(),
401 subject_version: subject_version.into(),
402 }
403 }
404}
405
406impl<F> Producer for ChaosProducer<F>
407where
408 F: Fn() -> Vec<CheckResult>,
409{
410 fn produce(&self) -> Report {
411 let checks = (self.run)();
412 let mut r = Report::new(self.subject.clone(), self.subject_version.clone())
413 .with_producer("dev-chaos");
414 for c in checks {
415 r.push(c);
416 }
417 r.finish();
418 r
419 }
420}
421
#[cfg(test)]
mod tests {
    use super::*;
    use dev_report::Verdict;

    /// An explicit schedule fails only on the listed attempts and counts
    /// every call.
    #[test]
    fn schedule_fails_on_specified_attempts() {
        let schedule = FailureSchedule::on_attempts(&[2, 4], FailureMode::IoError);
        let expectations = [(1, false), (2, true), (3, false), (4, true)];
        for (attempt, should_fail) in expectations {
            assert_eq!(schedule.maybe_fail(attempt).is_err(), should_fail);
        }
        assert_eq!(schedule.invocation_count(), 4);
    }

    /// `every_n(3)` fires on multiples of three only.
    #[test]
    fn every_n_fires_on_multiples() {
        let schedule = FailureSchedule::every_n(3, FailureMode::Timeout);
        for attempt in [1, 2] {
            assert!(schedule.maybe_fail(attempt).is_ok());
        }
        for attempt in [3, 6, 9, 3_000] {
            assert!(schedule.maybe_fail(attempt).is_err());
        }
    }

    /// Two schedules built from the same seed and probability agree on
    /// every attempt.
    #[test]
    fn seeded_random_is_deterministic() {
        let first = FailureSchedule::seeded_random(7, 0.5, FailureMode::IoError);
        let second = FailureSchedule::seeded_random(7, 0.5, FailureMode::IoError);
        (1..=200).for_each(|attempt| {
            assert_eq!(
                first.fires(attempt),
                second.fires(attempt),
                "differs at attempt {}",
                attempt
            );
        });
    }

    /// Probability `0.0` never injects a failure.
    #[test]
    fn seeded_random_zero_probability_never_fires() {
        let schedule = FailureSchedule::seeded_random(7, 0.0, FailureMode::IoError);
        assert!((1..=1000).all(|attempt| schedule.maybe_fail(attempt).is_ok()));
    }

    /// Probability `1.0` injects a failure on every attempt.
    #[test]
    fn seeded_random_full_probability_always_fires() {
        let schedule = FailureSchedule::seeded_random(7, 1.0, FailureMode::IoError);
        assert!((1..=200).all(|attempt| schedule.maybe_fail(attempt).is_err()));
    }

    /// Converting to `io::Error` maps the mode to its `ErrorKind`.
    #[test]
    fn injected_failure_converts_to_io_error() {
        let converted = std::io::Error::from(InjectedFailure {
            mode: FailureMode::Timeout,
            attempt: 5,
        });
        assert_eq!(converted.kind(), std::io::ErrorKind::TimedOut);
    }

    /// A fully recovered run passes and is not tagged as a regression.
    #[test]
    fn recovery_check_pass() {
        let check = assert_recovered("write_log", 2, 2, true);
        assert_eq!(check.verdict, Verdict::Pass);
        for tag in ["chaos", "recovery"] {
            assert!(check.has_tag(tag));
        }
        assert!(!check.has_tag("regression"));
    }

    /// An invalid final state is a critical failure tagged `regression`.
    #[test]
    fn recovery_check_fail_when_state_invalid() {
        let check = assert_recovered("write_log", 2, 2, false);
        assert_eq!(check.verdict, Verdict::Fail);
        assert_eq!(check.severity, Some(Severity::Critical));
        assert!(check.has_tag("regression"));
    }

    /// Observing fewer failures than scheduled yields a warning.
    #[test]
    fn recovery_check_warns_on_under_injection() {
        let check = assert_recovered("write_log", 5, 2, true);
        assert_eq!(check.verdict, Verdict::Warn);
    }

    /// All three numeric evidence labels are attached to the check.
    #[test]
    fn recovery_check_carries_numeric_evidence() {
        let check = assert_recovered("op", 3, 3, true);
        let labels: Vec<&str> = check.evidence.iter().map(|e| e.label.as_str()).collect();
        for expected in ["expected_failures", "actual_failures", "final_state_ok"] {
            assert!(labels.contains(&expected));
        }
    }

    /// The producer wraps the closure's checks in a `dev-chaos` report.
    #[test]
    fn chaos_producer_emits_report() {
        let run = || {
            vec![
                assert_recovered("a", 1, 1, true),
                assert_recovered("b", 2, 2, true),
            ]
        };
        let producer = ChaosProducer::new(run, "my-crate", "0.1.0");

        let report = producer.produce();
        assert_eq!(report.checks.len(), 2);
        assert_eq!(report.producer.as_deref(), Some("dev-chaos"));
        assert_eq!(report.overall_verdict(), Verdict::Pass);
    }
}