Skip to main content

moonpool_sim/
lib.rs

1//! # Moonpool Simulation Framework
2//!
3//! Deterministic simulation for testing distributed systems, inspired by
4//! [FoundationDB's simulation testing](https://apple.github.io/foundationdb/testing.html).
5//!
6//! ## Why Deterministic Simulation?
7//!
8//! FoundationDB's insight: **bugs hide in error paths**. Production code rarely
9//! exercises timeout handlers, retry logic, or failure recovery. Deterministic
10//! simulation with fault injection finds these bugs before production does.
11//!
12//! Key properties:
13//! - **Reproducible**: Same seed produces identical execution
14//! - **Comprehensive**: Tests all failure modes (network, timing, corruption)
15//! - **Fast**: Logical time skips idle periods
16//!
17//! ## Core Components
18//!
19//! - [`SimWorld`]: The simulation runtime managing events and time
20//! - [`SimulationBuilder`]: Configure and run simulations
21//! - [`chaos`]: Fault injection (buggify, 14 assertion macros, invariants)
22//! - [`storage`]: Storage simulation with fault injection
23//! - Multiverse exploration via `moonpool-explorer` (config types re-exported as [`ExplorationConfig`], [`AdaptiveConfig`])
24//!
25//! ## Quick Start
26//!
27//! ```ignore
28//! use moonpool_sim::{SimulationBuilder, WorkloadTopology};
29//!
30//! SimulationBuilder::new()
31//!     .topology(WorkloadTopology::ClientServer { clients: 2, servers: 1 })
32//!     .run(|ctx| async move {
33//!         // Your distributed system workload
34//!     });
35//! ```
36//!
37//! ## Fault Injection Overview
38//!
39//! See [`chaos`] module for detailed documentation.
40//!
41//! | Mechanism | Default | What it tests |
42//! |-----------|---------|---------------|
43//! | TCP latencies | 1-11ms connect | Async scheduling |
44//! | Random connection close | 0.001% | Reconnection, redelivery |
45//! | Bit flip corruption | 0.01% | Checksum validation |
46//! | Connect failure | 50% probabilistic | Timeout handling, retries |
47//! | Clock drift | 100ms max | Leases, heartbeats |
48//! | Buggified delays | 25% | Race conditions |
49//! | Partial writes | 1000 bytes max | Message fragmentation |
50//! | Packet loss | disabled | At-least-once delivery |
51//! | Network partitions | disabled | Split-brain handling |
52//! | Storage corruption | configurable | Checksum validation, recovery |
53//! | Torn writes | configurable | Write atomicity, journaling |
54//! | Sync failures | configurable | Durability guarantees |
55//!
56//! ## Multi-Seed Testing
57//!
58//! Tests run across multiple seeds to explore the state space:
59//!
60//! ```ignore
61//! SimulationBuilder::new()
62//!     .run_count(IterationControl::UntilAllSometimesReached(1000))
63//!     .run(workload);
64//! ```
65//!
66//! Debugging a failing seed:
67//!
68//! ```ignore
69//! SimulationBuilder::new()
70//!     .set_seed(failing_seed)
71//!     .run_count(IterationControl::FixedCount(1))
72//!     .run(workload);
73//! ```
74//!
75//! ## Coverage-Preserving Multi-Seed Exploration
76//!
77//! When exploration is enabled, multiple seeds share coverage context. The
78//! explored map (coverage bitmap union) and assertion watermarks are preserved
79//! between seeds so each subsequent seed focuses energy on genuinely new
80//! branches rather than re-treading already-discovered paths.
81//!
82//! A **warm start** mechanism reduces wasted forks: on seeds after the first,
83//! marks whose first probe batch finds zero new coverage bits stop after
84//! `warm_min_timelines` forks instead of the full `min_timelines`.
85//!
86//! ```text
87//!  Seed 1 (cold start)           Seed 2 (warm start)          Seed 3 (warm start)
88//!  energy: 400K                  energy: 400K                 energy: 400K
89//!
90//!  root ─┬─ mark A ──> 400       root ─┬─ mark A ──> 30      root ─┬─ mark A ──> 30
91//!        │  (new bits!) forks           │  (barren!) skip           │  (barren!) skip
92//!        │                              │                           │
93//!        ├─ mark B ──> 400              ├─ mark B ──> 30            ├─ mark B ──> 30
94//!        │  (new bits!) forks           │  (barren!) skip           │  (barren!) skip
95//!        │                              │                           │
96//!        └─ mark C ──> 400              ├─ mark C ──> 30            ├─ mark C ──> 30
97//!           (new bits!) forks           │  (barren!) skip           │  (barren!) skip
98//!                                       │                           │
99//!                                       └─ mark D ──> 400          └─ mark E ──> 400
100//!                                          (NEW bits!) forks           (NEW bits!) forks
101//!                        │                               │                            │
102//!       ┌────────────────┘              ┌────────────────┘           ┌────────────────┘
103//!       v                               v                            v
104//!  ┌──────────┐  ──preserved──>  ┌──────────┐  ──preserved──>  ┌──────────┐
105//!  │ explored │  coverage map    │ explored │  coverage map    │ explored │
106//!  │ map:     │  + watermarks    │ map:     │  + watermarks    │ map:     │
107//!  │ A,B,C    │                  │ A,B,C,D  │                  │ A,B,C,D,E│
108//!  └──────────┘                  └──────────┘                  └──────────┘
109//!
110//!  Total: each seed spends most energy on NEW discoveries.
111//!  Warm marks (A,B,C on seed 2) exit after warm_min_timelines (30)
112//!  instead of min_timelines (400), saving ~92% energy per barren mark.
113//! ```
114//!
115//! ```ignore
116//! SimulationBuilder::new()
117//!     .set_iterations(3)  // 3 root seeds with coverage forwarding
118//!     .enable_exploration(ExplorationConfig {
119//!         max_depth: 120,
120//!         timelines_per_split: 4,
121//!         global_energy: 400_000,  // per-seed energy budget
122//!         adaptive: Some(AdaptiveConfig {
123//!             batch_size: 30,
124//!             min_timelines: 400,
125//!             max_timelines: 1_000,
126//!             per_mark_energy: 10_000,
127//!             warm_min_timelines: Some(30),  // quick skip for barren warm marks
128//!         }),
129//!         parallelism: Some(Parallelism::HalfCores),
130//!     })
131//!     .workload(my_workload);
132//! ```
133
134#![deny(missing_docs)]
135#![deny(clippy::unwrap_used)]
136
137// Re-export core types for convenience
138pub use moonpool_core::{
139    CodecError, Endpoint, JsonCodec, MessageCodec, NetworkAddress, NetworkAddressParseError,
140    NetworkProvider, Providers, RandomProvider, SimulationError, SimulationResult, TaskProvider,
141    TcpListenerTrait, TimeError, TimeProvider, TokioNetworkProvider, TokioProviders,
142    TokioTaskProvider, TokioTimeProvider, UID, WELL_KNOWN_RESERVED_COUNT, WellKnownToken,
143};
144
145// =============================================================================
146// Core Modules
147// =============================================================================
148
149/// Core simulation engine for deterministic testing.
150pub mod sim;
151
152/// Simulation runner and orchestration framework.
153pub mod runner;
154
155/// Chaos testing infrastructure for deterministic fault injection.
156pub mod chaos;
157
158/// Provider implementations for simulation.
159pub mod providers;
160
161/// Network simulation and configuration.
162pub mod network;
163
164/// Storage simulation and configuration.
165pub mod storage;
166
167// =============================================================================
168// Public API Re-exports
169// =============================================================================
170
171// Sim module re-exports
172pub use sim::{
173    ConnectionStateChange, Event, EventQueue, NetworkOperation, ScheduledEvent, SimWorld,
174    SleepFuture, StorageOperation, WeakSimWorld, clear_rng_breakpoints, get_current_sim_seed,
175    get_rng_call_count, reset_rng_call_count, reset_sim_rng, set_rng_breakpoints, set_sim_seed,
176    sim_random, sim_random_range, sim_random_range_or_default,
177};
178
179// Runner module re-exports
180pub use runner::{
181    FaultContext, FaultInjector, IterationControl, PhaseConfig, SimContext, SimulationBuilder,
182    SimulationMetrics, SimulationReport, TokioReport, TokioRunner, Workload, WorkloadCount,
183    WorkloadTopology,
184};
185
186// Chaos module re-exports
187pub use chaos::{
188    AssertionStats, Invariant, StateHandle, buggify_init, buggify_reset, get_assertion_results,
189    has_always_violations, invariant_fn, panic_on_assertion_violations, reset_always_violations,
190    reset_assertion_results, validate_assertion_contracts,
191};
192
193// Network exports
194pub use network::{
195    ChaosConfiguration, ConnectFailureMode, NetworkConfiguration, SimNetworkProvider,
196    sample_duration,
197};
198
199// Storage exports
200pub use storage::{
201    InMemoryStorage, SECTOR_SIZE, SectorBitSet, SimStorageProvider, StorageConfiguration,
202};
203
204// Provider exports
205pub use providers::{SimProviders, SimRandomProvider, SimTimeProvider};
206
207// Explorer re-exports
208pub use moonpool_explorer::{
209    AdaptiveConfig, AssertCmp, AssertKind, ExplorationConfig, Parallelism, format_timeline,
210    parse_timeline,
211};
212pub use runner::report::{BugRecipe, ExplorationReport};
213
214// Macros are automatically available at crate root when defined with #[macro_export]