moonpool_sim/lib.rs
1//! # Moonpool Simulation Framework
2//!
3//! Deterministic simulation for testing distributed systems, inspired by
4//! [FoundationDB's simulation testing](https://apple.github.io/foundationdb/testing.html).
5//!
6//! ## Why Deterministic Simulation?
7//!
8//! FoundationDB's insight: **bugs hide in error paths**. Production code rarely
9//! exercises timeout handlers, retry logic, or failure recovery. Deterministic
10//! simulation with fault injection finds these bugs before production does.
11//!
12//! Key properties:
13//! - **Reproducible**: Same seed produces identical execution
14//! - **Comprehensive**: Tests all failure modes (network, timing, corruption)
15//! - **Fast**: Logical time skips idle periods
16//!
17//! ## Core Components
18//!
19//! - [`SimWorld`]: The simulation runtime managing events and time
20//! - [`SimulationBuilder`]: Configure and run simulations
21//! - [`chaos`]: Fault injection (buggify, 14 assertion macros, invariants)
22//! - [`storage`]: Storage simulation with fault injection
23//! - Multiverse exploration via `moonpool-explorer`, whose [`ExplorationConfig`] and [`AdaptiveConfig`] are re-exported here
24//!
25//! ## Quick Start
26//!
27//! ```ignore
28//! use moonpool_sim::{SimulationBuilder, WorkloadTopology};
29//!
30//! SimulationBuilder::new()
31//!     .topology(WorkloadTopology::ClientServer { clients: 2, servers: 1 })
32//!     .run(|ctx| async move {
33//!         // Your distributed system workload
34//!     });
35//! ```
36//!
37//! ## Fault Injection Overview
38//!
39//! See [`chaos`] module for detailed documentation.
40//!
41//! | Mechanism | Default | What it tests |
42//! |-----------|---------|---------------|
43//! | TCP latencies | 1-11ms connect | Async scheduling |
44//! | Random connection close | 0.001% | Reconnection, redelivery |
45//! | Bit flip corruption | 0.01% | Checksum validation |
46//! | Connect failure | 50% probabilistic | Timeout handling, retries |
47//! | Clock drift | 100ms max | Leases, heartbeats |
48//! | Buggified delays | 25% | Race conditions |
49//! | Partial writes | 1000 bytes max | Message fragmentation |
50//! | Packet loss | disabled | At-least-once delivery |
51//! | Network partitions | disabled | Split-brain handling |
52//! | Storage corruption | configurable | Checksum validation, recovery |
53//! | Torn writes | configurable | Write atomicity, journaling |
54//! | Sync failures | configurable | Durability guarantees |
55//!
56//! ## Multi-Seed Testing
57//!
58//! Tests run across multiple seeds to explore the state space:
59//!
60//! ```ignore
61//! SimulationBuilder::new()
62//!     .run_count(IterationControl::UntilAllSometimesReached(1000))
63//!     .run(workload);
64//! ```
65//!
66//! Debugging a failing seed:
67//!
68//! ```ignore
69//! SimulationBuilder::new()
70//!     .set_seed(failing_seed)
71//!     .run_count(IterationControl::FixedCount(1))
72//!     .run(workload);
73//! ```
74//!
75//! ## Coverage-Preserving Multi-Seed Exploration
76//!
77//! When exploration is enabled, multiple seeds share coverage context. The
78//! explored map (coverage bitmap union) and assertion watermarks are preserved
79//! between seeds so each subsequent seed focuses energy on genuinely new
80//! branches rather than re-treading already-discovered paths.
81//!
82//! A **warm start** mechanism reduces wasted forks: on seeds after the first,
83//! marks whose first probe batch finds zero new coverage bits stop after
84//! `warm_min_timelines` forks instead of the full `min_timelines`.
85//!
86//! ```text
87//! Seed 1 (cold start)              Seed 2 (warm start)              Seed 3 (warm start)
88//! energy: 400K                     energy: 400K                     energy: 400K
89//!
90//! root ─┬─ mark A ──> 400          root ─┬─ mark A ──> 30           root ─┬─ mark A ──> 30
91//!       │  (new bits!) forks             │  (barren!)   skip              │  (barren!)   skip
92//!       │                                │                                │
93//!       ├─ mark B ──> 400                ├─ mark B ──> 30                 ├─ mark B ──> 30
94//!       │  (new bits!) forks             │  (barren!)   skip              │  (barren!)   skip
95//!       │                                │                                │
96//!       └─ mark C ──> 400                ├─ mark C ──> 30                 ├─ mark C ──> 30
97//!          (new bits!) forks             │  (barren!)   skip              │  (barren!)   skip
98//!                                        │                                │
99//!                                        └─ mark D ──> 400                └─ mark E ──> 400
100//!                                           (NEW bits!) forks                (NEW bits!) forks
101//!                       │                                │                                │
102//!      ┌────────────────┘               ┌────────────────┘               ┌────────────────┘
103//!      v                                v                                v
104//! ┌──────────┐   ──preserved──>    ┌──────────┐   ──preserved──>    ┌──────────┐
105//! │ explored │    coverage map     │ explored │    coverage map     │ explored │
106//! │ map:     │    + watermarks     │ map:     │    + watermarks     │ map:     │
107//! │ A,B,C    │                     │ A,B,C,D  │                     │ A,B,C,D,E│
108//! └──────────┘                     └──────────┘                     └──────────┘
109//!
110//! Total: each seed spends most energy on NEW discoveries.
111//! Warm marks (e.g. A,B,C on seed 2) exit after warm_min_timelines (30) forks
112//! instead of min_timelines (400), saving ~92% of the forks per barren mark.
113//! ```
114//!
115//! ```ignore
116//! SimulationBuilder::new()
117//!     .set_iterations(3) // 3 root seeds with coverage forwarding
118//!     .enable_exploration(ExplorationConfig {
119//!         max_depth: 120,
120//!         timelines_per_split: 4,
121//!         global_energy: 400_000, // per-seed energy budget
122//!         adaptive: Some(AdaptiveConfig {
123//!             batch_size: 30,
124//!             min_timelines: 400,
125//!             max_timelines: 1_000,
126//!             per_mark_energy: 10_000,
127//!             warm_min_timelines: Some(30), // quick skip for barren warm marks
128//!         }),
129//!         parallelism: Some(Parallelism::HalfCores),
130//!     })
131//!     .workload(my_workload);
132//! ```
133
134#![deny(missing_docs)]
135#![deny(clippy::unwrap_used)]
136
137// Re-export core types for convenience
138pub use moonpool_core::{
139 CodecError, Endpoint, JsonCodec, MessageCodec, NetworkAddress, NetworkAddressParseError,
140 NetworkProvider, Providers, RandomProvider, SimulationError, SimulationResult, TaskProvider,
141 TcpListenerTrait, TimeError, TimeProvider, TokioNetworkProvider, TokioProviders,
142 TokioTaskProvider, TokioTimeProvider, UID, WELL_KNOWN_RESERVED_COUNT, WellKnownToken,
143};
144
145// =============================================================================
146// Core Modules
147// =============================================================================
148
149/// Core simulation engine: the [`SimWorld`] runtime, its event queue, and the seeded sim RNG.
150pub mod sim;
151
152/// Simulation runner and orchestration: [`SimulationBuilder`], workloads, and run reports.
153pub mod runner;
154
155/// Chaos testing infrastructure for deterministic fault injection (buggify, assertions, invariants).
156pub mod chaos;
157
158/// Provider implementations for simulation (see [`SimProviders`]).
159pub mod providers;
160
161/// Network simulation and configuration (see [`SimNetworkProvider`], [`NetworkConfiguration`]).
162pub mod network;
163
164/// Storage simulation and configuration (see [`SimStorageProvider`], [`StorageConfiguration`]).
165pub mod storage;
166
167// =============================================================================
168// Public API Re-exports
169// =============================================================================
170
171// Sim module re-exports
172pub use sim::{
173 ConnectionStateChange, Event, EventQueue, NetworkOperation, ScheduledEvent, SimWorld,
174 SleepFuture, StorageOperation, WeakSimWorld, clear_rng_breakpoints, get_current_sim_seed,
175 get_rng_call_count, reset_rng_call_count, reset_sim_rng, set_rng_breakpoints, set_sim_seed,
176 sim_random, sim_random_range, sim_random_range_or_default,
177};
178
179// Runner module re-exports
180pub use runner::{
181 FaultContext, FaultInjector, IterationControl, PhaseConfig, SimContext, SimulationBuilder,
182 SimulationMetrics, SimulationReport, TokioReport, TokioRunner, Workload, WorkloadCount,
183 WorkloadTopology,
184};
185
186// Chaos module re-exports
187pub use chaos::{
188 AssertionStats, Invariant, StateHandle, buggify_init, buggify_reset, get_assertion_results,
189 has_always_violations, invariant_fn, panic_on_assertion_violations, reset_always_violations,
190 reset_assertion_results, validate_assertion_contracts,
191};
192
193// Network exports
194pub use network::{
195 ChaosConfiguration, ConnectFailureMode, NetworkConfiguration, SimNetworkProvider,
196 sample_duration,
197};
198
199// Storage exports
200pub use storage::{
201 InMemoryStorage, SECTOR_SIZE, SectorBitSet, SimStorageProvider, StorageConfiguration,
202};
203
204// Provider exports
205pub use providers::{SimProviders, SimRandomProvider, SimTimeProvider};
206
207// Explorer re-exports
208pub use moonpool_explorer::{
209 AdaptiveConfig, AssertCmp, AssertKind, ExplorationConfig, Parallelism, format_timeline,
210 parse_timeline,
211};
212pub use runner::report::{BugRecipe, ExplorationReport};
213
214// Macros are automatically available at crate root when defined with #[macro_export]