moonpool_sim/network/
config.rs

1use crate::sim::rng::{sim_random_range, sim_random_range_or_default};
2use std::ops::Range;
3use std::time::Duration;
4
/// How simulated connection attempts are made to fail under fault injection.
///
/// Selects the failure behaviour applied to connection establishment during
/// chaos testing.
/// FDB ref: sim2.actor.cpp:1243-1250 (SIM_CONNECT_ERROR_MODE)
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum ConnectFailureMode {
    /// No connection failures are injected. This is the `Default` variant.
    #[default]
    Disabled,
    /// When buggified, the attempt always fails with `ConnectionRefused`.
    AlwaysFail,
    /// Probabilistic outcome: 50% fail with `ConnectionRefused`, 50% hang forever.
    Probabilistic,
}
19
20impl ConnectFailureMode {
21    /// Create a random failure mode for chaos testing
22    pub fn random_for_seed() -> Self {
23        match sim_random_range(0..3) {
24            0 => Self::Disabled,
25            1 => Self::AlwaysFail,
26            _ => Self::Probabilistic,
27        }
28    }
29}
30
31/// Configuration for chaos injection in simulations.
32///
33/// This struct contains all settings related to fault injection and chaos testing,
34/// following FoundationDB's BUGGIFY patterns for deterministic testing.
35#[derive(Debug, Clone)]
36pub struct ChaosConfiguration {
37    /// Clogging probability for individual writes (0.0 - 1.0)
38    pub clog_probability: f64,
39    /// Duration range for clog delays
40    pub clog_duration: Range<Duration>,
41
42    /// Network partition probability (0.0 - 1.0)
43    pub partition_probability: f64,
44    /// Duration range for network partitions
45    pub partition_duration: Range<Duration>,
46
47    /// Bit flip probability for packet corruption (0.0 - 1.0)
48    pub bit_flip_probability: f64,
49    /// Minimum number of bits to flip (power-law distribution lower bound)
50    pub bit_flip_min_bits: u32,
51    /// Maximum number of bits to flip (power-law distribution upper bound)
52    pub bit_flip_max_bits: u32,
53    /// Cooldown duration after bit flip to prevent excessive corruption
54    pub bit_flip_cooldown: Duration,
55
56    /// Maximum bytes for partial write simulation (BUGGIFY truncates writes to 0-max_bytes)
57    /// Following FDB's approach of truncating writes to test TCP backpressure handling
58    pub partial_write_max_bytes: usize,
59
60    /// Random connection close probability per I/O operation (0.0 - 1.0)
61    /// FDB default: 0.00001 (0.001%) - see sim2.actor.cpp:584
62    pub random_close_probability: f64,
63
64    /// Cooldown duration after a random close event (prevents cascading failures)
65    /// FDB uses connectionFailuresDisableDuration - see sim2.actor.cpp:583
66    pub random_close_cooldown: Duration,
67
68    /// Ratio of explicit exceptions vs silent failures (0.0 - 1.0)
69    /// FDB default: 0.3 (30% explicit) - see sim2.actor.cpp:602
70    pub random_close_explicit_ratio: f64,
71
72    /// Packet loss probability (0.0 - 1.0)
73    /// When non-zero, poll_write succeeds but data is probabilistically dropped
74    /// and never delivered, simulating unreliable networks. Higher-level protocols
75    /// must rely on timeouts to detect this.
76    pub packet_loss_probability: f64,
77
78    /// Enable clock drift simulation
79    /// When enabled, timer() can return a time up to clock_drift_max ahead of now()
80    /// FDB ref: sim2.actor.cpp:1058-1064
81    pub clock_drift_enabled: bool,
82
83    /// Maximum clock drift (default 100ms per FDB)
84    /// timer() can be up to this much ahead of now()
85    pub clock_drift_max: Duration,
86
87    /// Enable buggified delays on sleep/timer operations
88    /// When enabled, 25% of sleep operations get extra delay
89    /// FDB ref: sim2.actor.cpp:1100-1105
90    pub buggified_delay_enabled: bool,
91
92    /// Maximum additional delay for buggified sleep (default 100ms)
93    /// Uses power-law distribution: max_delay * pow(random01(), 1000.0)
94    /// FDB ref: sim2.actor.cpp:1104
95    pub buggified_delay_max: Duration,
96
97    /// Probability of adding buggified delay (default 25% per FDB)
98    pub buggified_delay_probability: f64,
99
100    /// Connection establishment failure mode (per FDB)
101    /// FDB ref: sim2.actor.cpp:1243-1250 (SIM_CONNECT_ERROR_MODE)
102    pub connect_failure_mode: ConnectFailureMode,
103
104    /// Probability of connect failure when Probabilistic mode is enabled (default 50%)
105    pub connect_failure_probability: f64,
106}
107
108impl Default for ChaosConfiguration {
109    fn default() -> Self {
110        Self {
111            clog_probability: 0.0,
112            clog_duration: Duration::from_millis(100)..Duration::from_millis(300),
113            partition_probability: 0.0,
114            partition_duration: Duration::from_millis(200)..Duration::from_secs(2),
115            bit_flip_probability: 0.0001, // 0.01% - matches FDB's BUGGIFY_WITH_PROB(0.0001)
116            bit_flip_min_bits: 1,
117            bit_flip_max_bits: 32,
118            bit_flip_cooldown: Duration::ZERO, // No cooldown by default for maximum chaos
119            partial_write_max_bytes: 1000,     // Matches FDB's randomInt(0, 1000)
120            random_close_probability: 0.00001, // 0.001% - matches FDB's sim2.actor.cpp:584
121            random_close_cooldown: Duration::from_secs(5), // Reasonable default
122            random_close_explicit_ratio: 0.3,  // 30% explicit - matches FDB's sim2.actor.cpp:602
123            packet_loss_probability: 0.0,      // Disabled by default
124            clock_drift_enabled: true,         // Enable by default for chaos testing
125            clock_drift_max: Duration::from_millis(100), // FDB default: 0.1 seconds
126            buggified_delay_enabled: true,     // Enable by default for chaos testing
127            buggified_delay_max: Duration::from_millis(100), // FDB: MAX_BUGGIFIED_DELAY
128            buggified_delay_probability: 0.25, // FDB: random01() < 0.25
129            connect_failure_mode: ConnectFailureMode::Probabilistic, // FDB: SIM_CONNECT_ERROR_MODE = 2
130            connect_failure_probability: 0.5,                        // FDB: random01() > 0.5
131        }
132    }
133}
134
135impl ChaosConfiguration {
136    /// Create a configuration with all chaos disabled (for fast local testing)
137    pub fn disabled() -> Self {
138        Self {
139            clog_probability: 0.0,
140            clog_duration: Duration::ZERO..Duration::ZERO,
141            partition_probability: 0.0,
142            partition_duration: Duration::ZERO..Duration::ZERO,
143            bit_flip_probability: 0.0,
144            bit_flip_min_bits: 1,
145            bit_flip_max_bits: 32,
146            bit_flip_cooldown: Duration::ZERO,
147            partial_write_max_bytes: 1000,
148            random_close_probability: 0.0,
149            random_close_cooldown: Duration::ZERO,
150            random_close_explicit_ratio: 0.3,
151            packet_loss_probability: 0.0,
152            clock_drift_enabled: false,
153            clock_drift_max: Duration::from_millis(100),
154            buggified_delay_enabled: false,
155            buggified_delay_max: Duration::from_millis(100),
156            buggified_delay_probability: 0.25,
157            connect_failure_mode: ConnectFailureMode::Disabled,
158            connect_failure_probability: 0.5,
159        }
160    }
161
162    /// Create a randomized chaos configuration for seed-based testing
163    pub fn random_for_seed() -> Self {
164        Self {
165            clog_probability: sim_random_range(0..20) as f64 / 100.0, // 0-20% for clogging
166            clog_duration: Duration::from_micros(sim_random_range(50000..300000))
167                ..Duration::from_micros(sim_random_range(100000..500000)),
168            partition_probability: sim_random_range(0..15) as f64 / 100.0, // 0-15% (lower than faults)
169            partition_duration: Duration::from_millis(sim_random_range(100..1000))
170                ..Duration::from_millis(sim_random_range(500..3000)),
171            // Bit flip probability range: 0.001% to 0.02% (very low, like FDB)
172            bit_flip_probability: sim_random_range(1..20) as f64 / 100000.0,
173            bit_flip_min_bits: 1,
174            bit_flip_max_bits: 32,
175            bit_flip_cooldown: Duration::from_millis(sim_random_range(0..100)),
176            partial_write_max_bytes: sim_random_range(100..2000), // Vary max bytes for different scenarios
177            // Random close probability: 0.0001% to 0.01% (very low, like FDB)
178            random_close_probability: sim_random_range(1..100) as f64 / 1000000.0,
179            random_close_cooldown: Duration::from_millis(sim_random_range(1000..10000)),
180            random_close_explicit_ratio: sim_random_range(20..40) as f64 / 100.0, // 20-40%
181            // Packet loss probability: 0-5% (low but noticeable)
182            packet_loss_probability: sim_random_range(0..50) as f64 / 1000.0,
183            clock_drift_enabled: true,
184            clock_drift_max: Duration::from_millis(sim_random_range(50..150)), // 50-150ms
185            buggified_delay_enabled: true,
186            buggified_delay_max: Duration::from_millis(sim_random_range(50..150)), // 50-150ms
187            buggified_delay_probability: sim_random_range(20..30) as f64 / 100.0,  // 20-30%
188            connect_failure_mode: ConnectFailureMode::random_for_seed(),
189            connect_failure_probability: sim_random_range(40..60) as f64 / 100.0, // 40-60%
190        }
191    }
192}
193
194/// Configuration for network simulation parameters
195#[derive(Debug, Clone)]
196pub struct NetworkConfiguration {
197    /// Latency range for bind operations
198    pub bind_latency: Range<Duration>,
199    /// Latency range for accept operations
200    pub accept_latency: Range<Duration>,
201    /// Latency range for connect operations
202    pub connect_latency: Range<Duration>,
203    /// Latency range for read operations
204    pub read_latency: Range<Duration>,
205    /// Latency range for write operations
206    pub write_latency: Range<Duration>,
207
208    /// Chaos injection configuration
209    pub chaos: ChaosConfiguration,
210}
211
212impl Default for NetworkConfiguration {
213    fn default() -> Self {
214        Self {
215            bind_latency: Duration::from_micros(50)..Duration::from_micros(150),
216            accept_latency: Duration::from_millis(1)..Duration::from_millis(6),
217            connect_latency: Duration::from_millis(1)..Duration::from_millis(11),
218            read_latency: Duration::from_micros(10)..Duration::from_micros(60),
219            write_latency: Duration::from_micros(100)..Duration::from_micros(600),
220            chaos: ChaosConfiguration::default(),
221        }
222    }
223}
224
225/// Sample a random duration from a range
226pub fn sample_duration(range: &Range<Duration>) -> Duration {
227    let start_nanos = range.start.as_nanos() as u64;
228    let end_nanos = range.end.as_nanos() as u64;
229    let random_nanos = sim_random_range_or_default(start_nanos..end_nanos);
230    Duration::from_nanos(random_nanos)
231}
232
233impl NetworkConfiguration {
234    /// Create a new network configuration with default settings
235    pub fn new() -> Self {
236        Self::default()
237    }
238
239    /// Create a randomized network configuration for chaos testing
240    pub fn random_for_seed() -> Self {
241        Self {
242            bind_latency: Duration::from_micros(sim_random_range(10..200))
243                ..Duration::from_micros(sim_random_range(50..300)),
244            accept_latency: Duration::from_micros(sim_random_range(1000..10000))
245                ..Duration::from_micros(sim_random_range(5000..15000)),
246            connect_latency: Duration::from_micros(sim_random_range(1000..50000))
247                ..Duration::from_micros(sim_random_range(10000..100000)),
248            read_latency: Duration::from_micros(sim_random_range(5..100))
249                ..Duration::from_micros(sim_random_range(50..200)),
250            write_latency: Duration::from_micros(sim_random_range(50..1000))
251                ..Duration::from_micros(sim_random_range(200..2000)),
252            chaos: ChaosConfiguration::random_for_seed(),
253        }
254    }
255
256    /// Create a configuration optimized for fast local testing
257    pub fn fast_local() -> Self {
258        let one_us = Duration::from_micros(1);
259        let ten_us = Duration::from_micros(10);
260        Self {
261            bind_latency: one_us..one_us,
262            accept_latency: ten_us..ten_us,
263            connect_latency: ten_us..ten_us,
264            read_latency: one_us..one_us,
265            write_latency: one_us..one_us,
266            chaos: ChaosConfiguration::disabled(),
267        }
268    }
269}