ant_node/replication/config.rs
1//! Tunable parameters for the replication subsystem.
2//!
3//! All values below are a reference profile used for logic validation.
4//! Parameter safety constraints (Section 4):
5//! 1. `1 <= QUORUM_THRESHOLD <= CLOSE_GROUP_SIZE`
6//! 2. Effective paid-list threshold is per-key dynamic:
7//! `ConfirmNeeded(K) = floor(PaidGroupSize(K)/2)+1`
8//! 3. If constraints are violated at runtime reconfiguration, node MUST reject
9//! the config.
10
11#![allow(clippy::module_name_repetitions)]
12
13use std::time::Duration;
14
15use rand::Rng;
16
17use crate::ant_protocol::{CLOSE_GROUP_SIZE, MAX_CHUNK_SIZE};
18
19// ---------------------------------------------------------------------------
20// Static constants (compile-time reference profile)
21// ---------------------------------------------------------------------------
22
/// Capacity of one Kademlia k-bucket: the most peers the routing table keeps
/// per bucket.
pub const K_BUCKET_SIZE: usize = 20;

/// Slack, in local-routing-table positions, granted beyond the close group
/// when admitting a chunk to local storage or deciding whether a stored
/// record may be pruned.
///
/// Peers' routing tables rarely agree exactly; this margin tolerates that
/// small disagreement. It applies to storage admission and pruning only:
/// audit, quorum, and paid-list target sets keep their strict
/// `close_group_size` / paid-list group checks.
pub const STORAGE_ADMISSION_MARGIN: usize = 2;
33
/// Positive presence votes required for quorum on a fully populated network.
///
/// The per-key requirement can be lower when few targets are reachable:
/// `QuorumNeeded(K) = min(QUORUM_THRESHOLD, floor(|QuorumTargets|/2)+1)`.
///
/// Hand-maintained literal intended to equal
/// `floor(CLOSE_GROUP_SIZE / 2) + 1`; revisit it if `CLOSE_GROUP_SIZE` changes.
pub const QUORUM_THRESHOLD: usize = 4;

/// Upper bound on how many of a key's closest nodes track its paid status.
pub const PAID_LIST_CLOSE_GROUP_SIZE: usize = 20;

/// How many of the peers closest to this node are eligible for neighbor sync.
pub const NEIGHBOR_SYNC_SCOPE: usize = 20;

/// How many close-neighbor peers are synced concurrently in one round-robin
/// repair round.
pub const NEIGHBOR_SYNC_PEER_COUNT: usize = 4;
49
/// Per-peer retry budget for delivering a fresh-replication push
/// (best effort).
///
/// ADR-0003: when a send fails at the transport level the offer is retried up
/// to this many times, so a transient hiccup does not silently drop it. This
/// only concerns delivery. Possession is judged separately by the delayed
/// possession check, which still penalises a close peer that lacks the chunk
/// even when the push never reached it.
pub const FRESH_REPLICATION_DELIVERY_MAX_RETRIES: u32 = 2;

/// Seconds backing [`POSSESSION_CHECK_DELAY_MIN`] (5 minutes).
const POSSESSION_CHECK_DELAY_MIN_SECS: u64 = 5 * 60;
/// Seconds backing [`POSSESSION_CHECK_DELAY_MAX`] (15 minutes).
const POSSESSION_CHECK_DELAY_MAX_SECS: u64 = 15 * 60;

/// Shortest wait before a fresh-replication possession check runs (ADR-0003).
///
/// Waiting gives replication time to settle, so an honest peer that is still
/// mid-store is not judged too early, and it keeps the timing of the check
/// unpredictable to the peer.
pub const POSSESSION_CHECK_DELAY_MIN: Duration =
    Duration::from_secs(POSSESSION_CHECK_DELAY_MIN_SECS);

/// Longest wait before a fresh-replication possession check runs (ADR-0003).
pub const POSSESSION_CHECK_DELAY_MAX: Duration =
    Duration::from_secs(POSSESSION_CHECK_DELAY_MAX_SECS);
73
74// The possession probe reuses the `AuditChallenge` wire and the bandwidth-
75// calibrated `audit_response_timeout(1)` deadline, so it needs no bespoke
76// per-probe timeout or retry constants.
77
78/// Width used when deciding whether this node may locally store or retain a
79/// chunk.
80#[must_use]
81pub const fn storage_admission_width(close_group_size: usize) -> usize {
82 close_group_size.saturating_add(STORAGE_ADMISSION_MARGIN)
83}
84
/// Seconds backing [`NEIGHBOR_SYNC_INTERVAL_MIN`] (10 minutes). The actual
/// interval is randomized within `[min, max]`.
const NEIGHBOR_SYNC_INTERVAL_MIN_SECS: u64 = 10 * 60;
/// Seconds backing [`NEIGHBOR_SYNC_INTERVAL_MAX`] (20 minutes).
const NEIGHBOR_SYNC_INTERVAL_MAX_SECS: u64 = 20 * 60;

/// Lower end of the neighbor-sync cadence range.
pub const NEIGHBOR_SYNC_INTERVAL_MIN: Duration =
    Duration::from_secs(NEIGHBOR_SYNC_INTERVAL_MIN_SECS);

/// Upper end of the neighbor-sync cadence range.
pub const NEIGHBOR_SYNC_INTERVAL_MAX: Duration =
    Duration::from_secs(NEIGHBOR_SYNC_INTERVAL_MAX_SECS);

/// Seconds backing [`NEIGHBOR_SYNC_COOLDOWN`] (1 hour).
const NEIGHBOR_SYNC_COOLDOWN_SECS: u64 = 60 * 60;
/// Minimum spacing between two successive syncs with the same peer.
pub const NEIGHBOR_SYNC_COOLDOWN: Duration = Duration::from_secs(NEIGHBOR_SYNC_COOLDOWN_SECS);

/// Seconds backing [`REPAIR_HINT_MIN_AGE`] (1 hour).
const REPAIR_HINT_MIN_AGE_SECS: u64 = 60 * 60;
/// How old a replica repair hint must be before the hinted peer can be
/// audited for that key.
pub const REPAIR_HINT_MIN_AGE: Duration = Duration::from_secs(REPAIR_HINT_MIN_AGE_SECS);

/// Seconds backing [`SELF_LOOKUP_INTERVAL_MIN`] (5 minutes).
const SELF_LOOKUP_INTERVAL_MIN_SECS: u64 = 5 * 60;
/// Seconds backing [`SELF_LOOKUP_INTERVAL_MAX`] (10 minutes).
const SELF_LOOKUP_INTERVAL_MAX_SECS: u64 = 10 * 60;

/// Lower end of the periodic self-lookup cadence range. Self-lookups keep the
/// close neighborhood current.
pub const SELF_LOOKUP_INTERVAL_MIN: Duration = Duration::from_secs(SELF_LOOKUP_INTERVAL_MIN_SECS);

/// Upper end of the periodic self-lookup cadence range.
pub const SELF_LOOKUP_INTERVAL_MAX: Duration = Duration::from_secs(SELF_LOOKUP_INTERVAL_MAX_SECS);
122
/// Engine-wide cap on concurrent outbound replication sends.
///
/// Limits how many fresh-replication chunk transfers may be in flight at the
/// same time, so that several chunks arriving together do not saturate a home
/// broadband uplink. Each send carries up to 4 MB (`MAX_CHUNK_SIZE`); with a
/// limit of 3, at most ~12 MB is queued for the upload link at any instant.
pub const MAX_CONCURRENT_REPLICATION_SENDS: usize = 3;

/// Engine-wide cap on concurrent in-flight audit-responder tasks.
///
/// The responsible-chunk (audit #2), subtree (round 1), and byte (round 2)
/// challenge handlers are spawned off the serial replication message loop so
/// that their disk reads do not stall replication. This cap restores
/// backpressure for them: a peer flooding audit challenges cannot fan out
/// unbounded `get_raw` reads or multi-MiB byte serves.
///
/// Once the cap is reached the challenge is dropped and the caller's
/// audit-specific timeout policy applies, so the value has to stay high
/// enough for honest audit traffic while still throttling flooders. It is
/// sized for a handful of concurrent honest auditors (the per-peer
/// gossip-audit cooldown is 30 min, so genuinely concurrent audits are few)
/// and it bounds the byte round's worst-case resident bytes
/// (`N × MAX_BYTE_CHALLENGE_KEYS × MAX_CHUNK_SIZE`).
pub const MAX_CONCURRENT_AUDIT_RESPONSES: usize = 16;

/// Cap on concurrent in-flight audit-responder tasks from any SINGLE peer.
///
/// The global [`MAX_CONCURRENT_AUDIT_RESPONSES`] ceiling is not flood-fair on
/// its own: one peer spamming challenges could hold every slot and starve
/// honest auditors, whose dropped challenges turn into audit failures or
/// timeout verdicts on the challenged peers. With a per-peer cap no source
/// holds more than its share, so a flood throttles itself.
///
/// Audits are cooldown-gated (one gossip-triggered audit per peer per
/// 30 min), so 2 in-flight tasks per peer comfortably cover the legitimate
/// round-1 + round-2 overlap.
pub const MAX_AUDIT_RESPONSES_PER_PEER: u32 = 2;
159
/// Fetch-concurrency value used when the OS cannot report its available
/// parallelism.
const AVAILABLE_PARALLELISM_FALLBACK: usize = 4;

/// Concurrent-fetch cap, derived from the hardware thread count.
///
/// Asks `std::thread::available_parallelism()` so the node scales to the
/// machine it runs on, and returns [`AVAILABLE_PARALLELISM_FALLBACK`] when
/// that query fails. The result is always at least 1.
pub fn max_parallel_fetch() -> usize {
    // A `match` with a method call avoids naming the generic
    // `std::num::NonZero` path (stable only since Rust 1.79), so no MSRV lint
    // suppression is needed, and there is no closure for clippy to flag as
    // redundant.
    match std::thread::available_parallelism() {
        Ok(threads) => threads.get(),
        Err(_) => AVAILABLE_PARALLELISM_FALLBACK,
    }
}
173
/// Seconds backing [`AUDIT_TICK_INTERVAL_MIN`] (10 minutes).
const AUDIT_TICK_INTERVAL_MIN_SECS: u64 = 10 * 60;
/// Seconds backing [`AUDIT_TICK_INTERVAL_MAX`] (20 minutes).
const AUDIT_TICK_INTERVAL_MAX_SECS: u64 = 20 * 60;

/// Lower end of the audit-scheduler cadence range.
pub const AUDIT_TICK_INTERVAL_MIN: Duration = Duration::from_secs(AUDIT_TICK_INTERVAL_MIN_SECS);

/// Upper end of the audit-scheduler cadence range.
pub const AUDIT_TICK_INTERVAL_MAX: Duration = Duration::from_secs(AUDIT_TICK_INTERVAL_MAX_SECS);
184
/// Minimum audit response deadline, applied regardless of challenge size.
///
/// The audit request and response are KB-scale messages, not chunk-scale, so
/// the floor only has to absorb worst-case global round-trip time plus
/// scheduling jitter. With Tokyo↔NY at roughly 150 ms each way, 2 seconds
/// comfortably covers a cross-continent exchange for the round-1 proof, whose
/// payload is hashes.
const AUDIT_RESPONSE_FLOOR_SECS: u64 = 2;

/// Minimum deadline for the round-2 BYTE challenge.
///
/// Round 1 returns KB of hashes; the byte challenge instead ships up to
/// `MAX_BYTE_CHALLENGE_KEYS` full chunks (2 × 4 MiB = 8 MiB) back over the
/// wire. Its envelope therefore also has to cover a cold QUIC handshake, the
/// multi-MiB upload to the auditor, and a disk read on a busy honest peer.
/// The 2 s round-1 floor was sized for a hashes-only reply and is too tight
/// here (the §4 finding). 5 s matches the cross-continent-RTT + handshake +
/// 8 MiB transfer budget while still leaving a relay that must fetch the
/// bytes over a residential link outside it; the scaled per-byte term is
/// added on top of this floor. Mirrors main's more generous byte-round base.
const BYTE_AUDIT_RESPONSE_FLOOR_SECS: u64 = 5;

/// Conservative read throughput of an honest responder, in bytes per second
/// (50 MiB/s).
///
/// Used to size the audit response deadline. An honest peer answers a k-key
/// challenge by reading k chunks from local disk, computing BLAKE3 + path
/// proofs, and signing the response. Disk read is the bottleneck; BLAKE3 at
/// ~3 GB/s and ML-DSA signing at ~3 ms are negligible.
///
/// The value sits deliberately below any modern SSD (typical: 500 MB/s+). At
/// this rate a k=10 sample of 4 MiB chunks reads in ~0.8 s, well inside even
/// an aggressive timeout. A relay attacker who must fetch the same 40 MB
/// over the network at typical bandwidth (100 Mbps = 12.5 MB/s) needs 3+
/// seconds for the data alone, plus per-chunk network round-trips. Both
/// costs scale with the number of bytes, so the relay's absolute shortfall
/// grows in proportion to the sample size.
const AUDIT_HONEST_READ_BPS: u64 = 50 * 1024 * 1024;

/// Slack factor applied to the honest-read estimate.
///
/// Chosen so that an honest peer slower than `AUDIT_HONEST_READ_BPS` (for
/// example an HDD-backed node, or one under load) still answers in time. 5×
/// is generous; a relay peer fetching the same data over a residential link
/// (~5-12 MB/s) sees ~10-100× higher latency than disk and misses the budget.
/// This is an economic deterrent calibrated for residential bandwidth, NOT a
/// hard cryptographic bound: a relay on a datacenter cross-connect could
/// still fetch fast enough to answer in time (see the §7 note on
/// `audit_response_timeout`).
const AUDIT_RESPONSE_HONEST_MULTIPLIER: u64 = 5;

/// Response deadline, in seconds, for a single-key prune audit.
///
/// A prune audit asks a peer whether it still holds one specific key it
/// previously claimed. The relay-defence rationale behind the tight
/// commitment-bound timeout does NOT apply here: the auditor's own
/// out-of-range hysteresis (`PRUNE_HYSTERESIS_DURATION`, 3 days) already
/// makes "fetch on demand" infeasible as a sustained strategy.
///
/// 10 s comfortably accommodates a cold cross-continent QUIC handshake plus
/// scheduling jitter on a busy honest peer answering a single-key challenge.
const PRUNE_AUDIT_RESPONSE_SECS: u64 = 10;
249
/// Seconds backing [`BOOTSTRAP_CLAIM_GRACE_PERIOD`] (24 hours).
const BOOTSTRAP_CLAIM_GRACE_PERIOD_SECS: u64 = 24 * 60 * 60;
/// Longest time a peer may keep claiming bootstrap status before penalties
/// apply.
pub const BOOTSTRAP_CLAIM_GRACE_PERIOD: Duration =
    Duration::from_secs(BOOTSTRAP_CLAIM_GRACE_PERIOD_SECS);

/// Seconds backing [`PRUNE_HYSTERESIS_DURATION`] (3 days).
const PRUNE_HYSTERESIS_DURATION_SECS: u64 = 3 * 24 * 60 * 60;
/// How long a key must stay continuously out of range before it is pruned.
pub const PRUNE_HYSTERESIS_DURATION: Duration = Duration::from_secs(PRUNE_HYSTERESIS_DURATION_SECS);
260
/// Protocol identifier under which replication traffic is routed.
///
/// Bumped to `v2` for the v12 storage-bound audit, which extends the wire
/// types: `NeighborSyncRequest`/`Response` carry an optional trailing
/// `StorageCommitment`, and the gossip-triggered storage-commitment audit adds
/// the `SubtreeAuditChallenge`/`SubtreeAuditResponse` and
/// `SubtreeByteChallenge`/`SubtreeByteResponse` messages.
///
/// The bump exists for SEMANTIC interop, not because decoding would fail.
/// Postcard tolerates the appended optional field: an old decoder reads the
/// fields it knows and ignores the trailer (pinned by the
/// `old_decoder_tolerates_new_neighbor_sync_*` tests in `protocol.rs`). But
/// tolerating bytes is not interoperating. A v1 node cannot decode the NEW
/// message variants at all (unknown enum discriminant) and never acts on a
/// piggybacked commitment, so mixed-version replication would half-function,
/// with audit challenges unanswered and commitments silently dropped, and a
/// v2 node could read that silence as misbehaviour.
///
/// Rather than reason about each such case, v12 replication uses a distinct
/// protocol id. A node only delivers messages whose topic matches its own id
/// (see the topic check in `mod.rs`), so v1 and v2 nodes simply do not
/// exchange replication traffic during a mixed-version window. That is the
/// rollout-safe behaviour: no half-interpreted exchange, no spurious
/// eviction. Replication between matched-version peers is unaffected. (DHT
/// routing/lookups are a separate protocol and continue to span both
/// versions.)
pub const REPLICATION_PROTOCOL_ID: &str = "autonomi.ant.replication.v2";
284
/// Replication wire-message cap expressed in MiB (10 MiB, which accommodates
/// hint batches).
const REPLICATION_MESSAGE_SIZE_MIB: usize = 10;
/// Largest replication wire message, in bytes.
pub const MAX_REPLICATION_MESSAGE_SIZE: usize = REPLICATION_MESSAGE_SIZE_MIB * 1024 * 1024;

/// Bytes set aside for the envelope (enum tags, ids, length prefixes) when a
/// round-2 byte-challenge batch is sized against the wire cap.
const BYTE_CHALLENGE_RESPONSE_HEADROOM: usize = 64 * 1024;
293
294/// Maximum keys per round-2 [`SubtreeByteChallenge`] (per-batch cap).
295///
296/// Sized so the WORST-CASE response (every requested chunk at
297/// `MAX_CHUNK_SIZE`) still encodes under [`MAX_REPLICATION_MESSAGE_SIZE`].
298/// The auditor splits its spot-check sample into batches of this size (one
299/// challenge per batch, same nonce/pin); the responder rejects any single
300/// challenge requesting more.
301///
302/// [`SubtreeByteChallenge`]: crate::replication::protocol::SubtreeByteChallenge
303pub const MAX_BYTE_CHALLENGE_KEYS: usize =
304 (MAX_REPLICATION_MESSAGE_SIZE - BYTE_CHALLENGE_RESPONSE_HEADROOM) / MAX_CHUNK_SIZE;
305const _: () = assert!(
306 MAX_BYTE_CHALLENGE_KEYS >= 1,
307 "wire cap must fit at least one max-size chunk per byte-challenge response"
308);
309
/// Seconds backing [`VERIFICATION_REQUEST_TIMEOUT`].
const VERIFICATION_REQUEST_TIMEOUT_SECS: u64 = 15;
/// How long to wait for a verification request; applies per batch.
pub const VERIFICATION_REQUEST_TIMEOUT: Duration =
    Duration::from_secs(VERIFICATION_REQUEST_TIMEOUT_SECS);

/// Seconds backing [`FETCH_REQUEST_TIMEOUT`].
const FETCH_REQUEST_TIMEOUT_SECS: u64 = 30;
/// How long to wait for a fetch request.
pub const FETCH_REQUEST_TIMEOUT: Duration = Duration::from_secs(FETCH_REQUEST_TIMEOUT_SECS);

/// Seconds backing [`PENDING_VERIFY_MAX_AGE`] (30 minutes).
const PENDING_VERIFY_MAX_AGE_SECS: u64 = 30 * 60;
/// Age beyond which a pending-verification entry is evicted as stale.
pub const PENDING_VERIFY_MAX_AGE: Duration = Duration::from_secs(PENDING_VERIFY_MAX_AGE_SECS);
325
/// Weight of the trust event recorded for a confirmed audit failure.
pub const AUDIT_FAILURE_TRUST_WEIGHT: f64 = 5.0;

/// Chance of starting a subtree audit when a peer's *changed* commitment is
/// ingested via gossip (ADR-0002). Keeps audits occasional surprise exams.
pub const AUDIT_ON_GOSSIP_PROBABILITY: f64 = 0.2;

/// Per-peer cooldown, in seconds, between gossip-triggered subtree audits
/// (ADR-0002). Bounds how often any single peer is audited, whatever the
/// gossip rate.
pub const AUDIT_ON_GOSSIP_COOLDOWN_SECS: u64 = 30 * 60;

/// Subtree leaves spot-checked against real chunk bytes in each audit
/// (ADR-0002 real-bytes layer).
///
/// The auditor clamps this to its 3..=5 band (`BYTE_SPOTCHECK_MIN..=MAX` in
/// `storage_commitment_audit`), so the value acts as the effective MAXIMUM.
/// Keep it inside the band rather than advertising a sample size the auditor
/// never requests.
pub const AUDIT_SPOTCHECK_COUNT: u32 = 5;

/// Conservative leaf count assumed when sizing the subtree-audit response
/// deadline.
///
/// The deadline is fixed before the proof arrives, so it is sized for the
/// largest legal store: `sqrt(MAX_COMMITMENT_KEY_COUNT) = 1000`. Honest small
/// stores finish well within it.
pub const SUBTREE_AUDIT_TIMEOUT_LEAF_HINT: usize = 1000;

/// Cap on the prune-confirmation audit challenges sent in one prune pass.
pub const MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS: usize = 64;

/// Seconds to wait for `DhtNetworkEvent::BootstrapComplete` before going
/// ahead with bootstrap sync. Covers bootstrap nodes that have no peers to
/// connect to.
const BOOTSTRAP_COMPLETE_TIMEOUT_SECS: u64 = 60;
359
360// ---------------------------------------------------------------------------
361// Runtime-configurable wrapper
362// ---------------------------------------------------------------------------
363
/// Replication parameters that can be adjusted at runtime.
///
/// The fields are public; call `validate` on a config before use, because the
/// node rejects configs that break the safety constraints.
#[derive(Debug, Clone)]
pub struct ReplicationConfig {
    /// Width of the close group, which is also the target number of holders
    /// per key.
    pub close_group_size: usize,
    /// Positive presence votes required to reach quorum.
    pub quorum_threshold: usize,
    /// Upper bound on the closest nodes tracking a key's paid status.
    pub paid_list_close_group_size: usize,
    /// How many of the peers closest to self are eligible for neighbor sync.
    pub neighbor_sync_scope: usize,
    /// Peers synced concurrently in each round-robin repair round.
    pub neighbor_sync_peer_count: usize,
    /// Lower end of the neighbor-sync cadence range.
    pub neighbor_sync_interval_min: Duration,
    /// Upper end of the neighbor-sync cadence range.
    pub neighbor_sync_interval_max: Duration,
    /// Minimum spacing between two successive syncs with the same peer.
    pub neighbor_sync_cooldown: Duration,
    /// Lower end of the self-lookup cadence range.
    pub self_lookup_interval_min: Duration,
    /// Upper end of the self-lookup cadence range.
    pub self_lookup_interval_max: Duration,
    /// Lower end of the audit-scheduler cadence range.
    pub audit_tick_interval_min: Duration,
    /// Upper end of the audit-scheduler cadence range.
    pub audit_tick_interval_max: Duration,
    /// Minimum audit response deadline. Covers global RTT for the small
    /// request/response envelope plus scheduling jitter; see
    /// `AUDIT_RESPONSE_FLOOR_SECS` for the sizing.
    pub audit_response_floor: Duration,
    /// Conservative read throughput of an honest responder (bytes/sec), used
    /// to scale the audit response deadline with the challenge size. Slow
    /// enough that even an HDD-backed honest peer fits the budget; fast
    /// enough that a relay attacker who must fetch bytes over the network
    /// falls outside it.
    pub audit_honest_read_bps: u64,
    /// Slack factor applied to the honest-read estimate before an audit is
    /// declared timed out.
    pub audit_response_honest_multiplier: u64,
    /// Response deadline for a single-key prune audit. Kept separate because
    /// the relay-defence rationale behind the tight commitment-bound budget
    /// does not apply to a single-key prune challenge.
    pub prune_audit_response_timeout: Duration,
    /// Longest time a peer may keep claiming bootstrap status.
    pub bootstrap_claim_grace_period: Duration,
    /// How long a key must stay continuously out of range before pruning.
    pub prune_hysteresis_duration: Duration,
    /// Verification request timeout (per batch).
    pub verification_request_timeout: Duration,
    /// Fetch request timeout.
    pub fetch_request_timeout: Duration,
    /// Seconds to wait for `DhtNetworkEvent::BootstrapComplete` before going
    /// ahead with bootstrap sync (covers bootstrap nodes with no peers).
    pub bootstrap_complete_timeout_secs: u64,
    /// Shortest wait before a fresh-replication possession check runs
    /// (ADR-0003). Defaults to [`POSSESSION_CHECK_DELAY_MIN`]; tests shorten
    /// it so the scheduled check fires quickly.
    pub possession_check_delay_min: Duration,
    /// Longest wait in the possession-check delay window (ADR-0003). Defaults
    /// to [`POSSESSION_CHECK_DELAY_MAX`].
    pub possession_check_delay_max: Duration,
}
430
431impl Default for ReplicationConfig {
432 fn default() -> Self {
433 Self {
434 close_group_size: CLOSE_GROUP_SIZE,
435 quorum_threshold: QUORUM_THRESHOLD,
436 paid_list_close_group_size: PAID_LIST_CLOSE_GROUP_SIZE,
437 neighbor_sync_scope: NEIGHBOR_SYNC_SCOPE,
438 neighbor_sync_peer_count: NEIGHBOR_SYNC_PEER_COUNT,
439 neighbor_sync_interval_min: NEIGHBOR_SYNC_INTERVAL_MIN,
440 neighbor_sync_interval_max: NEIGHBOR_SYNC_INTERVAL_MAX,
441 neighbor_sync_cooldown: NEIGHBOR_SYNC_COOLDOWN,
442 self_lookup_interval_min: SELF_LOOKUP_INTERVAL_MIN,
443 self_lookup_interval_max: SELF_LOOKUP_INTERVAL_MAX,
444 audit_tick_interval_min: AUDIT_TICK_INTERVAL_MIN,
445 audit_tick_interval_max: AUDIT_TICK_INTERVAL_MAX,
446 audit_response_floor: Duration::from_secs(AUDIT_RESPONSE_FLOOR_SECS),
447 audit_honest_read_bps: AUDIT_HONEST_READ_BPS,
448 audit_response_honest_multiplier: AUDIT_RESPONSE_HONEST_MULTIPLIER,
449 prune_audit_response_timeout: Duration::from_secs(PRUNE_AUDIT_RESPONSE_SECS),
450 bootstrap_claim_grace_period: BOOTSTRAP_CLAIM_GRACE_PERIOD,
451 prune_hysteresis_duration: PRUNE_HYSTERESIS_DURATION,
452 verification_request_timeout: VERIFICATION_REQUEST_TIMEOUT,
453 fetch_request_timeout: FETCH_REQUEST_TIMEOUT,
454 bootstrap_complete_timeout_secs: BOOTSTRAP_COMPLETE_TIMEOUT_SECS,
455 possession_check_delay_min: POSSESSION_CHECK_DELAY_MIN,
456 possession_check_delay_max: POSSESSION_CHECK_DELAY_MAX,
457 }
458 }
459}
460
461impl ReplicationConfig {
462 /// Validate safety constraints. Returns `Err` with a description if any
463 /// constraint is violated.
464 ///
465 /// # Errors
466 ///
467 /// Returns a human-readable message describing the first violated
468 /// constraint.
469 pub fn validate(&self) -> Result<(), String> {
470 if self.close_group_size == 0 {
471 return Err("close_group_size must be >= 1".to_string());
472 }
473 if self.quorum_threshold == 0 || self.quorum_threshold > self.close_group_size {
474 return Err(format!(
475 "quorum_threshold ({}) must satisfy 1 <= quorum_threshold <= close_group_size ({})",
476 self.quorum_threshold, self.close_group_size,
477 ));
478 }
479 if self.close_group_size > MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS {
480 return Err(format!(
481 "close_group_size ({}) must be <= MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS ({})",
482 self.close_group_size, MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS,
483 ));
484 }
485 if self.paid_list_close_group_size == 0 {
486 return Err("paid_list_close_group_size must be >= 1".to_string());
487 }
488 if self.neighbor_sync_interval_min > self.neighbor_sync_interval_max {
489 return Err(format!(
490 "neighbor_sync_interval_min ({:?}) must be <= neighbor_sync_interval_max ({:?})",
491 self.neighbor_sync_interval_min, self.neighbor_sync_interval_max,
492 ));
493 }
494 if self.audit_tick_interval_min > self.audit_tick_interval_max {
495 return Err(format!(
496 "audit_tick_interval_min ({:?}) must be <= audit_tick_interval_max ({:?})",
497 self.audit_tick_interval_min, self.audit_tick_interval_max,
498 ));
499 }
500 if self.self_lookup_interval_min > self.self_lookup_interval_max {
501 return Err(format!(
502 "self_lookup_interval_min ({:?}) must be <= self_lookup_interval_max ({:?})",
503 self.self_lookup_interval_min, self.self_lookup_interval_max,
504 ));
505 }
506 if self.neighbor_sync_peer_count == 0 {
507 return Err("neighbor_sync_peer_count must be >= 1".to_string());
508 }
509 if self.neighbor_sync_scope == 0 {
510 return Err("neighbor_sync_scope must be >= 1".to_string());
511 }
512 if self.neighbor_sync_scope > K_BUCKET_SIZE {
513 return Err(format!(
514 "neighbor_sync_scope ({}) must be <= K_BUCKET_SIZE ({})",
515 self.neighbor_sync_scope, K_BUCKET_SIZE,
516 ));
517 }
518 Ok(())
519 }
520
521 /// Effective quorum votes required for a key given the number of
522 /// reachable quorum targets.
523 ///
524 /// `min(self.quorum_threshold, floor(quorum_targets_count / 2) + 1)`
525 #[must_use]
526 pub fn quorum_needed(&self, quorum_targets_count: usize) -> usize {
527 if quorum_targets_count == 0 {
528 return 0;
529 }
530 let majority = quorum_targets_count / 2 + 1;
531 self.quorum_threshold.min(majority)
532 }
533
534 /// Confirmations required for paid-list consensus given the number of
535 /// peers in the paid-list close group for a key.
536 ///
537 /// `floor(paid_group_size / 2) + 1`
538 #[must_use]
539 pub fn confirm_needed(paid_group_size: usize) -> usize {
540 paid_group_size / 2 + 1
541 }
542
543 /// Returns a random duration in `[neighbor_sync_interval_min,
544 /// neighbor_sync_interval_max]`.
545 #[must_use]
546 pub fn random_neighbor_sync_interval(&self) -> Duration {
547 random_duration_in_range(
548 self.neighbor_sync_interval_min,
549 self.neighbor_sync_interval_max,
550 )
551 }
552
553 /// Compute the number of keys to sample for an audit round, scaled
554 /// dynamically by the total number of locally stored keys.
555 ///
556 /// Formula: `max(floor(sqrt(total_keys)), 1)`, capped at `total_keys`.
557 #[must_use]
558 pub fn audit_sample_count(total_keys: usize) -> usize {
559 #[allow(
560 clippy::cast_possible_truncation,
561 clippy::cast_sign_loss,
562 clippy::cast_precision_loss
563 )]
564 let sqrt = (total_keys as f64).sqrt() as usize;
565 sqrt.max(1).min(total_keys)
566 }
567
568 /// Maximum number of keys to accept in an incoming audit challenge.
569 ///
570 /// Scales dynamically: `2 * audit_sample_count(stored_chunks)`. The 2x
571 /// margin accounts for the challenger having a larger store than us and
572 /// therefore sampling more keys.
573 #[must_use]
574 pub fn max_incoming_audit_keys(stored_chunks: usize) -> usize {
575 // Allow at least 1 key so a newly-joined node can still be audited.
576 (2 * Self::audit_sample_count(stored_chunks)).max(1)
577 }
578
579 /// Compute the audit response timeout for a challenge with
580 /// `challenged_key_count` keys, **sized to be tight enough that a
581 /// relay attacker that must fetch the chunk bytes from elsewhere
582 /// falls outside the budget**.
583 ///
584 /// Formula:
585 /// `floor + (challenged_bytes / honest_read_bps) × multiplier`
586 ///
587 /// Where `challenged_bytes = k × MAX_CHUNK_SIZE`. An honest peer
588 /// reads `k × 4 MiB` from local disk at `honest_read_bps` (set
589 /// conservatively at 50 MB/s — well below modern SSDs); the
590 /// multiplier of 5 absorbs jitter, BLAKE3, ML-DSA, and slow disks.
591 ///
592 /// A relay attacker on a residential link (~5-12 MB/s) who must
593 /// fetch the same `k × 4 MiB` over the network sees ~10-100× higher
594 /// latency than disk for the data alone, plus per-chunk round-trips,
595 /// and misses the budget. In the periodic responsible-chunk
596 /// `AuditChallenge`, prune-confirmation, and ADR-0003 possession-check paths
597 /// that timeout is an immediate audit failure. The heavier subtree audit
598 /// still graces timeouts separately.
599 ///
600 /// This is an economic deterrent for the §7 relay limit calibrated
601 /// for residential bandwidth, NOT a hard bound: a relay on a
602 /// datacenter cross-connect (≥1 Gbps) can fetch `k × 4 MiB` fast
603 /// enough to answer in time. It raises the relay's cost (bandwidth
604 /// per audit) without claiming to make relaying impossible. The
605 /// cryptographic guarantee remains commitment-binding (the relay
606 /// must still hold or fetch the exact committed bytes); the timeout
607 /// only attacks the economics.
608 #[must_use]
609 pub fn audit_response_timeout(&self, challenged_key_count: usize) -> Duration {
610 let bytes_per_key = u64::try_from(crate::ant_protocol::MAX_CHUNK_SIZE).unwrap_or(u64::MAX);
611 let keys = u64::try_from(challenged_key_count).unwrap_or(u64::MAX);
612 let total_bytes = bytes_per_key.saturating_mul(keys);
613 let bps = self.audit_honest_read_bps.max(1);
614 // Apply the multiplier BEFORE integer-dividing by bps so each
615 // chunk contributes a fractional second rather than rounding
616 // down to zero. Otherwise k in 1..=12 would all collapse to the
617 // floor (~40 MiB / 50 MB/s = 0 secs in integer arithmetic), and
618 // an honest HDD-backed peer at sqrt(N)=10 stored chunks could
619 // miss the budget under load.
620 let multiplied = total_bytes.saturating_mul(self.audit_response_honest_multiplier);
621 // Resolve the scaled term in MILLISECONDS, not seconds: at the
622 // byte-round sizes (MAX_BYTE_CHALLENGE_KEYS = 2 → 8 MiB) the per-second
623 // quotient `multiplied / bps` integer-truncates to 0, leaving only the
624 // floor (the §4 finding: a 2×4 MiB honest serve under load could blow a
625 // 2 s budget). Computing in ms keeps the sub-second honest-read estimate
626 // (e.g. 8 MiB × 5 / 50 MB/s ≈ 840 ms) instead of dropping it.
627 let scaled_ms = multiplied.saturating_mul(1000) / bps;
628 // saturating_add avoids a panic if the floor plus the scaled term would
629 // overflow `Duration::MAX`.
630 self.audit_response_floor
631 .saturating_add(Duration::from_millis(scaled_ms))
632 }
633
634 /// Deadline for the round-2 BYTE challenge serving `challenged_key_count`
635 /// full chunks back to the auditor.
636 ///
637 /// Same per-byte scaling as [`Self::audit_response_timeout`] (so a relay
638 /// that must fetch the bytes over a residential link still blows it), but on
639 /// a higher floor (`BYTE_AUDIT_RESPONSE_FLOOR_SECS`) because the reply
640 /// carries up to
641 /// `MAX_BYTE_CHALLENGE_KEYS × MAX_CHUNK_SIZE` of chunk data — handshake +
642 /// multi-MiB upload + a busy honest disk read do not fit the hashes-only
643 /// round-1 floor (the §4 finding).
644 #[must_use]
645 pub fn byte_audit_response_timeout(&self, challenged_key_count: usize) -> Duration {
646 let scaled = self
647 .audit_response_timeout(challenged_key_count)
648 .saturating_sub(self.audit_response_floor);
649 Duration::from_secs(BYTE_AUDIT_RESPONSE_FLOOR_SECS).saturating_add(scaled)
650 }
651
652 /// Number of subtree leaves to spot-check against real chunk bytes per
653 /// audit (ADR-0002 real-bytes layer). Faking a fraction `x` of nonced
654 /// leaves survives only `(1 - x)^k`.
655 #[must_use]
656 pub fn audit_spotcheck_count(&self) -> u32 {
657 AUDIT_SPOTCHECK_COUNT
658 }
659
660 /// Conservative leaf-count hint for sizing the subtree-audit response
661 /// deadline before the proof arrives.
662 ///
663 /// The selected subtree holds about `sqrt(key_count)` real leaves; sizing
664 /// for a large store keeps an honest peer with a big store from timing out.
665 #[must_use]
666 pub fn subtree_audit_timeout_leaf_hint(&self) -> usize {
667 SUBTREE_AUDIT_TIMEOUT_LEAF_HINT
668 }
669
670 /// Returns a random duration in `[audit_tick_interval_min,
671 /// audit_tick_interval_max]`.
672 #[must_use]
673 pub fn random_audit_tick_interval(&self) -> Duration {
674 random_duration_in_range(self.audit_tick_interval_min, self.audit_tick_interval_max)
675 }
676
677 /// Returns a random duration in `[self_lookup_interval_min,
678 /// self_lookup_interval_max]`.
679 #[must_use]
680 pub fn random_self_lookup_interval(&self) -> Duration {
681 random_duration_in_range(self.self_lookup_interval_min, self.self_lookup_interval_max)
682 }
683}
684
685/// Pick a random `Duration` uniformly in `[min, max]` at millisecond
686/// granularity.
687///
688/// When `min == max` the result is deterministic.
689fn random_duration_in_range(min: Duration, max: Duration) -> Duration {
690 if min == max {
691 return min;
692 }
693 // Our intervals are minutes/hours, well within u64 range. Saturate to
694 // u64::MAX on the impossible overflow path to avoid a lossy cast.
695 let to_u64_millis = |d: Duration| -> u64 { u64::try_from(d.as_millis()).unwrap_or(u64::MAX) };
696 let chosen = rand::thread_rng().gen_range(to_u64_millis(min)..=to_u64_millis(max));
697 Duration::from_millis(chosen)
698}
699
700// ---------------------------------------------------------------------------
701// Tests
702// ---------------------------------------------------------------------------
703
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
    use super::*;

    /// The shipped defaults must satisfy `validate()`.
    #[test]
    fn defaults_pass_validation() {
        let verdict = ReplicationConfig::default().validate();
        assert!(verdict.is_ok(), "default config must be valid");
    }

    /// The default prune hysteresis window is exactly three days.
    #[test]
    fn default_prune_hysteresis_is_three_days() {
        let three_days = Duration::from_secs(3 * 24 * 60 * 60);
        assert_eq!(ReplicationConfig::default().prune_hysteresis_duration, three_days);
    }

    /// The admission width is the close group plus the margin, and it
    /// saturates rather than overflowing.
    #[test]
    fn storage_admission_width_adds_margin() {
        const TEST_CLOSE_GROUP_SIZE: usize = 7;

        let widened = storage_admission_width(TEST_CLOSE_GROUP_SIZE);
        assert_eq!(widened, TEST_CLOSE_GROUP_SIZE + STORAGE_ADMISSION_MARGIN);

        let saturated = storage_admission_width(usize::MAX);
        assert_eq!(saturated, usize::MAX);
    }

    /// Pins the audit-failure trust weight at 5.0 (within float epsilon).
    #[test]
    fn audit_failure_weight_is_five() {
        let deviation = (AUDIT_FAILURE_TRUST_WEIGHT - 5.0).abs();
        assert!(deviation <= f64::EPSILON);
    }

    #[test]
    fn replication_protocol_id_is_v2() {
        // The v12 storage-bound audit changes replication SEMANTICS, so the
        // protocol id MUST have moved past v1: v1 and v2 nodes must never
        // exchange replication traffic they can only half-interpret (rollout
        // safety — see the const's doc). A regression to v1 would let
        // mixed-version nodes talk past each other and risk spurious
        // penalties.
        let expected_id = "autonomi.ant.replication.v2";
        assert_eq!(REPLICATION_PROTOCOL_ID, expected_id);
    }

    /// With no challenged keys the scaled term vanishes and only the floor
    /// remains.
    #[test]
    fn audit_response_timeout_floor_at_zero_keys() {
        let floor = Duration::from_secs(AUDIT_RESPONSE_FLOOR_SECS);
        let timeout = ReplicationConfig::default().audit_response_timeout(0);
        assert_eq!(timeout, floor, "zero-key challenge should yield the floor exactly");
    }

    #[test]
    fn audit_response_timeout_scales_with_key_count() {
        let config = ReplicationConfig::default();
        let [t1, t10, t100] = [1, 10, 100].map(|k| config.audit_response_timeout(k));
        assert!(t1 <= t10 && t10 < t100, "timeout must not decrease with k");

        // The scaled term is resolved in MILLISECONDS, so a sub-second honest
        // read is no longer truncated to zero (§4).
        //
        // k=1: (4_194_304 × 5 × 1000) / 52_428_800 = 400 ms, plus the 2 s
        // round-1 floor = 2.4 s (this used to collapse to the bare 2 s floor).
        assert_eq!(t1, Duration::from_millis(2400));

        // k=10: (10 × 4_194_304 × 5 × 1000) / 52_428_800 = 4000 ms scaled,
        // plus the 2 s floor = 6 s. An HDD-backed honest peer at 20 MB/s reads
        // 40 MiB in ~2 s, comfortably inside; a relay pulling 40 MiB over a
        // 5 MB/s residential link needs ~8 s for the data alone, outside.
        assert_eq!(t10, Duration::from_secs(6));

        // k=100: (100 × 4_194_304 × 5 × 1000) / 52_428_800 = 40_000 ms
        // scaled, plus the 2 s floor = 42 s.
        assert_eq!(t100, Duration::from_secs(42));
    }

    #[test]
    fn audit_response_timeout_fits_honest_hdd_at_typical_sample_size() {
        const MIB: u64 = 1024 * 1024;

        // The canonical audit sample is sqrt(N) for N stored chunks, so
        // N=100 gives a sample of 10. An honest HDD-backed peer at the
        // slowest realistic random-read throughput (20 MB/s, far below the
        // 80-150 MB/s modern HDDs sustain sequentially) reads
        // 10 × 4 MiB = 40 MiB in ~2 s. Adding 300 ms cross-continent RTT,
        // ~10 ms scheduling and ~3 ms ML-DSA signing gives an honest envelope
        // of ~2.3 s, so the 6 s budget at k=10 leaves more than 3 s of slack.
        let realistic_hdd_bps = 20 * MIB;
        let sample_bytes = 10 * 4 * MIB;
        // +1 s covers network, scheduling and signing.
        let honest_envelope_secs = sample_bytes / realistic_hdd_bps + 1;

        let budget = ReplicationConfig::default().audit_response_timeout(10);
        assert!(
            Duration::from_secs(honest_envelope_secs) < budget,
            "honest HDD envelope ({honest_envelope_secs}s) must fit inside k=10 budget ({}s)",
            budget.as_secs(),
        );
    }

    #[test]
    fn audit_response_timeout_relay_is_outside_envelope() {
        const MIB: u64 = 1024 * 1024;

        // Intended invariant: an honest peer with an SSD-class read budget
        // fits inside `audit_response_timeout(k)`, whereas a relay attacker
        // fetching k × 4 MiB over residential bandwidth (≈ 5 MB/s sustained
        // download) does NOT. Spot-check at k=100: the honest budget is 42 s,
        // while the relay needs at least 100 × 4 MiB / 5 MB/s = 80 s for the
        // data alone.
        let relay_data_only = Duration::from_secs(100 * 4 * MIB / (5 * MIB));
        let budget = ReplicationConfig::default().audit_response_timeout(100);
        assert!(
            relay_data_only > budget,
            "relay fetch ({}s) must exceed honest audit budget ({}s)",
            relay_data_only.as_secs(),
            budget.as_secs(),
        );
    }

    #[test]
    fn audit_response_timeout_saturates_on_huge_k() {
        // Extreme key counts must neither panic nor overflow.
        let _ = ReplicationConfig::default().audit_response_timeout(usize::MAX);
    }

    #[test]
    fn quorum_threshold_zero_rejected() {
        let verdict = ReplicationConfig {
            quorum_threshold: 0,
            ..ReplicationConfig::default()
        }
        .validate();
        assert!(verdict.is_err());
    }

    #[test]
    fn quorum_threshold_exceeds_close_group_rejected() {
        let baseline = ReplicationConfig::default();
        let oversized_quorum = baseline.close_group_size + 1;
        let verdict = ReplicationConfig {
            quorum_threshold: oversized_quorum,
            ..baseline
        }
        .validate();
        assert!(verdict.is_err());
    }

    #[test]
    fn close_group_size_zero_rejected() {
        let verdict = ReplicationConfig {
            close_group_size: 0,
            ..ReplicationConfig::default()
        }
        .validate();
        assert!(verdict.is_err());
    }

    #[test]
    fn close_group_size_exceeding_prune_audit_budget_rejected() {
        let err = ReplicationConfig {
            close_group_size: MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS + 1,
            quorum_threshold: QUORUM_THRESHOLD,
            ..ReplicationConfig::default()
        }
        .validate()
        .unwrap_err();

        assert!(
            err.contains("MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS"),
            "error should mention prune audit budget: {err}"
        );
    }

    #[test]
    fn paid_list_close_group_size_zero_rejected() {
        let verdict = ReplicationConfig {
            paid_list_close_group_size: 0,
            ..ReplicationConfig::default()
        }
        .validate();
        assert!(verdict.is_err());
    }

    #[test]
    fn neighbor_sync_interval_inverted_rejected() {
        let (longer, shorter) = (Duration::from_secs(100), Duration::from_secs(50));
        let verdict = ReplicationConfig {
            neighbor_sync_interval_min: longer,
            neighbor_sync_interval_max: shorter,
            ..ReplicationConfig::default()
        }
        .validate();
        assert!(verdict.is_err());
    }

    #[test]
    fn audit_tick_interval_inverted_rejected() {
        let (longer, shorter) = (Duration::from_secs(100), Duration::from_secs(50));
        let verdict = ReplicationConfig {
            audit_tick_interval_min: longer,
            audit_tick_interval_max: shorter,
            ..ReplicationConfig::default()
        }
        .validate();
        assert!(verdict.is_err());
    }

    #[test]
    fn self_lookup_interval_inverted_rejected() {
        let (longer, shorter) = (Duration::from_secs(100), Duration::from_secs(50));
        let verdict = ReplicationConfig {
            self_lookup_interval_min: longer,
            self_lookup_interval_max: shorter,
            ..ReplicationConfig::default()
        }
        .validate();
        assert!(verdict.is_err());
    }

    #[test]
    fn neighbor_sync_peer_count_zero_rejected() {
        let verdict = ReplicationConfig {
            neighbor_sync_peer_count: 0,
            ..ReplicationConfig::default()
        }
        .validate();
        assert!(verdict.is_err());
    }

    #[test]
    fn neighbor_sync_scope_exceeding_k_bucket_size_rejected() {
        let verdict = ReplicationConfig {
            neighbor_sync_scope: K_BUCKET_SIZE + 1,
            ..ReplicationConfig::default()
        }
        .validate();
        assert!(verdict.is_err());
    }

    #[test]
    fn audit_sample_count_scales_with_sqrt() {
        // (stored key count, expected sample size)
        let cases = [
            (0, 0), // empty store
            (1, 1), // single key
            (3, 1), // small store: sqrt(3) = 1
            // sqrt scaling
            (4, 2),
            (25, 5),
            (100, 10),
            (1_000, 31),
            (10_000, 100),
            (1_000_000, 1_000),
        ];
        for (stored, expected) in cases {
            assert_eq!(ReplicationConfig::audit_sample_count(stored), expected);
        }
    }

    #[test]
    fn max_incoming_audit_keys_scales_dynamically() {
        // (stored chunk count, expected key cap)
        let cases = [
            (0, 1),             // empty store: at least 1 key accepted
            (1, 2),             // 2 * sqrt(1) = 2
            (100, 20),          // 2 * sqrt(100) = 20
            (1_000_000, 2_000), // 2 * sqrt(1_000_000) = 2_000
            (5_000_000, 4_472), // 2 * sqrt(5_000_000) = 4_472
        ];
        for (stored, expected) in cases {
            assert_eq!(ReplicationConfig::max_incoming_audit_keys(stored), expected);
        }
    }

    #[test]
    fn quorum_needed_uses_smaller_of_threshold_and_majority() {
        let config = ReplicationConfig::default();

        // (target count, expected quorum)
        let cases = [
            (7, 4),   // majority = 7/2+1 = 4, threshold = 4 → min = 4
            (3, 2),   // majority = 3/2+1 = 2, threshold = 4 → min = 2
            (0, 0),   // no targets: quorum is impossible — returns 0
            (100, 4), // majority = 51, threshold = 4 → min = 4
        ];
        for (targets, expected) in cases {
            assert_eq!(config.quorum_needed(targets), expected);
        }
    }

    #[test]
    fn confirm_needed_is_strict_majority() {
        // (paid group size, confirmations required)
        let cases = [(1, 1), (2, 2), (3, 2), (4, 3), (20, 11)];
        for (group_size, expected) in cases {
            assert_eq!(ReplicationConfig::confirm_needed(group_size), expected);
        }
    }

    #[test]
    fn random_intervals_within_bounds() {
        let config = ReplicationConfig::default();
        let neighbor_window = config.neighbor_sync_interval_min..=config.neighbor_sync_interval_max;
        let audit_window = config.audit_tick_interval_min..=config.audit_tick_interval_max;
        let lookup_window = config.self_lookup_interval_min..=config.self_lookup_interval_max;

        // Several draws per interval kind, to exercise the randomness.
        for _ in 0..50 {
            assert!(neighbor_window.contains(&config.random_neighbor_sync_interval()));
            assert!(audit_window.contains(&config.random_audit_tick_interval()));
            assert!(lookup_window.contains(&config.random_self_lookup_interval()));
        }
    }

    #[test]
    fn random_interval_equal_bounds_is_deterministic() {
        let pinned = Duration::from_secs(42);
        let drawn = ReplicationConfig {
            neighbor_sync_interval_min: pinned,
            neighbor_sync_interval_max: pinned,
            ..ReplicationConfig::default()
        }
        .random_neighbor_sync_interval();
        assert_eq!(drawn, pinned);
    }

    // -----------------------------------------------------------------------
    // Section 18 scenarios
    // -----------------------------------------------------------------------

    /// Scenario 18: Invalid runtime config is rejected by `validate()`.
    #[test]
    fn scenario_18_invalid_config_rejected() {
        // Validation must fail, and the error text must name the offending
        // parameter.
        let assert_rejected_mentioning = |config: ReplicationConfig, needle: &str| {
            let err = config.validate().unwrap_err();
            assert!(err.contains(needle), "error should mention {needle}: {err}");
        };

        // quorum_threshold > close_group_size.
        assert_rejected_mentioning(
            ReplicationConfig {
                quorum_threshold: 10,
                close_group_size: 7,
                ..ReplicationConfig::default()
            },
            "quorum_threshold",
        );

        // close_group_size = 0.
        assert_rejected_mentioning(
            ReplicationConfig {
                close_group_size: 0,
                ..ReplicationConfig::default()
            },
            "close_group_size",
        );

        // neighbor_sync interval min > max.
        assert_rejected_mentioning(
            ReplicationConfig {
                neighbor_sync_interval_min: Duration::from_secs(200),
                neighbor_sync_interval_max: Duration::from_secs(100),
                ..ReplicationConfig::default()
            },
            "neighbor_sync_interval",
        );

        // self_lookup interval min > max.
        assert_rejected_mentioning(
            ReplicationConfig {
                self_lookup_interval_min: Duration::from_secs(999),
                self_lookup_interval_max: Duration::from_secs(1),
                ..ReplicationConfig::default()
            },
            "self_lookup_interval",
        );

        // audit_tick interval min > max.
        assert_rejected_mentioning(
            ReplicationConfig {
                audit_tick_interval_min: Duration::from_secs(500),
                audit_tick_interval_max: Duration::from_secs(10),
                ..ReplicationConfig::default()
            },
            "audit_tick_interval",
        );
    }

    /// Scenario 26: Dynamic paid-list threshold for undersized set.
    /// With PaidGroupSize=8, `ConfirmNeeded` = floor(8/2)+1 = 5.
    #[test]
    fn scenario_26_dynamic_paid_threshold_undersized() {
        // (paid group size, confirmations required, failure note). The first
        // row is the scenario itself; the rest are boundary checks for small
        // paid groups.
        let cases = [
            (8, 5, "floor(8/2)+1 = 5"),
            (1, 1, "single peer requires 1 confirmation"),
            (2, 2, "2 peers require 2 confirmations"),
            (3, 2, "3 peers require 2 confirmations"),
            (0, 1, "0 peers yields floor(0/2)+1 = 1 (degenerate case)"),
        ];
        for (group_size, expected, note) in cases {
            assert_eq!(ReplicationConfig::confirm_needed(group_size), expected, "{note}");
        }
    }

    /// Scenario 31: Consecutive audit ticks occur on randomized intervals
    /// bounded by the configured `[audit_tick_interval_min, audit_tick_interval_max]`
    /// window.
    #[test]
    fn scenario_31_audit_cadence_within_jitter_bounds() {
        let config = ReplicationConfig {
            audit_tick_interval_min: Duration::from_secs(600),
            audit_tick_interval_max: Duration::from_secs(1200),
            ..ReplicationConfig::default()
        };

        // Draw many intervals; each must lie inside the window, and at least
        // one must differ from the draw before it.
        let mut previous: Option<Duration> = None;
        let mut saw_different = false;

        for _ in 0..100 {
            let interval = config.random_audit_tick_interval();
            assert!(
                interval >= config.audit_tick_interval_min,
                "interval {interval:?} below min {:?}",
                config.audit_tick_interval_min,
            );
            assert!(
                interval <= config.audit_tick_interval_max,
                "interval {interval:?} above max {:?}",
                config.audit_tick_interval_max,
            );
            if matches!(previous, Some(earlier) if earlier != interval) {
                saw_different = true;
            }
            previous = Some(interval);
        }

        // With 100 samples from a 10-minute range, at least two should differ
        // (probabilistically near-certain).
        assert!(
            saw_different,
            "audit intervals should exhibit randomized jitter across samples"
        );
    }
}