ant_node/replication/
config.rs

1//! Tunable parameters for the replication subsystem.
2//!
3//! All values below are a reference profile used for logic validation.
4//! Parameter safety constraints (Section 4):
5//! 1. `1 <= QUORUM_THRESHOLD <= CLOSE_GROUP_SIZE`
6//! 2. Effective paid-list threshold is per-key dynamic:
7//!    `ConfirmNeeded(K) = floor(PaidGroupSize(K)/2)+1`
8//! 3. If constraints are violated at runtime reconfiguration, node MUST reject
9//!    the config.
10
11#![allow(clippy::module_name_repetitions)]
12
13use std::time::Duration;
14
15use rand::Rng;
16
17use crate::ant_protocol::{CLOSE_GROUP_SIZE, MAX_CHUNK_SIZE};
18
19// ---------------------------------------------------------------------------
20// Static constants (compile-time reference profile)
21// ---------------------------------------------------------------------------
22
/// Maximum number of peers per k-bucket in the Kademlia routing table.
///
/// `ReplicationConfig::validate` also uses this as the upper bound for
/// `neighbor_sync_scope`.
pub const K_BUCKET_SIZE: usize = 20;

/// Extra local-routing-table positions accepted for local chunk storage
/// admission and stored-record pruning.
///
/// This margin absorbs small local RT disagreement between peers. It does not
/// widen audit, quorum, or paid-list target sets; those remain strict
/// `close_group_size` / paid-list group checks.
///
/// Applied on top of the close-group size by [`storage_admission_width`].
pub const STORAGE_ADMISSION_MARGIN: usize = 2;

/// Full-network target for required positive presence votes.
///
/// Effective per-key threshold is
/// `QuorumNeeded(K) = min(QUORUM_THRESHOLD, floor(|QuorumTargets|/2)+1)`.
///
/// NOTE(review): the value is a literal, not derived from `CLOSE_GROUP_SIZE`.
/// It equals `floor(CLOSE_GROUP_SIZE / 2) + 1` only while `CLOSE_GROUP_SIZE`
/// is 6 or 7 — revisit this constant if the close-group size ever changes.
pub const QUORUM_THRESHOLD: usize = 4; // floor(CLOSE_GROUP_SIZE / 2) + 1

/// Maximum number of closest nodes tracking paid status for a key.
///
/// `ReplicationConfig::validate` requires the configured value to be `>= 1`.
pub const PAID_LIST_CLOSE_GROUP_SIZE: usize = 20;

/// Number of closest peers to self eligible for neighbor sync.
///
/// Must stay within `1..=K_BUCKET_SIZE`; `ReplicationConfig::validate` rejects
/// anything outside that range.
pub const NEIGHBOR_SYNC_SCOPE: usize = 20;

/// Number of close-neighbor peers synced concurrently per round-robin repair
/// round.
///
/// `ReplicationConfig::validate` requires the configured value to be `>= 1`.
pub const NEIGHBOR_SYNC_PEER_COUNT: usize = 4;
49
50/// Width used when deciding whether this node may locally store or retain a
51/// chunk.
52#[must_use]
53pub const fn storage_admission_width(close_group_size: usize) -> usize {
54    close_group_size.saturating_add(STORAGE_ADMISSION_MARGIN)
55}
56
/// Minimum neighbor-sync cadence, in seconds (10 minutes). Actual interval is
/// randomized within `[min, max]`.
const NEIGHBOR_SYNC_INTERVAL_MIN_SECS: u64 = 10 * 60;
/// Maximum neighbor-sync cadence, in seconds (20 minutes).
const NEIGHBOR_SYNC_INTERVAL_MAX_SECS: u64 = 20 * 60;

/// Neighbor sync cadence range (min).
///
/// Default for `ReplicationConfig::neighbor_sync_interval_min`.
pub const NEIGHBOR_SYNC_INTERVAL_MIN: Duration =
    Duration::from_secs(NEIGHBOR_SYNC_INTERVAL_MIN_SECS);

/// Neighbor sync cadence range (max).
///
/// Default for `ReplicationConfig::neighbor_sync_interval_max`.
pub const NEIGHBOR_SYNC_INTERVAL_MAX: Duration =
    Duration::from_secs(NEIGHBOR_SYNC_INTERVAL_MAX_SECS);

/// Seconds backing [`NEIGHBOR_SYNC_COOLDOWN`].
const NEIGHBOR_SYNC_COOLDOWN_SECS: u64 = 60 * 60; // 1 hour
/// Per-peer minimum spacing between successive syncs with the same peer.
pub const NEIGHBOR_SYNC_COOLDOWN: Duration = Duration::from_secs(NEIGHBOR_SYNC_COOLDOWN_SECS);

/// Seconds backing [`REPAIR_HINT_MIN_AGE`].
const REPAIR_HINT_MIN_AGE_SECS: u64 = 60 * 60; // 1 hour
/// Minimum age for a replica repair hint before the hinted peer can be audited
/// for that key.
pub const REPAIR_HINT_MIN_AGE: Duration = Duration::from_secs(REPAIR_HINT_MIN_AGE_SECS);

/// Minimum self-lookup cadence, in seconds (5 minutes).
const SELF_LOOKUP_INTERVAL_MIN_SECS: u64 = 5 * 60;
/// Maximum self-lookup cadence, in seconds (10 minutes).
const SELF_LOOKUP_INTERVAL_MAX_SECS: u64 = 10 * 60;

/// Periodic self-lookup cadence range (min) to keep close neighborhood
/// current.
///
/// Default for `ReplicationConfig::self_lookup_interval_min`.
pub const SELF_LOOKUP_INTERVAL_MIN: Duration = Duration::from_secs(SELF_LOOKUP_INTERVAL_MIN_SECS);

/// Periodic self-lookup cadence range (max).
///
/// Default for `ReplicationConfig::self_lookup_interval_max`.
pub const SELF_LOOKUP_INTERVAL_MAX: Duration = Duration::from_secs(SELF_LOOKUP_INTERVAL_MAX_SECS);
94
/// Maximum number of concurrent outbound replication sends.
///
/// Caps how many fresh-replication chunk transfers can be in-flight at once
/// across the entire replication engine. Prevents bandwidth saturation on
/// home broadband connections when multiple chunks arrive simultaneously.
/// Each send transfers up to 4 MB (`MAX_CHUNK_SIZE`), so a limit of 3 means
/// at most ~12 MB queued for the upload link at any instant.
pub const MAX_CONCURRENT_REPLICATION_SENDS: usize = 3;

/// Maximum number of concurrent in-flight audit-responder tasks.
///
/// Subtree (round 1) and byte (round 2) challenge handlers are spawned off the
/// serial replication message loop so their disk reads don't stall replication.
/// This caps how many run at once across the engine, restoring backpressure: a
/// peer flooding audit challenges cannot fan out unbounded `get_raw` reads or
/// multi-MiB byte serves. When the cap is hit, the challenge is dropped — the
/// auditor graces a non-response as a timeout, so honest auditors are
/// unaffected and only a flooder is throttled. Sized to cover a handful of
/// concurrent honest auditors (the per-peer gossip-audit cooldown is 30 min,
/// see [`AUDIT_ON_GOSSIP_COOLDOWN_SECS`], so genuine concurrent audits are few)
/// while bounding the byte round's worst-case resident bytes
/// (`N × MAX_BYTE_CHALLENGE_KEYS × MAX_CHUNK_SIZE`, where `N` is this cap).
pub const MAX_CONCURRENT_AUDIT_RESPONSES: usize = 8;

/// Maximum concurrent in-flight audit-responder tasks from any SINGLE peer.
///
/// The global [`MAX_CONCURRENT_AUDIT_RESPONSES`] ceiling alone is not
/// flood-fair: one peer spamming challenges could occupy every slot and starve
/// honest auditors (whose dropped challenges convert to timeouts → strikes on
/// the honest peers). This per-peer cap guarantees no source holds more than
/// its share, so a flood self-throttles. Audits are cooldown-gated (one
/// gossip-triggered audit per peer per 30 min), so 2 in-flight per peer
/// comfortably covers the legitimate round-1 + round-2 overlap.
///
/// Note the type is `u32`, whereas the global cap is a `usize`.
pub const MAX_AUDIT_RESPONSES_PER_PEER: u32 = 2;
128
/// Fetch-concurrency limit used when the OS cannot report how many hardware
/// threads are available.
const AVAILABLE_PARALLELISM_FALLBACK: usize = 4;

/// Returns the concurrent-fetch cap, derived from the hardware thread count.
///
/// Uses `std::thread::available_parallelism()` so the node scales to the
/// machine it runs on, and falls back to [`AVAILABLE_PARALLELISM_FALLBACK`]
/// if the OS query fails. The result is always at least 1.
pub fn max_parallel_fetch() -> usize {
    // A plain `match` with the inherent `get` method avoids naming the generic
    // `std::num::NonZero` type (stable only since Rust 1.79), so the function
    // builds on older toolchains and needs no MSRV lint suppression.
    match std::thread::available_parallelism() {
        Ok(threads) => threads.get(),
        Err(_) => AVAILABLE_PARALLELISM_FALLBACK,
    }
}
142
/// Minimum audit-scheduler cadence, in seconds (10 minutes).
const AUDIT_TICK_INTERVAL_MIN_SECS: u64 = 10 * 60;
/// Maximum audit-scheduler cadence, in seconds (20 minutes).
const AUDIT_TICK_INTERVAL_MAX_SECS: u64 = 20 * 60;

/// Audit scheduler cadence range (min).
///
/// Default for `ReplicationConfig::audit_tick_interval_min`.
pub const AUDIT_TICK_INTERVAL_MIN: Duration = Duration::from_secs(AUDIT_TICK_INTERVAL_MIN_SECS);

/// Audit scheduler cadence range (max).
///
/// Default for `ReplicationConfig::audit_tick_interval_max`.
pub const AUDIT_TICK_INTERVAL_MAX: Duration = Duration::from_secs(AUDIT_TICK_INTERVAL_MAX_SECS);
153
/// Floor on the audit response deadline (independent of challenge size).
///
/// Sized to absorb worst-case global RTT for the audit envelope
/// (the request + response messages are KB-scale, not chunk-scale)
/// plus scheduling jitter. Tokyo↔NY latency is ~150ms each way,
/// so 2 seconds comfortably covers cross-continent communication
/// for the round-1 proof, whose payload is hashes (KB-scale).
const AUDIT_RESPONSE_FLOOR_SECS: u64 = 2;

/// Floor on the round-2 BYTE-challenge deadline.
///
/// Unlike round 1 (KB of hashes), the byte challenge ships up to
/// `MAX_BYTE_CHALLENGE_KEYS` full chunks (2 × 4 MiB = 8 MiB) back over the
/// wire, so the envelope must also cover a cold QUIC handshake, the
/// multi-MiB upload back to the auditor, and a busy honest peer's disk read.
/// The round-1 2 s floor (sized for a hashes-only reply) is too tight here —
/// the §4 finding. 5 s matches the cross-continent-RTT + handshake + 8 MiB
/// transfer budget while keeping a relay that must fetch the bytes over a
/// residential link outside it (the scaled term adds the per-byte estimate on
/// top). Mirrors main's more generous byte-round base.
const BYTE_AUDIT_RESPONSE_FLOOR_SECS: u64 = 5;

/// Conservative honest-responder read throughput, in bytes per second.
///
/// Used to size the audit response deadline. An honest peer answers
/// a k-key challenge by reading k chunks from local disk, computing
/// BLAKE3 + path proofs, and signing the response. The bottleneck is
/// disk read; BLAKE3 at ~3 GB/s + ML-DSA signing at ~3 ms are
/// negligible.
///
/// Set conservatively below any modern SSD (typical: 500 MB/s+). The
/// constant is 50 MiB/s: at that rate a k=10 sample at 4 MiB chunks
/// (40 MiB) reads in ~0.8s, well inside even an aggressive timeout. A
/// relay attacker who must fetch the same 40 MiB over the network at
/// typical bandwidth (100 Mbps = 12.5 MB/s) takes 3+ seconds for the
/// data alone, plus per-chunk network round-trips. Both times scale
/// linearly with the sample size, so the absolute gap between honest
/// and relay latency widens in proportion to the number of keys.
const AUDIT_HONEST_READ_BPS: u64 = 50 * 1024 * 1024;

/// Slack multiplier on the honest-read estimate.
///
/// Set so an honest peer that's slower than `AUDIT_HONEST_READ_BPS` (e.g.
/// an HDD-backed node, or one under load) still answers within the
/// timeout. 5× is generous; a relay peer fetching the same data over a
/// residential link (~5-12 MB/s) sees ~10-100× higher latency than disk
/// and misses the budget. This is an economic deterrent calibrated for
/// residential bandwidth, NOT a hard cryptographic bound — a relay on a
/// datacenter cross-connect could still fetch fast enough to answer in
/// time (see the §7 note on `audit_response_timeout`).
const AUDIT_RESPONSE_HONEST_MULTIPLIER: u64 = 5;
204
/// Single-key prune audit response deadline, in seconds.
///
/// Prune audits ask a peer whether they still hold one specific key
/// they previously claimed. The relay-defence rationale that motivates
/// the tight commitment-bound timeout does NOT apply here: the
/// auditor's own out-of-range hysteresis (`PRUNE_HYSTERESIS_DURATION`,
/// 3 days) already makes "fetch on demand" infeasible as a sustained
/// strategy.
///
/// Sized to comfortably accommodate cold cross-continent QUIC
/// handshake plus scheduling jitter on a busy honest peer answering
/// a single-key challenge: 10 s.
///
/// Default for `ReplicationConfig::prune_audit_response_timeout`.
const PRUNE_AUDIT_RESPONSE_SECS: u64 = 10;

/// Seconds backing [`BOOTSTRAP_CLAIM_GRACE_PERIOD`].
const BOOTSTRAP_CLAIM_GRACE_PERIOD_SECS: u64 = 24 * 60 * 60; // 24 h
/// Maximum duration a peer may claim bootstrap status before penalties apply.
pub const BOOTSTRAP_CLAIM_GRACE_PERIOD: Duration =
    Duration::from_secs(BOOTSTRAP_CLAIM_GRACE_PERIOD_SECS);

/// Seconds backing [`PRUNE_HYSTERESIS_DURATION`].
const PRUNE_HYSTERESIS_DURATION_SECS: u64 = 3 * 24 * 60 * 60; // 3 days
/// Minimum continuous out-of-range duration before pruning a key.
pub const PRUNE_HYSTERESIS_DURATION: Duration = Duration::from_secs(PRUNE_HYSTERESIS_DURATION_SECS);
229
/// Protocol identifier for replication operations.
///
/// Bumped to `v2` for the v12 storage-bound audit. That change extends the
/// wire types (`NeighborSyncRequest`/`Response` carry an optional trailing
/// `StorageCommitment`, and the gossip-triggered storage-commitment audit adds
/// the `SubtreeAuditChallenge`/`SubtreeAuditResponse` and `SubtreeByteChallenge`/
/// `SubtreeByteResponse` messages).
///
/// The bump is for SEMANTIC interop, not decode failure: postcard tolerates
/// the appended optional field (an old decoder reads the fields it knows and
/// ignores the trailer — pinned by the
/// `old_decoder_tolerates_new_neighbor_sync_*` tests in `protocol.rs`), but
/// tolerating bytes is not interoperating. A v1 node cannot decode the NEW
/// message variants at all (unknown enum discriminant) and never acts on a
/// piggybacked commitment, so mixed-version replication would half-function —
/// audit challenges unanswered, commitments silently dropped — and a v2 node
/// could read that silence as misbehaviour.
///
/// Rather than reason about each such case, we route v12 replication on a
/// distinct protocol id: a node only delivers messages whose topic matches its
/// own id (see the topic check in `mod.rs`), so v1 and v2 nodes simply do not
/// exchange replication traffic during a mixed-version window. This is the
/// rollout-safe behaviour: no half-interpreted exchange, no spurious eviction.
/// Replication between matched-version peers is unaffected. (DHT
/// routing/lookups are a separate protocol and continue to span both
/// versions.)
pub const REPLICATION_PROTOCOL_ID: &str = "autonomi.ant.replication.v2";

/// 10 MiB — maximum replication wire message size (accommodates hint batches),
/// expressed in MiB.
const REPLICATION_MESSAGE_SIZE_MIB: usize = 10;
/// Maximum replication wire message size, in bytes.
pub const MAX_REPLICATION_MESSAGE_SIZE: usize = REPLICATION_MESSAGE_SIZE_MIB * 1024 * 1024;

/// Headroom (64 KiB) reserved for the envelope (enum tags, ids, length
/// prefixes) when sizing a round-2 byte-challenge batch against the wire cap.
const BYTE_CHALLENGE_RESPONSE_HEADROOM: usize = 64 * 1024;

/// Maximum keys per round-2 [`SubtreeByteChallenge`] (per-batch cap).
///
/// Sized so the WORST-CASE response (every requested chunk at
/// `MAX_CHUNK_SIZE`) still encodes under [`MAX_REPLICATION_MESSAGE_SIZE`].
/// The auditor splits its spot-check sample into batches of this size (one
/// challenge per batch, same nonce/pin); the responder rejects any single
/// challenge requesting more.
///
/// Computed as `(wire cap - headroom) / MAX_CHUNK_SIZE` with integer
/// division. With the 10 MiB cap and 64 KiB headroom this is 2 when
/// `MAX_CHUNK_SIZE` is 4 MiB, the chunk size the audit docs in this file
/// assume.
///
/// [`SubtreeByteChallenge`]: crate::replication::protocol::SubtreeByteChallenge
pub const MAX_BYTE_CHALLENGE_KEYS: usize =
    (MAX_REPLICATION_MESSAGE_SIZE - BYTE_CHALLENGE_RESPONSE_HEADROOM) / MAX_CHUNK_SIZE;
// Compile-time guard: if the wire cap or chunk size changes so that not even
// one max-size chunk fits in a response, the build fails here.
const _: () = assert!(
    MAX_BYTE_CHALLENGE_KEYS >= 1,
    "wire cap must fit at least one max-size chunk per byte-challenge response"
);
278
/// Rollout gate for timeout-driven eviction.
///
/// When `false`, a peer that crosses the consecutive-timeout strike threshold
/// is logged but NOT reported to the trust engine (no eviction). The change
/// that introduced this gate is a breaking wire change (old nodes cannot
/// decode the new `StorageCommitment` gossip), so a not-yet-upgraded peer
/// times out on every new audit and looks exactly like a non-storing peer;
/// penalising timeouts during the mixed-version window would make upgraded
/// nodes evict every old node — a death spiral.
///
/// Confirmed storage-integrity failures (`DigestMismatch`/`KeyAbsent`/
/// `Rejected`/`MalformedResponse`) are NEVER gated by this — those only come
/// from a peer that actually answered with bad data, never an old node. Flip to
/// `true` in a small follow-up release once the fleet has upgraded. This is a
/// real `const` (not commented-out code) so both gate sites compile and stay in
/// sync, and the flip is one line.
pub const TIMEOUT_EVICTION_ENABLED: bool = false;
295
/// Seconds backing [`VERIFICATION_REQUEST_TIMEOUT`].
const VERIFICATION_REQUEST_TIMEOUT_SECS: u64 = 15;
/// Verification request timeout (per-batch).
pub const VERIFICATION_REQUEST_TIMEOUT: Duration =
    Duration::from_secs(VERIFICATION_REQUEST_TIMEOUT_SECS);

/// Seconds backing [`FETCH_REQUEST_TIMEOUT`].
const FETCH_REQUEST_TIMEOUT_SECS: u64 = 30;
/// Fetch request timeout.
pub const FETCH_REQUEST_TIMEOUT: Duration = Duration::from_secs(FETCH_REQUEST_TIMEOUT_SECS);

/// Seconds backing [`PENDING_VERIFY_MAX_AGE`] (30 minutes).
const PENDING_VERIFY_MAX_AGE_SECS: u64 = 30 * 60;
/// Maximum age for pending-verification entries before stale eviction.
pub const PENDING_VERIFY_MAX_AGE: Duration = Duration::from_secs(PENDING_VERIFY_MAX_AGE_SECS);
311
/// Trust event weight for confirmed audit failures.
pub const AUDIT_FAILURE_TRUST_WEIGHT: f64 = 5.0;

/// Consecutive audit *timeouts* a peer may accumulate before a timeout is
/// reported as an `ApplicationFailure` trust event.
///
/// The audit response timeout is an economic deterrent calibrated for
/// residential bandwidth, not a hard cryptographic bound: a single slow
/// response is routine for an honest node under transient load (GC pause,
/// disk flush, a burst of concurrent requests). Penalizing on the first
/// timeout false-positives those nodes.
///
/// Requiring `N` *consecutive* timeouts before penalizing removes that
/// false-positive while preserving the deterrent against a peer that does not
/// actually store the data and must fetch it at audit time: such a peer is
/// slow on *every* audit and accumulates a fresh strike each tick until it
/// crosses the threshold, whereas an honest node answers normally between rare
/// slow ticks and any success resets its strike counter to zero (see
/// `handle_audit_result`). The discriminator is *persistence* of slowness
/// versus *transience*. This deliberately does not widen the per-challenge
/// window. Applies ONLY to `AuditFailureReason::Timeout`; confirmed
/// storage-integrity failures (`DigestMismatch` / `KeyAbsent` / `Rejected` /
/// `MalformedResponse`) remain instantly punishable.
///
/// While [`TIMEOUT_EVICTION_ENABLED`] is `false`, crossing this threshold is
/// logged but not reported to the trust engine.
pub const AUDIT_TIMEOUT_STRIKE_THRESHOLD: u32 = 3;

/// Probability of launching a subtree audit when a peer's *changed* commitment
/// is ingested via gossip (ADR-0002). Keeps audits occasional surprise exams.
///
/// `0.2` means roughly one in five qualifying gossip events triggers an audit.
pub const AUDIT_ON_GOSSIP_PROBABILITY: f64 = 0.2;

/// Per-peer cooldown between gossip-triggered subtree audits (ADR-0002), in
/// seconds. Bounds how often any one peer is audited regardless of gossip rate.
///
/// 30 minutes. Unlike most cadence constants in this file this is a raw `u64`
/// of seconds, not a `Duration`.
pub const AUDIT_ON_GOSSIP_COOLDOWN_SECS: u64 = 30 * 60;
344
/// Number of subtree leaves spot-checked against real chunk bytes per audit
/// (ADR-0002 real-bytes layer).
///
/// The auditor clamps this to its 3..=5 band (`BYTE_SPOTCHECK_MIN..=MAX` in
/// `storage_commitment_audit`), so this is the effective MAXIMUM — set it
/// within the band rather than advertising a sample size the auditor never
/// requests.
pub const AUDIT_SPOTCHECK_COUNT: u32 = 5;

/// Conservative leaf-count hint for sizing the subtree-audit response deadline.
///
/// The deadline is set before the proof arrives, so we size for the largest
/// legal store: `sqrt(MAX_COMMITMENT_KEY_COUNT) = 1000`. Honest small stores
/// finish well within it.
///
/// NOTE(review): `MAX_COMMITMENT_KEY_COUNT` is defined outside this file; the
/// literal below must be updated by hand if that limit changes.
pub const SUBTREE_AUDIT_TIMEOUT_LEAF_HINT: usize = 1000;

/// Maximum number of prune-confirmation audit challenges sent per prune pass.
///
/// `ReplicationConfig::validate` rejects a `close_group_size` larger than this.
pub const MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS: usize = 64;

/// Seconds to wait for `DhtNetworkEvent::BootstrapComplete` before proceeding
/// with bootstrap sync. Covers bootstrap nodes with no peers to connect to.
///
/// Default for `ReplicationConfig::bootstrap_complete_timeout_secs`.
const BOOTSTRAP_COMPLETE_TIMEOUT_SECS: u64 = 60;
367
368// ---------------------------------------------------------------------------
369// Runtime-configurable wrapper
370// ---------------------------------------------------------------------------
371
/// Runtime-configurable replication parameters.
///
/// Validated on construction — node rejects invalid configs.
///
/// NOTE(review): every field is public and no validating constructor is
/// visible in this part of the file, so code that builds or mutates a config
/// by hand should call [`ReplicationConfig::validate`] before using it.
/// `Default` yields the compile-time reference profile.
#[derive(Debug, Clone)]
pub struct ReplicationConfig {
    /// Close-group width and target holder count per key.
    ///
    /// `validate` requires `1 <= close_group_size <=
    /// MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS`.
    pub close_group_size: usize,
    /// Required positive presence votes for quorum.
    ///
    /// `validate` requires `1 <= quorum_threshold <= close_group_size`.
    pub quorum_threshold: usize,
    /// Maximum closest nodes tracking paid status for a key.
    ///
    /// `validate` requires a value `>= 1`.
    pub paid_list_close_group_size: usize,
    /// Number of closest peers to self eligible for neighbor sync.
    ///
    /// `validate` requires `1 <= neighbor_sync_scope <= K_BUCKET_SIZE`.
    pub neighbor_sync_scope: usize,
    /// Peers synced concurrently per round-robin repair round.
    ///
    /// `validate` requires a value `>= 1`.
    pub neighbor_sync_peer_count: usize,
    /// Neighbor sync cadence range (min). Must not exceed the matching max.
    pub neighbor_sync_interval_min: Duration,
    /// Neighbor sync cadence range (max).
    pub neighbor_sync_interval_max: Duration,
    /// Minimum spacing between successive syncs with the same peer.
    pub neighbor_sync_cooldown: Duration,
    /// Self-lookup cadence range (min). Must not exceed the matching max.
    pub self_lookup_interval_min: Duration,
    /// Self-lookup cadence range (max).
    pub self_lookup_interval_max: Duration,
    /// Audit scheduler cadence range (min). Must not exceed the matching max.
    pub audit_tick_interval_min: Duration,
    /// Audit scheduler cadence range (max).
    pub audit_tick_interval_max: Duration,
    /// Floor on the audit response deadline. Covers global RTT for
    /// the small request/response envelope plus scheduling jitter.
    /// See `AUDIT_RESPONSE_FLOOR_SECS` for sizing.
    pub audit_response_floor: Duration,
    /// Conservative honest-responder read throughput (bytes/sec).
    /// Used to scale the audit response deadline against the size of
    /// the challenge. Slow enough that even an HDD-backed honest peer
    /// fits inside the budget; fast enough that a relay attacker who
    /// must fetch bytes over the network falls outside.
    ///
    /// Not checked by `validate`; `audit_response_timeout` treats `0` as `1`
    /// to avoid dividing by zero.
    pub audit_honest_read_bps: u64,
    /// Slack multiplier on the honest-read estimate before
    /// declaring an audit timed out.
    pub audit_response_honest_multiplier: u64,
    /// Single-key prune-audit response deadline. Has its own constant
    /// because the relay-defence rationale that motivates the tight
    /// commitment-bound budget does not apply to a single-key prune
    /// challenge.
    pub prune_audit_response_timeout: Duration,
    /// Maximum duration a peer may claim bootstrap status.
    pub bootstrap_claim_grace_period: Duration,
    /// Minimum continuous out-of-range duration before pruning a key.
    pub prune_hysteresis_duration: Duration,
    /// Verification request timeout (per-batch).
    pub verification_request_timeout: Duration,
    /// Fetch request timeout.
    pub fetch_request_timeout: Duration,
    /// Seconds to wait for `DhtNetworkEvent::BootstrapComplete` before
    /// proceeding with bootstrap sync (covers bootstrap nodes with no peers).
    pub bootstrap_complete_timeout_secs: u64,
}
431
432impl Default for ReplicationConfig {
433    fn default() -> Self {
434        Self {
435            close_group_size: CLOSE_GROUP_SIZE,
436            quorum_threshold: QUORUM_THRESHOLD,
437            paid_list_close_group_size: PAID_LIST_CLOSE_GROUP_SIZE,
438            neighbor_sync_scope: NEIGHBOR_SYNC_SCOPE,
439            neighbor_sync_peer_count: NEIGHBOR_SYNC_PEER_COUNT,
440            neighbor_sync_interval_min: NEIGHBOR_SYNC_INTERVAL_MIN,
441            neighbor_sync_interval_max: NEIGHBOR_SYNC_INTERVAL_MAX,
442            neighbor_sync_cooldown: NEIGHBOR_SYNC_COOLDOWN,
443            self_lookup_interval_min: SELF_LOOKUP_INTERVAL_MIN,
444            self_lookup_interval_max: SELF_LOOKUP_INTERVAL_MAX,
445            audit_tick_interval_min: AUDIT_TICK_INTERVAL_MIN,
446            audit_tick_interval_max: AUDIT_TICK_INTERVAL_MAX,
447            audit_response_floor: Duration::from_secs(AUDIT_RESPONSE_FLOOR_SECS),
448            audit_honest_read_bps: AUDIT_HONEST_READ_BPS,
449            audit_response_honest_multiplier: AUDIT_RESPONSE_HONEST_MULTIPLIER,
450            prune_audit_response_timeout: Duration::from_secs(PRUNE_AUDIT_RESPONSE_SECS),
451            bootstrap_claim_grace_period: BOOTSTRAP_CLAIM_GRACE_PERIOD,
452            prune_hysteresis_duration: PRUNE_HYSTERESIS_DURATION,
453            verification_request_timeout: VERIFICATION_REQUEST_TIMEOUT,
454            fetch_request_timeout: FETCH_REQUEST_TIMEOUT,
455            bootstrap_complete_timeout_secs: BOOTSTRAP_COMPLETE_TIMEOUT_SECS,
456        }
457    }
458}
459
460impl ReplicationConfig {
461    /// Validate safety constraints. Returns `Err` with a description if any
462    /// constraint is violated.
463    ///
464    /// # Errors
465    ///
466    /// Returns a human-readable message describing the first violated
467    /// constraint.
468    pub fn validate(&self) -> Result<(), String> {
469        if self.close_group_size == 0 {
470            return Err("close_group_size must be >= 1".to_string());
471        }
472        if self.quorum_threshold == 0 || self.quorum_threshold > self.close_group_size {
473            return Err(format!(
474                "quorum_threshold ({}) must satisfy 1 <= quorum_threshold <= close_group_size ({})",
475                self.quorum_threshold, self.close_group_size,
476            ));
477        }
478        if self.close_group_size > MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS {
479            return Err(format!(
480                "close_group_size ({}) must be <= MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS ({})",
481                self.close_group_size, MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS,
482            ));
483        }
484        if self.paid_list_close_group_size == 0 {
485            return Err("paid_list_close_group_size must be >= 1".to_string());
486        }
487        if self.neighbor_sync_interval_min > self.neighbor_sync_interval_max {
488            return Err(format!(
489                "neighbor_sync_interval_min ({:?}) must be <= neighbor_sync_interval_max ({:?})",
490                self.neighbor_sync_interval_min, self.neighbor_sync_interval_max,
491            ));
492        }
493        if self.audit_tick_interval_min > self.audit_tick_interval_max {
494            return Err(format!(
495                "audit_tick_interval_min ({:?}) must be <= audit_tick_interval_max ({:?})",
496                self.audit_tick_interval_min, self.audit_tick_interval_max,
497            ));
498        }
499        if self.self_lookup_interval_min > self.self_lookup_interval_max {
500            return Err(format!(
501                "self_lookup_interval_min ({:?}) must be <= self_lookup_interval_max ({:?})",
502                self.self_lookup_interval_min, self.self_lookup_interval_max,
503            ));
504        }
505        if self.neighbor_sync_peer_count == 0 {
506            return Err("neighbor_sync_peer_count must be >= 1".to_string());
507        }
508        if self.neighbor_sync_scope == 0 {
509            return Err("neighbor_sync_scope must be >= 1".to_string());
510        }
511        if self.neighbor_sync_scope > K_BUCKET_SIZE {
512            return Err(format!(
513                "neighbor_sync_scope ({}) must be <= K_BUCKET_SIZE ({})",
514                self.neighbor_sync_scope, K_BUCKET_SIZE,
515            ));
516        }
517        Ok(())
518    }
519
520    /// Effective quorum votes required for a key given the number of
521    /// reachable quorum targets.
522    ///
523    /// `min(self.quorum_threshold, floor(quorum_targets_count / 2) + 1)`
524    #[must_use]
525    pub fn quorum_needed(&self, quorum_targets_count: usize) -> usize {
526        if quorum_targets_count == 0 {
527            return 0;
528        }
529        let majority = quorum_targets_count / 2 + 1;
530        self.quorum_threshold.min(majority)
531    }
532
533    /// Confirmations required for paid-list consensus given the number of
534    /// peers in the paid-list close group for a key.
535    ///
536    /// `floor(paid_group_size / 2) + 1`
537    #[must_use]
538    pub fn confirm_needed(paid_group_size: usize) -> usize {
539        paid_group_size / 2 + 1
540    }
541
542    /// Returns a random duration in `[neighbor_sync_interval_min,
543    /// neighbor_sync_interval_max]`.
544    #[must_use]
545    pub fn random_neighbor_sync_interval(&self) -> Duration {
546        random_duration_in_range(
547            self.neighbor_sync_interval_min,
548            self.neighbor_sync_interval_max,
549        )
550    }
551
552    /// Compute the number of keys to sample for an audit round, scaled
553    /// dynamically by the total number of locally stored keys.
554    ///
555    /// Formula: `max(floor(sqrt(total_keys)), 1)`, capped at `total_keys`.
556    #[must_use]
557    pub fn audit_sample_count(total_keys: usize) -> usize {
558        #[allow(
559            clippy::cast_possible_truncation,
560            clippy::cast_sign_loss,
561            clippy::cast_precision_loss
562        )]
563        let sqrt = (total_keys as f64).sqrt() as usize;
564        sqrt.max(1).min(total_keys)
565    }
566
567    /// Maximum number of keys to accept in an incoming audit challenge.
568    ///
569    /// Scales dynamically: `2 * audit_sample_count(stored_chunks)`. The 2x
570    /// margin accounts for the challenger having a larger store than us and
571    /// therefore sampling more keys.
572    #[must_use]
573    pub fn max_incoming_audit_keys(stored_chunks: usize) -> usize {
574        // Allow at least 1 key so a newly-joined node can still be audited.
575        (2 * Self::audit_sample_count(stored_chunks)).max(1)
576    }
577
578    /// Compute the audit response timeout for a challenge with
579    /// `challenged_key_count` keys, **sized to be tight enough that a
580    /// relay attacker that must fetch the chunk bytes from elsewhere
581    /// falls outside the budget**.
582    ///
583    /// Formula:
584    ///   `floor + (challenged_bytes / honest_read_bps) × multiplier`
585    ///
586    /// Where `challenged_bytes = k × MAX_CHUNK_SIZE`. An honest peer
587    /// reads `k × 4 MiB` from local disk at `honest_read_bps` (set
588    /// conservatively at 50 MB/s — well below modern SSDs); the
589    /// multiplier of 5 absorbs jitter, BLAKE3, ML-DSA, and slow disks.
590    ///
591    /// A relay attacker on a residential link (~5-12 MB/s) who must
592    /// fetch the same `k × 4 MiB` over the network sees ~10-100× higher
593    /// latency than disk for the data alone, plus per-chunk round-trips,
594    /// and misses the budget — recording a timeout strike (per
595    /// `handle_audit_timeout` → `handle_audit_failure`). After
596    /// [`AUDIT_TIMEOUT_STRIKE_THRESHOLD`] consecutive timeouts this would
597    /// fire an `application_failure` trust event — but note that report is
598    /// currently suppressed for the breaking rollout (grep
599    /// TIMEOUT-EVICTION-DISABLED); the strike accounting still runs.
600    ///
601    /// This is an economic deterrent for the §7 relay limit calibrated
602    /// for residential bandwidth, NOT a hard bound: a relay on a
603    /// datacenter cross-connect (≥1 Gbps) can fetch `k × 4 MiB` fast
604    /// enough to answer in time. It raises the relay's cost (bandwidth
605    /// per audit) without claiming to make relaying impossible. The
606    /// cryptographic guarantee remains commitment-binding (the relay
607    /// must still hold or fetch the exact committed bytes); the timeout
608    /// only attacks the economics.
609    #[must_use]
610    pub fn audit_response_timeout(&self, challenged_key_count: usize) -> Duration {
611        let bytes_per_key = u64::try_from(crate::ant_protocol::MAX_CHUNK_SIZE).unwrap_or(u64::MAX);
612        let keys = u64::try_from(challenged_key_count).unwrap_or(u64::MAX);
613        let total_bytes = bytes_per_key.saturating_mul(keys);
614        let bps = self.audit_honest_read_bps.max(1);
615        // Apply the multiplier BEFORE integer-dividing by bps so each
616        // chunk contributes a fractional second rather than rounding
617        // down to zero. Otherwise k in 1..=12 would all collapse to the
618        // floor (~40 MiB / 50 MB/s = 0 secs in integer arithmetic), and
619        // an honest HDD-backed peer at sqrt(N)=10 stored chunks could
620        // miss the budget under load.
621        let multiplied = total_bytes.saturating_mul(self.audit_response_honest_multiplier);
622        // Resolve the scaled term in MILLISECONDS, not seconds: at the
623        // byte-round sizes (MAX_BYTE_CHALLENGE_KEYS = 2 → 8 MiB) the per-second
624        // quotient `multiplied / bps` integer-truncates to 0, leaving only the
625        // floor (the §4 finding: a 2×4 MiB honest serve under load could blow a
626        // 2 s budget). Computing in ms keeps the sub-second honest-read estimate
627        // (e.g. 8 MiB × 5 / 50 MB/s ≈ 840 ms) instead of dropping it.
628        let scaled_ms = multiplied.saturating_mul(1000) / bps;
629        // saturating_add avoids a panic if the floor plus the scaled term would
630        // overflow `Duration::MAX`.
631        self.audit_response_floor
632            .saturating_add(Duration::from_millis(scaled_ms))
633    }
634
635    /// Deadline for the round-2 BYTE challenge serving `challenged_key_count`
636    /// full chunks back to the auditor.
637    ///
638    /// Same per-byte scaling as [`Self::audit_response_timeout`] (so a relay
639    /// that must fetch the bytes over a residential link still blows it), but on
640    /// a higher floor (`BYTE_AUDIT_RESPONSE_FLOOR_SECS`) because the reply
641    /// carries up to
642    /// `MAX_BYTE_CHALLENGE_KEYS × MAX_CHUNK_SIZE` of chunk data — handshake +
643    /// multi-MiB upload + a busy honest disk read do not fit the hashes-only
644    /// round-1 floor (the §4 finding).
645    #[must_use]
646    pub fn byte_audit_response_timeout(&self, challenged_key_count: usize) -> Duration {
647        let scaled = self
648            .audit_response_timeout(challenged_key_count)
649            .saturating_sub(self.audit_response_floor);
650        Duration::from_secs(BYTE_AUDIT_RESPONSE_FLOOR_SECS).saturating_add(scaled)
651    }
652
653    /// Number of subtree leaves to spot-check against real chunk bytes per
654    /// audit (ADR-0002 real-bytes layer). Faking a fraction `x` of nonced
655    /// leaves survives only `(1 - x)^k`.
656    #[must_use]
657    pub fn audit_spotcheck_count(&self) -> u32 {
658        AUDIT_SPOTCHECK_COUNT
659    }
660
661    /// Conservative leaf-count hint for sizing the subtree-audit response
662    /// deadline before the proof arrives.
663    ///
664    /// The selected subtree holds about `sqrt(key_count)` real leaves; sizing
665    /// for a large store keeps an honest peer with a big store from timing out.
666    #[must_use]
667    pub fn subtree_audit_timeout_leaf_hint(&self) -> usize {
668        SUBTREE_AUDIT_TIMEOUT_LEAF_HINT
669    }
670
671    /// Returns a random duration in `[audit_tick_interval_min,
672    /// audit_tick_interval_max]`.
673    #[must_use]
674    pub fn random_audit_tick_interval(&self) -> Duration {
675        random_duration_in_range(self.audit_tick_interval_min, self.audit_tick_interval_max)
676    }
677
678    /// Returns a random duration in `[self_lookup_interval_min,
679    /// self_lookup_interval_max]`.
680    #[must_use]
681    pub fn random_self_lookup_interval(&self) -> Duration {
682        random_duration_in_range(self.self_lookup_interval_min, self.self_lookup_interval_max)
683    }
684}
685
686/// Pick a random `Duration` uniformly in `[min, max]` at millisecond
687/// granularity.
688///
689/// When `min == max` the result is deterministic.
690fn random_duration_in_range(min: Duration, max: Duration) -> Duration {
691    if min == max {
692        return min;
693    }
694    // Our intervals are minutes/hours, well within u64 range. Saturate to
695    // u64::MAX on the impossible overflow path to avoid a lossy cast.
696    let to_u64_millis = |d: Duration| -> u64 { u64::try_from(d.as_millis()).unwrap_or(u64::MAX) };
697    let chosen = rand::thread_rng().gen_range(to_u64_millis(min)..=to_u64_millis(max));
698    Duration::from_millis(chosen)
699}
700
701// ---------------------------------------------------------------------------
702// Tests
703// ---------------------------------------------------------------------------
704
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
    use super::*;

    /// The shipped reference profile must satisfy its own validation rules.
    #[test]
    fn defaults_pass_validation() {
        let defaults = ReplicationConfig::default();
        assert!(defaults.validate().is_ok(), "default config must be valid");
    }

    /// The default prune hysteresis window is exactly three days.
    #[test]
    fn default_prune_hysteresis_is_three_days() {
        const THREE_DAYS_SECS: u64 = 3 * 24 * 60 * 60;

        let defaults = ReplicationConfig::default();
        assert_eq!(
            defaults.prune_hysteresis_duration,
            Duration::from_secs(THREE_DAYS_SECS)
        );
    }

    /// Admission width is the close group plus the margin, and the addition
    /// saturates rather than overflowing.
    #[test]
    fn storage_admission_width_adds_margin() {
        const TEST_CLOSE_GROUP_SIZE: usize = 7;

        let widened = storage_admission_width(TEST_CLOSE_GROUP_SIZE);
        assert_eq!(widened, TEST_CLOSE_GROUP_SIZE + STORAGE_ADMISSION_MARGIN);

        let saturated = storage_admission_width(usize::MAX);
        assert_eq!(saturated, usize::MAX);
    }

    /// Pins the audit-failure trust weight at 5.0 (within float epsilon).
    #[test]
    fn audit_failure_weight_is_five() {
        let deviation = (AUDIT_FAILURE_TRUST_WEIGHT - 5.0).abs();
        assert!(deviation <= f64::EPSILON);
    }

    /// Pins the consecutive-timeout strike threshold.
    #[test]
    fn audit_timeout_strike_threshold_is_three() {
        // Three is the smallest value that forgives two transiently slow
        // responses in a row yet still penalizes a persistently slow,
        // non-storing peer within a handful of audit ticks.
        assert_eq!(AUDIT_TIMEOUT_STRIKE_THRESHOLD, 3);
    }

    /// Pins the replication protocol id at v2.
    #[test]
    fn replication_protocol_id_is_v2() {
        // The v12 storage-bound audit changed replication SEMANTICS, so the
        // protocol id has to move past v1 (rollout safety, see the const's
        // doc). A regression to v1 would let mixed-version nodes exchange
        // traffic they only half-understand and risk spurious penalties.
        assert_eq!(REPLICATION_PROTOCOL_ID, "autonomi.ant.replication.v2");
    }

    /// With no challenged keys the scaled term vanishes and only the floor
    /// remains.
    #[test]
    fn audit_response_timeout_floor_at_zero_keys() {
        let floor = Duration::from_secs(AUDIT_RESPONSE_FLOOR_SECS);
        let deadline = ReplicationConfig::default().audit_response_timeout(0);
        assert_eq!(
            deadline, floor,
            "zero-key challenge should yield the floor exactly"
        );
    }

    /// The deadline grows with the number of challenged keys and matches
    /// the hand-computed values for the default profile.
    #[test]
    fn audit_response_timeout_scales_with_key_count() {
        let config = ReplicationConfig::default();
        let [t1, t10, t100] = [1_usize, 10, 100].map(|k| config.audit_response_timeout(k));
        assert!(t1 <= t10 && t10 < t100, "timeout must not decrease with k");

        // The scaled term is resolved in MILLISECONDS, so a sub-second
        // honest read is not truncated away (§4).
        //
        // k=1: (4_194_304 × 5 × 1000) / 52_428_800 = 400 ms on top of the
        // 2 s round-1 floor = 2.4 s (not the bare 2 s floor).
        assert_eq!(t1, Duration::from_millis(2400));

        // k=10: (10 × 4_194_304 × 5 × 1000) / 52_428_800 = 4000 ms scaled,
        // plus the 2 s floor = 6 s. An honest HDD peer at 20 MB/s reads
        // 40 MiB in ~2 s (inside); a relay pulling 40 MiB at 5 MB/s needs
        // ~8 s for the data alone (outside).
        assert_eq!(t10, Duration::from_secs(6));

        // k=100: (100 × 4_194_304 × 5 × 1000) / 52_428_800 = 40_000 ms
        // scaled, plus the 2 s floor = 42 s.
        assert_eq!(t100, Duration::from_secs(42));
    }

    /// An honest HDD-backed peer fits inside the k=10 budget.
    #[test]
    fn audit_response_timeout_fits_honest_hdd_at_typical_sample_size() {
        const MIB: u64 = 1024 * 1024;

        // The canonical sample is sqrt(N); N=100 stored chunks gives a
        // sample of 10. At a pessimistic 20 MB/s random-read rate (modern
        // HDDs sustain 80-150 MB/s sequential) 10 × 4 MiB = 40 MiB takes
        // ~2 s. Adding ~300 ms cross-continent RTT, ~10 ms scheduling and
        // ~3 ms ML-DSA signing gives an honest envelope of ~2.3 s, leaving
        // more than 3 s of slack under the 6 s budget.
        let budget = ReplicationConfig::default().audit_response_timeout(10);
        let hdd_read_bps = 20 * MIB;
        let sample_bytes = 10 * 4 * MIB;
        // The extra second stands in for network, scheduling and signing.
        let honest_envelope_secs = sample_bytes / hdd_read_bps + 1;
        assert!(
            Duration::from_secs(honest_envelope_secs) < budget,
            "honest HDD envelope ({honest_envelope_secs}s) must fit inside k=10 budget ({}s)",
            budget.as_secs(),
        );
    }

    /// A residential-bandwidth relay cannot meet the k=100 budget.
    #[test]
    fn audit_response_timeout_relay_is_outside_envelope() {
        const MIB: u64 = 1024 * 1024;

        // Intended invariant: an honest peer with the SSD-class read budget
        // answers within `audit_response_timeout(k)`, whereas a relay that
        // must download k × 4 MiB at roughly 5 MB/s sustained does not.
        // Spot-check at k=100: the budget is 42 s, while the relay needs at
        // least 100 × 4 MiB / 5 MB/s = 80 s for the data alone.
        let budget = ReplicationConfig::default().audit_response_timeout(100);
        let relay_data_only = Duration::from_secs((100 * 4 * MIB) / (5 * MIB));
        assert!(
            relay_data_only > budget,
            "relay fetch ({}s) must exceed honest audit budget ({}s)",
            relay_data_only.as_secs(),
            budget.as_secs(),
        );
    }

    /// Extreme key counts must saturate, not panic or overflow.
    #[test]
    fn audit_response_timeout_saturates_on_huge_k() {
        let defaults = ReplicationConfig::default();
        // Only the absence of a panic is being checked here.
        let _ = defaults.audit_response_timeout(usize::MAX);
    }

    /// A zero quorum threshold violates `1 <= quorum_threshold`.
    #[test]
    fn quorum_threshold_zero_rejected() {
        let zero_quorum = ReplicationConfig {
            quorum_threshold: 0,
            ..ReplicationConfig::default()
        };
        assert!(zero_quorum.validate().is_err());
    }

    /// A quorum threshold larger than the close group is rejected.
    #[test]
    fn quorum_threshold_exceeds_close_group_rejected() {
        let base = ReplicationConfig::default();
        let one_past_group = base.close_group_size + 1;
        let oversized_quorum = ReplicationConfig {
            quorum_threshold: one_past_group,
            ..base
        };
        assert!(oversized_quorum.validate().is_err());
    }

    /// An empty close group is rejected.
    #[test]
    fn close_group_size_zero_rejected() {
        let empty_group = ReplicationConfig {
            close_group_size: 0,
            ..ReplicationConfig::default()
        };
        assert!(empty_group.validate().is_err());
    }

    /// A close group larger than the per-pass prune audit budget is
    /// rejected, and the error names the violated budget.
    #[test]
    fn close_group_size_exceeding_prune_audit_budget_rejected() {
        let oversized_group = ReplicationConfig {
            quorum_threshold: QUORUM_THRESHOLD,
            close_group_size: MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS + 1,
            ..ReplicationConfig::default()
        };

        let err = oversized_group.validate().unwrap_err();

        assert!(
            err.contains("MAX_PRUNE_AUDIT_CHALLENGES_PER_PASS"),
            "error should mention prune audit budget: {err}"
        );
    }

    /// An empty paid-list close group is rejected.
    #[test]
    fn paid_list_close_group_size_zero_rejected() {
        let empty_paid_group = ReplicationConfig {
            paid_list_close_group_size: 0,
            ..ReplicationConfig::default()
        };
        assert!(empty_paid_group.validate().is_err());
    }

    /// Neighbor-sync interval bounds with min > max are rejected.
    #[test]
    fn neighbor_sync_interval_inverted_rejected() {
        let inverted = ReplicationConfig {
            neighbor_sync_interval_min: Duration::from_secs(100),
            neighbor_sync_interval_max: Duration::from_secs(50),
            ..ReplicationConfig::default()
        };
        assert!(inverted.validate().is_err());
    }

    /// Audit-tick interval bounds with min > max are rejected.
    #[test]
    fn audit_tick_interval_inverted_rejected() {
        let inverted = ReplicationConfig {
            audit_tick_interval_min: Duration::from_secs(100),
            audit_tick_interval_max: Duration::from_secs(50),
            ..ReplicationConfig::default()
        };
        assert!(inverted.validate().is_err());
    }

    /// Self-lookup interval bounds with min > max are rejected.
    #[test]
    fn self_lookup_interval_inverted_rejected() {
        let inverted = ReplicationConfig {
            self_lookup_interval_min: Duration::from_secs(100),
            self_lookup_interval_max: Duration::from_secs(50),
            ..ReplicationConfig::default()
        };
        assert!(inverted.validate().is_err());
    }

    /// Syncing with zero peers per round is rejected.
    #[test]
    fn neighbor_sync_peer_count_zero_rejected() {
        let no_peers = ReplicationConfig {
            neighbor_sync_peer_count: 0,
            ..ReplicationConfig::default()
        };
        assert!(no_peers.validate().is_err());
    }

    /// A neighbor-sync scope wider than a k-bucket is rejected.
    #[test]
    fn neighbor_sync_scope_exceeding_k_bucket_size_rejected() {
        let too_wide = ReplicationConfig {
            neighbor_sync_scope: K_BUCKET_SIZE + 1,
            ..ReplicationConfig::default()
        };
        assert!(too_wide.validate().is_err());
    }

    /// The audit sample size follows the integer square root of the store
    /// size.
    #[test]
    fn audit_sample_count_scales_with_sqrt() {
        // (stored chunk count, expected sample size)
        let cases = [
            // Empty store
            (0, 0),
            // Single key
            (1, 1),
            // Small stores: sqrt(3)=1
            (3, 1),
            // sqrt scaling
            (4, 2),
            (25, 5),
            (100, 10),
            (1_000, 31),
            (10_000, 100),
            (1_000_000, 1_000),
        ];
        for (stored, expected) in cases {
            assert_eq!(ReplicationConfig::audit_sample_count(stored), expected);
        }
    }

    /// The incoming-audit key cap is twice the square root of the store
    /// size, with a minimum of one.
    #[test]
    fn max_incoming_audit_keys_scales_dynamically() {
        // (stored chunk count, expected cap)
        let cases = [
            // Empty store: at least 1 key accepted.
            (0, 1),
            // 1 chunk: 2 * sqrt(1) = 2.
            (1, 2),
            // 100 chunks: 2 * sqrt(100) = 20.
            (100, 20),
            // 1M chunks: 2 * sqrt(1_000_000) = 2_000.
            (1_000_000, 2_000),
            // 5M chunks: 2 * sqrt(5_000_000) = 4_472.
            (5_000_000, 4_472),
        ];
        for (stored, expected) in cases {
            assert_eq!(ReplicationConfig::max_incoming_audit_keys(stored), expected);
        }
    }

    /// The effective quorum is the smaller of the configured threshold and
    /// a strict majority of the targets.
    #[test]
    fn quorum_needed_uses_smaller_of_threshold_and_majority() {
        let config = ReplicationConfig::default();

        // (target count, expected quorum)
        let cases = [
            // 7 targets: majority = 7/2+1 = 4, threshold = 4 → min = 4
            (7, 4),
            // 3 targets: majority = 3/2+1 = 2, threshold = 4 → min = 2
            (3, 2),
            // 0 targets: quorum is impossible — returns 0
            (0, 0),
            // 100 targets: majority = 51, threshold = 4 → min = 4
            (100, 4),
        ];
        for (targets, expected) in cases {
            assert_eq!(config.quorum_needed(targets), expected);
        }
    }

    /// `confirm_needed(n)` is `floor(n/2)+1`.
    #[test]
    fn confirm_needed_is_strict_majority() {
        // (paid group size, expected confirmations)
        let cases = [(1, 1), (2, 2), (3, 2), (4, 3), (20, 11)];
        for (group_size, expected) in cases {
            assert_eq!(ReplicationConfig::confirm_needed(group_size), expected);
        }
    }

    /// Every randomized interval stays inside its configured window.
    #[test]
    fn random_intervals_within_bounds() {
        let config = ReplicationConfig::default();
        let in_bounds = |sample: Duration, lo: Duration, hi: Duration| lo <= sample && sample <= hi;

        // Repeat to exercise the randomness.
        for _ in 0..50 {
            assert!(in_bounds(
                config.random_neighbor_sync_interval(),
                config.neighbor_sync_interval_min,
                config.neighbor_sync_interval_max,
            ));
            assert!(in_bounds(
                config.random_audit_tick_interval(),
                config.audit_tick_interval_min,
                config.audit_tick_interval_max,
            ));
            assert!(in_bounds(
                config.random_self_lookup_interval(),
                config.self_lookup_interval_min,
                config.self_lookup_interval_max,
            ));
        }
    }

    /// Equal bounds leave nothing to randomize.
    #[test]
    fn random_interval_equal_bounds_is_deterministic() {
        let pinned = Duration::from_secs(42);
        let config = ReplicationConfig {
            neighbor_sync_interval_min: pinned,
            neighbor_sync_interval_max: pinned,
            ..ReplicationConfig::default()
        };
        assert_eq!(config.random_neighbor_sync_interval(), pinned);
    }

    // -----------------------------------------------------------------------
    // Section 18 scenarios
    // -----------------------------------------------------------------------

    /// Scenario 18: Invalid runtime config is rejected by `validate()`, and
    /// each rejection names the offending parameter.
    #[test]
    fn scenario_18_invalid_config_rejected() {
        // Case: quorum_threshold larger than close_group_size.
        {
            let config = ReplicationConfig {
                quorum_threshold: 10,
                close_group_size: 7,
                ..ReplicationConfig::default()
            };
            let err = config.validate().unwrap_err();
            assert!(
                err.contains("quorum_threshold"),
                "error should mention quorum_threshold: {err}"
            );
        }

        // Case: close_group_size of zero.
        {
            let config = ReplicationConfig {
                close_group_size: 0,
                ..ReplicationConfig::default()
            };
            let err = config.validate().unwrap_err();
            assert!(
                err.contains("close_group_size"),
                "error should mention close_group_size: {err}"
            );
        }

        // Case: neighbor_sync interval with min > max.
        {
            let config = ReplicationConfig {
                neighbor_sync_interval_min: Duration::from_secs(200),
                neighbor_sync_interval_max: Duration::from_secs(100),
                ..ReplicationConfig::default()
            };
            let err = config.validate().unwrap_err();
            assert!(
                err.contains("neighbor_sync_interval"),
                "error should mention neighbor_sync_interval: {err}"
            );
        }

        // Case: self_lookup interval with min > max.
        {
            let config = ReplicationConfig {
                self_lookup_interval_min: Duration::from_secs(999),
                self_lookup_interval_max: Duration::from_secs(1),
                ..ReplicationConfig::default()
            };
            let err = config.validate().unwrap_err();
            assert!(
                err.contains("self_lookup_interval"),
                "error should mention self_lookup_interval: {err}"
            );
        }

        // Case: audit_tick interval with min > max.
        {
            let config = ReplicationConfig {
                audit_tick_interval_min: Duration::from_secs(500),
                audit_tick_interval_max: Duration::from_secs(10),
                ..ReplicationConfig::default()
            };
            let err = config.validate().unwrap_err();
            assert!(
                err.contains("audit_tick_interval"),
                "error should mention audit_tick_interval: {err}"
            );
        }
    }

    /// Scenario 26: Dynamic paid-list threshold for undersized set.
    /// With PaidGroupSize=8, `ConfirmNeeded` = floor(8/2)+1 = 5.
    #[test]
    fn scenario_26_dynamic_paid_threshold_undersized() {
        // (paid group size, expected confirmations, failure message).
        // The first row is the scenario itself; the rest are boundary
        // checks for small paid groups.
        let cases = [
            (8, 5, "floor(8/2)+1 = 5"),
            (1, 1, "single peer requires 1 confirmation"),
            (2, 2, "2 peers require 2 confirmations"),
            (3, 2, "3 peers require 2 confirmations"),
            (0, 1, "0 peers yields floor(0/2)+1 = 1 (degenerate case)"),
        ];
        for (paid_group_size, expected, why) in cases {
            assert_eq!(
                ReplicationConfig::confirm_needed(paid_group_size),
                expected,
                "{why}"
            );
        }
    }

    /// Scenario 31: Consecutive audit ticks occur on randomized intervals
    /// bounded by the configured `[audit_tick_interval_min, audit_tick_interval_max]`
    /// window.
    #[test]
    fn scenario_31_audit_cadence_within_jitter_bounds() {
        let config = ReplicationConfig {
            audit_tick_interval_min: Duration::from_secs(600),
            audit_tick_interval_max: Duration::from_secs(1200),
            ..ReplicationConfig::default()
        };

        // Draw many intervals; each must be in bounds, and at least one
        // must differ from the draw immediately before it.
        let mut previous: Option<Duration> = None;
        let mut saw_different = false;

        for _ in 0..100 {
            let interval = config.random_audit_tick_interval();
            assert!(
                interval >= config.audit_tick_interval_min,
                "interval {interval:?} below min {:?}",
                config.audit_tick_interval_min,
            );
            assert!(
                interval <= config.audit_tick_interval_max,
                "interval {interval:?} above max {:?}",
                config.audit_tick_interval_max,
            );
            if matches!(previous, Some(earlier) if earlier != interval) {
                saw_different = true;
            }
            previous = Some(interval);
        }

        // 100 samples from a 10-minute window at millisecond granularity:
        // all of them being identical is vanishingly unlikely.
        assert!(
            saw_different,
            "audit intervals should exhibit randomized jitter across samples"
        );
    }
}
ant_node/replication/config.rs

ant_node/replication/
config.rs