ant_core/data/client/mod.rs
1//! Client operations for the Autonomi network.
2//!
3//! Provides high-level APIs for storing and retrieving data
4//! on the Autonomi decentralized network.
5
6pub mod adaptive;
7pub mod batch;
8pub mod cache;
9pub(crate) mod cached_merkle;
10pub(crate) mod cached_single;
11pub mod chunk;
12pub mod data;
13pub mod file;
14pub mod merkle;
15pub mod payment;
16pub mod quote;
17
18use crate::data::client::adaptive::{AdaptiveConfig, AdaptiveController, ChannelStart, Outcome};
19use crate::data::client::cache::ChunkCache;
20use crate::data::error::{Error, Result};
21use crate::data::network::Network;
22use crate::data::peer_cache;
23use ant_protocol::evm::Wallet;
24use ant_protocol::transport::{MultiAddr, P2PNode, PeerId};
25use ant_protocol::{XorName, CLOSE_GROUP_SIZE};
26use std::path::PathBuf;
27use std::sync::atomic::{AtomicU64, Ordering};
28use std::sync::Arc;
29use tracing::debug;
30
31/// Width of the chunk PUT-target set (initial writes plus fallback): the
32/// closest `PUT_TARGET_WIDTH` peers to the address.
33///
34/// Mirrors the node-side `K_BUCKET_SIZE` / `PAID_QUOTE_ISSUER_CLOSENESS_WIDTH`
35/// (20): a node accepts a reused payment proof only when one of the proof's
36/// closest-`CLOSE_GROUP_SIZE` quote issuers is within its own local 20-closest,
37/// so trying peers past this width is pointless.
38pub(crate) const PUT_TARGET_WIDTH: usize = 20;
39
40/// Classify a `data::error::Error` into a controller `Outcome`.
41///
42/// Capacity signals (Timeout / NetworkError) drive the controller
43/// down; application errors do not. The mapping is conservative:
44/// anything that COULD be transport-related is treated as a network
45/// signal, because under-classifying a real network failure as
46/// "application error" makes the controller blind to genuine stress.
47///
48/// Mapping policy:
49/// - `Timeout` -> `Timeout` (per-op deadline elapsed)
50/// - `Network`, `InsufficientPeers`, `Io` -> `NetworkError` (transport
51/// layer reported failure)
52/// - `Protocol`, `Storage` -> `NetworkError` (these wrap remote errors
53/// that frequently include peer disconnects mid-stream — under
54/// network stress these are how transport failures surface)
55/// - `PartialUpload` -> `NetworkError` (literal capacity signal: some
56/// chunks could not be stored)
57/// - `AlreadyStored`, `Encryption`, `Crypto`, `Payment`,
58/// `Serialization`, `InvalidData`, `SignatureVerification`,
59/// `Config`, `InsufficientDiskSpace`, `CostEstimationInconclusive`,
60/// `Cancelled` -> `ApplicationError` (would happen on a perfectly
61/// healthy link; `Cancelled` is caller-initiated and must not be retried
62/// as a transport failure)
63/// - `RemotePut` -> `ApplicationError` (the remote node responded with a
64/// structured rejection — the transport succeeded, so the node declined
65/// at the application layer; not a local capacity signal)
66/// - `CloseGroupShortfall` -> `ApplicationError` (a quorum shortfall caused
67/// by close-group dial/relay churn with no PUT-response timeouts — remote
68/// peer churn, not local backpressure; a timeout-bearing shortfall keeps
69/// `InsufficientPeers`/`NetworkError` instead, so genuine congestion still
70/// cuts the cap — V2-554)
71pub(crate) fn classify_error(err: &Error) -> Outcome {
72 match err {
73 Error::Timeout(_) => Outcome::Timeout,
74 Error::Network(_)
75 | Error::InsufficientPeers(_)
76 | Error::Io(_)
77 | Error::Protocol(_)
78 | Error::Storage(_)
79 | Error::PartialUpload { .. } => Outcome::NetworkError,
80 Error::AlreadyStored
81 | Error::Encryption(_)
82 | Error::Crypto(_)
83 | Error::Payment(_)
84 | Error::Serialization(_)
85 | Error::InvalidData(_)
86 | Error::SignatureVerification(_)
87 | Error::Config(_)
88 | Error::InsufficientDiskSpace(_)
89 | Error::CostEstimationInconclusive(_)
90 | Error::Cancelled(_)
91 | Error::BadQuoteBinding { .. }
92 // A remote node responded with a structured rejection — the
93 // transport round-trip succeeded, so the node declined at the
94 // application layer (payment/disk/quote/pool). Not a local
95 // capacity signal; recorded but must not push the limiter down.
96 | Error::RemotePut { .. }
97 // A close-group PUT shortfall caused purely by dial/relay churn
98 // (dead/stale relayed peer addresses), with no PUT-response
99 // timeouts to signal local backpressure. Remote peer churn, not
100 // "client sending too fast" — must not push the limiter down
101 // (V2-554). A shortfall that DID time out keeps `InsufficientPeers`
102 // (`NetworkError`) so real congestion still cuts the cap.
103 | Error::CloseGroupShortfall(_) => Outcome::ApplicationError,
104 }
105}
106
107/// Compute XOR distance between a peer's ID bytes and a target address.
108///
109/// Uses the first 32 bytes of the peer ID (or fewer if shorter) XORed
110/// with the target address. The returned byte array sorts
111/// lexicographically from closest to furthest.
112pub(crate) fn peer_xor_distance(peer_id: &PeerId, target: &[u8; 32]) -> [u8; 32] {
113 let peer_bytes = peer_id.as_bytes();
114 let mut distance = [0u8; 32];
115 for (i, d) in distance.iter_mut().enumerate() {
116 let peer_byte = peer_bytes.get(i).copied().unwrap_or(0);
117 *d = peer_byte ^ target[i];
118 }
119 distance
120}
121
122/// Default timeout for lightweight network operations (quotes, DHT lookups) in seconds.
123const DEFAULT_QUOTE_TIMEOUT_SECS: u64 = 10;
124
125/// Default timeout for the per-peer chunk GET response and any other
126/// caller that explicitly reads `store_timeout_secs`, in seconds.
127///
128/// Note despite the name: this knob does **not** govern the non-merkle
129/// chunk PUT response timeout — that path uses the
130/// `STORE_RESPONSE_TIMEOUT` constant in `chunk.rs` directly. Nor does
131/// it govern the merkle batch PUT timeout — see
132/// `DEFAULT_MERKLE_STORE_TIMEOUT_SECS`.
133///
134/// 10 s matches the pre-existing `main` default and intentionally
135/// excludes residential-upload tuning, which is Mick's PR #78
136/// territory (splitting GET into its own field).
137const DEFAULT_STORE_TIMEOUT_SECS: u64 = 10;
138
139/// Default timeout for **merkle batch** chunk store operations in seconds.
140///
141/// Separate from `DEFAULT_STORE_TIMEOUT_SECS` because merkle PUTs carry
142/// an extra storer-side cost: the payment verifier runs an iterative
143/// DHT lookup (`CLOSENESS_LOOKUP_TIMEOUT` in `ant-node`, **240 s**
144/// post-PR #89) before accepting the proof.
145///
146/// This timeout MUST be >= the storer-side `CLOSENESS_LOOKUP_TIMEOUT`
147/// plus padding for the store-response round-trip and storer-local
148/// I/O. Otherwise the client gives up while the storer is still
149/// happily verifying, the storer wastes CPU/bandwidth on a chunk the
150/// client has already discarded, and the client re-targets a
151/// different close-K member — potentially double-storing the same
152/// chunk and polluting routing.
153///
154/// 270 s = 240 s (storer lookup) + 30 s padding (network RTT + LMDB
155/// put + fsync + clock skew tolerance).
156///
157/// This invariant must be re-validated if either side's timeout
158/// changes. Empirically surfaced as "every cross-region merkle chunk
159/// times out at 10 s" on a 210-node 7-region testnet run on
160/// 2026-05-12; bumping to 270 s flipped that 0/22 -> 9/9 pass rate.
161const DEFAULT_MERKLE_STORE_TIMEOUT_SECS: u64 = 270;
162
163/// Default timeout for chunk GET response operations in seconds.
164const DEFAULT_CHUNK_GET_TIMEOUT_SECS: u64 = 10;
165
166/// Default quote concurrency: high because quoting is pure network I/O
167/// (DHT lookups + small request/response messages) with no CPU-bound work.
168const DEFAULT_QUOTE_CONCURRENCY: usize = 32;
169
170/// Default store concurrency: moderate because each chunk PUT sends ~4MB
171/// to 7 close-group peers. At 8 concurrent stores, ~225MB of outbound
172/// traffic can be in flight. Users on fast connections can increase this
173/// with --store-concurrency; users on slow connections can decrease it.
174const DEFAULT_STORE_CONCURRENCY: usize = 8;
175
176/// Configuration for the Autonomi client.
177#[derive(Debug, Clone)]
178pub struct ClientConfig {
179 /// Per-op timeout for lightweight network operations (quotes,
180 /// DHT lookups), in seconds. The adaptive controller does NOT
181 /// currently size timeouts; this remains a static knob.
182 pub quote_timeout_secs: u64,
183 /// Per-op timeout, in seconds, for the chunk GET response path
184 /// (`chunk_get_from_peer`) and any other caller that reads this
185 /// field directly.
186 ///
187 /// Note despite the historical name `store_timeout_secs`: this
188 /// knob does **not** govern the non-merkle chunk PUT response
189 /// timeout (that path uses the `STORE_RESPONSE_TIMEOUT` constant
190 /// in `chunk.rs`) and does **not** govern the merkle batch PUT
191 /// timeout (see `merkle_store_timeout_secs`). Rename pending in
192 /// Mick's PR #78 which adds a dedicated `chunk_get_timeout_secs`.
193 ///
194 /// The adaptive controller does NOT currently size timeouts;
195 /// this remains a static knob.
196 pub store_timeout_secs: u64,
197 /// Per-op timeout for **merkle batch** chunk store (PUT)
198 /// operations, in seconds. Separate from `store_timeout_secs`
199 /// because merkle PUTs incur the storer-side
200 /// `CLOSENESS_LOOKUP_TIMEOUT` (240 s post-PR #89) on top of the
201 /// usual store path; the client must wait at least that long
202 /// plus padding, or the storer wastes work on a chunk the client
203 /// has already given up on. Default 270 s.
204 pub merkle_store_timeout_secs: u64,
205 /// Per-peer response timeout for chunk GET operations, in seconds.
206 /// This is intentionally independent from `store_timeout_secs`: PUTs
207 /// and GETs have different payload direction and performance profiles.
208 pub chunk_get_timeout_secs: u64,
209 /// Number of closest peers to consider for routing.
210 pub close_group_size: usize,
211 /// **Deprecated.** Pre-adaptive ceiling for quote concurrency.
212 ///
213 /// The adaptive controller now sizes quote fan-out from observed
214 /// signals. This field, when non-zero and smaller than the
215 /// controller's per-channel default, clamps the **quote channel
216 /// only** (it does NOT bleed into store or fetch). Removed in a
217 /// future release.
218 pub quote_concurrency: usize,
219 /// **Deprecated.** Pre-adaptive ceiling for store concurrency.
220 ///
221 /// The adaptive controller now sizes store fan-out from observed
222 /// signals. This field, when non-zero and smaller than the
223 /// controller's per-channel default, clamps the **store channel
224 /// only** (it does NOT bleed into quote or fetch). Removed in a
225 /// future release.
226 pub store_concurrency: usize,
227 /// Adaptive controller configuration. Defaults are tuned to match
228 /// or exceed the prior static behavior — disabling adaptation
229 /// (`adaptive.enabled = false`) reverts to the controller's
230 /// `initial` values without re-evaluation.
231 pub adaptive: AdaptiveConfig,
232 /// Allow loopback (`127.0.0.1`) connections in the saorsa-transport
233 /// layer. Set to `true` only for devnet / local testing. Production
234 /// peers on the public Autonomi network reject the QUIC handshake
235 /// variant produced when this is `true`, so the default is `false`.
236 ///
237 /// This mirrors the `--allow-loopback` flag in `ant-cli`, which already
238 /// defaults to `false` and threads through to the same
239 /// `CoreNodeConfig::builder().local(...)` call.
240 pub allow_loopback: bool,
241 /// Bind a dual-stack IPv6 socket (`true`) or an IPv4-only socket
242 /// (`false`). Defaults to `true`, matching the CLI default.
243 ///
244 /// Set to `false` only when running on hosts without a working IPv6
245 /// stack, to avoid advertising unreachable v6 addresses to the DHT
246 /// (which causes slow connects and junk DHT address records). This
247 /// mirrors the `--ipv4-only` flag in `ant-cli`.
248 pub ipv6: bool,
249}
250
251impl Default for ClientConfig {
252 fn default() -> Self {
253 Self {
254 quote_timeout_secs: DEFAULT_QUOTE_TIMEOUT_SECS,
255 store_timeout_secs: DEFAULT_STORE_TIMEOUT_SECS,
256 merkle_store_timeout_secs: DEFAULT_MERKLE_STORE_TIMEOUT_SECS,
257 chunk_get_timeout_secs: DEFAULT_CHUNK_GET_TIMEOUT_SECS,
258 close_group_size: CLOSE_GROUP_SIZE,
259 quote_concurrency: DEFAULT_QUOTE_CONCURRENCY,
260 store_concurrency: DEFAULT_STORE_CONCURRENCY,
261 adaptive: AdaptiveConfig::default(),
262 allow_loopback: false,
263 ipv6: true,
264 }
265 }
266}
267
268/// Build the adaptive controller for a `Client`. Loads any persisted
269/// snapshot, clamps cold-start values into the deprecated-flag bounds
270/// **per channel** (so a pin on `--store-concurrency` does NOT bleed
271/// into the fetch / quote channels), and returns the persistence path
272/// so callers can save back at shutdown.
273fn build_controller(config: &ClientConfig) -> (AdaptiveController, Option<PathBuf>) {
274 let mut adaptive_cfg = config.adaptive.clone();
275
276 // Per-channel ceilings: each legacy field is interpreted as a cap
277 // for ONLY its matching channel. The fetch channel has no
278 // pre-existing legacy field; it always uses the controller's
279 // default ceiling.
280 //
281 // The legacy fields are non-zero by ClientConfig::default(), but
282 // we honor them as bounds only when they would actually CONSTRAIN
283 // the controller — i.e. when smaller than the per-channel default
284 // max. A default ClientConfig must not silently lower the
285 // controller's ceilings.
286 // A value equal to the historic legacy default is treated as
287 // "not pinned by the user" — without this, every default
288 // ClientConfig would silently lower the controller's per-channel
289 // ceilings to the prior static values (32/8) and the controller
290 // could never grow above them.
291 let user_quote_max = config.quote_concurrency;
292 let user_store_max = config.store_concurrency;
293 let quote_pinned = user_quote_max > 0 && user_quote_max != DEFAULT_QUOTE_CONCURRENCY;
294 let store_pinned = user_store_max > 0 && user_store_max != DEFAULT_STORE_CONCURRENCY;
295 if quote_pinned && user_quote_max < adaptive_cfg.max.quote {
296 adaptive_cfg.max.quote = user_quote_max;
297 }
298 if store_pinned && user_store_max < adaptive_cfg.max.store {
299 adaptive_cfg.max.store = user_store_max;
300 }
301
302 // Cold-start values: matched to the prior static defaults. If the
303 // legacy field caps the channel below the cold-start, lower the
304 // start to match — never start above the channel's max.
305 let mut start = ChannelStart::default();
306 start.quote = start.quote.min(adaptive_cfg.max.quote);
307 start.store = start.store.min(adaptive_cfg.max.store);
308 start.fetch = start.fetch.min(adaptive_cfg.max.fetch);
309
310 let adaptive_enabled = adaptive_cfg.enabled;
311 let controller = AdaptiveController::new(start, adaptive_cfg);
312 // Skip disk warm-start entirely when adaptation is disabled —
313 // fixed-concurrency mode means the user wants exactly the cold
314 // start, no surprises from prior runs. (warm_start is also a
315 // no-op when disabled, but skipping the load avoids file I/O
316 // and the path-resolution side effects.)
317 let persist_path = if adaptive_enabled {
318 let p = adaptive::default_persist_path();
319 if let Some(ref path) = p {
320 if let Some(snap) = adaptive::load_snapshot(path) {
321 debug!(path = %path.display(), "adaptive: warm-start from disk");
322 controller.warm_start(snap);
323 }
324 }
325 p
326 } else {
327 // Even with adaptation off, persist_path is computed so
328 // explicit save_adaptive_snapshot() calls still work — but
329 // the controller currently never moves, so saving the cold
330 // start is harmless.
331 adaptive::default_persist_path()
332 };
333
334 // File downloads choose a stream-decrypt batch size per download
335 // from the current fetch cap and usable RAM, then pass it into
336 // self_encryption's runtime batch-size API. The adaptive controller
337 // still drives fan-out inside each batch by re-reading
338 // `controller.fetch.current()` in the decrypt callback.
339
340 (controller, persist_path)
341}
342
343/// Client for the Autonomi decentralized network.
344///
345/// Provides high-level APIs for storing and retrieving chunks
346/// and files on the network.
347pub struct Client {
348 config: ClientConfig,
349 network: Network,
350 wallet: Option<Arc<Wallet>>,
351 evm_network: Option<ant_protocol::evm::Network>,
352 chunk_cache: ChunkCache,
353 next_request_id: AtomicU64,
354 /// Adaptive concurrency controller: replaces the static
355 /// quote/store concurrency knobs. See `adaptive` module.
356 controller: AdaptiveController,
357 /// Path the controller persists its snapshot to. `None` disables
358 /// persistence (useful for tests / non-disk environments).
359 persist_path: Option<PathBuf>,
360 /// Path for the persistent client peer cache. `None` disables the cache.
361 peer_cache_path: Option<PathBuf>,
362}
363
364impl Client {
365 /// Create a client connected to the given P2P node.
366 #[must_use]
367 pub fn from_node(node: Arc<P2PNode>, config: ClientConfig) -> Self {
368 Self::from_node_with_peer_cache(node, config, None)
369 }
370
371 /// Create a client connected to the given P2P node and attach an optional
372 /// persistent peer cache path.
373 #[must_use]
374 pub fn from_node_with_peer_cache(
375 node: Arc<P2PNode>,
376 config: ClientConfig,
377 peer_cache_path: Option<PathBuf>,
378 ) -> Self {
379 let network = Network::from_node(node);
380 let (controller, persist_path) = build_controller(&config);
381 Self {
382 config,
383 network,
384 wallet: None,
385 evm_network: None,
386 chunk_cache: ChunkCache::default(),
387 next_request_id: AtomicU64::new(1),
388 controller,
389 persist_path,
390 peer_cache_path,
391 }
392 }
393
394 /// Create a client connected to bootstrap peers.
395 ///
396 /// Threads `config.allow_loopback` and `config.ipv6` through to
397 /// `Network::new`, which controls the saorsa-transport `local` and
398 /// `ipv6` flags on the underlying `CoreNodeConfig`. See
399 /// `ClientConfig::allow_loopback` and `ClientConfig::ipv6` for details.
400 ///
401 /// # Errors
402 ///
403 /// Returns an error if the P2P node cannot be created or bootstrapping fails.
404 pub async fn connect(
405 bootstrap_peers: &[std::net::SocketAddr],
406 config: ClientConfig,
407 ) -> Result<Self> {
408 debug!(
409 "Connecting to Autonomi network with {} bootstrap peers (allow_loopback={}, ipv6={})",
410 bootstrap_peers.len(),
411 config.allow_loopback,
412 config.ipv6,
413 );
414 let network = Network::new(bootstrap_peers, config.allow_loopback, config.ipv6).await?;
415 let (controller, persist_path) = build_controller(&config);
416 Ok(Self {
417 config,
418 network,
419 wallet: None,
420 evm_network: None,
421 chunk_cache: ChunkCache::default(),
422 next_request_id: AtomicU64::new(1),
423 controller,
424 persist_path,
425 peer_cache_path: None,
426 })
427 }
428
429 /// Set the wallet for payment operations.
430 ///
431 /// Also populates the EVM network from the wallet so that
432 /// token approvals work without a separate `with_evm_network` call.
433 #[must_use]
434 pub fn with_wallet(mut self, wallet: Wallet) -> Self {
435 self.evm_network = Some(wallet.network().clone());
436 self.wallet = Some(Arc::new(wallet));
437 self
438 }
439
440 /// Set the EVM network without requiring a wallet.
441 ///
442 /// This enables token approval and contract interactions
443 /// for external-signer flows where the private key lives outside Rust.
444 #[must_use]
445 pub fn with_evm_network(mut self, network: ant_protocol::evm::Network) -> Self {
446 self.evm_network = Some(network);
447 self
448 }
449
450 /// Get the EVM network, falling back to the wallet's network if available.
451 ///
452 /// # Errors
453 ///
454 /// Returns an error if neither `with_evm_network` nor `with_wallet` was called.
455 pub(crate) fn require_evm_network(&self) -> Result<&ant_protocol::evm::Network> {
456 if let Some(ref net) = self.evm_network {
457 return Ok(net);
458 }
459 if let Some(ref wallet) = self.wallet {
460 return Ok(wallet.network());
461 }
462 Err(Error::Payment(
463 "EVM network not configured — call with_evm_network() or with_wallet() first"
464 .to_string(),
465 ))
466 }
467
468 /// Get the client configuration.
469 #[must_use]
470 pub fn config(&self) -> &ClientConfig {
471 &self.config
472 }
473
474 /// Get a mutable reference to the client configuration.
475 pub fn config_mut(&mut self) -> &mut ClientConfig {
476 &mut self.config
477 }
478
479 /// Get a reference to the network layer.
480 #[must_use]
481 pub fn network(&self) -> &Network {
482 &self.network
483 }
484
485 /// Get the wallet, if configured.
486 #[must_use]
487 pub fn wallet(&self) -> Option<&Arc<Wallet>> {
488 self.wallet.as_ref()
489 }
490
491 /// Get a reference to the chunk cache.
492 #[must_use]
493 pub fn chunk_cache(&self) -> &ChunkCache {
494 &self.chunk_cache
495 }
496
497 /// Adaptive concurrency controller. Hot loops read
498 /// `controller().<channel>.current()` to size their fan-out and
499 /// call `.observe(...)` on each completion.
500 #[must_use]
501 pub fn controller(&self) -> &AdaptiveController {
502 &self.controller
503 }
504
505 /// Persist the current adaptive snapshot to disk so the next
506 /// `Client::connect` warm-starts at the learned values instead of
507 /// cold defaults. Best effort — failures log and are discarded.
508 /// Idempotent. Safe to call from a Drop impl or an explicit
509 /// shutdown hook.
510 pub fn save_adaptive_snapshot(&self) {
511 if let Some(ref path) = self.persist_path {
512 adaptive::save_snapshot(path, self.controller.snapshot());
513 }
514 }
515
516 /// Persist currently connected peers that have Direct-tagged addresses in
517 /// the DHT. Best effort; failures are logged and do not affect the client
518 /// operation that just completed.
519 pub async fn save_peer_cache(&self) {
520 if let Some(ref path) = self.peer_cache_path {
521 let node = self.network().node();
522 peer_cache::promote_connected_direct_peers(node.as_ref(), path, node.dht().k_value())
523 .await;
524 }
525 }
526
527 /// Get the next request ID for protocol messages.
528 pub(crate) fn next_request_id(&self) -> u64 {
529 self.next_request_id.fetch_add(1, Ordering::Relaxed)
530 }
531
532 /// Return the chunk PUT-target set: the closest [`PUT_TARGET_WIDTH`] peers
533 /// to the address, each paired with its known network addresses.
534 ///
535 /// Used by the merkle store path, which — unlike single-node payment — has
536 /// no witnessed put-target list to forward, so it fetches the closest-K
537 /// neighbourhood locally.
538 pub(crate) async fn put_target_peers(
539 &self,
540 target: &XorName,
541 ) -> Result<Vec<(PeerId, Vec<MultiAddr>)>> {
542 self.closest_peers(target, PUT_TARGET_WIDTH).await
543 }
544
545 /// Return the requested number of closest peers for a target address.
546 ///
547 /// Queries the DHT for peers by XOR distance. Returns each peer
548 /// paired with its known network addresses.
549 pub(crate) async fn closest_peers(
550 &self,
551 target: &XorName,
552 count: usize,
553 ) -> Result<Vec<(PeerId, Vec<MultiAddr>)>> {
554 let peers = self.network().find_closest_peers(target, count).await?;
555
556 if peers.is_empty() {
557 return Err(Error::InsufficientPeers(
558 "DHT returned no peers for target address".to_string(),
559 ));
560 }
561 Ok(peers)
562 }
563}
564
565/// Persist the adaptive snapshot when the `Client` is dropped, so any
566/// caller — CLI, daemon, library user, integration test — gets
567/// warm-start carry-over for free without remembering to call
568/// `save_adaptive_snapshot()` explicitly. Best effort, sync `std::fs`,
569/// no panic risk on a poisoned mutex (the inner helper handles it).
570///
571/// We deliberately write SYNCHRONOUSLY (not via `spawn_blocking`)
572/// because Drop runs during process shutdown / runtime teardown,
573/// when fire-and-forget background tasks can be dropped before they
574/// complete and the snapshot is silently lost. A small synchronous
575/// stall on a tokio worker (typically <1ms for a local-disk JSON
576/// write of ~50 bytes) is the right tradeoff for guaranteed
577/// persistence — BOUNDED by `DROP_SAVE_TIMEOUT` so a stalled
578/// network-mounted data dir cannot block process shutdown.
579const DROP_SAVE_TIMEOUT: std::time::Duration = std::time::Duration::from_millis(500);
580
581impl Drop for Client {
582 fn drop(&mut self) {
583 let Some(path) = self.persist_path.clone() else {
584 return;
585 };
586 let snap = self.controller.snapshot();
587 adaptive::save_snapshot_with_timeout(path, snap, DROP_SAVE_TIMEOUT);
588 }
589}
590
591#[cfg(test)]
592#[allow(clippy::unwrap_used)]
593mod tests {
594 use super::*;
595
596 /// Cover EVERY variant of `data::error::Error`. Build an instance of
597 /// each, classify it, and assert the resulting `Outcome` matches the
598 /// only sensible mapping. If a future commit adds a new error variant
599 /// without updating `classify_error`, this test fails to ensure the
600 /// adaptive controller always sees correct capacity signals.
601 ///
602 /// Mapping policy (mirrors `classify_error` doc):
603 /// - `Timeout` -> `Outcome::Timeout`
604 /// - `Network`, `InsufficientPeers`, `Io`, `Protocol`, `Storage`,
605 /// `PartialUpload` -> `Outcome::NetworkError` (transport-related
606 /// or literal capacity failure)
607 /// - everything else -> `Outcome::ApplicationError` (would happen
608 /// on a perfectly healthy network)
609 #[test]
610 fn classify_error_covers_all_variants() {
611 let cases: Vec<(Error, Outcome)> = vec![
612 (Error::Timeout("t".to_string()), Outcome::Timeout),
613 (Error::Network("n".to_string()), Outcome::NetworkError),
614 (
615 Error::InsufficientPeers("p".to_string()),
616 Outcome::NetworkError,
617 ),
618 (Error::Storage("s".to_string()), Outcome::NetworkError),
619 (Error::Payment("p".to_string()), Outcome::ApplicationError),
620 (Error::Protocol("p".to_string()), Outcome::NetworkError),
621 (
622 Error::InvalidData("d".to_string()),
623 Outcome::ApplicationError,
624 ),
625 (
626 Error::Serialization("s".to_string()),
627 Outcome::ApplicationError,
628 ),
629 (Error::Crypto("c".to_string()), Outcome::ApplicationError),
630 (
631 Error::Io(std::io::Error::other("io")),
632 Outcome::NetworkError,
633 ),
634 (Error::Config("c".to_string()), Outcome::ApplicationError),
635 (
636 Error::SignatureVerification("s".to_string()),
637 Outcome::ApplicationError,
638 ),
639 (
640 Error::Encryption("e".to_string()),
641 Outcome::ApplicationError,
642 ),
643 (Error::AlreadyStored, Outcome::ApplicationError),
644 (
645 Error::InsufficientDiskSpace("d".to_string()),
646 Outcome::ApplicationError,
647 ),
648 (
649 Error::CostEstimationInconclusive("c".to_string()),
650 Outcome::ApplicationError,
651 ),
652 (
653 Error::PartialUpload {
654 stored: vec![],
655 stored_count: 0,
656 failed: vec![],
657 failed_count: 0,
658 total_chunks: 0,
659 spend: Box::new(crate::data::error::PartialUploadSpend {
660 storage_cost_atto: "0".to_string(),
661 gas_cost_wei: 0,
662 }),
663 reason: "r".to_string(),
664 },
665 Outcome::NetworkError,
666 ),
667 (
668 Error::BadQuoteBinding {
669 peer_id: "peer".to_string(),
670 detail: "mismatch".to_string(),
671 },
672 Outcome::ApplicationError,
673 ),
674 // A remote application rejection: the node responded with a
675 // structured `ProtocolError`, so the transport succeeded and
676 // this must NOT register as a capacity signal (V2-468).
677 (
678 Error::RemotePut {
679 address: "abcd".to_string(),
680 source: ant_protocol::ProtocolError::PaymentFailed("stale quote".to_string()),
681 },
682 Outcome::ApplicationError,
683 ),
684 // A close-group quorum shortfall caused by dial/relay churn with
685 // no PUT-response timeouts — remote peer churn, not local
686 // backpressure, so it must NOT register as a capacity signal
687 // (V2-554). A timeout-bearing shortfall keeps `InsufficientPeers`.
688 (
689 Error::CloseGroupShortfall("Stored on 3 peers, need 4".to_string()),
690 Outcome::ApplicationError,
691 ),
692 ];
693 for (err, expected) in &cases {
694 let got = classify_error(err);
695 assert_eq!(
696 got, *expected,
697 "classify_error({err:?}) = {got:?}, expected {expected:?}",
698 );
699 }
700 }
701
702 /// C4 fix guard: pinning the legacy `quote_concurrency` /
703 /// `store_concurrency` ClientConfig fields must clamp ONLY the
704 /// matching channel's max in the resulting controller. The fetch
705 /// (download) channel must keep its full default ceiling.
706 #[test]
707 fn legacy_concurrency_pin_does_not_bleed_across_channels() {
708 let cfg = ClientConfig {
709 quote_concurrency: 4,
710 store_concurrency: 2,
711 ..ClientConfig::default()
712 };
713 let (controller, _) = build_controller(&cfg);
714 // The store/quote caps must be clamped to the user's pin.
715 assert_eq!(controller.config.max.quote, 4, "quote pin not respected");
716 assert_eq!(controller.config.max.store, 2, "store pin not respected");
717 // The fetch cap must NOT have been lowered — that's the
718 // regression C4 was about.
719 let default_fetch_max = adaptive::ChannelMax::default().fetch;
720 assert_eq!(
721 controller.config.max.fetch, default_fetch_max,
722 "fetch cap was lowered by store/quote pin (C4 regression)"
723 );
724 // Cold-start values must respect the lowered ceilings.
725 assert!(
726 controller.quote.current() <= 4,
727 "quote start exceeds its cap"
728 );
729 assert!(
730 controller.store.current() <= 2,
731 "store start exceeds its cap"
732 );
733 }
734
735 /// Default ClientConfig must NOT silently lower the controller's
736 /// per-channel ceilings — the adaptive defaults give every channel
737 /// real headroom to grow. This guards against future commits
738 /// re-introducing a global clamp.
739 #[test]
740 fn default_client_config_does_not_clamp_controller_max() {
741 let cfg = ClientConfig::default();
742 let (controller, _) = build_controller(&cfg);
743 let defaults = adaptive::ChannelMax::default();
744 // The legacy fields default to 32/8 (the prior static knobs),
745 // both of which are <= the per-channel adaptive defaults
746 // (128/64). build_controller must keep the larger, not clobber
747 // with the legacy values.
748 assert_eq!(controller.config.max.quote, defaults.quote);
749 assert_eq!(controller.config.max.store, defaults.store);
750 assert_eq!(controller.config.max.fetch, defaults.fetch);
751 // Compile-time-ish guard: if a new variant is added to Error,
752 // this match forces an update here.
753 let _ = |e: &Error| match e {
754 Error::Timeout(_)
755 | Error::Network(_)
756 | Error::InsufficientPeers(_)
757 | Error::Storage(_)
758 | Error::Payment(_)
759 | Error::Protocol(_)
760 | Error::InvalidData(_)
761 | Error::Serialization(_)
762 | Error::Crypto(_)
763 | Error::Io(_)
764 | Error::Config(_)
765 | Error::SignatureVerification(_)
766 | Error::Encryption(_)
767 | Error::AlreadyStored
768 | Error::InsufficientDiskSpace(_)
769 | Error::CostEstimationInconclusive(_)
770 | Error::Cancelled(_)
771 | Error::PartialUpload { .. }
772 | Error::BadQuoteBinding { .. }
773 | Error::RemotePut { .. }
774 | Error::CloseGroupShortfall(_) => (),
775 };
776 }
777}