//! Ring protocol logic and supporting types.
//!
//! Mainly maintains a healthy, well-balanced pool of connections to other peers in the
//! network and routes requests to the most suitable peers.
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::net::SocketAddr;
use std::sync::{Arc, Weak, atomic::AtomicU64};
use std::time::Duration;
use tokio::time::Instant;
use tracing::Instrument;
use either::Either;
use freenet_stdlib::prelude::{ContractInstanceId, ContractKey};
use parking_lot::{Mutex, RwLock};
pub use hosting::{AddClientSubscriptionResult, ClientDisconnectResult, SubscribeResult};
use crate::message::TransactionType;
use crate::topology::TopologyAdjustment;
use crate::topology::rate::Rate;
use crate::tracing::{NetEventLog, NetEventRegister};
use crate::transport::TransportPublicKey;
use crate::util::{Contains, time_source::InstantTimeSrc};
use crate::{
config::{GlobalExecutor, GlobalRng},
message::{NetMessage, NetMessageV1, Transaction},
node::{self, EventLoopNotificationsSender, NodeConfig, OpManager, PeerId},
operations::{OpEnum, connect::ConnectOp},
router::Router,
};
mod connection_backoff;
mod connection_manager;
pub(crate) use connection_manager::ConnectionManager;
mod connection;
mod hosting;
pub use hosting::{AccessType, RecordAccessResult};
pub mod interest;
mod live_tx;
mod location;
pub(crate) mod peer_cache;
mod peer_connection_backoff;
mod peer_key_location;
pub mod topology_registry;
/// Whether to auto-subscribe to contracts on GET.
/// When true, GET operations will automatically subscribe to the contract
/// to receive updates. This is controlled by hosting cache eviction.
pub const AUTO_SUBSCRIBE_ON_GET: bool = true;
use connection_backoff::ConnectionBackoff;
pub use connection_backoff::ConnectionFailureReason;
pub(crate) use peer_connection_backoff::PeerConnectionBackoff;
pub use self::live_tx::LiveTransactionTracker;
pub use connection::Connection;
pub use interest::PeerKey;
pub use location::{Distance, Location};
pub use peer_key_location::{KnownPeerKeyLocation, PeerAddr, PeerKeyLocation};
/// Backoff state for contract-directed CONNECT attempts.
struct ContractConnectState {
current_backoff: Duration,
last_attempt: Instant,
}
/// Thread safe and friendly data structure to keep track of the local knowledge
/// of the state of the ring.
///
// Note: For now we internally wrap some of the types with locks and/or use
// multithreaded maps. In the future, if performance requires it, some of this can be
// moved towards a more lock-free multithreading model.
pub(crate) struct Ring {
pub max_hops_to_live: usize,
pub connection_manager: ConnectionManager,
pub router: Arc<RwLock<Router>>,
pub live_tx_tracker: LiveTransactionTracker,
hosting_manager: hosting::HostingManager,
event_register: Box<dyn NetEventRegister>,
op_manager: RwLock<Option<Weak<OpManager>>>,
/// Whether this peer is a gateway. This affects the node's behavior when acquiring
/// and dropping connections.
pub(crate) is_gateway: bool,
/// Shared connection backoff tracker for all connection failure types.
connection_backoff: Arc<parking_lot::Mutex<ConnectionBackoff>>,
/// Per-contract backoff for contract-directed CONNECT attempts.
contract_connect_backoff: Mutex<HashMap<ContractKey, ContractConnectState>>,
/// Injectable time source used by `connection_maintenance`. Using `util::TimeSource`
/// (which returns `tokio::time::Instant`) lets tests supply `SharedMockTimeSource` for
/// fine-grained control without pausing the entire tokio runtime.
pub(crate) time_source: Arc<dyn crate::util::time_source::TimeSource + Send + Sync>,
/// Directory for persisting the peer address cache. When set, the peer cache
/// is periodically saved here and loaded on startup for fast reconnection.
pub(crate) peer_cache_dir: Option<std::path::PathBuf>,
}
// /// A data type that represents the fact that a peer has been blacklisted
// /// for some action. Has to be coupled with that action
// #[derive(Debug)]
// struct Blacklisted {
// since: Instant,
// peer: PeerKey,
// }
/// Guard that ensures `complete_subscription_request` is called even if the
/// subscription task panics. This prevents contracts from being stuck in
/// `pending_subscription_requests` forever.
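///
/// A minimal usage sketch (hypothetical `subscribe()` future; the real call sites live
/// in the renewal task below):
/// ```ignore
/// let guard = SubscriptionRecoveryGuard::new(op_manager.clone(), contract_key);
/// let ok = subscribe().await.is_ok();
/// // If this point is never reached (panic or cancellation), Drop reports failure.
/// guard.complete(ok);
/// ```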
pub(crate) struct SubscriptionRecoveryGuard {
op_manager: Arc<OpManager>,
contract_key: ContractKey,
completed: bool,
}
impl SubscriptionRecoveryGuard {
pub(crate) fn new(op_manager: Arc<OpManager>, contract_key: ContractKey) -> Self {
Self {
op_manager,
contract_key,
completed: false,
}
}
pub(crate) fn complete(mut self, success: bool) {
self.op_manager
.ring
.complete_subscription_request(&self.contract_key, success);
self.completed = true;
}
}
impl Drop for SubscriptionRecoveryGuard {
fn drop(&mut self) {
if !self.completed {
// Task panicked or was cancelled before completion - treat as failure
tracing::warn!(
contract = %self.contract_key,
"Subscription recovery task terminated unexpectedly, marking as failed"
);
self.op_manager
.ring
.complete_subscription_request(&self.contract_key, false);
}
}
}
/// Result of pruning a connection.
#[derive(Debug, Default)]
pub struct PruneConnectionResult {
/// Orphaned transactions that need to be retried or failed.
pub orphaned_transactions: Vec<Transaction>,
/// True if this prune caused us to drop below the readiness threshold.
pub became_unready: bool,
}
impl Ring {
pub const DEFAULT_MIN_CONNECTIONS: usize = 25;
pub const DEFAULT_MAX_CONNECTIONS: usize = 200;
const DEFAULT_MAX_UPSTREAM_BANDWIDTH: Rate = Rate::new_per_second(1_000_000.0);
const DEFAULT_MAX_DOWNSTREAM_BANDWIDTH: Rate = Rate::new_per_second(1_000_000.0);
/// Above this number of remaining hops, randomize which node a message will be forwarded to.
const DEFAULT_RAND_WALK_ABOVE_HTL: usize = 7;
/// Max hops to be performed for certain operations (e.g. propagating a peer's connection through the network).
pub const DEFAULT_MAX_HOPS_TO_LIVE: usize = 10;
pub fn new<ER: NetEventRegister + Clone>(
config: &NodeConfig,
event_loop_notifier: EventLoopNotificationsSender,
event_register: ER,
is_gateway: bool,
connection_manager: ConnectionManager,
task_monitor: &crate::node::background_task_monitor::BackgroundTaskMonitor,
) -> anyhow::Result<Arc<Self>> {
let live_tx_tracker = LiveTransactionTracker::new();
let max_hops_to_live = if let Some(v) = config.max_hops_to_live {
v
} else {
Self::DEFAULT_MAX_HOPS_TO_LIVE
};
let router = Arc::new(RwLock::new(Router::new(&[])));
crate::node::network_status::set_router(router.clone());
task_monitor.register(
"refresh_router",
GlobalExecutor::spawn(Self::refresh_router(router.clone(), event_register.clone())),
);
// Interval for topology snapshot registration (1 second in test mode)
// Registers subscription topology with the global registry for validation
#[cfg(any(test, feature = "testing"))]
const TOPOLOGY_SNAPSHOT_INTERVAL: Duration = Duration::from_secs(1);
// Just initialize with a fake location; this will be updated later once the peer has an actual location assigned.
let peer_cache_dir = if is_gateway {
// Gateways don't need peer cache — peers connect to them.
None
} else {
Some(config.config.data_dir())
};
let ring = Ring {
max_hops_to_live,
router,
connection_manager,
hosting_manager: hosting::HostingManager::new(),
live_tx_tracker: live_tx_tracker.clone(),
event_register: Box::new(event_register),
op_manager: RwLock::new(None),
is_gateway,
connection_backoff: Arc::new(Mutex::new(ConnectionBackoff::new())),
contract_connect_backoff: Mutex::new(HashMap::new()),
time_source: Arc::new(InstantTimeSrc::new()),
peer_cache_dir,
};
if let Some(loc) = config.location {
if config.own_addr.is_none() && is_gateway {
return Err(anyhow::anyhow!("own_addr is required for gateways"));
}
ring.connection_manager.update_location(Some(loc));
}
let ring = Arc::new(ring);
let current_span = tracing::Span::current();
let span = if current_span.is_none() {
tracing::info_span!("connection_maintenance")
} else {
tracing::info_span!(parent: current_span, "connection_maintenance")
};
task_monitor.register(
"connection_maintenance",
GlobalExecutor::spawn({
let fut = ring
.clone()
.connection_maintenance(event_loop_notifier, live_tx_tracker)
.instrument(span);
async move {
if let Err(e) = fut.await {
tracing::error!(error = %e, "connection_maintenance exited with error");
}
}
}),
);
// Spawn periodic subscription state telemetry task
task_monitor.register(
"emit_subscription_state_telemetry",
GlobalExecutor::spawn(Self::emit_subscription_state_telemetry(
ring.clone(),
Self::SUBSCRIPTION_STATE_INTERVAL,
)),
);
// Spawn periodic subscription recovery task to fix "orphaned hosters"
// (peers that have contracts cached but aren't in the subscription tree)
task_monitor.register(
"recover_orphaned_subscriptions",
GlobalExecutor::spawn(Self::recover_orphaned_subscriptions(
ring.clone(),
Self::SUBSCRIPTION_RECOVERY_INTERVAL,
)),
);
// Spawn periodic GET subscription cache sweep task
// Cleans up expired GET-triggered subscriptions to maintain bounded memory
task_monitor.register(
"sweep_get_subscription_cache",
GlobalExecutor::spawn(Self::sweep_get_subscription_cache(
ring.clone(),
Self::GET_SUBSCRIPTION_SWEEP_INTERVAL,
)),
);
// Spawn periodic topology snapshot registration task (test mode only)
// This allows SimNetwork to validate subscription topology during tests
#[cfg(any(test, feature = "testing"))]
task_monitor.register(
"register_topology_snapshots",
GlobalExecutor::spawn(Self::register_topology_snapshots_periodically(
ring.clone(),
TOPOLOGY_SNAPSHOT_INTERVAL,
)),
);
// Spawn periodic router model snapshot telemetry (every 5 minutes)
task_monitor.register(
"emit_router_snapshot_telemetry",
GlobalExecutor::spawn(Self::emit_router_snapshot_telemetry(
ring.clone(),
Duration::from_secs(60 * 5),
)),
);
// Spawn periodic contract-directed CONNECT task.
// When a peer is a "subscription root" (closest to contract among neighbors),
// it sends CONNECTs toward the contract's ring location to merge disconnected
// subscription subtrees.
const CONTRACT_CONNECT_INTERVAL: Duration = Duration::from_secs(30);
task_monitor.register(
"contract_directed_connects",
GlobalExecutor::spawn(Self::contract_directed_connects(
ring.clone(),
CONTRACT_CONNECT_INTERVAL,
)),
);
// Spawn periodic interest heartbeat task.
// Sends full Interests { hashes } to each connected peer to keep
// interest entries alive and prevent the death spiral where expired
// entries block broadcast delivery.
task_monitor.register(
"interest_heartbeat",
GlobalExecutor::spawn(Self::interest_heartbeat(ring.clone())),
);
Ok(ring)
}
pub fn attach_op_manager(&self, op_manager: &Arc<OpManager>) {
self.op_manager.write().replace(Arc::downgrade(op_manager));
}
fn upgrade_op_manager(&self) -> Option<Arc<OpManager>> {
self.op_manager
.read()
.as_ref()
.and_then(|weak| weak.clone().upgrade())
}
pub fn is_gateway(&self) -> bool {
self.is_gateway
}
pub fn open_connections(&self) -> usize {
self.connection_manager.connection_count()
}
/// Record a connection failure to the backoff tracker.
pub fn record_connection_failure(&self, target: Location, reason: ConnectionFailureReason) {
let mut backoff = self.connection_backoff.lock();
backoff.record_failure_with_reason(target, reason);
}
/// Record a successful connection to clear backoff.
pub fn record_connection_success(&self, target: Location) {
let mut backoff = self.connection_backoff.lock();
backoff.record_success(target);
}
/// Check if a target is currently in backoff.
pub fn is_in_connection_backoff(&self, target: Location) -> bool {
self.connection_backoff.lock().is_in_backoff(target)
}
/// Periodic cleanup of expired backoff entries.
pub fn cleanup_connection_backoff(&self) {
self.connection_backoff.lock().cleanup_expired();
}
/// Reset all connection backoff state. Used during isolation recovery
/// when the node has had zero ring connections for an extended period.
pub fn reset_all_connection_backoff(&self) {
self.connection_backoff.lock().clear();
}
// ==================== Contract-Directed CONNECT ====================
/// Initial backoff before retrying a contract-directed CONNECT.
const INITIAL_CONTRACT_CONNECT_BACKOFF: Duration = Duration::from_secs(30);
/// Maximum backoff cap for contract-directed CONNECTs (24 hours).
const MAX_CONTRACT_CONNECT_BACKOFF: Duration = Duration::from_secs(24 * 60 * 60);
/// Maximum contract-directed CONNECTs per cycle.
const MAX_CONTRACT_CONNECTS_PER_CYCLE: usize = 2;
/// Returns true if this peer is the closest to the contract among its connected neighbors
/// (i.e., it's a subscription root for this contract).
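///
/// In other words: with own location `l`, contract location `c`, and neighbor locations
/// `n_i`, this returns `true` iff the contract is hosted locally and
/// `distance(l, c) <= distance(n_i, c)` for every neighbor with a known location
/// (neighbors without a known location are ignored).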
fn is_subscription_root(&self, contract_key: &ContractKey) -> bool {
if !self.is_hosting_contract(contract_key) {
return false;
}
let contract_location = Location::from(contract_key);
let my_location = match self.connection_manager.own_location().location() {
Some(loc) => loc,
None => return false,
};
let my_distance = my_location.distance(contract_location);
let connections = self.connection_manager.get_connections_by_location();
for (_loc, conns) in connections.iter() {
for conn in conns {
if let Some(peer_loc) = conn.location.location() {
if peer_loc.distance(contract_location) < my_distance {
return false;
}
}
}
}
true
}
/// Check if a contract-directed CONNECT is currently in backoff.
fn is_in_contract_connect_backoff(&self, contract_key: &ContractKey) -> bool {
let backoff = self.contract_connect_backoff.lock();
if let Some(state) = backoff.get(contract_key) {
state.last_attempt.elapsed() < state.current_backoff
} else {
false
}
}
/// Record a contract-directed CONNECT attempt. Doubles the backoff (up to cap).
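///
/// Note that a fresh entry is created with the 30-second initial value and then doubled
/// in the same call, so the observed waits run 60s, 120s, 240s, ... up to the 24-hour cap.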
fn record_contract_connect_attempt(&self, contract_key: &ContractKey) {
let mut backoff = self.contract_connect_backoff.lock();
let state = backoff
.entry(*contract_key)
.or_insert_with(|| ContractConnectState {
current_backoff: Self::INITIAL_CONTRACT_CONNECT_BACKOFF,
last_attempt: Instant::now(),
});
state.last_attempt = Instant::now();
state.current_backoff = (state.current_backoff * 2).min(Self::MAX_CONTRACT_CONNECT_BACKOFF);
}
/// Periodic task: when this peer is a subscription root for a contract,
/// initiate a CONNECT toward the contract's ring location to merge
/// disconnected subscription subtrees.
async fn contract_directed_connects(ring: Arc<Self>, interval: Duration) {
// Random initial delay to prevent thundering herd.
let initial_delay = Duration::from_secs(GlobalRng::random_range(30u64..=60u64));
tokio::time::sleep(initial_delay).await;
let mut tick_interval = tokio::time::interval(interval);
tick_interval.tick().await; // skip first immediate tick
loop {
tick_interval.tick().await;
// Skip if we have too few connections to be meaningful.
let conn_count = ring.connection_manager.connection_count();
if conn_count < 2 {
continue;
}
let contracts = ring.hosting_contract_keys();
if contracts.is_empty() {
continue;
}
let Some(op_manager) = ring.upgrade_op_manager() else {
continue;
};
let mut connects_this_cycle = 0;
for contract_key in &contracts {
if connects_this_cycle >= Self::MAX_CONTRACT_CONNECTS_PER_CYCLE {
break;
}
if !ring.is_subscription_root(contract_key) {
// No longer root — a closer connection now exists.
// If we had backoff state, we previously sent a CONNECT as root.
// Now that a closer peer is connected, expire the subscription
// so it re-routes through the closer peer on the next renewal cycle.
let had_backoff = ring
.contract_connect_backoff
.lock()
.remove(contract_key)
.is_some();
if had_backoff {
ring.force_subscription_renewal(contract_key);
tracing::info!(
contract = %contract_key,
"No longer subscription root after contract-directed CONNECT; \
expired subscription to re-route through closer peer"
);
}
continue;
}
let contract_location = Location::from(contract_key);
let my_location = ring.connection_manager.own_location().location();
let my_distance = my_location.map(|l| l.distance(contract_location).as_f64());
if ring.is_in_contract_connect_backoff(contract_key) {
continue;
}
// Emit telemetry for the root detection.
crate::tracing::telemetry::send_standalone_event(
"subscription_root_detected",
serde_json::json!({
"contract": contract_key.to_string(),
"contract_location": contract_location.as_f64(),
"neighbor_count": conn_count,
"my_distance": my_distance,
}),
);
ring.record_contract_connect_attempt(contract_key);
let backoff_secs = {
let b = ring.contract_connect_backoff.lock();
b.get(contract_key)
.map(|s| s.current_backoff.as_secs())
.unwrap_or(0)
};
tracing::info!(
contract = %contract_key,
%contract_location,
backoff_secs,
"Initiating contract-directed CONNECT as subscription root"
);
crate::tracing::telemetry::send_standalone_event(
"contract_directed_connect",
serde_json::json!({
"contract": contract_key.to_string(),
"contract_location": contract_location.as_f64(),
"my_distance": my_distance,
"backoff_secs": backoff_secs,
}),
);
let skip_list = HashSet::new();
match ring
.acquire_new(
contract_location,
&skip_list,
&op_manager.to_event_listener,
&ring.live_tx_tracker,
&op_manager,
)
.await
{
Ok(Some(tx)) => {
tracing::debug!(
%tx,
contract = %contract_key,
"Contract-directed CONNECT initiated"
);
connects_this_cycle += 1;
}
Ok(None) => {
tracing::debug!(
contract = %contract_key,
"Contract-directed CONNECT: no routing target found"
);
}
Err(e) => {
tracing::warn!(
contract = %contract_key,
error = %e,
"Contract-directed CONNECT failed"
);
}
}
}
}
}
/// Periodic heartbeat: send `Interests { hashes }` to each connected peer.
///
/// This prevents the death spiral where interest entries expire because no
/// broadcasts are flowing, which in turn prevents broadcasts from ever
/// flowing again. By periodically re-sending the full interest set, we
/// keep entries alive on the remote side.
///
/// Sends are spread evenly across the interval to avoid bursts.
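///
/// For example (hypothetical numbers), with a 60-second heartbeat interval and 6 connected
/// peers, one `Interests` message is queued roughly every 10 seconds.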
async fn interest_heartbeat(ring: Arc<Self>) {
use crate::ring::interest::INTEREST_HEARTBEAT_INTERVAL;
// Random initial delay to prevent synchronized heartbeats across peers
let initial_delay = Duration::from_secs(GlobalRng::random_range(15u64..=45u64));
tokio::time::sleep(initial_delay).await;
let mut interval = tokio::time::interval(INTEREST_HEARTBEAT_INTERVAL);
interval.tick().await; // Skip first immediate tick
loop {
interval.tick().await;
let Some(op_manager) = ring.upgrade_op_manager() else {
continue;
};
// Get our current interest hashes
let hashes = op_manager.interest_manager.get_all_interest_hashes();
if hashes.is_empty() {
continue;
}
// Get all connected peer addresses (deduplicated)
let connections = ring.connection_manager.get_connections_by_location();
let peer_addrs: Vec<std::net::SocketAddr> = {
let mut seen = HashSet::new();
connections
.values()
.flat_map(|conns| conns.iter())
.filter_map(|conn| conn.location.socket_addr())
.filter(|addr| seen.insert(*addr))
.collect()
};
if peer_addrs.is_empty() {
continue;
}
let num_peers = peer_addrs.len();
// num_peers >= 1 guaranteed by the is_empty() check above
let spread_delay = INTEREST_HEARTBEAT_INTERVAL / num_peers as u32;
tracing::debug!(
num_peers,
num_hashes = hashes.len(),
"Interest heartbeat: sending Interests to peers"
);
let sender = op_manager.to_event_listener.notifications_sender();
let mut peers_sent = 0usize;
for (i, peer_addr) in peer_addrs.into_iter().enumerate() {
let message = crate::message::InterestMessage::Interests {
hashes: hashes.clone(),
};
if let Err(e) = sender
.send(either::Either::Right(
crate::message::NodeEvent::SendInterestMessage {
target: peer_addr,
message,
},
))
.await
{
// Channel send failure means the receiver is dropped (node
// shutting down). No point sending to remaining peers.
tracing::debug!(
peer = %peer_addr,
error = %e,
"Interest heartbeat: failed to queue message"
);
break;
}
peers_sent += 1;
// Spread sends evenly across the interval (skip delay after last)
if i + 1 < num_peers {
tokio::time::sleep(spread_delay).await;
}
}
crate::tracing::telemetry::send_standalone_event(
"interest_heartbeat_cycle",
serde_json::json!({
"peers_sent": peers_sent,
"interest_hashes": hashes.len(),
}),
);
}
}
/// Register events with the event system.
/// This is used by operations to emit failure and other events.
pub async fn register_events<'a>(
&self,
events: either::Either<
crate::tracing::NetEventLog<'a>,
Vec<crate::tracing::NetEventLog<'a>>,
>,
) {
self.event_register.register_events(events).await;
}
/// Maximum number of route events to load from the AOF event log on startup
/// and during periodic refresh. Caps memory use while retaining enough history
/// for the isotonic estimators to converge.
const ROUTER_HISTORY_LIMIT: usize = 10_000;
async fn refresh_router<ER: NetEventRegister>(router: Arc<RwLock<Router>>, register: ER) {
// Load routing history immediately on startup so the router doesn't
// start cold — without this, peers route suboptimally for ~5 minutes
// until the first periodic refresh.
match register.get_router_events(Self::ROUTER_HISTORY_LIMIT).await {
Ok(history) if !history.is_empty() => {
tracing::info!(
events = history.len(),
"Restored routing history from event log"
);
*router.write() = Router::new(&history);
}
Ok(_) => {
tracing::debug!("No routing history to restore on startup");
}
Err(error) => {
tracing::warn!(%error, "Failed to load routing history on startup, starting cold");
}
}
let mut interval = tokio::time::interval(Duration::from_secs(60 * 5));
interval.tick().await;
loop {
interval.tick().await;
let history = match register.get_router_events(Self::ROUTER_HISTORY_LIMIT).await {
Ok(h) => h,
Err(error) => {
// Previously this was an `expect()` that would silently panic
// the task. Now that the task is monitored, returning will
// trigger the BackgroundTaskMonitor and propagate the failure.
tracing::error!(error = %error, "Shutting down refresh router task");
return;
}
};
if !history.is_empty() {
*router.write() = Router::new(&history);
}
}
}
/// Periodically emit a router model snapshot as an EventKind::RouterSnapshot event.
///
/// This captures the isotonic regression curves and model state, including the
/// connect forward estimator if available via OpManager.
async fn emit_router_snapshot_telemetry(ring: Arc<Self>, interval_duration: Duration) {
let mut interval = tokio::time::interval(interval_duration);
// Skip the first immediate tick
interval.tick().await;
loop {
interval.tick().await;
let mut snapshot = ring.router.read().snapshot();
// Try to include connect forward estimator data
if let Some(op_manager) = ring.upgrade_op_manager() {
let cfe = op_manager.connect_forward_estimator.read();
let (curve, events, adjustments) = cfe.snapshot();
snapshot.connect_forward_curve = Some(curve);
snapshot.connect_forward_events = Some(events);
snapshot.connect_forward_peer_adjustments = Some(adjustments);
}
tracing::info!(
failure_events = snapshot.failure_events,
success_events = snapshot.success_events,
prediction_active = snapshot.prediction_active,
consider_n_closest_peers = snapshot.consider_n_closest_peers,
"router_snapshot"
);
if let Some(event) = NetEventLog::router_snapshot(&ring, snapshot) {
ring.event_register
.register_events(Either::Left(event))
.await;
}
}
}
/// Periodically emit subscription_state telemetry events for all active subscriptions.
///
/// This enables the telemetry dashboard to reconstruct historical subscription trees
/// and show accurate subscription state at any point in time.
async fn emit_subscription_state_telemetry(ring: Arc<Self>, interval_duration: Duration) {
let mut interval = tokio::time::interval(interval_duration);
// Skip the first immediate tick
interval.tick().await;
loop {
interval.tick().await;
// Get subscription states from the new lease-based model
let subscription_states = ring.get_subscription_states();
if subscription_states.is_empty() {
continue;
}
tracing::debug!(
subscription_count = subscription_states.len(),
"Emitting periodic subscription state telemetry"
);
// Log subscription states (simplified - no upstream/downstream in new model)
for (key, has_client, is_active, _expires_at) in subscription_states {
tracing::trace!(
%key,
has_client_subscription = has_client,
is_active_subscription = is_active,
"Subscription state"
);
}
}
}
/// Maximum renewal tasks spawned per tick.
///
/// At 30s intervals, this yields up to 20 renewals/minute. Since
/// `contracts_needing_renewal()` returns contracts expiring within the
/// 2-minute renewal window, this effectively handles ~40 concurrent
/// subscriptions before renewals can't keep up. The mid-cycle channel
/// capacity check (RENEWAL_STOP_CAPACITY_FRACTION) provides backpressure
/// if the network can't absorb this many.
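///
/// As rough arithmetic: 10 per tick * 2 ticks per minute = 20 renewals per minute; across
/// the 2-minute renewal window that is 20 * 2 = 40 subscriptions.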
const MAX_RECOVERY_ATTEMPTS_PER_INTERVAL: usize = 10;
/// Reduce the renewal batch when the channel's remaining capacity falls below
/// `channel_max / RENEWAL_DEFER_CAPACITY_FRACTION` (i.e. the channel is more than 50% full).
const RENEWAL_DEFER_CAPACITY_FRACTION: usize = 2; // channel_max / 2
/// Stop spawning mid-cycle when remaining capacity falls below
/// `channel_max / RENEWAL_STOP_CAPACITY_FRACTION` (i.e. the channel is more than 75% full).
const RENEWAL_STOP_CAPACITY_FRACTION: usize = 4; // channel_max / 4
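// Illustration of the two fractions above (hypothetical channel size): with
// `max_capacity() == 100`, the renewal batch is reduced once fewer than 50 slots remain
// and spawning stops once fewer than 25 remain.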
/// Interval for periodic subscription state telemetry snapshots.
pub(crate) const SUBSCRIPTION_STATE_INTERVAL: Duration = Duration::from_secs(60);
/// Interval for periodic subscription recovery attempts.
///
/// This recovers "orphaned hosters" - peers that have contracts in cache
/// but failed to establish subscription (no upstream in subscription tree).
pub(crate) const SUBSCRIPTION_RECOVERY_INTERVAL: Duration = Duration::from_secs(30);
/// Interval for periodic GET subscription cache sweep.
pub(crate) const GET_SUBSCRIPTION_SWEEP_INTERVAL: Duration = Duration::from_secs(60);
/// Periodically attempt to recover "orphaned hosters" - contracts we're hosting
/// but don't have an upstream subscription for.
///
/// This can happen when:
/// - The initial subscription after GET/PUT failed (network issues, timeout)
/// - Our upstream peer disconnected and we haven't found a new one
/// - A race condition left us hosting without subscription
///
/// The task respects existing backoff mechanisms to avoid subscription spam.
///
/// **Connection gating (#3676):** This task skips renewal cycles entirely when
/// the node has zero ring connections. Without this gate, disconnected peers
/// generate a subscribe retry storm — thousands of subscribe requests per cycle
/// that all fail immediately because there's no one to send them to. Telemetry
/// showed 3 peers with 0 connections generating 96% of all subscribe traffic.
async fn recover_orphaned_subscriptions(ring: Arc<Self>, interval_duration: Duration) {
// Wait indefinitely for the first ring connection before starting
// subscription recovery. The per-cycle connection check below is the
// real gate; this just avoids running the loop body with no peers.
let mut wait_logged = false;
loop {
tokio::time::sleep(Duration::from_millis(500)).await;
if ring.open_connections() > 0 {
tracing::info!(
hosted_contracts = ring.hosting_contract_keys().len(),
"Ring connection established, starting subscription recovery"
);
break;
}
// Log periodically so operators can diagnose stuck nodes.
if !wait_logged {
wait_logged = true;
tracing::info!(
hosted_contracts = ring.hosting_contract_keys().len(),
"Waiting for ring connection before starting subscription recovery"
);
}
}
// Small jitter (2-5s) after first connection to let the ring stabilize
// slightly before flooding with subscribe requests.
let jitter = Duration::from_secs(GlobalRng::random_range(2u64..=5u64));
tokio::time::sleep(jitter).await;
let mut interval = tokio::time::interval(interval_duration);
// Skip the first immediate tick — we run the first pass immediately
// below (no tick wait) so client subscriptions get prompt renewal.
interval.tick().await;
let mut first_pass = true;
loop {
if first_pass {
first_pass = false;
} else {
interval.tick().await;
}
// Always run expiry sweeps, even when disconnected. Stale
// subscriptions and downstream subscribers must be cleaned up
// to keep interest manager counts accurate. Only the renewal
// spawning (below) is gated on having connections.
//
// First, expire any stale subscriptions
let expired = ring.expire_stale_subscriptions();
if !expired.is_empty() {
tracing::debug!(
expired_count = expired.len(),
"Expired {} stale subscriptions",
expired.len()
);
}
// Expire stale downstream subscribers and decrement interest manager
let ds_expired = ring.expire_stale_downstream_subscribers();
if !ds_expired.is_empty() {
tracing::debug!(
expired_count = ds_expired.len(),
"Expired stale downstream subscribers"
);
if let Some(op_manager) = ring.upgrade_op_manager() {
for (contract, expired_count) in &ds_expired {
// Decrement interest manager for each expired peer
for _ in 0..*expired_count {
op_manager
.interest_manager
.remove_downstream_subscriber(contract);
}
// Send Unsubscribe upstream if no remaining interest
if ring.should_unsubscribe_upstream(contract) {
let op_mgr = op_manager.clone();
let contract = *contract;
GlobalExecutor::spawn(async move {
op_mgr.send_unsubscribe_upstream(&contract).await;
});
}
}
}
}
// Gate: skip renewal spawning if we have no ring connections (#3676).
// Subscribe requests require connected peers to route through.
// Without this, disconnected peers flood the notification channel
// with doomed subscribe requests every 30 seconds.
if ring.open_connections() == 0 {
tracing::debug!("Skipping subscription renewal: no ring connections");
continue;
}
// Get contracts that need subscription renewal (have client subscriptions)
let mut contracts_needing_renewal = ring.contracts_needing_renewal();
if contracts_needing_renewal.is_empty() {
tracing::debug!(
hosted = ring.hosting_contract_keys().len(),
"No contracts needing subscription renewal"
);
continue;
}
tracing::info!(
needing_renewal = contracts_needing_renewal.len(),
hosted = ring.hosting_contract_keys().len(),
"Starting subscription renewal cycle"
);
// Shuffle to prevent starvation: without this, the same failing contracts
// (first N in iteration order) would always be tried first, blocking later
// contracts from ever being attempted when they hit the batch limit.
GlobalRng::shuffle(&mut contracts_needing_renewal);
// Get op_manager to spawn subscription requests
let Some(op_manager) = ring.upgrade_op_manager() else {
tracing::debug!("OpManager not available for subscription renewal");
continue;
};
// Backpressure: reduce batch size when the notification channel is
// congested, but never skip entirely. Renewals are critical-path —
// skipping a full cycle when the channel is busy lets subscriptions
// expire, which causes cascading failures as the subscription tree
// thins out and remaining renewals take longer paths.
let sender = op_manager.to_event_listener.notifications_sender();
let channel_remaining = sender.capacity();
let channel_max = sender.max_capacity();
let batch_limit =
if channel_remaining < channel_max / Self::RENEWAL_DEFER_CAPACITY_FRACTION {
// Channel >50% full: allow a reduced batch (quarter of normal)
// so critical renewals still get through. Always attempt at least 1.
let reduced = (Self::MAX_RECOVERY_ATTEMPTS_PER_INTERVAL / 4).max(1);
tracing::warn!(
channel_remaining,
channel_max,
batch_limit = reduced,
contracts = contracts_needing_renewal.len(),
"Notification channel >50% full, reducing renewal batch size"
);
reduced
} else {
Self::MAX_RECOVERY_ATTEMPTS_PER_INTERVAL
};
let mut attempted = 0;
let mut skipped = 0;
for contract in contracts_needing_renewal {
// Limit concurrent renewal attempts to avoid overwhelming the network
if attempted >= batch_limit {
tracing::debug!(
limit = batch_limit,
"Reached max renewal attempts for this interval, remaining will be tried next cycle"
);
break;
}
// Stop early if the channel is filling from our own spawns.
let remaining_now = sender.capacity();
if remaining_now < channel_max / Self::RENEWAL_STOP_CAPACITY_FRACTION {
tracing::warn!(
channel_remaining = remaining_now,
attempted,
"Notification channel >75% full during renewal spawning, stopping early"
);
break;
}
// Check spam prevention (respects exponential backoff and pending checks)
if !ring.can_request_subscription(&contract) {
skipped += 1;
continue;
}
// Mark as pending and spawn subscription request
if ring.mark_subscription_pending(contract) {
attempted += 1;
// Spread tasks across the interval to avoid thundering-herd bursts.
let jitter_ms = GlobalRng::random_range(0u64..=15_000);
let op_manager_clone = op_manager.clone();
let contract_key = contract;
GlobalExecutor::spawn(async move {
tokio::time::sleep(Duration::from_millis(jitter_ms)).await;
// Guard ensures complete_subscription_request is called even on panic
let guard =
SubscriptionRecoveryGuard::new(op_manager_clone.clone(), contract_key);
let instance_id = *contract_key.id();
// is_renewal: true - this is a subscription renewal, skip sending state
let sub_op = crate::operations::subscribe::start_op(instance_id, true);
let result = crate::operations::subscribe::request_subscribe(
&op_manager_clone,
sub_op,
)
.await;
let (outcome, error_msg) = match &result {
Ok(()) => {
tracing::info!(
%contract_key,
"Subscription renewal succeeded"
);
guard.complete(true);
("success", None)
}
Err(crate::operations::OpError::NotificationChannelError(_)) => {
// Channel congestion is a local resource issue, not a
// protocol failure. Don't penalize with backoff — just
// clear the pending mark so the contract is eligible on
// the next cycle.
tracing::warn!(
%contract_key,
"Subscription renewal skipped (channel full), will retry next cycle"
);
guard.complete(true);
("dropped_channel_full", None)
}
Err(e) => {
tracing::debug!(
%contract_key,
error = %e,
"Subscription renewal failed (will retry with backoff)"
);
let err_str = e.to_string();
guard.complete(false);
("failed", Some(err_str))
}
};
crate::tracing::telemetry::send_standalone_event(
"subscription_renewal_outcome",
serde_json::json!({
"contract": contract_key.to_string(),
"outcome": outcome,
"error": error_msg,
}),
);
});
}
}
if attempted > 0 || skipped > 0 {
tracing::info!(
attempted,
skipped_rate_limited = skipped,
"Subscription renewal cycle complete"
);
}
}
}
/// Background task to sweep expired entries from the GET subscription cache.
///
/// When contracts are evicted (past max entries and beyond TTL), this task
/// cleans up the local subscription state. The upstream peer will eventually
/// prune us when updates fail to deliver.
async fn sweep_get_subscription_cache(ring: Arc<Self>, interval_duration: Duration) {
// Add random initial delay to prevent synchronized sweeps across peers
let initial_delay = Duration::from_secs(GlobalRng::random_range(10u64..=30u64));
tokio::time::sleep(initial_delay).await;
let mut interval = tokio::time::interval(interval_duration);
interval.tick().await; // Skip first immediate tick
loop {
interval.tick().await;
// Sweep expired entries from GET subscription cache
let expired = ring.sweep_expired_get_subscriptions();
if expired.is_empty() {
continue;
}
tracing::debug!(
expired_count = expired.len(),
"GET subscription cache sweep found expired entries"
);
// Clean up local subscription state for each expired contract.
// Note: contracts with client subscriptions are protected from eviction
// by the should_retain predicate in sweep_expired_hosting().
for key in expired {
ring.unsubscribe(&key);
tracing::info!(
%key,
"Cleaned up expired hosting subscription from local state"
);
}
}
}
/// Periodically register topology snapshots for simulation testing.
///
/// This task only runs when `CURRENT_NETWORK_NAME` is set (i.e., during SimNetwork tests).
/// It allows SimNetwork to validate subscription topology by querying the global registry.
#[cfg(any(test, feature = "testing"))]
async fn register_topology_snapshots_periodically(
ring: Arc<Self>,
interval_duration: Duration,
) {
use topology_registry::{get_current_network_name, register_topology_snapshot};
tracing::info!("Topology snapshot registration task started");
// Add small initial delay to let network stabilize (use short delay in tests)
tokio::time::sleep(Duration::from_millis(100)).await;
let mut interval = tokio::time::interval(interval_duration);
interval.tick().await; // Skip first immediate tick
loop {
interval.tick().await;
// Only register if we're in a simulation context
let Some(network_name) = get_current_network_name() else {
tracing::debug!("Topology snapshot: no network name set, skipping");
continue;
};
let Some(peer_addr) = ring.connection_manager.get_own_addr() else {
tracing::debug!("Topology snapshot: no peer address yet, skipping");
continue;
};
// Use get_stored_location() for consistency with set_upstream distance check.
// This ensures topology validation uses the same location as the tie-breaker.
let location = ring
.connection_manager
.get_stored_location()
.map(|l| l.as_f64())
.unwrap_or(0.0);
let snapshot = ring
.hosting_manager
.generate_topology_snapshot(peer_addr, location);
let contract_count = snapshot.contracts.len();
register_topology_snapshot(&network_name, snapshot);
tracing::info!(
%peer_addr,
location,
network = %network_name,
contract_count,
"Registered topology snapshot"
);
}
}
/// Record an access to a contract in the hosting cache.
///
/// This adds or refreshes the contract in the unified hosting cache.
/// ALL contracts in the hosting cache get subscription renewal.
///
/// Returns a `RecordAccessResult` containing:
/// - `is_new`: Whether this contract was newly added (vs. refreshed existing)
/// - `evicted`: Contracts that were evicted to make room
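///
/// A minimal call sketch after serving a GET (hypothetical variables):
/// ```ignore
/// let res = ring.host_contract(key, state_size_bytes, AccessType::Get);
/// if res.is_new {
///     // contract newly entered the hosting cache
/// }
/// for evicted in &res.evicted {
///     // clean up local state for contracts evicted to make room
/// }
/// ```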
pub fn host_contract(
&self,
key: ContractKey,
size_bytes: u64,
access_type: AccessType,
) -> RecordAccessResult {
self.hosting_manager
.record_contract_access(key, size_bytes, access_type)
}
/// Record a GET access to a contract in the hosting cache.
///
/// Returns a `RecordAccessResult` indicating whether this was a new addition
/// and which contracts were evicted (if any).
pub fn record_get_access(&self, key: ContractKey, size_bytes: u64) -> RecordAccessResult {
self.host_contract(key, size_bytes, AccessType::Get)
}
/// Whether this node is hosting this contract (has it in cache).
#[inline]
pub fn is_hosting_contract(&self, key: &ContractKey) -> bool {
self.hosting_manager.is_hosting_contract(key)
}
/// Set the storage reference for hosting metadata persistence.
///
/// Must be called after executor creation. This enables automatic
/// cleanup of persisted metadata when contracts are evicted.
pub fn set_hosting_storage(&self, storage: crate::contract::storages::Storage) {
self.hosting_manager.set_storage(storage);
}
/// Load hosting cache from persisted storage.
///
/// Call this during startup after storage is available to restore
/// the hosting cache from the previous run. Also migrates legacy contracts
/// that have state but no hosting metadata.
///
/// # Arguments
/// * `storage` - The storage backend
/// * `code_hash_lookup` - Function to look up CodeHash from ContractInstanceId.
/// Uses ContractStore which has the id->code_hash mapping.
#[cfg(feature = "redb")]
pub fn load_hosting_cache<F>(
&self,
storage: &crate::contract::storages::Storage,
code_hash_lookup: F,
) -> Result<usize, redb::Error>
where
F: Fn(
&freenet_stdlib::prelude::ContractInstanceId,
) -> Option<freenet_stdlib::prelude::CodeHash>,
{
self.hosting_manager
.load_from_storage(storage, code_hash_lookup)
}
/// Load hosting cache from persisted storage (sqlite version).
///
/// Also migrates legacy contracts that have state but no hosting metadata.
#[cfg(all(feature = "sqlite", not(feature = "redb")))]
pub async fn load_hosting_cache<F>(
&self,
storage: &crate::contract::storages::Storage,
code_hash_lookup: F,
) -> Result<usize, crate::contract::storages::sqlite::SqlDbError>
where
F: Fn(
&freenet_stdlib::prelude::ContractInstanceId,
) -> Option<freenet_stdlib::prelude::CodeHash>,
{
self.hosting_manager
.load_from_storage(storage, code_hash_lookup)
.await
}
pub fn record_request(
&self,
recipient: PeerKeyLocation,
target: Location,
request_type: TransactionType,
) {
self.connection_manager
.topology_manager
.write()
.record_request(recipient, target, request_type);
}
/// Add a connection to the ring topology.
///
/// Returns `true` if this connection caused us to cross the readiness threshold
/// (i.e., we just became ready to accept non-CONNECT operations).
/// Returns `false` if the connection was rejected (e.g., capacity cap) or we
/// were already ready.
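///
/// Concretely (hypothetical threshold): with `min_ready_connections == 3`, the call that
/// takes the connection count from 2 to 3 returns `true` (assuming `is_self_ready()` is
/// satisfied at that point); later additions return `false` because we were already ready.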
pub async fn add_connection(&self, loc: Location, peer: PeerId, was_reserved: bool) -> bool {
tracing::info!(
peer = %peer,
peer_location = %loc,
this = ?self.connection_manager.get_own_addr(),
was_reserved = %was_reserved,
"Adding connection to peer"
);
let min_ready = self.connection_manager.min_ready_connections;
let was_ready = min_ready == 0 || self.connection_manager.connection_count() >= min_ready;
let addr = peer.socket_addr();
let pub_key = peer.pub_key().clone();
let added = self
.connection_manager
.add_connection(loc, addr, pub_key, was_reserved);
if !added {
tracing::warn!(
peer = %peer,
peer_location = %loc,
"Ring rejected connection - not updating caches or logging connection event"
);
return false;
}
if let Some(own_loc) = self.connection_manager.own_location().location() {
crate::node::network_status::set_own_location(own_loc.as_f64());
}
// ConnectEvent::Connected telemetry is emitted by the CONNECT state
// machine with proper transaction context; not duplicated here (#3578).
self.refresh_density_request_cache();
let is_ready = self.connection_manager.is_self_ready();
// Return true only if we just crossed the threshold
!was_ready && is_ready
}
pub fn update_connection_identity(&self, old_peer: &PeerId, new_peer: PeerId) {
if self.connection_manager.update_peer_identity(
old_peer.socket_addr(),
new_peer.socket_addr(),
new_peer.pub_key().clone(),
) {
self.refresh_density_request_cache();
}
}
fn refresh_density_request_cache(&self) {
let cbl = self.connection_manager.get_connections_by_location();
let topology_manager = &mut self.connection_manager.topology_manager.write();
let _refreshed = topology_manager.refresh_cache(&cbl);
}
/// Returns a filtered iterator for peers that are not connected to this node already.
pub fn is_not_connected<'a>(
&self,
peers: impl Iterator<Item = &'a PeerKeyLocation>,
) -> impl Iterator<Item = &'a PeerKeyLocation> + Send {
let mut filtered = Vec::new();
for peer in peers {
if let Some(addr) = peer.socket_addr() {
if !self.connection_manager.has_connection_or_pending(addr) {
filtered.push(peer);
}
} else {
// If address is unknown, include the peer
filtered.push(peer);
}
}
filtered.into_iter()
}
/// Return the best-suited peer for hosting a given contract.
///
/// This function only considers connected peers, not the node itself.
#[inline]
pub fn closest_potentially_hosting(
&self,
contract_key: &ContractKey,
skip_list: impl Contains<std::net::SocketAddr>,
) -> Option<PeerKeyLocation> {
let router = self.router.read();
let target = Location::from(contract_key);
let (peer, decision) = self
.connection_manager
.routing_with_telemetry(target, None, skip_list, &router);
if let Some(decision) = &decision {
tracing::debug!(
target_location = %target.as_f64(),
strategy = ?decision.strategy,
num_candidates = decision.candidates.len(),
total_routing_events = decision.total_routing_events,
selected = peer.is_some(),
"routing_decision"
);
}
peer
}
/// Get k best peers for hosting a contract, ranked by routing predictions.
/// Accepts either `&ContractKey` or `&ContractInstanceId` (`Location` implements `From<&T>` for both).
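///
/// A minimal usage sketch (`ring`, `key`, `id`, and `skip` are assumed bindings):
/// ```ignore
/// // Both calls compile because `Location: From<&ContractKey>` and
/// // `Location: From<&ContractInstanceId>`.
/// let by_key = ring.k_closest_potentially_hosting(&key, skip.clone(), 3);
/// let by_id = ring.k_closest_potentially_hosting(&id, skip, 3);
/// ```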
pub fn k_closest_potentially_hosting<K>(
&self,
contract_id: &K,
skip_list: impl Contains<std::net::SocketAddr> + Clone,
k: usize,
) -> Vec<PeerKeyLocation>
where
for<'a> Location: From<&'a K>,
{
let router = self.router.read();
let target_location = Location::from(contract_id);
let mut seen = HashSet::new();
let mut candidates: Vec<PeerKeyLocation> = Vec::new();
let mut not_ready_fallback: Vec<PeerKeyLocation> = Vec::new();
let mut skipped_not_ready: usize = 0;
let connections = self.connection_manager.get_connections_by_location();
// Sort keys for deterministic iteration order (HashMap iteration is non-deterministic)
// This ensures the `seen.insert()` check behaves consistently across runs
let mut sorted_keys: Vec<_> = connections.keys().collect();
sorted_keys.sort();
for loc in sorted_keys {
let conns = connections.get(loc).expect("key exists");
// Sort connections for deterministic iteration order
let mut sorted_conns: Vec<_> = conns.iter().collect();
sorted_conns.sort_by_key(|c| c.location.clone());
for conn in sorted_conns {
if let Some(addr) = conn.location.socket_addr() {
if skip_list.has_element(addr) || !seen.insert(addr) {
continue;
}
// Skip peers that haven't advertised readiness, but collect them
// as fallback candidates in case all peers fail the readiness check.
if !self.connection_manager.is_peer_ready(addr) {
tracing::debug!(
%addr,
target_location = %target_location.as_f64(),
"k_closest: skipping peer not yet ready"
);
not_ready_fallback.push(conn.location.clone());
skipped_not_ready += 1;
continue;
}
}
candidates.push(conn.location.clone());
}
}
// If all connected peers failed the readiness check, fall back to using them anyway.
// This prevents GET/SUBSCRIBE operations from failing with EmptyRing when the node
// is connected but peers haven't yet sent ReadyState messages (e.g., early after
// connecting, or in network topologies where the min_ready_connections threshold is
// never satisfied). A warn-level log is emitted so operators know gating was bypassed.
// Note: ConnectionManager::routing_candidates has the same fallback for PUT/UPDATE.
if candidates.is_empty() && !not_ready_fallback.is_empty() {
tracing::warn!(
count = not_ready_fallback.len(),
target_location = %target_location.as_f64(),
"k_closest: no ready peers available, falling back to not-yet-ready peers to avoid EmptyRing"
);
candidates = not_ready_fallback;
}
// Sort candidates for deterministic input to select_k_best_peers
candidates.sort();
// Note: We intentionally do NOT fall back to known_locations here.
// known_locations may contain peers we're not currently connected to,
// and attempting to route to them would require establishing a new connection
// which may fail (especially in NAT scenarios without coordination).
// It's better to return fewer candidates than unreachable ones.
let (selected, decision) =
router.select_k_best_peers_with_telemetry(candidates.iter(), target_location, k);
tracing::debug!(
target_location = %target_location.as_f64(),
strategy = ?decision.strategy,
num_candidates = decision.candidates.len(),
total_routing_events = decision.total_routing_events,
selected_count = selected.len(),
"routing_decision"
);
tracing::debug!(
target_location = %target_location.as_f64(),
candidates_found = selected.len(),
skipped_not_ready,
"k_closest_potentially_hosting result"
);
selected.into_iter().cloned().collect()
}
pub fn routing_finished(&self, event: crate::router::RouteEvent) {
self.connection_manager
.topology_manager
.write()
.report_outbound_request(event.peer.clone(), event.contract_location);
// Update peer health tracking based on routing outcome.
if let Some(addr) = event.peer.socket_addr() {
let mut health = self.connection_manager.peer_health.lock();
match &event.outcome {
crate::router::RouteOutcome::Success { .. }
| crate::router::RouteOutcome::SuccessUntimed => {
health.record_success(addr);
}
crate::router::RouteOutcome::Failure => {
health.record_failure(addr);
}
}
}
self.router.write().add_event(event);
}
// ==================== Subscription Management (Lease-Based) ====================
/// Subscribe to a contract with a lease.
///
/// Creates a new subscription or renews an existing one. The subscription
/// will expire after `SUBSCRIPTION_LEASE_DURATION` unless renewed.
pub fn subscribe(&self, contract: ContractKey) -> SubscribeResult {
self.hosting_manager.subscribe(contract)
}
/// Unsubscribe from a contract.
///
/// Removes the active subscription. The contract may still be hosted
/// (in the hosting cache) until evicted by LRU.
pub fn unsubscribe(&self, contract: &ContractKey) {
self.hosting_manager.unsubscribe(contract)
}
/// Check if we have an active (non-expired) subscription to a contract.
pub fn is_subscribed(&self, contract: &ContractKey) -> bool {
self.hosting_manager.is_subscribed(contract)
}
/// Get all contracts with active subscriptions.
pub fn get_subscribed_contracts(&self) -> Vec<ContractKey> {
self.hosting_manager.get_subscribed_contracts()
}
/// Force-expire a contract's subscription so it gets renewed through the
/// current best route on the next recovery cycle.
fn force_subscription_renewal(&self, contract: &ContractKey) {
self.hosting_manager.force_subscription_renewal(contract);
}
/// Expire stale subscriptions and return the contracts that were expired.
///
/// Should be called periodically by a background task.
pub fn expire_stale_subscriptions(&self) -> Vec<ContractKey> {
self.hosting_manager.expire_stale_subscriptions()
}
// ==================== Downstream Subscriber Tracking ====================
pub fn add_downstream_subscriber(&self, contract: &ContractKey, peer: PeerKey) -> bool {
self.hosting_manager
.add_downstream_subscriber(contract, peer)
}
#[allow(dead_code)] // Only used in tests
pub fn renew_downstream_subscriber(&self, contract: &ContractKey, peer: &PeerKey) -> bool {
self.hosting_manager
.renew_downstream_subscriber(contract, peer)
}
pub fn remove_downstream_subscriber(&self, contract: &ContractKey, peer: &PeerKey) -> bool {
self.hosting_manager
.remove_downstream_subscriber(contract, peer)
}
pub fn has_downstream_subscribers(&self, contract: &ContractKey) -> bool {
self.hosting_manager.has_downstream_subscribers(contract)
}
pub fn expire_stale_downstream_subscribers(&self) -> Vec<(ContractKey, usize)> {
self.hosting_manager.expire_stale_downstream_subscribers()
}
pub fn should_unsubscribe_upstream(&self, contract: &ContractKey) -> bool {
self.hosting_manager.should_unsubscribe_upstream(contract)
}
/// Check if this node is actively receiving updates for a contract.
///
/// Returns true only when we have an active network subscription or local
/// client subscriptions. The hosting LRU cache alone is not sufficient,
/// since cached state may be stale after subscription expiry.
pub fn is_receiving_updates(&self, contract: &ContractKey) -> bool {
self.hosting_manager.is_receiving_updates(contract)
}
/// Get contracts that need subscription renewal.
///
/// Returns contracts where:
/// - We have an active subscription that will expire soon, OR
/// - We have client subscriptions but no active network subscription, OR
/// - We have hosted contracts without active subscriptions (the previously missed case)
pub fn contracts_needing_renewal(&self) -> Vec<ContractKey> {
self.hosting_manager.contracts_needing_renewal()
}
// ==================== Client Subscription Management ====================
/// Register a client subscription for a contract (WebSocket client subscribed).
///
/// Returns information about the operation for telemetry.
pub fn add_client_subscription(
&self,
instance_id: &ContractInstanceId,
client_id: crate::client_events::ClientId,
) -> AddClientSubscriptionResult {
self.hosting_manager
.add_client_subscription(instance_id, client_id)
}
/// Remove a client from all its subscriptions (used when client disconnects).
///
/// Returns a [`ClientDisconnectResult`] with:
/// - `affected_contracts`: all contracts where the client was subscribed (for cleanup)
pub fn remove_client_from_all_subscriptions(
&self,
client_id: crate::client_events::ClientId,
) -> ClientDisconnectResult {
self.hosting_manager
.remove_client_from_all_subscriptions(client_id)
}
/// Get all hosted contract keys from the hosting cache.
pub fn hosting_contract_keys(&self) -> Vec<ContractKey> {
self.hosting_manager.hosting_contract_keys()
}
/// Get the cached state size in bytes for a hosted contract.
pub fn hosting_contract_size(&self, key: &ContractKey) -> u64 {
self.hosting_manager.hosting_contract_size(key)
}
/// Get the number of contracts in the hosting cache.
/// This is the actual count of contracts this node is caching/hosting.
pub fn hosting_contracts_count(&self) -> usize {
self.hosting_manager.hosting_contracts_count()
}
/// Get subscription state for all contracts (for telemetry).
///
/// Returns: (contract, has_client_subscription, is_active_subscription, expires_at)
pub fn get_subscription_states(&self) -> Vec<(ContractKey, bool, bool, Option<Instant>)> {
self.hosting_manager.get_subscription_states()
}
// ==================== Subscription Retry Spam Prevention ====================
/// Check if a subscription request can be made for a contract.
/// Returns false if a request is already pending or in a backoff period.
pub fn can_request_subscription(&self, contract: &ContractKey) -> bool {
self.hosting_manager.can_request_subscription(contract)
}
/// Mark a subscription request as in-flight.
/// Returns false if already pending.
pub fn mark_subscription_pending(&self, contract: ContractKey) -> bool {
self.hosting_manager.mark_subscription_pending(contract)
}
/// Mark a subscription request as completed.
/// If success is false, applies exponential backoff.
pub fn complete_subscription_request(&self, contract: &ContractKey, success: bool) {
self.hosting_manager
.complete_subscription_request(contract, success)
}
// ==================== Hosting Cache Management ====================
/// Touch a contract in the hosting cache (refresh TTL without adding).
///
/// Called when a user GET serves a hosted contract from local cache.
pub fn touch_hosting(&self, key: &ContractKey) {
self.hosting_manager.touch_hosting(key)
}
/// Sweep for expired entries in the hosting cache.
///
/// Returns contracts evicted from this cache. Contracts with client
/// subscriptions are protected from eviction.
pub fn sweep_expired_hosting(&self) -> Vec<ContractKey> {
self.hosting_manager.sweep_expired_hosting()
}
// ==================== Legacy GET Auto-Subscription (delegating to hosting cache) ====================
/// Sweep for expired entries (delegated to hosting cache).
///
/// Returns contracts evicted from the cache. Contracts with client
/// subscriptions are protected from eviction.
pub fn sweep_expired_get_subscriptions(&self) -> Vec<ContractKey> {
// Delegate to hosting cache
self.sweep_expired_hosting()
}
// ==================== Connection Pruning ====================
/// Prune a peer connection.
///
/// Returns orphaned transactions that need to be retried or failed.
/// In the new lease-based subscription model, subscriptions are not tied to specific
/// peers, so no subscription pruning is needed when a peer disconnects.
pub async fn prune_connection(&self, peer: PeerId) -> PruneConnectionResult {
use crate::tracing::DisconnectReason;
tracing::debug!(%peer, "Removing connection");
crate::node::network_status::record_peer_disconnected(peer.socket_addr());
let orphaned_transactions = self
.live_tx_tracker
.prune_transactions_from_peer(peer.socket_addr());
if !orphaned_transactions.is_empty() {
tracing::debug!(
%peer,
orphaned_count = orphaned_transactions.len(),
"Connection pruned with orphaned transactions"
);
}
let min_ready = self.connection_manager.min_ready_connections;
let was_ready = self.connection_manager.is_self_ready();
// Capture connection duration before pruning
let connection_duration_ms = self
.connection_manager
.get_connection_duration_ms(peer.socket_addr());
// This happens when the connection is still being opened, so the peer's location hasn't been recorded yet
let Some(_loc) = self
.connection_manager
.prune_alive_connection(peer.socket_addr())
else {
return PruneConnectionResult {
orphaned_transactions,
became_unready: false,
};
};
if let Some(event) = NetEventLog::disconnected_with_context(
self,
&peer,
DisconnectReason::Pruned,
connection_duration_ms,
None, // bytes_sent not tracked yet
None, // bytes_received not tracked yet
) {
self.event_register
.register_events(Either::Left(event))
.await;
}
let is_ready = self.connection_manager.is_self_ready();
let became_unready = min_ready > 0 && was_ready && !is_ready;
PruneConnectionResult {
orphaned_transactions,
became_unready,
}
}
async fn connection_maintenance(
self: Arc<Self>,
notifier: EventLoopNotificationsSender,
live_tx_tracker: LiveTransactionTracker,
) -> anyhow::Result<()> {
let is_gateway = self.is_gateway;
tracing::info!(is_gateway, "Connection maintenance task starting");
#[cfg(not(test))]
const CHECK_TICK_DURATION: Duration = Duration::from_secs(60);
#[cfg(test)]
const CHECK_TICK_DURATION: Duration = Duration::from_secs(2);
// Faster tick when below min_connections, so initial mesh formation
// doesn't bottleneck on the 60-second steady-state interval.
#[cfg(not(test))]
const FAST_CHECK_TICK_DURATION: Duration = Duration::from_secs(5);
#[cfg(test)]
const FAST_CHECK_TICK_DURATION: Duration = Duration::from_secs(1);
const REGENERATE_DENSITY_MAP_INTERVAL: Duration = Duration::from_secs(60);
/// Base number of concurrent connection acquisition attempts (steady-state).
const BASE_CONCURRENT_CONNECTIONS: usize = 3;
let mut check_interval = tokio::time::interval(CHECK_TICK_DURATION);
check_interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
let mut refresh_density_map = tokio::time::interval(REGENERATE_DENSITY_MAP_INTERVAL);
refresh_density_map.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
// If the peer is just starting, wait a bit before we even
// attempt to acquire more connections.
tokio::time::sleep(Duration::from_secs(2)).await;
let mut pending_conn_adds = BTreeSet::new();
let mut last_backoff_cleanup = Instant::now();
let mut last_health_check = Instant::now();
let mut last_peer_cache_save = self.time_source.now();
const HEALTH_CHECK_INTERVAL: Duration = Duration::from_secs(300);
// How often to snapshot the peer cache to disk.
const PEER_CACHE_SAVE_INTERVAL: Duration = Duration::from_secs(30);
const BACKOFF_CLEANUP_INTERVAL: Duration = Duration::from_secs(60);
/// Duration of zero ring connections before escalating recovery.
/// Uses a shorter threshold initially (before first successful connection)
/// so that cold-start failures after OS restart recover faster (#3737).
const ISOLATION_ESCALATION_THRESHOLD: Duration = Duration::from_secs(120);
const INITIAL_ISOLATION_ESCALATION_THRESHOLD: Duration = Duration::from_secs(30);
/// Max time to hold a deferred swap drop before abandoning it.
const DEFERRED_SWAP_DROP_TTL: Duration = Duration::from_secs(120);
// Deferred swap drops: (addr, queued_at) using time_source for
// deterministic simulation support.
let mut deferred_swap_drops: Vec<(SocketAddr, tokio::time::Instant)> = Vec::new();
let mut zero_connections_since: Option<Instant> = None;
// Track whether we've ever had ring connections. Before the first
// successful connection, use a shorter isolation escalation threshold
// for faster recovery from cold-start failures (#3737).
let mut ever_had_connections = false;
// Adaptive fast-tick backoff: increase the fast-tick interval when
// connection count stops growing, to avoid hammering the network
// with CONNECTs indefinitely (#3578).
let mut last_conn_count: usize = 0;
let mut no_progress_ticks: u32 = 0;
// After this many consecutive no-progress ticks, start doubling
// the fast-tick interval.
const FAST_TICK_BACKOFF_THRESHOLD: u32 = 6; // 30s at 5s/tick
// Maximum fast-tick multiplier: caps backoff at the normal tick rate.
// Derived from tick ratio so invariant holds in both prod and test cfg.
const MAX_FAST_TICK_MULTIPLIER: u32 =
(CHECK_TICK_DURATION.as_secs() / FAST_CHECK_TICK_DURATION.as_secs()) as u32;
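// With the cfg values above this is 60/5 = 12 in production and 2/1 = 2 in tests.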
// Suspend/resume detection: boot_time::Instant uses CLOCK_BOOTTIME on Linux,
// which advances during suspend (unlike std/tokio Instant which use CLOCK_MONOTONIC).
let mut last_boot_time = boot_time::Instant::now();
// In production (60s tick), 2x = 120s threshold is fine since real suspend
// causes minute+ jumps. In tests (2s tick), 2x = 4s is too tight — CI runners
// under load can have >4s scheduling delays between loop iterations, causing
// false positives that drop all connections mid-test. Use 30s minimum.
#[cfg(not(test))]
const SUSPEND_DETECTION_THRESHOLD: Duration = CHECK_TICK_DURATION.saturating_mul(2);
#[cfg(test)]
const SUSPEND_DETECTION_THRESHOLD: Duration = Duration::from_secs(30);
let mut this_peer = None;
loop {
// Update boot-time tracking at the top of every iteration (including
// early-continue paths) so elapsed time doesn't accumulate during startup.
let boot_elapsed = last_boot_time.elapsed();
last_boot_time = boot_time::Instant::now();
let op_manager = match self.upgrade_op_manager() {
Some(op_manager) => op_manager,
None => {
tokio::time::sleep(Duration::from_millis(100)).await;
continue;
}
};
let Some(this_addr) = &this_peer else {
let Some(addr) = self.connection_manager.get_own_addr() else {
tokio::time::sleep(Duration::from_secs(1)).await;
continue;
};
this_peer = Some(addr);
continue;
};
// avoid connecting to the same peer multiple times
let mut skip_list = HashSet::new();
skip_list.insert(*this_addr);
// Resets both connection (location-based) and gateway (address-based)
// backoff state, and clears stale pending reservations for gateways.
// Used during isolation recovery to ensure all gateways are retryable
// when the node has zero ring connections (#3319).
// Wakes all tasks sleeping on gateway backoff (initial_join_procedure
// and any handle_aborted_op retries) so they can retry immediately.
let reset_all_backoff = || {
self.reset_all_connection_backoff();
op_manager.gateway_backoff.lock().clear();
op_manager.gateway_backoff_cleared.notify_waiters();
// Also clear stale pending reservations for gateways — without this,
// gateways appear "connected/pending" via has_connection_or_pending()
// even after backoff is reset, blocking retry attempts (#3319).
let gateway_addrs: Vec<_> = op_manager
.configured_gateways
.iter()
.filter_map(|gw| gw.socket_addr())
.collect();
self.connection_manager
.clear_pending_reservations_for(&gateway_addrs);
};
// Suspend/resume detection: if boot-time elapsed much more than
// the tick interval, we were likely suspended. boot_elapsed was
// computed at the top of the loop so it includes early-continue time.
if boot_elapsed > SUSPEND_DETECTION_THRESHOLD {
tracing::warn!(
boot_elapsed_secs = boot_elapsed.as_secs(),
"Detected suspend/resume (boot-time jump) — dropping all connections and clearing state"
);
reset_all_backoff();
// Clear recently-failed addresses since they may be reachable again.
self.connection_manager.cleanup_all_failed_addrs();
// Drop all connections (including transient gateway connections).
// After suspend, transport sockets are dead but connection entries
// persist as zombies — keepalive tasks exit on socket error but
// don't trigger connection cleanup. The bootstrap loop then sends
// CONNECT messages into dead sockets that never reach the gateway.
notifier
.notifications_sender
.send(Either::Right(crate::message::NodeEvent::DropAllConnections))
.await
.map_err(|error| {
tracing::debug!(?error, "Failed to send DropAllConnections");
error
})?;
zero_connections_since = None;
}
// Periodic cleanup of expired backoff entries
if last_backoff_cleanup.elapsed() > BACKOFF_CLEANUP_INTERVAL {
self.cleanup_connection_backoff();
last_backoff_cleanup = Instant::now();
}
// Clean up stale pending reservations to prevent permanent isolation
// when CONNECT operations fail to complete cleanly.
let stale_removed = self.connection_manager.cleanup_stale_reservations();
if stale_removed > 0 {
tracing::warn!(
stale_removed,
"Cleaned up stale reservations and orphaned location entries"
);
}
// Capture a single `now` for all TTL/cleanup checks in this tick so that
// they all see the same moment rather than drifting across calls. Using
// `self.time_source` (rather than `Instant::now()` directly) allows tests
// to supply a `SharedMockTimeSource` without pausing the whole tokio runtime.
let tick_now = self.time_source.now();
// Expire old NAT traversal failure entries
self.connection_manager.cleanup_stale_failed_addrs();
// Expire acceptor reliability entries for peers whose TTL has elapsed
self.connection_manager
.cleanup_expired_acceptor_stats(tick_now);
// Clean up expired transient connections
let expired_transients = self.connection_manager.cleanup_expired_transients();
if expired_transients > 0 {
tracing::debug!(
expired_transients,
"Cleaned up expired transient connections"
);
}
// Periodic peer health check: evict peers with sustained routing failures.
if last_health_check.elapsed() > HEALTH_CHECK_INTERVAL {
last_health_check = Instant::now();
let current_ring = self.connection_manager.connection_count();
let unhealthy = self
.connection_manager
.peer_health
.lock()
.unhealthy_peers(self.connection_manager.min_connections, current_ring);
for addr in unhealthy {
tracing::warn!(
peer = %addr,
"Evicting unhealthy peer (sustained routing failures)"
);
if let Err(e) = notifier
.notifications_sender
.send(Either::Right(crate::message::NodeEvent::DropConnection(
addr,
)))
.await
{
tracing::debug!(error = ?e, "Failed to send DropConnection for unhealthy peer");
}
}
}
// Periodically save peer cache for fast reconnection after restart.
if last_peer_cache_save.elapsed() > PEER_CACHE_SAVE_INTERVAL {
last_peer_cache_save = self.time_source.now();
if let Some(ref dir) = self.peer_cache_dir {
let cache = peer_cache::PeerCache::snapshot_from(
&self.connection_manager,
self.time_source.as_ref(),
);
if !cache.peers.is_empty() {
if let Err(e) = cache.save(dir) {
tracing::warn!(error = %e, "Failed to save peer cache");
}
}
}
}
// Isolation recovery: when we have zero ring connections for too long,
// reset all backoff state so we can retry aggressively (#2928).
let current_conn_count = self.connection_manager.connection_count();
// Expose to update check task for version mismatch decisions (#3204).
crate::transport::set_open_connection_count(current_conn_count);
if current_conn_count == 0 {
// Use shorter threshold before first successful connection so
// cold-start failures (e.g., after OS restart) recover in ~30s
// instead of ~120s. Once the node has connected at least once,
// use the steady-state threshold. See #3737.
let threshold = if ever_had_connections {
ISOLATION_ESCALATION_THRESHOLD
} else {
INITIAL_ISOLATION_ESCALATION_THRESHOLD
};
if let Some(since) = zero_connections_since {
if since.elapsed() > threshold {
tracing::warn!(
is_gateway,
isolated_for_secs = since.elapsed().as_secs(),
threshold_secs = threshold.as_secs(),
ever_connected = ever_had_connections,
"Node isolated with zero ring connections — resetting all backoff state"
);
reset_all_backoff();
zero_connections_since = Some(Instant::now());
}
} else {
zero_connections_since = Some(Instant::now());
tracing::warn!(
is_gateway,
"Zero ring connections detected — starting isolation timer"
);
}
} else if zero_connections_since.take().is_some() {
ever_had_connections = true;
tracing::info!(
connections = current_conn_count,
"Recovered from zero-connection state"
);
}
// Gateway bootstrap fallback: at zero connections, acquire_new always
// fails (no routing candidates). Connect to gateways directly (#3219).
//
// Note: initial_join_procedure may also be attempting gateway connections
// concurrently. is_not_connected provides best-effort dedup via pending
// reservations (should_accept creates one on each join_ring_request), so
// the second caller typically sees the gateway as "pending" and skips it.
// Occasional duplicate CONNECTs are harmless — the connect state machine
// handles them gracefully.
if current_conn_count == 0 && !op_manager.configured_gateways.is_empty() {
let eligible: Vec<_> = {
let backoff = op_manager.gateway_backoff.lock();
self.is_not_connected(op_manager.configured_gateways.iter())
.filter(|gw| {
gw.socket_addr()
.map(|addr| !backoff.is_in_backoff(addr))
.unwrap_or(false) // skip gateways without addresses
})
.cloned()
.collect()
};
if eligible.is_empty() {
tracing::debug!(
total_gateways = op_manager.configured_gateways.len(),
"Zero connections — all gateways connected/pending or in backoff"
);
} else {
let attempt_count = eligible.len().min(BASE_CONCURRENT_CONNECTIONS);
tracing::info!(
eligible = eligible.len(),
attempting = attempt_count,
"Zero connections — attempting gateway bootstrap"
);
for gw in eligible.iter().take(BASE_CONCURRENT_CONNECTIONS) {
match crate::operations::connect::join_ring_request(gw, &op_manager).await {
Ok(()) => tracing::debug!(gateway = %gw, "Gateway bootstrap initiated"),
Err(e) => {
tracing::warn!(gateway = %gw, error = %e, "Gateway bootstrap failed")
}
}
}
}
}
// Scale concurrent connection limit based on deficit to min_connections.
// During bootstrap (far below min_connections), allow more parallel attempts
// to avoid stalling when slots fill with slow/timing-out transactions.
let max_concurrent = calculate_max_concurrent_connections(
current_conn_count,
self.connection_manager.min_connections,
);
// Drain pending connections, initiating multiple attempts per tick
// (up to max_concurrent) for faster mesh formation.
let mut active_count = live_tx_tracker.active_connect_transaction_count();
while let Some(ideal_location) = pending_conn_adds.pop_first() {
if self.is_in_connection_backoff(ideal_location) {
tracing::debug!(
target_location = %ideal_location,
"Skipping connection attempt - target in backoff"
);
// Intentionally not re-queued: adjust_topology will re-request
// this location on the next cycle if still below min_connections.
continue;
}
if active_count >= max_concurrent {
tracing::debug!(
active_connections = active_count,
max_concurrent,
target_location = %ideal_location,
"At max concurrent connections, re-queuing location"
);
pending_conn_adds.insert(ideal_location);
break;
}
tracing::debug!(
active_connections = active_count,
max_concurrent,
target_location = %ideal_location,
"Attempting to acquire new connection"
);
let tx = self
.acquire_new(
ideal_location,
&skip_list,
¬ifier,
&live_tx_tracker,
&op_manager,
)
.await
.map_err(|error| {
tracing::error!(
?error,
"FATAL: Connection maintenance task failed - shutting down"
);
error
})?;
if tx.is_none() {
let conns = self.connection_manager.connection_count();
tracing::debug!(
connections = conns,
target_location = %ideal_location,
"acquire_new returned None - likely no peers to query through"
);
// Don't record a backoff against the target location here.
// acquire_new returning None means we have insufficient routing
// candidates locally — the target location itself is fine.
// Backing off the target would block future attempts when we
// gain more connections and could actually route to it.
// adjust_topology will re-request this location on the next tick.
} else {
active_count += 1;
tracing::info!(
active_connections = active_count,
"Successfully initiated connection acquisition"
);
}
}
let current_connections = self.connection_manager.connection_count();
let pending_connection_targets = pending_conn_adds.len();
let peers = self.connection_manager.get_connections_by_location();
let connections_considered: usize = peers.values().map(|c| c.len()).sum();
let mut neighbor_locations: BTreeMap<_, Vec<_>> = peers
.iter()
.map(|(loc, conns)| {
let conns: Vec<_> = conns
.iter()
.filter(|conn| {
conn.location
.socket_addr()
.map(|addr| !live_tx_tracker.has_live_connection(addr))
.unwrap_or(true)
})
.cloned()
.collect();
(*loc, conns)
})
.filter(|(_, conns)| !conns.is_empty())
.collect();
if neighbor_locations.is_empty() && connections_considered > 0 {
tracing::debug!(
current_connections,
connections_considered,
live_tx_peers = live_tx_tracker.len(),
"Neighbor filtering removed all candidates; using all connections"
);
neighbor_locations = peers
.iter()
.map(|(loc, conns)| (*loc, conns.clone()))
.filter(|(_, conns)| !conns.is_empty())
.collect();
}
if current_connections > self.connection_manager.max_connections {
// When over capacity, consider all connections for removal regardless of live_tx filter.
neighbor_locations = peers.clone();
}
tracing::debug!(
current_connections,
candidates = peers.len(),
live_tx_peers = live_tx_tracker.len(),
"Evaluating topology maintenance"
);
let adjustment = self
.connection_manager
.topology_manager
.write()
.adjust_topology(
&neighbor_locations,
&self.connection_manager.own_location().location(),
Instant::now(),
current_connections,
);
tracing::debug!(
adjustment = ?adjustment,
current_connections,
is_gateway,
pending_adds = pending_connection_targets,
"Topology adjustment result"
);
match adjustment {
TopologyAdjustment::AddConnections(target_locs) => {
let allowed = calculate_allowed_connection_additions(
current_connections,
pending_connection_targets,
self.connection_manager.min_connections,
self.connection_manager.max_connections,
target_locs.len(),
);
if allowed == 0 {
tracing::debug!(
requested = target_locs.len(),
current_connections,
pending = pending_connection_targets,
min_connections = self.connection_manager.min_connections,
max_connections = self.connection_manager.max_connections,
"Skipping queuing new connection targets – backlog already satisfies capacity constraints"
);
} else {
let total_pending_after = pending_connection_targets + allowed;
tracing::debug!(
requested = target_locs.len(),
allowed,
total_pending_after,
"Queuing additional connection targets"
);
pending_conn_adds.extend(target_locs.into_iter().take(allowed));
}
}
TopologyAdjustment::RemoveConnections(mut should_disconnect_peers) => {
for peer in should_disconnect_peers.drain(..) {
if let Some(addr) = peer.socket_addr() {
notifier
.notifications_sender
.send(Either::Right(crate::message::NodeEvent::DropConnection(
addr,
)))
.await
.map_err(|error| {
tracing::debug!(
error = ?error,
"Shutting down connection maintenance task"
);
error
})?;
}
}
}
TopologyAdjustment::SwapConnection {
remove,
add_location,
} => {
// Connect-first swap: defer the drop until the replacement
// connects (connection_count > min_connections), preventing
// undershoot that would block future swaps. Deferred drops
// expire after DEFERRED_SWAP_DROP_TTL if replacement fails.
if let Some(addr) = remove.socket_addr() {
tracing::info!(
remove_peer = %remove,
add_target = %add_location,
"Topology swap: queuing replacement connection (drop deferred)"
);
pending_conn_adds.insert(add_location);
// Deduplicate: don't queue the same peer twice if
// consecutive swaps select it before the first executes.
if !deferred_swap_drops.iter().any(|(a, _)| *a == addr) {
deferred_swap_drops.push((addr, tick_now));
}
} else {
tracing::warn!(
remove_peer = %remove,
"Topology swap skipped: peer has no socket address"
);
}
}
TopologyAdjustment::NoChange => {}
}
// Execute deferred swap drops: only drop as many peers as we
// have headroom above min_connections to avoid undershooting.
// Expire stale entries whose replacement never connected.
{
let before_len = deferred_swap_drops.len();
deferred_swap_drops.retain(|(_, queued_at)| {
tick_now.saturating_duration_since(*queued_at) < DEFERRED_SWAP_DROP_TTL
});
let expired = before_len - deferred_swap_drops.len();
if expired > 0 {
tracing::debug!(
expired,
"Deferred swap drops expired (replacement never connected)"
);
}
if !deferred_swap_drops.is_empty() {
let fresh_count = self.connection_manager.connection_count();
let min_conn = self.connection_manager.min_connections;
let n_to_drop = deferred_swap_drops_to_execute(
fresh_count,
min_conn,
deferred_swap_drops.len(),
);
for (addr, _) in deferred_swap_drops.drain(..n_to_drop) {
tracing::info!(
peer = %addr,
connections = fresh_count,
"Executing deferred swap drop (replacement connected)"
);
notifier
.notifications_sender
.send(Either::Right(crate::message::NodeEvent::DropConnection(
addr,
)))
.await
.map_err(|error| {
tracing::debug!(
?error,
"Shutting down connection maintenance task"
);
error
})?;
}
}
}
let needs_fast_tick = current_connections < self.connection_manager.min_connections;
if needs_fast_tick {
// Adaptive backoff: reset on any connection count change
// (gain OR loss), otherwise slow down. A loss means topology
// changed and we should re-enter aggressive mode.
if current_connections != last_conn_count {
no_progress_ticks = 0;
} else {
no_progress_ticks = no_progress_ticks.saturating_add(1);
}
last_conn_count = current_connections;
let multiplier = if no_progress_ticks <= FAST_TICK_BACKOFF_THRESHOLD {
1u32
} else {
let excess = no_progress_ticks - FAST_TICK_BACKOFF_THRESHOLD;
2u32.saturating_pow(excess).min(MAX_FAST_TICK_MULTIPLIER)
};
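// With FAST_TICK_BACKOFF_THRESHOLD = 6, ticks 7, 8, 9, ... yield multipliers
// 2, 4, 8, ... until capped at MAX_FAST_TICK_MULTIPLIER.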
// Apply ±20% jitter to prevent synchronized CONNECT bursts
// across peers that bootstrapped simultaneously.
let jitter: f64 = crate::config::GlobalRng::random_range(0.8..=1.2);
let adaptive_duration =
FAST_CHECK_TICK_DURATION.mul_f64(multiplier as f64 * jitter);
if multiplier > 1 {
tracing::debug!(
current_connections,
min_connections = self.connection_manager.min_connections,
no_progress_ticks,
tick_interval_secs = adaptive_duration.as_secs(),
"Fast-tick backed off due to no connection progress"
);
}
// Uses sleep() instead of the check_interval so we don't need a
// second Interval object. We reset check_interval on transition
// back to steady-state to avoid an immediate burst tick.
crate::deterministic_select! {
_ = refresh_density_map.tick() => {
self.refresh_density_request_cache();
},
_ = tokio::time::sleep(adaptive_duration) => {},
}
} else {
// Reached min_connections: reset backoff for next time.
no_progress_ticks = 0;
last_conn_count = current_connections;
// Reset the interval on transition from fast to normal tick so
// accumulated missed ticks don't cause an immediate burst.
check_interval.reset();
crate::deterministic_select! {
_ = refresh_density_map.tick() => {
self.refresh_density_request_cache();
},
_ = check_interval.tick() => {},
}
}
}
}
#[tracing::instrument(level = "debug", skip(self, notifier, live_tx_tracker, op_manager), fields(peer = %self.connection_manager.pub_key))]
async fn acquire_new(
&self,
ideal_location: Location,
skip_list: &HashSet<SocketAddr>,
notifier: &EventLoopNotificationsSender,
live_tx_tracker: &LiveTransactionTracker,
op_manager: &Arc<OpManager>,
) -> anyhow::Result<Option<Transaction>> {
let current_connections = self.connection_manager.connection_count();
let is_gateway = self.is_gateway;
tracing::debug!(
current_connections,
is_gateway,
target_location = %ideal_location,
"acquire_new: attempting to find peer to query"
);
let query_target = {
let router = self.router.read();
let num_connections = self.connection_manager.num_connections();
tracing::debug!(
target_location = %ideal_location,
num_connections,
skip_list_size = skip_list.len(),
self_addr = ?self.connection_manager.get_own_addr(),
"Looking for peer to route through"
);
// CONNECT operations bypass readiness gating — peers need to route
// through ANY ring connection to acquire new connections, even if those
// connections haven't advertised readiness yet. Without this, peers get
// stuck below the readiness threshold: they can't initiate CONNECTs
// because routing() filters out all their "not ready" connections,
// but they can't become ready without more connections.
let candidates = self.connection_manager.routing_candidates(
ideal_location,
None,
skip_list,
false, // bypass readiness — this is a CONNECT for connection acquisition
);
let selected = if !candidates.is_empty() {
let (selected, _) =
router.select_k_best_peers_with_telemetry(candidates.iter(), ideal_location, 1);
selected.into_iter().next().cloned()
} else {
None
};
if let Some(target) = selected {
tracing::debug!(
query_target = %target,
target_location = %ideal_location,
"connection_maintenance selected routing target"
);
target
} else {
tracing::warn!(
current_connections,
is_gateway,
target_location = %ideal_location,
"acquire_new: no routing candidates found - cannot find peer to query"
);
return Ok(None);
}
};
let joiner = self.connection_manager.own_location();
tracing::debug!(
this_peer = %joiner,
query_target_peer = %query_target,
target_location = %ideal_location,
"Sending connect request via connection_maintenance"
);
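// Clamp the configured hop limit into [1, 255] so it fits the u8 TTL carried by the connect request.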
let ttl = self.max_hops_to_live.max(1).min(u8::MAX as usize) as u8;
let target_connections = self.connection_manager.min_connections;
let failed_addrs = self.connection_manager.recently_failed_addrs();
let connected_addrs = self.connection_manager.connected_peer_addrs();
tracing::debug!(
failed = failed_addrs.len(),
connected = connected_addrs.len(),
"acquire_new: pre-populating bloom filter with excluded peer addresses"
);
let mut exclude_addrs = failed_addrs;
exclude_addrs.extend(connected_addrs);
let (tx, op, msg) = ConnectOp::initiate_join_request(
joiner.clone(),
query_target.clone(),
ideal_location,
ttl,
target_connections,
op_manager.connect_forward_estimator.clone(),
&exclude_addrs,
);
// Emit telemetry for initial connect request sent
if let Some(event) = NetEventLog::connect_request_sent(
&tx,
self,
ideal_location,
joiner,
query_target.clone(),
ttl,
true, // is_initial
) {
self.register_events(Either::Left(event)).await;
}
if let Some(addr) = query_target.socket_addr() {
live_tx_tracker.add_transaction(addr, tx);
}
op_manager
.push(tx, OpEnum::Connect(Box::new(op)))
.await
.map_err(|err| anyhow::anyhow!(err))?;
notifier
.notifications_sender
.send(Either::Left(NetMessage::V1(NetMessageV1::Connect(msg))))
.await?;
tracing::debug!(tx = %tx, "Connect request sent");
Ok(Some(tx))
}
/// Register a topology snapshot for this peer with the global registry.
///
/// This should be called periodically during simulation tests to enable
/// topology validation. The snapshot captures the current subscription
/// state for all contracts.
#[cfg(any(test, feature = "testing"))]
#[allow(dead_code)] // Used by SimNetwork tests
pub fn register_topology_snapshot(&self, network_name: &str) {
let Some(peer_addr) = self.connection_manager.get_own_addr() else {
return;
};
// Use get_stored_location() for consistency with set_upstream distance check.
let location = self
.connection_manager
.get_stored_location()
.map(|l| l.as_f64())
.unwrap_or(0.0);
let snapshot = self
.hosting_manager
.generate_topology_snapshot(peer_addr, location);
topology_registry::register_topology_snapshot(network_name, snapshot);
}
/// Get a topology snapshot for this peer without registering it.
#[cfg(any(test, feature = "testing"))]
#[allow(dead_code)] // Used by SimNetwork tests
pub fn get_topology_snapshot(&self) -> Option<topology_registry::TopologySnapshot> {
let peer_addr = self.connection_manager.get_own_addr()?;
// Use get_stored_location() for consistency with set_upstream distance check.
let location = self
.connection_manager
.get_stored_location()
.map(|l| l.as_f64())
.unwrap_or(0.0);
Some(
self.hosting_manager
.generate_topology_snapshot(peer_addr, location),
)
}
}
/// Calculate the maximum number of concurrent connection acquisition attempts.
///
/// During bootstrap (below `min_connections`), scales up from the base to allow
/// more parallel attempts, preventing stalls when slots fill with slow transactions.
/// Once at or above `min_connections`, returns the base value.
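///
/// Worked example (mirroring the unit tests below): with `min_connections = 25`
/// and `current_connections = 10`, the deficit is 15, so the limit is
/// `3 + 15/3 = 8`, under the `25/2 = 12` cap.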
fn calculate_max_concurrent_connections(
current_connections: usize,
min_connections: usize,
) -> usize {
/// Base concurrent connection slots (steady-state).
const BASE: usize = 3;
/// How many missing connections map to one additional concurrent slot.
const CONNECTIONS_PER_EXTRA_SLOT: usize = 3;
if current_connections >= min_connections {
return BASE;
}
let deficit = min_connections - current_connections;
// Cap at half of min_connections, floored at BASE.
let bootstrap_cap = (min_connections / 2).max(BASE);
(BASE + deficit / CONNECTIONS_PER_EXTRA_SLOT).min(bootstrap_cap)
}
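/// Calculate how many of the requested connection targets may be queued this tick.
///
/// Treats `current + pending` as the effective connection count and never exceeds
/// `max_connections`; while still below `min_connections`, only enough targets to
/// close the remaining gap to the minimum are allowed. For example (per the tests
/// below), `(current = 1, pending = 0, min = 25, max = 200, requested = 24)` yields
/// 24, while the same call with `pending = 24` yields 0.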
fn calculate_allowed_connection_additions(
current_connections: usize,
pending_connections: usize,
min_connections: usize,
max_connections: usize,
requested: usize,
) -> usize {
if requested == 0 {
return 0;
}
let effective_connections = current_connections.saturating_add(pending_connections);
if effective_connections >= max_connections {
return 0;
}
let mut available_capacity = max_connections - effective_connections;
if current_connections < min_connections {
let deficit_to_min = min_connections.saturating_sub(effective_connections);
available_capacity = available_capacity.min(deficit_to_min);
}
available_capacity.min(requested)
}
/// Compute how many deferred swap-drops can be executed this tick.
///
/// A deferred drop is safe to execute only when a replacement peer has actually
/// connected. The guard: `current_connections > min_connections + pending_drops`
/// — meaning we have at least one extra connection above what's needed to cover
/// all pending drops plus the minimum. Each drop we commit to reduces the
/// effective count by one, so we re-evaluate per element.
///
/// Drops are sent as async events; `connection_count()` won't reflect them until
/// the events are processed. We track the decrement locally instead.
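///
/// Worked example (see the unit tests below): with `current = 14`, `min = 10`,
/// `pending = 3`, all three drops execute and the effective count ends at 11 ≥ 10;
/// with `current = 13` the first check is `13 > 10 + 3`, which fails, so none do.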
fn deferred_swap_drops_to_execute(
current_connections: usize,
min_connections: usize,
pending_drops: usize,
) -> usize {
let mut effective_count = current_connections;
let mut n_to_drop = 0usize;
for _ in 0..pending_drops {
let remaining_pending = pending_drops - n_to_drop;
if effective_count > min_connections.saturating_add(remaining_pending) {
n_to_drop += 1;
effective_count = effective_count.saturating_sub(1);
} else {
break;
}
}
n_to_drop
}
#[cfg(test)]
mod max_concurrent_connections_tests {
use super::calculate_max_concurrent_connections;
#[test]
fn at_min_connections_returns_base() {
assert_eq!(calculate_max_concurrent_connections(25, 25), 3);
}
#[test]
fn above_min_connections_returns_base() {
assert_eq!(calculate_max_concurrent_connections(30, 25), 3);
}
#[test]
fn large_deficit_scales_up() {
// deficit=15, 3 + 15/3 = 8, cap = 25/2 = 12 → 8
assert_eq!(calculate_max_concurrent_connections(10, 25), 8);
}
#[test]
fn full_deficit_capped_at_half_min() {
// deficit=25, 3 + 25/3 = 11, cap = 25/2 = 12 → 11
assert_eq!(calculate_max_concurrent_connections(0, 25), 11);
}
#[test]
fn small_deficit_adds_nothing() {
// deficit=2, 3 + 2/3 = 3, cap = 25/2 = 12 → 3
assert_eq!(calculate_max_concurrent_connections(23, 25), 3);
}
#[test]
fn very_small_min_connections() {
// min_conns=1, deficit=1, 3 + 0 = 3, cap = max(0, 3) = 3 → 3
assert_eq!(calculate_max_concurrent_connections(0, 1), 3);
// min_conns=2, deficit=2, 3 + 0 = 3, cap = max(1, 3) = 3 → 3
assert_eq!(calculate_max_concurrent_connections(0, 2), 3);
}
#[test]
fn high_min_connections_scales_cap() {
// deficit=50, 3 + 50/3 = 19, cap = 50/2 = 25 → 19
assert_eq!(calculate_max_concurrent_connections(0, 50), 19);
}
}
#[cfg(test)]
mod pending_additions_tests {
use super::calculate_allowed_connection_additions;
#[test]
fn respects_minimum_when_backlog_exists() {
let allowed = calculate_allowed_connection_additions(1, 24, 25, 200, 24);
assert_eq!(allowed, 0, "Backlog should satisfy minimum deficit");
}
#[test]
fn permits_requests_until_minimum_is_met() {
let allowed = calculate_allowed_connection_additions(1, 0, 25, 200, 24);
assert_eq!(allowed, 24);
}
#[test]
fn caps_additions_at_available_capacity() {
let allowed = calculate_allowed_connection_additions(190, 5, 25, 200, 10);
assert_eq!(allowed, 5);
}
#[test]
fn respects_requested_when_capacity_allows() {
let allowed = calculate_allowed_connection_additions(50, 0, 25, 200, 3);
assert_eq!(allowed, 3);
}
}
#[cfg(test)]
mod refresh_router_tests {
use std::sync::Arc;
use std::time::Duration;
use either::Either;
use futures::FutureExt;
use parking_lot::RwLock;
use crate::ring::PeerKeyLocation;
use crate::ring::location::Location;
use crate::router::{RouteEvent, RouteOutcome, Router};
use crate::tracing::{NetEventLog, NetEventRegister};
/// Mock register that returns pre-populated RouteEvents on startup.
#[derive(Clone)]
struct WarmStartRegister {
events: Vec<RouteEvent>,
}
impl NetEventRegister for WarmStartRegister {
fn register_events<'a>(
&'a self,
_events: Either<NetEventLog<'a>, Vec<NetEventLog<'a>>>,
) -> futures::future::BoxFuture<'a, ()> {
async {}.boxed()
}
fn notify_of_time_out(
&mut self,
_tx: crate::message::Transaction,
_op_type: &str,
_target_peer: Option<String>,
) -> futures::future::BoxFuture<'_, ()> {
async {}.boxed()
}
fn trait_clone(&self) -> Box<dyn NetEventRegister> {
Box::new(self.clone())
}
fn get_router_events(
&self,
number: usize,
) -> futures::future::BoxFuture<'_, anyhow::Result<Vec<RouteEvent>>> {
let events = self.events.iter().take(number).cloned().collect();
async move { Ok(events) }.boxed()
}
}
fn make_route_events(count: usize) -> Vec<RouteEvent> {
(0..count)
.map(|_| RouteEvent {
peer: PeerKeyLocation::random(),
contract_location: Location::random(),
outcome: RouteOutcome::Success {
time_to_response_start: Duration::from_millis(50),
payload_size: 1000,
payload_transfer_time: Duration::from_millis(100),
},
})
.collect()
}
#[tokio::test]
async fn refresh_router_loads_history_on_startup() {
let events = make_route_events(100);
let register = WarmStartRegister {
events: events.clone(),
};
let router = Arc::new(RwLock::new(Router::new(&[])));
// Verify the router starts empty
let snapshot = router.read().snapshot();
assert_eq!(snapshot.failure_events, 0);
assert_eq!(snapshot.success_events, 0);
// Run refresh_router with a timeout so it doesn't loop forever.
// The startup load happens before the interval loop, so it completes
// within the first few milliseconds.
tokio::time::timeout(
Duration::from_millis(100),
super::Ring::refresh_router(router.clone(), register),
)
.await
.ok(); // timeout is expected — the function loops forever
// Verify the router was populated from the historical events
let snapshot = router.read().snapshot();
assert_eq!(
snapshot.success_events, 100,
"Router should have been populated with startup history"
);
}
#[tokio::test]
async fn refresh_router_handles_empty_history() {
let register = WarmStartRegister { events: vec![] };
let router = Arc::new(RwLock::new(Router::new(&[])));
tokio::time::timeout(
Duration::from_millis(100),
super::Ring::refresh_router(router.clone(), register),
)
.await
.ok();
// Router should remain empty
let snapshot = router.read().snapshot();
assert_eq!(snapshot.failure_events, 0);
assert_eq!(snapshot.success_events, 0);
}
}
#[cfg(test)]
mod deferred_swap_drop_tests {
use super::deferred_swap_drops_to_execute;
/// 3-node ring: current=2, min=1, pending=1.
/// The original bug: headroom = 2-1 = 1 > 0, so the old code fired the drop,
/// kicking the only other peer and leaving the node isolated.
/// Fixed: guard requires current > min + pending (2 > 1+1 = 2) → false.
#[test]
fn three_node_ring_no_drop_without_replacement() {
assert_eq!(deferred_swap_drops_to_execute(2, 1, 1), 0);
}
/// Replacement connected in a 3-node ring: current=3, min=1, pending=1.
/// 3 > 1+1=2 → true. One drop allowed.
#[test]
fn three_node_ring_drop_when_replacement_connected() {
assert_eq!(deferred_swap_drops_to_execute(3, 1, 1), 1);
}
/// At min_connections with no pending drops: current=10, min=10, pending=0.
/// No pending drops means nothing to execute.
#[test]
fn no_pending_drops_returns_zero() {
assert_eq!(deferred_swap_drops_to_execute(10, 10, 0), 0);
}
/// Large network, no replacements yet: current=12, min=10, pending=3.
/// 12 > 10+3=13 → false. None dropped.
#[test]
fn large_network_no_drop_without_replacement() {
assert_eq!(deferred_swap_drops_to_execute(12, 10, 3), 0);
}
/// Large network, exactly enough for one replacement: current=13, min=10, pending=3.
/// i=0: remaining=3, 13 > 13 → false. Still none.
#[test]
fn large_network_boundary_no_drop() {
assert_eq!(deferred_swap_drops_to_execute(13, 10, 3), 0);
}
/// Large network, one replacement connected: current=14, min=10, pending=3.
/// i=0: remaining=3, 14 > 13 → true (effective=13, n=1)
/// i=1: remaining=2, 13 > 12 → true (effective=12, n=2)
/// i=2: remaining=1, 12 > 11 → true (effective=11, n=3)
/// All 3 dropped; after drops effective=11 ≥ min=10.
#[test]
fn large_network_all_replacements_connected() {
assert_eq!(deferred_swap_drops_to_execute(14, 10, 3), 3);
}
/// current exactly at min with one pending: current=10, min=10, pending=1.
/// 10 > 10+1=11 → false. No drop.
#[test]
fn at_min_with_pending_no_drop() {
assert_eq!(deferred_swap_drops_to_execute(10, 10, 1), 0);
}
/// current = min + 1 with one pending: current=11, min=10, pending=1.
/// 11 > 11 → false. No drop (1 extra but still need the pending slot covered).
#[test]
fn one_above_min_with_pending_no_drop() {
assert_eq!(deferred_swap_drops_to_execute(11, 10, 1), 0);
}
/// current = min + 2 with one pending: current=12, min=10, pending=1.
/// 12 > 11 → true. One drop allowed.
#[test]
fn two_above_min_with_one_pending_drops_one() {
assert_eq!(deferred_swap_drops_to_execute(12, 10, 1), 1);
}
/// Overflow-safe: current=0, min=0, pending=0.
#[test]
fn all_zero_returns_zero() {
assert_eq!(deferred_swap_drops_to_execute(0, 0, 0), 0);
}
/// current < min (below minimum, e.g. still bootstrapping): no drops.
#[test]
fn below_min_connections_no_drop() {
assert_eq!(deferred_swap_drops_to_execute(5, 10, 2), 0);
}
}
#[derive(thiserror::Error, Debug)]
pub(crate) enum RingError {
#[error(transparent)]
ConnError(#[from] Box<node::ConnectionError>),
#[error("No ring connections found")]
EmptyRing,
#[error("Ran out of, or haven't found any, hosting peers for contract {0}")]
NoHostingPeers(ContractInstanceId),
#[error("Peer has not joined the network yet (no ring location established)")]
PeerNotJoined,
}