snarkos_node_router/
heartbeat.rs

1// Copyright 2024-2025 Aleo Network Foundation
2// This file is part of the snarkOS library.
3
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at:
7
8// http://www.apache.org/licenses/LICENSE-2.0
9
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use crate::{
17    Outbound,
18    Peer,
19    Router,
20    messages::{DisconnectReason, Message, PeerRequest},
21};
22use snarkvm::prelude::Network;
23
24use colored::Colorize;
25use rand::{Rng, prelude::IteratorRandom, rngs::OsRng};
26
/// Returns the larger of `a` and `b` (either one when they are equal).
///
/// Declared as a `const fn` so that it can be evaluated in constant contexts.
/// See Rust issue 92391: https://github.com/rust-lang/rust/issues/92391.
pub const fn max(a: usize, b: usize) -> usize {
    if a > b { a } else { b }
}
35
36pub trait Heartbeat<N: Network>: Outbound<N> {
37    /// The duration in seconds to sleep in between heartbeat executions.
38    const HEARTBEAT_IN_SECS: u64 = 25; // 25 seconds
39    /// The minimum number of peers required to maintain connections with.
40    const MINIMUM_NUMBER_OF_PEERS: usize = 3;
41    /// The median number of peers to maintain connections with.
42    const MEDIAN_NUMBER_OF_PEERS: usize = max(Self::MAXIMUM_NUMBER_OF_PEERS / 2, Self::MINIMUM_NUMBER_OF_PEERS);
43    /// The maximum number of peers permitted to maintain connections with.
44    const MAXIMUM_NUMBER_OF_PEERS: usize = 21;
45    /// The maximum number of provers to maintain connections with.
46    const MAXIMUM_NUMBER_OF_PROVERS: usize = Self::MAXIMUM_NUMBER_OF_PEERS / 4;
47    /// The amount of time an IP address is prohibited from connecting.
48    const IP_BAN_TIME_IN_SECS: u64 = 300;
49
50    /// Handles the heartbeat request.
51    fn heartbeat(&self) {
52        self.safety_check_minimum_number_of_peers();
53        self.log_connected_peers();
54
55        // Remove any stale connected peers.
56        self.remove_stale_connected_peers();
57        // Remove the oldest connected peer.
58        self.remove_oldest_connected_peer();
59        // Keep the number of connected peers within the allowed range.
60        self.handle_connected_peers();
61        // Keep the bootstrap peers within the allowed range.
62        self.handle_bootstrap_peers();
63        // Keep the trusted peers connected.
64        self.handle_trusted_peers();
65        // Keep the puzzle request up to date.
66        self.handle_puzzle_request();
67        // Unban any addresses whose ban time has expired.
68        self.handle_banned_ips();
69    }
70
71    /// TODO (howardwu): Consider checking minimum number of validators, to exclude clients and provers.
72    /// This function performs safety checks on the setting for the minimum number of peers.
73    fn safety_check_minimum_number_of_peers(&self) {
74        // Perform basic sanity checks on the configuration for the number of peers.
75        assert!(Self::MINIMUM_NUMBER_OF_PEERS >= 1, "The minimum number of peers must be at least 1.");
76        assert!(Self::MINIMUM_NUMBER_OF_PEERS <= Self::MAXIMUM_NUMBER_OF_PEERS);
77        assert!(Self::MINIMUM_NUMBER_OF_PEERS <= Self::MEDIAN_NUMBER_OF_PEERS);
78        assert!(Self::MEDIAN_NUMBER_OF_PEERS <= Self::MAXIMUM_NUMBER_OF_PEERS);
79        assert!(Self::MAXIMUM_NUMBER_OF_PROVERS <= Self::MAXIMUM_NUMBER_OF_PEERS);
80    }
81
82    /// This function logs the connected peers.
83    fn log_connected_peers(&self) {
84        // Log the connected peers.
85        let connected_peers = self.router().connected_peers();
86        let connected_peers_fmt = format!("{connected_peers:?}").dimmed();
87        match connected_peers.len() {
88            0 => debug!("No connected peers"),
89            1 => debug!("Connected to 1 peer: {connected_peers_fmt}"),
90            num_connected => debug!("Connected to {num_connected} peers {connected_peers_fmt}"),
91        }
92    }
93
94    /// This function removes any connected peers that have not communicated within the predefined time.
95    fn remove_stale_connected_peers(&self) {
96        // Check if any connected peer is stale.
97        for peer in self.router().get_connected_peers() {
98            // Disconnect if the peer has not communicated back within the predefined time.
99            let elapsed = peer.last_seen().elapsed().as_secs();
100            if elapsed > Router::<N>::RADIO_SILENCE_IN_SECS {
101                warn!("Peer {} has not communicated in {elapsed} seconds", peer.ip());
102                // Disconnect from this peer.
103                self.router().disconnect(peer.ip());
104            }
105        }
106    }
107
108    /// Returns a sorted vector of network addresess of all removable connected peers
109    /// where the first entry has the lowest priority andthe last one the highest.
110    ///
111    /// Rules:
112    ///     - Trusted peers and bootstrap nodes are not removable.
113    ///     - Peers that we are currently syncing with are not remeovable.
114    ///     - Validators are considered higher priority than provers or clients.
115    ///     - Connections that have not been seen in a while are considered lower priority.
116    fn get_removable_peers(&self) -> Vec<Peer<N>> {
117        // The trusted peers (specified at runtime).
118        let trusted = self.router().trusted_peers();
119        // The hardcoded bootstrap nodes.
120        let bootstrap = self.router().bootstrap_peers();
121        // Are we synced already? (cache this here, so it does not need to be recomputed)
122        let is_block_synced = self.is_block_synced();
123
124        // Sort by priority, where lowest priority will be at the beginning
125        // of the vector.
126        // Note, that this gives equal priority to clients and provers, which
127        // we might want to change in the future.
128        let mut peers = self.router().get_connected_peers();
129        peers.sort_by_key(|peer| (peer.is_validator(), peer.last_seen()));
130
131        // Deterimine which of the peers can be removed.
132        peers
133            .into_iter()
134            .filter(|peer| {
135                !trusted.contains(&peer.ip()) // Always keep trusted nodes.
136                  && !bootstrap.contains(&peer.ip()) // Always keep bootstrap nodes.
137                  && !self.router().cache.contains_inbound_block_request(&peer.ip()) // This peer is currently syncing from us.
138                  && (is_block_synced || self.router().cache.num_outbound_block_requests(&peer.ip()) == 0) // We are currently syncing from this peer.
139            })
140            .collect()
141    }
142
143    /// This function removes the peer that we have not heard from the longest,
144    /// to keep the connections fresh.
145    /// It only triggers if the router is above the minimum number of connected peers.
146    fn remove_oldest_connected_peer(&self) {
147        // Skip if the router is at or below the minimum number of connected peers.
148        if self.router().number_of_connected_peers() <= Self::MINIMUM_NUMBER_OF_PEERS {
149            return;
150        }
151
152        // Skip if the node is not requesting peers.
153        if !self.router().allow_external_peers() {
154            return;
155        }
156
157        // Disconnect from the oldest connected peer, which is the first entry in the list
158        // of removable peers.
159        // Do nothing, if the list is empty.
160        if let Some(oldest) = self.get_removable_peers().first().map(|peer| peer.ip()) {
161            info!("Disconnecting from '{oldest}' (periodic refresh of peers)");
162            let _ = self.send(oldest, Message::Disconnect(DisconnectReason::PeerRefresh.into()));
163            self.router().disconnect(oldest);
164        }
165    }
166
167    /// This function keeps the number of connected peers within the allowed range.
168    fn handle_connected_peers(&self) {
169        // Initialize an RNG.
170        let rng = &mut OsRng;
171
172        // Obtain the number of connected peers.
173        let num_connected = self.router().number_of_connected_peers();
174        // Obtain the number of connected provers.
175        let num_connected_provers = self.router().number_of_connected_provers();
176
177        // Consider rotating more external peers every ~10 heartbeats.
178        let reduce_peers = self.router().rotate_external_peers() && rng.gen_range(0..10) == 0;
179        // Determine the maximum number of peers and provers to keep.
180        let (max_peers, max_provers) = if reduce_peers {
181            (Self::MEDIAN_NUMBER_OF_PEERS, 0)
182        } else {
183            (Self::MAXIMUM_NUMBER_OF_PEERS, Self::MAXIMUM_NUMBER_OF_PROVERS)
184        };
185
186        // Compute the number of surplus peers.
187        let num_surplus_peers = num_connected.saturating_sub(max_peers);
188        // Compute the number of surplus provers.
189        let num_surplus_provers = num_connected_provers.saturating_sub(max_provers);
190        // Compute the number of provers remaining connected.
191        let num_remaining_provers = num_connected_provers.saturating_sub(num_surplus_provers);
192        // Compute the number of surplus clients and validators.
193        let num_surplus_clients_validators = num_surplus_peers.saturating_sub(num_remaining_provers);
194
195        if num_surplus_provers > 0 || num_surplus_clients_validators > 0 {
196            debug!(
197                "Exceeded maximum number of connected peers, disconnecting from ({num_surplus_provers} + {num_surplus_clients_validators}) peers"
198            );
199
200            // Retrieve the trusted peers.
201            let trusted = self.router().trusted_peers();
202            // Retrieve the bootstrap peers.
203            let bootstrap = self.router().bootstrap_peers();
204
205            // Determine the provers to disconnect from.
206            let provers_to_disconnect = self
207                .router()
208                .connected_provers()
209                .into_iter()
210                .filter(|peer_ip| !trusted.contains(peer_ip) && !bootstrap.contains(peer_ip))
211                .choose_multiple(rng, num_surplus_provers);
212
213            // Determine the clients and validators to disconnect from.
214            let peers_to_disconnect = self
215                .get_removable_peers()
216                .into_iter()
217                .filter(|peer| !peer.is_prover()) // remove provers as those are handled seperately
218                .map(|p| p.ip())
219                .take(num_surplus_clients_validators);
220
221            // Proceed to send disconnect requests to these peers.
222            for peer_ip in peers_to_disconnect.chain(provers_to_disconnect) {
223                // TODO (howardwu): Remove this after specializing this function.
224                if self.router().node_type().is_prover() {
225                    if let Some(peer) = self.router().get_connected_peer(&peer_ip) {
226                        if peer.node_type().is_validator() {
227                            continue;
228                        }
229                    }
230                }
231
232                info!("Disconnecting from '{peer_ip}' (exceeded maximum connections)");
233                self.send(peer_ip, Message::Disconnect(DisconnectReason::TooManyPeers.into()));
234                // Disconnect from this peer.
235                self.router().disconnect(peer_ip);
236            }
237        }
238
239        // Obtain the number of connected peers.
240        let num_connected = self.router().number_of_connected_peers();
241        // Compute the number of deficit peers.
242        let num_deficient = Self::MEDIAN_NUMBER_OF_PEERS.saturating_sub(num_connected);
243
244        if num_deficient > 0 {
245            // Initialize an RNG.
246            let rng = &mut OsRng;
247
248            // Attempt to connect to more peers.
249            for peer_ip in self.router().candidate_peers().into_iter().choose_multiple(rng, num_deficient) {
250                self.router().connect(peer_ip);
251            }
252
253            if self.router().allow_external_peers() {
254                // Request more peers from the connected peers.
255                for peer_ip in self.router().connected_peers().into_iter().choose_multiple(rng, 3) {
256                    self.send(peer_ip, Message::PeerRequest(PeerRequest));
257                }
258            }
259        }
260    }
261
262    /// This function keeps the number of bootstrap peers within the allowed range.
263    fn handle_bootstrap_peers(&self) {
264        // Split the bootstrap peers into connected and candidate lists.
265        let mut connected_bootstrap = Vec::new();
266        let mut candidate_bootstrap = Vec::new();
267        for bootstrap_ip in self.router().bootstrap_peers() {
268            match self.router().is_connected(&bootstrap_ip) {
269                true => connected_bootstrap.push(bootstrap_ip),
270                false => candidate_bootstrap.push(bootstrap_ip),
271            }
272        }
273        // If there are not enough connected bootstrap peers, connect to more.
274        if connected_bootstrap.is_empty() {
275            // Initialize an RNG.
276            let rng = &mut OsRng;
277            // Attempt to connect to a bootstrap peer.
278            if let Some(peer_ip) = candidate_bootstrap.into_iter().choose(rng) {
279                self.router().connect(peer_ip);
280            }
281        }
282        // Determine if the node is connected to more bootstrap peers than allowed.
283        let num_surplus = connected_bootstrap.len().saturating_sub(1);
284        if num_surplus > 0 {
285            // Initialize an RNG.
286            let rng = &mut OsRng;
287            // Proceed to send disconnect requests to these bootstrap peers.
288            for peer_ip in connected_bootstrap.into_iter().choose_multiple(rng, num_surplus) {
289                info!("Disconnecting from '{peer_ip}' (exceeded maximum bootstrap)");
290                self.send(peer_ip, Message::Disconnect(DisconnectReason::TooManyPeers.into()));
291                // Disconnect from this peer.
292                self.router().disconnect(peer_ip);
293            }
294        }
295    }
296
297    /// This function attempts to connect to any disconnected trusted peers.
298    fn handle_trusted_peers(&self) {
299        // Ensure that the trusted nodes are connected.
300        for peer_ip in self.router().trusted_peers() {
301            // If the peer is not connected, attempt to connect to it.
302            if !self.router().is_connected(peer_ip) {
303                debug!("Attempting to (re-)connect to trusted peer `{peer_ip}`");
304                self.router().connect(*peer_ip);
305            }
306        }
307    }
308
309    /// This function updates the puzzle if network has updated.
310    fn handle_puzzle_request(&self) {
311        // No-op
312    }
313
314    // Remove addresses whose ban time has expired.
315    fn handle_banned_ips(&self) {
316        self.tcp().banned_peers().remove_old_bans(Self::IP_BAN_TIME_IN_SECS);
317    }
318}