snarkos_node_router/heartbeat.rs
1// Copyright 2024-2025 Aleo Network Foundation
2// This file is part of the snarkOS library.
3
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at:
7
8// http://www.apache.org/licenses/LICENSE-2.0
9
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use crate::{
17 Outbound,
18 Peer,
19 Router,
20 messages::{DisconnectReason, Message, PeerRequest},
21};
22use snarkvm::prelude::Network;
23
24use colored::Colorize;
25use rand::{Rng, prelude::IteratorRandom, rngs::OsRng};
26
/// Returns the larger of the two given numbers (either one when they are equal).
///
/// This is a `const fn`, so it can be evaluated in constant initializers.
/// See Rust issue 92391: https://github.com/rust-lang/rust/issues/92391.
pub const fn max(a: usize, b: usize) -> usize {
    if a > b { a } else { b }
}
35
36pub trait Heartbeat<N: Network>: Outbound<N> {
37 /// The duration in seconds to sleep in between heartbeat executions.
38 const HEARTBEAT_IN_SECS: u64 = 25; // 25 seconds
39 /// The minimum number of peers required to maintain connections with.
40 const MINIMUM_NUMBER_OF_PEERS: usize = 3;
41 /// The median number of peers to maintain connections with.
42 const MEDIAN_NUMBER_OF_PEERS: usize = max(Self::MAXIMUM_NUMBER_OF_PEERS / 2, Self::MINIMUM_NUMBER_OF_PEERS);
43 /// The maximum number of peers permitted to maintain connections with.
44 const MAXIMUM_NUMBER_OF_PEERS: usize = 21;
45 /// The maximum number of provers to maintain connections with.
46 const MAXIMUM_NUMBER_OF_PROVERS: usize = Self::MAXIMUM_NUMBER_OF_PEERS / 4;
47 /// The amount of time an IP address is prohibited from connecting.
48 const IP_BAN_TIME_IN_SECS: u64 = 300;
49
50 /// Handles the heartbeat request.
51 fn heartbeat(&self) {
52 self.safety_check_minimum_number_of_peers();
53 self.log_connected_peers();
54
55 // Remove any stale connected peers.
56 self.remove_stale_connected_peers();
57 // Remove the oldest connected peer.
58 self.remove_oldest_connected_peer();
59 // Keep the number of connected peers within the allowed range.
60 self.handle_connected_peers();
61 // Keep the bootstrap peers within the allowed range.
62 self.handle_bootstrap_peers();
63 // Keep the trusted peers connected.
64 self.handle_trusted_peers();
65 // Keep the puzzle request up to date.
66 self.handle_puzzle_request();
67 // Unban any addresses whose ban time has expired.
68 self.handle_banned_ips();
69 }
70
71 /// TODO (howardwu): Consider checking minimum number of validators, to exclude clients and provers.
72 /// This function performs safety checks on the setting for the minimum number of peers.
73 fn safety_check_minimum_number_of_peers(&self) {
74 // Perform basic sanity checks on the configuration for the number of peers.
75 assert!(Self::MINIMUM_NUMBER_OF_PEERS >= 1, "The minimum number of peers must be at least 1.");
76 assert!(Self::MINIMUM_NUMBER_OF_PEERS <= Self::MAXIMUM_NUMBER_OF_PEERS);
77 assert!(Self::MINIMUM_NUMBER_OF_PEERS <= Self::MEDIAN_NUMBER_OF_PEERS);
78 assert!(Self::MEDIAN_NUMBER_OF_PEERS <= Self::MAXIMUM_NUMBER_OF_PEERS);
79 assert!(Self::MAXIMUM_NUMBER_OF_PROVERS <= Self::MAXIMUM_NUMBER_OF_PEERS);
80 }
81
82 /// This function logs the connected peers.
83 fn log_connected_peers(&self) {
84 // Log the connected peers.
85 let connected_peers = self.router().connected_peers();
86 let connected_peers_fmt = format!("{connected_peers:?}").dimmed();
87 match connected_peers.len() {
88 0 => debug!("No connected peers"),
89 1 => debug!("Connected to 1 peer: {connected_peers_fmt}"),
90 num_connected => debug!("Connected to {num_connected} peers {connected_peers_fmt}"),
91 }
92 }
93
94 /// This function removes any connected peers that have not communicated within the predefined time.
95 fn remove_stale_connected_peers(&self) {
96 // Check if any connected peer is stale.
97 for peer in self.router().get_connected_peers() {
98 // Disconnect if the peer has not communicated back within the predefined time.
99 let elapsed = peer.last_seen().elapsed().as_secs();
100 if elapsed > Router::<N>::RADIO_SILENCE_IN_SECS {
101 warn!("Peer {} has not communicated in {elapsed} seconds", peer.ip());
102 // Disconnect from this peer.
103 self.router().disconnect(peer.ip());
104 }
105 }
106 }
107
108 /// Returns a sorted vector of network addresess of all removable connected peers
109 /// where the first entry has the lowest priority andthe last one the highest.
110 ///
111 /// Rules:
112 /// - Trusted peers and bootstrap nodes are not removable.
113 /// - Peers that we are currently syncing with are not remeovable.
114 /// - Validators are considered higher priority than provers or clients.
115 /// - Connections that have not been seen in a while are considered lower priority.
116 fn get_removable_peers(&self) -> Vec<Peer<N>> {
117 // The trusted peers (specified at runtime).
118 let trusted = self.router().trusted_peers();
119 // The hardcoded bootstrap nodes.
120 let bootstrap = self.router().bootstrap_peers();
121 // Are we synced already? (cache this here, so it does not need to be recomputed)
122 let is_block_synced = self.is_block_synced();
123
124 // Sort by priority, where lowest priority will be at the beginning
125 // of the vector.
126 // Note, that this gives equal priority to clients and provers, which
127 // we might want to change in the future.
128 let mut peers = self.router().get_connected_peers();
129 peers.sort_by_key(|peer| (peer.is_validator(), peer.last_seen()));
130
131 // Deterimine which of the peers can be removed.
132 peers
133 .into_iter()
134 .filter(|peer| {
135 !trusted.contains(&peer.ip()) // Always keep trusted nodes.
136 && !bootstrap.contains(&peer.ip()) // Always keep bootstrap nodes.
137 && !self.router().cache.contains_inbound_block_request(&peer.ip()) // This peer is currently syncing from us.
138 && (is_block_synced || self.router().cache.num_outbound_block_requests(&peer.ip()) == 0) // We are currently syncing from this peer.
139 })
140 .collect()
141 }
142
143 /// This function removes the peer that we have not heard from the longest,
144 /// to keep the connections fresh.
145 /// It only triggers if the router is above the minimum number of connected peers.
146 fn remove_oldest_connected_peer(&self) {
147 // Skip if the router is at or below the minimum number of connected peers.
148 if self.router().number_of_connected_peers() <= Self::MINIMUM_NUMBER_OF_PEERS {
149 return;
150 }
151
152 // Skip if the node is not requesting peers.
153 if !self.router().allow_external_peers() {
154 return;
155 }
156
157 // Disconnect from the oldest connected peer, which is the first entry in the list
158 // of removable peers.
159 // Do nothing, if the list is empty.
160 if let Some(oldest) = self.get_removable_peers().first().map(|peer| peer.ip()) {
161 info!("Disconnecting from '{oldest}' (periodic refresh of peers)");
162 let _ = self.send(oldest, Message::Disconnect(DisconnectReason::PeerRefresh.into()));
163 self.router().disconnect(oldest);
164 }
165 }
166
167 /// This function keeps the number of connected peers within the allowed range.
168 fn handle_connected_peers(&self) {
169 // Initialize an RNG.
170 let rng = &mut OsRng;
171
172 // Obtain the number of connected peers.
173 let num_connected = self.router().number_of_connected_peers();
174 // Obtain the number of connected provers.
175 let num_connected_provers = self.router().number_of_connected_provers();
176
177 // Consider rotating more external peers every ~10 heartbeats.
178 let reduce_peers = self.router().rotate_external_peers() && rng.gen_range(0..10) == 0;
179 // Determine the maximum number of peers and provers to keep.
180 let (max_peers, max_provers) = if reduce_peers {
181 (Self::MEDIAN_NUMBER_OF_PEERS, 0)
182 } else {
183 (Self::MAXIMUM_NUMBER_OF_PEERS, Self::MAXIMUM_NUMBER_OF_PROVERS)
184 };
185
186 // Compute the number of surplus peers.
187 let num_surplus_peers = num_connected.saturating_sub(max_peers);
188 // Compute the number of surplus provers.
189 let num_surplus_provers = num_connected_provers.saturating_sub(max_provers);
190 // Compute the number of provers remaining connected.
191 let num_remaining_provers = num_connected_provers.saturating_sub(num_surplus_provers);
192 // Compute the number of surplus clients and validators.
193 let num_surplus_clients_validators = num_surplus_peers.saturating_sub(num_remaining_provers);
194
195 if num_surplus_provers > 0 || num_surplus_clients_validators > 0 {
196 debug!(
197 "Exceeded maximum number of connected peers, disconnecting from ({num_surplus_provers} + {num_surplus_clients_validators}) peers"
198 );
199
200 // Retrieve the trusted peers.
201 let trusted = self.router().trusted_peers();
202 // Retrieve the bootstrap peers.
203 let bootstrap = self.router().bootstrap_peers();
204
205 // Determine the provers to disconnect from.
206 let provers_to_disconnect = self
207 .router()
208 .connected_provers()
209 .into_iter()
210 .filter(|peer_ip| !trusted.contains(peer_ip) && !bootstrap.contains(peer_ip))
211 .choose_multiple(rng, num_surplus_provers);
212
213 // Determine the clients and validators to disconnect from.
214 let peers_to_disconnect = self
215 .get_removable_peers()
216 .into_iter()
217 .filter(|peer| !peer.is_prover()) // remove provers as those are handled seperately
218 .map(|p| p.ip())
219 .take(num_surplus_clients_validators);
220
221 // Proceed to send disconnect requests to these peers.
222 for peer_ip in peers_to_disconnect.chain(provers_to_disconnect) {
223 // TODO (howardwu): Remove this after specializing this function.
224 if self.router().node_type().is_prover() {
225 if let Some(peer) = self.router().get_connected_peer(&peer_ip) {
226 if peer.node_type().is_validator() {
227 continue;
228 }
229 }
230 }
231
232 info!("Disconnecting from '{peer_ip}' (exceeded maximum connections)");
233 self.send(peer_ip, Message::Disconnect(DisconnectReason::TooManyPeers.into()));
234 // Disconnect from this peer.
235 self.router().disconnect(peer_ip);
236 }
237 }
238
239 // Obtain the number of connected peers.
240 let num_connected = self.router().number_of_connected_peers();
241 // Compute the number of deficit peers.
242 let num_deficient = Self::MEDIAN_NUMBER_OF_PEERS.saturating_sub(num_connected);
243
244 if num_deficient > 0 {
245 // Initialize an RNG.
246 let rng = &mut OsRng;
247
248 // Attempt to connect to more peers.
249 for peer_ip in self.router().candidate_peers().into_iter().choose_multiple(rng, num_deficient) {
250 self.router().connect(peer_ip);
251 }
252
253 if self.router().allow_external_peers() {
254 // Request more peers from the connected peers.
255 for peer_ip in self.router().connected_peers().into_iter().choose_multiple(rng, 3) {
256 self.send(peer_ip, Message::PeerRequest(PeerRequest));
257 }
258 }
259 }
260 }
261
262 /// This function keeps the number of bootstrap peers within the allowed range.
263 fn handle_bootstrap_peers(&self) {
264 // Split the bootstrap peers into connected and candidate lists.
265 let mut connected_bootstrap = Vec::new();
266 let mut candidate_bootstrap = Vec::new();
267 for bootstrap_ip in self.router().bootstrap_peers() {
268 match self.router().is_connected(&bootstrap_ip) {
269 true => connected_bootstrap.push(bootstrap_ip),
270 false => candidate_bootstrap.push(bootstrap_ip),
271 }
272 }
273 // If there are not enough connected bootstrap peers, connect to more.
274 if connected_bootstrap.is_empty() {
275 // Initialize an RNG.
276 let rng = &mut OsRng;
277 // Attempt to connect to a bootstrap peer.
278 if let Some(peer_ip) = candidate_bootstrap.into_iter().choose(rng) {
279 self.router().connect(peer_ip);
280 }
281 }
282 // Determine if the node is connected to more bootstrap peers than allowed.
283 let num_surplus = connected_bootstrap.len().saturating_sub(1);
284 if num_surplus > 0 {
285 // Initialize an RNG.
286 let rng = &mut OsRng;
287 // Proceed to send disconnect requests to these bootstrap peers.
288 for peer_ip in connected_bootstrap.into_iter().choose_multiple(rng, num_surplus) {
289 info!("Disconnecting from '{peer_ip}' (exceeded maximum bootstrap)");
290 self.send(peer_ip, Message::Disconnect(DisconnectReason::TooManyPeers.into()));
291 // Disconnect from this peer.
292 self.router().disconnect(peer_ip);
293 }
294 }
295 }
296
297 /// This function attempts to connect to any disconnected trusted peers.
298 fn handle_trusted_peers(&self) {
299 // Ensure that the trusted nodes are connected.
300 for peer_ip in self.router().trusted_peers() {
301 // If the peer is not connected, attempt to connect to it.
302 if !self.router().is_connected(peer_ip) {
303 debug!("Attempting to (re-)connect to trusted peer `{peer_ip}`");
304 self.router().connect(*peer_ip);
305 }
306 }
307 }
308
309 /// This function updates the puzzle if network has updated.
310 fn handle_puzzle_request(&self) {
311 // No-op
312 }
313
314 // Remove addresses whose ban time has expired.
315 fn handle_banned_ips(&self) {
316 self.tcp().banned_peers().remove_old_bans(Self::IP_BAN_TIME_IN_SECS);
317 }
318}