use super::{
config::Config,
fetcher::{Config as FetcherConfig, Fetcher},
ingress::{FetchRequest, Mailbox, Message},
metrics, wire, Producer,
};
use crate::Consumer;
use bytes::Bytes;
use commonware_cryptography::PublicKey;
use commonware_macros::select_loop;
use commonware_p2p::{
utils::codec::{wrap, WrappedSender},
Blocker, Provider, Receiver, Recipients, Sender,
};
use commonware_runtime::{
spawn_cell,
telemetry::metrics::{
histogram,
status::{CounterExt, GaugeExt, Status},
},
BufferPooler, Clock, ContextCell, Handle, Metrics, Spawner,
};
use commonware_utils::{
channel::{mpsc, oneshot},
futures::Pool as FuturesPool,
Span,
};
use futures::future::{self, Either};
use rand::Rng;
use std::{collections::HashMap, marker::PhantomData};
use tracing::{debug, error, trace, warn};
/// Represents a pending serve operation.
struct Serve<E: Clock, P: PublicKey> {
/// Measures the duration of the serve operation
timer: histogram::Timer<E>,
/// The peer that requested the data
peer: P,
/// The id of the request, used to correlate the response
id: u64,
/// The produced data, or an error if production failed
result: Result<Bytes, oneshot::error::RecvError>,
}
/// Manages incoming and outgoing P2P requests, coordinating fetch and serve operations.
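///
/// The engine drives two flows concurrently:
/// - Outgoing fetches: keys requested via the [`Mailbox`] are fetched from
///   peers and the results are delivered to the [`Consumer`].
/// - Incoming serves: requests from peers are answered with data obtained
///   from the [`Producer`].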
pub struct Engine<
E: BufferPooler + Clock + Spawner + Rng + Metrics,
P: PublicKey,
D: Provider<PublicKey = P>,
B: Blocker<PublicKey = P>,
Key: Span,
Con: Consumer<Key = Key, Value = Bytes, Failure = ()>,
Pro: Producer<Key = Key>,
NetS: Sender<PublicKey = P>,
NetR: Receiver<PublicKey = P>,
> {
/// Context used to spawn tasks, manage time, etc.
context: ContextCell<E>,
/// Consumes data that is fetched from the network
consumer: Con,
/// Produces data for incoming requests
producer: Pro,
/// Manages the list of peers that can be used to fetch data
peer_provider: D,
/// The blocker that will be used to block peers that send invalid responses
blocker: B,
/// Used to detect changes in the peer set
last_peer_set_id: Option<u64>,
/// Mailbox that makes and cancels fetch requests
mailbox: mpsc::Receiver<Message<Key, P>>,
/// Manages outgoing fetch requests
fetcher: Fetcher<E, P, Key, NetS>,
/// Track the start time of fetch operations
fetch_timers: HashMap<Key, histogram::Timer<E>>,
/// Holds futures that resolve once the `Producer` has produced the data.
/// Once the future is resolved, the data (or an error) is sent to the peer.
/// Has unbounded size; the number of concurrent requests should be limited
/// by the `Producer`, which may drop requests.
serves: FuturesPool<Serve<E, P>>,
/// Whether responses are sent with priority over other network messages
priority_responses: bool,
/// Metrics for the engine
metrics: metrics::Metrics<E>,
/// Phantom data for networking types
_r: PhantomData<NetR>,
}
impl<
E: BufferPooler + Clock + Spawner + Rng + Metrics,
P: PublicKey,
D: Provider<PublicKey = P>,
B: Blocker<PublicKey = P>,
Key: Span,
Con: Consumer<Key = Key, Value = Bytes, Failure = ()>,
Pro: Producer<Key = Key>,
NetS: Sender<PublicKey = P>,
NetR: Receiver<PublicKey = P>,
> Engine<E, P, D, B, Key, Con, Pro, NetS, NetR>
{
/// Creates a new [`Engine`] with the given configuration.
///
/// Returns the engine and a [`Mailbox`] that can be used to send messages to it.
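///
/// # Example
///
/// A minimal sketch of construction, assuming hypothetical `MyConsumer` and
/// `MyProducer` implementations of [`Consumer`] and [`Producer`], and
/// pre-built `peer_provider` and `blocker` values. Field names follow the
/// fields this constructor reads from [`Config`]; the timeout values are
/// illustrative only:
///
/// ```ignore
/// let cfg = Config {
///     me: my_public_key,
///     consumer: MyConsumer::default(),
///     producer: MyProducer::default(),
///     peer_provider,
///     blocker,
///     mailbox_size: 64,
///     initial: Duration::from_millis(100),
///     timeout: Duration::from_secs(2),
///     fetch_retry_timeout: Duration::from_millis(100),
///     priority_requests: false,
///     priority_responses: false,
/// };
/// let (engine, mailbox) = Engine::new(context, cfg);
/// ```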
pub fn new(context: E, cfg: Config<P, D, B, Key, Con, Pro>) -> (Self, Mailbox<Key, P>) {
let (sender, receiver) = mpsc::channel(cfg.mailbox_size);
// TODO(#1833): Metrics should use the post-start context
let metrics = metrics::Metrics::init(context.clone());
let fetcher = Fetcher::new(
context.with_label("fetcher"),
FetcherConfig {
me: cfg.me,
initial: cfg.initial,
timeout: cfg.timeout,
retry_timeout: cfg.fetch_retry_timeout,
priority_requests: cfg.priority_requests,
},
);
(
Self {
context: ContextCell::new(context),
consumer: cfg.consumer,
producer: cfg.producer,
peer_provider: cfg.peer_provider,
blocker: cfg.blocker,
last_peer_set_id: None,
mailbox: receiver,
fetcher,
serves: FuturesPool::default(),
priority_responses: cfg.priority_responses,
metrics,
fetch_timers: HashMap::new(),
_r: PhantomData,
},
Mailbox::new(sender),
)
}
/// Runs the engine until the context is stopped.
///
/// The engine will handle:
/// - Fetching data from other peers and notifying the `Consumer`
/// - Serving data to other peers by requesting it from the `Producer`
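///
/// # Example
///
/// A sketch of starting the engine, assuming `sender` and `receiver` are the
/// `commonware_p2p` channel halves registered for this protocol and that the
/// returned [`Handle`] can be awaited for completion:
///
/// ```ignore
/// let handle = engine.start((sender, receiver));
/// // ... issue fetches through the mailbox elsewhere ...
/// handle.await;
/// ```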
pub fn start(mut self, network: (NetS, NetR)) -> Handle<()> {
spawn_cell!(self.context, self.run(network).await)
}
/// Inner run loop called by `start`.
async fn run(mut self, network: (NetS, NetR)) {
// Wrap channel
let (mut sender, mut receiver) = wrap(
(),
self.context.network_buffer_pool().clone(),
network.0,
network.1,
);
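// Subscribe to peer set updates so the fetcher can reconcile its view
// of available peers whenever the set changes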
let peer_set_subscription = &mut self.peer_provider.subscribe().await;
select_loop! {
self.context,
on_start => {
// Update metrics
let _ = self
.metrics
.fetch_pending
.try_set(self.fetcher.len_pending());
let _ = self.metrics.fetch_active.try_set(self.fetcher.len_active());
let _ = self
.metrics
.peers_blocked
.try_set(self.fetcher.len_blocked());
let _ = self.metrics.serve_processing.try_set(self.serves.len());
// Get retry timeout (if any)
let deadline_pending = match self.fetcher.get_pending_deadline() {
Some(deadline) => Either::Left(self.context.sleep_until(deadline)),
None => Either::Right(future::pending()),
};
// Get requester timeout (if any)
let deadline_active = match self.fetcher.get_active_deadline() {
Some(deadline) => Either::Left(self.context.sleep_until(deadline)),
None => Either::Right(future::pending()),
};
},
on_stopped => {
debug!("shutdown");
self.serves.cancel_all();
},
// Handle peer set updates
Some(update) = peer_set_subscription.recv() else {
debug!("peer set subscription closed");
return;
} => {
if self.last_peer_set_id < Some(update.index) {
self.last_peer_set_id = Some(update.index);
self.fetcher.reconcile(update.latest.primary.as_ref());
}
},
// Handle active deadline
_ = deadline_active => {
if let Some(key) = self.fetcher.pop_active() {
debug!(?key, "requester timeout");
self.metrics.fetch.inc(Status::Failure);
self.fetcher.add_retry(key);
}
},
// Handle pending deadline
_ = deadline_pending => {
self.fetcher.fetch(&mut sender).await;
},
// Handle mailbox messages
Some(msg) = self.mailbox.recv() else {
error!("mailbox closed");
return;
} => {
match msg {
Message::Fetch(requests) => {
for FetchRequest { key, targets } in requests {
trace!(?key, "mailbox: fetch");
// Check if the fetch is already in progress
let is_new = !self.fetch_timers.contains_key(&key);
// Update targets
match targets {
Some(targets) => {
// Only add targets if this is a new fetch OR the existing
// fetch already has targets. Don't restrict an "all" fetch
// (no targets) to specific targets.
if is_new || self.fetcher.has_targets(&key) {
self.fetcher.add_targets(key.clone(), targets);
}
}
None => self.fetcher.clear_targets(&key),
}
// Only start new fetch if not already in progress
if is_new {
self.fetch_timers
.insert(key.clone(), self.metrics.fetch_duration.timer());
self.fetcher.add_ready(key);
} else {
trace!(?key, "updated targets for existing fetch");
}
}
}
Message::Cancel { key } => {
trace!(?key, "mailbox: cancel");
let mut guard = self.metrics.cancel.guard(Status::Dropped);
if self.fetcher.cancel(&key) {
guard.set(Status::Success);
self.fetch_timers.remove(&key).unwrap().cancel(); // must exist, don't record metric
self.consumer.failed(key.clone(), ()).await;
}
}
Message::Retain { predicate } => {
trace!("mailbox: retain");
// Remove from fetcher
self.fetcher.retain(&predicate);
// Clean up timers and notify consumer
let removed = self
.fetch_timers
.extract_if(|k, _| !predicate(k))
.collect::<Vec<_>>();
let count = removed.len() as u64;
for (key, timer) in removed {
timer.cancel();
self.consumer.failed(key, ()).await;
}
// Metrics
if count == 0 {
self.metrics.cancel.inc(Status::Dropped);
} else {
self.metrics.cancel.inc_by(Status::Success, count);
}
}
Message::Clear => {
trace!("mailbox: clear");
// Clear fetcher
self.fetcher.clear();
// Drain timers and notify consumer
let removed = self.fetch_timers.len() as u64;
for (key, timer) in self.fetch_timers.drain() {
timer.cancel();
self.consumer.failed(key, ()).await;
}
// Metrics
if removed == 0 {
self.metrics.cancel.inc(Status::Dropped);
} else {
self.metrics.cancel.inc_by(Status::Success, removed);
}
}
}
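// Invariant: every tracked fetch has exactly one associated timer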
assert_eq!(self.fetcher.len(), self.fetch_timers.len());
},
// Handle completed server requests
serve = self.serves.next_completed() => {
let Serve {
timer,
peer,
id,
result,
} = serve;
// Metrics and logs
match result {
Ok(_) => {
// Dropping the timer records the serve duration on success
self.metrics.serve.inc(Status::Success);
}
Err(ref err) => {
debug!(?err, ?peer, ?id, "serve failed");
timer.cancel();
self.metrics.serve.inc(Status::Failure);
}
}
// Send response to peer
self.handle_serve(&mut sender, peer, id, result, self.priority_responses)
.await;
},
// Handle network messages
msg = receiver.recv() => {
// Break if the receiver is closed
let (peer, msg) = match msg {
Ok(msg) => msg,
Err(err) => {
error!(?err, "receiver closed");
return;
}
};
// Skip if there is a decoding error
let msg = match msg {
Ok(msg) => msg,
Err(err) => {
trace!(?err, ?peer, "decode failed");
continue;
}
};
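// Dispatch on payload type: requests are served via the `Producer`, while
// responses and errors resolve outstanding fetches correlated by `id`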
match msg.payload {
wire::Payload::Request(key) => self.handle_network_request(peer, msg.id, key),
wire::Payload::Response(response) => {
self.handle_network_response(peer, msg.id, response).await
}
wire::Payload::Error => self.handle_network_error_response(peer, msg.id),
};
},
}
}
/// Sends the result of a serve operation back to the requesting peer:
/// the produced data on success, or an error response if the `Producer`
/// failed to produce the data.
async fn handle_serve(
&mut self,
sender: &mut WrappedSender<NetS, wire::Message<Key>>,
peer: P,
id: u64,
response: Result<Bytes, oneshot::error::RecvError>,
priority: bool,
) {
// Encode message
let payload: wire::Payload<Key> =
response.map_or_else(|_| wire::Payload::Error, wire::Payload::Response);
let msg = wire::Message { id, payload };
// Send message to peer
let result = sender
.send(Recipients::One(peer.clone()), msg, priority)
.await;
// Log result, but do not handle errors
match result {
Err(err) => error!(?err, ?peer, ?id, "serve send failed"),
Ok(to) if to.is_empty() => warn!(?peer, ?id, "serve send failed"),
Ok(_) => trace!(?peer, ?id, "serve sent"),
};
}
/// Handle a network request from a peer.
fn handle_network_request(&mut self, peer: P, id: u64, key: Key) {
// Serve the request
trace!(?peer, ?id, "peer request");
let mut producer = self.producer.clone();
let timer = self.metrics.serve_duration.timer();
self.serves.push(async move {
// Ask the producer for the data; it returns a receiver that resolves
// once the data is available (or fails if the request is dropped)
let receiver = producer.produce(key).await;
let result = receiver.await;
Serve {
timer,
peer,
id,
result,
}
});
}
/// Handle a data response from a peer.
///
/// If the `Consumer` accepts the data, the fetch completes; if it rejects
/// the data, the peer is blocked and the fetch is retried with other peers.
async fn handle_network_response(&mut self, peer: P, id: u64, response: Bytes) {
trace!(?peer, ?id, "peer response: data");
// Get the key associated with the response, if any
let Some(key) = self.fetcher.pop_by_id(id, &peer, true) else {
// It's possible that the key does not exist if the request was canceled
return;
};
// The peer had the data, so we can deliver it to the consumer
if self.consumer.deliver(key.clone(), response).await {
// Record metrics
self.metrics.fetch.inc(Status::Success);
self.fetch_timers.remove(&key).unwrap(); // must exist in the map, records metric on drop
// Clear all targets for this key
self.fetcher.clear_targets(&key);
return;
}
// If the data is invalid, we need to block the peer and try again
// (blocking the peer also removes any targets associated with it)
commonware_p2p::block!(self.blocker, peer.clone(), "invalid data received");
self.fetcher.block(peer);
self.metrics.fetch.inc(Status::Failure);
self.fetcher.add_retry(key);
}
/// Handle a network response from a peer that did not have the data.
fn handle_network_error_response(&mut self, peer: P, id: u64) {
trace!(?peer, ?id, "peer response: error");
// Get the key associated with the response, if any
let Some(key) = self.fetcher.pop_by_id(id, &peer, false) else {
// It's possible that the key does not exist if the request was canceled
return;
};
// The peer did not have the data, so we need to try again
self.metrics.fetch.inc(Status::Failure);
self.fetcher.add_retry(key);
}
}