#![allow(dead_code)]
use std::{fmt::Debug, future::Future, hash::Hash};
use n0_future::{future, FuturesUnordered};
use tokio::sync::{mpsc, oneshot};
/// Trait to reset an entity state in place.
///
/// In many cases this is just assigning the default value, but e.g. for an
/// `Arc<Mutex<T>>` assigning the default value means a new allocation, whereas
/// an in-place reset can reuse the existing one.
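///
/// # Example
///
/// A minimal sketch for a shared, internally mutable state (the `Shared` type
/// is illustrative):
///
/// ```ignore
/// #[derive(Debug, Clone, Default)]
/// struct Shared(std::sync::Arc<std::sync::Mutex<Vec<u8>>>);
///
/// impl Reset for Shared {
///     fn reset(&mut self) {
///         // Clear in place, reusing the existing allocation.
///         self.0.lock().unwrap().clear();
///     }
/// }
/// ```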
pub trait Reset: Default {
/// Reset the state to its default value.
fn reset(&mut self);
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ShutdownCause {
/// The entity is shutting down gracefully because the entity is idle.
Idle,
/// The entity is shutting down because the entity manager is shutting down.
Soft,
/// The entity is shutting down because the sender was dropped.
Drop,
}
/// Parameters for the entity manager system.
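///
/// # Example
///
/// A minimal sketch of an implementation (the types are illustrative;
/// `on_shutdown` would typically persist the entity state):
///
/// ```ignore
/// struct Counters;
///
/// impl Params for Counters {
///     type EntityId = u64;
///     type GlobalState = ();
///     // some Default + Reset + Clone + Send + Sync type
///     type EntityState = CounterState;
///
///     async fn on_shutdown(state: entity_actor::State<Self>, _cause: ShutdownCause) {
///         // persist `state.state` here, e.g. write it to disk
///     }
/// }
/// ```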
pub trait Params: Send + Sync + 'static {
/// Entity id type.
///
/// This does not require Copy to allow for more complex types, such as `String`,
/// but you have to make sure that ids are small and cheap to clone, since they are
/// used as keys in maps.
type EntityId: Debug + Hash + Eq + Clone + Send + Sync + 'static;
/// Global state type.
///
/// This is passed into all entity actors. It also needs to be a cheap handle,
/// since it is cloned into every entity actor.
/// If you don't need it, just set it to `()`.
type GlobalState: Debug + Clone + Send + Sync + 'static;
/// Entity state type.
///
/// This is the actual distinct per-entity state. This needs to implement
/// `Default` and a matching `Reset`. It also needs to implement `Clone`
/// since we unfortunately need to pass an owned copy of the state to the
/// callback - otherwise we run into some rust lifetime limitations
/// <https://github.com/rust-lang/rust/issues/100013>.
///
/// Frequently this is an `Arc<Mutex<T>>` or similar. Note that per entity
/// access is concurrent but not parallel, so you can use a more efficient
/// synchronization primitive like [`AtomicRefCell`](https://crates.io/crates/atomic_refcell) if you want to.
type EntityState: Default + Debug + Reset + Clone + Send + Sync + 'static;
/// Function being called when an entity actor is shutting down.
fn on_shutdown(
state: entity_actor::State<Self>,
cause: ShutdownCause,
) -> impl Future<Output = ()> + Send + 'static
where
Self: Sized;
}
/// Sent to the main actor and then delegated to the entity actor to spawn a new task.
pub(crate) struct Spawn<P: Params> {
id: P::EntityId,
f: Box<dyn FnOnce(SpawnArg<P>) -> future::Boxed<()> + Send>,
}
pub(crate) struct EntityShutdown;
/// Argument for the `EntityManager::spawn` function.
pub enum SpawnArg<P: Params> {
/// The entity is active, and we were able to spawn a task.
Active(ActiveEntityState<P>),
/// The entity is busy and cannot spawn a new task.
Busy,
/// The entity is dead.
Dead,
}
/// Sent from the entity actor to the main actor to notify that it is shutting down.
///
/// With this message the entity actor gives back the receiver for its command channel,
/// so it can be reused either immediately if commands come in during shutdown, or later
/// if the entity actor is reused for a different entity.
struct Shutdown<P: Params> {
id: P::EntityId,
receiver: mpsc::Receiver<entity_actor::Command<P>>,
}
struct ShutdownAll {
tx: oneshot::Sender<()>,
}
/// Sent from the entity actor to the main actor to notify that it has completed shutdown.
///
/// With this message the entity actor sends back the remaining state. The tasks set
/// at this point must be empty, as the entity actor has already completed all tasks.
struct ShutdownComplete<P: Params> {
state: ActiveEntityState<P>,
tasks: FuturesUnordered<future::Boxed<()>>,
}
mod entity_actor {
#![allow(dead_code)]
use n0_future::{future, FuturesUnordered, StreamExt};
use tokio::sync::mpsc;
use super::{
EntityShutdown, Params, Reset, Shutdown, ShutdownCause, ShutdownComplete, Spawn, SpawnArg,
};
/// State of an active entity.
#[derive(Debug)]
pub struct State<P: Params> {
/// The entity id.
pub id: P::EntityId,
/// A copy of the global state.
pub global: P::GlobalState,
/// The per-entity state which might have internal mutability.
pub state: P::EntityState,
}
impl<P: Params> Clone for State<P> {
fn clone(&self) -> Self {
Self {
id: self.id.clone(),
global: self.global.clone(),
state: self.state.clone(),
}
}
}
pub enum Command<P: Params> {
Spawn(Spawn<P>),
EntityShutdown(EntityShutdown),
}
impl<P: Params> From<EntityShutdown> for Command<P> {
fn from(_: EntityShutdown) -> Self {
Self::EntityShutdown(EntityShutdown)
}
}
#[derive(Debug)]
pub struct Actor<P: Params> {
pub recv: mpsc::Receiver<Command<P>>,
pub main: mpsc::Sender<super::main_actor::InternalCommand<P>>,
pub state: State<P>,
pub tasks: FuturesUnordered<future::Boxed<()>>,
}
impl<P: Params> Actor<P> {
pub async fn run(mut self) {
loop {
tokio::select! {
command = self.recv.recv() => {
let Some(command) = command else {
// Channel closed, this means that the main actor is shutting down.
self.drop_shutdown_state().await;
break;
};
match command {
Command::Spawn(spawn) => {
let task = (spawn.f)(SpawnArg::Active(self.state.clone()));
self.tasks.push(task);
}
Command::EntityShutdown(_) => {
self.soft_shutdown_state().await;
break;
}
}
}
Some(_) = self.tasks.next(), if !self.tasks.is_empty() => {}
}
if self.tasks.is_empty() && self.recv.is_empty() {
// No more tasks and no more commands, we can recycle the actor.
self.recycle_state().await;
break; // Exit the loop, actor is done.
}
}
}
/// Drop shutdown state.
///
/// All senders for our receive channel were dropped, so we shut down without waiting for any tasks to complete.
async fn drop_shutdown_state(self) {
let Self { state, .. } = self;
P::on_shutdown(state, ShutdownCause::Drop).await;
}
/// Soft shutdown state.
///
/// We have received an explicit shutdown command, so we wait for all tasks to complete and then call the shutdown function.
async fn soft_shutdown_state(mut self) {
while (self.tasks.next().await).is_some() {}
P::on_shutdown(self.state.clone(), ShutdownCause::Soft).await;
}
async fn recycle_state(self) {
// we can't check if recv is empty here, since new messages might come in while we are in recycle_state.
assert!(
self.tasks.is_empty(),
"Tasks must be empty before recycling"
);
// notify main actor that we are starting to shut down.
// if the main actor is shutting down, this could fail, but we don't care.
self.main
.send(
Shutdown {
id: self.state.id.clone(),
receiver: self.recv,
}
.into(),
)
.await
.ok();
P::on_shutdown(self.state.clone(), ShutdownCause::Idle).await;
// Notify the main actor that we have completed shutdown.
// here we also give back the rest of ourselves so the main actor can recycle us.
self.main
.send(
ShutdownComplete {
state: self.state,
tasks: self.tasks,
}
.into(),
)
.await
.ok();
}
/// Recycle the actor for reuse by resetting its per-entity state.
///
/// This also checks several invariants:
/// - There must be no pending messages in the receive channel.
/// - The sender must have a strong count of 1, meaning no other references exist.
/// - The tasks set must be empty, meaning no tasks are running.
///
/// The caller must additionally ensure that the global state matches and that
/// no other references to the entity state exist; these are not checked here.
pub fn recycle(&mut self) {
assert!(
self.recv.is_empty(),
"Cannot recycle actor with pending messages"
);
assert!(
self.recv.sender_strong_count() == 1,
"There must be only one sender left"
);
assert!(
self.tasks.is_empty(),
"Tasks must be empty before recycling"
);
self.state.state.reset();
}
}
}
pub use entity_actor::State as ActiveEntityState;
pub use main_actor::ActorState as EntityManagerState;
mod main_actor {
#![allow(dead_code)]
use std::{collections::HashMap, future::Future};
use n0_future::{future, FuturesUnordered};
use tokio::{sync::mpsc, task::JoinSet};
use tracing::{error, warn};
use super::{
entity_actor, EntityShutdown, Params, Reset, Shutdown, ShutdownAll, ShutdownComplete,
Spawn, SpawnArg,
};
pub(super) enum Command<P: Params> {
Spawn(Spawn<P>),
ShutdownAll(ShutdownAll),
}
impl<P: Params> From<ShutdownAll> for Command<P> {
fn from(shutdown_all: ShutdownAll) -> Self {
Self::ShutdownAll(shutdown_all)
}
}
pub(super) enum InternalCommand<P: Params> {
ShutdownComplete(ShutdownComplete<P>),
Shutdown(Shutdown<P>),
}
impl<P: Params> From<Shutdown<P>> for InternalCommand<P> {
fn from(shutdown: Shutdown<P>) -> Self {
Self::Shutdown(shutdown)
}
}
impl<P: Params> From<ShutdownComplete<P>> for InternalCommand<P> {
fn from(shutdown_complete: ShutdownComplete<P>) -> Self {
Self::ShutdownComplete(shutdown_complete)
}
}
#[derive(Debug)]
pub enum EntityHandle<P: Params> {
/// A running entity actor.
Live {
send: mpsc::Sender<entity_actor::Command<P>>,
},
ShuttingDown {
send: mpsc::Sender<entity_actor::Command<P>>,
recv: mpsc::Receiver<entity_actor::Command<P>>,
},
}
impl<P: Params> EntityHandle<P> {
pub fn send(&self) -> &mpsc::Sender<entity_actor::Command<P>> {
match self {
EntityHandle::Live { send } => send,
EntityHandle::ShuttingDown { send, .. } => send,
}
}
}
/// State machine for an entity actor manager.
///
/// Use this if you don't want a separate manager actor, but want to inline the entity
/// actor management into your own main actor.
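///
/// A sketch of driving the state machine from your own actor loop (assuming a
/// `Params` impl `MyParams`; the constructor arguments are illustrative):
///
/// ```ignore
/// let mut state = ActorState::<MyParams>::new(global, 16, 8, 8, 2);
/// let mut tasks = tokio::task::JoinSet::new();
/// loop {
///     tokio::select! {
///         // your own inbox would be polled here as another branch,
///         // calling state.spawn(id, f) as needed
///         fut = state.tick() => {
///             // tick may hand back an entity actor future that must be spawned
///             if let Some(fut) = fut {
///                 tasks.spawn(fut);
///             }
///         }
///     }
/// }
/// ```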
#[derive(Debug)]
pub struct ActorState<P: Params> {
/// Channel to receive internal commands from the entity actors.
/// This channel will never be closed since we also hold a sender to it.
internal_recv: mpsc::Receiver<InternalCommand<P>>,
/// Channel to send internal commands to ourselves, to hand out to entity actors.
internal_send: mpsc::Sender<InternalCommand<P>>,
/// Map of live entity actors.
live: HashMap<P::EntityId, EntityHandle<P>>,
/// Global state shared across all entity actors.
state: P::GlobalState,
/// Pool of inactive entity actors to reuse.
pool: Vec<(
mpsc::Sender<entity_actor::Command<P>>,
entity_actor::Actor<P>,
)>,
/// Maximum size of the inbox of an entity actor.
entity_inbox_size: usize,
/// Initial capacity of the futures set for entity actors.
entity_futures_initial_capacity: usize,
}
impl<P: Params> ActorState<P> {
pub fn new(
state: P::GlobalState,
pool_capacity: usize,
entity_inbox_size: usize,
entity_response_inbox_size: usize,
entity_futures_initial_capacity: usize,
) -> Self {
let (internal_send, internal_recv) = mpsc::channel(entity_response_inbox_size);
Self {
internal_recv,
internal_send,
live: HashMap::new(),
state,
pool: Vec::with_capacity(pool_capacity),
entity_inbox_size,
entity_futures_initial_capacity,
}
}
/// Friendly version of `spawn_boxed` that does the boxing for you.
#[must_use = "this function may return a future that must be spawned by the caller"]
pub async fn spawn<F, Fut>(
&mut self,
id: P::EntityId,
f: F,
) -> Option<impl Future<Output = ()> + Send + 'static>
where
F: FnOnce(SpawnArg<P>) -> Fut + Send + 'static,
Fut: Future<Output = ()> + Send + 'static,
{
self.spawn_boxed(
id,
Box::new(|x| {
Box::pin(async move {
f(x).await;
})
}),
)
.await
}
#[must_use = "this function may return a future that must be spawned by the caller"]
pub async fn spawn_boxed(
&mut self,
id: P::EntityId,
f: Box<dyn FnOnce(SpawnArg<P>) -> future::Boxed<()> + Send>,
) -> Option<impl Future<Output = ()> + Send + 'static> {
let (entity_handle, task) = self.get_or_create(id.clone());
let sender = entity_handle.send();
if let Err(e) =
sender.try_send(entity_actor::Command::Spawn(Spawn { id: id.clone(), f }))
{
match e {
mpsc::error::TrySendError::Full(cmd) => {
let entity_actor::Command::Spawn(spawn) = cmd else {
unreachable!()
};
warn!(
"Entity actor inbox is full, cannot send command to entity actor {:?}.",
id
);
// We await the callback here, inside the main actor loop. That seems fine,
// since the entity actor is busy anyway; slowing things down a bit may even help.
(spawn.f)(SpawnArg::Busy).await;
}
mpsc::error::TrySendError::Closed(cmd) => {
let entity_actor::Command::Spawn(spawn) = cmd else {
unreachable!()
};
error!(
"Entity actor inbox is closed, cannot send command to entity actor {:?}.",
id
);
// give the caller a chance to react to this bad news.
// at this point we are in trouble anyway, so awaiting is going to be the least of our problems.
(spawn.f)(SpawnArg::Dead).await;
}
}
};
task
}
/// This function needs to be polled by the owner of the actor state to advance the
/// entity manager state machine. If it returns a future, that future must be spawned
/// by the caller.
#[must_use = "this function may return a future that must be spawned by the caller"]
pub async fn tick(&mut self) -> Option<impl Future<Output = ()> + Send + 'static> {
if let Some(cmd) = self.internal_recv.recv().await {
match cmd {
InternalCommand::Shutdown(Shutdown { id, receiver }) => {
let Some(entity_handle) = self.live.remove(&id) else {
error!("Received shutdown command for unknown entity actor {id:?}");
return None;
};
let EntityHandle::Live { send } = entity_handle else {
error!(
"Received shutdown command for entity actor {id:?} that is already shutting down"
);
return None;
};
self.live.insert(
id.clone(),
EntityHandle::ShuttingDown {
send,
recv: receiver,
},
);
}
InternalCommand::ShutdownComplete(ShutdownComplete { state, tasks }) => {
let id = state.id.clone();
let Some(entity_handle) = self.live.remove(&id) else {
error!(
"Received shutdown complete command for unknown entity actor {id:?}"
);
return None;
};
let EntityHandle::ShuttingDown { send, recv } = entity_handle else {
error!(
"Received shutdown complete command for entity actor {id:?} that is not shutting down"
);
return None;
};
// re-assemble the actor from the parts
let mut actor = entity_actor::Actor {
main: self.internal_send.clone(),
recv,
state,
tasks,
};
if actor.recv.is_empty() {
// No commands during shutdown, we can recycle the actor.
self.recycle(send, actor);
} else {
actor.state.state.reset();
self.live.insert(id.clone(), EntityHandle::Live { send });
return Some(actor.run());
}
}
}
}
None
}
/// Send a shutdown command to all live entity actors.
pub async fn shutdown(self) {
for handle in self.live.values() {
handle.send().send(EntityShutdown {}.into()).await.ok();
}
}
/// Get or create an entity actor for the given id.
///
/// If this function returns a future, it must be spawned by the caller.
fn get_or_create(
&mut self,
id: P::EntityId,
) -> (
&mut EntityHandle<P>,
Option<impl Future<Output = ()> + Send + 'static>,
) {
let mut task = None;
let handle = self.live.entry(id.clone()).or_insert_with(|| {
if let Some((send, mut actor)) = self.pool.pop() {
// Get an actor from the pool of inactive actors and initialize it.
actor.state.id = id.clone();
actor.state.global = self.state.clone();
// strictly speaking this is not needed, since we reset the state when adding the actor to the pool.
actor.state.state.reset();
task = Some(actor.run());
EntityHandle::Live { send }
} else {
// Create a new entity actor and inbox.
let (send, recv) = mpsc::channel(self.entity_inbox_size);
let state: entity_actor::State<P> = entity_actor::State {
id: id.clone(),
global: self.state.clone(),
state: Default::default(),
};
let actor = entity_actor::Actor {
main: self.internal_send.clone(),
recv,
state,
tasks: FuturesUnordered::with_capacity(
self.entity_futures_initial_capacity,
),
};
task = Some(actor.run());
EntityHandle::Live { send }
}
});
(handle, task)
}
fn recycle(
&mut self,
sender: mpsc::Sender<entity_actor::Command<P>>,
mut actor: entity_actor::Actor<P>,
) {
assert!(sender.strong_count() == 1);
// todo: check that sender and receiver are the same channel. tokio does not have an api for this, unfortunately.
// reset the actor in any case, just to check the invariants.
actor.recycle();
// Recycle the actor for later use.
if self.pool.len() < self.pool.capacity() {
self.pool.push((sender, actor));
}
}
}
pub struct Actor<P: Params> {
/// Channel to receive commands from the outside world.
/// If this channel is closed, it means we need to shut down in a hurry.
recv: mpsc::Receiver<Command<P>>,
/// Tasks that are currently running.
tasks: JoinSet<()>,
/// Internal state of the actor
state: ActorState<P>,
}
impl<P: Params> Actor<P> {
pub fn new(
state: P::GlobalState,
recv: tokio::sync::mpsc::Receiver<Command<P>>,
pool_capacity: usize,
entity_inbox_size: usize,
entity_response_inbox_size: usize,
entity_futures_initial_capacity: usize,
) -> Self {
Self {
recv,
tasks: JoinSet::new(),
state: ActorState::new(
state,
pool_capacity,
entity_inbox_size,
entity_response_inbox_size,
entity_futures_initial_capacity,
),
}
}
pub async fn run(mut self) {
enum SelectOutcome<A, B, C> {
Command(A),
Tick(B),
TaskDone(C),
}
loop {
let res = tokio::select! {
x = self.recv.recv() => SelectOutcome::Command(x),
x = self.state.tick() => SelectOutcome::Tick(x),
Some(task) = self.tasks.join_next(), if !self.tasks.is_empty() => SelectOutcome::TaskDone(task),
};
match res {
SelectOutcome::Command(cmd) => {
let Some(cmd) = cmd else {
// Channel closed, this means that the main actor is shutting down.
self.hard_shutdown().await;
break;
};
match cmd {
Command::Spawn(spawn) => {
if let Some(task) = self.state.spawn_boxed(spawn.id, spawn.f).await
{
self.tasks.spawn(task);
}
}
Command::ShutdownAll(arg) => {
self.soft_shutdown().await;
arg.tx.send(()).ok();
break;
}
}
// Handle incoming command
}
SelectOutcome::Tick(future) => {
if let Some(task) = future {
self.tasks.spawn(task);
}
}
SelectOutcome::TaskDone(result) => {
// Handle completed task
if let Err(e) = result {
error!("Task failed: {e:?}");
}
}
}
}
}
async fn soft_shutdown(self) {
let Self {
mut tasks, state, ..
} = self;
state.shutdown().await;
while let Some(res) = tasks.join_next().await {
if let Err(e) = res {
eprintln!("Task failed during shutdown: {e:?}");
}
}
}
async fn hard_shutdown(self) {
let Self {
mut tasks, state, ..
} = self;
// this is needed so calls to internal_send in idle shutdown fail fast.
// otherwise we would have to drain the channel, but we don't care about the messages at
// this point.
drop(state);
while let Some(res) = tasks.join_next().await {
if let Err(e) = res {
eprintln!("Task failed during shutdown: {e:?}");
}
}
}
}
}
/// A manager for entities identified by an entity id.
///
/// The manager provides parallelism between entities, but just concurrency within a single entity.
/// This is useful if the entity wraps an external resource such as a file that does not benefit
/// from parallelism.
///
/// The entity manager internally uses a main actor and per-entity actors. Per-entity
/// actors and their inbox queues are recycled when they become idle, to save allocations.
///
/// You can mostly ignore these implementation details, except when you want to customize the
/// queue sizes in the [`Options`] struct.
///
/// The main entry point is the [`EntityManager::spawn`] function.
///
/// Dropping the `EntityManager` will shut down the entity actors without waiting for their
/// tasks to complete. For a more gentle shutdown, use the [`EntityManager::shutdown`] function
/// that does wait for tasks to complete.
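///
/// # Example
///
/// A sketch of typical usage (assuming a `Params` impl `Counters` with
/// `EntityId = u64` and `GlobalState = ()`):
///
/// ```ignore
/// let manager = EntityManager::<Counters>::new((), Options::default());
/// manager.spawn(42, |arg| async move {
///     if let SpawnArg::Active(state) = arg {
///         // access the per-entity state here
///     }
/// }).await?;
/// manager.shutdown().await?;
/// ```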
#[derive(Debug, Clone)]
pub struct EntityManager<P: Params>(mpsc::Sender<main_actor::Command<P>>);
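/// Configuration options for an [`EntityManager`].
///
/// A sketch of overriding a single knob (the value is illustrative):
///
/// ```ignore
/// let options = Options {
///     entity_inbox_size: 32,
///     ..Options::default()
/// };
/// ```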
#[derive(Debug, Clone, Copy)]
pub struct Options {
/// Maximum number of inactive entity actors that are being pooled for reuse.
pub pool_capacity: usize,
/// Size of the inbox for the manager actor.
pub inbox_size: usize,
/// Size of the inbox for entity actors.
pub entity_inbox_size: usize,
/// Size of the inbox for entity actor responses to the manager actor.
pub entity_response_inbox_size: usize,
/// Initial capacity of the futures set for entity actors.
///
/// Set this to the expected average concurrency level of your entities.
pub entity_futures_initial_capacity: usize,
}
impl Default for Options {
fn default() -> Self {
Self {
pool_capacity: 10,
inbox_size: 10,
entity_inbox_size: 10,
entity_response_inbox_size: 100,
entity_futures_initial_capacity: 16,
}
}
}
impl<P: Params> EntityManager<P> {
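/// Create a new entity manager with the given global state and options.
///
/// This spawns the main actor onto the async runtime via `n0_future::task::spawn`.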
pub fn new(state: P::GlobalState, options: Options) -> Self {
let (send, recv) = mpsc::channel(options.inbox_size);
let actor = main_actor::Actor::new(
state,
recv,
options.pool_capacity,
options.entity_inbox_size,
options.entity_response_inbox_size,
options.entity_futures_initial_capacity,
);
n0_future::task::spawn(actor.run());
Self(send)
}
/// Spawn a new task on the entity actor with the given id.
///
/// Unless the world is ending - e.g. tokio runtime is shutting down - the passed function
/// is guaranteed to be called. However, there is no guarantee that the entity actor is
/// alive and responsive. See [`SpawnArg`] for details.
///
/// Multiple callbacks for the same entity will be executed sequentially. There is no
/// parallelism within a single entity. So you can use synchronization primitives that
/// assume unique access in `P::EntityState`. And even if you do use multithreaded
/// synchronization primitives, they will never be contended.
///
/// The future returned by `f` will be executed concurrently with other tasks, but again
/// there will be no real parallelism within a single entity actor.
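///
/// # Example
///
/// A sketch of handling all three [`SpawnArg`] cases (the entity id `42` is
/// illustrative):
///
/// ```ignore
/// manager.spawn(42, |arg| async move {
///     match arg {
///         SpawnArg::Active(state) => { /* do the actual work on `state` */ }
///         SpawnArg::Busy => { /* the entity inbox is full, back off */ }
///         SpawnArg::Dead => { /* the entity actor is gone, e.g. after a panic */ }
///     }
/// }).await?;
/// ```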
pub async fn spawn<F, Fut>(&self, id: P::EntityId, f: F) -> Result<(), &'static str>
where
F: FnOnce(SpawnArg<P>) -> Fut + Send + 'static,
Fut: future::Future<Output = ()> + Send + 'static,
{
let spawn = Spawn {
id,
f: Box::new(|arg| {
Box::pin(async move {
f(arg).await;
})
}),
};
self.0
.send(main_actor::Command::Spawn(spawn))
.await
.map_err(|_| "Failed to send spawn command")
}
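/// Gracefully shut down the entity manager.
///
/// This asks all live entity actors to shut down and waits until all
/// tasks spawned by the main actor have completed.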
pub async fn shutdown(&self) -> std::result::Result<(), &'static str> {
let (tx, rx) = oneshot::channel();
self.0
.send(ShutdownAll { tx }.into())
.await
.map_err(|_| "Failed to send shutdown command")?;
rx.await
.map_err(|_| "Failed to receive shutdown confirmation")
}
}
#[cfg(test)]
mod tests {
//! Tests for the entity manager.
//!
//! We implement a simple database for u128 counters, identified by u64 ids,
//! with both an in-memory and a file-based implementation.
//!
//! The database does internal consistency checks, to ensure that each
//! entity is only ever accessed by a single tokio task at a time, and to
//! ensure that wakeup and shutdown events are interleaved.
//!
//! We also check that the database behaves correctly by comparing with an
//! in-memory implementation.
//!
//! Database operations are done in parallel, so the fact that we are using
//! AtomicRefCell provides another test - if there were parallel write access
//! to a single entity due to a bug, it would panic.
use std::collections::HashMap;
use n0_future::{BufferedStreamExt, StreamExt};
use testresult::TestResult;
use super::*;
// a simple database for u128 counters, identified by u64 ids.
trait CounterDb {
async fn add(&self, id: u64, value: u128) -> Result<(), &'static str>;
async fn get(&self, id: u64) -> Result<u128, &'static str>;
async fn shutdown(&self) -> Result<(), &'static str>;
async fn check_consistency(&self, values: HashMap<u64, u128>);
}
#[derive(Debug, PartialEq, Eq)]
enum Event {
Wakeup,
Shutdown,
}
mod mem {
//! The in-memory database uses a HashMap in the global state to store
//! the values of the counters. Loading means reading from the global
//! state into the entity state, and persisting means writing to the
//! global state from the entity state.
use std::{
collections::{HashMap, HashSet},
sync::{
atomic::{AtomicUsize, Ordering},
Arc, Mutex,
},
};
use atomic_refcell::AtomicRefCell;
use n0_future::time::Instant;
use super::*;
#[derive(Debug, Default)]
struct Inner {
value: Option<u128>,
tasks: HashSet<tokio::task::Id>,
}
#[derive(Debug, Clone, Default)]
struct State(Arc<AtomicRefCell<Inner>>);
impl Reset for State {
fn reset(&mut self) {
*self.0.borrow_mut() = Default::default();
}
}
#[derive(Debug, Default)]
struct Global {
// the "database" of entity values
data: HashMap<u64, u128>,
// log of awake and shutdown events
log: HashMap<u64, Vec<(Event, Instant)>>,
}
struct Counters;
impl Params for Counters {
type EntityId = u64;
type GlobalState = Arc<Mutex<Global>>;
type EntityState = State;
async fn on_shutdown(entity: entity_actor::State<Self>, _cause: ShutdownCause) {
let state = entity.state.0.borrow();
let mut global = entity.global.lock().unwrap();
assert_eq!(state.tasks.len(), 1);
// persist the state
if let Some(value) = state.value {
global.data.insert(entity.id, value);
}
// log the shutdown event
global
.log
.entry(entity.id)
.or_default()
.push((Event::Shutdown, Instant::now()));
}
}
pub struct MemDb {
m: EntityManager<Counters>,
global: Arc<Mutex<Global>>,
}
impl entity_actor::State<Counters> {
async fn with_value(&self, f: impl FnOnce(&mut u128)) -> Result<(), &'static str> {
let mut state = self.state.0.borrow_mut();
// lazily load the data from the database
if state.value.is_none() {
let mut global = self.global.lock().unwrap();
state.value = Some(global.data.get(&self.id).copied().unwrap_or_default());
// log the wakeup event
global
.log
.entry(self.id)
.or_default()
.push((Event::Wakeup, Instant::now()));
}
// insert the task id into the tasks set to check that access is always
// from the same tokio task (not necessarily the same thread).
state.tasks.insert(tokio::task::id());
// do the actual work
let r = state.value.as_mut().unwrap();
f(r);
Ok(())
}
}
impl MemDb {
pub fn new() -> Self {
let global = Arc::new(Mutex::new(Global::default()));
Self {
global: global.clone(),
m: EntityManager::<Counters>::new(global, Options::default()),
}
}
}
impl super::CounterDb for MemDb {
async fn add(&self, id: u64, value: u128) -> Result<(), &'static str> {
self.m
.spawn(id, move |arg| async move {
match arg {
SpawnArg::Active(state) => {
state
.with_value(|v| *v = v.wrapping_add(value))
.await
.unwrap();
}
SpawnArg::Busy => println!("Entity actor is busy"),
SpawnArg::Dead => println!("Entity actor is dead"),
}
})
.await
}
async fn get(&self, id: u64) -> Result<u128, &'static str> {
let (tx, rx) = oneshot::channel();
self.m
.spawn(id, move |arg| async move {
match arg {
SpawnArg::Active(state) => {
state
.with_value(|v| {
tx.send(*v)
.unwrap_or_else(|_| println!("Failed to send value"))
})
.await
.unwrap();
}
SpawnArg::Busy => println!("Entity actor is busy"),
SpawnArg::Dead => println!("Entity actor is dead"),
}
})
.await?;
rx.await.map_err(|_| "Failed to receive value")
}
async fn shutdown(&self) -> Result<(), &'static str> {
self.m.shutdown().await
}
async fn check_consistency(&self, values: HashMap<u64, u128>) {
let global = self.global.lock().unwrap();
assert_eq!(global.data, values, "Data mismatch");
for id in values.keys() {
let log = global.log.get(id).unwrap();
if !log.len().is_multiple_of(2) {
panic!(
"Log for entity {id} must contain an even number of events.\n{log:#?}"
);
}
for (i, (event, _)) in log.iter().enumerate() {
assert_eq!(
*event,
if i % 2 == 0 {
Event::Wakeup
} else {
Event::Shutdown
},
"Unexpected event type"
);
}
}
}
}
/// If an entity actor is so busy that it can't drain its inbox in time, we will
/// get a SpawnArg::Busy instead of access to the actual state.
///
/// This will only happen if the system is seriously overloaded, since
/// the entity actor just spawns tasks for each message. So here we
/// simulate it by just not spawning the task as we are supposed to.
#[tokio::test]
async fn test_busy() -> TestResult<()> {
let mut state = EntityManagerState::<Counters>::new(
Arc::new(Mutex::new(Global::default())),
1024,
8,
8,
2,
);
let active = Arc::new(AtomicUsize::new(0));
let busy = Arc::new(AtomicUsize::new(0));
let inc = || {
let active = active.clone();
let busy = busy.clone();
|arg: SpawnArg<Counters>| async move {
match arg {
SpawnArg::Active(_) => {
active.fetch_add(1, Ordering::SeqCst);
}
SpawnArg::Busy => {
busy.fetch_add(1, Ordering::SeqCst);
}
SpawnArg::Dead => {
println!("Entity actor is dead");
}
}
}
};
let fut1 = state.spawn(1, inc()).await;
assert!(fut1.is_some(), "First spawn should give us a task to spawn");
for _ in 0..9 {
let fut = state.spawn(1, inc()).await;
assert!(
fut.is_none(),
"Subsequent spawns should assume first task has been spawned"
);
}
assert_eq!(
active.load(Ordering::SeqCst),
0,
"Active should have never been called, since we did not spawn the task!"
);
assert_eq!(busy.load(Ordering::SeqCst), 2, "Busy should have been called two times, since we sent 10 msgs to a queue with capacity 8, and nobody is draining it");
Ok(())
}
/// If there is a panic in any of the fns that run on an entity actor,
/// the entire entity becomes dead. This can not be recovered from, and
/// trying to spawn a new task on the dead entity actor will result in
/// a SpawnArg::Dead.
#[tokio::test]
async fn test_dead() -> TestResult<()> {
let manager = EntityManager::<Counters>::new(
Arc::new(Mutex::new(Global::default())),
Options::default(),
);
let (tx, rx) = oneshot::channel();
let killer = |arg: SpawnArg<Counters>| async move {
if let SpawnArg::Active(_) = arg {
tx.send(()).ok();
panic!("Panic to kill the task");
}
};
// spawn a task that kills the entity actor
manager.spawn(1, killer).await?;
rx.await.expect("Failed to receive kill confirmation");
let (tx, rx) = oneshot::channel();
let counter = |arg: SpawnArg<Counters>| async move {
if let SpawnArg::Dead = arg {
tx.send(()).ok();
}
};
// spawn another task on the now-dead entity actor
manager.spawn(1, counter).await?;
rx.await.expect("Failed to receive dead confirmation");
Ok(())
}
}
mod fs {
//! The fs db uses one file per counter, stored as a 16-byte big-endian u128.
use std::{
collections::HashSet,
path::{Path, PathBuf},
sync::{Arc, Mutex},
};
use atomic_refcell::AtomicRefCell;
use n0_future::time::Instant;
use super::*;
#[derive(Debug, Clone, Default)]
struct State {
value: Option<u128>,
tasks: HashSet<tokio::task::Id>,
}
#[derive(Debug)]
struct Global {
path: PathBuf,
log: HashMap<u64, Vec<(Event, Instant)>>,
}
#[derive(Debug, Clone, Default)]
struct EntityState(Arc<AtomicRefCell<State>>);
impl Reset for EntityState {
fn reset(&mut self) {
*self.0.borrow_mut() = Default::default();
}
}
fn get_path(root: impl AsRef<Path>, id: u64) -> PathBuf {
root.as_ref().join(hex::encode(id.to_be_bytes()))
}
impl entity_actor::State<Counters> {
async fn with_value(&self, f: impl FnOnce(&mut u128)) -> Result<(), &'static str> {
let Ok(mut r) = self.state.0.try_borrow_mut() else {
panic!("failed to borrow state mutably");
};
if r.value.is_none() {
let mut global = self.global.lock().unwrap();
global
.log
.entry(self.id)
.or_default()
.push((Event::Wakeup, Instant::now()));
let path = get_path(&global.path, self.id);
// note: if we were to use async IO, we would need to make sure not to hold the
// lock guard over an await point. The entity manager makes sure that all fns
// are run on the same tokio task, but there is still concurrency, which
// a mutable borrow of the state does not allow.
let value = match std::fs::read(path) {
Ok(value) => value,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
// If the file does not exist, we initialize it to 0.
vec![0; 16]
}
Err(_) => return Err("Failed to read disk state"),
};
let value = u128::from_be_bytes(
value.try_into().map_err(|_| "Invalid disk state format")?,
);
r.value = Some(value);
}
let Some(value) = r.value.as_mut() else {
panic!("value must be loaded at this point");
};
f(value);
Ok(())
}
}
struct Counters;
impl Params for Counters {
type EntityId = u64;
type GlobalState = Arc<Mutex<Global>>;
type EntityState = EntityState;
async fn on_shutdown(state: entity_actor::State<Self>, _cause: ShutdownCause) {
let r = state.state.0.borrow();
let mut global = state.global.lock().unwrap();
if let Some(value) = r.value {
let path = get_path(&global.path, state.id);
let value_bytes = value.to_be_bytes();
std::fs::write(&path, value_bytes).expect("Failed to write disk state");
}
global
.log
.entry(state.id)
.or_default()
.push((Event::Shutdown, Instant::now()));
}
}
pub struct FsDb {
global: Arc<Mutex<Global>>,
m: EntityManager<Counters>,
}
impl FsDb {
pub fn new(path: impl AsRef<Path>) -> Self {
let global = Global {
path: path.as_ref().to_owned(),
log: HashMap::new(),
};
let global = Arc::new(Mutex::new(global));
Self {
global: global.clone(),
m: EntityManager::<Counters>::new(global, Options::default()),
}
}
}
impl super::CounterDb for FsDb {
async fn add(&self, id: u64, value: u128) -> Result<(), &'static str> {
self.m
.spawn(id, move |arg| async move {
match arg {
SpawnArg::Active(state) => {
state
.with_value(|v| *v = v.wrapping_add(value))
.await
.unwrap();
}
SpawnArg::Busy => println!("Entity actor is busy"),
SpawnArg::Dead => println!("Entity actor is dead"),
}
})
.await
}
async fn get(&self, id: u64) -> Result<u128, &'static str> {
let (tx, rx) = oneshot::channel();
self.m
.spawn(id, move |arg| async move {
match arg {
SpawnArg::Active(state) => {
state
.with_value(|v| {
tx.send(*v)
.unwrap_or_else(|_| println!("Failed to send value"))
})
.await
.unwrap();
}
SpawnArg::Busy => println!("Entity actor is busy"),
SpawnArg::Dead => println!("Entity actor is dead"),
}
})
.await?;
rx.await.map_err(|_| "Failed to receive value in get")
}
async fn shutdown(&self) -> Result<(), &'static str> {
self.m.shutdown().await
}
async fn check_consistency(&self, values: HashMap<u64, u128>) {
let global = self.global.lock().unwrap();
for (id, value) in &values {
let path = get_path(&global.path, *id);
let disk_value = match std::fs::read(path) {
Ok(data) => u128::from_be_bytes(data.try_into().unwrap()),
Err(e) if e.kind() == std::io::ErrorKind::NotFound => 0,
Err(_) => panic!("Failed to read disk state for id {id}"),
};
assert_eq!(disk_value, *value, "Disk value mismatch for id {id}");
}
for id in values.keys() {
let log = global.log.get(id).unwrap();
assert!(
log.len().is_multiple_of(2),
"Log must contain alternating wakeup and shutdown events"
);
for (i, (event, _)) in log.iter().enumerate() {
assert_eq!(
*event,
if i % 2 == 0 {
Event::Wakeup
} else {
Event::Shutdown
},
"Unexpected event type"
);
}
}
}
}
}
async fn test_random(
db: impl CounterDb,
entries: &[(u64, u128)],
) -> testresult::TestResult<()> {
// compute the expected values
let mut reference = HashMap::new();
for (id, value) in entries {
let v: &mut u128 = reference.entry(*id).or_default();
*v = v.wrapping_add(*value);
}
// do the same computation using the database, and some concurrency
// and parallelism (we will get parallelism if we are using a multi-threaded runtime).
let mut errors = Vec::new();
n0_future::stream::iter(entries)
.map(|(id, value)| db.add(*id, *value))
.buffered_unordered(16)
.for_each(|result| {
if let Err(e) = result {
errors.push(e);
}
})
.await;
assert!(errors.is_empty(), "Failed to add some entries: {errors:?}");
// check that the db contains the expected values
let ids = reference.keys().copied().collect::<Vec<_>>();
for id in &ids {
let res = db.get(*id).await?;
assert_eq!(res, reference.get(id).copied().unwrap_or_default());
}
db.shutdown().await?;
// check that the db is consistent with the reference
db.check_consistency(reference).await;
Ok(())
}
#[test_strategy::proptest]
fn test_counters_manager_proptest_mem(entries: Vec<(u64, u128)>) {
let rt = tokio::runtime::Builder::new_multi_thread()
.build()
.expect("Failed to create tokio runtime");
rt.block_on(async move {
let db = mem::MemDb::new();
test_random(db, &entries).await
})
.expect("Test failed");
}
#[test_strategy::proptest]
fn test_counters_manager_proptest_fs(entries: Vec<(u64, u128)>) {
let dir = tempfile::tempdir().unwrap();
let rt = tokio::runtime::Builder::new_multi_thread()
.build()
.expect("Failed to create tokio runtime");
rt.block_on(async move {
let db = fs::FsDb::new(dir.path());
test_random(db, &entries).await
})
.expect("Test failed");
}
}