geiserx_ts_runtime 0.11.0

tailscale runtime
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
//! DERP relay fan-out: one underlay-transport task per DERP region from the control derp map.
//!
//! [`Multiderp`] spawns a connection task per region, keeps the home region always-on, and lets the
//! others idle out after a grace period. It also demuxes disco frames (e.g. `CallMeMaybe`) relayed
//! over DERP into the magicsock so a relay-only peer can still open a direct path.
//!
//! Anti-leak: STUN server collection ([`stun_servers_from_regions`]) emits only `FixedAddr` v4
//! servers — never `UseDns` — so probing never triggers a second DNS-resolution egress path.

use core::net::{SocketAddr, SocketAddrV4};
use std::{
    collections::HashMap,
    sync::{Arc, RwLock},
    time::{Duration, Instant},
};

use kameo::{
    actor::ActorRef,
    error::SendError,
    message::{Context, Message},
};
use tokio::{
    sync::{mpsc, watch},
    task::JoinSet,
};
use ts_control::DerpRegion;
use ts_derp::RegionId;
use ts_keys::{NodeKeyPair, NodePublicKey};
use ts_magicsock::MagicSock;
use ts_transport::{
    BatchRecvIter, PeerId, UnderlayTransport, UnderlayTransportExt, UnderlayTransportId,
};

use crate::{
    Env, Error,
    dataplane::{DataplaneActor, NewUnderlayTransport, UnderlayFromDataplane, UnderlayToDataplane},
    derp_latency::DerpLatencyMeasurement,
    peer_tracker::{PeerDb, PeerState},
};

/// Consumes derp map updates and spawns a task per region that runs an underlay transport.
/// Also consumes home derp indications (for this node) to notify the relevant task that it
/// should keep the transport awake even if there is no traffic.
///
/// Other than the home task (which is always kept alive to receive packets), the transport
/// tasks keep the connection alive as long as there is traffic sent or received, and for a
/// short grace period afterward. Connections are otherwise closed when not in use.
pub struct Multiderp {
    /// Runtime environment handle. This module reads the node keys and the shutdown signal from
    /// it, and uses it to subscribe the actor to the update types it handles.
    env: Env,
    /// The dataplane actor, asked for a fresh underlay transport each time a region is set up.
    dataplane: ActorRef<DataplaneActor>,
    /// Per-region state for every region whose transport task has been spawned. Entries are
    /// inserted by [`Multiderp::ensure_region`].
    derps: HashMap<RegionId, RegionEntry>,
    /// Cached region info from the last derp map, so a `send_disco` to a not-yet-connected
    /// region can re-enter [`Multiderp::ensure_region`] with the region's servers.
    regions: HashMap<RegionId, DerpRegion>,
    /// The region currently selected as this node's home, taken from the first entry of the
    /// latest [`DerpLatencyMeasurement`]. `None` until a measurement has been received.
    current_home_derp: Option<RegionId>,
    /// The most recent peer database snapshot, replaced wholesale on every [`PeerState`] update.
    /// `None` until the first update arrives. Shared with the region tasks for key lookups.
    peer_db: Arc<RwLock<Option<Arc<PeerDb>>>>,
    /// Observed DERP routes: the region we last *received* a frame from each peer on. Mirrors Go
    /// magicsock's `c.derpRoute` (`wgengine/magicsock/derp.go`), whose doc says it lets a node
    /// "learn the DERP home upon getting the first connection … help nodes from a slow or
    /// misbehaving control plane." When the netmap carries no home region for a peer (common on a
    /// self-hosted control plane that doesn't echo `preferred_derp`), [`RouteUpdater`] consults this
    /// so a peer that has reached us over some region becomes reachable on that same region — the
    /// rendezvous holds because the peer is demonstrably listening there. Region runners write it
    /// live (each knows its own region + the inbound peer id); pruned to the live netmap on each
    /// peer-state update so a departed peer's route can't pin a stale region forever.
    observed_routes: Arc<RwLock<HashMap<PeerId, RegionId>>>,
    /// The direct underlay socket, installed by [`crate::direct::DirectManager`] once it binds.
    ///
    /// A live handle (shared `RwLock`) so a disco frame (e.g. a `CallMeMaybe`) relayed to us over
    /// DERP can be demuxed and routed into the magicsock — letting it learn a peer's candidate
    /// endpoints and open a direct path even when the peer can only reach us over the relay. `None`
    /// until the direct manager binds (or permanently if its bind failed, in which case relayed
    /// disco frames are simply forwarded to the dataplane as before — they decode as junk there
    /// and are dropped). Region tasks read it live, so regions spawned before the sock is set pick
    /// it up once available.
    direct_sock: Arc<RwLock<Option<Arc<MagicSock>>>>,
    /// The spawned per-region transport tasks (one per entry in `derps`).
    tasks: JoinSet<()>,
}

/// Handles the [`Multiderp`] actor keeps for one region whose transport task has been spawned.
struct RegionEntry {
    /// Identifier the dataplane returned for this region's underlay transport.
    transport_id: UnderlayTransportId,
    /// Tells the region task whether it is currently the home region (`true`), in which case it
    /// keeps its connection up without traffic. Starts out `false`.
    home_derp: watch::Sender<bool>,
    /// Sender for raw sealed disco frames (e.g. CallMeMaybe) to relay through this region's
    /// DERP client, keyed by the destination peer's node public key. Bounded; a dropped frame
    /// is retried on the next CallMeMaybe cadence.
    disco_tx: mpsc::Sender<(NodePublicKey, Vec<u8>)>,
}

impl Multiderp {
    /// Make sure a transport task exists for region `id`, spawning one if it is not yet tracked.
    ///
    /// Does nothing when `self.derps` already has an entry for `id` (a changed `region` is not
    /// picked up, see the TODO below). Otherwise it asks the dataplane for a new underlay
    /// transport, spawns a task that repeatedly runs [`run_derp_once`], and records the region's
    /// [`RegionEntry`].
    ///
    /// The region is left unregistered (no entry in `self.derps`) when the dataplane request
    /// fails, so callers must not assume an entry exists after this returns.
    ///
    /// # Panics
    ///
    /// Panics if the dataplane actor has stopped while `shutdown` is not set.
    #[tracing::instrument(skip_all, fields(region_id = %id))]
    async fn ensure_region(
        &mut self,
        id: RegionId,
        region: &DerpRegion,
        mut shutdown: watch::Receiver<bool>,
    ) {
        // TODO(npry): update if region info changes

        if self.derps.contains_key(&id) {
            tracing::trace!("region already existed");
            return;
        }

        let region = region.clone();
        let keys = self.env.keys.node_keys;

        // `up` carries packets from the dataplane to this region; `down` carries packets
        // received from the region back to the dataplane (see how they are passed to
        // `run_derp_once` below).
        let (transport_id, mut up, down) = match self.dataplane.ask(NewUnderlayTransport).await {
            Ok(val) => val,
            Err(SendError::ActorNotRunning(..) | SendError::ActorStopped) => {
                if !*shutdown.borrow() {
                    panic!("dataplane has stopped but we're not shutting down");
                }

                return;
            }
            // A transient mailbox-full / timeout (or handler) error must degrade a single region
            // rather than abort DERP setup for the whole node. Skip this region; it is re-attempted
            // on the next derp-map update or send_disco.
            Err(e) => {
                tracing::error!(error = %e, "multiderp: failed to set up DERP region; skipping");
                return;
            }
        };
        let (home_derp_tx, mut home_derp_rx) = watch::channel(false);
        // Bounded queue of disco frames to relay through this region (capacity 8).
        let (disco_tx, mut disco_rx) = mpsc::channel::<(NodePublicKey, Vec<u8>)>(8);

        let peer_db = self.peer_db.clone();
        let direct_sock = self.direct_sock.clone();
        let observed_routes = self.observed_routes.clone();

        self.tasks.spawn(async move {
            // Re-run the client until shutdown is signalled or an underlay channel closes.
            while !*shutdown.borrow() {
                tokio::select! {
                    _ = shutdown.changed() => {
                        break;
                    },
                    ret = run_derp_once(
                        id,
                        &region,
                        keys,
                        &down,
                        &mut up,
                        &mut home_derp_rx,
                        &mut disco_rx,
                        &peer_db,
                        &direct_sock,
                        &observed_routes,
                    ) => if let Err(e) = ret {
                        tracing::error!(error = %e, region_id = %id, "running derp client");
                        // Back off briefly before retrying so a failing region does not spin.
                        tokio::time::sleep(Duration::from_millis(500)).await;
                    },
                }

                if up.is_closed() {
                    tracing::warn!(region_id = %id, "underlay up channel closed!");
                    break;
                }

                if down.is_closed() {
                    tracing::warn!(region_id = %id, "underlay down channel closed!");
                    break;
                }
            }
        });

        self.derps.insert(
            id,
            RegionEntry {
                transport_id,
                home_derp: home_derp_tx,
                disco_tx,
            },
        );
    }
}

#[kameo::messages]
impl Multiderp {
    /// The underlay transport id of region `id`, or `None` if no transport task has been set up
    /// for that region.
    #[message]
    pub fn transport_id_for_region(&self, id: RegionId) -> Option<UnderlayTransportId> {
        self.derps.get(&id).map(|entry| entry.transport_id)
    }

    /// The relay region to reach `peer` on when the netmap carried no home region for it.
    ///
    /// Resolution order (mirrors Go's netmap-home → `derpRoute` learned route, plus a bounded
    /// last resort): first an **observed** route — a region we have actually received a frame
    /// from this peer on, so the peer is demonstrably listening there — and otherwise our **own
    /// current home region**. The home-region fallback is a deliberate, interop-safe divergence
    /// from strict Go (which returns no route): it rendezvouses a *co-regional* peer — the
    /// dominant geo-close / same-tailnet deployment — even when a self-hosted control plane never
    /// echoes the peer's `preferred_derp`. If the peer is not connected to that region the DERP
    /// server simply drops the relayed frame (no host dial, no leak); that is strictly better than
    /// dropping the peer outright, and it self-heals to the peer's real region the moment one is
    /// observed or the netmap supplies it. `None` only if we have learned no route *and* have no
    /// home region yet, or if the chosen region has no transport task.
    #[message]
    pub fn region_for_peer(&self, peer: PeerId) -> Option<RegionId> {
        let learned = {
            let routes = poisoned_read(&self.observed_routes);
            routes.get(&peer).copied()
        };
        let has_task = |region: RegionId| self.derps.contains_key(&region);

        resolve_region_for_peer(learned, self.current_home_derp, has_task)
    }

    /// Same inference as [`Multiderp::region_for_peer`], but keyed by node public key for callers
    /// (the CallMeMaybe relay loop) that hold a `NodePublicKey` rather than a `PeerId`. The key is
    /// first resolved to a `PeerId` through the peer db. `None` if the key isn't a current netmap
    /// member, or if no live region can be inferred.
    #[message]
    pub fn region_for_node(&self, node: NodePublicKey) -> Option<RegionId> {
        // Release the peer-db lock before touching the observed-routes lock.
        let peer = {
            let guard = poisoned_read(&self.peer_db);
            let snapshot = guard.as_ref()?;
            let (peer_id, _) = snapshot.get(&node)?;
            peer_id
        };

        let learned = {
            let routes = poisoned_read(&self.observed_routes);
            routes.get(&peer).copied()
        };
        let has_task = |region: RegionId| self.derps.contains_key(&region);

        resolve_region_for_peer(learned, self.current_home_derp, has_task)
    }

    /// v4 STUN server addresses from the current derp map, for leak-safe single-socket STUN.
    /// Only FixedAddr v4 STUN nodes are returned; UseDns nodes are skipped (resolving them
    /// would be a second egress / DNS-leak path). May be empty (then we fall back to pong-harvest).
    #[message]
    pub fn stun_servers_v4(&self) -> (Vec<SocketAddr>,) {
        let servers = stun_servers_from_regions(self.regions.values());
        (servers,)
    }

    /// Install the direct underlay socket so disco frames (e.g. a `CallMeMaybe`) relayed to us
    /// over DERP can be demuxed into the magicsock (see [`Multiderp::direct_sock`]).
    ///
    /// Sent once by [`crate::direct::DirectManager`] after it binds. Region tasks read the handle
    /// live, so this takes effect on regions already running as well as ones spawned later.
    #[message]
    pub fn set_direct_sock(&mut self, sock: Arc<MagicSock>) {
        let mut slot = poisoned_write(&self.direct_sock);
        *slot = Some(sock);
    }

    /// Relay a raw sealed disco frame (e.g. a CallMeMaybe) to `peer` through DERP region `region`.
    ///
    /// Wakes the region's connection if it is not currently established (the queued frame counts
    /// as activity). A frame for a region that is unknown (not in the last derp map) is dropped
    /// with a warning, as is one whose region could not be set up. A full per-region queue also
    /// drops the frame; it is retried on the next cadence.
    #[message]
    pub async fn send_disco(&mut self, peer: NodePublicKey, region: RegionId, frame: Vec<u8>) {
        // Clone the region info out of the cache so `ensure_region` can borrow `self` mutably.
        let region_info = match self.regions.get(&region) {
            Some(info) => info.clone(),
            None => {
                tracing::warn!(region_id = %region, "no derp region info, dropping disco frame");
                return;
            }
        };

        self.ensure_region(region, &region_info, self.env.shutdown.clone())
            .await;

        match self.derps.get(&region) {
            Some(entry) => {
                if let Err(e) = entry.disco_tx.try_send((peer, frame)) {
                    tracing::trace!(error = %e, region_id = %region, "disco relay queue full or closed, dropping frame");
                }
            }
            None => {
                tracing::warn!(region_id = %region, "region not established, dropping disco frame");
            }
        }
    }
}

/// Gather the v4 STUN server addresses advertised by a set of derp regions, for leak-safe
/// single-socket STUN.
///
/// **Anti-leak gate — do not loosen.** A server contributes an address only when it has a
/// `stun_port` *and* its v4 usage is [`ts_derp::IpUsage::FixedAddr`]. `UseDns` (and `Disable`)
/// servers are skipped on purpose: resolving a STUN hostname would issue a DNS query and open a
/// second egress path, defeating the point of probing from the one bound underlay socket. Do not
/// "fix" this to include `UseDns` servers — that would reintroduce the DNS-leak path. Kept as a
/// free function so the filtering can be unit-tested directly against `ServerConnInfo` fixtures.
fn stun_servers_from_regions<'a>(
    regions: impl IntoIterator<Item = &'a DerpRegion>,
) -> Vec<SocketAddr> {
    regions
        .into_iter()
        .flat_map(|region| &region.servers)
        .filter_map(|srv| {
            let Some(port) = srv.stun_port else {
                return None;
            };

            match srv.ipv4 {
                ts_derp::IpUsage::FixedAddr(addr) => {
                    Some(SocketAddr::V4(SocketAddrV4::new(addr, port)))
                }
                _ => None,
            }
        })
        .collect()
}

/// Acquire a read guard on `lock`, treating a poisoned lock as usable instead of panicking.
///
/// The locks in this module guard a wholesale-replaced snapshot (the peer db, or the direct socket
/// handle), so there is no cross-field invariant a mid-write panic could leave half-applied.
/// Recovering the guard (rather than `.unwrap()`) stops one panicking task from poisoning the lock
/// and cascade-killing every region runner that reads it — that would collapse all DERP relaying
/// instead of failing closed.
fn poisoned_read<T>(lock: &RwLock<T>) -> std::sync::RwLockReadGuard<'_, T> {
    match lock.read() {
        Ok(guard) => guard,
        Err(poisoned) => poisoned.into_inner(),
    }
}

/// Acquire a write guard on `lock`, treating a poisoned lock as usable instead of panicking.
///
/// Write-side twin of [`poisoned_read`]; the same poison-recovery rationale applies.
fn poisoned_write<T>(lock: &RwLock<T>) -> std::sync::RwLockWriteGuard<'_, T> {
    match lock.write() {
        Ok(guard) => guard,
        Err(poisoned) => poisoned.into_inner(),
    }
}

/// Pick the relay region for a peer whose netmap entry carried no home region.
///
/// The candidate is the `observed` route when there is one (a region we've received a frame from
/// this peer on — it's listening there), and our own `home` region otherwise. The candidate is
/// returned only if `region_is_live` accepts it, i.e. it has a live transport task: a region whose
/// `ensure_region` hasn't run yet has no `transport_id` for the route updater to map, so offering
/// it would just produce a no-route drop a step later. Returning `None` instead lets the caller
/// fall through cleanly (and the periodic recompute retries once the task is up). Note that a
/// non-live `observed` region yields `None`; `home` is not tried in its place.
///
/// A pure function so the resolution order is unit-testable without the actor/lock machinery. See
/// [`Multiderp::region_for_peer`] for the full rationale (Go `derpRoute` parity + the bounded
/// home-region divergence).
fn resolve_region_for_peer(
    observed: Option<RegionId>,
    home: Option<RegionId>,
    region_is_live: impl Fn(RegionId) -> bool,
) -> Option<RegionId> {
    let candidate = match observed {
        Some(region) => region,
        None => home?,
    };

    if region_is_live(candidate) {
        Some(candidate)
    } else {
        None
    }
}

/// Note that a frame from `peer` arrived on region `region` (Go `derpRoute` learning).
///
/// The write lock on `routes` is taken only when the stored mapping differs, so a steady stream of
/// frames from a peer on its established region doesn't churn it. Returns `true` iff an insert was
/// performed (the caller logs on a genuine change). Lives outside `run_derp_once` so the
/// learn→resolve round-trip is unit-testable without the async DERP runner: a recorded route is
/// exactly what [`resolve_region_for_peer`]'s `observed` arm later consumes.
fn record_observed_route(
    routes: &RwLock<HashMap<PeerId, RegionId>>,
    peer: PeerId,
    region: RegionId,
) -> bool {
    // Check under the read lock first; the guard is released at the end of this block.
    let already_recorded = {
        let current = poisoned_read(routes);
        current.get(&peer) == Some(&region)
    };

    if !already_recorded {
        poisoned_write(routes).insert(peer, region);
    }

    !already_recorded
}

/// Adapter that answers `PeerId` ↔ `NodePublicKey` lookups from the shared peer-db snapshot.
/// Every lookup yields `None` while no snapshot has been installed.
struct PeerDbLookup<'a>(&'a RwLock<Option<Arc<PeerDb>>>);

impl ts_transport::PeerLookup<PeerId, NodePublicKey> for PeerDbLookup<'_> {
    /// The node public key of peer `id`, if the current snapshot knows that peer.
    fn lookup_key(&self, id: PeerId) -> Option<NodePublicKey> {
        let guard = poisoned_read(self.0);
        let snapshot = guard.as_ref()?;
        let node_key = snapshot.get(&id).map(|(_, node)| node.node_key)?;

        Some(node_key)
    }
}

impl ts_transport::PeerLookup<NodePublicKey, PeerId> for PeerDbLookup<'_> {
    /// The peer id registered for node public key `key`, if the current snapshot knows it.
    fn lookup_key(&self, key: NodePublicKey) -> Option<PeerId> {
        let guard = poisoned_read(self.0);
        let snapshot = guard.as_ref()?;
        let peer_id = snapshot.get(&key).map(|(id, _)| id)?;

        Some(peer_id)
    }
}

/// Drive the DERP client for region `id`.
///
/// Each pass of the outer loop has two phases:
///
/// 1. **Idle** — unless this region is currently the home region, wait until it becomes home, a
///    packet arrives from the dataplane, or a disco frame is queued for relay.
/// 2. **Connected** — connect to the region's servers, flush whatever woke us up, then pump
///    traffic in both directions. A non-home connection is closed after `INACTIVITY_TIMEOUT`
///    without traffic, which returns to the idle phase.
///
/// Frames received from the relay also update `observed_routes`, and relayed disco frames are
/// offered to `direct_sock` before the rest is forwarded on `to_dataplane`.
///
/// # Errors
///
/// Returns the first error from connecting, sending, or receiving on the DERP client. There is no
/// `Ok` return: absent an error the function loops until the caller drops the future.
#[tracing::instrument(skip_all, fields(region_id = %id), name = "derp runner")]
async fn run_derp_once(
    id: RegionId,
    region: &DerpRegion,
    keys: NodeKeyPair,
    to_dataplane: &UnderlayToDataplane,
    from_dataplane: &mut UnderlayFromDataplane,
    home_derp_rx: &mut watch::Receiver<bool>,
    disco_rx: &mut mpsc::Receiver<(NodePublicKey, Vec<u8>)>,
    peer_db: &RwLock<Option<Arc<PeerDb>>>,
    direct_sock: &RwLock<Option<Arc<MagicSock>>>,
    observed_routes: &RwLock<HashMap<PeerId, RegionId>>,
) -> Result<(), ts_derp::Error> {
    // How long a non-home connection may sit without traffic before it is closed.
    const INACTIVITY_TIMEOUT: Duration = Duration::from_secs(10);

    loop {
        // Whatever woke the connection up; sent right after connecting.
        let mut pending = None;
        let mut pending_disco = None;

        tracing::trace!("waiting for packet activity or for this to become home derp");

        // Idle phase: skipped entirely while this region is the home region.
        while !*home_derp_rx.borrow_and_update() {
            tokio::select! {
                _ = home_derp_rx.changed() => {
                    tracing::trace!(is_home_derp = *home_derp_rx.borrow());
                },

                // NOTE(review): a `None` from `recv()` here also leaves the idle loop with
                // nothing pending, so a connection is still established afterwards.
                from_net = from_dataplane.recv() => {
                    tracing::trace!("received packet to send");
                    pending = from_net;
                    break;
                }

                disco = disco_rx.recv() => {
                    tracing::trace!("received disco frame to relay, waking connection");
                    pending_disco = disco;
                    break;
                }
            }
        }

        tracing::trace!("establishing derp connection");

        // Hold the client in an `Arc` so we can both wrap a clone with the PeerId<->NodeKey
        // lookup (for dataplane traffic) and keep a raw handle for `send_one` (disco frames
        // addressed directly by node public key, bypassing the PeerId mapping).
        let client = Arc::new(ts_derp::DefaultClient::connect(&region.servers, &keys).await?);
        let transport = client.clone().with_key_lookup(PeerDbLookup(peer_db));

        if let Some(pending) = pending {
            tracing::trace!("sending queued packet");
            transport.send([pending]).await?;
        }

        if let Some((node_key, frame)) = pending_disco {
            tracing::trace!("relaying queued disco frame");
            client.send_one(node_key, &frame).await?;
        }

        let mut last_activity = Instant::now();

        // Connected phase. Every `break` directly inside this loop's select arms drops the
        // connection and returns to the idle phase above.
        loop {
            let span = tracing::trace_span!("derp_loop");

            // `None` while this is the home region, which disables the idle-out arm below.
            let inactivity_timeout =
                (!*home_derp_rx.borrow()).then(|| last_activity + INACTIVITY_TIMEOUT);

            tokio::select! {
                from_derp = transport.recv() => {
                    last_activity = Instant::now();

                    // Inbound disco-over-DERP demux (npts-C2). A peer that can only reach us over
                    // the relay (e.g. symmetric NAT on both sides) sends its CallMeMaybe over DERP;
                    // it arrives here interleaved with WireGuard data. Route disco frames into the
                    // magicsock so it can learn the peer's candidate endpoints and open a direct
                    // path; everything else goes to the dataplane unchanged.
                    //
                    // Anti-leak: only CallMeMaybe is acted on (see
                    // `MagicSock::handle_relayed_call_me_maybe`). A relayed frame has no real UDP
                    // source, so we must never feed a relayed Ping/Pong into a path that would pong
                    // to a bogus address — that entry point drops them. If the direct socket isn't
                    // bound yet (or its bind failed), disco frames fall through to the dataplane as
                    // before, where they decode as junk and are dropped. That startup window
                    // self-heals: the peer re-sends CallMeMaybe on its own advertise cadence, so a
                    // dropped frame here is recovered on the next round, not a lost hole-punch.
                    // Snapshot the direct-sock handle once for the whole batch (it changes at most
                    // once, when the direct manager installs it). See `demux_relayed_disco` for the
                    // CallMeMaybe-only filtering this feeds.
                    let sock = poisoned_read(direct_sock).clone();
                    for ret in from_derp.batch_iter() {
                        let (peer_id, pkts) = ret?;

                        // Observed-route learning (Go `derpRoute` parity): a frame reached us from
                        // this peer on region `id`, so the peer is listening there. Record it (any
                        // frame counts — disco or WG data) so [`Multiderp::region_for_peer`] can
                        // relay back to a peer whose netmap home region we never learned. The helper
                        // writes only on an actual change, to avoid churning the lock on every batch.
                        if record_observed_route(observed_routes, peer_id, id) {
                            tracing::trace!(parent: &span, %peer_id, region_id = %id, "learned observed derp route for peer");
                        }

                        let data = demux_relayed_disco(pkts, sock.as_deref());
                        if data.is_empty() {
                            continue;
                        }

                        tracing::trace!(parent: &span, %peer_id, len = data.len(), "packet from derp server");

                        // NOTE(review): this `break` only leaves the `for` over the current
                        // batch; the surrounding select loop keeps running afterwards.
                        let Ok(()) = to_dataplane.send((peer_id, data)) else {
                            tracing::error!(parent: &span, "underlay receive channel closed");
                            break;
                        };
                    }
                },

                disco = disco_rx.recv() => {
                    last_activity = Instant::now();

                    let Some((node_key, frame)) = disco else {
                        tracing::warn!(parent: &span, "disco relay queue closed");
                        break;
                    };

                    tracing::trace!(parent: &span, "relaying disco frame over derp");
                    client.send_one(node_key, &frame).await?;
                },

                from_net = from_dataplane.recv() => {
                    last_activity = Instant::now();

                    let Some(from_net) = from_net else {
                        tracing::warn!(parent: &span, "transport queue closed");
                        break;
                    };

                    tracing::trace!(parent: &span, peer = %from_net.0, packets = from_net.1.len(), "packets to derp server");

                    transport.send([from_net]).await?;
                },

                _ = option_timeout(inactivity_timeout) => {
                    // Re-check the flag: this region may have become home while the timer ran.
                    if !*home_derp_rx.borrow_and_update() {
                        tracing::trace!(parent: &span, "timed out and not home derp, closing derp conn");
                        break;
                    }
                },

                // A home-flag change just restarts the loop so `inactivity_timeout` is recomputed.
                _ = home_derp_rx.changed() => {
                    tracing::trace!(is_home_derp = *home_derp_rx.borrow());
                },
            }
        }
    }
}

/// Split a batch of frames received from a DERP server into relayed disco frames, which are handed
/// to the direct socket, and everything else, which is returned for the dataplane.
///
/// A peer reachable only over the relay (e.g. symmetric NAT on both ends) sends its `CallMeMaybe`
/// over DERP, interleaved with WireGuard data. A frame is removed from the returned data only when
/// it [`ts_magicsock::looks_like_disco`], a direct socket is installed, *and*
/// [`MagicSock::handle_relayed_call_me_maybe`] reports it consumed (the magicsock learns the
/// peer's candidate endpoints from it). Every other frame — and *all* frames when `sock` is
/// `None` — is passed through unchanged, in the original order.
///
/// Anti-leak: a relayed frame has no real UDP source, so only `CallMeMaybe` is acted on; relayed
/// Pings/Pongs are dropped by `handle_relayed_call_me_maybe` rather than producing a pong to a
/// bogus address.
fn demux_relayed_disco(
    pkts: impl IntoIterator<Item = ts_packet::PacketMut>,
    sock: Option<&MagicSock>,
) -> Vec<ts_packet::PacketMut> {
    pkts.into_iter()
        .filter_map(|mut pkt| {
            let is_disco = ts_magicsock::looks_like_disco(pkt.as_ref());
            let consumed = match sock {
                Some(sock) if is_disco => sock.handle_relayed_call_me_maybe(pkt.as_mut()),
                _ => false,
            };

            // Consumed disco frames stay off the dataplane; everything else is forwarded.
            if consumed { None } else { Some(pkt) }
        })
        .collect()
}

/// Sleep until the given deadline, or never complete when there is no deadline.
///
/// Lets a `select!` arm be switched off by passing `None`.
async fn option_timeout(duration: Option<Instant>) {
    if let Some(deadline) = duration {
        tokio::time::sleep_until(deadline.into()).await;
    } else {
        core::future::pending::<()>().await;
    }
}

impl kameo::Actor for Multiderp {
    type Args = (Env, ActorRef<DataplaneActor>);
    type Error = Error;

    async fn on_start(
        (env, dataplane): Self::Args,
        slf: ActorRef<Self>,
    ) -> Result<Self, Self::Error> {
        env.subscribe::<Arc<ts_control::StateUpdate>>(&slf).await?;
        env.subscribe::<Arc<PeerState>>(&slf).await?;
        env.subscribe::<DerpLatencyMeasurement>(&slf).await?;

        Ok(Self {
            env,
            dataplane,
            peer_db: Default::default(),
            direct_sock: Default::default(),
            observed_routes: Default::default(),
            derps: Default::default(),
            regions: Default::default(),
            tasks: JoinSet::new(),
            current_home_derp: None,
        })
    }
}

impl Message<Arc<ts_control::StateUpdate>> for Multiderp {
    type Reply = ();

    /// Apply a control state update: cache every region of its derp map and make sure each one has
    /// a transport task. Updates without a derp map are ignored.
    ///
    /// If the current home region is among them it is (re-)told that it is the home region, which
    /// covers a region task that was only just started. `ensure_region` may legitimately leave a
    /// region unregistered (transient dataplane error, or shutdown in progress), so a missing
    /// entry is logged instead of being treated as an invariant violation.
    #[tracing::instrument(skip_all, name = "multiderp map update")]
    async fn handle(
        &mut self,
        msg: Arc<ts_control::StateUpdate>,
        _ctx: &mut Context<Self, Self::Reply>,
    ) {
        let Some(derp_map) = &msg.derp else {
            return;
        };

        for (id, region) in derp_map {
            self.regions.insert(*id, region.clone());
            self.ensure_region(*id, region, self.env.shutdown.clone())
                .await;

            if self.current_home_derp != Some(*id) {
                continue;
            }

            // If this is the home region and it was just started, it needs to be notified that
            // it's the home region.
            match self.derps.get(id) {
                Some(entry) => {
                    entry.home_derp.send_replace(true);
                }
                // The region was skipped by `ensure_region`; it is re-attempted (and marked as
                // home) on the next derp map update.
                None => {
                    tracing::warn!(
                        region_id = %id,
                        "home derp region is not established, cannot mark it as home yet"
                    );
                }
            }
        }
    }
}

impl Message<Arc<PeerState>> for Multiderp {
    type Reply = ();

    /// Adopt a new peer snapshot: drop observed routes of peers that are no longer in the netmap,
    /// then replace the shared peer db with the incoming one.
    async fn handle(&mut self, msg: Arc<PeerState>, _ctx: &mut Context<Self, Self::Reply>) {
        // Pruning bounds the route map to the live peer set, so a departed/reassigned peer can't
        // pin a stale region forever. It runs before the db swap and reads the incoming snapshot.
        {
            let mut routes = poisoned_write(&self.observed_routes);
            routes.retain(|peer_id, _| msg.peers.peers().contains_key(peer_id));
        }

        *poisoned_write(&self.peer_db) = Some(msg.peers.clone());
    }
}

impl Message<DerpLatencyMeasurement> for Multiderp {
    type Reply = ();

    async fn handle(&mut self, msg: DerpLatencyMeasurement, _ctx: &mut Context<Self, Self::Reply>) {
        let Some(result) = msg.measurement.as_ref().first() else {
            tracing::trace!("received home derp measurement message but none was set");
            return;
        };

        if let Some(home_derp) = self.current_home_derp {
            self.derps
                .get_mut(&home_derp)
                .unwrap()
                .home_derp
                .send_replace(false);
        }

        if self.current_home_derp.is_none_or(|id| id != result.id) {
            self.current_home_derp = Some(result.id);
            if let Some(derp) = self.derps.get_mut(&result.id) {
                derp.home_derp.send_replace(true);
            }

            tracing::info!(
                region_id = %result.id,
                latency_ms = result.latency.as_secs_f32() * 1000.,
                "new home derp region selected"
            );
        }
    }
}

#[cfg(test)]
mod tests {
    use ts_keys::DiscoPrivateKey;
    use ts_packet::PacketMut;

    use super::*;

    /// The IPv4 loopback address with port 0, used as the bind address for test sockets.
    fn localhost() -> std::net::SocketAddr {
        std::net::SocketAddr::from((std::net::Ipv4Addr::LOCALHOST, 0))
    }

    /// Binding verifier that approves every disco frame regardless of its keys.
    ///
    /// The demux tests do not exercise the netmap-membership check (that is covered in
    /// `direct::tests` and `ts_magicsock`); they install this verifier so the fail-closed
    /// relayed-CallMeMaybe handler still learns endpoints.
    fn allow_all() -> ts_magicsock::BindingVerifier {
        let accept_everything = |_: &ts_keys::DiscoPublicKey, _: Option<&NodePublicKey>| true;
        Arc::new(accept_everything)
    }

    /// The npts-C2 inbound disco-over-DERP demux: a `CallMeMaybe` that arrives relayed over DERP
    /// is handed to the magicsock (which learns its endpoints via `add_peer_endpoints`) and is
    /// withheld from the dataplane, while a WireGuard data frame in the same batch is passed
    /// through to the dataplane byte-for-byte.
    #[tokio::test]
    async fn relayed_call_me_maybe_is_demuxed_not_forwarded() {
        // Local direct socket. The relayed CallMeMaybe below is sealed *to* its disco key.
        let local_disco = DiscoPrivateKey::random();
        let local_node = ts_keys::NodePrivateKey::random().public_key();
        let magicsock = MagicSock::bind(localhost(), local_disco, local_node)
            .await
            .unwrap()
            .with_binding_verifier(allow_all());

        // Remote peer advertising one public (pingable) candidate endpoint.
        let remote_disco = DiscoPrivateKey::random();
        let advertised: std::net::SocketAddr = "203.0.113.7:41641".parse().unwrap();
        let sealed = ts_magicsock::seal_call_me_maybe(
            &remote_disco,
            &local_disco.public_key(),
            &[advertised],
        )
        .unwrap();

        // Ordinary WireGuard data frame: type byte 0x04, never the disco magic prefix.
        let wg_bytes = [0x04u8, 0, 0, 0, 1, 2, 3, 4];

        let forwarded = demux_relayed_disco(
            vec![PacketMut::from(&sealed[..]), PacketMut::from(&wg_bytes[..])],
            Some(&magicsock),
        );

        // The CallMeMaybe is swallowed by the demux; the data frame alone comes out.
        assert_eq!(
            forwarded.len(),
            1,
            "only the data frame reaches the dataplane"
        );
        assert_eq!(forwarded[0].as_ref(), &wg_bytes);

        // ...and the magicsock now knows the peer's advertised endpoint.
        assert_eq!(
            magicsock.candidate_addrs(&remote_disco.public_key()),
            vec![advertised],
            "the relayed CallMeMaybe's endpoint should be learned"
        );
    }

    /// When no direct socket is available (bind failed, or the direct manager has not bound yet)
    /// the demux is a pass-through: disco and data frames alike go to the dataplane unchanged,
    /// which is the behavior that predates the demux.
    #[tokio::test]
    async fn without_direct_sock_all_frames_forwarded() {
        let recipient_disco = DiscoPrivateKey::random();
        let sender_disco = DiscoPrivateKey::random();

        let endpoint: std::net::SocketAddr = "203.0.113.7:41641".parse().unwrap();
        let sealed = ts_magicsock::seal_call_me_maybe(
            &sender_disco,
            &recipient_disco.public_key(),
            &[endpoint],
        )
        .unwrap();

        let frames = vec![
            PacketMut::from(&sealed[..]),
            PacketMut::from(&[0x04u8, 9, 9][..]),
        ];
        let passed_through = demux_relayed_disco(frames, None);

        assert_eq!(
            passed_through.len(),
            2,
            "no demux without a direct socket; all frames pass through"
        );
    }

    /// A disco *Ping* that arrives relayed over DERP is dropped rather than answered with a Pong,
    /// because a relayed frame carries no real UDP source to reply to. The frame is still consumed
    /// (it never reaches the dataplane) and no candidate path is learned from it. The allow-all
    /// verifier is installed on purpose: it shows the drop is structural (only CallMeMaybe is
    /// handled at the relay) and not the result of a verifier rejection.
    #[tokio::test]
    async fn relayed_ping_is_dropped_not_ponged() {
        let local_disco = DiscoPrivateKey::random();
        let local_node = ts_keys::NodePrivateKey::random().public_key();
        let magicsock = MagicSock::bind(localhost(), local_disco, local_node)
            .await
            .unwrap()
            .with_binding_verifier(allow_all());

        let remote_disco = DiscoPrivateKey::random();
        let remote_node = ts_keys::NodePrivateKey::random().public_key();
        let tx_id = ts_magicsock::random_tx_id();
        let sealed_ping = ts_magicsock::seal_ping(
            &remote_disco,
            remote_node,
            &local_disco.public_key(),
            tx_id,
        )
        .unwrap();

        let forwarded =
            demux_relayed_disco(vec![PacketMut::from(&sealed_ping[..])], Some(&magicsock));

        assert!(
            forwarded.is_empty(),
            "a relayed disco Ping is consumed (kept off the dataplane)"
        );

        let learned = magicsock.candidate_addrs(&remote_disco.public_key());
        assert!(
            learned.is_empty(),
            "a relayed Ping must not learn a candidate path"
        );
    }

    /// Forbidden candidates (loopback/private/IPv6) advertised in a relayed `CallMeMaybe` are
    /// removed by `is_pingable_candidate` before they can become host-sourced ping targets. This
    /// test offers a loopback, a private, and a public address together and expects only the
    /// public one to be learned.
    #[tokio::test]
    async fn relayed_call_me_maybe_forbidden_endpoints_filtered() {
        let local_disco = DiscoPrivateKey::random();
        let local_node = ts_keys::NodePrivateKey::random().public_key();
        let magicsock = MagicSock::bind(localhost(), local_disco, local_node)
            .await
            .unwrap()
            .with_binding_verifier(allow_all());

        let remote_disco = DiscoPrivateKey::random();

        let addr = |text: &str| -> std::net::SocketAddr { text.parse().unwrap() };
        let loopback = addr("127.0.0.1:41641");
        let private = addr("10.1.2.3:41641");
        let public = addr("203.0.113.50:41641");

        let sealed = ts_magicsock::seal_call_me_maybe(
            &remote_disco,
            &local_disco.public_key(),
            &[loopback, private, public],
        )
        .unwrap();

        let forwarded = demux_relayed_disco(vec![PacketMut::from(&sealed[..])], Some(&magicsock));
        assert!(
            forwarded.is_empty(),
            "the CallMeMaybe is consumed, not forwarded"
        );

        let learned = magicsock.candidate_addrs(&remote_disco.public_key());
        assert_eq!(
            learned,
            vec![public],
            "only the public candidate survives the pingable-candidate filter"
        );
    }

    /// Fixture constructor for a [`ServerConnInfo`]: the caller picks the IPv4 usage and the STUN
    /// port, and every other field is a fixed placeholder that the STUN filter never reads.
    fn server(
        ipv4: ts_derp::IpUsage<core::net::Ipv4Addr>,
        stun_port: Option<u16>,
    ) -> ts_derp::ServerConnInfo {
        let host = String::from("derp.example");
        let tls_validation_config = ts_derp::TlsValidationConfig::CommonName {
            common_name: host.clone(),
        };

        ts_derp::ServerConnInfo {
            hostname: host,
            ipv4,
            ipv6: ts_derp::IpUsage::Disable,
            tls_validation_config,
            https_port: 443,
            stun_port,
            stun_only: false,
            supports_port_80: false,
        }
    }

    /// Fixture constructor for a [`DerpRegion`] named `"r"` that contains the given servers.
    fn region(servers: Vec<ts_derp::ServerConnInfo>) -> DerpRegion {
        let info = ts_derp::RegionInfo {
            name: String::from("r"),
            code: String::from("r"),
            no_measure_no_home: false,
        };

        DerpRegion { info, servers }
    }

    /// Guards the anti-DNS-leak gate: `stun_servers_from_regions` must return only servers that
    /// have a FixedAddr IPv4 *and* a STUN port. A `UseDns` server is skipped (resolving it would
    /// need a DNS lookup, i.e. a second egress), a `Disable` server is skipped, and a FixedAddr
    /// server without `stun_port` is skipped. Letting `UseDns` through in a future change would
    /// reopen the DNS-leak path, and this test would fail.
    #[test]
    fn stun_servers_from_regions_returns_only_fixed_v4_with_port() {
        let fixed = core::net::Ipv4Addr::new(203, 0, 113, 5);
        let fixed_without_stun = core::net::Ipv4Addr::new(198, 51, 100, 9);

        // Kept: FixedAddr v4 with a STUN port.
        let kept = server(ts_derp::IpUsage::FixedAddr(fixed), Some(3478));
        // Skipped: UseDns (resolving it would be a DNS leak / second egress).
        let needs_dns = server(ts_derp::IpUsage::UseDns, Some(3478));
        // Skipped: explicitly disabled v4.
        let v4_disabled = server(ts_derp::IpUsage::Disable, Some(3478));
        // Skipped: FixedAddr but STUN disabled (no stun_port).
        let no_stun_port = server(ts_derp::IpUsage::FixedAddr(fixed_without_stun), None);

        let r = region(vec![kept, needs_dns, v4_disabled, no_stun_port]);

        let expected = vec![SocketAddr::V4(SocketAddrV4::new(fixed, 3478))];
        assert_eq!(
            stun_servers_from_regions([&r]),
            expected,
            "only the FixedAddr-v4-with-port server must be probed (UseDns/Disable/no-port skipped)"
        );
    }

    /// When no server in the derp map has a FixedAddr IPv4 STUN endpoint, the result is an empty
    /// list (the prober then falls back to pong-harvest); nothing panics and no address is made up.
    #[test]
    fn stun_servers_from_regions_empty_when_no_fixed_v4() {
        let needs_dns = server(ts_derp::IpUsage::UseDns, Some(3478));
        let v4_disabled = server(ts_derp::IpUsage::Disable, None);
        let r = region(vec![needs_dns, v4_disabled]);

        let probes = stun_servers_from_regions([&r]);
        assert!(
            probes.is_empty(),
            "no FixedAddr-v4 STUN server => empty probe list"
        );
    }

    /// Shorthand for building a [`RegionId`] in tests. Panics if `n` is zero.
    fn rid(n: u32) -> RegionId {
        let non_zero = core::num::NonZeroU32::new(n).unwrap();
        RegionId(non_zero)
    }

    /// Issue #24's connectivity floor: the relay-region resolution order for a peer whose netmap
    /// entry carried no home region. An observed route takes precedence; failing that, our own
    /// home region is the last resort; with neither there is no route and the peer is dropped as
    /// before (there is no relay to offer). Every region is treated as live in this test.
    #[test]
    fn resolve_region_prefers_observed_then_home() {
        let live = |_: RegionId| true;

        // Columns: observed route, own home region, expected result, failure message.
        let cases = [
            // Observed route wins even when a home region exists (the peer is provably
            // listening there).
            (
                Some(rid(7)),
                Some(rid(19)),
                Some(rid(7)),
                "an observed route must win over the home-region fallback",
            ),
            // No observed route => fall back to our own home region.
            (
                None,
                Some(rid(19)),
                Some(rid(19)),
                "with no observed route, relay via our own home region",
            ),
            // Observed route, no home region yet => still routable.
            (
                Some(rid(7)),
                None,
                Some(rid(7)),
                "an observed route is usable even before a home region is known",
            ),
            // Neither => no route (unchanged drop behavior; nothing to relay through).
            (
                None,
                None,
                None,
                "with neither an observed route nor a home region there is no relay route",
            ),
        ];

        for (observed, home, expected, why) in cases {
            assert_eq!(
                resolve_region_for_peer(observed, home, live),
                expected,
                "{why}"
            );
        }
    }

    /// Regions without a live transport task are never offered. Returning one would only defer
    /// the failure to a no-route drop one step later (the route updater's `TransportIdForRegion`
    /// lookup would miss), so the resolver yields `None` and the periodic recompute retries once
    /// the region's task is running.
    #[test]
    fn resolve_region_skips_region_without_live_transport() {
        let observed = Some(rid(7));
        let home = Some(rid(19));

        let nothing_live = |_: RegionId| false;
        let only_home_live = |r: RegionId| r == rid(19);
        let only_observed_live = |r: RegionId| r == rid(7);

        // Home region 19 is resolved but has no live transport task → not offered.
        assert_eq!(
            resolve_region_for_peer(None, home, nothing_live),
            None,
            "a home region with no live transport must not be returned"
        );

        // Even an observed route is gated on liveness (a region whose task died/not-yet-spawned).
        assert_eq!(
            resolve_region_for_peer(observed, home, only_home_live),
            None,
            "an observed region without a live transport is skipped even if home is live-but-not-chosen"
        );

        // The observed route is returned when it is the live one.
        assert_eq!(
            resolve_region_for_peer(observed, home, only_observed_live),
            Some(rid(7)),
            "the observed route is returned when its transport is live"
        );
    }

    /// A peer that has left the netmap must not keep a stale region pinned: observed routes are
    /// pruned down to the live peer set. This runs the same `retain` predicate shape the
    /// `PeerState` handler uses, but against a plain map, so it checks the prune logic without
    /// involving the actor.
    #[test]
    fn observed_routes_prune_to_live_peers() {
        let mut routes: HashMap<PeerId, RegionId> = [
            (PeerId(1), rid(19)),
            (PeerId(2), rid(7)),
            (PeerId(3), rid(19)),
        ]
        .into_iter()
        .collect();

        // Only peers 1 and 3 remain in the netmap.
        let survivors = [PeerId(1), PeerId(3)];
        let live: std::collections::HashSet<PeerId> = survivors.into_iter().collect();
        routes.retain(|peer_id, _| live.contains(peer_id));

        for kept in survivors {
            assert_eq!(routes.get(&kept), Some(&rid(19)), "live peer kept");
        }
        assert!(
            !routes.contains_key(&PeerId(2)),
            "a peer no longer in the netmap must have its observed route pruned"
        );
    }

    /// Producer side of issue #24's observed-route mechanism (previously only the resolver was
    /// tested). `record_observed_route` is the logic `run_derp_once` runs for each inbound DERP
    /// frame, and it must behave as follows:
    ///
    /// - the first frame from a peer records the route and returns `true`;
    /// - a repeat frame from the same peer on the same region is a no-op and returns `false`
    ///   (no lock churn);
    /// - a frame on a *different* region (the peer moved) updates the route and returns `true`.
    #[test]
    fn record_observed_route_learns_and_dedups() {
        let routes: RwLock<HashMap<PeerId, RegionId>> = RwLock::new(HashMap::new());
        let recorded = || poisoned_read(&routes).get(&PeerId(1)).copied();

        // First frame from peer 1 on region 19 → learned.
        let learned = record_observed_route(&routes, PeerId(1), rid(19));
        assert!(
            learned,
            "first frame learns the route (returns changed=true)"
        );
        assert_eq!(recorded(), Some(rid(19)));

        // Repeat frame, same region → no change.
        let changed_on_repeat = record_observed_route(&routes, PeerId(1), rid(19));
        assert!(
            !changed_on_repeat,
            "a repeat frame on the same region is a no-op (returns changed=false, no lock write)"
        );

        // Frame on a different region → peer moved, route updated.
        let changed_on_move = record_observed_route(&routes, PeerId(1), rid(7));
        assert!(
            changed_on_move,
            "a frame on a new region updates the route (returns changed=true)"
        );
        assert_eq!(
            recorded(),
            Some(rid(7)),
            "the observed route follows the peer to its new region"
        );
    }

    /// Producer→consumer round-trip for observed routes, which the two helpers' isolated tests do
    /// not cover together. Once `record_observed_route` has learned a route,
    /// `resolve_region_for_peer` must return exactly that region for the peer, even with NO netmap
    /// home region and no own-home fallback. This pins issue #24's core promise: a peer we have
    /// heard from over DERP becomes reachable on that region.
    #[test]
    fn observed_route_learn_then_resolve_round_trip() {
        let routes: RwLock<HashMap<PeerId, RegionId>> = RwLock::new(HashMap::new());
        let peer = PeerId(42);
        let observed = || poisoned_read(&routes).get(&peer).copied();

        // Before any frame: no observed route, no home region → unreachable.
        let before = resolve_region_for_peer(observed(), None, |_| true);
        assert_eq!(
            before, None,
            "with neither an observed route nor a home region the peer has no relay route"
        );

        // A frame from the peer arrives on region 5 → learned.
        assert!(record_observed_route(&routes, peer, rid(5)));

        // Now the resolver returns region 5 from the observed route alone (home = None), provided
        // that region has a live transport.
        let after = resolve_region_for_peer(observed(), None, |r| r == rid(5));
        assert_eq!(
            after,
            Some(rid(5)),
            "a learned observed route makes the peer reachable on that region with no netmap home"
        );
    }
}