zlayer_types/overlayd.rs
1//! IPC wire protocol between the main `zlayer` daemon and `zlayer-overlayd`.
2//!
3//! `zlayer-overlayd` is a standalone, long-lived daemon that owns every
4//! mechanism touching the overlay/network plane (the `WireGuard` device +
5//! adapter, peers, `AllowedIPs`/service subnets, IP allocation, DNS, NAT,
6//! Linux bridges + veth/netns attach, and the Windows HCN Internal network +
7//! endpoints). The main `zlayer` daemon keeps the cluster brain (Raft, the
8//! scheduler, the service registry, container/HCS process lifecycle) and
9//! drives overlayd over a length-prefixed-JSON IPC channel (a Unix domain
10//! socket on Unix, a named pipe on Windows).
11//!
12//! This module is that channel's **wire contract** — the only thing both
13//! sides must agree on. It lives in `zlayer-types` (a leaf crate) so the
14//! daemon, the overlayd server, and the overlayd client can all depend on it
15//! without a dependency cycle.
16//!
17//! ## Framing
18//!
19//! One connection multiplexes request/response and server→client event push.
//! Each frame is an [`OverlaydFrame`] serialized as JSON and written with a
21//! `u32` little-endian length prefix (the framing itself lives in
22//! `zlayer-overlayd`'s transport module, not here). The main daemon sends
23//! [`OverlaydFrame::Request`]s each carrying a client-chosen `id`; overlayd
24//! replies with a [`OverlaydFrame::Response`] echoing that `id`, and may at
25//! any time push an unsolicited [`OverlaydFrame::Event`].
26//!
27//! ## Wire-type conventions
28//!
29//! - Windows HCN GUIDs cross the wire as **bare lowercase strings**
30//! (`aabbccdd-eeff-...`, no braces) — `windows::core::GUID` is not
31//! `serde`-serializable and `zlayer-types` must not depend on `windows`.
32//! - CIDRs cross as `String` (e.g. `"10.200.0.0/28"`); endpoints as `String`
33//! (`"host:port"`, kept textual so an unresolved/hostname endpoint survives).
34//! - Addresses use [`std::net::IpAddr`] (serde-serializable via `std`).
35
36use std::net::IpAddr;
37
38use serde::{Deserialize, Serialize};
39
40pub use crate::nat_wire::{NatCandidateWire, NatConfigSpec};
41pub use crate::overlay::{OverlayConfig, OverlayMode};
42
/// Version number of the overlayd IPC wire protocol.
///
/// Increment it on every incompatible change to the frame, request, response,
/// or event shapes, so that a daemon/overlayd pair built from different
/// versions can detect the skew rather than quietly misparse each other.
///
/// NOTE(review): none of the frame types visible in this module carry this
/// value on the wire; presumably it is exchanged or checked elsewhere — confirm.
pub const PROTOCOL_VERSION: u32 = 1;
47
48/// A multiplexed frame on the overlayd IPC connection.
49#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
50#[serde(tag = "frame", rename_all = "snake_case")]
51pub enum OverlaydFrame {
52 /// Main daemon → overlayd. `id` is echoed back on the matching response.
53 Request { id: u64, request: OverlaydRequest },
54 /// overlayd → main daemon, answering the request with the same `id`.
55 Response { id: u64, response: OverlaydResponse },
56 /// overlayd → main daemon, unsolicited (no `id`).
57 Event(OverlaydEvent),
58}
59
60/// Identifies the container overlayd must wire into the overlay. The agent
61/// owns the container's process/compute-system lifecycle and hands overlayd
62/// just enough to attach it.
63#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
64#[serde(tag = "platform", rename_all = "snake_case")]
65pub enum AttachHandle {
66 /// Linux: the container's PID. overlayd opens `/proc/<pid>/ns/net` and
67 /// creates the veth pair into that network namespace.
68 LinuxPid { pid: u32 },
69 /// Windows: the HCS container id (+ the IP the agent reserved, if any).
70 /// overlayd creates the HCN endpoint + per-container namespace on its HCN
71 /// Internal network and returns the bare-lowercase namespace GUID
72 /// ([`AttachResult::namespace_guid`]) for the agent to embed in the
73 /// compute-system document's `Container.Networking.Namespace`.
74 WindowsContainer {
75 container_id: String,
76 #[serde(default, skip_serializing_if = "Option::is_none")]
77 ip: Option<IpAddr>,
78 },
79 /// A guest that manages its own overlay interface (the macOS VZ-Linux VM
80 /// runtime). overlayd cannot enter the guest's netns (it is a VM, not a host
81 /// process), so instead of building a veth it **allocates the overlay
82 /// identity** — keypair, address, and the current peer set — registers the
83 /// generated public key in the mesh, and returns it as
84 /// [`OverlaydResponse::GuestConfig`]. The caller ships that config into the
85 /// guest (over vsock) where a kernel `WireGuard` device is brought up. `id` is
86 /// the opaque container id used to scope the allocation + the registered
87 /// peer so `DetachContainer` can release it.
88 GuestManaged { id: String },
89 /// Host-shared native macOS container (Seatbelt, native-VZ, libkrun): no
90 /// per-container netns/PID and no in-guest `WireGuard`. overlayd allocates a
91 /// distinct overlay `/32` from the node slice and adds it as a `utun` alias
92 /// so it is locally deliverable; the agent then forwards
93 /// `<overlay_ip>:port` to the container's local delivery address. Detach +
94 /// bookkeeping are keyed by `id` (like `GuestManaged`).
95 HostShared { id: String },
96}
97
98/// A request from the main daemon to overlayd.
99#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
100#[serde(tag = "op", rename_all = "snake_case")]
101pub enum OverlaydRequest {
102 /// Push this node's Raft id (cluster-brain context overlayd scopes by).
103 SetLocalNodeId { node_id: u64 },
104 /// Push this node's `WireGuard` public key (base64).
105 SetLocalWgPubkey { pubkey: String },
106
107 /// Bring up (or reuse) this node's base/global overlay. Idempotent: if the
108 /// overlay network already exists (recorded in overlayd's marker), it is
109 /// reused rather than recreated. This is the only place the base overlay
110 /// is created; it is torn down only on a full uninstall.
111 SetupGlobalOverlay {
112 deployment: String,
113 instance_id: String,
114 /// Full cluster CIDR, e.g. `"10.200.0.0/16"`.
115 cluster_cidr: String,
116 /// This node's per-node slice, e.g. `"10.200.0.0/28"`. `None` until the
117 /// leader assigns one.
118 #[serde(default, skip_serializing_if = "Option::is_none")]
119 slice_cidr: Option<String>,
120 wg_port: u16,
121 /// When true, a host-adapter (utun/Wintun) bringup failure is FATAL
122 /// instead of degrading to a VM-only overlay. Set by the daemon when the
123 /// node runs a host-shared runtime (macOS Seatbelt/native-VZ/libkrun)
124 /// where the host adapter IS the container data path. `#[serde(default)]`
125 /// keeps a pre-field daemon's payload decoding (false = old behavior).
126 #[serde(default)]
127 host_adapter_mandatory: bool,
128 /// Full NAT-traversal configuration for this node's overlay.
129 ///
130 /// `None` (or any omitted sub-field) means "no explicit NAT config" and
131 /// overlayd falls back to its built-in `NatConfig::default()`. This
132 /// replaced the previous `nat_enabled: bool`, which silently dropped the
133 /// operator's `--stun-server`/`--turn-server`/`--relay-server-bind`
134 /// flags (overlayd only ever saw the enabled toggle). `#[serde(default)]`
135 /// keeps a pre-`nat` daemon's payload (no `nat` field) decoding cleanly.
136 #[serde(default, skip_serializing_if = "Option::is_none")]
137 nat: Option<NatConfigSpec>,
138 },
139 /// Tear down the node's base overlay (e.g. on full uninstall).
140 TeardownGlobalOverlay,
141
142 /// Create the per-service overlay segment (Linux bridge / Windows HCN
143 /// Internal network) for `service`. Returns [`OverlaydResponse::BridgeName`].
144 SetupServiceOverlay { service: String, mode: OverlayMode },
145 /// Remove the per-service overlay segment.
146 TeardownServiceOverlay { service: String },
147
148 /// Allocate (or, with `ip` set on a later attach, validate) an overlay IP
149 /// from the node slice for a container on `service`.
150 AllocateIp { service: String, join_global: bool },
151 /// Return an overlay IP to the allocator.
152 ReleaseIp { ip: IpAddr },
153
154 /// Wire a container into the overlay. Returns [`OverlaydResponse::Attached`].
155 AttachContainer {
156 handle: AttachHandle,
157 service: String,
158 join_global: bool,
159 /// When true, overlayd reclaims the per-service bridge once the LAST
160 /// container detaches (ephemeral/per-job networks). When false, the bridge
161 /// persists across scale-to-0 (managed services). Defaults false for
162 /// back-compat with older clients.
163 #[serde(default)]
164 ephemeral: bool,
165 /// When `Some(network)`, this attach joins the named **isolated** network:
166 /// overlayd records the member in that network's membership set and
167 /// enforces Docker-style L3 isolation (the member reaches its own
168 /// network's members + the daemon node IP + egress, but NOT other
169 /// networks' members or arbitrary cluster overlay IPs). `None` = the flat
170 /// cluster mesh (today's behavior). Defaults `None` for older clients.
171 #[serde(default, skip_serializing_if = "Option::is_none")]
172 isolation_network: Option<String>,
173 #[serde(default, skip_serializing_if = "Option::is_none")]
174 dns_server: Option<IpAddr>,
175 #[serde(default, skip_serializing_if = "Option::is_none")]
176 dns_domain: Option<String>,
177 },
178 /// Tear down a container's overlay attachment and release its IP.
179 DetachContainer { handle: AttachHandle },
180
181 /// Add a `WireGuard` peer to the base overlay.
182 AddPeer {
183 #[serde(flatten)]
184 peer: PeerSpec,
185 #[serde(default)]
186 scope: PeerScope,
187 },
188 /// Remove a peer by its base64 public key.
189 RemovePeer {
190 pubkey: String,
191 #[serde(default)]
192 scope: PeerScope,
193 },
194 /// Plumb a service subnet into a peer's `AllowedIPs`.
195 AddAllowedIp {
196 pubkey: String,
197 cidr: String,
198 #[serde(default)]
199 scope: PeerScope,
200 },
201 /// Remove a service subnet from a peer's `AllowedIPs`.
202 RemoveAllowedIp {
203 pubkey: String,
204 cidr: String,
205 #[serde(default)]
206 scope: PeerScope,
207 },
208
209 /// Register an overlay DNS A/AAAA record.
210 RegisterDns { name: String, ip: IpAddr },
211 /// Remove an overlay DNS record.
212 UnregisterDns { name: String },
213
214 /// Write a macOS `/etc/resolver/<zone>` scoped-resolver file pointing at the
215 /// node's overlay DNS. Privileged (root-only path); the rootless daemon asks
216 /// the ROOT overlayd to perform it. macOS-only handler; no-op elsewhere.
217 WriteScopedResolver {
218 zone: String,
219 node_ip: IpAddr,
220 #[serde(default, skip_serializing_if = "Option::is_none")]
221 port: Option<u16>,
222 },
223 /// Remove a macOS `/etc/resolver/<zone>` scoped-resolver file.
224 RemoveScopedResolver { zone: String },
225
226 /// Reclaim orphaned per-service host bridges (and their stale device/
227 /// container veths) that no live deployment still owns. The daemon computes
228 /// `live_bridge_names` from storage — the full set of `zl-…-b` bridge names
229 /// every currently-restored service SHOULD own — and overlayd deletes every
230 /// `zl-…-b` bridge link NOT in that set (plus releases its subnet/AllowedIPs
231 /// when recoverable), so a bridge left behind by a crashed/forgotten
232 /// deployment is swept on the next daemon startup. Names are passed (rather
233 /// than overlayd reaching into storage) to keep overlayd storage-free.
234 /// Returns [`OverlaydResponse::PrunedBridges`].
235 PruneOrphanBridges { live_bridge_names: Vec<String> },
236
237 /// Snapshot overlay state for diagnostics. Returns [`OverlaydResponse::Status`].
238 Status,
239 /// Run one NAT-traversal maintenance tick (probe/refresh endpoints).
240 NatTick,
241 /// Snapshot the live NAT-traversal state (local candidates, per-peer
242 /// connection types, last refresh). Returns [`OverlaydResponse::NatStatus`].
243 NatStatus,
244 /// Ask overlayd to shut down gracefully (drops the adapter).
245 Shutdown,
246}
247
248/// overlayd's answer to an [`OverlaydRequest`].
249#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
250#[serde(tag = "result", rename_all = "snake_case")]
251pub enum OverlaydResponse {
252 /// Generic success with no payload.
253 Ok,
254 /// An allocated/validated overlay IP (`AllocateIp`).
255 Ip { ip: IpAddr },
256 /// A completed container attach.
257 Attached(AttachResult),
258 /// The overlay identity for a guest-managed attach
259 /// ([`AttachHandle::GuestManaged`]): the keypair, allocated address, and the
260 /// peer set the guest should configure on its own `WireGuard` device.
261 GuestConfig(GuestOverlayConfig),
262 /// The interface/bridge/network name created (`SetupServiceOverlay`,
263 /// `SetupGlobalOverlay`).
264 BridgeName { name: String },
265 /// A diagnostics snapshot (`Status`).
266 Status(StatusSnapshot),
267 /// A live NAT-traversal snapshot (`NatStatus`).
268 NatStatus(NatStatusWire),
269 /// The orphaned bridges/veths reclaimed by `PruneOrphanBridges`.
270 PrunedBridges { reclaimed: Vec<String> },
271 /// A dedicated per-service overlay device's identity (`SetupServiceOverlay`
272 /// in Dedicated mode). Not yet produced by the server — the server still
273 /// returns [`OverlaydResponse::BridgeName`] for now; this variant is the
274 /// wire contract for a later task that switches Dedicated setup over.
275 ServiceOverlay(ServiceOverlayInfo),
276 /// The request failed; `message` is a human-readable reason.
277 Err { message: String },
278}
279
280/// Identity of a dedicated per-service overlay device, reported by
281/// `SetupServiceOverlay` once Dedicated mode is wired up. Shared-mode setups
282/// leave the `wg_*`/`overlay_ip`/`subnet` fields `None`.
283#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
284pub struct ServiceOverlayInfo {
285 pub name: String,
286 pub mode: crate::overlay::OverlayMode,
287 #[serde(default, skip_serializing_if = "Option::is_none")]
288 pub wg_public_key: Option<String>,
289 #[serde(default, skip_serializing_if = "Option::is_none")]
290 pub wg_port: Option<u16>,
291 #[serde(default, skip_serializing_if = "Option::is_none")]
292 pub overlay_ip: Option<std::net::IpAddr>,
293 #[serde(default, skip_serializing_if = "Option::is_none")]
294 pub subnet: Option<String>,
295}
296
297/// Result of [`OverlaydRequest::AttachContainer`].
298#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
299pub struct AttachResult {
300 /// The container's overlay IP.
301 pub ip: IpAddr,
302 /// Windows only: the bare-lowercase HCN namespace GUID the agent embeds in
303 /// the compute-system document. `None` on Linux (no HCN namespace).
304 #[serde(default, skip_serializing_if = "Option::is_none")]
305 pub namespace_guid: Option<String>,
306}
307
308/// Overlay identity returned for a guest-managed attach
309/// ([`AttachHandle::GuestManaged`] → [`OverlaydResponse::GuestConfig`]).
310///
311/// The host allocated the address from the node slice, generated the keypair,
312/// and registered `public_key` in the mesh (so peers route to the guest). The
313/// caller ships everything except `public_key` into the guest; `public_key` is
314/// echoed back so the caller can record/deregister the peer it represents.
315#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
316pub struct GuestOverlayConfig {
317 /// The guest's allocated overlay address.
318 pub overlay_ip: IpAddr,
319 /// Prefix length of the overlay network (interface address + on-link route).
320 pub prefix_len: u8,
321 /// Base64 `WireGuard` private key for the guest's overlay endpoint.
322 pub private_key: String,
323 /// Base64 `WireGuard` public key matching `private_key` (registered in the
324 /// mesh by overlayd; echoed for the caller's bookkeeping).
325 pub public_key: String,
326 /// UDP port the guest's `WireGuard` device should listen on.
327 pub listen_port: u16,
328 /// The peers the guest should configure (other nodes/containers).
329 pub peers: Vec<PeerSpec>,
330 /// Overlay DNS resolver IP for the container, if any.
331 #[serde(default, skip_serializing_if = "Option::is_none")]
332 pub dns_server: Option<IpAddr>,
333 /// Overlay DNS search domain, if any.
334 #[serde(default, skip_serializing_if = "Option::is_none")]
335 pub dns_domain: Option<String>,
336}
337
338/// Which overlay device a peer / `AllowedIP` op targets. `Global` (default, and
339/// the only value pre-Dedicated senders emit) = the single cluster transport.
340/// `Service` = that service's dedicated per-service transport.
341#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
342#[serde(tag = "scope", rename_all = "snake_case")]
343pub enum PeerScope {
344 #[default]
345 Global,
346 Service {
347 service: String,
348 },
349}
350
351/// A `WireGuard` peer to add to the base overlay. Mirrors
352/// `zlayer_overlay::PeerInfo` but with wire-safe field types.
353#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
354pub struct PeerSpec {
355 /// base64 `WireGuard` public key.
356 pub public_key: String,
357 /// `host:port` (textual so an unresolved/hostname endpoint survives).
358 pub endpoint: String,
359 /// Comma-separated CIDR list (e.g. `"10.200.0.5/32,10.200.1.0/24"`).
360 pub allowed_ips: String,
361 /// Persistent-keepalive interval, in seconds (0 = disabled).
362 pub persistent_keepalive_secs: u64,
363 /// NAT-traversal candidates the peer advertised at join time (host /
364 /// server-reflexive / relay addresses it can be reached on). overlayd feeds
365 /// these into `NatTraversal::connect_to_peer` to hole-punch / relay toward
366 /// the peer when a direct endpoint doesn't establish a handshake. Empty for
367 /// a pre-NAT sender; `#[serde(default)]` keeps such payloads decoding.
368 #[serde(default, skip_serializing_if = "Vec::is_empty")]
369 pub candidates: Vec<NatCandidateWire>,
370}
371
372/// An unsolicited notification pushed from overlayd to the main daemon.
373#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
374#[serde(tag = "event", rename_all = "snake_case")]
375pub enum OverlaydEvent {
376 /// A peer's liveness changed (handshake seen / lost).
377 PeerHealthChanged { pubkey: String, healthy: bool },
378 /// NAT traversal moved a peer to a new endpoint.
379 NatEndpointChanged { pubkey: String, endpoint: String },
380}
381
382/// Diagnostics snapshot returned by [`OverlaydRequest::Status`].
383#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
384pub struct StatusSnapshot {
385 /// Base overlay interface name (e.g. `"zl-overlay0"`), if up.
386 #[serde(default, skip_serializing_if = "Option::is_none")]
387 pub interface: Option<String>,
388 /// This node's overlay IP, if assigned.
389 #[serde(default, skip_serializing_if = "Option::is_none")]
390 pub node_ip: Option<IpAddr>,
391 /// This node's `WireGuard` public key (base64), if up.
392 #[serde(default, skip_serializing_if = "Option::is_none")]
393 pub public_key: Option<String>,
394 /// Full cluster CIDR.
395 #[serde(default, skip_serializing_if = "Option::is_none")]
396 pub overlay_cidr: Option<String>,
397 /// This node's per-node slice CIDR.
398 #[serde(default, skip_serializing_if = "Option::is_none")]
399 pub slice_cidr: Option<String>,
400 /// Number of base-overlay peers.
401 pub peer_count: u32,
402 /// Number of per-service overlays set up on this node.
403 pub service_count: u32,
404 /// Per-peer status.
405 #[serde(default, skip_serializing_if = "Vec::is_empty")]
406 pub peers: Vec<PeerStatus>,
407 /// Per dedicated per-service overlay device status. Empty unless one or
408 /// more services run in `OverlayMode::Dedicated` on this node.
409 #[serde(default, skip_serializing_if = "Vec::is_empty")]
410 pub dedicated_services: Vec<DedicatedServiceStatus>,
411}
412
413/// Status of a single dedicated per-service overlay device within a
414/// [`StatusSnapshot`].
415#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
416pub struct DedicatedServiceStatus {
417 pub service: String,
418 pub interface: String,
419 pub public_key: String,
420 pub listen_port: u16,
421 pub overlay_ip: std::net::IpAddr,
422 pub subnet: String,
423 pub peer_count: u32,
424}
425
426/// Per-peer status within a [`StatusSnapshot`].
427#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
428pub struct PeerStatus {
429 pub public_key: String,
430 pub endpoint: String,
431 pub allowed_ips: String,
432 /// Last successful handshake, Unix seconds; `0` if never.
433 pub last_handshake_unix_secs: i64,
434}
435
436/// Live NAT-traversal snapshot returned by [`OverlaydRequest::NatStatus`].
437///
438/// Wire mirror of `zlayer_overlay::NatStatusSnapshot` (which `zlayer-types`
439/// cannot reference — it lives behind the `nat` feature). The agent shim
440/// converts this back into `NatStatusSnapshot` for the API layer.
441#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
442pub struct NatStatusWire {
443 /// Locally gathered ICE candidates.
444 #[serde(default, skip_serializing_if = "Vec::is_empty")]
445 pub candidates: Vec<NatCandidateWire>,
446 /// Per-peer NAT connectivity entries.
447 #[serde(default, skip_serializing_if = "Vec::is_empty")]
448 pub peers: Vec<NatPeerWire>,
449 /// Unix-epoch seconds of the last successful candidate gather / STUN refresh.
450 #[serde(default)]
451 pub last_refresh: u64,
452}
453
454/// Per-peer NAT connectivity entry within a [`NatStatusWire`].
455#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
456pub struct NatPeerWire {
457 /// Peer node id, or the base64 public key when no node id is available.
458 pub node_id: String,
459 /// Connection type as a lowercase string
460 /// (`"direct"` / `"hole-punched"` / `"relayed"` / `"unreachable"`).
461 pub connection_type: String,
462 /// Selected remote endpoint (`host:port`), if one has been negotiated.
463 #[serde(default, skip_serializing_if = "Option::is_none")]
464 pub remote_endpoint: Option<String>,
465}
466
#[cfg(test)]
mod tests {
    use super::*;
    use crate::nat_wire::{RelayServerSpec, TurnServerSpec};

    /// A frame round-trips through JSON unchanged (the core wire guarantee).
    fn roundtrip(frame: &OverlaydFrame) {
        let json = serde_json::to_string(frame).expect("serialize");
        let back: OverlaydFrame = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(frame, &back, "frame must round-trip; json was {json}");
    }

    /// Wraps `request` in a request frame with the given `id` and round-trips it.
    fn roundtrip_request(id: u64, request: OverlaydRequest) {
        roundtrip(&OverlaydFrame::Request { id, request });
    }

    /// Wraps `response` in a response frame with the given `id` and round-trips it.
    fn roundtrip_response(id: u64, response: OverlaydResponse) {
        roundtrip(&OverlaydFrame::Response { id, response });
    }

    /// Parses an IP literal used as test data.
    fn ip(literal: &str) -> IpAddr {
        literal.parse().expect("test IP literal must parse")
    }

    /// The service-scoped `PeerScope` used throughout the peer-op tests.
    fn web_scope() -> PeerScope {
        PeerScope::Service {
            service: "web".into(),
        }
    }

    /// A peer carrying two NAT candidates.
    fn sample_peer() -> PeerSpec {
        let host = NatCandidateWire {
            candidate_type: "host".into(),
            address: "192.168.1.5:51820".into(),
            priority: 100,
        };
        let reflexive = NatCandidateWire {
            candidate_type: "server-reflexive".into(),
            address: "203.0.113.5:51820".into(),
            priority: 50,
        };
        PeerSpec {
            public_key: "base64key".into(),
            endpoint: "1.2.3.4:51820".into(),
            allowed_ips: "10.200.0.2/32".into(),
            persistent_keepalive_secs: 25,
            candidates: vec![host, reflexive],
        }
    }

    #[test]
    fn request_frames_round_trip() {
        let nat = NatConfigSpec {
            enabled: true,
            stun_servers: vec!["stun.l.google.com:19302".into()],
            turn_servers: vec![TurnServerSpec {
                addr: "turn.example.com:3478".into(),
                username: "u".into(),
                credential: "p".into(),
            }],
            hole_punch_timeout_secs: 15,
            stun_refresh_interval_secs: 60,
            max_candidate_pairs: 10,
            relay_server: Some(RelayServerSpec {
                listen_port: 3478,
                external_addr: "1.2.3.4:3478".into(),
                max_sessions: 100,
                auth_credential: Some("cluster-secret".into()),
            }),
        };
        roundtrip_request(
            1,
            OverlaydRequest::SetupGlobalOverlay {
                deployment: "prod".into(),
                instance_id: "42".into(),
                cluster_cidr: "10.200.0.0/16".into(),
                slice_cidr: Some("10.200.0.0/28".into()),
                wg_port: 51820,
                host_adapter_mandatory: true,
                nat: Some(nat),
            },
        );
        roundtrip_request(
            2,
            OverlaydRequest::AttachContainer {
                handle: AttachHandle::WindowsContainer {
                    container_id: "ctr-abc".into(),
                    ip: Some(ip("10.200.0.5")),
                },
                service: "web".into(),
                join_global: false,
                ephemeral: false,
                isolation_network: None,
                dns_server: Some(ip("10.200.0.1")),
                dns_domain: Some("overlay".into()),
            },
        );
        roundtrip_request(
            3,
            OverlaydRequest::AttachContainer {
                handle: AttachHandle::LinuxPid { pid: 4242 },
                service: "web".into(),
                join_global: true,
                ephemeral: false,
                isolation_network: Some("job-net".into()),
                dns_server: None,
                dns_domain: None,
            },
        );
    }

    #[test]
    fn response_and_event_frames_round_trip() {
        roundtrip_response(
            2,
            OverlaydResponse::Attached(AttachResult {
                ip: ip("10.200.0.5"),
                namespace_guid: Some("aabbccdd-eeff-0011-2233-445566778899".into()),
            }),
        );
        roundtrip_response(
            9,
            OverlaydResponse::Err {
                message: "no slice assigned".into(),
            },
        );
        roundtrip(&OverlaydFrame::Event(OverlaydEvent::PeerHealthChanged {
            pubkey: "base64key".into(),
            healthy: false,
        }));
    }

    /// The guest-managed / host-shared attach handles, the `GuestConfig`
    /// response, and the NAT endpoint event round-trip as well.
    #[test]
    fn guest_managed_and_host_shared_round_trip() {
        roundtrip_request(
            50,
            OverlaydRequest::AttachContainer {
                handle: AttachHandle::GuestManaged {
                    id: "ctr-vm".into(),
                },
                service: "web".into(),
                join_global: true,
                ephemeral: true,
                isolation_network: None,
                dns_server: None,
                dns_domain: None,
            },
        );
        roundtrip_request(
            51,
            OverlaydRequest::DetachContainer {
                handle: AttachHandle::HostShared {
                    id: "ctr-host".into(),
                },
            },
        );
        roundtrip_response(
            50,
            OverlaydResponse::GuestConfig(GuestOverlayConfig {
                overlay_ip: ip("10.200.0.6"),
                prefix_len: 16,
                private_key: "guest-private".into(),
                public_key: "guest-public".into(),
                listen_port: 51820,
                peers: vec![sample_peer()],
                dns_server: Some(ip("10.200.0.1")),
                dns_domain: None,
            }),
        );
        roundtrip(&OverlaydFrame::Event(OverlaydEvent::NatEndpointChanged {
            pubkey: "base64key".into(),
            endpoint: "203.0.113.9:51820".into(),
        }));
    }

    #[test]
    fn prune_orphan_bridges_round_trips() {
        roundtrip_request(
            30,
            OverlaydRequest::PruneOrphanBridges {
                live_bridge_names: vec!["zl-prod-0-web-b".into(), "zl-prod-0-api-b".into()],
            },
        );
        roundtrip_response(
            30,
            OverlaydResponse::PrunedBridges {
                reclaimed: vec!["zl-1ca4568944-b".into(), "zl-81c6bc17c7-b".into()],
            },
        );
    }

    #[test]
    fn status_snapshot_round_trips_and_defaults() {
        roundtrip_response(
            7,
            OverlaydResponse::Status(StatusSnapshot {
                interface: Some("zl-overlay0".into()),
                node_ip: Some(ip("10.200.0.1")),
                peer_count: 2,
                service_count: 1,
                peers: vec![PeerStatus {
                    public_key: "k".into(),
                    endpoint: "1.2.3.4:51820".into(),
                    allowed_ips: "10.200.0.2/32".into(),
                    last_handshake_unix_secs: 0,
                }],
                ..StatusSnapshot::default()
            }),
        );

        // Defaults: a payload carrying only the two mandatory counters decodes
        // to the default snapshot (every optional/list field falls back).
        let json = r#"{
            "frame": "response",
            "id": 8,
            "response": { "result": "status", "peer_count": 0, "service_count": 0 }
        }"#;
        let frame: OverlaydFrame = serde_json::from_str(json).expect("deserialize");
        assert_eq!(
            frame,
            OverlaydFrame::Response {
                id: 8,
                response: OverlaydResponse::Status(StatusSnapshot::default()),
            }
        );

        // ...and the default snapshot serializes without any optional key.
        let encoded = serde_json::to_string(&StatusSnapshot::default()).expect("serialize");
        assert_eq!(encoded, r#"{"peer_count":0,"service_count":0}"#);
    }

    #[test]
    fn peer_ops_round_trip_both_scopes() {
        // Each op is exercised with the global (default) scope first, then
        // with a service scope; ids run 1..=8 in that order.
        let mut requests = Vec::new();
        for scope in [PeerScope::Global, web_scope()] {
            requests.push(OverlaydRequest::AddPeer {
                peer: sample_peer(),
                scope,
            });
        }
        for scope in [PeerScope::Global, web_scope()] {
            requests.push(OverlaydRequest::RemovePeer {
                pubkey: "k".into(),
                scope,
            });
        }
        for scope in [PeerScope::Global, web_scope()] {
            requests.push(OverlaydRequest::AddAllowedIp {
                pubkey: "k".into(),
                cidr: "10.200.1.0/24".into(),
                scope,
            });
        }
        for scope in [PeerScope::Global, web_scope()] {
            requests.push(OverlaydRequest::RemoveAllowedIp {
                pubkey: "k".into(),
                cidr: "10.200.1.0/24".into(),
                scope,
            });
        }
        for (id, request) in (1u64..).zip(requests) {
            roundtrip_request(id, request);
        }
    }

    #[test]
    fn add_peer_without_scope_defaults_to_global() {
        // A pre-Dedicated sender emits no `scope` field. The frame is tagged
        // `frame: "request"`, the request `op: "add_peer"`, and `PeerSpec` is
        // flattened so its fields sit at the request level.
        let json = r#"{
            "frame": "request",
            "id": 11,
            "request": {
                "op": "add_peer",
                "public_key": "base64key",
                "endpoint": "1.2.3.4:51820",
                "allowed_ips": "10.200.0.2/32",
                "persistent_keepalive_secs": 25
            }
        }"#;
        let frame: OverlaydFrame = serde_json::from_str(json).expect("deserialize");
        let OverlaydFrame::Request {
            request: OverlaydRequest::AddPeer { scope, peer },
            ..
        } = frame
        else {
            panic!("expected AddPeer request, got {frame:?}");
        };
        assert_eq!(scope, PeerScope::Global);
        assert_eq!(peer.public_key, "base64key");
        // A pre-NAT sender omits `candidates`; it must default to empty.
        assert!(peer.candidates.is_empty());
    }

    /// `SetupGlobalOverlay` from a pre-`nat` daemon omits the `nat` field
    /// entirely; it must deserialize to `nat: None` (overlayd then uses its
    /// built-in `NatConfig::default()`), proving the wire change is backward
    /// compatible.
    #[test]
    fn setup_global_overlay_without_nat_field_defaults_to_none() {
        let json = r#"{
            "frame": "request",
            "id": 12,
            "request": {
                "op": "setup_global_overlay",
                "deployment": "prod",
                "instance_id": "1",
                "cluster_cidr": "10.200.0.0/16",
                "wg_port": 51820
            }
        }"#;
        let frame: OverlaydFrame = serde_json::from_str(json).expect("deserialize");
        let OverlaydFrame::Request {
            request: OverlaydRequest::SetupGlobalOverlay { nat, .. },
            ..
        } = frame
        else {
            panic!("expected SetupGlobalOverlay, got {frame:?}");
        };
        assert!(nat.is_none(), "missing nat field must default to None");
    }

    /// An `AddPeer` carrying NAT candidates round-trips with the candidate list
    /// intact (the join-time candidate exchange the connect-half relies on).
    #[test]
    fn add_peer_with_candidates_round_trips() {
        roundtrip_request(
            40,
            OverlaydRequest::AddPeer {
                peer: sample_peer(),
                scope: PeerScope::Global,
            },
        );
    }

    /// The `NatStatus` request and its `NatStatus` response round-trip, including
    /// candidates + per-peer connection types.
    #[test]
    fn nat_status_request_and_response_round_trip() {
        roundtrip_request(41, OverlaydRequest::NatStatus);
        roundtrip_response(
            41,
            OverlaydResponse::NatStatus(NatStatusWire {
                candidates: vec![NatCandidateWire {
                    candidate_type: "host".into(),
                    address: "192.168.1.5:51820".into(),
                    priority: 100,
                }],
                peers: vec![NatPeerWire {
                    node_id: "base64peerkey".into(),
                    connection_type: "hole-punched".into(),
                    remote_endpoint: Some("203.0.113.9:51820".into()),
                }],
                last_refresh: 1_700_000_000,
            }),
        );
        // Empty snapshot round-trips too (default shape).
        roundtrip_response(42, OverlaydResponse::NatStatus(NatStatusWire::default()));
    }

    #[test]
    fn service_overlay_response_round_trips_both_shapes() {
        // Shared shape: identity fields are None.
        roundtrip_response(
            20,
            OverlaydResponse::ServiceOverlay(ServiceOverlayInfo {
                name: "web".into(),
                mode: crate::overlay::OverlayMode::Shared,
                wg_public_key: None,
                wg_port: None,
                overlay_ip: None,
                subnet: None,
            }),
        );
        // Dedicated shape: all identity fields populated.
        roundtrip_response(
            21,
            OverlaydResponse::ServiceOverlay(ServiceOverlayInfo {
                name: "web".into(),
                mode: crate::overlay::OverlayMode::Dedicated,
                wg_public_key: Some("svc-key".into()),
                wg_port: Some(51821),
                overlay_ip: Some(ip("10.201.0.1")),
                subnet: Some("10.201.0.0/24".into()),
            }),
        );
    }

    #[test]
    fn status_snapshot_with_dedicated_service_round_trips() {
        roundtrip_response(
            22,
            OverlaydResponse::Status(StatusSnapshot {
                interface: Some("zl-overlay0".into()),
                node_ip: Some(ip("10.200.0.1")),
                peer_count: 1,
                service_count: 1,
                dedicated_services: vec![DedicatedServiceStatus {
                    service: "web".into(),
                    interface: "zl-svc-web0".into(),
                    public_key: "svc-key".into(),
                    listen_port: 51821,
                    overlay_ip: ip("10.201.0.1"),
                    subnet: "10.201.0.0/24".into(),
                    peer_count: 3,
                }],
                ..StatusSnapshot::default()
            }),
        );
    }
}