zlayer_agent/overlay_manager.rs
1//! Thin overlayd client shim.
2//!
3//! Historically `OverlayManager` owned every mechanism touching the
4//! overlay/network plane (the cluster `WireGuard` transport, per-service Linux
5//! bridges, veth/netns attach, the Windows HCN Internal network + endpoints,
6//! IPAM, DNS, NAT). All of that machinery was migrated wholesale into the
7//! standalone `zlayer-overlayd` daemon (`crates/zlayer-overlayd/src/server.rs`).
8//!
9//! What remains here is a **client shim**: it keeps only cluster-brain / cached
10//! state (deployment name, instance id, local node id, local wg pubkey, and
11//! cached status values such as `node_ip`/`dns`/`cidr`) and forwards every
12//! mechanical operation to overlayd over the IPC client
13//! [`zlayer_overlayd::OverlaydClient`]. Every public method keeps the exact
14//! signature it had before the migration so existing callers compile unchanged;
15//! the body simply builds the matching [`OverlaydRequest`], issues
16//! `client.call(req)`, and maps the response.
17//!
18//! On Windows, the manager additionally maintains a small `hcn_cleanup` map
19//! (HCN namespace GUID -> (`service_name`, `allocated_ip`)) so that
20//! agent-side bookkeeping for autoclean attaches survives even though the
21//! authoritative HCN state lives in overlayd. The map is populated on
22//! `attach_container_hcn(autoclean = true)` and drained on
23//! `detach_container_hcn`.
24
25use crate::error::AgentError;
26use ipnetwork::IpNetwork;
27use std::collections::hash_map::DefaultHasher;
28use std::hash::{Hash, Hasher};
29use std::net::{IpAddr, SocketAddr};
30use std::path::PathBuf;
31use std::sync::Arc;
32use tokio::sync::Mutex;
33use zlayer_overlay::{NatConfig, NatPeerSnapshot, NatStatusSnapshot};
34use zlayer_overlayd::OverlaydClient;
35use zlayer_paths::ZLayerDirs;
36use zlayer_types::overlayd::{
37 AttachHandle, OverlaydRequest, OverlaydResponse, PeerSpec, StatusSnapshot,
38};
39
/// Longest interface name Linux accepts: `IFNAMSIZ` minus the trailing NUL byte.
const MAX_IFNAME_LEN: usize = 15;

/// Build a Linux-safe interface name of at most [`MAX_IFNAME_LEN`] bytes.
///
/// The readable form is `"zl-"` followed by `parts` joined with `-`, plus
/// `-{suffix}` when `suffix` is non-empty. If that form fits within the limit
/// it is returned as-is. Otherwise the name is replaced by a deterministic
/// hex digest of every part and the suffix:
///
/// * no suffix, or a suffix so long that no digest byte would fit next to it:
///   `zl-<up to 12 hex chars>` (the suffix is dropped from the text but still
///   contributes to the digest);
/// * otherwise: `zl-<digest prefix>-<suffix>`, with the digest prefix sized so
///   the whole name stays within the limit.
///
/// Kept in the agent (and re-exported from the crate root) because callers
/// outside the overlay machinery — notably `runtimes/wsl2_delegate.rs` — still
/// use it for deterministic naming. overlayd carries its own private copy for
/// the names it generates server-side; the two must stay identical.
#[must_use]
pub fn make_interface_name(parts: &[&str], suffix: &str) -> String {
    /// First `limit` bytes of `digest`, or all of it when it is shorter.
    fn head(digest: &str, limit: usize) -> &str {
        &digest[..limit.min(digest.len())]
    }

    // Readable form first: "zl-" + joined parts (+ "-suffix").
    let mut readable = String::from("zl-");
    readable.push_str(&parts.join("-"));
    if !suffix.is_empty() {
        readable.push('-');
        readable.push_str(suffix);
    }
    if readable.len() <= MAX_IFNAME_LEN {
        return readable;
    }

    // Too long for the kernel: fall back to a digest of all inputs.
    let mut state = DefaultHasher::new();
    parts.iter().for_each(|part| part.hash(&mut state));
    suffix.hash(&mut state);
    let digest = format!("{:x}", state.finish());

    // Bytes available to the digest when only the "zl-" prefix surrounds it.
    let bare_room = MAX_IFNAME_LEN - 3;
    // Bytes available when "-suffix" must fit as well; zero means there is no
    // suffix to append or no room left for even one digest byte.
    let suffixed_room = if suffix.is_empty() {
        0
    } else {
        MAX_IFNAME_LEN.saturating_sub(3 + 1 + suffix.len())
    };

    if suffixed_room == 0 {
        format!("zl-{}", head(&digest, bare_room))
    } else {
        format!("zl-{}-{}", head(&digest, suffixed_room), suffix)
    }
}
91
92/// Map a `zlayer_overlayd` client error into the agent's error type.
93fn map_overlayd_err(e: &zlayer_overlayd::OverlaydError) -> AgentError {
94 AgentError::Network(format!("overlayd: {e}"))
95}
96
97/// Convert a live [`zlayer_overlay::PeerInfo`] into the wire-safe [`PeerSpec`]
98/// the overlayd IPC contract expects. Shared by every `add_*_peer` shim so the
99/// global and per-service paths build identical specs.
100fn peer_spec_from(peer: &zlayer_overlay::PeerInfo) -> PeerSpec {
101 PeerSpec {
102 public_key: peer.public_key.clone(),
103 endpoint: peer.endpoint.to_string(),
104 allowed_ips: peer.allowed_ips.clone(),
105 persistent_keepalive_secs: peer.persistent_keepalive_interval.as_secs(),
106 }
107}
108
109/// Manages overlay networks for a deployment by delegating all mechanics to the
110/// `zlayer-overlayd` daemon.
111///
112/// This struct holds only cluster-brain / cached state; the actual overlay
113/// machinery lives in overlayd and is reached through [`OverlayManager::client`].
114pub struct OverlayManager {
115 /// Deployment name (used for network naming).
116 deployment: String,
117 /// Per-daemon-process disambiguator included in overlay link names. Stable
118 /// for the daemon's lifetime; forwarded to overlayd in `SetupGlobalOverlay`.
119 instance_id: String,
120 /// Root data directory; used to resolve the overlayd IPC socket path.
121 data_dir: PathBuf,
122 /// Lazily-connected overlayd IPC client. Wrapped in an `Arc<Mutex<_>>` so
123 /// the manager can be shared behind an `Arc<RwLock<_>>` and still serialize
124 /// request/response round-trips on the single framed connection.
125 client: Mutex<Option<Arc<Mutex<OverlaydClient>>>>,
126 /// Local raft node id, forwarded to overlayd via `SetLocalNodeId`.
127 local_node_id: u64,
128 /// This node's cluster `WireGuard` public key (base64), forwarded to
129 /// overlayd via `SetLocalWgPubkey`. Behind a `Mutex` because the setter
130 /// takes `&self` (callers hold only a read guard at that point).
131 local_wg_pubkey: Mutex<Option<String>>,
132 /// `WireGuard` listen port for the overlay network.
133 overlay_port: u16,
134 /// Cached node overlay IP, populated from `SetupGlobalOverlay`/`Status`.
135 node_ip: Option<IpAddr>,
136 /// Cached global overlay interface name.
137 global_interface: Option<String>,
138 /// Cached full cluster CIDR.
139 cluster_cidr: Option<IpNetwork>,
140 /// Cached per-node slice CIDR.
141 slice_cidr: Option<IpNetwork>,
142 /// Cached overlay DNS server address.
143 dns_server_addr: Option<SocketAddr>,
144 /// Cached overlay DNS zone domain.
145 dns_domain: Option<String>,
146 /// NAT traversal configuration. overlayd owns the live NAT orchestrator;
147 /// this is cached so the daemon can decide whether to drive `NatTick`.
148 nat_config: Option<NatConfig>,
149 /// Override for the `WireGuard` UAPI socket directory. overlayd owns the
150 /// real transport, so this is retained only for API/diagnostic parity.
151 uapi_sock_dir: Option<PathBuf>,
152 /// Map of HCN namespace GUID -> (`service_name`, `allocated_ip`) for autoclean.
153 /// When a Windows container is attached with `autoclean = true`, its entry
154 /// is inserted here; `detach_container_hcn` removes it. overlayd is the
155 /// authoritative owner of the HCN namespace/endpoint state, but the agent
156 /// keeps this side-map so it can answer "what attachments do I still need
157 /// to release on shutdown?" without an IPC round-trip per query.
158 #[cfg(target_os = "windows")]
159 hcn_cleanup: std::sync::Arc<
160 tokio::sync::Mutex<
161 std::collections::HashMap<windows::core::GUID, (String, std::net::IpAddr)>,
162 >,
163 >,
164}
165
166impl OverlayManager {
167 /// Create a new overlay manager for a deployment (legacy single-node path).
168 ///
169 /// Uses the default cluster `/16`. Prefer [`OverlayManager::with_slice`] for
170 /// cluster deployments. The overlayd IPC client is connected lazily on first
171 /// use (via the socket under the system-default data dir).
172 ///
173 /// # Errors
174 /// Infallible today; the `Result` is preserved for ABI parity with callers.
175 ///
176 /// # Panics
177 /// Panics only if the compile-time-constant default CIDR `10.200.0.0/16`
178 /// fails to parse (impossible).
179 #[allow(clippy::unused_async)]
180 pub async fn new(deployment: String, instance_id: String) -> Result<Self, AgentError> {
181 let data_dir = ZLayerDirs::system_default().data_dir().to_path_buf();
182 let default_cidr: IpNetwork = "10.200.0.0/16".parse().expect("compile-time constant CIDR");
183 Ok(Self {
184 deployment,
185 instance_id,
186 data_dir,
187 client: Mutex::new(None),
188 local_node_id: 0,
189 local_wg_pubkey: Mutex::new(None),
190 overlay_port: zlayer_core::DEFAULT_WG_PORT,
191 node_ip: None,
192 global_interface: None,
193 cluster_cidr: Some(default_cidr),
194 slice_cidr: None,
195 dns_server_addr: None,
196 dns_domain: None,
197 nat_config: None,
198 uapi_sock_dir: None,
199 #[cfg(target_os = "windows")]
200 hcn_cleanup: std::sync::Arc::new(tokio::sync::Mutex::new(
201 std::collections::HashMap::new(),
202 )),
203 })
204 }
205
206 /// Create an `OverlayManager` bound to a per-node slice.
207 ///
208 /// `slice_cidr` is the per-node slice owned by this node; `cluster_cidr` is
209 /// the full cluster CIDR. Both are forwarded to overlayd in
210 /// `SetupGlobalOverlay`.
211 #[must_use]
212 pub fn with_slice(
213 deployment: String,
214 cluster_cidr: IpNetwork,
215 slice_cidr: IpNetwork,
216 port: u16,
217 instance_id: String,
218 ) -> Self {
219 let data_dir = ZLayerDirs::system_default().data_dir().to_path_buf();
220 Self {
221 deployment,
222 instance_id,
223 data_dir,
224 client: Mutex::new(None),
225 local_node_id: 0,
226 local_wg_pubkey: Mutex::new(None),
227 overlay_port: port,
228 node_ip: None,
229 global_interface: None,
230 cluster_cidr: Some(cluster_cidr),
231 slice_cidr: Some(slice_cidr),
232 dns_server_addr: None,
233 dns_domain: None,
234 nat_config: None,
235 uapi_sock_dir: None,
236 #[cfg(target_os = "windows")]
237 hcn_cleanup: std::sync::Arc::new(tokio::sync::Mutex::new(
238 std::collections::HashMap::new(),
239 )),
240 }
241 }
242
243 /// Set the `WireGuard` listen port for the overlay network.
244 #[must_use]
245 pub fn with_overlay_port(mut self, port: u16) -> Self {
246 self.overlay_port = port;
247 self
248 }
249
250 /// Set the NAT traversal configuration. overlayd owns the live NAT
251 /// orchestrator; this records the toggle so `SetupGlobalOverlay` can carry
252 /// `nat_enabled` and the daemon can decide whether to drive `NatTick`.
253 #[must_use]
254 pub fn with_nat_config(mut self, nat: NatConfig) -> Self {
255 self.nat_config = Some(nat);
256 self
257 }
258
259 /// Override the `WireGuard` UAPI socket directory. Retained for API parity;
260 /// overlayd owns the real transport's socket directory.
261 #[must_use]
262 pub fn with_uapi_sock_dir(mut self, dir: impl Into<PathBuf>) -> Self {
263 self.uapi_sock_dir = Some(dir.into());
264 self
265 }
266
267 /// Override the data directory used to resolve the overlayd IPC socket.
268 #[must_use]
269 pub fn with_data_dir(mut self, dir: impl Into<PathBuf>) -> Self {
270 self.data_dir = dir.into();
271 self
272 }
273
274 /// Set the local raft node id (builder-style).
275 #[must_use]
276 pub fn with_local_node_id(mut self, node_id: u64) -> Self {
277 self.local_node_id = node_id;
278 self
279 }
280
281 /// Get or lazily establish the overlayd IPC connection.
282 async fn client(&self) -> Result<Arc<Mutex<OverlaydClient>>, AgentError> {
283 let mut guard = self.client.lock().await;
284 if let Some(c) = guard.as_ref() {
285 return Ok(Arc::clone(c));
286 }
287 let socket = ZLayerDirs::default_overlayd_socket_path_for(&self.data_dir);
288 // Bounded dial (~2.5s worst case): overlay operations are non-fatal, so a
289 // dead/unreachable overlayd must degrade fast rather than hold the daemon's
290 // startup hostage. The overlayd supervisor (ensure_overlayd_running) owns
291 // the generous "wait for a freshly-spawned overlayd to bind" budget; once
292 // it has confirmed overlayd up (or fast-failed when the binary is missing),
293 // this lazy connector only needs a short retry window.
294 let conn = OverlaydClient::connect_with_attempts(std::path::Path::new(&socket), 6)
295 .await
296 .map_err(|e| map_overlayd_err(&e))?;
297 let arc = Arc::new(Mutex::new(conn));
298 *guard = Some(Arc::clone(&arc));
299 Ok(arc)
300 }
301
302 /// Issue a single overlayd request, folding `Err` responses into errors.
303 async fn call(&self, req: OverlaydRequest) -> Result<OverlaydResponse, AgentError> {
304 let client = self.client().await?;
305 let mut conn = client.lock().await;
306 conn.call(req).await.map_err(|e| map_overlayd_err(&e))
307 }
308
309 /// Post-construction setter for the local raft node id. Forwards
310 /// `SetLocalNodeId` to overlayd best-effort.
311 pub fn set_local_node_id(&mut self, node_id: u64) {
312 self.local_node_id = node_id;
313 }
314
315 /// Record this node's cluster `WireGuard` public key (base64) and forward it
316 /// to overlayd so service subnets can be added to the cluster transport's
317 /// local `AllowedIPs`.
318 pub async fn set_local_wg_pubkey(&self, pubkey: String) {
319 *self.local_wg_pubkey.lock().await = Some(pubkey.clone());
320 if let Err(e) = self
321 .call(OverlaydRequest::SetLocalWgPubkey { pubkey })
322 .await
323 {
324 tracing::warn!(error = %e, "overlayd SetLocalWgPubkey failed");
325 }
326 }
327
328 /// Returns the number of services currently registered (cached `Status`).
329 pub async fn service_count(&self) -> usize {
330 match self.call(OverlaydRequest::Status).await {
331 Ok(OverlaydResponse::Status(snap)) => snap.service_count as usize,
332 _ => 0,
333 }
334 }
335
336 /// Returns whether NAT traversal is enabled for this manager.
337 #[must_use]
338 pub fn nat_enabled(&self) -> bool {
339 self.nat_config
340 .as_ref()
341 .map_or_else(|| NatConfig::default().enabled, |c| c.enabled)
342 }
343
344 /// Returns a clone of the configured [`NatConfig`], or `None`.
345 #[must_use]
346 pub fn nat_config(&self) -> Option<NatConfig> {
347 self.nat_config.clone()
348 }
349
350 /// Bootstrap NAT traversal. overlayd starts NAT lazily on its first
351 /// `NatTick`, so this is a thin shim that reports whether NAT is enabled.
352 ///
353 /// # Errors
354 /// Infallible today; preserved for ABI parity.
355 #[allow(clippy::unused_async)]
356 pub async fn start_nat_traversal(&self) -> Result<bool, AgentError> {
357 Ok(self.nat_enabled())
358 }
359
360 /// Run one NAT-traversal maintenance tick by forwarding `NatTick` to overlayd.
361 ///
362 /// # Errors
363 /// Returns an error when overlayd reports a NAT refresh failure.
364 pub async fn nat_maintenance_tick(&self) -> Result<(), AgentError> {
365 if !self.nat_enabled() {
366 return Ok(());
367 }
368 self.call(OverlaydRequest::NatTick).await?;
369 Ok(())
370 }
371
372 /// Snapshot the current NAT traversal state for API consumers.
373 ///
374 /// overlayd owns the live NAT orchestrator and does not surface per-peer
375 /// candidate detail over the IPC contract, so this returns an empty
376 /// snapshot. Kept for API parity.
377 #[allow(clippy::unused_async)]
378 pub async fn nat_status_snapshot(&self) -> NatStatusSnapshot {
379 let _peers: Vec<NatPeerSnapshot> = Vec::new();
380 NatStatusSnapshot::empty()
381 }
382
383 /// Record the overlay DNS server address and zone domain (cached locally;
384 /// forwarded to overlayd on each container attach).
385 pub fn set_dns_config(&mut self, addr: Option<SocketAddr>, domain: Option<String>) {
386 self.dns_server_addr = addr;
387 self.dns_domain = domain;
388 }
389
390 /// Builder-style variant of [`OverlayManager::set_dns_config`].
391 #[must_use]
392 pub fn with_dns_config(mut self, addr: Option<SocketAddr>, domain: Option<String>) -> Self {
393 self.dns_server_addr = addr;
394 self.dns_domain = domain;
395 self
396 }
397
398 /// Returns the overlay DNS server address if configured.
399 #[must_use]
400 pub fn dns_server_addr(&self) -> Option<SocketAddr> {
401 self.dns_server_addr
402 }
403
404 /// Returns the overlay DNS zone domain, if configured.
405 #[must_use]
406 pub fn dns_domain(&self) -> Option<&str> {
407 self.dns_domain.as_deref()
408 }
409
410 /// Setup the global overlay network by delegating to overlayd.
411 ///
412 /// Forwards the local node id and wg pubkey first (so overlayd has the
413 /// cluster-brain context), then issues `SetupGlobalOverlay` and caches the
414 /// returned interface name plus the node IP / CIDRs reported by `Status`.
415 ///
416 /// # Errors
417 /// Returns an error if overlayd fails to bring up the overlay.
418 pub async fn setup_global_overlay(&mut self) -> Result<(), AgentError> {
419 // Fast pre-flight: establish (and cache) the overlayd connection once with a
420 // bounded budget. If overlayd is unreachable this returns after a single
421 // ~2.5s dial instead of letting each of the calls below pay the full retry
422 // window (which previously stacked to ~35s of daemon-startup stall when the
423 // overlayd binary was missing). Overlay setup is non-fatal, so bailing here
424 // simply leaves cross-node networking degraded — handled by the caller.
425 self.client().await?;
426
427 // Push cluster-brain context first (best-effort).
428 let _ = self
429 .call(OverlaydRequest::SetLocalNodeId {
430 node_id: self.local_node_id,
431 })
432 .await;
433 if let Some(pubkey) = self.local_wg_pubkey.lock().await.clone() {
434 let _ = self
435 .call(OverlaydRequest::SetLocalWgPubkey { pubkey })
436 .await;
437 }
438
439 let cluster_cidr = self
440 .cluster_cidr
441 .map_or_else(|| "10.200.0.0/16".to_string(), |c| c.to_string());
442 let slice_cidr = self.slice_cidr.map(|c| c.to_string());
443
444 let resp = self
445 .call(OverlaydRequest::SetupGlobalOverlay {
446 deployment: self.deployment.clone(),
447 instance_id: self.instance_id.clone(),
448 cluster_cidr,
449 slice_cidr,
450 wg_port: self.overlay_port,
451 nat_enabled: self.nat_enabled(),
452 })
453 .await?;
454 if let OverlaydResponse::BridgeName { name } = resp {
455 self.global_interface = Some(name);
456 }
457
458 // Refresh cached status (node_ip, cidrs).
459 self.refresh_status().await;
460 Ok(())
461 }
462
463 /// Refresh cached status fields from overlayd (`node_ip`, interface, CIDRs).
464 async fn refresh_status(&mut self) {
465 if let Ok(OverlaydResponse::Status(snap)) = self.call(OverlaydRequest::Status).await {
466 let StatusSnapshot {
467 interface,
468 node_ip,
469 overlay_cidr,
470 slice_cidr,
471 ..
472 } = snap;
473 if let Some(iface) = interface {
474 self.global_interface = Some(iface);
475 }
476 if node_ip.is_some() {
477 self.node_ip = node_ip;
478 }
479 if let Some(c) = overlay_cidr.and_then(|s| s.parse().ok()) {
480 self.cluster_cidr = Some(c);
481 }
482 if let Some(s) = slice_cidr.and_then(|s| s.parse().ok()) {
483 self.slice_cidr = Some(s);
484 }
485 }
486 }
487
488 /// Set up the per-service overlay segment by delegating to overlayd.
489 ///
490 /// Returns a [`ServiceOverlayInfo`] describing the segment. The
491 /// container-attach handle (bridge name on Linux, interface elsewhere) is
492 /// `info.name`. In `Dedicated` mode the `wg_public_key`/`wg_port`/
493 /// `overlay_ip`/`subnet` fields carry the per-service `WireGuard`
494 /// transport's identity so the deploy path can publish it to Raft and mesh
495 /// with the other hosting nodes; in `Shared` mode those fields are `None`.
496 ///
497 /// `mode` is the service's resolved [`OverlayMode`], read from its spec at
498 /// the deploy call site. In `Shared` mode overlayd attaches the service to
499 /// the cluster transport via a per-node bridge; in `Dedicated` mode it
500 /// stands up a per-service `WireGuard` transport with its own crypto
501 /// context and reports its identity via
502 /// [`OverlaydResponse::ServiceOverlay`].
503 ///
504 /// # Errors
505 /// Returns an error if overlayd fails to create the segment.
506 pub async fn setup_service_overlay(
507 &self,
508 service_name: &str,
509 mode: zlayer_types::overlay::OverlayMode,
510 ) -> Result<zlayer_types::overlayd::ServiceOverlayInfo, AgentError> {
511 let resp = self
512 .call(OverlaydRequest::SetupServiceOverlay {
513 service: service_name.to_string(),
514 mode,
515 })
516 .await?;
517 match resp {
518 // Shared mode (and any server still on the legacy response shape)
519 // reports only the container-attach handle; synthesize a
520 // `ServiceOverlayInfo` whose Dedicated-only fields are `None`.
521 OverlaydResponse::BridgeName { name } => {
522 Ok(zlayer_types::overlayd::ServiceOverlayInfo {
523 name,
524 mode,
525 wg_public_key: None,
526 wg_port: None,
527 overlay_ip: None,
528 subnet: None,
529 })
530 }
531 // Dedicated mode reports the full device identity.
532 OverlaydResponse::ServiceOverlay(info) => Ok(info),
533 other => Err(AgentError::Network(format!(
534 "overlayd SetupServiceOverlay returned unexpected response: {other:?}"
535 ))),
536 }
537 }
538
539 /// Add a container to the appropriate overlay networks by delegating to
540 /// overlayd (`AttachContainer` with a `LinuxPid` handle).
541 ///
542 /// # Errors
543 /// Returns an error if overlayd cannot attach the container.
544 pub async fn attach_container(
545 &self,
546 container_pid: u32,
547 service_name: &str,
548 join_global: bool,
549 ) -> Result<IpAddr, AgentError> {
550 let resp = self
551 .call(OverlaydRequest::AttachContainer {
552 handle: AttachHandle::LinuxPid { pid: container_pid },
553 service: service_name.to_string(),
554 join_global,
555 dns_server: self.dns_server_addr.map(|sa| sa.ip()),
556 dns_domain: self.dns_domain.clone(),
557 })
558 .await?;
559 match resp {
560 OverlaydResponse::Attached(result) => Ok(result.ip),
561 other => Err(AgentError::Network(format!(
562 "overlayd AttachContainer returned unexpected response: {other:?}"
563 ))),
564 }
565 }
566
567 /// Attach a guest-managed container (a VM with no host netns/PID) to the
568 /// overlay by asking overlayd to allocate the overlay identity (keypair +
569 /// address + the current peer set) and register the generated public key in
570 /// the mesh. The caller ships the returned [`GuestOverlayConfig`] into the
571 /// guest (over vsock) where it brings up its own `WireGuard` device.
572 ///
573 /// `id` is the opaque container id used to scope the allocation so a later
574 /// [`detach_container_guest`](OverlayManager::detach_container_guest) can
575 /// release the address + remove the peer.
576 ///
577 /// # Errors
578 /// Returns an error if overlayd cannot allocate/register the guest.
579 pub async fn attach_container_guest(
580 &self,
581 id: &str,
582 service_name: &str,
583 join_global: bool,
584 ) -> Result<zlayer_types::overlayd::GuestOverlayConfig, AgentError> {
585 let resp = self
586 .call(OverlaydRequest::AttachContainer {
587 handle: AttachHandle::GuestManaged { id: id.to_string() },
588 service: service_name.to_string(),
589 join_global,
590 dns_server: self.dns_server_addr.map(|sa| sa.ip()),
591 dns_domain: self.dns_domain.clone(),
592 })
593 .await?;
594 match resp {
595 OverlaydResponse::GuestConfig(cfg) => Ok(cfg),
596 other => Err(AgentError::Network(format!(
597 "overlayd AttachContainer(GuestManaged) returned unexpected response: {other:?}"
598 ))),
599 }
600 }
601
602 /// Detach a guest-managed container: release its overlay IP and remove its
603 /// registered mesh peer.
604 ///
605 /// # Errors
606 /// Returns an error if overlayd cannot detach the container.
607 pub async fn detach_container_guest(&self, id: &str) -> Result<(), AgentError> {
608 let resp = self
609 .call(OverlaydRequest::DetachContainer {
610 handle: AttachHandle::GuestManaged { id: id.to_string() },
611 })
612 .await?;
613 match resp {
614 OverlaydResponse::Ok => Ok(()),
615 other => Err(AgentError::Network(format!(
616 "overlayd DetachContainer(GuestManaged) returned unexpected response: {other:?}"
617 ))),
618 }
619 }
620
621 /// Register a Windows HCN container with overlayd and return its overlay IP
622 /// plus the overlayd-created namespace GUID.
623 ///
624 /// The return type gained the namespace GUID (vs. the pre-migration
625 /// IP-only return) because the HCN network + endpoint + namespace are now
626 /// created inside overlayd, and `HcsRuntime` needs that GUID to embed in the
627 /// compute-system document.
628 ///
629 /// When `autoclean` is true and overlayd reports back a namespace GUID, an
630 /// entry is recorded in [`OverlayManager::hcn_cleanup`] so a later
631 /// [`OverlayManager::detach_container_hcn`] (or process teardown) can drain
632 /// it. The cleanup map is purely agent-side bookkeeping; overlayd remains
633 /// the authoritative owner of the HCN namespace/endpoint state.
634 ///
635 /// # Errors
636 /// Returns an error if overlayd cannot attach the container.
637 #[cfg(target_os = "windows")]
638 #[allow(clippy::too_many_arguments)]
639 pub async fn attach_container_hcn(
640 &self,
641 container_id: &str,
642 service_name: &str,
643 ip_override: Option<std::net::IpAddr>,
644 autoclean: bool,
645 dns_server: Option<std::net::IpAddr>,
646 dns_domain: Option<String>,
647 ) -> Result<(std::net::IpAddr, Option<String>), AgentError> {
648 let resp = self
649 .call(OverlaydRequest::AttachContainer {
650 handle: AttachHandle::WindowsContainer {
651 container_id: container_id.to_string(),
652 ip: ip_override,
653 },
654 service: service_name.to_string(),
655 join_global: false,
656 dns_server: dns_server.or_else(|| self.dns_server_addr.map(|sa| sa.ip())),
657 dns_domain: dns_domain.or_else(|| self.dns_domain.clone()),
658 })
659 .await?;
660 match resp {
661 OverlaydResponse::Attached(result) => {
662 // Record agent-side autoclean bookkeeping. We key by the
663 // overlayd-issued namespace GUID; if overlayd did not return
664 // one (e.g. host-network attach), there is nothing to track.
665 if autoclean {
666 if let Some(ns_str) = result.namespace_guid.as_deref() {
667 match windows::core::GUID::try_from(ns_str) {
668 Ok(ns_guid) => {
669 let mut cleanup = self.hcn_cleanup.lock().await;
670 cleanup.insert(ns_guid, (service_name.to_string(), result.ip));
671 }
672 Err(e) => {
673 tracing::warn!(
674 ns = %ns_str,
675 error = %e,
676 "overlayd returned a non-GUID namespace handle; skipping hcn_cleanup insert"
677 );
678 }
679 }
680 }
681 }
682 Ok((result.ip, result.namespace_guid))
683 }
684 other => Err(AgentError::Network(format!(
685 "overlayd AttachContainer(WindowsContainer) returned unexpected response: {other:?}"
686 ))),
687 }
688 }
689
690 /// Detach and release a Windows HCN container by its bare namespace GUID.
691 ///
692 /// Drains the agent-side [`OverlayManager::hcn_cleanup`] entry (if any)
693 /// before forwarding `DetachContainer` to overlayd. Safe to call with an
694 /// unknown GUID — the map drain is a no-op in that case.
695 ///
696 /// # Errors
697 /// Returns an error if overlayd reports a detach failure.
698 #[cfg(target_os = "windows")]
699 pub async fn detach_container_hcn(&self, namespace_guid: &str) -> Result<(), AgentError> {
700 // Drain the agent-side cleanup map first so a later overlayd error does
701 // not leave a stale entry behind.
702 match windows::core::GUID::try_from(namespace_guid) {
703 Ok(ns_guid) => {
704 let mut cleanup = self.hcn_cleanup.lock().await;
705 if let Some((service_name, ip)) = cleanup.remove(&ns_guid) {
706 tracing::info!(
707 ns = %namespace_guid,
708 service = %service_name,
709 ip = %ip,
710 "Released HCN overlay attachment (agent-side cleanup)"
711 );
712 }
713 }
714 Err(e) => {
715 tracing::warn!(
716 ns = %namespace_guid,
717 error = %e,
718 "detach_container_hcn called with non-GUID handle; skipping hcn_cleanup drain"
719 );
720 }
721 }
722
723 self.call(OverlaydRequest::DetachContainer {
724 handle: AttachHandle::WindowsContainer {
725 container_id: namespace_guid.to_string(),
726 ip: None,
727 },
728 })
729 .await?;
730 Ok(())
731 }
732
733 /// Release the overlay resources held by a Linux container by delegating to
734 /// overlayd (`DetachContainer` with a `LinuxPid` handle).
735 ///
736 /// # Errors
737 /// Returns an error if overlayd reports a detach failure.
738 pub async fn detach_container(&self, pid: u32) -> Result<(), AgentError> {
739 self.call(OverlaydRequest::DetachContainer {
740 handle: AttachHandle::LinuxPid { pid },
741 })
742 .await?;
743 Ok(())
744 }
745
746 /// Tear down the per-service overlay segment for `service_name`.
747 pub async fn teardown_service_overlay(&self, service_name: &str) {
748 if let Err(e) = self
749 .call(OverlaydRequest::TeardownServiceOverlay {
750 service: service_name.to_string(),
751 })
752 .await
753 {
754 tracing::warn!(service = %service_name, error = %e, "overlayd TeardownServiceOverlay failed");
755 }
756 }
757
758 /// Cleanup all overlay networks (tears down the global overlay in overlayd).
759 ///
760 /// # Errors
761 /// Returns an error if overlayd reports a teardown failure.
762 pub async fn cleanup(&mut self) -> Result<(), AgentError> {
763 self.call(OverlaydRequest::TeardownGlobalOverlay).await?;
764 self.global_interface = None;
765 // Best-effort drain of any agent-side autoclean bookkeeping we still
766 // hold on Windows. overlayd already tore down the HCN namespaces in
767 // response to `TeardownGlobalOverlay`; this just empties the side-map
768 // so a subsequent reuse of this manager starts clean.
769 #[cfg(target_os = "windows")]
770 {
771 let mut cleanup = self.hcn_cleanup.lock().await;
772 cleanup.clear();
773 }
774 Ok(())
775 }
776
777 /// Returns this node's IP on the global overlay network (cached).
778 pub fn node_ip(&self) -> Option<IpAddr> {
779 self.node_ip
780 }
781
782 /// Returns the deployment name this overlay manager was created for.
783 pub fn deployment(&self) -> &str {
784 &self.deployment
785 }
786
787 /// Returns the global overlay interface name (cached).
788 pub fn global_interface(&self) -> Option<&str> {
789 self.global_interface.as_deref()
790 }
791
792 /// Returns the `WireGuard` listen port for the overlay network.
793 pub fn overlay_port(&self) -> u16 {
794 self.overlay_port
795 }
796
797 /// Returns `true` if the global overlay transport is active (cached: an
798 /// interface name has been recorded).
799 pub fn has_global_transport(&self) -> bool {
800 self.global_interface.is_some()
801 }
802
803 /// Returns the number of per-service overlay bridges currently active.
804 pub async fn service_bridge_count(&self) -> usize {
805 match self.call(OverlaydRequest::Status).await {
806 Ok(OverlaydResponse::Status(snap)) => snap.service_count as usize,
807 _ => 0,
808 }
809 }
810
811 /// Add a peer to the live global overlay transport by delegating to overlayd.
812 ///
813 /// The parameter type is preserved (`&zlayer_overlay::PeerInfo`) so the one
814 /// caller (`zlayer-api`'s internal add-peer handler) compiles unchanged; the
815 /// shim converts it to a wire-safe [`PeerSpec`].
816 ///
817 /// # Errors
818 /// Returns an error if overlayd rejects the peer (e.g. overlay not yet up).
819 pub async fn add_global_peer(&self, peer: &zlayer_overlay::PeerInfo) -> Result<(), AgentError> {
820 self.call(OverlaydRequest::AddPeer {
821 peer: peer_spec_from(peer),
822 scope: zlayer_types::overlayd::PeerScope::Global,
823 })
824 .await?;
825 Ok(())
826 }
827
828 /// Add a peer to a service's dedicated per-service overlay transport.
829 ///
830 /// Analogous to [`OverlayManager::add_global_peer`] but scoped to
831 /// `service`'s [`OverlayMode::Dedicated`] device: first the peer itself
832 /// (`AddPeer` with `scope: Service`), then the service `subnet` plumbed
833 /// into that peer's `AllowedIPs` (`AddAllowedIp` with the same scope).
834 ///
835 /// # Errors
836 /// Returns an error if overlayd rejects the peer or the allowed-IP add
837 /// (e.g. the service's dedicated transport is not yet up).
838 pub async fn add_service_peer(
839 &self,
840 service: &str,
841 peer: &zlayer_overlay::PeerInfo,
842 subnet: &str,
843 ) -> Result<(), AgentError> {
844 self.call(OverlaydRequest::AddPeer {
845 peer: peer_spec_from(peer),
846 scope: zlayer_types::overlayd::PeerScope::Service {
847 service: service.to_string(),
848 },
849 })
850 .await?;
851 self.call(OverlaydRequest::AddAllowedIp {
852 pubkey: peer.public_key.clone(),
853 cidr: subnet.to_string(),
854 scope: zlayer_types::overlayd::PeerScope::Service {
855 service: service.to_string(),
856 },
857 })
858 .await?;
859 Ok(())
860 }
861
862 /// Remove a peer (by base64 public key) from a service's dedicated
863 /// per-service overlay transport.
864 ///
865 /// # Errors
866 /// Returns an error if overlayd reports the removal failed.
867 pub async fn remove_service_peer(&self, service: &str, pubkey: &str) -> Result<(), AgentError> {
868 self.call(OverlaydRequest::RemovePeer {
869 pubkey: pubkey.to_string(),
870 scope: zlayer_types::overlayd::PeerScope::Service {
871 service: service.to_string(),
872 },
873 })
874 .await?;
875 Ok(())
876 }
877
878 /// Returns the CIDR string for the overlay IP allocator (cached cluster CIDR).
879 pub fn overlay_cidr(&self) -> String {
880 self.cluster_cidr
881 .map_or_else(|| "10.200.0.0/16".to_string(), |c| c.to_string())
882 }
883
884 /// Returns the per-node slice CIDR this manager was built with, or `None`.
885 pub fn slice_cidr(&self) -> Option<IpNetwork> {
886 self.slice_cidr
887 }
888
889 /// Returns the full cluster CIDR, if known.
890 pub fn cluster_cidr(&self) -> Option<IpNetwork> {
891 self.cluster_cidr
892 }
893
894 /// Persist the IPAM allocator state. overlayd owns IPAM; this is a no-op
895 /// retained for ABI parity with callers.
896 ///
897 /// # Errors
898 /// Infallible today.
899 #[allow(clippy::unused_async)]
900 pub async fn persist_ipam_state(&self, _path: &std::path::Path) -> Result<(), AgentError> {
901 Ok(())
902 }
903
904 /// Restore IPAM allocator state. overlayd owns IPAM; this is a no-op
905 /// retained for ABI parity with callers.
906 ///
907 /// # Errors
908 /// Infallible today.
909 #[allow(clippy::unused_async)]
910 pub async fn restore_ipam_state(&mut self, _path: &std::path::Path) -> Result<(), AgentError> {
911 Ok(())
912 }
913
914 /// Returns IP allocation statistics: (`allocated_count`, `base_addr`).
915 ///
916 /// overlayd owns IPAM and does not surface allocation counters over IPC, so
917 /// this reports `(0, base)` derived from the cached cluster CIDR.
918 pub fn ip_alloc_stats(&self) -> (u64, IpAddr) {
919 let base = self
920 .cluster_cidr
921 .map_or(IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED), |c| c.network());
922 (0, base)
923 }
924}
925
926#[cfg(test)]
927mod tests {
928 use super::*;
929
/// Every generated interface name must fit within `MAX_IFNAME_LEN`,
/// whatever mix of short, long, single-part, multi-part, or empty-suffix
/// inputs is supplied.
#[test]
fn interface_name_never_exceeds_limit() {
    // (name parts, suffix) pairs covering short, long and multi-part inputs.
    let cases: &[(&[&str], &str)] = &[
        (&["a"], "g"),
        (&["zlayer-manager"], "g"),
        (&["my-very-long-deployment-name-that-goes-on-and-on"], "g"),
        (&["zlayer", "manager"], "s"),
        (&["zlayer-manager", "frontend-service"], "s"),
        (&["a", "b"], "s"),
        (
            &["abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz"],
            "s",
        ),
        (&["x"], ""),
        (&["deployment"], ""),
        (&["a-really-long-name-exceeding-everything"], "suffix"),
    ];

    for &(parts, suffix) in cases {
        let name = make_interface_name(parts, suffix);
        let len = name.len();
        assert!(
            len <= MAX_IFNAME_LEN,
            "Name '{}' is {} chars (parts={:?}, suffix='{}')",
            name,
            len,
            parts,
            suffix,
        );
    }
}
961
/// Pathologically long parts (200 characters each), alone or repeated, must
/// still yield a name within `MAX_IFNAME_LEN`.
#[test]
fn interface_name_with_extreme_lengths() {
    let long = "a".repeat(200);
    let single = [long.as_str()];
    let triple = [long.as_str(); 3];

    let cases: [(&[&str], &str); 3] = [(&single, "g"), (&triple, "s"), (&single, "")];

    for &(parts, suffix) in &cases {
        let name = make_interface_name(parts, suffix);
        assert!(name.len() <= MAX_IFNAME_LEN, "Name '{name}' too long");
    }
}
977
/// Calling the generator twice with identical inputs must give identical
/// names.
#[test]
fn interface_name_is_deterministic() {
    let generate = || make_interface_name(&["zlayer-manager"], "g");
    assert_eq!(generate(), generate());
}
985
/// Inputs that differ in either the name part or the suffix must not map to
/// the same interface name.
#[test]
fn interface_name_uniqueness() {
    // Different name part, same suffix.
    assert_ne!(
        make_interface_name(&["deploy-a"], "g"),
        make_interface_name(&["deploy-b"], "g"),
    );

    // Same name part, different suffix.
    assert_ne!(
        make_interface_name(&["deploy"], "g"),
        make_interface_name(&["deploy"], "s"),
    );
}
997
/// Inputs short enough to fit are emitted verbatim in the human-readable
/// `zl-<parts>-<suffix>` form.
#[test]
fn interface_name_short_inputs_are_readable() {
    assert_eq!(make_interface_name(&["app"], "g"), "zl-app-g");
    assert_eq!(make_interface_name(&["my", "web"], "s"), "zl-my-web-s");
}
1006
/// A manager built through `with_slice` must hand back the slice CIDR,
/// cluster CIDR, overlay port and deployment name it was constructed with.
#[test]
fn with_slice_stores_slice_cidr() {
    let cluster_net = "10.200.0.0/16".parse::<IpNetwork>().unwrap();
    let slice_net = "10.200.42.0/28".parse::<IpNetwork>().unwrap();

    let manager = OverlayManager::with_slice(
        String::from("test-deploy"),
        cluster_net,
        slice_net,
        51820,
        String::from("test"),
    );

    assert_eq!(manager.slice_cidr(), Some(slice_net));
    assert_eq!(manager.cluster_cidr(), Some(cluster_net));
    assert_eq!(manager.overlay_port(), 51820);
    assert_eq!(manager.deployment(), "test-deploy");
}
1024
1025 /// `node_ip()` is None before any setup.
1026 #[tokio::test]
1027 async fn node_ip_none_before_setup() {
1028 let om = OverlayManager::new("test-deploy".to_string(), "test".to_string())
1029 .await
1030 .unwrap();
1031 assert!(om.node_ip().is_none());
1032 }
1033
1034 /// DNS config round-trips through the cache.
1035 #[tokio::test]
1036 async fn dns_config_set_and_round_trip() {
1037 let mut om = OverlayManager::new("dns-roundtrip".to_string(), "test".to_string())
1038 .await
1039 .unwrap();
1040 let addr: SocketAddr = "10.200.42.1:15353".parse().unwrap();
1041 om.set_dns_config(Some(addr), Some("overlay.local".to_string()));
1042 assert_eq!(om.dns_server_addr(), Some(addr));
1043 assert_eq!(om.dns_domain(), Some("overlay.local"));
1044
1045 om.set_dns_config(None, None);
1046 assert!(om.dns_server_addr().is_none());
1047 assert!(om.dns_domain().is_none());
1048 }
1049
/// `peer_spec_from` must carry every `PeerInfo` field over to the wire-safe
/// `PeerSpec`: the public key and allowed IPs unchanged, the endpoint in
/// string form, and the keepalive interval as whole seconds.
#[test]
fn peer_spec_from_copies_all_fields() {
    let source = zlayer_overlay::PeerInfo {
        public_key: String::from("base64key"),
        endpoint: "1.2.3.4:51820".parse().unwrap(),
        allowed_ips: String::from("10.200.0.2/32"),
        persistent_keepalive_interval: std::time::Duration::from_secs(25),
    };

    let wire = peer_spec_from(&source);

    assert_eq!(wire.public_key, "base64key");
    assert_eq!(wire.endpoint, "1.2.3.4:51820");
    assert_eq!(wire.allowed_ips, "10.200.0.2/32");
    assert_eq!(wire.persistent_keepalive_secs, 25);
}
1067
/// Pins the shape of the `SetupServiceOverlay` request: a request built
/// with `Dedicated` must carry `Dedicated`, and `Dedicated` must differ
/// from `OverlayMode::default()` (so a hardcoded default would be
/// detectable).
///
/// Note: the request is constructed by hand here; the test does not call
/// `setup_service_overlay` itself.
#[test]
fn setup_service_overlay_request_carries_dedicated_mode() {
    use zlayer_types::overlay::OverlayMode;

    let request = OverlaydRequest::SetupServiceOverlay {
        service: String::from("web"),
        mode: OverlayMode::Dedicated,
    };

    match request {
        OverlaydRequest::SetupServiceOverlay { service, mode } => {
            assert_eq!(service, "web");
            assert_eq!(mode, OverlayMode::Dedicated);
            assert_ne!(mode, OverlayMode::default());
        }
        other => panic!("expected SetupServiceOverlay, got {other:?}"),
    }
}
1086
/// Service-scoped peer requests (`AddPeer`, `AddAllowedIp`, `RemovePeer`)
/// must carry `PeerScope::Service { service }` rather than `Global`, so
/// that dedicated transports stay isolated from the cluster transport.
///
/// Note: the requests are constructed by hand here, mirroring what the
/// service-peer methods build; the methods themselves are not invoked.
#[test]
fn service_peer_ops_use_service_scope() {
    use zlayer_types::overlayd::PeerScope;

    // Fresh copy of the scope every request is expected to carry.
    let web_scope = || PeerScope::Service {
        service: String::from("web"),
    };

    let peer = zlayer_overlay::PeerInfo {
        public_key: String::from("k"),
        endpoint: "1.2.3.4:51820".parse().unwrap(),
        allowed_ips: String::from("10.201.0.2/32"),
        persistent_keepalive_interval: std::time::Duration::from_secs(0),
    };

    let add = OverlaydRequest::AddPeer {
        peer: peer_spec_from(&peer),
        scope: web_scope(),
    };
    let allow = OverlaydRequest::AddAllowedIp {
        pubkey: peer.public_key.clone(),
        cidr: String::from("10.201.0.0/24"),
        scope: web_scope(),
    };
    let remove = OverlaydRequest::RemovePeer {
        pubkey: peer.public_key.clone(),
        scope: web_scope(),
    };

    match add {
        OverlaydRequest::AddPeer { scope, peer } => {
            assert_eq!(scope, web_scope());
            assert_eq!(peer.public_key, "k");
        }
        other => panic!("expected AddPeer, got {other:?}"),
    }

    match allow {
        OverlaydRequest::AddAllowedIp { scope, cidr, .. } => {
            assert_eq!(cidr, "10.201.0.0/24");
            assert_eq!(scope, web_scope());
        }
        other => panic!("expected AddAllowedIp, got {other:?}"),
    }

    match remove {
        OverlaydRequest::RemovePeer { scope, pubkey } => {
            assert_eq!(pubkey, "k");
            assert_eq!(scope, web_scope());
        }
        other => panic!("expected RemovePeer, got {other:?}"),
    }
}
1153
/// Windows-only sanity check that the `hcn_cleanup` side-map is empty right
/// after construction, for both `new()` and `with_slice()`.
///
/// Live insert/drain coverage lives behind the overlayd IPC layer (which is
/// exercised by the windows e2e tests); this only confirms the field is
/// wired through both constructors.
#[cfg(target_os = "windows")]
#[tokio::test]
async fn hcn_cleanup_map_starts_empty() {
    // Constructor path 1: `new()`.
    let from_new = OverlayManager::new(String::from("test-deploy"), String::from("test"))
        .await
        .unwrap();
    assert!(
        from_new.hcn_cleanup.lock().await.is_empty(),
        "hcn_cleanup map must start empty from new()"
    );

    // Constructor path 2: `with_slice()`.
    let cluster_net = "10.200.0.0/16".parse::<IpNetwork>().unwrap();
    let slice_net = "10.200.42.0/28".parse::<IpNetwork>().unwrap();
    let from_slice = OverlayManager::with_slice(
        String::from("test-deploy"),
        cluster_net,
        slice_net,
        51820,
        String::from("test"),
    );
    assert!(
        from_slice.hcn_cleanup.lock().await.is_empty(),
        "hcn_cleanup map must start empty from with_slice()"
    );
}
1190}