zlayer_agent/overlay_manager.rs
1//! Thin overlayd client shim.
2//!
3//! Historically `OverlayManager` owned every mechanism touching the
4//! overlay/network plane (the cluster `WireGuard` transport, per-service Linux
5//! bridges, veth/netns attach, the Windows HCN Internal network + endpoints,
6//! IPAM, DNS, NAT). All of that machinery was migrated wholesale into the
7//! standalone `zlayer-overlayd` daemon (`crates/zlayer-overlayd/src/server.rs`).
8//!
9//! What remains here is a **client shim**: it keeps only cluster-brain / cached
10//! state (deployment name, instance id, local node id, local wg pubkey, and
11//! cached status values such as `node_ip`/`dns`/`cidr`) and forwards every
12//! mechanical operation to overlayd over the IPC client
//! [`zlayer_overlayd::OverlaydClient`]. Public methods keep the signature they
//! had before the migration wherever possible so existing callers compile
//! unchanged (a documented exception is `attach_container_hcn`, whose return
//! value gained the overlayd-created namespace GUID); each body simply builds
//! the matching [`OverlaydRequest`], issues `client.call(req)`, and maps the
//! response.
17//!
18//! On Windows, the manager additionally maintains a small `hcn_cleanup` map
19//! (HCN namespace GUID -> (`service_name`, `allocated_ip`)) so that
20//! agent-side bookkeeping for autoclean attaches survives even though the
21//! authoritative HCN state lives in overlayd. The map is populated on
22//! `attach_container_hcn(autoclean = true)` and drained on
23//! `detach_container_hcn`.
24
25use crate::error::AgentError;
26use ipnetwork::IpNetwork;
27use std::collections::hash_map::DefaultHasher;
28use std::hash::{Hash, Hasher};
29use std::net::{IpAddr, SocketAddr};
30use std::path::PathBuf;
31use std::sync::Arc;
32use tokio::sync::Mutex;
33use zlayer_overlay::{NatConfig, NatPeerSnapshot, NatStatusSnapshot};
34use zlayer_overlayd::OverlaydClient;
35use zlayer_paths::ZLayerDirs;
36use zlayer_types::overlayd::{
37 AttachHandle, OverlaydRequest, OverlaydResponse, PeerSpec, StatusSnapshot,
38};
39
/// Longest interface name Linux accepts: `IFNAMSIZ` minus the trailing NUL.
const MAX_IFNAME_LEN: usize = 15;

/// Build a Linux-safe interface name of at most [`MAX_IFNAME_LEN`] bytes.
///
/// The readable form is `zl-<parts joined by '-'>`, followed by `-<suffix>`
/// when `suffix` is non-empty. If that form fits it is returned verbatim.
/// Otherwise every part and then the suffix are fed through a `DefaultHasher`
/// and the hex digest is truncated to whatever room is left, keeping the
/// suffix readable when at least one digest character still fits in front of it.
///
/// This lives in the agent (and is re-exported from the crate root) because
/// callers outside the overlay machinery — notably `runtimes/wsl2_delegate.rs`
/// — rely on it for deterministic naming. overlayd carries its own private
/// copy for server-side names, so the output of this function must not change.
#[must_use]
pub fn make_interface_name(parts: &[&str], suffix: &str) -> String {
    /// Byte length of the fixed `"zl-"` prefix.
    const PREFIX_LEN: usize = 3;

    /// First `budget` bytes of `hex`, or all of it when it is shorter.
    /// Byte slicing is safe here because a hex digest is pure ASCII.
    fn truncated(hex: &str, budget: usize) -> &str {
        &hex[..budget.min(hex.len())]
    }

    // Readable candidate first; most names fit and never reach the hash path.
    let mut readable = String::from("zl-");
    readable.push_str(&parts.join("-"));
    if !suffix.is_empty() {
        readable.push('-');
        readable.push_str(suffix);
    }
    if readable.len() <= MAX_IFNAME_LEN {
        return readable;
    }

    // Too long: derive a digest from all parts followed by the suffix.
    let mut state = DefaultHasher::new();
    parts.iter().for_each(|segment| segment.hash(&mut state));
    suffix.hash(&mut state);
    let digest = format!("{:x}", state.finish());

    // Digest-only form: "zl-" (3) + up to 12 hex chars = 15.
    let digest_only = || format!("zl-{}", truncated(&digest, MAX_IFNAME_LEN - PREFIX_LEN));

    if suffix.is_empty() {
        return digest_only();
    }

    // Room left for the digest once "zl-", the "-" separator and the suffix
    // have been accounted for.
    let room = MAX_IFNAME_LEN.saturating_sub(PREFIX_LEN + 1 + suffix.len());
    if room == 0 {
        // The suffix alone eats the whole budget, so it is dropped from the
        // visible name (it still contributed to the digest).
        digest_only()
    } else {
        format!("zl-{}-{}", truncated(&digest, room), suffix)
    }
}
91
92/// Map a `zlayer_overlayd` client error into the agent's error type.
93fn map_overlayd_err(e: &zlayer_overlayd::OverlaydError) -> AgentError {
94 AgentError::Network(format!("overlayd: {e}"))
95}
96
97/// Convert a live [`zlayer_overlay::PeerInfo`] into the wire-safe [`PeerSpec`]
98/// the overlayd IPC contract expects. Shared by every `add_*_peer` shim so the
99/// global and per-service paths build identical specs.
100fn peer_spec_from(peer: &zlayer_overlay::PeerInfo) -> PeerSpec {
101 PeerSpec {
102 public_key: peer.public_key.clone(),
103 endpoint: peer.endpoint.to_string(),
104 allowed_ips: peer.allowed_ips.clone(),
105 persistent_keepalive_secs: peer.persistent_keepalive_interval.as_secs(),
106 }
107}
108
109/// Manages overlay networks for a deployment by delegating all mechanics to the
110/// `zlayer-overlayd` daemon.
111///
112/// This struct holds only cluster-brain / cached state; the actual overlay
113/// machinery lives in overlayd and is reached through [`OverlayManager::client`].
114pub struct OverlayManager {
115 /// Deployment name (used for network naming).
116 deployment: String,
117 /// Per-daemon-process disambiguator included in overlay link names. Stable
118 /// for the daemon's lifetime; forwarded to overlayd in `SetupGlobalOverlay`.
119 instance_id: String,
120 /// Root data directory; used to resolve the overlayd IPC socket path.
121 data_dir: PathBuf,
122 /// Lazily-connected overlayd IPC client. Wrapped in an `Arc<Mutex<_>>` so
123 /// the manager can be shared behind an `Arc<RwLock<_>>` and still serialize
124 /// request/response round-trips on the single framed connection.
125 client: Mutex<Option<Arc<Mutex<OverlaydClient>>>>,
126 /// Local raft node id, forwarded to overlayd via `SetLocalNodeId`.
127 local_node_id: u64,
128 /// This node's cluster `WireGuard` public key (base64), forwarded to
129 /// overlayd via `SetLocalWgPubkey`. Behind a `Mutex` because the setter
130 /// takes `&self` (callers hold only a read guard at that point).
131 local_wg_pubkey: Mutex<Option<String>>,
132 /// `WireGuard` listen port for the overlay network.
133 overlay_port: u16,
134 /// Cached node overlay IP, populated from `SetupGlobalOverlay`/`Status`.
135 node_ip: Option<IpAddr>,
136 /// Cached global overlay interface name.
137 global_interface: Option<String>,
138 /// Cached full cluster CIDR.
139 cluster_cidr: Option<IpNetwork>,
140 /// Cached per-node slice CIDR.
141 slice_cidr: Option<IpNetwork>,
142 /// Cached overlay DNS server address.
143 dns_server_addr: Option<SocketAddr>,
144 /// Cached overlay DNS zone domain.
145 dns_domain: Option<String>,
146 /// NAT traversal configuration. overlayd owns the live NAT orchestrator;
147 /// this is cached so the daemon can decide whether to drive `NatTick`.
148 nat_config: Option<NatConfig>,
149 /// Override for the `WireGuard` UAPI socket directory. overlayd owns the
150 /// real transport, so this is retained only for API/diagnostic parity.
151 uapi_sock_dir: Option<PathBuf>,
152 /// Map of HCN namespace GUID -> (`service_name`, `allocated_ip`) for autoclean.
153 /// When a Windows container is attached with `autoclean = true`, its entry
154 /// is inserted here; `detach_container_hcn` removes it. overlayd is the
155 /// authoritative owner of the HCN namespace/endpoint state, but the agent
156 /// keeps this side-map so it can answer "what attachments do I still need
157 /// to release on shutdown?" without an IPC round-trip per query.
158 #[cfg(target_os = "windows")]
159 hcn_cleanup: std::sync::Arc<
160 tokio::sync::Mutex<
161 std::collections::HashMap<windows::core::GUID, (String, std::net::IpAddr)>,
162 >,
163 >,
164}
165
166impl OverlayManager {
167 /// Create a new overlay manager for a deployment (legacy single-node path).
168 ///
169 /// Uses the default cluster `/16`. Prefer [`OverlayManager::with_slice`] for
170 /// cluster deployments. The overlayd IPC client is connected lazily on first
171 /// use (via the socket under the system-default data dir).
172 ///
173 /// # Errors
174 /// Infallible today; the `Result` is preserved for ABI parity with callers.
175 ///
176 /// # Panics
177 /// Panics only if the compile-time-constant default CIDR `10.200.0.0/16`
178 /// fails to parse (impossible).
179 #[allow(clippy::unused_async)]
180 pub async fn new(deployment: String, instance_id: String) -> Result<Self, AgentError> {
181 let data_dir = ZLayerDirs::system_default().data_dir().to_path_buf();
182 let default_cidr: IpNetwork = "10.200.0.0/16".parse().expect("compile-time constant CIDR");
183 Ok(Self {
184 deployment,
185 instance_id,
186 data_dir,
187 client: Mutex::new(None),
188 local_node_id: 0,
189 local_wg_pubkey: Mutex::new(None),
190 overlay_port: zlayer_core::DEFAULT_WG_PORT,
191 node_ip: None,
192 global_interface: None,
193 cluster_cidr: Some(default_cidr),
194 slice_cidr: None,
195 dns_server_addr: None,
196 dns_domain: None,
197 nat_config: None,
198 uapi_sock_dir: None,
199 #[cfg(target_os = "windows")]
200 hcn_cleanup: std::sync::Arc::new(tokio::sync::Mutex::new(
201 std::collections::HashMap::new(),
202 )),
203 })
204 }
205
206 /// Create an `OverlayManager` bound to a per-node slice.
207 ///
208 /// `slice_cidr` is the per-node slice owned by this node; `cluster_cidr` is
209 /// the full cluster CIDR. Both are forwarded to overlayd in
210 /// `SetupGlobalOverlay`.
211 #[must_use]
212 pub fn with_slice(
213 deployment: String,
214 cluster_cidr: IpNetwork,
215 slice_cidr: IpNetwork,
216 port: u16,
217 instance_id: String,
218 ) -> Self {
219 let data_dir = ZLayerDirs::system_default().data_dir().to_path_buf();
220 Self {
221 deployment,
222 instance_id,
223 data_dir,
224 client: Mutex::new(None),
225 local_node_id: 0,
226 local_wg_pubkey: Mutex::new(None),
227 overlay_port: port,
228 node_ip: None,
229 global_interface: None,
230 cluster_cidr: Some(cluster_cidr),
231 slice_cidr: Some(slice_cidr),
232 dns_server_addr: None,
233 dns_domain: None,
234 nat_config: None,
235 uapi_sock_dir: None,
236 #[cfg(target_os = "windows")]
237 hcn_cleanup: std::sync::Arc::new(tokio::sync::Mutex::new(
238 std::collections::HashMap::new(),
239 )),
240 }
241 }
242
243 /// Set the `WireGuard` listen port for the overlay network.
244 #[must_use]
245 pub fn with_overlay_port(mut self, port: u16) -> Self {
246 self.overlay_port = port;
247 self
248 }
249
250 /// Set the NAT traversal configuration. overlayd owns the live NAT
251 /// orchestrator; this records the toggle so `SetupGlobalOverlay` can carry
252 /// `nat_enabled` and the daemon can decide whether to drive `NatTick`.
253 #[must_use]
254 pub fn with_nat_config(mut self, nat: NatConfig) -> Self {
255 self.nat_config = Some(nat);
256 self
257 }
258
259 /// Override the `WireGuard` UAPI socket directory. Retained for API parity;
260 /// overlayd owns the real transport's socket directory.
261 #[must_use]
262 pub fn with_uapi_sock_dir(mut self, dir: impl Into<PathBuf>) -> Self {
263 self.uapi_sock_dir = Some(dir.into());
264 self
265 }
266
267 /// Override the data directory used to resolve the overlayd IPC socket.
268 #[must_use]
269 pub fn with_data_dir(mut self, dir: impl Into<PathBuf>) -> Self {
270 self.data_dir = dir.into();
271 self
272 }
273
274 /// Set the local raft node id (builder-style).
275 #[must_use]
276 pub fn with_local_node_id(mut self, node_id: u64) -> Self {
277 self.local_node_id = node_id;
278 self
279 }
280
281 /// Get or lazily establish the overlayd IPC connection.
282 async fn client(&self) -> Result<Arc<Mutex<OverlaydClient>>, AgentError> {
283 let mut guard = self.client.lock().await;
284 if let Some(c) = guard.as_ref() {
285 return Ok(Arc::clone(c));
286 }
287 let socket = ZLayerDirs::default_overlayd_socket_path_for(&self.data_dir);
288 // Bounded dial (~2.5s worst case): overlay operations are non-fatal, so a
289 // dead/unreachable overlayd must degrade fast rather than hold the daemon's
290 // startup hostage. The overlayd supervisor (ensure_overlayd_running) owns
291 // the generous "wait for a freshly-spawned overlayd to bind" budget; once
292 // it has confirmed overlayd up (or fast-failed when the binary is missing),
293 // this lazy connector only needs a short retry window.
294 let conn = OverlaydClient::connect_with_attempts(std::path::Path::new(&socket), 6)
295 .await
296 .map_err(|e| map_overlayd_err(&e))?;
297 let arc = Arc::new(Mutex::new(conn));
298 *guard = Some(Arc::clone(&arc));
299 Ok(arc)
300 }
301
302 /// Issue a single overlayd request, folding `Err` responses into errors.
303 async fn call(&self, req: OverlaydRequest) -> Result<OverlaydResponse, AgentError> {
304 let client = self.client().await?;
305 let mut conn = client.lock().await;
306 conn.call(req).await.map_err(|e| map_overlayd_err(&e))
307 }
308
309 /// Post-construction setter for the local raft node id. Forwards
310 /// `SetLocalNodeId` to overlayd best-effort.
311 pub fn set_local_node_id(&mut self, node_id: u64) {
312 self.local_node_id = node_id;
313 }
314
315 /// Record this node's cluster `WireGuard` public key (base64) and forward it
316 /// to overlayd so service subnets can be added to the cluster transport's
317 /// local `AllowedIPs`.
318 pub async fn set_local_wg_pubkey(&self, pubkey: String) {
319 *self.local_wg_pubkey.lock().await = Some(pubkey.clone());
320 if let Err(e) = self
321 .call(OverlaydRequest::SetLocalWgPubkey { pubkey })
322 .await
323 {
324 tracing::warn!(error = %e, "overlayd SetLocalWgPubkey failed");
325 }
326 }
327
328 /// Returns the number of services currently registered (cached `Status`).
329 pub async fn service_count(&self) -> usize {
330 match self.call(OverlaydRequest::Status).await {
331 Ok(OverlaydResponse::Status(snap)) => snap.service_count as usize,
332 _ => 0,
333 }
334 }
335
336 /// Returns whether NAT traversal is enabled for this manager.
337 #[must_use]
338 pub fn nat_enabled(&self) -> bool {
339 self.nat_config
340 .as_ref()
341 .map_or_else(|| NatConfig::default().enabled, |c| c.enabled)
342 }
343
344 /// Returns a clone of the configured [`NatConfig`], or `None`.
345 #[must_use]
346 pub fn nat_config(&self) -> Option<NatConfig> {
347 self.nat_config.clone()
348 }
349
350 /// Bootstrap NAT traversal. overlayd starts NAT lazily on its first
351 /// `NatTick`, so this is a thin shim that reports whether NAT is enabled.
352 ///
353 /// # Errors
354 /// Infallible today; preserved for ABI parity.
355 #[allow(clippy::unused_async)]
356 pub async fn start_nat_traversal(&self) -> Result<bool, AgentError> {
357 Ok(self.nat_enabled())
358 }
359
360 /// Run one NAT-traversal maintenance tick by forwarding `NatTick` to overlayd.
361 ///
362 /// # Errors
363 /// Returns an error when overlayd reports a NAT refresh failure.
364 pub async fn nat_maintenance_tick(&self) -> Result<(), AgentError> {
365 if !self.nat_enabled() {
366 return Ok(());
367 }
368 self.call(OverlaydRequest::NatTick).await?;
369 Ok(())
370 }
371
372 /// Snapshot the current NAT traversal state for API consumers.
373 ///
374 /// overlayd owns the live NAT orchestrator and does not surface per-peer
375 /// candidate detail over the IPC contract, so this returns an empty
376 /// snapshot. Kept for API parity.
377 #[allow(clippy::unused_async)]
378 pub async fn nat_status_snapshot(&self) -> NatStatusSnapshot {
379 let _peers: Vec<NatPeerSnapshot> = Vec::new();
380 NatStatusSnapshot::empty()
381 }
382
383 /// Record the overlay DNS server address and zone domain (cached locally;
384 /// forwarded to overlayd on each container attach).
385 pub fn set_dns_config(&mut self, addr: Option<SocketAddr>, domain: Option<String>) {
386 self.dns_server_addr = addr;
387 self.dns_domain = domain;
388 }
389
390 /// Builder-style variant of [`OverlayManager::set_dns_config`].
391 #[must_use]
392 pub fn with_dns_config(mut self, addr: Option<SocketAddr>, domain: Option<String>) -> Self {
393 self.dns_server_addr = addr;
394 self.dns_domain = domain;
395 self
396 }
397
398 /// Returns the overlay DNS server address if configured.
399 #[must_use]
400 pub fn dns_server_addr(&self) -> Option<SocketAddr> {
401 self.dns_server_addr
402 }
403
404 /// Returns the overlay DNS zone domain, if configured.
405 #[must_use]
406 pub fn dns_domain(&self) -> Option<&str> {
407 self.dns_domain.as_deref()
408 }
409
410 /// Setup the global overlay network by delegating to overlayd.
411 ///
412 /// Forwards the local node id and wg pubkey first (so overlayd has the
413 /// cluster-brain context), then issues `SetupGlobalOverlay` and caches the
414 /// returned interface name plus the node IP / CIDRs reported by `Status`.
415 ///
416 /// # Errors
417 /// Returns an error if overlayd fails to bring up the overlay.
418 pub async fn setup_global_overlay(&mut self) -> Result<(), AgentError> {
419 // Fast pre-flight: establish (and cache) the overlayd connection once with a
420 // bounded budget. If overlayd is unreachable this returns after a single
421 // ~2.5s dial instead of letting each of the calls below pay the full retry
422 // window (which previously stacked to ~35s of daemon-startup stall when the
423 // overlayd binary was missing). Overlay setup is non-fatal, so bailing here
424 // simply leaves cross-node networking degraded — handled by the caller.
425 self.client().await?;
426
427 // Push cluster-brain context first (best-effort).
428 let _ = self
429 .call(OverlaydRequest::SetLocalNodeId {
430 node_id: self.local_node_id,
431 })
432 .await;
433 if let Some(pubkey) = self.local_wg_pubkey.lock().await.clone() {
434 let _ = self
435 .call(OverlaydRequest::SetLocalWgPubkey { pubkey })
436 .await;
437 }
438
439 let cluster_cidr = self
440 .cluster_cidr
441 .map_or_else(|| "10.200.0.0/16".to_string(), |c| c.to_string());
442 let slice_cidr = self.slice_cidr.map(|c| c.to_string());
443
444 let resp = self
445 .call(OverlaydRequest::SetupGlobalOverlay {
446 deployment: self.deployment.clone(),
447 instance_id: self.instance_id.clone(),
448 cluster_cidr,
449 slice_cidr,
450 wg_port: self.overlay_port,
451 nat_enabled: self.nat_enabled(),
452 })
453 .await?;
454 if let OverlaydResponse::BridgeName { name } = resp {
455 self.global_interface = Some(name);
456 }
457
458 // Refresh cached status (node_ip, cidrs).
459 self.refresh_status().await;
460 Ok(())
461 }
462
463 /// Refresh cached status fields from overlayd (`node_ip`, interface, CIDRs).
464 async fn refresh_status(&mut self) {
465 if let Ok(OverlaydResponse::Status(snap)) = self.call(OverlaydRequest::Status).await {
466 let StatusSnapshot {
467 interface,
468 node_ip,
469 overlay_cidr,
470 slice_cidr,
471 ..
472 } = snap;
473 if let Some(iface) = interface {
474 self.global_interface = Some(iface);
475 }
476 if node_ip.is_some() {
477 self.node_ip = node_ip;
478 }
479 if let Some(c) = overlay_cidr.and_then(|s| s.parse().ok()) {
480 self.cluster_cidr = Some(c);
481 }
482 if let Some(s) = slice_cidr.and_then(|s| s.parse().ok()) {
483 self.slice_cidr = Some(s);
484 }
485 }
486 }
487
488 /// Set up the per-service overlay segment by delegating to overlayd.
489 ///
490 /// Returns a [`ServiceOverlayInfo`] describing the segment. The
491 /// container-attach handle (bridge name on Linux, interface elsewhere) is
492 /// `info.name`. In `Dedicated` mode the `wg_public_key`/`wg_port`/
493 /// `overlay_ip`/`subnet` fields carry the per-service `WireGuard`
494 /// transport's identity so the deploy path can publish it to Raft and mesh
495 /// with the other hosting nodes; in `Shared` mode those fields are `None`.
496 ///
497 /// `mode` is the service's resolved [`OverlayMode`], read from its spec at
498 /// the deploy call site. In `Shared` mode overlayd attaches the service to
499 /// the cluster transport via a per-node bridge; in `Dedicated` mode it
500 /// stands up a per-service `WireGuard` transport with its own crypto
501 /// context and reports its identity via
502 /// [`OverlaydResponse::ServiceOverlay`].
503 ///
504 /// # Errors
505 /// Returns an error if overlayd fails to create the segment.
506 pub async fn setup_service_overlay(
507 &self,
508 service_name: &str,
509 mode: zlayer_types::overlay::OverlayMode,
510 ) -> Result<zlayer_types::overlayd::ServiceOverlayInfo, AgentError> {
511 let resp = self
512 .call(OverlaydRequest::SetupServiceOverlay {
513 service: service_name.to_string(),
514 mode,
515 })
516 .await?;
517 match resp {
518 // Shared mode (and any server still on the legacy response shape)
519 // reports only the container-attach handle; synthesize a
520 // `ServiceOverlayInfo` whose Dedicated-only fields are `None`.
521 OverlaydResponse::BridgeName { name } => {
522 Ok(zlayer_types::overlayd::ServiceOverlayInfo {
523 name,
524 mode,
525 wg_public_key: None,
526 wg_port: None,
527 overlay_ip: None,
528 subnet: None,
529 })
530 }
531 // Dedicated mode reports the full device identity.
532 OverlaydResponse::ServiceOverlay(info) => Ok(info),
533 other => Err(AgentError::Network(format!(
534 "overlayd SetupServiceOverlay returned unexpected response: {other:?}"
535 ))),
536 }
537 }
538
539 /// Add a container to the appropriate overlay networks by delegating to
540 /// overlayd (`AttachContainer` with a `LinuxPid` handle).
541 ///
542 /// # Errors
543 /// Returns an error if overlayd cannot attach the container.
544 pub async fn attach_container(
545 &self,
546 container_pid: u32,
547 service_name: &str,
548 join_global: bool,
549 dns_domain_override: Option<String>,
550 ) -> Result<IpAddr, AgentError> {
551 let resp = self
552 .call(OverlaydRequest::AttachContainer {
553 handle: AttachHandle::LinuxPid { pid: container_pid },
554 service: service_name.to_string(),
555 join_global,
556 dns_server: self.dns_server_addr.map(|sa| sa.ip()),
557 // Per-deployment search domain when the caller supplies one
558 // (so a guest's bare `<svc>` resolves to ITS deployment);
559 // otherwise the global zone domain.
560 dns_domain: dns_domain_override.or_else(|| self.dns_domain.clone()),
561 })
562 .await?;
563 match resp {
564 OverlaydResponse::Attached(result) => Ok(result.ip),
565 other => Err(AgentError::Network(format!(
566 "overlayd AttachContainer returned unexpected response: {other:?}"
567 ))),
568 }
569 }
570
571 /// Attach a guest-managed container (a VM with no host netns/PID) to the
572 /// overlay by asking overlayd to allocate the overlay identity (keypair +
573 /// address + the current peer set) and register the generated public key in
574 /// the mesh. The caller ships the returned [`GuestOverlayConfig`] into the
575 /// guest (over vsock) where it brings up its own `WireGuard` device.
576 ///
577 /// `id` is the opaque container id used to scope the allocation so a later
578 /// [`detach_container_guest`](OverlayManager::detach_container_guest) can
579 /// release the address + remove the peer.
580 ///
581 /// # Errors
582 /// Returns an error if overlayd cannot allocate/register the guest.
583 pub async fn attach_container_guest(
584 &self,
585 id: &str,
586 service_name: &str,
587 join_global: bool,
588 dns_domain_override: Option<String>,
589 ) -> Result<zlayer_types::overlayd::GuestOverlayConfig, AgentError> {
590 let resp = self
591 .call(OverlaydRequest::AttachContainer {
592 handle: AttachHandle::GuestManaged { id: id.to_string() },
593 service: service_name.to_string(),
594 join_global,
595 dns_server: self.dns_server_addr.map(|sa| sa.ip()),
596 // Per-deployment search domain when the caller supplies one
597 // (so a guest's bare `<svc>` resolves to ITS deployment);
598 // otherwise the global zone domain.
599 dns_domain: dns_domain_override.or_else(|| self.dns_domain.clone()),
600 })
601 .await?;
602 match resp {
603 OverlaydResponse::GuestConfig(cfg) => Ok(cfg),
604 other => Err(AgentError::Network(format!(
605 "overlayd AttachContainer(GuestManaged) returned unexpected response: {other:?}"
606 ))),
607 }
608 }
609
610 /// Detach a guest-managed container: release its overlay IP and remove its
611 /// registered mesh peer.
612 ///
613 /// # Errors
614 /// Returns an error if overlayd cannot detach the container.
615 pub async fn detach_container_guest(&self, id: &str) -> Result<(), AgentError> {
616 let resp = self
617 .call(OverlaydRequest::DetachContainer {
618 handle: AttachHandle::GuestManaged { id: id.to_string() },
619 })
620 .await?;
621 match resp {
622 OverlaydResponse::Ok => Ok(()),
623 other => Err(AgentError::Network(format!(
624 "overlayd DetachContainer(GuestManaged) returned unexpected response: {other:?}"
625 ))),
626 }
627 }
628
629 /// Register a Windows HCN container with overlayd and return its overlay IP
630 /// plus the overlayd-created namespace GUID.
631 ///
632 /// The return type gained the namespace GUID (vs. the pre-migration
633 /// IP-only return) because the HCN network + endpoint + namespace are now
634 /// created inside overlayd, and `HcsRuntime` needs that GUID to embed in the
635 /// compute-system document.
636 ///
637 /// When `autoclean` is true and overlayd reports back a namespace GUID, an
638 /// entry is recorded in [`OverlayManager::hcn_cleanup`] so a later
639 /// [`OverlayManager::detach_container_hcn`] (or process teardown) can drain
640 /// it. The cleanup map is purely agent-side bookkeeping; overlayd remains
641 /// the authoritative owner of the HCN namespace/endpoint state.
642 ///
643 /// # Errors
644 /// Returns an error if overlayd cannot attach the container.
645 #[cfg(target_os = "windows")]
646 #[allow(clippy::too_many_arguments)]
647 pub async fn attach_container_hcn(
648 &self,
649 container_id: &str,
650 service_name: &str,
651 ip_override: Option<std::net::IpAddr>,
652 autoclean: bool,
653 dns_server: Option<std::net::IpAddr>,
654 dns_domain: Option<String>,
655 ) -> Result<(std::net::IpAddr, Option<String>), AgentError> {
656 let resp = self
657 .call(OverlaydRequest::AttachContainer {
658 handle: AttachHandle::WindowsContainer {
659 container_id: container_id.to_string(),
660 ip: ip_override,
661 },
662 service: service_name.to_string(),
663 join_global: false,
664 dns_server: dns_server.or_else(|| self.dns_server_addr.map(|sa| sa.ip())),
665 dns_domain: dns_domain.or_else(|| self.dns_domain.clone()),
666 })
667 .await?;
668 match resp {
669 OverlaydResponse::Attached(result) => {
670 // Record agent-side autoclean bookkeeping. We key by the
671 // overlayd-issued namespace GUID; if overlayd did not return
672 // one (e.g. host-network attach), there is nothing to track.
673 if autoclean {
674 if let Some(ns_str) = result.namespace_guid.as_deref() {
675 match windows::core::GUID::try_from(ns_str) {
676 Ok(ns_guid) => {
677 let mut cleanup = self.hcn_cleanup.lock().await;
678 cleanup.insert(ns_guid, (service_name.to_string(), result.ip));
679 }
680 Err(e) => {
681 tracing::warn!(
682 ns = %ns_str,
683 error = %e,
684 "overlayd returned a non-GUID namespace handle; skipping hcn_cleanup insert"
685 );
686 }
687 }
688 }
689 }
690 Ok((result.ip, result.namespace_guid))
691 }
692 other => Err(AgentError::Network(format!(
693 "overlayd AttachContainer(WindowsContainer) returned unexpected response: {other:?}"
694 ))),
695 }
696 }
697
698 /// Detach and release a Windows HCN container by its bare namespace GUID.
699 ///
700 /// Drains the agent-side [`OverlayManager::hcn_cleanup`] entry (if any)
701 /// before forwarding `DetachContainer` to overlayd. Safe to call with an
702 /// unknown GUID — the map drain is a no-op in that case.
703 ///
704 /// # Errors
705 /// Returns an error if overlayd reports a detach failure.
706 #[cfg(target_os = "windows")]
707 pub async fn detach_container_hcn(&self, namespace_guid: &str) -> Result<(), AgentError> {
708 // Drain the agent-side cleanup map first so a later overlayd error does
709 // not leave a stale entry behind.
710 match windows::core::GUID::try_from(namespace_guid) {
711 Ok(ns_guid) => {
712 let mut cleanup = self.hcn_cleanup.lock().await;
713 if let Some((service_name, ip)) = cleanup.remove(&ns_guid) {
714 tracing::info!(
715 ns = %namespace_guid,
716 service = %service_name,
717 ip = %ip,
718 "Released HCN overlay attachment (agent-side cleanup)"
719 );
720 }
721 }
722 Err(e) => {
723 tracing::warn!(
724 ns = %namespace_guid,
725 error = %e,
726 "detach_container_hcn called with non-GUID handle; skipping hcn_cleanup drain"
727 );
728 }
729 }
730
731 self.call(OverlaydRequest::DetachContainer {
732 handle: AttachHandle::WindowsContainer {
733 container_id: namespace_guid.to_string(),
734 ip: None,
735 },
736 })
737 .await?;
738 Ok(())
739 }
740
741 /// Release the overlay resources held by a Linux container by delegating to
742 /// overlayd (`DetachContainer` with a `LinuxPid` handle).
743 ///
744 /// # Errors
745 /// Returns an error if overlayd reports a detach failure.
746 pub async fn detach_container(&self, pid: u32) -> Result<(), AgentError> {
747 self.call(OverlaydRequest::DetachContainer {
748 handle: AttachHandle::LinuxPid { pid },
749 })
750 .await?;
751 Ok(())
752 }
753
754 /// Tear down the per-service overlay segment for `service_name`.
755 pub async fn teardown_service_overlay(&self, service_name: &str) {
756 if let Err(e) = self
757 .call(OverlaydRequest::TeardownServiceOverlay {
758 service: service_name.to_string(),
759 })
760 .await
761 {
762 tracing::warn!(service = %service_name, error = %e, "overlayd TeardownServiceOverlay failed");
763 }
764 }
765
766 /// Cleanup all overlay networks (tears down the global overlay in overlayd).
767 ///
768 /// # Errors
769 /// Returns an error if overlayd reports a teardown failure.
770 pub async fn cleanup(&mut self) -> Result<(), AgentError> {
771 self.call(OverlaydRequest::TeardownGlobalOverlay).await?;
772 self.global_interface = None;
773 // Best-effort drain of any agent-side autoclean bookkeeping we still
774 // hold on Windows. overlayd already tore down the HCN namespaces in
775 // response to `TeardownGlobalOverlay`; this just empties the side-map
776 // so a subsequent reuse of this manager starts clean.
777 #[cfg(target_os = "windows")]
778 {
779 let mut cleanup = self.hcn_cleanup.lock().await;
780 cleanup.clear();
781 }
782 Ok(())
783 }
784
785 /// Returns this node's IP on the global overlay network (cached).
786 pub fn node_ip(&self) -> Option<IpAddr> {
787 self.node_ip
788 }
789
790 /// Returns the deployment name this overlay manager was created for.
791 pub fn deployment(&self) -> &str {
792 &self.deployment
793 }
794
795 /// Returns the global overlay interface name (cached).
796 pub fn global_interface(&self) -> Option<&str> {
797 self.global_interface.as_deref()
798 }
799
800 /// Returns the `WireGuard` listen port for the overlay network.
801 pub fn overlay_port(&self) -> u16 {
802 self.overlay_port
803 }
804
805 /// Returns `true` if the global overlay transport is active (cached: an
806 /// interface name has been recorded).
807 pub fn has_global_transport(&self) -> bool {
808 self.global_interface.is_some()
809 }
810
811 /// Returns the number of per-service overlay bridges currently active.
812 pub async fn service_bridge_count(&self) -> usize {
813 match self.call(OverlaydRequest::Status).await {
814 Ok(OverlaydResponse::Status(snap)) => snap.service_count as usize,
815 _ => 0,
816 }
817 }
818
819 /// Add a peer to the live global overlay transport by delegating to overlayd.
820 ///
821 /// The parameter type is preserved (`&zlayer_overlay::PeerInfo`) so the one
822 /// caller (`zlayer-api`'s internal add-peer handler) compiles unchanged; the
823 /// shim converts it to a wire-safe [`PeerSpec`].
824 ///
825 /// # Errors
826 /// Returns an error if overlayd rejects the peer (e.g. overlay not yet up).
827 pub async fn add_global_peer(&self, peer: &zlayer_overlay::PeerInfo) -> Result<(), AgentError> {
828 self.call(OverlaydRequest::AddPeer {
829 peer: peer_spec_from(peer),
830 scope: zlayer_types::overlayd::PeerScope::Global,
831 })
832 .await?;
833 Ok(())
834 }
835
836 /// Add a peer to a service's dedicated per-service overlay transport.
837 ///
838 /// Analogous to [`OverlayManager::add_global_peer`] but scoped to
839 /// `service`'s [`OverlayMode::Dedicated`] device: first the peer itself
840 /// (`AddPeer` with `scope: Service`), then the service `subnet` plumbed
841 /// into that peer's `AllowedIPs` (`AddAllowedIp` with the same scope).
842 ///
843 /// # Errors
844 /// Returns an error if overlayd rejects the peer or the allowed-IP add
845 /// (e.g. the service's dedicated transport is not yet up).
846 pub async fn add_service_peer(
847 &self,
848 service: &str,
849 peer: &zlayer_overlay::PeerInfo,
850 subnet: &str,
851 ) -> Result<(), AgentError> {
852 self.call(OverlaydRequest::AddPeer {
853 peer: peer_spec_from(peer),
854 scope: zlayer_types::overlayd::PeerScope::Service {
855 service: service.to_string(),
856 },
857 })
858 .await?;
859 self.call(OverlaydRequest::AddAllowedIp {
860 pubkey: peer.public_key.clone(),
861 cidr: subnet.to_string(),
862 scope: zlayer_types::overlayd::PeerScope::Service {
863 service: service.to_string(),
864 },
865 })
866 .await?;
867 Ok(())
868 }
869
870 /// Remove a peer (by base64 public key) from a service's dedicated
871 /// per-service overlay transport.
872 ///
873 /// # Errors
874 /// Returns an error if overlayd reports the removal failed.
875 pub async fn remove_service_peer(&self, service: &str, pubkey: &str) -> Result<(), AgentError> {
876 self.call(OverlaydRequest::RemovePeer {
877 pubkey: pubkey.to_string(),
878 scope: zlayer_types::overlayd::PeerScope::Service {
879 service: service.to_string(),
880 },
881 })
882 .await?;
883 Ok(())
884 }
885
886 /// Returns the CIDR string for the overlay IP allocator (cached cluster CIDR).
887 pub fn overlay_cidr(&self) -> String {
888 self.cluster_cidr
889 .map_or_else(|| "10.200.0.0/16".to_string(), |c| c.to_string())
890 }
891
892 /// Returns the per-node slice CIDR this manager was built with, or `None`.
893 pub fn slice_cidr(&self) -> Option<IpNetwork> {
894 self.slice_cidr
895 }
896
897 /// Returns the full cluster CIDR, if known.
898 pub fn cluster_cidr(&self) -> Option<IpNetwork> {
899 self.cluster_cidr
900 }
901
902 /// Persist the IPAM allocator state. overlayd owns IPAM; this is a no-op
903 /// retained for ABI parity with callers.
904 ///
905 /// # Errors
906 /// Infallible today.
907 #[allow(clippy::unused_async)]
908 pub async fn persist_ipam_state(&self, _path: &std::path::Path) -> Result<(), AgentError> {
909 Ok(())
910 }
911
912 /// Restore IPAM allocator state. overlayd owns IPAM; this is a no-op
913 /// retained for ABI parity with callers.
914 ///
915 /// # Errors
916 /// Infallible today.
917 #[allow(clippy::unused_async)]
918 pub async fn restore_ipam_state(&mut self, _path: &std::path::Path) -> Result<(), AgentError> {
919 Ok(())
920 }
921
922 /// Returns IP allocation statistics: (`allocated_count`, `base_addr`).
923 ///
924 /// overlayd owns IPAM and does not surface allocation counters over IPC, so
925 /// this reports `(0, base)` derived from the cached cluster CIDR.
926 pub fn ip_alloc_stats(&self) -> (u64, IpAddr) {
927 let base = self
928 .cluster_cidr
929 .map_or(IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED), |c| c.network());
930 (0, base)
931 }
932}
933
#[cfg(test)]
mod tests {
    use super::*;

    /// Generated interface names must stay within `MAX_IFNAME_LEN` for a mix
    /// of short, long, single-part, multi-part and unusual-suffix inputs.
    #[test]
    fn interface_name_never_exceeds_limit() {
        let cases: &[(&[&str], &str)] = &[
            (&["a"], "g"),
            (&["zlayer-manager"], "g"),
            (&["my-very-long-deployment-name-that-goes-on-and-on"], "g"),
            (&["zlayer", "manager"], "s"),
            (&["zlayer-manager", "frontend-service"], "s"),
            (&["a", "b"], "s"),
            (
                &["abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz"],
                "s",
            ),
            (&["x"], ""),
            (&["deployment"], ""),
            (&["a-really-long-name-exceeding-everything"], "suffix"),
        ];

        for &(parts, suffix) in cases {
            let name = make_interface_name(parts, suffix);
            assert!(
                name.len() <= MAX_IFNAME_LEN,
                "Name '{}' is {} chars (parts={:?}, suffix='{}')",
                name,
                name.len(),
                parts,
                suffix,
            );
        }
    }

    /// 200-character parts, alone or repeated, must still respect the limit.
    #[test]
    fn interface_name_with_extreme_lengths() {
        let oversized = "a".repeat(200);
        let part = oversized.as_str();

        let assert_fits = |parts: &[&str], suffix: &str| {
            let name = make_interface_name(parts, suffix);
            assert!(name.len() <= MAX_IFNAME_LEN, "Name '{name}' too long");
        };

        assert_fits(&[part], "g");
        assert_fits(&[part, part, part], "s");
        assert_fits(&[part], "");
    }

    /// Calling the generator twice with identical inputs yields equal names.
    #[test]
    fn interface_name_is_deterministic() {
        let generate = || make_interface_name(&["zlayer-manager"], "g");
        assert_eq!(generate(), generate());
    }

    /// Changing either the part or the suffix must change the name.
    #[test]
    fn interface_name_uniqueness() {
        // Same suffix, different deployment part.
        assert_ne!(
            make_interface_name(&["deploy-a"], "g"),
            make_interface_name(&["deploy-b"], "g")
        );

        // Same deployment part, different suffix.
        assert_ne!(
            make_interface_name(&["deploy"], "g"),
            make_interface_name(&["deploy"], "s")
        );
    }

    /// Inputs short enough to fit are kept in their readable form.
    #[test]
    fn interface_name_short_inputs_are_readable() {
        assert_eq!(make_interface_name(&["app"], "g"), "zl-app-g");
        assert_eq!(make_interface_name(&["my", "web"], "s"), "zl-my-web-s");
    }

    /// `with_slice` must hand back exactly the values it was built with.
    #[test]
    fn with_slice_stores_slice_cidr() {
        let cluster = "10.200.0.0/16".parse::<IpNetwork>().unwrap();
        let slice = "10.200.42.0/28".parse::<IpNetwork>().unwrap();

        let manager = OverlayManager::with_slice(
            "test-deploy".to_string(),
            cluster,
            slice,
            51820,
            "test".to_string(),
        );

        assert_eq!(manager.slice_cidr(), Some(slice));
        assert_eq!(manager.cluster_cidr(), Some(cluster));
        assert_eq!(manager.overlay_port(), 51820);
        assert_eq!(manager.deployment(), "test-deploy");
    }

    /// A freshly constructed manager has no node IP yet.
    #[tokio::test]
    async fn node_ip_none_before_setup() {
        let manager = OverlayManager::new("test-deploy".to_string(), "test".to_string())
            .await
            .unwrap();
        assert!(manager.node_ip().is_none());
    }

    /// Setting and then clearing the DNS config is reflected by the getters.
    #[tokio::test]
    async fn dns_config_set_and_round_trip() {
        let mut manager = OverlayManager::new("dns-roundtrip".to_string(), "test".to_string())
            .await
            .unwrap();

        // Populate the cache and read it back.
        let server: SocketAddr = "10.200.42.1:15353".parse().unwrap();
        manager.set_dns_config(Some(server), Some("overlay.local".to_string()));
        assert_eq!(manager.dns_server_addr(), Some(server));
        assert_eq!(manager.dns_domain(), Some("overlay.local"));

        // Clearing must wipe both values.
        manager.set_dns_config(None, None);
        assert!(manager.dns_server_addr().is_none());
        assert!(manager.dns_domain().is_none());
    }

    /// `peer_spec_from` carries every `PeerInfo` field over to `PeerSpec`:
    /// the endpoint as a string and the keepalive as whole seconds.
    #[test]
    fn peer_spec_from_copies_all_fields() {
        let info = zlayer_overlay::PeerInfo {
            public_key: "base64key".to_string(),
            endpoint: "1.2.3.4:51820".parse().unwrap(),
            allowed_ips: "10.200.0.2/32".to_string(),
            persistent_keepalive_interval: std::time::Duration::from_secs(25),
        };

        let converted = peer_spec_from(&info);

        assert_eq!(converted.public_key, "base64key");
        assert_eq!(converted.endpoint, "1.2.3.4:51820");
        assert_eq!(converted.allowed_ips, "10.200.0.2/32");
        assert_eq!(converted.persistent_keepalive_secs, 25);
    }

    /// A `SetupServiceOverlay` request built with `Dedicated` must carry
    /// `Dedicated`, and that value must differ from `OverlayMode::default()`.
    #[test]
    fn setup_service_overlay_request_carries_dedicated_mode() {
        use zlayer_types::overlay::OverlayMode;

        let request = OverlaydRequest::SetupServiceOverlay {
            service: "web".to_string(),
            mode: OverlayMode::Dedicated,
        };

        let (service, mode) = match request {
            OverlaydRequest::SetupServiceOverlay { service, mode } => (service, mode),
            other => panic!("expected SetupServiceOverlay, got {other:?}"),
        };

        assert_eq!(service, "web");
        assert_eq!(mode, OverlayMode::Dedicated);
        assert_ne!(mode, OverlayMode::default());
    }

    /// Service-scoped peer requests must carry `PeerScope::Service { service }`
    /// rather than `Global`, keeping dedicated transports separate from the
    /// cluster transport.
    #[test]
    fn service_peer_ops_use_service_scope() {
        use zlayer_types::overlayd::PeerScope;

        let web_scope = || PeerScope::Service {
            service: "web".to_string(),
        };
        let info = zlayer_overlay::PeerInfo {
            public_key: "k".to_string(),
            endpoint: "1.2.3.4:51820".parse().unwrap(),
            allowed_ips: "10.201.0.2/32".to_string(),
            persistent_keepalive_interval: std::time::Duration::from_secs(0),
        };

        // Build the three service-scoped requests.
        let shared_scope = web_scope();
        let add = OverlaydRequest::AddPeer {
            peer: peer_spec_from(&info),
            scope: shared_scope.clone(),
        };
        let allow = OverlaydRequest::AddAllowedIp {
            pubkey: info.public_key.clone(),
            cidr: "10.201.0.0/24".to_string(),
            scope: shared_scope.clone(),
        };
        let remove = OverlaydRequest::RemovePeer {
            pubkey: info.public_key.clone(),
            scope: shared_scope,
        };

        // AddPeer: service scope plus the converted peer.
        let (scope, spec) = match add {
            OverlaydRequest::AddPeer { scope, peer } => (scope, peer),
            other => panic!("expected AddPeer, got {other:?}"),
        };
        assert_eq!(scope, web_scope());
        assert_eq!(spec.public_key, "k");

        // AddAllowedIp: the subnet and the same service scope.
        let (scope, cidr) = match allow {
            OverlaydRequest::AddAllowedIp { scope, cidr, .. } => (scope, cidr),
            other => panic!("expected AddAllowedIp, got {other:?}"),
        };
        assert_eq!(cidr, "10.201.0.0/24");
        assert_eq!(scope, web_scope());

        // RemovePeer: the public key and the same service scope.
        let (scope, pubkey) = match remove {
            OverlaydRequest::RemovePeer { scope, pubkey } => (scope, pubkey),
            other => panic!("expected RemovePeer, got {other:?}"),
        };
        assert_eq!(pubkey, "k");
        assert_eq!(scope, web_scope());
    }

    /// Windows-only: the `hcn_cleanup` side-map must be empty right after
    /// construction through either `new()` or `with_slice()`. Insert/drain
    /// behaviour sits behind the overlayd IPC layer (covered by the windows
    /// e2e tests); this only checks the field is wired through both
    /// constructors.
    #[cfg(target_os = "windows")]
    #[tokio::test]
    async fn hcn_cleanup_map_starts_empty() {
        let from_new = OverlayManager::new("test-deploy".to_string(), "test".to_string())
            .await
            .unwrap();
        assert!(
            from_new.hcn_cleanup.lock().await.is_empty(),
            "hcn_cleanup map must start empty from new()"
        );

        let cluster: IpNetwork = "10.200.0.0/16".parse().unwrap();
        let slice: IpNetwork = "10.200.42.0/28".parse().unwrap();
        let from_slice = OverlayManager::with_slice(
            "test-deploy".to_string(),
            cluster,
            slice,
            51820,
            "test".to_string(),
        );
        assert!(
            from_slice.hcn_cleanup.lock().await.is_empty(),
            "hcn_cleanup map must start empty from with_slice()"
        );
    }
}