Skip to main content

zlayer_agent/
overlay_manager.rs

1//! Thin overlayd client shim.
2//!
3//! Historically `OverlayManager` owned every mechanism touching the
4//! overlay/network plane (the cluster `WireGuard` transport, per-service Linux
5//! bridges, veth/netns attach, the Windows HCN Internal network + endpoints,
6//! IPAM, DNS, NAT). All of that machinery was migrated wholesale into the
7//! standalone `zlayer-overlayd` daemon (`crates/zlayer-overlayd/src/server.rs`).
8//!
9//! What remains here is a **client shim**: it keeps only cluster-brain / cached
10//! state (deployment name, instance id, local node id, local wg pubkey, and
11//! cached status values such as `node_ip`/`dns`/`cidr`) and forwards every
12//! mechanical operation to overlayd over the IPC client
//! [`zlayer_overlayd::OverlaydClient`]. Public methods keep the signature they
//! had before the migration so existing callers compile unchanged (the one
//! exception is `attach_container_hcn`, whose return type gained the
//! overlayd-created namespace GUID); each body simply builds the matching
//! [`OverlaydRequest`], issues `client.call(req)`, and maps the response.
17//!
18//! On Windows, the manager additionally maintains a small `hcn_cleanup` map
19//! (HCN namespace GUID -> (`service_name`, `allocated_ip`)) so that
20//! agent-side bookkeeping for autoclean attaches survives even though the
21//! authoritative HCN state lives in overlayd. The map is populated on
22//! `attach_container_hcn(autoclean = true)` and drained on
23//! `detach_container_hcn`.
24
25use crate::error::AgentError;
26use ipnetwork::IpNetwork;
27use std::collections::hash_map::DefaultHasher;
28use std::hash::{Hash, Hasher};
29use std::net::{IpAddr, SocketAddr};
30use std::path::PathBuf;
31use std::sync::Arc;
32use tokio::sync::Mutex;
33use zlayer_overlay::{NatConfig, NatPeerSnapshot, NatStatusSnapshot};
34use zlayer_overlayd::OverlaydClient;
35use zlayer_paths::ZLayerDirs;
36use zlayer_types::overlayd::{
37    AttachHandle, OverlaydRequest, OverlaydResponse, PeerSpec, StatusSnapshot,
38};
39
/// Longest interface name Linux accepts (`IFNAMSIZ - 1`, leaving room for the
/// NUL terminator).
const MAX_IFNAME_LEN: usize = 15;

/// Build a Linux-safe interface name that never exceeds 15 bytes.
///
/// The readable form is `zl-<parts joined by '-'>`, followed by `-<suffix>`
/// when `suffix` is non-empty. If that fits it is returned as-is. Otherwise
/// the parts and the suffix are fed through a hasher and the hex digest is
/// used instead:
///
/// * no suffix, or a suffix so long that no hex digit would fit next to it:
///   `zl-<up to 12 hex digits>` (the suffix is dropped from the visible name
///   but still contributes to the hash);
/// * otherwise: `zl-<as many hex digits as fit>-<suffix>`.
///
/// This lives in the agent (and is re-exported from the crate root) because
/// callers outside the overlay machinery — notably
/// `runtimes/wsl2_delegate.rs` — still rely on it for deterministic naming.
/// overlayd carries its own private copy for names it generates server-side;
/// the two are meant to stay identical.
#[must_use]
pub fn make_interface_name(parts: &[&str], suffix: &str) -> String {
    let has_suffix = !suffix.is_empty();

    let base = format!("zl-{}", parts.join("-"));
    let readable = if has_suffix {
        format!("{base}-{suffix}")
    } else {
        base
    };
    if readable.len() <= MAX_IFNAME_LEN {
        return readable;
    }

    // Too long for the kernel: derive a digest from every part, then the suffix.
    let hash = {
        let mut state = DefaultHasher::new();
        parts.iter().for_each(|segment| segment.hash(&mut state));
        suffix.hash(&mut state);
        format!("{:x}", state.finish())
    };

    // Bytes available after the 3-byte "zl-" prefix.
    let payload_room = MAX_IFNAME_LEN - 3;
    // Hex digits that still fit once "-<suffix>" has been reserved. Zero means
    // "emit the hash alone": either there is no suffix, or the suffix by
    // itself would eat the whole budget.
    let hash_room = if has_suffix {
        payload_room.saturating_sub(1 + suffix.len())
    } else {
        0
    };

    if hash_room == 0 {
        format!("zl-{}", &hash[..payload_room.min(hash.len())])
    } else {
        format!("zl-{}-{}", &hash[..hash_room.min(hash.len())], suffix)
    }
}
91
92/// Map a `zlayer_overlayd` client error into the agent's error type.
93fn map_overlayd_err(e: &zlayer_overlayd::OverlaydError) -> AgentError {
94    AgentError::Network(format!("overlayd: {e}"))
95}
96
97/// Convert a live [`zlayer_overlay::PeerInfo`] into the wire-safe [`PeerSpec`]
98/// the overlayd IPC contract expects. Shared by every `add_*_peer` shim so the
99/// global and per-service paths build identical specs.
100fn peer_spec_from(peer: &zlayer_overlay::PeerInfo) -> PeerSpec {
101    PeerSpec {
102        public_key: peer.public_key.clone(),
103        endpoint: peer.endpoint.to_string(),
104        allowed_ips: peer.allowed_ips.clone(),
105        persistent_keepalive_secs: peer.persistent_keepalive_interval.as_secs(),
106    }
107}
108
109/// Manages overlay networks for a deployment by delegating all mechanics to the
110/// `zlayer-overlayd` daemon.
111///
112/// This struct holds only cluster-brain / cached state; the actual overlay
113/// machinery lives in overlayd and is reached through [`OverlayManager::client`].
114pub struct OverlayManager {
115    /// Deployment name (used for network naming).
116    deployment: String,
117    /// Per-daemon-process disambiguator included in overlay link names. Stable
118    /// for the daemon's lifetime; forwarded to overlayd in `SetupGlobalOverlay`.
119    instance_id: String,
120    /// Root data directory; used to resolve the overlayd IPC socket path.
121    data_dir: PathBuf,
122    /// Lazily-connected overlayd IPC client. Wrapped in an `Arc<Mutex<_>>` so
123    /// the manager can be shared behind an `Arc<RwLock<_>>` and still serialize
124    /// request/response round-trips on the single framed connection.
125    client: Mutex<Option<Arc<Mutex<OverlaydClient>>>>,
126    /// Local raft node id, forwarded to overlayd via `SetLocalNodeId`.
127    local_node_id: u64,
128    /// This node's cluster `WireGuard` public key (base64), forwarded to
129    /// overlayd via `SetLocalWgPubkey`. Behind a `Mutex` because the setter
130    /// takes `&self` (callers hold only a read guard at that point).
131    local_wg_pubkey: Mutex<Option<String>>,
132    /// `WireGuard` listen port for the overlay network.
133    overlay_port: u16,
134    /// Cached node overlay IP, populated from `SetupGlobalOverlay`/`Status`.
135    node_ip: Option<IpAddr>,
136    /// Cached global overlay interface name.
137    global_interface: Option<String>,
138    /// Cached full cluster CIDR.
139    cluster_cidr: Option<IpNetwork>,
140    /// Cached per-node slice CIDR.
141    slice_cidr: Option<IpNetwork>,
142    /// Cached overlay DNS server address.
143    dns_server_addr: Option<SocketAddr>,
144    /// Cached overlay DNS zone domain.
145    dns_domain: Option<String>,
146    /// NAT traversal configuration. overlayd owns the live NAT orchestrator;
147    /// this is cached so the daemon can decide whether to drive `NatTick`.
148    nat_config: Option<NatConfig>,
149    /// Override for the `WireGuard` UAPI socket directory. overlayd owns the
150    /// real transport, so this is retained only for API/diagnostic parity.
151    uapi_sock_dir: Option<PathBuf>,
152    /// Map of HCN namespace GUID -> (`service_name`, `allocated_ip`) for autoclean.
153    /// When a Windows container is attached with `autoclean = true`, its entry
154    /// is inserted here; `detach_container_hcn` removes it. overlayd is the
155    /// authoritative owner of the HCN namespace/endpoint state, but the agent
156    /// keeps this side-map so it can answer "what attachments do I still need
157    /// to release on shutdown?" without an IPC round-trip per query.
158    #[cfg(target_os = "windows")]
159    hcn_cleanup: std::sync::Arc<
160        tokio::sync::Mutex<
161            std::collections::HashMap<windows::core::GUID, (String, std::net::IpAddr)>,
162        >,
163    >,
164}
165
166impl OverlayManager {
167    /// Create a new overlay manager for a deployment (legacy single-node path).
168    ///
169    /// Uses the default cluster `/16`. Prefer [`OverlayManager::with_slice`] for
170    /// cluster deployments. The overlayd IPC client is connected lazily on first
171    /// use (via the socket under the system-default data dir).
172    ///
173    /// # Errors
174    /// Infallible today; the `Result` is preserved for ABI parity with callers.
175    ///
176    /// # Panics
177    /// Panics only if the compile-time-constant default CIDR `10.200.0.0/16`
178    /// fails to parse (impossible).
179    #[allow(clippy::unused_async)]
180    pub async fn new(deployment: String, instance_id: String) -> Result<Self, AgentError> {
181        let data_dir = ZLayerDirs::system_default().data_dir().to_path_buf();
182        let default_cidr: IpNetwork = "10.200.0.0/16".parse().expect("compile-time constant CIDR");
183        Ok(Self {
184            deployment,
185            instance_id,
186            data_dir,
187            client: Mutex::new(None),
188            local_node_id: 0,
189            local_wg_pubkey: Mutex::new(None),
190            overlay_port: zlayer_core::DEFAULT_WG_PORT,
191            node_ip: None,
192            global_interface: None,
193            cluster_cidr: Some(default_cidr),
194            slice_cidr: None,
195            dns_server_addr: None,
196            dns_domain: None,
197            nat_config: None,
198            uapi_sock_dir: None,
199            #[cfg(target_os = "windows")]
200            hcn_cleanup: std::sync::Arc::new(tokio::sync::Mutex::new(
201                std::collections::HashMap::new(),
202            )),
203        })
204    }
205
206    /// Create an `OverlayManager` bound to a per-node slice.
207    ///
208    /// `slice_cidr` is the per-node slice owned by this node; `cluster_cidr` is
209    /// the full cluster CIDR. Both are forwarded to overlayd in
210    /// `SetupGlobalOverlay`.
211    #[must_use]
212    pub fn with_slice(
213        deployment: String,
214        cluster_cidr: IpNetwork,
215        slice_cidr: IpNetwork,
216        port: u16,
217        instance_id: String,
218    ) -> Self {
219        let data_dir = ZLayerDirs::system_default().data_dir().to_path_buf();
220        Self {
221            deployment,
222            instance_id,
223            data_dir,
224            client: Mutex::new(None),
225            local_node_id: 0,
226            local_wg_pubkey: Mutex::new(None),
227            overlay_port: port,
228            node_ip: None,
229            global_interface: None,
230            cluster_cidr: Some(cluster_cidr),
231            slice_cidr: Some(slice_cidr),
232            dns_server_addr: None,
233            dns_domain: None,
234            nat_config: None,
235            uapi_sock_dir: None,
236            #[cfg(target_os = "windows")]
237            hcn_cleanup: std::sync::Arc::new(tokio::sync::Mutex::new(
238                std::collections::HashMap::new(),
239            )),
240        }
241    }
242
243    /// Set the `WireGuard` listen port for the overlay network.
244    #[must_use]
245    pub fn with_overlay_port(mut self, port: u16) -> Self {
246        self.overlay_port = port;
247        self
248    }
249
250    /// Set the NAT traversal configuration. overlayd owns the live NAT
251    /// orchestrator; this records the toggle so `SetupGlobalOverlay` can carry
252    /// `nat_enabled` and the daemon can decide whether to drive `NatTick`.
253    #[must_use]
254    pub fn with_nat_config(mut self, nat: NatConfig) -> Self {
255        self.nat_config = Some(nat);
256        self
257    }
258
259    /// Override the `WireGuard` UAPI socket directory. Retained for API parity;
260    /// overlayd owns the real transport's socket directory.
261    #[must_use]
262    pub fn with_uapi_sock_dir(mut self, dir: impl Into<PathBuf>) -> Self {
263        self.uapi_sock_dir = Some(dir.into());
264        self
265    }
266
267    /// Override the data directory used to resolve the overlayd IPC socket.
268    #[must_use]
269    pub fn with_data_dir(mut self, dir: impl Into<PathBuf>) -> Self {
270        self.data_dir = dir.into();
271        self
272    }
273
274    /// Set the local raft node id (builder-style).
275    #[must_use]
276    pub fn with_local_node_id(mut self, node_id: u64) -> Self {
277        self.local_node_id = node_id;
278        self
279    }
280
281    /// Get or lazily establish the overlayd IPC connection.
282    async fn client(&self) -> Result<Arc<Mutex<OverlaydClient>>, AgentError> {
283        let mut guard = self.client.lock().await;
284        if let Some(c) = guard.as_ref() {
285            return Ok(Arc::clone(c));
286        }
287        let socket = ZLayerDirs::default_overlayd_socket_path_for(&self.data_dir);
288        // Bounded dial (~2.5s worst case): overlay operations are non-fatal, so a
289        // dead/unreachable overlayd must degrade fast rather than hold the daemon's
290        // startup hostage. The overlayd supervisor (ensure_overlayd_running) owns
291        // the generous "wait for a freshly-spawned overlayd to bind" budget; once
292        // it has confirmed overlayd up (or fast-failed when the binary is missing),
293        // this lazy connector only needs a short retry window.
294        let conn = OverlaydClient::connect_with_attempts(std::path::Path::new(&socket), 6)
295            .await
296            .map_err(|e| map_overlayd_err(&e))?;
297        let arc = Arc::new(Mutex::new(conn));
298        *guard = Some(Arc::clone(&arc));
299        Ok(arc)
300    }
301
302    /// Issue a single overlayd request, folding `Err` responses into errors.
303    async fn call(&self, req: OverlaydRequest) -> Result<OverlaydResponse, AgentError> {
304        let client = self.client().await?;
305        let mut conn = client.lock().await;
306        conn.call(req).await.map_err(|e| map_overlayd_err(&e))
307    }
308
309    /// Post-construction setter for the local raft node id. Forwards
310    /// `SetLocalNodeId` to overlayd best-effort.
311    pub fn set_local_node_id(&mut self, node_id: u64) {
312        self.local_node_id = node_id;
313    }
314
315    /// Record this node's cluster `WireGuard` public key (base64) and forward it
316    /// to overlayd so service subnets can be added to the cluster transport's
317    /// local `AllowedIPs`.
318    pub async fn set_local_wg_pubkey(&self, pubkey: String) {
319        *self.local_wg_pubkey.lock().await = Some(pubkey.clone());
320        if let Err(e) = self
321            .call(OverlaydRequest::SetLocalWgPubkey { pubkey })
322            .await
323        {
324            tracing::warn!(error = %e, "overlayd SetLocalWgPubkey failed");
325        }
326    }
327
328    /// Returns the number of services currently registered (cached `Status`).
329    pub async fn service_count(&self) -> usize {
330        match self.call(OverlaydRequest::Status).await {
331            Ok(OverlaydResponse::Status(snap)) => snap.service_count as usize,
332            _ => 0,
333        }
334    }
335
336    /// Returns whether NAT traversal is enabled for this manager.
337    #[must_use]
338    pub fn nat_enabled(&self) -> bool {
339        self.nat_config
340            .as_ref()
341            .map_or_else(|| NatConfig::default().enabled, |c| c.enabled)
342    }
343
344    /// Returns a clone of the configured [`NatConfig`], or `None`.
345    #[must_use]
346    pub fn nat_config(&self) -> Option<NatConfig> {
347        self.nat_config.clone()
348    }
349
350    /// Bootstrap NAT traversal. overlayd starts NAT lazily on its first
351    /// `NatTick`, so this is a thin shim that reports whether NAT is enabled.
352    ///
353    /// # Errors
354    /// Infallible today; preserved for ABI parity.
355    #[allow(clippy::unused_async)]
356    pub async fn start_nat_traversal(&self) -> Result<bool, AgentError> {
357        Ok(self.nat_enabled())
358    }
359
360    /// Run one NAT-traversal maintenance tick by forwarding `NatTick` to overlayd.
361    ///
362    /// # Errors
363    /// Returns an error when overlayd reports a NAT refresh failure.
364    pub async fn nat_maintenance_tick(&self) -> Result<(), AgentError> {
365        if !self.nat_enabled() {
366            return Ok(());
367        }
368        self.call(OverlaydRequest::NatTick).await?;
369        Ok(())
370    }
371
372    /// Snapshot the current NAT traversal state for API consumers.
373    ///
374    /// overlayd owns the live NAT orchestrator and does not surface per-peer
375    /// candidate detail over the IPC contract, so this returns an empty
376    /// snapshot. Kept for API parity.
377    #[allow(clippy::unused_async)]
378    pub async fn nat_status_snapshot(&self) -> NatStatusSnapshot {
379        let _peers: Vec<NatPeerSnapshot> = Vec::new();
380        NatStatusSnapshot::empty()
381    }
382
383    /// Record the overlay DNS server address and zone domain (cached locally;
384    /// forwarded to overlayd on each container attach).
385    pub fn set_dns_config(&mut self, addr: Option<SocketAddr>, domain: Option<String>) {
386        self.dns_server_addr = addr;
387        self.dns_domain = domain;
388    }
389
390    /// Builder-style variant of [`OverlayManager::set_dns_config`].
391    #[must_use]
392    pub fn with_dns_config(mut self, addr: Option<SocketAddr>, domain: Option<String>) -> Self {
393        self.dns_server_addr = addr;
394        self.dns_domain = domain;
395        self
396    }
397
398    /// Returns the overlay DNS server address if configured.
399    #[must_use]
400    pub fn dns_server_addr(&self) -> Option<SocketAddr> {
401        self.dns_server_addr
402    }
403
404    /// Returns the overlay DNS zone domain, if configured.
405    #[must_use]
406    pub fn dns_domain(&self) -> Option<&str> {
407        self.dns_domain.as_deref()
408    }
409
410    /// Setup the global overlay network by delegating to overlayd.
411    ///
412    /// Forwards the local node id and wg pubkey first (so overlayd has the
413    /// cluster-brain context), then issues `SetupGlobalOverlay` and caches the
414    /// returned interface name plus the node IP / CIDRs reported by `Status`.
415    ///
416    /// # Errors
417    /// Returns an error if overlayd fails to bring up the overlay.
418    pub async fn setup_global_overlay(&mut self) -> Result<(), AgentError> {
419        // Fast pre-flight: establish (and cache) the overlayd connection once with a
420        // bounded budget. If overlayd is unreachable this returns after a single
421        // ~2.5s dial instead of letting each of the calls below pay the full retry
422        // window (which previously stacked to ~35s of daemon-startup stall when the
423        // overlayd binary was missing). Overlay setup is non-fatal, so bailing here
424        // simply leaves cross-node networking degraded — handled by the caller.
425        self.client().await?;
426
427        // Push cluster-brain context first (best-effort).
428        let _ = self
429            .call(OverlaydRequest::SetLocalNodeId {
430                node_id: self.local_node_id,
431            })
432            .await;
433        if let Some(pubkey) = self.local_wg_pubkey.lock().await.clone() {
434            let _ = self
435                .call(OverlaydRequest::SetLocalWgPubkey { pubkey })
436                .await;
437        }
438
439        let cluster_cidr = self
440            .cluster_cidr
441            .map_or_else(|| "10.200.0.0/16".to_string(), |c| c.to_string());
442        let slice_cidr = self.slice_cidr.map(|c| c.to_string());
443
444        let resp = self
445            .call(OverlaydRequest::SetupGlobalOverlay {
446                deployment: self.deployment.clone(),
447                instance_id: self.instance_id.clone(),
448                cluster_cidr,
449                slice_cidr,
450                wg_port: self.overlay_port,
451                nat_enabled: self.nat_enabled(),
452            })
453            .await?;
454        if let OverlaydResponse::BridgeName { name } = resp {
455            self.global_interface = Some(name);
456        }
457
458        // Refresh cached status (node_ip, cidrs).
459        self.refresh_status().await;
460        Ok(())
461    }
462
463    /// Refresh cached status fields from overlayd (`node_ip`, interface, CIDRs).
464    async fn refresh_status(&mut self) {
465        if let Ok(OverlaydResponse::Status(snap)) = self.call(OverlaydRequest::Status).await {
466            let StatusSnapshot {
467                interface,
468                node_ip,
469                overlay_cidr,
470                slice_cidr,
471                ..
472            } = snap;
473            if let Some(iface) = interface {
474                self.global_interface = Some(iface);
475            }
476            if node_ip.is_some() {
477                self.node_ip = node_ip;
478            }
479            if let Some(c) = overlay_cidr.and_then(|s| s.parse().ok()) {
480                self.cluster_cidr = Some(c);
481            }
482            if let Some(s) = slice_cidr.and_then(|s| s.parse().ok()) {
483                self.slice_cidr = Some(s);
484            }
485        }
486    }
487
488    /// Set up the per-service overlay segment by delegating to overlayd.
489    ///
490    /// Returns a [`ServiceOverlayInfo`] describing the segment. The
491    /// container-attach handle (bridge name on Linux, interface elsewhere) is
492    /// `info.name`. In `Dedicated` mode the `wg_public_key`/`wg_port`/
493    /// `overlay_ip`/`subnet` fields carry the per-service `WireGuard`
494    /// transport's identity so the deploy path can publish it to Raft and mesh
495    /// with the other hosting nodes; in `Shared` mode those fields are `None`.
496    ///
497    /// `mode` is the service's resolved [`OverlayMode`], read from its spec at
498    /// the deploy call site. In `Shared` mode overlayd attaches the service to
499    /// the cluster transport via a per-node bridge; in `Dedicated` mode it
500    /// stands up a per-service `WireGuard` transport with its own crypto
501    /// context and reports its identity via
502    /// [`OverlaydResponse::ServiceOverlay`].
503    ///
504    /// # Errors
505    /// Returns an error if overlayd fails to create the segment.
506    pub async fn setup_service_overlay(
507        &self,
508        service_name: &str,
509        mode: zlayer_types::overlay::OverlayMode,
510    ) -> Result<zlayer_types::overlayd::ServiceOverlayInfo, AgentError> {
511        let resp = self
512            .call(OverlaydRequest::SetupServiceOverlay {
513                service: service_name.to_string(),
514                mode,
515            })
516            .await?;
517        match resp {
518            // Shared mode (and any server still on the legacy response shape)
519            // reports only the container-attach handle; synthesize a
520            // `ServiceOverlayInfo` whose Dedicated-only fields are `None`.
521            OverlaydResponse::BridgeName { name } => {
522                Ok(zlayer_types::overlayd::ServiceOverlayInfo {
523                    name,
524                    mode,
525                    wg_public_key: None,
526                    wg_port: None,
527                    overlay_ip: None,
528                    subnet: None,
529                })
530            }
531            // Dedicated mode reports the full device identity.
532            OverlaydResponse::ServiceOverlay(info) => Ok(info),
533            other => Err(AgentError::Network(format!(
534                "overlayd SetupServiceOverlay returned unexpected response: {other:?}"
535            ))),
536        }
537    }
538
539    /// Add a container to the appropriate overlay networks by delegating to
540    /// overlayd (`AttachContainer` with a `LinuxPid` handle).
541    ///
542    /// # Errors
543    /// Returns an error if overlayd cannot attach the container.
544    pub async fn attach_container(
545        &self,
546        container_pid: u32,
547        service_name: &str,
548        join_global: bool,
549    ) -> Result<IpAddr, AgentError> {
550        let resp = self
551            .call(OverlaydRequest::AttachContainer {
552                handle: AttachHandle::LinuxPid { pid: container_pid },
553                service: service_name.to_string(),
554                join_global,
555                dns_server: self.dns_server_addr.map(|sa| sa.ip()),
556                dns_domain: self.dns_domain.clone(),
557            })
558            .await?;
559        match resp {
560            OverlaydResponse::Attached(result) => Ok(result.ip),
561            other => Err(AgentError::Network(format!(
562                "overlayd AttachContainer returned unexpected response: {other:?}"
563            ))),
564        }
565    }
566
567    /// Attach a guest-managed container (a VM with no host netns/PID) to the
568    /// overlay by asking overlayd to allocate the overlay identity (keypair +
569    /// address + the current peer set) and register the generated public key in
570    /// the mesh. The caller ships the returned [`GuestOverlayConfig`] into the
571    /// guest (over vsock) where it brings up its own `WireGuard` device.
572    ///
573    /// `id` is the opaque container id used to scope the allocation so a later
574    /// [`detach_container_guest`](OverlayManager::detach_container_guest) can
575    /// release the address + remove the peer.
576    ///
577    /// # Errors
578    /// Returns an error if overlayd cannot allocate/register the guest.
579    pub async fn attach_container_guest(
580        &self,
581        id: &str,
582        service_name: &str,
583        join_global: bool,
584    ) -> Result<zlayer_types::overlayd::GuestOverlayConfig, AgentError> {
585        let resp = self
586            .call(OverlaydRequest::AttachContainer {
587                handle: AttachHandle::GuestManaged { id: id.to_string() },
588                service: service_name.to_string(),
589                join_global,
590                dns_server: self.dns_server_addr.map(|sa| sa.ip()),
591                dns_domain: self.dns_domain.clone(),
592            })
593            .await?;
594        match resp {
595            OverlaydResponse::GuestConfig(cfg) => Ok(cfg),
596            other => Err(AgentError::Network(format!(
597                "overlayd AttachContainer(GuestManaged) returned unexpected response: {other:?}"
598            ))),
599        }
600    }
601
602    /// Detach a guest-managed container: release its overlay IP and remove its
603    /// registered mesh peer.
604    ///
605    /// # Errors
606    /// Returns an error if overlayd cannot detach the container.
607    pub async fn detach_container_guest(&self, id: &str) -> Result<(), AgentError> {
608        let resp = self
609            .call(OverlaydRequest::DetachContainer {
610                handle: AttachHandle::GuestManaged { id: id.to_string() },
611            })
612            .await?;
613        match resp {
614            OverlaydResponse::Ok => Ok(()),
615            other => Err(AgentError::Network(format!(
616                "overlayd DetachContainer(GuestManaged) returned unexpected response: {other:?}"
617            ))),
618        }
619    }
620
621    /// Register a Windows HCN container with overlayd and return its overlay IP
622    /// plus the overlayd-created namespace GUID.
623    ///
624    /// The return type gained the namespace GUID (vs. the pre-migration
625    /// IP-only return) because the HCN network + endpoint + namespace are now
626    /// created inside overlayd, and `HcsRuntime` needs that GUID to embed in the
627    /// compute-system document.
628    ///
629    /// When `autoclean` is true and overlayd reports back a namespace GUID, an
630    /// entry is recorded in [`OverlayManager::hcn_cleanup`] so a later
631    /// [`OverlayManager::detach_container_hcn`] (or process teardown) can drain
632    /// it. The cleanup map is purely agent-side bookkeeping; overlayd remains
633    /// the authoritative owner of the HCN namespace/endpoint state.
634    ///
635    /// # Errors
636    /// Returns an error if overlayd cannot attach the container.
637    #[cfg(target_os = "windows")]
638    #[allow(clippy::too_many_arguments)]
639    pub async fn attach_container_hcn(
640        &self,
641        container_id: &str,
642        service_name: &str,
643        ip_override: Option<std::net::IpAddr>,
644        autoclean: bool,
645        dns_server: Option<std::net::IpAddr>,
646        dns_domain: Option<String>,
647    ) -> Result<(std::net::IpAddr, Option<String>), AgentError> {
648        let resp = self
649            .call(OverlaydRequest::AttachContainer {
650                handle: AttachHandle::WindowsContainer {
651                    container_id: container_id.to_string(),
652                    ip: ip_override,
653                },
654                service: service_name.to_string(),
655                join_global: false,
656                dns_server: dns_server.or_else(|| self.dns_server_addr.map(|sa| sa.ip())),
657                dns_domain: dns_domain.or_else(|| self.dns_domain.clone()),
658            })
659            .await?;
660        match resp {
661            OverlaydResponse::Attached(result) => {
662                // Record agent-side autoclean bookkeeping. We key by the
663                // overlayd-issued namespace GUID; if overlayd did not return
664                // one (e.g. host-network attach), there is nothing to track.
665                if autoclean {
666                    if let Some(ns_str) = result.namespace_guid.as_deref() {
667                        match windows::core::GUID::try_from(ns_str) {
668                            Ok(ns_guid) => {
669                                let mut cleanup = self.hcn_cleanup.lock().await;
670                                cleanup.insert(ns_guid, (service_name.to_string(), result.ip));
671                            }
672                            Err(e) => {
673                                tracing::warn!(
674                                    ns = %ns_str,
675                                    error = %e,
676                                    "overlayd returned a non-GUID namespace handle; skipping hcn_cleanup insert"
677                                );
678                            }
679                        }
680                    }
681                }
682                Ok((result.ip, result.namespace_guid))
683            }
684            other => Err(AgentError::Network(format!(
685                "overlayd AttachContainer(WindowsContainer) returned unexpected response: {other:?}"
686            ))),
687        }
688    }
689
690    /// Detach and release a Windows HCN container by its bare namespace GUID.
691    ///
692    /// Drains the agent-side [`OverlayManager::hcn_cleanup`] entry (if any)
693    /// before forwarding `DetachContainer` to overlayd. Safe to call with an
694    /// unknown GUID — the map drain is a no-op in that case.
695    ///
696    /// # Errors
697    /// Returns an error if overlayd reports a detach failure.
698    #[cfg(target_os = "windows")]
699    pub async fn detach_container_hcn(&self, namespace_guid: &str) -> Result<(), AgentError> {
700        // Drain the agent-side cleanup map first so a later overlayd error does
701        // not leave a stale entry behind.
702        match windows::core::GUID::try_from(namespace_guid) {
703            Ok(ns_guid) => {
704                let mut cleanup = self.hcn_cleanup.lock().await;
705                if let Some((service_name, ip)) = cleanup.remove(&ns_guid) {
706                    tracing::info!(
707                        ns = %namespace_guid,
708                        service = %service_name,
709                        ip = %ip,
710                        "Released HCN overlay attachment (agent-side cleanup)"
711                    );
712                }
713            }
714            Err(e) => {
715                tracing::warn!(
716                    ns = %namespace_guid,
717                    error = %e,
718                    "detach_container_hcn called with non-GUID handle; skipping hcn_cleanup drain"
719                );
720            }
721        }
722
723        self.call(OverlaydRequest::DetachContainer {
724            handle: AttachHandle::WindowsContainer {
725                container_id: namespace_guid.to_string(),
726                ip: None,
727            },
728        })
729        .await?;
730        Ok(())
731    }
732
733    /// Release the overlay resources held by a Linux container by delegating to
734    /// overlayd (`DetachContainer` with a `LinuxPid` handle).
735    ///
736    /// # Errors
737    /// Returns an error if overlayd reports a detach failure.
738    pub async fn detach_container(&self, pid: u32) -> Result<(), AgentError> {
739        self.call(OverlaydRequest::DetachContainer {
740            handle: AttachHandle::LinuxPid { pid },
741        })
742        .await?;
743        Ok(())
744    }
745
746    /// Tear down the per-service overlay segment for `service_name`.
747    pub async fn teardown_service_overlay(&self, service_name: &str) {
748        if let Err(e) = self
749            .call(OverlaydRequest::TeardownServiceOverlay {
750                service: service_name.to_string(),
751            })
752            .await
753        {
754            tracing::warn!(service = %service_name, error = %e, "overlayd TeardownServiceOverlay failed");
755        }
756    }
757
758    /// Cleanup all overlay networks (tears down the global overlay in overlayd).
759    ///
760    /// # Errors
761    /// Returns an error if overlayd reports a teardown failure.
762    pub async fn cleanup(&mut self) -> Result<(), AgentError> {
763        self.call(OverlaydRequest::TeardownGlobalOverlay).await?;
764        self.global_interface = None;
765        // Best-effort drain of any agent-side autoclean bookkeeping we still
766        // hold on Windows. overlayd already tore down the HCN namespaces in
767        // response to `TeardownGlobalOverlay`; this just empties the side-map
768        // so a subsequent reuse of this manager starts clean.
769        #[cfg(target_os = "windows")]
770        {
771            let mut cleanup = self.hcn_cleanup.lock().await;
772            cleanup.clear();
773        }
774        Ok(())
775    }
776
777    /// Returns this node's IP on the global overlay network (cached).
778    pub fn node_ip(&self) -> Option<IpAddr> {
779        self.node_ip
780    }
781
782    /// Returns the deployment name this overlay manager was created for.
783    pub fn deployment(&self) -> &str {
784        &self.deployment
785    }
786
787    /// Returns the global overlay interface name (cached).
788    pub fn global_interface(&self) -> Option<&str> {
789        self.global_interface.as_deref()
790    }
791
792    /// Returns the `WireGuard` listen port for the overlay network.
793    pub fn overlay_port(&self) -> u16 {
794        self.overlay_port
795    }
796
797    /// Returns `true` if the global overlay transport is active (cached: an
798    /// interface name has been recorded).
799    pub fn has_global_transport(&self) -> bool {
800        self.global_interface.is_some()
801    }
802
803    /// Returns the number of per-service overlay bridges currently active.
804    pub async fn service_bridge_count(&self) -> usize {
805        match self.call(OverlaydRequest::Status).await {
806            Ok(OverlaydResponse::Status(snap)) => snap.service_count as usize,
807            _ => 0,
808        }
809    }
810
811    /// Add a peer to the live global overlay transport by delegating to overlayd.
812    ///
813    /// The parameter type is preserved (`&zlayer_overlay::PeerInfo`) so the one
814    /// caller (`zlayer-api`'s internal add-peer handler) compiles unchanged; the
815    /// shim converts it to a wire-safe [`PeerSpec`].
816    ///
817    /// # Errors
818    /// Returns an error if overlayd rejects the peer (e.g. overlay not yet up).
819    pub async fn add_global_peer(&self, peer: &zlayer_overlay::PeerInfo) -> Result<(), AgentError> {
820        self.call(OverlaydRequest::AddPeer {
821            peer: peer_spec_from(peer),
822            scope: zlayer_types::overlayd::PeerScope::Global,
823        })
824        .await?;
825        Ok(())
826    }
827
828    /// Add a peer to a service's dedicated per-service overlay transport.
829    ///
830    /// Analogous to [`OverlayManager::add_global_peer`] but scoped to
831    /// `service`'s [`OverlayMode::Dedicated`] device: first the peer itself
832    /// (`AddPeer` with `scope: Service`), then the service `subnet` plumbed
833    /// into that peer's `AllowedIPs` (`AddAllowedIp` with the same scope).
834    ///
835    /// # Errors
836    /// Returns an error if overlayd rejects the peer or the allowed-IP add
837    /// (e.g. the service's dedicated transport is not yet up).
838    pub async fn add_service_peer(
839        &self,
840        service: &str,
841        peer: &zlayer_overlay::PeerInfo,
842        subnet: &str,
843    ) -> Result<(), AgentError> {
844        self.call(OverlaydRequest::AddPeer {
845            peer: peer_spec_from(peer),
846            scope: zlayer_types::overlayd::PeerScope::Service {
847                service: service.to_string(),
848            },
849        })
850        .await?;
851        self.call(OverlaydRequest::AddAllowedIp {
852            pubkey: peer.public_key.clone(),
853            cidr: subnet.to_string(),
854            scope: zlayer_types::overlayd::PeerScope::Service {
855                service: service.to_string(),
856            },
857        })
858        .await?;
859        Ok(())
860    }
861
862    /// Remove a peer (by base64 public key) from a service's dedicated
863    /// per-service overlay transport.
864    ///
865    /// # Errors
866    /// Returns an error if overlayd reports the removal failed.
867    pub async fn remove_service_peer(&self, service: &str, pubkey: &str) -> Result<(), AgentError> {
868        self.call(OverlaydRequest::RemovePeer {
869            pubkey: pubkey.to_string(),
870            scope: zlayer_types::overlayd::PeerScope::Service {
871                service: service.to_string(),
872            },
873        })
874        .await?;
875        Ok(())
876    }
877
878    /// Returns the CIDR string for the overlay IP allocator (cached cluster CIDR).
879    pub fn overlay_cidr(&self) -> String {
880        self.cluster_cidr
881            .map_or_else(|| "10.200.0.0/16".to_string(), |c| c.to_string())
882    }
883
884    /// Returns the per-node slice CIDR this manager was built with, or `None`.
885    pub fn slice_cidr(&self) -> Option<IpNetwork> {
886        self.slice_cidr
887    }
888
889    /// Returns the full cluster CIDR, if known.
890    pub fn cluster_cidr(&self) -> Option<IpNetwork> {
891        self.cluster_cidr
892    }
893
894    /// Persist the IPAM allocator state. overlayd owns IPAM; this is a no-op
895    /// retained for ABI parity with callers.
896    ///
897    /// # Errors
898    /// Infallible today.
899    #[allow(clippy::unused_async)]
900    pub async fn persist_ipam_state(&self, _path: &std::path::Path) -> Result<(), AgentError> {
901        Ok(())
902    }
903
904    /// Restore IPAM allocator state. overlayd owns IPAM; this is a no-op
905    /// retained for ABI parity with callers.
906    ///
907    /// # Errors
908    /// Infallible today.
909    #[allow(clippy::unused_async)]
910    pub async fn restore_ipam_state(&mut self, _path: &std::path::Path) -> Result<(), AgentError> {
911        Ok(())
912    }
913
914    /// Returns IP allocation statistics: (`allocated_count`, `base_addr`).
915    ///
916    /// overlayd owns IPAM and does not surface allocation counters over IPC, so
917    /// this reports `(0, base)` derived from the cached cluster CIDR.
918    pub fn ip_alloc_stats(&self) -> (u64, IpAddr) {
919        let base = self
920            .cluster_cidr
921            .map_or(IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED), |c| c.network());
922        (0, base)
923    }
924}
925
#[cfg(test)]
mod tests {
    use super::*;

    /// Every generated interface name must fit within `MAX_IFNAME_LEN`,
    /// whatever mix of part lengths and suffixes is supplied.
    #[test]
    fn interface_name_never_exceeds_limit() {
        let inputs: Vec<(&[&str], &str)> = vec![
            (&["a"], "g"),
            (&["zlayer-manager"], "g"),
            (&["my-very-long-deployment-name-that-goes-on-and-on"], "g"),
            (&["zlayer", "manager"], "s"),
            (&["zlayer-manager", "frontend-service"], "s"),
            (&["a", "b"], "s"),
            (
                &["abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz"],
                "s",
            ),
            (&["x"], ""),
            (&["deployment"], ""),
            (&["a-really-long-name-exceeding-everything"], "suffix"),
        ];

        for (parts, suffix) in inputs {
            let generated = make_interface_name(parts, suffix);
            let length = generated.len();
            assert!(
                length <= MAX_IFNAME_LEN,
                "Name '{}' is {} chars (parts={:?}, suffix='{}')",
                generated,
                length,
                parts,
                suffix,
            );
        }
    }

    /// The limit must also hold for absurdly long parts, alone or repeated,
    /// with and without a suffix.
    #[test]
    fn interface_name_with_extreme_lengths() {
        let long = "a".repeat(200);
        let scenarios: [(Vec<&str>, &str); 3] = [
            (vec![long.as_str()], "g"),
            (vec![long.as_str(); 3], "s"),
            (vec![long.as_str()], ""),
        ];

        for (parts, suffix) in &scenarios {
            let name = make_interface_name(parts.as_slice(), suffix);
            assert!(name.len() <= MAX_IFNAME_LEN, "Name '{name}' too long");
        }
    }

    /// Calling the generator twice with identical inputs yields identical names.
    #[test]
    fn interface_name_is_deterministic() {
        let first = make_interface_name(&["zlayer-manager"], "g");
        let second = make_interface_name(&["zlayer-manager"], "g");
        assert_eq!(first, second);
    }

    /// Changing either the parts or the suffix must change the generated name.
    #[test]
    fn interface_name_uniqueness() {
        // Different parts, same suffix.
        assert_ne!(
            make_interface_name(&["deploy-a"], "g"),
            make_interface_name(&["deploy-b"], "g"),
        );

        // Same parts, different suffix.
        assert_ne!(
            make_interface_name(&["deploy"], "g"),
            make_interface_name(&["deploy"], "s"),
        );
    }

    /// Inputs short enough to fit are emitted in the readable
    /// `zl-<parts>-<suffix>` form.
    #[test]
    fn interface_name_short_inputs_are_readable() {
        assert_eq!(make_interface_name(&["app"], "g"), "zl-app-g");
        assert_eq!(make_interface_name(&["my", "web"], "s"), "zl-my-web-s");
    }

    /// `with_slice` must store the slice, cluster CIDR, port and deployment
    /// name it was given.
    #[test]
    fn with_slice_stores_slice_cidr() {
        let cluster_net: IpNetwork = "10.200.0.0/16".parse().unwrap();
        let slice_net: IpNetwork = "10.200.42.0/28".parse().unwrap();

        let manager = OverlayManager::with_slice(
            "test-deploy".to_string(),
            cluster_net,
            slice_net,
            51820,
            "test".to_string(),
        );

        assert_eq!(manager.slice_cidr(), Some(slice_net));
        assert_eq!(manager.cluster_cidr(), Some(cluster_net));
        assert_eq!(manager.overlay_port(), 51820);
        assert_eq!(manager.deployment(), "test-deploy");
    }

    /// A freshly constructed manager has no cached node IP.
    #[tokio::test]
    async fn node_ip_none_before_setup() {
        let manager = OverlayManager::new("test-deploy".to_string(), "test".to_string())
            .await
            .unwrap();
        assert_eq!(manager.node_ip(), None);
    }

    /// Values passed to `set_dns_config` are readable through the getters, and
    /// passing `None` clears them again.
    #[tokio::test]
    async fn dns_config_set_and_round_trip() {
        let mut manager = OverlayManager::new("dns-roundtrip".to_string(), "test".to_string())
            .await
            .unwrap();

        let dns_addr: SocketAddr = "10.200.42.1:15353".parse().unwrap();
        manager.set_dns_config(Some(dns_addr), Some("overlay.local".to_string()));
        assert_eq!(manager.dns_server_addr(), Some(dns_addr));
        assert_eq!(manager.dns_domain(), Some("overlay.local"));

        manager.set_dns_config(None, None);
        assert_eq!(manager.dns_server_addr(), None);
        assert_eq!(manager.dns_domain(), None);
    }

    /// `peer_spec_from` must carry every `PeerInfo` field over to `PeerSpec`:
    /// the endpoint as a string and the keepalive as whole seconds.
    #[test]
    fn peer_spec_from_copies_all_fields() {
        use std::time::Duration;

        let info = zlayer_overlay::PeerInfo {
            public_key: "base64key".to_string(),
            endpoint: "1.2.3.4:51820".parse().unwrap(),
            allowed_ips: "10.200.0.2/32".to_string(),
            persistent_keepalive_interval: Duration::from_secs(25),
        };

        let converted = peer_spec_from(&info);

        assert_eq!(converted.public_key, "base64key");
        assert_eq!(converted.endpoint, "1.2.3.4:51820");
        assert_eq!(converted.allowed_ips, "10.200.0.2/32");
        assert_eq!(converted.persistent_keepalive_secs, 25);
    }

    /// A `SetupServiceOverlay` request built with `OverlayMode::Dedicated`
    /// keeps that mode, and `Dedicated` is distinct from the default mode.
    #[test]
    fn setup_service_overlay_request_carries_dedicated_mode() {
        use zlayer_types::overlay::OverlayMode;

        let request = OverlaydRequest::SetupServiceOverlay {
            service: "web".to_string(),
            mode: OverlayMode::Dedicated,
        };

        match request {
            OverlaydRequest::SetupServiceOverlay { service, mode } => {
                assert_eq!(service, "web");
                assert_eq!(mode, OverlayMode::Dedicated);
                assert_ne!(mode, OverlayMode::default());
            }
            other => panic!("expected SetupServiceOverlay, got {other:?}"),
        }
    }

    /// Service-scoped `AddPeer` / `AddAllowedIp` / `RemovePeer` requests must
    /// carry `PeerScope::Service { service }` rather than `Global`.
    #[test]
    fn service_peer_ops_use_service_scope() {
        use zlayer_types::overlayd::PeerScope;

        let web_scope = || PeerScope::Service {
            service: "web".to_string(),
        };
        let info = zlayer_overlay::PeerInfo {
            public_key: "k".to_string(),
            endpoint: "1.2.3.4:51820".parse().unwrap(),
            allowed_ips: "10.201.0.2/32".to_string(),
            persistent_keepalive_interval: std::time::Duration::from_secs(0),
        };

        let add_request = OverlaydRequest::AddPeer {
            peer: peer_spec_from(&info),
            scope: web_scope(),
        };
        let allow_request = OverlaydRequest::AddAllowedIp {
            pubkey: info.public_key.clone(),
            cidr: "10.201.0.0/24".to_string(),
            scope: web_scope(),
        };
        let remove_request = OverlaydRequest::RemovePeer {
            pubkey: info.public_key.clone(),
            scope: web_scope(),
        };

        match add_request {
            OverlaydRequest::AddPeer { scope, peer: spec } => {
                assert_eq!(scope, web_scope());
                assert_eq!(spec.public_key, "k");
            }
            other => panic!("expected AddPeer, got {other:?}"),
        }
        match allow_request {
            OverlaydRequest::AddAllowedIp { scope, cidr, .. } => {
                assert_eq!(cidr, "10.201.0.0/24");
                assert_eq!(scope, web_scope());
            }
            other => panic!("expected AddAllowedIp, got {other:?}"),
        }
        match remove_request {
            OverlaydRequest::RemovePeer { scope, pubkey } => {
                assert_eq!(pubkey, "k");
                assert_eq!(scope, web_scope());
            }
            other => panic!("expected RemovePeer, got {other:?}"),
        }
    }

    /// Windows-only sanity check: the `hcn_cleanup` side-map is empty right
    /// after construction through both `new()` and `with_slice()`. Insert and
    /// drain behaviour is not covered here.
    #[cfg(target_os = "windows")]
    #[tokio::test]
    async fn hcn_cleanup_map_starts_empty() {
        let from_new = OverlayManager::new("test-deploy".to_string(), "test".to_string())
            .await
            .unwrap();
        assert!(
            from_new.hcn_cleanup.lock().await.is_empty(),
            "hcn_cleanup map must start empty from new()"
        );

        let cluster_net: IpNetwork = "10.200.0.0/16".parse().unwrap();
        let slice_net: IpNetwork = "10.200.42.0/28".parse().unwrap();
        let from_slice = OverlayManager::with_slice(
            "test-deploy".to_string(),
            cluster_net,
            slice_net,
            51820,
            "test".to_string(),
        );
        assert!(
            from_slice.hcn_cleanup.lock().await.is_empty(),
            "hcn_cleanup map must start empty from with_slice()"
        );
    }
}