Skip to main content

zlayer_agent/
overlay_manager.rs

1//! Thin overlayd client shim.
2//!
3//! Historically `OverlayManager` owned every mechanism touching the
4//! overlay/network plane (the cluster `WireGuard` transport, per-service Linux
5//! bridges, veth/netns attach, the Windows HCN Internal network + endpoints,
6//! IPAM, DNS, NAT). All of that machinery was migrated wholesale into the
7//! standalone `zlayer-overlayd` daemon (`crates/zlayer-overlayd/src/server.rs`).
8//!
9//! What remains here is a **client shim**: it keeps only cluster-brain / cached
10//! state (deployment name, instance id, local node id, local wg pubkey, and
11//! cached status values such as `node_ip`/`dns`/`cidr`) and forwards every
12//! mechanical operation to overlayd over the IPC client
//! [`zlayer_overlayd::OverlaydClient`]. Public methods keep the signature they
//! had before the migration so existing callers compile unchanged (the one
//! exception visible here is `attach_container_hcn`, whose return type gained
//! the overlayd-created namespace GUID); each forwarding body builds the
//! matching [`OverlaydRequest`], issues it through `call(req)`, and maps the
//! response.
17//!
18//! On Windows, the manager additionally maintains a small `hcn_cleanup` map
19//! (HCN namespace GUID -> (`service_name`, `allocated_ip`)) so that
20//! agent-side bookkeeping for autoclean attaches survives even though the
21//! authoritative HCN state lives in overlayd. The map is populated on
22//! `attach_container_hcn(autoclean = true)` and drained on
23//! `detach_container_hcn`.
24
25use crate::error::AgentError;
26use ipnetwork::IpNetwork;
27use std::collections::hash_map::DefaultHasher;
28use std::hash::{Hash, Hasher};
29use std::net::{IpAddr, SocketAddr};
30use std::path::PathBuf;
31use std::sync::Arc;
32use tokio::sync::Mutex;
33use zlayer_overlay::{NatConfig, NatPeerSnapshot, NatStatusSnapshot};
34use zlayer_overlayd::OverlaydClient;
35use zlayer_paths::ZLayerDirs;
36use zlayer_types::overlayd::{
37    AttachHandle, OverlaydRequest, OverlaydResponse, PeerSpec, StatusSnapshot,
38};
39
/// Longest interface name Linux accepts (IFNAMSIZ minus the trailing NUL byte).
const MAX_IFNAME_LEN: usize = 15;

/// Build a `zl-`-prefixed interface name that is never longer than 15 bytes.
///
/// The readable form is `zl-` followed by `parts` joined with `-`, plus
/// `-{suffix}` when `suffix` is non-empty; it is returned unchanged when it
/// fits. Otherwise every part and the suffix are fed to a hasher and the name
/// becomes `zl-<hex>-<suffix>` when at least one hex digit fits next to the
/// suffix, or `zl-<hex>` (suffix dropped) when it does not.
///
/// Kept in the agent (and re-exported from the crate root) because callers
/// outside the overlay machinery — notably `runtimes/wsl2_delegate.rs` — still
/// use it for deterministic naming. overlayd has its own private copy for the
/// names it generates server-side; the two are identical by construction.
#[must_use]
pub fn make_interface_name(parts: &[&str], suffix: &str) -> String {
    // Byte length of the fixed "zl-" prefix.
    const PREFIX_LEN: usize = 3;

    // Fast path: the human-readable name already fits.
    let mut readable = format!("zl-{}", parts.join("-"));
    if !suffix.is_empty() {
        readable.push('-');
        readable.push_str(suffix);
    }
    if readable.len() <= MAX_IFNAME_LEN {
        return readable;
    }

    // Too long: fall back to a hex digest over the parts and the suffix.
    let mut state = DefaultHasher::new();
    parts.iter().for_each(|segment| segment.hash(&mut state));
    suffix.hash(&mut state);
    let digest = format!("{:x}", state.finish());

    // Hex digits that fit once "zl-", the "-" separator and the suffix are paid for.
    let room_beside_suffix = MAX_IFNAME_LEN.saturating_sub(PREFIX_LEN + 1 + suffix.len());
    if suffix.is_empty() || room_beside_suffix == 0 {
        // No suffix to keep (or it cannot fit): spend the whole budget on the digest.
        let keep = (MAX_IFNAME_LEN - PREFIX_LEN).min(digest.len());
        return format!("zl-{}", &digest[..keep]);
    }

    let keep = room_beside_suffix.min(digest.len());
    format!("zl-{}-{}", &digest[..keep], suffix)
}
91
92/// Map a `zlayer_overlayd` client error into the agent's error type.
93fn map_overlayd_err(e: &zlayer_overlayd::OverlaydError) -> AgentError {
94    AgentError::Network(format!("overlayd: {e}"))
95}
96
97/// Convert a live [`zlayer_overlay::PeerInfo`] into the wire-safe [`PeerSpec`]
98/// the overlayd IPC contract expects. Shared by every `add_*_peer` shim so the
99/// global and per-service paths build identical specs.
100fn peer_spec_from(peer: &zlayer_overlay::PeerInfo) -> PeerSpec {
101    PeerSpec {
102        public_key: peer.public_key.clone(),
103        endpoint: peer.endpoint.to_string(),
104        allowed_ips: peer.allowed_ips.clone(),
105        persistent_keepalive_secs: peer.persistent_keepalive_interval.as_secs(),
106    }
107}
108
109/// Manages overlay networks for a deployment by delegating all mechanics to the
110/// `zlayer-overlayd` daemon.
111///
112/// This struct holds only cluster-brain / cached state; the actual overlay
113/// machinery lives in overlayd and is reached through [`OverlayManager::client`].
114pub struct OverlayManager {
115    /// Deployment name (used for network naming).
116    deployment: String,
117    /// Per-daemon-process disambiguator included in overlay link names. Stable
118    /// for the daemon's lifetime; forwarded to overlayd in `SetupGlobalOverlay`.
119    instance_id: String,
120    /// Root data directory; used to resolve the overlayd IPC socket path.
121    data_dir: PathBuf,
122    /// Lazily-connected overlayd IPC client. Wrapped in an `Arc<Mutex<_>>` so
123    /// the manager can be shared behind an `Arc<RwLock<_>>` and still serialize
124    /// request/response round-trips on the single framed connection.
125    client: Mutex<Option<Arc<Mutex<OverlaydClient>>>>,
126    /// Local raft node id, forwarded to overlayd via `SetLocalNodeId`.
127    local_node_id: u64,
128    /// This node's cluster `WireGuard` public key (base64), forwarded to
129    /// overlayd via `SetLocalWgPubkey`. Behind a `Mutex` because the setter
130    /// takes `&self` (callers hold only a read guard at that point).
131    local_wg_pubkey: Mutex<Option<String>>,
132    /// `WireGuard` listen port for the overlay network.
133    overlay_port: u16,
134    /// Cached node overlay IP, populated from `SetupGlobalOverlay`/`Status`.
135    node_ip: Option<IpAddr>,
136    /// Cached global overlay interface name.
137    global_interface: Option<String>,
138    /// Cached full cluster CIDR.
139    cluster_cidr: Option<IpNetwork>,
140    /// Cached per-node slice CIDR.
141    slice_cidr: Option<IpNetwork>,
142    /// Cached overlay DNS server address.
143    dns_server_addr: Option<SocketAddr>,
144    /// Cached overlay DNS zone domain.
145    dns_domain: Option<String>,
146    /// NAT traversal configuration. overlayd owns the live NAT orchestrator;
147    /// this is cached so the daemon can decide whether to drive `NatTick`.
148    nat_config: Option<NatConfig>,
149    /// Override for the `WireGuard` UAPI socket directory. overlayd owns the
150    /// real transport, so this is retained only for API/diagnostic parity.
151    uapi_sock_dir: Option<PathBuf>,
152    /// Map of HCN namespace GUID -> (`service_name`, `allocated_ip`) for autoclean.
153    /// When a Windows container is attached with `autoclean = true`, its entry
154    /// is inserted here; `detach_container_hcn` removes it. overlayd is the
155    /// authoritative owner of the HCN namespace/endpoint state, but the agent
156    /// keeps this side-map so it can answer "what attachments do I still need
157    /// to release on shutdown?" without an IPC round-trip per query.
158    #[cfg(target_os = "windows")]
159    hcn_cleanup: std::sync::Arc<
160        tokio::sync::Mutex<
161            std::collections::HashMap<windows::core::GUID, (String, std::net::IpAddr)>,
162        >,
163    >,
164}
165
166impl OverlayManager {
167    /// Create a new overlay manager for a deployment (legacy single-node path).
168    ///
169    /// Uses the default cluster `/16`. Prefer [`OverlayManager::with_slice`] for
170    /// cluster deployments. The overlayd IPC client is connected lazily on first
171    /// use (via the socket under the system-default data dir).
172    ///
173    /// # Errors
174    /// Infallible today; the `Result` is preserved for ABI parity with callers.
175    ///
176    /// # Panics
177    /// Panics only if the compile-time-constant default CIDR `10.200.0.0/16`
178    /// fails to parse (impossible).
179    #[allow(clippy::unused_async)]
180    pub async fn new(deployment: String, instance_id: String) -> Result<Self, AgentError> {
181        let data_dir = ZLayerDirs::system_default().data_dir().to_path_buf();
182        let default_cidr: IpNetwork = "10.200.0.0/16".parse().expect("compile-time constant CIDR");
183        Ok(Self {
184            deployment,
185            instance_id,
186            data_dir,
187            client: Mutex::new(None),
188            local_node_id: 0,
189            local_wg_pubkey: Mutex::new(None),
190            overlay_port: zlayer_core::DEFAULT_WG_PORT,
191            node_ip: None,
192            global_interface: None,
193            cluster_cidr: Some(default_cidr),
194            slice_cidr: None,
195            dns_server_addr: None,
196            dns_domain: None,
197            nat_config: None,
198            uapi_sock_dir: None,
199            #[cfg(target_os = "windows")]
200            hcn_cleanup: std::sync::Arc::new(tokio::sync::Mutex::new(
201                std::collections::HashMap::new(),
202            )),
203        })
204    }
205
206    /// Create an `OverlayManager` bound to a per-node slice.
207    ///
208    /// `slice_cidr` is the per-node slice owned by this node; `cluster_cidr` is
209    /// the full cluster CIDR. Both are forwarded to overlayd in
210    /// `SetupGlobalOverlay`.
211    #[must_use]
212    pub fn with_slice(
213        deployment: String,
214        cluster_cidr: IpNetwork,
215        slice_cidr: IpNetwork,
216        port: u16,
217        instance_id: String,
218    ) -> Self {
219        let data_dir = ZLayerDirs::system_default().data_dir().to_path_buf();
220        Self {
221            deployment,
222            instance_id,
223            data_dir,
224            client: Mutex::new(None),
225            local_node_id: 0,
226            local_wg_pubkey: Mutex::new(None),
227            overlay_port: port,
228            node_ip: None,
229            global_interface: None,
230            cluster_cidr: Some(cluster_cidr),
231            slice_cidr: Some(slice_cidr),
232            dns_server_addr: None,
233            dns_domain: None,
234            nat_config: None,
235            uapi_sock_dir: None,
236            #[cfg(target_os = "windows")]
237            hcn_cleanup: std::sync::Arc::new(tokio::sync::Mutex::new(
238                std::collections::HashMap::new(),
239            )),
240        }
241    }
242
243    /// Set the `WireGuard` listen port for the overlay network.
244    #[must_use]
245    pub fn with_overlay_port(mut self, port: u16) -> Self {
246        self.overlay_port = port;
247        self
248    }
249
250    /// Set the NAT traversal configuration. overlayd owns the live NAT
251    /// orchestrator; this records the toggle so `SetupGlobalOverlay` can carry
252    /// `nat_enabled` and the daemon can decide whether to drive `NatTick`.
253    #[must_use]
254    pub fn with_nat_config(mut self, nat: NatConfig) -> Self {
255        self.nat_config = Some(nat);
256        self
257    }
258
259    /// Override the `WireGuard` UAPI socket directory. Retained for API parity;
260    /// overlayd owns the real transport's socket directory.
261    #[must_use]
262    pub fn with_uapi_sock_dir(mut self, dir: impl Into<PathBuf>) -> Self {
263        self.uapi_sock_dir = Some(dir.into());
264        self
265    }
266
267    /// Override the data directory used to resolve the overlayd IPC socket.
268    #[must_use]
269    pub fn with_data_dir(mut self, dir: impl Into<PathBuf>) -> Self {
270        self.data_dir = dir.into();
271        self
272    }
273
274    /// Set the local raft node id (builder-style).
275    #[must_use]
276    pub fn with_local_node_id(mut self, node_id: u64) -> Self {
277        self.local_node_id = node_id;
278        self
279    }
280
281    /// Get or lazily establish the overlayd IPC connection.
282    async fn client(&self) -> Result<Arc<Mutex<OverlaydClient>>, AgentError> {
283        let mut guard = self.client.lock().await;
284        if let Some(c) = guard.as_ref() {
285            return Ok(Arc::clone(c));
286        }
287        let socket = ZLayerDirs::default_overlayd_socket_path_for(&self.data_dir);
288        // Bounded dial (~2.5s worst case): overlay operations are non-fatal, so a
289        // dead/unreachable overlayd must degrade fast rather than hold the daemon's
290        // startup hostage. The overlayd supervisor (ensure_overlayd_running) owns
291        // the generous "wait for a freshly-spawned overlayd to bind" budget; once
292        // it has confirmed overlayd up (or fast-failed when the binary is missing),
293        // this lazy connector only needs a short retry window.
294        let conn = OverlaydClient::connect_with_attempts(std::path::Path::new(&socket), 6)
295            .await
296            .map_err(|e| map_overlayd_err(&e))?;
297        let arc = Arc::new(Mutex::new(conn));
298        *guard = Some(Arc::clone(&arc));
299        Ok(arc)
300    }
301
302    /// Issue a single overlayd request, folding `Err` responses into errors.
303    async fn call(&self, req: OverlaydRequest) -> Result<OverlaydResponse, AgentError> {
304        let client = self.client().await?;
305        let mut conn = client.lock().await;
306        conn.call(req).await.map_err(|e| map_overlayd_err(&e))
307    }
308
    /// Post-construction setter for the local raft node id.
    ///
    /// This only updates the cached value; nothing is sent to overlayd from
    /// here. The id is pushed as `SetLocalNodeId` (best-effort) the next time
    /// [`OverlayManager::setup_global_overlay`] runs.
    pub fn set_local_node_id(&mut self, node_id: u64) {
        self.local_node_id = node_id;
    }
314
315    /// Record this node's cluster `WireGuard` public key (base64) and forward it
316    /// to overlayd so service subnets can be added to the cluster transport's
317    /// local `AllowedIPs`.
318    pub async fn set_local_wg_pubkey(&self, pubkey: String) {
319        *self.local_wg_pubkey.lock().await = Some(pubkey.clone());
320        if let Err(e) = self
321            .call(OverlaydRequest::SetLocalWgPubkey { pubkey })
322            .await
323        {
324            tracing::warn!(error = %e, "overlayd SetLocalWgPubkey failed");
325        }
326    }
327
328    /// Returns the number of services currently registered (cached `Status`).
329    pub async fn service_count(&self) -> usize {
330        match self.call(OverlaydRequest::Status).await {
331            Ok(OverlaydResponse::Status(snap)) => snap.service_count as usize,
332            _ => 0,
333        }
334    }
335
336    /// Returns whether NAT traversal is enabled for this manager.
337    #[must_use]
338    pub fn nat_enabled(&self) -> bool {
339        self.nat_config
340            .as_ref()
341            .map_or_else(|| NatConfig::default().enabled, |c| c.enabled)
342    }
343
344    /// Returns a clone of the configured [`NatConfig`], or `None`.
345    #[must_use]
346    pub fn nat_config(&self) -> Option<NatConfig> {
347        self.nat_config.clone()
348    }
349
350    /// Bootstrap NAT traversal. overlayd starts NAT lazily on its first
351    /// `NatTick`, so this is a thin shim that reports whether NAT is enabled.
352    ///
353    /// # Errors
354    /// Infallible today; preserved for ABI parity.
355    #[allow(clippy::unused_async)]
356    pub async fn start_nat_traversal(&self) -> Result<bool, AgentError> {
357        Ok(self.nat_enabled())
358    }
359
360    /// Run one NAT-traversal maintenance tick by forwarding `NatTick` to overlayd.
361    ///
362    /// # Errors
363    /// Returns an error when overlayd reports a NAT refresh failure.
364    pub async fn nat_maintenance_tick(&self) -> Result<(), AgentError> {
365        if !self.nat_enabled() {
366            return Ok(());
367        }
368        self.call(OverlaydRequest::NatTick).await?;
369        Ok(())
370    }
371
372    /// Snapshot the current NAT traversal state for API consumers.
373    ///
374    /// overlayd owns the live NAT orchestrator and does not surface per-peer
375    /// candidate detail over the IPC contract, so this returns an empty
376    /// snapshot. Kept for API parity.
377    #[allow(clippy::unused_async)]
378    pub async fn nat_status_snapshot(&self) -> NatStatusSnapshot {
379        let _peers: Vec<NatPeerSnapshot> = Vec::new();
380        NatStatusSnapshot::empty()
381    }
382
383    /// Record the overlay DNS server address and zone domain (cached locally;
384    /// forwarded to overlayd on each container attach).
385    pub fn set_dns_config(&mut self, addr: Option<SocketAddr>, domain: Option<String>) {
386        self.dns_server_addr = addr;
387        self.dns_domain = domain;
388    }
389
390    /// Builder-style variant of [`OverlayManager::set_dns_config`].
391    #[must_use]
392    pub fn with_dns_config(mut self, addr: Option<SocketAddr>, domain: Option<String>) -> Self {
393        self.dns_server_addr = addr;
394        self.dns_domain = domain;
395        self
396    }
397
398    /// Returns the overlay DNS server address if configured.
399    #[must_use]
400    pub fn dns_server_addr(&self) -> Option<SocketAddr> {
401        self.dns_server_addr
402    }
403
404    /// Returns the overlay DNS zone domain, if configured.
405    #[must_use]
406    pub fn dns_domain(&self) -> Option<&str> {
407        self.dns_domain.as_deref()
408    }
409
410    /// Setup the global overlay network by delegating to overlayd.
411    ///
412    /// Forwards the local node id and wg pubkey first (so overlayd has the
413    /// cluster-brain context), then issues `SetupGlobalOverlay` and caches the
414    /// returned interface name plus the node IP / CIDRs reported by `Status`.
415    ///
416    /// # Errors
417    /// Returns an error if overlayd fails to bring up the overlay.
418    pub async fn setup_global_overlay(&mut self) -> Result<(), AgentError> {
419        // Fast pre-flight: establish (and cache) the overlayd connection once with a
420        // bounded budget. If overlayd is unreachable this returns after a single
421        // ~2.5s dial instead of letting each of the calls below pay the full retry
422        // window (which previously stacked to ~35s of daemon-startup stall when the
423        // overlayd binary was missing). Overlay setup is non-fatal, so bailing here
424        // simply leaves cross-node networking degraded — handled by the caller.
425        self.client().await?;
426
427        // Push cluster-brain context first (best-effort).
428        let _ = self
429            .call(OverlaydRequest::SetLocalNodeId {
430                node_id: self.local_node_id,
431            })
432            .await;
433        if let Some(pubkey) = self.local_wg_pubkey.lock().await.clone() {
434            let _ = self
435                .call(OverlaydRequest::SetLocalWgPubkey { pubkey })
436                .await;
437        }
438
439        let cluster_cidr = self
440            .cluster_cidr
441            .map_or_else(|| "10.200.0.0/16".to_string(), |c| c.to_string());
442        let slice_cidr = self.slice_cidr.map(|c| c.to_string());
443
444        let resp = self
445            .call(OverlaydRequest::SetupGlobalOverlay {
446                deployment: self.deployment.clone(),
447                instance_id: self.instance_id.clone(),
448                cluster_cidr,
449                slice_cidr,
450                wg_port: self.overlay_port,
451                nat_enabled: self.nat_enabled(),
452            })
453            .await?;
454        if let OverlaydResponse::BridgeName { name } = resp {
455            self.global_interface = Some(name);
456        }
457
458        // Refresh cached status (node_ip, cidrs).
459        self.refresh_status().await;
460        Ok(())
461    }
462
463    /// Refresh cached status fields from overlayd (`node_ip`, interface, CIDRs).
464    async fn refresh_status(&mut self) {
465        if let Ok(OverlaydResponse::Status(snap)) = self.call(OverlaydRequest::Status).await {
466            let StatusSnapshot {
467                interface,
468                node_ip,
469                overlay_cidr,
470                slice_cidr,
471                ..
472            } = snap;
473            if let Some(iface) = interface {
474                self.global_interface = Some(iface);
475            }
476            if node_ip.is_some() {
477                self.node_ip = node_ip;
478            }
479            if let Some(c) = overlay_cidr.and_then(|s| s.parse().ok()) {
480                self.cluster_cidr = Some(c);
481            }
482            if let Some(s) = slice_cidr.and_then(|s| s.parse().ok()) {
483                self.slice_cidr = Some(s);
484            }
485        }
486    }
487
488    /// Set up the per-service overlay segment by delegating to overlayd.
489    ///
490    /// Returns a [`ServiceOverlayInfo`] describing the segment. The
491    /// container-attach handle (bridge name on Linux, interface elsewhere) is
492    /// `info.name`. In `Dedicated` mode the `wg_public_key`/`wg_port`/
493    /// `overlay_ip`/`subnet` fields carry the per-service `WireGuard`
494    /// transport's identity so the deploy path can publish it to Raft and mesh
495    /// with the other hosting nodes; in `Shared` mode those fields are `None`.
496    ///
497    /// `mode` is the service's resolved [`OverlayMode`], read from its spec at
498    /// the deploy call site. In `Shared` mode overlayd attaches the service to
499    /// the cluster transport via a per-node bridge; in `Dedicated` mode it
500    /// stands up a per-service `WireGuard` transport with its own crypto
501    /// context and reports its identity via
502    /// [`OverlaydResponse::ServiceOverlay`].
503    ///
504    /// # Errors
505    /// Returns an error if overlayd fails to create the segment.
506    pub async fn setup_service_overlay(
507        &self,
508        service_name: &str,
509        mode: zlayer_types::overlay::OverlayMode,
510    ) -> Result<zlayer_types::overlayd::ServiceOverlayInfo, AgentError> {
511        let resp = self
512            .call(OverlaydRequest::SetupServiceOverlay {
513                service: service_name.to_string(),
514                mode,
515            })
516            .await?;
517        match resp {
518            // Shared mode (and any server still on the legacy response shape)
519            // reports only the container-attach handle; synthesize a
520            // `ServiceOverlayInfo` whose Dedicated-only fields are `None`.
521            OverlaydResponse::BridgeName { name } => {
522                Ok(zlayer_types::overlayd::ServiceOverlayInfo {
523                    name,
524                    mode,
525                    wg_public_key: None,
526                    wg_port: None,
527                    overlay_ip: None,
528                    subnet: None,
529                })
530            }
531            // Dedicated mode reports the full device identity.
532            OverlaydResponse::ServiceOverlay(info) => Ok(info),
533            other => Err(AgentError::Network(format!(
534                "overlayd SetupServiceOverlay returned unexpected response: {other:?}"
535            ))),
536        }
537    }
538
539    /// Add a container to the appropriate overlay networks by delegating to
540    /// overlayd (`AttachContainer` with a `LinuxPid` handle).
541    ///
542    /// # Errors
543    /// Returns an error if overlayd cannot attach the container.
544    pub async fn attach_container(
545        &self,
546        container_pid: u32,
547        service_name: &str,
548        join_global: bool,
549        dns_domain_override: Option<String>,
550    ) -> Result<IpAddr, AgentError> {
551        let resp = self
552            .call(OverlaydRequest::AttachContainer {
553                handle: AttachHandle::LinuxPid { pid: container_pid },
554                service: service_name.to_string(),
555                join_global,
556                dns_server: self.dns_server_addr.map(|sa| sa.ip()),
557                // Per-deployment search domain when the caller supplies one
558                // (so a guest's bare `<svc>` resolves to ITS deployment);
559                // otherwise the global zone domain.
560                dns_domain: dns_domain_override.or_else(|| self.dns_domain.clone()),
561            })
562            .await?;
563        match resp {
564            OverlaydResponse::Attached(result) => Ok(result.ip),
565            other => Err(AgentError::Network(format!(
566                "overlayd AttachContainer returned unexpected response: {other:?}"
567            ))),
568        }
569    }
570
571    /// Attach a guest-managed container (a VM with no host netns/PID) to the
572    /// overlay by asking overlayd to allocate the overlay identity (keypair +
573    /// address + the current peer set) and register the generated public key in
574    /// the mesh. The caller ships the returned [`GuestOverlayConfig`] into the
575    /// guest (over vsock) where it brings up its own `WireGuard` device.
576    ///
577    /// `id` is the opaque container id used to scope the allocation so a later
578    /// [`detach_container_guest`](OverlayManager::detach_container_guest) can
579    /// release the address + remove the peer.
580    ///
581    /// # Errors
582    /// Returns an error if overlayd cannot allocate/register the guest.
583    pub async fn attach_container_guest(
584        &self,
585        id: &str,
586        service_name: &str,
587        join_global: bool,
588        dns_domain_override: Option<String>,
589    ) -> Result<zlayer_types::overlayd::GuestOverlayConfig, AgentError> {
590        let resp = self
591            .call(OverlaydRequest::AttachContainer {
592                handle: AttachHandle::GuestManaged { id: id.to_string() },
593                service: service_name.to_string(),
594                join_global,
595                dns_server: self.dns_server_addr.map(|sa| sa.ip()),
596                // Per-deployment search domain when the caller supplies one
597                // (so a guest's bare `<svc>` resolves to ITS deployment);
598                // otherwise the global zone domain.
599                dns_domain: dns_domain_override.or_else(|| self.dns_domain.clone()),
600            })
601            .await?;
602        match resp {
603            OverlaydResponse::GuestConfig(cfg) => Ok(cfg),
604            other => Err(AgentError::Network(format!(
605                "overlayd AttachContainer(GuestManaged) returned unexpected response: {other:?}"
606            ))),
607        }
608    }
609
610    /// Detach a guest-managed container: release its overlay IP and remove its
611    /// registered mesh peer.
612    ///
613    /// # Errors
614    /// Returns an error if overlayd cannot detach the container.
615    pub async fn detach_container_guest(&self, id: &str) -> Result<(), AgentError> {
616        let resp = self
617            .call(OverlaydRequest::DetachContainer {
618                handle: AttachHandle::GuestManaged { id: id.to_string() },
619            })
620            .await?;
621        match resp {
622            OverlaydResponse::Ok => Ok(()),
623            other => Err(AgentError::Network(format!(
624                "overlayd DetachContainer(GuestManaged) returned unexpected response: {other:?}"
625            ))),
626        }
627    }
628
629    /// Register a Windows HCN container with overlayd and return its overlay IP
630    /// plus the overlayd-created namespace GUID.
631    ///
632    /// The return type gained the namespace GUID (vs. the pre-migration
633    /// IP-only return) because the HCN network + endpoint + namespace are now
634    /// created inside overlayd, and `HcsRuntime` needs that GUID to embed in the
635    /// compute-system document.
636    ///
637    /// When `autoclean` is true and overlayd reports back a namespace GUID, an
638    /// entry is recorded in [`OverlayManager::hcn_cleanup`] so a later
639    /// [`OverlayManager::detach_container_hcn`] (or process teardown) can drain
640    /// it. The cleanup map is purely agent-side bookkeeping; overlayd remains
641    /// the authoritative owner of the HCN namespace/endpoint state.
642    ///
643    /// # Errors
644    /// Returns an error if overlayd cannot attach the container.
645    #[cfg(target_os = "windows")]
646    #[allow(clippy::too_many_arguments)]
647    pub async fn attach_container_hcn(
648        &self,
649        container_id: &str,
650        service_name: &str,
651        ip_override: Option<std::net::IpAddr>,
652        autoclean: bool,
653        dns_server: Option<std::net::IpAddr>,
654        dns_domain: Option<String>,
655    ) -> Result<(std::net::IpAddr, Option<String>), AgentError> {
656        let resp = self
657            .call(OverlaydRequest::AttachContainer {
658                handle: AttachHandle::WindowsContainer {
659                    container_id: container_id.to_string(),
660                    ip: ip_override,
661                },
662                service: service_name.to_string(),
663                join_global: false,
664                dns_server: dns_server.or_else(|| self.dns_server_addr.map(|sa| sa.ip())),
665                dns_domain: dns_domain.or_else(|| self.dns_domain.clone()),
666            })
667            .await?;
668        match resp {
669            OverlaydResponse::Attached(result) => {
670                // Record agent-side autoclean bookkeeping. We key by the
671                // overlayd-issued namespace GUID; if overlayd did not return
672                // one (e.g. host-network attach), there is nothing to track.
673                if autoclean {
674                    if let Some(ns_str) = result.namespace_guid.as_deref() {
675                        match windows::core::GUID::try_from(ns_str) {
676                            Ok(ns_guid) => {
677                                let mut cleanup = self.hcn_cleanup.lock().await;
678                                cleanup.insert(ns_guid, (service_name.to_string(), result.ip));
679                            }
680                            Err(e) => {
681                                tracing::warn!(
682                                    ns = %ns_str,
683                                    error = %e,
684                                    "overlayd returned a non-GUID namespace handle; skipping hcn_cleanup insert"
685                                );
686                            }
687                        }
688                    }
689                }
690                Ok((result.ip, result.namespace_guid))
691            }
692            other => Err(AgentError::Network(format!(
693                "overlayd AttachContainer(WindowsContainer) returned unexpected response: {other:?}"
694            ))),
695        }
696    }
697
698    /// Detach and release a Windows HCN container by its bare namespace GUID.
699    ///
700    /// Drains the agent-side [`OverlayManager::hcn_cleanup`] entry (if any)
701    /// before forwarding `DetachContainer` to overlayd. Safe to call with an
702    /// unknown GUID — the map drain is a no-op in that case.
703    ///
704    /// # Errors
705    /// Returns an error if overlayd reports a detach failure.
706    #[cfg(target_os = "windows")]
707    pub async fn detach_container_hcn(&self, namespace_guid: &str) -> Result<(), AgentError> {
708        // Drain the agent-side cleanup map first so a later overlayd error does
709        // not leave a stale entry behind.
710        match windows::core::GUID::try_from(namespace_guid) {
711            Ok(ns_guid) => {
712                let mut cleanup = self.hcn_cleanup.lock().await;
713                if let Some((service_name, ip)) = cleanup.remove(&ns_guid) {
714                    tracing::info!(
715                        ns = %namespace_guid,
716                        service = %service_name,
717                        ip = %ip,
718                        "Released HCN overlay attachment (agent-side cleanup)"
719                    );
720                }
721            }
722            Err(e) => {
723                tracing::warn!(
724                    ns = %namespace_guid,
725                    error = %e,
726                    "detach_container_hcn called with non-GUID handle; skipping hcn_cleanup drain"
727                );
728            }
729        }
730
731        self.call(OverlaydRequest::DetachContainer {
732            handle: AttachHandle::WindowsContainer {
733                container_id: namespace_guid.to_string(),
734                ip: None,
735            },
736        })
737        .await?;
738        Ok(())
739    }
740
741    /// Release the overlay resources held by a Linux container by delegating to
742    /// overlayd (`DetachContainer` with a `LinuxPid` handle).
743    ///
744    /// # Errors
745    /// Returns an error if overlayd reports a detach failure.
746    pub async fn detach_container(&self, pid: u32) -> Result<(), AgentError> {
747        self.call(OverlaydRequest::DetachContainer {
748            handle: AttachHandle::LinuxPid { pid },
749        })
750        .await?;
751        Ok(())
752    }
753
754    /// Tear down the per-service overlay segment for `service_name`.
755    pub async fn teardown_service_overlay(&self, service_name: &str) {
756        if let Err(e) = self
757            .call(OverlaydRequest::TeardownServiceOverlay {
758                service: service_name.to_string(),
759            })
760            .await
761        {
762            tracing::warn!(service = %service_name, error = %e, "overlayd TeardownServiceOverlay failed");
763        }
764    }
765
766    /// Cleanup all overlay networks (tears down the global overlay in overlayd).
767    ///
768    /// # Errors
769    /// Returns an error if overlayd reports a teardown failure.
770    pub async fn cleanup(&mut self) -> Result<(), AgentError> {
771        self.call(OverlaydRequest::TeardownGlobalOverlay).await?;
772        self.global_interface = None;
773        // Best-effort drain of any agent-side autoclean bookkeeping we still
774        // hold on Windows. overlayd already tore down the HCN namespaces in
775        // response to `TeardownGlobalOverlay`; this just empties the side-map
776        // so a subsequent reuse of this manager starts clean.
777        #[cfg(target_os = "windows")]
778        {
779            let mut cleanup = self.hcn_cleanup.lock().await;
780            cleanup.clear();
781        }
782        Ok(())
783    }
784
    /// Returns this node's IP on the global overlay network (cached).
    ///
    /// Reads the cached `node_ip` field only; no request is sent to overlayd.
    /// `None` means no IP has been cached yet.
    pub fn node_ip(&self) -> Option<IpAddr> {
        self.node_ip
    }
789
    /// Returns the deployment name this overlay manager was created for.
    ///
    /// Borrows the stored `deployment` field; no request is sent to overlayd.
    pub fn deployment(&self) -> &str {
        &self.deployment
    }
794
    /// Returns the global overlay interface name (cached).
    ///
    /// `None` when no interface name is recorded, which is also the state
    /// `cleanup` leaves behind after a successful global teardown. No request
    /// is sent to overlayd.
    pub fn global_interface(&self) -> Option<&str> {
        self.global_interface.as_deref()
    }
799
    /// Returns the `WireGuard` listen port for the overlay network.
    ///
    /// This is the stored `overlay_port` value; no request is sent to overlayd.
    pub fn overlay_port(&self) -> u16 {
        self.overlay_port
    }
804
805    /// Returns `true` if the global overlay transport is active (cached: an
806    /// interface name has been recorded).
807    pub fn has_global_transport(&self) -> bool {
808        self.global_interface.is_some()
809    }
810
811    /// Returns the number of per-service overlay bridges currently active.
812    pub async fn service_bridge_count(&self) -> usize {
813        match self.call(OverlaydRequest::Status).await {
814            Ok(OverlaydResponse::Status(snap)) => snap.service_count as usize,
815            _ => 0,
816        }
817    }
818
819    /// Add a peer to the live global overlay transport by delegating to overlayd.
820    ///
821    /// The parameter type is preserved (`&zlayer_overlay::PeerInfo`) so the one
822    /// caller (`zlayer-api`'s internal add-peer handler) compiles unchanged; the
823    /// shim converts it to a wire-safe [`PeerSpec`].
824    ///
825    /// # Errors
826    /// Returns an error if overlayd rejects the peer (e.g. overlay not yet up).
827    pub async fn add_global_peer(&self, peer: &zlayer_overlay::PeerInfo) -> Result<(), AgentError> {
828        self.call(OverlaydRequest::AddPeer {
829            peer: peer_spec_from(peer),
830            scope: zlayer_types::overlayd::PeerScope::Global,
831        })
832        .await?;
833        Ok(())
834    }
835
836    /// Add a peer to a service's dedicated per-service overlay transport.
837    ///
838    /// Analogous to [`OverlayManager::add_global_peer`] but scoped to
839    /// `service`'s [`OverlayMode::Dedicated`] device: first the peer itself
840    /// (`AddPeer` with `scope: Service`), then the service `subnet` plumbed
841    /// into that peer's `AllowedIPs` (`AddAllowedIp` with the same scope).
842    ///
843    /// # Errors
844    /// Returns an error if overlayd rejects the peer or the allowed-IP add
845    /// (e.g. the service's dedicated transport is not yet up).
846    pub async fn add_service_peer(
847        &self,
848        service: &str,
849        peer: &zlayer_overlay::PeerInfo,
850        subnet: &str,
851    ) -> Result<(), AgentError> {
852        self.call(OverlaydRequest::AddPeer {
853            peer: peer_spec_from(peer),
854            scope: zlayer_types::overlayd::PeerScope::Service {
855                service: service.to_string(),
856            },
857        })
858        .await?;
859        self.call(OverlaydRequest::AddAllowedIp {
860            pubkey: peer.public_key.clone(),
861            cidr: subnet.to_string(),
862            scope: zlayer_types::overlayd::PeerScope::Service {
863                service: service.to_string(),
864            },
865        })
866        .await?;
867        Ok(())
868    }
869
870    /// Remove a peer (by base64 public key) from a service's dedicated
871    /// per-service overlay transport.
872    ///
873    /// # Errors
874    /// Returns an error if overlayd reports the removal failed.
875    pub async fn remove_service_peer(&self, service: &str, pubkey: &str) -> Result<(), AgentError> {
876        self.call(OverlaydRequest::RemovePeer {
877            pubkey: pubkey.to_string(),
878            scope: zlayer_types::overlayd::PeerScope::Service {
879                service: service.to_string(),
880            },
881        })
882        .await?;
883        Ok(())
884    }
885
886    /// Returns the CIDR string for the overlay IP allocator (cached cluster CIDR).
887    pub fn overlay_cidr(&self) -> String {
888        self.cluster_cidr
889            .map_or_else(|| "10.200.0.0/16".to_string(), |c| c.to_string())
890    }
891
    /// Returns the per-node slice CIDR this manager was built with, or `None`.
    ///
    /// Returns the stored value by copy; a manager built through `with_slice`
    /// reports the slice it was given.
    pub fn slice_cidr(&self) -> Option<IpNetwork> {
        self.slice_cidr
    }
896
    /// Returns the full cluster CIDR, if known.
    ///
    /// Returns the stored value by copy. When this is `None`, `overlay_cidr`
    /// falls back to its built-in default string and `ip_alloc_stats` reports
    /// the unspecified IPv4 address as its base.
    pub fn cluster_cidr(&self) -> Option<IpNetwork> {
        self.cluster_cidr
    }
901
    /// Persist the IPAM allocator state. overlayd owns IPAM; this is a no-op
    /// retained for API parity with callers.
    ///
    /// `_path` is ignored and nothing is written.
    ///
    /// # Errors
    /// Infallible today.
    // `async` is kept even though nothing is awaited, so call sites that
    // `.await` this method keep compiling; hence the lint exemption.
    #[allow(clippy::unused_async)]
    pub async fn persist_ipam_state(&self, _path: &std::path::Path) -> Result<(), AgentError> {
        Ok(())
    }
911
    /// Restore IPAM allocator state. overlayd owns IPAM; this is a no-op
    /// retained for API parity with callers.
    ///
    /// `_path` is ignored and nothing is read; `self` is not modified despite
    /// the `&mut self` receiver.
    ///
    /// # Errors
    /// Infallible today.
    // `async` is kept even though nothing is awaited, so call sites that
    // `.await` this method keep compiling; hence the lint exemption.
    #[allow(clippy::unused_async)]
    pub async fn restore_ipam_state(&mut self, _path: &std::path::Path) -> Result<(), AgentError> {
        Ok(())
    }
921
922    /// Returns IP allocation statistics: (`allocated_count`, `base_addr`).
923    ///
924    /// overlayd owns IPAM and does not surface allocation counters over IPC, so
925    /// this reports `(0, base)` derived from the cached cluster CIDR.
926    pub fn ip_alloc_stats(&self) -> (u64, IpAddr) {
927        let base = self
928            .cluster_cidr
929            .map_or(IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED), |c| c.network());
930        (0, base)
931    }
932}
933
934#[cfg(test)]
935mod tests {
936    use super::*;
937
938    /// No generated name may ever exceed 15 characters.
939    #[test]
940    fn interface_name_never_exceeds_limit() {
941        let cases: Vec<(&[&str], &str)> = vec![
942            (&["a"], "g"),
943            (&["zlayer-manager"], "g"),
944            (&["my-very-long-deployment-name-that-goes-on-and-on"], "g"),
945            (&["zlayer", "manager"], "s"),
946            (&["zlayer-manager", "frontend-service"], "s"),
947            (&["a", "b"], "s"),
948            (
949                &["abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz"],
950                "s",
951            ),
952            (&["x"], ""),
953            (&["deployment"], ""),
954            (&["a-really-long-name-exceeding-everything"], "suffix"),
955        ];
956
957        for (parts, suffix) in &cases {
958            let name = make_interface_name(parts, suffix);
959            assert!(
960                name.len() <= MAX_IFNAME_LEN,
961                "Name '{}' is {} chars (parts={:?}, suffix='{}')",
962                name,
963                name.len(),
964                parts,
965                suffix,
966            );
967        }
968    }
969
970    /// Very long and varied inputs must still respect the limit.
971    #[test]
972    fn interface_name_with_extreme_lengths() {
973        let long = "a".repeat(200);
974        let long_ref = long.as_str();
975
976        let name = make_interface_name(&[long_ref], "g");
977        assert!(name.len() <= MAX_IFNAME_LEN, "Name '{name}' too long");
978
979        let name = make_interface_name(&[long_ref, long_ref, long_ref], "s");
980        assert!(name.len() <= MAX_IFNAME_LEN, "Name '{name}' too long");
981
982        let name = make_interface_name(&[long_ref], "");
983        assert!(name.len() <= MAX_IFNAME_LEN, "Name '{name}' too long");
984    }
985
986    /// Same inputs must always produce the same output.
987    #[test]
988    fn interface_name_is_deterministic() {
989        let a = make_interface_name(&["zlayer-manager"], "g");
990        let b = make_interface_name(&["zlayer-manager"], "g");
991        assert_eq!(a, b);
992    }
993
994    /// Different inputs must produce different outputs.
995    #[test]
996    fn interface_name_uniqueness() {
997        let a = make_interface_name(&["deploy-a"], "g");
998        let b = make_interface_name(&["deploy-b"], "g");
999        assert_ne!(a, b);
1000
1001        let a = make_interface_name(&["deploy"], "g");
1002        let b = make_interface_name(&["deploy"], "s");
1003        assert_ne!(a, b);
1004    }
1005
1006    /// Short names that fit should be returned as-is (human readable).
1007    #[test]
1008    fn interface_name_short_inputs_are_readable() {
1009        let name = make_interface_name(&["app"], "g");
1010        assert_eq!(name, "zl-app-g");
1011        let name = make_interface_name(&["my", "web"], "s");
1012        assert_eq!(name, "zl-my-web-s");
1013    }
1014
1015    /// `with_slice` must remember the slice it was built with.
1016    #[test]
1017    fn with_slice_stores_slice_cidr() {
1018        let cluster: IpNetwork = "10.200.0.0/16".parse().unwrap();
1019        let slice: IpNetwork = "10.200.42.0/28".parse().unwrap();
1020        let om = OverlayManager::with_slice(
1021            "test-deploy".to_string(),
1022            cluster,
1023            slice,
1024            51820,
1025            "test".to_string(),
1026        );
1027        assert_eq!(om.slice_cidr(), Some(slice));
1028        assert_eq!(om.cluster_cidr(), Some(cluster));
1029        assert_eq!(om.overlay_port(), 51820);
1030        assert_eq!(om.deployment(), "test-deploy");
1031    }
1032
1033    /// `node_ip()` is None before any setup.
1034    #[tokio::test]
1035    async fn node_ip_none_before_setup() {
1036        let om = OverlayManager::new("test-deploy".to_string(), "test".to_string())
1037            .await
1038            .unwrap();
1039        assert!(om.node_ip().is_none());
1040    }
1041
1042    /// DNS config round-trips through the cache.
1043    #[tokio::test]
1044    async fn dns_config_set_and_round_trip() {
1045        let mut om = OverlayManager::new("dns-roundtrip".to_string(), "test".to_string())
1046            .await
1047            .unwrap();
1048        let addr: SocketAddr = "10.200.42.1:15353".parse().unwrap();
1049        om.set_dns_config(Some(addr), Some("overlay.local".to_string()));
1050        assert_eq!(om.dns_server_addr(), Some(addr));
1051        assert_eq!(om.dns_domain(), Some("overlay.local"));
1052
1053        om.set_dns_config(None, None);
1054        assert!(om.dns_server_addr().is_none());
1055        assert!(om.dns_domain().is_none());
1056    }
1057
1058    /// `peer_spec_from` must copy every `PeerInfo` field into the wire-safe
1059    /// `PeerSpec` exactly as the live overlayd transport expects (endpoint
1060    /// stringified, keepalive in whole seconds).
1061    #[test]
1062    fn peer_spec_from_copies_all_fields() {
1063        let peer = zlayer_overlay::PeerInfo {
1064            public_key: "base64key".to_string(),
1065            endpoint: "1.2.3.4:51820".parse().unwrap(),
1066            allowed_ips: "10.200.0.2/32".to_string(),
1067            persistent_keepalive_interval: std::time::Duration::from_secs(25),
1068        };
1069        let spec = peer_spec_from(&peer);
1070        assert_eq!(spec.public_key, "base64key");
1071        assert_eq!(spec.endpoint, "1.2.3.4:51820");
1072        assert_eq!(spec.allowed_ips, "10.200.0.2/32");
1073        assert_eq!(spec.persistent_keepalive_secs, 25);
1074    }
1075
1076    /// `setup_service_overlay` must forward the caller-supplied mode verbatim
1077    /// (no more hardcoded `OverlayMode::default()`). Asserts the request the
1078    /// shim builds carries `Dedicated` when asked for `Dedicated`.
1079    #[test]
1080    fn setup_service_overlay_request_carries_dedicated_mode() {
1081        let req = OverlaydRequest::SetupServiceOverlay {
1082            service: "web".to_string(),
1083            mode: zlayer_types::overlay::OverlayMode::Dedicated,
1084        };
1085        match req {
1086            OverlaydRequest::SetupServiceOverlay { service, mode } => {
1087                assert_eq!(service, "web");
1088                assert_eq!(mode, zlayer_types::overlay::OverlayMode::Dedicated);
1089                assert_ne!(mode, zlayer_types::overlay::OverlayMode::default());
1090            }
1091            other => panic!("expected SetupServiceOverlay, got {other:?}"),
1092        }
1093    }
1094
1095    /// The service-scoped peer ops must target `PeerScope::Service { service }`,
1096    /// not `Global`, so dedicated transports stay isolated from the cluster
1097    /// transport.
1098    #[test]
1099    fn service_peer_ops_use_service_scope() {
1100        let peer = zlayer_overlay::PeerInfo {
1101            public_key: "k".to_string(),
1102            endpoint: "1.2.3.4:51820".parse().unwrap(),
1103            allowed_ips: "10.201.0.2/32".to_string(),
1104            persistent_keepalive_interval: std::time::Duration::from_secs(0),
1105        };
1106        let svc_scope = zlayer_types::overlayd::PeerScope::Service {
1107            service: "web".to_string(),
1108        };
1109
1110        let add = OverlaydRequest::AddPeer {
1111            peer: peer_spec_from(&peer),
1112            scope: svc_scope.clone(),
1113        };
1114        let allow = OverlaydRequest::AddAllowedIp {
1115            pubkey: peer.public_key.clone(),
1116            cidr: "10.201.0.0/24".to_string(),
1117            scope: svc_scope.clone(),
1118        };
1119        let remove = OverlaydRequest::RemovePeer {
1120            pubkey: peer.public_key.clone(),
1121            scope: svc_scope,
1122        };
1123
1124        match add {
1125            OverlaydRequest::AddPeer { scope, peer } => {
1126                assert_eq!(
1127                    scope,
1128                    zlayer_types::overlayd::PeerScope::Service {
1129                        service: "web".to_string()
1130                    }
1131                );
1132                assert_eq!(peer.public_key, "k");
1133            }
1134            other => panic!("expected AddPeer, got {other:?}"),
1135        }
1136        match allow {
1137            OverlaydRequest::AddAllowedIp { scope, cidr, .. } => {
1138                assert_eq!(cidr, "10.201.0.0/24");
1139                assert_eq!(
1140                    scope,
1141                    zlayer_types::overlayd::PeerScope::Service {
1142                        service: "web".to_string()
1143                    }
1144                );
1145            }
1146            other => panic!("expected AddAllowedIp, got {other:?}"),
1147        }
1148        match remove {
1149            OverlaydRequest::RemovePeer { scope, pubkey } => {
1150                assert_eq!(pubkey, "k");
1151                assert_eq!(
1152                    scope,
1153                    zlayer_types::overlayd::PeerScope::Service {
1154                        service: "web".to_string()
1155                    }
1156                );
1157            }
1158            other => panic!("expected RemovePeer, got {other:?}"),
1159        }
1160    }
1161
1162    /// Windows-only: verify the `hcn_cleanup` side-map starts empty on both
1163    /// constructor paths. Live insert/drain coverage lives behind the overlayd
1164    /// IPC layer (which is exercised by the windows e2e tests), but this
1165    /// sanity-checks that the field is wired correctly through `new()` and
1166    /// `with_slice()`.
1167    #[cfg(target_os = "windows")]
1168    #[tokio::test]
1169    async fn hcn_cleanup_map_starts_empty() {
1170        let om = OverlayManager::new("test-deploy".to_string(), "test".to_string())
1171            .await
1172            .unwrap();
1173        {
1174            let map = om.hcn_cleanup.lock().await;
1175            assert!(
1176                map.is_empty(),
1177                "hcn_cleanup map must start empty from new()"
1178            );
1179        }
1180
1181        let cluster: IpNetwork = "10.200.0.0/16".parse().unwrap();
1182        let slice: IpNetwork = "10.200.42.0/28".parse().unwrap();
1183        let om = OverlayManager::with_slice(
1184            "test-deploy".to_string(),
1185            cluster,
1186            slice,
1187            51820,
1188            "test".to_string(),
1189        );
1190        {
1191            let map = om.hcn_cleanup.lock().await;
1192            assert!(
1193                map.is_empty(),
1194                "hcn_cleanup map must start empty from with_slice()"
1195            );
1196        }
1197    }
1198}