1use std::collections::HashMap;
14use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
15#[cfg(target_os = "linux")]
16use std::os::fd::AsFd;
17use std::path::{Path, PathBuf};
18use std::sync::atomic::{AtomicU64, Ordering};
19
20use ipnetwork::IpNetwork;
21use zlayer_overlay::{NatConfig, NatTraversal, OverlayConfig, OverlayTransport, PeerInfo};
22use zlayer_types::overlayd::{
23 AttachHandle, AttachResult, DedicatedServiceStatus, GuestOverlayConfig, OverlayMode,
24 OverlaydRequest, OverlaydResponse, PeerScope, PeerSpec, PeerStatus, ServiceOverlayInfo,
25 StatusSnapshot,
26};
27
28use crate::error::OverlaydError;
29use crate::network_state::{
30 owner_for_service, DedicatedPortAllocator, ManagedNetwork, NetworkState,
31};
32
/// Upper bound, in bytes, on the length of a generated interface name.
const MAX_IFNAME_LEN: usize = 15;

/// Builds an interface name of the form `zl-<parts joined by '-'>[-<suffix>]`.
///
/// If that readable form is at most [`MAX_IFNAME_LEN`] bytes it is returned unchanged.
/// Otherwise the parts are replaced by a lowercase hex digest (std `DefaultHasher` fed every
/// part in order, then the suffix) truncated to the room that is left. The suffix is kept
/// only when at least one digest character still fits in front of it; if not, the name is
/// `zl-` followed by as much of the digest as fits.
#[must_use]
pub fn make_interface_name(parts: &[&str], suffix: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    // Readable form first: "zl-a-b" or "zl-a-b-suffix".
    let mut readable = format!("zl-{}", parts.join("-"));
    if !suffix.is_empty() {
        readable.push('-');
        readable.push_str(suffix);
    }
    if readable.len() <= MAX_IFNAME_LEN {
        return readable;
    }

    // Too long: digest every part (individually, in order) and then the suffix.
    let mut state = DefaultHasher::new();
    parts.iter().for_each(|part| part.hash(&mut state));
    suffix.hash(&mut state);
    let digest = format!("{:x}", state.finish());

    // Digest characters that fit when "zl-" and "-<suffix>" are both kept; zero means the
    // suffix cannot be kept (or there is none).
    let room_with_suffix = if suffix.is_empty() {
        0
    } else {
        MAX_IFNAME_LEN.saturating_sub(3 + 1 + suffix.len())
    };

    if room_with_suffix == 0 {
        let keep = (MAX_IFNAME_LEN - 3).min(digest.len());
        format!("zl-{}", &digest[..keep])
    } else {
        let keep = room_with_suffix.min(digest.len());
        format!("zl-{}-{}", &digest[..keep], suffix)
    }
}
81
82fn first_usable_ip(subnet: ipnet::IpNet) -> IpAddr {
87 match subnet {
88 ipnet::IpNet::V4(v4) => {
89 let net = u32::from(v4.network());
90 IpAddr::V4(Ipv4Addr::from(net.wrapping_add(1)))
91 }
92 ipnet::IpNet::V6(v6) => {
93 let net = u128::from(v6.network());
94 IpAddr::V6(Ipv6Addr::from(net.wrapping_add(1)))
95 }
96 }
97}
98
/// Borrowed description of a bridge used when attaching to it (Linux only).
///
/// NOTE(review): no use of this type is visible in this part of the file; the field notes
/// below are read off the names and types only and should be confirmed against the caller.
#[cfg(target_os = "linux")]
#[derive(Debug)]
struct BridgeAttachParams<'a> {
    /// Name of the bridge interface.
    bridge_name: &'a str,
    /// Presumably the bridge's gateway address — confirm.
    gateway: IpAddr,
    /// Presumably the prefix length of the bridge subnet — confirm.
    subnet_prefix_len: u8,
}
111
/// Per-attachment record stored in `OverlaydServer::attached` (Linux only).
///
/// NOTE(review): the code that fills and reads these records is outside the visible part of
/// the file; the field notes are inferred from names and types and should be confirmed.
#[cfg(target_os = "linux")]
#[derive(Debug, Clone)]
struct AttachInfo {
    /// Address given to the attached container (presumably on the service network).
    service_ip: IpAddr,
    /// Service the attachment belongs to, if any.
    service_name: Option<String>,
    /// Presumably the container's address on the global overlay, when it has one.
    global_ip: Option<IpAddr>,
    /// Presumably whether the container was also joined to the global overlay.
    joined_global: bool,
}
126
/// Record kept in `OverlaydServer::guest_attachments` for one guest-managed attachment,
/// keyed by the guest id.
#[derive(Debug, Clone)]
struct GuestAttachInfo {
    /// Overlay address allocated for the guest.
    overlay_ip: IpAddr,
    /// Public key generated for the guest; the guest is registered as a global peer under
    /// this key.
    public_key: String,
    /// `Some(service)` when the address came from that service bridge's allocator, `None`
    /// when it came from the node-level allocator.
    service_name: Option<String>,
}
143
/// State of one per-service Linux bridge, stored in `OverlaydServer::service_bridges`.
#[cfg(target_os = "linux")]
#[derive(Debug)]
struct ServiceBridge {
    /// Interface name of the bridge.
    name: String,
    /// Subnet assigned to the service.
    subnet: ipnet::IpNet,
    /// Address configured on the bridge itself (the subnet's network address + 1).
    gateway: IpAddr,
    /// Allocator over `subnet`; the gateway address is reserved in it at creation time.
    ip_allocator: zlayer_overlay::allocator::IpAllocator,
}
159
/// A dedicated per-service overlay device, stored in `OverlaydServer::service_transports`.
struct ServiceTransport {
    /// The live transport; shut down when the service overlay is torn down.
    transport: OverlayTransport,
    /// Interface name reported by the transport after creation.
    interface: String,
    /// Public key the device was configured with.
    public_key: String,
    /// Listen port held in the dedicated port allocator for this device.
    listen_port: u16,
    /// Address of the device inside `subnet` (the subnet's network address + 1).
    overlay_ip: std::net::IpAddr,
    /// Subnet assigned to the service.
    subnet: ipnet::IpNet,
}
183
/// In-memory state of the overlay daemon: the global overlay, per-service overlays, address
/// and port allocators, and cached DNS/NAT settings.
///
/// Requests are applied through `handle`, which takes `&mut self`.
pub struct OverlaydServer {
    /// Deployment name, set by `SetupGlobalOverlay`; part of generated interface names.
    deployment: String,
    /// Instance id, set by `SetupGlobalOverlay`; part of generated interface names.
    instance_id: String,
    /// Data directory; used to locate the persisted network-state marker.
    data_dir: PathBuf,
    /// Name of the global overlay interface while it is up.
    global_interface: Option<String>,
    /// Live transport of the global overlay while it is up.
    global_transport: Option<OverlayTransport>,
    /// Service name -> interface (bridge or placeholder) name.
    service_interfaces: HashMap<String, String>,
    /// Service name -> dedicated overlay device.
    service_transports: HashMap<String, ServiceTransport>,
    /// Listen-port allocator for dedicated devices, seeded with the overlay port and the
    /// ports recorded in the network-state marker.
    dedicated_ports: DedicatedPortAllocator,
    /// Service name -> per-service bridge (Linux only).
    #[cfg(target_os = "linux")]
    service_bridges: HashMap<String, ServiceBridge>,
    /// Registry handing out per-service subnets; built lazily from `cluster_cidr`.
    service_subnet_registry: Option<zlayer_overlay::allocator::ServiceSubnetRegistry>,
    /// Local node id, set by `SetLocalNodeId`; its string form is the node key in the registry.
    local_node_id: u64,
    /// Local WireGuard public key, set by `SetLocalWgPubkey`; used when updating AllowedIPs.
    local_wg_pubkey: Option<String>,
    /// Public key generated for the global transport; given to guests as the node's peer key.
    transport_public_key: Option<String>,
    /// Node-level address allocator (over the slice CIDR when one is configured, otherwise
    /// over the default cluster CIDR).
    ip_allocator: IpAllocator,
    /// Address allocated to this node on the global overlay.
    node_ip: Option<IpAddr>,
    /// Listen port of the global overlay.
    overlay_port: u16,
    /// Cluster-wide CIDR.
    cluster_cidr: Option<IpNetwork>,
    /// This node's slice of the cluster CIDR, when configured.
    slice_cidr: Option<IpNetwork>,
    /// NOTE(review): not used in the visible part of the file — presumably tracks HCN
    /// resources to clean up on Windows; confirm.
    #[cfg(target_os = "windows")]
    hcn_cleanup: HashMap<windows::core::GUID, (String, std::net::IpAddr)>,
    /// Service name -> address allocator (Windows only); the entry is dropped when the
    /// service's dedicated overlay is torn down.
    #[cfg(target_os = "windows")]
    service_ip_allocators: HashMap<String, IpAllocator>,
    /// Attachment records keyed by a `u32` (presumably the container PID — confirm).
    #[cfg(target_os = "linux")]
    attached: HashMap<u32, AttachInfo>,
    /// Peers registered on the global overlay, keyed by public key.
    global_peers: HashMap<String, PeerSpec>,
    /// Guest-managed attachments, keyed by guest id.
    guest_attachments: HashMap<String, GuestAttachInfo>,
    /// DNS server address (port 53) cached from the most recent attach request that gave one.
    dns_server_addr: Option<SocketAddr>,
    /// DNS domain cached from the most recent attach request that gave one.
    dns_domain: Option<String>,
    /// NOTE(review): presumably the name -> address records managed by `RegisterDns` /
    /// `UnregisterDns`; the accessors are outside the visible part of the file.
    dns_records: HashMap<String, IpAddr>,
    /// NAT configuration override; set to a disabled config when the global overlay is set
    /// up with NAT turned off.
    nat_config: Option<NatConfig>,
    /// Optional directory override set through `with_uapi_sock_dir`.
    uapi_sock_dir: Option<PathBuf>,
    /// NOTE(review): NAT traversal state; not touched in the visible part of the file.
    nat_traversal: Option<NatTraversal>,
    /// NOTE(review): presumably a timestamp of the last NAT refresh; initialised to 0.
    nat_last_refresh: AtomicU64,
    /// Set once a `Shutdown` request has been handled.
    shutdown_requested: bool,
}
273
274impl OverlaydServer {
275 #[must_use]
283 pub fn new(data_dir: PathBuf) -> Self {
284 let default_cidr: IpNetwork = "10.200.0.0/16".parse().expect("compile-time constant CIDR");
287 let overlay_port = zlayer_core::DEFAULT_WG_PORT;
288
289 let marker_path = zlayer_paths::ZLayerDirs::new(data_dir.clone()).agent_network_state();
293 let recorded_dedicated_ports: Vec<u16> = NetworkState::load(&marker_path)
294 .networks
295 .iter()
296 .filter(|n| n.owner.starts_with("service:"))
297 .filter_map(|n| n.wg_port)
298 .collect();
299
300 Self {
301 deployment: String::new(),
302 instance_id: String::new(),
303 data_dir,
304 global_interface: None,
305 global_transport: None,
306 service_interfaces: HashMap::new(),
307 service_transports: HashMap::new(),
308 dedicated_ports: DedicatedPortAllocator::new(overlay_port, recorded_dedicated_ports),
309 #[cfg(target_os = "linux")]
310 service_bridges: HashMap::new(),
311 service_subnet_registry: None,
312 local_node_id: 0,
313 local_wg_pubkey: None,
314 transport_public_key: None,
315 ip_allocator: IpAllocator::new(default_cidr),
316 node_ip: None,
317 overlay_port,
318 cluster_cidr: Some(default_cidr),
319 slice_cidr: None,
320 #[cfg(target_os = "windows")]
321 hcn_cleanup: HashMap::new(),
322 #[cfg(target_os = "windows")]
323 service_ip_allocators: HashMap::new(),
324 #[cfg(target_os = "linux")]
325 attached: HashMap::new(),
326 global_peers: HashMap::new(),
327 guest_attachments: HashMap::new(),
328 dns_server_addr: None,
329 dns_domain: None,
330 dns_records: HashMap::new(),
331 nat_config: None,
332 uapi_sock_dir: None,
333 nat_traversal: None,
334 nat_last_refresh: AtomicU64::new(0),
335 shutdown_requested: false,
336 }
337 }
338
339 #[must_use]
342 pub fn with_uapi_sock_dir(mut self, dir: impl Into<PathBuf>) -> Self {
343 self.uapi_sock_dir = Some(dir.into());
344 self
345 }
346
347 #[must_use]
349 pub fn shutdown_requested(&self) -> bool {
350 self.shutdown_requested
351 }
352
353 #[must_use]
356 pub fn data_dir(&self) -> &Path {
357 &self.data_dir
358 }
359
360 pub async fn handle(&mut self, req: OverlaydRequest) -> OverlaydResponse {
366 match self.dispatch(req).await {
367 Ok(resp) => resp,
368 Err(e) => OverlaydResponse::Err {
369 message: e.to_string(),
370 },
371 }
372 }
373
374 #[allow(clippy::too_many_lines)]
375 async fn dispatch(&mut self, req: OverlaydRequest) -> Result<OverlaydResponse, OverlaydError> {
376 match req {
377 OverlaydRequest::SetLocalNodeId { node_id } => {
378 self.local_node_id = node_id;
379 Ok(OverlaydResponse::Ok)
380 }
381 OverlaydRequest::SetLocalWgPubkey { pubkey } => {
382 self.local_wg_pubkey = Some(pubkey);
383 Ok(OverlaydResponse::Ok)
384 }
385 OverlaydRequest::SetupGlobalOverlay {
386 deployment,
387 instance_id,
388 cluster_cidr,
389 slice_cidr,
390 wg_port,
391 nat_enabled,
392 } => {
393 let name = self
394 .setup_global_overlay(
395 deployment,
396 instance_id,
397 &cluster_cidr,
398 slice_cidr.as_deref(),
399 wg_port,
400 nat_enabled,
401 )
402 .await?;
403 Ok(OverlaydResponse::BridgeName { name })
404 }
405 OverlaydRequest::TeardownGlobalOverlay => {
406 self.teardown_global_overlay();
407 Ok(OverlaydResponse::Ok)
408 }
409 OverlaydRequest::SetupServiceOverlay { service, mode } => {
410 let info = self.setup_service_overlay(&service, mode).await?;
411 Ok(OverlaydResponse::ServiceOverlay(info))
412 }
413 OverlaydRequest::TeardownServiceOverlay { service } => {
414 self.teardown_service_overlay(&service).await;
415 Ok(OverlaydResponse::Ok)
416 }
417 OverlaydRequest::AllocateIp {
418 service,
419 join_global,
420 } => {
421 let ip = self.allocate_ip(&service, join_global)?;
422 Ok(OverlaydResponse::Ip { ip })
423 }
424 OverlaydRequest::ReleaseIp { ip } => {
425 self.release_ip(ip);
426 Ok(OverlaydResponse::Ok)
427 }
428 OverlaydRequest::AttachContainer {
429 handle,
430 service,
431 join_global,
432 dns_server,
433 dns_domain,
434 } => {
435 if let AttachHandle::GuestManaged { id } = handle {
441 if let Some(server) = dns_server {
445 self.dns_server_addr = Some(SocketAddr::new(server, 53));
446 }
447 if dns_domain.is_some() {
448 self.dns_domain.clone_from(&dns_domain);
449 }
450 let config = self
451 .attach_container_guest(&id, &service, join_global, dns_server, dns_domain)
452 .await?;
453 Ok(OverlaydResponse::GuestConfig(config))
454 } else {
455 let result = self
456 .attach_container(handle, &service, join_global, dns_server, dns_domain)
457 .await?;
458 Ok(OverlaydResponse::Attached(result))
459 }
460 }
461 OverlaydRequest::DetachContainer { handle } => {
462 if let AttachHandle::GuestManaged { id } = handle {
463 self.detach_container_guest(&id).await?;
464 } else {
465 self.detach_container(handle).await?;
466 }
467 Ok(OverlaydResponse::Ok)
468 }
469 OverlaydRequest::AddPeer { peer, scope } => {
473 let info = peer_spec_to_info(&peer)?;
474 let transport = self.transport_for_scope(&scope)?;
475 Self::add_peer_on(transport, &info).await?;
476 if matches!(scope, PeerScope::Global) {
479 self.global_peers.insert(peer.public_key.clone(), peer);
480 }
481 Ok(OverlaydResponse::Ok)
482 }
483 OverlaydRequest::RemovePeer { pubkey, scope } => {
484 let transport = self.transport_for_scope(&scope)?;
485 Self::remove_peer_on(transport, &pubkey).await?;
486 if matches!(scope, PeerScope::Global) {
487 self.global_peers.remove(&pubkey);
488 }
489 Ok(OverlaydResponse::Ok)
490 }
491 OverlaydRequest::AddAllowedIp {
492 pubkey,
493 cidr,
494 scope,
495 } => {
496 let transport = self.transport_for_scope(&scope)?;
497 Self::add_allowed_ip_on(transport, &pubkey, &cidr).await?;
498 Ok(OverlaydResponse::Ok)
499 }
500 OverlaydRequest::RemoveAllowedIp {
501 pubkey,
502 cidr,
503 scope,
504 } => {
505 let transport = self.transport_for_scope(&scope)?;
506 Self::remove_allowed_ip_on(transport, &pubkey, &cidr).await?;
507 Ok(OverlaydResponse::Ok)
508 }
509 OverlaydRequest::RegisterDns { name, ip } => {
510 self.register_dns(name, ip);
511 Ok(OverlaydResponse::Ok)
512 }
513 OverlaydRequest::UnregisterDns { name } => {
514 self.unregister_dns(&name);
515 Ok(OverlaydResponse::Ok)
516 }
517 OverlaydRequest::Status => Ok(OverlaydResponse::Status(self.status_snapshot().await)),
518 OverlaydRequest::NatTick => {
519 self.nat_maintenance_tick().await?;
520 Ok(OverlaydResponse::Ok)
521 }
522 OverlaydRequest::Shutdown => {
523 self.shutdown_requested = true;
524 self.teardown_global_overlay();
525 Ok(OverlaydResponse::Ok)
526 }
527 }
528 }
529
530 async fn setup_global_overlay(
542 &mut self,
543 deployment: String,
544 instance_id: String,
545 cluster_cidr: &str,
546 slice_cidr: Option<&str>,
547 wg_port: u16,
548 nat_enabled: bool,
549 ) -> Result<String, OverlaydError> {
550 self.deployment = deployment;
551 self.instance_id = instance_id;
552 self.overlay_port = wg_port;
553
554 let cluster: IpNetwork = cluster_cidr.parse().map_err(|e| {
555 OverlaydError::Other(format!("invalid cluster CIDR {cluster_cidr}: {e}"))
556 })?;
557 self.cluster_cidr = Some(cluster);
558 if let Some(slice) = slice_cidr {
559 let slice_net: IpNetwork = slice
560 .parse()
561 .map_err(|e| OverlaydError::Other(format!("invalid slice CIDR {slice}: {e}")))?;
562 self.slice_cidr = Some(slice_net);
563 self.ip_allocator = IpAllocator::new(slice_net);
564 }
565 if !nat_enabled {
568 self.nat_config = Some(NatConfig {
569 enabled: false,
570 ..NatConfig::default()
571 });
572 }
573
574 if let Some(name) = self.global_interface.clone() {
575 if self.global_transport.is_some() {
576 tracing::debug!(
577 deployment = %self.deployment,
578 "Global overlay already active, reusing existing transport"
579 );
580 return Ok(name);
581 }
582 }
583
584 let interface_name = make_interface_name(&[&self.deployment, &self.instance_id], "g");
585
586 let (private_key, public_key) = OverlayTransport::generate_keys()
587 .await
588 .map_err(|e| OverlaydError::Overlay(format!("Failed to generate keys: {e}")))?;
589
590 let node_ip = self.ip_allocator.allocate()?;
591 self.transport_public_key = Some(public_key.clone());
592 let config = self.build_config(private_key, public_key, node_ip, 16, self.overlay_port);
593 let mut transport = OverlayTransport::new(config, interface_name);
594
595 transport
596 .create_interface()
597 .await
598 .map_err(|e| OverlaydError::Overlay(format!("Failed to create global overlay: {e}")))?;
599 transport.configure(&[]).await.map_err(|e| {
600 OverlaydError::Overlay(format!("Failed to configure global overlay: {e}"))
601 })?;
602
603 let actual_name = transport.interface_name().to_string();
605
606 self.node_ip = Some(node_ip);
607 self.global_interface = Some(actual_name.clone());
608 self.global_transport = Some(transport);
609 Ok(actual_name)
610 }
611
612 fn teardown_global_overlay(&mut self) {
614 if let Some(mut transport) = self.global_transport.take() {
615 tracing::info!("Shutting down global overlay");
616 transport.shutdown();
617 }
618 self.global_interface = None;
619 self.transport_public_key = None;
620 }
621
622 #[cfg(target_os = "linux")]
633 async fn setup_service_overlay(
634 &mut self,
635 service: &str,
636 mode: OverlayMode,
637 ) -> Result<ServiceOverlayInfo, OverlaydError> {
638 match mode.resolve() {
639 OverlayMode::Shared => self.setup_service_overlay_shared(service).await,
640 OverlayMode::Dedicated => self.setup_service_overlay_dedicated(service).await,
641 OverlayMode::Auto => unreachable!("OverlayMode::resolve never returns Auto"),
642 }
643 }
644
645 #[cfg(target_os = "linux")]
657 #[allow(clippy::too_many_lines)]
658 async fn setup_service_overlay_shared(
659 &mut self,
660 service: &str,
661 ) -> Result<ServiceOverlayInfo, OverlaydError> {
662 if let Some(existing) = self.service_bridges.get(service) {
664 let name = existing.name.clone();
665 tracing::debug!(service = %service, bridge = %name, "Service bridge already active, reusing");
666 return Ok(shared_overlay_info(name));
667 }
668
669 self.ensure_service_subnet_registry()?;
671 let subnet: ipnet::IpNet = {
672 let registry = self
673 .service_subnet_registry
674 .as_mut()
675 .expect("ensure_service_subnet_registry leaves Some");
676 let node_key = self.local_node_id.to_string();
677 registry.assign(service, &node_key).map_err(|e| {
678 OverlaydError::Overlay(format!(
679 "ServiceSubnetRegistry::assign({service}, {node_key}) failed: {e}"
680 ))
681 })?
682 };
683
684 let bridge_name = self.create_service_bridge(service, subnet).await?;
687
688 if let Some(ref cluster) = self.global_transport {
692 if let Some(ref pubkey) = self.local_wg_pubkey {
693 if let Err(e) = cluster.add_allowed_ip(pubkey, subnet).await {
694 tracing::warn!(
695 service = %service,
696 subnet = %subnet,
697 error = %e,
698 "Failed to add service subnet to cluster transport AllowedIPs (non-fatal)"
699 );
700 }
701 } else {
702 tracing::debug!(service = %service, "local_wg_pubkey not yet set; skipping cluster AllowedIPs update");
703 }
704 }
705
706 Ok(shared_overlay_info(bridge_name))
707 }
708
709 #[cfg(target_os = "linux")]
724 async fn create_service_bridge(
725 &mut self,
726 service: &str,
727 subnet: ipnet::IpNet,
728 ) -> Result<String, OverlaydError> {
729 use zlayer_overlay::allocator::IpAllocator as OverlayIpAllocator;
730
731 let bridge_name = make_interface_name(&[&self.deployment, &self.instance_id, service], "b");
732
733 if let Err(e) = crate::netlink::create_bridge(&bridge_name).await {
734 return Err(OverlaydError::Overlay(format!(
735 "create_bridge({bridge_name}) failed: {e}"
736 )));
737 }
738 if let Err(e) = crate::netlink::set_bridge_stp(&bridge_name, false) {
739 tracing::warn!(bridge = %bridge_name, error = %e, "set_bridge_stp(off) failed (non-fatal)");
740 }
741
742 let gateway = first_usable_ip(subnet);
744 if let Err(e) =
745 crate::netlink::add_address_to_link_by_name(&bridge_name, gateway, subnet.prefix_len())
746 .await
747 {
748 let _ = crate::netlink::delete_bridge(&bridge_name).await;
749 return Err(OverlaydError::Overlay(format!(
750 "add_address_to_link_by_name({bridge_name}, {gateway}/{}) failed: {e}",
751 subnet.prefix_len()
752 )));
753 }
754 if let Err(e) = crate::netlink::set_link_up_by_name(&bridge_name).await {
755 let _ = crate::netlink::delete_bridge(&bridge_name).await;
756 return Err(OverlaydError::Overlay(format!(
757 "set_link_up_by_name({bridge_name}) failed: {e}"
758 )));
759 }
760
761 let mut ip_allocator = OverlayIpAllocator::new(&subnet.to_string()).map_err(|e| {
763 OverlaydError::Overlay(format!("IpAllocator::new({subnet}) failed: {e}"))
764 })?;
765 let _ = ip_allocator.allocate_specific(gateway);
766
767 self.service_bridges.insert(
768 service.to_string(),
769 ServiceBridge {
770 name: bridge_name.clone(),
771 subnet,
772 gateway,
773 ip_allocator,
774 },
775 );
776 self.service_interfaces
777 .insert(service.to_string(), bridge_name.clone());
778
779 tracing::info!(service = %service, bridge = %bridge_name, subnet = %subnet, gateway = %gateway, "Service bridge created");
780 Ok(bridge_name)
781 }
782
/// Sets up the overlay for `service` in the requested mode (non-Linux build).
///
/// `mode` is resolved first; the resolved value selects the shared or the dedicated path.
///
/// # Panics
/// Panics if `OverlayMode::resolve` returns `Auto`.
#[cfg(not(target_os = "linux"))]
async fn setup_service_overlay(
    &mut self,
    service: &str,
    mode: OverlayMode,
) -> Result<ServiceOverlayInfo, OverlaydError> {
    let resolved = mode.resolve();
    match resolved {
        OverlayMode::Dedicated => self.setup_service_overlay_dedicated(service).await,
        OverlayMode::Shared => self.setup_service_overlay_shared(service).await,
        OverlayMode::Auto => unreachable!("OverlayMode::resolve never returns Auto"),
    }
}
802
/// Shared-mode setup on non-Linux builds: no bridge is created.
///
/// The name a bridge would have had is recorded in `service_interfaces` as a placeholder
/// and returned.
#[cfg(not(target_os = "linux"))]
#[allow(clippy::unused_async)]
async fn setup_service_overlay_shared(
    &mut self,
    service: &str,
) -> Result<ServiceOverlayInfo, OverlaydError> {
    let name_parts = [
        self.deployment.as_str(),
        self.instance_id.as_str(),
        service,
    ];
    let placeholder = make_interface_name(&name_parts, "b");
    self.service_interfaces
        .insert(service.to_owned(), placeholder.clone());
    tracing::debug!(service = %service, "Service overlay bridge setup is Linux-only; using direct networking placeholder");
    Ok(shared_overlay_info(placeholder))
}
822
823 #[allow(clippy::too_many_lines)]
840 async fn setup_service_overlay_dedicated(
841 &mut self,
842 service: &str,
843 ) -> Result<ServiceOverlayInfo, OverlaydError> {
844 if let Some(st) = self.service_transports.get(service) {
848 return Ok(dedicated_overlay_info(
849 st.interface.clone(),
850 &st.public_key,
851 st.listen_port,
852 st.overlay_ip,
853 st.subnet,
854 ));
855 }
856
857 let marker_path =
861 zlayer_paths::ZLayerDirs::new(self.data_dir.clone()).agent_network_state();
862 let recorded = NetworkState::load(&marker_path)
863 .get(&owner_for_service(service))
864 .cloned();
865
866 let (private_key, public_key, listen_port, iface_hint) = match recorded.as_ref() {
867 Some(entry)
868 if entry.wg_private_key.is_some()
869 && entry.wg_public_key.is_some()
870 && entry.wg_port.is_some()
871 && entry.interface.is_some() =>
872 {
873 let port = entry.wg_port.expect("checked above");
874 self.dedicated_ports.reserve(port);
875 (
876 entry.wg_private_key.clone().expect("checked above"),
877 entry.wg_public_key.clone().expect("checked above"),
878 port,
879 entry.interface.clone().expect("checked above"),
880 )
881 }
882 _ => {
883 let port = self.dedicated_ports.allocate()?;
884 let (priv_key, pub_key) = OverlayTransport::generate_keys()
885 .await
886 .map_err(|e| OverlaydError::Overlay(format!("Failed to generate keys: {e}")))?;
887 let iface =
888 make_interface_name(&[&self.deployment, &self.instance_id, service], "d");
889 (priv_key, pub_key, port, iface)
890 }
891 };
892
893 self.ensure_service_subnet_registry()?;
896 let subnet: ipnet::IpNet = {
897 let registry = self
898 .service_subnet_registry
899 .as_mut()
900 .expect("ensure_service_subnet_registry leaves Some");
901 let node_key = self.local_node_id.to_string();
902 registry.assign(service, &node_key).map_err(|e| {
903 OverlaydError::Overlay(format!(
904 "ServiceSubnetRegistry::assign({service}, {node_key}) failed: {e}"
905 ))
906 })?
907 };
908 let overlay_ip = first_usable_ip(subnet);
909
910 let config = self.build_config(
914 private_key.clone(),
915 public_key.clone(),
916 overlay_ip,
917 subnet.prefix_len(),
918 listen_port,
919 );
920 let mut transport = OverlayTransport::new(config, iface_hint);
921 transport.create_interface().await.map_err(|e| {
922 OverlaydError::Overlay(format!(
923 "Failed to create dedicated overlay for {service}: {e}"
924 ))
925 })?;
926 transport.configure(&[]).await.map_err(|e| {
927 OverlaydError::Overlay(format!(
928 "Failed to configure dedicated overlay for {service}: {e}"
929 ))
930 })?;
931 let actual_iface = transport.interface_name().to_string();
932
933 let mut marker = NetworkState::load(&marker_path);
937 marker.upsert(ManagedNetwork {
938 owner: owner_for_service(service),
939 kind: "wg-dedicated".to_string(),
940 name: actual_iface.clone(),
941 id: public_key.clone(),
942 subnet: subnet.to_string(),
943 wg_port: Some(listen_port),
944 wg_private_key: Some(private_key),
945 wg_public_key: Some(public_key.clone()),
946 interface: Some(actual_iface.clone()),
947 });
948 if let Err(e) = marker.save(&marker_path) {
949 tracing::warn!(service = %service, error = %e, path = %marker_path.display(), "failed to persist dedicated-overlay marker (device still live)");
950 }
951
952 self.service_transports.insert(
954 service.to_string(),
955 ServiceTransport {
956 transport,
957 interface: actual_iface.clone(),
958 public_key: public_key.clone(),
959 listen_port,
960 overlay_ip,
961 subnet,
962 },
963 );
964
965 tracing::info!(
966 service = %service,
967 interface = %actual_iface,
968 listen_port,
969 subnet = %subnet,
970 overlay_ip = %overlay_ip,
971 "Dedicated per-service overlay device created"
972 );
973
974 let name = self
978 .attach_dedicated_service(service, subnet, overlay_ip)
979 .await?;
980
981 Ok(dedicated_overlay_info(
982 name,
983 &public_key,
984 listen_port,
985 overlay_ip,
986 subnet,
987 ))
988 }
989
990 #[cfg(target_os = "linux")]
1003 async fn attach_dedicated_service(
1004 &mut self,
1005 service: &str,
1006 subnet: ipnet::IpNet,
1007 overlay_ip: IpAddr,
1008 ) -> Result<String, OverlaydError> {
1009 let _ = overlay_ip;
1010 let bridge_name = self.create_service_bridge(service, subnet).await?;
1011
1012 if let Some(st) = self.service_transports.get(service) {
1017 if let Some(ref pubkey) = self.local_wg_pubkey {
1018 if let Err(e) = st.transport.add_allowed_ip(pubkey, subnet).await {
1019 tracing::warn!(
1020 service = %service,
1021 subnet = %subnet,
1022 error = %e,
1023 "Failed to add service subnet to dedicated transport AllowedIPs (non-fatal)"
1024 );
1025 }
1026 } else {
1027 tracing::debug!(service = %service, "local_wg_pubkey not yet set; skipping dedicated AllowedIPs update");
1028 }
1029 }
1030
1031 Ok(bridge_name)
1032 }
1033
/// Windows attach step for a dedicated overlay: makes sure the per-service network exists
/// and returns its name, `<overlay network name>-svc-<service>`.
///
/// The overlay network name is derived from `deployment_or_default()`. The id returned by
/// `ensure_service_network` is not used here.
#[cfg(target_os = "windows")]
async fn attach_dedicated_service(
    &mut self,
    service: &str,
    subnet: ipnet::IpNet,
    _overlay_ip: IpAddr,
) -> Result<String, OverlaydError> {
    let _net_id = self.ensure_service_network(service, subnet).await?;
    let daemon_name = self.deployment_or_default();
    let base_name = overlay_network_name(&daemon_name);
    Ok(format!("{}-svc-{service}", base_name))
}
1064
/// Attach step for platforms that are neither Linux nor Windows: nothing is attached.
///
/// Returns the interface name of the tracked dedicated transport for `service`, or an
/// empty string when none is tracked.
#[cfg(all(not(target_os = "linux"), not(target_os = "windows")))]
#[allow(clippy::unused_async)]
async fn attach_dedicated_service(
    &mut self,
    service: &str,
    _subnet: ipnet::IpNet,
    _overlay_ip: IpAddr,
) -> Result<String, OverlaydError> {
    let iface = match self.service_transports.get(service) {
        Some(st) => st.interface.clone(),
        None => String::new(),
    };
    Ok(iface)
}
1083
1084 #[cfg_attr(not(target_os = "linux"), allow(clippy::unused_async))]
1089 async fn teardown_service_overlay(&mut self, service: &str) {
1090 #[cfg(target_os = "linux")]
1092 {
1093 let removed = self.service_bridges.remove(service);
1094 self.service_interfaces.remove(service);
1095 if let Some(bridge) = removed {
1096 if let Some(ref cluster) = self.global_transport {
1097 if let Some(ref pubkey) = self.local_wg_pubkey {
1098 if let Err(e) = cluster.remove_allowed_ip(pubkey, bridge.subnet).await {
1099 tracing::warn!(
1100 service = %service,
1101 subnet = %bridge.subnet,
1102 error = %e,
1103 "Failed to remove service subnet from cluster AllowedIPs (non-fatal)"
1104 );
1105 }
1106 }
1107 }
1108
1109 if let Err(e) = crate::netlink::delete_bridge(&bridge.name).await {
1110 tracing::warn!(service = %service, bridge = %bridge.name, error = %e, "delete_bridge failed (non-fatal)");
1111 }
1112
1113 if let Some(registry) = self.service_subnet_registry.as_mut() {
1114 let node_key = self.local_node_id.to_string();
1115 let _ = registry.release(service, &node_key);
1116 }
1117
1118 tracing::info!(service = %service, bridge = %bridge.name, "Tore down service bridge");
1119 }
1120 }
1121 #[cfg(not(target_os = "linux"))]
1122 {
1123 if let Some(iface) = self.service_interfaces.remove(service) {
1124 tracing::info!(service = %service, interface = %iface, "Removed service overlay interface (placeholder, non-Linux)");
1125 }
1126 }
1127
1128 if let Some(mut st) = self.service_transports.remove(service) {
1132 st.transport.shutdown();
1133 self.dedicated_ports.release(st.listen_port);
1134
1135 if let Some(registry) = self.service_subnet_registry.as_mut() {
1139 let node_key = self.local_node_id.to_string();
1140 let _ = registry.release(service, &node_key);
1141 }
1142
1143 let marker_path =
1144 zlayer_paths::ZLayerDirs::new(self.data_dir.clone()).agent_network_state();
1145 let mut marker = NetworkState::load(&marker_path);
1146 let removed_entry = marker.remove(&owner_for_service(service));
1147 if removed_entry.is_some() {
1148 if let Err(e) = marker.save(&marker_path) {
1149 tracing::warn!(service = %service, error = %e, path = %marker_path.display(), "failed to persist dedicated-overlay marker removal");
1150 }
1151 }
1152
1153 #[cfg(target_os = "windows")]
1159 {
1160 self.service_ip_allocators.remove(service);
1161 if let Some(entry) = removed_entry.as_ref() {
1162 if entry.kind == "hcn-internal" {
1163 if let Ok(guid) = windows::core::GUID::try_from(entry.id.as_str()) {
1164 match zlayer_hns::network::Network::delete(guid) {
1165 Ok(()) => {
1166 tracing::info!(service = %service, id = %entry.id, "deleted per-service HCN network");
1167 }
1168 Err(e) => {
1169 tracing::warn!(service = %service, id = %entry.id, error = %e, "failed to delete per-service HCN network (may leak until uninstall)");
1170 }
1171 }
1172 } else {
1173 tracing::warn!(service = %service, id = %entry.id, "per-service marker has unparseable HCN GUID; skipping network delete");
1174 }
1175 }
1176 }
1177 }
1178 #[cfg(not(target_os = "windows"))]
1179 drop(removed_entry);
1180
1181 tracing::info!(
1182 service = %service,
1183 interface = %st.interface,
1184 listen_port = st.listen_port,
1185 "Tore down dedicated per-service overlay device"
1186 );
1187 }
1188 }
1189
1190 fn ensure_service_subnet_registry(&mut self) -> Result<(), OverlaydError> {
1197 use zlayer_overlay::allocator::ServiceSubnetRegistry;
1198
1199 if self.service_subnet_registry.is_some() {
1200 return Ok(());
1201 }
1202 let cluster_cidr = self.cluster_cidr.ok_or_else(|| {
1203 OverlaydError::Other(
1204 "service subnet registry needs a cluster CIDR (SetupGlobalOverlay first)"
1205 .to_string(),
1206 )
1207 })?;
1208 let cluster_ipnet: ipnet::IpNet = cluster_cidr.to_string().parse().map_err(|e| {
1209 OverlaydError::Other(format!(
1210 "failed to convert cluster CIDR {cluster_cidr} to ipnet::IpNet: {e}"
1211 ))
1212 })?;
1213 let slice_prefix: u8 = match cluster_ipnet {
1214 ipnet::IpNet::V4(_) => 28,
1215 ipnet::IpNet::V6(_) => 120,
1216 };
1217 let registry = ServiceSubnetRegistry::new(cluster_ipnet, slice_prefix).map_err(|e| {
1218 OverlaydError::Other(format!("failed to build ServiceSubnetRegistry: {e}"))
1219 })?;
1220 self.service_subnet_registry = Some(registry);
1221 Ok(())
1222 }
1223
1224 fn allocate_ip(&mut self, service: &str, join_global: bool) -> Result<IpAddr, OverlaydError> {
1233 let _ = join_global;
1237 #[cfg(target_os = "linux")]
1238 {
1239 if let Some(bridge) = self.service_bridges.get_mut(service) {
1240 return bridge.ip_allocator.allocate().ok_or_else(|| {
1241 OverlaydError::Overlay(format!(
1242 "service bridge {} subnet {} exhausted",
1243 bridge.name, bridge.subnet
1244 ))
1245 });
1246 }
1247 }
1248 let _ = service;
1249 self.ip_allocator.allocate()
1250 }
1251
1252 fn release_ip(&mut self, ip: IpAddr) {
1255 #[cfg(target_os = "linux")]
1256 {
1257 for bridge in self.service_bridges.values_mut() {
1258 if bridge.subnet.contains(&ip) {
1259 bridge.ip_allocator.release(ip);
1260 return;
1261 }
1262 }
1263 }
1264 self.ip_allocator.release(ip);
1265 }
1266
1267 async fn attach_container(
1274 &mut self,
1275 handle: AttachHandle,
1276 service: &str,
1277 join_global: bool,
1278 dns_server: Option<IpAddr>,
1279 dns_domain: Option<String>,
1280 ) -> Result<AttachResult, OverlaydError> {
1281 if let Some(server) = dns_server {
1285 self.dns_server_addr = Some(SocketAddr::new(server, 53));
1286 }
1287 if dns_domain.is_some() {
1288 self.dns_domain.clone_from(&dns_domain);
1289 }
1290 match handle {
1291 AttachHandle::LinuxPid { pid } => {
1292 let ip = self
1293 .attach_container_linux(pid, service, join_global)
1294 .await?;
1295 Ok(AttachResult {
1296 ip,
1297 namespace_guid: None,
1298 })
1299 }
1300 AttachHandle::WindowsContainer { container_id, ip } => {
1301 self.attach_container_windows(&container_id, service, ip, dns_server, dns_domain)
1302 .await
1303 }
1304 AttachHandle::GuestManaged { .. } => Err(OverlaydError::Other(
1305 "guest-managed attach must go through attach_container_guest, not attach_container"
1306 .to_string(),
1307 )),
1308 }
1309 }
1310
1311 async fn detach_container(&mut self, handle: AttachHandle) -> Result<(), OverlaydError> {
1317 match handle {
1318 AttachHandle::LinuxPid { pid } => self.detach_container_linux(pid).await,
1319 AttachHandle::WindowsContainer { container_id, .. } => {
1320 self.detach_container_windows(&container_id).await
1321 }
1322 AttachHandle::GuestManaged { .. } => Err(OverlaydError::Other(
1323 "guest-managed detach must go through detach_container_guest, not detach_container"
1324 .to_string(),
1325 )),
1326 }
1327 }
1328
    #[allow(clippy::cast_possible_truncation)]
    /// Attach a guest-managed workload to the overlay.
    ///
    /// Allocates an overlay IP (from the service bridge pool on Linux when a
    /// bridge exists for `service`, otherwise from the node allocator),
    /// generates a key pair for the guest, registers the guest as a peer on
    /// the global transport, records the attachment under `id`, and returns
    /// the configuration the guest needs to bring up its own interface.
    ///
    /// `join_global` is accepted but currently ignored. `dns_server` and
    /// `dns_domain` take precedence over the daemon-level values when given.
    ///
    /// # Errors
    /// Fails when the global overlay has not been set up (no node public key
    /// or no global transport), when no IP can be allocated, when key
    /// generation fails, or when the peer cannot be added to the global
    /// transport. The allocated IP is released on the key-generation and
    /// add-peer failure paths.
    async fn attach_container_guest(
        &mut self,
        id: &str,
        service: &str,
        join_global: bool,
        dns_server: Option<IpAddr>,
        dns_domain: Option<String>,
    ) -> Result<GuestOverlayConfig, OverlaydError> {
        // Both preconditions are checked before anything is allocated, so
        // these early returns have nothing to roll back.
        let node_public_key = self.transport_public_key.clone().ok_or_else(|| {
            OverlaydError::Other(
                "guest-managed attach requires the global overlay to be set up first \
                 (no node WireGuard public key)"
                    .to_string(),
            )
        })?;
        if self.global_transport.is_none() {
            return Err(OverlaydError::Other(
                "guest-managed attach requires the global overlay to be set up first \
                 (no global transport)"
                    .to_string(),
            ));
        }

        // `pool_service` remembers which pool the IP came from so that
        // `release_guest_ip` can hand it back to the same one.
        let (overlay_ip, prefix_len, pool_service): (IpAddr, u8, Option<String>) = {
            #[cfg(target_os = "linux")]
            {
                if let Some(bridge) = self.service_bridges.get_mut(service) {
                    let ip = bridge.ip_allocator.allocate().ok_or_else(|| {
                        OverlaydError::Overlay(format!(
                            "service bridge {} subnet {} exhausted",
                            bridge.name, bridge.subnet
                        ))
                    })?;
                    let prefix = bridge.subnet.prefix_len();
                    (ip, prefix, Some(service.to_string()))
                } else {
                    let ip = self.ip_allocator.allocate()?;
                    (ip, self.slice_prefix_len(), None)
                }
            }
            #[cfg(not(target_os = "linux"))]
            {
                // No service bridges off Linux: always use the node allocator.
                let _ = service;
                let ip = self.ip_allocator.allocate()?;
                (ip, self.slice_prefix_len(), None)
            }
        };
        // Intentionally unused; silences the unused-parameter warning.
        let _ = join_global;

        let (private_key, public_key) = OverlayTransport::generate_keys().await.map_err(|e| {
            // Give the IP back before surfacing the failure.
            self.release_guest_ip(overlay_ip, pool_service.as_deref());
            OverlaydError::Overlay(format!("failed to generate guest keys: {e}"))
        })?;

        // Guest-side view: a single peer (this node) whose allowed IPs are the
        // cluster CIDR, else the slice CIDR, else everything.
        let node_allowed = self
            .cluster_cidr
            .or(self.slice_cidr)
            .map_or_else(|| String::from("0.0.0.0/0"), |c| c.to_string());
        let node_endpoint = self.node_endpoint_for_guest();
        let peers: Vec<PeerSpec> = vec![PeerSpec {
            public_key: node_public_key,
            endpoint: node_endpoint,
            allowed_ips: node_allowed,
            persistent_keepalive_secs: 25,
        }];

        // Node-side view: the guest is a peer that owns exactly its own
        // address (/32 or /128) and is registered with a placeholder endpoint.
        let host_route = format!(
            "{}/{}",
            overlay_ip,
            if overlay_ip.is_ipv6() { 128 } else { 32 }
        );
        let guest_peer = PeerSpec {
            public_key: public_key.clone(),
            endpoint: "0.0.0.0:0".to_string(),
            allowed_ips: host_route,
            persistent_keepalive_secs: 0,
        };
        // NOTE(review): the two `?` below return without releasing
        // `overlay_ip`. They look unreachable in practice (the endpoint is a
        // parseable literal and the global transport was checked above) —
        // confirm, or route them through `release_guest_ip` as well.
        let guest_peer_info = peer_spec_to_info(&guest_peer)?;
        {
            let transport = self.transport_for_scope(&PeerScope::Global)?;
            if let Err(e) = Self::add_peer_on(transport, &guest_peer_info).await {
                self.release_guest_ip(overlay_ip, pool_service.as_deref());
                return Err(e);
            }
        }
        // Bookkeeping used by `detach_container_guest` to undo this attach.
        // NOTE(review): an existing entry for the same `id` is overwritten
        // without releasing its IP or peer — confirm callers never re-attach.
        self.global_peers
            .insert(public_key.clone(), guest_peer.clone());
        self.guest_attachments.insert(
            id.to_string(),
            GuestAttachInfo {
                overlay_ip,
                public_key: public_key.clone(),
                service_name: pool_service,
            },
        );

        Ok(GuestOverlayConfig {
            overlay_ip,
            prefix_len,
            private_key,
            public_key,
            listen_port: self.overlay_port,
            peers,
            // Explicit arguments win over the daemon-level DNS settings.
            dns_server: dns_server.or_else(|| self.dns_server_addr.map(|s| s.ip())),
            dns_domain: dns_domain.or_else(|| self.dns_domain.clone()),
        })
    }
1506
1507 async fn detach_container_guest(&mut self, id: &str) -> Result<(), OverlaydError> {
1514 let Some(info) = self.guest_attachments.remove(id) else {
1515 return Ok(());
1516 };
1517 self.global_peers.remove(&info.public_key);
1519 if let Ok(transport) = self.transport_for_scope(&PeerScope::Global) {
1520 if let Err(e) = Self::remove_peer_on(transport, &info.public_key).await {
1521 tracing::warn!(
1522 guest = %id,
1523 pubkey = %info.public_key,
1524 error = %e,
1525 "failed to remove guest peer from global transport"
1526 );
1527 }
1528 }
1529 self.release_guest_ip(info.overlay_ip, info.service_name.as_deref());
1531 Ok(())
1532 }
1533
1534 fn release_guest_ip(&mut self, ip: IpAddr, service: Option<&str>) {
1538 #[cfg(target_os = "linux")]
1539 {
1540 if let Some(svc) = service {
1541 if let Some(bridge) = self.service_bridges.get_mut(svc) {
1542 bridge.ip_allocator.release(ip);
1543 return;
1544 }
1545 }
1546 }
1547 let _ = service;
1548 self.ip_allocator.release(ip);
1549 }
1550
1551 fn slice_prefix_len(&self) -> u8 {
1554 self.slice_cidr.or(self.cluster_cidr).map_or(
1555 if self.node_ip.is_some_and(|ip| ip.is_ipv6()) {
1556 64
1557 } else {
1558 24
1559 },
1560 |c| c.prefix(),
1561 )
1562 }
1563
1564 fn node_endpoint_for_guest(&self) -> String {
1571 let ip = self.node_ip.unwrap_or(IpAddr::V4(Ipv4Addr::UNSPECIFIED));
1572 SocketAddr::new(ip, self.overlay_port).to_string()
1573 }
1574
1575 #[cfg(target_os = "linux")]
1578 async fn attach_container_linux(
1579 &mut self,
1580 container_pid: u32,
1581 service: &str,
1582 join_global: bool,
1583 ) -> Result<IpAddr, OverlaydError> {
1584 let (bridge_name, bridge_subnet, bridge_gateway, container_ip) = {
1586 let bridge = self.service_bridges.get_mut(service).ok_or_else(|| {
1587 OverlaydError::Other(format!(
1588 "no service bridge for service {service}; call setup_service_overlay() first"
1589 ))
1590 })?;
1591 let ip = bridge.ip_allocator.allocate().ok_or_else(|| {
1592 OverlaydError::Overlay(format!(
1593 "service bridge {} subnet {} exhausted",
1594 bridge.name, bridge.subnet
1595 ))
1596 })?;
1597 (bridge.name.clone(), bridge.subnet, bridge.gateway, ip)
1598 };
1599
1600 let bridge_params = BridgeAttachParams {
1601 bridge_name: &bridge_name,
1602 gateway: bridge_gateway,
1603 subnet_prefix_len: bridge_subnet.prefix_len(),
1604 };
1605 if let Err(e) = self
1606 .attach_to_interface(
1607 container_pid,
1608 container_ip,
1609 "s",
1610 "eth0",
1611 Some(&bridge_params),
1612 )
1613 .await
1614 {
1615 if let Some(bridge) = self.service_bridges.get_mut(service) {
1616 bridge.ip_allocator.release(container_ip);
1617 }
1618 return Err(e);
1619 }
1620
1621 let mut global_ip: Option<IpAddr> = None;
1622 if join_global && self.global_interface.is_some() {
1623 let g_ip = self.ip_allocator.allocate()?;
1624 self.attach_to_interface(container_pid, g_ip, "g", "eth1", None)
1625 .await?;
1626 global_ip = Some(g_ip);
1627 }
1628
1629 self.attached.insert(
1630 container_pid,
1631 AttachInfo {
1632 service_ip: container_ip,
1633 service_name: Some(service.to_string()),
1634 global_ip,
1635 joined_global: global_ip.is_some(),
1636 },
1637 );
1638
1639 Ok(container_ip)
1640 }
1641
1642 #[cfg(not(target_os = "linux"))]
1645 #[allow(clippy::unused_async)]
1646 async fn attach_container_linux(
1647 &mut self,
1648 _container_pid: u32,
1649 service: &str,
1650 _join_global: bool,
1651 ) -> Result<IpAddr, OverlaydError> {
1652 tracing::debug!(service = %service, "LinuxPid attach is a no-op off Linux; using node overlay IP");
1653 Ok(self.node_ip.unwrap_or(IpAddr::V4(Ipv4Addr::LOCALHOST)))
1654 }
1655
1656 #[cfg(target_os = "linux")]
1658 async fn detach_container_linux(&mut self, pid: u32) -> Result<(), OverlaydError> {
1659 let Some(info) = self.attached.remove(&pid) else {
1660 return Ok(());
1661 };
1662
1663 let veth_s = format!("veth-{pid}-s");
1664 if let Err(e) = crate::netlink::delete_link_by_name(&veth_s).await {
1665 tracing::warn!(link = %veth_s, pid, error = %e, "Failed to delete service veth");
1666 }
1667 if info.joined_global {
1668 let veth_g = format!("veth-{pid}-g");
1669 if let Err(e) = crate::netlink::delete_link_by_name(&veth_g).await {
1670 tracing::warn!(link = %veth_g, pid, error = %e, "Failed to delete global veth");
1671 }
1672 }
1673
1674 if let Some(svc) = info.service_name.as_deref() {
1675 if let Some(bridge) = self.service_bridges.get_mut(svc) {
1676 bridge.ip_allocator.release(info.service_ip);
1677 } else {
1678 tracing::debug!(service = %svc, ip = %info.service_ip, "detach: service bridge already torn down; dropping service IP release");
1679 }
1680 } else {
1681 self.ip_allocator.release(info.service_ip);
1682 }
1683 if let Some(g) = info.global_ip {
1684 self.ip_allocator.release(g);
1685 }
1686 Ok(())
1687 }
1688
    #[cfg(not(target_os = "linux"))]
    /// Non-Linux stand-in for the PID-based detach: always succeeds without
    /// doing anything.
    #[allow(clippy::unused_async)]
    async fn detach_container_linux(&mut self, _pid: u32) -> Result<(), OverlaydError> {
        Ok(())
    }
1695
1696 #[cfg(target_os = "linux")]
1700 async fn sweep_orphan_veths() {
1701 let links = match crate::netlink::list_all_links().await {
1702 Ok(links) => links,
1703 Err(e) => {
1704 tracing::warn!(error = %e, "Failed to list links for orphan sweep");
1705 return;
1706 }
1707 };
1708 for (_index, name) in links {
1709 let remainder = if let Some(r) = name.strip_prefix("veth-") {
1710 r
1711 } else if let Some(r) = name.strip_prefix("vc-") {
1712 r
1713 } else {
1714 continue;
1715 };
1716 let Some(pid_str) = remainder.split('-').next() else {
1717 continue;
1718 };
1719 let pid: u32 = match pid_str.parse() {
1720 Ok(p) => p,
1721 Err(_) => continue,
1722 };
1723 if Path::new(&format!("/proc/{pid}")).exists() {
1724 continue;
1725 }
1726 tracing::info!(link = %name, pid = pid, "Deleting orphan veth");
1727 if let Err(e) = crate::netlink::delete_link_by_name(&name).await {
1728 tracing::warn!(link = %name, error = %e, "Failed to delete orphan veth");
1729 }
1730 }
1731 }
1732
    #[cfg(target_os = "linux")]
    #[allow(clippy::too_many_lines)]
    /// Create a veth pair for `container_pid`, move one end into the
    /// container's network namespace as `container_iface`, configure it with
    /// `ip`, and connect the host end either to a bridge (`bridge` is `Some`)
    /// or via a host route (`bridge` is `None`).
    ///
    /// `tag` distinguishes the interfaces of one container: the host end is
    /// named `veth-<pid>-<tag>` and the temporary peer `vc-<pid>-<tag>`.
    ///
    /// # Errors
    /// Returns `OverlaydError::Overlay` when the namespace cannot be opened,
    /// when pre-cleanup fails, or when any wiring step fails. On a wiring
    /// failure both link names are deleted (best-effort) before returning.
    async fn attach_to_interface(
        &self,
        container_pid: u32,
        ip: IpAddr,
        tag: &str,
        container_iface: &str,
        bridge: Option<&BridgeAttachParams<'_>>,
    ) -> Result<(), OverlaydError> {
        // Opportunistically clear links left behind by dead containers.
        Self::sweep_orphan_veths().await;

        let is_v6 = ip.is_ipv6();
        // In-container prefix: the bridge subnet's when bridged, otherwise a
        // fixed /64 (IPv6) or /24 (IPv4).
        let prefix_len: u8 = if let Some(b) = bridge {
            b.subnet_prefix_len
        } else if is_v6 {
            64
        } else {
            24
        };
        // Prefix for the single-address host route used in the unbridged case.
        let host_prefix: u8 = if is_v6 { 128 } else { 32 };

        let veth_host = format!("veth-{container_pid}-{tag}");
        let veth_pending = format!("vc-{container_pid}-{tag}");
        let veth_container = container_iface.to_string();

        // Hold the namespace by fd so later steps act on this exact namespace.
        let container_ns_fd = std::os::fd::OwnedFd::from(
            std::fs::File::open(format!("/proc/{container_pid}/ns/net")).map_err(|e| {
                OverlaydError::Overlay(format!("Failed to open /proc/{container_pid}/ns/net: {e}"))
            })?,
        );

        // Remove stale links with the names about to be used.
        // NOTE(review): errors propagate here, so this presumably relies on
        // delete_link_by_name succeeding when the link is absent — confirm.
        crate::netlink::delete_link_by_name(&veth_host)
            .await
            .map_err(|e| OverlaydError::Overlay(format!("pre-cleanup delete {veth_host}: {e}")))?;
        crate::netlink::delete_link_by_name(&veth_pending)
            .await
            .map_err(|e| {
                OverlaydError::Overlay(format!("pre-cleanup delete {veth_pending}: {e}"))
            })?;

        // Owned copies so they can be moved into the async block / closure.
        let bridge_gateway: Option<IpAddr> = bridge.map(|b| b.gateway);
        let bridge_name: Option<String> = bridge.map(|b| b.bridge_name.to_string());
        let node_ip = self.node_ip;

        // All fallible wiring runs inside one async block so a single cleanup
        // path below can undo a partial setup.
        let result: Result<(), OverlaydError> = async {
            crate::netlink::create_veth_pair(&veth_host, &veth_pending)
                .await
                .map_err(|e| OverlaydError::Overlay(format!("create veth pair: {e}")))?;

            crate::netlink::move_link_into_netns_fd_and_rename(
                &veth_pending,
                AsFd::as_fd(&container_ns_fd),
                &veth_container,
            )
            .map_err(|e| OverlaydError::Overlay(format!("move veth into netns: {e}")))?;

            let vc = veth_container.clone();
            let bridge_gateway_for_netns = bridge_gateway;
            // The in-namespace configuration runs on a blocking thread; the
            // namespace fd is moved into it.
            tokio::task::spawn_blocking(move || {
                crate::netlink::with_netns_fd_async(container_ns_fd, move || async move {
                    crate::netlink::add_address_to_link_by_name(&vc, ip, prefix_len).await?;
                    crate::netlink::set_link_up_by_name(&vc).await?;
                    crate::netlink::set_link_up_by_name("lo").await?;
                    // Only bridged attachments get a default route.
                    if let Some(gw) = bridge_gateway_for_netns {
                        crate::netlink::add_default_route_via_gateway(gw).await?;
                    }
                    Ok(())
                })
            })
            .await
            .map_err(|e| OverlaydError::Overlay(format!("container netns task panicked: {e}")))?
            .map_err(|e| OverlaydError::Overlay(format!("container netns ops: {e}")))?;

            crate::netlink::set_link_up_by_name(&veth_host)
                .await
                .map_err(|e| OverlaydError::Overlay(format!("set {veth_host} up: {e}")))?;

            if let Some(bname) = bridge_name.as_deref() {
                // Bridged: the host end becomes a port of the service bridge.
                crate::netlink::add_link_to_bridge(&veth_host, bname)
                    .await
                    .map_err(|e| {
                        OverlaydError::Overlay(format!(
                            "enslave {veth_host} to bridge {bname}: {e}"
                        ))
                    })?;
            } else {
                // Unbridged: route the single address through the host end.
                crate::netlink::replace_route_via_dev(ip, host_prefix, &veth_host, node_ip)
                    .await
                    .map_err(|e| {
                        OverlaydError::Overlay(format!("host route for {ip}/{host_prefix}: {e}"))
                    })?;
            }

            // Forwarding is enabled best-effort; failures are ignored.
            let _ = crate::netlink::set_sysctl("net.ipv4.ip_forward", "1");
            let _ = crate::netlink::set_sysctl("net.ipv6.conf.all.forwarding", "1");

            Ok(())
        }
        .await;

        // Best-effort removal of whatever was created before the failure.
        if result.is_err() {
            let _ = crate::netlink::delete_link_by_name(&veth_host).await;
            let _ = crate::netlink::delete_link_by_name(&veth_pending).await;
        }
        result
    }
1841
    #[cfg(target_os = "windows")]
    /// Attach a Windows container to an HCN network and return its IP and the
    /// namespace GUID of the created endpoint attachment.
    ///
    /// When `service` has a dedicated subnet, the per-service network and
    /// per-service allocator are used; otherwise the node slice network and
    /// node allocator are used. `ip_override` bypasses allocation; in the
    /// dedicated case it must lie inside the service subnet.
    ///
    /// # Errors
    /// Fails when the network cannot be ensured, when no node slice is
    /// assigned (non-dedicated case), when the override is outside the
    /// dedicated subnet, when allocation fails, or when the HCN endpoint
    /// attach fails.
    async fn attach_container_windows(
        &mut self,
        container_id: &str,
        service: &str,
        ip_override: Option<IpAddr>,
        dns_server: Option<IpAddr>,
        dns_domain: Option<String>,
    ) -> Result<AttachResult, OverlaydError> {
        let dedicated_subnet = self.dedicated_service_subnet(service);

        let (net_id, ip, prefix_length) = if let Some(svc_subnet) = dedicated_subnet {
            let net_id = self.ensure_service_network(service, svc_subnet).await?;

            // The allocator works on `IpNetwork`; convert via the string form.
            let svc_ipnetwork: IpNetwork = svc_subnet.to_string().parse().map_err(|e| {
                OverlaydError::Other(format!("failed to parse service subnet {svc_subnet}: {e}"))
            })?;
            // One allocator per service, created on first use.
            let allocator = self
                .service_ip_allocators
                .entry(service.to_string())
                .or_insert_with(|| IpAllocator::new(svc_ipnetwork));
            let ip = match ip_override {
                Some(ip) if svc_subnet.contains(&ip) => ip,
                Some(ip) => {
                    return Err(OverlaydError::Other(format!(
                        "overridden IP {ip} is not inside dedicated service subnet {svc_subnet} for service {service}"
                    )));
                }
                None => allocator.allocate()?,
            };
            (net_id, ip, svc_subnet.prefix_len())
        } else {
            let slice = self.slice_cidr.ok_or_else(|| {
                OverlaydError::Other(
                    "no node slice assigned yet (SetupGlobalOverlay with slice_cidr first)"
                        .to_string(),
                )
            })?;
            let slice_ipnet: ipnet::IpNet = slice.to_string().parse().map_err(|e| {
                OverlaydError::Other(format!("failed to parse slice CIDR {slice}: {e}"))
            })?;
            let net_id = self.ensure_overlay_network(slice_ipnet).await?;
            // NOTE(review): unlike the dedicated branch, an override is not
            // checked against the slice here — confirm that is intended.
            let ip = match ip_override {
                Some(ip) => ip,
                None => self.ip_allocator.allocate()?,
            };
            (net_id, ip, slice_ipnet.prefix_len())
        };

        // Explicit DNS arguments win over the daemon-level settings.
        let dns_server_eff = dns_server.or_else(|| self.dns_server_addr.map(|a| a.ip()));
        let dns_domain_for_attach = dns_domain.or_else(|| self.dns_domain.clone());
        let cluster_cidr = self.cluster_cidr.map(|c| c.to_string()).unwrap_or_default();
        let owner_tag = owner_tag(&self.deployment_or_default());
        let cid = container_id.to_string();

        // NOTE(review): if the attach below fails, an IP taken from an
        // allocator above is not released — confirm whether that is handled
        // elsewhere.
        let attachment = tokio::task::spawn_blocking(move || {
            zlayer_hns::attach::EndpointAttachment::create_overlay(
                net_id,
                &owner_tag,
                cid.as_str(),
                ip,
                prefix_length,
                &cluster_cidr,
                dns_server_eff,
                dns_domain_for_attach.as_deref(),
            )
        })
        .await
        .map_err(|e| OverlaydError::Other(format!("spawn_blocking join failed: {e}")))?
        .map_err(|e| OverlaydError::Overlay(format!("HCN overlay endpoint attach failed: {e}")))?;

        let namespace_id = attachment.namespace_id();
        let bare_guid = format_guid_bare(namespace_id);

        // Remembered so `detach_container_windows` can release the IP later.
        self.hcn_cleanup
            .insert(namespace_id, (service.to_string(), ip));

        tracing::info!(
            ns = %bare_guid,
            service = %service,
            ip = %ip,
            "Attached container to HCN overlay"
        );

        Ok(AttachResult {
            ip,
            namespace_guid: Some(bare_guid),
        })
    }
1958
1959 #[cfg(not(target_os = "windows"))]
1961 #[allow(clippy::unused_async)]
1962 async fn attach_container_windows(
1963 &mut self,
1964 _container_id: &str,
1965 _service: &str,
1966 _ip_override: Option<IpAddr>,
1967 _dns_server: Option<IpAddr>,
1968 _dns_domain: Option<String>,
1969 ) -> Result<AttachResult, OverlaydError> {
1970 Err(OverlaydError::Other(
1971 "WindowsContainer attach is only supported on Windows".to_string(),
1972 ))
1973 }
1974
1975 #[cfg(target_os = "windows")]
1978 #[allow(clippy::unused_async)]
1979 async fn detach_container_windows(
1980 &mut self,
1981 namespace_guid: &str,
1982 ) -> Result<(), OverlaydError> {
1983 use windows::core::GUID;
1984
1985 let Ok(guid) = GUID::try_from(namespace_guid) else {
1986 tracing::warn!(ns = %namespace_guid, "detach: unparseable namespace GUID");
1987 return Ok(());
1988 };
1989 if let Some((service, ip)) = self.hcn_cleanup.remove(&guid) {
1990 self.ip_allocator.release(ip);
1991 tracing::info!(ns = %namespace_guid, service = %service, ip = %ip, "Released HCN overlay attachment");
1992 }
1993 Ok(())
1994 }
1995
    #[cfg(not(target_os = "windows"))]
    /// Non-Windows stand-in for the HCN detach: always succeeds without doing
    /// anything.
    #[allow(clippy::unused_async)]
    async fn detach_container_windows(
        &mut self,
        _namespace_guid: &str,
    ) -> Result<(), OverlaydError> {
        Ok(())
    }
2005
    #[cfg(target_os = "windows")]
    /// Return the GUID of the node's HCN overlay network, creating the
    /// network when it does not exist yet.
    ///
    /// Lookup order: (1) the id recorded in the on-disk network-state marker,
    /// if that network can still be opened; (2) an existing HCN network with
    /// the expected name; (3) a freshly created network over `slice_cidr`.
    /// A newly created network is recorded in the marker file.
    ///
    /// # Errors
    /// Fails when a blocking task cannot be joined, when a GUID cannot be
    /// generated, when the uplink adapter cannot be found (transparent mode),
    /// or when HCN network creation fails. Failing to persist the marker is
    /// only logged.
    #[allow(clippy::too_many_lines)]
    async fn ensure_overlay_network(
        &mut self,
        slice_cidr: ipnet::IpNet,
    ) -> Result<windows::core::GUID, OverlaydError> {
        use windows::core::GUID;

        let daemon_name = self.deployment_or_default();
        let net_name = overlay_network_name(&daemon_name);
        let marker_path =
            zlayer_paths::ZLayerDirs::new(self.data_dir.clone()).agent_network_state();

        // (1) Prefer the id recorded in the marker, if it still opens.
        if let Some(recorded_id) = crate::network_state::NetworkState::load(&marker_path)
            .get(crate::network_state::OWNER_BASE)
            .and_then(|entry| GUID::try_from(entry.id.as_str()).ok())
        {
            let reopened = tokio::task::spawn_blocking(move || {
                zlayer_hns::network::Network::open(recorded_id).ok()
            })
            .await
            .map_err(|e| OverlaydError::Other(format!("spawn_blocking join failed: {e}")))?;
            if reopened.is_some() {
                tracing::info!(name = %net_name, "reusing HCN overlay network from marker");
                return Ok(recorded_id);
            }
        }

        // (2) Otherwise scan all HCN networks for one with the expected name.
        // Networks that cannot be opened or queried are skipped.
        let target_name = net_name.clone();
        let existing = tokio::task::spawn_blocking(move || -> Option<GUID> {
            let guids = zlayer_hns::network::list("{}").ok()?;
            for guid in guids {
                let Ok(network) = zlayer_hns::network::Network::open(guid) else {
                    continue;
                };
                if matches!(network.query("{}"), Ok(props) if props.name == target_name) {
                    return Some(guid);
                }
            }
            None
        })
        .await
        .map_err(|e| OverlaydError::Other(format!("spawn_blocking join failed: {e}")))?;

        // NOTE(review): this path returns without writing the marker, so the
        // next call will scan by name again — confirm that is intended.
        if let Some(existing_id) = existing {
            tracing::info!(name = %net_name, "reusing existing HCN overlay network");
            return Ok(existing_id);
        }

        // (3) Nothing reusable: create a new network.
        let net_id = GUID::new()
            .map_err(|e| OverlaydError::Other(format!("GUID::new for overlay network: {e}")))?;
        let subnet_str = slice_cidr.to_string();

        // Transparent mode is selected by a non-blank uplink environment
        // variable; otherwise an Internal network is created.
        let use_transparent = std::env::var(zlayer_hns::adapter::ZLAYER_UPLINK_ENV)
            .ok()
            .is_some_and(|v| !v.trim().is_empty());

        let net_name_for_create = net_name.clone();
        let subnet_for_create = subnet_str.clone();
        if use_transparent {
            let uplink = zlayer_hns::adapter::find_primary_adapter()
                .map_err(|e| OverlaydError::Other(format!("find_primary_adapter: {e}")))?;
            tracing::warn!(uplink = %uplink, "ZLAYER_HCN_UPLINK_ADAPTER set: creating HCN *Transparent* overlay bound to a physical NIC");
            tokio::task::spawn_blocking(move || {
                zlayer_hns::network::Network::create_transparent(
                    net_id,
                    &net_name_for_create,
                    &subnet_for_create,
                    &uplink,
                )
            })
            .await
            .map_err(|e| OverlaydError::Other(format!("spawn_blocking join failed: {e}")))?
            .map_err(|e| {
                OverlaydError::Overlay(format!("HcnCreateNetwork transparent ({net_name}): {e}"))
            })?;
        } else {
            tokio::task::spawn_blocking(move || {
                zlayer_hns::network::Network::create_internal(
                    net_id,
                    &net_name_for_create,
                    &subnet_for_create,
                )
            })
            .await
            .map_err(|e| OverlaydError::Other(format!("spawn_blocking join failed: {e}")))?
            .map_err(|e| {
                OverlaydError::Overlay(format!("HcnCreateNetwork internal ({net_name}): {e}"))
            })?;
        }

        // Fixed two-second pause after creation.
        // NOTE(review): presumably gives HCN time to settle before endpoints
        // are attached — confirm the reason and the duration.
        tokio::time::sleep(std::time::Duration::from_millis(2000)).await;

        tracing::info!(
            subnet = %subnet_str,
            mode = if use_transparent { "Transparent" } else { "Internal" },
            "created HCN overlay network"
        );

        // Reload the marker just before writing and upsert the base entry.
        let mut marker = crate::network_state::NetworkState::load(&marker_path);
        marker.upsert(crate::network_state::ManagedNetwork {
            owner: crate::network_state::OWNER_BASE.to_string(),
            kind: if use_transparent {
                "hcn-transparent"
            } else {
                "hcn-internal"
            }
            .to_string(),
            name: net_name.clone(),
            id: format_guid_bare(net_id),
            subnet: subnet_str.clone(),
            wg_port: None,
            wg_private_key: None,
            wg_public_key: None,
            interface: None,
        });
        // Persisting is best-effort: the by-name scan above can still find
        // the network when the marker is missing.
        if let Err(e) = marker.save(&marker_path) {
            tracing::warn!(error = %e, path = %marker_path.display(), "failed to persist agent network marker (network still reusable by name)");
        }

        Ok(net_id)
    }
2147
    #[cfg(target_os = "windows")]
    /// Return the GUID of the per-service HCN Internal network for `service`,
    /// creating it over `subnet` when it does not exist yet.
    ///
    /// Lookup order: (1) the id recorded in the marker for this service's
    /// owner key, but only when that entry's kind is `hcn-internal` and the
    /// network still opens; (2) an existing HCN network with the expected
    /// name; (3) a freshly created Internal network. A newly created network
    /// is recorded in the marker, preserving any WireGuard fields the
    /// previous entry for this owner carried.
    ///
    /// # Errors
    /// Fails when a blocking task cannot be joined, when a GUID cannot be
    /// generated, or when HCN network creation fails. Failing to persist the
    /// marker is only logged.
    #[allow(clippy::too_many_lines)]
    async fn ensure_service_network(
        &mut self,
        service: &str,
        subnet: ipnet::IpNet,
    ) -> Result<windows::core::GUID, OverlaydError> {
        use windows::core::GUID;

        let daemon_name = self.deployment_or_default();
        let net_name = format!("{}-svc-{service}", overlay_network_name(&daemon_name));
        let owner = owner_for_service(service);
        let marker_path =
            zlayer_paths::ZLayerDirs::new(self.data_dir.clone()).agent_network_state();

        // (1) Marker lookup, restricted to entries of kind `hcn-internal`.
        let recorded_hcn_id = crate::network_state::NetworkState::load(&marker_path)
            .get(&owner)
            .filter(|entry| entry.kind == "hcn-internal")
            .and_then(|entry| GUID::try_from(entry.id.as_str()).ok());
        if let Some(recorded_id) = recorded_hcn_id {
            let reopened = tokio::task::spawn_blocking(move || {
                zlayer_hns::network::Network::open(recorded_id).ok()
            })
            .await
            .map_err(|e| OverlaydError::Other(format!("spawn_blocking join failed: {e}")))?;
            if reopened.is_some() {
                tracing::info!(name = %net_name, service = %service, "reusing per-service HCN network from marker");
                return Ok(recorded_id);
            }
        }

        // (2) Scan all HCN networks for one with the expected name. Networks
        // that cannot be opened or queried are skipped.
        let target_name = net_name.clone();
        let existing = tokio::task::spawn_blocking(move || -> Option<GUID> {
            let guids = zlayer_hns::network::list("{}").ok()?;
            for guid in guids {
                let Ok(network) = zlayer_hns::network::Network::open(guid) else {
                    continue;
                };
                if matches!(network.query("{}"), Ok(props) if props.name == target_name) {
                    return Some(guid);
                }
            }
            None
        })
        .await
        .map_err(|e| OverlaydError::Other(format!("spawn_blocking join failed: {e}")))?;

        // NOTE(review): this path returns without writing the marker —
        // confirm that is intended.
        if let Some(existing_id) = existing {
            tracing::info!(name = %net_name, service = %service, "reusing existing per-service HCN network");
            return Ok(existing_id);
        }

        // (3) Nothing reusable: create a new Internal network.
        let net_id = GUID::new()
            .map_err(|e| OverlaydError::Other(format!("GUID::new for per-service network: {e}")))?;
        let subnet_str = subnet.to_string();

        let net_name_for_create = net_name.clone();
        let subnet_for_create = subnet_str.clone();
        tokio::task::spawn_blocking(move || {
            zlayer_hns::network::Network::create_internal(
                net_id,
                &net_name_for_create,
                &subnet_for_create,
            )
        })
        .await
        .map_err(|e| OverlaydError::Other(format!("spawn_blocking join failed: {e}")))?
        .map_err(|e| {
            OverlaydError::Overlay(format!("HcnCreateNetwork internal ({net_name}): {e}"))
        })?;

        // Fixed two-second pause after creation.
        // NOTE(review): presumably gives HCN time to settle before endpoints
        // are attached — confirm the reason and the duration.
        tokio::time::sleep(std::time::Duration::from_millis(2000)).await;

        tracing::info!(
            service = %service,
            subnet = %subnet_str,
            "created per-service HCN Internal network"
        );

        let mut marker = crate::network_state::NetworkState::load(&marker_path);
        // The owner key may already hold an entry; carry its WireGuard fields
        // and interface over so the upsert does not erase them.
        let carried = marker.get(&owner).cloned();
        marker.upsert(crate::network_state::ManagedNetwork {
            owner,
            kind: "hcn-internal".to_string(),
            name: net_name.clone(),
            id: format_guid_bare(net_id),
            subnet: subnet_str.clone(),
            wg_port: carried.as_ref().and_then(|c| c.wg_port),
            wg_private_key: carried.as_ref().and_then(|c| c.wg_private_key.clone()),
            wg_public_key: carried.as_ref().and_then(|c| c.wg_public_key.clone()),
            interface: carried.as_ref().and_then(|c| c.interface.clone()),
        });
        // Persisting is best-effort: the by-name scan above can still find
        // the network when the marker is missing.
        if let Err(e) = marker.save(&marker_path) {
            tracing::warn!(service = %service, error = %e, path = %marker_path.display(), "failed to persist per-service network marker (network still reusable by name)");
        }

        Ok(net_id)
    }
2294
2295 #[cfg(target_os = "windows")]
2307 fn dedicated_service_subnet(&self, service: &str) -> Option<ipnet::IpNet> {
2308 if let Some(st) = self.service_transports.get(service) {
2309 return Some(st.subnet);
2310 }
2311 let marker_path =
2312 zlayer_paths::ZLayerDirs::new(self.data_dir.clone()).agent_network_state();
2313 crate::network_state::NetworkState::load(&marker_path)
2314 .get(&owner_for_service(service))
2315 .filter(|entry| entry.kind == "hcn-internal")
2316 .and_then(|entry| entry.subnet.parse::<ipnet::IpNet>().ok())
2317 }
2318
2319 #[cfg(target_os = "windows")]
2322 fn deployment_or_default(&self) -> String {
2323 if self.deployment.is_empty() {
2324 "zlayer".to_string()
2325 } else {
2326 self.deployment.clone()
2327 }
2328 }
2329
2330 fn transport_for_scope(&self, scope: &PeerScope) -> Result<&OverlayTransport, OverlaydError> {
2341 match scope {
2342 PeerScope::Global => self
2343 .global_transport
2344 .as_ref()
2345 .ok_or_else(|| OverlaydError::Other("global overlay not set up".into())),
2346 PeerScope::Service { service } => self
2347 .service_transports
2348 .get(service)
2349 .map(|s| &s.transport)
2350 .ok_or_else(|| {
2351 OverlaydError::Other(format!("no dedicated overlay for service {service}"))
2352 }),
2353 }
2354 }
2355
2356 async fn add_peer_on(
2361 transport: &OverlayTransport,
2362 peer: &PeerInfo,
2363 ) -> Result<(), OverlaydError> {
2364 transport
2365 .add_peer(peer)
2366 .await
2367 .map_err(|e| OverlaydError::Overlay(format!("add_peer failed: {e}")))
2368 }
2369
2370 async fn remove_peer_on(
2375 transport: &OverlayTransport,
2376 pubkey: &str,
2377 ) -> Result<(), OverlaydError> {
2378 transport
2379 .remove_peer(pubkey)
2380 .await
2381 .map_err(|e| OverlaydError::Overlay(format!("remove_peer failed: {e}")))
2382 }
2383
2384 async fn add_allowed_ip_on(
2389 transport: &OverlayTransport,
2390 pubkey: &str,
2391 cidr: &str,
2392 ) -> Result<(), OverlaydError> {
2393 let net: ipnet::IpNet = cidr
2394 .parse()
2395 .map_err(|e| OverlaydError::Other(format!("invalid CIDR {cidr}: {e}")))?;
2396 transport
2397 .add_allowed_ip(pubkey, net)
2398 .await
2399 .map_err(|e| OverlaydError::Overlay(format!("add_allowed_ip failed: {e}")))
2400 }
2401
2402 async fn remove_allowed_ip_on(
2407 transport: &OverlayTransport,
2408 pubkey: &str,
2409 cidr: &str,
2410 ) -> Result<(), OverlaydError> {
2411 let net: ipnet::IpNet = cidr
2412 .parse()
2413 .map_err(|e| OverlaydError::Other(format!("invalid CIDR {cidr}: {e}")))?;
2414 transport
2415 .remove_allowed_ip(pubkey, net)
2416 .await
2417 .map_err(|e| OverlaydError::Overlay(format!("remove_allowed_ip failed: {e}")))
2418 }
2419
    /// Store `ip` under `name` in `dns_records`.
    fn register_dns(&mut self, name: String, ip: IpAddr) {
        self.dns_records.insert(name, ip);
    }
2426
    /// Remove the entry for `name` from `dns_records`, if any.
    fn unregister_dns(&mut self, name: &str) {
        self.dns_records.remove(name);
    }
2431
2432 async fn nat_maintenance_tick(&mut self) -> Result<(), OverlaydError> {
2440 if self.nat_traversal.is_none() {
2442 let config = self.nat_config.clone().unwrap_or_default();
2443 if config.enabled {
2444 let mut nat = NatTraversal::new(config, self.overlay_port);
2445 match nat.gather_candidates().await {
2446 Ok(candidates) => {
2447 tracing::info!(count = candidates.len(), "Gathered NAT candidates");
2448 self.nat_last_refresh.store(now_unix(), Ordering::SeqCst);
2449 self.nat_traversal = Some(nat);
2450 }
2451 Err(e) => {
2452 tracing::warn!(error = %e, "NAT candidate gathering failed");
2453 return Ok(());
2454 }
2455 }
2456 } else {
2457 return Ok(());
2458 }
2459 }
2460
2461 let Some(nat) = self.nat_traversal.as_mut() else {
2462 return Ok(());
2463 };
2464 match nat.refresh().await {
2465 Ok(changed) => {
2466 if changed {
2467 tracing::info!("NAT reflexive address changed during refresh");
2468 }
2469 self.nat_last_refresh.store(now_unix(), Ordering::SeqCst);
2470 Ok(())
2471 }
2472 Err(e) => Err(OverlaydError::Overlay(format!(
2473 "NAT maintenance tick failed: {e}"
2474 ))),
2475 }
2476 }
2477
2478 async fn status_snapshot(&self) -> StatusSnapshot {
2482 let mut peers: Vec<PeerStatus> = Vec::new();
2483 let public_key = self.transport_public_key.clone();
2484
2485 if let Some(transport) = self.global_transport.as_ref() {
2486 if let Ok(dump) = transport.status().await {
2489 peers = parse_peer_status(&dump);
2490 }
2491 }
2492
2493 let service_count = u32::try_from(self.service_count()).unwrap_or(u32::MAX);
2494 let peer_count = u32::try_from(peers.len()).unwrap_or(u32::MAX);
2495
2496 let mut dedicated_services: Vec<DedicatedServiceStatus> = Vec::new();
2499 for (svc, st) in &self.service_transports {
2500 let peer_count = match st.transport.status().await {
2501 Ok(dump) => u32::try_from(parse_peer_status(&dump).len()).unwrap_or(u32::MAX),
2502 Err(_) => 0,
2503 };
2504 dedicated_services.push(DedicatedServiceStatus {
2505 service: svc.clone(),
2506 interface: st.interface.clone(),
2507 public_key: st.public_key.clone(),
2508 listen_port: st.listen_port,
2509 overlay_ip: st.overlay_ip,
2510 subnet: st.subnet.to_string(),
2511 peer_count,
2512 });
2513 }
2514
2515 StatusSnapshot {
2516 interface: self.global_interface.clone(),
2517 node_ip: self.node_ip,
2518 public_key,
2519 overlay_cidr: self.cluster_cidr.map(|c| c.to_string()),
2520 slice_cidr: self.slice_cidr.map(|c| c.to_string()),
2521 peer_count,
2522 service_count,
2523 peers,
2524 dedicated_services,
2525 }
2526 }
2527
2528 fn service_count(&self) -> usize {
2531 let extra_dedicated = self
2532 .service_transports
2533 .keys()
2534 .filter(|svc| !self.service_interfaces.contains_key(*svc))
2535 .count();
2536 self.service_interfaces.len() + extra_dedicated
2537 }
2538
2539 fn build_config(
2542 &self,
2543 private_key: String,
2544 public_key: String,
2545 ip: IpAddr,
2546 mask: u8,
2547 listen_port: u16,
2548 ) -> OverlayConfig {
2549 let local_addr = match ip {
2550 IpAddr::V4(_) => IpAddr::V4(Ipv4Addr::UNSPECIFIED),
2551 IpAddr::V6(_) => IpAddr::V6(Ipv6Addr::UNSPECIFIED),
2552 };
2553 let mut config = OverlayConfig {
2554 local_endpoint: SocketAddr::new(local_addr, listen_port),
2555 private_key,
2556 public_key,
2557 overlay_cidr: format!("{ip}/{mask}"),
2558 ..OverlayConfig::default()
2559 };
2560 if let Some(nat) = self.nat_config.clone() {
2561 config.nat = nat;
2562 }
2563 if let Some(dir) = self.uapi_sock_dir.clone() {
2564 config.uapi_sock_dir = dir;
2565 }
2566 config
2567 }
2568}
2569
2570fn shared_overlay_info(name: String) -> ServiceOverlayInfo {
2574 ServiceOverlayInfo {
2575 name,
2576 mode: OverlayMode::Shared,
2577 wg_public_key: None,
2578 wg_port: None,
2579 overlay_ip: None,
2580 subnet: None,
2581 }
2582}
2583
2584fn dedicated_overlay_info(
2588 name: String,
2589 public_key: &str,
2590 listen_port: u16,
2591 overlay_ip: IpAddr,
2592 subnet: ipnet::IpNet,
2593) -> ServiceOverlayInfo {
2594 ServiceOverlayInfo {
2595 name,
2596 mode: OverlayMode::Dedicated,
2597 wg_public_key: Some(public_key.to_string()),
2598 wg_port: Some(listen_port),
2599 overlay_ip: Some(overlay_ip),
2600 subnet: Some(subnet.to_string()),
2601 }
2602}
2603
2604pub fn peer_spec_to_info(spec: &PeerSpec) -> Result<PeerInfo, OverlaydError> {
2610 let endpoint: SocketAddr = spec.endpoint.parse().map_err(|e| {
2611 OverlaydError::Other(format!("invalid peer endpoint {}: {e}", spec.endpoint))
2612 })?;
2613 Ok(PeerInfo::new(
2614 spec.public_key.clone(),
2615 endpoint,
2616 &spec.allowed_ips,
2617 std::time::Duration::from_secs(spec.persistent_keepalive_secs),
2618 ))
2619}
2620
2621fn parse_peer_status(dump: &str) -> Vec<PeerStatus> {
2627 let mut peers: Vec<PeerStatus> = Vec::new();
2628 let mut current: Option<PeerStatus> = None;
2629 let mut allowed: Vec<String> = Vec::new();
2630
2631 let flush = |peers: &mut Vec<PeerStatus>,
2632 current: &mut Option<PeerStatus>,
2633 allowed: &mut Vec<String>| {
2634 if let Some(mut p) = current.take() {
2635 p.allowed_ips = allowed.join(",");
2636 peers.push(p);
2637 }
2638 allowed.clear();
2639 };
2640
2641 for line in dump.lines() {
2642 let line = line.trim();
2643 let Some((key, value)) = line.split_once('=') else {
2644 continue;
2645 };
2646 match key.trim() {
2647 "public_key" | "peer" => {
2648 flush(&mut peers, &mut current, &mut allowed);
2649 current = Some(PeerStatus {
2650 public_key: value.trim().to_string(),
2651 endpoint: String::new(),
2652 allowed_ips: String::new(),
2653 last_handshake_unix_secs: 0,
2654 });
2655 }
2656 "endpoint" => {
2657 if let Some(p) = current.as_mut() {
2658 p.endpoint = value.trim().to_string();
2659 }
2660 }
2661 "allowed_ip" | "allowed_ips" => {
2662 if current.is_some() {
2663 allowed.push(value.trim().to_string());
2664 }
2665 }
2666 "latest_handshake" | "last_handshake_time_sec" => {
2667 if let Some(p) = current.as_mut() {
2668 p.last_handshake_unix_secs = value.trim().parse().unwrap_or(0);
2669 }
2670 }
2671 _ => {}
2672 }
2673 }
2674 flush(&mut peers, &mut current, &mut allowed);
2675 peers
2676}
2677
/// Returns the current wall-clock time as whole seconds since the Unix epoch.
///
/// Yields `0` if the system clock reports a time before the epoch.
fn now_unix() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        // A pre-epoch clock maps to the zero duration.
        Err(_) => 0,
    }
}
2685
2686struct IpAllocator {
2690 cidr: IpNetwork,
2692 base: IpAddr,
2694 next_offset: AtomicU64,
2696 released: parking_lot::Mutex<Vec<IpAddr>>,
2699}
2700
2701impl IpAllocator {
2702 fn new(cidr: IpNetwork) -> Self {
2703 Self {
2704 base: cidr.network(),
2705 cidr,
2706 next_offset: AtomicU64::new(1),
2707 released: parking_lot::Mutex::new(Vec::new()),
2708 }
2709 }
2710
2711 #[allow(clippy::cast_possible_truncation)]
2712 fn compute_addr(&self, offset: u64) -> IpAddr {
2713 match self.base {
2714 IpAddr::V4(base_v4) => {
2715 let base_u32 = u32::from_be_bytes(base_v4.octets());
2716 let addr = base_u32.wrapping_add(offset as u32);
2717 IpAddr::V4(Ipv4Addr::from(addr.to_be_bytes()))
2718 }
2719 IpAddr::V6(base_v6) => {
2720 let base_u128 = u128::from(base_v6);
2721 let addr = base_u128.wrapping_add(u128::from(offset));
2722 IpAddr::V6(Ipv6Addr::from(addr))
2723 }
2724 }
2725 }
2726
2727 fn allocate(&self) -> Result<IpAddr, OverlaydError> {
2732 if let Some(ip) = self.released.lock().pop() {
2733 return Ok(ip);
2734 }
2735 let offset = self.next_offset.fetch_add(1, Ordering::SeqCst);
2736 let addr = self.compute_addr(offset);
2737
2738 let in_cidr = self.cidr.contains(addr);
2739 let is_v4_broadcast = matches!(
2740 (&self.cidr, &addr),
2741 (IpNetwork::V4(v4), IpAddr::V4(a)) if *a == v4.broadcast()
2742 );
2743 if !in_cidr || is_v4_broadcast {
2744 return Err(OverlaydError::Overlay(format!(
2745 "IP allocator exhausted: next address {addr} is outside slice {}",
2746 self.cidr
2747 )));
2748 }
2749 Ok(addr)
2750 }
2751
2752 fn release(&self, ip: IpAddr) {
2754 let mut released = self.released.lock();
2755 if !released.contains(&ip) {
2756 released.push(ip);
2757 }
2758 }
2759}
2760
2761#[cfg(target_os = "windows")]
/// Returns the owner tag for `daemon_name`.
///
/// The default daemon name `"zlayer"` yields the literal `"zlayer"`; any
/// other name is returned unchanged as an owned string.
fn owner_tag(daemon_name: &str) -> String {
    match daemon_name {
        "zlayer" => String::from("zlayer"),
        custom => custom.to_owned(),
    }
}
2774
2775#[cfg(target_os = "windows")]
/// Returns the overlay network name for `daemon_name`.
///
/// The default daemon name `"zlayer"` yields `"zlayer-overlay"`; any other
/// name yields `"<daemon_name>-overlay"`.
fn overlay_network_name(daemon_name: &str) -> String {
    match daemon_name {
        "zlayer" => String::from("zlayer-overlay"),
        custom => format!("{custom}-overlay"),
    }
}
2786
2787#[cfg(target_os = "windows")]
2791fn format_guid_bare(id: windows::core::GUID) -> String {
2792 format!("{id:?}")
2793 .trim_matches(|c: char| c == '{' || c == '}')
2794 .to_ascii_lowercase()
2795}
2796
2797#[cfg(target_os = "windows")]
2801pub fn purge_managed_networks(data_dir: &Path, daemon_name: &str) {
2802 use windows::core::GUID;
2803
2804 let marker_path = zlayer_paths::ZLayerDirs::new(data_dir.to_path_buf()).agent_network_state();
2805 let state = crate::network_state::NetworkState::load(&marker_path);
2806
2807 for entry in &state.networks {
2809 if !entry.kind.starts_with("hcn") {
2810 continue;
2811 }
2812 match GUID::try_from(entry.id.as_str()) {
2813 Ok(guid) => match zlayer_hns::network::Network::delete(guid) {
2814 Ok(()) => {
2815 tracing::info!(name = %entry.name, id = %entry.id, "deleted managed HCN network");
2816 }
2817 Err(e) => {
2818 tracing::warn!(name = %entry.name, id = %entry.id, error = %e, "failed to delete managed HCN network");
2819 }
2820 },
2821 Err(e) => {
2822 tracing::warn!(id = %entry.id, error = %e, "managed network marker has unparseable GUID");
2823 }
2824 }
2825 }
2826
2827 let overlay_name = overlay_network_name(daemon_name);
2830 if let Ok(guids) = zlayer_hns::network::list("{}") {
2831 for guid in guids {
2832 let Ok(network) = zlayer_hns::network::Network::open(guid) else {
2833 continue;
2834 };
2835 let is_ours = matches!(network.query("{}"), Ok(props) if props.name == overlay_name);
2836 drop(network);
2837 if is_ours {
2838 match zlayer_hns::network::Network::delete(guid) {
2839 Ok(()) => {
2840 tracing::info!(name = %overlay_name, "deleted overlay HCN network (name sweep)");
2841 }
2842 Err(e) => {
2843 tracing::warn!(name = %overlay_name, error = %e, "failed to delete overlay network (name sweep)");
2844 }
2845 }
2846 }
2847 }
2848 }
2849
2850 if marker_path.exists() {
2851 if let Err(e) = std::fs::remove_file(&marker_path) {
2852 tracing::warn!(error = %e, path = %marker_path.display(), "failed to remove agent network marker");
2853 }
2854 }
2855}
2856
#[cfg(test)]
mod tests {
    use super::*;

    // A well-formed spec carries every field over to the resulting `PeerInfo`.
    #[test]
    fn peer_spec_to_info_parses_endpoint_and_keepalive() {
        let endpoint = "1.2.3.4:51820";
        let allowed_ips = "10.200.0.5/32,10.200.1.0/24";
        let spec = PeerSpec {
            public_key: "base64key".to_string(),
            endpoint: endpoint.to_string(),
            allowed_ips: allowed_ips.to_string(),
            persistent_keepalive_secs: 25,
        };

        let info = peer_spec_to_info(&spec).expect("valid spec");

        assert_eq!(info.public_key, "base64key");
        assert_eq!(info.endpoint, endpoint.parse().unwrap());
        assert_eq!(info.allowed_ips, allowed_ips);
        assert_eq!(
            info.persistent_keepalive_interval,
            std::time::Duration::from_secs(25)
        );
    }

    // An endpoint that is not `ip:port` is reported as an error.
    #[test]
    fn peer_spec_to_info_rejects_bad_endpoint() {
        let spec = PeerSpec {
            public_key: "k".to_string(),
            endpoint: "not-a-socket-addr".to_string(),
            allowed_ips: String::new(),
            persistent_keepalive_secs: 0,
        };
        let outcome = peer_spec_to_info(&spec);
        assert!(outcome.is_err());
    }

    // Short, long, multi-part and suffix-less inputs all stay within the
    // interface-name length limit and keep the `zl-` prefix.
    #[test]
    fn interface_name_never_exceeds_limit() {
        let cases: [(&[&str], &str); 6] = [
            (&["a"], "g"),
            (&["zlayer-manager"], "g"),
            (&["my-very-long-deployment-name-that-goes-on-and-on"], "g"),
            (&["zlayer", "manager"], "s"),
            (
                &["abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz"],
                "s",
            ),
            (&["x"], ""),
        ];
        for (parts, suffix) in cases {
            let name = make_interface_name(parts, suffix);
            assert!(name.len() <= MAX_IFNAME_LEN, "Name '{name}' too long");
            assert!(name.starts_with("zl-"));
        }
    }

    // The same inputs always produce the same interface name.
    #[test]
    fn interface_name_is_deterministic() {
        let first = make_interface_name(&["zlayer-manager"], "g");
        let second = make_interface_name(&["zlayer-manager"], "g");
        assert_eq!(first, second);
    }

    // Two consecutive peer blocks are split apart and their allowed IPs
    // are joined per peer.
    #[test]
    fn parse_peer_status_splits_blocks() {
        let dump = concat!(
            "public_key=AAA\n",
            "endpoint=1.2.3.4:51820\n",
            "allowed_ip=10.200.0.2/32\n",
            "allowed_ip=10.200.1.0/24\n",
            "latest_handshake=1700000000\n",
            "public_key=BBB\n",
            "endpoint=5.6.7.8:51820\n",
            "allowed_ip=10.200.0.3/32\n",
            "latest_handshake=0\n",
        );

        let peers = parse_peer_status(dump);
        assert_eq!(peers.len(), 2);

        let (first, second) = (&peers[0], &peers[1]);
        assert_eq!(first.public_key, "AAA");
        assert_eq!(first.endpoint, "1.2.3.4:51820");
        assert_eq!(first.allowed_ips, "10.200.0.2/32,10.200.1.0/24");
        assert_eq!(first.last_handshake_unix_secs, 1_700_000_000);
        assert_eq!(second.public_key, "BBB");
        assert_eq!(second.last_handshake_unix_secs, 0);
    }

    // A freshly constructed server reports an empty status snapshot.
    #[tokio::test]
    async fn status_snapshot_before_setup_is_empty() {
        let data_dir = std::path::PathBuf::from("/tmp/zlayer-overlayd-test");
        let server = OverlaydServer::new(data_dir);

        let snap = server.status_snapshot().await;

        assert!(snap.interface.is_none());
        assert!(snap.node_ip.is_none());
        assert!(snap.public_key.is_none());
        assert_eq!(snap.peer_count, 0);
        assert_eq!(snap.service_count, 0);
        assert!(snap.peers.is_empty());
    }

    // A released address is the next one handed out again.
    #[tokio::test]
    async fn allocate_and_release_ip_round_trip() {
        let data_dir = std::path::PathBuf::from("/tmp/zlayer-overlayd-test");
        let mut server = OverlaydServer::new(data_dir);

        let first = server.allocate_ip("svc", false).expect("alloc a");
        let second = server.allocate_ip("svc", false).expect("alloc b");
        assert_ne!(first, second);

        server.release_ip(first);
        let reused = server.allocate_ip("svc", false).expect("alloc c");
        assert_eq!(reused, first);
    }

    // Builds a server rooted in a temp directory whose name includes the
    // process id and the current Unix time.
    fn test_server() -> OverlaydServer {
        let unique = format!(
            "zlayer-overlayd-scope-{}-{}",
            std::process::id(),
            now_unix()
        );
        OverlaydServer::new(std::env::temp_dir().join(unique))
    }

    // Resolving the global scope before the global overlay exists fails
    // with an explanatory `Other` error.
    #[tokio::test]
    async fn transport_for_scope_global_requires_setup() {
        let server = test_server();
        let err = match server.transport_for_scope(&PeerScope::Global) {
            Ok(_) => panic!("global overlay should not be set up"),
            Err(err) => err,
        };
        match err {
            OverlaydError::Other(m) => {
                assert!(m.contains("global overlay not set up"), "got: {m}");
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // Resolving a service scope with no dedicated overlay fails with the
    // exact service-specific message.
    #[tokio::test]
    async fn transport_for_scope_unset_service_errors() {
        let server = test_server();
        let scope = PeerScope::Service {
            service: "x".to_string(),
        };
        let err = match server.transport_for_scope(&scope) {
            Ok(_) => panic!("no dedicated overlay should exist for x"),
            Err(err) => err,
        };
        match err {
            OverlaydError::Other(m) => {
                assert_eq!(m, "no dedicated overlay for service x");
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // The same failure surfaces as an `Err` response when the request goes
    // through the dispatcher.
    #[tokio::test]
    async fn add_peer_service_scope_before_setup_errors_via_dispatch() {
        let mut server = test_server();
        let peer = PeerSpec {
            public_key: "k".to_string(),
            endpoint: "1.2.3.4:51820".to_string(),
            allowed_ips: "10.200.0.2/32".to_string(),
            persistent_keepalive_secs: 0,
        };
        let scope = PeerScope::Service {
            service: "x".to_string(),
        };

        let resp = server.handle(OverlaydRequest::AddPeer { peer, scope }).await;

        match resp {
            OverlaydResponse::Err { message } => {
                assert_eq!(message, "no dedicated overlay for service x");
            }
            other => panic!("expected Err response, got {other:?}"),
        }
    }

    // Privileged end-to-end check: a dedicated service overlay gets its own
    // port, interface and key, and service-scoped peers are accepted.
    #[cfg(target_os = "linux")]
    #[tokio::test]
    #[ignore = "needs CAP_NET_ADMIN; run on a privileged Linux host"]
    async fn dedicated_setup_creates_distinct_device_and_routes_service_peer() {
        let mut server = test_server();

        // Bring up the global overlay first.
        let cluster_cidr = "10.200.0.0/16";
        let node_slice = Some("10.200.0.0/28");
        let global_name = server
            .setup_global_overlay(
                "dep".to_string(),
                "i0".to_string(),
                cluster_cidr,
                node_slice,
                zlayer_core::DEFAULT_WG_PORT,
                false,
            )
            .await
            .expect("global overlay up");
        assert!(!global_name.is_empty());

        // Then the dedicated overlay for the `web` service.
        let info = server
            .setup_service_overlay("web", OverlayMode::Dedicated)
            .await
            .expect("dedicated service overlay up");
        assert_eq!(info.mode, OverlayMode::Dedicated);
        let port = info.wg_port.expect("dedicated port");
        assert_ne!(
            port, server.overlay_port,
            "dedicated device must not share the global port"
        );

        let st = server
            .service_transports
            .get("web")
            .expect("service transport recorded");
        assert_eq!(st.listen_port, port);
        assert_ne!(
            st.interface, global_name,
            "dedicated interface must differ from global"
        );
        assert_eq!(
            Some(st.public_key.clone()),
            info.wg_public_key,
            "info pubkey matches recorded transport"
        );
        assert_ne!(
            Some(st.public_key.clone()),
            server.transport_public_key,
            "dedicated key must differ from global key"
        );

        // Add a peer scoped to the service, using a freshly generated key.
        let (_priv, pubk) = OverlayTransport::generate_keys().await.unwrap();
        let peer = PeerSpec {
            public_key: pubk,
            endpoint: "5.6.7.8:51999".to_string(),
            allowed_ips: "10.201.0.2/32".to_string(),
            persistent_keepalive_secs: 25,
        };
        let scope = PeerScope::Service {
            service: "web".to_string(),
        };
        let resp = server.handle(OverlaydRequest::AddPeer { peer, scope }).await;
        assert!(
            matches!(resp, OverlaydResponse::Ok),
            "service-scoped add_peer should land on the dedicated device, got {resp:?}"
        );
    }

    // Attaching a guest-managed container without a global overlay is
    // rejected and records no attachment.
    #[tokio::test]
    async fn guest_attach_requires_global_overlay() {
        let mut server = test_server();
        let request = OverlaydRequest::AttachContainer {
            handle: AttachHandle::GuestManaged {
                id: "vm-1".to_string(),
            },
            service: "web".to_string(),
            join_global: true,
            dns_server: None,
            dns_domain: None,
        };

        let resp = server.handle(request).await;

        match resp {
            OverlaydResponse::Err { message } => {
                assert!(
                    message.contains("global overlay to be set up"),
                    "got: {message}"
                );
            }
            other => panic!("expected Err response, got {other:?}"),
        }
        assert!(server.guest_attachments.is_empty());
    }

    // Detaching a guest that was never attached succeeds.
    #[tokio::test]
    async fn detach_unknown_guest_is_idempotent() {
        let mut server = test_server();
        let outcome = server.detach_container_guest("never-attached").await;
        outcome.expect("detach of unknown guest is a no-op");
    }

    // Privileged end-to-end check: attaching a guest returns a usable
    // config and registers the guest; detaching removes both records.
    #[cfg(target_os = "linux")]
    #[tokio::test]
    #[ignore = "needs CAP_NET_ADMIN; run on a privileged Linux host"]
    async fn guest_attach_allocates_config_and_detach_releases() {
        let mut server = test_server();
        let cluster_cidr = "10.200.0.0/16";
        let node_slice = Some("10.200.0.0/28");
        server
            .setup_global_overlay(
                "dep".to_string(),
                "i0".to_string(),
                cluster_cidr,
                node_slice,
                zlayer_core::DEFAULT_WG_PORT,
                false,
            )
            .await
            .expect("global overlay up");

        // Seed one global peer so the guest config has something to learn.
        let (_p, other_pub) = OverlayTransport::generate_keys().await.unwrap();
        let seed_peer = PeerSpec {
            public_key: other_pub.clone(),
            endpoint: "9.9.9.9:51820".to_string(),
            allowed_ips: "10.200.1.0/28".to_string(),
            persistent_keepalive_secs: 25,
        };
        let add = server
            .handle(OverlaydRequest::AddPeer {
                peer: seed_peer,
                scope: PeerScope::Global,
            })
            .await;
        assert!(
            matches!(add, OverlaydResponse::Ok),
            "seed peer add: {add:?}"
        );

        // Attach the guest and inspect the returned configuration.
        let attach = OverlaydRequest::AttachContainer {
            handle: AttachHandle::GuestManaged {
                id: "vm-1".to_string(),
            },
            service: "web".to_string(),
            join_global: true,
            dns_server: Some("10.200.0.1".parse().unwrap()),
            dns_domain: Some("overlay".to_string()),
        };
        let resp = server.handle(attach).await;
        let config = match resp {
            OverlaydResponse::GuestConfig(c) => c,
            other => panic!("expected GuestConfig, got {other:?}"),
        };
        assert!(!config.private_key.is_empty());
        assert!(!config.public_key.is_empty());
        assert_ne!(config.private_key, config.public_key);
        assert_eq!(config.listen_port, server.overlay_port);
        assert_eq!(config.dns_server, Some("10.200.0.1".parse().unwrap()));
        assert!(
            config.peers.iter().any(|p| p.public_key == other_pub),
            "guest must learn the seeded global peer"
        );
        assert!(
            config
                .peers
                .iter()
                .any(|p| Some(&p.public_key) == server.transport_public_key.as_ref()),
            "guest must learn THIS node as a peer"
        );
        assert!(server.global_peers.contains_key(&config.public_key));
        let info = server
            .guest_attachments
            .get("vm-1")
            .expect("attachment recorded");
        assert_eq!(info.overlay_ip, config.overlay_ip);

        // Detach and confirm both records are gone.
        let detach = OverlaydRequest::DetachContainer {
            handle: AttachHandle::GuestManaged {
                id: "vm-1".to_string(),
            },
        };
        let det = server.handle(detach).await;
        assert!(matches!(det, OverlaydResponse::Ok), "detach: {det:?}");
        assert!(!server.guest_attachments.contains_key("vm-1"));
        assert!(!server.global_peers.contains_key(&config.public_key));
    }
}