1use super::{egress, netlink, netns};
2use crate::error::{NucleusError, Result, StateTransition};
3use crate::network::config::{BridgeConfig, EgressPolicy, PortForward};
4use crate::network::NetworkState;
5use std::fs::OpenOptions;
6use std::net::Ipv4Addr;
7use std::os::fd::FromRawFd;
8use std::os::unix::fs::FileTypeExt;
9use std::os::unix::fs::OpenOptionsExt;
10use std::os::unix::io::AsRawFd;
11use std::process::Command;
12use tracing::{debug, info, warn};
13
/// Host-side handle for one container's bridge network attachment.
///
/// Built by the `setup*` constructors and torn down either explicitly through
/// `cleanup` or, as a fallback, when the value is dropped.
pub struct BridgeNetwork {
    /// Bridge configuration this network was set up with (a clone of the caller's config).
    config: BridgeConfig,
    /// IPv4 address reserved for the container, in dotted-quad text form.
    container_ip: String,
    /// Name of the host-side veth interface.
    veth_host: String,
    /// Identifier used to name the on-disk IP reservation file (`<id>.ip`).
    container_id: String,
    /// Trimmed contents of `/proc/sys/net/ipv4/ip_forward` as read during setup,
    /// or `None` when the file could not be read.
    prev_ip_forward: Option<String>,
    /// Lifecycle state of this network.
    state: NetworkState,
}
23
24impl BridgeNetwork {
25 fn open_dev_urandom() -> Result<std::fs::File> {
26 let file = OpenOptions::new()
27 .read(true)
28 .custom_flags(libc::O_NOFOLLOW | libc::O_CLOEXEC)
29 .open("/dev/urandom")
30 .map_err(|e| {
31 NucleusError::NetworkError(format!("Failed to open /dev/urandom: {}", e))
32 })?;
33
34 let metadata = file.metadata().map_err(|e| {
35 NucleusError::NetworkError(format!("Failed to stat /dev/urandom: {}", e))
36 })?;
37 if !metadata.file_type().is_char_device() {
38 return Err(NucleusError::NetworkError(
39 "/dev/urandom is not a character device".to_string(),
40 ));
41 }
42
43 Ok(file)
44 }
45
46 pub fn setup(pid: u32, config: &BridgeConfig) -> Result<Self> {
53 Self::setup_for(pid, config, &format!("{:x}", pid))
54 }
55
    /// Sets up bridge networking for the process `pid` under a caller-supplied
    /// `container_id`, which names the IP reservation instead of the PID.
    ///
    /// # Errors
    /// Propagates any error from the underlying setup routine.
    pub fn setup_with_id(pid: u32, config: &BridgeConfig, container_id: &str) -> Result<Self> {
        Self::setup_for(pid, config, container_id)
    }
60
    /// Performs the full bridge setup for the process `pid`.
    ///
    /// In order: validate the config, reserve an IP, ensure the bridge exists,
    /// create the veth pair and move one end into the process's network
    /// namespace, configure address/links/default route inside that namespace,
    /// add the MASQUERADE rule, enable IP forwarding and install port forwards.
    ///
    /// A `SetupRollback` guard is armed right after the IP is reserved; any
    /// early return drops it, which undoes the steps recorded so far. The
    /// guard is disarmed only once every step has succeeded.
    ///
    /// # Errors
    /// Returns an error if any step fails, if the PID's start ticks cannot be
    /// read, or if they change while setup is in progress.
    fn setup_for(pid: u32, config: &BridgeConfig, container_id: &str) -> Result<Self> {
        config.validate()?;

        let mut net_state = NetworkState::Unconfigured;
        net_state = net_state.transition(NetworkState::Configuring)?;

        // Reserve the container IP first (an explicitly configured address wins).
        let alloc_dir = Self::ip_alloc_dir();
        let container_ip = Self::reserve_ip_in_dir(
            &alloc_dir,
            container_id,
            &config.subnet,
            config.container_ip.as_deref(),
        )?;
        let prefix = Self::subnet_prefix(&config.subnet);

        // Interface names are derived from the PID and capped at 15 bytes
        // (presumably the kernel's interface-name limit — confirm).
        let veth_host_full = format!("veth-{:x}", pid);
        let veth_cont_full = format!("vethc-{:x}", pid);
        let veth_host = veth_host_full[..veth_host_full.len().min(15)].to_string();
        let veth_container = veth_cont_full[..veth_cont_full.len().min(15)].to_string();
        // From here on, every failure path releases the reserved IP via this guard.
        let mut rollback = SetupRollback::new(
            veth_host.clone(),
            config.subnet.clone(),
            Some((alloc_dir.clone(), container_id.to_string())),
        );

        Self::ensure_bridge_for(&config.bridge_name, &config.subnet)?;

        netlink::create_veth(&veth_host, &veth_container)?;
        // Only now does the rollback guard need to delete the veth.
        rollback.veth_created = true;

        netlink::set_link_master(&veth_host, &config.bridge_name)?;
        netlink::set_link_up(&veth_host)?;

        netlink::set_link_netns(&veth_container, pid)?;

        // Snapshot the process start time so that PID reuse can be detected
        // after the in-namespace configuration below. Zero means "unreadable".
        let start_ticks = Self::read_pid_start_ticks(pid);
        if start_ticks == 0 {
            drop(rollback);
            return Err(NucleusError::NetworkError(format!(
                "Cannot read start_ticks for PID {} – process may have exited",
                pid
            )));
        }

        let container_addr: Ipv4Addr = container_ip.parse().map_err(|e| {
            NucleusError::NetworkError(format!("invalid container IP '{}': {}", container_ip, e))
        })?;
        {
            // The closure is `move`, so hand it its own copy of the interface name.
            let vc = veth_container.clone();
            netns::in_netns(pid, move || {
                netlink::add_addr(&vc, container_addr, prefix)?;
                netlink::set_link_up(&vc)?;
                netlink::set_link_up("lo")?;
                Ok(())
            })?;
        }

        // A different start time means the PID now refers to another process.
        let current_ticks = Self::read_pid_start_ticks(pid);
        if current_ticks != start_ticks {
            drop(rollback);
            return Err(NucleusError::NetworkError(format!(
                "PID {} was recycled during network setup (start_ticks changed: {} -> {})",
                pid, start_ticks, current_ticks
            )));
        }

        let gateway = Self::gateway_from_subnet(&config.subnet);
        let gateway_addr: Ipv4Addr = gateway.parse().map_err(|e| {
            NucleusError::NetworkError(format!("invalid gateway IP '{}': {}", gateway, e))
        })?;
        netns::in_netns(pid, move || netlink::add_default_route(gateway_addr))?;

        // Source-NAT traffic leaving the container subnet.
        Self::run_cmd(
            "iptables",
            &[
                "-t",
                "nat",
                "-A",
                "POSTROUTING",
                "-s",
                &config.subnet,
                "-j",
                "MASQUERADE",
            ],
        )?;
        rollback.nat_added = true;

        // Remember the previous forwarding setting before forcing it to "1".
        let prev_ip_forward = match std::fs::read_to_string("/proc/sys/net/ipv4/ip_forward") {
            Ok(v) => Some(v.trim().to_string()),
            Err(e) => {
                warn!(
                    "Could not read ip_forward state (will not restore on cleanup): {}",
                    e
                );
                None
            }
        };
        rollback.prev_ip_forward = prev_ip_forward;
        std::fs::write("/proc/sys/net/ipv4/ip_forward", "1").map_err(|e| {
            NucleusError::NetworkError(format!("Failed to enable IP forwarding: {}", e))
        })?;

        for pf in &config.port_forwards {
            Self::setup_port_forward_for(&container_ip, pf)?;
            // Recorded only after success, so rollback removes exactly what was installed.
            rollback
                .port_forwards
                .push((container_ip.clone(), pf.clone()));
        }

        net_state = net_state.transition(NetworkState::Active)?;

        info!(
            "Bridge network configured: {} -> {} (IP: {})",
            veth_host, veth_container, container_ip
        );
        // Everything succeeded: take the saved sysctl value back and disarm the guard.
        let prev_ip_forward = rollback.prev_ip_forward.clone();
        rollback.disarm();

        Ok(Self {
            config: config.clone(),
            container_ip,
            veth_host,
            container_id: container_id.to_string(),
            prev_ip_forward,
            state: net_state,
        })
    }
202
    /// Applies `policy` to the process `pid` by delegating to
    /// `egress::apply_egress_policy` with this network's configured DNS servers.
    ///
    /// NOTE(review): the meaning of the trailing `false` argument is defined by
    /// the `egress` module and is not visible here — confirm before changing.
    pub fn apply_egress_policy(&self, pid: u32, policy: &EgressPolicy) -> Result<()> {
        egress::apply_egress_policy(pid, &self.config.dns, policy, false)
    }
210
211 pub fn cleanup(mut self) -> Result<()> {
215 self.state = self.state.transition(NetworkState::Cleaned)?;
216
217 Self::release_allocated_ip(&self.container_id);
219
220 for pf in &self.config.port_forwards {
222 if let Err(e) = self.cleanup_port_forward(pf) {
223 warn!("Failed to cleanup port forward: {}", e);
224 }
225 }
226
227 let _ = Self::run_cmd(
229 "iptables",
230 &[
231 "-t",
232 "nat",
233 "-D",
234 "POSTROUTING",
235 "-s",
236 &self.config.subnet,
237 "-j",
238 "MASQUERADE",
239 ],
240 );
241
242 let _ = netlink::del_link(&self.veth_host);
244
245 if let Some(ref prev) = self.prev_ip_forward {
247 if prev == "0" {
248 if let Err(e) = std::fs::write("/proc/sys/net/ipv4/ip_forward", "0") {
249 warn!("Failed to restore ip_forward to 0: {}", e);
250 } else {
251 info!("Restored net.ipv4.ip_forward to 0");
252 }
253 }
254 }
255
256 info!("Bridge network cleaned up");
257 Ok(())
258 }
259
260 fn cleanup_best_effort(&mut self) {
264 if self.state == NetworkState::Cleaned {
265 return;
266 }
267
268 Self::release_allocated_ip(&self.container_id);
269
270 for pf in &self.config.port_forwards {
271 let _ = self.cleanup_port_forward(pf);
272 }
273
274 let _ = Self::run_cmd(
275 "iptables",
276 &[
277 "-t",
278 "nat",
279 "-D",
280 "POSTROUTING",
281 "-s",
282 &self.config.subnet,
283 "-j",
284 "MASQUERADE",
285 ],
286 );
287
288 let _ = netlink::del_link(&self.veth_host);
289
290 if let Some(ref prev) = self.prev_ip_forward {
291 if prev == "0" {
292 let _ = std::fs::write("/proc/sys/net/ipv4/ip_forward", "0");
293 }
294 }
295
296 self.state = NetworkState::Cleaned;
297 debug!("Bridge network cleaned up (best-effort via drop)");
298 }
299
300 pub fn cleanup_orphaned_rules(subnet: &str) {
306 let output = match Command::new("iptables")
308 .args(["-t", "nat", "-L", "POSTROUTING", "-n"])
309 .output()
310 {
311 Ok(o) => o,
312 Err(e) => {
313 debug!("Cannot check iptables for orphaned rules: {}", e);
314 return;
315 }
316 };
317
318 let stdout = String::from_utf8_lossy(&output.stdout);
319 let mut orphaned_count = 0u32;
320 for line in stdout.lines() {
321 if line.contains("MASQUERADE") && line.contains(subnet) {
322 let _ = Self::run_cmd(
324 "iptables",
325 &[
326 "-t",
327 "nat",
328 "-D",
329 "POSTROUTING",
330 "-s",
331 subnet,
332 "-j",
333 "MASQUERADE",
334 ],
335 );
336 orphaned_count += 1;
337 }
338 }
339
340 if orphaned_count > 0 {
341 info!(
342 "Cleaned up {} orphaned iptables MASQUERADE rule(s) for subnet {}",
343 orphaned_count, subnet
344 );
345 }
346 }
347
348 fn ensure_bridge_for(bridge_name: &str, subnet: &str) -> Result<()> {
349 if netlink::link_exists(bridge_name) {
350 return Ok(());
351 }
352
353 netlink::create_bridge(bridge_name)?;
354
355 let gateway = Self::gateway_from_subnet(subnet);
356 let gateway_addr: Ipv4Addr = gateway.parse().map_err(|e| {
357 NucleusError::NetworkError(format!("invalid bridge gateway '{}': {}", gateway, e))
358 })?;
359 netlink::add_addr(bridge_name, gateway_addr, Self::subnet_prefix(subnet))?;
360 netlink::set_link_up(bridge_name)?;
361
362 info!("Created bridge {}", bridge_name);
363 Ok(())
364 }
365
366 fn setup_port_forward_for(container_ip: &str, pf: &PortForward) -> Result<()> {
367 for chain in ["PREROUTING", "OUTPUT"] {
368 let args = Self::port_forward_rule_args("-A", chain, container_ip, pf);
369 Self::run_cmd_owned("iptables", &args)?;
370 }
371
372 let host_ip = pf
373 .host_ip
374 .map(|ip| ip.to_string())
375 .unwrap_or_else(|| "0.0.0.0".to_string());
376 info!(
377 "Port forward: {}:{} -> {}:{}/{}",
378 host_ip, pf.host_port, container_ip, pf.container_port, pf.protocol
379 );
380 Ok(())
381 }
382
383 fn cleanup_port_forward(&self, pf: &PortForward) -> Result<()> {
384 for chain in ["OUTPUT", "PREROUTING"] {
385 let args = Self::port_forward_rule_args("-D", chain, &self.container_ip, pf);
386 Self::run_cmd_owned("iptables", &args)?;
387 }
388 Ok(())
389 }
390
391 fn allocate_ip_with_reserved(
397 subnet: &str,
398 reserved: &std::collections::HashSet<String>,
399 ) -> Result<String> {
400 let base = subnet.split('/').next().unwrap_or("10.0.42.0");
401 let parts: Vec<&str> = base.split('.').collect();
402 if parts.len() != 4 {
403 return Ok("10.0.42.2".to_string());
404 }
405
406 let mut rand_buf = [0u8; 128];
413 let mut urandom = Self::open_dev_urandom()?;
414 std::io::Read::read_exact(&mut urandom, &mut rand_buf).map_err(|e| {
415 NucleusError::NetworkError(format!("Failed to read /dev/urandom: {}", e))
416 })?;
417 for &byte in &rand_buf {
418 if byte >= 253 {
420 continue;
421 }
422 let offset = byte as u32 + 2;
423 let candidate = format!("{}.{}.{}.{}", parts[0], parts[1], parts[2], offset);
424 if reserved.contains(&candidate) {
425 continue;
426 }
427 if !Self::is_ip_in_use(&candidate)? {
428 return Ok(candidate);
430 }
431 }
432
433 Err(NucleusError::NetworkError(format!(
434 "Failed to allocate free IP in subnet {}",
435 subnet
436 )))
437 }
438
    /// Reserves an IP for `container_id` and records it as `<container_id>.ip`
    /// inside `alloc_dir`.
    ///
    /// Uses `requested_ip` when given (failing if it is already reserved or in
    /// use); otherwise picks a free address in `subnet`. The scan of existing
    /// reservations and the write of the new one happen under an exclusive
    /// `flock` on `<alloc_dir>/.lock`.
    ///
    /// # Errors
    /// Returns an error if the directory or lock cannot be prepared, if the
    /// requested IP is taken, or if allocation or recording fails.
    fn reserve_ip_in_dir(
        alloc_dir: &std::path::Path,
        container_id: &str,
        subnet: &str,
        requested_ip: Option<&str>,
    ) -> Result<String> {
        Self::ensure_alloc_dir(alloc_dir)?;
        let lock_path = alloc_dir.join(".lock");
        let lock_file = std::fs::OpenOptions::new()
            .create(true)
            .write(true)
            .truncate(false)
            .open(&lock_path)
            .map_err(|e| {
                NucleusError::NetworkError(format!("Failed to open IP alloc lock: {}", e))
            })?;
        // SAFETY: `lock_file` is an open file owned by this function, so its raw
        // descriptor is valid for the duration of the call.
        // There is no explicit unlock: the lock lives as long as `lock_file`,
        // which is dropped when this function returns.
        let lock_ret = unsafe { libc::flock(lock_file.as_raw_fd(), libc::LOCK_EX) };
        if lock_ret != 0 {
            return Err(NucleusError::NetworkError(format!(
                "Failed to acquire IP alloc lock: {}",
                std::io::Error::last_os_error()
            )));
        }

        let reserved = Self::collect_reserved_ips_in_dir(alloc_dir);
        let ip = match requested_ip {
            Some(ip) => {
                // The reservation check comes first; the in-use lookup only runs
                // when no reservation file already claims the address.
                if reserved.contains(ip) || Self::is_ip_in_use(ip)? {
                    return Err(NucleusError::NetworkError(format!(
                        "Requested container IP {} is already in use",
                        ip
                    )));
                }
                ip.to_string()
            }
            None => Self::allocate_ip_with_reserved(subnet, &reserved)?,
        };

        Self::record_allocated_ip_in_dir(alloc_dir, container_id, &ip)?;
        Ok(ip)
    }
482
483 fn collect_reserved_ips_in_dir(
485 alloc_dir: &std::path::Path,
486 ) -> std::collections::HashSet<String> {
487 let mut ips = std::collections::HashSet::new();
488 if let Ok(entries) = std::fs::read_dir(alloc_dir) {
489 for entry in entries.flatten() {
490 if let Some(name) = entry.file_name().to_str() {
491 if name.ends_with(".ip") {
492 if let Ok(ip) = std::fs::read_to_string(entry.path()) {
493 let ip = ip.trim().to_string();
494 if !ip.is_empty() {
495 ips.insert(ip);
496 }
497 }
498 }
499 }
500 }
501 }
502 ips
503 }
504
505 fn record_allocated_ip_in_dir(
507 alloc_dir: &std::path::Path,
508 container_id: &str,
509 ip: &str,
510 ) -> Result<()> {
511 Self::ensure_alloc_dir(alloc_dir)?;
512 let path = alloc_dir.join(format!("{}.ip", container_id));
513 std::fs::write(&path, ip).map_err(|e| {
514 NucleusError::NetworkError(format!("Failed to record IP allocation: {}", e))
515 })?;
516 Ok(())
517 }
518
519 fn release_allocated_ip(container_id: &str) {
521 let alloc_dir = Self::ip_alloc_dir();
522 Self::release_allocated_ip_in_dir(&alloc_dir, container_id);
523 }
524
525 fn release_allocated_ip_in_dir(alloc_dir: &std::path::Path, container_id: &str) {
526 let path = alloc_dir.join(format!("{}.ip", container_id));
527 let _ = std::fs::remove_file(path);
528 }
529
530 fn ensure_alloc_dir(alloc_dir: &std::path::Path) -> Result<()> {
533 if alloc_dir.exists() {
536 if let Ok(meta) = std::fs::symlink_metadata(alloc_dir) {
537 if meta.file_type().is_symlink() {
538 return Err(NucleusError::NetworkError(format!(
539 "IP alloc dir {:?} is a symlink, refusing to use",
540 alloc_dir
541 )));
542 }
543 }
544 }
545 if let Some(parent) = alloc_dir.parent() {
547 if let Ok(meta) = std::fs::symlink_metadata(parent) {
548 if meta.file_type().is_symlink() {
549 return Err(NucleusError::NetworkError(format!(
550 "IP alloc dir parent {:?} is a symlink, refusing to use",
551 parent
552 )));
553 }
554 }
555 }
556
557 std::fs::create_dir_all(alloc_dir).map_err(|e| {
558 NucleusError::NetworkError(format!("Failed to create IP alloc dir: {}", e))
559 })?;
560
561 use std::os::unix::fs::PermissionsExt;
563 let perms = std::fs::Permissions::from_mode(0o700);
564 std::fs::set_permissions(alloc_dir, perms).map_err(|e| {
565 NucleusError::NetworkError(format!(
566 "Failed to set permissions on IP alloc dir {:?}: {}",
567 alloc_dir, e
568 ))
569 })?;
570
571 if let Ok(meta) = std::fs::symlink_metadata(alloc_dir) {
573 if meta.file_type().is_symlink() {
574 return Err(NucleusError::NetworkError(format!(
575 "IP alloc dir {:?} was replaced with a symlink during setup",
576 alloc_dir
577 )));
578 }
579 }
580 Ok(())
581 }
582
583 fn ip_alloc_dir() -> std::path::PathBuf {
584 if nix::unistd::Uid::effective().is_root() {
585 std::path::PathBuf::from("/var/run/nucleus/ip-alloc")
586 } else {
587 dirs::runtime_dir()
588 .map(|d| d.join("nucleus/ip-alloc"))
589 .or_else(|| dirs::data_local_dir().map(|d| d.join("nucleus/ip-alloc")))
590 .unwrap_or_else(|| {
591 dirs::home_dir()
592 .map(|h| h.join(".nucleus/ip-alloc"))
593 .unwrap_or_else(|| std::path::PathBuf::from("/var/run/nucleus/ip-alloc"))
594 })
595 }
596 }
597
598 fn read_pid_start_ticks(pid: u32) -> u64 {
601 let stat_path = format!("/proc/{}/stat", pid);
602 if let Ok(content) = std::fs::read_to_string(&stat_path) {
603 if let Some(after_comm) = content.rfind(')') {
606 return content[after_comm + 2..]
607 .split_whitespace()
608 .nth(19) .and_then(|s| s.parse().ok())
610 .unwrap_or(0);
611 }
612 }
613 0
614 }
615
616 fn gateway_from_subnet(subnet: &str) -> String {
618 let base = subnet.split('/').next().unwrap_or("10.0.42.0");
619 let parts: Vec<&str> = base.split('.').collect();
620 if parts.len() == 4 {
621 format!("{}.{}.{}.1", parts[0], parts[1], parts[2])
622 } else {
623 "10.0.42.1".to_string()
624 }
625 }
626
627 fn subnet_prefix(subnet: &str) -> u8 {
628 subnet
629 .split_once('/')
630 .and_then(|(_, p)| p.parse::<u8>().ok())
631 .filter(|p| *p <= 32)
632 .unwrap_or(24)
633 }
634
635 pub(crate) fn resolve_bin(name: &str) -> Result<String> {
642 let search_dirs: &[&str] = match name {
643 "iptables" => &["/usr/sbin/iptables", "/sbin/iptables", "/usr/bin/iptables"],
644 "slirp4netns" => &[
645 "/usr/bin/slirp4netns",
646 "/bin/slirp4netns",
647 "/run/current-system/sw/bin/slirp4netns",
648 ],
649 _ => &[],
650 };
651
652 for path in search_dirs {
653 let p = std::path::Path::new(path);
654 if p.exists() {
655 Self::validate_network_binary(p, name)?;
656 return Ok(path.to_string());
657 }
658 }
659
660 if let Ok(output) = Command::new("which").arg(name).output() {
662 if output.status.success() {
663 let resolved = String::from_utf8_lossy(&output.stdout).trim().to_string();
664 if !resolved.is_empty() {
665 let p = std::path::Path::new(&resolved);
666 Self::validate_network_binary(p, name)?;
667 return Ok(resolved);
668 }
669 }
670 }
671
672 Err(NucleusError::NetworkError(format!(
673 "Required binary '{}' not found or failed validation",
674 name
675 )))
676 }
677
678 fn validate_network_binary(path: &std::path::Path, name: &str) -> Result<()> {
681 use std::os::unix::fs::MetadataExt;
682
683 let meta = std::fs::metadata(path)
684 .map_err(|e| NucleusError::NetworkError(format!("Cannot stat {}: {}", name, e)))?;
685 let mode = meta.mode();
686 if mode & 0o022 != 0 {
687 return Err(NucleusError::NetworkError(format!(
688 "Binary '{}' at {:?} is writable by group/others (mode {:o}), refusing to execute",
689 name, path, mode
690 )));
691 }
692 let owner = meta.uid();
693 let euid = nix::unistd::Uid::effective().as_raw();
694 if owner != 0 && owner != euid {
695 return Err(NucleusError::NetworkError(format!(
696 "Binary '{}' at {:?} owned by UID {} (expected root or euid {}), refusing to execute",
697 name, path, owner, euid
698 )));
699 }
700 Ok(())
701 }
702
703 fn run_cmd(program: &str, args: &[&str]) -> Result<()> {
704 let resolved = Self::resolve_bin(program)?;
705 let output = Command::new(&resolved).args(args).output().map_err(|e| {
706 NucleusError::NetworkError(format!("Failed to run {} {:?}: {}", resolved, args, e))
707 })?;
708
709 if !output.status.success() {
710 let stderr = String::from_utf8_lossy(&output.stderr);
711 return Err(NucleusError::NetworkError(format!(
712 "{} {:?} failed: {}",
713 program, args, stderr
714 )));
715 }
716
717 Ok(())
718 }
719
720 fn run_cmd_owned(program: &str, args: &[String]) -> Result<()> {
721 let refs: Vec<&str> = args.iter().map(String::as_str).collect();
722 Self::run_cmd(program, &refs)
723 }
724
725 fn port_forward_rule_args(
726 operation: &str,
727 chain: &str,
728 container_ip: &str,
729 pf: &PortForward,
730 ) -> Vec<String> {
731 let mut args = vec![
732 "-t".to_string(),
733 "nat".to_string(),
734 operation.to_string(),
735 chain.to_string(),
736 "-p".to_string(),
737 pf.protocol.as_str().to_string(),
738 ];
739
740 if chain == "OUTPUT" {
741 args.extend([
742 "-m".to_string(),
743 "addrtype".to_string(),
744 "--dst-type".to_string(),
745 "LOCAL".to_string(),
746 ]);
747 }
748
749 if let Some(host_ip) = pf.host_ip {
750 args.extend(["-d".to_string(), host_ip.to_string()]);
751 }
752
753 args.extend([
754 "--dport".to_string(),
755 pf.host_port.to_string(),
756 "-j".to_string(),
757 "DNAT".to_string(),
758 "--to-destination".to_string(),
759 format!("{}:{}", container_ip, pf.container_port),
760 ]);
761
762 args
763 }
764
765 fn is_ip_in_use(ip: &str) -> Result<bool> {
766 let addr: Ipv4Addr = ip
767 .parse()
768 .map_err(|e| NucleusError::NetworkError(format!("invalid IP '{}': {}", ip, e)))?;
769 netlink::is_addr_in_use(&addr)
770 }
771
772 pub fn write_resolv_conf(root: &std::path::Path, dns: &[String]) -> Result<()> {
774 let resolv_path = root.join("etc/resolv.conf");
775 let content: String = dns
776 .iter()
777 .map(|server| format!("nameserver {}\n", server))
778 .collect();
779 std::fs::write(&resolv_path, content).map_err(|e| {
780 NucleusError::NetworkError(format!("Failed to write resolv.conf: {}", e))
781 })?;
782 Ok(())
783 }
784
    /// Bind-mounts a generated resolv.conf over `<root>/etc/resolv.conf`.
    ///
    /// The content (one `nameserver` line per entry in `dns`) is written to a
    /// memfd and mounted via its `/proc/self/fd/<n>` path. If the memfd cannot
    /// be created or written, the staging-file variant is used instead.
    ///
    /// # Errors
    /// Returns an error if the bind mount fails, or whatever the staging
    /// fallback returns when it is taken.
    pub fn bind_mount_resolv_conf(root: &std::path::Path, dns: &[String]) -> Result<()> {
        use nix::mount::{mount, MsFlags};

        let content: String = dns
            .iter()
            .map(|server| format!("nameserver {}\n", server))
            .collect();

        let memfd_name = std::ffi::CString::new("nucleus-resolv").map_err(|e| {
            NucleusError::NetworkError(format!("Failed to create memfd name: {}", e))
        })?;
        // SAFETY: `memfd_name` is a valid NUL-terminated C string that outlives the call.
        let raw_fd = unsafe { libc::memfd_create(memfd_name.as_ptr(), 0) };
        if raw_fd < 0 {
            // memfd unavailable: fall back to a staging file inside the rootfs.
            return Self::bind_mount_resolv_conf_staging(root, dns);
        }
        // SAFETY: `raw_fd` was just returned by a successful `memfd_create` and is
        // not owned by anything else, so taking ownership here is sound.
        let memfd = unsafe { std::os::fd::OwnedFd::from_raw_fd(raw_fd) };

        use std::io::Write as _;
        // Temporarily view the descriptor as a `File` to get `write_all`.
        let mut memfd_file = std::fs::File::from(memfd);
        if memfd_file.write_all(content.as_bytes()).is_err() {
            return Self::bind_mount_resolv_conf_staging(root, dns);
        }
        use std::os::fd::IntoRawFd;
        let memfd = {
            let raw = memfd_file.into_raw_fd();
            // SAFETY: `into_raw_fd` released ownership of `raw` without closing it,
            // so it is open and uniquely owned by the new `OwnedFd`.
            unsafe { std::os::fd::OwnedFd::from_raw_fd(raw) }
        };

        let target = root.join("etc/resolv.conf");
        // A bind mount needs an existing target; creation failure is ignored
        // here and would surface as a mount error below.
        if !target.exists() {
            let _ = std::fs::write(&target, "");
        }

        let memfd_path = format!("/proc/self/fd/{}", memfd.as_raw_fd());
        mount(
            Some(memfd_path.as_str()),
            &target,
            None::<&str>,
            MsFlags::MS_BIND,
            None::<&str>,
        )
        .map_err(|e| {
            NucleusError::NetworkError(format!("Failed to bind mount resolv.conf: {}", e))
        })?;

        info!("Bind-mounted resolv.conf for bridge networking (rootfs mode, memfd)");
        // `memfd` is dropped (closed) on return.
        // NOTE(review): presumably the bind mount keeps the file alive after the
        // descriptor is closed — confirm.
        Ok(())
    }
855
856 fn bind_mount_resolv_conf_staging(root: &std::path::Path, dns: &[String]) -> Result<()> {
858 use nix::mount::{mount, MsFlags};
859
860 let content: String = dns
861 .iter()
862 .map(|server| format!("nameserver {}\n", server))
863 .collect();
864
865 let staging = root.join("tmp/.resolv.conf.nucleus");
867 if let Some(parent) = staging.parent() {
868 std::fs::create_dir_all(parent).map_err(|e| {
869 NucleusError::NetworkError(format!(
870 "Failed to create resolv.conf staging parent: {}",
871 e
872 ))
873 })?;
874 }
875 std::fs::write(&staging, content).map_err(|e| {
876 NucleusError::NetworkError(format!("Failed to write staging resolv.conf: {}", e))
877 })?;
878
879 let target = root.join("etc/resolv.conf");
881 if !target.exists() {
882 let _ = std::fs::write(&target, "");
883 }
884
885 mount(
887 Some(staging.as_path()),
888 &target,
889 None::<&str>,
890 MsFlags::MS_BIND,
891 None::<&str>,
892 )
893 .map_err(|e| {
894 NucleusError::NetworkError(format!("Failed to bind mount resolv.conf: {}", e))
895 })?;
896
897 if let Err(e) = std::fs::remove_file(&staging) {
900 warn!("Failed to remove staging resolv.conf {:?}: {}", staging, e);
901 }
902
903 info!("Bind-mounted resolv.conf for bridge networking (rootfs mode, staging)");
904 Ok(())
905 }
906}
907
/// Safety net: a `BridgeNetwork` that was never explicitly cleaned up tears
/// itself down on drop. `cleanup_best_effort` returns immediately when the
/// state is already `Cleaned`.
impl Drop for BridgeNetwork {
    fn drop(&mut self) {
        self.cleanup_best_effort();
    }
}
913
/// Drop guard that undoes a partially completed bridge setup.
///
/// The setup code records each completed step on the guard; unless `disarm`
/// has been called, dropping it reverts those steps.
struct SetupRollback {
    /// Host-side veth name to delete if `veth_created` is set.
    veth_host: String,
    /// Subnet whose MASQUERADE rule is removed if `nat_added` is set.
    subnet: String,
    /// Whether the veth pair has been created.
    veth_created: bool,
    /// Whether the MASQUERADE rule has been added.
    nat_added: bool,
    /// Port forwards installed so far, as `(container_ip, forward)` pairs.
    port_forwards: Vec<(String, PortForward)>,
    /// `ip_forward` value read before setup changed it, if it could be read.
    prev_ip_forward: Option<String>,
    /// `(alloc_dir, container_id)` of the IP reservation to release, if any.
    reserved_ip: Option<(std::path::PathBuf, String)>,
    /// While `true`, dropping the guard performs the rollback.
    armed: bool,
}
924
impl SetupRollback {
    /// Creates an armed guard with no completed steps recorded yet, apart from
    /// the optional IP reservation passed in `reserved_ip`.
    fn new(
        veth_host: String,
        subnet: String,
        reserved_ip: Option<(std::path::PathBuf, String)>,
    ) -> Self {
        Self {
            veth_host,
            subnet,
            veth_created: false,
            nat_added: false,
            port_forwards: Vec::new(),
            prev_ip_forward: None,
            reserved_ip,
            armed: true,
        }
    }

    /// Marks setup as successful: dropping the guard afterwards does nothing.
    fn disarm(&mut self) {
        self.armed = false;
    }
}
947
948impl Drop for SetupRollback {
949 fn drop(&mut self) {
950 if !self.armed {
951 return;
952 }
953
954 for (container_ip, pf) in self.port_forwards.iter().rev() {
955 for chain in ["OUTPUT", "PREROUTING"] {
956 let args = BridgeNetwork::port_forward_rule_args("-D", chain, container_ip, pf);
957 if let Err(e) = BridgeNetwork::run_cmd_owned("iptables", &args) {
958 warn!(
959 "Rollback: failed to remove iptables {} rule for {}: {}",
960 chain, container_ip, e
961 );
962 }
963 }
964 }
965
966 if self.nat_added {
967 if let Err(e) = BridgeNetwork::run_cmd(
968 "iptables",
969 &[
970 "-t",
971 "nat",
972 "-D",
973 "POSTROUTING",
974 "-s",
975 &self.subnet,
976 "-j",
977 "MASQUERADE",
978 ],
979 ) {
980 warn!("Rollback: failed to remove NAT rule: {}", e);
981 }
982 }
983
984 if self.veth_created {
985 if let Err(e) = netlink::del_link(&self.veth_host) {
986 warn!("Rollback: failed to delete veth {}: {}", self.veth_host, e);
987 }
988 }
989
990 if let Some((alloc_dir, container_id)) = &self.reserved_ip {
991 BridgeNetwork::release_allocated_ip_in_dir(alloc_dir, container_id);
992 }
993 }
994}
995
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the arithmetic behind the allocator's rejection sampling: bytes
    /// below 253 map to host offsets 2..=254. This test restates the mapping
    /// locally and does not call the allocator itself.
    #[test]
    fn test_ip_allocation_rejection_sampling_range() {
        for byte in 0u8..253 {
            let offset = byte as u32 + 2;
            assert!(
                (2..=254).contains(&offset),
                "offset {} out of range",
                offset
            );
        }
        // These are the byte values the allocator discards.
        for byte in [253u8, 254, 255] {
            assert!(byte >= 253);
        }
    }

    /// A requested IP that already has a reservation file must be refused.
    #[test]
    fn test_reserve_ip_blocks_duplicate_requested_address() {
        let temp = tempfile::tempdir().unwrap();
        BridgeNetwork::record_allocated_ip_in_dir(temp.path(), "one", "10.0.42.2").unwrap();

        let err =
            BridgeNetwork::reserve_ip_in_dir(temp.path(), "two", "10.0.42.0/24", Some("10.0.42.2"))
                .unwrap_err();
        assert!(
            err.to_string().contains("already in use"),
            "second reservation of the same IP must fail"
        );
    }

    /// Dropping an armed rollback guard removes the recorded reservation file.
    #[test]
    fn test_setup_rollback_releases_reserved_ip() {
        let temp = tempfile::tempdir().unwrap();
        BridgeNetwork::record_allocated_ip_in_dir(temp.path(), "rollback", "10.0.42.3").unwrap();

        // No veth, NAT or port-forward steps are recorded, so the drop only
        // touches the reservation file.
        let rollback = SetupRollback {
            veth_host: "veth-test".to_string(),
            subnet: "10.0.42.0/24".to_string(),
            veth_created: false,
            nat_added: false,
            port_forwards: Vec::new(),
            prev_ip_forward: None,
            reserved_ip: Some((temp.path().to_path_buf(), "rollback".to_string())),
            armed: true,
        };

        drop(rollback);

        assert!(
            !temp.path().join("rollback.ip").exists(),
            "rollback must release reserved IP files on setup failure"
        );
    }

    /// Rules are generated for both chains, and the OUTPUT rule is limited to
    /// locally-destined traffic.
    #[test]
    fn test_port_forward_rules_include_output_chain_for_local_host_clients() {
        let pf = PortForward {
            host_ip: None,
            host_port: 8080,
            container_port: 80,
            protocol: crate::network::config::Protocol::Tcp,
        };

        let prerouting =
            BridgeNetwork::port_forward_rule_args("-A", "PREROUTING", "10.0.42.2", &pf);
        let output = BridgeNetwork::port_forward_rule_args("-A", "OUTPUT", "10.0.42.2", &pf);

        assert!(prerouting.iter().any(|arg| arg == "PREROUTING"));
        assert!(output.iter().any(|arg| arg == "OUTPUT"));
        assert!(
            output
                .windows(2)
                .any(|pair| pair[0] == "--dst-type" && pair[1] == "LOCAL"),
            "OUTPUT rule must target local-destination traffic"
        );
    }

    /// A configured host IP appears as a `-d` match in the rules of both chains.
    #[test]
    fn test_port_forward_rules_include_host_ip_when_configured() {
        let pf = PortForward {
            host_ip: Some(std::net::Ipv4Addr::new(127, 0, 0, 1)),
            host_port: 4173,
            container_port: 4173,
            protocol: crate::network::config::Protocol::Tcp,
        };

        let prerouting =
            BridgeNetwork::port_forward_rule_args("-A", "PREROUTING", "10.0.42.2", &pf);
        let output = BridgeNetwork::port_forward_rule_args("-A", "OUTPUT", "10.0.42.2", &pf);

        for args in [&prerouting, &output] {
            assert!(
                args.windows(2)
                    .any(|pair| pair[0] == "-d" && pair[1] == "127.0.0.1"),
                "port forward must restrict DNAT rules to the configured host IP"
            );
        }
    }
}