1use super::{netlink, netns};
2use crate::error::{NucleusError, Result, StateTransition};
3use crate::network::config::{BridgeConfig, EgressPolicy, PortForward};
4use crate::network::NetworkState;
5use std::fs::OpenOptions;
6use std::net::Ipv4Addr;
7use std::os::fd::FromRawFd;
8use std::os::unix::fs::FileTypeExt;
9use std::os::unix::fs::OpenOptionsExt;
10use std::os::unix::io::AsRawFd;
11use std::process::Command;
12use tracing::{debug, info, warn};
13
14pub struct BridgeNetwork {
16 config: BridgeConfig,
17 container_ip: String,
18 veth_host: String,
19 container_id: String,
20 prev_ip_forward: Option<String>,
21 state: NetworkState,
22}
23
24impl BridgeNetwork {
25 fn open_dev_urandom() -> Result<std::fs::File> {
26 let file = OpenOptions::new()
27 .read(true)
28 .custom_flags(libc::O_NOFOLLOW | libc::O_CLOEXEC)
29 .open("/dev/urandom")
30 .map_err(|e| {
31 NucleusError::NetworkError(format!("Failed to open /dev/urandom: {}", e))
32 })?;
33
34 let metadata = file.metadata().map_err(|e| {
35 NucleusError::NetworkError(format!("Failed to stat /dev/urandom: {}", e))
36 })?;
37 if !metadata.file_type().is_char_device() {
38 return Err(NucleusError::NetworkError(
39 "/dev/urandom is not a character device".to_string(),
40 ));
41 }
42
43 Ok(file)
44 }
45
46 pub fn setup(pid: u32, config: &BridgeConfig) -> Result<Self> {
53 Self::setup_for(pid, config, &format!("{:x}", pid))
54 }
55
56 pub fn setup_with_id(pid: u32, config: &BridgeConfig, container_id: &str) -> Result<Self> {
58 Self::setup_for(pid, config, container_id)
59 }
60
61 fn setup_for(pid: u32, config: &BridgeConfig, container_id: &str) -> Result<Self> {
62 config.validate()?;
64
65 let mut net_state = NetworkState::Unconfigured;
66 net_state = net_state.transition(NetworkState::Configuring)?;
67
68 let alloc_dir = Self::ip_alloc_dir();
69 let container_ip = Self::reserve_ip_in_dir(
70 &alloc_dir,
71 container_id,
72 &config.subnet,
73 config.container_ip.as_deref(),
74 )?;
75 let prefix = Self::subnet_prefix(&config.subnet);
76
77 let veth_host_full = format!("veth-{:x}", pid);
79 let veth_cont_full = format!("vethc-{:x}", pid);
80 let veth_host = veth_host_full[..veth_host_full.len().min(15)].to_string();
81 let veth_container = veth_cont_full[..veth_cont_full.len().min(15)].to_string();
82 let mut rollback = SetupRollback::new(
83 veth_host.clone(),
84 config.subnet.clone(),
85 Some((alloc_dir.clone(), container_id.to_string())),
86 );
87
88 Self::ensure_bridge_for(&config.bridge_name, &config.subnet)?;
90
91 netlink::create_veth(&veth_host, &veth_container)?;
93 rollback.veth_created = true;
94
95 netlink::set_link_master(&veth_host, &config.bridge_name)?;
97 netlink::set_link_up(&veth_host)?;
98
99 netlink::set_link_netns(&veth_container, pid)?;
101
102 let start_ticks = Self::read_pid_start_ticks(pid);
106 if start_ticks == 0 {
107 drop(rollback);
108 return Err(NucleusError::NetworkError(format!(
109 "Cannot read start_ticks for PID {} — process may have exited",
110 pid
111 )));
112 }
113
114 let container_addr: Ipv4Addr = container_ip.parse().map_err(|e| {
115 NucleusError::NetworkError(format!("invalid container IP '{}': {}", container_ip, e))
116 })?;
117 {
118 let vc = veth_container.clone();
119 netns::in_netns(pid, move || {
120 netlink::add_addr(&vc, container_addr, prefix)?;
121 netlink::set_link_up(&vc)?;
122 netlink::set_link_up("lo")?;
123 Ok(())
124 })?;
125 }
126
127 let current_ticks = Self::read_pid_start_ticks(pid);
129 if current_ticks != start_ticks {
130 drop(rollback);
131 return Err(NucleusError::NetworkError(format!(
132 "PID {} was recycled during network setup (start_ticks changed: {} -> {})",
133 pid, start_ticks, current_ticks
134 )));
135 }
136
137 let gateway = Self::gateway_from_subnet(&config.subnet);
139 let gateway_addr: Ipv4Addr = gateway.parse().map_err(|e| {
140 NucleusError::NetworkError(format!("invalid gateway IP '{}': {}", gateway, e))
141 })?;
142 netns::in_netns(pid, move || netlink::add_default_route(gateway_addr))?;
143
144 Self::run_cmd(
146 "iptables",
147 &[
148 "-t",
149 "nat",
150 "-A",
151 "POSTROUTING",
152 "-s",
153 &config.subnet,
154 "-j",
155 "MASQUERADE",
156 ],
157 )?;
158 rollback.nat_added = true;
159
160 let prev_ip_forward = match std::fs::read_to_string("/proc/sys/net/ipv4/ip_forward") {
162 Ok(v) => Some(v.trim().to_string()),
163 Err(e) => {
164 warn!(
165 "Could not read ip_forward state (will not restore on cleanup): {}",
166 e
167 );
168 None
169 }
170 };
171 rollback.prev_ip_forward = prev_ip_forward;
172 std::fs::write("/proc/sys/net/ipv4/ip_forward", "1").map_err(|e| {
173 NucleusError::NetworkError(format!("Failed to enable IP forwarding: {}", e))
174 })?;
175
176 for pf in &config.port_forwards {
178 Self::setup_port_forward_for(&container_ip, pf)?;
179 rollback
180 .port_forwards
181 .push((container_ip.clone(), pf.clone()));
182 }
183
184 net_state = net_state.transition(NetworkState::Active)?;
185
186 info!(
187 "Bridge network configured: {} -> {} (IP: {})",
188 veth_host, veth_container, container_ip
189 );
190 let prev_ip_forward = rollback.prev_ip_forward.clone();
191 rollback.disarm();
192
193 Ok(Self {
194 config: config.clone(),
195 container_ip,
196 veth_host,
197 container_id: container_id.to_string(),
198 prev_ip_forward,
199 state: net_state,
200 })
201 }
202
203 pub fn apply_egress_policy(&self, pid: u32, policy: &EgressPolicy) -> Result<()> {
208 for cidr in &policy.allowed_cidrs {
210 crate::network::config::validate_egress_cidr(cidr)
211 .map_err(|e| NucleusError::NetworkError(format!("Invalid egress CIDR: {}", e)))?;
212 }
213
214 let ipt = Self::resolve_bin("iptables")?;
215
216 netns::exec_in_netns(pid, &ipt, &["-P", "OUTPUT", "DROP"])?;
219 netns::exec_in_netns(pid, &ipt, &["-F", "OUTPUT"])?;
221
222 netns::exec_in_netns(pid, &ipt, &["-A", "OUTPUT", "-o", "lo", "-j", "ACCEPT"])?;
224
225 netns::exec_in_netns(
226 pid,
227 &ipt,
228 &[
229 "-A",
230 "OUTPUT",
231 "-m",
232 "conntrack",
233 "--ctstate",
234 "ESTABLISHED,RELATED",
235 "-j",
236 "ACCEPT",
237 ],
238 )?;
239
240 if policy.allow_dns {
242 for dns in &self.config.dns {
243 netns::exec_in_netns(
244 pid,
245 &ipt,
246 &[
247 "-A", "OUTPUT", "-p", "udp", "-d", dns, "--dport", "53", "-j", "ACCEPT",
248 ],
249 )?;
250 netns::exec_in_netns(
251 pid,
252 &ipt,
253 &[
254 "-A", "OUTPUT", "-p", "tcp", "-d", dns, "--dport", "53", "-j", "ACCEPT",
255 ],
256 )?;
257 }
258 }
259
260 for cidr in &policy.allowed_cidrs {
262 if policy.allowed_tcp_ports.is_empty() && policy.allowed_udp_ports.is_empty() {
263 netns::exec_in_netns(pid, &ipt, &["-A", "OUTPUT", "-d", cidr, "-j", "ACCEPT"])?;
264 } else {
265 for port in &policy.allowed_tcp_ports {
266 let port_s = port.to_string();
267 netns::exec_in_netns(
268 pid,
269 &ipt,
270 &[
271 "-A", "OUTPUT", "-p", "tcp", "-d", cidr, "--dport", &port_s, "-j",
272 "ACCEPT",
273 ],
274 )?;
275 }
276 for port in &policy.allowed_udp_ports {
277 let port_s = port.to_string();
278 netns::exec_in_netns(
279 pid,
280 &ipt,
281 &[
282 "-A", "OUTPUT", "-p", "udp", "-d", cidr, "--dport", &port_s, "-j",
283 "ACCEPT",
284 ],
285 )?;
286 }
287 }
288 }
289
290 if policy.log_denied {
292 netns::exec_in_netns(
293 pid,
294 &ipt,
295 &[
296 "-A",
297 "OUTPUT",
298 "-m",
299 "limit",
300 "--limit",
301 "5/min",
302 "-j",
303 "LOG",
304 "--log-prefix",
305 "nucleus-egress-denied: ",
306 ],
307 )?;
308 }
309
310 netns::exec_in_netns(pid, &ipt, &["-P", "OUTPUT", "DROP"])?;
312
313 info!(
314 "Egress policy applied: {} allowed CIDRs",
315 policy.allowed_cidrs.len()
316 );
317 debug!("Egress policy details: {:?}", policy);
318
319 Ok(())
320 }
321
322 pub fn cleanup(mut self) -> Result<()> {
326 self.state = self.state.transition(NetworkState::Cleaned)?;
327
328 Self::release_allocated_ip(&self.container_id);
330
331 for pf in &self.config.port_forwards {
333 if let Err(e) = self.cleanup_port_forward(pf) {
334 warn!("Failed to cleanup port forward: {}", e);
335 }
336 }
337
338 let _ = Self::run_cmd(
340 "iptables",
341 &[
342 "-t",
343 "nat",
344 "-D",
345 "POSTROUTING",
346 "-s",
347 &self.config.subnet,
348 "-j",
349 "MASQUERADE",
350 ],
351 );
352
353 let _ = netlink::del_link(&self.veth_host);
355
356 if let Some(ref prev) = self.prev_ip_forward {
358 if prev == "0" {
359 if let Err(e) = std::fs::write("/proc/sys/net/ipv4/ip_forward", "0") {
360 warn!("Failed to restore ip_forward to 0: {}", e);
361 } else {
362 info!("Restored net.ipv4.ip_forward to 0");
363 }
364 }
365 }
366
367 info!("Bridge network cleaned up");
368 Ok(())
369 }
370
371 fn cleanup_best_effort(&mut self) {
375 if self.state == NetworkState::Cleaned {
376 return;
377 }
378
379 Self::release_allocated_ip(&self.container_id);
380
381 for pf in &self.config.port_forwards {
382 let _ = self.cleanup_port_forward(pf);
383 }
384
385 let _ = Self::run_cmd(
386 "iptables",
387 &[
388 "-t",
389 "nat",
390 "-D",
391 "POSTROUTING",
392 "-s",
393 &self.config.subnet,
394 "-j",
395 "MASQUERADE",
396 ],
397 );
398
399 let _ = netlink::del_link(&self.veth_host);
400
401 if let Some(ref prev) = self.prev_ip_forward {
402 if prev == "0" {
403 let _ = std::fs::write("/proc/sys/net/ipv4/ip_forward", "0");
404 }
405 }
406
407 self.state = NetworkState::Cleaned;
408 debug!("Bridge network cleaned up (best-effort via drop)");
409 }
410
411 pub fn cleanup_orphaned_rules(subnet: &str) {
417 let output = match Command::new("iptables")
419 .args(["-t", "nat", "-L", "POSTROUTING", "-n"])
420 .output()
421 {
422 Ok(o) => o,
423 Err(e) => {
424 debug!("Cannot check iptables for orphaned rules: {}", e);
425 return;
426 }
427 };
428
429 let stdout = String::from_utf8_lossy(&output.stdout);
430 let mut orphaned_count = 0u32;
431 for line in stdout.lines() {
432 if line.contains("MASQUERADE") && line.contains(subnet) {
433 let _ = Self::run_cmd(
435 "iptables",
436 &[
437 "-t",
438 "nat",
439 "-D",
440 "POSTROUTING",
441 "-s",
442 subnet,
443 "-j",
444 "MASQUERADE",
445 ],
446 );
447 orphaned_count += 1;
448 }
449 }
450
451 if orphaned_count > 0 {
452 info!(
453 "Cleaned up {} orphaned iptables MASQUERADE rule(s) for subnet {}",
454 orphaned_count, subnet
455 );
456 }
457 }
458
459 fn ensure_bridge_for(bridge_name: &str, subnet: &str) -> Result<()> {
460 if netlink::link_exists(bridge_name) {
461 return Ok(());
462 }
463
464 netlink::create_bridge(bridge_name)?;
465
466 let gateway = Self::gateway_from_subnet(subnet);
467 let gateway_addr: Ipv4Addr = gateway.parse().map_err(|e| {
468 NucleusError::NetworkError(format!("invalid bridge gateway '{}': {}", gateway, e))
469 })?;
470 netlink::add_addr(bridge_name, gateway_addr, Self::subnet_prefix(subnet))?;
471 netlink::set_link_up(bridge_name)?;
472
473 info!("Created bridge {}", bridge_name);
474 Ok(())
475 }
476
477 fn setup_port_forward_for(container_ip: &str, pf: &PortForward) -> Result<()> {
478 for chain in ["PREROUTING", "OUTPUT"] {
479 let args = Self::port_forward_rule_args("-A", chain, container_ip, pf);
480 Self::run_cmd_owned("iptables", &args)?;
481 }
482
483 let host_ip = pf
484 .host_ip
485 .map(|ip| ip.to_string())
486 .unwrap_or_else(|| "0.0.0.0".to_string());
487 info!(
488 "Port forward: {}:{} -> {}:{}/{}",
489 host_ip, pf.host_port, container_ip, pf.container_port, pf.protocol
490 );
491 Ok(())
492 }
493
494 fn cleanup_port_forward(&self, pf: &PortForward) -> Result<()> {
495 for chain in ["OUTPUT", "PREROUTING"] {
496 let args = Self::port_forward_rule_args("-D", chain, &self.container_ip, pf);
497 Self::run_cmd_owned("iptables", &args)?;
498 }
499 Ok(())
500 }
501
502 fn allocate_ip_with_reserved(
508 subnet: &str,
509 reserved: &std::collections::HashSet<String>,
510 ) -> Result<String> {
511 let base = subnet.split('/').next().unwrap_or("10.0.42.0");
512 let parts: Vec<&str> = base.split('.').collect();
513 if parts.len() != 4 {
514 return Ok("10.0.42.2".to_string());
515 }
516
517 let mut rand_buf = [0u8; 128];
524 let mut urandom = Self::open_dev_urandom()?;
525 std::io::Read::read_exact(&mut urandom, &mut rand_buf).map_err(|e| {
526 NucleusError::NetworkError(format!("Failed to read /dev/urandom: {}", e))
527 })?;
528 for &byte in &rand_buf {
529 if byte >= 253 {
531 continue;
532 }
533 let offset = byte as u32 + 2;
534 let candidate = format!("{}.{}.{}.{}", parts[0], parts[1], parts[2], offset);
535 if reserved.contains(&candidate) {
536 continue;
537 }
538 if !Self::is_ip_in_use(&candidate)? {
539 return Ok(candidate);
541 }
542 }
543
544 Err(NucleusError::NetworkError(format!(
545 "Failed to allocate free IP in subnet {}",
546 subnet
547 )))
548 }
549
550 fn reserve_ip_in_dir(
551 alloc_dir: &std::path::Path,
552 container_id: &str,
553 subnet: &str,
554 requested_ip: Option<&str>,
555 ) -> Result<String> {
556 Self::ensure_alloc_dir(alloc_dir)?;
557 let lock_path = alloc_dir.join(".lock");
558 let lock_file = std::fs::OpenOptions::new()
559 .create(true)
560 .write(true)
561 .truncate(false)
562 .open(&lock_path)
563 .map_err(|e| {
564 NucleusError::NetworkError(format!("Failed to open IP alloc lock: {}", e))
565 })?;
566 let lock_ret = unsafe { libc::flock(lock_file.as_raw_fd(), libc::LOCK_EX) };
569 if lock_ret != 0 {
570 return Err(NucleusError::NetworkError(format!(
571 "Failed to acquire IP alloc lock: {}",
572 std::io::Error::last_os_error()
573 )));
574 }
575
576 let reserved = Self::collect_reserved_ips_in_dir(alloc_dir);
577 let ip = match requested_ip {
578 Some(ip) => {
579 if reserved.contains(ip) || Self::is_ip_in_use(ip)? {
580 return Err(NucleusError::NetworkError(format!(
581 "Requested container IP {} is already in use",
582 ip
583 )));
584 }
585 ip.to_string()
586 }
587 None => Self::allocate_ip_with_reserved(subnet, &reserved)?,
588 };
589
590 Self::record_allocated_ip_in_dir(alloc_dir, container_id, &ip)?;
591 Ok(ip)
592 }
593
/// Returns the set of addresses recorded in `alloc_dir`.
///
/// Every UTF-8-named `*.ip` file is read; its trimmed content is one address.
/// Unreadable directories, entries, or files, and files with blank content,
/// are skipped silently.
fn collect_reserved_ips_in_dir(
    alloc_dir: &std::path::Path,
) -> std::collections::HashSet<String> {
    let entries = match std::fs::read_dir(alloc_dir) {
        Ok(listing) => listing,
        Err(_) => return std::collections::HashSet::new(),
    };

    entries
        .flatten()
        .filter(|entry| {
            entry
                .file_name()
                .to_str()
                .map_or(false, |name| name.ends_with(".ip"))
        })
        .filter_map(|entry| std::fs::read_to_string(entry.path()).ok())
        .map(|raw| raw.trim().to_string())
        .filter(|ip| !ip.is_empty())
        .collect()
}
615
616 fn record_allocated_ip_in_dir(
618 alloc_dir: &std::path::Path,
619 container_id: &str,
620 ip: &str,
621 ) -> Result<()> {
622 Self::ensure_alloc_dir(alloc_dir)?;
623 let path = alloc_dir.join(format!("{}.ip", container_id));
624 std::fs::write(&path, ip).map_err(|e| {
625 NucleusError::NetworkError(format!("Failed to record IP allocation: {}", e))
626 })?;
627 Ok(())
628 }
629
630 fn release_allocated_ip(container_id: &str) {
632 let alloc_dir = Self::ip_alloc_dir();
633 Self::release_allocated_ip_in_dir(&alloc_dir, container_id);
634 }
635
/// Deletes `<alloc_dir>/<container_id>.ip`, ignoring any error (including a
/// missing file).
fn release_allocated_ip_in_dir(alloc_dir: &std::path::Path, container_id: &str) {
    let mut record = alloc_dir.to_path_buf();
    record.push(format!("{}.ip", container_id));
    let _ = std::fs::remove_file(&record);
}
640
641 fn ensure_alloc_dir(alloc_dir: &std::path::Path) -> Result<()> {
644 if alloc_dir.exists() {
647 if let Ok(meta) = std::fs::symlink_metadata(alloc_dir) {
648 if meta.file_type().is_symlink() {
649 return Err(NucleusError::NetworkError(format!(
650 "IP alloc dir {:?} is a symlink, refusing to use",
651 alloc_dir
652 )));
653 }
654 }
655 }
656 if let Some(parent) = alloc_dir.parent() {
658 if let Ok(meta) = std::fs::symlink_metadata(parent) {
659 if meta.file_type().is_symlink() {
660 return Err(NucleusError::NetworkError(format!(
661 "IP alloc dir parent {:?} is a symlink, refusing to use",
662 parent
663 )));
664 }
665 }
666 }
667
668 std::fs::create_dir_all(alloc_dir).map_err(|e| {
669 NucleusError::NetworkError(format!("Failed to create IP alloc dir: {}", e))
670 })?;
671
672 use std::os::unix::fs::PermissionsExt;
674 let perms = std::fs::Permissions::from_mode(0o700);
675 std::fs::set_permissions(alloc_dir, perms).map_err(|e| {
676 NucleusError::NetworkError(format!(
677 "Failed to set permissions on IP alloc dir {:?}: {}",
678 alloc_dir, e
679 ))
680 })?;
681
682 if let Ok(meta) = std::fs::symlink_metadata(alloc_dir) {
684 if meta.file_type().is_symlink() {
685 return Err(NucleusError::NetworkError(format!(
686 "IP alloc dir {:?} was replaced with a symlink during setup",
687 alloc_dir
688 )));
689 }
690 }
691 Ok(())
692 }
693
694 fn ip_alloc_dir() -> std::path::PathBuf {
695 if nix::unistd::Uid::effective().is_root() {
696 std::path::PathBuf::from("/var/run/nucleus/ip-alloc")
697 } else {
698 dirs::runtime_dir()
699 .map(|d| d.join("nucleus/ip-alloc"))
700 .or_else(|| dirs::data_local_dir().map(|d| d.join("nucleus/ip-alloc")))
701 .unwrap_or_else(|| {
702 dirs::home_dir()
703 .map(|h| h.join(".nucleus/ip-alloc"))
704 .unwrap_or_else(|| std::path::PathBuf::from("/var/run/nucleus/ip-alloc"))
705 })
706 }
707 }
708
/// Reads the start time of `pid` from `/proc/<pid>/stat`.
///
/// The value is the 20th whitespace-separated field after the last `)` in the
/// file. Parsing starts at the last `)` because the command name before it
/// may itself contain spaces or parentheses.
///
/// Returns `0` when the file cannot be read or the field is missing or not a
/// number. Slicing is checked, so truncated content can never cause a panic.
fn read_pid_start_ticks(pid: u32) -> u64 {
    std::fs::read_to_string(format!("/proc/{}/stat", pid))
        .ok()
        .and_then(|content| {
            let close = content.rfind(')')?;
            // `)` is one byte, so `close + 1` is at most `len` and lands on a
            // char boundary; `get` keeps this panic-free regardless.
            let rest = content.get(close + 1..)?;
            rest.split_whitespace().nth(19)?.parse::<u64>().ok()
        })
        .unwrap_or(0)
}
726
/// Derives the gateway address for `subnet`: the first three dot-separated
/// parts of the base address followed by `.1`.
///
/// Falls back to `10.0.42.1` when the base does not have exactly four parts.
/// The parts are not checked to be numeric.
fn gateway_from_subnet(subnet: &str) -> String {
    let network = subnet.split('/').next().unwrap_or("10.0.42.0");
    let octets: Vec<&str> = network.split('.').collect();
    match octets.as_slice() {
        [first, second, third, _] => format!("{}.{}.{}.1", first, second, third),
        _ => "10.0.42.1".to_string(),
    }
}
737
/// Extracts the prefix length from a `base/prefix` subnet string.
///
/// Returns `24` when there is no `/`, or when the text after the first `/`
/// is not a number in `0..=32`.
fn subnet_prefix(subnet: &str) -> u8 {
    const DEFAULT_PREFIX: u8 = 24;

    match subnet.split_once('/') {
        Some((_, bits)) => match bits.parse::<u8>() {
            Ok(prefix) if prefix <= 32 => prefix,
            _ => DEFAULT_PREFIX,
        },
        None => DEFAULT_PREFIX,
    }
}
745
746 fn resolve_bin(name: &str) -> Result<String> {
753 let search_dirs: &[&str] = match name {
754 "iptables" => &["/usr/sbin/iptables", "/sbin/iptables", "/usr/bin/iptables"],
755 _ => &[],
756 };
757
758 for path in search_dirs {
759 let p = std::path::Path::new(path);
760 if p.exists() {
761 Self::validate_network_binary(p, name)?;
762 return Ok(path.to_string());
763 }
764 }
765
766 if let Ok(output) = Command::new("which").arg(name).output() {
768 if output.status.success() {
769 let resolved = String::from_utf8_lossy(&output.stdout).trim().to_string();
770 if !resolved.is_empty() {
771 let p = std::path::Path::new(&resolved);
772 Self::validate_network_binary(p, name)?;
773 return Ok(resolved);
774 }
775 }
776 }
777
778 Err(NucleusError::NetworkError(format!(
779 "Required binary '{}' not found or failed validation",
780 name
781 )))
782 }
783
784 fn validate_network_binary(path: &std::path::Path, name: &str) -> Result<()> {
787 use std::os::unix::fs::MetadataExt;
788
789 let meta = std::fs::metadata(path)
790 .map_err(|e| NucleusError::NetworkError(format!("Cannot stat {}: {}", name, e)))?;
791 let mode = meta.mode();
792 if mode & 0o022 != 0 {
793 return Err(NucleusError::NetworkError(format!(
794 "Binary '{}' at {:?} is writable by group/others (mode {:o}), refusing to execute",
795 name, path, mode
796 )));
797 }
798 let owner = meta.uid();
799 let euid = nix::unistd::Uid::effective().as_raw();
800 if owner != 0 && owner != euid {
801 return Err(NucleusError::NetworkError(format!(
802 "Binary '{}' at {:?} owned by UID {} (expected root or euid {}), refusing to execute",
803 name, path, owner, euid
804 )));
805 }
806 Ok(())
807 }
808
809 fn run_cmd(program: &str, args: &[&str]) -> Result<()> {
810 let resolved = Self::resolve_bin(program)?;
811 let output = Command::new(&resolved).args(args).output().map_err(|e| {
812 NucleusError::NetworkError(format!("Failed to run {} {:?}: {}", resolved, args, e))
813 })?;
814
815 if !output.status.success() {
816 let stderr = String::from_utf8_lossy(&output.stderr);
817 return Err(NucleusError::NetworkError(format!(
818 "{} {:?} failed: {}",
819 program, args, stderr
820 )));
821 }
822
823 Ok(())
824 }
825
826 fn run_cmd_owned(program: &str, args: &[String]) -> Result<()> {
827 let refs: Vec<&str> = args.iter().map(String::as_str).collect();
828 Self::run_cmd(program, &refs)
829 }
830
831 fn port_forward_rule_args(
832 operation: &str,
833 chain: &str,
834 container_ip: &str,
835 pf: &PortForward,
836 ) -> Vec<String> {
837 let mut args = vec![
838 "-t".to_string(),
839 "nat".to_string(),
840 operation.to_string(),
841 chain.to_string(),
842 "-p".to_string(),
843 pf.protocol.as_str().to_string(),
844 ];
845
846 if chain == "OUTPUT" {
847 args.extend([
848 "-m".to_string(),
849 "addrtype".to_string(),
850 "--dst-type".to_string(),
851 "LOCAL".to_string(),
852 ]);
853 }
854
855 if let Some(host_ip) = pf.host_ip {
856 args.extend(["-d".to_string(), host_ip.to_string()]);
857 }
858
859 args.extend([
860 "--dport".to_string(),
861 pf.host_port.to_string(),
862 "-j".to_string(),
863 "DNAT".to_string(),
864 "--to-destination".to_string(),
865 format!("{}:{}", container_ip, pf.container_port),
866 ]);
867
868 args
869 }
870
871 fn is_ip_in_use(ip: &str) -> Result<bool> {
872 let addr: Ipv4Addr = ip
873 .parse()
874 .map_err(|e| NucleusError::NetworkError(format!("invalid IP '{}': {}", ip, e)))?;
875 netlink::is_addr_in_use(&addr)
876 }
877
878 pub fn write_resolv_conf(root: &std::path::Path, dns: &[String]) -> Result<()> {
880 let resolv_path = root.join("etc/resolv.conf");
881 let content: String = dns
882 .iter()
883 .map(|server| format!("nameserver {}\n", server))
884 .collect();
885 std::fs::write(&resolv_path, content).map_err(|e| {
886 NucleusError::NetworkError(format!("Failed to write resolv.conf: {}", e))
887 })?;
888 Ok(())
889 }
890
891 pub fn bind_mount_resolv_conf(root: &std::path::Path, dns: &[String]) -> Result<()> {
897 use nix::mount::{mount, MsFlags};
898
899 let content: String = dns
900 .iter()
901 .map(|server| format!("nameserver {}\n", server))
902 .collect();
903
904 let memfd_name = std::ffi::CString::new("nucleus-resolv").map_err(|e| {
906 NucleusError::NetworkError(format!("Failed to create memfd name: {}", e))
907 })?;
908 let raw_fd = unsafe { libc::memfd_create(memfd_name.as_ptr(), 0) };
911 if raw_fd < 0 {
912 return Self::bind_mount_resolv_conf_staging(root, dns);
914 }
915 let memfd = unsafe { std::os::fd::OwnedFd::from_raw_fd(raw_fd) };
919
920 let write_result = unsafe {
924 libc::write(
925 memfd.as_raw_fd(),
926 content.as_ptr() as *const libc::c_void,
927 content.len(),
928 )
929 };
930 if write_result < 0 {
931 return Self::bind_mount_resolv_conf_staging(root, dns);
933 }
934
935 let target = root.join("etc/resolv.conf");
937 if !target.exists() {
938 let _ = std::fs::write(&target, "");
939 }
940
941 let memfd_path = format!("/proc/self/fd/{}", memfd.as_raw_fd());
943 mount(
944 Some(memfd_path.as_str()),
945 &target,
946 None::<&str>,
947 MsFlags::MS_BIND,
948 None::<&str>,
949 )
950 .map_err(|e| {
951 NucleusError::NetworkError(format!("Failed to bind mount resolv.conf: {}", e))
953 })?;
954
955 info!("Bind-mounted resolv.conf for bridge networking (rootfs mode, memfd)");
959 Ok(())
960 }
961
962 fn bind_mount_resolv_conf_staging(root: &std::path::Path, dns: &[String]) -> Result<()> {
964 use nix::mount::{mount, MsFlags};
965
966 let content: String = dns
967 .iter()
968 .map(|server| format!("nameserver {}\n", server))
969 .collect();
970
971 let staging = root.join("tmp/.resolv.conf.nucleus");
973 if let Some(parent) = staging.parent() {
974 std::fs::create_dir_all(parent).map_err(|e| {
975 NucleusError::NetworkError(format!(
976 "Failed to create resolv.conf staging parent: {}",
977 e
978 ))
979 })?;
980 }
981 std::fs::write(&staging, content).map_err(|e| {
982 NucleusError::NetworkError(format!("Failed to write staging resolv.conf: {}", e))
983 })?;
984
985 let target = root.join("etc/resolv.conf");
987 if !target.exists() {
988 let _ = std::fs::write(&target, "");
989 }
990
991 mount(
993 Some(staging.as_path()),
994 &target,
995 None::<&str>,
996 MsFlags::MS_BIND,
997 None::<&str>,
998 )
999 .map_err(|e| {
1000 NucleusError::NetworkError(format!("Failed to bind mount resolv.conf: {}", e))
1001 })?;
1002
1003 if let Err(e) = std::fs::remove_file(&staging) {
1006 warn!("Failed to remove staging resolv.conf {:?}: {}", staging, e);
1007 }
1008
1009 info!("Bind-mounted resolv.conf for bridge networking (rootfs mode, staging)");
1010 Ok(())
1011 }
1012}
1013
1014impl Drop for BridgeNetwork {
1015 fn drop(&mut self) {
1016 self.cleanup_best_effort();
1017 }
1018}
1019
1020struct SetupRollback {
1021 veth_host: String,
1022 subnet: String,
1023 veth_created: bool,
1024 nat_added: bool,
1025 port_forwards: Vec<(String, PortForward)>,
1026 prev_ip_forward: Option<String>,
1027 reserved_ip: Option<(std::path::PathBuf, String)>,
1028 armed: bool,
1029}
1030
1031impl SetupRollback {
1032 fn new(
1033 veth_host: String,
1034 subnet: String,
1035 reserved_ip: Option<(std::path::PathBuf, String)>,
1036 ) -> Self {
1037 Self {
1038 veth_host,
1039 subnet,
1040 veth_created: false,
1041 nat_added: false,
1042 port_forwards: Vec::new(),
1043 prev_ip_forward: None,
1044 reserved_ip,
1045 armed: true,
1046 }
1047 }
1048
1049 fn disarm(&mut self) {
1050 self.armed = false;
1051 }
1052}
1053
1054impl Drop for SetupRollback {
1055 fn drop(&mut self) {
1056 if !self.armed {
1057 return;
1058 }
1059
1060 for (container_ip, pf) in self.port_forwards.iter().rev() {
1061 for chain in ["OUTPUT", "PREROUTING"] {
1062 let args = BridgeNetwork::port_forward_rule_args("-D", chain, container_ip, pf);
1063 if let Err(e) = BridgeNetwork::run_cmd_owned("iptables", &args) {
1064 warn!(
1065 "Rollback: failed to remove iptables {} rule for {}: {}",
1066 chain, container_ip, e
1067 );
1068 }
1069 }
1070 }
1071
1072 if self.nat_added {
1073 if let Err(e) = BridgeNetwork::run_cmd(
1074 "iptables",
1075 &[
1076 "-t",
1077 "nat",
1078 "-D",
1079 "POSTROUTING",
1080 "-s",
1081 &self.subnet,
1082 "-j",
1083 "MASQUERADE",
1084 ],
1085 ) {
1086 warn!("Rollback: failed to remove NAT rule: {}", e);
1087 }
1088 }
1089
1090 if self.veth_created {
1091 if let Err(e) = netlink::del_link(&self.veth_host) {
1092 warn!("Rollback: failed to delete veth {}: {}", self.veth_host, e);
1093 }
1094 }
1095
1096 if let Some((alloc_dir, container_id)) = &self.reserved_ip {
1097 BridgeNetwork::release_allocated_ip_in_dir(alloc_dir, container_id);
1098 }
1099 }
1100}
1101
#[cfg(test)]
mod tests {
    use super::*;

    /// Rejection sampling maps accepted bytes to last octets 2..=254 and
    /// rejects bytes 253..=255.
    #[test]
    fn test_ip_allocation_rejection_sampling_range() {
        let accepted_offsets = (0u8..253).map(|byte| byte as u32 + 2);
        for offset in accepted_offsets {
            assert!(
                (2..=254).contains(&offset),
                "offset {} out of range",
                offset
            );
        }
        for byte in [253u8, 254, 255] {
            assert!(byte >= 253);
        }
    }

    /// A requested IP that another container already recorded is refused.
    #[test]
    fn test_reserve_ip_blocks_duplicate_requested_address() {
        let dir = tempfile::tempdir().unwrap();
        let taken_ip = "10.0.42.2";
        BridgeNetwork::record_allocated_ip_in_dir(dir.path(), "one", taken_ip).unwrap();

        let outcome =
            BridgeNetwork::reserve_ip_in_dir(dir.path(), "two", "10.0.42.0/24", Some(taken_ip));
        let err = outcome.unwrap_err();
        assert!(
            err.to_string().contains("already in use"),
            "second reservation of the same IP must fail"
        );
    }

    /// Dropping an armed rollback guard deletes the reservation file.
    #[test]
    fn test_setup_rollback_releases_reserved_ip() {
        let dir = tempfile::tempdir().unwrap();
        BridgeNetwork::record_allocated_ip_in_dir(dir.path(), "rollback", "10.0.42.3").unwrap();

        {
            let _guard = SetupRollback::new(
                "veth-test".to_string(),
                "10.0.42.0/24".to_string(),
                Some((dir.path().to_path_buf(), "rollback".to_string())),
            );
            // `_guard` is dropped, still armed, at the end of this scope.
        }

        let reservation = dir.path().join("rollback.ip");
        assert!(
            !reservation.exists(),
            "rollback must release reserved IP files on setup failure"
        );
    }

    /// Both chains get a rule, and the OUTPUT rule is limited to local destinations.
    #[test]
    fn test_port_forward_rules_include_output_chain_for_local_host_clients() {
        let pf = PortForward {
            host_ip: None,
            host_port: 8080,
            container_port: 80,
            protocol: crate::network::config::Protocol::Tcp,
        };
        let rule_for =
            |chain: &str| BridgeNetwork::port_forward_rule_args("-A", chain, "10.0.42.2", &pf);

        let prerouting = rule_for("PREROUTING");
        let output = rule_for("OUTPUT");

        assert!(prerouting.iter().any(|arg| arg == "PREROUTING"));
        assert!(output.iter().any(|arg| arg == "OUTPUT"));
        let targets_local = output
            .windows(2)
            .any(|pair| pair[0] == "--dst-type" && pair[1] == "LOCAL");
        assert!(
            targets_local,
            "OUTPUT rule must target local-destination traffic"
        );
    }

    /// A configured host IP shows up as `-d <ip>` in the rules for both chains.
    #[test]
    fn test_port_forward_rules_include_host_ip_when_configured() {
        let pf = PortForward {
            host_ip: Some(std::net::Ipv4Addr::new(127, 0, 0, 1)),
            host_port: 4173,
            container_port: 4173,
            protocol: crate::network::config::Protocol::Tcp,
        };

        for chain in ["PREROUTING", "OUTPUT"] {
            let args = BridgeNetwork::port_forward_rule_args("-A", chain, "10.0.42.2", &pf);
            let restricted = args
                .windows(2)
                .any(|pair| pair[0] == "-d" && pair[1] == "127.0.0.1");
            assert!(
                restricted,
                "port forward must restrict DNAT rules to the configured host IP"
            );
        }
    }
}