1use crate::error::{NucleusError, Result, StateTransition};
2use crate::network::config::{BridgeConfig, EgressPolicy, PortForward};
3use crate::network::NetworkState;
4use std::os::fd::FromRawFd;
5use std::os::unix::io::AsRawFd;
6use std::process::Command;
7use tracing::{debug, info, warn};
8
/// Host-side handle for one container's bridge networking.
///
/// Built by the `setup*` constructors and torn down either explicitly through
/// `cleanup` or implicitly (best effort) when the value is dropped.
pub struct BridgeNetwork {
    /// Copy of the bridge configuration this network was set up with.
    config: BridgeConfig,
    /// IPv4 address reserved for the container and assigned inside its namespace.
    container_ip: String,
    /// Name of the host-side veth interface; deleted during cleanup.
    veth_host: String,
    /// Identifier used to name the IP reservation file (`<id>.ip`).
    container_id: String,
    /// Value of `/proc/sys/net/ipv4/ip_forward` read before setup enabled forwarding;
    /// `None` when it could not be read.
    prev_ip_forward: Option<String>,
    /// Lifecycle state; set to `Cleaned` once teardown has run.
    state: NetworkState,
}
18
19impl BridgeNetwork {
20 pub fn setup(pid: u32, config: &BridgeConfig) -> Result<Self> {
27 Self::setup_for(pid, config, &format!("{:x}", pid))
28 }
29
30 pub fn setup_with_id(pid: u32, config: &BridgeConfig, container_id: &str) -> Result<Self> {
32 Self::setup_for(pid, config, container_id)
33 }
34
    /// Shared implementation behind `setup` and `setup_with_id`.
    ///
    /// Steps, in order: validate the config, reserve a container IP, make sure the
    /// bridge exists, create a veth pair, attach the host end to the bridge, move the
    /// container end into the network namespace of `pid`, configure address / links /
    /// default route inside that namespace, add a MASQUERADE rule for the subnet,
    /// enable IPv4 forwarding and install the configured port forwards.
    ///
    /// A `SetupRollback` guard is armed once the IP is reserved; any early return drops
    /// it, which undoes the steps it has been told about. It is disarmed only on success.
    ///
    /// # Errors
    /// Returns an error if validation, a state transition, IP reservation or any of the
    /// external commands fails, or if the start time of `pid` cannot be read or changes
    /// while the namespace is being configured.
    fn setup_for(pid: u32, config: &BridgeConfig, container_id: &str) -> Result<Self> {
        config.validate()?;

        let mut net_state = NetworkState::Unconfigured;
        net_state = net_state.transition(NetworkState::Configuring)?;

        // Reserve the container address first; the reservation is released by the
        // rollback guard created below if anything later fails.
        let alloc_dir = Self::ip_alloc_dir();
        let container_ip = Self::reserve_ip_in_dir(
            &alloc_dir,
            container_id,
            &config.subnet,
            config.container_ip.as_deref(),
        )?;
        let prefix = Self::subnet_prefix(&config.subnet);

        // Interface names are derived from the PID and capped at 15 bytes
        // (presumably the Linux interface-name limit).
        let veth_host_full = format!("veth-{:x}", pid);
        let veth_cont_full = format!("vethc-{:x}", pid);
        let veth_host = veth_host_full[..veth_host_full.len().min(15)].to_string();
        let veth_container = veth_cont_full[..veth_cont_full.len().min(15)].to_string();
        let mut rollback = SetupRollback::new(
            veth_host.clone(),
            config.subnet.clone(),
            Some((alloc_dir.clone(), container_id.to_string())),
        );

        Self::ensure_bridge_for(&config.bridge_name, &config.subnet)?;

        // Create the veth pair on the host.
        Self::run_cmd(
            "ip",
            &[
                "link",
                "add",
                &veth_host,
                "type",
                "veth",
                "peer",
                "name",
                &veth_container,
            ],
        )?;
        rollback.veth_created = true;

        // Attach the host end to the bridge and bring it up.
        Self::run_cmd(
            "ip",
            &["link", "set", &veth_host, "master", &config.bridge_name],
        )?;
        Self::run_cmd("ip", &["link", "set", &veth_host, "up"])?;

        // Move the container end into the network namespace of `pid`.
        Self::run_cmd(
            "ip",
            &["link", "set", &veth_container, "netns", &pid.to_string()],
        )?;

        let pid_str = pid.to_string();
        // Snapshot the process start time so PID reuse during the nsenter calls
        // below can be detected afterwards.
        let start_ticks = Self::read_pid_start_ticks(pid);
        if start_ticks == 0 {
            drop(rollback);
            return Err(NucleusError::NetworkError(format!(
                "Cannot read start_ticks for PID {} — process may have exited",
                pid
            )));
        }

        // Inside the target namespace: assign the address, bring up the veth and loopback.
        Self::run_cmd(
            "nsenter",
            &[
                "-t",
                &pid_str,
                "-n",
                "ip",
                "addr",
                "add",
                &format!("{}/{}", container_ip, prefix),
                "dev",
                &veth_container,
            ],
        )?;
        Self::run_cmd(
            "nsenter",
            &[
                "-t",
                &pid_str,
                "-n",
                "ip",
                "link",
                "set",
                &veth_container,
                "up",
            ],
        )?;
        Self::run_cmd(
            "nsenter",
            &["-t", &pid_str, "-n", "ip", "link", "set", "lo", "up"],
        )?;

        // Re-read the start time: a different value means the PID now names another process.
        let current_ticks = Self::read_pid_start_ticks(pid);
        if current_ticks != start_ticks {
            drop(rollback);
            return Err(NucleusError::NetworkError(format!(
                "PID {} was recycled during network setup (start_ticks changed: {} -> {})",
                pid, start_ticks, current_ticks
            )));
        }

        // Default route inside the namespace points at the bridge gateway (`.1`).
        let gateway = Self::gateway_from_subnet(&config.subnet);
        Self::run_cmd(
            "nsenter",
            &[
                "-t", &pid_str, "-n", "ip", "route", "add", "default", "via", &gateway,
            ],
        )?;

        // Host-side NAT for traffic sourced from the container subnet.
        Self::run_cmd(
            "iptables",
            &[
                "-t",
                "nat",
                "-A",
                "POSTROUTING",
                "-s",
                &config.subnet,
                "-j",
                "MASQUERADE",
            ],
        )?;
        rollback.nat_added = true;

        // Remember the previous forwarding setting before enabling it, so that
        // cleanup can restore a previous "0".
        let prev_ip_forward = match std::fs::read_to_string("/proc/sys/net/ipv4/ip_forward") {
            Ok(v) => Some(v.trim().to_string()),
            Err(e) => {
                warn!(
                    "Could not read ip_forward state (will not restore on cleanup): {}",
                    e
                );
                None
            }
        };
        rollback.prev_ip_forward = prev_ip_forward;
        std::fs::write("/proc/sys/net/ipv4/ip_forward", "1").map_err(|e| {
            NucleusError::NetworkError(format!("Failed to enable IP forwarding: {}", e))
        })?;

        // Install port forwards; each one is registered with the rollback guard
        // only after it was installed successfully.
        for pf in &config.port_forwards {
            Self::setup_port_forward_for(&container_ip, pf)?;
            rollback
                .port_forwards
                .push((container_ip.clone(), pf.clone()));
        }

        net_state = net_state.transition(NetworkState::Active)?;

        info!(
            "Bridge network configured: {} -> {} (IP: {})",
            veth_host, veth_container, container_ip
        );
        // Success: take the saved forwarding value over and disarm the guard so
        // its Drop does not undo the setup.
        let prev_ip_forward = rollback.prev_ip_forward.clone();
        rollback.disarm();

        Ok(Self {
            config: config.clone(),
            container_ip,
            veth_host,
            container_id: container_id.to_string(),
            prev_ip_forward,
            state: net_state,
        })
    }
216
217 pub fn apply_egress_policy(&self, pid: u32, policy: &EgressPolicy) -> Result<()> {
222 for cidr in &policy.allowed_cidrs {
224 crate::network::config::validate_egress_cidr(cidr)
225 .map_err(|e| NucleusError::NetworkError(format!("Invalid egress CIDR: {}", e)))?;
226 }
227
228 let pid_str = pid.to_string();
229
230 Self::run_cmd(
233 "nsenter",
234 &["-t", &pid_str, "-n", "iptables", "-P", "OUTPUT", "DROP"],
235 )?;
236 Self::run_cmd(
238 "nsenter",
239 &["-t", &pid_str, "-n", "iptables", "-F", "OUTPUT"],
240 )?;
241
242 Self::run_cmd(
244 "nsenter",
245 &[
246 "-t", &pid_str, "-n", "iptables", "-A", "OUTPUT", "-o", "lo", "-j", "ACCEPT",
247 ],
248 )?;
249
250 Self::run_cmd(
251 "nsenter",
252 &[
253 "-t",
254 &pid_str,
255 "-n",
256 "iptables",
257 "-A",
258 "OUTPUT",
259 "-m",
260 "conntrack",
261 "--ctstate",
262 "ESTABLISHED,RELATED",
263 "-j",
264 "ACCEPT",
265 ],
266 )?;
267
268 if policy.allow_dns {
270 for dns in &self.config.dns {
271 Self::run_cmd(
272 "nsenter",
273 &[
274 "-t", &pid_str, "-n", "iptables", "-A", "OUTPUT", "-p", "udp", "-d", dns,
275 "--dport", "53", "-j", "ACCEPT",
276 ],
277 )?;
278 Self::run_cmd(
279 "nsenter",
280 &[
281 "-t", &pid_str, "-n", "iptables", "-A", "OUTPUT", "-p", "tcp", "-d", dns,
282 "--dport", "53", "-j", "ACCEPT",
283 ],
284 )?;
285 }
286 }
287
288 for cidr in &policy.allowed_cidrs {
290 if policy.allowed_tcp_ports.is_empty() && policy.allowed_udp_ports.is_empty() {
291 Self::run_cmd(
293 "nsenter",
294 &[
295 "-t", &pid_str, "-n", "iptables", "-A", "OUTPUT", "-d", cidr, "-j",
296 "ACCEPT",
297 ],
298 )?;
299 } else {
300 for port in &policy.allowed_tcp_ports {
301 Self::run_cmd(
302 "nsenter",
303 &[
304 "-t",
305 &pid_str,
306 "-n",
307 "iptables",
308 "-A",
309 "OUTPUT",
310 "-p",
311 "tcp",
312 "-d",
313 cidr,
314 "--dport",
315 &port.to_string(),
316 "-j",
317 "ACCEPT",
318 ],
319 )?;
320 }
321 for port in &policy.allowed_udp_ports {
322 Self::run_cmd(
323 "nsenter",
324 &[
325 "-t",
326 &pid_str,
327 "-n",
328 "iptables",
329 "-A",
330 "OUTPUT",
331 "-p",
332 "udp",
333 "-d",
334 cidr,
335 "--dport",
336 &port.to_string(),
337 "-j",
338 "ACCEPT",
339 ],
340 )?;
341 }
342 }
343 }
344
345 if policy.log_denied {
347 Self::run_cmd(
348 "nsenter",
349 &[
350 "-t",
351 &pid_str,
352 "-n",
353 "iptables",
354 "-A",
355 "OUTPUT",
356 "-m",
357 "limit",
358 "--limit",
359 "5/min",
360 "-j",
361 "LOG",
362 "--log-prefix",
363 "nucleus-egress-denied: ",
364 ],
365 )?;
366 }
367
368 Self::run_cmd(
370 "nsenter",
371 &["-t", &pid_str, "-n", "iptables", "-P", "OUTPUT", "DROP"],
372 )?;
373
374 info!(
375 "Egress policy applied: {} allowed CIDRs",
376 policy.allowed_cidrs.len()
377 );
378 debug!("Egress policy details: {:?}", policy);
379
380 Ok(())
381 }
382
383 pub fn cleanup(mut self) -> Result<()> {
387 self.state = self.state.transition(NetworkState::Cleaned)?;
388
389 Self::release_allocated_ip(&self.container_id);
391
392 for pf in &self.config.port_forwards {
394 if let Err(e) = self.cleanup_port_forward(pf) {
395 warn!("Failed to cleanup port forward: {}", e);
396 }
397 }
398
399 let _ = Self::run_cmd(
401 "iptables",
402 &[
403 "-t",
404 "nat",
405 "-D",
406 "POSTROUTING",
407 "-s",
408 &self.config.subnet,
409 "-j",
410 "MASQUERADE",
411 ],
412 );
413
414 let _ = Self::run_cmd("ip", &["link", "del", &self.veth_host]);
416
417 if let Some(ref prev) = self.prev_ip_forward {
419 if prev == "0" {
420 if let Err(e) = std::fs::write("/proc/sys/net/ipv4/ip_forward", "0") {
421 warn!("Failed to restore ip_forward to 0: {}", e);
422 } else {
423 info!("Restored net.ipv4.ip_forward to 0");
424 }
425 }
426 }
427
428 info!("Bridge network cleaned up");
429 Ok(())
430 }
431
432 fn cleanup_best_effort(&mut self) {
436 if self.state == NetworkState::Cleaned {
437 return;
438 }
439
440 Self::release_allocated_ip(&self.container_id);
441
442 for pf in &self.config.port_forwards {
443 let _ = self.cleanup_port_forward(pf);
444 }
445
446 let _ = Self::run_cmd(
447 "iptables",
448 &[
449 "-t",
450 "nat",
451 "-D",
452 "POSTROUTING",
453 "-s",
454 &self.config.subnet,
455 "-j",
456 "MASQUERADE",
457 ],
458 );
459
460 let _ = Self::run_cmd("ip", &["link", "del", &self.veth_host]);
461
462 if let Some(ref prev) = self.prev_ip_forward {
463 if prev == "0" {
464 let _ = std::fs::write("/proc/sys/net/ipv4/ip_forward", "0");
465 }
466 }
467
468 self.state = NetworkState::Cleaned;
469 debug!("Bridge network cleaned up (best-effort via drop)");
470 }
471
472 pub fn cleanup_orphaned_rules(subnet: &str) {
478 let output = match Command::new("iptables")
480 .args(["-t", "nat", "-L", "POSTROUTING", "-n"])
481 .output()
482 {
483 Ok(o) => o,
484 Err(e) => {
485 debug!("Cannot check iptables for orphaned rules: {}", e);
486 return;
487 }
488 };
489
490 let stdout = String::from_utf8_lossy(&output.stdout);
491 let mut orphaned_count = 0u32;
492 for line in stdout.lines() {
493 if line.contains("MASQUERADE") && line.contains(subnet) {
494 let _ = Self::run_cmd(
496 "iptables",
497 &[
498 "-t",
499 "nat",
500 "-D",
501 "POSTROUTING",
502 "-s",
503 subnet,
504 "-j",
505 "MASQUERADE",
506 ],
507 );
508 orphaned_count += 1;
509 }
510 }
511
512 if orphaned_count > 0 {
513 info!(
514 "Cleaned up {} orphaned iptables MASQUERADE rule(s) for subnet {}",
515 orphaned_count, subnet
516 );
517 }
518 }
519
520 fn ensure_bridge_for(bridge_name: &str, subnet: &str) -> Result<()> {
521 if Self::run_cmd("ip", &["link", "show", bridge_name]).is_ok() {
523 return Ok(());
524 }
525
526 Self::run_cmd(
528 "ip",
529 &["link", "add", "name", bridge_name, "type", "bridge"],
530 )?;
531
532 let gateway = Self::gateway_from_subnet(subnet);
533 Self::run_cmd(
534 "ip",
535 &[
536 "addr",
537 "add",
538 &format!("{}/{}", gateway, Self::subnet_prefix(subnet)),
539 "dev",
540 bridge_name,
541 ],
542 )?;
543 Self::run_cmd("ip", &["link", "set", bridge_name, "up"])?;
544
545 info!("Created bridge {}", bridge_name);
546 Ok(())
547 }
548
549 fn setup_port_forward_for(container_ip: &str, pf: &PortForward) -> Result<()> {
550 for chain in ["PREROUTING", "OUTPUT"] {
551 let args = Self::port_forward_rule_args("-A", chain, container_ip, pf);
552 Self::run_cmd_owned("iptables", &args)?;
553 }
554
555 let host_ip = pf
556 .host_ip
557 .map(|ip| ip.to_string())
558 .unwrap_or_else(|| "0.0.0.0".to_string());
559 info!(
560 "Port forward: {}:{} -> {}:{}/{}",
561 host_ip, pf.host_port, container_ip, pf.container_port, pf.protocol
562 );
563 Ok(())
564 }
565
566 fn cleanup_port_forward(&self, pf: &PortForward) -> Result<()> {
567 for chain in ["OUTPUT", "PREROUTING"] {
568 let args = Self::port_forward_rule_args("-D", chain, &self.container_ip, pf);
569 Self::run_cmd_owned("iptables", &args)?;
570 }
571 Ok(())
572 }
573
574 fn allocate_ip_with_reserved(
580 subnet: &str,
581 reserved: &std::collections::HashSet<String>,
582 ) -> Result<String> {
583 let base = subnet.split('/').next().unwrap_or("10.0.42.0");
584 let parts: Vec<&str> = base.split('.').collect();
585 if parts.len() != 4 {
586 return Ok("10.0.42.2".to_string());
587 }
588
589 let mut rand_buf = [0u8; 128];
596 std::fs::File::open("/dev/urandom")
597 .and_then(|mut f| std::io::Read::read_exact(&mut f, &mut rand_buf))
598 .map_err(|e| {
599 NucleusError::NetworkError(format!("Failed to read /dev/urandom: {}", e))
600 })?;
601 for &byte in &rand_buf {
602 if byte >= 253 {
604 continue;
605 }
606 let offset = byte as u32 + 2;
607 let candidate = format!("{}.{}.{}.{}", parts[0], parts[1], parts[2], offset);
608 if reserved.contains(&candidate) {
609 continue;
610 }
611 if !Self::is_ip_in_use(&candidate)? {
612 return Ok(candidate);
614 }
615 }
616
617 Err(NucleusError::NetworkError(format!(
618 "Failed to allocate free IP in subnet {}",
619 subnet
620 )))
621 }
622
623 fn reserve_ip_in_dir(
624 alloc_dir: &std::path::Path,
625 container_id: &str,
626 subnet: &str,
627 requested_ip: Option<&str>,
628 ) -> Result<String> {
629 Self::ensure_alloc_dir(alloc_dir)?;
630 let lock_path = alloc_dir.join(".lock");
631 let lock_file = std::fs::OpenOptions::new()
632 .create(true)
633 .write(true)
634 .truncate(false)
635 .open(&lock_path)
636 .map_err(|e| {
637 NucleusError::NetworkError(format!("Failed to open IP alloc lock: {}", e))
638 })?;
639 let lock_ret = unsafe { libc::flock(lock_file.as_raw_fd(), libc::LOCK_EX) };
642 if lock_ret != 0 {
643 return Err(NucleusError::NetworkError(format!(
644 "Failed to acquire IP alloc lock: {}",
645 std::io::Error::last_os_error()
646 )));
647 }
648
649 let reserved = Self::collect_reserved_ips_in_dir(alloc_dir);
650 let ip = match requested_ip {
651 Some(ip) => {
652 if reserved.contains(ip) || Self::is_ip_in_use(ip)? {
653 return Err(NucleusError::NetworkError(format!(
654 "Requested container IP {} is already in use",
655 ip
656 )));
657 }
658 ip.to_string()
659 }
660 None => Self::allocate_ip_with_reserved(subnet, &reserved)?,
661 };
662
663 Self::record_allocated_ip_in_dir(alloc_dir, container_id, &ip)?;
664 Ok(ip)
665 }
666
667 fn collect_reserved_ips_in_dir(
669 alloc_dir: &std::path::Path,
670 ) -> std::collections::HashSet<String> {
671 let mut ips = std::collections::HashSet::new();
672 if let Ok(entries) = std::fs::read_dir(alloc_dir) {
673 for entry in entries.flatten() {
674 if let Some(name) = entry.file_name().to_str() {
675 if name.ends_with(".ip") {
676 if let Ok(ip) = std::fs::read_to_string(entry.path()) {
677 let ip = ip.trim().to_string();
678 if !ip.is_empty() {
679 ips.insert(ip);
680 }
681 }
682 }
683 }
684 }
685 }
686 ips
687 }
688
689 fn record_allocated_ip_in_dir(
691 alloc_dir: &std::path::Path,
692 container_id: &str,
693 ip: &str,
694 ) -> Result<()> {
695 Self::ensure_alloc_dir(alloc_dir)?;
696 let path = alloc_dir.join(format!("{}.ip", container_id));
697 std::fs::write(&path, ip).map_err(|e| {
698 NucleusError::NetworkError(format!("Failed to record IP allocation: {}", e))
699 })?;
700 Ok(())
701 }
702
703 fn release_allocated_ip(container_id: &str) {
705 let alloc_dir = Self::ip_alloc_dir();
706 Self::release_allocated_ip_in_dir(&alloc_dir, container_id);
707 }
708
709 fn release_allocated_ip_in_dir(alloc_dir: &std::path::Path, container_id: &str) {
710 let path = alloc_dir.join(format!("{}.ip", container_id));
711 let _ = std::fs::remove_file(path);
712 }
713
714 fn ensure_alloc_dir(alloc_dir: &std::path::Path) -> Result<()> {
717 if alloc_dir.exists() {
720 if let Ok(meta) = std::fs::symlink_metadata(alloc_dir) {
721 if meta.file_type().is_symlink() {
722 return Err(NucleusError::NetworkError(format!(
723 "IP alloc dir {:?} is a symlink, refusing to use",
724 alloc_dir
725 )));
726 }
727 }
728 }
729 if let Some(parent) = alloc_dir.parent() {
731 if let Ok(meta) = std::fs::symlink_metadata(parent) {
732 if meta.file_type().is_symlink() {
733 return Err(NucleusError::NetworkError(format!(
734 "IP alloc dir parent {:?} is a symlink, refusing to use",
735 parent
736 )));
737 }
738 }
739 }
740
741 std::fs::create_dir_all(alloc_dir).map_err(|e| {
742 NucleusError::NetworkError(format!("Failed to create IP alloc dir: {}", e))
743 })?;
744
745 use std::os::unix::fs::PermissionsExt;
747 let perms = std::fs::Permissions::from_mode(0o700);
748 std::fs::set_permissions(alloc_dir, perms).map_err(|e| {
749 NucleusError::NetworkError(format!(
750 "Failed to set permissions on IP alloc dir {:?}: {}",
751 alloc_dir, e
752 ))
753 })?;
754
755 if let Ok(meta) = std::fs::symlink_metadata(alloc_dir) {
757 if meta.file_type().is_symlink() {
758 return Err(NucleusError::NetworkError(format!(
759 "IP alloc dir {:?} was replaced with a symlink during setup",
760 alloc_dir
761 )));
762 }
763 }
764 Ok(())
765 }
766
767 fn ip_alloc_dir() -> std::path::PathBuf {
768 if nix::unistd::Uid::effective().is_root() {
769 std::path::PathBuf::from("/var/run/nucleus/ip-alloc")
770 } else {
771 dirs::runtime_dir()
772 .map(|d| d.join("nucleus/ip-alloc"))
773 .or_else(|| dirs::data_local_dir().map(|d| d.join("nucleus/ip-alloc")))
774 .unwrap_or_else(|| {
775 dirs::home_dir()
776 .map(|h| h.join(".nucleus/ip-alloc"))
777 .unwrap_or_else(|| std::path::PathBuf::from("/var/run/nucleus/ip-alloc"))
778 })
779 }
780 }
781
782 fn read_pid_start_ticks(pid: u32) -> u64 {
785 let stat_path = format!("/proc/{}/stat", pid);
786 if let Ok(content) = std::fs::read_to_string(&stat_path) {
787 if let Some(after_comm) = content.rfind(')') {
790 return content[after_comm + 2..]
791 .split_whitespace()
792 .nth(19) .and_then(|s| s.parse().ok())
794 .unwrap_or(0);
795 }
796 }
797 0
798 }
799
800 fn gateway_from_subnet(subnet: &str) -> String {
802 let base = subnet.split('/').next().unwrap_or("10.0.42.0");
803 let parts: Vec<&str> = base.split('.').collect();
804 if parts.len() == 4 {
805 format!("{}.{}.{}.1", parts[0], parts[1], parts[2])
806 } else {
807 "10.0.42.1".to_string()
808 }
809 }
810
811 fn subnet_prefix(subnet: &str) -> u8 {
812 subnet
813 .split_once('/')
814 .and_then(|(_, p)| p.parse::<u8>().ok())
815 .filter(|p| *p <= 32)
816 .unwrap_or(24)
817 }
818
819 fn resolve_bin(name: &str) -> Result<String> {
826 let search_dirs: &[&str] = match name {
827 "ip" => &["/usr/sbin/ip", "/sbin/ip", "/usr/bin/ip"],
828 "iptables" => &["/usr/sbin/iptables", "/sbin/iptables", "/usr/bin/iptables"],
829 "nsenter" => &["/usr/bin/nsenter", "/usr/sbin/nsenter", "/bin/nsenter"],
830 _ => &[],
831 };
832
833 for path in search_dirs {
834 let p = std::path::Path::new(path);
835 if p.exists() {
836 Self::validate_network_binary(p, name)?;
837 return Ok(path.to_string());
838 }
839 }
840
841 if let Ok(output) = Command::new("which").arg(name).output() {
843 if output.status.success() {
844 let resolved = String::from_utf8_lossy(&output.stdout).trim().to_string();
845 if !resolved.is_empty() {
846 let p = std::path::Path::new(&resolved);
847 Self::validate_network_binary(p, name)?;
848 return Ok(resolved);
849 }
850 }
851 }
852
853 Err(NucleusError::NetworkError(format!(
854 "Required binary '{}' not found or failed validation",
855 name
856 )))
857 }
858
859 fn validate_network_binary(path: &std::path::Path, name: &str) -> Result<()> {
862 use std::os::unix::fs::MetadataExt;
863
864 let meta = std::fs::metadata(path)
865 .map_err(|e| NucleusError::NetworkError(format!("Cannot stat {}: {}", name, e)))?;
866 let mode = meta.mode();
867 if mode & 0o022 != 0 {
868 return Err(NucleusError::NetworkError(format!(
869 "Binary '{}' at {:?} is writable by group/others (mode {:o}), refusing to execute",
870 name, path, mode
871 )));
872 }
873 let owner = meta.uid();
874 let euid = nix::unistd::Uid::effective().as_raw();
875 if owner != 0 && owner != euid {
876 return Err(NucleusError::NetworkError(format!(
877 "Binary '{}' at {:?} owned by UID {} (expected root or euid {}), refusing to execute",
878 name, path, owner, euid
879 )));
880 }
881 Ok(())
882 }
883
884 fn run_cmd(program: &str, args: &[&str]) -> Result<()> {
885 let resolved = Self::resolve_bin(program)?;
886 let output = Command::new(&resolved).args(args).output().map_err(|e| {
887 NucleusError::NetworkError(format!("Failed to run {} {:?}: {}", resolved, args, e))
888 })?;
889
890 if !output.status.success() {
891 let stderr = String::from_utf8_lossy(&output.stderr);
892 return Err(NucleusError::NetworkError(format!(
893 "{} {:?} failed: {}",
894 program, args, stderr
895 )));
896 }
897
898 Ok(())
899 }
900
901 fn run_cmd_owned(program: &str, args: &[String]) -> Result<()> {
902 let refs: Vec<&str> = args.iter().map(String::as_str).collect();
903 Self::run_cmd(program, &refs)
904 }
905
906 fn port_forward_rule_args(
907 operation: &str,
908 chain: &str,
909 container_ip: &str,
910 pf: &PortForward,
911 ) -> Vec<String> {
912 let mut args = vec![
913 "-t".to_string(),
914 "nat".to_string(),
915 operation.to_string(),
916 chain.to_string(),
917 "-p".to_string(),
918 pf.protocol.as_str().to_string(),
919 ];
920
921 if chain == "OUTPUT" {
922 args.extend([
923 "-m".to_string(),
924 "addrtype".to_string(),
925 "--dst-type".to_string(),
926 "LOCAL".to_string(),
927 ]);
928 }
929
930 if let Some(host_ip) = pf.host_ip {
931 args.extend(["-d".to_string(), host_ip.to_string()]);
932 }
933
934 args.extend([
935 "--dport".to_string(),
936 pf.host_port.to_string(),
937 "-j".to_string(),
938 "DNAT".to_string(),
939 "--to-destination".to_string(),
940 format!("{}:{}", container_ip, pf.container_port),
941 ]);
942
943 args
944 }
945
946 fn is_ip_in_use(ip: &str) -> Result<bool> {
947 let ip_bin = Self::resolve_bin("ip")?;
948 let output = Command::new(&ip_bin)
949 .args(["-4", "addr", "show"])
950 .output()
951 .map_err(|e| {
952 NucleusError::NetworkError(format!("Failed to inspect host IPs: {}", e))
953 })?;
954
955 if !output.status.success() {
956 let stderr = String::from_utf8_lossy(&output.stderr);
957 return Err(NucleusError::NetworkError(format!(
958 "ip -4 addr show failed: {}",
959 stderr.trim()
960 )));
961 }
962
963 let stdout = String::from_utf8_lossy(&output.stdout);
964 Ok(stdout.contains(&format!(" {}/", ip)))
965 }
966
967 pub fn write_resolv_conf(root: &std::path::Path, dns: &[String]) -> Result<()> {
969 let resolv_path = root.join("etc/resolv.conf");
970 let content: String = dns
971 .iter()
972 .map(|server| format!("nameserver {}\n", server))
973 .collect();
974 std::fs::write(&resolv_path, content).map_err(|e| {
975 NucleusError::NetworkError(format!("Failed to write resolv.conf: {}", e))
976 })?;
977 Ok(())
978 }
979
980 pub fn bind_mount_resolv_conf(root: &std::path::Path, dns: &[String]) -> Result<()> {
986 use nix::mount::{mount, MsFlags};
987
988 let content: String = dns
989 .iter()
990 .map(|server| format!("nameserver {}\n", server))
991 .collect();
992
993 let memfd_name = std::ffi::CString::new("nucleus-resolv").map_err(|e| {
995 NucleusError::NetworkError(format!("Failed to create memfd name: {}", e))
996 })?;
997 let raw_fd = unsafe { libc::memfd_create(memfd_name.as_ptr(), 0) };
1000 if raw_fd < 0 {
1001 return Self::bind_mount_resolv_conf_staging(root, dns);
1003 }
1004 let memfd = unsafe { std::os::fd::OwnedFd::from_raw_fd(raw_fd) };
1008
1009 let write_result = unsafe {
1013 libc::write(
1014 memfd.as_raw_fd(),
1015 content.as_ptr() as *const libc::c_void,
1016 content.len(),
1017 )
1018 };
1019 if write_result < 0 {
1020 return Self::bind_mount_resolv_conf_staging(root, dns);
1022 }
1023
1024 let target = root.join("etc/resolv.conf");
1026 if !target.exists() {
1027 let _ = std::fs::write(&target, "");
1028 }
1029
1030 let memfd_path = format!("/proc/self/fd/{}", memfd.as_raw_fd());
1032 mount(
1033 Some(memfd_path.as_str()),
1034 &target,
1035 None::<&str>,
1036 MsFlags::MS_BIND,
1037 None::<&str>,
1038 )
1039 .map_err(|e| {
1040 NucleusError::NetworkError(format!("Failed to bind mount resolv.conf: {}", e))
1042 })?;
1043
1044 info!("Bind-mounted resolv.conf for bridge networking (rootfs mode, memfd)");
1048 Ok(())
1049 }
1050
1051 fn bind_mount_resolv_conf_staging(root: &std::path::Path, dns: &[String]) -> Result<()> {
1053 use nix::mount::{mount, MsFlags};
1054
1055 let content: String = dns
1056 .iter()
1057 .map(|server| format!("nameserver {}\n", server))
1058 .collect();
1059
1060 let staging = root.join("tmp/.resolv.conf.nucleus");
1062 if let Some(parent) = staging.parent() {
1063 std::fs::create_dir_all(parent).map_err(|e| {
1064 NucleusError::NetworkError(format!(
1065 "Failed to create resolv.conf staging parent: {}",
1066 e
1067 ))
1068 })?;
1069 }
1070 std::fs::write(&staging, content).map_err(|e| {
1071 NucleusError::NetworkError(format!("Failed to write staging resolv.conf: {}", e))
1072 })?;
1073
1074 let target = root.join("etc/resolv.conf");
1076 if !target.exists() {
1077 let _ = std::fs::write(&target, "");
1078 }
1079
1080 mount(
1082 Some(staging.as_path()),
1083 &target,
1084 None::<&str>,
1085 MsFlags::MS_BIND,
1086 None::<&str>,
1087 )
1088 .map_err(|e| {
1089 NucleusError::NetworkError(format!("Failed to bind mount resolv.conf: {}", e))
1090 })?;
1091
1092 if let Err(e) = std::fs::remove_file(&staging) {
1095 warn!("Failed to remove staging resolv.conf {:?}: {}", staging, e);
1096 }
1097
1098 info!("Bind-mounted resolv.conf for bridge networking (rootfs mode, staging)");
1099 Ok(())
1100 }
1101}
1102
1103impl Drop for BridgeNetwork {
1104 fn drop(&mut self) {
1105 self.cleanup_best_effort();
1106 }
1107}
1108
/// Guard that undoes a partially completed `BridgeNetwork` setup when dropped
/// while still armed.
struct SetupRollback {
    /// Host-side veth interface to delete if `veth_created` is set.
    veth_host: String,
    /// Subnet whose MASQUERADE rule is removed if `nat_added` is set.
    subnet: String,
    /// Whether the veth pair has been created.
    veth_created: bool,
    /// Whether the MASQUERADE rule has been added.
    nat_added: bool,
    /// Port forwards installed so far, as `(container_ip, forward)` pairs.
    port_forwards: Vec<(String, PortForward)>,
    /// `ip_forward` value read before setup changed it, if it could be read.
    prev_ip_forward: Option<String>,
    /// Allocation directory and container id of the IP reservation to release.
    reserved_ip: Option<(std::path::PathBuf, String)>,
    /// While `true`, dropping the guard performs the rollback.
    armed: bool,
}
1119
1120impl SetupRollback {
1121 fn new(
1122 veth_host: String,
1123 subnet: String,
1124 reserved_ip: Option<(std::path::PathBuf, String)>,
1125 ) -> Self {
1126 Self {
1127 veth_host,
1128 subnet,
1129 veth_created: false,
1130 nat_added: false,
1131 port_forwards: Vec::new(),
1132 prev_ip_forward: None,
1133 reserved_ip,
1134 armed: true,
1135 }
1136 }
1137
1138 fn disarm(&mut self) {
1139 self.armed = false;
1140 }
1141}
1142
1143impl Drop for SetupRollback {
1144 fn drop(&mut self) {
1145 if !self.armed {
1146 return;
1147 }
1148
1149 for (container_ip, pf) in self.port_forwards.iter().rev() {
1150 for chain in ["OUTPUT", "PREROUTING"] {
1151 let args = BridgeNetwork::port_forward_rule_args("-D", chain, container_ip, pf);
1152 if let Err(e) = BridgeNetwork::run_cmd_owned("iptables", &args) {
1153 warn!(
1154 "Rollback: failed to remove iptables {} rule for {}: {}",
1155 chain, container_ip, e
1156 );
1157 }
1158 }
1159 }
1160
1161 if self.nat_added {
1162 if let Err(e) = BridgeNetwork::run_cmd(
1163 "iptables",
1164 &[
1165 "-t",
1166 "nat",
1167 "-D",
1168 "POSTROUTING",
1169 "-s",
1170 &self.subnet,
1171 "-j",
1172 "MASQUERADE",
1173 ],
1174 ) {
1175 warn!("Rollback: failed to remove NAT rule: {}", e);
1176 }
1177 }
1178
1179 if self.veth_created {
1180 if let Err(e) = BridgeNetwork::run_cmd("ip", &["link", "del", &self.veth_host]) {
1181 warn!("Rollback: failed to delete veth {}: {}", self.veth_host, e);
1182 }
1183 }
1184
1185 if let Some((alloc_dir, container_id)) = &self.reserved_ip {
1186 BridgeNetwork::release_allocated_ip_in_dir(alloc_dir, container_id);
1187 }
1188 }
1189}
1190
// Unit tests for the parts of the module that do not need root or real interfaces.
#[cfg(test)]
mod tests {
    use super::*;

    /// Documents the arithmetic behind the allocator's rejection sampling:
    /// accepted bytes (0..253) map to host numbers 2..=254, and 253..=255 are rejected.
    #[test]
    fn test_ip_allocation_rejection_sampling_range() {
        for byte in 0u8..253 {
            let offset = byte as u32 + 2;
            assert!(
                (2..=254).contains(&offset),
                "offset {} out of range",
                offset
            );
        }
        for byte in [253u8, 254, 255] {
            assert!(byte >= 253);
        }
    }

    /// A requested address that is already recorded for another container is refused.
    #[test]
    fn test_reserve_ip_blocks_duplicate_requested_address() {
        let temp = tempfile::tempdir().unwrap();
        BridgeNetwork::record_allocated_ip_in_dir(temp.path(), "one", "10.0.42.2").unwrap();

        let err =
            BridgeNetwork::reserve_ip_in_dir(temp.path(), "two", "10.0.42.0/24", Some("10.0.42.2"))
                .unwrap_err();
        assert!(
            err.to_string().contains("already in use"),
            "second reservation of the same IP must fail"
        );
    }

    /// Dropping an armed guard removes the reservation file it was given.
    #[test]
    fn test_setup_rollback_releases_reserved_ip() {
        let temp = tempfile::tempdir().unwrap();
        BridgeNetwork::record_allocated_ip_in_dir(temp.path(), "rollback", "10.0.42.3").unwrap();

        // No veth, NAT rule or port forward is flagged, so only the IP release runs.
        let rollback = SetupRollback {
            veth_host: "veth-test".to_string(),
            subnet: "10.0.42.0/24".to_string(),
            veth_created: false,
            nat_added: false,
            port_forwards: Vec::new(),
            prev_ip_forward: None,
            reserved_ip: Some((temp.path().to_path_buf(), "rollback".to_string())),
            armed: true,
        };

        drop(rollback);

        assert!(
            !temp.path().join("rollback.ip").exists(),
            "rollback must release reserved IP files on setup failure"
        );
    }

    /// The OUTPUT-chain rule must carry the `--dst-type LOCAL` match.
    #[test]
    fn test_port_forward_rules_include_output_chain_for_local_host_clients() {
        let pf = PortForward {
            host_ip: None,
            host_port: 8080,
            container_port: 80,
            protocol: crate::network::config::Protocol::Tcp,
        };

        let prerouting =
            BridgeNetwork::port_forward_rule_args("-A", "PREROUTING", "10.0.42.2", &pf);
        let output = BridgeNetwork::port_forward_rule_args("-A", "OUTPUT", "10.0.42.2", &pf);

        assert!(prerouting.iter().any(|arg| arg == "PREROUTING"));
        assert!(output.iter().any(|arg| arg == "OUTPUT"));
        assert!(
            output
                .windows(2)
                .any(|pair| pair[0] == "--dst-type" && pair[1] == "LOCAL"),
            "OUTPUT rule must target local-destination traffic"
        );
    }

    /// A configured host IP must appear as a `-d` match in both chains.
    #[test]
    fn test_port_forward_rules_include_host_ip_when_configured() {
        let pf = PortForward {
            host_ip: Some(std::net::Ipv4Addr::new(127, 0, 0, 1)),
            host_port: 4173,
            container_port: 4173,
            protocol: crate::network::config::Protocol::Tcp,
        };

        let prerouting =
            BridgeNetwork::port_forward_rule_args("-A", "PREROUTING", "10.0.42.2", &pf);
        let output = BridgeNetwork::port_forward_rule_args("-A", "OUTPUT", "10.0.42.2", &pf);

        for args in [&prerouting, &output] {
            assert!(
                args.windows(2)
                    .any(|pair| pair[0] == "-d" && pair[1] == "127.0.0.1"),
                "port forward must restrict DNAT rules to the configured host IP"
            );
        }
    }
}
1295}