1use super::{
2 egress, BridgeConfig, BridgeNetwork, EgressPolicy, NatBackend, NetworkState, PortForward,
3};
4use crate::error::{NucleusError, Result, StateTransition};
5use nix::fcntl::{fcntl, FcntlArg, FdFlag};
6use serde_json::json;
7use std::io::{Read, Write};
8use std::os::fd::{AsRawFd, OwnedFd};
9use std::os::unix::net::UnixStream;
10use std::os::unix::process::ExitStatusExt;
11use std::path::{Path, PathBuf};
12use std::process::{Child, Command};
13use std::time::{Duration, Instant};
14use tracing::{debug, info, warn};
15
16const SLIRP_TAP_NAME: &str = "tap0";
17
/// Network driver backing a container's bridge networking.
///
/// The variant is chosen by [`BridgeDriver::setup_with_id`] from the NAT
/// backend that `BridgeConfig::selected_nat_backend` resolves to.
pub enum BridgeDriver {
    /// Backend created through `BridgeNetwork::setup_with_id`
    /// (selected for `NatBackend::Kernel`).
    Kernel(BridgeNetwork),
    /// slirp4netns-based backend created through
    /// `UserspaceNetwork::setup_with_id` (selected for `NatBackend::Userspace`).
    Userspace(UserspaceNetwork),
}
23
24impl BridgeDriver {
25 pub fn setup_with_id(
26 pid: u32,
27 config: &BridgeConfig,
28 container_id: &str,
29 host_is_root: bool,
30 rootless: bool,
31 ) -> Result<Self> {
32 match config.selected_nat_backend(host_is_root, rootless) {
33 NatBackend::Kernel => Ok(Self::Kernel(BridgeNetwork::setup_with_id(
34 pid,
35 config,
36 container_id,
37 )?)),
38 NatBackend::Userspace => Ok(Self::Userspace(UserspaceNetwork::setup_with_id(
39 pid,
40 config,
41 container_id,
42 host_is_root,
43 rootless,
44 )?)),
45 NatBackend::Auto => Err(NucleusError::NetworkError(
46 "nat backend selection resolved to auto unexpectedly".to_string(),
47 )),
48 }
49 }
50
51 pub fn apply_egress_policy(
52 &self,
53 pid: u32,
54 policy: &EgressPolicy,
55 rootless: bool,
56 ) -> Result<()> {
57 match self {
58 Self::Kernel(net) => net.apply_egress_policy(pid, policy),
59 Self::Userspace(net) => net.apply_egress_policy(pid, policy, rootless),
60 }
61 }
62
63 pub fn cleanup(self) -> Result<()> {
64 match self {
65 Self::Kernel(net) => net.cleanup(),
66 Self::Userspace(net) => net.cleanup(),
67 }
68 }
69}
70
/// Userspace NAT backend driven by a `slirp4netns` child process.
///
/// Dropping the value performs a best-effort teardown of the child process
/// and of the runtime directory unless the state is already `Cleaned`.
pub struct UserspaceNetwork {
    /// Copy of the bridge configuration taken at setup time.
    config: BridgeConfig,
    /// Guest address derived from the configured subnet (host offset 100).
    guest_ip: String,
    /// Container identifier; used for the runtime directory and log messages.
    container_id: String,
    /// Path of the slirp4netns API socket (`slirp4netns.sock` in `runtime_dir`).
    api_socket_path: PathBuf,
    /// Per-container directory that holds the API socket; removed on cleanup.
    runtime_dir: PathBuf,
    /// Write end of the pipe whose read end is handed to slirp4netns as
    /// `--exit-fd`. It is taken (and thereby closed) when stopping the child.
    exit_signal: Option<OwnedFd>,
    /// Handle of the spawned slirp4netns process.
    child: Child,
    /// Lifecycle state of this network.
    state: NetworkState,
}
82
83impl UserspaceNetwork {
84 pub(crate) fn default_dns_server(subnet: &str) -> Result<String> {
85 Self::dns_ip_from_subnet(subnet)
86 }
87
88 pub fn setup_with_id(
89 pid: u32,
90 config: &BridgeConfig,
91 container_id: &str,
92 host_is_root: bool,
93 rootless: bool,
94 ) -> Result<Self> {
95 config.validate()?;
96
97 let guest_ip = Self::guest_ip_from_subnet(&config.subnet)?;
98 Self::validate_userspace_config(config, &guest_ip)?;
99
100 let mut state = NetworkState::Unconfigured;
101 state = state.transition(NetworkState::Configuring)?;
102
103 let runtime_dir = Self::runtime_dir(container_id);
104 Self::ensure_runtime_dir(&runtime_dir)?;
105 let api_socket_path = runtime_dir.join("slirp4netns.sock");
106
107 let slirp = BridgeNetwork::resolve_bin("slirp4netns")?;
108 let needs_userns = Self::should_join_userns(host_is_root, rootless);
114
115 let slirp_path = Path::new(&slirp);
116 let (child, exit_write) = match Self::spawn_slirp(
117 slirp_path,
118 pid,
119 config,
120 needs_userns,
121 &api_socket_path,
122 true,
123 ) {
124 Ok(result) => result,
125 Err(e) => {
126 warn!(
127 "slirp4netns sandbox failed ({}), retrying without --enable-sandbox",
128 e
129 );
130 let _ = std::fs::remove_file(&api_socket_path);
136 Self::spawn_slirp(
137 slirp_path,
138 pid,
139 config,
140 needs_userns,
141 &api_socket_path,
142 false,
143 )
144 .map_err(|retry_err| {
145 let _ = std::fs::remove_dir_all(&runtime_dir);
146 retry_err
147 })?
148 }
149 };
150
151 let mut network = Self {
152 config: config.clone(),
153 guest_ip: guest_ip.to_string(),
154 container_id: container_id.to_string(),
155 api_socket_path,
156 runtime_dir,
157 exit_signal: Some(exit_write),
158 child,
159 state,
160 };
161
162 if let Err(e) = network.configure_port_forwards() {
163 network.cleanup_best_effort();
164 return Err(e);
165 }
166
167 network.state = network.state.transition(NetworkState::Active)?;
168
169 info!(
170 "Userspace NAT configured via slirp4netns for container {} (guest IP {})",
171 network.container_id, network.guest_ip
172 );
173
174 Ok(network)
175 }
176
177 pub fn apply_egress_policy(
178 &self,
179 pid: u32,
180 policy: &EgressPolicy,
181 rootless: bool,
182 ) -> Result<()> {
183 egress::apply_egress_policy(pid, &self.effective_dns_servers(), policy, rootless)
184 }
185
186 pub fn cleanup(mut self) -> Result<()> {
187 self.state = self.state.transition(NetworkState::Cleaned)?;
188 self.stop_child()?;
189 self.cleanup_runtime_dir();
190 Ok(())
191 }
192
193 fn effective_dns_servers(&self) -> Vec<String> {
194 if self.config.dns.is_empty() {
195 vec![Self::dns_ip_from_subnet(&self.config.subnet)
196 .unwrap_or_else(|_| "10.0.2.3".to_string())]
197 } else {
198 self.config.dns.clone()
199 }
200 }
201
202 fn configure_port_forwards(&mut self) -> Result<()> {
203 for pf in &self.config.port_forwards {
204 self.add_port_forward(pf)?;
205 }
206 Ok(())
207 }
208
209 fn add_port_forward(&self, pf: &PortForward) -> Result<()> {
210 let mut arguments = serde_json::Map::new();
211 arguments.insert("proto".to_string(), json!(pf.protocol.as_str()));
212 arguments.insert("host_port".to_string(), json!(pf.host_port));
213 arguments.insert("guest_port".to_string(), json!(pf.container_port));
214 if let Some(host_ip) = pf.host_ip {
215 arguments.insert("host_addr".to_string(), json!(host_ip.to_string()));
216 }
217
218 let response = Self::api_request(
219 &self.api_socket_path,
220 &json!({
221 "execute": "add_hostfwd",
222 "arguments": arguments,
223 }),
224 )?;
225
226 if let Some(error) = response.get("error") {
227 return Err(NucleusError::NetworkError(format!(
228 "slirp4netns add_hostfwd failed for {}:{}->{}/{}: {}",
229 pf.host_ip
230 .map(|ip| ip.to_string())
231 .unwrap_or_else(|| "0.0.0.0".to_string()),
232 pf.host_port,
233 pf.container_port,
234 pf.protocol,
235 error
236 )));
237 }
238
239 debug!(
240 "Configured slirp4netns port forward {}:{} -> {}:{}/{}",
241 pf.host_ip
242 .map(|ip| ip.to_string())
243 .unwrap_or_else(|| "0.0.0.0".to_string()),
244 pf.host_port,
245 self.guest_ip,
246 pf.container_port,
247 pf.protocol
248 );
249 Ok(())
250 }
251
252 fn api_request(socket_path: &Path, request: &serde_json::Value) -> Result<serde_json::Value> {
253 let mut stream = UnixStream::connect(socket_path).map_err(|e| {
254 NucleusError::NetworkError(format!(
255 "connect slirp4netns API socket {:?}: {}",
256 socket_path, e
257 ))
258 })?;
259 let payload = serde_json::to_vec(request).map_err(|e| {
260 NucleusError::NetworkError(format!("serialize slirp4netns API request: {}", e))
261 })?;
262 stream.write_all(&payload).map_err(|e| {
263 NucleusError::NetworkError(format!("write slirp4netns API request: {}", e))
264 })?;
265 stream
266 .shutdown(std::net::Shutdown::Write)
267 .map_err(|e| NucleusError::NetworkError(format!("shutdown slirp4netns API: {}", e)))?;
268
269 let mut buf = Vec::new();
270 stream.read_to_end(&mut buf).map_err(|e| {
271 NucleusError::NetworkError(format!("read slirp4netns API response: {}", e))
272 })?;
273
274 serde_json::from_slice(&buf).map_err(|e| {
275 NucleusError::NetworkError(format!(
276 "parse slirp4netns API response '{}': {}",
277 String::from_utf8_lossy(&buf),
278 e
279 ))
280 })
281 }
282
283 fn wait_until_ready(child: &mut Child, ready_read: OwnedFd) -> Result<()> {
284 let mut ready = std::fs::File::from(ready_read);
285 let mut buf = [0u8; 1];
286 match ready.read_exact(&mut buf) {
287 Ok(()) if buf == [b'1'] => Ok(()),
288 Ok(()) => Err(NucleusError::NetworkError(format!(
289 "slirp4netns ready-fd returned unexpected byte {:?}",
290 buf
291 ))),
292 Err(e) => {
293 if let Ok(Some(status)) = child.try_wait() {
294 let detail = status
295 .code()
296 .map(|code| format!("exit code {}", code))
297 .or_else(|| status.signal().map(|sig| format!("signal {}", sig)))
298 .unwrap_or_else(|| "unknown status".to_string());
299 Err(NucleusError::NetworkError(format!(
300 "slirp4netns exited before ready: {}",
301 detail
302 )))
303 } else {
304 Err(NucleusError::NetworkError(format!(
305 "failed waiting for slirp4netns readiness: {}",
306 e
307 )))
308 }
309 }
310 }
311 }
312
313 fn stop_child(&mut self) -> Result<()> {
314 self.exit_signal.take();
315
316 let deadline = Instant::now() + Duration::from_secs(2);
317 loop {
318 match self.child.try_wait() {
319 Ok(Some(_)) => break,
320 Ok(None) if Instant::now() < deadline => {
321 std::thread::sleep(Duration::from_millis(50))
322 }
323 Ok(None) => {
324 self.child.kill().map_err(|e| {
325 NucleusError::NetworkError(format!("kill slirp4netns: {}", e))
326 })?;
327 let _ = self.child.wait();
328 break;
329 }
330 Err(e) => {
331 return Err(NucleusError::NetworkError(format!(
332 "wait for slirp4netns shutdown: {}",
333 e
334 )))
335 }
336 }
337 }
338
339 info!(
340 "Userspace NAT cleaned up for container {}",
341 self.container_id
342 );
343 Ok(())
344 }
345
346 fn cleanup_best_effort(&mut self) {
347 if self.state == NetworkState::Cleaned {
348 return;
349 }
350
351 self.exit_signal.take();
352
353 if let Ok(None) = self.child.try_wait() {
354 let deadline = Instant::now() + Duration::from_secs(1);
355 while Instant::now() < deadline {
356 match self.child.try_wait() {
357 Ok(Some(_)) => break,
358 Ok(None) => std::thread::sleep(Duration::from_millis(25)),
359 Err(_) => break,
360 }
361 }
362
363 if let Ok(None) = self.child.try_wait() {
364 let _ = self.child.kill();
365 let _ = self.child.wait();
366 }
367 }
368
369 self.cleanup_runtime_dir();
370 self.state = NetworkState::Cleaned;
371 debug!(
372 "Userspace NAT cleaned up (best-effort via drop) for container {}",
373 self.container_id
374 );
375 }
376
377 fn cleanup_runtime_dir(&self) {
378 if let Err(e) = std::fs::remove_dir_all(&self.runtime_dir) {
379 if self.runtime_dir.exists() {
380 warn!(
381 "Failed to remove slirp4netns runtime dir {:?}: {}",
382 self.runtime_dir, e
383 );
384 }
385 }
386 }
387
388 fn validate_userspace_config(config: &BridgeConfig, guest_ip: &str) -> Result<()> {
389 let prefix = config
390 .subnet
391 .split_once('/')
392 .and_then(|(_, prefix)| prefix.parse::<u8>().ok())
393 .unwrap_or(24);
394 if prefix > 25 {
395 return Err(NucleusError::NetworkError(format!(
396 "Userspace NAT requires a subnet with at least 128 addresses; '{}' is too small",
397 config.subnet
398 )));
399 }
400
401 if let Some(requested_ip) = config.container_ip.as_deref() {
402 if requested_ip != guest_ip {
403 return Err(NucleusError::NetworkError(format!(
404 "Userspace NAT uses the slirp4netns guest address {}; requested container IP {} is unsupported",
405 guest_ip, requested_ip
406 )));
407 }
408 }
409
410 Ok(())
411 }
412
/// Decides whether slirp4netns is told to join the container's user
/// namespace (`--userns-path`).
///
/// The decision depends only on `rootless`; `_host_is_root` is accepted but
/// currently ignored.
fn should_join_userns(_host_is_root: bool, rootless: bool) -> bool {
    rootless
}
416
417 fn spawn_slirp(
418 slirp_bin: &Path,
419 pid: u32,
420 config: &BridgeConfig,
421 needs_userns: bool,
422 api_socket_path: &Path,
423 enable_sandbox: bool,
424 ) -> Result<(Child, OwnedFd)> {
425 let (ready_read, ready_write) = nix::unistd::pipe()
426 .map_err(|e| NucleusError::NetworkError(format!("ready pipe: {}", e)))?;
427 let (exit_read, exit_write) = nix::unistd::pipe()
428 .map_err(|e| NucleusError::NetworkError(format!("exit pipe: {}", e)))?;
429 Self::clear_cloexec(&ready_write)?;
430 Self::clear_cloexec(&exit_read)?;
431
432 let args = Self::command_args(
433 pid,
434 config,
435 needs_userns,
436 api_socket_path,
437 ready_write.as_raw_fd(),
438 exit_read.as_raw_fd(),
439 enable_sandbox,
440 );
441
442 let mut child = Command::new(slirp_bin)
443 .args(&args)
444 .spawn()
445 .map_err(|e| NucleusError::NetworkError(format!("spawn slirp4netns: {}", e)))?;
446
447 drop(ready_write);
448 drop(exit_read);
449
450 match Self::wait_until_ready(&mut child, ready_read) {
451 Ok(()) => Ok((child, exit_write)),
452 Err(e) => {
453 let _ = child.kill();
454 let _ = child.wait();
455 Err(e)
456 }
457 }
458 }
459
460 fn command_args(
461 pid: u32,
462 config: &BridgeConfig,
463 join_userns: bool,
464 api_socket_path: &Path,
465 ready_fd: i32,
466 exit_fd: i32,
467 enable_sandbox: bool,
468 ) -> Vec<String> {
469 let mut args = vec![
470 "--configure".to_string(),
471 "--ready-fd".to_string(),
472 ready_fd.to_string(),
473 "--exit-fd".to_string(),
474 exit_fd.to_string(),
475 "--api-socket".to_string(),
476 api_socket_path.display().to_string(),
477 "--cidr".to_string(),
478 config.subnet.clone(),
479 "--disable-host-loopback".to_string(),
480 ];
481
482 if enable_sandbox {
483 args.push("--enable-sandbox".to_string());
484 }
485
486 if !config.dns.is_empty() {
487 args.push("--disable-dns".to_string());
488 }
489
490 if join_userns {
491 args.push("--userns-path".to_string());
492 args.push(format!("/proc/{}/ns/user", pid));
493 }
494
495 args.push(pid.to_string());
496 args.push(SLIRP_TAP_NAME.to_string());
497 args
498 }
499
500 fn runtime_dir(container_id: &str) -> PathBuf {
501 let base = if nix::unistd::Uid::effective().is_root() {
502 PathBuf::from("/run/nucleus/userspace-net")
503 } else {
504 dirs::runtime_dir()
505 .map(|dir| dir.join("nucleus/userspace-net"))
506 .or_else(|| dirs::data_local_dir().map(|dir| dir.join("nucleus/userspace-net")))
507 .unwrap_or_else(|| std::env::temp_dir().join("nucleus-userspace-net"))
508 };
509 base.join(container_id)
510 }
511
512 fn ensure_runtime_dir(path: &Path) -> Result<()> {
513 if let Some(parent) = path.parent() {
514 std::fs::create_dir_all(parent).map_err(|e| {
515 NucleusError::NetworkError(format!(
516 "create userspace-net parent dir {:?}: {}",
517 parent, e
518 ))
519 })?;
520 }
521 std::fs::create_dir_all(path).map_err(|e| {
522 NucleusError::NetworkError(format!("create userspace-net dir {:?}: {}", path, e))
523 })?;
524 use std::os::unix::fs::PermissionsExt;
525 std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o700)).map_err(|e| {
526 NucleusError::NetworkError(format!(
527 "secure userspace-net dir permissions for {:?}: {}",
528 path, e
529 ))
530 })?;
531 Ok(())
532 }
533
534 fn clear_cloexec(fd: &OwnedFd) -> Result<()> {
535 let flags = fcntl(fd, FcntlArg::F_GETFD).map_err(|e| {
536 NucleusError::NetworkError(format!("fcntl(F_GETFD) on fd {}: {}", fd.as_raw_fd(), e))
537 })?;
538 let fd_flags = FdFlag::from_bits_truncate(flags);
539 let new_flags = fd_flags & !FdFlag::FD_CLOEXEC;
540 fcntl(fd, FcntlArg::F_SETFD(new_flags)).map_err(|e| {
541 NucleusError::NetworkError(format!("fcntl(F_SETFD) on fd {}: {}", fd.as_raw_fd(), e))
542 })?;
543 Ok(())
544 }
545
546 fn guest_ip_from_subnet(subnet: &str) -> Result<String> {
547 Self::offset_ip_from_subnet(subnet, 100).map(|ip| ip.to_string())
548 }
549
550 fn dns_ip_from_subnet(subnet: &str) -> Result<String> {
551 Self::offset_ip_from_subnet(subnet, 3).map(|ip| ip.to_string())
552 }
553
554 fn offset_ip_from_subnet(subnet: &str, offset: u32) -> Result<std::net::Ipv4Addr> {
555 let (base, prefix) = subnet.split_once('/').ok_or_else(|| {
556 NucleusError::NetworkError(format!("Invalid CIDR (missing /prefix): '{}'", subnet))
557 })?;
558 let prefix = prefix.parse::<u8>().map_err(|e| {
559 NucleusError::NetworkError(format!("Invalid CIDR prefix '{}': {}", subnet, e))
560 })?;
561 let base_ip = base.parse::<std::net::Ipv4Addr>().map_err(|e| {
562 NucleusError::NetworkError(format!("Invalid CIDR base '{}': {}", subnet, e))
563 })?;
564
565 let host_capacity = if prefix == 32 {
566 1u64
567 } else {
568 1u64 << (32 - prefix)
569 };
570 if offset as u64 >= host_capacity {
571 return Err(NucleusError::NetworkError(format!(
572 "CIDR '{}' does not have room for host offset {}",
573 subnet, offset
574 )));
575 }
576
577 let candidate = u32::from(base_ip)
578 .checked_add(offset)
579 .ok_or_else(|| NucleusError::NetworkError(format!("CIDR '{}' overflowed", subnet)))?;
580 Ok(std::net::Ipv4Addr::from(candidate))
581 }
582}
583
/// Runs the best-effort teardown when the network goes out of scope.
///
/// `cleanup_best_effort` returns immediately when the state is already
/// `Cleaned`, so a network that was cleaned up explicitly is not torn down
/// a second time.
impl Drop for UserspaceNetwork {
    fn drop(&mut self) {
        self.cleanup_best_effort();
    }
}
589
#[cfg(test)]
mod tests {
    use super::*;
    use crate::network::Protocol;

    /// True when `args` contains `flag` as a whole argument.
    fn has_arg(args: &[String], flag: &str) -> bool {
        args.iter().any(|arg| arg == flag)
    }

    /// With the default config, only a rootful, non-rootless setup selects
    /// the kernel backend.
    #[test]
    fn test_auto_nat_backend_prefers_kernel_for_rootful_hosts() {
        let config = BridgeConfig::default();
        let cases = [
            ((true, false), NatBackend::Kernel),
            ((true, true), NatBackend::Userspace),
            ((false, true), NatBackend::Userspace),
        ];
        for ((host_is_root, rootless), expected) in cases {
            assert_eq!(config.selected_nat_backend(host_is_root, rootless), expected);
        }
    }

    /// A /26 has no room for the guest offset, so deriving the guest IP fails.
    #[test]
    fn test_userspace_backend_rejects_too_small_subnets() {
        let config = BridgeConfig {
            subnet: "10.0.42.0/26".to_string(),
            ..BridgeConfig::default()
        };

        let err = UserspaceNetwork::guest_ip_from_subnet(&config.subnet).unwrap_err();
        let message = err.to_string();
        assert!(
            message.contains("does not have room"),
            "unexpected error: {err}"
        );
    }

    /// A requested container IP that differs from the guest IP is rejected.
    #[test]
    fn test_userspace_backend_rejects_custom_guest_ip() {
        let config = BridgeConfig {
            container_ip: Some("10.0.42.2".to_string()),
            ..BridgeConfig::default()
        };

        let message = UserspaceNetwork::validate_userspace_config(&config, "10.0.42.100")
            .unwrap_err()
            .to_string();
        assert!(message.contains("requested container IP 10.0.42.2 is unsupported"));
    }

    /// Explicit DNS servers switch off slirp4netns' built-in DNS.
    #[test]
    fn test_slirp_command_args_disable_builtin_dns_when_explicit_dns_is_set() {
        let config = BridgeConfig::default().with_dns(vec!["1.1.1.1".to_string()]);
        let socket = Path::new("/tmp/slirp.sock");
        let args = UserspaceNetwork::command_args(4242, &config, true, socket, 5, 6, true);

        assert!(has_arg(&args, "--disable-dns"));
        assert!(has_arg(&args, "--userns-path"));
    }

    /// The user namespace is joined exactly when running rootless.
    #[test]
    fn test_slirp_userns_join_is_kept_for_root_remapped_hosts() {
        for host_is_root in [true, false] {
            assert!(UserspaceNetwork::should_join_userns(host_is_root, true));
        }
        for host_is_root in [true, false] {
            assert!(!UserspaceNetwork::should_join_userns(host_is_root, false));
        }
    }

    /// Dropping the sandbox flag must not drop the user namespace arguments.
    #[test]
    fn test_slirp_command_args_keep_userns_without_sandbox() {
        let config = BridgeConfig::default();
        let socket = Path::new("/tmp/slirp.sock");
        let args = UserspaceNetwork::command_args(4242, &config, true, socket, 5, 6, false);

        assert!(!has_arg(&args, "--enable-sandbox"));
        assert!(has_arg(&args, "--userns-path"));
        assert!(has_arg(&args, "/proc/4242/ns/user"));
    }

    /// Pins the JSON shape of an `add_hostfwd` request for a TCP forward
    /// bound to a specific host address.
    #[test]
    fn test_userspace_port_forward_request_uses_slirp_hostfwd_shape() {
        let forward = PortForward {
            host_ip: Some(std::net::Ipv4Addr::new(127, 0, 0, 1)),
            host_port: 8080,
            container_port: 80,
            protocol: Protocol::Tcp,
        };

        let mut arguments = serde_json::Map::new();
        for (key, value) in [
            ("proto", json!(forward.protocol.as_str())),
            ("host_port", json!(forward.host_port)),
            ("guest_port", json!(forward.container_port)),
        ] {
            arguments.insert(key.to_string(), value);
        }
        if let Some(addr) = forward.host_ip {
            arguments.insert("host_addr".to_string(), json!(addr.to_string()));
        }
        let request = json!({
            "execute": "add_hostfwd",
            "arguments": arguments,
        });

        assert_eq!(request["execute"], "add_hostfwd");
        let sent = &request["arguments"];
        assert_eq!(sent["proto"], "tcp");
        assert_eq!(sent["host_addr"], "127.0.0.1");
        assert_eq!(sent["host_port"], 8080);
        assert_eq!(sent["guest_port"], 80);
    }
}