1use super::{
2 egress, BridgeConfig, BridgeNetwork, EgressPolicy, NatBackend, NetworkState, PortForward,
3};
4use crate::error::{NucleusError, Result, StateTransition};
5use nix::fcntl::{fcntl, FcntlArg, FdFlag};
6use serde_json::json;
7use std::io::{Read, Write};
8use std::os::fd::{AsRawFd, OwnedFd};
9use std::os::unix::net::UnixStream;
10use std::os::unix::process::ExitStatusExt;
11use std::path::{Path, PathBuf};
12use std::process::{Child, Command};
13use std::time::{Duration, Instant};
14use tracing::{debug, info, warn};
15
/// Name of the TAP interface handed to slirp4netns as its final positional argument.
const SLIRP_TAP_NAME: &str = "tap0";
17
18pub enum BridgeDriver {
20 Kernel(BridgeNetwork),
21 Userspace(UserspaceNetwork),
22}
23
24impl BridgeDriver {
25 pub fn setup_with_id(
26 pid: u32,
27 config: &BridgeConfig,
28 container_id: &str,
29 host_is_root: bool,
30 rootless: bool,
31 ) -> Result<Self> {
32 match config.selected_nat_backend(host_is_root, rootless) {
33 NatBackend::Kernel => Ok(Self::Kernel(BridgeNetwork::setup_with_id(
34 pid,
35 config,
36 container_id,
37 )?)),
38 NatBackend::Userspace => Ok(Self::Userspace(UserspaceNetwork::setup_with_id(
39 pid,
40 config,
41 container_id,
42 host_is_root,
43 rootless,
44 )?)),
45 NatBackend::Auto => Err(NucleusError::NetworkError(
46 "nat backend selection resolved to auto unexpectedly".to_string(),
47 )),
48 }
49 }
50
51 pub fn apply_egress_policy(
52 &self,
53 pid: u32,
54 policy: &EgressPolicy,
55 rootless: bool,
56 ) -> Result<()> {
57 match self {
58 Self::Kernel(net) => net.apply_egress_policy(pid, policy),
59 Self::Userspace(net) => net.apply_egress_policy(pid, policy, rootless),
60 }
61 }
62
63 pub fn cleanup(self) -> Result<()> {
64 match self {
65 Self::Kernel(net) => net.cleanup(),
66 Self::Userspace(net) => net.cleanup(),
67 }
68 }
69}
70
71pub struct UserspaceNetwork {
73 config: BridgeConfig,
74 guest_ip: String,
75 container_id: String,
76 api_socket_path: PathBuf,
77 runtime_dir: PathBuf,
78 exit_signal: Option<OwnedFd>,
79 child: Child,
80 state: NetworkState,
81}
82
83impl UserspaceNetwork {
84 pub(crate) fn default_dns_server(subnet: &str) -> Result<String> {
85 Self::dns_ip_from_subnet(subnet)
86 }
87
88 pub fn setup_with_id(
89 pid: u32,
90 config: &BridgeConfig,
91 container_id: &str,
92 host_is_root: bool,
93 rootless: bool,
94 ) -> Result<Self> {
95 config.validate()?;
96
97 let guest_ip = Self::guest_ip_from_subnet(&config.subnet)?;
98 Self::validate_userspace_config(config, &guest_ip)?;
99
100 let mut state = NetworkState::Unconfigured;
101 state = state.transition(NetworkState::Configuring)?;
102
103 let runtime_dir = Self::runtime_dir(container_id);
104 Self::ensure_runtime_dir(&runtime_dir)?;
105 let api_socket_path = runtime_dir.join("slirp4netns.sock");
106
107 let slirp = BridgeNetwork::resolve_bin("slirp4netns")?;
108 let needs_userns = rootless && !host_is_root;
115
116 let slirp_path = Path::new(&slirp);
117 let (child, exit_write) = match Self::spawn_slirp(
118 slirp_path,
119 pid,
120 config,
121 needs_userns,
122 &api_socket_path,
123 true,
124 ) {
125 Ok(result) => result,
126 Err(e) => {
127 warn!(
128 "slirp4netns sandbox failed ({}), retrying without --enable-sandbox",
129 e
130 );
131 let _ = std::fs::remove_file(&api_socket_path);
137 Self::spawn_slirp(
138 slirp_path,
139 pid,
140 config,
141 needs_userns,
142 &api_socket_path,
143 false,
144 )
145 .map_err(|retry_err| {
146 let _ = std::fs::remove_dir_all(&runtime_dir);
147 retry_err
148 })?
149 }
150 };
151
152 let mut network = Self {
153 config: config.clone(),
154 guest_ip: guest_ip.to_string(),
155 container_id: container_id.to_string(),
156 api_socket_path,
157 runtime_dir,
158 exit_signal: Some(exit_write),
159 child,
160 state,
161 };
162
163 if let Err(e) = network.configure_port_forwards() {
164 network.cleanup_best_effort();
165 return Err(e);
166 }
167
168 network.state = network.state.transition(NetworkState::Active)?;
169
170 info!(
171 "Userspace NAT configured via slirp4netns for container {} (guest IP {})",
172 network.container_id, network.guest_ip
173 );
174
175 Ok(network)
176 }
177
178 pub fn apply_egress_policy(
179 &self,
180 pid: u32,
181 policy: &EgressPolicy,
182 rootless: bool,
183 ) -> Result<()> {
184 egress::apply_egress_policy(pid, &self.effective_dns_servers(), policy, rootless)
185 }
186
187 pub fn cleanup(mut self) -> Result<()> {
188 self.state = self.state.transition(NetworkState::Cleaned)?;
189 self.stop_child()?;
190 self.cleanup_runtime_dir();
191 Ok(())
192 }
193
194 fn effective_dns_servers(&self) -> Vec<String> {
195 if self.config.dns.is_empty() {
196 vec![Self::dns_ip_from_subnet(&self.config.subnet)
197 .unwrap_or_else(|_| "10.0.2.3".to_string())]
198 } else {
199 self.config.dns.clone()
200 }
201 }
202
203 fn configure_port_forwards(&mut self) -> Result<()> {
204 for pf in &self.config.port_forwards {
205 self.add_port_forward(pf)?;
206 }
207 Ok(())
208 }
209
210 fn add_port_forward(&self, pf: &PortForward) -> Result<()> {
211 let mut arguments = serde_json::Map::new();
212 arguments.insert("proto".to_string(), json!(pf.protocol.as_str()));
213 arguments.insert("host_port".to_string(), json!(pf.host_port));
214 arguments.insert("guest_port".to_string(), json!(pf.container_port));
215 if let Some(host_ip) = pf.host_ip {
216 arguments.insert("host_addr".to_string(), json!(host_ip.to_string()));
217 }
218
219 let response = Self::api_request(
220 &self.api_socket_path,
221 &json!({
222 "execute": "add_hostfwd",
223 "arguments": arguments,
224 }),
225 )?;
226
227 if let Some(error) = response.get("error") {
228 return Err(NucleusError::NetworkError(format!(
229 "slirp4netns add_hostfwd failed for {}:{}->{}/{}: {}",
230 pf.host_ip
231 .map(|ip| ip.to_string())
232 .unwrap_or_else(|| "0.0.0.0".to_string()),
233 pf.host_port,
234 pf.container_port,
235 pf.protocol,
236 error
237 )));
238 }
239
240 debug!(
241 "Configured slirp4netns port forward {}:{} -> {}:{}/{}",
242 pf.host_ip
243 .map(|ip| ip.to_string())
244 .unwrap_or_else(|| "0.0.0.0".to_string()),
245 pf.host_port,
246 self.guest_ip,
247 pf.container_port,
248 pf.protocol
249 );
250 Ok(())
251 }
252
253 fn api_request(socket_path: &Path, request: &serde_json::Value) -> Result<serde_json::Value> {
254 let mut stream = UnixStream::connect(socket_path).map_err(|e| {
255 NucleusError::NetworkError(format!(
256 "connect slirp4netns API socket {:?}: {}",
257 socket_path, e
258 ))
259 })?;
260 let payload = serde_json::to_vec(request).map_err(|e| {
261 NucleusError::NetworkError(format!("serialize slirp4netns API request: {}", e))
262 })?;
263 stream.write_all(&payload).map_err(|e| {
264 NucleusError::NetworkError(format!("write slirp4netns API request: {}", e))
265 })?;
266 stream
267 .shutdown(std::net::Shutdown::Write)
268 .map_err(|e| NucleusError::NetworkError(format!("shutdown slirp4netns API: {}", e)))?;
269
270 let mut buf = Vec::new();
271 stream.read_to_end(&mut buf).map_err(|e| {
272 NucleusError::NetworkError(format!("read slirp4netns API response: {}", e))
273 })?;
274
275 serde_json::from_slice(&buf).map_err(|e| {
276 NucleusError::NetworkError(format!(
277 "parse slirp4netns API response '{}': {}",
278 String::from_utf8_lossy(&buf),
279 e
280 ))
281 })
282 }
283
284 fn wait_until_ready(child: &mut Child, ready_read: OwnedFd) -> Result<()> {
285 let mut ready = std::fs::File::from(ready_read);
286 let mut buf = [0u8; 1];
287 match ready.read_exact(&mut buf) {
288 Ok(()) if buf == [b'1'] => Ok(()),
289 Ok(()) => Err(NucleusError::NetworkError(format!(
290 "slirp4netns ready-fd returned unexpected byte {:?}",
291 buf
292 ))),
293 Err(e) => {
294 if let Ok(Some(status)) = child.try_wait() {
295 let detail = status
296 .code()
297 .map(|code| format!("exit code {}", code))
298 .or_else(|| status.signal().map(|sig| format!("signal {}", sig)))
299 .unwrap_or_else(|| "unknown status".to_string());
300 Err(NucleusError::NetworkError(format!(
301 "slirp4netns exited before ready: {}",
302 detail
303 )))
304 } else {
305 Err(NucleusError::NetworkError(format!(
306 "failed waiting for slirp4netns readiness: {}",
307 e
308 )))
309 }
310 }
311 }
312 }
313
314 fn stop_child(&mut self) -> Result<()> {
315 self.exit_signal.take();
316
317 let deadline = Instant::now() + Duration::from_secs(2);
318 loop {
319 match self.child.try_wait() {
320 Ok(Some(_)) => break,
321 Ok(None) if Instant::now() < deadline => {
322 std::thread::sleep(Duration::from_millis(50))
323 }
324 Ok(None) => {
325 self.child.kill().map_err(|e| {
326 NucleusError::NetworkError(format!("kill slirp4netns: {}", e))
327 })?;
328 let _ = self.child.wait();
329 break;
330 }
331 Err(e) => {
332 return Err(NucleusError::NetworkError(format!(
333 "wait for slirp4netns shutdown: {}",
334 e
335 )))
336 }
337 }
338 }
339
340 info!(
341 "Userspace NAT cleaned up for container {}",
342 self.container_id
343 );
344 Ok(())
345 }
346
347 fn cleanup_best_effort(&mut self) {
348 if self.state == NetworkState::Cleaned {
349 return;
350 }
351
352 self.exit_signal.take();
353
354 if let Ok(None) = self.child.try_wait() {
355 let deadline = Instant::now() + Duration::from_secs(1);
356 while Instant::now() < deadline {
357 match self.child.try_wait() {
358 Ok(Some(_)) => break,
359 Ok(None) => std::thread::sleep(Duration::from_millis(25)),
360 Err(_) => break,
361 }
362 }
363
364 if let Ok(None) = self.child.try_wait() {
365 let _ = self.child.kill();
366 let _ = self.child.wait();
367 }
368 }
369
370 self.cleanup_runtime_dir();
371 self.state = NetworkState::Cleaned;
372 debug!(
373 "Userspace NAT cleaned up (best-effort via drop) for container {}",
374 self.container_id
375 );
376 }
377
378 fn cleanup_runtime_dir(&self) {
379 if let Err(e) = std::fs::remove_dir_all(&self.runtime_dir) {
380 if self.runtime_dir.exists() {
381 warn!(
382 "Failed to remove slirp4netns runtime dir {:?}: {}",
383 self.runtime_dir, e
384 );
385 }
386 }
387 }
388
389 fn validate_userspace_config(config: &BridgeConfig, guest_ip: &str) -> Result<()> {
390 let prefix = config
391 .subnet
392 .split_once('/')
393 .and_then(|(_, prefix)| prefix.parse::<u8>().ok())
394 .unwrap_or(24);
395 if prefix > 25 {
396 return Err(NucleusError::NetworkError(format!(
397 "Userspace NAT requires a subnet with at least 128 addresses; '{}' is too small",
398 config.subnet
399 )));
400 }
401
402 if let Some(requested_ip) = config.container_ip.as_deref() {
403 if requested_ip != guest_ip {
404 return Err(NucleusError::NetworkError(format!(
405 "Userspace NAT uses the slirp4netns guest address {}; requested container IP {} is unsupported",
406 guest_ip, requested_ip
407 )));
408 }
409 }
410
411 Ok(())
412 }
413
414 fn spawn_slirp(
415 slirp_bin: &Path,
416 pid: u32,
417 config: &BridgeConfig,
418 needs_userns: bool,
419 api_socket_path: &Path,
420 enable_sandbox: bool,
421 ) -> Result<(Child, OwnedFd)> {
422 let (ready_read, ready_write) = nix::unistd::pipe()
423 .map_err(|e| NucleusError::NetworkError(format!("ready pipe: {}", e)))?;
424 let (exit_read, exit_write) = nix::unistd::pipe()
425 .map_err(|e| NucleusError::NetworkError(format!("exit pipe: {}", e)))?;
426 Self::clear_cloexec(&ready_write)?;
427 Self::clear_cloexec(&exit_read)?;
428
429 let args = Self::command_args(
430 pid,
431 config,
432 needs_userns,
433 api_socket_path,
434 ready_write.as_raw_fd(),
435 exit_read.as_raw_fd(),
436 enable_sandbox,
437 );
438
439 let mut child = Command::new(slirp_bin)
440 .args(&args)
441 .spawn()
442 .map_err(|e| NucleusError::NetworkError(format!("spawn slirp4netns: {}", e)))?;
443
444 drop(ready_write);
445 drop(exit_read);
446
447 match Self::wait_until_ready(&mut child, ready_read) {
448 Ok(()) => Ok((child, exit_write)),
449 Err(e) => {
450 let _ = child.kill();
451 let _ = child.wait();
452 Err(e)
453 }
454 }
455 }
456
457 fn command_args(
458 pid: u32,
459 config: &BridgeConfig,
460 join_userns: bool,
461 api_socket_path: &Path,
462 ready_fd: i32,
463 exit_fd: i32,
464 enable_sandbox: bool,
465 ) -> Vec<String> {
466 let mut args = vec![
467 "--configure".to_string(),
468 "--ready-fd".to_string(),
469 ready_fd.to_string(),
470 "--exit-fd".to_string(),
471 exit_fd.to_string(),
472 "--api-socket".to_string(),
473 api_socket_path.display().to_string(),
474 "--cidr".to_string(),
475 config.subnet.clone(),
476 "--disable-host-loopback".to_string(),
477 ];
478
479 if enable_sandbox {
480 args.push("--enable-sandbox".to_string());
481 }
482
483 if !config.dns.is_empty() {
484 args.push("--disable-dns".to_string());
485 }
486
487 if join_userns {
488 args.push("--userns-path".to_string());
489 args.push(format!("/proc/{}/ns/user", pid));
490 }
491
492 args.push(pid.to_string());
493 args.push(SLIRP_TAP_NAME.to_string());
494 args
495 }
496
497 fn runtime_dir(container_id: &str) -> PathBuf {
498 let base = if nix::unistd::Uid::effective().is_root() {
499 PathBuf::from("/run/nucleus/userspace-net")
500 } else {
501 dirs::runtime_dir()
502 .map(|dir| dir.join("nucleus/userspace-net"))
503 .or_else(|| dirs::data_local_dir().map(|dir| dir.join("nucleus/userspace-net")))
504 .unwrap_or_else(|| std::env::temp_dir().join("nucleus-userspace-net"))
505 };
506 base.join(container_id)
507 }
508
509 fn ensure_runtime_dir(path: &Path) -> Result<()> {
510 if let Some(parent) = path.parent() {
511 std::fs::create_dir_all(parent).map_err(|e| {
512 NucleusError::NetworkError(format!(
513 "create userspace-net parent dir {:?}: {}",
514 parent, e
515 ))
516 })?;
517 }
518 std::fs::create_dir_all(path).map_err(|e| {
519 NucleusError::NetworkError(format!("create userspace-net dir {:?}: {}", path, e))
520 })?;
521 use std::os::unix::fs::PermissionsExt;
522 std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o700)).map_err(|e| {
523 NucleusError::NetworkError(format!(
524 "secure userspace-net dir permissions for {:?}: {}",
525 path, e
526 ))
527 })?;
528 Ok(())
529 }
530
531 fn clear_cloexec(fd: &OwnedFd) -> Result<()> {
532 let flags = fcntl(fd, FcntlArg::F_GETFD).map_err(|e| {
533 NucleusError::NetworkError(format!("fcntl(F_GETFD) on fd {}: {}", fd.as_raw_fd(), e))
534 })?;
535 let fd_flags = FdFlag::from_bits_truncate(flags);
536 let new_flags = fd_flags & !FdFlag::FD_CLOEXEC;
537 fcntl(fd, FcntlArg::F_SETFD(new_flags)).map_err(|e| {
538 NucleusError::NetworkError(format!("fcntl(F_SETFD) on fd {}: {}", fd.as_raw_fd(), e))
539 })?;
540 Ok(())
541 }
542
543 fn guest_ip_from_subnet(subnet: &str) -> Result<String> {
544 Self::offset_ip_from_subnet(subnet, 100).map(|ip| ip.to_string())
545 }
546
547 fn dns_ip_from_subnet(subnet: &str) -> Result<String> {
548 Self::offset_ip_from_subnet(subnet, 3).map(|ip| ip.to_string())
549 }
550
551 fn offset_ip_from_subnet(subnet: &str, offset: u32) -> Result<std::net::Ipv4Addr> {
552 let (base, prefix) = subnet.split_once('/').ok_or_else(|| {
553 NucleusError::NetworkError(format!("Invalid CIDR (missing /prefix): '{}'", subnet))
554 })?;
555 let prefix = prefix.parse::<u8>().map_err(|e| {
556 NucleusError::NetworkError(format!("Invalid CIDR prefix '{}': {}", subnet, e))
557 })?;
558 let base_ip = base.parse::<std::net::Ipv4Addr>().map_err(|e| {
559 NucleusError::NetworkError(format!("Invalid CIDR base '{}': {}", subnet, e))
560 })?;
561
562 let host_capacity = if prefix == 32 {
563 1u64
564 } else {
565 1u64 << (32 - prefix)
566 };
567 if offset as u64 >= host_capacity {
568 return Err(NucleusError::NetworkError(format!(
569 "CIDR '{}' does not have room for host offset {}",
570 subnet, offset
571 )));
572 }
573
574 let candidate = u32::from(base_ip)
575 .checked_add(offset)
576 .ok_or_else(|| NucleusError::NetworkError(format!("CIDR '{}' overflowed", subnet)))?;
577 Ok(std::net::Ipv4Addr::from(candidate))
578 }
579}
580
581impl Drop for UserspaceNetwork {
582 fn drop(&mut self) {
583 self.cleanup_best_effort();
584 }
585}
586
#[cfg(test)]
mod tests {
    use super::*;
    use crate::network::Protocol;

    /// With the default configuration, only a rootful, non-rootless setup
    /// selects the kernel backend.
    #[test]
    fn test_auto_nat_backend_prefers_kernel_for_rootful_hosts() {
        let cfg = BridgeConfig::default();
        assert_eq!(cfg.selected_nat_backend(true, false), NatBackend::Kernel);
        assert_eq!(cfg.selected_nat_backend(true, true), NatBackend::Userspace);
        assert_eq!(cfg.selected_nat_backend(false, true), NatBackend::Userspace);
    }

    /// A /26 is rejected both when deriving the guest address and by the
    /// userspace-specific configuration check.
    #[test]
    fn test_userspace_backend_rejects_too_small_subnets() {
        let cfg = BridgeConfig {
            subnet: "10.0.42.0/26".to_string(),
            ..BridgeConfig::default()
        };

        let guest_ip_err = UserspaceNetwork::guest_ip_from_subnet(&cfg.subnet).unwrap_err();
        assert!(
            guest_ip_err.to_string().contains("does not have room"),
            "unexpected error: {guest_ip_err}"
        );

        let validate_err =
            UserspaceNetwork::validate_userspace_config(&cfg, "10.0.42.100").unwrap_err();
        assert!(
            validate_err.to_string().contains("is too small"),
            "unexpected error: {validate_err}"
        );
    }

    /// A requested container IP that differs from the guest address is refused.
    #[test]
    fn test_userspace_backend_rejects_custom_guest_ip() {
        let cfg = BridgeConfig {
            container_ip: Some("10.0.42.2".to_string()),
            ..BridgeConfig::default()
        };

        let err = UserspaceNetwork::validate_userspace_config(&cfg, "10.0.42.100").unwrap_err();
        assert!(err
            .to_string()
            .contains("requested container IP 10.0.42.2 is unsupported"));
    }

    /// Explicit DNS servers disable the built-in DNS, and joining the user
    /// namespace adds `--userns-path`.
    #[test]
    fn test_slirp_command_args_disable_builtin_dns_when_explicit_dns_is_set() {
        let cfg = BridgeConfig::default().with_dns(vec!["1.1.1.1".to_string()]);
        let args = UserspaceNetwork::command_args(
            4242,
            &cfg,
            true,
            Path::new("/tmp/slirp.sock"),
            5,
            6,
            true,
        );

        assert!(args.iter().any(|arg| arg == "--disable-dns"));
        assert!(args.iter().any(|arg| arg == "--userns-path"));
    }

    /// Pins the JSON shape of an `add_hostfwd` request. The request is
    /// rebuilt here the same way `add_port_forward` builds it.
    #[test]
    fn test_userspace_port_forward_request_uses_slirp_hostfwd_shape() {
        let pf = PortForward {
            host_ip: Some(std::net::Ipv4Addr::new(127, 0, 0, 1)),
            host_port: 8080,
            container_port: 80,
            protocol: Protocol::Tcp,
        };

        let mut arguments = serde_json::Map::new();
        arguments.insert("proto".to_string(), json!(pf.protocol.as_str()));
        arguments.insert("host_port".to_string(), json!(pf.host_port));
        arguments.insert("guest_port".to_string(), json!(pf.container_port));
        if let Some(host_ip) = pf.host_ip {
            arguments.insert("host_addr".to_string(), json!(host_ip.to_string()));
        }
        let request = json!({
            "execute": "add_hostfwd",
            "arguments": arguments,
        });

        assert_eq!(request["execute"], "add_hostfwd");
        assert_eq!(request["arguments"]["proto"], "tcp");
        assert_eq!(request["arguments"]["host_addr"], "127.0.0.1");
        assert_eq!(request["arguments"]["host_port"], 8080);
        assert_eq!(request["arguments"]["guest_port"], 80);
    }
}
671}