1use super::{
2 egress, BridgeConfig, BridgeNetwork, EgressPolicy, NatBackend, NetworkState, PortForward,
3};
4use crate::error::{NucleusError, Result, StateTransition};
5use nix::fcntl::{fcntl, FcntlArg, FdFlag};
6use serde_json::json;
7use std::io::{Read, Write};
8use std::os::fd::{AsRawFd, OwnedFd};
9use std::os::unix::net::UnixStream;
10use std::os::unix::process::ExitStatusExt;
11use std::path::{Path, PathBuf};
12use std::process::{Child, Command};
13use std::time::{Duration, Instant};
14use tracing::{debug, info, warn};
15
16const SLIRP_TAP_NAME: &str = "tap0";
17
/// Network driver backing a container bridge, picked from the resolved NAT backend.
pub enum BridgeDriver {
    /// Bridge handled by [`BridgeNetwork`]; built when the backend resolves to `NatBackend::Kernel`.
    Kernel(BridgeNetwork),
    /// Bridge handled by [`UserspaceNetwork`] (slirp4netns); built when the backend resolves
    /// to `NatBackend::Userspace`.
    Userspace(UserspaceNetwork),
}
23
24impl BridgeDriver {
25 pub fn setup_with_id(
26 pid: u32,
27 config: &BridgeConfig,
28 container_id: &str,
29 host_is_root: bool,
30 rootless: bool,
31 ) -> Result<Self> {
32 match config.selected_nat_backend(host_is_root, rootless) {
33 NatBackend::Kernel => Ok(Self::Kernel(BridgeNetwork::setup_with_id(
34 pid,
35 config,
36 container_id,
37 )?)),
38 NatBackend::Userspace => Ok(Self::Userspace(UserspaceNetwork::setup_with_id(
39 pid,
40 config,
41 container_id,
42 rootless,
43 )?)),
44 NatBackend::Auto => Err(NucleusError::NetworkError(
45 "nat backend selection resolved to auto unexpectedly".to_string(),
46 )),
47 }
48 }
49
50 pub fn apply_egress_policy(
51 &self,
52 pid: u32,
53 policy: &EgressPolicy,
54 rootless: bool,
55 ) -> Result<()> {
56 match self {
57 Self::Kernel(net) => net.apply_egress_policy(pid, policy),
58 Self::Userspace(net) => net.apply_egress_policy(pid, policy, rootless),
59 }
60 }
61
62 pub fn cleanup(self) -> Result<()> {
63 match self {
64 Self::Kernel(net) => net.cleanup(),
65 Self::Userspace(net) => net.cleanup(),
66 }
67 }
68}
69
/// A running slirp4netns instance providing userspace NAT for one container.
pub struct UserspaceNetwork {
    /// Copy of the bridge configuration this network was set up with.
    config: BridgeConfig,
    /// Guest address derived from the configured subnet.
    guest_ip: String,
    /// Container identifier; used for the runtime directory name and log messages.
    container_id: String,
    /// Path handed to slirp4netns as `--api-socket`; port forwards are requested through it.
    api_socket_path: PathBuf,
    /// Per-container directory holding the API socket; removed during cleanup.
    runtime_dir: PathBuf,
    /// Write end of the pipe whose read end is given to slirp4netns as `--exit-fd`.
    /// It is dropped at the start of shutdown, before waiting for the child to exit.
    exit_signal: Option<OwnedFd>,
    /// Handle to the spawned slirp4netns process.
    child: Child,
    /// Lifecycle state; `Cleaned` makes the best-effort cleanup a no-op.
    state: NetworkState,
}
81
82impl UserspaceNetwork {
83 pub(crate) fn default_dns_server(subnet: &str) -> Result<String> {
84 Self::dns_ip_from_subnet(subnet)
85 }
86
87 pub fn setup_with_id(
88 pid: u32,
89 config: &BridgeConfig,
90 container_id: &str,
91 rootless: bool,
92 ) -> Result<Self> {
93 config.validate()?;
94
95 let guest_ip = Self::guest_ip_from_subnet(&config.subnet)?;
96 Self::validate_userspace_config(config, &guest_ip)?;
97
98 let mut state = NetworkState::Unconfigured;
99 state = state.transition(NetworkState::Configuring)?;
100
101 let runtime_dir = Self::runtime_dir(container_id);
102 Self::ensure_runtime_dir(&runtime_dir)?;
103 let api_socket_path = runtime_dir.join("slirp4netns.sock");
104
105 let (ready_read, ready_write) = nix::unistd::pipe()
106 .map_err(|e| NucleusError::NetworkError(format!("ready pipe: {}", e)))?;
107 let (exit_read, exit_write) = nix::unistd::pipe()
108 .map_err(|e| NucleusError::NetworkError(format!("exit pipe: {}", e)))?;
109 Self::clear_cloexec(&ready_write)?;
110 Self::clear_cloexec(&exit_read)?;
111
112 let slirp = BridgeNetwork::resolve_bin("slirp4netns")?;
113 let args = Self::command_args(
114 pid,
115 config,
116 rootless,
117 &api_socket_path,
118 ready_write.as_raw_fd(),
119 exit_read.as_raw_fd(),
120 );
121
122 let mut child = Command::new(&slirp)
123 .args(&args)
124 .spawn()
125 .map_err(|e| NucleusError::NetworkError(format!("spawn slirp4netns: {}", e)))?;
126
127 drop(ready_write);
128 drop(exit_read);
129
130 if let Err(e) = Self::wait_until_ready(&mut child, ready_read) {
131 let _ = child.kill();
132 let _ = child.wait();
133 let _ = std::fs::remove_dir_all(&runtime_dir);
134 return Err(e);
135 }
136
137 let mut network = Self {
138 config: config.clone(),
139 guest_ip: guest_ip.to_string(),
140 container_id: container_id.to_string(),
141 api_socket_path,
142 runtime_dir,
143 exit_signal: Some(exit_write),
144 child,
145 state,
146 };
147
148 if let Err(e) = network.configure_port_forwards() {
149 network.cleanup_best_effort();
150 return Err(e);
151 }
152
153 network.state = network.state.transition(NetworkState::Active)?;
154
155 info!(
156 "Userspace NAT configured via slirp4netns for container {} (guest IP {})",
157 network.container_id, network.guest_ip
158 );
159
160 Ok(network)
161 }
162
163 pub fn apply_egress_policy(
164 &self,
165 pid: u32,
166 policy: &EgressPolicy,
167 rootless: bool,
168 ) -> Result<()> {
169 egress::apply_egress_policy(pid, &self.effective_dns_servers(), policy, rootless)
170 }
171
172 pub fn cleanup(mut self) -> Result<()> {
173 self.state = self.state.transition(NetworkState::Cleaned)?;
174 self.stop_child()?;
175 self.cleanup_runtime_dir();
176 Ok(())
177 }
178
179 fn effective_dns_servers(&self) -> Vec<String> {
180 if self.config.dns.is_empty() {
181 vec![Self::dns_ip_from_subnet(&self.config.subnet)
182 .unwrap_or_else(|_| "10.0.2.3".to_string())]
183 } else {
184 self.config.dns.clone()
185 }
186 }
187
188 fn configure_port_forwards(&mut self) -> Result<()> {
189 for pf in &self.config.port_forwards {
190 self.add_port_forward(pf)?;
191 }
192 Ok(())
193 }
194
195 fn add_port_forward(&self, pf: &PortForward) -> Result<()> {
196 let mut arguments = serde_json::Map::new();
197 arguments.insert("proto".to_string(), json!(pf.protocol.as_str()));
198 arguments.insert("host_port".to_string(), json!(pf.host_port));
199 arguments.insert("guest_port".to_string(), json!(pf.container_port));
200 if let Some(host_ip) = pf.host_ip {
201 arguments.insert("host_addr".to_string(), json!(host_ip.to_string()));
202 }
203
204 let response = Self::api_request(
205 &self.api_socket_path,
206 &json!({
207 "execute": "add_hostfwd",
208 "arguments": arguments,
209 }),
210 )?;
211
212 if let Some(error) = response.get("error") {
213 return Err(NucleusError::NetworkError(format!(
214 "slirp4netns add_hostfwd failed for {}:{}->{}/{}: {}",
215 pf.host_ip
216 .map(|ip| ip.to_string())
217 .unwrap_or_else(|| "0.0.0.0".to_string()),
218 pf.host_port,
219 pf.container_port,
220 pf.protocol,
221 error
222 )));
223 }
224
225 debug!(
226 "Configured slirp4netns port forward {}:{} -> {}:{}/{}",
227 pf.host_ip
228 .map(|ip| ip.to_string())
229 .unwrap_or_else(|| "0.0.0.0".to_string()),
230 pf.host_port,
231 self.guest_ip,
232 pf.container_port,
233 pf.protocol
234 );
235 Ok(())
236 }
237
238 fn api_request(socket_path: &Path, request: &serde_json::Value) -> Result<serde_json::Value> {
239 let mut stream = UnixStream::connect(socket_path).map_err(|e| {
240 NucleusError::NetworkError(format!(
241 "connect slirp4netns API socket {:?}: {}",
242 socket_path, e
243 ))
244 })?;
245 let payload = serde_json::to_vec(request).map_err(|e| {
246 NucleusError::NetworkError(format!("serialize slirp4netns API request: {}", e))
247 })?;
248 stream.write_all(&payload).map_err(|e| {
249 NucleusError::NetworkError(format!("write slirp4netns API request: {}", e))
250 })?;
251 stream
252 .shutdown(std::net::Shutdown::Write)
253 .map_err(|e| NucleusError::NetworkError(format!("shutdown slirp4netns API: {}", e)))?;
254
255 let mut buf = Vec::new();
256 stream.read_to_end(&mut buf).map_err(|e| {
257 NucleusError::NetworkError(format!("read slirp4netns API response: {}", e))
258 })?;
259
260 serde_json::from_slice(&buf).map_err(|e| {
261 NucleusError::NetworkError(format!(
262 "parse slirp4netns API response '{}': {}",
263 String::from_utf8_lossy(&buf),
264 e
265 ))
266 })
267 }
268
269 fn wait_until_ready(child: &mut Child, ready_read: OwnedFd) -> Result<()> {
270 let mut ready = std::fs::File::from(ready_read);
271 let mut buf = [0u8; 1];
272 match ready.read_exact(&mut buf) {
273 Ok(()) if buf == [b'1'] => Ok(()),
274 Ok(()) => Err(NucleusError::NetworkError(format!(
275 "slirp4netns ready-fd returned unexpected byte {:?}",
276 buf
277 ))),
278 Err(e) => {
279 if let Ok(Some(status)) = child.try_wait() {
280 let detail = status
281 .code()
282 .map(|code| format!("exit code {}", code))
283 .or_else(|| status.signal().map(|sig| format!("signal {}", sig)))
284 .unwrap_or_else(|| "unknown status".to_string());
285 Err(NucleusError::NetworkError(format!(
286 "slirp4netns exited before ready: {}",
287 detail
288 )))
289 } else {
290 Err(NucleusError::NetworkError(format!(
291 "failed waiting for slirp4netns readiness: {}",
292 e
293 )))
294 }
295 }
296 }
297 }
298
299 fn stop_child(&mut self) -> Result<()> {
300 self.exit_signal.take();
301
302 let deadline = Instant::now() + Duration::from_secs(2);
303 loop {
304 match self.child.try_wait() {
305 Ok(Some(_)) => break,
306 Ok(None) if Instant::now() < deadline => {
307 std::thread::sleep(Duration::from_millis(50))
308 }
309 Ok(None) => {
310 self.child.kill().map_err(|e| {
311 NucleusError::NetworkError(format!("kill slirp4netns: {}", e))
312 })?;
313 let _ = self.child.wait();
314 break;
315 }
316 Err(e) => {
317 return Err(NucleusError::NetworkError(format!(
318 "wait for slirp4netns shutdown: {}",
319 e
320 )))
321 }
322 }
323 }
324
325 info!(
326 "Userspace NAT cleaned up for container {}",
327 self.container_id
328 );
329 Ok(())
330 }
331
332 fn cleanup_best_effort(&mut self) {
333 if self.state == NetworkState::Cleaned {
334 return;
335 }
336
337 self.exit_signal.take();
338
339 if let Ok(None) = self.child.try_wait() {
340 let deadline = Instant::now() + Duration::from_secs(1);
341 while Instant::now() < deadline {
342 match self.child.try_wait() {
343 Ok(Some(_)) => break,
344 Ok(None) => std::thread::sleep(Duration::from_millis(25)),
345 Err(_) => break,
346 }
347 }
348
349 if let Ok(None) = self.child.try_wait() {
350 let _ = self.child.kill();
351 let _ = self.child.wait();
352 }
353 }
354
355 self.cleanup_runtime_dir();
356 self.state = NetworkState::Cleaned;
357 debug!(
358 "Userspace NAT cleaned up (best-effort via drop) for container {}",
359 self.container_id
360 );
361 }
362
363 fn cleanup_runtime_dir(&self) {
364 if let Err(e) = std::fs::remove_dir_all(&self.runtime_dir) {
365 if self.runtime_dir.exists() {
366 warn!(
367 "Failed to remove slirp4netns runtime dir {:?}: {}",
368 self.runtime_dir, e
369 );
370 }
371 }
372 }
373
374 fn validate_userspace_config(config: &BridgeConfig, guest_ip: &str) -> Result<()> {
375 let prefix = config
376 .subnet
377 .split_once('/')
378 .and_then(|(_, prefix)| prefix.parse::<u8>().ok())
379 .unwrap_or(24);
380 if prefix > 25 {
381 return Err(NucleusError::NetworkError(format!(
382 "Userspace NAT requires a subnet with at least 128 addresses; '{}' is too small",
383 config.subnet
384 )));
385 }
386
387 if let Some(requested_ip) = config.container_ip.as_deref() {
388 if requested_ip != guest_ip {
389 return Err(NucleusError::NetworkError(format!(
390 "Userspace NAT uses the slirp4netns guest address {}; requested container IP {} is unsupported",
391 guest_ip, requested_ip
392 )));
393 }
394 }
395
396 Ok(())
397 }
398
399 fn command_args(
400 pid: u32,
401 config: &BridgeConfig,
402 rootless: bool,
403 api_socket_path: &Path,
404 ready_fd: i32,
405 exit_fd: i32,
406 ) -> Vec<String> {
407 let mut args = vec![
408 "--configure".to_string(),
409 "--ready-fd".to_string(),
410 ready_fd.to_string(),
411 "--exit-fd".to_string(),
412 exit_fd.to_string(),
413 "--api-socket".to_string(),
414 api_socket_path.display().to_string(),
415 "--cidr".to_string(),
416 config.subnet.clone(),
417 "--disable-host-loopback".to_string(),
418 "--enable-sandbox".to_string(),
419 ];
420
421 if !config.dns.is_empty() {
422 args.push("--disable-dns".to_string());
423 }
424
425 if rootless {
426 args.push("--userns-path".to_string());
427 args.push(format!("/proc/{}/ns/user", pid));
428 }
429
430 args.push(pid.to_string());
431 args.push(SLIRP_TAP_NAME.to_string());
432 args
433 }
434
435 fn runtime_dir(container_id: &str) -> PathBuf {
436 let base = if nix::unistd::Uid::effective().is_root() {
437 PathBuf::from("/run/nucleus/userspace-net")
438 } else {
439 dirs::runtime_dir()
440 .map(|dir| dir.join("nucleus/userspace-net"))
441 .or_else(|| dirs::data_local_dir().map(|dir| dir.join("nucleus/userspace-net")))
442 .unwrap_or_else(|| std::env::temp_dir().join("nucleus-userspace-net"))
443 };
444 base.join(container_id)
445 }
446
447 fn ensure_runtime_dir(path: &Path) -> Result<()> {
448 if let Some(parent) = path.parent() {
449 std::fs::create_dir_all(parent).map_err(|e| {
450 NucleusError::NetworkError(format!(
451 "create userspace-net parent dir {:?}: {}",
452 parent, e
453 ))
454 })?;
455 }
456 std::fs::create_dir_all(path).map_err(|e| {
457 NucleusError::NetworkError(format!("create userspace-net dir {:?}: {}", path, e))
458 })?;
459 use std::os::unix::fs::PermissionsExt;
460 std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o700)).map_err(|e| {
461 NucleusError::NetworkError(format!(
462 "secure userspace-net dir permissions for {:?}: {}",
463 path, e
464 ))
465 })?;
466 Ok(())
467 }
468
469 fn clear_cloexec(fd: &OwnedFd) -> Result<()> {
470 let flags = fcntl(fd, FcntlArg::F_GETFD).map_err(|e| {
471 NucleusError::NetworkError(format!("fcntl(F_GETFD) on fd {}: {}", fd.as_raw_fd(), e))
472 })?;
473 let fd_flags = FdFlag::from_bits_truncate(flags);
474 let new_flags = fd_flags & !FdFlag::FD_CLOEXEC;
475 fcntl(fd, FcntlArg::F_SETFD(new_flags)).map_err(|e| {
476 NucleusError::NetworkError(format!("fcntl(F_SETFD) on fd {}: {}", fd.as_raw_fd(), e))
477 })?;
478 Ok(())
479 }
480
481 fn guest_ip_from_subnet(subnet: &str) -> Result<String> {
482 Self::offset_ip_from_subnet(subnet, 100).map(|ip| ip.to_string())
483 }
484
485 fn dns_ip_from_subnet(subnet: &str) -> Result<String> {
486 Self::offset_ip_from_subnet(subnet, 3).map(|ip| ip.to_string())
487 }
488
489 fn offset_ip_from_subnet(subnet: &str, offset: u32) -> Result<std::net::Ipv4Addr> {
490 let (base, prefix) = subnet.split_once('/').ok_or_else(|| {
491 NucleusError::NetworkError(format!("Invalid CIDR (missing /prefix): '{}'", subnet))
492 })?;
493 let prefix = prefix.parse::<u8>().map_err(|e| {
494 NucleusError::NetworkError(format!("Invalid CIDR prefix '{}': {}", subnet, e))
495 })?;
496 let base_ip = base.parse::<std::net::Ipv4Addr>().map_err(|e| {
497 NucleusError::NetworkError(format!("Invalid CIDR base '{}': {}", subnet, e))
498 })?;
499
500 let host_capacity = if prefix == 32 {
501 1u64
502 } else {
503 1u64 << (32 - prefix)
504 };
505 if offset as u64 >= host_capacity {
506 return Err(NucleusError::NetworkError(format!(
507 "CIDR '{}' does not have room for host offset {}",
508 subnet, offset
509 )));
510 }
511
512 let candidate = u32::from(base_ip)
513 .checked_add(offset)
514 .ok_or_else(|| NucleusError::NetworkError(format!("CIDR '{}' overflowed", subnet)))?;
515 Ok(std::net::Ipv4Addr::from(candidate))
516 }
517}
518
/// Runs the best-effort teardown whenever a `UserspaceNetwork` goes out of scope.
impl Drop for UserspaceNetwork {
    fn drop(&mut self) {
        // Returns immediately when the state is already `Cleaned`.
        self.cleanup_best_effort();
    }
}
524
#[cfg(test)]
mod tests {
    use super::*;
    use crate::network::Protocol;

    /// Default config: the kernel backend is chosen only for a rootful, non-rootless setup.
    #[test]
    fn test_auto_nat_backend_prefers_kernel_for_rootful_hosts() {
        let cfg = BridgeConfig::default();
        let cases = [
            (true, false, NatBackend::Kernel),
            (true, true, NatBackend::Userspace),
            (false, true, NatBackend::Userspace),
        ];
        for (host_is_root, rootless, expected) in cases {
            assert_eq!(cfg.selected_nat_backend(host_is_root, rootless), expected);
        }
    }

    /// A /26 has no room for the guest offset, so deriving the guest IP must fail.
    #[test]
    fn test_userspace_backend_rejects_too_small_subnets() {
        let cfg = BridgeConfig {
            subnet: "10.0.42.0/26".to_string(),
            ..BridgeConfig::default()
        };

        // Holds the error returned by the guest-IP derivation.
        let guest_ip = match UserspaceNetwork::guest_ip_from_subnet(&cfg.subnet) {
            Err(e) => e,
            Ok(ip) => panic!("expected an error, got guest IP {ip}"),
        };
        assert!(
            guest_ip.to_string().contains("does not have room"),
            "unexpected error: {guest_ip}"
        );
    }

    /// Requesting a container IP other than the slirp4netns guest address is rejected.
    #[test]
    fn test_userspace_backend_rejects_custom_guest_ip() {
        let cfg = BridgeConfig {
            container_ip: Some("10.0.42.2".to_string()),
            ..BridgeConfig::default()
        };

        let outcome = UserspaceNetwork::validate_userspace_config(&cfg, "10.0.42.100");
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("requested container IP 10.0.42.2 is unsupported"));
    }

    /// Explicit DNS servers add `--disable-dns`; rootless mode adds `--userns-path`.
    #[test]
    fn test_slirp_command_args_disable_builtin_dns_when_explicit_dns_is_set() {
        let cfg = BridgeConfig::default().with_dns(vec!["1.1.1.1".to_string()]);
        let socket = Path::new("/tmp/slirp.sock");
        let args = UserspaceNetwork::command_args(4242, &cfg, true, socket, 5, 6);

        assert!(args.contains(&"--disable-dns".to_string()));
        assert!(args.contains(&"--userns-path".to_string()));
    }

    /// Pins the JSON shape of an `add_hostfwd` request for a TCP forward with a host IP.
    #[test]
    fn test_userspace_port_forward_request_uses_slirp_hostfwd_shape() {
        let pf = PortForward {
            host_ip: Some(std::net::Ipv4Addr::new(127, 0, 0, 1)),
            host_port: 8080,
            container_port: 80,
            protocol: Protocol::Tcp,
        };

        let mut request = json!({
            "execute": "add_hostfwd",
            "arguments": {
                "proto": pf.protocol.as_str(),
                "host_port": pf.host_port,
                "guest_port": pf.container_port,
            },
        });
        if let Some(host_ip) = pf.host_ip {
            request["arguments"]["host_addr"] = json!(host_ip.to_string());
        }

        let arguments = &request["arguments"];
        assert_eq!(request["execute"], "add_hostfwd");
        assert_eq!(arguments["proto"], "tcp");
        assert_eq!(arguments["host_addr"], "127.0.0.1");
        assert_eq!(arguments["host_port"], 8080);
        assert_eq!(arguments["guest_port"], 80);
    }
}