Skip to main content

nucleus/network/
userspace.rs

1use super::{
2    egress, BridgeConfig, BridgeNetwork, EgressPolicy, NatBackend, NetworkState, PortForward,
3};
4use crate::error::{NucleusError, Result, StateTransition};
5use nix::fcntl::{fcntl, FcntlArg, FdFlag};
6use serde_json::json;
7use std::io::{Read, Write};
8use std::os::fd::{AsRawFd, OwnedFd};
9use std::os::unix::net::UnixStream;
10use std::os::unix::process::ExitStatusExt;
11use std::path::{Path, PathBuf};
12use std::process::{Child, Command};
13use std::time::{Duration, Instant};
14use tracing::{debug, info, warn};
15
/// Name of the TAP interface handed to slirp4netns as its final positional argument.
const SLIRP_TAP_NAME: &str = "tap0";
17
/// Native bridge-mode driver for the native runtime.
///
/// Wraps whichever NAT implementation was selected for a container so callers
/// can set up, apply egress policy to, and clean up the network through one type.
pub enum BridgeDriver {
    /// NAT provided by [`BridgeNetwork`]; chosen when the backend resolves to
    /// `NatBackend::Kernel`.
    Kernel(BridgeNetwork),
    /// NAT provided by [`UserspaceNetwork`] (slirp4netns); chosen when the
    /// backend resolves to `NatBackend::Userspace`.
    Userspace(UserspaceNetwork),
}
23
24impl BridgeDriver {
25    pub fn setup_with_id(
26        pid: u32,
27        config: &BridgeConfig,
28        container_id: &str,
29        host_is_root: bool,
30        rootless: bool,
31    ) -> Result<Self> {
32        match config.selected_nat_backend(host_is_root, rootless) {
33            NatBackend::Kernel => Ok(Self::Kernel(BridgeNetwork::setup_with_id(
34                pid,
35                config,
36                container_id,
37            )?)),
38            NatBackend::Userspace => Ok(Self::Userspace(UserspaceNetwork::setup_with_id(
39                pid,
40                config,
41                container_id,
42                rootless,
43            )?)),
44            NatBackend::Auto => Err(NucleusError::NetworkError(
45                "nat backend selection resolved to auto unexpectedly".to_string(),
46            )),
47        }
48    }
49
50    pub fn apply_egress_policy(
51        &self,
52        pid: u32,
53        policy: &EgressPolicy,
54        rootless: bool,
55    ) -> Result<()> {
56        match self {
57            Self::Kernel(net) => net.apply_egress_policy(pid, policy),
58            Self::Userspace(net) => net.apply_egress_policy(pid, policy, rootless),
59        }
60    }
61
62    pub fn cleanup(self) -> Result<()> {
63        match self {
64            Self::Kernel(net) => net.cleanup(),
65            Self::Userspace(net) => net.cleanup(),
66        }
67    }
68}
69
/// Userspace NAT manager backed by slirp4netns.
///
/// Owns the spawned slirp4netns process and its per-container runtime
/// directory; both are released by `cleanup` or, best-effort, on drop.
pub struct UserspaceNetwork {
    /// Copy of the bridge configuration supplied at setup.
    config: BridgeConfig,
    /// Guest address derived from the configured subnet (offset 100).
    guest_ip: String,
    /// Container identifier; used for the runtime directory name and log messages.
    container_id: String,
    /// Path of the slirp4netns API socket (`slirp4netns.sock` inside `runtime_dir`).
    api_socket_path: PathBuf,
    /// Per-container directory holding the API socket; removed during cleanup.
    runtime_dir: PathBuf,
    /// Write end of the pipe whose read end is passed to slirp4netns as
    /// `--exit-fd`; taken (and thereby dropped) when the child is asked to stop.
    exit_signal: Option<OwnedFd>,
    /// Handle to the spawned slirp4netns process.
    child: Child,
    /// Lifecycle state of this network.
    state: NetworkState,
}
81
82impl UserspaceNetwork {
83    pub(crate) fn default_dns_server(subnet: &str) -> Result<String> {
84        Self::dns_ip_from_subnet(subnet)
85    }
86
87    pub fn setup_with_id(
88        pid: u32,
89        config: &BridgeConfig,
90        container_id: &str,
91        rootless: bool,
92    ) -> Result<Self> {
93        config.validate()?;
94
95        let guest_ip = Self::guest_ip_from_subnet(&config.subnet)?;
96        Self::validate_userspace_config(config, &guest_ip)?;
97
98        let mut state = NetworkState::Unconfigured;
99        state = state.transition(NetworkState::Configuring)?;
100
101        let runtime_dir = Self::runtime_dir(container_id);
102        Self::ensure_runtime_dir(&runtime_dir)?;
103        let api_socket_path = runtime_dir.join("slirp4netns.sock");
104
105        let (ready_read, ready_write) = nix::unistd::pipe()
106            .map_err(|e| NucleusError::NetworkError(format!("ready pipe: {}", e)))?;
107        let (exit_read, exit_write) = nix::unistd::pipe()
108            .map_err(|e| NucleusError::NetworkError(format!("exit pipe: {}", e)))?;
109        Self::clear_cloexec(&ready_write)?;
110        Self::clear_cloexec(&exit_read)?;
111
112        let slirp = BridgeNetwork::resolve_bin("slirp4netns")?;
113        let args = Self::command_args(
114            pid,
115            config,
116            rootless,
117            &api_socket_path,
118            ready_write.as_raw_fd(),
119            exit_read.as_raw_fd(),
120        );
121
122        let mut child = Command::new(&slirp)
123            .args(&args)
124            .spawn()
125            .map_err(|e| NucleusError::NetworkError(format!("spawn slirp4netns: {}", e)))?;
126
127        drop(ready_write);
128        drop(exit_read);
129
130        if let Err(e) = Self::wait_until_ready(&mut child, ready_read) {
131            let _ = child.kill();
132            let _ = child.wait();
133            let _ = std::fs::remove_dir_all(&runtime_dir);
134            return Err(e);
135        }
136
137        let mut network = Self {
138            config: config.clone(),
139            guest_ip: guest_ip.to_string(),
140            container_id: container_id.to_string(),
141            api_socket_path,
142            runtime_dir,
143            exit_signal: Some(exit_write),
144            child,
145            state,
146        };
147
148        if let Err(e) = network.configure_port_forwards() {
149            network.cleanup_best_effort();
150            return Err(e);
151        }
152
153        network.state = network.state.transition(NetworkState::Active)?;
154
155        info!(
156            "Userspace NAT configured via slirp4netns for container {} (guest IP {})",
157            network.container_id, network.guest_ip
158        );
159
160        Ok(network)
161    }
162
163    pub fn apply_egress_policy(
164        &self,
165        pid: u32,
166        policy: &EgressPolicy,
167        rootless: bool,
168    ) -> Result<()> {
169        egress::apply_egress_policy(pid, &self.effective_dns_servers(), policy, rootless)
170    }
171
172    pub fn cleanup(mut self) -> Result<()> {
173        self.state = self.state.transition(NetworkState::Cleaned)?;
174        self.stop_child()?;
175        self.cleanup_runtime_dir();
176        Ok(())
177    }
178
179    fn effective_dns_servers(&self) -> Vec<String> {
180        if self.config.dns.is_empty() {
181            vec![Self::dns_ip_from_subnet(&self.config.subnet)
182                .unwrap_or_else(|_| "10.0.2.3".to_string())]
183        } else {
184            self.config.dns.clone()
185        }
186    }
187
188    fn configure_port_forwards(&mut self) -> Result<()> {
189        for pf in &self.config.port_forwards {
190            self.add_port_forward(pf)?;
191        }
192        Ok(())
193    }
194
195    fn add_port_forward(&self, pf: &PortForward) -> Result<()> {
196        let mut arguments = serde_json::Map::new();
197        arguments.insert("proto".to_string(), json!(pf.protocol.as_str()));
198        arguments.insert("host_port".to_string(), json!(pf.host_port));
199        arguments.insert("guest_port".to_string(), json!(pf.container_port));
200        if let Some(host_ip) = pf.host_ip {
201            arguments.insert("host_addr".to_string(), json!(host_ip.to_string()));
202        }
203
204        let response = Self::api_request(
205            &self.api_socket_path,
206            &json!({
207                "execute": "add_hostfwd",
208                "arguments": arguments,
209            }),
210        )?;
211
212        if let Some(error) = response.get("error") {
213            return Err(NucleusError::NetworkError(format!(
214                "slirp4netns add_hostfwd failed for {}:{}->{}/{}: {}",
215                pf.host_ip
216                    .map(|ip| ip.to_string())
217                    .unwrap_or_else(|| "0.0.0.0".to_string()),
218                pf.host_port,
219                pf.container_port,
220                pf.protocol,
221                error
222            )));
223        }
224
225        debug!(
226            "Configured slirp4netns port forward {}:{} -> {}:{}/{}",
227            pf.host_ip
228                .map(|ip| ip.to_string())
229                .unwrap_or_else(|| "0.0.0.0".to_string()),
230            pf.host_port,
231            self.guest_ip,
232            pf.container_port,
233            pf.protocol
234        );
235        Ok(())
236    }
237
238    fn api_request(socket_path: &Path, request: &serde_json::Value) -> Result<serde_json::Value> {
239        let mut stream = UnixStream::connect(socket_path).map_err(|e| {
240            NucleusError::NetworkError(format!(
241                "connect slirp4netns API socket {:?}: {}",
242                socket_path, e
243            ))
244        })?;
245        let payload = serde_json::to_vec(request).map_err(|e| {
246            NucleusError::NetworkError(format!("serialize slirp4netns API request: {}", e))
247        })?;
248        stream.write_all(&payload).map_err(|e| {
249            NucleusError::NetworkError(format!("write slirp4netns API request: {}", e))
250        })?;
251        stream
252            .shutdown(std::net::Shutdown::Write)
253            .map_err(|e| NucleusError::NetworkError(format!("shutdown slirp4netns API: {}", e)))?;
254
255        let mut buf = Vec::new();
256        stream.read_to_end(&mut buf).map_err(|e| {
257            NucleusError::NetworkError(format!("read slirp4netns API response: {}", e))
258        })?;
259
260        serde_json::from_slice(&buf).map_err(|e| {
261            NucleusError::NetworkError(format!(
262                "parse slirp4netns API response '{}': {}",
263                String::from_utf8_lossy(&buf),
264                e
265            ))
266        })
267    }
268
269    fn wait_until_ready(child: &mut Child, ready_read: OwnedFd) -> Result<()> {
270        let mut ready = std::fs::File::from(ready_read);
271        let mut buf = [0u8; 1];
272        match ready.read_exact(&mut buf) {
273            Ok(()) if buf == [b'1'] => Ok(()),
274            Ok(()) => Err(NucleusError::NetworkError(format!(
275                "slirp4netns ready-fd returned unexpected byte {:?}",
276                buf
277            ))),
278            Err(e) => {
279                if let Ok(Some(status)) = child.try_wait() {
280                    let detail = status
281                        .code()
282                        .map(|code| format!("exit code {}", code))
283                        .or_else(|| status.signal().map(|sig| format!("signal {}", sig)))
284                        .unwrap_or_else(|| "unknown status".to_string());
285                    Err(NucleusError::NetworkError(format!(
286                        "slirp4netns exited before ready: {}",
287                        detail
288                    )))
289                } else {
290                    Err(NucleusError::NetworkError(format!(
291                        "failed waiting for slirp4netns readiness: {}",
292                        e
293                    )))
294                }
295            }
296        }
297    }
298
299    fn stop_child(&mut self) -> Result<()> {
300        self.exit_signal.take();
301
302        let deadline = Instant::now() + Duration::from_secs(2);
303        loop {
304            match self.child.try_wait() {
305                Ok(Some(_)) => break,
306                Ok(None) if Instant::now() < deadline => {
307                    std::thread::sleep(Duration::from_millis(50))
308                }
309                Ok(None) => {
310                    self.child.kill().map_err(|e| {
311                        NucleusError::NetworkError(format!("kill slirp4netns: {}", e))
312                    })?;
313                    let _ = self.child.wait();
314                    break;
315                }
316                Err(e) => {
317                    return Err(NucleusError::NetworkError(format!(
318                        "wait for slirp4netns shutdown: {}",
319                        e
320                    )))
321                }
322            }
323        }
324
325        info!(
326            "Userspace NAT cleaned up for container {}",
327            self.container_id
328        );
329        Ok(())
330    }
331
332    fn cleanup_best_effort(&mut self) {
333        if self.state == NetworkState::Cleaned {
334            return;
335        }
336
337        self.exit_signal.take();
338
339        if let Ok(None) = self.child.try_wait() {
340            let deadline = Instant::now() + Duration::from_secs(1);
341            while Instant::now() < deadline {
342                match self.child.try_wait() {
343                    Ok(Some(_)) => break,
344                    Ok(None) => std::thread::sleep(Duration::from_millis(25)),
345                    Err(_) => break,
346                }
347            }
348
349            if let Ok(None) = self.child.try_wait() {
350                let _ = self.child.kill();
351                let _ = self.child.wait();
352            }
353        }
354
355        self.cleanup_runtime_dir();
356        self.state = NetworkState::Cleaned;
357        debug!(
358            "Userspace NAT cleaned up (best-effort via drop) for container {}",
359            self.container_id
360        );
361    }
362
363    fn cleanup_runtime_dir(&self) {
364        if let Err(e) = std::fs::remove_dir_all(&self.runtime_dir) {
365            if self.runtime_dir.exists() {
366                warn!(
367                    "Failed to remove slirp4netns runtime dir {:?}: {}",
368                    self.runtime_dir, e
369                );
370            }
371        }
372    }
373
374    fn validate_userspace_config(config: &BridgeConfig, guest_ip: &str) -> Result<()> {
375        let prefix = config
376            .subnet
377            .split_once('/')
378            .and_then(|(_, prefix)| prefix.parse::<u8>().ok())
379            .unwrap_or(24);
380        if prefix > 25 {
381            return Err(NucleusError::NetworkError(format!(
382                "Userspace NAT requires a subnet with at least 128 addresses; '{}' is too small",
383                config.subnet
384            )));
385        }
386
387        if let Some(requested_ip) = config.container_ip.as_deref() {
388            if requested_ip != guest_ip {
389                return Err(NucleusError::NetworkError(format!(
390                    "Userspace NAT uses the slirp4netns guest address {}; requested container IP {} is unsupported",
391                    guest_ip, requested_ip
392                )));
393            }
394        }
395
396        Ok(())
397    }
398
399    fn command_args(
400        pid: u32,
401        config: &BridgeConfig,
402        rootless: bool,
403        api_socket_path: &Path,
404        ready_fd: i32,
405        exit_fd: i32,
406    ) -> Vec<String> {
407        let mut args = vec![
408            "--configure".to_string(),
409            "--ready-fd".to_string(),
410            ready_fd.to_string(),
411            "--exit-fd".to_string(),
412            exit_fd.to_string(),
413            "--api-socket".to_string(),
414            api_socket_path.display().to_string(),
415            "--cidr".to_string(),
416            config.subnet.clone(),
417            "--disable-host-loopback".to_string(),
418            "--enable-sandbox".to_string(),
419        ];
420
421        if !config.dns.is_empty() {
422            args.push("--disable-dns".to_string());
423        }
424
425        if rootless {
426            args.push("--userns-path".to_string());
427            args.push(format!("/proc/{}/ns/user", pid));
428        }
429
430        args.push(pid.to_string());
431        args.push(SLIRP_TAP_NAME.to_string());
432        args
433    }
434
435    fn runtime_dir(container_id: &str) -> PathBuf {
436        let base = if nix::unistd::Uid::effective().is_root() {
437            PathBuf::from("/run/nucleus/userspace-net")
438        } else {
439            dirs::runtime_dir()
440                .map(|dir| dir.join("nucleus/userspace-net"))
441                .or_else(|| dirs::data_local_dir().map(|dir| dir.join("nucleus/userspace-net")))
442                .unwrap_or_else(|| std::env::temp_dir().join("nucleus-userspace-net"))
443        };
444        base.join(container_id)
445    }
446
447    fn ensure_runtime_dir(path: &Path) -> Result<()> {
448        if let Some(parent) = path.parent() {
449            std::fs::create_dir_all(parent).map_err(|e| {
450                NucleusError::NetworkError(format!(
451                    "create userspace-net parent dir {:?}: {}",
452                    parent, e
453                ))
454            })?;
455        }
456        std::fs::create_dir_all(path).map_err(|e| {
457            NucleusError::NetworkError(format!("create userspace-net dir {:?}: {}", path, e))
458        })?;
459        use std::os::unix::fs::PermissionsExt;
460        std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o700)).map_err(|e| {
461            NucleusError::NetworkError(format!(
462                "secure userspace-net dir permissions for {:?}: {}",
463                path, e
464            ))
465        })?;
466        Ok(())
467    }
468
469    fn clear_cloexec(fd: &OwnedFd) -> Result<()> {
470        let flags = fcntl(fd, FcntlArg::F_GETFD).map_err(|e| {
471            NucleusError::NetworkError(format!("fcntl(F_GETFD) on fd {}: {}", fd.as_raw_fd(), e))
472        })?;
473        let fd_flags = FdFlag::from_bits_truncate(flags);
474        let new_flags = fd_flags & !FdFlag::FD_CLOEXEC;
475        fcntl(fd, FcntlArg::F_SETFD(new_flags)).map_err(|e| {
476            NucleusError::NetworkError(format!("fcntl(F_SETFD) on fd {}: {}", fd.as_raw_fd(), e))
477        })?;
478        Ok(())
479    }
480
481    fn guest_ip_from_subnet(subnet: &str) -> Result<String> {
482        Self::offset_ip_from_subnet(subnet, 100).map(|ip| ip.to_string())
483    }
484
485    fn dns_ip_from_subnet(subnet: &str) -> Result<String> {
486        Self::offset_ip_from_subnet(subnet, 3).map(|ip| ip.to_string())
487    }
488
489    fn offset_ip_from_subnet(subnet: &str, offset: u32) -> Result<std::net::Ipv4Addr> {
490        let (base, prefix) = subnet.split_once('/').ok_or_else(|| {
491            NucleusError::NetworkError(format!("Invalid CIDR (missing /prefix): '{}'", subnet))
492        })?;
493        let prefix = prefix.parse::<u8>().map_err(|e| {
494            NucleusError::NetworkError(format!("Invalid CIDR prefix '{}': {}", subnet, e))
495        })?;
496        let base_ip = base.parse::<std::net::Ipv4Addr>().map_err(|e| {
497            NucleusError::NetworkError(format!("Invalid CIDR base '{}': {}", subnet, e))
498        })?;
499
500        let host_capacity = if prefix == 32 {
501            1u64
502        } else {
503            1u64 << (32 - prefix)
504        };
505        if offset as u64 >= host_capacity {
506            return Err(NucleusError::NetworkError(format!(
507                "CIDR '{}' does not have room for host offset {}",
508                subnet, offset
509            )));
510        }
511
512        let candidate = u32::from(base_ip)
513            .checked_add(offset)
514            .ok_or_else(|| NucleusError::NetworkError(format!("CIDR '{}' overflowed", subnet)))?;
515        Ok(std::net::Ipv4Addr::from(candidate))
516    }
517}
518
519impl Drop for UserspaceNetwork {
520    fn drop(&mut self) {
521        self.cleanup_best_effort();
522    }
523}
524
#[cfg(test)]
mod tests {
    use super::*;
    use crate::network::Protocol;

    /// The default config picks the kernel backend only for a rootful,
    /// non-rootless run; the other combinations checked here pick userspace.
    #[test]
    fn test_auto_nat_backend_prefers_kernel_for_rootful_hosts() {
        let config = BridgeConfig::default();
        let expectations = [
            (true, false, NatBackend::Kernel),
            (true, true, NatBackend::Userspace),
            (false, true, NatBackend::Userspace),
        ];

        for (host_is_root, rootless, expected) in expectations {
            assert_eq!(config.selected_nat_backend(host_is_root, rootless), expected);
        }
    }

    /// A /26 has fewer than 100 host addresses, so no guest IP can be derived.
    #[test]
    fn test_userspace_backend_rejects_too_small_subnets() {
        let config = BridgeConfig {
            subnet: "10.0.42.0/26".to_string(),
            ..BridgeConfig::default()
        };

        let err = UserspaceNetwork::guest_ip_from_subnet(&config.subnet).unwrap_err();
        let message = err.to_string();
        assert!(
            message.contains("does not have room"),
            "unexpected error: {guest_ip}",
            guest_ip = err
        );
    }

    /// Requesting a container IP other than the derived guest IP is an error.
    #[test]
    fn test_userspace_backend_rejects_custom_guest_ip() {
        let config = BridgeConfig {
            container_ip: Some("10.0.42.2".to_string()),
            ..BridgeConfig::default()
        };

        let outcome = UserspaceNetwork::validate_userspace_config(&config, "10.0.42.100");
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("requested container IP 10.0.42.2 is unsupported"));
    }

    /// Explicit DNS servers disable the built-in DNS; rootless adds the userns path.
    #[test]
    fn test_slirp_command_args_disable_builtin_dns_when_explicit_dns_is_set() {
        let config = BridgeConfig::default().with_dns(vec!["1.1.1.1".to_string()]);
        let socket = Path::new("/tmp/slirp.sock");
        let args = UserspaceNetwork::command_args(4242, &config, true, socket, 5, 6);

        let has_flag = |flag: &str| args.iter().any(|arg| arg == flag);
        assert!(has_flag("--disable-dns"));
        assert!(has_flag("--userns-path"));
    }

    /// The JSON built for a port forward has the `add_hostfwd` request shape.
    #[test]
    fn test_userspace_port_forward_request_uses_slirp_hostfwd_shape() {
        let forward = PortForward {
            host_ip: Some(std::net::Ipv4Addr::new(127, 0, 0, 1)),
            host_port: 8080,
            container_port: 80,
            protocol: Protocol::Tcp,
        };

        let mut fields = serde_json::Map::new();
        fields.insert("proto".to_string(), json!(forward.protocol.as_str()));
        fields.insert("host_port".to_string(), json!(forward.host_port));
        fields.insert("guest_port".to_string(), json!(forward.container_port));
        if let Some(addr) = forward.host_ip {
            fields.insert("host_addr".to_string(), json!(addr.to_string()));
        }
        let request = json!({
            "execute": "add_hostfwd",
            "arguments": fields,
        });

        assert_eq!(request["execute"], "add_hostfwd");
        let sent = &request["arguments"];
        assert_eq!(sent["proto"], "tcp");
        assert_eq!(sent["host_addr"], "127.0.0.1");
        assert_eq!(sent["host_port"], 8080);
        assert_eq!(sent["guest_port"], 80);
    }
}