Skip to main content

sandlock_core/
sandbox.rs

1// Sandbox orchestrator — public API that coordinates fork, confinement,
2// and async supervision of sandboxed child processes.
3
4use std::ffi::CString;
5use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd};
6use std::sync::Arc;
7use std::time::Duration;
8
9use tokio::sync::Mutex;
10use tokio::task::JoinHandle;
11
12use std::sync::atomic::{AtomicBool, Ordering};
13
14use crate::context::{self, PipePair, read_u32_fd, write_u32_fd};
15use crate::cow::{CowBranch, overlayfs::OverlayBranch, branchfs::BranchFsBranch};
16use crate::error::{SandboxError, SandlockError};
17use crate::network;
18use crate::policy::{BranchAction, FsIsolation, Policy};
19use crate::result::{ExitStatus, RunResult};
20use crate::seccomp::ctx::SupervisorCtx;
21use crate::seccomp::notif::{self, NotifPolicy};
22use crate::seccomp::state::{ChrootState, CowState, NetworkState, PolicyFnState, ProcfsState, ResourceState, TimeRandomState};
23use crate::sys::syscall;
24
25// ============================================================
26// Nesting detection
27// ============================================================
28
/// Set after seccomp confinement in the child process (see `fork`, which
/// stores `true` right after installing the deny filter).
/// Any subsequent Sandbox in this process is nested.
pub(crate) static CONFINED: AtomicBool = AtomicBool::new(false);
32
33/// Detect if this process is already inside a sandbox.
34///
35/// Checks both the in-process flag and /proc/self/status (Seccomp: 2)
36/// to catch cross-process nesting (e.g. `sandlock run -- python agent.py`
37/// where agent.py creates inner sandboxes).
38pub fn is_nested() -> bool {
39    if CONFINED.load(Ordering::Relaxed) {
40        return true;
41    }
42    // Check /proc/self/status for active seccomp filter
43    if let Ok(status) = std::fs::read_to_string("/proc/self/status") {
44        for line in status.lines() {
45            if line.starts_with("Seccomp:") {
46                return line.trim().ends_with('2');
47            }
48        }
49    }
50    false
51}
52
53// ============================================================
54// SandboxState
55// ============================================================
56
/// Lifecycle of a sandboxed child process.
enum SandboxState {
    /// Constructed but not yet spawned.
    Created,
    /// Child process has been spawned and not yet reaped.
    Running,
    /// Child process group stopped via SIGSTOP (see `pause`).
    Paused,
    /// Child has exited; carries the recorded exit status.
    Stopped(ExitStatus),
}
63
64// ============================================================
65// Sandbox
66// ============================================================
67
/// The main user-facing sandbox API.
///
/// Orchestrates fork, confinement (Landlock + seccomp), and async
/// notification-based supervision of the sandboxed child process.
pub struct Sandbox {
    /// Policy this sandbox was configured with (cloned at creation).
    policy: Policy,
    /// Lifecycle state: Created -> Running/Paused -> Stopped.
    state: SandboxState,
    /// PID of the confined child, set once spawned.
    child_pid: Option<i32>,
    /// pidfd for the child, if one was obtained (set in do_spawn — not
    /// visible in this chunk; presumably used for async exit polling).
    pidfd: Option<OwnedFd>,
    /// Supervisor task for seccomp notifications; aborted in `wait()`.
    notif_handle: Option<JoinHandle<()>>,
    /// Supervisor task for throttling; aborted in `wait()`.
    throttle_handle: Option<JoinHandle<()>>,
    /// Supervisor task for loadavg; aborted in `wait()`.
    loadavg_handle: Option<JoinHandle<()>>,
    /// Capture pipe read ends — kept alive so the child doesn't get SIGPIPE.
    _stdout_read: Option<OwnedFd>,
    _stderr_read: Option<OwnedFd>,
    /// COW filesystem branch (OverlayFS or BranchFS).
    cow_branch: Option<Box<dyn CowBranch>>,
    /// Seccomp COW branch extracted from supervisor state after child exits.
    seccomp_cow: Option<crate::cow::seccomp::SeccompCowBranch>,
    /// Shared resource state for freeze/thaw and loadavg support.
    supervisor_resource: Option<Arc<Mutex<ResourceState>>>,
    /// Shared COW state for post-wait extraction.
    supervisor_cow: Option<Arc<Mutex<CowState>>>,
    /// Shared network state for port mapping queries.
    supervisor_network: Option<Arc<Mutex<NetworkState>>>,
    /// Control pipe for fork commands (parent end).
    ctrl_fd: Option<OwnedFd>,
    /// Stdout pipe read end (for fork clones — used by reduce).
    stdout_pipe: Option<OwnedFd>,
    /// Init function (runs once in child before fork).
    init_fn: Option<Box<dyn FnOnce() + Send + 'static>>,
    /// Work function (runs in each fork clone).
    work_fn: Option<Arc<dyn Fn(u32) + Send + Sync + 'static>>,
    /// Optional fd overrides for stdin/stdout/stderr (used by Pipeline).
    io_overrides: Option<(Option<i32>, Option<i32>, Option<i32>)>,
    /// Extra fd mappings for the child: (target_fd, source_fd).
    /// Each pair dup2's source_fd to target_fd in the child before exec.
    extra_fds: Vec<(i32, i32)>,
    /// HTTP ACL proxy handle — kept alive so the proxy runs while the child is alive.
    http_acl_handle: Option<crate::http_acl::HttpAclProxyHandle>,
    /// Optional callback invoked when a port bind is recorded.
    #[allow(clippy::type_complexity)]
    on_bind: Option<Box<dyn Fn(&std::collections::HashMap<u16, u16>) + Send + Sync>>,
}
112
113impl Sandbox {
114    /// Create a new sandbox in the `Created` state.
115    pub fn new(policy: &Policy) -> Result<Self, SandlockError> {
116        Ok(Self::create(policy))
117    }
118
119    /// Create a sandbox with init and work functions for COW forking.
120    ///
121    /// `init_fn` runs once in the child to load expensive state.
122    /// `work_fn` runs in each COW clone created by `fork(N)`.
123    ///
124    /// ```ignore
125    /// let mut sb = Sandbox::new_with_fns(&policy,
126    ///     || { load_model(); },
127    ///     |clone_id| { rollout(clone_id); },
128    /// )?;
129    /// let clones = sb.fork(1000).await?;
130    /// ```
131    pub fn new_with_fns(
132        policy: &Policy,
133        init_fn: impl FnOnce() + Send + 'static,
134        work_fn: impl Fn(u32) + Send + Sync + 'static,
135    ) -> Result<Self, SandlockError> {
136        let mut sb = Self::create(policy);
137        sb.init_fn = Some(Box::new(init_fn));
138        sb.work_fn = Some(Arc::new(work_fn));
139        Ok(sb)
140    }
141
142    fn create(policy: &Policy) -> Self {
143        Self {
144            policy: policy.clone(),
145            state: SandboxState::Created,
146            child_pid: None,
147            pidfd: None,
148            notif_handle: None,
149            throttle_handle: None,
150            loadavg_handle: None,
151            _stdout_read: None,
152            _stderr_read: None,
153            cow_branch: None,
154            seccomp_cow: None,
155            supervisor_resource: None,
156            supervisor_cow: None,
157            supervisor_network: None,
158            ctrl_fd: None,
159            stdout_pipe: None,
160            init_fn: None,
161            work_fn: None,
162            io_overrides: None,
163            extra_fds: Vec::new(),
164            http_acl_handle: None,
165            on_bind: None,
166        }
167    }
168
169    /// One-shot: spawn a sandboxed process, wait for it to exit, and return
170    /// the result. Stdout and stderr are captured.
171    pub async fn run(policy: &Policy, cmd: &[&str]) -> Result<RunResult, SandlockError> {
172        let mut sb = Self::new(policy)?;
173        sb.do_spawn(cmd, true).await?;
174        sb.wait().await
175    }
176
177    /// Run a sandboxed process with inherited stdio (interactive mode).
178    pub async fn run_interactive(policy: &Policy, cmd: &[&str]) -> Result<RunResult, SandlockError> {
179        let mut sb = Self::new(policy)?;
180        sb.do_spawn(cmd, false).await?;
181        sb.wait().await
182    }
183
184    /// Dry-run: spawn, wait, collect filesystem changes, then abort.
185    /// Returns the run result plus a list of changes that would have been
186    /// committed. The workdir is left unchanged.
187    pub async fn dry_run(policy: &Policy, cmd: &[&str]) -> Result<crate::dry_run::DryRunResult, SandlockError> {
188        let mut policy = policy.clone();
189        policy.on_exit = BranchAction::Keep;
190        policy.on_error = BranchAction::Keep;
191
192        let mut sb = Self::new(&policy)?;
193        sb.do_spawn(cmd, true).await?;
194        let run_result = sb.wait().await?;
195        let changes = sb.collect_changes().await;
196        sb.do_abort().await;
197        Ok(crate::dry_run::DryRunResult { run_result, changes })
198    }
199
200    /// Dry-run with inherited stdio (interactive mode).
201    pub async fn dry_run_interactive(policy: &Policy, cmd: &[&str]) -> Result<crate::dry_run::DryRunResult, SandlockError> {
202        let mut policy = policy.clone();
203        policy.on_exit = BranchAction::Keep;
204        policy.on_error = BranchAction::Keep;
205
206        let mut sb = Self::new(&policy)?;
207        sb.do_spawn(cmd, false).await?;
208        let run_result = sb.wait().await?;
209        let changes = sb.collect_changes().await;
210        sb.do_abort().await;
211        Ok(crate::dry_run::DryRunResult { run_result, changes })
212    }
213
214    /// Collect changes from whichever COW branch exists.
215    async fn collect_changes(&self) -> Vec<crate::dry_run::Change> {
216        if let Some(ref branch) = self.cow_branch {
217            return branch.changes().unwrap_or_default();
218        }
219        if let Some(ref cow) = self.seccomp_cow {
220            return cow.changes().unwrap_or_default();
221        }
222        Vec::new()
223    }
224
225    /// Abort both COW branch types (used by dry_run to discard changes).
226    async fn do_abort(&mut self) {
227        if let Some(branch) = self.cow_branch.take() {
228            let _ = branch.abort();
229        }
230        if let Some(ref mut cow) = self.seccomp_cow {
231            let _ = cow.abort();
232        }
233    }
234
    /// Create N COW clones of this sandbox, each running `work_fn(clone_id)`.
    ///
    /// Requires `new_with_fns()`. Forks a confined "template" child, runs
    /// `init_fn`, then forks N times using raw `fork()` (bypasses seccomp).
    /// Each clone gets `CLONE_ID=0..N-1` and runs `work_fn(clone_id)`.
    ///
    /// Memory pages from `init_fn` are shared copy-on-write across all
    /// clones — 1000 clones of a 50MB process use ~50MB total.
    ///
    /// Returns a Vec of Sandbox handles — one per live clone. Each handle
    /// is already in the `Stopped` state (the template waits for every
    /// clone before reporting), carrying the clone's exit status and its
    /// stdout pipe read end (consumed by `reduce`).
    ///
    /// ```ignore
    /// let clones = sb.fork(4).await?;
    /// for mut c in clones { c.wait().await?; }
    /// ```
    pub async fn fork(&mut self, n: u32) -> Result<Vec<Sandbox>, SandlockError> {
        // Both closures are one-shot: taking them means fork() can only
        // succeed once per sandbox.
        let init_fn = self.init_fn.take()
            .ok_or_else(|| SandboxError::Child("fork() requires new_with_fns()".into()))?;
        let work_fn = self.work_fn.take()
            .ok_or_else(|| SandboxError::Child("fork() requires new_with_fns()".into()))?;

        let policy = self.policy.clone();


        // Create control pipe (parent reads clone PIDs and exit codes from it)
        let mut ctrl_fds = [0i32; 2];
        if unsafe { libc::pipe2(ctrl_fds.as_mut_ptr(), 0) } < 0 {
            return Err(SandboxError::Io(std::io::Error::last_os_error()).into());
        }
        let ctrl_parent = unsafe { OwnedFd::from_raw_fd(ctrl_fds[0]) };
        let ctrl_child_fd = ctrl_fds[1];

        // Create per-clone stdout pipes (parent keeps read ends).
        // On pipe() failure a -1 placeholder keeps indices aligned with clones.
        let mut pipe_read_ends: Vec<OwnedFd> = Vec::with_capacity(n as usize);
        let mut pipe_write_fds: Vec<i32> = Vec::with_capacity(n as usize);
        for _ in 0..n {
            let mut pfds = [0i32; 2];
            if unsafe { libc::pipe(pfds.as_mut_ptr()) } >= 0 {
                pipe_read_ends.push(unsafe { OwnedFd::from_raw_fd(pfds[0]) });
                pipe_write_fds.push(pfds[1]);
            } else {
                pipe_write_fds.push(-1);
            }
        }

        // Fork the template child
        let pid = unsafe { libc::fork() };
        if pid < 0 {
            unsafe { libc::close(ctrl_child_fd) };
            return Err(SandboxError::Fork(std::io::Error::last_os_error()).into());
        }

        if pid == 0 {
            // ===== CHILD (template) =====
            drop(ctrl_parent);

            // Own process group; die with parent; no privilege escalation.
            unsafe { libc::setpgid(0, 0) };
            unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL) };
            unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };

            let _ = crate::landlock::confine(&policy);

            // Install the policy-derived seccomp deny filter; a filter that
            // fails to assemble is fatal for the template.
            let deny = crate::context::deny_syscall_numbers(&policy);
            let args = crate::context::arg_filters(&policy);
            let filter = match crate::seccomp::bpf::assemble_filter(&[], &deny, &args) {
                Ok(f) => f,
                Err(_) => unsafe { libc::_exit(1) },
            };
            let _ = crate::seccomp::bpf::install_deny_filter(&filter);

            CONFINED.store(true, std::sync::atomic::Ordering::Relaxed);

            // Run init (loads expensive state, shared via COW)
            init_fn();

            // Close read ends in template (parent owns them)
            drop(pipe_read_ends);

            // Fork N clones, send PIDs, wait for all
            crate::fork::fork_ready_loop_fn(ctrl_child_fd, n, &*work_fn, &pipe_write_fds);
            unsafe { libc::_exit(0) };
        }

        // ===== PARENT =====
        unsafe { libc::close(ctrl_child_fd) };
        // Close write ends in parent (template/clones own them)
        for wfd in &pipe_write_fds {
            if *wfd >= 0 { unsafe { libc::close(*wfd) }; }
        }
        self.child_pid = Some(pid);
        self.state = SandboxState::Running;

        // Read N clone PIDs (big-endian u32; non-positive marks a failed clone)
        let ctrl_fd = ctrl_parent.as_raw_fd();
        let mut pid_buf = vec![0u8; n as usize * 4];
        read_exact(ctrl_fd, &mut pid_buf);

        let clone_pids: Vec<i32> = pid_buf.chunks(4)
            .map(|c| u32::from_be_bytes(c.try_into().unwrap_or([0; 4])) as i32)
            .collect();
        let live_count = clone_pids.iter().filter(|&&p| p > 0).count();

        // Read exit codes (template waits for all clones first)
        let mut code_buf = vec![0u8; live_count * 4];
        read_exact(ctrl_fd, &mut code_buf);
        self.ctrl_fd = Some(ctrl_parent);

        // Wait for template to exit
        let mut status = 0i32;
        unsafe { libc::waitpid(pid, &mut status, 0) };

        // Create clone handles with stdout pipe read ends
        let mut code_idx = 0;
        let mut clones = Vec::with_capacity(live_count);
        let mut pipe_iter = pipe_read_ends.into_iter();

        for &clone_pid in &clone_pids {
            // Advance the pipe iterator even for dead clones so the
            // pipe/PID pairing stays aligned.
            let pipe = pipe_iter.next();
            if clone_pid <= 0 { continue; }

            // Exit codes are only reported for live clones, in order.
            let code = i32::from_be_bytes(
                code_buf[code_idx * 4..(code_idx + 1) * 4].try_into().unwrap_or([0; 4])
            );
            code_idx += 1;

            let mut sb = Sandbox::create(&policy);
            sb.child_pid = Some(clone_pid);
            // Negative code means the clone died without a normal exit.
            sb.state = SandboxState::Stopped(if code == 0 {
                ExitStatus::Code(0)
            } else if code > 0 {
                ExitStatus::Code(code)
            } else {
                ExitStatus::Killed
            });
            sb.stdout_pipe = pipe;
            clones.push(sb);
        }

        Ok(clones)
    }
379
380    /// Reduce: wait for all clones, then run a reducer command.
381    ///
382    /// Waits for every clone to finish, then runs `cmd` in this sandbox.
383    /// The reducer can read clone results from shared files, tmpdir, etc.
384    ///
385    /// ```ignore
386    /// let clones = mapper.fork(4).await?;
387    /// let result = reducer.reduce(&["python3", "sum.py"], &mut clones).await?;
388    /// ```
389    pub async fn reduce(
390        &self,
391        cmd: &[&str],
392        clones: &mut [Sandbox],
393    ) -> Result<RunResult, SandlockError> {
394        // Read each clone's stdout pipe and concatenate
395        let mut combined = Vec::new();
396        for clone in clones.iter_mut() {
397            if let Some(pipe) = clone.stdout_pipe.take() {
398                combined.extend_from_slice(&read_fd_to_end(pipe));
399            }
400        }
401
402        // Create a pipe to feed combined data to reducer's stdin
403        let mut stdin_fds = [0i32; 2];
404        if unsafe { libc::pipe2(stdin_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
405            return Err(SandboxError::Io(std::io::Error::last_os_error()).into());
406        }
407
408        // Write combined data in a blocking thread (avoid deadlock with large data)
409        let write_fd = stdin_fds[1];
410        let write_handle = tokio::task::spawn_blocking(move || {
411            unsafe {
412                libc::write(write_fd, combined.as_ptr() as *const _, combined.len());
413                libc::close(write_fd);
414            }
415        });
416
417        // Spawn reducer with stdin from pipe, capture stdout
418        let mut reducer = Sandbox::new(&self.policy)?;
419        reducer.io_overrides = Some((Some(stdin_fds[0]), None, None));
420        reducer.do_spawn(cmd, true).await?;
421        unsafe { libc::close(stdin_fds[0]) };
422
423        let _ = write_handle.await;
424        reducer.wait().await
425    }
426
    /// Wait for the child process to exit and return its result.
    ///
    /// Stdout/stderr are included only when the sandbox was spawned in
    /// capture mode. Idempotent for an already-`Stopped` sandbox (e.g. a
    /// `fork()` clone handle): the recorded status is returned immediately
    /// with no captured output.
    ///
    /// # Errors
    /// Returns `SandboxError::NotRunning` if no child was ever spawned.
    pub async fn wait(&mut self) -> Result<RunResult, SandlockError> {
        let pid = self.child_pid.ok_or(SandboxError::NotRunning)?;

        // Already reaped — don't call waitpid on a PID that may have been
        // recycled by the kernel.
        if let SandboxState::Stopped(ref es) = self.state {
            return Ok(RunResult {
                exit_status: es.clone(),
                stdout: None,
                stderr: None,
            });
        }

        // Blocking waitpid in a blocking thread so we don't block the tokio runtime.
        let exit_status = tokio::task::spawn_blocking(move || -> ExitStatus {
            let mut status: i32 = 0;
            loop {
                let ret = unsafe { libc::waitpid(pid, &mut status, 0) };
                if ret < 0 {
                    let err = std::io::Error::last_os_error();
                    // Retry when interrupted by a signal.
                    if err.raw_os_error() == Some(libc::EINTR) {
                        continue;
                    }
                    // Child already reaped or invalid pid
                    return ExitStatus::Killed;
                }
                break;
            }
            wait_status_to_exit(status)
        })
        .await
        .unwrap_or(ExitStatus::Killed);

        self.state = SandboxState::Stopped(exit_status.clone());

        // Abort supervisor tasks now that the child is gone.
        if let Some(h) = self.notif_handle.take() {
            h.abort();
        }
        if let Some(h) = self.throttle_handle.take() {
            h.abort();
        }
        if let Some(h) = self.loadavg_handle.take() {
            h.abort();
        }

        // Extract seccomp COW branch while we're still in async context
        // (can properly .lock().await the tokio Mutex).  This avoids the
        // try_lock() race in sync drop() that could skip cleanup entirely.
        if let Some(ref cow_state) = self.supervisor_cow {
            let mut cow = cow_state.lock().await;
            self.seccomp_cow = cow.branch.take();
        }

        // Drain captured stdout/stderr if available (read ends are consumed;
        // a second wait() hits the Stopped fast path above and returns None).
        let stdout = self._stdout_read.take().map(|fd| read_fd_to_end(fd));
        let stderr = self._stderr_read.take().map(|fd| read_fd_to_end(fd));

        Ok(RunResult {
            exit_status,
            stdout,
            stderr,
        })
    }
490
491    /// Send SIGSTOP to the child's process group.
492    pub fn pause(&mut self) -> Result<(), SandlockError> {
493        let pid = self.child_pid.ok_or(SandboxError::NotRunning)?;
494        let ret = unsafe { libc::killpg(pid, libc::SIGSTOP) };
495        if ret < 0 {
496            return Err(SandboxError::Io(std::io::Error::last_os_error()).into());
497        }
498        self.state = SandboxState::Paused;
499        Ok(())
500    }
501
502    /// Send SIGCONT to the child's process group.
503    pub fn resume(&mut self) -> Result<(), SandlockError> {
504        let pid = self.child_pid.ok_or(SandboxError::NotRunning)?;
505        let ret = unsafe { libc::killpg(pid, libc::SIGCONT) };
506        if ret < 0 {
507            return Err(SandboxError::Io(std::io::Error::last_os_error()).into());
508        }
509        self.state = SandboxState::Running;
510        Ok(())
511    }
512
513    /// Send SIGKILL to the child's process group.
514    pub fn kill(&mut self) -> Result<(), SandlockError> {
515        let pid = self.child_pid.ok_or(SandboxError::NotRunning)?;
516        let ret = unsafe { libc::killpg(pid, libc::SIGKILL) };
517        if ret < 0 {
518            let err = std::io::Error::last_os_error();
519            // ESRCH means the process is already gone — not an error.
520            if err.raw_os_error() != Some(libc::ESRCH) {
521                return Err(SandboxError::Io(err).into());
522            }
523        }
524        Ok(())
525    }
526
527    /// Return the child PID, if spawned.
528    pub fn pid(&self) -> Option<i32> {
529        self.child_pid
530    }
531
532    /// Set a callback invoked whenever a port bind is recorded.
533    pub fn set_on_bind(&mut self, cb: impl Fn(&std::collections::HashMap<u16, u16>) + Send + Sync + 'static) {
534        self.on_bind = Some(Box::new(cb));
535    }
536
537    /// Return the current virtual-to-real port mappings.
538    ///
539    /// Returns a snapshot of all ports where the real (host) port differs from
540    /// the virtual port the sandbox requested. Empty if port_remap is disabled
541    /// or no ports have been remapped.
542    pub async fn port_mappings(&self) -> std::collections::HashMap<u16, u16> {
543        if let Some(ref net) = self.supervisor_network {
544            let ns = net.lock().await;
545            ns.port_map.virtual_to_real.clone()
546        } else {
547            std::collections::HashMap::new()
548        }
549    }
550
551    /// Return whether the child is currently running.
552    #[doc(hidden)]
553    pub fn is_running(&self) -> bool {
554        matches!(self.state, SandboxState::Running | SandboxState::Paused)
555    }
556
557    /// Return a reference to the policy.
558    pub fn policy(&self) -> &Policy {
559        &self.policy
560    }
561
562    /// Commit COW writes to the original directory.
563    #[doc(hidden)]
564    pub async fn commit(&mut self) -> Result<(), SandlockError> {
565        if let Some(branch) = self.cow_branch.take() {
566            branch.commit().map_err(|e| SandlockError::Sandbox(SandboxError::Branch(e)))?;
567        }
568        Ok(())
569    }
570
571    /// Discard COW writes.
572    #[doc(hidden)]
573    pub async fn abort_branch(&mut self) -> Result<(), SandlockError> {
574        if let Some(branch) = self.cow_branch.take() {
575            branch.abort().map_err(|e| SandlockError::Sandbox(SandboxError::Branch(e)))?;
576        }
577        Ok(())
578    }
579
580    /// Freeze the sandbox: hold all fork notifications + SIGSTOP the process group.
581    pub(crate) async fn freeze(&self) -> Result<(), SandlockError> {
582        let pid = self.child_pid.ok_or(SandlockError::Sandbox(SandboxError::NotRunning))?;
583
584        // Set hold_forks in resource state
585        if let Some(ref resource) = self.supervisor_resource {
586            let mut rs = resource.lock().await;
587            rs.hold_forks = true;
588        }
589
590        // SIGSTOP the process group
591        unsafe { libc::killpg(pid, libc::SIGSTOP); }
592        Ok(())
593    }
594
595    /// Thaw the sandbox: release held fork notifications + SIGCONT.
596    pub(crate) async fn thaw(&self) -> Result<(), SandlockError> {
597        let pid = self.child_pid.ok_or(SandlockError::Sandbox(SandboxError::NotRunning))?;
598
599        // Release held forks
600        if let Some(ref resource) = self.supervisor_resource {
601            let mut rs = resource.lock().await;
602            rs.hold_forks = false;
603            rs.held_notif_ids.clear();
604        }
605
606        // SIGCONT the process group
607        unsafe { libc::killpg(pid, libc::SIGCONT); }
608        Ok(())
609    }
610
611    /// Spawn a sandboxed process without waiting for it to exit.
612    /// Use `wait()` to collect the exit status when done.
613    #[doc(hidden)]
614    pub async fn spawn(&mut self, cmd: &[&str]) -> Result<(), SandlockError> {
615        self.do_spawn(cmd, false).await
616    }
617
618    /// Like `spawn` but captures stdout and stderr (available via `wait()`).
619    /// Not part of the public API — used by the FFI crate.
620    #[doc(hidden)]
621    pub async fn spawn_captured(&mut self, cmd: &[&str]) -> Result<(), SandlockError> {
622        self.do_spawn(cmd, true).await
623    }
624
625    /// Spawn with explicit stdin/stdout/stderr fd redirection.
626    ///
627    /// Each `Option<RawFd>` overrides the corresponding fd in the child:
628    /// - `stdin_fd`: dup2'd to fd 0
629    /// - `stdout_fd`: dup2'd to fd 1
630    /// - `stderr_fd`: dup2'd to fd 2
631    ///
632    /// The caller is responsible for closing the fds after this call.
633    #[doc(hidden)]
634    pub async fn spawn_with_io(
635        &mut self,
636        cmd: &[&str],
637        stdin_fd: Option<std::os::unix::io::RawFd>,
638        stdout_fd: Option<std::os::unix::io::RawFd>,
639        stderr_fd: Option<std::os::unix::io::RawFd>,
640    ) -> Result<(), SandlockError> {
641        self.io_overrides = Some((stdin_fd, stdout_fd, stderr_fd));
642        self.do_spawn(cmd, false).await
643    }
644
645    /// Like `spawn_with_io` but also maps extra fds into the child.
646    /// `extra_fds` is a list of (target_fd, source_fd) pairs.
647    #[doc(hidden)]
648    pub async fn spawn_with_gather_io(
649        &mut self,
650        cmd: &[&str],
651        stdin_fd: Option<std::os::unix::io::RawFd>,
652        stdout_fd: Option<std::os::unix::io::RawFd>,
653        stderr_fd: Option<std::os::unix::io::RawFd>,
654        extra_fds: Vec<(i32, i32)>,
655    ) -> Result<(), SandlockError> {
656        self.io_overrides = Some((stdin_fd, stdout_fd, stderr_fd));
657        self.extra_fds = extra_fds;
658        self.do_spawn(cmd, false).await
659    }
660
661    /// Capture a checkpoint of the running sandbox.
662    pub async fn checkpoint(&self) -> Result<crate::checkpoint::Checkpoint, SandlockError> {
663        let pid = self.child_pid.ok_or(SandlockError::Sandbox(SandboxError::NotRunning))?;
664
665        // Freeze
666        self.freeze().await?;
667
668        // Capture state
669        let cp = crate::checkpoint::capture(pid, &self.policy);
670
671        // Thaw regardless of capture result
672        self.thaw().await?;
673
674        cp
675    }
676
677    // ============================================================
678    // Internal: do_spawn
679    // ============================================================
680
681    /// Fork a child, apply confinement, and start the supervisor.
682    async fn do_spawn(&mut self, cmd: &[&str], capture: bool) -> Result<(), SandlockError> {
683        // 1. Validate state
684        if !matches!(self.state, SandboxState::Created) {
685            return Err(SandboxError::Child("sandbox already spawned".into()).into());
686        }
687
688        if cmd.is_empty() {
689            return Err(SandboxError::Child("empty command".into()).into());
690        }
691
692        // 2. Convert cmd to Vec<CString>
693        let c_cmd: Vec<CString> = cmd
694            .iter()
695            .map(|s| CString::new(*s).map_err(|_| SandboxError::Child("invalid command string".into())))
696            .collect::<Result<Vec<_>, _>>()?;
697
698        // 3. Detect nesting (before fork, in parent)
699        let nested = is_nested();
700
701        // 4. Create synchronization pipes
702        let pipes = PipePair::new().map_err(SandboxError::Io)?;
703
704        // 4. Resolve net_allow_hosts to IPs + build virtual /etc/hosts
705        //
706        // Semantics:
707        //   None               -> unrestricted (no virtualization, no IP allowlist)
708        //   Some(empty)        -> deny all (empty virtual /etc/hosts, empty allowlist)
709        //   Some(nonempty)     -> resolve and allowlist
710        let (resolved_ips, virtual_etc_hosts) = match self.policy.net_allow_hosts.as_deref() {
711            None => (std::collections::HashSet::new(), None),
712            Some([]) => (
713                std::collections::HashSet::new(),
714                Some(String::new()),
715            ),
716            Some(hosts) => {
717                let resolved = network::resolve_hosts(hosts)
718                    .await
719                    .map_err(SandboxError::Io)?;
720                (resolved.ips, Some(resolved.etc_hosts))
721            }
722        };
723
724        // 5. Spawn HTTP ACL proxy if rules are configured
725        if !self.policy.http_allow.is_empty() || !self.policy.http_deny.is_empty() {
726            let handle = crate::http_acl::spawn_http_acl_proxy(
727                self.policy.http_allow.clone(),
728                self.policy.http_deny.clone(),
729                self.policy.https_ca.as_deref(),
730                self.policy.https_key.as_deref(),
731            ).await.map_err(SandboxError::Io)?;
732            self.http_acl_handle = Some(handle);
733        }
734
735        // 6. Create COW branch if requested
736        let cow_branch: Option<Box<dyn CowBranch>> = match self.policy.fs_isolation {
737            FsIsolation::OverlayFs => {
738                let workdir = self.policy.workdir.as_ref()
739                    .ok_or_else(|| SandlockError::Sandbox(SandboxError::Child("OverlayFs requires workdir".into())))?;
740                let storage = self.policy.fs_storage.as_ref()
741                    .cloned()
742                    .unwrap_or_else(|| std::env::temp_dir().join("sandlock-overlay"));
743                std::fs::create_dir_all(&storage)
744                    .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
745                let branch = OverlayBranch::create(workdir, &storage)
746                    .map_err(|e| SandlockError::Sandbox(SandboxError::Branch(e)))?;
747                Some(Box::new(branch))
748            }
749            FsIsolation::BranchFs => {
750                let workdir = self.policy.workdir.as_ref()
751                    .ok_or_else(|| SandlockError::Sandbox(SandboxError::Child("BranchFs requires workdir".into())))?;
752                let branch = BranchFsBranch::create(workdir)
753                    .map_err(|e| SandlockError::Sandbox(SandboxError::Branch(e)))?;
754                Some(Box::new(branch))
755            }
756            FsIsolation::None => None,
757        };
758
759        // Ask the backend for mount config (only OverlayFS needs one).
760        let cow_config = cow_branch.as_ref().and_then(|b| b.child_mount_config());
761
762        // 6. Create stdout/stderr capture pipes (if capture mode)
763        let (stdout_r, stderr_r) = if capture {
764            let mut stdout_fds = [0i32; 2];
765            let mut stderr_fds = [0i32; 2];
766            if unsafe { libc::pipe2(stdout_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
767                return Err(SandboxError::Io(std::io::Error::last_os_error()).into());
768            }
769            if unsafe { libc::pipe2(stderr_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
770                unsafe {
771                    libc::close(stdout_fds[0]);
772                    libc::close(stdout_fds[1]);
773                }
774                return Err(SandboxError::Io(std::io::Error::last_os_error()).into());
775            }
776            (
777                Some((
778                    unsafe { OwnedFd::from_raw_fd(stdout_fds[0]) },
779                    unsafe { OwnedFd::from_raw_fd(stdout_fds[1]) },
780                )),
781                Some((
782                    unsafe { OwnedFd::from_raw_fd(stderr_fds[0]) },
783                    unsafe { OwnedFd::from_raw_fd(stderr_fds[1]) },
784                )),
785            )
786        } else {
787            (None, None)
788        };
789
790        // 6. Fork
791        let pid = unsafe { libc::fork() };
792        if pid < 0 {
793            return Err(SandboxError::Fork(std::io::Error::last_os_error()).into());
794        }
795
796        if pid == 0 {
797            // ===== CHILD PROCESS =====
798            // Drop parent's pipe ends by leaking them (they are OwnedFd and would
799            // close the fd on drop, but we only want to close OUR ends).
800            // The child does not use notif_r or ready_w.
801            // We must forget them so that Drop doesn't close the raw fds that
802            // confine_child may still use.
803            //
804            // We use std::mem::forget on the read end of notif and write end of ready
805            // because confine_child uses notif_w and ready_r (via the PipePair reference).
806            // The parent's ends (notif_r, ready_w) need to be closed in the child.
807            // However, since PipePair owns all four fds and confine_child takes
808            // a reference to it, we pass the whole PipePair and let confine_child
809            // handle it. confine_child never returns.
810
811            // Apply io_overrides (from spawn_with_io / pipeline)
812            if let Some((stdin_fd, stdout_fd, stderr_fd)) = self.io_overrides {
813                if let Some(fd) = stdin_fd {
814                    unsafe { libc::dup2(fd, 0) };
815                }
816                if let Some(fd) = stdout_fd {
817                    unsafe { libc::dup2(fd, 1) };
818                }
819                if let Some(fd) = stderr_fd {
820                    unsafe { libc::dup2(fd, 2) };
821                }
822            }
823
824            // Apply extra fd mappings (from gather)
825            for &(target_fd, source_fd) in &self.extra_fds {
826                unsafe { libc::dup2(source_fd, target_fd) };
827            }
828
829            // Redirect stdout/stderr if capturing
830            if let Some((_, ref stdout_w)) = stdout_r {
831                unsafe { libc::dup2(stdout_w.as_raw_fd(), 1) };
832            }
833            if let Some((_, ref stderr_w)) = stderr_r {
834                unsafe { libc::dup2(stderr_w.as_raw_fd(), 2) };
835            }
836            // Drop capture pipe read ends in child (they belong to parent).
837            // The write ends will be closed by O_CLOEXEC on exec.
838            drop(stdout_r);
839            drop(stderr_r);
840
841            // Collect target fds from gather that must survive close_fds_above
842            let gather_keep_fds: Vec<i32> = self.extra_fds.iter().map(|&(target, _)| target).collect();
843
844            // This never returns.
845            context::confine_child(&self.policy, &c_cmd, &pipes, cow_config.as_ref(), nested, &gather_keep_fds);
846        }
847
848        // ===== PARENT PROCESS =====
849
850        // Store COW branch in parent
851        self.cow_branch = cow_branch;
852
853        // 7. Close child's pipe ends
854        drop(pipes.notif_w);
855        drop(pipes.ready_r);
856
857        // Drop capture pipe write ends in parent (they belong to child).
858        // Store the read ends so the child doesn't get SIGPIPE.
859        self._stdout_read = stdout_r.map(|(r, _w)| r);
860        self._stderr_read = stderr_r.map(|(r, _w)| r);
861
862        // 8. Set child_pid, state=Running
863        self.child_pid = Some(pid);
864        self.state = SandboxState::Running;
865
866        // 9. Open pidfd via syscall::pidfd_open
867        let pidfd = match syscall::pidfd_open(pid as u32, 0) {
868            Ok(fd) => Some(fd),
869            Err(_) => None, // pidfd not available on older kernels — proceed without
870        };
871
872        // 10. Read notif fd number from pipe (what child wrote)
873        //     0 = nested mode (no supervisor needed)
874        let notif_fd_num = read_u32_fd(pipes.notif_r.as_raw_fd())
875            .map_err(|e| SandboxError::Child(format!("read notif fd from child: {}", e)))?;
876
877        let is_nested = notif_fd_num == 0;
878
879        // 11. Copy notif fd from child (skip if nested)
880        let notif_fd = if is_nested {
881            None
882        } else if let Some(ref pfd) = pidfd {
883            Some(syscall::pidfd_getfd(pfd, notif_fd_num as i32, 0)
884                .map_err(|e| SandboxError::Child(format!("pidfd_getfd: {}", e)))?)
885        } else {
886            let path = format!("/proc/{}/fd/{}", pid, notif_fd_num);
887            let cpath = CString::new(path).unwrap();
888            let raw = unsafe { libc::open(cpath.as_ptr(), libc::O_RDWR) };
889            if raw < 0 {
890                return Err(
891                    SandboxError::Child("failed to open notif fd from /proc".into()).into(),
892                );
893            }
894            Some(unsafe { OwnedFd::from_raw_fd(raw) })
895        };
896
897        // 11b–14. Supervisor setup (skip in nested mode)
898        if let Some(notif_fd) = notif_fd {
899            // vDSO patching for determinism
900            if self.policy.time_start.is_some() || self.policy.random_seed.is_some() {
901                let time_offset = self.policy.time_start.map(|t| crate::time::calculate_time_offset(t));
902                if let Err(e) = crate::vdso::patch(pid, time_offset, self.policy.random_seed.is_some()) {
903                    eprintln!("sandlock: pre-exec vDSO patching failed (will retry after exec): {}", e);
904                }
905            }
906
907            // Build NotifPolicy
908            let time_offset_val = self.policy.time_start
909                .map(|t| crate::time::calculate_time_offset(t))
910                .unwrap_or(0);
911
912            let notif_policy = NotifPolicy {
913                max_memory_bytes: self.policy.max_memory.map(|m| m.0).unwrap_or(0),
914                max_processes: self.policy.max_processes,
915                has_memory_limit: self.policy.max_memory.is_some(),
916                has_net_allowlist: self.policy.net_allow_hosts.is_some()
917                    || self.policy.policy_fn.is_some()
918                    || !self.policy.http_allow.is_empty()
919                    || !self.policy.http_deny.is_empty(),
920                has_random_seed: self.policy.random_seed.is_some(),
921                has_time_start: self.policy.time_start.is_some(),
922                time_offset: time_offset_val,
923                num_cpus: self.policy.num_cpus,
924                port_remap: self.policy.port_remap,
925                cow_enabled: self.policy.workdir.is_some() && self.policy.fs_isolation == FsIsolation::None,
926                chroot_root: self.policy.chroot.as_ref().and_then(|p| std::fs::canonicalize(p).ok()),
927                chroot_readable: self.policy.fs_readable.clone(),
928                chroot_writable: self.policy.fs_writable.clone(),
929                chroot_denied: self.policy.fs_denied.clone(),
930                chroot_mounts: self.policy.fs_mount.iter().map(|(vp, hp)| {
931                    (vp.clone(), std::fs::canonicalize(hp).unwrap_or_else(|_| hp.clone()))
932                }).collect(),
933                deterministic_dirs: self.policy.deterministic_dirs,
934                hostname: self.policy.hostname.clone(),
935                has_http_acl: !self.policy.http_allow.is_empty() || !self.policy.http_deny.is_empty(),
936                virtual_etc_hosts,
937            };
938
939            // Create domain states
940            use rand::SeedableRng;
941            use rand_chacha::ChaCha8Rng;
942
943            let random_state = self.policy.random_seed.map(|seed| ChaCha8Rng::seed_from_u64(seed));
944            let time_offset = self.policy.time_start.map(|t| crate::time::calculate_time_offset(t));
945
946            // TimeRandomState
947            let time_random_state = TimeRandomState::new(time_offset, random_state);
948
949            // NetworkState
950            let mut net_state = NetworkState::new();
951            net_state.network_policy = if self.policy.net_allow_hosts.is_some() {
952                crate::seccomp::notif::NetworkPolicy::AllowList(resolved_ips)
953            } else {
954                crate::seccomp::notif::NetworkPolicy::Unrestricted
955            };
956            net_state.http_acl_addr = self.http_acl_handle.as_ref().map(|h| h.addr);
957            net_state.http_acl_ports = self.policy.http_ports.iter().copied().collect();
958            net_state.http_acl_orig_dest = self.http_acl_handle.as_ref().map(|h| h.orig_dest.clone());
959            if let Some(cb) = self.on_bind.take() {
960                net_state.port_map.on_bind = Some(cb);
961            }
962
963            // ProcfsState (sandbox membership lives in ProcessIndex now).
964            let procfs_state = ProcfsState::new();
965
966            // ResourceState
967            let mut res_state = ResourceState::new(
968                notif_policy.max_memory_bytes,
969                notif_policy.max_processes,
970            );
971            res_state.proc_count = 1;
972
973            // CowState
974            let mut cow_state = CowState::new();
975            if self.policy.workdir.is_some() && self.policy.fs_isolation == FsIsolation::None {
976                let workdir = self.policy.workdir.as_ref().unwrap();
977                let storage = self.policy.fs_storage.as_deref();
978                let max_disk = self.policy.max_disk.map(|b| b.0).unwrap_or(0);
979                match crate::cow::seccomp::SeccompCowBranch::create(workdir, storage, max_disk) {
980                    Ok(branch) => { cow_state.branch = Some(branch); }
981                    Err(e) => { eprintln!("sandlock: seccomp COW branch creation failed: {}", e); }
982                }
983            }
984
985            // PolicyFnState
986            let mut policy_fn_state = PolicyFnState::new();
987
988            if let Ok(mut denied) = policy_fn_state.denied_paths.write() {
989                for path in &self.policy.fs_denied {
990                    denied.insert(path.to_string_lossy().into_owned());
991                }
992            }
993
994            if let Some(ref callback) = self.policy.policy_fn {
995                let live = crate::policy_fn::LivePolicy {
996                    allowed_ips: match &net_state.network_policy {
997                        crate::seccomp::notif::NetworkPolicy::AllowList(ips) => ips.clone(),
998                        crate::seccomp::notif::NetworkPolicy::Unrestricted => std::collections::HashSet::new(),
999                    },
1000                    max_memory_bytes: notif_policy.max_memory_bytes,
1001                    max_processes: notif_policy.max_processes,
1002                };
1003                let ceiling = live.clone();
1004                let live = std::sync::Arc::new(std::sync::RwLock::new(live));
1005                let denied_paths = policy_fn_state.denied_paths.clone();
1006                let pid_overrides = net_state.pid_ip_overrides.clone();
1007                policy_fn_state.live_policy = Some(live.clone());
1008                let tx = crate::policy_fn::spawn_policy_fn(
1009                    callback.clone(), live, ceiling, pid_overrides, denied_paths,
1010                );
1011                policy_fn_state.event_tx = Some(tx);
1012            }
1013
1014            // ChrootState
1015            let chroot_state = ChrootState::new();
1016
1017            use std::os::unix::io::AsRawFd;
1018            let notif_raw_fd = notif_fd.as_raw_fd();
1019            let child_pidfd_raw = pidfd.as_ref().map(|pfd| pfd.as_raw_fd());
1020
1021            let res_state = Arc::new(Mutex::new(res_state));
1022            self.supervisor_resource = Some(Arc::clone(&res_state));
1023
1024            let cow_state = Arc::new(Mutex::new(cow_state));
1025            self.supervisor_cow = Some(Arc::clone(&cow_state));
1026
1027            let net_state = Arc::new(Mutex::new(net_state));
1028            self.supervisor_network = Some(Arc::clone(&net_state));
1029
1030            let procfs_state = Arc::new(Mutex::new(procfs_state));
1031            let time_random_state = Arc::new(Mutex::new(time_random_state));
1032            let policy_fn_state = Arc::new(Mutex::new(policy_fn_state));
1033            let chroot_state = Arc::new(Mutex::new(chroot_state));
1034            // Root child is registered (with watcher) on its first
1035            // notification, the same path grandchildren take.
1036            let processes = Arc::new(crate::seccomp::state::ProcessIndex::new());
1037
1038            let ctx = Arc::new(SupervisorCtx {
1039                resource: Arc::clone(&res_state),
1040                cow: Arc::clone(&cow_state),
1041                procfs: Arc::clone(&procfs_state),
1042                network: Arc::clone(&net_state),
1043                time_random: Arc::clone(&time_random_state),
1044                policy_fn: Arc::clone(&policy_fn_state),
1045                chroot: Arc::clone(&chroot_state),
1046                netlink: Arc::new(crate::netlink::NetlinkState::new()),
1047                processes: Arc::clone(&processes),
1048                policy: Arc::new(notif_policy),
1049                child_pidfd: child_pidfd_raw,
1050                notif_fd: notif_raw_fd,
1051            });
1052
1053            // Spawn notif supervisor
1054            self.notif_handle = Some(tokio::spawn(
1055                notif::supervisor(notif_fd, ctx),
1056            ));
1057
1058            // Spawn load average sampling task (every 5s, like the kernel)
1059            let la_resource = Arc::clone(&res_state);
1060            self.loadavg_handle = Some(tokio::spawn(async move {
1061                let mut interval = tokio::time::interval(Duration::from_secs(5));
1062                interval.tick().await; // skip immediate first tick
1063                loop {
1064                    interval.tick().await;
1065                    let mut rs = la_resource.lock().await;
1066                    let running = rs.proc_count;
1067                    rs.load_avg.sample(running);
1068                }
1069            }));
1070        }
1071
1072        // 15. Optionally spawn CPU throttle task
1073        if let Some(cpu_pct) = self.policy.max_cpu {
1074            if cpu_pct < 100 {
1075                let child_pid = pid;
1076                self.throttle_handle = Some(tokio::spawn(throttle_cpu(child_pid, cpu_pct)));
1077            }
1078        }
1079
1080        // 16. Signal child "ready" via pipe
1081        write_u32_fd(pipes.ready_w.as_raw_fd(), 1)
1082            .map_err(|e| SandboxError::Child(format!("write ready signal: {}", e)))?;
1083
1084        // 17. Store pidfd
1085        self.pidfd = pidfd;
1086
1087        Ok(())
1088    }
1089}
1090
1091// ============================================================
1092// Drop — kill and reap child if still running
1093// ============================================================
1094
impl Drop for Sandbox {
    /// Best-effort teardown: kill and reap a still-live child, cancel
    /// supervision tasks, then apply the policy's branch action to any
    /// copy-on-write state. All failures are ignored — Drop must not panic.
    fn drop(&mut self) {
        if let Some(pid) = self.child_pid {
            if matches!(self.state, SandboxState::Running | SandboxState::Paused) {
                // Kill the entire process group
                // (killpg treats `pid` as a process-group id — NOTE(review):
                // this assumes the child became its own group leader; confirm
                // against confine_child.)
                unsafe { libc::killpg(pid, libc::SIGKILL) };
                // Reap the zombie
                let mut status: i32 = 0;
                unsafe { libc::waitpid(pid, &mut status, 0) };
            }
        }

        // Cancel background tasks; abort() is a no-op on finished handles.
        if let Some(h) = self.notif_handle.take() {
            h.abort();
        }
        if let Some(h) = self.throttle_handle.take() {
            h.abort();
        }
        if let Some(h) = self.loadavg_handle.take() {
            h.abort();
        }

        // COW cleanup based on exit status.
        // Determine action once, then apply to whichever branch exists.
        // Only a Stopped state with a non-zero/killed status counts as an
        // error; a child killed just above (state still Running/Paused)
        // takes the `on_exit` path.
        let is_error = matches!(
            self.state,
            SandboxState::Stopped(ref s) if !matches!(s, ExitStatus::Code(0))
        );
        let action = if is_error {
            &self.policy.on_error
        } else {
            &self.policy.on_exit
        };

        // OverlayFS / BranchFS COW branch.
        // Commit/abort results are deliberately discarded (no panic in Drop).
        if let Some(ref branch) = self.cow_branch {
            match action {
                BranchAction::Commit => { let _ = branch.commit(); }
                BranchAction::Abort => { let _ = branch.abort(); }
                BranchAction::Keep => {}
            }
        }

        // Seccomp COW branch (extracted from supervisor state in wait())
        if let Some(ref mut cow) = self.seccomp_cow {
            match action {
                BranchAction::Commit => { let _ = cow.commit(); }
                BranchAction::Abort => { let _ = cow.abort(); }
                BranchAction::Keep => {}
            }
        }
    }
}
1148
1149// ============================================================
1150// CPU throttle
1151// ============================================================
1152
1153/// Periodically SIGSTOP/SIGCONT the child process group to throttle CPU usage.
1154async fn throttle_cpu(pid: i32, cpu_pct: u8) {
1155    let period = Duration::from_millis(100);
1156    let run_time = period * cpu_pct as u32 / 100;
1157    let stop_time = period - run_time;
1158
1159    loop {
1160        tokio::time::sleep(run_time).await;
1161        if unsafe { libc::killpg(pid, libc::SIGSTOP) } < 0 {
1162            break;
1163        }
1164        tokio::time::sleep(stop_time).await;
1165        if unsafe { libc::killpg(pid, libc::SIGCONT) } < 0 {
1166            break;
1167        }
1168    }
1169}
1170
1171// ============================================================
1172// Helpers
1173// ============================================================
1174
1175/// Convert a raw waitpid status to our ExitStatus enum.
1176/// Read all bytes from a file descriptor until EOF.
1177/// Read exactly `buf.len()` bytes from a raw fd.
1178fn read_exact(fd: i32, buf: &mut [u8]) {
1179    let mut off = 0;
1180    while off < buf.len() {
1181        let r = unsafe { libc::read(fd, buf[off..].as_mut_ptr() as *mut _, buf.len() - off) };
1182        if r <= 0 { break; }
1183        off += r as usize;
1184    }
1185}
1186
/// Drain a file descriptor to EOF, returning whatever bytes were read.
///
/// Takes ownership of `fd`; the descriptor is closed when the temporary
/// `File` drops. Read errors are ignored (best-effort) — the caller gets
/// the bytes collected up to that point.
fn read_fd_to_end(fd: OwnedFd) -> Vec<u8> {
    use std::io::Read;
    // `File: From<OwnedFd>` transfers ownership directly — no raw-fd
    // round trip and no `unsafe` needed (unlike into_raw_fd/from_raw_fd).
    let mut file = std::fs::File::from(fd);
    let mut buf = Vec::new();
    let _ = file.read_to_end(&mut buf);
    buf
}
1194
1195fn wait_status_to_exit(status: i32) -> ExitStatus {
1196    if libc::WIFEXITED(status) {
1197        ExitStatus::Code(libc::WEXITSTATUS(status))
1198    } else if libc::WIFSIGNALED(status) {
1199        let sig = libc::WTERMSIG(status);
1200        if sig == libc::SIGKILL {
1201            ExitStatus::Killed
1202        } else {
1203            ExitStatus::Signal(sig)
1204        }
1205    } else {
1206        ExitStatus::Killed
1207    }
1208}