Skip to main content

sandlock_core/
sandbox.rs

1use std::collections::HashMap;
2use std::os::fd::AsRawFd;
3use std::path::PathBuf;
4use std::sync::Arc;
5use std::time::SystemTime;
6
7use serde::{Deserialize, Serialize};
8use tokio::task::JoinHandle;
9
10use crate::context;
11use crate::error::SandboxError;
12pub use crate::http::{http_acl_check, normalize_path, prefix_or_exact_match, HttpRule};
13pub use crate::network::{NetAllow, Protocol};
14
15/// A byte size value.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
17pub struct ByteSize(pub u64);
18
19impl ByteSize {
20    pub fn bytes(n: u64) -> Self {
21        ByteSize(n)
22    }
23
24    pub fn kib(n: u64) -> Self {
25        ByteSize(n * 1024)
26    }
27
28    pub fn mib(n: u64) -> Self {
29        ByteSize(n * 1024 * 1024)
30    }
31
32    pub fn gib(n: u64) -> Self {
33        ByteSize(n * 1024 * 1024 * 1024)
34    }
35
36    pub fn parse(s: &str) -> Result<Self, SandboxError> {
37        let s = s.trim();
38        if s.is_empty() {
39            return Err(SandboxError::Invalid("empty byte size string".into()));
40        }
41
42        // Check for suffix
43        let last = s.chars().last().unwrap();
44        if last.is_ascii_alphabetic() {
45            let (num_str, suffix) = s.split_at(s.len() - 1);
46            let n: u64 = num_str
47                .trim()
48                .parse()
49                .map_err(|_| SandboxError::Invalid(format!("invalid byte size: {}", s)))?;
50            match suffix.to_ascii_uppercase().as_str() {
51                "K" => Ok(ByteSize::kib(n)),
52                "M" => Ok(ByteSize::mib(n)),
53                "G" => Ok(ByteSize::gib(n)),
54                other => Err(SandboxError::Invalid(format!("unknown byte size suffix: {}", other))),
55            }
56        } else {
57            let n: u64 = s
58                .parse()
59                .map_err(|_| SandboxError::Invalid(format!("invalid byte size: {}", s)))?;
60            Ok(ByteSize(n))
61        }
62    }
63}
64
65/// Confinement for confining the current process in place.
66#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
67pub struct Confinement {
68    pub fs_writable: Vec<PathBuf>,
69    pub fs_readable: Vec<PathBuf>,
70}
71
72impl Confinement {
73    pub fn builder() -> ConfinementBuilder {
74        ConfinementBuilder::default()
75    }
76}
77
78#[derive(Default)]
79pub struct ConfinementBuilder {
80    fs_writable: Vec<PathBuf>,
81    fs_readable: Vec<PathBuf>,
82}
83
84impl ConfinementBuilder {
85    pub fn fs_write(mut self, path: impl Into<PathBuf>) -> Self {
86        self.fs_writable.push(path.into());
87        self
88    }
89
90    pub fn fs_read(mut self, path: impl Into<PathBuf>) -> Self {
91        self.fs_readable.push(path.into());
92        self
93    }
94
95    pub fn build(self) -> Confinement {
96        Confinement {
97            fs_writable: self.fs_writable,
98            fs_readable: self.fs_readable,
99        }
100    }
101}
102
103impl TryFrom<&Sandbox> for Confinement {
104    type Error = SandboxError;
105
106    fn try_from(sandbox: &Sandbox) -> Result<Self, Self::Error> {
107        let mut unsupported = Vec::new();
108        if !sandbox.fs_denied.is_empty() { unsupported.push("fs_denied"); }
109        if !sandbox.extra_deny_syscalls.is_empty() { unsupported.push("extra_deny_syscalls"); }
110        if !sandbox.net_allow.is_empty() { unsupported.push("net_allow"); }
111        if !sandbox.net_bind.is_empty() { unsupported.push("net_bind"); }
112        if sandbox.allows_sysv_ipc() { unsupported.push("extra_allow_syscalls=[\"sysv_ipc\"]"); }
113        if !sandbox.http_allow.is_empty() { unsupported.push("http_allow"); }
114        if !sandbox.http_deny.is_empty() { unsupported.push("http_deny"); }
115        if !sandbox.http_ports.is_empty() { unsupported.push("http_ports"); }
116        if sandbox.http_ca.is_some() { unsupported.push("http_ca"); }
117        if sandbox.http_key.is_some() { unsupported.push("http_key"); }
118        if sandbox.max_memory.is_some() { unsupported.push("max_memory"); }
119        if sandbox.max_processes != 64 { unsupported.push("max_processes"); }
120        if sandbox.max_open_files.is_some() { unsupported.push("max_open_files"); }
121        if sandbox.max_cpu.is_some() { unsupported.push("max_cpu"); }
122        if sandbox.random_seed.is_some() { unsupported.push("random_seed"); }
123        if sandbox.time_start.is_some() { unsupported.push("time_start"); }
124        if sandbox.no_randomize_memory { unsupported.push("no_randomize_memory"); }
125        if sandbox.no_huge_pages { unsupported.push("no_huge_pages"); }
126        if sandbox.no_coredump { unsupported.push("no_coredump"); }
127        if sandbox.deterministic_dirs { unsupported.push("deterministic_dirs"); }
128        if sandbox.fs_isolation != FsIsolation::None { unsupported.push("fs_isolation"); }
129        if sandbox.workdir.is_some() { unsupported.push("workdir"); }
130        if sandbox.cwd.is_some() { unsupported.push("cwd"); }
131        if sandbox.fs_storage.is_some() { unsupported.push("fs_storage"); }
132        if sandbox.max_disk.is_some() { unsupported.push("max_disk"); }
133        if sandbox.on_exit != BranchAction::Commit { unsupported.push("on_exit"); }
134        if sandbox.on_error != BranchAction::Abort { unsupported.push("on_error"); }
135        if !sandbox.fs_mount.is_empty() { unsupported.push("fs_mount"); }
136        if sandbox.chroot.is_some() { unsupported.push("chroot"); }
137        if sandbox.clean_env { unsupported.push("clean_env"); }
138        if !sandbox.env.is_empty() { unsupported.push("env"); }
139        if sandbox.gpu_devices.is_some() { unsupported.push("gpu_devices"); }
140        if sandbox.cpu_cores.is_some() { unsupported.push("cpu_cores"); }
141        if sandbox.num_cpus.is_some() { unsupported.push("num_cpus"); }
142        if sandbox.port_remap { unsupported.push("port_remap"); }
143        if sandbox.uid.is_some() { unsupported.push("uid"); }
144        if sandbox.policy_fn.is_some() { unsupported.push("policy_fn"); }
145
146        if !unsupported.is_empty() {
147            return Err(SandboxError::UnsupportedForConfine(unsupported.join(", ")));
148        }
149
150        Ok(Self {
151            fs_writable: sandbox.fs_writable.clone(),
152            fs_readable: sandbox.fs_readable.clone(),
153        })
154    }
155}
156
/// Filesystem isolation mode.
///
/// Any mode other than [`FsIsolation::None`] requires `Sandbox::workdir` to
/// be set; `Sandbox::validate` enforces this.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum FsIsolation {
    /// No filesystem isolation (the default).
    #[default]
    None,
    /// NOTE(review): presumably isolation backed by an overlay filesystem —
    /// confirm against the code that consumes this variant.
    OverlayFs,
    /// NOTE(review): presumably isolation backed by a "branch" filesystem —
    /// confirm against the code that consumes this variant.
    BranchFs,
}
165
/// Action to take on branch exit.
///
/// Used for both `Sandbox::on_exit` and `Sandbox::on_error`. The conversion
/// to `Confinement` treats `Commit` (for `on_exit`) and `Abort` (for
/// `on_error`) as the untouched values.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum BranchAction {
    /// The `Default` variant of this enum.
    #[default]
    Commit,
    Abort,
    Keep,
}
174
175// ============================================================
176// Runtime — private heap-allocated state, present only while running
177// ============================================================
178
/// Private runtime state.  Only allocated after `start()` / `run()` is
/// called; `None` for config-only `Sandbox` instances.
struct Runtime {
    /// Instance name; `Sandbox::instance_name` prefers this over the
    /// configured `Sandbox::name`.
    name: String,
    /// Current lifecycle state (see [`RuntimeState`]).
    state: RuntimeState,
    /// PID of the sandboxed child. Also used as the process-group id for
    /// `killpg` and as the target of `waitpid`.
    child_pid: Option<i32>,
    pidfd: Option<std::os::fd::OwnedFd>,
    // Background tasks; `Sandbox::wait` aborts them once the child has exited.
    notif_handle: Option<JoinHandle<()>>,
    throttle_handle: Option<JoinHandle<()>>,
    loadavg_handle: Option<JoinHandle<()>>,
    // Fds consumed by `Sandbox::wait` to produce the captured stdout/stderr.
    _stdout_read: Option<std::os::fd::OwnedFd>,
    _stderr_read: Option<std::os::fd::OwnedFd>,
    /// Taken (and consumed) by `Sandbox::commit` / `Sandbox::abort_branch`.
    cow_branch: Option<Box<dyn crate::cow::CowBranch>>,
    /// Filled in by `Sandbox::wait` from `supervisor_cow`'s `branch`.
    seccomp_cow: Option<crate::cow::seccomp::SeccompCowBranch>,
    /// Shared supervisor state; `freeze` / `thaw` toggle its `hold_forks` flag.
    supervisor_resource: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::ResourceState>>>,
    supervisor_cow: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::CowState>>>,
    /// Shared supervisor state; `Sandbox::port_mappings` reads its `port_map`.
    supervisor_network: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::NetworkState>>>,
    ctrl_fd: Option<std::os::fd::OwnedFd>,
    stdout_pipe: Option<std::os::fd::OwnedFd>,
    /// `(stdin, stdout, stderr)` fd overrides set by `create_with_io` /
    /// `create_with_gather_io`.
    io_overrides: Option<(Option<i32>, Option<i32>, Option<i32>)>,
    /// Extra fd pairs set by `create_with_gather_io`.
    extra_fds: Vec<(i32, i32)>,
    http_acl_handle: Option<crate::http_acl::HttpAclProxyHandle>,
    /// Callback installed by `Sandbox::set_on_bind`.
    #[allow(clippy::type_complexity)]
    on_bind: Option<Box<dyn Fn(&HashMap<u16, u16>) + Send + Sync>>,
    handlers: Vec<(i64, Arc<dyn crate::seccomp::dispatch::Handler>)>,
    ready_w: Option<std::os::fd::OwnedFd>,
}
206
/// Lifecycle state for the runtime.
///
/// `Sandbox::is_running` reports `true` for `Running` and `Paused` only.
enum RuntimeState {
    Created,
    /// Set by `Sandbox::resume` after a successful SIGCONT.
    Running,
    /// Set by `Sandbox::pause` after a successful SIGSTOP.
    Paused,
    /// Set by `Sandbox::wait` once the child's exit status has been collected.
    Stopped(crate::result::ExitStatus),
}
214
/// Sandbox configuration.
///
/// Besides the serializable policy fields, a `Sandbox` also carries a few
/// non-serialized (`#[serde(skip)]`) members: the dynamic policy callback,
/// the instance name, the COW-fork closures and the private runtime state
/// that exists only once the sandbox has been started.
#[derive(Serialize, Deserialize)]
pub struct Sandbox {
    // Filesystem access
    pub fs_writable: Vec<PathBuf>,
    pub fs_readable: Vec<PathBuf>,
    pub fs_denied: Vec<PathBuf>,

    // Extra syscall filtering on top of Sandlock's default blocklist.
    pub extra_deny_syscalls: Vec<String>,
    /// Extra allowed syscall groups; `allows_sysv_ipc` looks for the entry
    /// `"sysv_ipc"` here.
    pub extra_allow_syscalls: Vec<String>,

    // Network
    /// Outbound endpoint allowlist as a list of `(protocol, host?, ports)`
    /// rules. Each rule names a protocol (TCP/UDP/ICMP) and either a
    /// concrete host or "any IP." TCP and UDP rules carry ports; ICMP
    /// rules have none.
    ///
    /// **Protocol gating falls out of rule presence.** Sandlock denies
    /// UDP and ICMP socket creation by default; opting in is "list at
    /// least one rule for that protocol" (e.g. `udp://*:*` for any UDP,
    /// `icmp://*` for any ICMP echo). TCP is always permitted.
    ///
    /// Empty `net_allow` and empty `http_allow`/`http_deny` together
    /// mean "deny all outbound" (Landlock direct path denies, no
    /// on-behalf path is enabled). Otherwise, the on-behalf path
    /// enforces these rules: a destination is permitted iff any rule
    /// matches the protocol, destination IP (or has `host: None` = any
    /// IP), and destination port (N/A for ICMP).
    ///
    /// HTTP rules with concrete hosts auto-add a matching
    /// `(Tcp, host, [80])` (and `(Tcp, host, [443])` when `--http-ca`
    /// is set) entry at build time so the proxy's intercept ports
    /// remain reachable. HTTP rules with wildcard hosts auto-add
    /// `(Tcp, None, [80])` instead.
    pub net_allow: Vec<NetAllow>,
    pub net_bind: Vec<u16>,
    // HTTP ACL
    pub http_allow: Vec<HttpRule>,
    pub http_deny: Vec<HttpRule>,
    /// TCP ports to intercept for HTTP ACL. Defaults to [80] (plus 443 when
    /// http_ca is set). Override with `http_ports` to intercept custom ports.
    pub http_ports: Vec<u16>,
    /// PEM CA cert for HTTPS MITM. When set, port 443 is also intercepted.
    pub http_ca: Option<PathBuf>,
    /// PEM CA key for HTTPS MITM. Required when http_ca is set.
    pub http_key: Option<PathBuf>,

    // Resource limits
    pub max_memory: Option<ByteSize>,
    /// NOTE(review): the `Confinement` conversion treats 64 as the untouched
    /// value of this field — presumably the builder default; confirm.
    pub max_processes: u32,
    pub max_open_files: Option<u32>,
    pub max_cpu: Option<u8>,

    // Reproducibility
    pub random_seed: Option<u64>,
    pub time_start: Option<SystemTime>,
    pub no_randomize_memory: bool,
    pub no_huge_pages: bool,
    pub no_coredump: bool,
    pub deterministic_dirs: bool,

    // Filesystem branch
    /// Isolation mode; anything other than `FsIsolation::None` requires
    /// `workdir` to be set (checked by `validate`).
    pub fs_isolation: FsIsolation,
    pub workdir: Option<PathBuf>,
    pub cwd: Option<PathBuf>,
    pub fs_storage: Option<PathBuf>,
    pub max_disk: Option<ByteSize>,
    pub on_exit: BranchAction,
    pub on_error: BranchAction,

    // Mount mappings: (virtual_path_inside_chroot, host_path_on_disk)
    pub fs_mount: Vec<(PathBuf, PathBuf)>,

    // Environment
    pub chroot: Option<PathBuf>,
    pub clean_env: bool,
    pub env: HashMap<String, String>,
    // Devices
    pub gpu_devices: Option<Vec<u32>>,

    // CPU
    pub cpu_cores: Option<Vec<u32>>,
    pub num_cpus: Option<u32>,
    pub port_remap: bool,

    // User namespace
    pub uid: Option<u32>,

    // Dynamic policy callback
    #[serde(skip)]
    pub policy_fn: Option<crate::policy_fn::PolicyCallback>,

    // Sandbox instance name (exposed as virtual hostname; auto-generated if None).
    // Not serialized — instance names are set at runtime, not in the policy file.
    #[serde(skip)]
    pub name: Option<String>,

    // COW fork init function — runs once in the child before COW cloning.
    // Not serialized; not cloned (FnOnce can't be cloned — drops to None on clone).
    #[serde(skip)]
    init_fn: Option<Box<dyn FnOnce() + Send + 'static>>,

    // COW fork work function — runs in each COW clone.
    // Not serialized; cloned via Arc (cheap).
    #[serde(skip)]
    work_fn: Option<Arc<dyn Fn(u32) + Send + Sync + 'static>>,

    // Heap-allocated runtime state; `None` when not started.
    #[serde(skip)]
    runtime: Option<Box<Runtime>>,
}
327
328impl std::fmt::Debug for Sandbox {
329    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
330        f.debug_struct("Sandbox")
331            .field("fs_readable", &self.fs_readable)
332            .field("fs_writable", &self.fs_writable)
333            .field("max_memory", &self.max_memory)
334            .field("max_processes", &self.max_processes)
335            .field("policy_fn", &self.policy_fn.as_ref().map(|_| "<callback>"))
336            .field("name", &self.name)
337            .field("runtime", &self.runtime.as_ref().map(|_| "<runtime>"))
338            .finish_non_exhaustive()
339    }
340}
341
impl Clone for Sandbox {
    /// Clone a `Sandbox` — config and runtime-kwargs fields are cloned; the
    /// runtime state is not (the clone starts with `runtime: None`).
    ///
    /// Field clone semantics:
    /// - `policy_fn` — Arc bump (cheap).
    /// - `work_fn`   — Arc bump (cheap); multiple Sandboxes share the closure.
    /// - `init_fn`   — **dropped to `None`** (FnOnce can't be cloned). If the
    ///   clone also needs an init function, call `.with_init_fn(...)` on it
    ///   separately or set it via `SandboxBuilder::init_fn`.
    /// - `runtime`   — always `None`; the clone is a fresh, un-started Sandbox.
    ///
    /// Every field is listed explicitly, so a field added to `Sandbox` must
    /// also be added here.
    fn clone(&self) -> Self {
        Self {
            fs_writable: self.fs_writable.clone(),
            fs_readable: self.fs_readable.clone(),
            fs_denied: self.fs_denied.clone(),
            extra_deny_syscalls: self.extra_deny_syscalls.clone(),
            extra_allow_syscalls: self.extra_allow_syscalls.clone(),
            net_allow: self.net_allow.clone(),
            net_bind: self.net_bind.clone(),
            http_allow: self.http_allow.clone(),
            http_deny: self.http_deny.clone(),
            http_ports: self.http_ports.clone(),
            http_ca: self.http_ca.clone(),
            http_key: self.http_key.clone(),
            max_memory: self.max_memory,
            max_processes: self.max_processes,
            max_open_files: self.max_open_files,
            max_cpu: self.max_cpu,
            random_seed: self.random_seed,
            time_start: self.time_start,
            no_randomize_memory: self.no_randomize_memory,
            no_huge_pages: self.no_huge_pages,
            no_coredump: self.no_coredump,
            deterministic_dirs: self.deterministic_dirs,
            fs_isolation: self.fs_isolation.clone(),
            workdir: self.workdir.clone(),
            cwd: self.cwd.clone(),
            fs_storage: self.fs_storage.clone(),
            max_disk: self.max_disk,
            on_exit: self.on_exit.clone(),
            on_error: self.on_error.clone(),
            fs_mount: self.fs_mount.clone(),
            chroot: self.chroot.clone(),
            clean_env: self.clean_env,
            env: self.env.clone(),
            gpu_devices: self.gpu_devices.clone(),
            cpu_cores: self.cpu_cores.clone(),
            num_cpus: self.num_cpus,
            port_remap: self.port_remap,
            uid: self.uid,
            policy_fn: self.policy_fn.clone(),
            name: self.name.clone(),
            // init_fn (FnOnce) cannot be cloned — the clone gets None.
            // If the clone also needs an init function, set it explicitly.
            init_fn: None,
            // work_fn is Arc-wrapped — clone bumps the reference count.
            work_fn: self.work_fn.clone(),
            // Runtime is NOT cloned — the clone starts with no runtime.
            runtime: None,
        }
    }
}
405
406impl Sandbox {
    /// Start a new [`SandboxBuilder`] in its `default()` state.
    pub fn builder() -> SandboxBuilder {
        SandboxBuilder::default()
    }
410
411    /// Returns true iff the policy grants the `sysv_ipc` syscall group.
412    pub fn allows_sysv_ipc(&self) -> bool {
413        self.extra_allow_syscalls.iter().any(|s| s == "sysv_ipc")
414    }
415
416    /// Validate cross-section invariants — checks that span multiple fields.
417    ///
418    /// Currently:
419    /// - `fs_isolation != "none"` requires `workdir` to be set.
420    ///
421    /// Idempotent: calling repeatedly is safe.
422    pub fn validate(&self) -> Result<(), SandboxError> {
423        if self.fs_isolation != FsIsolation::None && self.workdir.is_none() {
424            return Err(SandboxError::FsIsolationRequiresWorkdir);
425        }
426        Ok(())
427    }
428
429    // ================================================================
430    // Runtime accessor helpers (private)
431    // ================================================================
432
433    fn rt(&self) -> &Runtime {
434        self.runtime.as_ref().expect("sandbox not started")
435    }
436
437    fn rt_mut(&mut self) -> &mut Runtime {
438        self.runtime.as_mut().expect("sandbox not started")
439    }
440
441    // ================================================================
442    // Runtime lifecycle API (public)
443    // ================================================================
444
445    /// Set the sandbox instance name (also exposed as the virtual hostname).
446    /// Auto-generated if not set.
447    pub fn set_name(&mut self, name: impl Into<String>) {
448        self.name = Some(name.into());
449    }
450
451    /// Set the sandbox instance name and return `self`. Convenience for
452    /// pipeline fan-out where a base config is cloned and each clone gets a
453    /// fresh name:
454    ///
455    /// ```ignore
456    /// let template = Sandbox::builder()...build()?;
457    /// let mut s1 = template.clone().with_name("worker-1");
458    /// let mut s2 = template.clone().with_name("worker-2");
459    /// ```
460    pub fn with_name(mut self, name: impl Into<String>) -> Self {
461        self.name = Some(name.into());
462        self
463    }
464
465    /// Set the COW-fork init function and return `self`.
466    ///
467    /// The init function runs once in the child process before any COW clones
468    /// are created. Use it to load expensive shared state.
469    pub fn with_init_fn(mut self, f: impl FnOnce() + Send + 'static) -> Self {
470        self.init_fn = Some(Box::new(f));
471        self
472    }
473
474    /// Set the COW-fork work function and return `self`.
475    ///
476    /// The work function runs in each COW clone (`fork(N)` produces N clones).
477    pub fn with_work_fn(mut self, f: impl Fn(u32) + Send + Sync + 'static) -> Self {
478        self.work_fn = Some(Arc::new(f));
479        self
480    }
481
482    /// Return the sandbox name if set, or `None` if not yet started.
483    pub fn instance_name(&self) -> Option<&str> {
484        self.runtime.as_ref().map(|r| r.name.as_str())
485            .or_else(|| self.name.as_deref())
486    }
487
488    /// Return the child PID if spawned.
489    pub fn pid(&self) -> Option<i32> {
490        self.runtime.as_ref().and_then(|r| r.child_pid)
491    }
492
493    /// Return whether the child is currently running or paused.
494    pub fn is_running(&self) -> bool {
495        self.runtime.as_ref().map(|r| {
496            matches!(r.state, RuntimeState::Running | RuntimeState::Paused)
497        }).unwrap_or(false)
498    }
499
500    /// Send SIGSTOP to the child's process group.
501    pub fn pause(&mut self) -> Result<(), crate::error::SandlockError> {
502        use crate::error::SandboxRuntimeError;
503        let pid = self.runtime.as_ref()
504            .and_then(|rt| rt.child_pid)
505            .ok_or(SandboxRuntimeError::NotRunning)?;
506        let ret = unsafe { libc::killpg(pid, libc::SIGSTOP) };
507        if ret < 0 {
508            return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
509        }
510        self.rt_mut().state = RuntimeState::Paused;
511        Ok(())
512    }
513
514    /// Send SIGCONT to the child's process group.
515    pub fn resume(&mut self) -> Result<(), crate::error::SandlockError> {
516        use crate::error::SandboxRuntimeError;
517        let pid = self.runtime.as_ref()
518            .and_then(|rt| rt.child_pid)
519            .ok_or(SandboxRuntimeError::NotRunning)?;
520        let ret = unsafe { libc::killpg(pid, libc::SIGCONT) };
521        if ret < 0 {
522            return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
523        }
524        self.rt_mut().state = RuntimeState::Running;
525        Ok(())
526    }
527
528    /// Send SIGKILL to the child's process group.
529    pub fn kill(&mut self) -> Result<(), crate::error::SandlockError> {
530        use crate::error::SandboxRuntimeError;
531        let pid = self.runtime.as_ref()
532            .and_then(|rt| rt.child_pid)
533            .ok_or(SandboxRuntimeError::NotRunning)?;
534        let ret = unsafe { libc::killpg(pid, libc::SIGKILL) };
535        if ret < 0 {
536            let err = std::io::Error::last_os_error();
537            if err.raw_os_error() != Some(libc::ESRCH) {
538                return Err(SandboxRuntimeError::Io(err).into());
539            }
540        }
541        Ok(())
542    }
543
544    /// Set a callback invoked whenever a port bind is recorded.
545    pub fn set_on_bind(&mut self, cb: impl Fn(&HashMap<u16, u16>) + Send + Sync + 'static) {
546        // Ensure runtime exists so we have somewhere to store the callback.
547        // In practice, set_on_bind is always called before spawn.
548        let _ = self.ensure_runtime();
549        self.rt_mut().on_bind = Some(Box::new(cb));
550    }
551
552    /// Return the current virtual-to-real port mappings.
553    pub async fn port_mappings(&self) -> HashMap<u16, u16> {
554        if let Some(ref rt) = self.runtime {
555            if let Some(ref net) = rt.supervisor_network {
556                let ns = net.lock().await;
557                return ns.port_map.virtual_to_real.clone();
558            }
559        }
560        HashMap::new()
561    }
562
563    /// Wait for the child process to exit.
564    pub async fn wait(&mut self) -> Result<crate::result::RunResult, crate::error::SandlockError> {
565        use crate::error::SandboxRuntimeError;
566        use crate::result::{ExitStatus, RunResult};
567
568        let pid = self.rt().child_pid.ok_or(SandboxRuntimeError::NotRunning)?;
569
570        if let RuntimeState::Stopped(ref es) = self.rt().state {
571            return Ok(RunResult {
572                exit_status: es.clone(),
573                stdout: None,
574                stderr: None,
575            });
576        }
577
578        let exit_status = tokio::task::spawn_blocking(move || -> ExitStatus {
579            let mut status: i32 = 0;
580            loop {
581                let ret = unsafe { libc::waitpid(pid, &mut status, 0) };
582                if ret < 0 {
583                    let err = std::io::Error::last_os_error();
584                    if err.raw_os_error() == Some(libc::EINTR) {
585                        continue;
586                    }
587                    return ExitStatus::Killed;
588                }
589                break;
590            }
591            sandbox_wait_status_to_exit(status)
592        })
593        .await
594        .unwrap_or(ExitStatus::Killed);
595
596        self.rt_mut().state = RuntimeState::Stopped(exit_status.clone());
597
598        let rt = self.rt_mut();
599        if let Some(h) = rt.notif_handle.take() { h.abort(); }
600        if let Some(h) = rt.throttle_handle.take() { h.abort(); }
601        if let Some(h) = rt.loadavg_handle.take() { h.abort(); }
602
603        if let Some(ref cow_state) = self.rt().supervisor_cow.clone() {
604            let mut cow = cow_state.lock().await;
605            self.rt_mut().seccomp_cow = cow.branch.take();
606        }
607
608        let stdout = self.rt_mut()._stdout_read.take().map(sandbox_read_fd_to_end);
609        let stderr = self.rt_mut()._stderr_read.take().map(sandbox_read_fd_to_end);
610
611        Ok(RunResult { exit_status, stdout, stderr })
612    }
613
614    /// Fork the sandboxed child and install policy (seccomp + notif
615    /// supervisor + rlimits + landlock + COW + network/HTTP proxies).
616    /// The child is parked between policy install and `execve`; call
617    /// `start()` to release it. Stdout/stderr are captured for later
618    /// retrieval via `wait()`.
619    pub async fn create(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
620        self.do_create(cmd, true).await
621    }
622
623    /// Like `create` but inherits stdio (no capture).
624    pub async fn create_interactive(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
625        self.do_create(cmd, false).await
626    }
627
628    /// Release a previously `create()`d child to `execve` the configured
629    /// command. Returns immediately; use `wait()` to collect the exit
630    /// status when the child finishes.
631    pub fn start(&mut self) -> Result<(), crate::error::SandlockError> {
632        self.do_start()
633    }
634
635    /// Sugar for `create()` + `start()` that also blocks until the child
636    /// has completed `execve()` and is executing user code. After this
637    /// returns, operations that read user-code state (e.g. `checkpoint()`,
638    /// `/proc/<pid>/exe`) observe the requested binary rather than the
639    /// supervisor.
640    pub async fn spawn(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
641        self.create(cmd).await?;
642        self.start()?;
643        self.wait_until_exec().await
644    }
645
646    /// Like `spawn` but inherits stdio (no capture).
647    pub async fn spawn_interactive(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
648        self.create_interactive(cmd).await?;
649        self.start()?;
650        self.wait_until_exec().await
651    }
652
653    /// Wait for the child to finish `execve`. Detected by `/proc/<pid>/exe`
654    /// no longer matching `/proc/self/exe` (before execve the child still
655    /// shares the supervisor's binary). The kernel offers no direct event
656    /// for execve completion, so this polls every 1ms with a 5s ceiling.
657    async fn wait_until_exec(&self) -> Result<(), crate::error::SandlockError> {
658        use crate::error::SandboxRuntimeError;
659        let pid = self.pid().ok_or(SandboxRuntimeError::NotRunning)?;
660        let Some(our_exe) = std::fs::read_link("/proc/self/exe").ok() else {
661            return Ok(());
662        };
663        let child_link = format!("/proc/{}/exe", pid);
664        let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
665        loop {
666            if let Ok(child_exe) = std::fs::read_link(&child_link) {
667                if child_exe != our_exe {
668                    return Ok(());
669                }
670            }
671            if std::time::Instant::now() >= deadline {
672                return Err(SandboxRuntimeError::Child(
673                    "child did not exec() within 5s".into(),
674                ).into());
675            }
676            tokio::time::sleep(std::time::Duration::from_millis(1)).await;
677        }
678    }
679
680    /// Create with explicit stdin/stdout/stderr fd redirection. Child is
681    /// parked after policy install; call `start()` to release.
682    #[doc(hidden)]
683    pub async fn create_with_io(
684        &mut self,
685        cmd: &[&str],
686        stdin_fd: Option<std::os::unix::io::RawFd>,
687        stdout_fd: Option<std::os::unix::io::RawFd>,
688        stderr_fd: Option<std::os::unix::io::RawFd>,
689    ) -> Result<(), crate::error::SandlockError> {
690        self.ensure_runtime()?;
691        self.rt_mut().io_overrides = Some((stdin_fd, stdout_fd, stderr_fd));
692        self.do_create(cmd, false).await
693    }
694
695    /// Like `create_with_io` but also maps extra fds into the child.
696    #[doc(hidden)]
697    pub async fn create_with_gather_io(
698        &mut self,
699        cmd: &[&str],
700        stdin_fd: Option<std::os::unix::io::RawFd>,
701        stdout_fd: Option<std::os::unix::io::RawFd>,
702        stderr_fd: Option<std::os::unix::io::RawFd>,
703        extra_fds: Vec<(i32, i32)>,
704    ) -> Result<(), crate::error::SandlockError> {
705        self.ensure_runtime()?;
706        self.rt_mut().io_overrides = Some((stdin_fd, stdout_fd, stderr_fd));
707        self.rt_mut().extra_fds = extra_fds;
708        self.do_create(cmd, false).await
709    }
710
711    /// Commit COW writes to the original directory.
712    #[doc(hidden)]
713    pub async fn commit(&mut self) -> Result<(), crate::error::SandlockError> {
714        use crate::error::{SandboxRuntimeError, SandlockError};
715        if let Some(ref mut rt) = self.runtime {
716            if let Some(branch) = rt.cow_branch.take() {
717                branch.commit().map_err(|e| SandlockError::Runtime(SandboxRuntimeError::Branch(e)))?;
718            }
719        }
720        Ok(())
721    }
722
723    /// Discard COW writes.
724    #[doc(hidden)]
725    pub async fn abort_branch(&mut self) -> Result<(), crate::error::SandlockError> {
726        use crate::error::{SandboxRuntimeError, SandlockError};
727        if let Some(ref mut rt) = self.runtime {
728            if let Some(branch) = rt.cow_branch.take() {
729                branch.abort().map_err(|e| SandlockError::Runtime(SandboxRuntimeError::Branch(e)))?;
730            }
731        }
732        Ok(())
733    }
734
735    /// Freeze the sandbox: hold fork notifications + SIGSTOP the process group.
736    pub(crate) async fn freeze(&self) -> Result<(), crate::error::SandlockError> {
737        use crate::error::{SandboxRuntimeError, SandlockError};
738        let rt = self.runtime.as_ref().ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
739        let pid = rt.child_pid.ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
740        if let Some(ref resource) = rt.supervisor_resource {
741            let mut rs = resource.lock().await;
742            rs.hold_forks = true;
743        }
744        unsafe { libc::killpg(pid, libc::SIGSTOP); }
745        Ok(())
746    }
747
748    /// Thaw the sandbox: release held fork notifications + SIGCONT.
749    pub(crate) async fn thaw(&self) -> Result<(), crate::error::SandlockError> {
750        use crate::error::{SandboxRuntimeError, SandlockError};
751        let rt = self.runtime.as_ref().ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
752        let pid = rt.child_pid.ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
753        if let Some(ref resource) = rt.supervisor_resource {
754            let mut rs = resource.lock().await;
755            rs.hold_forks = false;
756            rs.held_notif_ids.clear();
757        }
758        unsafe { libc::killpg(pid, libc::SIGCONT); }
759        Ok(())
760    }
761
762    /// Capture a checkpoint of the running sandbox.
763    pub async fn checkpoint(&self) -> Result<crate::checkpoint::Checkpoint, crate::error::SandlockError> {
764        use crate::error::{SandboxRuntimeError, SandlockError};
765        let pid = self.runtime.as_ref()
766            .and_then(|rt| rt.child_pid)
767            .ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
768        self.freeze().await?;
769        let cp = crate::checkpoint::capture(pid, self);
770        self.thaw().await?;
771        cp
772    }
773
774    // ================================================================
775    // One-shot / lifecycle instance API
776    // ================================================================
777
778    /// One-shot: spawn, wait, and return the result. Stdout and stderr are
779    /// captured. This is the primary way to run a sandboxed command:
780    ///
781    /// ```ignore
782    /// let mut sandbox = Sandbox::builder()
783    ///     .fs_read("/usr")
784    ///     .name("my-sandbox")
785    ///     .build()?;
786    /// let result = sandbox.run(&["echo", "hello"]).await?;
787    /// ```
788    pub async fn run(
789        &mut self,
790        cmd: &[&str],
791    ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
792        self.do_create(cmd, true).await?;
793        self.do_start()?;
794        self.wait().await
795    }
796
797    /// Run with inherited stdio (interactive mode).
798    pub async fn run_interactive(
799        &mut self,
800        cmd: &[&str],
801    ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
802        self.do_create(cmd, false).await?;
803        self.do_start()?;
804        self.wait().await
805    }
806
807    /// One-shot run with user-supplied syscall handlers.
808    pub async fn run_with_handlers<I, S, H>(
809        &mut self,
810        cmd: &[&str],
811        handlers: I,
812    ) -> Result<crate::result::RunResult, crate::error::SandlockError>
813    where
814        I: IntoIterator<Item = (S, H)>,
815        S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
816        H: crate::seccomp::dispatch::Handler,
817    {
818        let pending = sandbox_collect_handlers(handlers, self)?;
819        self.ensure_runtime()?;
820        self.rt_mut().handlers = pending;
821        self.do_create(cmd, true).await?;
822        self.do_start()?;
823        self.wait().await
824    }
825
826    /// Interactive-stdio counterpart of `run_with_handlers`.
827    pub async fn run_interactive_with_handlers<I, S, H>(
828        &mut self,
829        cmd: &[&str],
830        handlers: I,
831    ) -> Result<crate::result::RunResult, crate::error::SandlockError>
832    where
833        I: IntoIterator<Item = (S, H)>,
834        S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
835        H: crate::seccomp::dispatch::Handler,
836    {
837        let pending = sandbox_collect_handlers(handlers, self)?;
838        self.ensure_runtime()?;
839        self.rt_mut().handlers = pending;
840        self.do_create(cmd, false).await?;
841        self.do_start()?;
842        self.wait().await
843    }
844
845    /// Dry-run: create, start, wait, collect filesystem changes, then abort.
846    pub async fn dry_run(
847        &mut self,
848        cmd: &[&str],
849    ) -> Result<crate::dry_run::DryRunResult, crate::error::SandlockError> {
850        self.on_exit = BranchAction::Keep;
851        self.on_error = BranchAction::Keep;
852        self.do_create(cmd, true).await?;
853        self.do_start()?;
854        let run_result = self.wait().await?;
855        let changes = self.collect_changes().await;
856        self.do_abort().await;
857        Ok(crate::dry_run::DryRunResult { run_result, changes })
858    }
859
860    /// Dry-run with inherited stdio.
861    pub async fn dry_run_interactive(
862        &mut self,
863        cmd: &[&str],
864    ) -> Result<crate::dry_run::DryRunResult, crate::error::SandlockError> {
865        self.on_exit = BranchAction::Keep;
866        self.on_error = BranchAction::Keep;
867        self.do_create(cmd, false).await?;
868        self.do_start()?;
869        let run_result = self.wait().await?;
870        let changes = self.collect_changes().await;
871        self.do_abort().await;
872        Ok(crate::dry_run::DryRunResult { run_result, changes })
873    }
874
875    /// Create N COW clones of this sandbox.
876    ///
877    /// `fork()` requires `init_fn` and `work_fn` to be set on the sandbox (via
878    /// `SandboxBuilder::init_fn` / `work_fn`, or `Sandbox::with_init_fn` /
879    /// `with_work_fn`). Returns an error if either is missing.
880    pub async fn fork(&mut self, n: u32) -> Result<Vec<Sandbox>, crate::error::SandlockError> {
881        use crate::error::SandboxRuntimeError;
882        use std::os::fd::{FromRawFd, OwnedFd};
883
884        // Pull init_fn / work_fn directly from self (they live on Sandbox, not
885        // Runtime, so ensure_runtime hasn't consumed them yet).
886        let init_fn = self.init_fn.take()
887            .ok_or_else(|| SandboxRuntimeError::Child("fork() requires init_fn and work_fn — use SandboxBuilder::init_fn() / work_fn() or Sandbox::with_init_fn() / with_work_fn()".into()))?;
888        let work_fn = self.work_fn.take()
889            .ok_or_else(|| SandboxRuntimeError::Child("fork() requires init_fn and work_fn — use SandboxBuilder::init_fn() / work_fn() or Sandbox::with_init_fn() / with_work_fn()".into()))?;
890
891        // Initialize the runtime block so we can record child PID / state below.
892        self.ensure_runtime()?;
893
894        let sandbox_cfg = self.clone(); // config only, no runtime
895
896        let mut ctrl_fds = [0i32; 2];
897        if unsafe { libc::pipe2(ctrl_fds.as_mut_ptr(), 0) } < 0 {
898            return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
899        }
900        let ctrl_parent = unsafe { OwnedFd::from_raw_fd(ctrl_fds[0]) };
901        let ctrl_child_fd = ctrl_fds[1];
902
903        let mut pipe_read_ends: Vec<OwnedFd> = Vec::with_capacity(n as usize);
904        let mut pipe_write_fds: Vec<i32> = Vec::with_capacity(n as usize);
905        for _ in 0..n {
906            let mut pfds = [0i32; 2];
907            if unsafe { libc::pipe(pfds.as_mut_ptr()) } >= 0 {
908                pipe_read_ends.push(unsafe { OwnedFd::from_raw_fd(pfds[0]) });
909                pipe_write_fds.push(pfds[1]);
910            } else {
911                pipe_write_fds.push(-1);
912            }
913        }
914
915        let pid = unsafe { libc::fork() };
916        if pid < 0 {
917            unsafe { libc::close(ctrl_child_fd) };
918            return Err(SandboxRuntimeError::Fork(std::io::Error::last_os_error()).into());
919        }
920
921        if pid == 0 {
922            drop(ctrl_parent);
923            unsafe { libc::setpgid(0, 0) };
924            unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL) };
925            unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
926
927            let _ = crate::landlock::confine(&sandbox_cfg);
928
929            let deny = crate::context::blocklist_syscall_numbers(&sandbox_cfg);
930            let args = crate::context::arg_filters(&sandbox_cfg);
931            let filter = match crate::seccomp::bpf::assemble_filter(&[], &deny, &args) {
932                Ok(f) => f,
933                Err(_) => unsafe { libc::_exit(1) },
934            };
935            let _ = crate::seccomp::bpf::install_deny_filter(&filter);
936
937            crate::process::CONFINED.store(true, std::sync::atomic::Ordering::Relaxed);
938
939            init_fn();
940
941            drop(pipe_read_ends);
942            crate::fork::fork_ready_loop_fn(ctrl_child_fd, n, &*work_fn, &pipe_write_fds);
943            unsafe { libc::_exit(0) };
944        }
945
946        unsafe { libc::close(ctrl_child_fd) };
947        for wfd in &pipe_write_fds {
948            if *wfd >= 0 { unsafe { libc::close(*wfd) }; }
949        }
950        self.rt_mut().child_pid = Some(pid);
951        self.rt_mut().state = RuntimeState::Running;
952
953        let ctrl_fd = ctrl_parent.as_raw_fd();
954        let mut pid_buf = vec![0u8; n as usize * 4];
955        sandbox_read_exact(ctrl_fd, &mut pid_buf);
956
957        let clone_pids: Vec<i32> = pid_buf.chunks(4)
958            .map(|c| u32::from_be_bytes(c.try_into().unwrap_or([0; 4])) as i32)
959            .collect();
960        let live_count = clone_pids.iter().filter(|&&p| p > 0).count();
961
962        let mut code_buf = vec![0u8; live_count * 4];
963        sandbox_read_exact(ctrl_fd, &mut code_buf);
964        self.rt_mut().ctrl_fd = Some(ctrl_parent);
965
966        let mut status = 0i32;
967        unsafe { libc::waitpid(pid, &mut status, 0) };
968
969        let mut code_idx = 0;
970        let mut clones = Vec::with_capacity(live_count);
971        let mut pipe_iter = pipe_read_ends.into_iter();
972
973        let rt_name = self.rt().name.clone();
974        for &clone_pid in &clone_pids {
975            let pipe = pipe_iter.next();
976            if clone_pid <= 0 { continue; }
977
978            let code = i32::from_be_bytes(
979                code_buf[code_idx * 4..(code_idx + 1) * 4].try_into().unwrap_or([0; 4])
980            );
981            code_idx += 1;
982
983            let mut clone_sb = sandbox_cfg.clone();
984            let clone_name = format!("{}-fork-{}", rt_name, clone_pid);
985            clone_sb.runtime = Some(Box::new(Runtime {
986                name: clone_name,
987                state: RuntimeState::Stopped(if code == 0 {
988                    crate::result::ExitStatus::Code(0)
989                } else if code > 0 {
990                    crate::result::ExitStatus::Code(code)
991                } else {
992                    crate::result::ExitStatus::Killed
993                }),
994                child_pid: Some(clone_pid),
995                pidfd: None,
996                notif_handle: None,
997                throttle_handle: None,
998                loadavg_handle: None,
999                _stdout_read: None,
1000                _stderr_read: None,
1001                cow_branch: None,
1002                seccomp_cow: None,
1003                supervisor_resource: None,
1004                supervisor_cow: None,
1005                supervisor_network: None,
1006                ctrl_fd: None,
1007                stdout_pipe: pipe,
1008                io_overrides: None,
1009                extra_fds: Vec::new(),
1010                http_acl_handle: None,
1011                on_bind: None,
1012                handlers: Vec::new(),
1013                ready_w: None,
1014            }));
1015            clones.push(clone_sb);
1016        }
1017
1018        Ok(clones)
1019    }
1020
1021    /// Reduce: wait for all clones, then run a reducer command.
1022    pub async fn reduce(
1023        &self,
1024        cmd: &[&str],
1025        clones: &mut [Sandbox],
1026    ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
1027        use crate::error::SandboxRuntimeError;
1028
1029        let mut combined = Vec::new();
1030        for clone in clones.iter_mut() {
1031            if let Some(ref mut rt) = clone.runtime {
1032                if let Some(pipe) = rt.stdout_pipe.take() {
1033                    combined.extend_from_slice(&sandbox_read_fd_to_end(pipe));
1034                }
1035            }
1036        }
1037
1038        let mut stdin_fds = [0i32; 2];
1039        if unsafe { libc::pipe2(stdin_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
1040            return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
1041        }
1042
1043        let write_fd = stdin_fds[1];
1044        let write_handle = tokio::task::spawn_blocking(move || {
1045            unsafe {
1046                libc::write(write_fd, combined.as_ptr() as *const _, combined.len());
1047                libc::close(write_fd);
1048            }
1049        });
1050
1051        let base_name = self.instance_name()
1052            .unwrap_or("sandbox")
1053            .to_owned();
1054        let reducer_name = base_name + "-reduce";
1055        let mut reducer = self.clone().with_name(reducer_name);
1056        reducer.ensure_runtime()?;
1057        reducer.rt_mut().io_overrides = Some((Some(stdin_fds[0]), None, None));
1058        reducer.do_create(cmd, true).await?;
1059        reducer.do_start()?;
1060        unsafe { libc::close(stdin_fds[0]) };
1061
1062        let _ = write_handle.await;
1063        reducer.wait().await
1064    }
1065
1066    /// Lazily initialize the runtime block.
1067    ///
1068    /// Called by lifecycle methods (`spawn`, `run`, `fork`, etc.) on first
1069    /// use. Validates and resolves the sandbox name. Idempotent: returns
1070    /// immediately if runtime is already set.
1071    fn ensure_runtime(&mut self) -> Result<(), crate::error::SandlockError> {
1072        if self.runtime.is_some() {
1073            return Ok(());
1074        }
1075        let name = sandbox_resolve_name(self.name.as_deref())?;
1076        self.runtime = Some(Box::new(Runtime {
1077            name,
1078            state: RuntimeState::Created,
1079            child_pid: None,
1080            pidfd: None,
1081            notif_handle: None,
1082            throttle_handle: None,
1083            loadavg_handle: None,
1084            _stdout_read: None,
1085            _stderr_read: None,
1086            cow_branch: None,
1087            seccomp_cow: None,
1088            supervisor_resource: None,
1089            supervisor_cow: None,
1090            supervisor_network: None,
1091            ctrl_fd: None,
1092            stdout_pipe: None,
1093            io_overrides: None,
1094            extra_fds: Vec::new(),
1095            http_acl_handle: None,
1096            on_bind: None,
1097            handlers: Vec::new(),
1098            ready_w: None,
1099        }));
1100        Ok(())
1101    }
1102
1103    // ================================================================
1104    // Internal: collect_changes / do_abort
1105    // ================================================================
1106
1107    async fn collect_changes(&self) -> Vec<crate::dry_run::Change> {
1108        if let Some(ref rt) = self.runtime {
1109            if let Some(ref branch) = rt.cow_branch {
1110                return branch.changes().unwrap_or_default();
1111            }
1112            if let Some(ref cow) = rt.seccomp_cow {
1113                return cow.changes().unwrap_or_default();
1114            }
1115        }
1116        Vec::new()
1117    }
1118
1119    async fn do_abort(&mut self) {
1120        if let Some(ref mut rt) = self.runtime {
1121            if let Some(branch) = rt.cow_branch.take() {
1122                let _ = branch.abort();
1123            }
1124            if let Some(ref mut cow) = rt.seccomp_cow {
1125                let _ = cow.abort();
1126            }
1127        }
1128    }
1129
1130    // ================================================================
1131    // Internal: do_create (fork + policy install; child parks at the
1132    // ready_r read, awaiting do_start to release it to execve).
1133    // ================================================================
1134
1135    async fn do_create(&mut self, cmd: &[&str], capture: bool) -> Result<(), crate::error::SandlockError> {
1136        use std::ffi::CString;
1137        use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
1138        use crate::error::SandboxRuntimeError;
1139        use crate::context::{PipePair, read_u32_fd};
1140        use crate::cow::{CowBranch, overlayfs::OverlayBranch, branchfs::BranchFsBranch};
1141        use crate::network;
1142        use crate::seccomp::ctx::SupervisorCtx;
1143        use crate::seccomp::notif::{self, NotifPolicy};
1144        use crate::seccomp::state::{ChrootState, CowState, NetworkState, PolicyFnState, ProcfsState, ResourceState, TimeRandomState};
1145        use crate::sys::syscall;
1146        use std::time::Duration;
1147
1148        self.ensure_runtime()?;
1149
1150        if !matches!(self.rt().state, RuntimeState::Created) {
1151            return Err(SandboxRuntimeError::Child("sandbox already spawned".into()).into());
1152        }
1153
1154        if cmd.is_empty() {
1155            return Err(SandboxRuntimeError::Child("empty command".into()).into());
1156        }
1157
1158        let c_cmd: Vec<CString> = cmd
1159            .iter()
1160            .map(|s| CString::new(*s).map_err(|_| SandboxRuntimeError::Child("invalid command string".into())))
1161            .collect::<Result<Vec<_>, _>>()?;
1162
1163        let nested = crate::process::is_nested();
1164
1165        let pipes = PipePair::new().map_err(SandboxRuntimeError::Io)?;
1166
1167        let resolved_net_allow = network::resolve_net_allow(&self.net_allow)
1168            .await
1169            .map_err(SandboxRuntimeError::Io)?;
1170        let virtual_etc_hosts = resolved_net_allow.etc_hosts.clone();
1171
1172        if !self.http_allow.is_empty() || !self.http_deny.is_empty() {
1173            let handle = crate::http_acl::spawn_http_acl_proxy(
1174                self.http_allow.clone(),
1175                self.http_deny.clone(),
1176                self.http_ca.as_deref(),
1177                self.http_key.as_deref(),
1178            ).await.map_err(SandboxRuntimeError::Io)?;
1179            self.rt_mut().http_acl_handle = Some(handle);
1180        }
1181
1182        let cow_branch: Option<Box<dyn CowBranch>> = match self.fs_isolation {
1183            FsIsolation::OverlayFs => {
1184                let workdir = self.workdir.as_ref()
1185                    .ok_or_else(|| crate::error::SandlockError::Runtime(SandboxRuntimeError::Child("OverlayFs requires workdir".into())))?;
1186                let storage = self.fs_storage.as_ref()
1187                    .cloned()
1188                    .unwrap_or_else(|| std::env::temp_dir().join("sandlock-overlay"));
1189                std::fs::create_dir_all(&storage)
1190                    .map_err(|e| crate::error::SandlockError::Runtime(SandboxRuntimeError::Io(e)))?;
1191                let branch = OverlayBranch::create(workdir, &storage)
1192                    .map_err(|e| crate::error::SandlockError::Runtime(SandboxRuntimeError::Branch(e)))?;
1193                Some(Box::new(branch))
1194            }
1195            FsIsolation::BranchFs => {
1196                let workdir = self.workdir.as_ref()
1197                    .ok_or_else(|| crate::error::SandlockError::Runtime(SandboxRuntimeError::Child("BranchFs requires workdir".into())))?;
1198                let branch = BranchFsBranch::create(workdir)
1199                    .map_err(|e| crate::error::SandlockError::Runtime(SandboxRuntimeError::Branch(e)))?;
1200                Some(Box::new(branch))
1201            }
1202            FsIsolation::None => None,
1203        };
1204
1205        let cow_config = cow_branch.as_ref().and_then(|b| b.child_mount_config());
1206
1207        // Seccomp COW: create the branch before fork so the child's Landlock
1208        // ruleset can include the upper layer. Binaries created inside the
1209        // workdir live in the upper dir, and Landlock checks EXECUTE on the
1210        // file's real path at execve time — so the upper dir must be granted
1211        // read+execute (READ_ACCESS) or `./created-binary` fails with EACCES.
1212        let seccomp_cow_branch = if !nested && self.workdir.is_some() && self.fs_isolation == FsIsolation::None {
1213            let workdir = self.workdir.as_ref().unwrap().clone();
1214            let storage = self.fs_storage.clone();
1215            let max_disk = self.max_disk.map(|b| b.0).unwrap_or(0);
1216            match crate::cow::seccomp::SeccompCowBranch::create(&workdir, storage.as_deref(), max_disk) {
1217                Ok(branch) => {
1218                    self.fs_readable.push(branch.upper_dir().to_path_buf());
1219                    Some(branch)
1220                }
1221                Err(e) => {
1222                    eprintln!("sandlock: seccomp COW branch creation failed: {}", e);
1223                    None
1224                }
1225            }
1226        } else {
1227            None
1228        };
1229
1230        let (stdout_r, stderr_r) = if capture {
1231            let mut stdout_fds = [0i32; 2];
1232            let mut stderr_fds = [0i32; 2];
1233            if unsafe { libc::pipe2(stdout_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
1234                return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
1235            }
1236            if unsafe { libc::pipe2(stderr_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
1237                unsafe {
1238                    libc::close(stdout_fds[0]);
1239                    libc::close(stdout_fds[1]);
1240                }
1241                return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
1242            }
1243            (
1244                Some((
1245                    unsafe { OwnedFd::from_raw_fd(stdout_fds[0]) },
1246                    unsafe { OwnedFd::from_raw_fd(stdout_fds[1]) },
1247                )),
1248                Some((
1249                    unsafe { OwnedFd::from_raw_fd(stderr_fds[0]) },
1250                    unsafe { OwnedFd::from_raw_fd(stderr_fds[1]) },
1251                )),
1252            )
1253        } else {
1254            (None, None)
1255        };
1256
1257        // Capture our PID before fork so the child can detect parent death
1258        // without assuming PID 1 is always init (wrong in containers).
1259        let parent_pid = unsafe { libc::getpid() };
1260
1261        let pid = unsafe { libc::fork() };
1262        if pid < 0 {
1263            return Err(SandboxRuntimeError::Fork(std::io::Error::last_os_error()).into());
1264        }
1265
1266        if pid == 0 {
1267            // ===== CHILD PROCESS =====
1268            let io_overrides = self.rt().io_overrides;
1269            if let Some((stdin_fd, stdout_fd, stderr_fd)) = io_overrides {
1270                if let Some(fd) = stdin_fd { unsafe { libc::dup2(fd, 0) }; }
1271                if let Some(fd) = stdout_fd { unsafe { libc::dup2(fd, 1) }; }
1272                if let Some(fd) = stderr_fd { unsafe { libc::dup2(fd, 2) }; }
1273            }
1274
1275            let extra_fds_copy = self.rt().extra_fds.clone();
1276            for &(target_fd, source_fd) in &extra_fds_copy {
1277                unsafe { libc::dup2(source_fd, target_fd) };
1278            }
1279
1280            if let Some((_, ref stdout_w)) = stdout_r {
1281                unsafe { libc::dup2(stdout_w.as_raw_fd(), 1) };
1282            }
1283            if let Some((_, ref stderr_w)) = stderr_r {
1284                unsafe { libc::dup2(stderr_w.as_raw_fd(), 2) };
1285            }
1286            drop(stdout_r);
1287            drop(stderr_r);
1288
1289            let gather_keep_fds: Vec<i32> = extra_fds_copy.iter().map(|&(target, _)| target).collect();
1290
1291            let extra_syscalls: Vec<u32> = self.rt().handlers
1292                .iter()
1293                .map(|h| h.0 as u32)
1294                .collect();
1295
1296            let sandbox_name = self.rt().name.clone();
1297            context::confine_child(context::ChildSpawnArgs {
1298                sandbox: self,
1299                cmd: &c_cmd,
1300                pipes: &pipes,
1301                cow_config: cow_config.as_ref(),
1302                nested,
1303                keep_fds: &gather_keep_fds,
1304                sandbox_name: Some(sandbox_name.as_str()),
1305                extra_syscalls: &extra_syscalls,
1306                parent_pid,
1307            });
1308        }
1309
1310        // ===== PARENT PROCESS =====
1311        self.rt_mut().cow_branch = cow_branch;
1312
1313        drop(pipes.notif_w);
1314        drop(pipes.ready_r);
1315
1316        self.rt_mut()._stdout_read = stdout_r.map(|(r, _w)| r);
1317        self.rt_mut()._stderr_read = stderr_r.map(|(r, _w)| r);
1318
1319        self.rt_mut().child_pid = Some(pid);
1320        // State remains `Created` until `do_start` writes ready_w to release
1321        // the child to execve.
1322
1323        let pidfd = match syscall::pidfd_open(pid as u32, 0) {
1324            Ok(fd) => Some(fd),
1325            Err(_) => None,
1326        };
1327
1328        let notif_fd_num = read_u32_fd(pipes.notif_r.as_raw_fd())
1329            .map_err(|e| SandboxRuntimeError::Child(format!("read notif fd from child: {}", e)))?;
1330
1331        let is_nested_mode = notif_fd_num == 0;
1332
1333        let notif_fd = if is_nested_mode {
1334            None
1335        } else if let Some(ref pfd) = pidfd {
1336            Some(syscall::pidfd_getfd(pfd, notif_fd_num as i32, 0)
1337                .map_err(|e| SandboxRuntimeError::Child(format!("pidfd_getfd: {}", e)))?)
1338        } else {
1339            let path = format!("/proc/{}/fd/{}", pid, notif_fd_num);
1340            let cpath = CString::new(path).unwrap();
1341            let raw = unsafe { libc::open(cpath.as_ptr(), libc::O_RDWR) };
1342            if raw < 0 {
1343                return Err(SandboxRuntimeError::Child("failed to open notif fd from /proc".into()).into());
1344            }
1345            Some(unsafe { OwnedFd::from_raw_fd(raw) })
1346        };
1347
1348        if let Some(notif_fd) = notif_fd {
1349            if self.time_start.is_some() || self.random_seed.is_some() {
1350                let time_offset = self.time_start.map(|t| crate::time::calculate_time_offset(t));
1351                if let Err(e) = crate::vdso::patch(pid, time_offset, self.random_seed.is_some()) {
1352                    eprintln!("sandlock: pre-exec vDSO patching failed (will retry after exec): {}", e);
1353                }
1354            }
1355
1356            let time_offset_val = self.time_start
1357                .map(|t| crate::time::calculate_time_offset(t))
1358                .unwrap_or(0);
1359
1360            let rt_name = self.rt().name.clone();
1361            let notif_policy = NotifPolicy {
1362                max_memory_bytes: self.max_memory.map(|m| m.0).unwrap_or(0),
1363                max_processes: self.max_processes,
1364                has_memory_limit: self.max_memory.is_some(),
1365                has_net_allowlist: !self.net_allow.is_empty()
1366                    || self.policy_fn.is_some()
1367                    || !self.http_allow.is_empty()
1368                    || !self.http_deny.is_empty(),
1369                has_random_seed: self.random_seed.is_some(),
1370                has_time_start: self.time_start.is_some(),
1371                argv_safety_required: self.policy_fn.is_some()
1372                    || self.rt().handlers.iter().any(|h| {
1373                        h.0 == libc::SYS_execve || h.0 == libc::SYS_execveat
1374                    }),
1375                time_offset: time_offset_val,
1376                num_cpus: self.num_cpus,
1377                port_remap: self.port_remap,
1378                cow_enabled: self.workdir.is_some() && self.fs_isolation == FsIsolation::None,
1379                chroot_root: self.chroot.as_ref().and_then(|p| std::fs::canonicalize(p).ok()),
1380                chroot_readable: self.fs_readable.clone(),
1381                chroot_writable: self.fs_writable.clone(),
1382                chroot_denied: self.fs_denied.clone(),
1383                chroot_mounts: self.fs_mount.iter().map(|(vp, hp)| {
1384                    (vp.clone(), std::fs::canonicalize(hp).unwrap_or_else(|_| hp.clone()))
1385                }).collect(),
1386                deterministic_dirs: self.deterministic_dirs,
1387                virtual_hostname: Some(rt_name),
1388                has_http_acl: !self.http_allow.is_empty() || !self.http_deny.is_empty(),
1389                virtual_etc_hosts,
1390            };
1391
1392            use rand::SeedableRng;
1393            use rand_chacha::ChaCha8Rng;
1394
1395            let random_state = self.random_seed.map(|seed| ChaCha8Rng::seed_from_u64(seed));
1396            let time_offset = self.time_start.map(|t| crate::time::calculate_time_offset(t));
1397
1398            let time_random_state = TimeRandomState::new(time_offset, random_state);
1399
1400            let mut net_state = NetworkState::new();
1401            let no_rules = self.net_allow.is_empty();
1402            let policy_from = |resolved: &network::ResolvedNetAllow| {
1403                if no_rules || resolved.any_ip_all_ports {
1404                    crate::seccomp::notif::NetworkPolicy::Unrestricted
1405                } else {
1406                    use crate::seccomp::notif::PortAllow;
1407                    let per_ip = resolved
1408                        .per_ip
1409                        .iter()
1410                        .map(|(ip, ports)| {
1411                            let allow = if resolved.per_ip_all_ports.contains(ip) {
1412                                PortAllow::Any
1413                            } else {
1414                                PortAllow::Specific(ports.clone())
1415                            };
1416                            (*ip, allow)
1417                        })
1418                        .collect();
1419                    crate::seccomp::notif::NetworkPolicy::AllowList {
1420                        per_ip,
1421                        any_ip_ports: resolved.any_ip_ports.clone(),
1422                    }
1423                }
1424            };
1425            net_state.tcp_policy = policy_from(&resolved_net_allow.tcp);
1426            net_state.udp_policy = policy_from(&resolved_net_allow.udp);
1427            net_state.icmp_policy = policy_from(&resolved_net_allow.icmp);
1428            net_state.http_acl_addr = self.rt().http_acl_handle.as_ref().map(|h| h.addr);
1429            net_state.http_acl_ports = self.http_ports.iter().copied().collect();
1430            net_state.http_acl_orig_dest = self.rt().http_acl_handle.as_ref().map(|h| h.orig_dest.clone());
1431            if let Some(cb) = self.rt_mut().on_bind.take() {
1432                net_state.port_map.on_bind = Some(cb);
1433            }
1434
1435            let procfs_state = ProcfsState::new();
1436
1437            let mut res_state = ResourceState::new(
1438                notif_policy.max_memory_bytes,
1439                notif_policy.max_processes,
1440            );
1441            res_state.proc_count = 1;
1442
1443            let mut cow_state = CowState::new();
1444            cow_state.branch = seccomp_cow_branch;
1445
1446            let mut policy_fn_state = PolicyFnState::new();
1447
1448            if let Ok(mut denied) = policy_fn_state.denied_paths.write() {
1449                for path in &self.fs_denied {
1450                    denied.insert(path.to_string_lossy().into_owned());
1451                }
1452            }
1453
1454            if let Some(ref callback) = self.policy_fn {
1455                let mut allowed_ips: std::collections::HashSet<std::net::IpAddr> =
1456                    std::collections::HashSet::new();
1457                for p in [&net_state.tcp_policy, &net_state.udp_policy, &net_state.icmp_policy] {
1458                    if let crate::seccomp::notif::NetworkPolicy::AllowList { per_ip, .. } = p {
1459                        allowed_ips.extend(per_ip.keys().copied());
1460                    }
1461                }
1462                let live = crate::policy_fn::LivePolicy {
1463                    allowed_ips,
1464                    max_memory_bytes: notif_policy.max_memory_bytes,
1465                    max_processes: notif_policy.max_processes,
1466                };
1467                let ceiling = live.clone();
1468                let live = std::sync::Arc::new(std::sync::RwLock::new(live));
1469                let denied_paths = policy_fn_state.denied_paths.clone();
1470                let pid_overrides = net_state.pid_ip_overrides.clone();
1471                policy_fn_state.live_policy = Some(live.clone());
1472                let tx = crate::policy_fn::spawn_policy_fn(
1473                    callback.clone(), live, ceiling, pid_overrides, denied_paths,
1474                );
1475                policy_fn_state.event_tx = Some(tx);
1476            }
1477
1478            let chroot_state = ChrootState::new();
1479
1480            let notif_raw_fd = notif_fd.as_raw_fd();
1481            let child_pidfd_raw = pidfd.as_ref().map(|pfd| pfd.as_raw_fd());
1482
1483            let res_state = Arc::new(tokio::sync::Mutex::new(res_state));
1484            self.rt_mut().supervisor_resource = Some(Arc::clone(&res_state));
1485
1486            let cow_state = Arc::new(tokio::sync::Mutex::new(cow_state));
1487            self.rt_mut().supervisor_cow = Some(Arc::clone(&cow_state));
1488
1489            let net_state = Arc::new(tokio::sync::Mutex::new(net_state));
1490            self.rt_mut().supervisor_network = Some(Arc::clone(&net_state));
1491
1492            let procfs_state = Arc::new(tokio::sync::Mutex::new(procfs_state));
1493            let time_random_state = Arc::new(tokio::sync::Mutex::new(time_random_state));
1494            let policy_fn_state = Arc::new(tokio::sync::Mutex::new(policy_fn_state));
1495            let chroot_state = Arc::new(tokio::sync::Mutex::new(chroot_state));
1496            let processes = Arc::new(crate::seccomp::state::ProcessIndex::new());
1497
1498            let ctx = Arc::new(SupervisorCtx {
1499                resource: Arc::clone(&res_state),
1500                cow: Arc::clone(&cow_state),
1501                procfs: Arc::clone(&procfs_state),
1502                network: Arc::clone(&net_state),
1503                time_random: Arc::clone(&time_random_state),
1504                policy_fn: Arc::clone(&policy_fn_state),
1505                chroot: Arc::clone(&chroot_state),
1506                netlink: Arc::new(crate::netlink::NetlinkState::new()),
1507                processes: Arc::clone(&processes),
1508                policy: Arc::new(notif_policy),
1509                child_pidfd: child_pidfd_raw,
1510                notif_fd: notif_raw_fd,
1511            });
1512
1513            let handlers = std::mem::take(&mut self.rt_mut().handlers);
1514            let (startup_tx, startup_rx) = tokio::sync::oneshot::channel();
1515            self.rt_mut().notif_handle = Some(tokio::spawn(
1516                notif::supervisor(notif_fd, ctx, handlers, startup_tx),
1517            ));
1518            // Wait for the supervisor to register the notif fd with the IO
1519            // driver before we release the child to execve. Otherwise an
1520            // early traced syscall would queue a notification on a fd no
1521            // one is polling, and the child would block until the next
1522            // `block_on` re-enters the runtime. Critical for current-thread
1523            // runtimes, harmless overhead for multi-thread.
1524            match startup_rx.await {
1525                Ok(Ok(())) => {}
1526                Ok(Err(e)) => return Err(SandboxRuntimeError::Io(e).into()),
1527                Err(_) => {
1528                    return Err(SandboxRuntimeError::Child(
1529                        "seccomp supervisor exited during startup".into(),
1530                    ).into());
1531                }
1532            }
1533
1534            let la_resource = Arc::clone(&res_state);
1535            self.rt_mut().loadavg_handle = Some(tokio::spawn(async move {
1536                let mut interval = tokio::time::interval(Duration::from_secs(5));
1537                interval.tick().await;
1538                loop {
1539                    interval.tick().await;
1540                    let mut rs = la_resource.lock().await;
1541                    let running = rs.proc_count;
1542                    rs.load_avg.sample(running);
1543                }
1544            }));
1545        }
1546
1547        if let Some(cpu_pct) = self.max_cpu {
1548            if cpu_pct < 100 {
1549                let child_pid = pid;
1550                self.rt_mut().throttle_handle = Some(tokio::spawn(sandbox_throttle_cpu(child_pid, cpu_pct)));
1551            }
1552        }
1553
1554        self.rt_mut().pidfd = pidfd;
1555        self.rt_mut().ready_w = Some(pipes.ready_w);
1556
1557        Ok(())
1558    }
1559
1560    // ================================================================
1561    // Internal: do_start (release the parked child to execve)
1562    // ================================================================
1563
1564    fn do_start(&mut self) -> Result<(), crate::error::SandlockError> {
1565        use std::os::fd::AsRawFd;
1566        use crate::context::write_u32_fd;
1567        use crate::error::SandboxRuntimeError;
1568
1569        if !matches!(self.rt().state, RuntimeState::Created) {
1570            return Err(SandboxRuntimeError::Child("start() requires a created sandbox".into()).into());
1571        }
1572        let ready_w = self.rt_mut().ready_w.take()
1573            .ok_or_else(|| SandboxRuntimeError::Child("start() called without a prior create()".into()))?;
1574        write_u32_fd(ready_w.as_raw_fd(), 1)
1575            .map_err(|e| SandboxRuntimeError::Child(format!("write ready signal: {}", e)))?;
1576        drop(ready_w);
1577        self.rt_mut().state = RuntimeState::Running;
1578        Ok(())
1579    }
1580}
1581
1582// ================================================================
1583// Drop for Sandbox — kills and reaps child if still running
1584// ================================================================
1585
1586impl Drop for Sandbox {
1587    fn drop(&mut self) {
1588        if let Some(ref mut rt) = self.runtime {
1589            if let Some(pid) = rt.child_pid {
1590                if matches!(rt.state, RuntimeState::Created | RuntimeState::Running | RuntimeState::Paused) {
1591                    unsafe { libc::killpg(pid, libc::SIGKILL) };
1592                    let mut status: i32 = 0;
1593                    unsafe { libc::waitpid(pid, &mut status, 0) };
1594                }
1595            }
1596
1597            if let Some(h) = rt.notif_handle.take() { h.abort(); }
1598            if let Some(h) = rt.throttle_handle.take() { h.abort(); }
1599            if let Some(h) = rt.loadavg_handle.take() { h.abort(); }
1600
1601            let is_error = matches!(
1602                rt.state,
1603                RuntimeState::Stopped(ref s) if !matches!(s, crate::result::ExitStatus::Code(0))
1604            );
1605            let action = if is_error { &self.on_error } else { &self.on_exit };
1606            let action = action.clone();
1607
1608            if let Some(ref branch) = rt.cow_branch {
1609                match action {
1610                    BranchAction::Commit => { let _ = branch.commit(); }
1611                    BranchAction::Abort => { let _ = branch.abort(); }
1612                    BranchAction::Keep => {}
1613                }
1614            }
1615
1616            if let Some(ref mut cow) = rt.seccomp_cow {
1617                match action {
1618                    BranchAction::Commit => { let _ = cow.commit(); }
1619                    BranchAction::Abort => { let _ = cow.abort(); }
1620                    BranchAction::Keep => {}
1621                }
1622            }
1623        }
1624    }
1625}
1626
1627// ================================================================
1628// CPU throttle
1629// ================================================================
1630
1631async fn sandbox_throttle_cpu(pid: i32, cpu_pct: u8) {
1632    use std::time::Duration;
1633    let period = Duration::from_millis(100);
1634    let run_time = period * cpu_pct as u32 / 100;
1635    let stop_time = period - run_time;
1636    loop {
1637        tokio::time::sleep(run_time).await;
1638        if unsafe { libc::killpg(pid, libc::SIGSTOP) } < 0 { break; }
1639        tokio::time::sleep(stop_time).await;
1640        if unsafe { libc::killpg(pid, libc::SIGCONT) } < 0 { break; }
1641    }
1642}
1643
1644// ================================================================
1645// Process name resolution
1646// ================================================================
1647
1648static NEXT_SANDBOX_NAME: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(1);
1649
1650fn sandbox_resolve_name(name: Option<&str>) -> Result<String, crate::error::SandlockError> {
1651    match name {
1652        Some(n) => sandbox_validate_name(n.to_string()),
1653        None => Ok(format!(
1654            "sandbox-{}-{}",
1655            std::process::id(),
1656            NEXT_SANDBOX_NAME.fetch_add(1, std::sync::atomic::Ordering::Relaxed),
1657        )),
1658    }
1659}
1660
1661fn sandbox_validate_name(name: String) -> Result<String, crate::error::SandlockError> {
1662    use crate::error::SandboxRuntimeError;
1663    if name.is_empty() {
1664        return Err(SandboxRuntimeError::Child("sandbox name must not be empty".into()).into());
1665    }
1666    if name.len() > 64 {
1667        return Err(SandboxRuntimeError::Child("sandbox name must be at most 64 bytes".into()).into());
1668    }
1669    if name.as_bytes().contains(&0) {
1670        return Err(SandboxRuntimeError::Child("sandbox name must not contain NUL bytes".into()).into());
1671    }
1672    Ok(name)
1673}
1674
1675// ================================================================
1676// I/O helpers (private)
1677// ================================================================
1678
1679fn sandbox_read_exact(fd: i32, buf: &mut [u8]) {
1680    let mut off = 0;
1681    while off < buf.len() {
1682        let r = unsafe { libc::read(fd, buf[off..].as_mut_ptr() as *mut _, buf.len() - off) };
1683        if r <= 0 { break; }
1684        off += r as usize;
1685    }
1686}
1687
/// Drains `fd` until end-of-file and returns everything that was read.
///
/// Takes ownership of the descriptor, so it is closed when the function
/// returns. Reading is best-effort: if a read error occurs, the bytes
/// gathered before the failure are returned and the error is discarded.
fn sandbox_read_fd_to_end(fd: std::os::fd::OwnedFd) -> Vec<u8> {
    use std::io::Read;

    // `File: From<OwnedFd>` transfers ownership safely, so no raw-fd round
    // trip or `unsafe` block is needed.
    let mut file = std::fs::File::from(fd);
    let mut buf = Vec::new();
    let _ = file.read_to_end(&mut buf);
    buf
}
1697
1698fn sandbox_wait_status_to_exit(status: i32) -> crate::result::ExitStatus {
1699    use crate::result::ExitStatus;
1700    if libc::WIFEXITED(status) {
1701        ExitStatus::Code(libc::WEXITSTATUS(status))
1702    } else if libc::WIFSIGNALED(status) {
1703        let sig = libc::WTERMSIG(status);
1704        if sig == libc::SIGKILL {
1705            ExitStatus::Killed
1706        } else {
1707            ExitStatus::Signal(sig)
1708        }
1709    } else {
1710        ExitStatus::Killed
1711    }
1712}
1713
1714fn sandbox_collect_handlers<I, S, H>(
1715    handlers: I,
1716    sandbox: &Sandbox,
1717) -> Result<Vec<(i64, Arc<dyn crate::seccomp::dispatch::Handler>)>, crate::error::SandlockError>
1718where
1719    I: IntoIterator<Item = (S, H)>,
1720    S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
1721    H: crate::seccomp::dispatch::Handler,
1722{
1723    use crate::seccomp::dispatch::{Handler, HandlerError};
1724
1725    let pending: Vec<(i64, Arc<dyn Handler>)> = handlers
1726        .into_iter()
1727        .map(|(syscall, handler)| {
1728            let nr = syscall.try_into().map_err(HandlerError::from)?.raw();
1729            let h: Arc<dyn Handler> = Arc::new(handler);
1730            Ok::<_, HandlerError>((nr, h))
1731        })
1732        .collect::<Result<_, _>>()?;
1733
1734    let nrs: Vec<i64> = pending.iter().map(|(nr, _)| *nr).collect();
1735    crate::seccomp::dispatch::validate_handler_syscalls_against_policy(&nrs, sandbox)
1736        .map_err(|syscall_nr| HandlerError::OnDenySyscall { syscall_nr })?;
1737
1738    Ok(pending)
1739}
1740
1741fn validate_syscall_names(names: &[String]) -> Result<(), SandboxError> {
1742    let unknown: Vec<&str> = names
1743        .iter()
1744        .map(String::as_str)
1745        .filter(|name| crate::context::syscall_name_to_nr(name).is_none())
1746        .collect();
1747    if unknown.is_empty() {
1748        Ok(())
1749    } else {
1750        Err(SandboxError::Invalid(format!(
1751            "unknown syscall name(s): {}",
1752            unknown.join(", ")
1753        )))
1754    }
1755}
1756
/// Fluent builder for `Sandbox`.
///
/// When the `cli` feature is enabled this struct also derives `clap::Args` so
/// that the CLI can expose all per-field flags via `#[clap(flatten)]` without
/// duplicating the flag declarations.
// NOTE(review): notes added to fields below deliberately use `//` rather than
// `///`, since clap's derive reads `///` doc comments on `arg` fields as help
// text; keep that in mind when documenting further fields.
#[derive(Default)]
#[cfg_attr(feature = "cli", derive(clap::Args))]
pub struct SandboxBuilder {
    // Paths appended by `fs_read()` / `fs_read_if_exists()`.
    #[cfg_attr(feature = "cli", arg(short = 'r', long = "fs-read", value_name = "PATH"))]
    pub fs_readable: Vec<PathBuf>,

    // Paths appended by `fs_write()`.
    #[cfg_attr(feature = "cli", arg(short = 'w', long = "fs-write", value_name = "PATH"))]
    pub fs_writable: Vec<PathBuf>,

    // Paths appended by `fs_deny()`.
    #[cfg_attr(feature = "cli", arg(long = "fs-deny", value_name = "PATH"))]
    pub fs_denied: Vec<PathBuf>,

    /// Extra syscall names to deny (in addition to Sandlock's default blocklist)
    // Checked by `validate_syscall_names` at the start of `build_unchecked()`.
    #[cfg_attr(feature = "cli", arg(long = "extra-deny-syscall", value_name = "NAME"))]
    pub extra_deny_syscalls: Vec<String>,

    /// Extra syscall group names to allow (e.g. sysv_ipc)
    #[cfg_attr(feature = "cli", arg(long = "extra-allow-syscall", value_name = "NAME"))]
    pub extra_allow_syscalls: Vec<String>,

    /// Outbound endpoint allow rule. Repeatable. Each value is
    /// `host:port[,port,...]` (IP-restricted), `:port` or `*:port`
    /// (any IP), or `udp://...` / `icmp://...` for UDP/ICMP.
    /// Examples: `api.openai.com:443`, `github.com:22,443`, `:8080`.
    // Kept as raw strings; parsed with `NetAllow::parse` in `build_unchecked()`.
    #[cfg_attr(feature = "cli", arg(long = "net-allow", value_name = "SPEC"))]
    pub net_allow: Vec<String>,

    // Ports appended by `net_bind_port()`.
    #[cfg_attr(feature = "cli", arg(long = "net-bind"))]
    pub net_bind: Vec<u16>,

    // Raw HTTP allow rules; parsed with `HttpRule::parse` in `build_unchecked()`.
    #[cfg_attr(feature = "cli", arg(long = "http-allow", value_name = "RULE"))]
    pub http_allow: Vec<String>,

    // Raw HTTP deny rules; parsed with `HttpRule::parse` in `build_unchecked()`.
    #[cfg_attr(feature = "cli", arg(long = "http-deny", value_name = "RULE"))]
    pub http_deny: Vec<String>,

    /// TCP ports to intercept for HTTP ACL (default: 80, plus 443 with --http-ca)
    // The default is only filled in by `build_unchecked()` when this list is
    // empty and at least one HTTP allow/deny rule exists.
    #[cfg_attr(feature = "cli", arg(long = "http-port", value_name = "PORT"))]
    pub http_ports: Vec<u16>,

    /// PEM CA certificate for HTTPS MITM (enables port 443 interception)
    #[cfg_attr(feature = "cli", arg(long = "http-ca", value_name = "PATH"))]
    pub http_ca: Option<PathBuf>,

    /// PEM CA private key for HTTPS MITM (required with --http-ca)
    // `build_unchecked()` rejects configurations where only one of
    // `http_ca` / `http_key` is set.
    #[cfg_attr(feature = "cli", arg(long = "http-key", value_name = "PATH"))]
    pub http_key: Option<PathBuf>,

    // max_memory uses a string in the CLI (e.g. "512M"); not directly clap-friendly as ByteSize.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub max_memory: Option<ByteSize>,

    #[cfg_attr(feature = "cli", arg(short = 'P', long = "max-processes"))]
    pub max_processes: Option<u32>,

    #[cfg_attr(feature = "cli", arg(long = "max-open-files"))]
    pub max_open_files: Option<u32>,

    // CPU percentage; `build_unchecked()` rejects 0 and values above 100.
    #[cfg_attr(feature = "cli", arg(short = 'c', long = "cpu"))]
    pub max_cpu: Option<u8>,

    #[cfg_attr(feature = "cli", arg(long = "random-seed"))]
    pub random_seed: Option<u64>,

    // time_start requires ISO 8601 string parsing; not directly clap-friendly as SystemTime.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub time_start: Option<SystemTime>,

    #[cfg_attr(feature = "cli", arg(long = "no-randomize-memory"))]
    pub no_randomize_memory: bool,

    #[cfg_attr(feature = "cli", arg(long = "no-huge-pages"))]
    pub no_huge_pages: bool,

    #[cfg_attr(feature = "cli", arg(long = "no-coredump"))]
    pub no_coredump: bool,

    #[cfg_attr(feature = "cli", arg(long = "deterministic-dirs"))]
    pub deterministic_dirs: bool,

    // fs_isolation requires string-to-enum parsing; not directly clap-friendly as FsIsolation.
    // When left as `None`, `build_unchecked()` falls back to `FsIsolation::default()`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub fs_isolation: Option<FsIsolation>,

    #[cfg_attr(feature = "cli", arg(long = "workdir"))]
    pub workdir: Option<PathBuf>,

    #[cfg_attr(feature = "cli", arg(long = "cwd"))]
    pub cwd: Option<PathBuf>,

    #[cfg_attr(feature = "cli", arg(long = "fs-storage", value_name = "PATH"))]
    pub fs_storage: Option<PathBuf>,

    // max_disk uses a string in the CLI (e.g. "10G"); not directly clap-friendly as ByteSize.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub max_disk: Option<ByteSize>,

    // on_exit/on_error are not exposed as CLI flags.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub on_exit: Option<BranchAction>,

    #[cfg_attr(feature = "cli", clap(skip))]
    pub on_error: Option<BranchAction>,

    // fs_mount requires VIRTUAL:HOST string splitting; not directly clap-friendly as Vec<(PathBuf,PathBuf)>.
    // Each entry is (virtual path, host path), in the order `fs_mount()` receives them.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub fs_mount: Vec<(PathBuf, PathBuf)>,

    #[cfg_attr(feature = "cli", arg(long = "chroot"))]
    pub chroot: Option<PathBuf>,

    #[cfg_attr(feature = "cli", arg(long = "clean-env"))]
    pub clean_env: bool,

    // env requires KEY=VALUE string splitting; not directly clap-friendly as HashMap.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub env: HashMap<String, String>,

    // gpu_devices in CLI uses Vec<u32> with value_delimiter; SandboxBuilder stores Option<Vec<u32>>.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub gpu_devices: Option<Vec<u32>>,

    // cpu_cores in CLI uses Vec<u32> with value_delimiter; SandboxBuilder stores Option<Vec<u32>>.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub cpu_cores: Option<Vec<u32>>,

    #[cfg_attr(feature = "cli", arg(long = "num-cpus"))]
    pub num_cpus: Option<u32>,

    #[cfg_attr(feature = "cli", arg(long = "port-remap"))]
    pub port_remap: bool,

    #[cfg_attr(feature = "cli", arg(long = "uid"))]
    pub uid: Option<u32>,

    // Internal callback — never a CLI flag.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub policy_fn: Option<crate::policy_fn::PolicyCallback>,

    // Sandbox instance name — stored for transfer into the Sandbox at build time.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub name: Option<String>,

    // COW fork init function — runs once in the child before COW cloning.
    // Not carried over by `Clone` (an `FnOnce` box cannot be duplicated).
    #[cfg_attr(feature = "cli", clap(skip))]
    pub(crate) init_fn: Option<Box<dyn FnOnce() + Send + 'static>>,

    // COW fork work function — runs in each COW clone.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub(crate) work_fn: Option<Arc<dyn Fn(u32) + Send + Sync + 'static>>,
}
1913
1914impl std::fmt::Debug for SandboxBuilder {
1915    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1916        f.debug_struct("SandboxBuilder")
1917            .field("fs_readable", &self.fs_readable)
1918            .field("fs_writable", &self.fs_writable)
1919            .field("max_memory", &self.max_memory)
1920            .field("max_processes", &self.max_processes)
1921            .field("policy_fn", &self.policy_fn.as_ref().map(|_| "<callback>"))
1922            .finish_non_exhaustive()
1923    }
1924}
1925
1926impl Clone for SandboxBuilder {
1927    /// Clone a `SandboxBuilder`. All config and callback fields are cloned.
1928    /// `init_fn` (FnOnce) is dropped to `None` on the clone; `work_fn` clones
1929    /// via Arc. If the clone also needs an init function, set it again with
1930    /// `.init_fn(...)`.
1931    fn clone(&self) -> Self {
1932        Self {
1933            fs_readable: self.fs_readable.clone(),
1934            fs_writable: self.fs_writable.clone(),
1935            fs_denied: self.fs_denied.clone(),
1936            extra_deny_syscalls: self.extra_deny_syscalls.clone(),
1937            extra_allow_syscalls: self.extra_allow_syscalls.clone(),
1938            net_allow: self.net_allow.clone(),
1939            net_bind: self.net_bind.clone(),
1940            http_allow: self.http_allow.clone(),
1941            http_deny: self.http_deny.clone(),
1942            http_ports: self.http_ports.clone(),
1943            http_ca: self.http_ca.clone(),
1944            http_key: self.http_key.clone(),
1945            max_memory: self.max_memory,
1946            max_processes: self.max_processes,
1947            max_open_files: self.max_open_files,
1948            max_cpu: self.max_cpu,
1949            random_seed: self.random_seed,
1950            time_start: self.time_start,
1951            no_randomize_memory: self.no_randomize_memory,
1952            no_huge_pages: self.no_huge_pages,
1953            no_coredump: self.no_coredump,
1954            deterministic_dirs: self.deterministic_dirs,
1955            fs_isolation: self.fs_isolation.clone(),
1956            workdir: self.workdir.clone(),
1957            cwd: self.cwd.clone(),
1958            fs_storage: self.fs_storage.clone(),
1959            max_disk: self.max_disk,
1960            on_exit: self.on_exit.clone(),
1961            on_error: self.on_error.clone(),
1962            fs_mount: self.fs_mount.clone(),
1963            chroot: self.chroot.clone(),
1964            clean_env: self.clean_env,
1965            env: self.env.clone(),
1966            gpu_devices: self.gpu_devices.clone(),
1967            cpu_cores: self.cpu_cores.clone(),
1968            num_cpus: self.num_cpus,
1969            port_remap: self.port_remap,
1970            uid: self.uid,
1971            policy_fn: self.policy_fn.clone(),
1972            name: self.name.clone(),
1973            // init_fn (FnOnce) cannot be cloned — drop to None.
1974            init_fn: None,
1975            // work_fn is Arc-wrapped — clone bumps the reference count.
1976            work_fn: self.work_fn.clone(),
1977        }
1978    }
1979}
1980
1981impl SandboxBuilder {
1982    pub fn fs_write(mut self, path: impl Into<PathBuf>) -> Self {
1983        self.fs_writable.push(path.into());
1984        self
1985    }
1986
1987    pub fn fs_read(mut self, path: impl Into<PathBuf>) -> Self {
1988        self.fs_readable.push(path.into());
1989        self
1990    }
1991
1992    pub fn fs_read_if_exists(self, path: impl Into<PathBuf>) -> Self {
1993        let path = path.into();
1994        if path.exists() {
1995            self.fs_read(path)
1996        } else {
1997            self
1998        }
1999    }
2000
2001    pub fn fs_deny(mut self, path: impl Into<PathBuf>) -> Self {
2002        self.fs_denied.push(path.into());
2003        self
2004    }
2005
2006    pub fn extra_deny_syscalls(mut self, calls: Vec<String>) -> Self {
2007        self.extra_deny_syscalls.extend(calls);
2008        self
2009    }
2010
2011    pub fn extra_allow_syscalls(mut self, names: Vec<String>) -> Self {
2012        self.extra_allow_syscalls.extend(names);
2013        self
2014    }
2015
2016    /// Add a network endpoint rule. Spec is `host:port[,port,...]`,
2017    /// `:port`, or `*:port`. Validated at `build()` time so callers
2018    /// receive parse errors via the standard `SandboxBuilder` flow.
2019    ///
2020    /// Examples:
2021    /// - `.net_allow("api.openai.com:443")` — HTTPS to OpenAI only
2022    /// - `.net_allow("github.com:22,443")` — SSH and HTTPS to GitHub
2023    /// - `.net_allow(":8080")` — any IP on port 8080
2024    pub fn net_allow(mut self, spec: impl Into<String>) -> Self {
2025        self.net_allow.push(spec.into());
2026        self
2027    }
2028
2029    pub fn net_bind_port(mut self, port: u16) -> Self {
2030        self.net_bind.push(port);
2031        self
2032    }
2033
2034    pub fn http_allow(mut self, rule: &str) -> Self {
2035        self.http_allow.push(rule.to_string());
2036        self
2037    }
2038
2039    pub fn http_deny(mut self, rule: &str) -> Self {
2040        self.http_deny.push(rule.to_string());
2041        self
2042    }
2043
2044    pub fn http_port(mut self, port: u16) -> Self {
2045        self.http_ports.push(port);
2046        self
2047    }
2048
2049    pub fn http_ca(mut self, path: impl Into<PathBuf>) -> Self {
2050        self.http_ca = Some(path.into());
2051        self
2052    }
2053
2054    pub fn http_key(mut self, path: impl Into<PathBuf>) -> Self {
2055        self.http_key = Some(path.into());
2056        self
2057    }
2058
2059    pub fn max_memory(mut self, size: ByteSize) -> Self {
2060        self.max_memory = Some(size);
2061        self
2062    }
2063
2064    pub fn max_processes(mut self, n: u32) -> Self {
2065        self.max_processes = Some(n);
2066        self
2067    }
2068
2069    pub fn max_open_files(mut self, n: u32) -> Self {
2070        self.max_open_files = Some(n);
2071        self
2072    }
2073
2074    pub fn max_cpu(mut self, pct: u8) -> Self {
2075        self.max_cpu = Some(pct);
2076        self
2077    }
2078
2079    pub fn random_seed(mut self, seed: u64) -> Self {
2080        self.random_seed = Some(seed);
2081        self
2082    }
2083
2084    pub fn time_start(mut self, t: SystemTime) -> Self {
2085        self.time_start = Some(t);
2086        self
2087    }
2088
2089    pub fn no_randomize_memory(mut self, v: bool) -> Self {
2090        self.no_randomize_memory = v;
2091        self
2092    }
2093
2094    pub fn no_huge_pages(mut self, v: bool) -> Self {
2095        self.no_huge_pages = v;
2096        self
2097    }
2098
2099    pub fn no_coredump(mut self, v: bool) -> Self {
2100        self.no_coredump = v;
2101        self
2102    }
2103
2104    pub fn deterministic_dirs(mut self, v: bool) -> Self {
2105        self.deterministic_dirs = v;
2106        self
2107    }
2108
2109    pub fn fs_isolation(mut self, iso: FsIsolation) -> Self {
2110        self.fs_isolation = Some(iso);
2111        self
2112    }
2113
2114    pub fn workdir(mut self, path: impl Into<PathBuf>) -> Self {
2115        self.workdir = Some(path.into());
2116        self
2117    }
2118
2119    pub fn cwd(mut self, path: impl Into<PathBuf>) -> Self {
2120        self.cwd = Some(path.into());
2121        self
2122    }
2123
2124    pub fn fs_storage(mut self, path: impl Into<PathBuf>) -> Self {
2125        self.fs_storage = Some(path.into());
2126        self
2127    }
2128
2129    pub fn max_disk(mut self, size: ByteSize) -> Self {
2130        self.max_disk = Some(size);
2131        self
2132    }
2133
2134    pub fn on_exit(mut self, action: BranchAction) -> Self {
2135        self.on_exit = Some(action);
2136        self
2137    }
2138
2139    pub fn on_error(mut self, action: BranchAction) -> Self {
2140        self.on_error = Some(action);
2141        self
2142    }
2143
2144    pub fn chroot(mut self, path: impl Into<PathBuf>) -> Self {
2145        self.chroot = Some(path.into());
2146        self
2147    }
2148
2149    pub fn fs_mount(mut self, virtual_path: impl Into<PathBuf>, host_path: impl Into<PathBuf>) -> Self {
2150        self.fs_mount.push((virtual_path.into(), host_path.into()));
2151        self
2152    }
2153
2154    pub fn clean_env(mut self, v: bool) -> Self {
2155        self.clean_env = v;
2156        self
2157    }
2158
2159    pub fn env_var(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
2160        self.env.insert(key.into(), value.into());
2161        self
2162    }
2163
2164
2165    pub fn gpu_devices(mut self, devices: Vec<u32>) -> Self {
2166        self.gpu_devices = Some(devices);
2167        self
2168    }
2169
2170    pub fn cpu_cores(mut self, cores: Vec<u32>) -> Self {
2171        self.cpu_cores = Some(cores);
2172        self
2173    }
2174
2175    pub fn num_cpus(mut self, n: u32) -> Self {
2176        self.num_cpus = Some(n);
2177        self
2178    }
2179
2180    pub fn port_remap(mut self, v: bool) -> Self {
2181        self.port_remap = v;
2182        self
2183    }
2184
2185    pub fn policy_fn(
2186        mut self,
2187        f: impl Fn(crate::policy_fn::SyscallEvent, &mut crate::policy_fn::PolicyContext) -> crate::policy_fn::Verdict + Send + Sync + 'static,
2188    ) -> Self {
2189        self.policy_fn = Some(std::sync::Arc::new(f));
2190        self
2191    }
2192
2193    pub fn uid(mut self, id: u32) -> Self {
2194        self.uid = Some(id);
2195        self
2196    }
2197
2198    /// Set the sandbox instance name (exposed as the virtual hostname).
2199    /// Auto-generated if not set.
2200    pub fn name(mut self, name: impl Into<String>) -> Self {
2201        self.name = Some(name.into());
2202        self
2203    }
2204
2205    /// Set the COW-fork init function.
2206    ///
2207    /// The init function runs once in the child process before any COW clones
2208    /// are created. Required for `Sandbox::fork()`.
2209    pub fn init_fn(mut self, f: impl FnOnce() + Send + 'static) -> Self {
2210        self.init_fn = Some(Box::new(f));
2211        self
2212    }
2213
2214    /// Set the COW-fork work function.
2215    ///
2216    /// The work function runs in each COW clone (`fork(N)` produces N clones).
2217    /// Required for `Sandbox::fork()`.
2218    pub fn work_fn(mut self, f: impl Fn(u32) + Send + Sync + 'static) -> Self {
2219        self.work_fn = Some(Arc::new(f));
2220        self
2221    }
2222
2223    /// Build a `Sandbox`, parsing all string fields and running per-field
2224    /// validation, but **without** the cross-section checks that
2225    /// `Sandbox::validate` performs. Use this in tests that deliberately
2226    /// construct sandboxes violating cross-section invariants.
2227    pub fn build_unchecked(self) -> Result<Sandbox, SandboxError> {
2228        validate_syscall_names(&self.extra_deny_syscalls)?;
2229
2230        // Validate: max_cpu must be 1-100
2231        if let Some(cpu) = self.max_cpu {
2232            if cpu == 0 || cpu > 100 {
2233                return Err(SandboxError::InvalidCpuPercent(cpu));
2234            }
2235        }
2236
2237        // Validate: http_ca and http_key must both be set or both unset
2238        if self.http_ca.is_some() != self.http_key.is_some() {
2239            return Err(SandboxError::Invalid(
2240                "--http-ca and --http-key must both be provided together".into(),
2241            ));
2242        }
2243
2244        // Parse HTTP rules (deferred from builder methods to propagate errors)
2245        let http_allow: Vec<HttpRule> = self
2246            .http_allow
2247            .into_iter()
2248            .map(|s| HttpRule::parse(&s))
2249            .collect::<Result<_, _>>()?;
2250        let http_deny: Vec<HttpRule> = self
2251            .http_deny
2252            .into_iter()
2253            .map(|s| HttpRule::parse(&s))
2254            .collect::<Result<_, _>>()?;
2255
2256        // Default HTTP intercept ports: 80 always, 443 when HTTPS CA is configured.
2257        let http_ports = if self.http_ports.is_empty() && (!http_allow.is_empty() || !http_deny.is_empty()) {
2258            let mut ports = vec![80];
2259            if self.http_ca.is_some() {
2260                ports.push(443);
2261            }
2262            ports
2263        } else {
2264            self.http_ports
2265        };
2266
2267        // Parse user-supplied --net-allow specs.
2268        let mut net_allow: Vec<NetAllow> = self
2269            .net_allow
2270            .into_iter()
2271            .map(|s| NetAllow::parse(&s))
2272            .collect::<Result<_, _>>()?;
2273
2274        // Auto-merge HTTP rules into the network allowlist so the proxy's
2275        // intercept ports remain reachable. A rule with a concrete host
2276        // tightens the IP allowlist (only that host on http_ports);
2277        // wildcard hosts add a `:port` (any IP) rule. This mirrors the
2278        // intent of the old `http_port → net_connect` merge but at the
2279        // endpoint level so HTTP and net_allow stay aligned.
2280        if !http_ports.is_empty() {
2281            let mut wildcard_seen = false;
2282            let mut concrete_hosts: Vec<String> = Vec::new();
2283            for rule in http_allow.iter().chain(http_deny.iter()) {
2284                if rule.host == "*" {
2285                    wildcard_seen = true;
2286                } else if !concrete_hosts.iter().any(|h| h.eq_ignore_ascii_case(&rule.host)) {
2287                    concrete_hosts.push(rule.host.clone());
2288                }
2289            }
2290            if wildcard_seen || (http_allow.is_empty() && http_deny.is_empty()) {
2291                // Fallback: explicit --http-port without rules, or wildcard rules.
2292                net_allow.push(NetAllow {
2293                    protocol: Protocol::Tcp,
2294                    host: None,
2295                    ports: http_ports.clone(),
2296                    all_ports: false,
2297                });
2298            }
2299            for h in concrete_hosts {
2300                net_allow.push(NetAllow {
2301                    protocol: Protocol::Tcp,
2302                    host: Some(h),
2303                    ports: http_ports.clone(),
2304                    all_ports: false,
2305                });
2306            }
2307        }
2308
2309        let fs_isolation = self.fs_isolation.unwrap_or_default();
2310        Ok(Sandbox {
2311            fs_writable: self.fs_writable,
2312            fs_readable: self.fs_readable,
2313            fs_denied: self.fs_denied,
2314            extra_deny_syscalls: self.extra_deny_syscalls,
2315            extra_allow_syscalls: self.extra_allow_syscalls,
2316            net_allow,
2317            net_bind: self.net_bind,
2318            http_allow,
2319            http_deny,
2320            http_ports,
2321            http_ca: self.http_ca,
2322            http_key: self.http_key,
2323            max_memory: self.max_memory,
2324            max_processes: self.max_processes.unwrap_or(64),
2325            max_open_files: self.max_open_files,
2326            max_cpu: self.max_cpu,
2327            random_seed: self.random_seed,
2328            time_start: self.time_start,
2329            no_randomize_memory: self.no_randomize_memory,
2330            no_huge_pages: self.no_huge_pages,
2331            no_coredump: self.no_coredump,
2332            deterministic_dirs: self.deterministic_dirs,
2333            fs_isolation,
2334            workdir: self.workdir,
2335            cwd: self.cwd,
2336            fs_storage: self.fs_storage,
2337            max_disk: self.max_disk,
2338            on_exit: self.on_exit.unwrap_or_default(),
2339            on_error: self.on_error.unwrap_or_default(),
2340            fs_mount: self.fs_mount,
2341            chroot: self.chroot,
2342            clean_env: self.clean_env,
2343            env: self.env,
2344            gpu_devices: self.gpu_devices,
2345            cpu_cores: self.cpu_cores,
2346            num_cpus: self.num_cpus,
2347            port_remap: self.port_remap,
2348            uid: self.uid,
2349            policy_fn: self.policy_fn,
2350            name: self.name,
2351            init_fn: self.init_fn,
2352            work_fn: self.work_fn,
2353            runtime: None,
2354        })
2355    }
2356
2357    /// Build a `Sandbox`, parsing all string fields, running per-field validation,
2358    /// and verifying cross-section invariants via `Sandbox::validate`.
2359    pub fn build(self) -> Result<Sandbox, SandboxError> {
2360        let p = self.build_unchecked()?;
2361        p.validate()?;
2362        Ok(p)
2363    }
2364}
2365
#[cfg(test)]
mod tests {
    use super::*;

    // --- SandboxBuilder integration ---

    /// Well-formed allow/deny specs end up as parsed rules on the sandbox.
    #[test]
    fn builder_http_rules() {
        let sandbox = Sandbox::builder()
            .http_allow("GET api.example.com/v1/*")
            .http_deny("* */admin/*")
            .build()
            .unwrap();

        assert_eq!(sandbox.http_allow.len(), 1);
        assert_eq!(sandbox.http_deny.len(), 1);
        assert_eq!(sandbox.http_allow[0].method, "GET");
        assert_eq!(sandbox.http_deny[0].host, "*");
    }

    /// A malformed allow spec is reported by `build()`.
    #[test]
    fn builder_invalid_http_allow_returns_error() {
        assert!(Sandbox::builder()
            .http_allow("GETexample.com")
            .build()
            .is_err());
    }

    /// A malformed deny spec is reported by `build()`.
    #[test]
    fn builder_invalid_http_deny_returns_error() {
        assert!(Sandbox::builder().http_deny("BADRULE").build().is_err());
    }

    /// Setting only the CA is rejected.
    #[test]
    fn builder_http_ca_without_key_returns_error() {
        assert!(Sandbox::builder().http_ca("/tmp/ca.pem").build().is_err());
    }

    /// Setting only the key is rejected.
    #[test]
    fn builder_http_key_without_ca_returns_error() {
        assert!(Sandbox::builder().http_key("/tmp/key.pem").build().is_err());
    }

    /// Setting CA and key together builds and keeps both values.
    #[test]
    fn builder_http_ca_and_key_together_ok() {
        let sandbox = Sandbox::builder()
            .http_ca("/tmp/ca.pem")
            .http_key("/tmp/key.pem")
            .build()
            .unwrap();

        assert!(sandbox.http_ca.is_some());
        assert!(sandbox.http_key.is_some());
    }

    /// `allows_sysv_ipc` is true only when `sysv_ipc` is listed in
    /// `extra_allow_syscalls`.
    #[test]
    fn allows_sysv_ipc_reads_extra_allow_syscalls() {
        // Builds a sandbox whose only extra allowed entry is `group`.
        let with_group = |group: &'static str| {
            Sandbox::builder()
                .extra_allow_syscalls(vec![group.into()])
                .build()
                .unwrap()
        };

        assert!(with_group("sysv_ipc").allows_sysv_ipc());
        assert!(!Sandbox::builder().build().unwrap().allows_sysv_ipc());
        assert!(!with_group("other_group").allows_sysv_ipc());
    }
}