Skip to main content

sandlock_core/
sandbox.rs

1use std::collections::HashMap;
2use std::os::fd::AsRawFd;
3use std::path::PathBuf;
4use std::sync::Arc;
5use std::time::SystemTime;
6
7use serde::{Deserialize, Serialize};
8use tokio::task::JoinHandle;
9
10use crate::context;
11use crate::error::SandboxError;
12pub use crate::http::{http_acl_check, normalize_path, prefix_or_exact_match, HttpRule};
13pub use crate::network::{NetAllow, Protocol};
14
15/// A byte size value.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
17pub struct ByteSize(pub u64);
18
19impl ByteSize {
20    pub fn bytes(n: u64) -> Self {
21        ByteSize(n)
22    }
23
24    pub fn kib(n: u64) -> Self {
25        ByteSize(n * 1024)
26    }
27
28    pub fn mib(n: u64) -> Self {
29        ByteSize(n * 1024 * 1024)
30    }
31
32    pub fn gib(n: u64) -> Self {
33        ByteSize(n * 1024 * 1024 * 1024)
34    }
35
36    pub fn parse(s: &str) -> Result<Self, SandboxError> {
37        let s = s.trim();
38        if s.is_empty() {
39            return Err(SandboxError::Invalid("empty byte size string".into()));
40        }
41
42        // Check for suffix
43        let last = s.chars().last().unwrap();
44        if last.is_ascii_alphabetic() {
45            let (num_str, suffix) = s.split_at(s.len() - 1);
46            let n: u64 = num_str
47                .trim()
48                .parse()
49                .map_err(|_| SandboxError::Invalid(format!("invalid byte size: {}", s)))?;
50            match suffix.to_ascii_uppercase().as_str() {
51                "K" => Ok(ByteSize::kib(n)),
52                "M" => Ok(ByteSize::mib(n)),
53                "G" => Ok(ByteSize::gib(n)),
54                other => Err(SandboxError::Invalid(format!("unknown byte size suffix: {}", other))),
55            }
56        } else {
57            let n: u64 = s
58                .parse()
59                .map_err(|_| SandboxError::Invalid(format!("invalid byte size: {}", s)))?;
60            Ok(ByteSize(n))
61        }
62    }
63}
64
65/// Confinement for confining the current process in place.
66#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
67pub struct Confinement {
68    pub fs_writable: Vec<PathBuf>,
69    pub fs_readable: Vec<PathBuf>,
70}
71
72impl Confinement {
73    pub fn builder() -> ConfinementBuilder {
74        ConfinementBuilder::default()
75    }
76}
77
78#[derive(Default)]
79pub struct ConfinementBuilder {
80    fs_writable: Vec<PathBuf>,
81    fs_readable: Vec<PathBuf>,
82}
83
84impl ConfinementBuilder {
85    pub fn fs_write(mut self, path: impl Into<PathBuf>) -> Self {
86        self.fs_writable.push(path.into());
87        self
88    }
89
90    pub fn fs_read(mut self, path: impl Into<PathBuf>) -> Self {
91        self.fs_readable.push(path.into());
92        self
93    }
94
95    pub fn build(self) -> Confinement {
96        Confinement {
97            fs_writable: self.fs_writable,
98            fs_readable: self.fs_readable,
99        }
100    }
101}
102
103impl TryFrom<&Sandbox> for Confinement {
104    type Error = SandboxError;
105
106    fn try_from(sandbox: &Sandbox) -> Result<Self, Self::Error> {
107        let mut unsupported = Vec::new();
108        if !sandbox.fs_denied.is_empty() { unsupported.push("fs_denied"); }
109        if !sandbox.extra_deny_syscalls.is_empty() { unsupported.push("extra_deny_syscalls"); }
110        if !sandbox.net_allow.is_empty() { unsupported.push("net_allow"); }
111        if !sandbox.net_bind.is_empty() { unsupported.push("net_bind"); }
112        if sandbox.allows_sysv_ipc() { unsupported.push("extra_allow_syscalls=[\"sysv_ipc\"]"); }
113        if !sandbox.http_allow.is_empty() { unsupported.push("http_allow"); }
114        if !sandbox.http_deny.is_empty() { unsupported.push("http_deny"); }
115        if !sandbox.http_ports.is_empty() { unsupported.push("http_ports"); }
116        if sandbox.http_ca.is_some() { unsupported.push("http_ca"); }
117        if sandbox.http_key.is_some() { unsupported.push("http_key"); }
118        if sandbox.max_memory.is_some() { unsupported.push("max_memory"); }
119        if sandbox.max_processes != 64 { unsupported.push("max_processes"); }
120        if sandbox.max_open_files.is_some() { unsupported.push("max_open_files"); }
121        if sandbox.max_cpu.is_some() { unsupported.push("max_cpu"); }
122        if sandbox.random_seed.is_some() { unsupported.push("random_seed"); }
123        if sandbox.time_start.is_some() { unsupported.push("time_start"); }
124        if sandbox.no_randomize_memory { unsupported.push("no_randomize_memory"); }
125        if sandbox.no_huge_pages { unsupported.push("no_huge_pages"); }
126        if sandbox.no_coredump { unsupported.push("no_coredump"); }
127        if sandbox.deterministic_dirs { unsupported.push("deterministic_dirs"); }
128        if sandbox.workdir.is_some() { unsupported.push("workdir"); }
129        if sandbox.cwd.is_some() { unsupported.push("cwd"); }
130        if sandbox.fs_storage.is_some() { unsupported.push("fs_storage"); }
131        if sandbox.max_disk.is_some() { unsupported.push("max_disk"); }
132        if sandbox.on_exit != BranchAction::Commit { unsupported.push("on_exit"); }
133        if sandbox.on_error != BranchAction::Abort { unsupported.push("on_error"); }
134        if !sandbox.fs_mount.is_empty() { unsupported.push("fs_mount"); }
135        if sandbox.chroot.is_some() { unsupported.push("chroot"); }
136        if sandbox.clean_env { unsupported.push("clean_env"); }
137        if !sandbox.env.is_empty() { unsupported.push("env"); }
138        if sandbox.gpu_devices.is_some() { unsupported.push("gpu_devices"); }
139        if sandbox.cpu_cores.is_some() { unsupported.push("cpu_cores"); }
140        if sandbox.num_cpus.is_some() { unsupported.push("num_cpus"); }
141        if sandbox.port_remap { unsupported.push("port_remap"); }
142        if sandbox.uid.is_some() { unsupported.push("uid"); }
143        if sandbox.policy_fn.is_some() { unsupported.push("policy_fn"); }
144
145        if !unsupported.is_empty() {
146            return Err(SandboxError::UnsupportedForConfine(unsupported.join(", ")));
147        }
148
149        Ok(Self {
150            fs_writable: sandbox.fs_writable.clone(),
151            fs_readable: sandbox.fs_readable.clone(),
152        })
153    }
154}
155
/// Action to take on branch exit.
///
/// Used by `Sandbox::on_exit` and `Sandbox::on_error`. `Commit` is the
/// `Default` value of this type.
// NOTE(review): the exact effect of each variant is implemented outside this
// section; the variant names are the only semantics visible here.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum BranchAction {
    #[default]
    Commit,
    Abort,
    Keep,
}
164
165// ============================================================
166// Runtime — private heap-allocated state, present only while running
167// ============================================================
168
/// Private runtime state.  Only allocated after `start()` / `run()` is
/// called; `None` for config-only `Sandbox` instances.
///
/// Never serialized and never cloned: `Sandbox::runtime` is `#[serde(skip)]`
/// and `Sandbox::clone` always resets it to `None`.
struct Runtime {
    // Name reported by `Sandbox::instance_name()` once a runtime exists.
    name: String,
    // Lifecycle state; updated by `pause()`, `resume()` and `wait()`.
    state: RuntimeState,
    // PID of the spawned child; also used as the process-group id for `killpg`.
    child_pid: Option<i32>,
    pidfd: Option<std::os::fd::OwnedFd>,
    // Background tasks; `wait()` aborts all three once the child has exited.
    notif_handle: Option<JoinHandle<()>>,
    throttle_handle: Option<JoinHandle<()>>,
    loadavg_handle: Option<JoinHandle<()>>,
    // Read ends drained by `wait()` to produce the captured stdout / stderr.
    _stdout_read: Option<std::os::fd::OwnedFd>,
    _stderr_read: Option<std::os::fd::OwnedFd>,
    // Filled by `wait()` with the branch taken out of `supervisor_cow`.
    seccomp_cow: Option<crate::cow::seccomp::SeccompCowBranch>,
    // Shared supervisor state; `freeze()` / `thaw()` toggle `hold_forks` here.
    supervisor_resource: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::ResourceState>>>,
    supervisor_cow: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::CowState>>>,
    // Source of the port map returned by `port_mappings()`.
    supervisor_network: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::NetworkState>>>,
    ctrl_fd: Option<std::os::fd::OwnedFd>,
    stdout_pipe: Option<std::os::fd::OwnedFd>,
    // (stdin, stdout, stderr) fd overrides set by `create_with_io` /
    // `create_with_gather_io`.
    io_overrides: Option<(Option<i32>, Option<i32>, Option<i32>)>,
    // Extra fd pairs supplied through `create_with_gather_io`.
    // NOTE(review): which element is the child-side fd is not visible here.
    extra_fds: Vec<(i32, i32)>,
    http_acl_handle: Option<crate::http_acl::HttpAclProxyHandle>,
    // Callback installed by `set_on_bind`.
    #[allow(clippy::type_complexity)]
    on_bind: Option<Box<dyn Fn(&HashMap<u16, u16>) + Send + Sync>>,
    // User-supplied syscall handlers installed by `run_with_handlers`.
    handlers: Vec<(i64, Arc<dyn crate::seccomp::dispatch::Handler>)>,
    ready_w: Option<std::os::fd::OwnedFd>,
}
195
/// Lifecycle state for the runtime.
///
/// `is_running()` reports `true` for `Running` and `Paused` only.
enum RuntimeState {
    Created,
    // Set by `resume()` after SIGCONT was delivered.
    Running,
    // Set by `pause()` after SIGSTOP was delivered.
    Paused,
    // Set by `wait()`; carries the exit status returned by later `wait()` calls.
    Stopped(crate::result::ExitStatus),
}
203
/// Sandbox configuration.
///
/// Besides the serializable policy fields, a `Sandbox` also carries a few
/// runtime-only members (`policy_fn`, `name`, `init_fn`, `work_fn`,
/// `runtime`) that are marked `#[serde(skip)]` and therefore never appear in
/// a serialized policy. `Debug` and `Clone` are implemented by hand for this
/// type rather than derived.
#[derive(Serialize, Deserialize)]
pub struct Sandbox {
    // Filesystem access
    pub fs_writable: Vec<PathBuf>,
    pub fs_readable: Vec<PathBuf>,
    pub fs_denied: Vec<PathBuf>,

    // Extra syscall filtering on top of Sandlock's default blocklist.
    pub extra_deny_syscalls: Vec<String>,
    // `allows_sysv_ipc()` checks this list for the "sysv_ipc" group name.
    pub extra_allow_syscalls: Vec<String>,

    // Network
    /// Outbound endpoint allowlist as a list of `(protocol, host?, ports)`
    /// rules. Each rule names a protocol (TCP/UDP/ICMP) and either a
    /// concrete host or "any IP." TCP and UDP rules carry ports; ICMP
    /// rules have none.
    ///
    /// **Protocol gating falls out of rule presence.** Sandlock denies
    /// UDP and ICMP socket creation by default; opting in is "list at
    /// least one rule for that protocol" (e.g. `udp://*:*` for any UDP,
    /// `icmp://*` for any ICMP echo). TCP is always permitted.
    ///
    /// Empty `net_allow` and empty `http_allow`/`http_deny` together
    /// mean "deny all outbound" (Landlock direct path denies, no
    /// on-behalf path is enabled). Otherwise, the on-behalf path
    /// enforces these rules: a destination is permitted iff any rule
    /// matches the protocol, destination IP (or has `host: None` = any
    /// IP), and destination port (N/A for ICMP).
    ///
    /// HTTP rules with concrete hosts auto-add a matching
    /// `(Tcp, host, [80])` (and `(Tcp, host, [443])` when `--http-ca`
    /// is set) entry at build time so the proxy's intercept ports
    /// remain reachable. HTTP rules with wildcard hosts auto-add
    /// `(Tcp, None, [80])` instead.
    pub net_allow: Vec<NetAllow>,
    pub net_bind: Vec<u16>,
    // HTTP ACL
    pub http_allow: Vec<HttpRule>,
    pub http_deny: Vec<HttpRule>,
    /// TCP ports to intercept for HTTP ACL. Defaults to [80] (plus 443 when
    /// http_ca is set). Override with `http_ports` to intercept custom ports.
    pub http_ports: Vec<u16>,
    /// PEM CA cert for HTTPS MITM. When set, port 443 is also intercepted.
    pub http_ca: Option<PathBuf>,
    /// PEM CA key for HTTPS MITM. Required when http_ca is set.
    pub http_key: Option<PathBuf>,

    // Resource limits
    pub max_memory: Option<ByteSize>,
    // `TryFrom<&Sandbox> for Confinement` treats 64 as the "unset" value.
    pub max_processes: u32,
    pub max_open_files: Option<u32>,
    pub max_cpu: Option<u8>,

    // Reproducibility
    pub random_seed: Option<u64>,
    pub time_start: Option<SystemTime>,
    pub no_randomize_memory: bool,
    pub no_huge_pages: bool,
    pub no_coredump: bool,
    pub deterministic_dirs: bool,

    // Filesystem branch
    pub workdir: Option<PathBuf>,
    pub cwd: Option<PathBuf>,
    pub fs_storage: Option<PathBuf>,
    pub max_disk: Option<ByteSize>,
    // `TryFrom<&Sandbox> for Confinement` treats `Commit` (on_exit) and
    // `Abort` (on_error) as the "unset" values.
    pub on_exit: BranchAction,
    pub on_error: BranchAction,

    // Mount mappings: (virtual_path_inside_chroot, host_path_on_disk)
    pub fs_mount: Vec<(PathBuf, PathBuf)>,

    // Environment
    pub chroot: Option<PathBuf>,
    pub clean_env: bool,
    pub env: HashMap<String, String>,
    // Devices
    pub gpu_devices: Option<Vec<u32>>,

    // CPU
    pub cpu_cores: Option<Vec<u32>>,
    pub num_cpus: Option<u32>,
    // NOTE(review): grouped under "CPU" but the name suggests a network
    // port setting — confirm and regroup if so.
    pub port_remap: bool,

    /// Skip the seccomp user-notification supervisor. The sandbox runs
    /// with Landlock + a kernel-only deny filter, with none of the
    /// supervisor-mediated features (IP allowlist, resource limits,
    /// COW, chroot mediation, /proc virtualization, custom handlers).
    /// Required when nesting inside another sandlock — the kernel only
    /// allows one `SECCOMP_FILTER_FLAG_NEW_LISTENER` per task.
    pub no_supervisor: bool,

    // User namespace
    pub uid: Option<u32>,

    // Dynamic policy callback
    #[serde(skip)]
    pub policy_fn: Option<crate::policy_fn::PolicyCallback>,

    // Sandbox instance name (exposed as virtual hostname; auto-generated if None).
    // Not serialized — instance names are set at runtime, not in the policy file.
    #[serde(skip)]
    pub name: Option<String>,

    // COW fork init function — runs once in the child before COW cloning.
    // Not serialized; not cloned (FnOnce can't be cloned — drops to None on clone).
    #[serde(skip)]
    init_fn: Option<Box<dyn FnOnce() + Send + 'static>>,

    // COW fork work function — runs in each COW clone.
    // Not serialized; cloned via Arc (cheap).
    #[serde(skip)]
    work_fn: Option<Arc<dyn Fn(u32) + Send + Sync + 'static>>,

    // Heap-allocated runtime state; `None` when not started.
    // Not serialized, and reset to `None` in every clone.
    #[serde(skip)]
    runtime: Option<Box<Runtime>>,
}
323
324impl std::fmt::Debug for Sandbox {
325    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
326        f.debug_struct("Sandbox")
327            .field("fs_readable", &self.fs_readable)
328            .field("fs_writable", &self.fs_writable)
329            .field("max_memory", &self.max_memory)
330            .field("max_processes", &self.max_processes)
331            .field("policy_fn", &self.policy_fn.as_ref().map(|_| "<callback>"))
332            .field("name", &self.name)
333            .field("runtime", &self.runtime.as_ref().map(|_| "<runtime>"))
334            .finish_non_exhaustive()
335    }
336}
337
338impl Clone for Sandbox {
339    /// Clone a `Sandbox` — config and runtime-kwargs fields are cloned; the
340    /// runtime state is not (the clone starts with `runtime: None`).
341    ///
342    /// Field clone semantics:
343    /// - `policy_fn` — Arc bump (cheap).
344    /// - `work_fn`   — Arc bump (cheap); multiple Sandboxes share the closure.
345    /// - `init_fn`   — **dropped to `None`** (FnOnce can't be cloned). If the
346    ///   clone also needs an init function, call `.init_fn(...)` on it
347    ///   separately or set it via `SandboxBuilder::init_fn`.
348    /// - `runtime`   — always `None`; the clone is a fresh, un-started Sandbox.
349    fn clone(&self) -> Self {
350        Self {
351            fs_writable: self.fs_writable.clone(),
352            fs_readable: self.fs_readable.clone(),
353            fs_denied: self.fs_denied.clone(),
354            extra_deny_syscalls: self.extra_deny_syscalls.clone(),
355            extra_allow_syscalls: self.extra_allow_syscalls.clone(),
356            net_allow: self.net_allow.clone(),
357            net_bind: self.net_bind.clone(),
358            http_allow: self.http_allow.clone(),
359            http_deny: self.http_deny.clone(),
360            http_ports: self.http_ports.clone(),
361            http_ca: self.http_ca.clone(),
362            http_key: self.http_key.clone(),
363            max_memory: self.max_memory,
364            max_processes: self.max_processes,
365            max_open_files: self.max_open_files,
366            max_cpu: self.max_cpu,
367            random_seed: self.random_seed,
368            time_start: self.time_start,
369            no_randomize_memory: self.no_randomize_memory,
370            no_huge_pages: self.no_huge_pages,
371            no_coredump: self.no_coredump,
372            deterministic_dirs: self.deterministic_dirs,
373            workdir: self.workdir.clone(),
374            cwd: self.cwd.clone(),
375            fs_storage: self.fs_storage.clone(),
376            max_disk: self.max_disk,
377            on_exit: self.on_exit.clone(),
378            on_error: self.on_error.clone(),
379            fs_mount: self.fs_mount.clone(),
380            chroot: self.chroot.clone(),
381            clean_env: self.clean_env,
382            env: self.env.clone(),
383            gpu_devices: self.gpu_devices.clone(),
384            cpu_cores: self.cpu_cores.clone(),
385            num_cpus: self.num_cpus,
386            port_remap: self.port_remap,
387            no_supervisor: self.no_supervisor,
388            uid: self.uid,
389            policy_fn: self.policy_fn.clone(),
390            name: self.name.clone(),
391            // init_fn (FnOnce) cannot be cloned — the clone gets None.
392            // If the clone also needs an init function, set it explicitly.
393            init_fn: None,
394            // work_fn is Arc-wrapped — clone bumps the reference count.
395            work_fn: self.work_fn.clone(),
396            // Runtime is NOT cloned — the clone starts with no runtime.
397            runtime: None,
398        }
399    }
400}
401
402impl Sandbox {
403    pub fn builder() -> SandboxBuilder {
404        SandboxBuilder::default()
405    }
406
407    /// Returns true iff the policy grants the `sysv_ipc` syscall group.
408    pub fn allows_sysv_ipc(&self) -> bool {
409        self.extra_allow_syscalls.iter().any(|s| s == "sysv_ipc")
410    }
411
    /// Validate cross-section invariants — checks that span multiple fields.
    ///
    /// Currently a no-op that always returns `Ok(())`; retained as an
    /// extension point and for API stability. Idempotent: calling
    /// repeatedly is safe.
    pub fn validate(&self) -> Result<(), SandboxError> {
        Ok(())
    }
419
420    // ================================================================
421    // Runtime accessor helpers (private)
422    // ================================================================
423
424    fn rt(&self) -> &Runtime {
425        self.runtime.as_ref().expect("sandbox not started")
426    }
427
428    fn rt_mut(&mut self) -> &mut Runtime {
429        self.runtime.as_mut().expect("sandbox not started")
430    }
431
432    // ================================================================
433    // Runtime lifecycle API (public)
434    // ================================================================
435
436    /// Set the sandbox instance name (also exposed as the virtual hostname).
437    /// Auto-generated if not set.
438    pub fn set_name(&mut self, name: impl Into<String>) {
439        self.name = Some(name.into());
440    }
441
442    /// Set the sandbox instance name and return `self`. Convenience for
443    /// pipeline fan-out where a base config is cloned and each clone gets a
444    /// fresh name:
445    ///
446    /// ```ignore
447    /// let template = Sandbox::builder()...build()?;
448    /// let mut s1 = template.clone().with_name("worker-1");
449    /// let mut s2 = template.clone().with_name("worker-2");
450    /// ```
451    pub fn with_name(mut self, name: impl Into<String>) -> Self {
452        self.name = Some(name.into());
453        self
454    }
455
456    /// Set the COW-fork init function and return `self`.
457    ///
458    /// The init function runs once in the child process before any COW clones
459    /// are created. Use it to load expensive shared state.
460    pub fn with_init_fn(mut self, f: impl FnOnce() + Send + 'static) -> Self {
461        self.init_fn = Some(Box::new(f));
462        self
463    }
464
465    /// Set the COW-fork work function and return `self`.
466    ///
467    /// The work function runs in each COW clone (`fork(N)` produces N clones).
468    pub fn with_work_fn(mut self, f: impl Fn(u32) + Send + Sync + 'static) -> Self {
469        self.work_fn = Some(Arc::new(f));
470        self
471    }
472
473    /// Return the sandbox name if set, or `None` if not yet started.
474    pub fn instance_name(&self) -> Option<&str> {
475        self.runtime.as_ref().map(|r| r.name.as_str())
476            .or_else(|| self.name.as_deref())
477    }
478
479    /// Return the child PID if spawned.
480    pub fn pid(&self) -> Option<i32> {
481        self.runtime.as_ref().and_then(|r| r.child_pid)
482    }
483
484    /// Return whether the child is currently running or paused.
485    pub fn is_running(&self) -> bool {
486        self.runtime.as_ref().map(|r| {
487            matches!(r.state, RuntimeState::Running | RuntimeState::Paused)
488        }).unwrap_or(false)
489    }
490
491    /// Send SIGSTOP to the child's process group.
492    pub fn pause(&mut self) -> Result<(), crate::error::SandlockError> {
493        use crate::error::SandboxRuntimeError;
494        let pid = self.runtime.as_ref()
495            .and_then(|rt| rt.child_pid)
496            .ok_or(SandboxRuntimeError::NotRunning)?;
497        let ret = unsafe { libc::killpg(pid, libc::SIGSTOP) };
498        if ret < 0 {
499            return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
500        }
501        self.rt_mut().state = RuntimeState::Paused;
502        Ok(())
503    }
504
505    /// Send SIGCONT to the child's process group.
506    pub fn resume(&mut self) -> Result<(), crate::error::SandlockError> {
507        use crate::error::SandboxRuntimeError;
508        let pid = self.runtime.as_ref()
509            .and_then(|rt| rt.child_pid)
510            .ok_or(SandboxRuntimeError::NotRunning)?;
511        let ret = unsafe { libc::killpg(pid, libc::SIGCONT) };
512        if ret < 0 {
513            return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
514        }
515        self.rt_mut().state = RuntimeState::Running;
516        Ok(())
517    }
518
519    /// Send SIGKILL to the child's process group.
520    pub fn kill(&mut self) -> Result<(), crate::error::SandlockError> {
521        use crate::error::SandboxRuntimeError;
522        let pid = self.runtime.as_ref()
523            .and_then(|rt| rt.child_pid)
524            .ok_or(SandboxRuntimeError::NotRunning)?;
525        let ret = unsafe { libc::killpg(pid, libc::SIGKILL) };
526        if ret < 0 {
527            let err = std::io::Error::last_os_error();
528            if err.raw_os_error() != Some(libc::ESRCH) {
529                return Err(SandboxRuntimeError::Io(err).into());
530            }
531        }
532        Ok(())
533    }
534
535    /// Set a callback invoked whenever a port bind is recorded.
536    pub fn set_on_bind(&mut self, cb: impl Fn(&HashMap<u16, u16>) + Send + Sync + 'static) {
537        // Ensure runtime exists so we have somewhere to store the callback.
538        // In practice, set_on_bind is always called before spawn.
539        let _ = self.ensure_runtime();
540        self.rt_mut().on_bind = Some(Box::new(cb));
541    }
542
543    /// Return the current virtual-to-real port mappings.
544    pub async fn port_mappings(&self) -> HashMap<u16, u16> {
545        if let Some(ref rt) = self.runtime {
546            if let Some(ref net) = rt.supervisor_network {
547                let ns = net.lock().await;
548                return ns.port_map.virtual_to_real.clone();
549            }
550        }
551        HashMap::new()
552    }
553
554    /// Wait for the child process to exit.
555    pub async fn wait(&mut self) -> Result<crate::result::RunResult, crate::error::SandlockError> {
556        use crate::error::SandboxRuntimeError;
557        use crate::result::{ExitStatus, RunResult};
558
559        let pid = self.rt().child_pid.ok_or(SandboxRuntimeError::NotRunning)?;
560
561        if let RuntimeState::Stopped(ref es) = self.rt().state {
562            return Ok(RunResult {
563                exit_status: es.clone(),
564                stdout: None,
565                stderr: None,
566            });
567        }
568
569        let exit_status = tokio::task::spawn_blocking(move || -> ExitStatus {
570            let mut status: i32 = 0;
571            loop {
572                let ret = unsafe { libc::waitpid(pid, &mut status, 0) };
573                if ret < 0 {
574                    let err = std::io::Error::last_os_error();
575                    if err.raw_os_error() == Some(libc::EINTR) {
576                        continue;
577                    }
578                    return ExitStatus::Killed;
579                }
580                break;
581            }
582            sandbox_wait_status_to_exit(status)
583        })
584        .await
585        .unwrap_or(ExitStatus::Killed);
586
587        self.rt_mut().state = RuntimeState::Stopped(exit_status.clone());
588
589        let rt = self.rt_mut();
590        if let Some(h) = rt.notif_handle.take() { h.abort(); }
591        if let Some(h) = rt.throttle_handle.take() { h.abort(); }
592        if let Some(h) = rt.loadavg_handle.take() { h.abort(); }
593
594        if let Some(ref cow_state) = self.rt().supervisor_cow.clone() {
595            let mut cow = cow_state.lock().await;
596            self.rt_mut().seccomp_cow = cow.branch.take();
597        }
598
599        let stdout = self.rt_mut()._stdout_read.take().map(sandbox_read_fd_to_end);
600        let stderr = self.rt_mut()._stderr_read.take().map(sandbox_read_fd_to_end);
601
602        Ok(RunResult { exit_status, stdout, stderr })
603    }
604
605    /// Fork the sandboxed child and install policy (seccomp + notif
606    /// supervisor + rlimits + landlock + COW + network/HTTP proxies).
607    /// The child is parked between policy install and `execve`; call
608    /// `start()` to release it. Stdout/stderr are captured for later
609    /// retrieval via `wait()`.
610    pub async fn create(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
611        self.do_create(cmd, true).await
612    }
613
614    /// Like `create` but inherits stdio (no capture).
615    pub async fn create_interactive(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
616        self.do_create(cmd, false).await
617    }
618
    /// Release a previously `create()`d child to `execve` the configured
    /// command. Returns immediately; use `wait()` to collect the exit
    /// status when the child finishes.
    ///
    /// Thin public wrapper: all work is delegated to `do_start()`, whose
    /// errors are returned unchanged.
    pub fn start(&mut self) -> Result<(), crate::error::SandlockError> {
        self.do_start()
    }
625
626    /// Sugar for `create()` + `start()` that also blocks until the child
627    /// has completed `execve()` and is executing user code. After this
628    /// returns, operations that read user-code state (e.g. `checkpoint()`,
629    /// `/proc/<pid>/exe`) observe the requested binary rather than the
630    /// supervisor.
631    pub async fn spawn(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
632        self.create(cmd).await?;
633        self.start()?;
634        self.wait_until_exec().await
635    }
636
637    /// Like `spawn` but inherits stdio (no capture).
638    pub async fn spawn_interactive(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
639        self.create_interactive(cmd).await?;
640        self.start()?;
641        self.wait_until_exec().await
642    }
643
644    /// Wait for the child to finish `execve`. Detected by `/proc/<pid>/exe`
645    /// no longer matching `/proc/self/exe` (before execve the child still
646    /// shares the supervisor's binary). The kernel offers no direct event
647    /// for execve completion, so this polls every 1ms with a 5s ceiling.
648    async fn wait_until_exec(&self) -> Result<(), crate::error::SandlockError> {
649        use crate::error::SandboxRuntimeError;
650        let pid = self.pid().ok_or(SandboxRuntimeError::NotRunning)?;
651        let Some(our_exe) = std::fs::read_link("/proc/self/exe").ok() else {
652            return Ok(());
653        };
654        let child_link = format!("/proc/{}/exe", pid);
655        let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
656        loop {
657            if let Ok(child_exe) = std::fs::read_link(&child_link) {
658                if child_exe != our_exe {
659                    return Ok(());
660                }
661            }
662            if std::time::Instant::now() >= deadline {
663                return Err(SandboxRuntimeError::Child(
664                    "child did not exec() within 5s".into(),
665                ).into());
666            }
667            tokio::time::sleep(std::time::Duration::from_millis(1)).await;
668        }
669    }
670
671    /// Create with explicit stdin/stdout/stderr fd redirection. Child is
672    /// parked after policy install; call `start()` to release.
673    #[doc(hidden)]
674    pub async fn create_with_io(
675        &mut self,
676        cmd: &[&str],
677        stdin_fd: Option<std::os::unix::io::RawFd>,
678        stdout_fd: Option<std::os::unix::io::RawFd>,
679        stderr_fd: Option<std::os::unix::io::RawFd>,
680    ) -> Result<(), crate::error::SandlockError> {
681        self.ensure_runtime()?;
682        self.rt_mut().io_overrides = Some((stdin_fd, stdout_fd, stderr_fd));
683        self.do_create(cmd, false).await
684    }
685
686    /// Like `create_with_io` but also maps extra fds into the child.
687    #[doc(hidden)]
688    pub async fn create_with_gather_io(
689        &mut self,
690        cmd: &[&str],
691        stdin_fd: Option<std::os::unix::io::RawFd>,
692        stdout_fd: Option<std::os::unix::io::RawFd>,
693        stderr_fd: Option<std::os::unix::io::RawFd>,
694        extra_fds: Vec<(i32, i32)>,
695    ) -> Result<(), crate::error::SandlockError> {
696        self.ensure_runtime()?;
697        self.rt_mut().io_overrides = Some((stdin_fd, stdout_fd, stderr_fd));
698        self.rt_mut().extra_fds = extra_fds;
699        self.do_create(cmd, false).await
700    }
701
702    /// Freeze the sandbox: hold fork notifications + SIGSTOP the process group.
703    pub(crate) async fn freeze(&self) -> Result<(), crate::error::SandlockError> {
704        use crate::error::{SandboxRuntimeError, SandlockError};
705        let rt = self.runtime.as_ref().ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
706        let pid = rt.child_pid.ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
707        if let Some(ref resource) = rt.supervisor_resource {
708            let mut rs = resource.lock().await;
709            rs.hold_forks = true;
710        }
711        unsafe { libc::killpg(pid, libc::SIGSTOP); }
712        Ok(())
713    }
714
715    /// Thaw the sandbox: release held fork notifications + SIGCONT.
716    pub(crate) async fn thaw(&self) -> Result<(), crate::error::SandlockError> {
717        use crate::error::{SandboxRuntimeError, SandlockError};
718        let rt = self.runtime.as_ref().ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
719        let pid = rt.child_pid.ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
720        if let Some(ref resource) = rt.supervisor_resource {
721            let mut rs = resource.lock().await;
722            rs.hold_forks = false;
723            rs.held_notif_ids.clear();
724        }
725        unsafe { libc::killpg(pid, libc::SIGCONT); }
726        Ok(())
727    }
728
729    /// Capture a checkpoint of the running sandbox.
730    pub async fn checkpoint(&self) -> Result<crate::checkpoint::Checkpoint, crate::error::SandlockError> {
731        use crate::error::{SandboxRuntimeError, SandlockError};
732        let pid = self.runtime.as_ref()
733            .and_then(|rt| rt.child_pid)
734            .ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
735        self.freeze().await?;
736        let cp = crate::checkpoint::capture(pid, self);
737        self.thaw().await?;
738        cp
739    }
740
741    // ================================================================
742    // One-shot / lifecycle instance API
743    // ================================================================
744
745    /// One-shot: spawn, wait, and return the result. Stdout and stderr are
746    /// captured. This is the primary way to run a sandboxed command:
747    ///
748    /// ```ignore
749    /// let mut sandbox = Sandbox::builder()
750    ///     .fs_read("/usr")
751    ///     .name("my-sandbox")
752    ///     .build()?;
753    /// let result = sandbox.run(&["echo", "hello"]).await?;
754    /// ```
755    pub async fn run(
756        &mut self,
757        cmd: &[&str],
758    ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
759        self.do_create(cmd, true).await?;
760        self.do_start()?;
761        self.wait().await
762    }
763
764    /// Run with inherited stdio (interactive mode).
765    pub async fn run_interactive(
766        &mut self,
767        cmd: &[&str],
768    ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
769        self.do_create(cmd, false).await?;
770        self.do_start()?;
771        self.wait().await
772    }
773
774    /// One-shot run with user-supplied syscall handlers.
775    pub async fn run_with_handlers<I, S, H>(
776        &mut self,
777        cmd: &[&str],
778        handlers: I,
779    ) -> Result<crate::result::RunResult, crate::error::SandlockError>
780    where
781        I: IntoIterator<Item = (S, H)>,
782        S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
783        H: crate::seccomp::dispatch::Handler,
784    {
785        let pending = sandbox_collect_handlers(handlers, self)?;
786        self.ensure_runtime()?;
787        self.rt_mut().handlers = pending;
788        self.do_create(cmd, true).await?;
789        self.do_start()?;
790        self.wait().await
791    }
792
793    /// Interactive-stdio counterpart of `run_with_handlers`.
794    pub async fn run_interactive_with_handlers<I, S, H>(
795        &mut self,
796        cmd: &[&str],
797        handlers: I,
798    ) -> Result<crate::result::RunResult, crate::error::SandlockError>
799    where
800        I: IntoIterator<Item = (S, H)>,
801        S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
802        H: crate::seccomp::dispatch::Handler,
803    {
804        let pending = sandbox_collect_handlers(handlers, self)?;
805        self.ensure_runtime()?;
806        self.rt_mut().handlers = pending;
807        self.do_create(cmd, false).await?;
808        self.do_start()?;
809        self.wait().await
810    }
811
812    /// Dry-run: create, start, wait, collect filesystem changes, then abort.
813    pub async fn dry_run(
814        &mut self,
815        cmd: &[&str],
816    ) -> Result<crate::dry_run::DryRunResult, crate::error::SandlockError> {
817        self.on_exit = BranchAction::Keep;
818        self.on_error = BranchAction::Keep;
819        self.do_create(cmd, true).await?;
820        self.do_start()?;
821        let run_result = self.wait().await?;
822        let changes = self.collect_changes().await;
823        self.do_abort().await;
824        Ok(crate::dry_run::DryRunResult { run_result, changes })
825    }
826
827    /// Dry-run with inherited stdio.
828    pub async fn dry_run_interactive(
829        &mut self,
830        cmd: &[&str],
831    ) -> Result<crate::dry_run::DryRunResult, crate::error::SandlockError> {
832        self.on_exit = BranchAction::Keep;
833        self.on_error = BranchAction::Keep;
834        self.do_create(cmd, false).await?;
835        self.do_start()?;
836        let run_result = self.wait().await?;
837        let changes = self.collect_changes().await;
838        self.do_abort().await;
839        Ok(crate::dry_run::DryRunResult { run_result, changes })
840    }
841
842    /// Create N COW clones of this sandbox.
843    ///
844    /// `fork()` requires `init_fn` and `work_fn` to be set on the sandbox (via
845    /// `SandboxBuilder::init_fn` / `work_fn`, or `Sandbox::with_init_fn` /
846    /// `with_work_fn`). Returns an error if either is missing.
847    pub async fn fork(&mut self, n: u32) -> Result<Vec<Sandbox>, crate::error::SandlockError> {
848        use crate::error::SandboxRuntimeError;
849        use std::os::fd::{FromRawFd, OwnedFd};
850
851        // Pull init_fn / work_fn directly from self (they live on Sandbox, not
852        // Runtime, so ensure_runtime hasn't consumed them yet).
853        let init_fn = self.init_fn.take()
854            .ok_or_else(|| SandboxRuntimeError::Child("fork() requires init_fn and work_fn — use SandboxBuilder::init_fn() / work_fn() or Sandbox::with_init_fn() / with_work_fn()".into()))?;
855        let work_fn = self.work_fn.take()
856            .ok_or_else(|| SandboxRuntimeError::Child("fork() requires init_fn and work_fn — use SandboxBuilder::init_fn() / work_fn() or Sandbox::with_init_fn() / with_work_fn()".into()))?;
857
858        // Initialize the runtime block so we can record child PID / state below.
859        self.ensure_runtime()?;
860
861        let sandbox_cfg = self.clone(); // config only, no runtime
862
863        let mut ctrl_fds = [0i32; 2];
864        if unsafe { libc::pipe2(ctrl_fds.as_mut_ptr(), 0) } < 0 {
865            return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
866        }
867        let ctrl_parent = unsafe { OwnedFd::from_raw_fd(ctrl_fds[0]) };
868        let ctrl_child_fd = ctrl_fds[1];
869
870        let mut pipe_read_ends: Vec<OwnedFd> = Vec::with_capacity(n as usize);
871        let mut pipe_write_fds: Vec<i32> = Vec::with_capacity(n as usize);
872        for _ in 0..n {
873            let mut pfds = [0i32; 2];
874            if unsafe { libc::pipe(pfds.as_mut_ptr()) } >= 0 {
875                pipe_read_ends.push(unsafe { OwnedFd::from_raw_fd(pfds[0]) });
876                pipe_write_fds.push(pfds[1]);
877            } else {
878                pipe_write_fds.push(-1);
879            }
880        }
881
882        let pid = unsafe { libc::fork() };
883        if pid < 0 {
884            unsafe { libc::close(ctrl_child_fd) };
885            return Err(SandboxRuntimeError::Fork(std::io::Error::last_os_error()).into());
886        }
887
888        if pid == 0 {
889            drop(ctrl_parent);
890            unsafe { libc::setpgid(0, 0) };
891            unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL) };
892            unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
893
894            let _ = crate::landlock::confine(&sandbox_cfg);
895
896            let deny = crate::context::blocklist_syscall_numbers(&sandbox_cfg);
897            let args = crate::context::arg_filters(&sandbox_cfg);
898            let filter = match crate::seccomp::bpf::assemble_filter(&[], &deny, &args) {
899                Ok(f) => f,
900                Err(_) => unsafe { libc::_exit(1) },
901            };
902            let _ = crate::seccomp::bpf::install_deny_filter(&filter);
903
904            init_fn();
905
906            drop(pipe_read_ends);
907            crate::fork::fork_ready_loop_fn(ctrl_child_fd, n, &*work_fn, &pipe_write_fds);
908            unsafe { libc::_exit(0) };
909        }
910
911        unsafe { libc::close(ctrl_child_fd) };
912        for wfd in &pipe_write_fds {
913            if *wfd >= 0 { unsafe { libc::close(*wfd) }; }
914        }
915        self.rt_mut().child_pid = Some(pid);
916        self.rt_mut().state = RuntimeState::Running;
917
918        let ctrl_fd = ctrl_parent.as_raw_fd();
919        let mut pid_buf = vec![0u8; n as usize * 4];
920        sandbox_read_exact(ctrl_fd, &mut pid_buf);
921
922        let clone_pids: Vec<i32> = pid_buf.chunks(4)
923            .map(|c| u32::from_be_bytes(c.try_into().unwrap_or([0; 4])) as i32)
924            .collect();
925        let live_count = clone_pids.iter().filter(|&&p| p > 0).count();
926
927        let mut code_buf = vec![0u8; live_count * 4];
928        sandbox_read_exact(ctrl_fd, &mut code_buf);
929        self.rt_mut().ctrl_fd = Some(ctrl_parent);
930
931        let mut status = 0i32;
932        unsafe { libc::waitpid(pid, &mut status, 0) };
933
934        let mut code_idx = 0;
935        let mut clones = Vec::with_capacity(live_count);
936        let mut pipe_iter = pipe_read_ends.into_iter();
937
938        let rt_name = self.rt().name.clone();
939        for &clone_pid in &clone_pids {
940            let pipe = pipe_iter.next();
941            if clone_pid <= 0 { continue; }
942
943            let code = i32::from_be_bytes(
944                code_buf[code_idx * 4..(code_idx + 1) * 4].try_into().unwrap_or([0; 4])
945            );
946            code_idx += 1;
947
948            let mut clone_sb = sandbox_cfg.clone();
949            let clone_name = format!("{}-fork-{}", rt_name, clone_pid);
950            clone_sb.runtime = Some(Box::new(Runtime {
951                name: clone_name,
952                state: RuntimeState::Stopped(if code == 0 {
953                    crate::result::ExitStatus::Code(0)
954                } else if code > 0 {
955                    crate::result::ExitStatus::Code(code)
956                } else {
957                    crate::result::ExitStatus::Killed
958                }),
959                child_pid: Some(clone_pid),
960                pidfd: None,
961                notif_handle: None,
962                throttle_handle: None,
963                loadavg_handle: None,
964                _stdout_read: None,
965                _stderr_read: None,
966                seccomp_cow: None,
967                supervisor_resource: None,
968                supervisor_cow: None,
969                supervisor_network: None,
970                ctrl_fd: None,
971                stdout_pipe: pipe,
972                io_overrides: None,
973                extra_fds: Vec::new(),
974                http_acl_handle: None,
975                on_bind: None,
976                handlers: Vec::new(),
977                ready_w: None,
978            }));
979            clones.push(clone_sb);
980        }
981
982        Ok(clones)
983    }
984
985    /// Reduce: wait for all clones, then run a reducer command.
986    pub async fn reduce(
987        &self,
988        cmd: &[&str],
989        clones: &mut [Sandbox],
990    ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
991        use crate::error::SandboxRuntimeError;
992
993        let mut combined = Vec::new();
994        for clone in clones.iter_mut() {
995            if let Some(ref mut rt) = clone.runtime {
996                if let Some(pipe) = rt.stdout_pipe.take() {
997                    combined.extend_from_slice(&sandbox_read_fd_to_end(pipe));
998                }
999            }
1000        }
1001
1002        let mut stdin_fds = [0i32; 2];
1003        if unsafe { libc::pipe2(stdin_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
1004            return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
1005        }
1006
1007        let write_fd = stdin_fds[1];
1008        let write_handle = tokio::task::spawn_blocking(move || {
1009            unsafe {
1010                libc::write(write_fd, combined.as_ptr() as *const _, combined.len());
1011                libc::close(write_fd);
1012            }
1013        });
1014
1015        let base_name = self.instance_name()
1016            .unwrap_or("sandbox")
1017            .to_owned();
1018        let reducer_name = base_name + "-reduce";
1019        let mut reducer = self.clone().with_name(reducer_name);
1020        reducer.ensure_runtime()?;
1021        reducer.rt_mut().io_overrides = Some((Some(stdin_fds[0]), None, None));
1022        reducer.do_create(cmd, true).await?;
1023        reducer.do_start()?;
1024        unsafe { libc::close(stdin_fds[0]) };
1025
1026        let _ = write_handle.await;
1027        reducer.wait().await
1028    }
1029
1030    /// Lazily initialize the runtime block.
1031    ///
1032    /// Called by lifecycle methods (`spawn`, `run`, `fork`, etc.) on first
1033    /// use. Validates and resolves the sandbox name. Idempotent: returns
1034    /// immediately if runtime is already set.
1035    fn ensure_runtime(&mut self) -> Result<(), crate::error::SandlockError> {
1036        if self.runtime.is_some() {
1037            return Ok(());
1038        }
1039        let name = sandbox_resolve_name(self.name.as_deref())?;
1040        self.runtime = Some(Box::new(Runtime {
1041            name,
1042            state: RuntimeState::Created,
1043            child_pid: None,
1044            pidfd: None,
1045            notif_handle: None,
1046            throttle_handle: None,
1047            loadavg_handle: None,
1048            _stdout_read: None,
1049            _stderr_read: None,
1050            seccomp_cow: None,
1051            supervisor_resource: None,
1052            supervisor_cow: None,
1053            supervisor_network: None,
1054            ctrl_fd: None,
1055            stdout_pipe: None,
1056            io_overrides: None,
1057            extra_fds: Vec::new(),
1058            http_acl_handle: None,
1059            on_bind: None,
1060            handlers: Vec::new(),
1061            ready_w: None,
1062        }));
1063        Ok(())
1064    }
1065
1066    // ================================================================
1067    // Internal: collect_changes / do_abort
1068    // ================================================================
1069
1070    async fn collect_changes(&self) -> Vec<crate::dry_run::Change> {
1071        if let Some(ref rt) = self.runtime {
1072            if let Some(ref cow) = rt.seccomp_cow {
1073                return cow.changes().unwrap_or_default();
1074            }
1075        }
1076        Vec::new()
1077    }
1078
1079    async fn do_abort(&mut self) {
1080        if let Some(ref mut rt) = self.runtime {
1081            if let Some(ref mut cow) = rt.seccomp_cow {
1082                let _ = cow.abort();
1083            }
1084        }
1085    }
1086
1087    // ================================================================
1088    // Internal: do_create (fork + policy install; child parks at the
1089    // ready_r read, awaiting do_start to release it to execve).
1090    // ================================================================
1091
1092    async fn do_create(&mut self, cmd: &[&str], capture: bool) -> Result<(), crate::error::SandlockError> {
1093        use std::ffi::CString;
1094        use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
1095        use crate::error::SandboxRuntimeError;
1096        use crate::context::{PipePair, read_u32_fd};
1097        use crate::network;
1098        use crate::seccomp::ctx::SupervisorCtx;
1099        use crate::seccomp::notif::{self, NotifPolicy};
1100        use crate::seccomp::state::{ChrootState, CowState, NetworkState, PolicyFnState, ProcfsState, ResourceState, TimeRandomState};
1101        use crate::sys::syscall;
1102        use std::time::Duration;
1103
1104        self.ensure_runtime()?;
1105
1106        if !matches!(self.rt().state, RuntimeState::Created) {
1107            return Err(SandboxRuntimeError::Child("sandbox already spawned".into()).into());
1108        }
1109
1110        if cmd.is_empty() {
1111            return Err(SandboxRuntimeError::Child("empty command".into()).into());
1112        }
1113
1114        // Resolve the chroot root eagerly, before any fork or confinement work:
1115        // a configured-but-missing chroot must be a hard error, never a silent
1116        // drop to "no confinement".
1117        let chroot_root = crate::chroot::resolve::resolve_chroot_root(self.chroot.as_deref())?;
1118
1119        let c_cmd: Vec<CString> = cmd
1120            .iter()
1121            .map(|s| CString::new(*s).map_err(|_| SandboxRuntimeError::Child("invalid command string".into())))
1122            .collect::<Result<Vec<_>, _>>()?;
1123
1124        let no_supervisor = self.no_supervisor;
1125
1126        let pipes = PipePair::new().map_err(SandboxRuntimeError::Io)?;
1127
1128        let resolved_net_allow = network::resolve_net_allow(&self.net_allow)
1129            .await
1130            .map_err(SandboxRuntimeError::Io)?;
1131        // In chroot/image mode, seed the synthetic /etc/hosts from the
1132        // rootfs's own file so entries baked into the image (private
1133        // registries, internal hostnames, etc.) survive virtualization.
1134        // Without a chroot, the helper returns the fixed loopback base.
1135        // Either way, concrete-host rules from `net_allow` are appended
1136        // on top.
1137        let virtual_etc_hosts = network::compose_virtual_etc_hosts(
1138            self.chroot.as_deref(),
1139            &resolved_net_allow.concrete_host_entries,
1140        );
1141
1142        if !self.http_allow.is_empty() || !self.http_deny.is_empty() {
1143            let handle = crate::http_acl::spawn_http_acl_proxy(
1144                self.http_allow.clone(),
1145                self.http_deny.clone(),
1146                self.http_ca.as_deref(),
1147                self.http_key.as_deref(),
1148            ).await.map_err(SandboxRuntimeError::Io)?;
1149            self.rt_mut().http_acl_handle = Some(handle);
1150        }
1151
1152        // Seccomp COW: create the branch before fork so the child's Landlock
1153        // ruleset can include the upper layer. Binaries created inside the
1154        // workdir live in the upper dir, and Landlock checks EXECUTE on the
1155        // file's real path at execve time — so the upper dir must be granted
1156        // read+execute (READ_ACCESS) or `./created-binary` fails with EACCES.
1157        let seccomp_cow_branch = if !no_supervisor && self.workdir.is_some() {
1158            let workdir = self.workdir.as_ref().unwrap().clone();
1159            let storage = self.fs_storage.clone();
1160            let max_disk = self.max_disk.map(|b| b.0).unwrap_or(0);
1161            match crate::cow::seccomp::SeccompCowBranch::create(&workdir, storage.as_deref(), max_disk) {
1162                Ok(branch) => {
1163                    self.fs_readable.push(branch.upper_dir().to_path_buf());
1164                    Some(branch)
1165                }
1166                Err(e) => {
1167                    eprintln!("sandlock: seccomp COW branch creation failed: {}", e);
1168                    None
1169                }
1170            }
1171        } else {
1172            None
1173        };
1174
1175        let (stdout_r, stderr_r) = if capture {
1176            let mut stdout_fds = [0i32; 2];
1177            let mut stderr_fds = [0i32; 2];
1178            if unsafe { libc::pipe2(stdout_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
1179                return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
1180            }
1181            if unsafe { libc::pipe2(stderr_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
1182                unsafe {
1183                    libc::close(stdout_fds[0]);
1184                    libc::close(stdout_fds[1]);
1185                }
1186                return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
1187            }
1188            (
1189                Some((
1190                    unsafe { OwnedFd::from_raw_fd(stdout_fds[0]) },
1191                    unsafe { OwnedFd::from_raw_fd(stdout_fds[1]) },
1192                )),
1193                Some((
1194                    unsafe { OwnedFd::from_raw_fd(stderr_fds[0]) },
1195                    unsafe { OwnedFd::from_raw_fd(stderr_fds[1]) },
1196                )),
1197            )
1198        } else {
1199            (None, None)
1200        };
1201
1202        // Capture our PID before fork so the child can detect parent death
1203        // without assuming PID 1 is always init (wrong in containers).
1204        let parent_pid = unsafe { libc::getpid() };
1205
1206        let pid = unsafe { libc::fork() };
1207        if pid < 0 {
1208            return Err(SandboxRuntimeError::Fork(std::io::Error::last_os_error()).into());
1209        }
1210
1211        if pid == 0 {
1212            // ===== CHILD PROCESS =====
1213            let io_overrides = self.rt().io_overrides;
1214            if let Some((stdin_fd, stdout_fd, stderr_fd)) = io_overrides {
1215                if let Some(fd) = stdin_fd { unsafe { libc::dup2(fd, 0) }; }
1216                if let Some(fd) = stdout_fd { unsafe { libc::dup2(fd, 1) }; }
1217                if let Some(fd) = stderr_fd { unsafe { libc::dup2(fd, 2) }; }
1218            }
1219
1220            let extra_fds_copy = self.rt().extra_fds.clone();
1221            for &(target_fd, source_fd) in &extra_fds_copy {
1222                unsafe { libc::dup2(source_fd, target_fd) };
1223            }
1224
1225            if let Some((_, ref stdout_w)) = stdout_r {
1226                unsafe { libc::dup2(stdout_w.as_raw_fd(), 1) };
1227            }
1228            if let Some((_, ref stderr_w)) = stderr_r {
1229                unsafe { libc::dup2(stderr_w.as_raw_fd(), 2) };
1230            }
1231            drop(stdout_r);
1232            drop(stderr_r);
1233
1234            let gather_keep_fds: Vec<i32> = extra_fds_copy.iter().map(|&(target, _)| target).collect();
1235
1236            let extra_syscalls: Vec<u32> = self.rt().handlers
1237                .iter()
1238                .map(|h| h.0 as u32)
1239                .collect();
1240
1241            let sandbox_name = self.rt().name.clone();
1242            context::confine_child(context::ChildSpawnArgs {
1243                sandbox: self,
1244                cmd: &c_cmd,
1245                pipes: &pipes,
1246                no_supervisor,
1247                keep_fds: &gather_keep_fds,
1248                sandbox_name: Some(sandbox_name.as_str()),
1249                extra_syscalls: &extra_syscalls,
1250                parent_pid,
1251            });
1252        }
1253
1254        // ===== PARENT PROCESS =====
1255        drop(pipes.notif_w);
1256        drop(pipes.ready_r);
1257
1258        self.rt_mut()._stdout_read = stdout_r.map(|(r, _w)| r);
1259        self.rt_mut()._stderr_read = stderr_r.map(|(r, _w)| r);
1260
1261        self.rt_mut().child_pid = Some(pid);
1262        // State remains `Created` until `do_start` writes ready_w to release
1263        // the child to execve.
1264
1265        let pidfd = match syscall::pidfd_open(pid as u32, 0) {
1266            Ok(fd) => Some(fd),
1267            Err(_) => None,
1268        };
1269
1270        let notif_fd_num = read_u32_fd(pipes.notif_r.as_raw_fd())
1271            .map_err(|e| SandboxRuntimeError::Child(format!("read notif fd from child: {}", e)))?;
1272
1273        let is_nested_mode = notif_fd_num == 0;
1274
1275        let notif_fd = if is_nested_mode {
1276            None
1277        } else if let Some(ref pfd) = pidfd {
1278            Some(syscall::pidfd_getfd(pfd, notif_fd_num as i32, 0)
1279                .map_err(|e| SandboxRuntimeError::Child(format!("pidfd_getfd: {}", e)))?)
1280        } else {
1281            let path = format!("/proc/{}/fd/{}", pid, notif_fd_num);
1282            let cpath = CString::new(path).unwrap();
1283            let raw = unsafe { libc::open(cpath.as_ptr(), libc::O_RDWR) };
1284            if raw < 0 {
1285                return Err(SandboxRuntimeError::Child("failed to open notif fd from /proc".into()).into());
1286            }
1287            Some(unsafe { OwnedFd::from_raw_fd(raw) })
1288        };
1289
1290        if let Some(notif_fd) = notif_fd {
1291            if self.time_start.is_some() || self.random_seed.is_some() {
1292                let time_offset = self.time_start.map(|t| crate::time::calculate_time_offset(t));
1293                if let Err(e) = crate::vdso::patch(pid, time_offset, self.random_seed.is_some()) {
1294                    eprintln!("sandlock: pre-exec vDSO patching failed (will retry after exec): {}", e);
1295                }
1296            }
1297
1298            let time_offset_val = self.time_start
1299                .map(|t| crate::time::calculate_time_offset(t))
1300                .unwrap_or(0);
1301
1302            let rt_name = self.rt().name.clone();
1303            let notif_policy = NotifPolicy {
1304                max_memory_bytes: self.max_memory.map(|m| m.0).unwrap_or(0),
1305                max_processes: self.max_processes,
1306                has_memory_limit: self.max_memory.is_some(),
1307                has_net_allowlist: !self.net_allow.is_empty()
1308                    || self.policy_fn.is_some()
1309                    || !self.http_allow.is_empty()
1310                    || !self.http_deny.is_empty(),
1311                has_random_seed: self.random_seed.is_some(),
1312                has_time_start: self.time_start.is_some(),
1313                argv_safety_required: self.policy_fn.is_some()
1314                    || self.rt().handlers.iter().any(|h| {
1315                        h.0 == libc::SYS_execve || h.0 == libc::SYS_execveat
1316                    }),
1317                time_offset: time_offset_val,
1318                num_cpus: self.num_cpus,
1319                port_remap: self.port_remap,
1320                cow_enabled: self.workdir.is_some(),
1321                chroot_root: chroot_root.clone(),
1322                chroot_readable: self.fs_readable.clone(),
1323                chroot_writable: self.fs_writable.clone(),
1324                chroot_denied: self.fs_denied.clone(),
1325                chroot_mounts: crate::chroot::resolve::resolve_chroot_mounts(&self.fs_mount),
1326                deterministic_dirs: self.deterministic_dirs,
1327                virtual_hostname: Some(rt_name),
1328                has_http_acl: !self.http_allow.is_empty() || !self.http_deny.is_empty(),
1329                virtual_etc_hosts,
1330            };
1331
1332            use rand::SeedableRng;
1333            use rand_chacha::ChaCha8Rng;
1334
1335            let random_state = self.random_seed.map(|seed| ChaCha8Rng::seed_from_u64(seed));
1336            let time_offset = self.time_start.map(|t| crate::time::calculate_time_offset(t));
1337
1338            let time_random_state = TimeRandomState::new(time_offset, random_state);
1339
1340            let mut net_state = NetworkState::new();
1341            let no_rules = self.net_allow.is_empty();
1342            let policy_from = |resolved: &network::ResolvedNetAllow| {
1343                if no_rules || resolved.any_ip_all_ports {
1344                    crate::seccomp::notif::NetworkPolicy::Unrestricted
1345                } else {
1346                    use crate::seccomp::notif::PortAllow;
1347                    let per_ip = resolved
1348                        .per_ip
1349                        .iter()
1350                        .map(|(ip, ports)| {
1351                            let allow = if resolved.per_ip_all_ports.contains(ip) {
1352                                PortAllow::Any
1353                            } else {
1354                                PortAllow::Specific(ports.clone())
1355                            };
1356                            (*ip, allow)
1357                        })
1358                        .collect();
1359                    crate::seccomp::notif::NetworkPolicy::AllowList {
1360                        per_ip,
1361                        any_ip_ports: resolved.any_ip_ports.clone(),
1362                    }
1363                }
1364            };
1365            net_state.tcp_policy = policy_from(&resolved_net_allow.tcp);
1366            net_state.udp_policy = policy_from(&resolved_net_allow.udp);
1367            net_state.icmp_policy = policy_from(&resolved_net_allow.icmp);
1368            net_state.http_acl_addr = self.rt().http_acl_handle.as_ref().map(|h| h.addr);
1369            net_state.http_acl_ports = self.http_ports.iter().copied().collect();
1370            net_state.http_acl_orig_dest = self.rt().http_acl_handle.as_ref().map(|h| h.orig_dest.clone());
1371            if let Some(cb) = self.rt_mut().on_bind.take() {
1372                net_state.port_map.on_bind = Some(cb);
1373            }
1374
1375            let procfs_state = ProcfsState::new();
1376
1377            let mut res_state = ResourceState::new(
1378                notif_policy.max_memory_bytes,
1379                notif_policy.max_processes,
1380            );
1381            res_state.proc_count = 1;
1382
1383            let mut cow_state = CowState::new();
1384            cow_state.branch = seccomp_cow_branch;
1385
1386            let mut policy_fn_state = PolicyFnState::new();
1387
1388            if let Ok(mut denied) = policy_fn_state.denied_paths.write() {
1389                for path in &self.fs_denied {
1390                    denied.insert(path.to_string_lossy().into_owned());
1391                }
1392            }
1393
1394            if let Some(ref callback) = self.policy_fn {
1395                let mut allowed_ips: std::collections::HashSet<std::net::IpAddr> =
1396                    std::collections::HashSet::new();
1397                for p in [&net_state.tcp_policy, &net_state.udp_policy, &net_state.icmp_policy] {
1398                    if let crate::seccomp::notif::NetworkPolicy::AllowList { per_ip, .. } = p {
1399                        allowed_ips.extend(per_ip.keys().copied());
1400                    }
1401                }
1402                let live = crate::policy_fn::LivePolicy {
1403                    allowed_ips,
1404                    max_memory_bytes: notif_policy.max_memory_bytes,
1405                    max_processes: notif_policy.max_processes,
1406                };
1407                let ceiling = live.clone();
1408                let live = std::sync::Arc::new(std::sync::RwLock::new(live));
1409                let denied_paths = policy_fn_state.denied_paths.clone();
1410                let pid_overrides = net_state.pid_ip_overrides.clone();
1411                policy_fn_state.live_policy = Some(live.clone());
1412                let tx = crate::policy_fn::spawn_policy_fn(
1413                    callback.clone(), live, ceiling, pid_overrides, denied_paths,
1414                );
1415                policy_fn_state.event_tx = Some(tx);
1416            }
1417
1418            let chroot_state = ChrootState::new();
1419
1420            let notif_raw_fd = notif_fd.as_raw_fd();
1421            let child_pidfd_raw = pidfd.as_ref().map(|pfd| pfd.as_raw_fd());
1422
1423            let res_state = Arc::new(tokio::sync::Mutex::new(res_state));
1424            self.rt_mut().supervisor_resource = Some(Arc::clone(&res_state));
1425
1426            let cow_state = Arc::new(tokio::sync::Mutex::new(cow_state));
1427            self.rt_mut().supervisor_cow = Some(Arc::clone(&cow_state));
1428
1429            let net_state = Arc::new(tokio::sync::Mutex::new(net_state));
1430            self.rt_mut().supervisor_network = Some(Arc::clone(&net_state));
1431
1432            let procfs_state = Arc::new(tokio::sync::Mutex::new(procfs_state));
1433            let time_random_state = Arc::new(tokio::sync::Mutex::new(time_random_state));
1434            let policy_fn_state = Arc::new(tokio::sync::Mutex::new(policy_fn_state));
1435            let chroot_state = Arc::new(tokio::sync::Mutex::new(chroot_state));
1436            let processes = Arc::new(crate::seccomp::state::ProcessIndex::new());
1437
1438            let ctx = Arc::new(SupervisorCtx {
1439                resource: Arc::clone(&res_state),
1440                cow: Arc::clone(&cow_state),
1441                procfs: Arc::clone(&procfs_state),
1442                network: Arc::clone(&net_state),
1443                time_random: Arc::clone(&time_random_state),
1444                policy_fn: Arc::clone(&policy_fn_state),
1445                chroot: Arc::clone(&chroot_state),
1446                netlink: Arc::new(crate::netlink::NetlinkState::new()),
1447                processes: Arc::clone(&processes),
1448                policy: Arc::new(notif_policy),
1449                child_pidfd: child_pidfd_raw,
1450                notif_fd: notif_raw_fd,
1451            });
1452
1453            let handlers = std::mem::take(&mut self.rt_mut().handlers);
1454            let (startup_tx, startup_rx) = tokio::sync::oneshot::channel();
1455            self.rt_mut().notif_handle = Some(tokio::spawn(
1456                notif::supervisor(notif_fd, ctx, handlers, startup_tx),
1457            ));
1458            // Wait for the supervisor to register the notif fd with the IO
1459            // driver before we release the child to execve. Otherwise an
1460            // early traced syscall would queue a notification on a fd no
1461            // one is polling, and the child would block until the next
1462            // `block_on` re-enters the runtime. Critical for current-thread
1463            // runtimes, harmless overhead for multi-thread.
1464            match startup_rx.await {
1465                Ok(Ok(())) => {}
1466                Ok(Err(e)) => return Err(SandboxRuntimeError::Io(e).into()),
1467                Err(_) => {
1468                    return Err(SandboxRuntimeError::Child(
1469                        "seccomp supervisor exited during startup".into(),
1470                    ).into());
1471                }
1472            }
1473
1474            let la_resource = Arc::clone(&res_state);
1475            self.rt_mut().loadavg_handle = Some(tokio::spawn(async move {
1476                let mut interval = tokio::time::interval(Duration::from_secs(5));
1477                interval.tick().await;
1478                loop {
1479                    interval.tick().await;
1480                    let mut rs = la_resource.lock().await;
1481                    let running = rs.proc_count;
1482                    rs.load_avg.sample(running);
1483                }
1484            }));
1485        }
1486
1487        if let Some(cpu_pct) = self.max_cpu {
1488            if cpu_pct < 100 {
1489                let child_pid = pid;
1490                self.rt_mut().throttle_handle = Some(tokio::spawn(sandbox_throttle_cpu(child_pid, cpu_pct)));
1491            }
1492        }
1493
1494        self.rt_mut().pidfd = pidfd;
1495        self.rt_mut().ready_w = Some(pipes.ready_w);
1496
1497        Ok(())
1498    }
1499
1500    // ================================================================
1501    // Internal: do_start (release the parked child to execve)
1502    // ================================================================
1503
1504    fn do_start(&mut self) -> Result<(), crate::error::SandlockError> {
1505        use std::os::fd::AsRawFd;
1506        use crate::context::write_u32_fd;
1507        use crate::error::SandboxRuntimeError;
1508
1509        if !matches!(self.rt().state, RuntimeState::Created) {
1510            return Err(SandboxRuntimeError::Child("start() requires a created sandbox".into()).into());
1511        }
1512        let ready_w = self.rt_mut().ready_w.take()
1513            .ok_or_else(|| SandboxRuntimeError::Child("start() called without a prior create()".into()))?;
1514        write_u32_fd(ready_w.as_raw_fd(), 1)
1515            .map_err(|e| SandboxRuntimeError::Child(format!("write ready signal: {}", e)))?;
1516        drop(ready_w);
1517        self.rt_mut().state = RuntimeState::Running;
1518        Ok(())
1519    }
1520}
1521
1522// ================================================================
1523// Drop for Sandbox — kills and reaps child if still running
1524// ================================================================
1525
1526impl Drop for Sandbox {
1527    fn drop(&mut self) {
1528        if let Some(ref mut rt) = self.runtime {
1529            if let Some(pid) = rt.child_pid {
1530                if matches!(rt.state, RuntimeState::Created | RuntimeState::Running | RuntimeState::Paused) {
1531                    unsafe { libc::killpg(pid, libc::SIGKILL) };
1532                    let mut status: i32 = 0;
1533                    unsafe { libc::waitpid(pid, &mut status, 0) };
1534                }
1535            }
1536
1537            if let Some(h) = rt.notif_handle.take() { h.abort(); }
1538            if let Some(h) = rt.throttle_handle.take() { h.abort(); }
1539            if let Some(h) = rt.loadavg_handle.take() { h.abort(); }
1540
1541            let is_error = matches!(
1542                rt.state,
1543                RuntimeState::Stopped(ref s) if !matches!(s, crate::result::ExitStatus::Code(0))
1544            );
1545            let action = if is_error { &self.on_error } else { &self.on_exit };
1546            let action = action.clone();
1547
1548            if let Some(ref mut cow) = rt.seccomp_cow {
1549                match action {
1550                    BranchAction::Commit => { let _ = cow.commit(); }
1551                    BranchAction::Abort => { let _ = cow.abort(); }
1552                    BranchAction::Keep => {}
1553                }
1554            }
1555        }
1556    }
1557}
1558
1559// ================================================================
1560// CPU throttle
1561// ================================================================
1562
1563async fn sandbox_throttle_cpu(pid: i32, cpu_pct: u8) {
1564    use std::time::Duration;
1565    let period = Duration::from_millis(100);
1566    let run_time = period * cpu_pct as u32 / 100;
1567    let stop_time = period - run_time;
1568    loop {
1569        tokio::time::sleep(run_time).await;
1570        if unsafe { libc::killpg(pid, libc::SIGSTOP) } < 0 { break; }
1571        tokio::time::sleep(stop_time).await;
1572        if unsafe { libc::killpg(pid, libc::SIGCONT) } < 0 { break; }
1573    }
1574}
1575
1576// ================================================================
1577// Process name resolution
1578// ================================================================
1579
1580static NEXT_SANDBOX_NAME: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(1);
1581
1582fn sandbox_resolve_name(name: Option<&str>) -> Result<String, crate::error::SandlockError> {
1583    match name {
1584        Some(n) => sandbox_validate_name(n.to_string()),
1585        None => Ok(format!(
1586            "sandbox-{}-{}",
1587            std::process::id(),
1588            NEXT_SANDBOX_NAME.fetch_add(1, std::sync::atomic::Ordering::Relaxed),
1589        )),
1590    }
1591}
1592
1593fn sandbox_validate_name(name: String) -> Result<String, crate::error::SandlockError> {
1594    use crate::error::SandboxRuntimeError;
1595    if name.is_empty() {
1596        return Err(SandboxRuntimeError::Child("sandbox name must not be empty".into()).into());
1597    }
1598    if name.len() > 64 {
1599        return Err(SandboxRuntimeError::Child("sandbox name must be at most 64 bytes".into()).into());
1600    }
1601    if name.as_bytes().contains(&0) {
1602        return Err(SandboxRuntimeError::Child("sandbox name must not contain NUL bytes".into()).into());
1603    }
1604    Ok(name)
1605}
1606
1607// ================================================================
1608// I/O helpers (private)
1609// ================================================================
1610
1611fn sandbox_read_exact(fd: i32, buf: &mut [u8]) {
1612    let mut off = 0;
1613    while off < buf.len() {
1614        let r = unsafe { libc::read(fd, buf[off..].as_mut_ptr() as *mut _, buf.len() - off) };
1615        if r <= 0 { break; }
1616        off += r as usize;
1617    }
1618}
1619
/// Read from `fd` until end-of-file and return every byte collected.
///
/// Ownership of `fd` is taken; the descriptor is closed when the function
/// returns. A read error is deliberately ignored: the bytes gathered before
/// the error are still returned.
fn sandbox_read_fd_to_end(fd: std::os::fd::OwnedFd) -> Vec<u8> {
    use std::io::Read;
    // `File: From<OwnedFd>` moves ownership safely; no raw-fd round trip or
    // `unsafe` is needed.
    let mut file = std::fs::File::from(fd);
    let mut buf = Vec::new();
    let _ = file.read_to_end(&mut buf);
    buf
}
1629
1630fn sandbox_wait_status_to_exit(status: i32) -> crate::result::ExitStatus {
1631    use crate::result::ExitStatus;
1632    if libc::WIFEXITED(status) {
1633        ExitStatus::Code(libc::WEXITSTATUS(status))
1634    } else if libc::WIFSIGNALED(status) {
1635        let sig = libc::WTERMSIG(status);
1636        if sig == libc::SIGKILL {
1637            ExitStatus::Killed
1638        } else {
1639            ExitStatus::Signal(sig)
1640        }
1641    } else {
1642        ExitStatus::Killed
1643    }
1644}
1645
1646fn sandbox_collect_handlers<I, S, H>(
1647    handlers: I,
1648    sandbox: &Sandbox,
1649) -> Result<Vec<(i64, Arc<dyn crate::seccomp::dispatch::Handler>)>, crate::error::SandlockError>
1650where
1651    I: IntoIterator<Item = (S, H)>,
1652    S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
1653    H: crate::seccomp::dispatch::Handler,
1654{
1655    use crate::seccomp::dispatch::{Handler, HandlerError};
1656
1657    let pending: Vec<(i64, Arc<dyn Handler>)> = handlers
1658        .into_iter()
1659        .map(|(syscall, handler)| {
1660            let nr = syscall.try_into().map_err(HandlerError::from)?.raw();
1661            let h: Arc<dyn Handler> = Arc::new(handler);
1662            Ok::<_, HandlerError>((nr, h))
1663        })
1664        .collect::<Result<_, _>>()?;
1665
1666    let nrs: Vec<i64> = pending.iter().map(|(nr, _)| *nr).collect();
1667    crate::seccomp::dispatch::validate_handler_syscalls_against_policy(&nrs, sandbox)
1668        .map_err(|syscall_nr| HandlerError::OnDenySyscall { syscall_nr })?;
1669
1670    Ok(pending)
1671}
1672
1673fn validate_syscall_names(names: &[String]) -> Result<(), SandboxError> {
1674    let unknown: Vec<&str> = names
1675        .iter()
1676        .map(String::as_str)
1677        .filter(|name| crate::context::syscall_name_to_nr(name).is_none())
1678        .collect();
1679    if unknown.is_empty() {
1680        Ok(())
1681    } else {
1682        Err(SandboxError::Invalid(format!(
1683            "unknown syscall name(s): {}",
1684            unknown.join(", ")
1685        )))
1686    }
1687}
1688
/// Fluent builder for `Sandbox`.
///
/// When the `cli` feature is enabled this struct also derives `clap::Args` so
/// that the CLI can expose all per-field flags via `#[clap(flatten)]` without
/// duplicating the flag declarations.
// NOTE: clap's derive turns `///` doc comments on `arg(...)` fields into
// `--help` text, so maintainer notes on fields below use plain `//` comments.
#[derive(Default)]
#[cfg_attr(feature = "cli", derive(clap::Args))]
pub struct SandboxBuilder {
    // Paths collected by `fs_read` / `fs_read_if_exists`.
    #[cfg_attr(feature = "cli", arg(short = 'r', long = "fs-read", value_name = "PATH"))]
    pub fs_readable: Vec<PathBuf>,

    // Paths collected by `fs_write`.
    #[cfg_attr(feature = "cli", arg(short = 'w', long = "fs-write", value_name = "PATH"))]
    pub fs_writable: Vec<PathBuf>,

    // Paths collected by `fs_deny`.
    #[cfg_attr(feature = "cli", arg(long = "fs-deny", value_name = "PATH"))]
    pub fs_denied: Vec<PathBuf>,

    // Checked against the known syscall table in `build_unchecked`.
    /// Extra syscall names to deny (in addition to Sandlock's default blocklist)
    #[cfg_attr(feature = "cli", arg(long = "extra-deny-syscall", value_name = "NAME"))]
    pub extra_deny_syscalls: Vec<String>,

    /// Extra syscall group names to allow (e.g. sysv_ipc)
    #[cfg_attr(feature = "cli", arg(long = "extra-allow-syscall", value_name = "NAME"))]
    pub extra_allow_syscalls: Vec<String>,

    // Kept as raw strings here; parsed with `NetAllow::parse` in `build_unchecked`.
    /// Outbound endpoint allow rule. Repeatable. Each value is
    /// `host:port[,port,...]` (IP-restricted), `:port` or `*:port`
    /// (any IP), or `udp://...` / `icmp://...` for UDP/ICMP.
    /// Examples: `api.openai.com:443`, `github.com:22,443`, `:8080`.
    #[cfg_attr(feature = "cli", arg(long = "net-allow", value_name = "SPEC"))]
    pub net_allow: Vec<String>,

    // Ports collected by `net_bind_port`.
    #[cfg_attr(feature = "cli", arg(long = "net-bind"))]
    pub net_bind: Vec<u16>,

    // Raw rule strings; parsed with `HttpRule::parse` in `build_unchecked`.
    #[cfg_attr(feature = "cli", arg(long = "http-allow", value_name = "RULE"))]
    pub http_allow: Vec<String>,

    // Raw rule strings; parsed with `HttpRule::parse` in `build_unchecked`.
    #[cfg_attr(feature = "cli", arg(long = "http-deny", value_name = "RULE"))]
    pub http_deny: Vec<String>,

    // The default is only applied when this list is empty and at least one
    // HTTP allow/deny rule exists.
    /// TCP ports to intercept for HTTP ACL (default: 80, plus 443 with --http-ca)
    #[cfg_attr(feature = "cli", arg(long = "http-port", value_name = "PORT"))]
    pub http_ports: Vec<u16>,

    // `build_unchecked` rejects a builder where only one of `http_ca` /
    // `http_key` is set.
    /// PEM CA certificate for HTTPS MITM (enables port 443 interception)
    #[cfg_attr(feature = "cli", arg(long = "http-ca", value_name = "PATH"))]
    pub http_ca: Option<PathBuf>,

    /// PEM CA private key for HTTPS MITM (required with --http-ca)
    #[cfg_attr(feature = "cli", arg(long = "http-key", value_name = "PATH"))]
    pub http_key: Option<PathBuf>,

    // max_memory uses a string in the CLI (e.g. "512M"); not directly clap-friendly as ByteSize.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub max_memory: Option<ByteSize>,

    // `build_unchecked` substitutes 64 when this is `None`.
    #[cfg_attr(feature = "cli", arg(short = 'P', long = "max-processes"))]
    pub max_processes: Option<u32>,

    #[cfg_attr(feature = "cli", arg(long = "max-open-files"))]
    pub max_open_files: Option<u32>,

    // Percentage; `build_unchecked` rejects 0 and anything above 100.
    #[cfg_attr(feature = "cli", arg(short = 'c', long = "cpu"))]
    pub max_cpu: Option<u8>,

    #[cfg_attr(feature = "cli", arg(long = "random-seed"))]
    pub random_seed: Option<u64>,

    // time_start requires ISO 8601 string parsing; not directly clap-friendly as SystemTime.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub time_start: Option<SystemTime>,

    #[cfg_attr(feature = "cli", arg(long = "no-randomize-memory"))]
    pub no_randomize_memory: bool,

    #[cfg_attr(feature = "cli", arg(long = "no-huge-pages"))]
    pub no_huge_pages: bool,

    #[cfg_attr(feature = "cli", arg(long = "no-coredump"))]
    pub no_coredump: bool,

    #[cfg_attr(feature = "cli", arg(long = "deterministic-dirs"))]
    pub deterministic_dirs: bool,

    #[cfg_attr(feature = "cli", arg(long = "workdir"))]
    pub workdir: Option<PathBuf>,

    #[cfg_attr(feature = "cli", arg(long = "cwd"))]
    pub cwd: Option<PathBuf>,

    #[cfg_attr(feature = "cli", arg(long = "fs-storage", value_name = "PATH"))]
    pub fs_storage: Option<PathBuf>,

    // max_disk uses a string in the CLI (e.g. "10G"); not directly clap-friendly as ByteSize.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub max_disk: Option<ByteSize>,

    // on_exit/on_error are not exposed as CLI flags.
    // `None` becomes `BranchAction::default()` in `build_unchecked`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub on_exit: Option<BranchAction>,

    // `None` becomes `BranchAction::default()` in `build_unchecked`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub on_error: Option<BranchAction>,

    // fs_mount requires VIRTUAL:HOST string splitting; not directly clap-friendly as Vec<(PathBuf,PathBuf)>.
    // Each entry is stored as (virtual path, host path), in that order.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub fs_mount: Vec<(PathBuf, PathBuf)>,

    #[cfg_attr(feature = "cli", arg(long = "chroot"))]
    pub chroot: Option<PathBuf>,

    #[cfg_attr(feature = "cli", arg(long = "clean-env"))]
    pub clean_env: bool,

    // env requires KEY=VALUE string splitting; not directly clap-friendly as HashMap.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub env: HashMap<String, String>,

    // gpu_devices in CLI uses Vec<u32> with value_delimiter; SandboxBuilder stores Option<Vec<u32>>.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub gpu_devices: Option<Vec<u32>>,

    // cpu_cores in CLI uses Vec<u32> with value_delimiter; SandboxBuilder stores Option<Vec<u32>>.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub cpu_cores: Option<Vec<u32>>,

    #[cfg_attr(feature = "cli", arg(long = "num-cpus"))]
    pub num_cpus: Option<u32>,

    #[cfg_attr(feature = "cli", arg(long = "port-remap"))]
    pub port_remap: bool,

    /// Skip the seccomp user-notification supervisor. The CLI exposes
    /// its own `--no-supervisor` flag on `RunArgs` (which short-circuits
    /// to a direct exec); this field is the API-level counterpart used
    /// when the caller still wants the normal `Sandbox::run` lifecycle
    /// but cannot install a listener (e.g. nested inside another
    /// sandbox).
    #[cfg_attr(feature = "cli", clap(skip))]
    pub no_supervisor: bool,

    #[cfg_attr(feature = "cli", arg(long = "uid"))]
    pub uid: Option<u32>,

    // Internal callback — never a CLI flag.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub policy_fn: Option<crate::policy_fn::PolicyCallback>,

    // Sandbox instance name — stored for transfer into the Sandbox at build time.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub name: Option<String>,

    // COW fork init function — runs once in the child before COW cloning.
    // Boxed `FnOnce`, so it cannot be cloned; `Clone` resets it to `None`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub(crate) init_fn: Option<Box<dyn FnOnce() + Send + 'static>>,

    // COW fork work function — runs in each COW clone.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub(crate) work_fn: Option<Arc<dyn Fn(u32) + Send + Sync + 'static>>,
}
1850
1851impl std::fmt::Debug for SandboxBuilder {
1852    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1853        f.debug_struct("SandboxBuilder")
1854            .field("fs_readable", &self.fs_readable)
1855            .field("fs_writable", &self.fs_writable)
1856            .field("max_memory", &self.max_memory)
1857            .field("max_processes", &self.max_processes)
1858            .field("policy_fn", &self.policy_fn.as_ref().map(|_| "<callback>"))
1859            .finish_non_exhaustive()
1860    }
1861}
1862
1863impl Clone for SandboxBuilder {
1864    /// Clone a `SandboxBuilder`. All config and callback fields are cloned.
1865    /// `init_fn` (FnOnce) is dropped to `None` on the clone; `work_fn` clones
1866    /// via Arc. If the clone also needs an init function, set it again with
1867    /// `.init_fn(...)`.
1868    fn clone(&self) -> Self {
1869        Self {
1870            fs_readable: self.fs_readable.clone(),
1871            fs_writable: self.fs_writable.clone(),
1872            fs_denied: self.fs_denied.clone(),
1873            extra_deny_syscalls: self.extra_deny_syscalls.clone(),
1874            extra_allow_syscalls: self.extra_allow_syscalls.clone(),
1875            net_allow: self.net_allow.clone(),
1876            net_bind: self.net_bind.clone(),
1877            http_allow: self.http_allow.clone(),
1878            http_deny: self.http_deny.clone(),
1879            http_ports: self.http_ports.clone(),
1880            http_ca: self.http_ca.clone(),
1881            http_key: self.http_key.clone(),
1882            max_memory: self.max_memory,
1883            max_processes: self.max_processes,
1884            max_open_files: self.max_open_files,
1885            max_cpu: self.max_cpu,
1886            random_seed: self.random_seed,
1887            time_start: self.time_start,
1888            no_randomize_memory: self.no_randomize_memory,
1889            no_huge_pages: self.no_huge_pages,
1890            no_coredump: self.no_coredump,
1891            deterministic_dirs: self.deterministic_dirs,
1892            workdir: self.workdir.clone(),
1893            cwd: self.cwd.clone(),
1894            fs_storage: self.fs_storage.clone(),
1895            max_disk: self.max_disk,
1896            on_exit: self.on_exit.clone(),
1897            on_error: self.on_error.clone(),
1898            fs_mount: self.fs_mount.clone(),
1899            chroot: self.chroot.clone(),
1900            clean_env: self.clean_env,
1901            env: self.env.clone(),
1902            gpu_devices: self.gpu_devices.clone(),
1903            cpu_cores: self.cpu_cores.clone(),
1904            num_cpus: self.num_cpus,
1905            port_remap: self.port_remap,
1906            no_supervisor: self.no_supervisor,
1907            uid: self.uid,
1908            policy_fn: self.policy_fn.clone(),
1909            name: self.name.clone(),
1910            // init_fn (FnOnce) cannot be cloned — drop to None.
1911            init_fn: None,
1912            // work_fn is Arc-wrapped — clone bumps the reference count.
1913            work_fn: self.work_fn.clone(),
1914        }
1915    }
1916}
1917
impl SandboxBuilder {
    /// Append `path` to the `fs_writable` list.
    pub fn fs_write(mut self, path: impl Into<PathBuf>) -> Self {
        self.fs_writable.push(path.into());
        self
    }

    /// Append `path` to the `fs_readable` list.
    pub fn fs_read(mut self, path: impl Into<PathBuf>) -> Self {
        self.fs_readable.push(path.into());
        self
    }

    /// Like `fs_read`, but only when `path.exists()` is true at the moment of
    /// the call; otherwise the builder is returned unchanged.
    pub fn fs_read_if_exists(self, path: impl Into<PathBuf>) -> Self {
        let path = path.into();
        if path.exists() {
            self.fs_read(path)
        } else {
            self
        }
    }

    /// Append `path` to the `fs_denied` list.
    pub fn fs_deny(mut self, path: impl Into<PathBuf>) -> Self {
        self.fs_denied.push(path.into());
        self
    }

    /// Append `calls` to `extra_deny_syscalls`. The names are checked by
    /// `validate_syscall_names` when the sandbox is built.
    pub fn extra_deny_syscalls(mut self, calls: Vec<String>) -> Self {
        self.extra_deny_syscalls.extend(calls);
        self
    }

    /// Append `names` to `extra_allow_syscalls`.
    pub fn extra_allow_syscalls(mut self, names: Vec<String>) -> Self {
        self.extra_allow_syscalls.extend(names);
        self
    }

    /// Add a network endpoint rule. Spec is `host:port[,port,...]`,
    /// `:port`, or `*:port`. Validated at `build()` time so callers
    /// receive parse errors via the standard `SandboxBuilder` flow.
    ///
    /// Examples:
    /// - `.net_allow("api.openai.com:443")` — HTTPS to OpenAI only
    /// - `.net_allow("github.com:22,443")` — SSH and HTTPS to GitHub
    /// - `.net_allow(":8080")` — any IP on port 8080
    pub fn net_allow(mut self, spec: impl Into<String>) -> Self {
        self.net_allow.push(spec.into());
        self
    }

    /// Append `port` to the `net_bind` list.
    pub fn net_bind_port(mut self, port: u16) -> Self {
        self.net_bind.push(port);
        self
    }

    /// Append an HTTP allow rule. The string is stored as-is and parsed with
    /// `HttpRule::parse` when the sandbox is built.
    pub fn http_allow(mut self, rule: &str) -> Self {
        self.http_allow.push(rule.to_string());
        self
    }

    /// Append an HTTP deny rule. The string is stored as-is and parsed with
    /// `HttpRule::parse` when the sandbox is built.
    pub fn http_deny(mut self, rule: &str) -> Self {
        self.http_deny.push(rule.to_string());
        self
    }

    /// Append `port` to `http_ports`. If no port is ever added but HTTP rules
    /// exist, `build_unchecked` uses 80 (plus 443 when `http_ca` is set).
    pub fn http_port(mut self, port: u16) -> Self {
        self.http_ports.push(port);
        self
    }

    /// Set `http_ca`. Must be paired with `http_key`; `build_unchecked`
    /// rejects a builder where only one of the two is set.
    pub fn http_ca(mut self, path: impl Into<PathBuf>) -> Self {
        self.http_ca = Some(path.into());
        self
    }

    /// Set `http_key`. Must be paired with `http_ca`; `build_unchecked`
    /// rejects a builder where only one of the two is set.
    pub fn http_key(mut self, path: impl Into<PathBuf>) -> Self {
        self.http_key = Some(path.into());
        self
    }

    /// Set `max_memory`.
    pub fn max_memory(mut self, size: ByteSize) -> Self {
        self.max_memory = Some(size);
        self
    }

    /// Set `max_processes`. When never set, `build_unchecked` uses 64.
    pub fn max_processes(mut self, n: u32) -> Self {
        self.max_processes = Some(n);
        self
    }

    /// Set `max_open_files`.
    pub fn max_open_files(mut self, n: u32) -> Self {
        self.max_open_files = Some(n);
        self
    }

    /// Set `max_cpu` (a percentage). `build_unchecked` fails with
    /// `SandboxError::InvalidCpuPercent` for 0 or any value above 100.
    pub fn max_cpu(mut self, pct: u8) -> Self {
        self.max_cpu = Some(pct);
        self
    }

    /// Set `random_seed`.
    pub fn random_seed(mut self, seed: u64) -> Self {
        self.random_seed = Some(seed);
        self
    }

    /// Set `time_start`.
    pub fn time_start(mut self, t: SystemTime) -> Self {
        self.time_start = Some(t);
        self
    }

    /// Set the `no_randomize_memory` flag.
    pub fn no_randomize_memory(mut self, v: bool) -> Self {
        self.no_randomize_memory = v;
        self
    }

    /// Set the `no_huge_pages` flag.
    pub fn no_huge_pages(mut self, v: bool) -> Self {
        self.no_huge_pages = v;
        self
    }

    /// Set the `no_coredump` flag.
    pub fn no_coredump(mut self, v: bool) -> Self {
        self.no_coredump = v;
        self
    }

    /// Set the `deterministic_dirs` flag.
    pub fn deterministic_dirs(mut self, v: bool) -> Self {
        self.deterministic_dirs = v;
        self
    }

    /// Set `workdir`.
    pub fn workdir(mut self, path: impl Into<PathBuf>) -> Self {
        self.workdir = Some(path.into());
        self
    }

    /// Set `cwd`.
    pub fn cwd(mut self, path: impl Into<PathBuf>) -> Self {
        self.cwd = Some(path.into());
        self
    }

    /// Set `fs_storage`.
    pub fn fs_storage(mut self, path: impl Into<PathBuf>) -> Self {
        self.fs_storage = Some(path.into());
        self
    }

    /// Set `max_disk`.
    pub fn max_disk(mut self, size: ByteSize) -> Self {
        self.max_disk = Some(size);
        self
    }

    /// Set the `on_exit` branch action. When never set, `build_unchecked`
    /// uses `BranchAction::default()`.
    pub fn on_exit(mut self, action: BranchAction) -> Self {
        self.on_exit = Some(action);
        self
    }

    /// Set the `on_error` branch action. When never set, `build_unchecked`
    /// uses `BranchAction::default()`.
    pub fn on_error(mut self, action: BranchAction) -> Self {
        self.on_error = Some(action);
        self
    }

    /// Set `chroot`.
    pub fn chroot(mut self, path: impl Into<PathBuf>) -> Self {
        self.chroot = Some(path.into());
        self
    }

    /// Append a `(virtual_path, host_path)` pair to `fs_mount`.
    pub fn fs_mount(mut self, virtual_path: impl Into<PathBuf>, host_path: impl Into<PathBuf>) -> Self {
        self.fs_mount.push((virtual_path.into(), host_path.into()));
        self
    }

    /// Set the `clean_env` flag.
    pub fn clean_env(mut self, v: bool) -> Self {
        self.clean_env = v;
        self
    }

    /// Insert `key = value` into `env`, replacing any earlier value stored
    /// under the same key.
    pub fn env_var(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
        self.env.insert(key.into(), value.into());
        self
    }


    /// Set `gpu_devices`, replacing any list set earlier.
    pub fn gpu_devices(mut self, devices: Vec<u32>) -> Self {
        self.gpu_devices = Some(devices);
        self
    }

    /// Set `cpu_cores`, replacing any list set earlier.
    pub fn cpu_cores(mut self, cores: Vec<u32>) -> Self {
        self.cpu_cores = Some(cores);
        self
    }

    /// Set `num_cpus`.
    pub fn num_cpus(mut self, n: u32) -> Self {
        self.num_cpus = Some(n);
        self
    }

    /// Set the `port_remap` flag.
    pub fn port_remap(mut self, v: bool) -> Self {
        self.port_remap = v;
        self
    }

    /// Skip the seccomp user-notification supervisor. The sandbox keeps
    /// Landlock and the kernel-level deny filter but loses every
    /// supervisor-mediated feature (IP allowlist, resource limits, COW,
    /// chroot mediation, /proc virtualization, custom handlers). The
    /// kernel only permits one `SECCOMP_FILTER_FLAG_NEW_LISTENER` per
    /// task, so set this when nesting `Sandbox::run` inside an already-
    /// confined process; otherwise the inner seccomp install returns
    /// `EBUSY`.
    pub fn no_supervisor(mut self, v: bool) -> Self {
        self.no_supervisor = v;
        self
    }

    /// Install the policy callback. The closure is wrapped in an `Arc` and
    /// replaces any callback set earlier.
    pub fn policy_fn(
        mut self,
        f: impl Fn(crate::policy_fn::SyscallEvent, &mut crate::policy_fn::PolicyContext) -> crate::policy_fn::Verdict + Send + Sync + 'static,
    ) -> Self {
        self.policy_fn = Some(std::sync::Arc::new(f));
        self
    }

    /// Set `uid`.
    pub fn uid(mut self, id: u32) -> Self {
        self.uid = Some(id);
        self
    }

    /// Set the sandbox instance name (exposed as the virtual hostname).
    /// Auto-generated if not set.
    pub fn name(mut self, name: impl Into<String>) -> Self {
        self.name = Some(name.into());
        self
    }

    /// Set the COW-fork init function.
    ///
    /// The init function runs once in the child process before any COW clones
    /// are created. Required for `Sandbox::fork()`.
    pub fn init_fn(mut self, f: impl FnOnce() + Send + 'static) -> Self {
        self.init_fn = Some(Box::new(f));
        self
    }

    /// Set the COW-fork work function.
    ///
    /// The work function runs in each COW clone (`fork(N)` produces N clones).
    /// Required for `Sandbox::fork()`.
    pub fn work_fn(mut self, f: impl Fn(u32) + Send + Sync + 'static) -> Self {
        self.work_fn = Some(Arc::new(f));
        self
    }

    /// Build a `Sandbox`, parsing all string fields and running per-field
    /// validation, but **without** the cross-section checks that
    /// `Sandbox::validate` performs. Use this in tests that deliberately
    /// construct sandboxes violating cross-section invariants.
    ///
    /// # Errors
    ///
    /// - `SandboxError::Invalid` when `extra_deny_syscalls` contains an
    ///   unknown name, or when only one of `http_ca` / `http_key` is set
    /// - `SandboxError::InvalidCpuPercent` when `max_cpu` is 0 or above 100
    /// - whatever `HttpRule::parse` or `NetAllow::parse` returns for a
    ///   malformed rule string
    pub fn build_unchecked(self) -> Result<Sandbox, SandboxError> {
        validate_syscall_names(&self.extra_deny_syscalls)?;

        // Validate: max_cpu must be 1-100
        if let Some(cpu) = self.max_cpu {
            if cpu == 0 || cpu > 100 {
                return Err(SandboxError::InvalidCpuPercent(cpu));
            }
        }

        // Validate: http_ca and http_key must both be set or both unset
        if self.http_ca.is_some() != self.http_key.is_some() {
            return Err(SandboxError::Invalid(
                "--http-ca and --http-key must both be provided together".into(),
            ));
        }

        // Parse HTTP rules (deferred from builder methods to propagate errors)
        let http_allow: Vec<HttpRule> = self
            .http_allow
            .into_iter()
            .map(|s| HttpRule::parse(&s))
            .collect::<Result<_, _>>()?;
        let http_deny: Vec<HttpRule> = self
            .http_deny
            .into_iter()
            .map(|s| HttpRule::parse(&s))
            .collect::<Result<_, _>>()?;

        // Default HTTP intercept ports: 80 always, 443 when HTTPS CA is configured.
        // The default only applies when no port was given AND at least one rule exists.
        let http_ports = if self.http_ports.is_empty() && (!http_allow.is_empty() || !http_deny.is_empty()) {
            let mut ports = vec![80];
            if self.http_ca.is_some() {
                ports.push(443);
            }
            ports
        } else {
            self.http_ports
        };

        // Parse user-supplied --net-allow specs.
        let mut net_allow: Vec<NetAllow> = self
            .net_allow
            .into_iter()
            .map(|s| NetAllow::parse(&s))
            .collect::<Result<_, _>>()?;

        // Auto-merge HTTP rules into the network allowlist so the proxy's
        // intercept ports remain reachable. A rule with a concrete host
        // tightens the IP allowlist (only that host on http_ports);
        // wildcard hosts add a `:port` (any IP) rule. This mirrors the
        // intent of the old `http_port → net_connect` merge but at the
        // endpoint level so HTTP and net_allow stay aligned.
        if !http_ports.is_empty() {
            let mut wildcard_seen = false;
            // Distinct hosts in first-seen order, compared ASCII-case-insensitively.
            let mut concrete_hosts: Vec<String> = Vec::new();
            for rule in http_allow.iter().chain(http_deny.iter()) {
                if rule.host == "*" {
                    wildcard_seen = true;
                } else if !concrete_hosts.iter().any(|h| h.eq_ignore_ascii_case(&rule.host)) {
                    concrete_hosts.push(rule.host.clone());
                }
            }
            if wildcard_seen || (http_allow.is_empty() && http_deny.is_empty()) {
                // Fallback: explicit --http-port without rules, or wildcard rules.
                net_allow.push(NetAllow {
                    protocol: Protocol::Tcp,
                    host: None,
                    ports: http_ports.clone(),
                    all_ports: false,
                });
            }
            for h in concrete_hosts {
                net_allow.push(NetAllow {
                    protocol: Protocol::Tcp,
                    host: Some(h),
                    ports: http_ports.clone(),
                    all_ports: false,
                });
            }
        }

        Ok(Sandbox {
            fs_writable: self.fs_writable,
            fs_readable: self.fs_readable,
            fs_denied: self.fs_denied,
            extra_deny_syscalls: self.extra_deny_syscalls,
            extra_allow_syscalls: self.extra_allow_syscalls,
            net_allow,
            net_bind: self.net_bind,
            http_allow,
            http_deny,
            http_ports,
            http_ca: self.http_ca,
            http_key: self.http_key,
            max_memory: self.max_memory,
            max_processes: self.max_processes.unwrap_or(64),
            max_open_files: self.max_open_files,
            max_cpu: self.max_cpu,
            random_seed: self.random_seed,
            time_start: self.time_start,
            no_randomize_memory: self.no_randomize_memory,
            no_huge_pages: self.no_huge_pages,
            no_coredump: self.no_coredump,
            deterministic_dirs: self.deterministic_dirs,
            workdir: self.workdir,
            cwd: self.cwd,
            fs_storage: self.fs_storage,
            max_disk: self.max_disk,
            on_exit: self.on_exit.unwrap_or_default(),
            on_error: self.on_error.unwrap_or_default(),
            fs_mount: self.fs_mount,
            chroot: self.chroot,
            clean_env: self.clean_env,
            env: self.env,
            gpu_devices: self.gpu_devices,
            cpu_cores: self.cpu_cores,
            num_cpus: self.num_cpus,
            port_remap: self.port_remap,
            no_supervisor: self.no_supervisor,
            uid: self.uid,
            policy_fn: self.policy_fn,
            name: self.name,
            init_fn: self.init_fn,
            work_fn: self.work_fn,
            // A freshly built sandbox has no runtime attached yet.
            runtime: None,
        })
    }

    /// Build a `Sandbox`, parsing all string fields, running per-field validation,
    /// and verifying cross-section invariants via `Sandbox::validate`.
    pub fn build(self) -> Result<Sandbox, SandboxError> {
        let p = self.build_unchecked()?;
        p.validate()?;
        Ok(p)
    }
}
2309
#[cfg(test)]
mod tests {
    use super::*;

    // --- SandboxBuilder integration ---

    /// One allow rule and one deny rule each end up in their own list, with
    /// the method and host fields populated from the rule string.
    #[test]
    fn builder_http_rules() {
        let sandbox = Sandbox::builder()
            .http_allow("GET api.example.com/v1/*")
            .http_deny("* */admin/*")
            .build()
            .unwrap();

        let (allowed, denied) = (&sandbox.http_allow, &sandbox.http_deny);
        assert_eq!(allowed.len(), 1);
        assert_eq!(denied.len(), 1);
        assert_eq!(allowed[0].method, "GET");
        assert_eq!(denied[0].host, "*");
    }

    /// A malformed allow rule (`GETexample.com`) makes `build` fail.
    #[test]
    fn builder_invalid_http_allow_returns_error() {
        assert!(Sandbox::builder()
            .http_allow("GETexample.com")
            .build()
            .is_err());
    }

    /// A malformed deny rule (`BADRULE`) makes `build` fail.
    #[test]
    fn builder_invalid_http_deny_returns_error() {
        assert!(Sandbox::builder().http_deny("BADRULE").build().is_err());
    }

    /// Supplying only the CA path, with no key, makes `build` fail.
    #[test]
    fn builder_http_ca_without_key_returns_error() {
        assert!(Sandbox::builder().http_ca("/tmp/ca.pem").build().is_err());
    }

    /// Supplying only the key path, with no CA, makes `build` fail.
    #[test]
    fn builder_http_key_without_ca_returns_error() {
        assert!(Sandbox::builder().http_key("/tmp/key.pem").build().is_err());
    }

    /// Supplying both the CA and the key builds successfully and keeps both.
    #[test]
    fn builder_http_ca_and_key_together_ok() {
        let sandbox = Sandbox::builder()
            .http_ca("/tmp/ca.pem")
            .http_key("/tmp/key.pem")
            .build()
            .unwrap();

        assert!(sandbox.http_ca.is_some());
        assert!(sandbox.http_key.is_some());
    }

    /// `allows_sysv_ipc` is true only when `extra_allow_syscalls` contains
    /// the `sysv_ipc` entry; the default and an unrelated entry both yield false.
    #[test]
    fn allows_sysv_ipc_reads_extra_allow_syscalls() {
        // Entry present.
        let with_sysv = Sandbox::builder()
            .extra_allow_syscalls(vec!["sysv_ipc".into()])
            .build()
            .unwrap();
        assert!(with_sysv.allows_sysv_ipc());

        // Nothing configured.
        let defaults = Sandbox::builder().build().unwrap();
        assert!(!defaults.allows_sysv_ipc());

        // Unrelated entry only.
        let unrelated = Sandbox::builder()
            .extra_allow_syscalls(vec!["other_group".into()])
            .build()
            .unwrap();
        assert!(!unrelated.allows_sysv_ipc());
    }
}