Skip to main content

sandlock_core/
sandbox.rs

1use std::collections::HashMap;
2use std::os::fd::AsRawFd;
3use std::path::PathBuf;
4use std::sync::Arc;
5use std::time::SystemTime;
6
7use serde::{Deserialize, Serialize};
8use tokio::task::JoinHandle;
9
10use crate::context;
11use crate::error::SandboxError;
12pub use crate::http::{http_acl_check, normalize_path, prefix_or_exact_match, HttpRule};
13pub use crate::network::{IpCidr, NetAllow, NetDeny, NetRule, NetTarget, Protocol};
14use crate::protection::{Protection, ProtectionPolicy, ProtectionState, ProtectionStatus};
15
16/// A byte size value.
17#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
18pub struct ByteSize(pub u64);
19
20impl ByteSize {
21    pub fn bytes(n: u64) -> Self {
22        ByteSize(n)
23    }
24
25    pub fn kib(n: u64) -> Self {
26        ByteSize(n * 1024)
27    }
28
29    pub fn mib(n: u64) -> Self {
30        ByteSize(n * 1024 * 1024)
31    }
32
33    pub fn gib(n: u64) -> Self {
34        ByteSize(n * 1024 * 1024 * 1024)
35    }
36
37    pub fn parse(s: &str) -> Result<Self, SandboxError> {
38        let s = s.trim();
39        if s.is_empty() {
40            return Err(SandboxError::Invalid("empty byte size string".into()));
41        }
42
43        // Check for suffix
44        let last = s.chars().last().unwrap();
45        if last.is_ascii_alphabetic() {
46            let (num_str, suffix) = s.split_at(s.len() - 1);
47            let n: u64 = num_str
48                .trim()
49                .parse()
50                .map_err(|_| SandboxError::Invalid(format!("invalid byte size: {}", s)))?;
51            match suffix.to_ascii_uppercase().as_str() {
52                "K" => Ok(ByteSize::kib(n)),
53                "M" => Ok(ByteSize::mib(n)),
54                "G" => Ok(ByteSize::gib(n)),
55                other => Err(SandboxError::Invalid(format!("unknown byte size suffix: {}", other))),
56            }
57        } else {
58            let n: u64 = s
59                .parse()
60                .map_err(|_| SandboxError::Invalid(format!("invalid byte size: {}", s)))?;
61            Ok(ByteSize(n))
62        }
63    }
64}
65
66/// Confinement for confining the current process in place.
67#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
68pub struct Confinement {
69    pub fs_writable: Vec<PathBuf>,
70    pub fs_readable: Vec<PathBuf>,
71}
72
73impl Confinement {
74    pub fn builder() -> ConfinementBuilder {
75        ConfinementBuilder::default()
76    }
77}
78
/// Incremental builder for [`Confinement`].
///
/// Starts with both lists empty (via `Default`); paths are appended with
/// `fs_write` / `fs_read` and the result is produced by `build`.
#[derive(Default)]
pub struct ConfinementBuilder {
    /// Writable paths collected so far.
    fs_writable: Vec<PathBuf>,
    /// Readable paths collected so far.
    fs_readable: Vec<PathBuf>,
}
84
85impl ConfinementBuilder {
86    pub fn fs_write(mut self, path: impl Into<PathBuf>) -> Self {
87        self.fs_writable.push(path.into());
88        self
89    }
90
91    pub fn fs_read(mut self, path: impl Into<PathBuf>) -> Self {
92        self.fs_readable.push(path.into());
93        self
94    }
95
96    pub fn build(self) -> Confinement {
97        Confinement {
98            fs_writable: self.fs_writable,
99            fs_readable: self.fs_readable,
100        }
101    }
102}
103
104impl TryFrom<&Sandbox> for Confinement {
105    type Error = SandboxError;
106
107    fn try_from(sandbox: &Sandbox) -> Result<Self, Self::Error> {
108        let mut unsupported = Vec::new();
109        if !sandbox.fs_denied.is_empty() { unsupported.push("fs_denied"); }
110        if !sandbox.extra_deny_syscalls.is_empty() { unsupported.push("extra_deny_syscalls"); }
111        if !sandbox.net_allow.is_empty() { unsupported.push("net_allow"); }
112        if !sandbox.net_deny.is_empty() { unsupported.push("net_deny"); }
113        if !sandbox.net_allow_bind.is_empty() { unsupported.push("net_allow_bind"); }
114        if !sandbox.net_deny_bind.is_empty() { unsupported.push("net_deny_bind"); }
115        if sandbox.allows_sysv_ipc() { unsupported.push("extra_allow_syscalls=[\"sysv_ipc\"]"); }
116        if !sandbox.http_allow.is_empty() { unsupported.push("http_allow"); }
117        if !sandbox.http_deny.is_empty() { unsupported.push("http_deny"); }
118        if !sandbox.http_ports.is_empty() { unsupported.push("http_ports"); }
119        if sandbox.http_ca.is_some() { unsupported.push("http_ca"); }
120        if sandbox.http_key.is_some() { unsupported.push("http_key"); }
121        if !sandbox.http_inject_ca.is_empty() { unsupported.push("http_inject_ca"); }
122        if sandbox.http_ca_out.is_some() { unsupported.push("http_ca_out"); }
123        if sandbox.max_memory.is_some() { unsupported.push("max_memory"); }
124        if sandbox.max_processes != 64 { unsupported.push("max_processes"); }
125        if sandbox.max_open_files.is_some() { unsupported.push("max_open_files"); }
126        if sandbox.max_cpu.is_some() { unsupported.push("max_cpu"); }
127        if sandbox.random_seed.is_some() { unsupported.push("random_seed"); }
128        if sandbox.time_start.is_some() { unsupported.push("time_start"); }
129        if sandbox.no_randomize_memory { unsupported.push("no_randomize_memory"); }
130        if sandbox.no_huge_pages { unsupported.push("no_huge_pages"); }
131        if sandbox.no_coredump { unsupported.push("no_coredump"); }
132        if sandbox.deterministic_dirs { unsupported.push("deterministic_dirs"); }
133        if sandbox.workdir.is_some() { unsupported.push("workdir"); }
134        if sandbox.cwd.is_some() { unsupported.push("cwd"); }
135        if sandbox.fs_storage.is_some() { unsupported.push("fs_storage"); }
136        if sandbox.max_disk.is_some() { unsupported.push("max_disk"); }
137        if sandbox.on_exit != BranchAction::Commit { unsupported.push("on_exit"); }
138        if sandbox.on_error != BranchAction::Abort { unsupported.push("on_error"); }
139        if !sandbox.fs_mount.is_empty() { unsupported.push("fs_mount"); }
140        if sandbox.chroot.is_some() { unsupported.push("chroot"); }
141        if sandbox.clean_env { unsupported.push("clean_env"); }
142        if !sandbox.env.is_empty() { unsupported.push("env"); }
143        if sandbox.gpu_devices.is_some() { unsupported.push("gpu_devices"); }
144        if sandbox.cpu_cores.is_some() { unsupported.push("cpu_cores"); }
145        if sandbox.num_cpus.is_some() { unsupported.push("num_cpus"); }
146        if sandbox.port_remap { unsupported.push("port_remap"); }
147        if sandbox.uid.is_some() { unsupported.push("uid"); }
148        if sandbox.policy_fn.is_some() { unsupported.push("policy_fn"); }
149
150        if !unsupported.is_empty() {
151            return Err(SandboxError::UnsupportedForConfine(unsupported.join(", ")));
152        }
153
154        Ok(Self {
155            fs_writable: sandbox.fs_writable.clone(),
156            fs_readable: sandbox.fs_readable.clone(),
157        })
158    }
159}
160
161/// Action to take on branch exit.
162#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
163pub enum BranchAction {
164    #[default]
165    Commit,
166    Abort,
167    Keep,
168}
169
170// ============================================================
171// Runtime — private heap-allocated state, present only while running
172// ============================================================
173
174/// Private runtime state.  Only allocated after `start()` / `run()` is
175/// called; `None` for config-only `Sandbox` instances.
176struct Runtime {
177    name: String,
178    state: RuntimeState,
179    child_pid: Option<i32>,
180    pidfd: Option<std::os::fd::OwnedFd>,
181    notif_handle: Option<JoinHandle<()>>,
182    throttle_handle: Option<JoinHandle<()>>,
183    loadavg_handle: Option<JoinHandle<()>>,
184    _stdout_read: Option<std::os::fd::OwnedFd>,
185    _stderr_read: Option<std::os::fd::OwnedFd>,
186    seccomp_cow: Option<crate::cow::seccomp::SeccompCowBranch>,
187    supervisor_resource: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::ResourceState>>>,
188    supervisor_cow: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::CowState>>>,
189    supervisor_network: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::NetworkState>>>,
190    ctrl_fd: Option<std::os::fd::OwnedFd>,
191    stdout_pipe: Option<std::os::fd::OwnedFd>,
192    io_overrides: Option<(Option<i32>, Option<i32>, Option<i32>)>,
193    extra_fds: Vec<(i32, i32)>,
194    http_acl_handle: Option<crate::transparent_proxy::HttpAclProxyHandle>,
195    #[allow(clippy::type_complexity)]
196    on_bind: Option<Box<dyn Fn(&HashMap<u16, u16>) + Send + Sync>>,
197    handlers: Vec<(i64, Arc<dyn crate::seccomp::dispatch::Handler>)>,
198    ready_w: Option<std::os::fd::OwnedFd>,
199}
200
201/// Lifecycle state for the runtime.
202enum RuntimeState {
203    Created,
204    Running,
205    Paused,
206    Stopped(crate::result::ExitStatus),
207}
208
209/// Sandbox configuration.
210#[derive(Serialize, Deserialize)]
211pub struct Sandbox {
212    // Filesystem access
213    pub fs_writable: Vec<PathBuf>,
214    pub fs_readable: Vec<PathBuf>,
215    pub fs_denied: Vec<PathBuf>,
216
217    // Extra syscall filtering on top of Sandlock's default blocklist.
218    pub extra_deny_syscalls: Vec<String>,
219    pub extra_allow_syscalls: Vec<String>,
220
221    /// Per-protection enforcement policy. Default
222    /// (`ProtectionPolicy::strict_all()`) preserves the historical hard
223    /// `MIN_ABI = 6` behaviour; `SandboxBuilder::allow_degraded` /
224    /// `::disable` deviate from strict-all per protection.
225    ///
226    /// Part of the checkpoint: a saved sandbox restores with its exact
227    /// protection posture. Without this, a sandbox built with a
228    /// `disable()` opt-out (required on, e.g., a v5 host that cannot
229    /// provide a v6 scope) would silently reset to `strict_all()` on
230    /// load and fail to restore.
231    pub protection_policy: ProtectionPolicy,
232
233    // Network
234    /// Outbound endpoint allowlist as a list of `(protocol, host?, ports)`
235    /// rules. Each rule names a protocol (TCP/UDP/ICMP) and either a
236    /// concrete host or "any IP." TCP and UDP rules carry ports; ICMP
237    /// rules have none.
238    ///
239    /// **Protocol gating falls out of rule presence.** Sandlock denies
240    /// UDP and ICMP socket creation by default; opting in is "list at
241    /// least one rule for that protocol" (e.g. `udp://*:*` for any UDP,
242    /// `icmp://*` for any ICMP echo). TCP is always permitted.
243    ///
244    /// Empty `net_allow` and empty `http_allow`/`http_deny` together
245    /// mean "deny all outbound" (Landlock direct path denies, no
246    /// on-behalf path is enabled). Otherwise, the on-behalf path
247    /// enforces these rules: a destination is permitted iff any rule
248    /// matches the protocol, destination IP (or has `host: None` = any
249    /// IP), and destination port (N/A for ICMP).
250    ///
251    /// HTTP rules with concrete hosts auto-add a matching
252    /// `(Tcp, host, [80])` (and `(Tcp, host, [443])` when `--http-ca`
253    /// is set) entry at build time so the proxy's intercept ports
254    /// remain reachable. HTTP rules with wildcard hosts auto-add
255    /// `(Tcp, None, [80])` instead.
256    pub net_allow: Vec<NetAllow>,
257    /// Parsed `--net-deny` rules (default-allow, IP/CIDR/port denylist).
258    /// Mutually exclusive with `net_allow`.
259    pub net_deny: Vec<NetDeny>,
260    /// `--net-allow-bind`: TCP ports the sandbox may bind (default-deny
261    /// allowlist, Landlock-enforced). Mutually exclusive with `net_deny_bind`.
262    pub net_allow_bind: Vec<u16>,
263    /// `--net-deny-bind`: TCP ports the sandbox may NOT bind (default-allow
264    /// denylist, enforced on the on-behalf `bind()` path). Mutually
265    /// exclusive with `net_allow_bind`.
266    pub net_deny_bind: Vec<u16>,
267    // HTTP ACL
268    pub http_allow: Vec<HttpRule>,
269    pub http_deny: Vec<HttpRule>,
270    /// TCP ports to intercept for HTTP ACL. Defaults to [80] (plus 443 when
271    /// http_ca is set). Override with `http_ports` to intercept custom ports.
272    pub http_ports: Vec<u16>,
273    /// PEM CA cert for HTTPS MITM. When set, port 443 is also intercepted.
274    pub http_ca: Option<PathBuf>,
275    /// PEM CA key for HTTPS MITM. Required when http_ca is set.
276    pub http_key: Option<PathBuf>,
277    /// Trust-bundle paths to splice the MITM CA into (zero-config HTTPS).
278    pub http_inject_ca: Vec<PathBuf>,
279    /// Path to write the active MITM CA public cert (PEM) for external trust
280    /// wiring (e.g. NODE_EXTRA_CA_CERTS). Never writes the private key.
281    pub http_ca_out: Option<PathBuf>,
282
283    // Resource limits
284    pub max_memory: Option<ByteSize>,
285    pub max_processes: u32,
286    pub max_open_files: Option<u32>,
287    pub max_cpu: Option<u8>,
288
289    // Reproducibility
290    pub random_seed: Option<u64>,
291    pub time_start: Option<SystemTime>,
292    pub no_randomize_memory: bool,
293    pub no_huge_pages: bool,
294    pub no_coredump: bool,
295    pub deterministic_dirs: bool,
296
297    // Filesystem branch
298    pub workdir: Option<PathBuf>,
299    pub cwd: Option<PathBuf>,
300    pub fs_storage: Option<PathBuf>,
301    pub max_disk: Option<ByteSize>,
302    pub on_exit: BranchAction,
303    pub on_error: BranchAction,
304
305    // Mount mappings: (virtual_path_inside_chroot, host_path_on_disk)
306    pub fs_mount: Vec<(PathBuf, PathBuf)>,
307
308    // Environment
309    pub chroot: Option<PathBuf>,
310    pub clean_env: bool,
311    pub env: HashMap<String, String>,
312    // Devices
313    pub gpu_devices: Option<Vec<u32>>,
314
315    // CPU
316    pub cpu_cores: Option<Vec<u32>>,
317    pub num_cpus: Option<u32>,
318    pub port_remap: bool,
319
320    /// Skip the seccomp user-notification supervisor. The sandbox runs
321    /// with Landlock + a kernel-only deny filter, with none of the
322    /// supervisor-mediated features (IP allowlist, resource limits,
323    /// COW, chroot mediation, /proc virtualization, custom handlers).
324    /// Required when nesting inside another sandlock — the kernel only
325    /// allows one `SECCOMP_FILTER_FLAG_NEW_LISTENER` per task.
326    pub no_supervisor: bool,
327
328    // User namespace
329    pub uid: Option<u32>,
330
331    // Dynamic policy callback
332    #[serde(skip)]
333    pub policy_fn: Option<crate::policy_fn::PolicyCallback>,
334
335    // Sandbox instance name (exposed as virtual hostname; auto-generated if None).
336    // Not serialized — instance names are set at runtime, not in the policy file.
337    #[serde(skip)]
338    pub name: Option<String>,
339
340    // COW fork init function — runs once in the child before COW cloning.
341    // Not serialized; not cloned (FnOnce can't be cloned — drops to None on clone).
342    #[serde(skip)]
343    init_fn: Option<Box<dyn FnOnce() + Send + 'static>>,
344
345    // COW fork work function — runs in each COW clone.
346    // Not serialized; cloned via Arc (cheap).
347    #[serde(skip)]
348    work_fn: Option<Arc<dyn Fn(u32) + Send + Sync + 'static>>,
349
350    // Heap-allocated runtime state; `None` when not started.
351    #[serde(skip)]
352    runtime: Option<Box<Runtime>>,
353}
354
355impl std::fmt::Debug for Sandbox {
356    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
357        f.debug_struct("Sandbox")
358            .field("fs_readable", &self.fs_readable)
359            .field("fs_writable", &self.fs_writable)
360            .field("max_memory", &self.max_memory)
361            .field("max_processes", &self.max_processes)
362            .field("policy_fn", &self.policy_fn.as_ref().map(|_| "<callback>"))
363            .field("name", &self.name)
364            .field("runtime", &self.runtime.as_ref().map(|_| "<runtime>"))
365            .finish_non_exhaustive()
366    }
367}
368
369impl Clone for Sandbox {
370    /// Clone a `Sandbox` — config and runtime-kwargs fields are cloned; the
371    /// runtime state is not (the clone starts with `runtime: None`).
372    ///
373    /// Field clone semantics:
374    /// - `policy_fn` — Arc bump (cheap).
375    /// - `work_fn`   — Arc bump (cheap); multiple Sandboxes share the closure.
376    /// - `init_fn`   — **dropped to `None`** (FnOnce can't be cloned). If the
377    ///   clone also needs an init function, call `.init_fn(...)` on it
378    ///   separately or set it via `SandboxBuilder::init_fn`.
379    /// - `runtime`   — always `None`; the clone is a fresh, un-started Sandbox.
380    fn clone(&self) -> Self {
381        Self {
382            fs_writable: self.fs_writable.clone(),
383            fs_readable: self.fs_readable.clone(),
384            fs_denied: self.fs_denied.clone(),
385            extra_deny_syscalls: self.extra_deny_syscalls.clone(),
386            extra_allow_syscalls: self.extra_allow_syscalls.clone(),
387            protection_policy: self.protection_policy.clone(),
388            net_allow: self.net_allow.clone(),
389            net_deny: self.net_deny.clone(),
390            net_allow_bind: self.net_allow_bind.clone(),
391            net_deny_bind: self.net_deny_bind.clone(),
392            http_allow: self.http_allow.clone(),
393            http_deny: self.http_deny.clone(),
394            http_ports: self.http_ports.clone(),
395            http_ca: self.http_ca.clone(),
396            http_key: self.http_key.clone(),
397            http_inject_ca: self.http_inject_ca.clone(),
398            http_ca_out: self.http_ca_out.clone(),
399            max_memory: self.max_memory,
400            max_processes: self.max_processes,
401            max_open_files: self.max_open_files,
402            max_cpu: self.max_cpu,
403            random_seed: self.random_seed,
404            time_start: self.time_start,
405            no_randomize_memory: self.no_randomize_memory,
406            no_huge_pages: self.no_huge_pages,
407            no_coredump: self.no_coredump,
408            deterministic_dirs: self.deterministic_dirs,
409            workdir: self.workdir.clone(),
410            cwd: self.cwd.clone(),
411            fs_storage: self.fs_storage.clone(),
412            max_disk: self.max_disk,
413            on_exit: self.on_exit.clone(),
414            on_error: self.on_error.clone(),
415            fs_mount: self.fs_mount.clone(),
416            chroot: self.chroot.clone(),
417            clean_env: self.clean_env,
418            env: self.env.clone(),
419            gpu_devices: self.gpu_devices.clone(),
420            cpu_cores: self.cpu_cores.clone(),
421            num_cpus: self.num_cpus,
422            port_remap: self.port_remap,
423            no_supervisor: self.no_supervisor,
424            uid: self.uid,
425            policy_fn: self.policy_fn.clone(),
426            name: self.name.clone(),
427            // init_fn (FnOnce) cannot be cloned — the clone gets None.
428            // If the clone also needs an init function, set it explicitly.
429            init_fn: None,
430            // work_fn is Arc-wrapped — clone bumps the reference count.
431            work_fn: self.work_fn.clone(),
432            // Runtime is NOT cloned — the clone starts with no runtime.
433            runtime: None,
434        }
435    }
436}
437
438impl Sandbox {
439    pub fn builder() -> SandboxBuilder {
440        SandboxBuilder::default()
441    }
442
443    /// Returns true iff the policy grants the `sysv_ipc` syscall group.
444    pub fn allows_sysv_ipc(&self) -> bool {
445        self.extra_allow_syscalls.iter().any(|s| s == "sysv_ipc")
446    }
447
448    /// Validate cross-section invariants — checks that span multiple fields.
449    ///
450    /// Currently a no-op; retained as an extension point and for API
451    /// stability. Idempotent: calling repeatedly is safe.
452    pub fn validate(&self) -> Result<(), SandboxError> {
453        Ok(())
454    }
455
456    /// Resolve the per-protection state against the host's current
457    /// Landlock ABI. Returns one entry per `Protection`. Useful for
458    /// post-`build()` posture inspection.
459    pub fn active_protections(&self) -> Result<Vec<(Protection, ProtectionStatus)>, crate::error::SandlockError> {
460        let host_abi = crate::landlock::abi_version().map_err(|e| {
461            crate::error::SandlockError::Runtime(crate::error::SandboxRuntimeError::Confinement(e))
462        })?;
463        Ok(Protection::all()
464            .map(|p| (p, ProtectionStatus::resolve(p, host_abi, &self.protection_policy)))
465            .collect())
466    }
467
468    // ================================================================
469    // Runtime accessor helpers (private)
470    // ================================================================
471
472    fn rt(&self) -> &Runtime {
473        self.runtime.as_ref().expect("sandbox not started")
474    }
475
476    fn rt_mut(&mut self) -> &mut Runtime {
477        self.runtime.as_mut().expect("sandbox not started")
478    }
479
480    // ================================================================
481    // Runtime lifecycle API (public)
482    // ================================================================
483
484    /// Set the sandbox instance name (also exposed as the virtual hostname).
485    /// Auto-generated if not set.
486    pub fn set_name(&mut self, name: impl Into<String>) {
487        self.name = Some(name.into());
488    }
489
490    /// Set the sandbox instance name and return `self`. Convenience for
491    /// pipeline fan-out where a base config is cloned and each clone gets a
492    /// fresh name:
493    ///
494    /// ```ignore
495    /// let template = Sandbox::builder()...build()?;
496    /// let mut s1 = template.clone().with_name("worker-1");
497    /// let mut s2 = template.clone().with_name("worker-2");
498    /// ```
499    pub fn with_name(mut self, name: impl Into<String>) -> Self {
500        self.name = Some(name.into());
501        self
502    }
503
504    /// Set the COW-fork init function and return `self`.
505    ///
506    /// The init function runs once in the child process before any COW clones
507    /// are created. Use it to load expensive shared state.
508    pub fn with_init_fn(mut self, f: impl FnOnce() + Send + 'static) -> Self {
509        self.init_fn = Some(Box::new(f));
510        self
511    }
512
513    /// Set the COW-fork work function and return `self`.
514    ///
515    /// The work function runs in each COW clone (`fork(N)` produces N clones).
516    pub fn with_work_fn(mut self, f: impl Fn(u32) + Send + Sync + 'static) -> Self {
517        self.work_fn = Some(Arc::new(f));
518        self
519    }
520
521    /// Return the sandbox name if set, or `None` if not yet started.
522    pub fn instance_name(&self) -> Option<&str> {
523        self.runtime.as_ref().map(|r| r.name.as_str())
524            .or_else(|| self.name.as_deref())
525    }
526
527    /// Return the child PID if spawned.
528    pub fn pid(&self) -> Option<i32> {
529        self.runtime.as_ref().and_then(|r| r.child_pid)
530    }
531
532    /// Return whether the child is currently running or paused.
533    pub fn is_running(&self) -> bool {
534        self.runtime.as_ref().map(|r| {
535            matches!(r.state, RuntimeState::Running | RuntimeState::Paused)
536        }).unwrap_or(false)
537    }
538
539    /// Send SIGSTOP to the child's process group.
540    pub fn pause(&mut self) -> Result<(), crate::error::SandlockError> {
541        use crate::error::SandboxRuntimeError;
542        let pid = self.runtime.as_ref()
543            .and_then(|rt| rt.child_pid)
544            .ok_or(SandboxRuntimeError::NotRunning)?;
545        let ret = unsafe { libc::killpg(pid, libc::SIGSTOP) };
546        if ret < 0 {
547            return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
548        }
549        self.rt_mut().state = RuntimeState::Paused;
550        Ok(())
551    }
552
553    /// Send SIGCONT to the child's process group.
554    pub fn resume(&mut self) -> Result<(), crate::error::SandlockError> {
555        use crate::error::SandboxRuntimeError;
556        let pid = self.runtime.as_ref()
557            .and_then(|rt| rt.child_pid)
558            .ok_or(SandboxRuntimeError::NotRunning)?;
559        let ret = unsafe { libc::killpg(pid, libc::SIGCONT) };
560        if ret < 0 {
561            return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
562        }
563        self.rt_mut().state = RuntimeState::Running;
564        Ok(())
565    }
566
567    /// Send SIGKILL to the child's process group.
568    pub fn kill(&mut self) -> Result<(), crate::error::SandlockError> {
569        use crate::error::SandboxRuntimeError;
570        let pid = self.runtime.as_ref()
571            .and_then(|rt| rt.child_pid)
572            .ok_or(SandboxRuntimeError::NotRunning)?;
573        let ret = unsafe { libc::killpg(pid, libc::SIGKILL) };
574        if ret < 0 {
575            let err = std::io::Error::last_os_error();
576            if err.raw_os_error() != Some(libc::ESRCH) {
577                return Err(SandboxRuntimeError::Io(err).into());
578            }
579        }
580        Ok(())
581    }
582
583    /// Set a callback invoked whenever a port bind is recorded.
584    pub fn set_on_bind(&mut self, cb: impl Fn(&HashMap<u16, u16>) + Send + Sync + 'static) {
585        // Ensure runtime exists so we have somewhere to store the callback.
586        // In practice, set_on_bind is always called before spawn.
587        let _ = self.ensure_runtime();
588        self.rt_mut().on_bind = Some(Box::new(cb));
589    }
590
591    /// Return the current virtual-to-real port mappings.
592    pub async fn port_mappings(&self) -> HashMap<u16, u16> {
593        if let Some(ref rt) = self.runtime {
594            if let Some(ref net) = rt.supervisor_network {
595                let ns = net.lock().await;
596                return ns.port_map.virtual_to_real.clone();
597            }
598        }
599        HashMap::new()
600    }
601
602    /// Wait for the child process to exit.
603    pub async fn wait(&mut self) -> Result<crate::result::RunResult, crate::error::SandlockError> {
604        use crate::error::SandboxRuntimeError;
605        use crate::result::{ExitStatus, RunResult};
606
607        let pid = self.rt().child_pid.ok_or(SandboxRuntimeError::NotRunning)?;
608
609        if let RuntimeState::Stopped(ref es) = self.rt().state {
610            return Ok(RunResult {
611                exit_status: es.clone(),
612                stdout: None,
613                stderr: None,
614            });
615        }
616
617        let exit_status = tokio::task::spawn_blocking(move || -> ExitStatus {
618            let mut status: i32 = 0;
619            loop {
620                let ret = unsafe { libc::waitpid(pid, &mut status, 0) };
621                if ret < 0 {
622                    let err = std::io::Error::last_os_error();
623                    if err.raw_os_error() == Some(libc::EINTR) {
624                        continue;
625                    }
626                    return ExitStatus::Killed;
627                }
628                break;
629            }
630            sandbox_wait_status_to_exit(status)
631        })
632        .await
633        .unwrap_or(ExitStatus::Killed);
634
635        self.rt_mut().state = RuntimeState::Stopped(exit_status.clone());
636
637        let rt = self.rt_mut();
638        if let Some(h) = rt.notif_handle.take() { h.abort(); }
639        if let Some(h) = rt.throttle_handle.take() { h.abort(); }
640        if let Some(h) = rt.loadavg_handle.take() { h.abort(); }
641
642        if let Some(ref cow_state) = self.rt().supervisor_cow.clone() {
643            let mut cow = cow_state.lock().await;
644            self.rt_mut().seccomp_cow = cow.branch.take();
645        }
646
647        let stdout = self.rt_mut()._stdout_read.take().map(sandbox_read_fd_to_end);
648        let stderr = self.rt_mut()._stderr_read.take().map(sandbox_read_fd_to_end);
649
650        Ok(RunResult { exit_status, stdout, stderr })
651    }
652
653    /// Fork the sandboxed child and install policy (seccomp + notif
654    /// supervisor + rlimits + landlock + COW + network/HTTP proxies).
655    /// The child is parked between policy install and `execve`; call
656    /// `start()` to release it. Stdout/stderr are captured for later
657    /// retrieval via `wait()`.
658    pub async fn create(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
659        self.do_create(cmd, true).await
660    }
661
662    /// Like `create` but inherits stdio (no capture).
663    pub async fn create_interactive(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
664        self.do_create(cmd, false).await
665    }
666
667    /// Release a previously `create()`d child to `execve` the configured
668    /// command. Returns immediately; use `wait()` to collect the exit
669    /// status when the child finishes.
670    pub fn start(&mut self) -> Result<(), crate::error::SandlockError> {
671        self.do_start()
672    }
673
674    /// Sugar for `create()` + `start()` that also blocks until the child
675    /// has completed `execve()` and is executing user code. After this
676    /// returns, operations that read user-code state (e.g. `checkpoint()`,
677    /// `/proc/<pid>/exe`) observe the requested binary rather than the
678    /// supervisor.
679    pub async fn spawn(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
680        self.create(cmd).await?;
681        self.start()?;
682        self.wait_until_exec().await
683    }
684
685    /// Like `spawn` but inherits stdio (no capture).
686    pub async fn spawn_interactive(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
687        self.create_interactive(cmd).await?;
688        self.start()?;
689        self.wait_until_exec().await
690    }
691
692    /// Wait for the child to finish `execve`. Detected by `/proc/<pid>/exe`
693    /// no longer matching `/proc/self/exe` (before execve the child still
694    /// shares the supervisor's binary). The kernel offers no direct event
695    /// for execve completion, so this polls every 1ms with a 5s ceiling.
696    async fn wait_until_exec(&self) -> Result<(), crate::error::SandlockError> {
697        use crate::error::SandboxRuntimeError;
698        let pid = self.pid().ok_or(SandboxRuntimeError::NotRunning)?;
699        let Some(our_exe) = std::fs::read_link("/proc/self/exe").ok() else {
700            return Ok(());
701        };
702        let child_link = format!("/proc/{}/exe", pid);
703        let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
704        loop {
705            if let Ok(child_exe) = std::fs::read_link(&child_link) {
706                if child_exe != our_exe {
707                    return Ok(());
708                }
709            }
710            if std::time::Instant::now() >= deadline {
711                return Err(SandboxRuntimeError::Child(
712                    "child did not exec() within 5s".into(),
713                ).into());
714            }
715            tokio::time::sleep(std::time::Duration::from_millis(1)).await;
716        }
717    }
718
719    /// Create with explicit stdin/stdout/stderr fd redirection. Child is
720    /// parked after policy install; call `start()` to release.
721    #[doc(hidden)]
722    pub async fn create_with_io(
723        &mut self,
724        cmd: &[&str],
725        stdin_fd: Option<std::os::unix::io::RawFd>,
726        stdout_fd: Option<std::os::unix::io::RawFd>,
727        stderr_fd: Option<std::os::unix::io::RawFd>,
728    ) -> Result<(), crate::error::SandlockError> {
729        self.ensure_runtime()?;
730        self.rt_mut().io_overrides = Some((stdin_fd, stdout_fd, stderr_fd));
731        self.do_create(cmd, false).await
732    }
733
734    /// Like `create_with_io` but also maps extra fds into the child.
735    #[doc(hidden)]
736    pub async fn create_with_gather_io(
737        &mut self,
738        cmd: &[&str],
739        stdin_fd: Option<std::os::unix::io::RawFd>,
740        stdout_fd: Option<std::os::unix::io::RawFd>,
741        stderr_fd: Option<std::os::unix::io::RawFd>,
742        extra_fds: Vec<(i32, i32)>,
743    ) -> Result<(), crate::error::SandlockError> {
744        self.ensure_runtime()?;
745        self.rt_mut().io_overrides = Some((stdin_fd, stdout_fd, stderr_fd));
746        self.rt_mut().extra_fds = extra_fds;
747        self.do_create(cmd, false).await
748    }
749
750    /// Freeze the sandbox: hold fork notifications + SIGSTOP the process group.
751    pub(crate) async fn freeze(&self) -> Result<(), crate::error::SandlockError> {
752        use crate::error::{SandboxRuntimeError, SandlockError};
753        let rt = self.runtime.as_ref().ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
754        let pid = rt.child_pid.ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
755        if let Some(ref resource) = rt.supervisor_resource {
756            let mut rs = resource.lock().await;
757            rs.hold_forks = true;
758        }
759        unsafe { libc::killpg(pid, libc::SIGSTOP); }
760        Ok(())
761    }
762
763    /// Thaw the sandbox: release held fork notifications + SIGCONT.
764    pub(crate) async fn thaw(&self) -> Result<(), crate::error::SandlockError> {
765        use crate::error::{SandboxRuntimeError, SandlockError};
766        let rt = self.runtime.as_ref().ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
767        let pid = rt.child_pid.ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
768        if let Some(ref resource) = rt.supervisor_resource {
769            let mut rs = resource.lock().await;
770            rs.hold_forks = false;
771            rs.held_notif_ids.clear();
772        }
773        unsafe { libc::killpg(pid, libc::SIGCONT); }
774        Ok(())
775    }
776
777    /// Capture a checkpoint of the running sandbox.
778    pub async fn checkpoint(&self) -> Result<crate::checkpoint::Checkpoint, crate::error::SandlockError> {
779        use crate::error::{SandboxRuntimeError, SandlockError};
780        let pid = self.runtime.as_ref()
781            .and_then(|rt| rt.child_pid)
782            .ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
783        self.freeze().await?;
784        let cp = crate::checkpoint::capture(pid, self);
785        self.thaw().await?;
786        cp
787    }
788
789    // ================================================================
790    // One-shot / lifecycle instance API
791    // ================================================================
792
793    /// One-shot: spawn, wait, and return the result. Stdout and stderr are
794    /// captured. This is the primary way to run a sandboxed command:
795    ///
796    /// ```ignore
797    /// let mut sandbox = Sandbox::builder()
798    ///     .fs_read("/usr")
799    ///     .name("my-sandbox")
800    ///     .build()?;
801    /// let result = sandbox.run(&["echo", "hello"]).await?;
802    /// ```
803    pub async fn run(
804        &mut self,
805        cmd: &[&str],
806    ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
807        self.do_create(cmd, true).await?;
808        self.do_start()?;
809        self.wait().await
810    }
811
812    /// Run with inherited stdio (interactive mode).
813    pub async fn run_interactive(
814        &mut self,
815        cmd: &[&str],
816    ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
817        self.do_create(cmd, false).await?;
818        self.do_start()?;
819        self.wait().await
820    }
821
822    /// One-shot run with user-supplied syscall handlers.
823    pub async fn run_with_handlers<I, S, H>(
824        &mut self,
825        cmd: &[&str],
826        handlers: I,
827    ) -> Result<crate::result::RunResult, crate::error::SandlockError>
828    where
829        I: IntoIterator<Item = (S, H)>,
830        S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
831        H: crate::seccomp::dispatch::Handler,
832    {
833        let pending = sandbox_collect_handlers(handlers, self)?;
834        self.ensure_runtime()?;
835        self.rt_mut().handlers = pending;
836        self.do_create(cmd, true).await?;
837        self.do_start()?;
838        self.wait().await
839    }
840
841    /// Interactive-stdio counterpart of `run_with_handlers`.
842    pub async fn run_interactive_with_handlers<I, S, H>(
843        &mut self,
844        cmd: &[&str],
845        handlers: I,
846    ) -> Result<crate::result::RunResult, crate::error::SandlockError>
847    where
848        I: IntoIterator<Item = (S, H)>,
849        S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
850        H: crate::seccomp::dispatch::Handler,
851    {
852        let pending = sandbox_collect_handlers(handlers, self)?;
853        self.ensure_runtime()?;
854        self.rt_mut().handlers = pending;
855        self.do_create(cmd, false).await?;
856        self.do_start()?;
857        self.wait().await
858    }
859
860    /// Dry-run: create, start, wait, collect filesystem changes, then abort.
861    pub async fn dry_run(
862        &mut self,
863        cmd: &[&str],
864    ) -> Result<crate::dry_run::DryRunResult, crate::error::SandlockError> {
865        self.on_exit = BranchAction::Keep;
866        self.on_error = BranchAction::Keep;
867        self.do_create(cmd, true).await?;
868        self.do_start()?;
869        let run_result = self.wait().await?;
870        let changes = self.collect_changes().await;
871        self.do_abort().await;
872        Ok(crate::dry_run::DryRunResult { run_result, changes })
873    }
874
875    /// Dry-run with inherited stdio.
876    pub async fn dry_run_interactive(
877        &mut self,
878        cmd: &[&str],
879    ) -> Result<crate::dry_run::DryRunResult, crate::error::SandlockError> {
880        self.on_exit = BranchAction::Keep;
881        self.on_error = BranchAction::Keep;
882        self.do_create(cmd, false).await?;
883        self.do_start()?;
884        let run_result = self.wait().await?;
885        let changes = self.collect_changes().await;
886        self.do_abort().await;
887        Ok(crate::dry_run::DryRunResult { run_result, changes })
888    }
889
890    /// Create N COW clones of this sandbox.
891    ///
892    /// `fork()` requires `init_fn` and `work_fn` to be set on the sandbox (via
893    /// `SandboxBuilder::init_fn` / `work_fn`, or `Sandbox::with_init_fn` /
894    /// `with_work_fn`). Returns an error if either is missing.
895    pub async fn fork(&mut self, n: u32) -> Result<Vec<Sandbox>, crate::error::SandlockError> {
896        use crate::error::SandboxRuntimeError;
897        use std::os::fd::{FromRawFd, OwnedFd};
898
899        // Pull init_fn / work_fn directly from self (they live on Sandbox, not
900        // Runtime, so ensure_runtime hasn't consumed them yet).
901        let init_fn = self.init_fn.take()
902            .ok_or_else(|| SandboxRuntimeError::Child("fork() requires init_fn and work_fn — use SandboxBuilder::init_fn() / work_fn() or Sandbox::with_init_fn() / with_work_fn()".into()))?;
903        let work_fn = self.work_fn.take()
904            .ok_or_else(|| SandboxRuntimeError::Child("fork() requires init_fn and work_fn — use SandboxBuilder::init_fn() / work_fn() or Sandbox::with_init_fn() / with_work_fn()".into()))?;
905
906        // Initialize the runtime block so we can record child PID / state below.
907        self.ensure_runtime()?;
908
909        let sandbox_cfg = self.clone(); // config only, no runtime
910
911        let mut ctrl_fds = [0i32; 2];
912        if unsafe { libc::pipe2(ctrl_fds.as_mut_ptr(), 0) } < 0 {
913            return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
914        }
915        let ctrl_parent = unsafe { OwnedFd::from_raw_fd(ctrl_fds[0]) };
916        let ctrl_child_fd = ctrl_fds[1];
917
918        let mut pipe_read_ends: Vec<OwnedFd> = Vec::with_capacity(n as usize);
919        let mut pipe_write_fds: Vec<i32> = Vec::with_capacity(n as usize);
920        for _ in 0..n {
921            let mut pfds = [0i32; 2];
922            if unsafe { libc::pipe(pfds.as_mut_ptr()) } >= 0 {
923                pipe_read_ends.push(unsafe { OwnedFd::from_raw_fd(pfds[0]) });
924                pipe_write_fds.push(pfds[1]);
925            } else {
926                pipe_write_fds.push(-1);
927            }
928        }
929
930        let pid = unsafe { libc::fork() };
931        if pid < 0 {
932            unsafe { libc::close(ctrl_child_fd) };
933            return Err(SandboxRuntimeError::Fork(std::io::Error::last_os_error()).into());
934        }
935
936        if pid == 0 {
937            drop(ctrl_parent);
938            unsafe { libc::setpgid(0, 0) };
939            unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL) };
940            unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
941
942            let _ = crate::landlock::confine(&sandbox_cfg);
943
944            let deny = crate::context::blocklist_syscall_numbers(&sandbox_cfg);
945            let args = crate::context::arg_filters(&sandbox_cfg);
946            let filter = match crate::seccomp::bpf::assemble_filter(&[], &deny, &args) {
947                Ok(f) => f,
948                Err(_) => unsafe { libc::_exit(1) },
949            };
950            let _ = crate::seccomp::bpf::install_deny_filter(&filter);
951
952            init_fn();
953
954            drop(pipe_read_ends);
955            crate::fork::fork_ready_loop_fn(ctrl_child_fd, n, &*work_fn, &pipe_write_fds);
956            unsafe { libc::_exit(0) };
957        }
958
959        unsafe { libc::close(ctrl_child_fd) };
960        for wfd in &pipe_write_fds {
961            if *wfd >= 0 { unsafe { libc::close(*wfd) }; }
962        }
963        self.rt_mut().child_pid = Some(pid);
964        self.rt_mut().state = RuntimeState::Running;
965
966        let ctrl_fd = ctrl_parent.as_raw_fd();
967        let mut pid_buf = vec![0u8; n as usize * 4];
968        sandbox_read_exact(ctrl_fd, &mut pid_buf);
969
970        let clone_pids: Vec<i32> = pid_buf.chunks(4)
971            .map(|c| u32::from_be_bytes(c.try_into().unwrap_or([0; 4])) as i32)
972            .collect();
973        let live_count = clone_pids.iter().filter(|&&p| p > 0).count();
974
975        let mut code_buf = vec![0u8; live_count * 4];
976        sandbox_read_exact(ctrl_fd, &mut code_buf);
977        self.rt_mut().ctrl_fd = Some(ctrl_parent);
978
979        let mut status = 0i32;
980        unsafe { libc::waitpid(pid, &mut status, 0) };
981
982        let mut code_idx = 0;
983        let mut clones = Vec::with_capacity(live_count);
984        let mut pipe_iter = pipe_read_ends.into_iter();
985
986        let rt_name = self.rt().name.clone();
987        for &clone_pid in &clone_pids {
988            let pipe = pipe_iter.next();
989            if clone_pid <= 0 { continue; }
990
991            let code = i32::from_be_bytes(
992                code_buf[code_idx * 4..(code_idx + 1) * 4].try_into().unwrap_or([0; 4])
993            );
994            code_idx += 1;
995
996            let mut clone_sb = sandbox_cfg.clone();
997            let clone_name = format!("{}-fork-{}", rt_name, clone_pid);
998            clone_sb.runtime = Some(Box::new(Runtime {
999                name: clone_name,
1000                state: RuntimeState::Stopped(if code == 0 {
1001                    crate::result::ExitStatus::Code(0)
1002                } else if code > 0 {
1003                    crate::result::ExitStatus::Code(code)
1004                } else {
1005                    crate::result::ExitStatus::Killed
1006                }),
1007                child_pid: Some(clone_pid),
1008                pidfd: None,
1009                notif_handle: None,
1010                throttle_handle: None,
1011                loadavg_handle: None,
1012                _stdout_read: None,
1013                _stderr_read: None,
1014                seccomp_cow: None,
1015                supervisor_resource: None,
1016                supervisor_cow: None,
1017                supervisor_network: None,
1018                ctrl_fd: None,
1019                stdout_pipe: pipe,
1020                io_overrides: None,
1021                extra_fds: Vec::new(),
1022                http_acl_handle: None,
1023                on_bind: None,
1024                handlers: Vec::new(),
1025                ready_w: None,
1026            }));
1027            clones.push(clone_sb);
1028        }
1029
1030        Ok(clones)
1031    }
1032
1033    /// Reduce: wait for all clones, then run a reducer command.
1034    pub async fn reduce(
1035        &self,
1036        cmd: &[&str],
1037        clones: &mut [Sandbox],
1038    ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
1039        use crate::error::SandboxRuntimeError;
1040
1041        let mut combined = Vec::new();
1042        for clone in clones.iter_mut() {
1043            if let Some(ref mut rt) = clone.runtime {
1044                if let Some(pipe) = rt.stdout_pipe.take() {
1045                    combined.extend_from_slice(&sandbox_read_fd_to_end(pipe));
1046                }
1047            }
1048        }
1049
1050        let mut stdin_fds = [0i32; 2];
1051        if unsafe { libc::pipe2(stdin_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
1052            return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
1053        }
1054
1055        let write_fd = stdin_fds[1];
1056        let write_handle = tokio::task::spawn_blocking(move || {
1057            unsafe {
1058                libc::write(write_fd, combined.as_ptr() as *const _, combined.len());
1059                libc::close(write_fd);
1060            }
1061        });
1062
1063        let base_name = self.instance_name()
1064            .unwrap_or("sandbox")
1065            .to_owned();
1066        let reducer_name = base_name + "-reduce";
1067        let mut reducer = self.clone().with_name(reducer_name);
1068        reducer.ensure_runtime()?;
1069        reducer.rt_mut().io_overrides = Some((Some(stdin_fds[0]), None, None));
1070        reducer.do_create(cmd, true).await?;
1071        reducer.do_start()?;
1072        unsafe { libc::close(stdin_fds[0]) };
1073
1074        let _ = write_handle.await;
1075        reducer.wait().await
1076    }
1077
1078    /// Lazily initialize the runtime block.
1079    ///
1080    /// Called by lifecycle methods (`spawn`, `run`, `fork`, etc.) on first
1081    /// use. Validates and resolves the sandbox name. Idempotent: returns
1082    /// immediately if runtime is already set.
1083    fn ensure_runtime(&mut self) -> Result<(), crate::error::SandlockError> {
1084        if self.runtime.is_some() {
1085            return Ok(());
1086        }
1087        let name = sandbox_resolve_name(self.name.as_deref())?;
1088        self.runtime = Some(Box::new(Runtime {
1089            name,
1090            state: RuntimeState::Created,
1091            child_pid: None,
1092            pidfd: None,
1093            notif_handle: None,
1094            throttle_handle: None,
1095            loadavg_handle: None,
1096            _stdout_read: None,
1097            _stderr_read: None,
1098            seccomp_cow: None,
1099            supervisor_resource: None,
1100            supervisor_cow: None,
1101            supervisor_network: None,
1102            ctrl_fd: None,
1103            stdout_pipe: None,
1104            io_overrides: None,
1105            extra_fds: Vec::new(),
1106            http_acl_handle: None,
1107            on_bind: None,
1108            handlers: Vec::new(),
1109            ready_w: None,
1110        }));
1111        Ok(())
1112    }
1113
1114    // ================================================================
1115    // Internal: collect_changes / do_abort
1116    // ================================================================
1117
1118    async fn collect_changes(&self) -> Vec<crate::dry_run::Change> {
1119        if let Some(ref rt) = self.runtime {
1120            if let Some(ref cow) = rt.seccomp_cow {
1121                return cow.changes().unwrap_or_default();
1122            }
1123        }
1124        Vec::new()
1125    }
1126
1127    async fn do_abort(&mut self) {
1128        if let Some(ref mut rt) = self.runtime {
1129            if let Some(ref mut cow) = rt.seccomp_cow {
1130                let _ = cow.abort();
1131            }
1132        }
1133    }
1134
1135    // ================================================================
1136    // Internal: do_create (fork + policy install; child parks at the
1137    // ready_r read, awaiting do_start to release it to execve).
1138    // ================================================================
1139
1140    async fn do_create(&mut self, cmd: &[&str], capture: bool) -> Result<(), crate::error::SandlockError> {
1141        use std::ffi::CString;
1142        use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
1143        use crate::error::SandboxRuntimeError;
1144        use crate::context::{PipePair, read_u32_fd};
1145        use crate::network;
1146        use crate::seccomp::ctx::SupervisorCtx;
1147        use crate::seccomp::notif::{self, NotifPolicy};
1148        use crate::seccomp::state::{ChrootState, CowState, NetworkState, PolicyFnState, ProcfsState, ResourceState, TimeRandomState};
1149        use crate::sys::syscall;
1150        use std::time::Duration;
1151
1152        self.ensure_runtime()?;
1153
1154        if !matches!(self.rt().state, RuntimeState::Created) {
1155            return Err(SandboxRuntimeError::Child("sandbox already spawned".into()).into());
1156        }
1157
1158        if cmd.is_empty() {
1159            return Err(SandboxRuntimeError::Child("empty command".into()).into());
1160        }
1161
1162        // Resolve the chroot root eagerly, before any fork or confinement work:
1163        // a configured-but-missing chroot must be a hard error, never a silent
1164        // drop to "no confinement".
1165        let chroot_root = crate::chroot::resolve::resolve_chroot_root(self.chroot.as_deref())?;
1166
1167        // Each --http-inject-ca target must exist in the sandbox's view, or the
1168        // CA cannot be spliced into it and TLS interception silently fails. A
1169        // configured-but-missing trust bundle is a hard error, resolved through
1170        // --fs-mount and chroot so the check matches the workload's view.
1171        if !self.http_inject_ca.is_empty() {
1172            let mounts = crate::chroot::resolve::resolve_chroot_mounts(&self.fs_mount);
1173            for p in &self.http_inject_ca {
1174                let host = resolve_sandbox_path_to_host(p, chroot_root.as_deref(), &mounts);
1175                if !host.exists() {
1176                    return Err(SandboxRuntimeError::Child(format!(
1177                        "--http-inject-ca {:?} not found in the sandbox view (resolved to {:?}); \
1178                         the CA cannot be injected into it. Point it at the trust bundle the \
1179                         workload actually reads (e.g. /etc/ssl/certs/ca-certificates.crt, or \
1180                         certifi's cacert.pem).",
1181                        p, host
1182                    ))
1183                    .into());
1184                }
1185            }
1186        }
1187
1188        let c_cmd: Vec<CString> = cmd
1189            .iter()
1190            .map(|s| CString::new(*s).map_err(|_| SandboxRuntimeError::Child("invalid command string".into())))
1191            .collect::<Result<Vec<_>, _>>()?;
1192
1193        let no_supervisor = self.no_supervisor;
1194
1195        let pipes = PipePair::new().map_err(SandboxRuntimeError::Io)?;
1196
1197        let resolved_net_allow = network::resolve_net_allow(&self.net_allow)
1198            .await
1199            .map_err(SandboxRuntimeError::Io)?;
1200        // In chroot/image mode, seed the synthetic /etc/hosts from the
1201        // rootfs's own file so entries baked into the image (private
1202        // registries, internal hostnames, etc.) survive virtualization.
1203        // Without a chroot, the helper returns the fixed loopback base.
1204        // Either way, concrete-host rules from `net_allow` are appended
1205        // on top.
1206        let virtual_etc_hosts = network::compose_virtual_etc_hosts(
1207            self.chroot.as_deref(),
1208            &resolved_net_allow.concrete_host_entries,
1209        );
1210
1211        let mut ca_inject_pem: Option<std::sync::Arc<Vec<u8>>> = None;
1212        if !self.http_allow.is_empty() || !self.http_deny.is_empty() {
1213            // Generate an ephemeral CA when injection is requested without BYO.
1214            let generate = !self.http_inject_ca.is_empty();
1215            let ca_material = crate::transparent_proxy::resolve_ca(
1216                self.http_ca.as_deref(),
1217                self.http_key.as_deref(),
1218                generate,
1219            )
1220            .map_err(SandboxRuntimeError::Io)?;
1221
1222            // Export the public cert if requested.
1223            if let (Some(out), Some(cm)) = (self.http_ca_out.as_deref(), ca_material.as_ref()) {
1224                std::fs::write(out, cm.cert_pem.as_bytes()).map_err(SandboxRuntimeError::Io)?;
1225            }
1226
1227            // Keep the public cert for trust injection (only when paths declared).
1228            if !self.http_inject_ca.is_empty() {
1229                if let Some(cm) = ca_material.as_ref() {
1230                    ca_inject_pem = Some(std::sync::Arc::new(cm.cert_pem.clone().into_bytes()));
1231                }
1232            }
1233
1234            let (cert_pem, key_pem) = match ca_material.as_ref() {
1235                Some(cm) => (Some(cm.cert_pem.as_str()), Some(cm.key_pem.as_str())),
1236                None => (None, None),
1237            };
1238
1239            let handle = crate::transparent_proxy::spawn_transparent_proxy(
1240                self.http_allow.clone(),
1241                self.http_deny.clone(),
1242                cert_pem,
1243                key_pem,
1244            )
1245            .await
1246            .map_err(SandboxRuntimeError::Io)?;
1247            self.rt_mut().http_acl_handle = Some(handle);
1248        }
1249
1250        // Seccomp COW: create the branch before fork so the child's Landlock
1251        // ruleset can include the upper layer. Binaries created inside the
1252        // workdir live in the upper dir, and Landlock checks EXECUTE on the
1253        // file's real path at execve time — so the upper dir must be granted
1254        // read+execute (READ_ACCESS) or `./created-binary` fails with EACCES.
1255        let seccomp_cow_branch = if !no_supervisor && self.workdir.is_some() {
1256            let workdir = self.workdir.as_ref().unwrap().clone();
1257            let storage = self.fs_storage.clone();
1258            let max_disk = self.max_disk.map(|b| b.0).unwrap_or(0);
1259            match crate::cow::seccomp::SeccompCowBranch::create(&workdir, storage.as_deref(), max_disk) {
1260                Ok(branch) => {
1261                    self.fs_readable.push(branch.upper_dir().to_path_buf());
1262                    Some(branch)
1263                }
1264                Err(e) => {
1265                    eprintln!("sandlock: seccomp COW branch creation failed: {}", e);
1266                    None
1267                }
1268            }
1269        } else {
1270            None
1271        };
1272
1273        let (stdout_r, stderr_r) = if capture {
1274            let mut stdout_fds = [0i32; 2];
1275            let mut stderr_fds = [0i32; 2];
1276            if unsafe { libc::pipe2(stdout_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
1277                return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
1278            }
1279            if unsafe { libc::pipe2(stderr_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
1280                unsafe {
1281                    libc::close(stdout_fds[0]);
1282                    libc::close(stdout_fds[1]);
1283                }
1284                return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
1285            }
1286            (
1287                Some((
1288                    unsafe { OwnedFd::from_raw_fd(stdout_fds[0]) },
1289                    unsafe { OwnedFd::from_raw_fd(stdout_fds[1]) },
1290                )),
1291                Some((
1292                    unsafe { OwnedFd::from_raw_fd(stderr_fds[0]) },
1293                    unsafe { OwnedFd::from_raw_fd(stderr_fds[1]) },
1294                )),
1295            )
1296        } else {
1297            (None, None)
1298        };
1299
1300        // Capture our PID before fork so the child can detect parent death
1301        // without assuming PID 1 is always init (wrong in containers).
1302        let parent_pid = unsafe { libc::getpid() };
1303
1304        let pid = unsafe { libc::fork() };
1305        if pid < 0 {
1306            return Err(SandboxRuntimeError::Fork(std::io::Error::last_os_error()).into());
1307        }
1308
1309        if pid == 0 {
1310            // ===== CHILD PROCESS =====
1311            let io_overrides = self.rt().io_overrides;
1312            if let Some((stdin_fd, stdout_fd, stderr_fd)) = io_overrides {
1313                if let Some(fd) = stdin_fd { unsafe { libc::dup2(fd, 0) }; }
1314                if let Some(fd) = stdout_fd { unsafe { libc::dup2(fd, 1) }; }
1315                if let Some(fd) = stderr_fd { unsafe { libc::dup2(fd, 2) }; }
1316            }
1317
1318            let extra_fds_copy = self.rt().extra_fds.clone();
1319            for &(target_fd, source_fd) in &extra_fds_copy {
1320                unsafe { libc::dup2(source_fd, target_fd) };
1321            }
1322
1323            if let Some((_, ref stdout_w)) = stdout_r {
1324                unsafe { libc::dup2(stdout_w.as_raw_fd(), 1) };
1325            }
1326            if let Some((_, ref stderr_w)) = stderr_r {
1327                unsafe { libc::dup2(stderr_w.as_raw_fd(), 2) };
1328            }
1329            drop(stdout_r);
1330            drop(stderr_r);
1331
1332            let gather_keep_fds: Vec<i32> = extra_fds_copy.iter().map(|&(target, _)| target).collect();
1333
1334            let extra_syscalls: Vec<u32> = self.rt().handlers
1335                .iter()
1336                .map(|h| h.0 as u32)
1337                .collect();
1338
1339            let sandbox_name = self.rt().name.clone();
1340            context::confine_child(context::ChildSpawnArgs {
1341                sandbox: self,
1342                cmd: &c_cmd,
1343                pipes: &pipes,
1344                no_supervisor,
1345                keep_fds: &gather_keep_fds,
1346                sandbox_name: Some(sandbox_name.as_str()),
1347                extra_syscalls: &extra_syscalls,
1348                parent_pid,
1349            });
1350        }
1351
1352        // ===== PARENT PROCESS =====
1353        drop(pipes.notif_w);
1354        drop(pipes.ready_r);
1355
1356        self.rt_mut()._stdout_read = stdout_r.map(|(r, _w)| r);
1357        self.rt_mut()._stderr_read = stderr_r.map(|(r, _w)| r);
1358
1359        self.rt_mut().child_pid = Some(pid);
1360        // State remains `Created` until `do_start` writes ready_w to release
1361        // the child to execve.
1362
1363        let pidfd = match syscall::pidfd_open(pid as u32, 0) {
1364            Ok(fd) => Some(fd),
1365            Err(_) => None,
1366        };
1367
1368        let notif_fd_num = read_u32_fd(pipes.notif_r.as_raw_fd())
1369            .map_err(|e| SandboxRuntimeError::Child(format!("read notif fd from child: {}", e)))?;
1370
1371        let is_nested_mode = notif_fd_num == 0;
1372
1373        let notif_fd = if is_nested_mode {
1374            None
1375        } else if let Some(ref pfd) = pidfd {
1376            Some(syscall::pidfd_getfd(pfd, notif_fd_num as i32, 0)
1377                .map_err(|e| SandboxRuntimeError::Child(format!("pidfd_getfd: {}", e)))?)
1378        } else {
1379            let path = format!("/proc/{}/fd/{}", pid, notif_fd_num);
1380            let cpath = CString::new(path).unwrap();
1381            let raw = unsafe { libc::open(cpath.as_ptr(), libc::O_RDWR) };
1382            if raw < 0 {
1383                return Err(SandboxRuntimeError::Child("failed to open notif fd from /proc".into()).into());
1384            }
1385            Some(unsafe { OwnedFd::from_raw_fd(raw) })
1386        };
1387
1388        if let Some(notif_fd) = notif_fd {
1389            if self.time_start.is_some() || self.random_seed.is_some() {
1390                let time_offset = self.time_start.map(|t| crate::time::calculate_time_offset(t));
1391                if let Err(e) = crate::vdso::patch(pid, time_offset, self.random_seed.is_some()) {
1392                    eprintln!("sandlock: pre-exec vDSO patching failed (will retry after exec): {}", e);
1393                }
1394            }
1395
1396            let time_offset_val = self.time_start
1397                .map(|t| crate::time::calculate_time_offset(t))
1398                .unwrap_or(0);
1399
1400            let rt_name = self.rt().name.clone();
1401            let notif_policy = NotifPolicy {
1402                max_memory_bytes: self.max_memory.map(|m| m.0).unwrap_or(0),
1403                max_processes: self.max_processes,
1404                has_memory_limit: self.max_memory.is_some(),
1405                has_net_allowlist: !self.net_allow.is_empty()
1406                    || !self.net_deny.is_empty()
1407                    || self.policy_fn.is_some()
1408                    || !self.http_allow.is_empty()
1409                    || !self.http_deny.is_empty(),
1410                has_bind_denylist: !self.net_deny_bind.is_empty(),
1411                has_random_seed: self.random_seed.is_some(),
1412                has_time_start: self.time_start.is_some(),
1413                argv_safety_required: self.policy_fn.is_some()
1414                    || self.rt().handlers.iter().any(|h| {
1415                        h.0 == libc::SYS_execve || h.0 == libc::SYS_execveat
1416                    }),
1417                time_offset: time_offset_val,
1418                num_cpus: self.num_cpus,
1419                port_remap: self.port_remap,
1420                cow_enabled: self.workdir.is_some(),
1421                chroot_root: chroot_root.clone(),
1422                chroot_readable: self.fs_readable.clone(),
1423                chroot_writable: self.fs_writable.clone(),
1424                chroot_denied: self.fs_denied.clone(),
1425                chroot_mounts: crate::chroot::resolve::resolve_chroot_mounts(&self.fs_mount),
1426                deterministic_dirs: self.deterministic_dirs,
1427                virtual_hostname: Some(rt_name),
1428                has_http_acl: !self.http_allow.is_empty() || !self.http_deny.is_empty(),
1429                virtual_etc_hosts,
1430                ca_inject_paths: self.http_inject_ca.clone(),
1431                ca_inject_pem: ca_inject_pem.clone(),
1432            };
1433
1434            use rand::SeedableRng;
1435            use rand_chacha::ChaCha8Rng;
1436
1437            let random_state = self.random_seed.map(|seed| ChaCha8Rng::seed_from_u64(seed));
1438            let time_offset = self.time_start.map(|t| crate::time::calculate_time_offset(t));
1439
1440            let time_random_state = TimeRandomState::new(time_offset, random_state);
1441
1442            let mut net_state = NetworkState::new();
1443            if !self.net_deny.is_empty() {
1444                let resolved_deny = network::resolve_net_deny(&self.net_deny);
1445                net_state.tcp_policy = resolved_deny.tcp;
1446                net_state.udp_policy = resolved_deny.udp;
1447                net_state.icmp_policy = resolved_deny.icmp;
1448            } else {
1449                let no_rules = self.net_allow.is_empty();
1450                let policy_from = |resolved: &network::ResolvedNetAllow| {
1451                    if no_rules || resolved.any_ip_all_ports {
1452                        crate::seccomp::notif::NetworkPolicy::Unrestricted
1453                    } else {
1454                        use crate::seccomp::notif::PortAllow;
1455                        let per_ip = resolved
1456                            .per_ip
1457                            .iter()
1458                            .map(|(ip, ports)| {
1459                                let allow = if resolved.per_ip_all_ports.contains(ip) {
1460                                    PortAllow::Any
1461                                } else {
1462                                    PortAllow::Specific(ports.clone())
1463                                };
1464                                (*ip, allow)
1465                            })
1466                            .collect();
1467                        crate::seccomp::notif::NetworkPolicy::AllowList {
1468                            per_ip,
1469                            cidrs: resolved.cidrs.clone(),
1470                            any_ip_ports: resolved.any_ip_ports.clone(),
1471                        }
1472                    }
1473                };
1474                net_state.tcp_policy = policy_from(&resolved_net_allow.tcp);
1475                net_state.udp_policy = policy_from(&resolved_net_allow.udp);
1476                net_state.icmp_policy = policy_from(&resolved_net_allow.icmp);
1477            }
1478            net_state.http_acl_addr = self.rt().http_acl_handle.as_ref().map(|h| h.addr);
1479            net_state.http_acl_ports = self.http_ports.iter().copied().collect();
1480            net_state.http_acl_orig_dest = self.rt().http_acl_handle.as_ref().map(|h| h.orig_dest.clone());
1481            net_state.bind_deny_ports = self.net_deny_bind.iter().copied().collect();
1482            if let Some(cb) = self.rt_mut().on_bind.take() {
1483                net_state.port_map.on_bind = Some(cb);
1484            }
1485
1486            let procfs_state = ProcfsState::new();
1487
1488            let mut res_state = ResourceState::new(
1489                notif_policy.max_memory_bytes,
1490                notif_policy.max_processes,
1491            );
1492            res_state.proc_count = 1;
1493
1494            let mut cow_state = CowState::new();
1495            cow_state.branch = seccomp_cow_branch;
1496
1497            let mut policy_fn_state = PolicyFnState::new();
1498
1499            if let Ok(mut denied) = policy_fn_state.denied_paths.write() {
1500                for path in &self.fs_denied {
1501                    denied.insert(path.to_string_lossy().into_owned());
1502                }
1503            }
1504
1505            if let Some(ref callback) = self.policy_fn {
1506                let mut allowed_ips: std::collections::HashSet<std::net::IpAddr> =
1507                    std::collections::HashSet::new();
1508                for p in [&net_state.tcp_policy, &net_state.udp_policy, &net_state.icmp_policy] {
1509                    if let crate::seccomp::notif::NetworkPolicy::AllowList { per_ip, cidrs, .. } = p {
1510                        allowed_ips.extend(per_ip.keys().copied());
1511                        // IP literals resolve to single-host CIDRs (/32 or
1512                        // /128); surface them as concrete allowed IPs too.
1513                        for (net, _) in cidrs {
1514                            if net.is_single_host() {
1515                                allowed_ips.insert(net.addr);
1516                            }
1517                        }
1518                    }
1519                }
1520                let live = crate::policy_fn::LivePolicy {
1521                    allowed_ips,
1522                    max_memory_bytes: notif_policy.max_memory_bytes,
1523                    max_processes: notif_policy.max_processes,
1524                };
1525                let ceiling = live.clone();
1526                let live = std::sync::Arc::new(std::sync::RwLock::new(live));
1527                let denied_paths = policy_fn_state.denied_paths.clone();
1528                let pid_overrides = net_state.pid_ip_overrides.clone();
1529                policy_fn_state.live_policy = Some(live.clone());
1530                let tx = crate::policy_fn::spawn_policy_fn(
1531                    callback.clone(), live, ceiling, pid_overrides, denied_paths,
1532                );
1533                policy_fn_state.event_tx = Some(tx);
1534            }
1535
1536            let chroot_state = ChrootState::new();
1537
1538            let notif_raw_fd = notif_fd.as_raw_fd();
1539            let child_pidfd_raw = pidfd.as_ref().map(|pfd| pfd.as_raw_fd());
1540
1541            let res_state = Arc::new(tokio::sync::Mutex::new(res_state));
1542            self.rt_mut().supervisor_resource = Some(Arc::clone(&res_state));
1543
1544            let cow_state = Arc::new(tokio::sync::Mutex::new(cow_state));
1545            self.rt_mut().supervisor_cow = Some(Arc::clone(&cow_state));
1546
1547            let net_state = Arc::new(tokio::sync::Mutex::new(net_state));
1548            self.rt_mut().supervisor_network = Some(Arc::clone(&net_state));
1549
1550            let procfs_state = Arc::new(tokio::sync::Mutex::new(procfs_state));
1551            let time_random_state = Arc::new(tokio::sync::Mutex::new(time_random_state));
1552            let policy_fn_state = Arc::new(tokio::sync::Mutex::new(policy_fn_state));
1553            let chroot_state = Arc::new(tokio::sync::Mutex::new(chroot_state));
1554            let processes = Arc::new(crate::seccomp::state::ProcessIndex::new());
1555
1556            let ctx = Arc::new(SupervisorCtx {
1557                resource: Arc::clone(&res_state),
1558                cow: Arc::clone(&cow_state),
1559                procfs: Arc::clone(&procfs_state),
1560                network: Arc::clone(&net_state),
1561                time_random: Arc::clone(&time_random_state),
1562                policy_fn: Arc::clone(&policy_fn_state),
1563                chroot: Arc::clone(&chroot_state),
1564                netlink: Arc::new(crate::netlink::NetlinkState::new()),
1565                processes: Arc::clone(&processes),
1566                policy: Arc::new(notif_policy),
1567                child_pidfd: child_pidfd_raw,
1568                notif_fd: notif_raw_fd,
1569            });
1570
1571            let handlers = std::mem::take(&mut self.rt_mut().handlers);
1572            let (startup_tx, startup_rx) = tokio::sync::oneshot::channel();
1573            self.rt_mut().notif_handle = Some(tokio::spawn(
1574                notif::supervisor(notif_fd, ctx, handlers, startup_tx),
1575            ));
1576            // Wait for the supervisor to register the notif fd with the IO
1577            // driver before we release the child to execve. Otherwise an
1578            // early traced syscall would queue a notification on a fd no
1579            // one is polling, and the child would block until the next
1580            // `block_on` re-enters the runtime. Critical for current-thread
1581            // runtimes, harmless overhead for multi-thread.
1582            match startup_rx.await {
1583                Ok(Ok(())) => {}
1584                Ok(Err(e)) => return Err(SandboxRuntimeError::Io(e).into()),
1585                Err(_) => {
1586                    return Err(SandboxRuntimeError::Child(
1587                        "seccomp supervisor exited during startup".into(),
1588                    ).into());
1589                }
1590            }
1591
1592            let la_resource = Arc::clone(&res_state);
1593            self.rt_mut().loadavg_handle = Some(tokio::spawn(async move {
1594                let mut interval = tokio::time::interval(Duration::from_secs(5));
1595                interval.tick().await;
1596                loop {
1597                    interval.tick().await;
1598                    let mut rs = la_resource.lock().await;
1599                    let running = rs.proc_count;
1600                    rs.load_avg.sample(running);
1601                }
1602            }));
1603        }
1604
1605        if let Some(cpu_pct) = self.max_cpu {
1606            if cpu_pct < 100 {
1607                let child_pid = pid;
1608                self.rt_mut().throttle_handle = Some(tokio::spawn(sandbox_throttle_cpu(child_pid, cpu_pct)));
1609            }
1610        }
1611
1612        self.rt_mut().pidfd = pidfd;
1613        self.rt_mut().ready_w = Some(pipes.ready_w);
1614
1615        Ok(())
1616    }
1617
1618    // ================================================================
1619    // Internal: do_start (release the parked child to execve)
1620    // ================================================================
1621
1622    fn do_start(&mut self) -> Result<(), crate::error::SandlockError> {
1623        use std::os::fd::AsRawFd;
1624        use crate::context::write_u32_fd;
1625        use crate::error::SandboxRuntimeError;
1626
1627        if !matches!(self.rt().state, RuntimeState::Created) {
1628            return Err(SandboxRuntimeError::Child("start() requires a created sandbox".into()).into());
1629        }
1630        let ready_w = self.rt_mut().ready_w.take()
1631            .ok_or_else(|| SandboxRuntimeError::Child("start() called without a prior create()".into()))?;
1632        write_u32_fd(ready_w.as_raw_fd(), 1)
1633            .map_err(|e| SandboxRuntimeError::Child(format!("write ready signal: {}", e)))?;
1634        drop(ready_w);
1635        self.rt_mut().state = RuntimeState::Running;
1636        Ok(())
1637    }
1638}
1639
1640// ================================================================
1641// Drop for Sandbox — kills and reaps child if still running
1642// ================================================================
1643
1644impl Drop for Sandbox {
1645    fn drop(&mut self) {
1646        if let Some(ref mut rt) = self.runtime {
1647            if let Some(pid) = rt.child_pid {
1648                if matches!(rt.state, RuntimeState::Created | RuntimeState::Running | RuntimeState::Paused) {
1649                    unsafe { libc::killpg(pid, libc::SIGKILL) };
1650                    let mut status: i32 = 0;
1651                    unsafe { libc::waitpid(pid, &mut status, 0) };
1652                }
1653            }
1654
1655            if let Some(h) = rt.notif_handle.take() { h.abort(); }
1656            if let Some(h) = rt.throttle_handle.take() { h.abort(); }
1657            if let Some(h) = rt.loadavg_handle.take() { h.abort(); }
1658
1659            let is_error = matches!(
1660                rt.state,
1661                RuntimeState::Stopped(ref s) if !matches!(s, crate::result::ExitStatus::Code(0))
1662            );
1663            let action = if is_error { &self.on_error } else { &self.on_exit };
1664            let action = action.clone();
1665
1666            if let Some(ref mut cow) = rt.seccomp_cow {
1667                match action {
1668                    BranchAction::Commit => { let _ = cow.commit(); }
1669                    BranchAction::Abort => { let _ = cow.abort(); }
1670                    BranchAction::Keep => {}
1671                }
1672            }
1673        }
1674    }
1675}
1676
1677// ================================================================
1678// CPU throttle
1679// ================================================================
1680
1681async fn sandbox_throttle_cpu(pid: i32, cpu_pct: u8) {
1682    use std::time::Duration;
1683    let period = Duration::from_millis(100);
1684    let run_time = period * cpu_pct as u32 / 100;
1685    let stop_time = period - run_time;
1686    loop {
1687        tokio::time::sleep(run_time).await;
1688        if unsafe { libc::killpg(pid, libc::SIGSTOP) } < 0 { break; }
1689        tokio::time::sleep(stop_time).await;
1690        if unsafe { libc::killpg(pid, libc::SIGCONT) } < 0 { break; }
1691    }
1692}
1693
1694// ================================================================
1695// Process name resolution
1696// ================================================================
1697
1698static NEXT_SANDBOX_NAME: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(1);
1699
1700fn sandbox_resolve_name(name: Option<&str>) -> Result<String, crate::error::SandlockError> {
1701    match name {
1702        Some(n) => sandbox_validate_name(n.to_string()),
1703        None => Ok(format!(
1704            "sandbox-{}-{}",
1705            std::process::id(),
1706            NEXT_SANDBOX_NAME.fetch_add(1, std::sync::atomic::Ordering::Relaxed),
1707        )),
1708    }
1709}
1710
1711fn sandbox_validate_name(name: String) -> Result<String, crate::error::SandlockError> {
1712    use crate::error::SandboxRuntimeError;
1713    if name.is_empty() {
1714        return Err(SandboxRuntimeError::Child("sandbox name must not be empty".into()).into());
1715    }
1716    if name.len() > 64 {
1717        return Err(SandboxRuntimeError::Child("sandbox name must be at most 64 bytes".into()).into());
1718    }
1719    if name.as_bytes().contains(&0) {
1720        return Err(SandboxRuntimeError::Child("sandbox name must not contain NUL bytes".into()).into());
1721    }
1722    Ok(name)
1723}
1724
1725// ================================================================
1726// I/O helpers (private)
1727// ================================================================
1728
1729fn sandbox_read_exact(fd: i32, buf: &mut [u8]) {
1730    let mut off = 0;
1731    while off < buf.len() {
1732        let r = unsafe { libc::read(fd, buf[off..].as_mut_ptr() as *mut _, buf.len() - off) };
1733        if r <= 0 { break; }
1734        off += r as usize;
1735    }
1736}
1737
/// Read everything available from `fd` and return it.
///
/// Takes ownership of the descriptor, which is closed before returning.
/// A read error is deliberately not reported: the bytes received up to that
/// point are returned as-is.
fn sandbox_read_fd_to_end(fd: std::os::fd::OwnedFd) -> Vec<u8> {
    use std::io::Read;

    // `File: From<OwnedFd>` transfers ownership safely; no raw-fd round-trip
    // (and therefore no `unsafe`) is needed.
    let mut file = std::fs::File::from(fd);
    let mut buf = Vec::new();
    // Best effort: keep whatever was read even if the stream errors out.
    let _ = file.read_to_end(&mut buf);
    buf
}
1747
1748fn sandbox_wait_status_to_exit(status: i32) -> crate::result::ExitStatus {
1749    use crate::result::ExitStatus;
1750    if libc::WIFEXITED(status) {
1751        ExitStatus::Code(libc::WEXITSTATUS(status))
1752    } else if libc::WIFSIGNALED(status) {
1753        let sig = libc::WTERMSIG(status);
1754        if sig == libc::SIGKILL {
1755            ExitStatus::Killed
1756        } else {
1757            ExitStatus::Signal(sig)
1758        }
1759    } else {
1760        ExitStatus::Killed
1761    }
1762}
1763
1764fn sandbox_collect_handlers<I, S, H>(
1765    handlers: I,
1766    sandbox: &Sandbox,
1767) -> Result<Vec<(i64, Arc<dyn crate::seccomp::dispatch::Handler>)>, crate::error::SandlockError>
1768where
1769    I: IntoIterator<Item = (S, H)>,
1770    S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
1771    H: crate::seccomp::dispatch::Handler,
1772{
1773    use crate::seccomp::dispatch::{Handler, HandlerError};
1774
1775    let pending: Vec<(i64, Arc<dyn Handler>)> = handlers
1776        .into_iter()
1777        .map(|(syscall, handler)| {
1778            let nr = syscall.try_into().map_err(HandlerError::from)?.raw();
1779            let h: Arc<dyn Handler> = Arc::new(handler);
1780            Ok::<_, HandlerError>((nr, h))
1781        })
1782        .collect::<Result<_, _>>()?;
1783
1784    let nrs: Vec<i64> = pending.iter().map(|(nr, _)| *nr).collect();
1785    crate::seccomp::dispatch::validate_handler_syscalls_against_policy(&nrs, sandbox)
1786        .map_err(|syscall_nr| HandlerError::OnDenySyscall { syscall_nr })?;
1787
1788    Ok(pending)
1789}
1790
1791fn validate_syscall_names(names: &[String]) -> Result<(), SandboxError> {
1792    let unknown: Vec<&str> = names
1793        .iter()
1794        .map(String::as_str)
1795        .filter(|name| crate::seccomp::syscall::syscall_name_to_nr(name).is_none())
1796        .collect();
1797    if unknown.is_empty() {
1798        Ok(())
1799    } else {
1800        Err(SandboxError::Invalid(format!(
1801            "unknown syscall name(s): {}",
1802            unknown.join(", ")
1803        )))
1804    }
1805}
1806
/// Fluent builder for `Sandbox`.
///
/// When the `cli` feature is enabled this struct also derives `clap::Args` so
/// that the CLI can expose all per-field flags via `#[clap(flatten)]` without
/// duplicating the flag declarations.
#[derive(Default)]
#[cfg_attr(feature = "cli", derive(clap::Args))]
pub struct SandboxBuilder {
    // NOTE(review): under the `cli` feature clap's derive turns `///` comments
    // on `arg(...)` fields into user-visible help text, so maintainer-only
    // notes in this struct use plain `//` comments — confirm against the clap
    // version in use before converting any of them to `///`.

    // Paths appended by `fs_read` / `fs_read_if_exists`.
    #[cfg_attr(feature = "cli", arg(short = 'r', long = "fs-read", value_name = "PATH"))]
    pub fs_readable: Vec<PathBuf>,

    // Paths appended by `fs_write`.
    #[cfg_attr(feature = "cli", arg(short = 'w', long = "fs-write", value_name = "PATH"))]
    pub fs_writable: Vec<PathBuf>,

    // Paths appended by `fs_deny`.
    #[cfg_attr(feature = "cli", arg(long = "fs-deny", value_name = "PATH"))]
    pub fs_denied: Vec<PathBuf>,

    /// Extra syscall names to deny (in addition to Sandlock's default blocklist)
    #[cfg_attr(feature = "cli", arg(long = "extra-deny-syscall", value_name = "NAME"))]
    pub extra_deny_syscalls: Vec<String>,

    /// Extra syscall group names to allow (e.g. sysv_ipc)
    #[cfg_attr(feature = "cli", arg(long = "extra-allow-syscall", value_name = "NAME"))]
    pub extra_allow_syscalls: Vec<String>,

    /// Outbound endpoint allow rule. Repeatable. Each value is
    /// `host:port[,port,...]` (IP-restricted), `:port` or `*:port`
    /// (any IP), or `udp://...` / `icmp://...` for UDP/ICMP.
    /// Examples: `api.openai.com:443`, `github.com:22,443`, `:8080`.
    #[cfg_attr(feature = "cli", arg(long = "net-allow", value_name = "SPEC"))]
    pub net_allow: Vec<String>,

    /// `--net-deny`: default-allow networking, block these IPs/CIDRs/ports.
    /// Accepts `<ip>`, `<cidr>`, `<cidr>:<port[,port]>`, `:<port>`, `*`, and
    /// `[<ipv6>]:<port>`. The port is optional (no `:port` means all ports).
    /// Hostnames are rejected; use `--http-deny` for domains. Repeat the flag
    /// for multiple rules. Mutually exclusive with `--net-allow`.
    #[cfg_attr(feature = "cli", arg(long = "net-deny", value_name = "SPEC"))]
    pub net_deny: Vec<String>,

    /// `--net-allow-bind`: TCP ports the sandbox may bind/listen on
    /// (default-deny). Each value is a comma-separated list of single ports
    /// or inclusive `lo-hi` ranges, e.g. `8080,9000-9005`. Repeatable.
    #[cfg_attr(feature = "cli", arg(long = "net-allow-bind", value_name = "PORTS"))]
    pub net_allow_bind: Vec<String>,

    /// `--net-deny-bind`: TCP ports the sandbox may NOT bind/listen on
    /// (default-allow denylist; the inverse of `--net-allow-bind`). Same
    /// port syntax (comma-separated ports / `lo-hi` ranges). Repeatable.
    /// Mutually exclusive with `--net-allow-bind`.
    #[cfg_attr(feature = "cli", arg(long = "net-deny-bind", value_name = "PORTS"))]
    pub net_deny_bind: Vec<String>,

    // Raw HTTP allow-rule strings, kept unparsed in the builder.
    // NOTE(review): rule syntax is defined outside this chunk — see `HttpRule`.
    #[cfg_attr(feature = "cli", arg(long = "http-allow", value_name = "RULE"))]
    pub http_allow: Vec<String>,

    // Raw HTTP deny-rule strings, kept unparsed in the builder.
    #[cfg_attr(feature = "cli", arg(long = "http-deny", value_name = "RULE"))]
    pub http_deny: Vec<String>,

    /// TCP ports to intercept for HTTP ACL (default: 80, plus 443 with --http-ca)
    #[cfg_attr(feature = "cli", arg(long = "http-port", value_name = "PORT"))]
    pub http_ports: Vec<u16>,

    /// PEM CA certificate for HTTPS MITM (enables port 443 interception)
    #[cfg_attr(feature = "cli", arg(long = "http-ca", value_name = "PATH"))]
    pub http_ca: Option<PathBuf>,

    /// PEM CA private key for HTTPS MITM (required with --http-ca)
    #[cfg_attr(feature = "cli", arg(long = "http-key", value_name = "PATH"))]
    pub http_key: Option<PathBuf>,

    /// Inject the MITM CA into these trust bundle paths (repeatable). Without
    /// --http-ca this generates an ephemeral CA and intercepts port 443.
    #[cfg_attr(feature = "cli", arg(long = "http-inject-ca", value_name = "PATH"))]
    pub http_inject_ca: Vec<PathBuf>,

    /// Write the active MITM CA public certificate (PEM) to this path.
    #[cfg_attr(feature = "cli", arg(long = "http-ca-out", value_name = "PATH"))]
    pub http_ca_out: Option<PathBuf>,

    // max_memory uses a string in the CLI (e.g. "512M"); not directly clap-friendly as ByteSize.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub max_memory: Option<ByteSize>,

    // Optional process-count limit; `None` leaves it unset in the builder.
    #[cfg_attr(feature = "cli", arg(short = 'P', long = "max-processes"))]
    pub max_processes: Option<u32>,

    // Optional open-file limit; `None` leaves it unset in the builder.
    #[cfg_attr(feature = "cli", arg(long = "max-open-files"))]
    pub max_open_files: Option<u32>,

    // Optional CPU limit.
    // NOTE(review): presumably a percentage — the create path only starts the
    // SIGSTOP/SIGCONT throttle task for values below 100; confirm the unit.
    #[cfg_attr(feature = "cli", arg(short = 'c', long = "cpu"))]
    pub max_cpu: Option<u8>,

    // Optional seed for deterministic randomness.
    // NOTE(review): the create path seeds a ChaCha8 RNG from the sandbox's
    // `random_seed`; presumably this field is what gets copied there.
    #[cfg_attr(feature = "cli", arg(long = "random-seed"))]
    pub random_seed: Option<u64>,

    // time_start requires ISO 8601 string parsing; not directly clap-friendly as SystemTime.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub time_start: Option<SystemTime>,

    #[cfg_attr(feature = "cli", arg(long = "no-randomize-memory"))]
    pub no_randomize_memory: bool,

    #[cfg_attr(feature = "cli", arg(long = "no-huge-pages"))]
    pub no_huge_pages: bool,

    #[cfg_attr(feature = "cli", arg(long = "no-coredump"))]
    pub no_coredump: bool,

    #[cfg_attr(feature = "cli", arg(long = "deterministic-dirs"))]
    pub deterministic_dirs: bool,

    // NOTE(review): the create path enables copy-on-write handling whenever
    // the sandbox's `workdir` is set; presumably this field feeds it.
    #[cfg_attr(feature = "cli", arg(long = "workdir"))]
    pub workdir: Option<PathBuf>,

    #[cfg_attr(feature = "cli", arg(long = "cwd"))]
    pub cwd: Option<PathBuf>,

    #[cfg_attr(feature = "cli", arg(long = "fs-storage", value_name = "PATH"))]
    pub fs_storage: Option<PathBuf>,

    // max_disk uses a string in the CLI (e.g. "10G"); not directly clap-friendly as ByteSize.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub max_disk: Option<ByteSize>,

    // on_exit/on_error are not exposed as CLI flags.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub on_exit: Option<BranchAction>,

    #[cfg_attr(feature = "cli", clap(skip))]
    pub on_error: Option<BranchAction>,

    // fs_mount requires VIRTUAL:HOST string splitting; not directly clap-friendly as Vec<(PathBuf,PathBuf)>.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub fs_mount: Vec<(PathBuf, PathBuf)>,

    #[cfg_attr(feature = "cli", arg(long = "chroot"))]
    pub chroot: Option<PathBuf>,

    #[cfg_attr(feature = "cli", arg(long = "clean-env"))]
    pub clean_env: bool,

    // env requires KEY=VALUE string splitting; not directly clap-friendly as HashMap.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub env: HashMap<String, String>,

    // gpu_devices in CLI uses Vec<u32> with value_delimiter; SandboxBuilder stores Option<Vec<u32>>.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub gpu_devices: Option<Vec<u32>>,

    // cpu_cores in CLI uses Vec<u32> with value_delimiter; SandboxBuilder stores Option<Vec<u32>>.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub cpu_cores: Option<Vec<u32>>,

    #[cfg_attr(feature = "cli", arg(long = "num-cpus"))]
    pub num_cpus: Option<u32>,

    #[cfg_attr(feature = "cli", arg(long = "port-remap"))]
    pub port_remap: bool,

    /// Skip the seccomp user-notification supervisor. The CLI exposes
    /// its own `--no-supervisor` flag on `RunArgs` (which short-circuits
    /// to a direct exec); this field is the API-level counterpart used
    /// when the caller still wants the normal `Sandbox::run` lifecycle
    /// but cannot install a listener (e.g. nested inside another
    /// sandbox).
    #[cfg_attr(feature = "cli", clap(skip))]
    pub no_supervisor: bool,

    #[cfg_attr(feature = "cli", arg(long = "uid"))]
    pub uid: Option<u32>,

    /// Per-protection state overrides. Defaults to `strict_all` — every
    /// protection enforced, matching the historical `MIN_ABI = 6` floor.
    /// Use the `allow_degraded` / `disable` builder methods to deviate.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub protection_policy: ProtectionPolicy,

    // Internal callback — never a CLI flag.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub policy_fn: Option<crate::policy_fn::PolicyCallback>,

    // Sandbox instance name — stored for transfer into the Sandbox at build time.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub name: Option<String>,

    // COW fork init function — runs once in the child before COW cloning.
    // Being a boxed `FnOnce`, it cannot be duplicated: `Clone` resets it to `None`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub(crate) init_fn: Option<Box<dyn FnOnce() + Send + 'static>>,

    // COW fork work function — runs in each COW clone.
    // Held in an `Arc`, so cloning the builder shares the same function.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub(crate) work_fn: Option<Arc<dyn Fn(u32) + Send + Sync + 'static>>,
}
2001
2002impl std::fmt::Debug for SandboxBuilder {
2003    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2004        f.debug_struct("SandboxBuilder")
2005            .field("fs_readable", &self.fs_readable)
2006            .field("fs_writable", &self.fs_writable)
2007            .field("max_memory", &self.max_memory)
2008            .field("max_processes", &self.max_processes)
2009            .field("policy_fn", &self.policy_fn.as_ref().map(|_| "<callback>"))
2010            .finish_non_exhaustive()
2011    }
2012}
2013
2014impl Clone for SandboxBuilder {
2015    /// Clone a `SandboxBuilder`. All config and callback fields are cloned.
2016    /// `init_fn` (FnOnce) is dropped to `None` on the clone; `work_fn` clones
2017    /// via Arc. If the clone also needs an init function, set it again with
2018    /// `.init_fn(...)`.
2019    fn clone(&self) -> Self {
2020        Self {
2021            fs_readable: self.fs_readable.clone(),
2022            fs_writable: self.fs_writable.clone(),
2023            fs_denied: self.fs_denied.clone(),
2024            extra_deny_syscalls: self.extra_deny_syscalls.clone(),
2025            extra_allow_syscalls: self.extra_allow_syscalls.clone(),
2026            net_allow: self.net_allow.clone(),
2027            net_deny: self.net_deny.clone(),
2028            net_allow_bind: self.net_allow_bind.clone(),
2029            net_deny_bind: self.net_deny_bind.clone(),
2030            http_allow: self.http_allow.clone(),
2031            http_deny: self.http_deny.clone(),
2032            http_ports: self.http_ports.clone(),
2033            http_ca: self.http_ca.clone(),
2034            http_key: self.http_key.clone(),
2035            http_inject_ca: self.http_inject_ca.clone(),
2036            http_ca_out: self.http_ca_out.clone(),
2037            max_memory: self.max_memory,
2038            max_processes: self.max_processes,
2039            max_open_files: self.max_open_files,
2040            max_cpu: self.max_cpu,
2041            random_seed: self.random_seed,
2042            time_start: self.time_start,
2043            no_randomize_memory: self.no_randomize_memory,
2044            no_huge_pages: self.no_huge_pages,
2045            no_coredump: self.no_coredump,
2046            deterministic_dirs: self.deterministic_dirs,
2047            workdir: self.workdir.clone(),
2048            cwd: self.cwd.clone(),
2049            fs_storage: self.fs_storage.clone(),
2050            max_disk: self.max_disk,
2051            on_exit: self.on_exit.clone(),
2052            on_error: self.on_error.clone(),
2053            fs_mount: self.fs_mount.clone(),
2054            chroot: self.chroot.clone(),
2055            clean_env: self.clean_env,
2056            env: self.env.clone(),
2057            gpu_devices: self.gpu_devices.clone(),
2058            cpu_cores: self.cpu_cores.clone(),
2059            num_cpus: self.num_cpus,
2060            port_remap: self.port_remap,
2061            no_supervisor: self.no_supervisor,
2062            uid: self.uid,
2063            protection_policy: self.protection_policy.clone(),
2064            policy_fn: self.policy_fn.clone(),
2065            name: self.name.clone(),
2066            // init_fn (FnOnce) cannot be cloned — drop to None.
2067            init_fn: None,
2068            // work_fn is Arc-wrapped — clone bumps the reference count.
2069            work_fn: self.work_fn.clone(),
2070        }
2071    }
2072}
2073
2074impl SandboxBuilder {
2075    /// Permit `protection` to be enforced when the host kernel
2076    /// supports it, and silently skipped when it does not (fallback
2077    /// for kernels below the protection's `min_abi()`).
2078    ///
2079    /// The default policy enforces every protection strictly; calling
2080    /// `allow_degraded` lifts the strictness for the named protection
2081    /// only. `sandlock check` and `Sandbox::active_protections()`
2082    /// continue to report the degraded protection so the posture is
2083    /// observable.
2084    pub fn allow_degraded(mut self, protection: Protection) -> Self {
2085        self.protection_policy.set(protection, ProtectionState::Degradable);
2086        self
2087    }
2088
2089    /// Never enforce `protection`, even on a host kernel that supports
2090    /// it. Intended for workloads that legitimately need the capability
2091    /// the protection blocks (e.g. signalling a sibling process when
2092    /// `SignalScope` would normally prevent it).
2093    ///
2094    /// `Protection::FsRefer` cannot be disabled: Landlock denies REFER
2095    /// (cross-directory rename/link) by default in every ruleset even when
2096    /// it is not handled, so disabling it only tightens the sandbox rather
2097    /// than loosening it. `build()` (and `build_unchecked()`) return
2098    /// `SandboxError::Invalid` if `disable(Protection::FsRefer)` was called.
2099    /// Use [`allow_degraded`](Self::allow_degraded) if you want REFER
2100    /// enforced only where the kernel supports it.
2101    pub fn disable(mut self, protection: Protection) -> Self {
2102        self.protection_policy.set(protection, ProtectionState::Disabled);
2103        self
2104    }
2105
2106    pub fn fs_write(mut self, path: impl Into<PathBuf>) -> Self {
2107        self.fs_writable.push(path.into());
2108        self
2109    }
2110
2111    pub fn fs_read(mut self, path: impl Into<PathBuf>) -> Self {
2112        self.fs_readable.push(path.into());
2113        self
2114    }
2115
2116    pub fn fs_read_if_exists(self, path: impl Into<PathBuf>) -> Self {
2117        let path = path.into();
2118        if path.exists() {
2119            self.fs_read(path)
2120        } else {
2121            self
2122        }
2123    }
2124
2125    pub fn fs_deny(mut self, path: impl Into<PathBuf>) -> Self {
2126        self.fs_denied.push(path.into());
2127        self
2128    }
2129
2130    pub fn extra_deny_syscalls(mut self, calls: Vec<String>) -> Self {
2131        self.extra_deny_syscalls.extend(calls);
2132        self
2133    }
2134
2135    pub fn extra_allow_syscalls(mut self, names: Vec<String>) -> Self {
2136        self.extra_allow_syscalls.extend(names);
2137        self
2138    }
2139
2140    /// Add a network endpoint rule. Spec is `host:port[,port,...]`,
2141    /// `:port`, or `*:port`. Validated at `build()` time so callers
2142    /// receive parse errors via the standard `SandboxBuilder` flow.
2143    ///
2144    /// Examples:
2145    /// - `.net_allow("api.openai.com:443")` — HTTPS to OpenAI only
2146    /// - `.net_allow("github.com:22,443")` — SSH and HTTPS to GitHub
2147    /// - `.net_allow(":8080")` — any IP on port 8080
2148    pub fn net_allow(mut self, spec: impl Into<String>) -> Self {
2149        self.net_allow.push(spec.into());
2150        self
2151    }
2152
2153    /// Add a `--net-deny` rule. See the field docs for accepted forms.
2154    pub fn net_deny(mut self, spec: impl Into<String>) -> Self {
2155        self.net_deny.push(spec.into());
2156        self
2157    }
2158
2159    /// Allow binding a single TCP port. For comma-separated lists or
2160    /// `lo-hi` ranges, use [`net_allow_bind`](Self::net_allow_bind).
2161    pub fn net_allow_bind_port(mut self, port: u16) -> Self {
2162        self.net_allow_bind.push(port.to_string());
2163        self
2164    }
2165
2166    /// Allow binding TCP ports from a spec: a comma-separated list of single
2167    /// ports or inclusive `lo-hi` ranges (e.g. `"8080,9000-9005"`).
2168    pub fn net_allow_bind(mut self, spec: impl Into<String>) -> Self {
2169        self.net_allow_bind.push(spec.into());
2170        self
2171    }
2172
2173    /// Deny binding a single TCP port (default-allow denylist). For
2174    /// comma-separated lists or `lo-hi` ranges, use
2175    /// [`net_deny_bind`](Self::net_deny_bind).
2176    pub fn net_deny_bind_port(mut self, port: u16) -> Self {
2177        self.net_deny_bind.push(port.to_string());
2178        self
2179    }
2180
2181    /// Deny binding TCP ports from a spec: a comma-separated list of single
2182    /// ports or inclusive `lo-hi` ranges (e.g. `"8080,9000-9005"`). The
2183    /// inverse of [`net_allow_bind`](Self::net_allow_bind).
2184    pub fn net_deny_bind(mut self, spec: impl Into<String>) -> Self {
2185        self.net_deny_bind.push(spec.into());
2186        self
2187    }
2188
2189    pub fn http_allow(mut self, rule: &str) -> Self {
2190        self.http_allow.push(rule.to_string());
2191        self
2192    }
2193
2194    pub fn http_deny(mut self, rule: &str) -> Self {
2195        self.http_deny.push(rule.to_string());
2196        self
2197    }
2198
2199    pub fn http_port(mut self, port: u16) -> Self {
2200        self.http_ports.push(port);
2201        self
2202    }
2203
2204    pub fn http_ca(mut self, path: impl Into<PathBuf>) -> Self {
2205        self.http_ca = Some(path.into());
2206        self
2207    }
2208
2209    pub fn http_key(mut self, path: impl Into<PathBuf>) -> Self {
2210        self.http_key = Some(path.into());
2211        self
2212    }
2213
2214    pub fn http_inject_ca(mut self, path: impl Into<PathBuf>) -> Self {
2215        self.http_inject_ca.push(path.into());
2216        self
2217    }
2218
2219    pub fn http_ca_out(mut self, path: impl Into<PathBuf>) -> Self {
2220        self.http_ca_out = Some(path.into());
2221        self
2222    }
2223
2224    pub fn max_memory(mut self, size: ByteSize) -> Self {
2225        self.max_memory = Some(size);
2226        self
2227    }
2228
2229    pub fn max_processes(mut self, n: u32) -> Self {
2230        self.max_processes = Some(n);
2231        self
2232    }
2233
2234    pub fn max_open_files(mut self, n: u32) -> Self {
2235        self.max_open_files = Some(n);
2236        self
2237    }
2238
2239    pub fn max_cpu(mut self, pct: u8) -> Self {
2240        self.max_cpu = Some(pct);
2241        self
2242    }
2243
2244    pub fn random_seed(mut self, seed: u64) -> Self {
2245        self.random_seed = Some(seed);
2246        self
2247    }
2248
2249    pub fn time_start(mut self, t: SystemTime) -> Self {
2250        self.time_start = Some(t);
2251        self
2252    }
2253
2254    pub fn no_randomize_memory(mut self, v: bool) -> Self {
2255        self.no_randomize_memory = v;
2256        self
2257    }
2258
2259    pub fn no_huge_pages(mut self, v: bool) -> Self {
2260        self.no_huge_pages = v;
2261        self
2262    }
2263
2264    pub fn no_coredump(mut self, v: bool) -> Self {
2265        self.no_coredump = v;
2266        self
2267    }
2268
2269    pub fn deterministic_dirs(mut self, v: bool) -> Self {
2270        self.deterministic_dirs = v;
2271        self
2272    }
2273
2274    pub fn workdir(mut self, path: impl Into<PathBuf>) -> Self {
2275        self.workdir = Some(path.into());
2276        self
2277    }
2278
2279    pub fn cwd(mut self, path: impl Into<PathBuf>) -> Self {
2280        self.cwd = Some(path.into());
2281        self
2282    }
2283
2284    pub fn fs_storage(mut self, path: impl Into<PathBuf>) -> Self {
2285        self.fs_storage = Some(path.into());
2286        self
2287    }
2288
2289    pub fn max_disk(mut self, size: ByteSize) -> Self {
2290        self.max_disk = Some(size);
2291        self
2292    }
2293
2294    pub fn on_exit(mut self, action: BranchAction) -> Self {
2295        self.on_exit = Some(action);
2296        self
2297    }
2298
2299    pub fn on_error(mut self, action: BranchAction) -> Self {
2300        self.on_error = Some(action);
2301        self
2302    }
2303
2304    pub fn chroot(mut self, path: impl Into<PathBuf>) -> Self {
2305        self.chroot = Some(path.into());
2306        self
2307    }
2308
2309    pub fn fs_mount(mut self, virtual_path: impl Into<PathBuf>, host_path: impl Into<PathBuf>) -> Self {
2310        self.fs_mount.push((virtual_path.into(), host_path.into()));
2311        self
2312    }
2313
2314    pub fn clean_env(mut self, v: bool) -> Self {
2315        self.clean_env = v;
2316        self
2317    }
2318
2319    pub fn env_var(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
2320        self.env.insert(key.into(), value.into());
2321        self
2322    }
2323
2324
2325    pub fn gpu_devices(mut self, devices: Vec<u32>) -> Self {
2326        self.gpu_devices = Some(devices);
2327        self
2328    }
2329
2330    pub fn cpu_cores(mut self, cores: Vec<u32>) -> Self {
2331        self.cpu_cores = Some(cores);
2332        self
2333    }
2334
2335    pub fn num_cpus(mut self, n: u32) -> Self {
2336        self.num_cpus = Some(n);
2337        self
2338    }
2339
2340    pub fn port_remap(mut self, v: bool) -> Self {
2341        self.port_remap = v;
2342        self
2343    }
2344
2345    /// Skip the seccomp user-notification supervisor. The sandbox keeps
2346    /// Landlock and the kernel-level deny filter but loses every
2347    /// supervisor-mediated feature (IP allowlist, resource limits, COW,
2348    /// chroot mediation, /proc virtualization, custom handlers). The
2349    /// kernel only permits one `SECCOMP_FILTER_FLAG_NEW_LISTENER` per
2350    /// task, so set this when nesting `Sandbox::run` inside an already-
2351    /// confined process; otherwise the inner seccomp install returns
2352    /// `EBUSY`.
2353    pub fn no_supervisor(mut self, v: bool) -> Self {
2354        self.no_supervisor = v;
2355        self
2356    }
2357
2358    pub fn policy_fn(
2359        mut self,
2360        f: impl Fn(crate::policy_fn::SyscallEvent, &mut crate::policy_fn::PolicyContext) -> crate::policy_fn::Verdict + Send + Sync + 'static,
2361    ) -> Self {
2362        self.policy_fn = Some(std::sync::Arc::new(f));
2363        self
2364    }
2365
2366    pub fn uid(mut self, id: u32) -> Self {
2367        self.uid = Some(id);
2368        self
2369    }
2370
2371    /// Set the sandbox instance name (exposed as the virtual hostname).
2372    /// Auto-generated if not set.
2373    pub fn name(mut self, name: impl Into<String>) -> Self {
2374        self.name = Some(name.into());
2375        self
2376    }
2377
2378    /// Set the COW-fork init function.
2379    ///
2380    /// The init function runs once in the child process before any COW clones
2381    /// are created. Required for `Sandbox::fork()`.
2382    pub fn init_fn(mut self, f: impl FnOnce() + Send + 'static) -> Self {
2383        self.init_fn = Some(Box::new(f));
2384        self
2385    }
2386
2387    /// Set the COW-fork work function.
2388    ///
2389    /// The work function runs in each COW clone (`fork(N)` produces N clones).
2390    /// Required for `Sandbox::fork()`.
2391    pub fn work_fn(mut self, f: impl Fn(u32) + Send + Sync + 'static) -> Self {
2392        self.work_fn = Some(Arc::new(f));
2393        self
2394    }
2395
2396    /// Build a `Sandbox`, parsing all string fields and running per-field
2397    /// validation, but **without** the cross-section checks that
2398    /// `Sandbox::validate` performs. Use this in tests that deliberately
2399    /// construct sandboxes violating cross-section invariants.
2400    pub fn build_unchecked(self) -> Result<Sandbox, SandboxError> {
2401        validate_syscall_names(&self.extra_deny_syscalls)?;
2402
2403        // Reject disable(FsRefer): the kernel denies REFER (cross-directory
2404        // rename/link) by default in every ruleset even when REFER is not
2405        // handled. Controlled cross-directory rename within writable areas
2406        // works precisely *because* REFER is handled and granted on writable
2407        // paths (the Strict and Degradable states do this). Disabling REFER
2408        // un-handles it, which can only make rename stricter, never looser,
2409        // so it cannot do what disable() promises and is a footgun. Degrading
2410        // (allow_degraded) REFER is still meaningful and remains allowed.
2411        if self.protection_policy.state(Protection::FsRefer) == ProtectionState::Disabled {
2412            return Err(SandboxError::Invalid(
2413                "disable(Protection::FsRefer) is not permitted: Landlock denies \
2414                 REFER (cross-directory rename/link) by default even when it is \
2415                 not handled, so disabling it only tightens the sandbox, never \
2416                 loosens it. Remove the disable() call (use allow_degraded() if \
2417                 you wanted REFER enforced only where the kernel supports it)."
2418                    .into(),
2419            ));
2420        }
2421
2422        // Validate: max_cpu must be 1-100
2423        if let Some(cpu) = self.max_cpu {
2424            if cpu == 0 || cpu > 100 {
2425                return Err(SandboxError::InvalidCpuPercent(cpu));
2426            }
2427        }
2428
2429        // Validate: http_ca and http_key must both be set or both unset
2430        if self.http_ca.is_some() != self.http_key.is_some() {
2431            return Err(SandboxError::Invalid(
2432                "--http-ca and --http-key must both be provided together".into(),
2433            ));
2434        }
2435
2436        // --http-inject-ca / --http-ca-out are meaningless without an HTTP ACL
2437        // proxy to do MITM, which only spawns when http rules exist.
2438        let has_http_rules = !self.http_allow.is_empty() || !self.http_deny.is_empty();
2439        if !self.http_inject_ca.is_empty() && !has_http_rules {
2440            return Err(SandboxError::Invalid(
2441                "--http-inject-ca requires --http-allow or --http-deny".into(),
2442            ));
2443        }
2444        // --http-ca-out needs an actual CA to export (BYO or generated).
2445        if self.http_ca_out.is_some()
2446            && self.http_ca.is_none()
2447            && self.http_inject_ca.is_empty()
2448        {
2449            return Err(SandboxError::Invalid(
2450                "--http-ca-out requires --http-ca or --http-inject-ca".into(),
2451            ));
2452        }
2453
2454        // Parse HTTP rules (deferred from builder methods to propagate errors)
2455        let http_allow: Vec<HttpRule> = self
2456            .http_allow
2457            .into_iter()
2458            .map(|s| HttpRule::parse(&s))
2459            .collect::<Result<_, _>>()?;
2460        let http_deny: Vec<HttpRule> = self
2461            .http_deny
2462            .into_iter()
2463            .map(|s| HttpRule::parse(&s))
2464            .collect::<Result<_, _>>()?;
2465
2466        // Default HTTP intercept ports: 80 always, 443 when HTTPS CA is configured.
2467        let http_ports = if self.http_ports.is_empty() && (!http_allow.is_empty() || !http_deny.is_empty()) {
2468            let mut ports = vec![80];
2469            if self.http_ca.is_some() || !self.http_inject_ca.is_empty() {
2470                ports.push(443);
2471            }
2472            ports
2473        } else {
2474            self.http_ports
2475        };
2476
2477        // Parse user-supplied --net-allow specs.
2478        let mut net_allow: Vec<NetAllow> = self
2479            .net_allow
2480            .into_iter()
2481            .map(|s| NetRule::parse_allow(&s))
2482            .collect::<Result<_, _>>()?;
2483
2484        // Parse --net-deny rules (one rule per spec).
2485        let net_deny: Vec<NetDeny> = self
2486            .net_deny
2487            .into_iter()
2488            .map(|s| NetRule::parse_deny(&s))
2489            .collect::<Result<_, _>>()?;
2490
2491        // --net-allow and --net-deny are mutually exclusive. Check the
2492        // user-supplied allow count (the original specs), not the post-HTTP
2493        // extension, so a coexisting --http-deny does not false-trigger.
2494        if !net_allow.is_empty() && !net_deny.is_empty() {
2495            return Err(SandboxError::Invalid(
2496                "--net-allow and --net-deny are mutually exclusive".into(),
2497            ));
2498        }
2499
2500        // Expand bind port specs. --net-allow-bind (default-deny allowlist)
2501        // and --net-deny-bind (default-allow denylist) are contradictory.
2502        let net_allow_bind = parse_bind_ports(&self.net_allow_bind, "--net-allow-bind")?;
2503        let net_deny_bind = parse_bind_ports(&self.net_deny_bind, "--net-deny-bind")?;
2504        if !net_allow_bind.is_empty() && !net_deny_bind.is_empty() {
2505            return Err(SandboxError::Invalid(
2506                "--net-allow-bind and --net-deny-bind are mutually exclusive".into(),
2507            ));
2508        }
2509
2510        crate::http::extend_net_allow_for_http(
2511            &mut net_allow,
2512            &http_allow,
2513            &http_deny,
2514            &http_ports,
2515        );
2516
2517        Ok(Sandbox {
2518            fs_writable: self.fs_writable,
2519            fs_readable: self.fs_readable,
2520            fs_denied: self.fs_denied,
2521            extra_deny_syscalls: self.extra_deny_syscalls,
2522            extra_allow_syscalls: self.extra_allow_syscalls,
2523            protection_policy: self.protection_policy,
2524            net_allow,
2525            net_deny,
2526            net_allow_bind,
2527            net_deny_bind,
2528            http_allow,
2529            http_deny,
2530            http_ports,
2531            http_ca: self.http_ca,
2532            http_key: self.http_key,
2533            http_inject_ca: self.http_inject_ca,
2534            http_ca_out: self.http_ca_out,
2535            max_memory: self.max_memory,
2536            max_processes: self.max_processes.unwrap_or(64),
2537            max_open_files: self.max_open_files,
2538            max_cpu: self.max_cpu,
2539            random_seed: self.random_seed,
2540            time_start: self.time_start,
2541            no_randomize_memory: self.no_randomize_memory,
2542            no_huge_pages: self.no_huge_pages,
2543            no_coredump: self.no_coredump,
2544            deterministic_dirs: self.deterministic_dirs,
2545            workdir: self.workdir,
2546            cwd: self.cwd,
2547            fs_storage: self.fs_storage,
2548            max_disk: self.max_disk,
2549            on_exit: self.on_exit.unwrap_or_default(),
2550            on_error: self.on_error.unwrap_or_default(),
2551            fs_mount: self.fs_mount,
2552            chroot: self.chroot,
2553            clean_env: self.clean_env,
2554            env: self.env,
2555            gpu_devices: self.gpu_devices,
2556            cpu_cores: self.cpu_cores,
2557            num_cpus: self.num_cpus,
2558            port_remap: self.port_remap,
2559            no_supervisor: self.no_supervisor,
2560            uid: self.uid,
2561            policy_fn: self.policy_fn,
2562            name: self.name,
2563            init_fn: self.init_fn,
2564            work_fn: self.work_fn,
2565            runtime: None,
2566        })
2567    }
2568
2569    /// Build a `Sandbox`, parsing all string fields, running per-field validation,
2570    /// and verifying cross-section invariants via `Sandbox::validate`.
2571    pub fn build(self) -> Result<Sandbox, SandboxError> {
2572        let p = self.build_unchecked()?;
2573        p.validate()?;
2574        Ok(p)
2575    }
2576}
2577
2578/// Expand `--net-allow-bind` specs into a sorted, deduplicated port list.
2579/// Each spec is a comma-separated list of single ports (`8080`) or inclusive
2580/// `lo-hi` ranges (`8000-8010`). Mirrors the Python SDK's `parse_ports`.
2581fn parse_bind_ports(specs: &[String], label: &str) -> Result<Vec<u16>, SandboxError> {
2582    let mut ports: std::collections::BTreeSet<u16> = std::collections::BTreeSet::new();
2583    for spec in specs {
2584        for part in spec.split(',') {
2585            let part = part.trim();
2586            if part.is_empty() {
2587                return Err(SandboxError::Invalid(format!(
2588                    "{}: empty port in `{}`",
2589                    label, spec
2590                )));
2591            }
2592            match part.split_once('-') {
2593                Some((lo, hi)) => {
2594                    let lo: u16 = lo.trim().parse().map_err(|_| {
2595                        SandboxError::Invalid(format!("{}: invalid port range `{}`", label, part))
2596                    })?;
2597                    let hi: u16 = hi.trim().parse().map_err(|_| {
2598                        SandboxError::Invalid(format!("{}: invalid port range `{}`", label, part))
2599                    })?;
2600                    if lo > hi {
2601                        return Err(SandboxError::Invalid(format!(
2602                            "{}: reversed port range `{}` (lo > hi)",
2603                            label, part
2604                        )));
2605                    }
2606                    ports.extend(lo..=hi);
2607                }
2608                None => {
2609                    let p: u16 = part.parse().map_err(|_| {
2610                        SandboxError::Invalid(format!("{}: invalid port `{}`", label, part))
2611                    })?;
2612                    ports.insert(p);
2613                }
2614            }
2615        }
2616    }
2617    Ok(ports.into_iter().collect())
2618}
2619
/// Translate a path as the sandboxed child sees it into the host path that
/// backs it, so the host side can check for existence before spawning.
///
/// Resolution order:
/// 1. `--fs-mount` (virtual:host) mappings, in list order; the first entry
///    whose virtual path is a prefix of `child_path` wins.
/// 2. The chroot root, onto which an absolute `child_path` is re-rooted.
/// 3. Otherwise `child_path` is returned unchanged.
///
/// Used to validate `--http-inject-ca` targets.
fn resolve_sandbox_path_to_host(
    child_path: &std::path::Path,
    chroot_root: Option<&std::path::Path>,
    mounts: &[(std::path::PathBuf, std::path::PathBuf)],
) -> std::path::PathBuf {
    let via_mount = mounts.iter().find_map(|(virt, host)| {
        child_path
            .strip_prefix(virt)
            .ok()
            .map(|rest| host.join(rest))
    });
    if let Some(mapped) = via_mount {
        return mapped;
    }

    chroot_root
        .and_then(|root| {
            child_path
                .strip_prefix("/")
                .ok()
                .map(|rest| root.join(rest))
        })
        .unwrap_or_else(|| child_path.to_path_buf())
}
2641
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    // --- resolve_sandbox_path_to_host ---

    #[test]
    fn resolve_sandbox_path_plain() {
        // Neither mounts nor chroot: the path maps to itself.
        let child = Path::new("/etc/ssl/x.pem");
        let resolved = resolve_sandbox_path_to_host(child, None, &[]);
        assert_eq!(resolved, PathBuf::from("/etc/ssl/x.pem"));
    }

    #[test]
    fn resolve_sandbox_path_under_chroot() {
        // An absolute child path is re-rooted under the chroot directory.
        let child = Path::new("/etc/ssl/x.pem");
        let root = Path::new("/srv/root");
        let resolved = resolve_sandbox_path_to_host(child, Some(root), &[]);
        assert_eq!(resolved, PathBuf::from("/srv/root/etc/ssl/x.pem"));
    }

    #[test]
    fn resolve_sandbox_path_mount_takes_precedence() {
        // A matching mount is consulted before the chroot root.
        let mounts = vec![(PathBuf::from("/etc/ssl"), PathBuf::from("/host/ssl"))];
        let child = Path::new("/etc/ssl/x.pem");
        let root = Path::new("/srv/root");
        let resolved = resolve_sandbox_path_to_host(child, Some(root), &mounts);
        assert_eq!(resolved, PathBuf::from("/host/ssl/x.pem"));
    }

    #[tokio::test]
    async fn inject_ca_nonexistent_path_errors_at_run() {
        // The wildcard host rule sidesteps DNS. The missing inject path has
        // to fail before any fork or network activity happens.
        let mut sandbox = Sandbox::builder()
            .http_allow("GET */*")
            .http_inject_ca("/definitely/not/here/sandlock-bundle.pem")
            .build()
            .unwrap();
        let outcome = sandbox.run(&["true"]).await;
        assert!(outcome.is_err(), "expected error for missing --http-inject-ca path");
    }

    // --- SandboxBuilder integration ---

    #[test]
    fn builder_http_rules() {
        let sandbox = Sandbox::builder()
            .http_allow("GET api.example.com/v1/*")
            .http_deny("* */admin/*")
            .build()
            .unwrap();
        assert_eq!(sandbox.http_allow.len(), 1);
        assert_eq!(sandbox.http_deny.len(), 1);
        assert_eq!(sandbox.http_allow[0].method, "GET");
        assert_eq!(sandbox.http_deny[0].host, "*");
    }

    #[test]
    fn builder_invalid_http_allow_returns_error() {
        let built = Sandbox::builder().http_allow("GETexample.com").build();
        assert!(built.is_err());
    }

    #[test]
    fn builder_invalid_http_deny_returns_error() {
        let built = Sandbox::builder().http_deny("BADRULE").build();
        assert!(built.is_err());
    }

    #[test]
    fn builder_http_ca_without_key_returns_error() {
        let built = Sandbox::builder().http_ca("/tmp/ca.pem").build();
        assert!(built.is_err());
    }

    #[test]
    fn builder_http_key_without_ca_returns_error() {
        let built = Sandbox::builder().http_key("/tmp/key.pem").build();
        assert!(built.is_err());
    }

    #[test]
    fn builder_http_ca_and_key_together_ok() {
        let sandbox = Sandbox::builder()
            .http_ca("/tmp/ca.pem")
            .http_key("/tmp/key.pem")
            .build()
            .unwrap();
        assert!(sandbox.http_ca.is_some());
        assert!(sandbox.http_key.is_some());
    }

    #[test]
    fn inject_ca_adds_443_and_requires_http_rule() {
        // Without any HTTP rule the builder must refuse.
        let without_rule = Sandbox::builder()
            .http_inject_ca("/etc/ssl/certs/ca-certificates.crt")
            .build();
        assert!(without_rule.is_err());

        // With an HTTP rule it builds, and 443 joins the intercept ports.
        let sandbox = Sandbox::builder()
            .http_allow("GET example.com/*")
            .http_inject_ca("/etc/ssl/certs/ca-certificates.crt")
            .build()
            .unwrap();
        assert!(sandbox.http_ports.contains(&443));
        assert_eq!(sandbox.http_inject_ca.len(), 1);
    }

    #[test]
    fn http_ca_out_requires_trigger() {
        // No CA source configured: nothing to export.
        let missing_source = Sandbox::builder()
            .http_allow("GET example.com/*")
            .http_ca_out("/tmp/out.pem")
            .build();
        assert!(missing_source.is_err());

        // An injected CA counts as a source.
        let with_source = Sandbox::builder()
            .http_allow("GET example.com/*")
            .http_inject_ca("/etc/ssl/certs/ca-certificates.crt")
            .http_ca_out("/tmp/out.pem")
            .build();
        assert!(with_source.is_ok());
    }

    #[test]
    fn allows_sysv_ipc_reads_extra_allow_syscalls() {
        let with_group = Sandbox::builder()
            .extra_allow_syscalls(vec!["sysv_ipc".into()])
            .build()
            .unwrap();
        assert!(with_group.allows_sysv_ipc());

        let default_sandbox = Sandbox::builder().build().unwrap();
        assert!(!default_sandbox.allows_sysv_ipc());

        let unrelated_group = Sandbox::builder()
            .extra_allow_syscalls(vec!["other_group".into()])
            .build()
            .unwrap();
        assert!(!unrelated_group.allows_sysv_ipc());
    }

    #[test]
    fn builder_parses_net_deny() {
        let sandbox = Sandbox::builder().net_deny("10.0.0.0/8").build().unwrap();
        assert_eq!(sandbox.net_deny.len(), 1);
    }

    #[test]
    fn builder_net_allow_bind_comma_and_ranges() {
        // Comma lists and `lo-hi` ranges are expanded, sorted and deduped;
        // the last spec only repeats ports that are already present.
        let sandbox = Sandbox::builder()
            .net_allow_bind("8080,9000-9002")
            .net_allow_bind_port(443)
            .net_allow_bind("9001,443")
            .build()
            .unwrap();
        assert_eq!(sandbox.net_allow_bind, vec![443, 8080, 9000, 9001, 9002]);
    }

    #[test]
    fn builder_net_allow_bind_rejects_bad_specs() {
        // (spec, reason it must be rejected)
        let cases = [
            ("9000-8000", "reversed"),
            ("80,abc", "bad port"),
            ("70000", "> u16"),
            ("8080,", "empty part"),
        ];
        for (spec, why) in cases.iter() {
            let built = Sandbox::builder().net_allow_bind(*spec).build();
            assert!(built.is_err(), "spec `{}` should be rejected ({})", spec, why);
        }
    }

    #[test]
    fn builder_rejects_net_allow_and_net_deny_together() {
        let built = Sandbox::builder()
            .net_allow("github.com:443")
            .net_deny("10.0.0.0/8")
            .build();
        assert!(built.is_err());
    }

    #[test]
    fn builder_net_deny_bind_comma_and_ranges() {
        // --net-deny-bind shares the --net-allow-bind port grammar
        // (comma lists plus lo-hi ranges).
        let sandbox = Sandbox::builder()
            .net_deny_bind("8080,9000-9002")
            .net_deny_bind_port(443)
            .build()
            .unwrap();
        assert_eq!(sandbox.net_deny_bind, vec![443, 8080, 9000, 9001, 9002]);
        assert!(sandbox.net_allow_bind.is_empty());
    }

    #[test]
    fn builder_rejects_allow_bind_and_deny_bind_together() {
        let built = Sandbox::builder()
            .net_allow_bind("8080")
            .net_deny_bind("9090")
            .build();
        assert!(built.is_err());
        let message = built.unwrap_err().to_string();
        assert!(message.contains("mutually exclusive"));
    }

    #[test]
    fn builder_net_deny_rejects_hostname() {
        let built = Sandbox::builder().net_deny("evil.com:443").build();
        assert!(built.is_err());
    }

    #[test]
    fn net_deny_resolves_to_denylist_policies() {
        let sandbox = Sandbox::builder().net_deny("10.0.0.0/8").build().unwrap();
        let resolved = crate::network::resolve_net_deny(&sandbox.net_deny);
        // Inside the denied CIDR: blocked. Outside it: allowed.
        assert!(!resolved.tcp.allows("10.0.0.5".parse().unwrap(), 443));
        assert!(resolved.tcp.allows("8.8.8.8".parse().unwrap(), 443));
    }
}