1use std::collections::HashMap;
2use std::os::fd::AsRawFd;
3use std::path::PathBuf;
4use std::sync::Arc;
5use std::time::SystemTime;
6
7use serde::{Deserialize, Serialize};
8use tokio::task::JoinHandle;
9
10use crate::context;
11use crate::error::SandboxError;
12pub use crate::http::{http_acl_check, normalize_path, prefix_or_exact_match, HttpRule};
13pub use crate::network::{NetAllow, Protocol};
14
15#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
17pub struct ByteSize(pub u64);
18
19impl ByteSize {
20 pub fn bytes(n: u64) -> Self {
21 ByteSize(n)
22 }
23
24 pub fn kib(n: u64) -> Self {
25 ByteSize(n * 1024)
26 }
27
28 pub fn mib(n: u64) -> Self {
29 ByteSize(n * 1024 * 1024)
30 }
31
32 pub fn gib(n: u64) -> Self {
33 ByteSize(n * 1024 * 1024 * 1024)
34 }
35
36 pub fn parse(s: &str) -> Result<Self, SandboxError> {
37 let s = s.trim();
38 if s.is_empty() {
39 return Err(SandboxError::Invalid("empty byte size string".into()));
40 }
41
42 let last = s.chars().last().unwrap();
44 if last.is_ascii_alphabetic() {
45 let (num_str, suffix) = s.split_at(s.len() - 1);
46 let n: u64 = num_str
47 .trim()
48 .parse()
49 .map_err(|_| SandboxError::Invalid(format!("invalid byte size: {}", s)))?;
50 match suffix.to_ascii_uppercase().as_str() {
51 "K" => Ok(ByteSize::kib(n)),
52 "M" => Ok(ByteSize::mib(n)),
53 "G" => Ok(ByteSize::gib(n)),
54 other => Err(SandboxError::Invalid(format!("unknown byte size suffix: {}", other))),
55 }
56 } else {
57 let n: u64 = s
58 .parse()
59 .map_err(|_| SandboxError::Invalid(format!("invalid byte size: {}", s)))?;
60 Ok(ByteSize(n))
61 }
62 }
63}
64
65#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
67pub struct Confinement {
68 pub fs_writable: Vec<PathBuf>,
69 pub fs_readable: Vec<PathBuf>,
70}
71
72impl Confinement {
73 pub fn builder() -> ConfinementBuilder {
74 ConfinementBuilder::default()
75 }
76}
77
78#[derive(Default)]
79pub struct ConfinementBuilder {
80 fs_writable: Vec<PathBuf>,
81 fs_readable: Vec<PathBuf>,
82}
83
84impl ConfinementBuilder {
85 pub fn fs_write(mut self, path: impl Into<PathBuf>) -> Self {
86 self.fs_writable.push(path.into());
87 self
88 }
89
90 pub fn fs_read(mut self, path: impl Into<PathBuf>) -> Self {
91 self.fs_readable.push(path.into());
92 self
93 }
94
95 pub fn build(self) -> Confinement {
96 Confinement {
97 fs_writable: self.fs_writable,
98 fs_readable: self.fs_readable,
99 }
100 }
101}
102
103impl TryFrom<&Sandbox> for Confinement {
104 type Error = SandboxError;
105
106 fn try_from(sandbox: &Sandbox) -> Result<Self, Self::Error> {
107 let mut unsupported = Vec::new();
108 if !sandbox.fs_denied.is_empty() { unsupported.push("fs_denied"); }
109 if !sandbox.extra_deny_syscalls.is_empty() { unsupported.push("extra_deny_syscalls"); }
110 if !sandbox.net_allow.is_empty() { unsupported.push("net_allow"); }
111 if !sandbox.net_bind.is_empty() { unsupported.push("net_bind"); }
112 if sandbox.allows_sysv_ipc() { unsupported.push("extra_allow_syscalls=[\"sysv_ipc\"]"); }
113 if !sandbox.http_allow.is_empty() { unsupported.push("http_allow"); }
114 if !sandbox.http_deny.is_empty() { unsupported.push("http_deny"); }
115 if !sandbox.http_ports.is_empty() { unsupported.push("http_ports"); }
116 if sandbox.http_ca.is_some() { unsupported.push("http_ca"); }
117 if sandbox.http_key.is_some() { unsupported.push("http_key"); }
118 if sandbox.max_memory.is_some() { unsupported.push("max_memory"); }
119 if sandbox.max_processes != 64 { unsupported.push("max_processes"); }
120 if sandbox.max_open_files.is_some() { unsupported.push("max_open_files"); }
121 if sandbox.max_cpu.is_some() { unsupported.push("max_cpu"); }
122 if sandbox.random_seed.is_some() { unsupported.push("random_seed"); }
123 if sandbox.time_start.is_some() { unsupported.push("time_start"); }
124 if sandbox.no_randomize_memory { unsupported.push("no_randomize_memory"); }
125 if sandbox.no_huge_pages { unsupported.push("no_huge_pages"); }
126 if sandbox.no_coredump { unsupported.push("no_coredump"); }
127 if sandbox.deterministic_dirs { unsupported.push("deterministic_dirs"); }
128 if sandbox.fs_isolation != FsIsolation::None { unsupported.push("fs_isolation"); }
129 if sandbox.workdir.is_some() { unsupported.push("workdir"); }
130 if sandbox.cwd.is_some() { unsupported.push("cwd"); }
131 if sandbox.fs_storage.is_some() { unsupported.push("fs_storage"); }
132 if sandbox.max_disk.is_some() { unsupported.push("max_disk"); }
133 if sandbox.on_exit != BranchAction::Commit { unsupported.push("on_exit"); }
134 if sandbox.on_error != BranchAction::Abort { unsupported.push("on_error"); }
135 if !sandbox.fs_mount.is_empty() { unsupported.push("fs_mount"); }
136 if sandbox.chroot.is_some() { unsupported.push("chroot"); }
137 if sandbox.clean_env { unsupported.push("clean_env"); }
138 if !sandbox.env.is_empty() { unsupported.push("env"); }
139 if sandbox.gpu_devices.is_some() { unsupported.push("gpu_devices"); }
140 if sandbox.cpu_cores.is_some() { unsupported.push("cpu_cores"); }
141 if sandbox.num_cpus.is_some() { unsupported.push("num_cpus"); }
142 if sandbox.port_remap { unsupported.push("port_remap"); }
143 if sandbox.uid.is_some() { unsupported.push("uid"); }
144 if sandbox.policy_fn.is_some() { unsupported.push("policy_fn"); }
145
146 if !unsupported.is_empty() {
147 return Err(SandboxError::UnsupportedForConfine(unsupported.join(", ")));
148 }
149
150 Ok(Self {
151 fs_writable: sandbox.fs_writable.clone(),
152 fs_readable: sandbox.fs_readable.clone(),
153 })
154 }
155}
156
/// Filesystem isolation mode selected for a sandbox.
///
/// `Sandbox::validate` rejects every mode other than `None` unless a
/// `workdir` is configured.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum FsIsolation {
    /// No isolation branch is created (the default).
    #[default]
    None,
    /// Sandbox creation builds an `OverlayBranch` from the workdir and a
    /// storage directory.
    OverlayFs,
    /// Sandbox creation builds a `BranchFsBranch` from the workdir.
    BranchFs,
}
165
/// Action selected by `Sandbox::on_exit` and `Sandbox::on_error`.
///
/// NOTE(review): presumably this decides what happens to the isolation
/// branch's changes when a run finishes — confirm against the code that
/// consumes `on_exit` / `on_error`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum BranchAction {
    /// The derived default; also the `on_exit` baseline assumed by the
    /// `Confinement` conversion.
    #[default]
    Commit,
    /// The `on_error` baseline assumed by the `Confinement` conversion.
    Abort,
    /// Set on both hooks by the dry-run entry points, which then collect the
    /// changes and abort the branch themselves.
    Keep,
}
174
/// Per-instance runtime state of a [`Sandbox`], created lazily by
/// `Sandbox::ensure_runtime` and never carried over by `Clone`.
struct Runtime {
    /// Resolved instance name (from `sandbox_resolve_name`, or
    /// `<parent>-fork-<pid>` for fork clones).
    name: String,
    /// Lifecycle state; see [`RuntimeState`].
    state: RuntimeState,
    /// PID of the sandboxed child; also used as the process-group id for
    /// `killpg` in pause/resume/kill/freeze/thaw.
    child_pid: Option<i32>,
    // NOTE(review): not touched in the visible part of the file — presumably
    // a pidfd for `child_pid`; confirm.
    pidfd: Option<std::os::fd::OwnedFd>,
    /// Background tasks; all three are aborted by `wait()` once the child
    /// has been reaped.
    notif_handle: Option<JoinHandle<()>>,
    throttle_handle: Option<JoinHandle<()>>,
    loadavg_handle: Option<JoinHandle<()>>,
    /// Read ends drained to completion by `wait()` to produce the captured
    /// stdout / stderr of the run.
    _stdout_read: Option<std::os::fd::OwnedFd>,
    _stderr_read: Option<std::os::fd::OwnedFd>,
    /// Isolation branch consumed by `commit()` / `abort_branch()` / `do_abort()`.
    cow_branch: Option<Box<dyn crate::cow::CowBranch>>,
    /// Branch taken out of `supervisor_cow` by `wait()`; queried by
    /// `collect_changes()` and aborted by `do_abort()`.
    seccomp_cow: Option<crate::cow::seccomp::SeccompCowBranch>,
    /// Shared supervisor state; `freeze()` / `thaw()` toggle its `hold_forks` flag.
    supervisor_resource: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::ResourceState>>>,
    /// Shared supervisor state holding the seccomp COW branch until `wait()` takes it.
    supervisor_cow: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::CowState>>>,
    /// Shared supervisor state; `port_mappings()` reads its virtual-to-real port map.
    supervisor_network: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::NetworkState>>>,
    /// Parent end of the control pipe created by `fork()`.
    ctrl_fd: Option<std::os::fd::OwnedFd>,
    /// Read end of a fork clone's output pipe; drained by `reduce()`.
    stdout_pipe: Option<std::os::fd::OwnedFd>,
    /// Optional `(stdin, stdout, stderr)` raw fd overrides set by the
    /// `create_with_*io` entry points and `reduce()`.
    io_overrides: Option<(Option<i32>, Option<i32>, Option<i32>)>,
    /// Extra fd pairs supplied through `create_with_gather_io`.
    // NOTE(review): the meaning of each pair (source/target?) is not visible here.
    extra_fds: Vec<(i32, i32)>,
    /// Handle of the HTTP ACL proxy spawned during creation when HTTP rules exist.
    http_acl_handle: Option<crate::http_acl::HttpAclProxyHandle>,
    /// Callback registered via `set_on_bind`; receives a port map.
    #[allow(clippy::type_complexity)]
    on_bind: Option<Box<dyn Fn(&HashMap<u16, u16>) + Send + Sync>>,
    /// Handlers installed by the `run_*_with_handlers` entry points.
    // NOTE(review): the `i64` is presumably the syscall number — confirm.
    handlers: Vec<(i64, Arc<dyn crate::seccomp::dispatch::Handler>)>,
    // NOTE(review): not touched in the visible part of the file — presumably
    // the write end of a "ready" pipe; confirm.
    ready_w: Option<std::os::fd::OwnedFd>,
}
206
/// Lifecycle state of a sandbox instance.
enum RuntimeState {
    /// Initial state assigned by `ensure_runtime`; `do_create` refuses to
    /// run from any other state.
    Created,
    /// The child is considered live; set by `resume()` and `fork()`.
    Running,
    /// Set by `pause()` after SIGSTOP was delivered; still counts as running
    /// for `is_running()`.
    Paused,
    /// The child has finished; carries the exit status returned by `wait()`.
    Stopped(crate::result::ExitStatus),
}
214
/// Sandbox configuration plus (optionally) the state of one running instance.
///
/// The public fields are the serialisable policy. The trailing
/// `#[serde(skip)]` fields hold callbacks and live runtime state and are
/// never serialised.
#[derive(Serialize, Deserialize)]
pub struct Sandbox {
    // --- Filesystem path lists -------------------------------------------
    // `fs_writable` / `fs_readable` are the only settings that survive a
    // conversion into `Confinement`.
    pub fs_writable: Vec<PathBuf>,
    pub fs_readable: Vec<PathBuf>,
    pub fs_denied: Vec<PathBuf>,

    // --- Syscall policy adjustments --------------------------------------
    pub extra_deny_syscalls: Vec<String>,
    /// `allows_sysv_ipc()` looks for the entry `"sysv_ipc"` in this list.
    pub extra_allow_syscalls: Vec<String>,

    // --- Network and HTTP ------------------------------------------------
    /// Resolved through `network::resolve_net_allow` during creation.
    pub net_allow: Vec<NetAllow>,
    pub net_bind: Vec<u16>,
    /// When `http_allow` or `http_deny` is non-empty, creation spawns the
    /// HTTP ACL proxy with these rules plus `http_ca` / `http_key`.
    pub http_allow: Vec<HttpRule>,
    pub http_deny: Vec<HttpRule>,
    pub http_ports: Vec<u16>,
    pub http_ca: Option<PathBuf>,
    pub http_key: Option<PathBuf>,

    // --- Resource limits -------------------------------------------------
    pub max_memory: Option<ByteSize>,
    /// The `Confinement` conversion treats `64` as the baseline value.
    pub max_processes: u32,
    pub max_open_files: Option<u32>,
    // NOTE(review): `u8` suggests a percentage — confirm against the consumer.
    pub max_cpu: Option<u8>,

    // --- Determinism / hardening switches ---------------------------------
    pub random_seed: Option<u64>,
    pub time_start: Option<SystemTime>,
    pub no_randomize_memory: bool,
    pub no_huge_pages: bool,
    pub no_coredump: bool,
    pub deterministic_dirs: bool,

    // --- Filesystem isolation --------------------------------------------
    /// Any mode other than `FsIsolation::None` requires `workdir` (see `validate`).
    pub fs_isolation: FsIsolation,
    pub workdir: Option<PathBuf>,
    pub cwd: Option<PathBuf>,
    /// Storage directory for `OverlayFs`; creation falls back to
    /// `<temp dir>/sandlock-overlay` when unset.
    pub fs_storage: Option<PathBuf>,
    pub max_disk: Option<ByteSize>,
    /// Baseline `Commit` per the `Confinement` conversion.
    pub on_exit: BranchAction,
    /// Baseline `Abort` per the `Confinement` conversion.
    pub on_error: BranchAction,

    // NOTE(review): pair order (source/target?) is not visible here — confirm.
    pub fs_mount: Vec<(PathBuf, PathBuf)>,

    // --- Process environment ---------------------------------------------
    pub chroot: Option<PathBuf>,
    pub clean_env: bool,
    pub env: HashMap<String, String>,
    pub gpu_devices: Option<Vec<u32>>,

    // --- CPU / ports -----------------------------------------------------
    pub cpu_cores: Option<Vec<u32>>,
    pub num_cpus: Option<u32>,
    pub port_remap: bool,

    pub uid: Option<u32>,

    /// Policy callback; skipped by serde but carried over by `Clone`.
    #[serde(skip)]
    pub policy_fn: Option<crate::policy_fn::PolicyCallback>,

    /// Requested instance name; handed to `sandbox_resolve_name` when the
    /// runtime is created.
    #[serde(skip)]
    pub name: Option<String>,

    /// One-shot callback invoked in the template child by `fork()`; cannot be
    /// cloned, so `Clone` resets it to `None`.
    #[serde(skip)]
    init_fn: Option<Box<dyn FnOnce() + Send + 'static>>,

    /// Work callback handed to `fork_ready_loop_fn` by `fork()`.
    #[serde(skip)]
    work_fn: Option<Arc<dyn Fn(u32) + Send + Sync + 'static>>,

    /// Live instance state; `None` until `ensure_runtime` runs.
    #[serde(skip)]
    runtime: Option<Box<Runtime>>,
}
327
328impl std::fmt::Debug for Sandbox {
329 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
330 f.debug_struct("Sandbox")
331 .field("fs_readable", &self.fs_readable)
332 .field("fs_writable", &self.fs_writable)
333 .field("max_memory", &self.max_memory)
334 .field("max_processes", &self.max_processes)
335 .field("policy_fn", &self.policy_fn.as_ref().map(|_| "<callback>"))
336 .field("name", &self.name)
337 .field("runtime", &self.runtime.as_ref().map(|_| "<runtime>"))
338 .finish_non_exhaustive()
339 }
340}
341
/// Clones the configuration of a sandbox, not its running instance.
///
/// Every policy field, `policy_fn`, `name` and `work_fn` are copied;
/// `init_fn` and `runtime` are reset to `None`, so the clone starts out as a
/// fresh, not-yet-created sandbox.
impl Clone for Sandbox {
    fn clone(&self) -> Self {
        Self {
            fs_writable: self.fs_writable.clone(),
            fs_readable: self.fs_readable.clone(),
            fs_denied: self.fs_denied.clone(),
            extra_deny_syscalls: self.extra_deny_syscalls.clone(),
            extra_allow_syscalls: self.extra_allow_syscalls.clone(),
            net_allow: self.net_allow.clone(),
            net_bind: self.net_bind.clone(),
            http_allow: self.http_allow.clone(),
            http_deny: self.http_deny.clone(),
            http_ports: self.http_ports.clone(),
            http_ca: self.http_ca.clone(),
            http_key: self.http_key.clone(),
            max_memory: self.max_memory,
            max_processes: self.max_processes,
            max_open_files: self.max_open_files,
            max_cpu: self.max_cpu,
            random_seed: self.random_seed,
            time_start: self.time_start,
            no_randomize_memory: self.no_randomize_memory,
            no_huge_pages: self.no_huge_pages,
            no_coredump: self.no_coredump,
            deterministic_dirs: self.deterministic_dirs,
            fs_isolation: self.fs_isolation.clone(),
            workdir: self.workdir.clone(),
            cwd: self.cwd.clone(),
            fs_storage: self.fs_storage.clone(),
            max_disk: self.max_disk,
            on_exit: self.on_exit.clone(),
            on_error: self.on_error.clone(),
            fs_mount: self.fs_mount.clone(),
            chroot: self.chroot.clone(),
            clean_env: self.clean_env,
            env: self.env.clone(),
            gpu_devices: self.gpu_devices.clone(),
            cpu_cores: self.cpu_cores.clone(),
            num_cpus: self.num_cpus,
            port_remap: self.port_remap,
            uid: self.uid,
            policy_fn: self.policy_fn.clone(),
            name: self.name.clone(),
            // A boxed `FnOnce` cannot be duplicated, so the clone gets none.
            init_fn: None,
            // Shared via `Arc`, so cloning only bumps the reference count.
            work_fn: self.work_fn.clone(),
            // Runtime state belongs to one instance only and is never copied.
            runtime: None,
        }
    }
}
405
406impl Sandbox {
407 pub fn builder() -> SandboxBuilder {
408 SandboxBuilder::default()
409 }
410
411 pub fn allows_sysv_ipc(&self) -> bool {
413 self.extra_allow_syscalls.iter().any(|s| s == "sysv_ipc")
414 }
415
416 pub fn validate(&self) -> Result<(), SandboxError> {
423 if self.fs_isolation != FsIsolation::None && self.workdir.is_none() {
424 return Err(SandboxError::FsIsolationRequiresWorkdir);
425 }
426 Ok(())
427 }
428
429 fn rt(&self) -> &Runtime {
434 self.runtime.as_ref().expect("sandbox not started")
435 }
436
437 fn rt_mut(&mut self) -> &mut Runtime {
438 self.runtime.as_mut().expect("sandbox not started")
439 }
440
441 pub fn set_name(&mut self, name: impl Into<String>) {
448 self.name = Some(name.into());
449 }
450
451 pub fn with_name(mut self, name: impl Into<String>) -> Self {
461 self.name = Some(name.into());
462 self
463 }
464
465 pub fn with_init_fn(mut self, f: impl FnOnce() + Send + 'static) -> Self {
470 self.init_fn = Some(Box::new(f));
471 self
472 }
473
474 pub fn with_work_fn(mut self, f: impl Fn(u32) + Send + Sync + 'static) -> Self {
478 self.work_fn = Some(Arc::new(f));
479 self
480 }
481
482 pub fn instance_name(&self) -> Option<&str> {
484 self.runtime.as_ref().map(|r| r.name.as_str())
485 .or_else(|| self.name.as_deref())
486 }
487
488 pub fn pid(&self) -> Option<i32> {
490 self.runtime.as_ref().and_then(|r| r.child_pid)
491 }
492
493 pub fn is_running(&self) -> bool {
495 self.runtime.as_ref().map(|r| {
496 matches!(r.state, RuntimeState::Running | RuntimeState::Paused)
497 }).unwrap_or(false)
498 }
499
500 pub fn pause(&mut self) -> Result<(), crate::error::SandlockError> {
502 use crate::error::SandboxRuntimeError;
503 let pid = self.runtime.as_ref()
504 .and_then(|rt| rt.child_pid)
505 .ok_or(SandboxRuntimeError::NotRunning)?;
506 let ret = unsafe { libc::killpg(pid, libc::SIGSTOP) };
507 if ret < 0 {
508 return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
509 }
510 self.rt_mut().state = RuntimeState::Paused;
511 Ok(())
512 }
513
514 pub fn resume(&mut self) -> Result<(), crate::error::SandlockError> {
516 use crate::error::SandboxRuntimeError;
517 let pid = self.runtime.as_ref()
518 .and_then(|rt| rt.child_pid)
519 .ok_or(SandboxRuntimeError::NotRunning)?;
520 let ret = unsafe { libc::killpg(pid, libc::SIGCONT) };
521 if ret < 0 {
522 return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
523 }
524 self.rt_mut().state = RuntimeState::Running;
525 Ok(())
526 }
527
528 pub fn kill(&mut self) -> Result<(), crate::error::SandlockError> {
530 use crate::error::SandboxRuntimeError;
531 let pid = self.runtime.as_ref()
532 .and_then(|rt| rt.child_pid)
533 .ok_or(SandboxRuntimeError::NotRunning)?;
534 let ret = unsafe { libc::killpg(pid, libc::SIGKILL) };
535 if ret < 0 {
536 let err = std::io::Error::last_os_error();
537 if err.raw_os_error() != Some(libc::ESRCH) {
538 return Err(SandboxRuntimeError::Io(err).into());
539 }
540 }
541 Ok(())
542 }
543
544 pub fn set_on_bind(&mut self, cb: impl Fn(&HashMap<u16, u16>) + Send + Sync + 'static) {
546 let _ = self.ensure_runtime();
549 self.rt_mut().on_bind = Some(Box::new(cb));
550 }
551
552 pub async fn port_mappings(&self) -> HashMap<u16, u16> {
554 if let Some(ref rt) = self.runtime {
555 if let Some(ref net) = rt.supervisor_network {
556 let ns = net.lock().await;
557 return ns.port_map.virtual_to_real.clone();
558 }
559 }
560 HashMap::new()
561 }
562
563 pub async fn wait(&mut self) -> Result<crate::result::RunResult, crate::error::SandlockError> {
565 use crate::error::SandboxRuntimeError;
566 use crate::result::{ExitStatus, RunResult};
567
568 let pid = self.rt().child_pid.ok_or(SandboxRuntimeError::NotRunning)?;
569
570 if let RuntimeState::Stopped(ref es) = self.rt().state {
571 return Ok(RunResult {
572 exit_status: es.clone(),
573 stdout: None,
574 stderr: None,
575 });
576 }
577
578 let exit_status = tokio::task::spawn_blocking(move || -> ExitStatus {
579 let mut status: i32 = 0;
580 loop {
581 let ret = unsafe { libc::waitpid(pid, &mut status, 0) };
582 if ret < 0 {
583 let err = std::io::Error::last_os_error();
584 if err.raw_os_error() == Some(libc::EINTR) {
585 continue;
586 }
587 return ExitStatus::Killed;
588 }
589 break;
590 }
591 sandbox_wait_status_to_exit(status)
592 })
593 .await
594 .unwrap_or(ExitStatus::Killed);
595
596 self.rt_mut().state = RuntimeState::Stopped(exit_status.clone());
597
598 let rt = self.rt_mut();
599 if let Some(h) = rt.notif_handle.take() { h.abort(); }
600 if let Some(h) = rt.throttle_handle.take() { h.abort(); }
601 if let Some(h) = rt.loadavg_handle.take() { h.abort(); }
602
603 if let Some(ref cow_state) = self.rt().supervisor_cow.clone() {
604 let mut cow = cow_state.lock().await;
605 self.rt_mut().seccomp_cow = cow.branch.take();
606 }
607
608 let stdout = self.rt_mut()._stdout_read.take().map(sandbox_read_fd_to_end);
609 let stderr = self.rt_mut()._stderr_read.take().map(sandbox_read_fd_to_end);
610
611 Ok(RunResult { exit_status, stdout, stderr })
612 }
613
614 pub async fn create(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
620 self.do_create(cmd, true).await
621 }
622
623 pub async fn create_interactive(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
625 self.do_create(cmd, false).await
626 }
627
628 pub fn start(&mut self) -> Result<(), crate::error::SandlockError> {
632 self.do_start()
633 }
634
635 pub async fn spawn(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
641 self.create(cmd).await?;
642 self.start()?;
643 self.wait_until_exec().await
644 }
645
646 pub async fn spawn_interactive(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
648 self.create_interactive(cmd).await?;
649 self.start()?;
650 self.wait_until_exec().await
651 }
652
653 async fn wait_until_exec(&self) -> Result<(), crate::error::SandlockError> {
658 use crate::error::SandboxRuntimeError;
659 let pid = self.pid().ok_or(SandboxRuntimeError::NotRunning)?;
660 let Some(our_exe) = std::fs::read_link("/proc/self/exe").ok() else {
661 return Ok(());
662 };
663 let child_link = format!("/proc/{}/exe", pid);
664 let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
665 loop {
666 if let Ok(child_exe) = std::fs::read_link(&child_link) {
667 if child_exe != our_exe {
668 return Ok(());
669 }
670 }
671 if std::time::Instant::now() >= deadline {
672 return Err(SandboxRuntimeError::Child(
673 "child did not exec() within 5s".into(),
674 ).into());
675 }
676 tokio::time::sleep(std::time::Duration::from_millis(1)).await;
677 }
678 }
679
680 #[doc(hidden)]
683 pub async fn create_with_io(
684 &mut self,
685 cmd: &[&str],
686 stdin_fd: Option<std::os::unix::io::RawFd>,
687 stdout_fd: Option<std::os::unix::io::RawFd>,
688 stderr_fd: Option<std::os::unix::io::RawFd>,
689 ) -> Result<(), crate::error::SandlockError> {
690 self.ensure_runtime()?;
691 self.rt_mut().io_overrides = Some((stdin_fd, stdout_fd, stderr_fd));
692 self.do_create(cmd, false).await
693 }
694
695 #[doc(hidden)]
697 pub async fn create_with_gather_io(
698 &mut self,
699 cmd: &[&str],
700 stdin_fd: Option<std::os::unix::io::RawFd>,
701 stdout_fd: Option<std::os::unix::io::RawFd>,
702 stderr_fd: Option<std::os::unix::io::RawFd>,
703 extra_fds: Vec<(i32, i32)>,
704 ) -> Result<(), crate::error::SandlockError> {
705 self.ensure_runtime()?;
706 self.rt_mut().io_overrides = Some((stdin_fd, stdout_fd, stderr_fd));
707 self.rt_mut().extra_fds = extra_fds;
708 self.do_create(cmd, false).await
709 }
710
711 #[doc(hidden)]
713 pub async fn commit(&mut self) -> Result<(), crate::error::SandlockError> {
714 use crate::error::{SandboxRuntimeError, SandlockError};
715 if let Some(ref mut rt) = self.runtime {
716 if let Some(branch) = rt.cow_branch.take() {
717 branch.commit().map_err(|e| SandlockError::Runtime(SandboxRuntimeError::Branch(e)))?;
718 }
719 }
720 Ok(())
721 }
722
723 #[doc(hidden)]
725 pub async fn abort_branch(&mut self) -> Result<(), crate::error::SandlockError> {
726 use crate::error::{SandboxRuntimeError, SandlockError};
727 if let Some(ref mut rt) = self.runtime {
728 if let Some(branch) = rt.cow_branch.take() {
729 branch.abort().map_err(|e| SandlockError::Runtime(SandboxRuntimeError::Branch(e)))?;
730 }
731 }
732 Ok(())
733 }
734
735 pub(crate) async fn freeze(&self) -> Result<(), crate::error::SandlockError> {
737 use crate::error::{SandboxRuntimeError, SandlockError};
738 let rt = self.runtime.as_ref().ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
739 let pid = rt.child_pid.ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
740 if let Some(ref resource) = rt.supervisor_resource {
741 let mut rs = resource.lock().await;
742 rs.hold_forks = true;
743 }
744 unsafe { libc::killpg(pid, libc::SIGSTOP); }
745 Ok(())
746 }
747
748 pub(crate) async fn thaw(&self) -> Result<(), crate::error::SandlockError> {
750 use crate::error::{SandboxRuntimeError, SandlockError};
751 let rt = self.runtime.as_ref().ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
752 let pid = rt.child_pid.ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
753 if let Some(ref resource) = rt.supervisor_resource {
754 let mut rs = resource.lock().await;
755 rs.hold_forks = false;
756 rs.held_notif_ids.clear();
757 }
758 unsafe { libc::killpg(pid, libc::SIGCONT); }
759 Ok(())
760 }
761
762 pub async fn checkpoint(&self) -> Result<crate::checkpoint::Checkpoint, crate::error::SandlockError> {
764 use crate::error::{SandboxRuntimeError, SandlockError};
765 let pid = self.runtime.as_ref()
766 .and_then(|rt| rt.child_pid)
767 .ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
768 self.freeze().await?;
769 let cp = crate::checkpoint::capture(pid, self);
770 self.thaw().await?;
771 cp
772 }
773
774 pub async fn run(
789 &mut self,
790 cmd: &[&str],
791 ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
792 self.do_create(cmd, true).await?;
793 self.do_start()?;
794 self.wait().await
795 }
796
797 pub async fn run_interactive(
799 &mut self,
800 cmd: &[&str],
801 ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
802 self.do_create(cmd, false).await?;
803 self.do_start()?;
804 self.wait().await
805 }
806
807 pub async fn run_with_handlers<I, S, H>(
809 &mut self,
810 cmd: &[&str],
811 handlers: I,
812 ) -> Result<crate::result::RunResult, crate::error::SandlockError>
813 where
814 I: IntoIterator<Item = (S, H)>,
815 S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
816 H: crate::seccomp::dispatch::Handler,
817 {
818 let pending = sandbox_collect_handlers(handlers, self)?;
819 self.ensure_runtime()?;
820 self.rt_mut().handlers = pending;
821 self.do_create(cmd, true).await?;
822 self.do_start()?;
823 self.wait().await
824 }
825
826 pub async fn run_interactive_with_handlers<I, S, H>(
828 &mut self,
829 cmd: &[&str],
830 handlers: I,
831 ) -> Result<crate::result::RunResult, crate::error::SandlockError>
832 where
833 I: IntoIterator<Item = (S, H)>,
834 S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
835 H: crate::seccomp::dispatch::Handler,
836 {
837 let pending = sandbox_collect_handlers(handlers, self)?;
838 self.ensure_runtime()?;
839 self.rt_mut().handlers = pending;
840 self.do_create(cmd, false).await?;
841 self.do_start()?;
842 self.wait().await
843 }
844
845 pub async fn dry_run(
847 &mut self,
848 cmd: &[&str],
849 ) -> Result<crate::dry_run::DryRunResult, crate::error::SandlockError> {
850 self.on_exit = BranchAction::Keep;
851 self.on_error = BranchAction::Keep;
852 self.do_create(cmd, true).await?;
853 self.do_start()?;
854 let run_result = self.wait().await?;
855 let changes = self.collect_changes().await;
856 self.do_abort().await;
857 Ok(crate::dry_run::DryRunResult { run_result, changes })
858 }
859
860 pub async fn dry_run_interactive(
862 &mut self,
863 cmd: &[&str],
864 ) -> Result<crate::dry_run::DryRunResult, crate::error::SandlockError> {
865 self.on_exit = BranchAction::Keep;
866 self.on_error = BranchAction::Keep;
867 self.do_create(cmd, false).await?;
868 self.do_start()?;
869 let run_result = self.wait().await?;
870 let changes = self.collect_changes().await;
871 self.do_abort().await;
872 Ok(crate::dry_run::DryRunResult { run_result, changes })
873 }
874
875 pub async fn fork(&mut self, n: u32) -> Result<Vec<Sandbox>, crate::error::SandlockError> {
881 use crate::error::SandboxRuntimeError;
882 use std::os::fd::{FromRawFd, OwnedFd};
883
884 let init_fn = self.init_fn.take()
887 .ok_or_else(|| SandboxRuntimeError::Child("fork() requires init_fn and work_fn — use SandboxBuilder::init_fn() / work_fn() or Sandbox::with_init_fn() / with_work_fn()".into()))?;
888 let work_fn = self.work_fn.take()
889 .ok_or_else(|| SandboxRuntimeError::Child("fork() requires init_fn and work_fn — use SandboxBuilder::init_fn() / work_fn() or Sandbox::with_init_fn() / with_work_fn()".into()))?;
890
891 self.ensure_runtime()?;
893
894 let sandbox_cfg = self.clone(); let mut ctrl_fds = [0i32; 2];
897 if unsafe { libc::pipe2(ctrl_fds.as_mut_ptr(), 0) } < 0 {
898 return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
899 }
900 let ctrl_parent = unsafe { OwnedFd::from_raw_fd(ctrl_fds[0]) };
901 let ctrl_child_fd = ctrl_fds[1];
902
903 let mut pipe_read_ends: Vec<OwnedFd> = Vec::with_capacity(n as usize);
904 let mut pipe_write_fds: Vec<i32> = Vec::with_capacity(n as usize);
905 for _ in 0..n {
906 let mut pfds = [0i32; 2];
907 if unsafe { libc::pipe(pfds.as_mut_ptr()) } >= 0 {
908 pipe_read_ends.push(unsafe { OwnedFd::from_raw_fd(pfds[0]) });
909 pipe_write_fds.push(pfds[1]);
910 } else {
911 pipe_write_fds.push(-1);
912 }
913 }
914
915 let pid = unsafe { libc::fork() };
916 if pid < 0 {
917 unsafe { libc::close(ctrl_child_fd) };
918 return Err(SandboxRuntimeError::Fork(std::io::Error::last_os_error()).into());
919 }
920
921 if pid == 0 {
922 drop(ctrl_parent);
923 unsafe { libc::setpgid(0, 0) };
924 unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL) };
925 unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
926
927 let _ = crate::landlock::confine(&sandbox_cfg);
928
929 let deny = crate::context::blocklist_syscall_numbers(&sandbox_cfg);
930 let args = crate::context::arg_filters(&sandbox_cfg);
931 let filter = match crate::seccomp::bpf::assemble_filter(&[], &deny, &args) {
932 Ok(f) => f,
933 Err(_) => unsafe { libc::_exit(1) },
934 };
935 let _ = crate::seccomp::bpf::install_deny_filter(&filter);
936
937 crate::process::CONFINED.store(true, std::sync::atomic::Ordering::Relaxed);
938
939 init_fn();
940
941 drop(pipe_read_ends);
942 crate::fork::fork_ready_loop_fn(ctrl_child_fd, n, &*work_fn, &pipe_write_fds);
943 unsafe { libc::_exit(0) };
944 }
945
946 unsafe { libc::close(ctrl_child_fd) };
947 for wfd in &pipe_write_fds {
948 if *wfd >= 0 { unsafe { libc::close(*wfd) }; }
949 }
950 self.rt_mut().child_pid = Some(pid);
951 self.rt_mut().state = RuntimeState::Running;
952
953 let ctrl_fd = ctrl_parent.as_raw_fd();
954 let mut pid_buf = vec![0u8; n as usize * 4];
955 sandbox_read_exact(ctrl_fd, &mut pid_buf);
956
957 let clone_pids: Vec<i32> = pid_buf.chunks(4)
958 .map(|c| u32::from_be_bytes(c.try_into().unwrap_or([0; 4])) as i32)
959 .collect();
960 let live_count = clone_pids.iter().filter(|&&p| p > 0).count();
961
962 let mut code_buf = vec![0u8; live_count * 4];
963 sandbox_read_exact(ctrl_fd, &mut code_buf);
964 self.rt_mut().ctrl_fd = Some(ctrl_parent);
965
966 let mut status = 0i32;
967 unsafe { libc::waitpid(pid, &mut status, 0) };
968
969 let mut code_idx = 0;
970 let mut clones = Vec::with_capacity(live_count);
971 let mut pipe_iter = pipe_read_ends.into_iter();
972
973 let rt_name = self.rt().name.clone();
974 for &clone_pid in &clone_pids {
975 let pipe = pipe_iter.next();
976 if clone_pid <= 0 { continue; }
977
978 let code = i32::from_be_bytes(
979 code_buf[code_idx * 4..(code_idx + 1) * 4].try_into().unwrap_or([0; 4])
980 );
981 code_idx += 1;
982
983 let mut clone_sb = sandbox_cfg.clone();
984 let clone_name = format!("{}-fork-{}", rt_name, clone_pid);
985 clone_sb.runtime = Some(Box::new(Runtime {
986 name: clone_name,
987 state: RuntimeState::Stopped(if code == 0 {
988 crate::result::ExitStatus::Code(0)
989 } else if code > 0 {
990 crate::result::ExitStatus::Code(code)
991 } else {
992 crate::result::ExitStatus::Killed
993 }),
994 child_pid: Some(clone_pid),
995 pidfd: None,
996 notif_handle: None,
997 throttle_handle: None,
998 loadavg_handle: None,
999 _stdout_read: None,
1000 _stderr_read: None,
1001 cow_branch: None,
1002 seccomp_cow: None,
1003 supervisor_resource: None,
1004 supervisor_cow: None,
1005 supervisor_network: None,
1006 ctrl_fd: None,
1007 stdout_pipe: pipe,
1008 io_overrides: None,
1009 extra_fds: Vec::new(),
1010 http_acl_handle: None,
1011 on_bind: None,
1012 handlers: Vec::new(),
1013 ready_w: None,
1014 }));
1015 clones.push(clone_sb);
1016 }
1017
1018 Ok(clones)
1019 }
1020
1021 pub async fn reduce(
1023 &self,
1024 cmd: &[&str],
1025 clones: &mut [Sandbox],
1026 ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
1027 use crate::error::SandboxRuntimeError;
1028
1029 let mut combined = Vec::new();
1030 for clone in clones.iter_mut() {
1031 if let Some(ref mut rt) = clone.runtime {
1032 if let Some(pipe) = rt.stdout_pipe.take() {
1033 combined.extend_from_slice(&sandbox_read_fd_to_end(pipe));
1034 }
1035 }
1036 }
1037
1038 let mut stdin_fds = [0i32; 2];
1039 if unsafe { libc::pipe2(stdin_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
1040 return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
1041 }
1042
1043 let write_fd = stdin_fds[1];
1044 let write_handle = tokio::task::spawn_blocking(move || {
1045 unsafe {
1046 libc::write(write_fd, combined.as_ptr() as *const _, combined.len());
1047 libc::close(write_fd);
1048 }
1049 });
1050
1051 let base_name = self.instance_name()
1052 .unwrap_or("sandbox")
1053 .to_owned();
1054 let reducer_name = base_name + "-reduce";
1055 let mut reducer = self.clone().with_name(reducer_name);
1056 reducer.ensure_runtime()?;
1057 reducer.rt_mut().io_overrides = Some((Some(stdin_fds[0]), None, None));
1058 reducer.do_create(cmd, true).await?;
1059 reducer.do_start()?;
1060 unsafe { libc::close(stdin_fds[0]) };
1061
1062 let _ = write_handle.await;
1063 reducer.wait().await
1064 }
1065
1066 fn ensure_runtime(&mut self) -> Result<(), crate::error::SandlockError> {
1072 if self.runtime.is_some() {
1073 return Ok(());
1074 }
1075 let name = sandbox_resolve_name(self.name.as_deref())?;
1076 self.runtime = Some(Box::new(Runtime {
1077 name,
1078 state: RuntimeState::Created,
1079 child_pid: None,
1080 pidfd: None,
1081 notif_handle: None,
1082 throttle_handle: None,
1083 loadavg_handle: None,
1084 _stdout_read: None,
1085 _stderr_read: None,
1086 cow_branch: None,
1087 seccomp_cow: None,
1088 supervisor_resource: None,
1089 supervisor_cow: None,
1090 supervisor_network: None,
1091 ctrl_fd: None,
1092 stdout_pipe: None,
1093 io_overrides: None,
1094 extra_fds: Vec::new(),
1095 http_acl_handle: None,
1096 on_bind: None,
1097 handlers: Vec::new(),
1098 ready_w: None,
1099 }));
1100 Ok(())
1101 }
1102
1103 async fn collect_changes(&self) -> Vec<crate::dry_run::Change> {
1108 if let Some(ref rt) = self.runtime {
1109 if let Some(ref branch) = rt.cow_branch {
1110 return branch.changes().unwrap_or_default();
1111 }
1112 if let Some(ref cow) = rt.seccomp_cow {
1113 return cow.changes().unwrap_or_default();
1114 }
1115 }
1116 Vec::new()
1117 }
1118
1119 async fn do_abort(&mut self) {
1120 if let Some(ref mut rt) = self.runtime {
1121 if let Some(branch) = rt.cow_branch.take() {
1122 let _ = branch.abort();
1123 }
1124 if let Some(ref mut cow) = rt.seccomp_cow {
1125 let _ = cow.abort();
1126 }
1127 }
1128 }
1129
/// Forks the sandboxed child for `cmd` and sets up all parent-side supervision,
/// leaving the sandbox in a state where `do_start` can release the child.
///
/// Steps, in order:
/// 1. Validate state (`RuntimeState::Created`) and the command line.
/// 2. Resolve the network allow-list and, if HTTP rules exist, spawn the HTTP ACL proxy.
/// 3. Create the copy-on-write branch selected by `fs_isolation` (or a seccomp
///    COW branch when a workdir is set without mount-based isolation).
/// 4. Optionally create stdout/stderr capture pipes, then `fork()`.
/// 5. In the child: wire up fds and hand over to `context::confine_child`.
/// 6. In the parent: fetch the child's seccomp notification fd, build the
///    supervisor state, spawn the supervisor / load-average / CPU-throttle tasks.
///
/// # Parameters
/// * `cmd` - program and arguments; must be non-empty and free of interior NUL bytes.
/// * `capture` - when true, the child's stdout and stderr are redirected into
///   pipes whose read ends are stored on the runtime.
///
/// # Errors
/// Returns a `SandboxRuntimeError` (converted into `SandlockError`) when the
/// sandbox was already spawned, the command is empty or invalid, or any of the
/// pipe / fork / branch / proxy / supervisor setup steps fails.
async fn do_create(&mut self, cmd: &[&str], capture: bool) -> Result<(), crate::error::SandlockError> {
    use std::ffi::CString;
    use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
    use crate::error::SandboxRuntimeError;
    use crate::context::{PipePair, read_u32_fd};
    use crate::cow::{CowBranch, overlayfs::OverlayBranch, branchfs::BranchFsBranch};
    use crate::network;
    use crate::seccomp::ctx::SupervisorCtx;
    use crate::seccomp::notif::{self, NotifPolicy};
    use crate::seccomp::state::{ChrootState, CowState, NetworkState, PolicyFnState, ProcfsState, ResourceState, TimeRandomState};
    use crate::sys::syscall;
    use std::time::Duration;

    self.ensure_runtime()?;

    // A sandbox can only be created once: anything but `Created` means a child already exists.
    if !matches!(self.rt().state, RuntimeState::Created) {
        return Err(SandboxRuntimeError::Child("sandbox already spawned".into()).into());
    }

    if cmd.is_empty() {
        return Err(SandboxRuntimeError::Child("empty command".into()).into());
    }

    // Convert argv to C strings up front; `CString::new` rejects interior NUL bytes.
    let c_cmd: Vec<CString> = cmd
        .iter()
        .map(|s| CString::new(*s).map_err(|_| SandboxRuntimeError::Child("invalid command string".into())))
        .collect::<Result<Vec<_>, _>>()?;

    let nested = crate::process::is_nested();

    // Parent/child handshake pipes (notif fd number from the child, ready signal to the child).
    let pipes = PipePair::new().map_err(SandboxRuntimeError::Io)?;

    let resolved_net_allow = network::resolve_net_allow(&self.net_allow)
        .await
        .map_err(SandboxRuntimeError::Io)?;
    let virtual_etc_hosts = resolved_net_allow.etc_hosts.clone();

    // The HTTP ACL proxy is only started when at least one allow/deny rule is configured.
    if !self.http_allow.is_empty() || !self.http_deny.is_empty() {
        let handle = crate::http_acl::spawn_http_acl_proxy(
            self.http_allow.clone(),
            self.http_deny.clone(),
            self.http_ca.as_deref(),
            self.http_key.as_deref(),
        ).await.map_err(SandboxRuntimeError::Io)?;
        self.rt_mut().http_acl_handle = Some(handle);
    }

    // Mount-based copy-on-write branch; both variants require a workdir.
    let cow_branch: Option<Box<dyn CowBranch>> = match self.fs_isolation {
        FsIsolation::OverlayFs => {
            let workdir = self.workdir.as_ref()
                .ok_or_else(|| crate::error::SandlockError::Runtime(SandboxRuntimeError::Child("OverlayFs requires workdir".into())))?;
            // Fall back to `<tmp>/sandlock-overlay` when no storage directory was configured.
            let storage = self.fs_storage.as_ref()
                .cloned()
                .unwrap_or_else(|| std::env::temp_dir().join("sandlock-overlay"));
            std::fs::create_dir_all(&storage)
                .map_err(|e| crate::error::SandlockError::Runtime(SandboxRuntimeError::Io(e)))?;
            let branch = OverlayBranch::create(workdir, &storage)
                .map_err(|e| crate::error::SandlockError::Runtime(SandboxRuntimeError::Branch(e)))?;
            Some(Box::new(branch))
        }
        FsIsolation::BranchFs => {
            let workdir = self.workdir.as_ref()
                .ok_or_else(|| crate::error::SandlockError::Runtime(SandboxRuntimeError::Child("BranchFs requires workdir".into())))?;
            let branch = BranchFsBranch::create(workdir)
                .map_err(|e| crate::error::SandlockError::Runtime(SandboxRuntimeError::Branch(e)))?;
            Some(Box::new(branch))
        }
        FsIsolation::None => None,
    };

    let cow_config = cow_branch.as_ref().and_then(|b| b.child_mount_config());

    // Seccomp-based COW: used when a workdir is set, no mount-based isolation was
    // requested, and we are not nested. Creation failure is logged and tolerated.
    let seccomp_cow_branch = if !nested && self.workdir.is_some() && self.fs_isolation == FsIsolation::None {
        let workdir = self.workdir.as_ref().unwrap().clone();
        let storage = self.fs_storage.clone();
        // 0 is passed when no disk limit is configured.
        let max_disk = self.max_disk.map(|b| b.0).unwrap_or(0);
        match crate::cow::seccomp::SeccompCowBranch::create(&workdir, storage.as_deref(), max_disk) {
            Ok(branch) => {
                // The branch's upper directory is added to the readable paths.
                self.fs_readable.push(branch.upper_dir().to_path_buf());
                Some(branch)
            }
            Err(e) => {
                eprintln!("sandlock: seccomp COW branch creation failed: {}", e);
                None
            }
        }
    } else {
        None
    };

    // Capture pipes as (read end, write end) pairs; created close-on-exec.
    let (stdout_r, stderr_r) = if capture {
        let mut stdout_fds = [0i32; 2];
        let mut stderr_fds = [0i32; 2];
        if unsafe { libc::pipe2(stdout_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
            return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
        }
        if unsafe { libc::pipe2(stderr_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
            // Do not leak the already-created stdout pipe.
            unsafe {
                libc::close(stdout_fds[0]);
                libc::close(stdout_fds[1]);
            }
            return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
        }
        (
            Some((
                unsafe { OwnedFd::from_raw_fd(stdout_fds[0]) },
                unsafe { OwnedFd::from_raw_fd(stdout_fds[1]) },
            )),
            Some((
                unsafe { OwnedFd::from_raw_fd(stderr_fds[0]) },
                unsafe { OwnedFd::from_raw_fd(stderr_fds[1]) },
            )),
        )
    } else {
        (None, None)
    };

    // Captured before fork so the child receives the supervisor's pid.
    let parent_pid = unsafe { libc::getpid() };

    let pid = unsafe { libc::fork() };
    if pid < 0 {
        return Err(SandboxRuntimeError::Fork(std::io::Error::last_os_error()).into());
    }

    if pid == 0 {
        // ---- Child process ----
        // Apply caller-provided stdin/stdout/stderr overrides first.
        let io_overrides = self.rt().io_overrides;
        if let Some((stdin_fd, stdout_fd, stderr_fd)) = io_overrides {
            if let Some(fd) = stdin_fd { unsafe { libc::dup2(fd, 0) }; }
            if let Some(fd) = stdout_fd { unsafe { libc::dup2(fd, 1) }; }
            if let Some(fd) = stderr_fd { unsafe { libc::dup2(fd, 2) }; }
        }

        // Install extra fds as (target, source) pairs.
        let extra_fds_copy = self.rt().extra_fds.clone();
        for &(target_fd, source_fd) in &extra_fds_copy {
            unsafe { libc::dup2(source_fd, target_fd) };
        }

        // Capture pipes are applied last, so they win over the overrides above for fds 1 and 2.
        if let Some((_, ref stdout_w)) = stdout_r {
            unsafe { libc::dup2(stdout_w.as_raw_fd(), 1) };
        }
        if let Some((_, ref stderr_w)) = stderr_r {
            unsafe { libc::dup2(stderr_w.as_raw_fd(), 2) };
        }
        // Close the child's copies of both pipe ends; fds 1/2 keep the duplicates.
        drop(stdout_r);
        drop(stderr_r);

        // Target fds of the extra mappings are passed to `confine_child` as `keep_fds`.
        let gather_keep_fds: Vec<i32> = extra_fds_copy.iter().map(|&(target, _)| target).collect();

        // Syscall numbers of user-registered handlers, forwarded as `extra_syscalls`.
        let extra_syscalls: Vec<u32> = self.rt().handlers
            .iter()
            .map(|h| h.0 as u32)
            .collect();

        let sandbox_name = self.rt().name.clone();
        // NOTE(review): `confine_child` presumably never returns (exec or exit);
        // otherwise the child would fall through into the parent-side code below — confirm.
        context::confine_child(context::ChildSpawnArgs {
            sandbox: self,
            cmd: &c_cmd,
            pipes: &pipes,
            cow_config: cow_config.as_ref(),
            nested,
            keep_fds: &gather_keep_fds,
            sandbox_name: Some(sandbox_name.as_str()),
            extra_syscalls: &extra_syscalls,
            parent_pid,
        });
    }

    // ---- Parent process ----
    self.rt_mut().cow_branch = cow_branch;

    // Drop the pipe ends the parent does not use (it reads `notif_r` and later writes `ready_w`).
    drop(pipes.notif_w);
    drop(pipes.ready_r);

    // Keep only the read ends of the capture pipes; the write ends are closed here.
    self.rt_mut()._stdout_read = stdout_r.map(|(r, _w)| r);
    self.rt_mut()._stderr_read = stderr_r.map(|(r, _w)| r);

    self.rt_mut().child_pid = Some(pid);
    // pidfd is optional: on failure the /proc fallback below is used instead.
    let pidfd = match syscall::pidfd_open(pid as u32, 0) {
        Ok(fd) => Some(fd),
        Err(_) => None,
    };

    // The child reports the number of its seccomp notification fd over the pipe.
    let notif_fd_num = read_u32_fd(pipes.notif_r.as_raw_fd())
        .map_err(|e| SandboxRuntimeError::Child(format!("read notif fd from child: {}", e)))?;

    // A reported fd number of 0 is treated as "nested mode": no notification fd to supervise.
    let is_nested_mode = notif_fd_num == 0;

    // Duplicate the child's notification fd into this process.
    let notif_fd = if is_nested_mode {
        None
    } else if let Some(ref pfd) = pidfd {
        Some(syscall::pidfd_getfd(pfd, notif_fd_num as i32, 0)
            .map_err(|e| SandboxRuntimeError::Child(format!("pidfd_getfd: {}", e)))?)
    } else {
        // Fallback without a pidfd: open the fd through /proc/<pid>/fd/<n>.
        let path = format!("/proc/{}/fd/{}", pid, notif_fd_num);
        let cpath = CString::new(path).unwrap();
        let raw = unsafe { libc::open(cpath.as_ptr(), libc::O_RDWR) };
        if raw < 0 {
            return Err(SandboxRuntimeError::Child("failed to open notif fd from /proc".into()).into());
        }
        Some(unsafe { OwnedFd::from_raw_fd(raw) })
    };

    if let Some(notif_fd) = notif_fd {
        // Time/random virtualization: try to patch the vDSO now; failure is only logged.
        if self.time_start.is_some() || self.random_seed.is_some() {
            let time_offset = self.time_start.map(|t| crate::time::calculate_time_offset(t));
            if let Err(e) = crate::vdso::patch(pid, time_offset, self.random_seed.is_some()) {
                eprintln!("sandlock: pre-exec vDSO patching failed (will retry after exec): {}", e);
            }
        }

        // Offset is 0 when no virtual start time is configured.
        let time_offset_val = self.time_start
            .map(|t| crate::time::calculate_time_offset(t))
            .unwrap_or(0);

        let rt_name = self.rt().name.clone();
        // Immutable policy snapshot handed to the supervisor.
        let notif_policy = NotifPolicy {
            max_memory_bytes: self.max_memory.map(|m| m.0).unwrap_or(0),
            max_processes: self.max_processes,
            has_memory_limit: self.max_memory.is_some(),
            // Set when any of: net allow rules, a policy callback, or HTTP rules are configured.
            has_net_allowlist: !self.net_allow.is_empty()
                || self.policy_fn.is_some()
                || !self.http_allow.is_empty()
                || !self.http_deny.is_empty(),
            has_random_seed: self.random_seed.is_some(),
            has_time_start: self.time_start.is_some(),
            // Required when a policy callback exists or a handler is registered for execve/execveat.
            argv_safety_required: self.policy_fn.is_some()
                || self.rt().handlers.iter().any(|h| {
                    h.0 == libc::SYS_execve || h.0 == libc::SYS_execveat
                }),
            time_offset: time_offset_val,
            num_cpus: self.num_cpus,
            port_remap: self.port_remap,
            cow_enabled: self.workdir.is_some() && self.fs_isolation == FsIsolation::None,
            // A chroot path that cannot be canonicalized results in `None`.
            chroot_root: self.chroot.as_ref().and_then(|p| std::fs::canonicalize(p).ok()),
            chroot_readable: self.fs_readable.clone(),
            chroot_writable: self.fs_writable.clone(),
            chroot_denied: self.fs_denied.clone(),
            // Host paths are canonicalized when possible, otherwise used as given.
            chroot_mounts: self.fs_mount.iter().map(|(vp, hp)| {
                (vp.clone(), std::fs::canonicalize(hp).unwrap_or_else(|_| hp.clone()))
            }).collect(),
            deterministic_dirs: self.deterministic_dirs,
            virtual_hostname: Some(rt_name),
            has_http_acl: !self.http_allow.is_empty() || !self.http_deny.is_empty(),
            virtual_etc_hosts,
        };

        use rand::SeedableRng;
        use rand_chacha::ChaCha8Rng;

        // Seeded RNG state, present only when a random seed is configured.
        let random_state = self.random_seed.map(|seed| ChaCha8Rng::seed_from_u64(seed));
        let time_offset = self.time_start.map(|t| crate::time::calculate_time_offset(t));

        let time_random_state = TimeRandomState::new(time_offset, random_state);

        let mut net_state = NetworkState::new();
        let no_rules = self.net_allow.is_empty();
        // Translate a resolved allow-list into a supervisor policy: unrestricted when
        // there are no rules at all or the rules allow any IP on all ports.
        let policy_from = |resolved: &network::ResolvedNetAllow| {
            if no_rules || resolved.any_ip_all_ports {
                crate::seccomp::notif::NetworkPolicy::Unrestricted
            } else {
                use crate::seccomp::notif::PortAllow;
                let per_ip = resolved
                    .per_ip
                    .iter()
                    .map(|(ip, ports)| {
                        let allow = if resolved.per_ip_all_ports.contains(ip) {
                            PortAllow::Any
                        } else {
                            PortAllow::Specific(ports.clone())
                        };
                        (*ip, allow)
                    })
                    .collect();
                crate::seccomp::notif::NetworkPolicy::AllowList {
                    per_ip,
                    any_ip_ports: resolved.any_ip_ports.clone(),
                }
            }
        };
        net_state.tcp_policy = policy_from(&resolved_net_allow.tcp);
        net_state.udp_policy = policy_from(&resolved_net_allow.udp);
        net_state.icmp_policy = policy_from(&resolved_net_allow.icmp);
        net_state.http_acl_addr = self.rt().http_acl_handle.as_ref().map(|h| h.addr);
        net_state.http_acl_ports = self.http_ports.iter().copied().collect();
        net_state.http_acl_orig_dest = self.rt().http_acl_handle.as_ref().map(|h| h.orig_dest.clone());
        // The bind callback is moved out of the runtime into the network state.
        if let Some(cb) = self.rt_mut().on_bind.take() {
            net_state.port_map.on_bind = Some(cb);
        }

        let procfs_state = ProcfsState::new();

        let mut res_state = ResourceState::new(
            notif_policy.max_memory_bytes,
            notif_policy.max_processes,
        );
        // presumably accounts for the initial child process — TODO confirm
        res_state.proc_count = 1;

        let mut cow_state = CowState::new();
        cow_state.branch = seccomp_cow_branch;

        let mut policy_fn_state = PolicyFnState::new();

        // Seed the denied-path set from `fs_denied`; skipped if the lock cannot be taken.
        if let Ok(mut denied) = policy_fn_state.denied_paths.write() {
            for path in &self.fs_denied {
                denied.insert(path.to_string_lossy().into_owned());
            }
        }

        if let Some(ref callback) = self.policy_fn {
            // Collect every explicitly allowed IP across the TCP/UDP/ICMP allow-lists.
            let mut allowed_ips: std::collections::HashSet<std::net::IpAddr> =
                std::collections::HashSet::new();
            for p in [&net_state.tcp_policy, &net_state.udp_policy, &net_state.icmp_policy] {
                if let crate::seccomp::notif::NetworkPolicy::AllowList { per_ip, .. } = p {
                    allowed_ips.extend(per_ip.keys().copied());
                }
            }
            let live = crate::policy_fn::LivePolicy {
                allowed_ips,
                max_memory_bytes: notif_policy.max_memory_bytes,
                max_processes: notif_policy.max_processes,
            };
            // `ceiling` is a copy of the initial policy, passed alongside the shared live one.
            let ceiling = live.clone();
            let live = std::sync::Arc::new(std::sync::RwLock::new(live));
            let denied_paths = policy_fn_state.denied_paths.clone();
            let pid_overrides = net_state.pid_ip_overrides.clone();
            policy_fn_state.live_policy = Some(live.clone());
            let tx = crate::policy_fn::spawn_policy_fn(
                callback.clone(), live, ceiling, pid_overrides, denied_paths,
            );
            policy_fn_state.event_tx = Some(tx);
        }

        let chroot_state = ChrootState::new();

        // Raw fd numbers stored in the context; the owning fds live in `notif_fd` / `pidfd`.
        let notif_raw_fd = notif_fd.as_raw_fd();
        let child_pidfd_raw = pidfd.as_ref().map(|pfd| pfd.as_raw_fd());

        // Resource, COW and network state are shared with the runtime as well as the supervisor.
        let res_state = Arc::new(tokio::sync::Mutex::new(res_state));
        self.rt_mut().supervisor_resource = Some(Arc::clone(&res_state));

        let cow_state = Arc::new(tokio::sync::Mutex::new(cow_state));
        self.rt_mut().supervisor_cow = Some(Arc::clone(&cow_state));

        let net_state = Arc::new(tokio::sync::Mutex::new(net_state));
        self.rt_mut().supervisor_network = Some(Arc::clone(&net_state));

        let procfs_state = Arc::new(tokio::sync::Mutex::new(procfs_state));
        let time_random_state = Arc::new(tokio::sync::Mutex::new(time_random_state));
        let policy_fn_state = Arc::new(tokio::sync::Mutex::new(policy_fn_state));
        let chroot_state = Arc::new(tokio::sync::Mutex::new(chroot_state));
        let processes = Arc::new(crate::seccomp::state::ProcessIndex::new());

        let ctx = Arc::new(SupervisorCtx {
            resource: Arc::clone(&res_state),
            cow: Arc::clone(&cow_state),
            procfs: Arc::clone(&procfs_state),
            network: Arc::clone(&net_state),
            time_random: Arc::clone(&time_random_state),
            policy_fn: Arc::clone(&policy_fn_state),
            chroot: Arc::clone(&chroot_state),
            netlink: Arc::new(crate::netlink::NetlinkState::new()),
            processes: Arc::clone(&processes),
            policy: Arc::new(notif_policy),
            child_pidfd: child_pidfd_raw,
            notif_fd: notif_raw_fd,
        });

        // Handlers are moved out of the runtime and into the supervisor task.
        let handlers = std::mem::take(&mut self.rt_mut().handlers);
        let (startup_tx, startup_rx) = tokio::sync::oneshot::channel();
        self.rt_mut().notif_handle = Some(tokio::spawn(
            notif::supervisor(notif_fd, ctx, handlers, startup_tx),
        ));
        // Wait for the supervisor to report its startup result before continuing.
        match startup_rx.await {
            Ok(Ok(())) => {}
            Ok(Err(e)) => return Err(SandboxRuntimeError::Io(e).into()),
            // Sender dropped without a message: the supervisor task ended early.
            Err(_) => {
                return Err(SandboxRuntimeError::Child(
                    "seccomp supervisor exited during startup".into(),
                ).into());
            }
        }

        // Background task: feed the current process count into the load-average sampler every 5 s.
        let la_resource = Arc::clone(&res_state);
        self.rt_mut().loadavg_handle = Some(tokio::spawn(async move {
            let mut interval = tokio::time::interval(Duration::from_secs(5));
            // Consume the first tick up front so the first sample is taken after a full period.
            interval.tick().await;
            loop {
                interval.tick().await;
                let mut rs = la_resource.lock().await;
                let running = rs.proc_count;
                rs.load_avg.sample(running);
            }
        }));
    }

    // CPU throttling is only needed below 100%.
    if let Some(cpu_pct) = self.max_cpu {
        if cpu_pct < 100 {
            let child_pid = pid;
            self.rt_mut().throttle_handle = Some(tokio::spawn(sandbox_throttle_cpu(child_pid, cpu_pct)));
        }
    }

    self.rt_mut().pidfd = pidfd;
    // Keep the write end of the ready pipe; `do_start` uses it to release the child.
    self.rt_mut().ready_w = Some(pipes.ready_w);

    Ok(())
}
1559
1560 fn do_start(&mut self) -> Result<(), crate::error::SandlockError> {
1565 use std::os::fd::AsRawFd;
1566 use crate::context::write_u32_fd;
1567 use crate::error::SandboxRuntimeError;
1568
1569 if !matches!(self.rt().state, RuntimeState::Created) {
1570 return Err(SandboxRuntimeError::Child("start() requires a created sandbox".into()).into());
1571 }
1572 let ready_w = self.rt_mut().ready_w.take()
1573 .ok_or_else(|| SandboxRuntimeError::Child("start() called without a prior create()".into()))?;
1574 write_u32_fd(ready_w.as_raw_fd(), 1)
1575 .map_err(|e| SandboxRuntimeError::Child(format!("write ready signal: {}", e)))?;
1576 drop(ready_w);
1577 self.rt_mut().state = RuntimeState::Running;
1578 Ok(())
1579 }
1580}
1581
1582impl Drop for Sandbox {
1587 fn drop(&mut self) {
1588 if let Some(ref mut rt) = self.runtime {
1589 if let Some(pid) = rt.child_pid {
1590 if matches!(rt.state, RuntimeState::Created | RuntimeState::Running | RuntimeState::Paused) {
1591 unsafe { libc::killpg(pid, libc::SIGKILL) };
1592 let mut status: i32 = 0;
1593 unsafe { libc::waitpid(pid, &mut status, 0) };
1594 }
1595 }
1596
1597 if let Some(h) = rt.notif_handle.take() { h.abort(); }
1598 if let Some(h) = rt.throttle_handle.take() { h.abort(); }
1599 if let Some(h) = rt.loadavg_handle.take() { h.abort(); }
1600
1601 let is_error = matches!(
1602 rt.state,
1603 RuntimeState::Stopped(ref s) if !matches!(s, crate::result::ExitStatus::Code(0))
1604 );
1605 let action = if is_error { &self.on_error } else { &self.on_exit };
1606 let action = action.clone();
1607
1608 if let Some(ref branch) = rt.cow_branch {
1609 match action {
1610 BranchAction::Commit => { let _ = branch.commit(); }
1611 BranchAction::Abort => { let _ = branch.abort(); }
1612 BranchAction::Keep => {}
1613 }
1614 }
1615
1616 if let Some(ref mut cow) = rt.seccomp_cow {
1617 match action {
1618 BranchAction::Commit => { let _ = cow.commit(); }
1619 BranchAction::Abort => { let _ = cow.abort(); }
1620 BranchAction::Keep => {}
1621 }
1622 }
1623 }
1624 }
1625}
1626
1627async fn sandbox_throttle_cpu(pid: i32, cpu_pct: u8) {
1632 use std::time::Duration;
1633 let period = Duration::from_millis(100);
1634 let run_time = period * cpu_pct as u32 / 100;
1635 let stop_time = period - run_time;
1636 loop {
1637 tokio::time::sleep(run_time).await;
1638 if unsafe { libc::killpg(pid, libc::SIGSTOP) } < 0 { break; }
1639 tokio::time::sleep(stop_time).await;
1640 if unsafe { libc::killpg(pid, libc::SIGCONT) } < 0 { break; }
1641 }
1642}
1643
/// Process-wide counter, starting at 1, that `sandbox_resolve_name` increments
/// to number auto-generated sandbox names.
static NEXT_SANDBOX_NAME: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(1);
1649
1650fn sandbox_resolve_name(name: Option<&str>) -> Result<String, crate::error::SandlockError> {
1651 match name {
1652 Some(n) => sandbox_validate_name(n.to_string()),
1653 None => Ok(format!(
1654 "sandbox-{}-{}",
1655 std::process::id(),
1656 NEXT_SANDBOX_NAME.fetch_add(1, std::sync::atomic::Ordering::Relaxed),
1657 )),
1658 }
1659}
1660
1661fn sandbox_validate_name(name: String) -> Result<String, crate::error::SandlockError> {
1662 use crate::error::SandboxRuntimeError;
1663 if name.is_empty() {
1664 return Err(SandboxRuntimeError::Child("sandbox name must not be empty".into()).into());
1665 }
1666 if name.len() > 64 {
1667 return Err(SandboxRuntimeError::Child("sandbox name must be at most 64 bytes".into()).into());
1668 }
1669 if name.as_bytes().contains(&0) {
1670 return Err(SandboxRuntimeError::Child("sandbox name must not contain NUL bytes".into()).into());
1671 }
1672 Ok(name)
1673}
1674
1675fn sandbox_read_exact(fd: i32, buf: &mut [u8]) {
1680 let mut off = 0;
1681 while off < buf.len() {
1682 let r = unsafe { libc::read(fd, buf[off..].as_mut_ptr() as *mut _, buf.len() - off) };
1683 if r <= 0 { break; }
1684 off += r as usize;
1685 }
1686}
1687
/// Reads everything available from `fd` until end-of-file and closes it.
///
/// Read errors are deliberately ignored: whatever was read before the error
/// is returned (possibly an empty vector).
fn sandbox_read_fd_to_end(fd: std::os::fd::OwnedFd) -> Vec<u8> {
    use std::io::Read;
    // Safe ownership transfer; no raw-fd round trip or `unsafe` needed.
    let mut file = std::fs::File::from(fd);
    let mut buf = Vec::new();
    let _ = file.read_to_end(&mut buf);
    buf
}
1697
1698fn sandbox_wait_status_to_exit(status: i32) -> crate::result::ExitStatus {
1699 use crate::result::ExitStatus;
1700 if libc::WIFEXITED(status) {
1701 ExitStatus::Code(libc::WEXITSTATUS(status))
1702 } else if libc::WIFSIGNALED(status) {
1703 let sig = libc::WTERMSIG(status);
1704 if sig == libc::SIGKILL {
1705 ExitStatus::Killed
1706 } else {
1707 ExitStatus::Signal(sig)
1708 }
1709 } else {
1710 ExitStatus::Killed
1711 }
1712}
1713
1714fn sandbox_collect_handlers<I, S, H>(
1715 handlers: I,
1716 sandbox: &Sandbox,
1717) -> Result<Vec<(i64, Arc<dyn crate::seccomp::dispatch::Handler>)>, crate::error::SandlockError>
1718where
1719 I: IntoIterator<Item = (S, H)>,
1720 S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
1721 H: crate::seccomp::dispatch::Handler,
1722{
1723 use crate::seccomp::dispatch::{Handler, HandlerError};
1724
1725 let pending: Vec<(i64, Arc<dyn Handler>)> = handlers
1726 .into_iter()
1727 .map(|(syscall, handler)| {
1728 let nr = syscall.try_into().map_err(HandlerError::from)?.raw();
1729 let h: Arc<dyn Handler> = Arc::new(handler);
1730 Ok::<_, HandlerError>((nr, h))
1731 })
1732 .collect::<Result<_, _>>()?;
1733
1734 let nrs: Vec<i64> = pending.iter().map(|(nr, _)| *nr).collect();
1735 crate::seccomp::dispatch::validate_handler_syscalls_against_policy(&nrs, sandbox)
1736 .map_err(|syscall_nr| HandlerError::OnDenySyscall { syscall_nr })?;
1737
1738 Ok(pending)
1739}
1740
1741fn validate_syscall_names(names: &[String]) -> Result<(), SandboxError> {
1742 let unknown: Vec<&str> = names
1743 .iter()
1744 .map(String::as_str)
1745 .filter(|name| crate::context::syscall_name_to_nr(name).is_none())
1746 .collect();
1747 if unknown.is_empty() {
1748 Ok(())
1749 } else {
1750 Err(SandboxError::Invalid(format!(
1751 "unknown syscall name(s): {}",
1752 unknown.join(", ")
1753 )))
1754 }
1755}
1756
// Builder for a `Sandbox`. Every field has a same-purpose setter method on
// `impl SandboxBuilder`; with the `cli` feature the struct also derives
// `clap::Args`, and fields marked `clap(skip)` are not exposed as flags.
//
// NOTE: plain `//` comments are used on purpose — `///` doc comments on a
// clap-derived struct are picked up as CLI help text.
#[derive(Default)]
#[cfg_attr(feature = "cli", derive(clap::Args))]
pub struct SandboxBuilder {
    // Paths appended by `fs_read` / `-r, --fs-read`.
    #[cfg_attr(feature = "cli", arg(short = 'r', long = "fs-read", value_name = "PATH"))]
    pub fs_readable: Vec<PathBuf>,

    // Paths appended by `fs_write` / `-w, --fs-write`.
    #[cfg_attr(feature = "cli", arg(short = 'w', long = "fs-write", value_name = "PATH"))]
    pub fs_writable: Vec<PathBuf>,

    // Paths appended by `fs_deny` / `--fs-deny`.
    #[cfg_attr(feature = "cli", arg(long = "fs-deny", value_name = "PATH"))]
    pub fs_denied: Vec<PathBuf>,

    // Syscall names; checked by `validate_syscall_names` in `build_unchecked`.
    #[cfg_attr(feature = "cli", arg(long = "extra-deny-syscall", value_name = "NAME"))]
    pub extra_deny_syscalls: Vec<String>,

    // Syscall names given via `--extra-allow-syscall`.
    #[cfg_attr(feature = "cli", arg(long = "extra-allow-syscall", value_name = "NAME"))]
    pub extra_allow_syscalls: Vec<String>,

    // Unparsed network allow specs; parsed with `NetAllow::parse` in `build_unchecked`.
    #[cfg_attr(feature = "cli", arg(long = "net-allow", value_name = "SPEC"))]
    pub net_allow: Vec<String>,

    // Ports appended by `net_bind_port` / `--net-bind`.
    #[cfg_attr(feature = "cli", arg(long = "net-bind"))]
    pub net_bind: Vec<u16>,

    // Unparsed HTTP allow rules; parsed with `HttpRule::parse` in `build_unchecked`.
    #[cfg_attr(feature = "cli", arg(long = "http-allow", value_name = "RULE"))]
    pub http_allow: Vec<String>,

    // Unparsed HTTP deny rules; parsed with `HttpRule::parse` in `build_unchecked`.
    #[cfg_attr(feature = "cli", arg(long = "http-deny", value_name = "RULE"))]
    pub http_deny: Vec<String>,

    // HTTP ports. When empty and HTTP rules exist, `build_unchecked` defaults to
    // 80, plus 443 if `http_ca` is set.
    #[cfg_attr(feature = "cli", arg(long = "http-port", value_name = "PORT"))]
    pub http_ports: Vec<u16>,

    // CA path for the HTTP ACL; must be given together with `http_key`.
    #[cfg_attr(feature = "cli", arg(long = "http-ca", value_name = "PATH"))]
    pub http_ca: Option<PathBuf>,

    // Key path for the HTTP ACL; must be given together with `http_ca`.
    #[cfg_attr(feature = "cli", arg(long = "http-key", value_name = "PATH"))]
    pub http_key: Option<PathBuf>,

    // Memory limit; not a CLI flag on this struct.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub max_memory: Option<ByteSize>,

    // Process-count limit (`-P, --max-processes`).
    #[cfg_attr(feature = "cli", arg(short = 'P', long = "max-processes"))]
    pub max_processes: Option<u32>,

    // Open-file limit (`--max-open-files`).
    #[cfg_attr(feature = "cli", arg(long = "max-open-files"))]
    pub max_open_files: Option<u32>,

    // CPU percentage (`-c, --cpu`); `build_unchecked` rejects 0 and values above 100.
    #[cfg_attr(feature = "cli", arg(short = 'c', long = "cpu"))]
    pub max_cpu: Option<u8>,

    // Seed set by `random_seed` / `--random-seed`.
    #[cfg_attr(feature = "cli", arg(long = "random-seed"))]
    pub random_seed: Option<u64>,

    // Start time set by `time_start`; not a CLI flag on this struct.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub time_start: Option<SystemTime>,

    // Flag set by `no_randomize_memory` / `--no-randomize-memory`.
    #[cfg_attr(feature = "cli", arg(long = "no-randomize-memory"))]
    pub no_randomize_memory: bool,

    // Flag set by `no_huge_pages` / `--no-huge-pages`.
    #[cfg_attr(feature = "cli", arg(long = "no-huge-pages"))]
    pub no_huge_pages: bool,

    // Flag set by `no_coredump` / `--no-coredump`.
    #[cfg_attr(feature = "cli", arg(long = "no-coredump"))]
    pub no_coredump: bool,

    // Flag set by `deterministic_dirs` / `--deterministic-dirs`.
    #[cfg_attr(feature = "cli", arg(long = "deterministic-dirs"))]
    pub deterministic_dirs: bool,

    // Filesystem isolation mode; not a CLI flag on this struct.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub fs_isolation: Option<FsIsolation>,

    // Working directory for copy-on-write isolation (`--workdir`).
    #[cfg_attr(feature = "cli", arg(long = "workdir"))]
    pub workdir: Option<PathBuf>,

    // Path set by `cwd` / `--cwd`.
    #[cfg_attr(feature = "cli", arg(long = "cwd"))]
    pub cwd: Option<PathBuf>,

    // Storage directory for copy-on-write data (`--fs-storage`).
    #[cfg_attr(feature = "cli", arg(long = "fs-storage", value_name = "PATH"))]
    pub fs_storage: Option<PathBuf>,

    // Disk limit; not a CLI flag on this struct.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub max_disk: Option<ByteSize>,

    // Branch action set by `on_exit`; not a CLI flag on this struct.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub on_exit: Option<BranchAction>,

    // Branch action set by `on_error`; not a CLI flag on this struct.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub on_error: Option<BranchAction>,

    // `(virtual path, host path)` pairs appended by `fs_mount`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub fs_mount: Vec<(PathBuf, PathBuf)>,

    // Path set by `chroot` / `--chroot`.
    #[cfg_attr(feature = "cli", arg(long = "chroot"))]
    pub chroot: Option<PathBuf>,

    // Flag set by `clean_env` / `--clean-env`.
    #[cfg_attr(feature = "cli", arg(long = "clean-env"))]
    pub clean_env: bool,

    // Environment variables inserted by `env_var`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub env: HashMap<String, String>,

    // Device list set by `gpu_devices`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub gpu_devices: Option<Vec<u32>>,

    // Core list set by `cpu_cores`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub cpu_cores: Option<Vec<u32>>,

    // Value set by `num_cpus` / `--num-cpus`.
    #[cfg_attr(feature = "cli", arg(long = "num-cpus"))]
    pub num_cpus: Option<u32>,

    // Flag set by `port_remap` / `--port-remap`.
    #[cfg_attr(feature = "cli", arg(long = "port-remap"))]
    pub port_remap: bool,

    // Value set by `uid` / `--uid`.
    #[cfg_attr(feature = "cli", arg(long = "uid"))]
    pub uid: Option<u32>,

    // Policy callback set by `policy_fn`; printed as "<callback>" by the Debug impl.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub policy_fn: Option<crate::policy_fn::PolicyCallback>,

    // Optional sandbox name set by `name`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub name: Option<String>,

    // One-shot closure set by `init_fn`; being `FnOnce`, it is NOT carried over by `Clone`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub(crate) init_fn: Option<Box<dyn FnOnce() + Send + 'static>>,

    // Shared closure taking a `u32`, set by `work_fn`; cloned by reference count.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub(crate) work_fn: Option<Arc<dyn Fn(u32) + Send + Sync + 'static>>,
}
1913
1914impl std::fmt::Debug for SandboxBuilder {
1915 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1916 f.debug_struct("SandboxBuilder")
1917 .field("fs_readable", &self.fs_readable)
1918 .field("fs_writable", &self.fs_writable)
1919 .field("max_memory", &self.max_memory)
1920 .field("max_processes", &self.max_processes)
1921 .field("policy_fn", &self.policy_fn.as_ref().map(|_| "<callback>"))
1922 .finish_non_exhaustive()
1923 }
1924}
1925
1926impl Clone for SandboxBuilder {
1927 fn clone(&self) -> Self {
1932 Self {
1933 fs_readable: self.fs_readable.clone(),
1934 fs_writable: self.fs_writable.clone(),
1935 fs_denied: self.fs_denied.clone(),
1936 extra_deny_syscalls: self.extra_deny_syscalls.clone(),
1937 extra_allow_syscalls: self.extra_allow_syscalls.clone(),
1938 net_allow: self.net_allow.clone(),
1939 net_bind: self.net_bind.clone(),
1940 http_allow: self.http_allow.clone(),
1941 http_deny: self.http_deny.clone(),
1942 http_ports: self.http_ports.clone(),
1943 http_ca: self.http_ca.clone(),
1944 http_key: self.http_key.clone(),
1945 max_memory: self.max_memory,
1946 max_processes: self.max_processes,
1947 max_open_files: self.max_open_files,
1948 max_cpu: self.max_cpu,
1949 random_seed: self.random_seed,
1950 time_start: self.time_start,
1951 no_randomize_memory: self.no_randomize_memory,
1952 no_huge_pages: self.no_huge_pages,
1953 no_coredump: self.no_coredump,
1954 deterministic_dirs: self.deterministic_dirs,
1955 fs_isolation: self.fs_isolation.clone(),
1956 workdir: self.workdir.clone(),
1957 cwd: self.cwd.clone(),
1958 fs_storage: self.fs_storage.clone(),
1959 max_disk: self.max_disk,
1960 on_exit: self.on_exit.clone(),
1961 on_error: self.on_error.clone(),
1962 fs_mount: self.fs_mount.clone(),
1963 chroot: self.chroot.clone(),
1964 clean_env: self.clean_env,
1965 env: self.env.clone(),
1966 gpu_devices: self.gpu_devices.clone(),
1967 cpu_cores: self.cpu_cores.clone(),
1968 num_cpus: self.num_cpus,
1969 port_remap: self.port_remap,
1970 uid: self.uid,
1971 policy_fn: self.policy_fn.clone(),
1972 name: self.name.clone(),
1973 init_fn: None,
1975 work_fn: self.work_fn.clone(),
1977 }
1978 }
1979}
1980
1981impl SandboxBuilder {
1982 pub fn fs_write(mut self, path: impl Into<PathBuf>) -> Self {
1983 self.fs_writable.push(path.into());
1984 self
1985 }
1986
1987 pub fn fs_read(mut self, path: impl Into<PathBuf>) -> Self {
1988 self.fs_readable.push(path.into());
1989 self
1990 }
1991
1992 pub fn fs_read_if_exists(self, path: impl Into<PathBuf>) -> Self {
1993 let path = path.into();
1994 if path.exists() {
1995 self.fs_read(path)
1996 } else {
1997 self
1998 }
1999 }
2000
2001 pub fn fs_deny(mut self, path: impl Into<PathBuf>) -> Self {
2002 self.fs_denied.push(path.into());
2003 self
2004 }
2005
2006 pub fn extra_deny_syscalls(mut self, calls: Vec<String>) -> Self {
2007 self.extra_deny_syscalls.extend(calls);
2008 self
2009 }
2010
2011 pub fn extra_allow_syscalls(mut self, names: Vec<String>) -> Self {
2012 self.extra_allow_syscalls.extend(names);
2013 self
2014 }
2015
2016 pub fn net_allow(mut self, spec: impl Into<String>) -> Self {
2025 self.net_allow.push(spec.into());
2026 self
2027 }
2028
2029 pub fn net_bind_port(mut self, port: u16) -> Self {
2030 self.net_bind.push(port);
2031 self
2032 }
2033
2034 pub fn http_allow(mut self, rule: &str) -> Self {
2035 self.http_allow.push(rule.to_string());
2036 self
2037 }
2038
2039 pub fn http_deny(mut self, rule: &str) -> Self {
2040 self.http_deny.push(rule.to_string());
2041 self
2042 }
2043
2044 pub fn http_port(mut self, port: u16) -> Self {
2045 self.http_ports.push(port);
2046 self
2047 }
2048
2049 pub fn http_ca(mut self, path: impl Into<PathBuf>) -> Self {
2050 self.http_ca = Some(path.into());
2051 self
2052 }
2053
2054 pub fn http_key(mut self, path: impl Into<PathBuf>) -> Self {
2055 self.http_key = Some(path.into());
2056 self
2057 }
2058
2059 pub fn max_memory(mut self, size: ByteSize) -> Self {
2060 self.max_memory = Some(size);
2061 self
2062 }
2063
2064 pub fn max_processes(mut self, n: u32) -> Self {
2065 self.max_processes = Some(n);
2066 self
2067 }
2068
2069 pub fn max_open_files(mut self, n: u32) -> Self {
2070 self.max_open_files = Some(n);
2071 self
2072 }
2073
2074 pub fn max_cpu(mut self, pct: u8) -> Self {
2075 self.max_cpu = Some(pct);
2076 self
2077 }
2078
2079 pub fn random_seed(mut self, seed: u64) -> Self {
2080 self.random_seed = Some(seed);
2081 self
2082 }
2083
2084 pub fn time_start(mut self, t: SystemTime) -> Self {
2085 self.time_start = Some(t);
2086 self
2087 }
2088
2089 pub fn no_randomize_memory(mut self, v: bool) -> Self {
2090 self.no_randomize_memory = v;
2091 self
2092 }
2093
2094 pub fn no_huge_pages(mut self, v: bool) -> Self {
2095 self.no_huge_pages = v;
2096 self
2097 }
2098
2099 pub fn no_coredump(mut self, v: bool) -> Self {
2100 self.no_coredump = v;
2101 self
2102 }
2103
2104 pub fn deterministic_dirs(mut self, v: bool) -> Self {
2105 self.deterministic_dirs = v;
2106 self
2107 }
2108
2109 pub fn fs_isolation(mut self, iso: FsIsolation) -> Self {
2110 self.fs_isolation = Some(iso);
2111 self
2112 }
2113
2114 pub fn workdir(mut self, path: impl Into<PathBuf>) -> Self {
2115 self.workdir = Some(path.into());
2116 self
2117 }
2118
2119 pub fn cwd(mut self, path: impl Into<PathBuf>) -> Self {
2120 self.cwd = Some(path.into());
2121 self
2122 }
2123
2124 pub fn fs_storage(mut self, path: impl Into<PathBuf>) -> Self {
2125 self.fs_storage = Some(path.into());
2126 self
2127 }
2128
2129 pub fn max_disk(mut self, size: ByteSize) -> Self {
2130 self.max_disk = Some(size);
2131 self
2132 }
2133
2134 pub fn on_exit(mut self, action: BranchAction) -> Self {
2135 self.on_exit = Some(action);
2136 self
2137 }
2138
2139 pub fn on_error(mut self, action: BranchAction) -> Self {
2140 self.on_error = Some(action);
2141 self
2142 }
2143
2144 pub fn chroot(mut self, path: impl Into<PathBuf>) -> Self {
2145 self.chroot = Some(path.into());
2146 self
2147 }
2148
2149 pub fn fs_mount(mut self, virtual_path: impl Into<PathBuf>, host_path: impl Into<PathBuf>) -> Self {
2150 self.fs_mount.push((virtual_path.into(), host_path.into()));
2151 self
2152 }
2153
2154 pub fn clean_env(mut self, v: bool) -> Self {
2155 self.clean_env = v;
2156 self
2157 }
2158
2159 pub fn env_var(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
2160 self.env.insert(key.into(), value.into());
2161 self
2162 }
2163
2164
2165 pub fn gpu_devices(mut self, devices: Vec<u32>) -> Self {
2166 self.gpu_devices = Some(devices);
2167 self
2168 }
2169
2170 pub fn cpu_cores(mut self, cores: Vec<u32>) -> Self {
2171 self.cpu_cores = Some(cores);
2172 self
2173 }
2174
2175 pub fn num_cpus(mut self, n: u32) -> Self {
2176 self.num_cpus = Some(n);
2177 self
2178 }
2179
2180 pub fn port_remap(mut self, v: bool) -> Self {
2181 self.port_remap = v;
2182 self
2183 }
2184
2185 pub fn policy_fn(
2186 mut self,
2187 f: impl Fn(crate::policy_fn::SyscallEvent, &mut crate::policy_fn::PolicyContext) -> crate::policy_fn::Verdict + Send + Sync + 'static,
2188 ) -> Self {
2189 self.policy_fn = Some(std::sync::Arc::new(f));
2190 self
2191 }
2192
2193 pub fn uid(mut self, id: u32) -> Self {
2194 self.uid = Some(id);
2195 self
2196 }
2197
2198 pub fn name(mut self, name: impl Into<String>) -> Self {
2201 self.name = Some(name.into());
2202 self
2203 }
2204
2205 pub fn init_fn(mut self, f: impl FnOnce() + Send + 'static) -> Self {
2210 self.init_fn = Some(Box::new(f));
2211 self
2212 }
2213
2214 pub fn work_fn(mut self, f: impl Fn(u32) + Send + Sync + 'static) -> Self {
2219 self.work_fn = Some(Arc::new(f));
2220 self
2221 }
2222
2223 pub fn build_unchecked(self) -> Result<Sandbox, SandboxError> {
2228 validate_syscall_names(&self.extra_deny_syscalls)?;
2229
2230 if let Some(cpu) = self.max_cpu {
2232 if cpu == 0 || cpu > 100 {
2233 return Err(SandboxError::InvalidCpuPercent(cpu));
2234 }
2235 }
2236
2237 if self.http_ca.is_some() != self.http_key.is_some() {
2239 return Err(SandboxError::Invalid(
2240 "--http-ca and --http-key must both be provided together".into(),
2241 ));
2242 }
2243
2244 let http_allow: Vec<HttpRule> = self
2246 .http_allow
2247 .into_iter()
2248 .map(|s| HttpRule::parse(&s))
2249 .collect::<Result<_, _>>()?;
2250 let http_deny: Vec<HttpRule> = self
2251 .http_deny
2252 .into_iter()
2253 .map(|s| HttpRule::parse(&s))
2254 .collect::<Result<_, _>>()?;
2255
2256 let http_ports = if self.http_ports.is_empty() && (!http_allow.is_empty() || !http_deny.is_empty()) {
2258 let mut ports = vec![80];
2259 if self.http_ca.is_some() {
2260 ports.push(443);
2261 }
2262 ports
2263 } else {
2264 self.http_ports
2265 };
2266
2267 let mut net_allow: Vec<NetAllow> = self
2269 .net_allow
2270 .into_iter()
2271 .map(|s| NetAllow::parse(&s))
2272 .collect::<Result<_, _>>()?;
2273
2274 if !http_ports.is_empty() {
2281 let mut wildcard_seen = false;
2282 let mut concrete_hosts: Vec<String> = Vec::new();
2283 for rule in http_allow.iter().chain(http_deny.iter()) {
2284 if rule.host == "*" {
2285 wildcard_seen = true;
2286 } else if !concrete_hosts.iter().any(|h| h.eq_ignore_ascii_case(&rule.host)) {
2287 concrete_hosts.push(rule.host.clone());
2288 }
2289 }
2290 if wildcard_seen || (http_allow.is_empty() && http_deny.is_empty()) {
2291 net_allow.push(NetAllow {
2293 protocol: Protocol::Tcp,
2294 host: None,
2295 ports: http_ports.clone(),
2296 all_ports: false,
2297 });
2298 }
2299 for h in concrete_hosts {
2300 net_allow.push(NetAllow {
2301 protocol: Protocol::Tcp,
2302 host: Some(h),
2303 ports: http_ports.clone(),
2304 all_ports: false,
2305 });
2306 }
2307 }
2308
2309 let fs_isolation = self.fs_isolation.unwrap_or_default();
2310 Ok(Sandbox {
2311 fs_writable: self.fs_writable,
2312 fs_readable: self.fs_readable,
2313 fs_denied: self.fs_denied,
2314 extra_deny_syscalls: self.extra_deny_syscalls,
2315 extra_allow_syscalls: self.extra_allow_syscalls,
2316 net_allow,
2317 net_bind: self.net_bind,
2318 http_allow,
2319 http_deny,
2320 http_ports,
2321 http_ca: self.http_ca,
2322 http_key: self.http_key,
2323 max_memory: self.max_memory,
2324 max_processes: self.max_processes.unwrap_or(64),
2325 max_open_files: self.max_open_files,
2326 max_cpu: self.max_cpu,
2327 random_seed: self.random_seed,
2328 time_start: self.time_start,
2329 no_randomize_memory: self.no_randomize_memory,
2330 no_huge_pages: self.no_huge_pages,
2331 no_coredump: self.no_coredump,
2332 deterministic_dirs: self.deterministic_dirs,
2333 fs_isolation,
2334 workdir: self.workdir,
2335 cwd: self.cwd,
2336 fs_storage: self.fs_storage,
2337 max_disk: self.max_disk,
2338 on_exit: self.on_exit.unwrap_or_default(),
2339 on_error: self.on_error.unwrap_or_default(),
2340 fs_mount: self.fs_mount,
2341 chroot: self.chroot,
2342 clean_env: self.clean_env,
2343 env: self.env,
2344 gpu_devices: self.gpu_devices,
2345 cpu_cores: self.cpu_cores,
2346 num_cpus: self.num_cpus,
2347 port_remap: self.port_remap,
2348 uid: self.uid,
2349 policy_fn: self.policy_fn,
2350 name: self.name,
2351 init_fn: self.init_fn,
2352 work_fn: self.work_fn,
2353 runtime: None,
2354 })
2355 }
2356
2357 pub fn build(self) -> Result<Sandbox, SandboxError> {
2360 let p = self.build_unchecked()?;
2361 p.validate()?;
2362 Ok(p)
2363 }
2364}
2365
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed allow/deny strings come back from `build` as parsed rules.
    #[test]
    fn builder_http_rules() {
        let built = Sandbox::builder()
            .http_allow("GET api.example.com/v1/*")
            .http_deny("* */admin/*")
            .build()
            .unwrap();

        let (allow, deny) = (&built.http_allow, &built.http_deny);
        assert_eq!(allow.len(), 1);
        assert_eq!(deny.len(), 1);
        assert_eq!(allow[0].method, "GET");
        assert_eq!(deny[0].host, "*");
    }

    /// A malformed allow rule makes `build` fail.
    #[test]
    fn builder_invalid_http_allow_returns_error() {
        let outcome = Sandbox::builder().http_allow("GETexample.com").build();
        assert!(outcome.is_err());
    }

    /// A malformed deny rule makes `build` fail.
    #[test]
    fn builder_invalid_http_deny_returns_error() {
        let outcome = Sandbox::builder().http_deny("BADRULE").build();
        assert!(outcome.is_err());
    }

    /// Supplying only the CA path is rejected.
    #[test]
    fn builder_http_ca_without_key_returns_error() {
        let outcome = Sandbox::builder().http_ca("/tmp/ca.pem").build();
        assert!(outcome.is_err());
    }

    /// Supplying only the key path is rejected.
    #[test]
    fn builder_http_key_without_ca_returns_error() {
        let outcome = Sandbox::builder().http_key("/tmp/key.pem").build();
        assert!(outcome.is_err());
    }

    /// Supplying both CA and key paths builds successfully and keeps both.
    #[test]
    fn builder_http_ca_and_key_together_ok() {
        let built = Sandbox::builder()
            .http_ca("/tmp/ca.pem")
            .http_key("/tmp/key.pem")
            .build()
            .unwrap();

        assert!(built.http_ca.is_some());
        assert!(built.http_key.is_some());
    }

    /// `allows_sysv_ipc` is true only when `extra_allow_syscalls` contains "sysv_ipc".
    #[test]
    fn allows_sysv_ipc_reads_extra_allow_syscalls() {
        // Explicitly allowed.
        let with_sysv = Sandbox::builder()
            .extra_allow_syscalls(vec!["sysv_ipc".into()])
            .build()
            .unwrap();
        assert!(with_sysv.allows_sysv_ipc());

        // Nothing allowed.
        let plain = Sandbox::builder().build().unwrap();
        assert!(!plain.allows_sysv_ipc());

        // A different entry does not enable it.
        let with_other = Sandbox::builder()
            .extra_allow_syscalls(vec!["other_group".into()])
            .build()
            .unwrap();
        assert!(!with_other.allows_sysv_ipc());
    }
}