1use std::collections::HashMap;
2use std::os::fd::AsRawFd;
3use std::path::PathBuf;
4use std::sync::Arc;
5use std::time::SystemTime;
6
7use serde::{Deserialize, Serialize};
8use tokio::task::JoinHandle;
9
10use crate::context;
11use crate::error::SandboxError;
12pub use crate::http::{http_acl_check, normalize_path, prefix_or_exact_match, HttpRule};
13pub use crate::network::{NetAllow, Protocol};
14
/// A size expressed as a whole number of bytes.
///
/// The wrapped `u64` is the raw byte count. Values can be built from
/// KiB/MiB/GiB multiples or parsed from text via the associated functions
/// on this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct ByteSize(pub u64);
18
19impl ByteSize {
20 pub fn bytes(n: u64) -> Self {
21 ByteSize(n)
22 }
23
24 pub fn kib(n: u64) -> Self {
25 ByteSize(n * 1024)
26 }
27
28 pub fn mib(n: u64) -> Self {
29 ByteSize(n * 1024 * 1024)
30 }
31
32 pub fn gib(n: u64) -> Self {
33 ByteSize(n * 1024 * 1024 * 1024)
34 }
35
36 pub fn parse(s: &str) -> Result<Self, SandboxError> {
37 let s = s.trim();
38 if s.is_empty() {
39 return Err(SandboxError::Invalid("empty byte size string".into()));
40 }
41
42 let last = s.chars().last().unwrap();
44 if last.is_ascii_alphabetic() {
45 let (num_str, suffix) = s.split_at(s.len() - 1);
46 let n: u64 = num_str
47 .trim()
48 .parse()
49 .map_err(|_| SandboxError::Invalid(format!("invalid byte size: {}", s)))?;
50 match suffix.to_ascii_uppercase().as_str() {
51 "K" => Ok(ByteSize::kib(n)),
52 "M" => Ok(ByteSize::mib(n)),
53 "G" => Ok(ByteSize::gib(n)),
54 other => Err(SandboxError::Invalid(format!("unknown byte size suffix: {}", other))),
55 }
56 } else {
57 let n: u64 = s
58 .parse()
59 .map_err(|_| SandboxError::Invalid(format!("invalid byte size: {}", s)))?;
60 Ok(ByteSize(n))
61 }
62 }
63}
64
/// Filesystem-only subset of a sandbox configuration.
///
/// Holds just the two path lists that `TryFrom<&Sandbox>` copies over; every
/// other `Sandbox` option is rejected by that conversion.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct Confinement {
    /// Paths listed as writable.
    pub fs_writable: Vec<PathBuf>,
    /// Paths listed as readable.
    pub fs_readable: Vec<PathBuf>,
}
71
impl Confinement {
    /// Returns an empty [`ConfinementBuilder`].
    pub fn builder() -> ConfinementBuilder {
        ConfinementBuilder::default()
    }
}
77
/// Incremental builder for [`Confinement`]; both lists start empty.
#[derive(Default)]
pub struct ConfinementBuilder {
    // Accumulated by `fs_write`.
    fs_writable: Vec<PathBuf>,
    // Accumulated by `fs_read`.
    fs_readable: Vec<PathBuf>,
}
83
impl ConfinementBuilder {
    /// Appends `path` to the writable list and returns the builder for chaining.
    pub fn fs_write(mut self, path: impl Into<PathBuf>) -> Self {
        self.fs_writable.push(path.into());
        self
    }

    /// Appends `path` to the readable list and returns the builder for chaining.
    pub fn fs_read(mut self, path: impl Into<PathBuf>) -> Self {
        self.fs_readable.push(path.into());
        self
    }

    /// Consumes the builder and produces the [`Confinement`]; paths keep
    /// their insertion order and are not deduplicated.
    pub fn build(self) -> Confinement {
        Confinement {
            fs_writable: self.fs_writable,
            fs_readable: self.fs_readable,
        }
    }
}
102
103impl TryFrom<&Sandbox> for Confinement {
104 type Error = SandboxError;
105
106 fn try_from(sandbox: &Sandbox) -> Result<Self, Self::Error> {
107 let mut unsupported = Vec::new();
108 if !sandbox.fs_denied.is_empty() { unsupported.push("fs_denied"); }
109 if !sandbox.extra_deny_syscalls.is_empty() { unsupported.push("extra_deny_syscalls"); }
110 if !sandbox.net_allow.is_empty() { unsupported.push("net_allow"); }
111 if !sandbox.net_bind.is_empty() { unsupported.push("net_bind"); }
112 if sandbox.allows_sysv_ipc() { unsupported.push("extra_allow_syscalls=[\"sysv_ipc\"]"); }
113 if !sandbox.http_allow.is_empty() { unsupported.push("http_allow"); }
114 if !sandbox.http_deny.is_empty() { unsupported.push("http_deny"); }
115 if !sandbox.http_ports.is_empty() { unsupported.push("http_ports"); }
116 if sandbox.http_ca.is_some() { unsupported.push("http_ca"); }
117 if sandbox.http_key.is_some() { unsupported.push("http_key"); }
118 if sandbox.max_memory.is_some() { unsupported.push("max_memory"); }
119 if sandbox.max_processes != 64 { unsupported.push("max_processes"); }
120 if sandbox.max_open_files.is_some() { unsupported.push("max_open_files"); }
121 if sandbox.max_cpu.is_some() { unsupported.push("max_cpu"); }
122 if sandbox.random_seed.is_some() { unsupported.push("random_seed"); }
123 if sandbox.time_start.is_some() { unsupported.push("time_start"); }
124 if sandbox.no_randomize_memory { unsupported.push("no_randomize_memory"); }
125 if sandbox.no_huge_pages { unsupported.push("no_huge_pages"); }
126 if sandbox.no_coredump { unsupported.push("no_coredump"); }
127 if sandbox.deterministic_dirs { unsupported.push("deterministic_dirs"); }
128 if sandbox.workdir.is_some() { unsupported.push("workdir"); }
129 if sandbox.cwd.is_some() { unsupported.push("cwd"); }
130 if sandbox.fs_storage.is_some() { unsupported.push("fs_storage"); }
131 if sandbox.max_disk.is_some() { unsupported.push("max_disk"); }
132 if sandbox.on_exit != BranchAction::Commit { unsupported.push("on_exit"); }
133 if sandbox.on_error != BranchAction::Abort { unsupported.push("on_error"); }
134 if !sandbox.fs_mount.is_empty() { unsupported.push("fs_mount"); }
135 if sandbox.chroot.is_some() { unsupported.push("chroot"); }
136 if sandbox.clean_env { unsupported.push("clean_env"); }
137 if !sandbox.env.is_empty() { unsupported.push("env"); }
138 if sandbox.gpu_devices.is_some() { unsupported.push("gpu_devices"); }
139 if sandbox.cpu_cores.is_some() { unsupported.push("cpu_cores"); }
140 if sandbox.num_cpus.is_some() { unsupported.push("num_cpus"); }
141 if sandbox.port_remap { unsupported.push("port_remap"); }
142 if sandbox.uid.is_some() { unsupported.push("uid"); }
143 if sandbox.policy_fn.is_some() { unsupported.push("policy_fn"); }
144
145 if !unsupported.is_empty() {
146 return Err(SandboxError::UnsupportedForConfine(unsupported.join(", ")));
147 }
148
149 Ok(Self {
150 fs_writable: sandbox.fs_writable.clone(),
151 fs_readable: sandbox.fs_readable.clone(),
152 })
153 }
154}
155
/// Action selected for `Sandbox::on_exit` / `Sandbox::on_error`.
///
/// `Commit` is the `Default` variant. `dry_run` forces both settings to
/// `Keep` before running.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum BranchAction {
    #[default]
    Commit,
    Abort,
    Keep,
}
164
/// Per-instance runtime state, created lazily by `Sandbox::ensure_runtime`
/// and never cloned or serialized along with the configuration.
struct Runtime {
    /// Resolved instance name (see `Sandbox::instance_name`).
    name: String,
    /// Lifecycle state of the child.
    state: RuntimeState,
    /// Pid of the forked child; also used as the process-group id for `killpg`.
    child_pid: Option<i32>,
    // Not referenced in the visible part of this file.
    pidfd: Option<std::os::fd::OwnedFd>,
    /// Background tasks aborted by `wait()` once the child has exited.
    notif_handle: Option<JoinHandle<()>>,
    throttle_handle: Option<JoinHandle<()>>,
    loadavg_handle: Option<JoinHandle<()>>,
    /// Read ends of the capture pipes; drained by `wait()` into the `RunResult`.
    _stdout_read: Option<std::os::fd::OwnedFd>,
    _stderr_read: Option<std::os::fd::OwnedFd>,
    /// COW branch moved here from `supervisor_cow` by `wait()`; consumed by
    /// `collect_changes` and `do_abort`.
    seccomp_cow: Option<crate::cow::seccomp::SeccompCowBranch>,
    /// Shared supervisor state; `freeze`/`thaw` toggle `hold_forks` on it.
    supervisor_resource: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::ResourceState>>>,
    supervisor_cow: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::CowState>>>,
    /// Shared supervisor state read by `port_mappings`.
    supervisor_network: Option<Arc<tokio::sync::Mutex<crate::seccomp::state::NetworkState>>>,
    /// Parent end of the control pipe created by `fork()`.
    ctrl_fd: Option<std::os::fd::OwnedFd>,
    /// Read end of a fork clone's output pipe; drained by `reduce()`.
    stdout_pipe: Option<std::os::fd::OwnedFd>,
    /// Optional raw fds `(stdin, stdout, stderr)` that the child `dup2`s onto 0/1/2.
    io_overrides: Option<(Option<i32>, Option<i32>, Option<i32>)>,
    /// `(target_fd, source_fd)` pairs; the child runs `dup2(source_fd, target_fd)`.
    extra_fds: Vec<(i32, i32)>,
    /// Set when `http_allow` or `http_deny` is non-empty at create time.
    http_acl_handle: Option<crate::http_acl::HttpAclProxyHandle>,
    /// Callback installed via `Sandbox::set_on_bind`.
    #[allow(clippy::type_complexity)]
    on_bind: Option<Box<dyn Fn(&HashMap<u16, u16>) + Send + Sync>>,
    /// Handlers installed by the `run*_with_handlers` entry points.
    handlers: Vec<(i64, Arc<dyn crate::seccomp::dispatch::Handler>)>,
    // Not referenced in the visible part of this file.
    ready_w: Option<std::os::fd::OwnedFd>,
}
195
/// Lifecycle of a sandbox's child process.
enum RuntimeState {
    /// Runtime exists but no child has been created yet; the only state in
    /// which `do_create` proceeds.
    Created,
    /// Set by `resume()` and by `fork()` for the template process.
    Running,
    /// Set by `pause()` after a successful `SIGSTOP`.
    Paused,
    /// Child has been reaped; carries the exit status returned by later
    /// `wait()` calls.
    Stopped(crate::result::ExitStatus),
}
203
/// Sandbox configuration plus (optionally) the runtime state of one instance.
///
/// The public fields are the serializable policy. The `#[serde(skip)]` fields
/// at the bottom are process-local and are neither serialized nor
/// deserialized.
#[derive(Serialize, Deserialize)]
pub struct Sandbox {
    // --- Filesystem path lists ---
    pub fs_writable: Vec<PathBuf>,
    /// Also extended at create time with the COW branch's upper directory
    /// when a `workdir` is configured.
    pub fs_readable: Vec<PathBuf>,
    pub fs_denied: Vec<PathBuf>,

    // --- Syscall policy tweaks ---
    pub extra_deny_syscalls: Vec<String>,
    /// `allows_sysv_ipc()` checks this list for the entry `"sysv_ipc"`.
    pub extra_allow_syscalls: Vec<String>,

    // --- Network / HTTP ACL ---
    pub net_allow: Vec<NetAllow>,
    pub net_bind: Vec<u16>,
    /// A non-empty `http_allow` or `http_deny` makes `do_create` spawn the
    /// HTTP ACL proxy.
    pub http_allow: Vec<HttpRule>,
    pub http_deny: Vec<HttpRule>,
    pub http_ports: Vec<u16>,
    pub http_ca: Option<PathBuf>,
    pub http_key: Option<PathBuf>,

    // --- Resource limits ---
    pub max_memory: Option<ByteSize>,
    /// The `Confinement` conversion treats `64` as the unmodified value.
    pub max_processes: u32,
    pub max_open_files: Option<u32>,
    pub max_cpu: Option<u8>,

    // --- Determinism / hardening switches ---
    pub random_seed: Option<u64>,
    pub time_start: Option<SystemTime>,
    pub no_randomize_memory: bool,
    pub no_huge_pages: bool,
    pub no_coredump: bool,
    pub deterministic_dirs: bool,

    // --- Working directory / COW storage ---
    /// When set (and `no_supervisor` is false) a seccomp COW branch is
    /// created over this directory at create time.
    pub workdir: Option<PathBuf>,
    pub cwd: Option<PathBuf>,
    /// Passed to the COW branch as its storage location.
    pub fs_storage: Option<PathBuf>,
    /// Passed to the COW branch as a byte limit; `None` is passed as `0`.
    pub max_disk: Option<ByteSize>,
    pub on_exit: BranchAction,
    pub on_error: BranchAction,

    pub fs_mount: Vec<(PathBuf, PathBuf)>,

    // --- Environment ---
    pub chroot: Option<PathBuf>,
    pub clean_env: bool,
    pub env: HashMap<String, String>,
    pub gpu_devices: Option<Vec<u32>>,

    // --- CPU / ports ---
    pub cpu_cores: Option<Vec<u32>>,
    pub num_cpus: Option<u32>,
    pub port_remap: bool,

    /// When true, `do_create` skips creating the seccomp COW branch.
    pub no_supervisor: bool,

    pub uid: Option<u32>,

    /// Policy callback; not serialized, but carried over by `Clone`.
    #[serde(skip)]
    pub policy_fn: Option<crate::policy_fn::PolicyCallback>,

    /// Caller-chosen instance name, fed to `sandbox_resolve_name` when the
    /// runtime is created.
    #[serde(skip)]
    pub name: Option<String>,

    /// One-shot initialiser consumed by `fork()`; reset to `None` by `Clone`.
    #[serde(skip)]
    init_fn: Option<Box<dyn FnOnce() + Send + 'static>>,

    /// Work callback handed to the fork loop by `fork()`; shared (`Arc`) across clones.
    #[serde(skip)]
    work_fn: Option<Arc<dyn Fn(u32) + Send + Sync + 'static>>,

    /// Live runtime state; `None` until `ensure_runtime` runs, and reset to
    /// `None` by `Clone`.
    #[serde(skip)]
    runtime: Option<Box<Runtime>>,
}
323
324impl std::fmt::Debug for Sandbox {
325 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
326 f.debug_struct("Sandbox")
327 .field("fs_readable", &self.fs_readable)
328 .field("fs_writable", &self.fs_writable)
329 .field("max_memory", &self.max_memory)
330 .field("max_processes", &self.max_processes)
331 .field("policy_fn", &self.policy_fn.as_ref().map(|_| "<callback>"))
332 .field("name", &self.name)
333 .field("runtime", &self.runtime.as_ref().map(|_| "<runtime>"))
334 .finish_non_exhaustive()
335 }
336}
337
impl Clone for Sandbox {
    /// Clones the configuration only.
    ///
    /// All policy fields, `policy_fn`, `name` and `work_fn` are copied.
    /// `init_fn` (a `FnOnce`, which cannot be duplicated) and `runtime` are
    /// reset to `None`, so the clone is always an unstarted sandbox.
    fn clone(&self) -> Self {
        Self {
            fs_writable: self.fs_writable.clone(),
            fs_readable: self.fs_readable.clone(),
            fs_denied: self.fs_denied.clone(),
            extra_deny_syscalls: self.extra_deny_syscalls.clone(),
            extra_allow_syscalls: self.extra_allow_syscalls.clone(),
            net_allow: self.net_allow.clone(),
            net_bind: self.net_bind.clone(),
            http_allow: self.http_allow.clone(),
            http_deny: self.http_deny.clone(),
            http_ports: self.http_ports.clone(),
            http_ca: self.http_ca.clone(),
            http_key: self.http_key.clone(),
            max_memory: self.max_memory,
            max_processes: self.max_processes,
            max_open_files: self.max_open_files,
            max_cpu: self.max_cpu,
            random_seed: self.random_seed,
            time_start: self.time_start,
            no_randomize_memory: self.no_randomize_memory,
            no_huge_pages: self.no_huge_pages,
            no_coredump: self.no_coredump,
            deterministic_dirs: self.deterministic_dirs,
            workdir: self.workdir.clone(),
            cwd: self.cwd.clone(),
            fs_storage: self.fs_storage.clone(),
            max_disk: self.max_disk,
            on_exit: self.on_exit.clone(),
            on_error: self.on_error.clone(),
            fs_mount: self.fs_mount.clone(),
            chroot: self.chroot.clone(),
            clean_env: self.clean_env,
            env: self.env.clone(),
            gpu_devices: self.gpu_devices.clone(),
            cpu_cores: self.cpu_cores.clone(),
            num_cpus: self.num_cpus,
            port_remap: self.port_remap,
            no_supervisor: self.no_supervisor,
            uid: self.uid,
            policy_fn: self.policy_fn.clone(),
            name: self.name.clone(),
            // A boxed FnOnce cannot be cloned; the clone starts without one.
            init_fn: None,
            // Arc clone: the callback itself is shared, not duplicated.
            work_fn: self.work_fn.clone(),
            // Runtime state (pids, fds, tasks) belongs to the original instance.
            runtime: None,
        }
    }
}
401
402impl Sandbox {
    /// Returns a default-initialised [`SandboxBuilder`].
    pub fn builder() -> SandboxBuilder {
        SandboxBuilder::default()
    }
406
407 pub fn allows_sysv_ipc(&self) -> bool {
409 self.extra_allow_syscalls.iter().any(|s| s == "sysv_ipc")
410 }
411
    /// Validates the configuration.
    ///
    /// As written here this performs no checks and always returns `Ok(())`.
    pub fn validate(&self) -> Result<(), SandboxError> {
        Ok(())
    }
419
    /// Shared access to the runtime.
    ///
    /// # Panics
    ///
    /// Panics with "sandbox not started" when no runtime has been created;
    /// callers must have run `ensure_runtime` (or checked `self.runtime`) first.
    fn rt(&self) -> &Runtime {
        self.runtime.as_ref().expect("sandbox not started")
    }
427
    /// Mutable access to the runtime.
    ///
    /// # Panics
    ///
    /// Panics with "sandbox not started" when no runtime has been created.
    fn rt_mut(&mut self) -> &mut Runtime {
        self.runtime.as_mut().expect("sandbox not started")
    }
431
    /// Sets the configured name in place.
    ///
    /// Only `self.name` is updated; a runtime that already exists keeps the
    /// name it was created with.
    pub fn set_name(&mut self, name: impl Into<String>) {
        self.name = Some(name.into());
    }
441
    /// Builder-style variant of `set_name`: sets the configured name and
    /// returns the sandbox by value.
    pub fn with_name(mut self, name: impl Into<String>) -> Self {
        self.name = Some(name.into());
        self
    }
455
    /// Stores the one-shot initialiser that `fork()` invokes in the forked
    /// child before entering the fork loop. Replaces any previous value.
    pub fn with_init_fn(mut self, f: impl FnOnce() + Send + 'static) -> Self {
        self.init_fn = Some(Box::new(f));
        self
    }
464
    /// Stores the work callback that `fork()` hands to the fork loop.
    /// Replaces any previous value.
    pub fn with_work_fn(mut self, f: impl Fn(u32) + Send + Sync + 'static) -> Self {
        self.work_fn = Some(Arc::new(f));
        self
    }
472
473 pub fn instance_name(&self) -> Option<&str> {
475 self.runtime.as_ref().map(|r| r.name.as_str())
476 .or_else(|| self.name.as_deref())
477 }
478
479 pub fn pid(&self) -> Option<i32> {
481 self.runtime.as_ref().and_then(|r| r.child_pid)
482 }
483
484 pub fn is_running(&self) -> bool {
486 self.runtime.as_ref().map(|r| {
487 matches!(r.state, RuntimeState::Running | RuntimeState::Paused)
488 }).unwrap_or(false)
489 }
490
491 pub fn pause(&mut self) -> Result<(), crate::error::SandlockError> {
493 use crate::error::SandboxRuntimeError;
494 let pid = self.runtime.as_ref()
495 .and_then(|rt| rt.child_pid)
496 .ok_or(SandboxRuntimeError::NotRunning)?;
497 let ret = unsafe { libc::killpg(pid, libc::SIGSTOP) };
498 if ret < 0 {
499 return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
500 }
501 self.rt_mut().state = RuntimeState::Paused;
502 Ok(())
503 }
504
505 pub fn resume(&mut self) -> Result<(), crate::error::SandlockError> {
507 use crate::error::SandboxRuntimeError;
508 let pid = self.runtime.as_ref()
509 .and_then(|rt| rt.child_pid)
510 .ok_or(SandboxRuntimeError::NotRunning)?;
511 let ret = unsafe { libc::killpg(pid, libc::SIGCONT) };
512 if ret < 0 {
513 return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
514 }
515 self.rt_mut().state = RuntimeState::Running;
516 Ok(())
517 }
518
519 pub fn kill(&mut self) -> Result<(), crate::error::SandlockError> {
521 use crate::error::SandboxRuntimeError;
522 let pid = self.runtime.as_ref()
523 .and_then(|rt| rt.child_pid)
524 .ok_or(SandboxRuntimeError::NotRunning)?;
525 let ret = unsafe { libc::killpg(pid, libc::SIGKILL) };
526 if ret < 0 {
527 let err = std::io::Error::last_os_error();
528 if err.raw_os_error() != Some(libc::ESRCH) {
529 return Err(SandboxRuntimeError::Io(err).into());
530 }
531 }
532 Ok(())
533 }
534
535 pub fn set_on_bind(&mut self, cb: impl Fn(&HashMap<u16, u16>) + Send + Sync + 'static) {
537 let _ = self.ensure_runtime();
540 self.rt_mut().on_bind = Some(Box::new(cb));
541 }
542
543 pub async fn port_mappings(&self) -> HashMap<u16, u16> {
545 if let Some(ref rt) = self.runtime {
546 if let Some(ref net) = rt.supervisor_network {
547 let ns = net.lock().await;
548 return ns.port_map.virtual_to_real.clone();
549 }
550 }
551 HashMap::new()
552 }
553
554 pub async fn wait(&mut self) -> Result<crate::result::RunResult, crate::error::SandlockError> {
556 use crate::error::SandboxRuntimeError;
557 use crate::result::{ExitStatus, RunResult};
558
559 let pid = self.rt().child_pid.ok_or(SandboxRuntimeError::NotRunning)?;
560
561 if let RuntimeState::Stopped(ref es) = self.rt().state {
562 return Ok(RunResult {
563 exit_status: es.clone(),
564 stdout: None,
565 stderr: None,
566 });
567 }
568
569 let exit_status = tokio::task::spawn_blocking(move || -> ExitStatus {
570 let mut status: i32 = 0;
571 loop {
572 let ret = unsafe { libc::waitpid(pid, &mut status, 0) };
573 if ret < 0 {
574 let err = std::io::Error::last_os_error();
575 if err.raw_os_error() == Some(libc::EINTR) {
576 continue;
577 }
578 return ExitStatus::Killed;
579 }
580 break;
581 }
582 sandbox_wait_status_to_exit(status)
583 })
584 .await
585 .unwrap_or(ExitStatus::Killed);
586
587 self.rt_mut().state = RuntimeState::Stopped(exit_status.clone());
588
589 let rt = self.rt_mut();
590 if let Some(h) = rt.notif_handle.take() { h.abort(); }
591 if let Some(h) = rt.throttle_handle.take() { h.abort(); }
592 if let Some(h) = rt.loadavg_handle.take() { h.abort(); }
593
594 if let Some(ref cow_state) = self.rt().supervisor_cow.clone() {
595 let mut cow = cow_state.lock().await;
596 self.rt_mut().seccomp_cow = cow.branch.take();
597 }
598
599 let stdout = self.rt_mut()._stdout_read.take().map(sandbox_read_fd_to_end);
600 let stderr = self.rt_mut()._stderr_read.take().map(sandbox_read_fd_to_end);
601
602 Ok(RunResult { exit_status, stdout, stderr })
603 }
604
    /// Creates the sandboxed child for `cmd` with output capture enabled
    /// (`do_create(cmd, true)`); the child is not started here.
    pub async fn create(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
        self.do_create(cmd, true).await
    }
613
    /// Same as `create`, but without output capture (`do_create(cmd, false)`).
    pub async fn create_interactive(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
        self.do_create(cmd, false).await
    }
618
    /// Starts a previously created sandbox; thin public wrapper over `do_start`.
    pub fn start(&mut self) -> Result<(), crate::error::SandlockError> {
        self.do_start()
    }
625
    /// Creates (with output capture), starts, and then waits until the child
    /// has exec'd `cmd`. Does not wait for the child to exit.
    pub async fn spawn(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
        self.create(cmd).await?;
        self.start()?;
        self.wait_until_exec().await
    }
636
    /// Like `spawn`, but created without output capture.
    pub async fn spawn_interactive(&mut self, cmd: &[&str]) -> Result<(), crate::error::SandlockError> {
        self.create_interactive(cmd).await?;
        self.start()?;
        self.wait_until_exec().await
    }
643
644 async fn wait_until_exec(&self) -> Result<(), crate::error::SandlockError> {
649 use crate::error::SandboxRuntimeError;
650 let pid = self.pid().ok_or(SandboxRuntimeError::NotRunning)?;
651 let Some(our_exe) = std::fs::read_link("/proc/self/exe").ok() else {
652 return Ok(());
653 };
654 let child_link = format!("/proc/{}/exe", pid);
655 let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
656 loop {
657 if let Ok(child_exe) = std::fs::read_link(&child_link) {
658 if child_exe != our_exe {
659 return Ok(());
660 }
661 }
662 if std::time::Instant::now() >= deadline {
663 return Err(SandboxRuntimeError::Child(
664 "child did not exec() within 5s".into(),
665 ).into());
666 }
667 tokio::time::sleep(std::time::Duration::from_millis(1)).await;
668 }
669 }
670
    /// Creates the child with explicit stdio overrides.
    ///
    /// Each `Some(fd)` is recorded in the runtime's `io_overrides` and is
    /// `dup2`'d onto fd 0/1/2 in the child. Output capture is disabled
    /// (`do_create(cmd, false)`). The raw fds are borrowed: this function
    /// does not close them.
    #[doc(hidden)]
    pub async fn create_with_io(
        &mut self,
        cmd: &[&str],
        stdin_fd: Option<std::os::unix::io::RawFd>,
        stdout_fd: Option<std::os::unix::io::RawFd>,
        stderr_fd: Option<std::os::unix::io::RawFd>,
    ) -> Result<(), crate::error::SandlockError> {
        self.ensure_runtime()?;
        self.rt_mut().io_overrides = Some((stdin_fd, stdout_fd, stderr_fd));
        self.do_create(cmd, false).await
    }
685
    /// Like `create_with_io`, additionally recording `extra_fds`.
    ///
    /// `extra_fds` holds `(target_fd, source_fd)` pairs; the child performs
    /// `dup2(source_fd, target_fd)` for each pair.
    #[doc(hidden)]
    pub async fn create_with_gather_io(
        &mut self,
        cmd: &[&str],
        stdin_fd: Option<std::os::unix::io::RawFd>,
        stdout_fd: Option<std::os::unix::io::RawFd>,
        stderr_fd: Option<std::os::unix::io::RawFd>,
        extra_fds: Vec<(i32, i32)>,
    ) -> Result<(), crate::error::SandlockError> {
        self.ensure_runtime()?;
        self.rt_mut().io_overrides = Some((stdin_fd, stdout_fd, stderr_fd));
        self.rt_mut().extra_fds = extra_fds;
        self.do_create(cmd, false).await
    }
701
702 pub(crate) async fn freeze(&self) -> Result<(), crate::error::SandlockError> {
704 use crate::error::{SandboxRuntimeError, SandlockError};
705 let rt = self.runtime.as_ref().ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
706 let pid = rt.child_pid.ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
707 if let Some(ref resource) = rt.supervisor_resource {
708 let mut rs = resource.lock().await;
709 rs.hold_forks = true;
710 }
711 unsafe { libc::killpg(pid, libc::SIGSTOP); }
712 Ok(())
713 }
714
715 pub(crate) async fn thaw(&self) -> Result<(), crate::error::SandlockError> {
717 use crate::error::{SandboxRuntimeError, SandlockError};
718 let rt = self.runtime.as_ref().ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
719 let pid = rt.child_pid.ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
720 if let Some(ref resource) = rt.supervisor_resource {
721 let mut rs = resource.lock().await;
722 rs.hold_forks = false;
723 rs.held_notif_ids.clear();
724 }
725 unsafe { libc::killpg(pid, libc::SIGCONT); }
726 Ok(())
727 }
728
    /// Captures a checkpoint of the running sandbox.
    ///
    /// Sequence: freeze the process group, run `checkpoint::capture`, thaw.
    /// The capture result is only returned after the thaw, so the child is
    /// resumed whether or not the capture succeeded. If the thaw itself fails
    /// its error is returned and the capture result is discarded.
    ///
    /// # Errors
    ///
    /// `NotRunning` when there is no child pid; otherwise any error from
    /// `freeze`, `thaw`, or the capture.
    pub async fn checkpoint(&self) -> Result<crate::checkpoint::Checkpoint, crate::error::SandlockError> {
        use crate::error::{SandboxRuntimeError, SandlockError};
        let pid = self.runtime.as_ref()
            .and_then(|rt| rt.child_pid)
            .ok_or(SandlockError::Runtime(SandboxRuntimeError::NotRunning))?;
        self.freeze().await?;
        // Deliberately not `?`: thaw must run before the result is propagated.
        let cp = crate::checkpoint::capture(pid, self);
        self.thaw().await?;
        cp
    }
740
    /// Runs `cmd` to completion with output capture: create, start, wait.
    ///
    /// Returns the `RunResult` from `wait()`; any error from the three steps
    /// is propagated.
    pub async fn run(
        &mut self,
        cmd: &[&str],
    ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
        self.do_create(cmd, true).await?;
        self.do_start()?;
        self.wait().await
    }
763
    /// Like `run`, but without output capture (`do_create(cmd, false)`).
    pub async fn run_interactive(
        &mut self,
        cmd: &[&str],
    ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
        self.do_create(cmd, false).await?;
        self.do_start()?;
        self.wait().await
    }
773
    /// Like `run`, with user-supplied syscall handlers.
    ///
    /// `handlers` yields `(syscall, handler)` pairs. They are converted by
    /// `sandbox_collect_handlers` before the runtime is touched, so a
    /// conversion failure returns early. The resulting list replaces any
    /// handlers previously stored on the runtime.
    pub async fn run_with_handlers<I, S, H>(
        &mut self,
        cmd: &[&str],
        handlers: I,
    ) -> Result<crate::result::RunResult, crate::error::SandlockError>
    where
        I: IntoIterator<Item = (S, H)>,
        S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
        H: crate::seccomp::dispatch::Handler,
    {
        let pending = sandbox_collect_handlers(handlers, self)?;
        self.ensure_runtime()?;
        self.rt_mut().handlers = pending;
        self.do_create(cmd, true).await?;
        self.do_start()?;
        self.wait().await
    }
792
    /// Like `run_with_handlers`, but without output capture
    /// (`do_create(cmd, false)`).
    pub async fn run_interactive_with_handlers<I, S, H>(
        &mut self,
        cmd: &[&str],
        handlers: I,
    ) -> Result<crate::result::RunResult, crate::error::SandlockError>
    where
        I: IntoIterator<Item = (S, H)>,
        S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
        H: crate::seccomp::dispatch::Handler,
    {
        let pending = sandbox_collect_handlers(handlers, self)?;
        self.ensure_runtime()?;
        self.rt_mut().handlers = pending;
        self.do_create(cmd, false).await?;
        self.do_start()?;
        self.wait().await
    }
811
    /// Runs `cmd` with output capture, reports the changes recorded by the
    /// COW branch, then aborts that branch.
    ///
    /// `on_exit` and `on_error` are overwritten with `BranchAction::Keep`
    /// before the run and are not restored afterwards. If no COW branch
    /// exists after the run, `changes` is empty.
    pub async fn dry_run(
        &mut self,
        cmd: &[&str],
    ) -> Result<crate::dry_run::DryRunResult, crate::error::SandlockError> {
        self.on_exit = BranchAction::Keep;
        self.on_error = BranchAction::Keep;
        self.do_create(cmd, true).await?;
        self.do_start()?;
        let run_result = self.wait().await?;
        // Changes must be collected before the branch is aborted.
        let changes = self.collect_changes().await;
        self.do_abort().await;
        Ok(crate::dry_run::DryRunResult { run_result, changes })
    }
826
    /// Like `dry_run`, but without output capture (`do_create(cmd, false)`).
    ///
    /// `on_exit` and `on_error` are overwritten with `BranchAction::Keep`
    /// and are not restored afterwards.
    pub async fn dry_run_interactive(
        &mut self,
        cmd: &[&str],
    ) -> Result<crate::dry_run::DryRunResult, crate::error::SandlockError> {
        self.on_exit = BranchAction::Keep;
        self.on_error = BranchAction::Keep;
        self.do_create(cmd, false).await?;
        self.do_start()?;
        let run_result = self.wait().await?;
        // Changes must be collected before the branch is aborted.
        let changes = self.collect_changes().await;
        self.do_abort().await;
        Ok(crate::dry_run::DryRunResult { run_result, changes })
    }
841
842 pub async fn fork(&mut self, n: u32) -> Result<Vec<Sandbox>, crate::error::SandlockError> {
848 use crate::error::SandboxRuntimeError;
849 use std::os::fd::{FromRawFd, OwnedFd};
850
851 let init_fn = self.init_fn.take()
854 .ok_or_else(|| SandboxRuntimeError::Child("fork() requires init_fn and work_fn — use SandboxBuilder::init_fn() / work_fn() or Sandbox::with_init_fn() / with_work_fn()".into()))?;
855 let work_fn = self.work_fn.take()
856 .ok_or_else(|| SandboxRuntimeError::Child("fork() requires init_fn and work_fn — use SandboxBuilder::init_fn() / work_fn() or Sandbox::with_init_fn() / with_work_fn()".into()))?;
857
858 self.ensure_runtime()?;
860
861 let sandbox_cfg = self.clone(); let mut ctrl_fds = [0i32; 2];
864 if unsafe { libc::pipe2(ctrl_fds.as_mut_ptr(), 0) } < 0 {
865 return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
866 }
867 let ctrl_parent = unsafe { OwnedFd::from_raw_fd(ctrl_fds[0]) };
868 let ctrl_child_fd = ctrl_fds[1];
869
870 let mut pipe_read_ends: Vec<OwnedFd> = Vec::with_capacity(n as usize);
871 let mut pipe_write_fds: Vec<i32> = Vec::with_capacity(n as usize);
872 for _ in 0..n {
873 let mut pfds = [0i32; 2];
874 if unsafe { libc::pipe(pfds.as_mut_ptr()) } >= 0 {
875 pipe_read_ends.push(unsafe { OwnedFd::from_raw_fd(pfds[0]) });
876 pipe_write_fds.push(pfds[1]);
877 } else {
878 pipe_write_fds.push(-1);
879 }
880 }
881
882 let pid = unsafe { libc::fork() };
883 if pid < 0 {
884 unsafe { libc::close(ctrl_child_fd) };
885 return Err(SandboxRuntimeError::Fork(std::io::Error::last_os_error()).into());
886 }
887
888 if pid == 0 {
889 drop(ctrl_parent);
890 unsafe { libc::setpgid(0, 0) };
891 unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL) };
892 unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
893
894 let _ = crate::landlock::confine(&sandbox_cfg);
895
896 let deny = crate::context::blocklist_syscall_numbers(&sandbox_cfg);
897 let args = crate::context::arg_filters(&sandbox_cfg);
898 let filter = match crate::seccomp::bpf::assemble_filter(&[], &deny, &args) {
899 Ok(f) => f,
900 Err(_) => unsafe { libc::_exit(1) },
901 };
902 let _ = crate::seccomp::bpf::install_deny_filter(&filter);
903
904 init_fn();
905
906 drop(pipe_read_ends);
907 crate::fork::fork_ready_loop_fn(ctrl_child_fd, n, &*work_fn, &pipe_write_fds);
908 unsafe { libc::_exit(0) };
909 }
910
911 unsafe { libc::close(ctrl_child_fd) };
912 for wfd in &pipe_write_fds {
913 if *wfd >= 0 { unsafe { libc::close(*wfd) }; }
914 }
915 self.rt_mut().child_pid = Some(pid);
916 self.rt_mut().state = RuntimeState::Running;
917
918 let ctrl_fd = ctrl_parent.as_raw_fd();
919 let mut pid_buf = vec![0u8; n as usize * 4];
920 sandbox_read_exact(ctrl_fd, &mut pid_buf);
921
922 let clone_pids: Vec<i32> = pid_buf.chunks(4)
923 .map(|c| u32::from_be_bytes(c.try_into().unwrap_or([0; 4])) as i32)
924 .collect();
925 let live_count = clone_pids.iter().filter(|&&p| p > 0).count();
926
927 let mut code_buf = vec![0u8; live_count * 4];
928 sandbox_read_exact(ctrl_fd, &mut code_buf);
929 self.rt_mut().ctrl_fd = Some(ctrl_parent);
930
931 let mut status = 0i32;
932 unsafe { libc::waitpid(pid, &mut status, 0) };
933
934 let mut code_idx = 0;
935 let mut clones = Vec::with_capacity(live_count);
936 let mut pipe_iter = pipe_read_ends.into_iter();
937
938 let rt_name = self.rt().name.clone();
939 for &clone_pid in &clone_pids {
940 let pipe = pipe_iter.next();
941 if clone_pid <= 0 { continue; }
942
943 let code = i32::from_be_bytes(
944 code_buf[code_idx * 4..(code_idx + 1) * 4].try_into().unwrap_or([0; 4])
945 );
946 code_idx += 1;
947
948 let mut clone_sb = sandbox_cfg.clone();
949 let clone_name = format!("{}-fork-{}", rt_name, clone_pid);
950 clone_sb.runtime = Some(Box::new(Runtime {
951 name: clone_name,
952 state: RuntimeState::Stopped(if code == 0 {
953 crate::result::ExitStatus::Code(0)
954 } else if code > 0 {
955 crate::result::ExitStatus::Code(code)
956 } else {
957 crate::result::ExitStatus::Killed
958 }),
959 child_pid: Some(clone_pid),
960 pidfd: None,
961 notif_handle: None,
962 throttle_handle: None,
963 loadavg_handle: None,
964 _stdout_read: None,
965 _stderr_read: None,
966 seccomp_cow: None,
967 supervisor_resource: None,
968 supervisor_cow: None,
969 supervisor_network: None,
970 ctrl_fd: None,
971 stdout_pipe: pipe,
972 io_overrides: None,
973 extra_fds: Vec::new(),
974 http_acl_handle: None,
975 on_bind: None,
976 handlers: Vec::new(),
977 ready_w: None,
978 }));
979 clones.push(clone_sb);
980 }
981
982 Ok(clones)
983 }
984
985 pub async fn reduce(
987 &self,
988 cmd: &[&str],
989 clones: &mut [Sandbox],
990 ) -> Result<crate::result::RunResult, crate::error::SandlockError> {
991 use crate::error::SandboxRuntimeError;
992
993 let mut combined = Vec::new();
994 for clone in clones.iter_mut() {
995 if let Some(ref mut rt) = clone.runtime {
996 if let Some(pipe) = rt.stdout_pipe.take() {
997 combined.extend_from_slice(&sandbox_read_fd_to_end(pipe));
998 }
999 }
1000 }
1001
1002 let mut stdin_fds = [0i32; 2];
1003 if unsafe { libc::pipe2(stdin_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
1004 return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
1005 }
1006
1007 let write_fd = stdin_fds[1];
1008 let write_handle = tokio::task::spawn_blocking(move || {
1009 unsafe {
1010 libc::write(write_fd, combined.as_ptr() as *const _, combined.len());
1011 libc::close(write_fd);
1012 }
1013 });
1014
1015 let base_name = self.instance_name()
1016 .unwrap_or("sandbox")
1017 .to_owned();
1018 let reducer_name = base_name + "-reduce";
1019 let mut reducer = self.clone().with_name(reducer_name);
1020 reducer.ensure_runtime()?;
1021 reducer.rt_mut().io_overrides = Some((Some(stdin_fds[0]), None, None));
1022 reducer.do_create(cmd, true).await?;
1023 reducer.do_start()?;
1024 unsafe { libc::close(stdin_fds[0]) };
1025
1026 let _ = write_handle.await;
1027 reducer.wait().await
1028 }
1029
1030 fn ensure_runtime(&mut self) -> Result<(), crate::error::SandlockError> {
1036 if self.runtime.is_some() {
1037 return Ok(());
1038 }
1039 let name = sandbox_resolve_name(self.name.as_deref())?;
1040 self.runtime = Some(Box::new(Runtime {
1041 name,
1042 state: RuntimeState::Created,
1043 child_pid: None,
1044 pidfd: None,
1045 notif_handle: None,
1046 throttle_handle: None,
1047 loadavg_handle: None,
1048 _stdout_read: None,
1049 _stderr_read: None,
1050 seccomp_cow: None,
1051 supervisor_resource: None,
1052 supervisor_cow: None,
1053 supervisor_network: None,
1054 ctrl_fd: None,
1055 stdout_pipe: None,
1056 io_overrides: None,
1057 extra_fds: Vec::new(),
1058 http_acl_handle: None,
1059 on_bind: None,
1060 handlers: Vec::new(),
1061 ready_w: None,
1062 }));
1063 Ok(())
1064 }
1065
1066 async fn collect_changes(&self) -> Vec<crate::dry_run::Change> {
1071 if let Some(ref rt) = self.runtime {
1072 if let Some(ref cow) = rt.seccomp_cow {
1073 return cow.changes().unwrap_or_default();
1074 }
1075 }
1076 Vec::new()
1077 }
1078
1079 async fn do_abort(&mut self) {
1080 if let Some(ref mut rt) = self.runtime {
1081 if let Some(ref mut cow) = rt.seccomp_cow {
1082 let _ = cow.abort();
1083 }
1084 }
1085 }
1086
1087 async fn do_create(&mut self, cmd: &[&str], capture: bool) -> Result<(), crate::error::SandlockError> {
1093 use std::ffi::CString;
1094 use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
1095 use crate::error::SandboxRuntimeError;
1096 use crate::context::{PipePair, read_u32_fd};
1097 use crate::network;
1098 use crate::seccomp::ctx::SupervisorCtx;
1099 use crate::seccomp::notif::{self, NotifPolicy};
1100 use crate::seccomp::state::{ChrootState, CowState, NetworkState, PolicyFnState, ProcfsState, ResourceState, TimeRandomState};
1101 use crate::sys::syscall;
1102 use std::time::Duration;
1103
1104 self.ensure_runtime()?;
1105
1106 if !matches!(self.rt().state, RuntimeState::Created) {
1107 return Err(SandboxRuntimeError::Child("sandbox already spawned".into()).into());
1108 }
1109
1110 if cmd.is_empty() {
1111 return Err(SandboxRuntimeError::Child("empty command".into()).into());
1112 }
1113
1114 let chroot_root = crate::chroot::resolve::resolve_chroot_root(self.chroot.as_deref())?;
1118
1119 let c_cmd: Vec<CString> = cmd
1120 .iter()
1121 .map(|s| CString::new(*s).map_err(|_| SandboxRuntimeError::Child("invalid command string".into())))
1122 .collect::<Result<Vec<_>, _>>()?;
1123
1124 let no_supervisor = self.no_supervisor;
1125
1126 let pipes = PipePair::new().map_err(SandboxRuntimeError::Io)?;
1127
1128 let resolved_net_allow = network::resolve_net_allow(&self.net_allow)
1129 .await
1130 .map_err(SandboxRuntimeError::Io)?;
1131 let virtual_etc_hosts = network::compose_virtual_etc_hosts(
1138 self.chroot.as_deref(),
1139 &resolved_net_allow.concrete_host_entries,
1140 );
1141
1142 if !self.http_allow.is_empty() || !self.http_deny.is_empty() {
1143 let handle = crate::http_acl::spawn_http_acl_proxy(
1144 self.http_allow.clone(),
1145 self.http_deny.clone(),
1146 self.http_ca.as_deref(),
1147 self.http_key.as_deref(),
1148 ).await.map_err(SandboxRuntimeError::Io)?;
1149 self.rt_mut().http_acl_handle = Some(handle);
1150 }
1151
1152 let seccomp_cow_branch = if !no_supervisor && self.workdir.is_some() {
1158 let workdir = self.workdir.as_ref().unwrap().clone();
1159 let storage = self.fs_storage.clone();
1160 let max_disk = self.max_disk.map(|b| b.0).unwrap_or(0);
1161 match crate::cow::seccomp::SeccompCowBranch::create(&workdir, storage.as_deref(), max_disk) {
1162 Ok(branch) => {
1163 self.fs_readable.push(branch.upper_dir().to_path_buf());
1164 Some(branch)
1165 }
1166 Err(e) => {
1167 eprintln!("sandlock: seccomp COW branch creation failed: {}", e);
1168 None
1169 }
1170 }
1171 } else {
1172 None
1173 };
1174
1175 let (stdout_r, stderr_r) = if capture {
1176 let mut stdout_fds = [0i32; 2];
1177 let mut stderr_fds = [0i32; 2];
1178 if unsafe { libc::pipe2(stdout_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
1179 return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
1180 }
1181 if unsafe { libc::pipe2(stderr_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
1182 unsafe {
1183 libc::close(stdout_fds[0]);
1184 libc::close(stdout_fds[1]);
1185 }
1186 return Err(SandboxRuntimeError::Io(std::io::Error::last_os_error()).into());
1187 }
1188 (
1189 Some((
1190 unsafe { OwnedFd::from_raw_fd(stdout_fds[0]) },
1191 unsafe { OwnedFd::from_raw_fd(stdout_fds[1]) },
1192 )),
1193 Some((
1194 unsafe { OwnedFd::from_raw_fd(stderr_fds[0]) },
1195 unsafe { OwnedFd::from_raw_fd(stderr_fds[1]) },
1196 )),
1197 )
1198 } else {
1199 (None, None)
1200 };
1201
1202 let parent_pid = unsafe { libc::getpid() };
1205
1206 let pid = unsafe { libc::fork() };
1207 if pid < 0 {
1208 return Err(SandboxRuntimeError::Fork(std::io::Error::last_os_error()).into());
1209 }
1210
1211 if pid == 0 {
1212 let io_overrides = self.rt().io_overrides;
1214 if let Some((stdin_fd, stdout_fd, stderr_fd)) = io_overrides {
1215 if let Some(fd) = stdin_fd { unsafe { libc::dup2(fd, 0) }; }
1216 if let Some(fd) = stdout_fd { unsafe { libc::dup2(fd, 1) }; }
1217 if let Some(fd) = stderr_fd { unsafe { libc::dup2(fd, 2) }; }
1218 }
1219
1220 let extra_fds_copy = self.rt().extra_fds.clone();
1221 for &(target_fd, source_fd) in &extra_fds_copy {
1222 unsafe { libc::dup2(source_fd, target_fd) };
1223 }
1224
1225 if let Some((_, ref stdout_w)) = stdout_r {
1226 unsafe { libc::dup2(stdout_w.as_raw_fd(), 1) };
1227 }
1228 if let Some((_, ref stderr_w)) = stderr_r {
1229 unsafe { libc::dup2(stderr_w.as_raw_fd(), 2) };
1230 }
1231 drop(stdout_r);
1232 drop(stderr_r);
1233
1234 let gather_keep_fds: Vec<i32> = extra_fds_copy.iter().map(|&(target, _)| target).collect();
1235
1236 let extra_syscalls: Vec<u32> = self.rt().handlers
1237 .iter()
1238 .map(|h| h.0 as u32)
1239 .collect();
1240
1241 let sandbox_name = self.rt().name.clone();
1242 context::confine_child(context::ChildSpawnArgs {
1243 sandbox: self,
1244 cmd: &c_cmd,
1245 pipes: &pipes,
1246 no_supervisor,
1247 keep_fds: &gather_keep_fds,
1248 sandbox_name: Some(sandbox_name.as_str()),
1249 extra_syscalls: &extra_syscalls,
1250 parent_pid,
1251 });
1252 }
1253
1254 drop(pipes.notif_w);
1256 drop(pipes.ready_r);
1257
1258 self.rt_mut()._stdout_read = stdout_r.map(|(r, _w)| r);
1259 self.rt_mut()._stderr_read = stderr_r.map(|(r, _w)| r);
1260
1261 self.rt_mut().child_pid = Some(pid);
1262 let pidfd = match syscall::pidfd_open(pid as u32, 0) {
1266 Ok(fd) => Some(fd),
1267 Err(_) => None,
1268 };
1269
1270 let notif_fd_num = read_u32_fd(pipes.notif_r.as_raw_fd())
1271 .map_err(|e| SandboxRuntimeError::Child(format!("read notif fd from child: {}", e)))?;
1272
1273 let is_nested_mode = notif_fd_num == 0;
1274
1275 let notif_fd = if is_nested_mode {
1276 None
1277 } else if let Some(ref pfd) = pidfd {
1278 Some(syscall::pidfd_getfd(pfd, notif_fd_num as i32, 0)
1279 .map_err(|e| SandboxRuntimeError::Child(format!("pidfd_getfd: {}", e)))?)
1280 } else {
1281 let path = format!("/proc/{}/fd/{}", pid, notif_fd_num);
1282 let cpath = CString::new(path).unwrap();
1283 let raw = unsafe { libc::open(cpath.as_ptr(), libc::O_RDWR) };
1284 if raw < 0 {
1285 return Err(SandboxRuntimeError::Child("failed to open notif fd from /proc".into()).into());
1286 }
1287 Some(unsafe { OwnedFd::from_raw_fd(raw) })
1288 };
1289
1290 if let Some(notif_fd) = notif_fd {
1291 if self.time_start.is_some() || self.random_seed.is_some() {
1292 let time_offset = self.time_start.map(|t| crate::time::calculate_time_offset(t));
1293 if let Err(e) = crate::vdso::patch(pid, time_offset, self.random_seed.is_some()) {
1294 eprintln!("sandlock: pre-exec vDSO patching failed (will retry after exec): {}", e);
1295 }
1296 }
1297
1298 let time_offset_val = self.time_start
1299 .map(|t| crate::time::calculate_time_offset(t))
1300 .unwrap_or(0);
1301
1302 let rt_name = self.rt().name.clone();
1303 let notif_policy = NotifPolicy {
1304 max_memory_bytes: self.max_memory.map(|m| m.0).unwrap_or(0),
1305 max_processes: self.max_processes,
1306 has_memory_limit: self.max_memory.is_some(),
1307 has_net_allowlist: !self.net_allow.is_empty()
1308 || self.policy_fn.is_some()
1309 || !self.http_allow.is_empty()
1310 || !self.http_deny.is_empty(),
1311 has_random_seed: self.random_seed.is_some(),
1312 has_time_start: self.time_start.is_some(),
1313 argv_safety_required: self.policy_fn.is_some()
1314 || self.rt().handlers.iter().any(|h| {
1315 h.0 == libc::SYS_execve || h.0 == libc::SYS_execveat
1316 }),
1317 time_offset: time_offset_val,
1318 num_cpus: self.num_cpus,
1319 port_remap: self.port_remap,
1320 cow_enabled: self.workdir.is_some(),
1321 chroot_root: chroot_root.clone(),
1322 chroot_readable: self.fs_readable.clone(),
1323 chroot_writable: self.fs_writable.clone(),
1324 chroot_denied: self.fs_denied.clone(),
1325 chroot_mounts: crate::chroot::resolve::resolve_chroot_mounts(&self.fs_mount),
1326 deterministic_dirs: self.deterministic_dirs,
1327 virtual_hostname: Some(rt_name),
1328 has_http_acl: !self.http_allow.is_empty() || !self.http_deny.is_empty(),
1329 virtual_etc_hosts,
1330 };
1331
1332 use rand::SeedableRng;
1333 use rand_chacha::ChaCha8Rng;
1334
1335 let random_state = self.random_seed.map(|seed| ChaCha8Rng::seed_from_u64(seed));
1336 let time_offset = self.time_start.map(|t| crate::time::calculate_time_offset(t));
1337
1338 let time_random_state = TimeRandomState::new(time_offset, random_state);
1339
1340 let mut net_state = NetworkState::new();
1341 let no_rules = self.net_allow.is_empty();
1342 let policy_from = |resolved: &network::ResolvedNetAllow| {
1343 if no_rules || resolved.any_ip_all_ports {
1344 crate::seccomp::notif::NetworkPolicy::Unrestricted
1345 } else {
1346 use crate::seccomp::notif::PortAllow;
1347 let per_ip = resolved
1348 .per_ip
1349 .iter()
1350 .map(|(ip, ports)| {
1351 let allow = if resolved.per_ip_all_ports.contains(ip) {
1352 PortAllow::Any
1353 } else {
1354 PortAllow::Specific(ports.clone())
1355 };
1356 (*ip, allow)
1357 })
1358 .collect();
1359 crate::seccomp::notif::NetworkPolicy::AllowList {
1360 per_ip,
1361 any_ip_ports: resolved.any_ip_ports.clone(),
1362 }
1363 }
1364 };
1365 net_state.tcp_policy = policy_from(&resolved_net_allow.tcp);
1366 net_state.udp_policy = policy_from(&resolved_net_allow.udp);
1367 net_state.icmp_policy = policy_from(&resolved_net_allow.icmp);
1368 net_state.http_acl_addr = self.rt().http_acl_handle.as_ref().map(|h| h.addr);
1369 net_state.http_acl_ports = self.http_ports.iter().copied().collect();
1370 net_state.http_acl_orig_dest = self.rt().http_acl_handle.as_ref().map(|h| h.orig_dest.clone());
1371 if let Some(cb) = self.rt_mut().on_bind.take() {
1372 net_state.port_map.on_bind = Some(cb);
1373 }
1374
1375 let procfs_state = ProcfsState::new();
1376
1377 let mut res_state = ResourceState::new(
1378 notif_policy.max_memory_bytes,
1379 notif_policy.max_processes,
1380 );
1381 res_state.proc_count = 1;
1382
1383 let mut cow_state = CowState::new();
1384 cow_state.branch = seccomp_cow_branch;
1385
1386 let mut policy_fn_state = PolicyFnState::new();
1387
1388 if let Ok(mut denied) = policy_fn_state.denied_paths.write() {
1389 for path in &self.fs_denied {
1390 denied.insert(path.to_string_lossy().into_owned());
1391 }
1392 }
1393
1394 if let Some(ref callback) = self.policy_fn {
1395 let mut allowed_ips: std::collections::HashSet<std::net::IpAddr> =
1396 std::collections::HashSet::new();
1397 for p in [&net_state.tcp_policy, &net_state.udp_policy, &net_state.icmp_policy] {
1398 if let crate::seccomp::notif::NetworkPolicy::AllowList { per_ip, .. } = p {
1399 allowed_ips.extend(per_ip.keys().copied());
1400 }
1401 }
1402 let live = crate::policy_fn::LivePolicy {
1403 allowed_ips,
1404 max_memory_bytes: notif_policy.max_memory_bytes,
1405 max_processes: notif_policy.max_processes,
1406 };
1407 let ceiling = live.clone();
1408 let live = std::sync::Arc::new(std::sync::RwLock::new(live));
1409 let denied_paths = policy_fn_state.denied_paths.clone();
1410 let pid_overrides = net_state.pid_ip_overrides.clone();
1411 policy_fn_state.live_policy = Some(live.clone());
1412 let tx = crate::policy_fn::spawn_policy_fn(
1413 callback.clone(), live, ceiling, pid_overrides, denied_paths,
1414 );
1415 policy_fn_state.event_tx = Some(tx);
1416 }
1417
1418 let chroot_state = ChrootState::new();
1419
1420 let notif_raw_fd = notif_fd.as_raw_fd();
1421 let child_pidfd_raw = pidfd.as_ref().map(|pfd| pfd.as_raw_fd());
1422
1423 let res_state = Arc::new(tokio::sync::Mutex::new(res_state));
1424 self.rt_mut().supervisor_resource = Some(Arc::clone(&res_state));
1425
1426 let cow_state = Arc::new(tokio::sync::Mutex::new(cow_state));
1427 self.rt_mut().supervisor_cow = Some(Arc::clone(&cow_state));
1428
1429 let net_state = Arc::new(tokio::sync::Mutex::new(net_state));
1430 self.rt_mut().supervisor_network = Some(Arc::clone(&net_state));
1431
1432 let procfs_state = Arc::new(tokio::sync::Mutex::new(procfs_state));
1433 let time_random_state = Arc::new(tokio::sync::Mutex::new(time_random_state));
1434 let policy_fn_state = Arc::new(tokio::sync::Mutex::new(policy_fn_state));
1435 let chroot_state = Arc::new(tokio::sync::Mutex::new(chroot_state));
1436 let processes = Arc::new(crate::seccomp::state::ProcessIndex::new());
1437
1438 let ctx = Arc::new(SupervisorCtx {
1439 resource: Arc::clone(&res_state),
1440 cow: Arc::clone(&cow_state),
1441 procfs: Arc::clone(&procfs_state),
1442 network: Arc::clone(&net_state),
1443 time_random: Arc::clone(&time_random_state),
1444 policy_fn: Arc::clone(&policy_fn_state),
1445 chroot: Arc::clone(&chroot_state),
1446 netlink: Arc::new(crate::netlink::NetlinkState::new()),
1447 processes: Arc::clone(&processes),
1448 policy: Arc::new(notif_policy),
1449 child_pidfd: child_pidfd_raw,
1450 notif_fd: notif_raw_fd,
1451 });
1452
1453 let handlers = std::mem::take(&mut self.rt_mut().handlers);
1454 let (startup_tx, startup_rx) = tokio::sync::oneshot::channel();
1455 self.rt_mut().notif_handle = Some(tokio::spawn(
1456 notif::supervisor(notif_fd, ctx, handlers, startup_tx),
1457 ));
1458 match startup_rx.await {
1465 Ok(Ok(())) => {}
1466 Ok(Err(e)) => return Err(SandboxRuntimeError::Io(e).into()),
1467 Err(_) => {
1468 return Err(SandboxRuntimeError::Child(
1469 "seccomp supervisor exited during startup".into(),
1470 ).into());
1471 }
1472 }
1473
1474 let la_resource = Arc::clone(&res_state);
1475 self.rt_mut().loadavg_handle = Some(tokio::spawn(async move {
1476 let mut interval = tokio::time::interval(Duration::from_secs(5));
1477 interval.tick().await;
1478 loop {
1479 interval.tick().await;
1480 let mut rs = la_resource.lock().await;
1481 let running = rs.proc_count;
1482 rs.load_avg.sample(running);
1483 }
1484 }));
1485 }
1486
1487 if let Some(cpu_pct) = self.max_cpu {
1488 if cpu_pct < 100 {
1489 let child_pid = pid;
1490 self.rt_mut().throttle_handle = Some(tokio::spawn(sandbox_throttle_cpu(child_pid, cpu_pct)));
1491 }
1492 }
1493
1494 self.rt_mut().pidfd = pidfd;
1495 self.rt_mut().ready_w = Some(pipes.ready_w);
1496
1497 Ok(())
1498 }
1499
1500 fn do_start(&mut self) -> Result<(), crate::error::SandlockError> {
1505 use std::os::fd::AsRawFd;
1506 use crate::context::write_u32_fd;
1507 use crate::error::SandboxRuntimeError;
1508
1509 if !matches!(self.rt().state, RuntimeState::Created) {
1510 return Err(SandboxRuntimeError::Child("start() requires a created sandbox".into()).into());
1511 }
1512 let ready_w = self.rt_mut().ready_w.take()
1513 .ok_or_else(|| SandboxRuntimeError::Child("start() called without a prior create()".into()))?;
1514 write_u32_fd(ready_w.as_raw_fd(), 1)
1515 .map_err(|e| SandboxRuntimeError::Child(format!("write ready signal: {}", e)))?;
1516 drop(ready_w);
1517 self.rt_mut().state = RuntimeState::Running;
1518 Ok(())
1519 }
1520}
1521
1522impl Drop for Sandbox {
1527 fn drop(&mut self) {
1528 if let Some(ref mut rt) = self.runtime {
1529 if let Some(pid) = rt.child_pid {
1530 if matches!(rt.state, RuntimeState::Created | RuntimeState::Running | RuntimeState::Paused) {
1531 unsafe { libc::killpg(pid, libc::SIGKILL) };
1532 let mut status: i32 = 0;
1533 unsafe { libc::waitpid(pid, &mut status, 0) };
1534 }
1535 }
1536
1537 if let Some(h) = rt.notif_handle.take() { h.abort(); }
1538 if let Some(h) = rt.throttle_handle.take() { h.abort(); }
1539 if let Some(h) = rt.loadavg_handle.take() { h.abort(); }
1540
1541 let is_error = matches!(
1542 rt.state,
1543 RuntimeState::Stopped(ref s) if !matches!(s, crate::result::ExitStatus::Code(0))
1544 );
1545 let action = if is_error { &self.on_error } else { &self.on_exit };
1546 let action = action.clone();
1547
1548 if let Some(ref mut cow) = rt.seccomp_cow {
1549 match action {
1550 BranchAction::Commit => { let _ = cow.commit(); }
1551 BranchAction::Abort => { let _ = cow.abort(); }
1552 BranchAction::Keep => {}
1553 }
1554 }
1555 }
1556 }
1557}
1558
1559async fn sandbox_throttle_cpu(pid: i32, cpu_pct: u8) {
1564 use std::time::Duration;
1565 let period = Duration::from_millis(100);
1566 let run_time = period * cpu_pct as u32 / 100;
1567 let stop_time = period - run_time;
1568 loop {
1569 tokio::time::sleep(run_time).await;
1570 if unsafe { libc::killpg(pid, libc::SIGSTOP) } < 0 { break; }
1571 tokio::time::sleep(stop_time).await;
1572 if unsafe { libc::killpg(pid, libc::SIGCONT) } < 0 { break; }
1573 }
1574}
1575
/// Process-wide counter, starting at 1, that `sandbox_resolve_name` increments
/// to give each auto-generated sandbox name a distinct numeric suffix.
static NEXT_SANDBOX_NAME: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(1);
1581
1582fn sandbox_resolve_name(name: Option<&str>) -> Result<String, crate::error::SandlockError> {
1583 match name {
1584 Some(n) => sandbox_validate_name(n.to_string()),
1585 None => Ok(format!(
1586 "sandbox-{}-{}",
1587 std::process::id(),
1588 NEXT_SANDBOX_NAME.fetch_add(1, std::sync::atomic::Ordering::Relaxed),
1589 )),
1590 }
1591}
1592
1593fn sandbox_validate_name(name: String) -> Result<String, crate::error::SandlockError> {
1594 use crate::error::SandboxRuntimeError;
1595 if name.is_empty() {
1596 return Err(SandboxRuntimeError::Child("sandbox name must not be empty".into()).into());
1597 }
1598 if name.len() > 64 {
1599 return Err(SandboxRuntimeError::Child("sandbox name must be at most 64 bytes".into()).into());
1600 }
1601 if name.as_bytes().contains(&0) {
1602 return Err(SandboxRuntimeError::Child("sandbox name must not contain NUL bytes".into()).into());
1603 }
1604 Ok(name)
1605}
1606
1607fn sandbox_read_exact(fd: i32, buf: &mut [u8]) {
1612 let mut off = 0;
1613 while off < buf.len() {
1614 let r = unsafe { libc::read(fd, buf[off..].as_mut_ptr() as *mut _, buf.len() - off) };
1615 if r <= 0 { break; }
1616 off += r as usize;
1617 }
1618}
1619
/// Drains `fd` until end-of-file and returns everything that was read.
///
/// Takes ownership of the descriptor, which is closed when the function
/// returns. Read errors are deliberately ignored: whatever bytes arrived
/// before the error are still returned.
fn sandbox_read_fd_to_end(fd: std::os::fd::OwnedFd) -> Vec<u8> {
    use std::io::Read;

    // `File: From<OwnedFd>` transfers ownership safely; no raw-fd round trip needed.
    let mut file = std::fs::File::from(fd);
    let mut buf = Vec::new();
    let _ = file.read_to_end(&mut buf);
    buf
}
1629
1630fn sandbox_wait_status_to_exit(status: i32) -> crate::result::ExitStatus {
1631 use crate::result::ExitStatus;
1632 if libc::WIFEXITED(status) {
1633 ExitStatus::Code(libc::WEXITSTATUS(status))
1634 } else if libc::WIFSIGNALED(status) {
1635 let sig = libc::WTERMSIG(status);
1636 if sig == libc::SIGKILL {
1637 ExitStatus::Killed
1638 } else {
1639 ExitStatus::Signal(sig)
1640 }
1641 } else {
1642 ExitStatus::Killed
1643 }
1644}
1645
1646fn sandbox_collect_handlers<I, S, H>(
1647 handlers: I,
1648 sandbox: &Sandbox,
1649) -> Result<Vec<(i64, Arc<dyn crate::seccomp::dispatch::Handler>)>, crate::error::SandlockError>
1650where
1651 I: IntoIterator<Item = (S, H)>,
1652 S: TryInto<crate::seccomp::syscall::Syscall, Error = crate::seccomp::syscall::SyscallError>,
1653 H: crate::seccomp::dispatch::Handler,
1654{
1655 use crate::seccomp::dispatch::{Handler, HandlerError};
1656
1657 let pending: Vec<(i64, Arc<dyn Handler>)> = handlers
1658 .into_iter()
1659 .map(|(syscall, handler)| {
1660 let nr = syscall.try_into().map_err(HandlerError::from)?.raw();
1661 let h: Arc<dyn Handler> = Arc::new(handler);
1662 Ok::<_, HandlerError>((nr, h))
1663 })
1664 .collect::<Result<_, _>>()?;
1665
1666 let nrs: Vec<i64> = pending.iter().map(|(nr, _)| *nr).collect();
1667 crate::seccomp::dispatch::validate_handler_syscalls_against_policy(&nrs, sandbox)
1668 .map_err(|syscall_nr| HandlerError::OnDenySyscall { syscall_nr })?;
1669
1670 Ok(pending)
1671}
1672
1673fn validate_syscall_names(names: &[String]) -> Result<(), SandboxError> {
1674 let unknown: Vec<&str> = names
1675 .iter()
1676 .map(String::as_str)
1677 .filter(|name| crate::context::syscall_name_to_nr(name).is_none())
1678 .collect();
1679 if unknown.is_empty() {
1680 Ok(())
1681 } else {
1682 Err(SandboxError::Invalid(format!(
1683 "unknown syscall name(s): {}",
1684 unknown.join(", ")
1685 )))
1686 }
1687}
1688
// Accumulates sandbox configuration and turns it into a `Sandbox` via
// `build()` / `build_unchecked()`.
//
// With the `cli` feature the struct also derives `clap::Args`; fields marked
// `clap(skip)` are not exposed on the command line and can only be set
// programmatically.
//
// NOTE: plain `//` comments are used on purpose. With clap's derive, `///` doc
// comments on fields are picked up as CLI help text, so adding them here would
// change `--help` output.
#[derive(Default)]
#[cfg_attr(feature = "cli", derive(clap::Args))]
pub struct SandboxBuilder {
    // Paths added by `fs_read` / `fs_read_if_exists` (CLI: `-r`, `--fs-read`).
    #[cfg_attr(feature = "cli", arg(short = 'r', long = "fs-read", value_name = "PATH"))]
    pub fs_readable: Vec<PathBuf>,

    // Paths added by `fs_write` (CLI: `-w`, `--fs-write`).
    #[cfg_attr(feature = "cli", arg(short = 'w', long = "fs-write", value_name = "PATH"))]
    pub fs_writable: Vec<PathBuf>,

    // Paths added by `fs_deny` (CLI: `--fs-deny`).
    #[cfg_attr(feature = "cli", arg(long = "fs-deny", value_name = "PATH"))]
    pub fs_denied: Vec<PathBuf>,

    // Syscall names; `build_unchecked` rejects the build if any name is unknown.
    #[cfg_attr(feature = "cli", arg(long = "extra-deny-syscall", value_name = "NAME"))]
    pub extra_deny_syscalls: Vec<String>,

    // Syscall names; forwarded to the `Sandbox` as-is (`build_unchecked` does
    // not run the name check on this list).
    #[cfg_attr(feature = "cli", arg(long = "extra-allow-syscall", value_name = "NAME"))]
    pub extra_allow_syscalls: Vec<String>,

    // Raw network allow specs; parsed with `NetAllow::parse` at build time.
    #[cfg_attr(feature = "cli", arg(long = "net-allow", value_name = "SPEC"))]
    pub net_allow: Vec<String>,

    // Ports added by `net_bind_port` (CLI: `--net-bind`).
    #[cfg_attr(feature = "cli", arg(long = "net-bind"))]
    pub net_bind: Vec<u16>,

    // Raw HTTP allow rules; parsed with `HttpRule::parse` at build time.
    #[cfg_attr(feature = "cli", arg(long = "http-allow", value_name = "RULE"))]
    pub http_allow: Vec<String>,

    // Raw HTTP deny rules; parsed with `HttpRule::parse` at build time.
    #[cfg_attr(feature = "cli", arg(long = "http-deny", value_name = "RULE"))]
    pub http_deny: Vec<String>,

    // HTTP ports. When left empty while any HTTP rule exists, the build step
    // substitutes port 80, plus 443 when `http_ca` is set.
    #[cfg_attr(feature = "cli", arg(long = "http-port", value_name = "PORT"))]
    pub http_ports: Vec<u16>,

    // CA path; the build fails unless `http_key` is set too (both or neither).
    #[cfg_attr(feature = "cli", arg(long = "http-ca", value_name = "PATH"))]
    pub http_ca: Option<PathBuf>,

    // Key path; the build fails unless `http_ca` is set too (both or neither).
    #[cfg_attr(feature = "cli", arg(long = "http-key", value_name = "PATH"))]
    pub http_key: Option<PathBuf>,

    // Memory limit; not a CLI argument on this struct.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub max_memory: Option<ByteSize>,

    // Process limit; the build step falls back to 64 when unset.
    #[cfg_attr(feature = "cli", arg(short = 'P', long = "max-processes"))]
    pub max_processes: Option<u32>,

    // Open-file limit; forwarded unchanged.
    #[cfg_attr(feature = "cli", arg(long = "max-open-files"))]
    pub max_open_files: Option<u32>,

    // CPU percentage; the build step rejects 0 and anything above 100.
    #[cfg_attr(feature = "cli", arg(short = 'c', long = "cpu"))]
    pub max_cpu: Option<u8>,

    // Seed value; forwarded unchanged.
    #[cfg_attr(feature = "cli", arg(long = "random-seed"))]
    pub random_seed: Option<u64>,

    // Start time; not a CLI argument on this struct.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub time_start: Option<SystemTime>,

    // Flag forwarded unchanged (CLI: `--no-randomize-memory`).
    #[cfg_attr(feature = "cli", arg(long = "no-randomize-memory"))]
    pub no_randomize_memory: bool,

    // Flag forwarded unchanged (CLI: `--no-huge-pages`).
    #[cfg_attr(feature = "cli", arg(long = "no-huge-pages"))]
    pub no_huge_pages: bool,

    // Flag forwarded unchanged (CLI: `--no-coredump`).
    #[cfg_attr(feature = "cli", arg(long = "no-coredump"))]
    pub no_coredump: bool,

    // Flag forwarded unchanged (CLI: `--deterministic-dirs`).
    #[cfg_attr(feature = "cli", arg(long = "deterministic-dirs"))]
    pub deterministic_dirs: bool,

    // Working directory path; forwarded unchanged.
    #[cfg_attr(feature = "cli", arg(long = "workdir"))]
    pub workdir: Option<PathBuf>,

    // Current-directory path; forwarded unchanged.
    #[cfg_attr(feature = "cli", arg(long = "cwd"))]
    pub cwd: Option<PathBuf>,

    // Storage path; forwarded unchanged.
    #[cfg_attr(feature = "cli", arg(long = "fs-storage", value_name = "PATH"))]
    pub fs_storage: Option<PathBuf>,

    // Disk limit; not a CLI argument on this struct.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub max_disk: Option<ByteSize>,

    // Branch action for a clean exit; `BranchAction::default()` when unset.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub on_exit: Option<BranchAction>,

    // Branch action for a failed run; `BranchAction::default()` when unset.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub on_error: Option<BranchAction>,

    // `(virtual_path, host_path)` pairs added by `fs_mount`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub fs_mount: Vec<(PathBuf, PathBuf)>,

    // Chroot path; forwarded unchanged.
    #[cfg_attr(feature = "cli", arg(long = "chroot"))]
    pub chroot: Option<PathBuf>,

    // Flag forwarded unchanged (CLI: `--clean-env`).
    #[cfg_attr(feature = "cli", arg(long = "clean-env"))]
    pub clean_env: bool,

    // Environment entries added by `env_var`; a repeated key keeps the last value.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub env: HashMap<String, String>,

    // GPU device indices; forwarded unchanged.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub gpu_devices: Option<Vec<u32>>,

    // CPU core indices; forwarded unchanged.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub cpu_cores: Option<Vec<u32>>,

    // CPU count; forwarded unchanged.
    #[cfg_attr(feature = "cli", arg(long = "num-cpus"))]
    pub num_cpus: Option<u32>,

    // Flag forwarded unchanged (CLI: `--port-remap`).
    #[cfg_attr(feature = "cli", arg(long = "port-remap"))]
    pub port_remap: bool,

    // Flag forwarded unchanged; not a CLI argument on this struct.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub no_supervisor: bool,

    // User id; forwarded unchanged.
    #[cfg_attr(feature = "cli", arg(long = "uid"))]
    pub uid: Option<u32>,

    // Policy callback installed by `policy_fn`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub policy_fn: Option<crate::policy_fn::PolicyCallback>,

    // Optional sandbox name; forwarded unchanged.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub name: Option<String>,

    // One-shot closure set by `init_fn`. Being `FnOnce`, it cannot be
    // duplicated, so the `Clone` impl resets it to `None`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub(crate) init_fn: Option<Box<dyn FnOnce() + Send + 'static>>,

    // Shared closure set by `work_fn`; clones share the same `Arc`.
    #[cfg_attr(feature = "cli", clap(skip))]
    pub(crate) work_fn: Option<Arc<dyn Fn(u32) + Send + Sync + 'static>>,
}
1850
1851impl std::fmt::Debug for SandboxBuilder {
1852 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1853 f.debug_struct("SandboxBuilder")
1854 .field("fs_readable", &self.fs_readable)
1855 .field("fs_writable", &self.fs_writable)
1856 .field("max_memory", &self.max_memory)
1857 .field("max_processes", &self.max_processes)
1858 .field("policy_fn", &self.policy_fn.as_ref().map(|_| "<callback>"))
1859 .finish_non_exhaustive()
1860 }
1861}
1862
1863impl Clone for SandboxBuilder {
1864 fn clone(&self) -> Self {
1869 Self {
1870 fs_readable: self.fs_readable.clone(),
1871 fs_writable: self.fs_writable.clone(),
1872 fs_denied: self.fs_denied.clone(),
1873 extra_deny_syscalls: self.extra_deny_syscalls.clone(),
1874 extra_allow_syscalls: self.extra_allow_syscalls.clone(),
1875 net_allow: self.net_allow.clone(),
1876 net_bind: self.net_bind.clone(),
1877 http_allow: self.http_allow.clone(),
1878 http_deny: self.http_deny.clone(),
1879 http_ports: self.http_ports.clone(),
1880 http_ca: self.http_ca.clone(),
1881 http_key: self.http_key.clone(),
1882 max_memory: self.max_memory,
1883 max_processes: self.max_processes,
1884 max_open_files: self.max_open_files,
1885 max_cpu: self.max_cpu,
1886 random_seed: self.random_seed,
1887 time_start: self.time_start,
1888 no_randomize_memory: self.no_randomize_memory,
1889 no_huge_pages: self.no_huge_pages,
1890 no_coredump: self.no_coredump,
1891 deterministic_dirs: self.deterministic_dirs,
1892 workdir: self.workdir.clone(),
1893 cwd: self.cwd.clone(),
1894 fs_storage: self.fs_storage.clone(),
1895 max_disk: self.max_disk,
1896 on_exit: self.on_exit.clone(),
1897 on_error: self.on_error.clone(),
1898 fs_mount: self.fs_mount.clone(),
1899 chroot: self.chroot.clone(),
1900 clean_env: self.clean_env,
1901 env: self.env.clone(),
1902 gpu_devices: self.gpu_devices.clone(),
1903 cpu_cores: self.cpu_cores.clone(),
1904 num_cpus: self.num_cpus,
1905 port_remap: self.port_remap,
1906 no_supervisor: self.no_supervisor,
1907 uid: self.uid,
1908 policy_fn: self.policy_fn.clone(),
1909 name: self.name.clone(),
1910 init_fn: None,
1912 work_fn: self.work_fn.clone(),
1914 }
1915 }
1916}
1917
/// Fluent setters plus the two build entry points.
///
/// Every setter takes the builder by value and returns it, so calls chain.
/// List-like settings are appended to; scalar and `Option` settings overwrite
/// any earlier value.
impl SandboxBuilder {
    /// Appends `path` to `fs_writable`.
    pub fn fs_write(mut self, path: impl Into<PathBuf>) -> Self {
        self.fs_writable.push(path.into());
        self
    }

    /// Appends `path` to `fs_readable`.
    pub fn fs_read(mut self, path: impl Into<PathBuf>) -> Self {
        self.fs_readable.push(path.into());
        self
    }

    /// Like [`fs_read`](Self::fs_read), but only if `path` exists when this is called;
    /// otherwise the builder is returned unchanged.
    pub fn fs_read_if_exists(self, path: impl Into<PathBuf>) -> Self {
        let path = path.into();
        if path.exists() {
            self.fs_read(path)
        } else {
            self
        }
    }

    /// Appends `path` to `fs_denied`.
    pub fn fs_deny(mut self, path: impl Into<PathBuf>) -> Self {
        self.fs_denied.push(path.into());
        self
    }

    /// Extends (does not replace) `extra_deny_syscalls`. Names are checked at build time.
    pub fn extra_deny_syscalls(mut self, calls: Vec<String>) -> Self {
        self.extra_deny_syscalls.extend(calls);
        self
    }

    /// Extends (does not replace) `extra_allow_syscalls`.
    pub fn extra_allow_syscalls(mut self, names: Vec<String>) -> Self {
        self.extra_allow_syscalls.extend(names);
        self
    }

    /// Appends a raw network allow spec; it is parsed with `NetAllow::parse` at build time.
    pub fn net_allow(mut self, spec: impl Into<String>) -> Self {
        self.net_allow.push(spec.into());
        self
    }

    /// Appends `port` to `net_bind`.
    pub fn net_bind_port(mut self, port: u16) -> Self {
        self.net_bind.push(port);
        self
    }

    /// Appends a raw HTTP allow rule; it is parsed with `HttpRule::parse` at build time.
    pub fn http_allow(mut self, rule: &str) -> Self {
        self.http_allow.push(rule.to_string());
        self
    }

    /// Appends a raw HTTP deny rule; it is parsed with `HttpRule::parse` at build time.
    pub fn http_deny(mut self, rule: &str) -> Self {
        self.http_deny.push(rule.to_string());
        self
    }

    /// Appends `port` to `http_ports`. Setting any port disables the build-time default.
    pub fn http_port(mut self, port: u16) -> Self {
        self.http_ports.push(port);
        self
    }

    /// Sets `http_ca`. The build fails unless `http_key` is set as well.
    pub fn http_ca(mut self, path: impl Into<PathBuf>) -> Self {
        self.http_ca = Some(path.into());
        self
    }

    /// Sets `http_key`. The build fails unless `http_ca` is set as well.
    pub fn http_key(mut self, path: impl Into<PathBuf>) -> Self {
        self.http_key = Some(path.into());
        self
    }

    /// Sets `max_memory`.
    pub fn max_memory(mut self, size: ByteSize) -> Self {
        self.max_memory = Some(size);
        self
    }

    /// Sets `max_processes` (the build uses 64 when this is never called).
    pub fn max_processes(mut self, n: u32) -> Self {
        self.max_processes = Some(n);
        self
    }

    /// Sets `max_open_files`.
    pub fn max_open_files(mut self, n: u32) -> Self {
        self.max_open_files = Some(n);
        self
    }

    /// Sets `max_cpu`. The value is not checked here; the build rejects 0 and values above 100.
    pub fn max_cpu(mut self, pct: u8) -> Self {
        self.max_cpu = Some(pct);
        self
    }

    /// Sets `random_seed`.
    pub fn random_seed(mut self, seed: u64) -> Self {
        self.random_seed = Some(seed);
        self
    }

    /// Sets `time_start`.
    pub fn time_start(mut self, t: SystemTime) -> Self {
        self.time_start = Some(t);
        self
    }

    /// Sets the `no_randomize_memory` flag.
    pub fn no_randomize_memory(mut self, v: bool) -> Self {
        self.no_randomize_memory = v;
        self
    }

    /// Sets the `no_huge_pages` flag.
    pub fn no_huge_pages(mut self, v: bool) -> Self {
        self.no_huge_pages = v;
        self
    }

    /// Sets the `no_coredump` flag.
    pub fn no_coredump(mut self, v: bool) -> Self {
        self.no_coredump = v;
        self
    }

    /// Sets the `deterministic_dirs` flag.
    pub fn deterministic_dirs(mut self, v: bool) -> Self {
        self.deterministic_dirs = v;
        self
    }

    /// Sets `workdir`.
    pub fn workdir(mut self, path: impl Into<PathBuf>) -> Self {
        self.workdir = Some(path.into());
        self
    }

    /// Sets `cwd`.
    pub fn cwd(mut self, path: impl Into<PathBuf>) -> Self {
        self.cwd = Some(path.into());
        self
    }

    /// Sets `fs_storage`.
    pub fn fs_storage(mut self, path: impl Into<PathBuf>) -> Self {
        self.fs_storage = Some(path.into());
        self
    }

    /// Sets `max_disk`.
    pub fn max_disk(mut self, size: ByteSize) -> Self {
        self.max_disk = Some(size);
        self
    }

    /// Sets `on_exit` (the build uses `BranchAction::default()` when this is never called).
    pub fn on_exit(mut self, action: BranchAction) -> Self {
        self.on_exit = Some(action);
        self
    }

    /// Sets `on_error` (the build uses `BranchAction::default()` when this is never called).
    pub fn on_error(mut self, action: BranchAction) -> Self {
        self.on_error = Some(action);
        self
    }

    /// Sets `chroot`.
    pub fn chroot(mut self, path: impl Into<PathBuf>) -> Self {
        self.chroot = Some(path.into());
        self
    }

    /// Appends a `(virtual_path, host_path)` pair to `fs_mount`.
    pub fn fs_mount(mut self, virtual_path: impl Into<PathBuf>, host_path: impl Into<PathBuf>) -> Self {
        self.fs_mount.push((virtual_path.into(), host_path.into()));
        self
    }

    /// Sets the `clean_env` flag.
    pub fn clean_env(mut self, v: bool) -> Self {
        self.clean_env = v;
        self
    }

    /// Inserts `key = value` into `env`, replacing any earlier value for the same key.
    pub fn env_var(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
        self.env.insert(key.into(), value.into());
        self
    }


    /// Sets `gpu_devices`, replacing any earlier list.
    pub fn gpu_devices(mut self, devices: Vec<u32>) -> Self {
        self.gpu_devices = Some(devices);
        self
    }

    /// Sets `cpu_cores`, replacing any earlier list.
    pub fn cpu_cores(mut self, cores: Vec<u32>) -> Self {
        self.cpu_cores = Some(cores);
        self
    }

    /// Sets `num_cpus`.
    pub fn num_cpus(mut self, n: u32) -> Self {
        self.num_cpus = Some(n);
        self
    }

    /// Sets the `port_remap` flag.
    pub fn port_remap(mut self, v: bool) -> Self {
        self.port_remap = v;
        self
    }

    /// Sets the `no_supervisor` flag.
    pub fn no_supervisor(mut self, v: bool) -> Self {
        self.no_supervisor = v;
        self
    }

    /// Installs the policy callback, wrapping `f` in an `Arc`.
    pub fn policy_fn(
        mut self,
        f: impl Fn(crate::policy_fn::SyscallEvent, &mut crate::policy_fn::PolicyContext) -> crate::policy_fn::Verdict + Send + Sync + 'static,
    ) -> Self {
        self.policy_fn = Some(std::sync::Arc::new(f));
        self
    }

    /// Sets `uid`.
    pub fn uid(mut self, id: u32) -> Self {
        self.uid = Some(id);
        self
    }

    /// Sets `name`. The value is stored as given; no check happens in this setter.
    pub fn name(mut self, name: impl Into<String>) -> Self {
        self.name = Some(name.into());
        self
    }

    /// Stores a one-shot closure in `init_fn`. Note that cloning the builder drops it.
    pub fn init_fn(mut self, f: impl FnOnce() + Send + 'static) -> Self {
        self.init_fn = Some(Box::new(f));
        self
    }

    /// Stores a shared closure in `work_fn`, wrapping `f` in an `Arc`.
    pub fn work_fn(mut self, f: impl Fn(u32) + Send + Sync + 'static) -> Self {
        self.work_fn = Some(Arc::new(f));
        self
    }

    /// Converts the builder into a `Sandbox` without calling `Sandbox::validate`.
    ///
    /// Steps, in order:
    /// 1. check the `extra_deny_syscalls` names;
    /// 2. check `max_cpu` is within `1..=100` when set;
    /// 3. require `http_ca` and `http_key` to be both set or both unset;
    /// 4. parse the HTTP rules and network allow specs;
    /// 5. pick default HTTP ports when none were given but rules exist;
    /// 6. derive extra TCP `NetAllow` entries from the HTTP configuration;
    /// 7. fill in defaults (`max_processes` = 64, default branch actions).
    ///
    /// # Errors
    ///
    /// Returns `SandboxError::Invalid` for unknown deny-syscall names or a
    /// lone `http_ca` / `http_key`, `SandboxError::InvalidCpuPercent` for an
    /// out-of-range `max_cpu`, and whatever `HttpRule::parse` /
    /// `NetAllow::parse` return for a malformed rule or spec.
    pub fn build_unchecked(self) -> Result<Sandbox, SandboxError> {
        validate_syscall_names(&self.extra_deny_syscalls)?;

        if let Some(cpu) = self.max_cpu {
            if cpu == 0 || cpu > 100 {
                return Err(SandboxError::InvalidCpuPercent(cpu));
            }
        }

        // `!=` on the two `is_some()` results is true exactly when one is set without the other.
        if self.http_ca.is_some() != self.http_key.is_some() {
            return Err(SandboxError::Invalid(
                "--http-ca and --http-key must both be provided together".into(),
            ));
        }

        let http_allow: Vec<HttpRule> = self
            .http_allow
            .into_iter()
            .map(|s| HttpRule::parse(&s))
            .collect::<Result<_, _>>()?;
        let http_deny: Vec<HttpRule> = self
            .http_deny
            .into_iter()
            .map(|s| HttpRule::parse(&s))
            .collect::<Result<_, _>>()?;

        // No explicit ports but at least one rule: default to 80, and to 443 as well when a CA is set.
        let http_ports = if self.http_ports.is_empty() && (!http_allow.is_empty() || !http_deny.is_empty()) {
            let mut ports = vec![80];
            if self.http_ca.is_some() {
                ports.push(443);
            }
            ports
        } else {
            self.http_ports
        };

        let mut net_allow: Vec<NetAllow> = self
            .net_allow
            .into_iter()
            .map(|s| NetAllow::parse(&s))
            .collect::<Result<_, _>>()?;

        if !http_ports.is_empty() {
            // Collect the hosts named by allow AND deny rules, de-duplicated
            // case-insensitively; `*` is tracked separately.
            let mut wildcard_seen = false;
            let mut concrete_hosts: Vec<String> = Vec::new();
            for rule in http_allow.iter().chain(http_deny.iter()) {
                if rule.host == "*" {
                    wildcard_seen = true;
                } else if !concrete_hosts.iter().any(|h| h.eq_ignore_ascii_case(&rule.host)) {
                    concrete_hosts.push(rule.host.clone());
                }
            }
            // A wildcard rule, or HTTP ports with no rules at all, adds an
            // any-host TCP entry restricted to the HTTP ports.
            if wildcard_seen || (http_allow.is_empty() && http_deny.is_empty()) {
                net_allow.push(NetAllow {
                    protocol: Protocol::Tcp,
                    host: None,
                    ports: http_ports.clone(),
                    all_ports: false,
                });
            }
            // Each concrete host gets its own TCP entry on the HTTP ports.
            for h in concrete_hosts {
                net_allow.push(NetAllow {
                    protocol: Protocol::Tcp,
                    host: Some(h),
                    ports: http_ports.clone(),
                    all_ports: false,
                });
            }
        }

        Ok(Sandbox {
            fs_writable: self.fs_writable,
            fs_readable: self.fs_readable,
            fs_denied: self.fs_denied,
            extra_deny_syscalls: self.extra_deny_syscalls,
            extra_allow_syscalls: self.extra_allow_syscalls,
            net_allow,
            net_bind: self.net_bind,
            http_allow,
            http_deny,
            http_ports,
            http_ca: self.http_ca,
            http_key: self.http_key,
            max_memory: self.max_memory,
            max_processes: self.max_processes.unwrap_or(64),
            max_open_files: self.max_open_files,
            max_cpu: self.max_cpu,
            random_seed: self.random_seed,
            time_start: self.time_start,
            no_randomize_memory: self.no_randomize_memory,
            no_huge_pages: self.no_huge_pages,
            no_coredump: self.no_coredump,
            deterministic_dirs: self.deterministic_dirs,
            workdir: self.workdir,
            cwd: self.cwd,
            fs_storage: self.fs_storage,
            max_disk: self.max_disk,
            on_exit: self.on_exit.unwrap_or_default(),
            on_error: self.on_error.unwrap_or_default(),
            fs_mount: self.fs_mount,
            chroot: self.chroot,
            clean_env: self.clean_env,
            env: self.env,
            gpu_devices: self.gpu_devices,
            cpu_cores: self.cpu_cores,
            num_cpus: self.num_cpus,
            port_remap: self.port_remap,
            no_supervisor: self.no_supervisor,
            uid: self.uid,
            policy_fn: self.policy_fn,
            name: self.name,
            init_fn: self.init_fn,
            work_fn: self.work_fn,
            // A freshly built sandbox has no runtime attached yet.
            runtime: None,
        })
    }

    /// Builds the `Sandbox` with [`build_unchecked`](Self::build_unchecked) and then
    /// calls `validate()` on the result.
    ///
    /// # Errors
    ///
    /// Returns any error from `build_unchecked` or from `validate()`.
    pub fn build(self) -> Result<Sandbox, SandboxError> {
        let p = self.build_unchecked()?;
        p.validate()?;
        Ok(p)
    }
}
2309
#[cfg(test)]
mod tests {
    use super::*;

    /// One allow rule and one deny rule end up as one entry each on the built
    /// sandbox, with the method and host fields matching what was written.
    #[test]
    fn builder_http_rules() {
        let sandbox = Sandbox::builder()
            .http_allow("GET api.example.com/v1/*")
            .http_deny("* */admin/*")
            .build()
            .unwrap();
        let (allow, deny) = (&sandbox.http_allow, &sandbox.http_deny);

        assert_eq!(allow.len(), 1);
        assert_eq!(deny.len(), 1);
        assert_eq!(allow[0].method, "GET");
        assert_eq!(deny[0].host, "*");
    }

    /// `build` must fail when given the allow rule string `GETexample.com`.
    #[test]
    fn builder_invalid_http_allow_returns_error() {
        assert!(Sandbox::builder()
            .http_allow("GETexample.com")
            .build()
            .is_err());
    }

    /// `build` must fail when given the deny rule string `BADRULE`.
    #[test]
    fn builder_invalid_http_deny_returns_error() {
        assert!(Sandbox::builder().http_deny("BADRULE").build().is_err());
    }

    /// Supplying a CA path without a key path must make `build` fail.
    #[test]
    fn builder_http_ca_without_key_returns_error() {
        assert!(Sandbox::builder().http_ca("/tmp/ca.pem").build().is_err());
    }

    /// Supplying a key path without a CA path must make `build` fail.
    #[test]
    fn builder_http_key_without_ca_returns_error() {
        assert!(Sandbox::builder().http_key("/tmp/key.pem").build().is_err());
    }

    /// Supplying both the CA and the key builds successfully and keeps both
    /// values on the resulting sandbox.
    #[test]
    fn builder_http_ca_and_key_together_ok() {
        let built = Sandbox::builder()
            .http_ca("/tmp/ca.pem")
            .http_key("/tmp/key.pem")
            .build();
        let sandbox = built.unwrap();

        assert!(sandbox.http_ca.is_some());
        assert!(sandbox.http_key.is_some());
    }

    /// `allows_sysv_ipc` is true only when `sysv_ipc` is among the extra
    /// allowed syscall groups; it is false for the default builder and for an
    /// unrelated group name.
    #[test]
    fn allows_sysv_ipc_reads_extra_allow_syscalls() {
        let with_sysv = Sandbox::builder()
            .extra_allow_syscalls(vec!["sysv_ipc".into()])
            .build()
            .unwrap();
        assert!(with_sysv.allows_sysv_ipc());

        let defaults = Sandbox::builder().build().unwrap();
        assert!(!defaults.allows_sysv_ipc());

        let unrelated = Sandbox::builder()
            .extra_allow_syscalls(vec!["other_group".into()])
            .build()
            .unwrap();
        assert!(!unrelated.allows_sysv_ipc());
    }
}