1use std::ffi::CString;
5use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd};
6use std::sync::Arc;
7use std::time::Duration;
8
9use tokio::sync::Mutex;
10use tokio::task::JoinHandle;
11
12use std::sync::atomic::{AtomicBool, Ordering};
13
14use crate::context::{self, CowConfig, PipePair, read_u32_fd, write_u32_fd};
15use crate::cow::{CowBranch, overlayfs::OverlayBranch, branchfs::BranchFsBranch};
16use crate::error::{SandboxError, SandlockError};
17use crate::network;
18use crate::policy::{BranchAction, FsIsolation, Policy};
19use crate::result::{ExitStatus, RunResult};
20use crate::seccomp::notif::{self, NotifPolicy, SupervisorState};
21use crate::sys::syscall;
22
/// Process-wide flag marking this process as running inside a sandbox.
///
/// It is set to `true` by the template child in `Sandbox::fork` after the
/// Landlock and seccomp-deny steps have been attempted, and is consulted by
/// [`is_nested`] before falling back to `/proc/self/status`.
pub(crate) static CONFINED: AtomicBool = AtomicBool::new(false);
30
31pub fn is_nested() -> bool {
37 if CONFINED.load(Ordering::Relaxed) {
38 return true;
39 }
40 if let Ok(status) = std::fs::read_to_string("/proc/self/status") {
42 for line in status.lines() {
43 if line.starts_with("Seccomp:") {
44 return line.trim().ends_with('2');
45 }
46 }
47 }
48 false
49}
50
/// Lifecycle state of a [`Sandbox`].
enum SandboxState {
    /// Constructed but nothing has been spawned yet; the only state in which
    /// `do_spawn` agrees to start a child.
    Created,
    /// A child has been forked (set by `do_spawn`, `fork` and `resume`).
    Running,
    /// The child's process group was sent `SIGSTOP` through `pause`.
    Paused,
    /// The child has been waited for (or, for clones returned by `fork`, its
    /// exit code was reported); carries the resulting exit status.
    Stopped(ExitStatus),
}
61
/// A single sandboxed child process together with the supervisor-side
/// resources (pipes, background tasks, copy-on-write branch) that belong to it.
pub struct Sandbox {
    /// Private clone of the policy passed at construction time.
    policy: Policy,
    /// Current lifecycle state.
    state: SandboxState,
    /// Pid returned by `fork()`; also used as the process-group id for
    /// `killpg`. `None` until a child exists.
    child_pid: Option<i32>,
    /// pidfd for the child, when `pidfd_open` succeeded in `do_spawn`.
    pidfd: Option<OwnedFd>,
    /// Task running the seccomp notification supervisor; aborted by `wait`
    /// and on drop.
    notif_handle: Option<JoinHandle<()>>,
    /// Task running `throttle_cpu`; only spawned when `policy.max_cpu` is
    /// below 100.
    throttle_handle: Option<JoinHandle<()>>,
    /// Read end of the captured-stdout pipe; drained by `wait`.
    _stdout_read: Option<OwnedFd>,
    /// Read end of the captured-stderr pipe; drained by `wait`.
    _stderr_read: Option<OwnedFd>,
    /// OverlayFs/BranchFs branch created in `do_spawn`; resolved by `commit`,
    /// `abort_branch`, or the policy's branch action on drop.
    cow_branch: Option<Box<dyn CowBranch>>,
    /// State shared with the notification supervisor task; used by
    /// `freeze`/`thaw` and inspected on drop.
    supervisor_state: Option<Arc<Mutex<SupervisorState>>>,
    /// Parent end of the control pipe created by `fork`, kept open after it
    /// returns.
    ctrl_fd: Option<OwnedFd>,
    /// For sandboxes returned by `fork`: read end of that clone's output
    /// pipe, consumed by `reduce`.
    stdout_pipe: Option<OwnedFd>,
    /// One-shot initializer supplied through `new_with_fns`; taken by `fork`.
    init_fn: Option<Box<dyn FnOnce() + Send + 'static>>,
    /// Per-clone work function supplied through `new_with_fns`; taken by `fork`.
    work_fn: Option<Arc<dyn Fn(u32) + Send + Sync + 'static>>,
    /// Optional raw fds `(stdin, stdout, stderr)` that the child `dup2`s onto
    /// fds 0/1/2 before confinement. The fds are not owned by this struct.
    io_overrides: Option<(Option<i32>, Option<i32>, Option<i32>)>,
}
95
96impl Sandbox {
97 pub fn new(policy: &Policy) -> Result<Self, SandlockError> {
99 Ok(Self::create(policy))
100 }
101
102 pub fn new_with_fns(
115 policy: &Policy,
116 init_fn: impl FnOnce() + Send + 'static,
117 work_fn: impl Fn(u32) + Send + Sync + 'static,
118 ) -> Result<Self, SandlockError> {
119 let mut sb = Self::create(policy);
120 sb.init_fn = Some(Box::new(init_fn));
121 sb.work_fn = Some(Arc::new(work_fn));
122 Ok(sb)
123 }
124
125 fn create(policy: &Policy) -> Self {
126 Self {
127 policy: policy.clone(),
128 state: SandboxState::Created,
129 child_pid: None,
130 pidfd: None,
131 notif_handle: None,
132 throttle_handle: None,
133 _stdout_read: None,
134 _stderr_read: None,
135 cow_branch: None,
136 supervisor_state: None,
137 ctrl_fd: None,
138 stdout_pipe: None,
139 init_fn: None,
140 work_fn: None,
141 io_overrides: None,
142 }
143 }
144
145 pub async fn run(policy: &Policy, cmd: &[&str]) -> Result<RunResult, SandlockError> {
148 let mut sb = Self::new(policy)?;
149 sb.do_spawn(cmd, true).await?;
150 sb.wait().await
151 }
152
153 pub async fn run_interactive(policy: &Policy, cmd: &[&str]) -> Result<RunResult, SandlockError> {
155 let mut sb = Self::new(policy)?;
156 sb.do_spawn(cmd, false).await?;
157 sb.wait().await
158 }
159
160 pub async fn fork(&mut self, n: u32) -> Result<Vec<Sandbox>, SandlockError> {
180 let init_fn = self.init_fn.take()
181 .ok_or_else(|| SandboxError::Child("fork() requires new_with_fns()".into()))?;
182 let work_fn = self.work_fn.take()
183 .ok_or_else(|| SandboxError::Child("fork() requires new_with_fns()".into()))?;
184
185 let policy = self.policy.clone();
186
187
188 let mut ctrl_fds = [0i32; 2];
190 if unsafe { libc::pipe2(ctrl_fds.as_mut_ptr(), 0) } < 0 {
191 return Err(SandboxError::Io(std::io::Error::last_os_error()).into());
192 }
193 let ctrl_parent = unsafe { OwnedFd::from_raw_fd(ctrl_fds[0]) };
194 let ctrl_child_fd = ctrl_fds[1];
195
196 let mut pipe_read_ends: Vec<OwnedFd> = Vec::with_capacity(n as usize);
198 let mut pipe_write_fds: Vec<i32> = Vec::with_capacity(n as usize);
199 for _ in 0..n {
200 let mut pfds = [0i32; 2];
201 if unsafe { libc::pipe(pfds.as_mut_ptr()) } >= 0 {
202 pipe_read_ends.push(unsafe { OwnedFd::from_raw_fd(pfds[0]) });
203 pipe_write_fds.push(pfds[1]);
204 } else {
205 pipe_write_fds.push(-1);
206 }
207 }
208
209 let pid = unsafe { libc::fork() };
211 if pid < 0 {
212 unsafe { libc::close(ctrl_child_fd) };
213 return Err(SandboxError::Fork(std::io::Error::last_os_error()).into());
214 }
215
216 if pid == 0 {
217 drop(ctrl_parent);
219
220 unsafe { libc::setpgid(0, 0) };
221 unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL) };
222 unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
223
224 let _ = crate::landlock::confine(&policy);
225
226 let deny = crate::context::deny_syscall_numbers(&policy);
227 let args = crate::context::arg_filters(&policy);
228 let filter = crate::seccomp::bpf::assemble_filter(&[], &deny, &args);
229 let _ = crate::seccomp::bpf::install_deny_filter(&filter);
230
231 CONFINED.store(true, std::sync::atomic::Ordering::Relaxed);
232
233 init_fn();
235
236 drop(pipe_read_ends);
238
239 crate::fork::fork_ready_loop_fn(ctrl_child_fd, n, &*work_fn, &pipe_write_fds);
241 unsafe { libc::_exit(0) };
242 }
243
244 unsafe { libc::close(ctrl_child_fd) };
246 for wfd in &pipe_write_fds {
248 if *wfd >= 0 { unsafe { libc::close(*wfd) }; }
249 }
250 self.child_pid = Some(pid);
251 self.state = SandboxState::Running;
252
253 let ctrl_fd = ctrl_parent.as_raw_fd();
255 let mut pid_buf = vec![0u8; n as usize * 4];
256 read_exact(ctrl_fd, &mut pid_buf);
257
258 let clone_pids: Vec<i32> = pid_buf.chunks(4)
259 .map(|c| u32::from_be_bytes(c.try_into().unwrap_or([0; 4])) as i32)
260 .collect();
261 let live_count = clone_pids.iter().filter(|&&p| p > 0).count();
262
263 let mut code_buf = vec![0u8; live_count * 4];
265 read_exact(ctrl_fd, &mut code_buf);
266 self.ctrl_fd = Some(ctrl_parent);
267
268 let mut status = 0i32;
270 unsafe { libc::waitpid(pid, &mut status, 0) };
271
272 let mut code_idx = 0;
274 let mut clones = Vec::with_capacity(live_count);
275 let mut pipe_iter = pipe_read_ends.into_iter();
276
277 for &clone_pid in &clone_pids {
278 let pipe = pipe_iter.next();
279 if clone_pid <= 0 { continue; }
280
281 let code = i32::from_be_bytes(
282 code_buf[code_idx * 4..(code_idx + 1) * 4].try_into().unwrap_or([0; 4])
283 );
284 code_idx += 1;
285
286 let mut sb = Sandbox::create(&policy);
287 sb.child_pid = Some(clone_pid);
288 sb.state = SandboxState::Stopped(if code == 0 {
289 ExitStatus::Code(0)
290 } else if code > 0 {
291 ExitStatus::Code(code)
292 } else {
293 ExitStatus::Killed
294 });
295 sb.stdout_pipe = pipe;
296 clones.push(sb);
297 }
298
299 Ok(clones)
300 }
301
302 pub async fn reduce(
312 &self,
313 cmd: &[&str],
314 clones: &mut [Sandbox],
315 ) -> Result<RunResult, SandlockError> {
316 let mut combined = Vec::new();
318 for clone in clones.iter_mut() {
319 if let Some(pipe) = clone.stdout_pipe.take() {
320 combined.extend_from_slice(&read_fd_to_end(pipe));
321 }
322 }
323
324 let mut stdin_fds = [0i32; 2];
326 if unsafe { libc::pipe2(stdin_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
327 return Err(SandboxError::Io(std::io::Error::last_os_error()).into());
328 }
329
330 let write_fd = stdin_fds[1];
332 let write_handle = tokio::task::spawn_blocking(move || {
333 unsafe {
334 libc::write(write_fd, combined.as_ptr() as *const _, combined.len());
335 libc::close(write_fd);
336 }
337 });
338
339 let mut reducer = Sandbox::new(&self.policy)?;
341 reducer.io_overrides = Some((Some(stdin_fds[0]), None, None));
342 reducer.do_spawn(cmd, true).await?;
343 unsafe { libc::close(stdin_fds[0]) };
344
345 let _ = write_handle.await;
346 reducer.wait().await
347 }
348
349 pub async fn wait(&mut self) -> Result<RunResult, SandlockError> {
351 let pid = self.child_pid.ok_or(SandboxError::NotRunning)?;
352
353 if let SandboxState::Stopped(ref es) = self.state {
354 return Ok(RunResult {
355 exit_status: es.clone(),
356 stdout: None,
357 stderr: None,
358 });
359 }
360
361 let exit_status = tokio::task::spawn_blocking(move || -> ExitStatus {
363 let mut status: i32 = 0;
364 loop {
365 let ret = unsafe { libc::waitpid(pid, &mut status, 0) };
366 if ret < 0 {
367 let err = std::io::Error::last_os_error();
368 if err.raw_os_error() == Some(libc::EINTR) {
369 continue;
370 }
371 return ExitStatus::Killed;
373 }
374 break;
375 }
376 wait_status_to_exit(status)
377 })
378 .await
379 .unwrap_or(ExitStatus::Killed);
380
381 self.state = SandboxState::Stopped(exit_status.clone());
382
383 if let Some(h) = self.notif_handle.take() {
385 h.abort();
386 }
387 if let Some(h) = self.throttle_handle.take() {
388 h.abort();
389 }
390
391 let stdout = self._stdout_read.take().map(|fd| read_fd_to_end(fd));
393 let stderr = self._stderr_read.take().map(|fd| read_fd_to_end(fd));
394
395 Ok(RunResult {
396 exit_status,
397 stdout,
398 stderr,
399 })
400 }
401
402 pub fn pause(&mut self) -> Result<(), SandlockError> {
404 let pid = self.child_pid.ok_or(SandboxError::NotRunning)?;
405 let ret = unsafe { libc::killpg(pid, libc::SIGSTOP) };
406 if ret < 0 {
407 return Err(SandboxError::Io(std::io::Error::last_os_error()).into());
408 }
409 self.state = SandboxState::Paused;
410 Ok(())
411 }
412
413 pub fn resume(&mut self) -> Result<(), SandlockError> {
415 let pid = self.child_pid.ok_or(SandboxError::NotRunning)?;
416 let ret = unsafe { libc::killpg(pid, libc::SIGCONT) };
417 if ret < 0 {
418 return Err(SandboxError::Io(std::io::Error::last_os_error()).into());
419 }
420 self.state = SandboxState::Running;
421 Ok(())
422 }
423
424 pub fn kill(&mut self) -> Result<(), SandlockError> {
426 let pid = self.child_pid.ok_or(SandboxError::NotRunning)?;
427 let ret = unsafe { libc::killpg(pid, libc::SIGKILL) };
428 if ret < 0 {
429 let err = std::io::Error::last_os_error();
430 if err.raw_os_error() != Some(libc::ESRCH) {
432 return Err(SandboxError::Io(err).into());
433 }
434 }
435 Ok(())
436 }
437
/// Returns the pid recorded for this sandbox's child, or `None` if no child
/// has been spawned yet. The value is not cleared when the child exits.
pub fn pid(&self) -> Option<i32> {
    self.child_pid
}
442
443 #[doc(hidden)]
445 pub fn is_running(&self) -> bool {
446 matches!(self.state, SandboxState::Running | SandboxState::Paused)
447 }
448
/// Borrows the policy this sandbox was created with (the sandbox's own clone).
pub fn policy(&self) -> &Policy {
    &self.policy
}
453
454 #[doc(hidden)]
456 pub async fn commit(&mut self) -> Result<(), SandlockError> {
457 if let Some(branch) = self.cow_branch.take() {
458 branch.commit().map_err(|e| SandlockError::Sandbox(SandboxError::Branch(e)))?;
459 }
460 Ok(())
461 }
462
463 #[doc(hidden)]
465 pub async fn abort_branch(&mut self) -> Result<(), SandlockError> {
466 if let Some(branch) = self.cow_branch.take() {
467 branch.abort().map_err(|e| SandlockError::Sandbox(SandboxError::Branch(e)))?;
468 }
469 Ok(())
470 }
471
472 pub(crate) async fn freeze(&self) -> Result<(), SandlockError> {
474 let pid = self.child_pid.ok_or(SandlockError::Sandbox(SandboxError::NotRunning))?;
475
476 if let Some(ref state) = self.supervisor_state {
478 let mut st = state.lock().await;
479 st.hold_forks = true;
480 }
481
482 unsafe { libc::killpg(pid, libc::SIGSTOP); }
484 Ok(())
485 }
486
487 pub(crate) async fn thaw(&self) -> Result<(), SandlockError> {
489 let pid = self.child_pid.ok_or(SandlockError::Sandbox(SandboxError::NotRunning))?;
490
491 if let Some(ref state) = self.supervisor_state {
493 let mut st = state.lock().await;
494 st.hold_forks = false;
495 st.held_notif_ids.clear();
496 }
497
498 unsafe { libc::killpg(pid, libc::SIGCONT); }
500 Ok(())
501 }
502
/// Starts `cmd` in the sandbox without creating stdout/stderr capture pipes
/// and returns once the child has been released to run; it does not wait for
/// the child to exit.
///
/// # Errors
/// Whatever `do_spawn` reports (for example an empty command, or a sandbox
/// that was already spawned).
#[doc(hidden)]
pub async fn spawn(&mut self, cmd: &[&str]) -> Result<(), SandlockError> {
    self.do_spawn(cmd, false).await
}
509
/// Starts `cmd` in the sandbox with stdout and stderr redirected into capture
/// pipes; the captured bytes are returned by a later `wait`. Does not wait
/// for the child to exit.
///
/// # Errors
/// Whatever `do_spawn` reports (for example an empty command, or a sandbox
/// that was already spawned).
#[doc(hidden)]
pub async fn spawn_captured(&mut self, cmd: &[&str]) -> Result<(), SandlockError> {
    self.do_spawn(cmd, true).await
}
516
517 #[doc(hidden)]
526 pub async fn spawn_with_io(
527 &mut self,
528 cmd: &[&str],
529 stdin_fd: Option<std::os::unix::io::RawFd>,
530 stdout_fd: Option<std::os::unix::io::RawFd>,
531 stderr_fd: Option<std::os::unix::io::RawFd>,
532 ) -> Result<(), SandlockError> {
533 self.io_overrides = Some((stdin_fd, stdout_fd, stderr_fd));
534 self.do_spawn(cmd, false).await
535 }
536
537 pub async fn checkpoint(&self) -> Result<crate::checkpoint::Checkpoint, SandlockError> {
539 let pid = self.child_pid.ok_or(SandlockError::Sandbox(SandboxError::NotRunning))?;
540
541 self.freeze().await?;
543
544 let cp = crate::checkpoint::capture(pid, &self.policy);
546
547 self.thaw().await?;
549
550 cp
551 }
552
/// Forks the sandboxed child for `cmd` and sets up all supervisor-side state.
///
/// `capture` selects whether stdout/stderr are redirected into pipes whose
/// read ends are stored for `wait`.
///
/// Sequence, as implemented below:
/// 1. Validate state (`Created` only) and the command, and convert it to
///    C strings.
/// 2. Resolve allow-listed hosts and create the OverlayFs/BranchFs branch
///    requested by the policy.
/// 3. Create optional capture pipes, then `fork()`.
/// 4. Child: apply stdio overrides and capture redirections, then hand over
///    to `context::confine_child`.
/// 5. Parent: record the pid, read the child's seccomp-notification fd number
///    from the handshake pipe, duplicate that fd into this process, start
///    the notification supervisor and the optional CPU throttle task, and
///    finally write the "ready" value to the child.
///
/// # Errors
/// `SandboxError::Child` for an already-spawned sandbox, an empty or invalid
/// command, a missing `workdir`, or a failed handshake;
/// `SandboxError::Io` / `SandboxError::Branch` / `SandboxError::Fork` for the
/// corresponding set-up failures. Errors returned after the fork leave the
/// sandbox in the `Running` state.
async fn do_spawn(&mut self, cmd: &[&str], capture: bool) -> Result<(), SandlockError> {
    // A sandbox can be spawned exactly once.
    if !matches!(self.state, SandboxState::Created) {
        return Err(SandboxError::Child("sandbox already spawned".into()).into());
    }

    if cmd.is_empty() {
        return Err(SandboxError::Child("empty command".into()).into());
    }

    // `CString::new` fails on interior NUL bytes.
    let c_cmd: Vec<CString> = cmd
        .iter()
        .map(|s| CString::new(*s).map_err(|_| SandboxError::Child("invalid command string".into())))
        .collect::<Result<Vec<_>, _>>()?;

    // Determined in the parent before forking and passed on to the child.
    let nested = is_nested();

    // Handshake pipes between parent and child (notif_* and ready_* ends).
    let pipes = PipePair::new().map_err(SandboxError::Io)?;

    // Resolve the host allow-list up front; empty when no hosts are listed.
    let resolved_ips = if !self.policy.net_allow_hosts.is_empty() {
        network::resolve_hosts(&self.policy.net_allow_hosts)
            .await
            .map_err(SandboxError::Io)?
    } else {
        std::collections::HashSet::new()
    };

    // Create the filesystem branch requested by the policy, if any.
    let cow_branch: Option<Box<dyn CowBranch>> = match self.policy.fs_isolation {
        FsIsolation::OverlayFs => {
            let workdir = self.policy.workdir.as_ref()
                .ok_or_else(|| SandlockError::Sandbox(SandboxError::Child("OverlayFs requires workdir".into())))?;
            // Falls back to `<temp dir>/sandlock-overlay` when no storage
            // directory is configured.
            let storage = self.policy.fs_storage.as_ref()
                .cloned()
                .unwrap_or_else(|| std::env::temp_dir().join("sandlock-overlay"));
            std::fs::create_dir_all(&storage)
                .map_err(|e| SandlockError::Sandbox(SandboxError::Io(e)))?;
            let branch = OverlayBranch::create(workdir, &storage)
                .map_err(|e| SandlockError::Sandbox(SandboxError::Branch(e)))?;
            Some(Box::new(branch))
        }
        FsIsolation::BranchFs => {
            let workdir = self.policy.workdir.as_ref()
                .ok_or_else(|| SandlockError::Sandbox(SandboxError::Child("BranchFs requires workdir".into())))?;
            let branch = BranchFsBranch::create(workdir)
                .map_err(|e| SandlockError::Sandbox(SandboxError::Branch(e)))?;
            Some(Box::new(branch))
        }
        FsIsolation::None => None,
    };

    // Only OverlayFs needs a mount description for the child. `upper` and
    // `work` are taken to be siblings of the branch path.
    let cow_config = if let Some(ref branch) = cow_branch {
        if self.policy.fs_isolation == FsIsolation::OverlayFs {
            // Cannot fail: the OverlayFs arm above already required `workdir`.
            let workdir = self.policy.workdir.as_ref().unwrap();
            let merged = branch.branch_path().to_path_buf();
            // NOTE(review): panics if the branch path has no parent — confirm
            // that `OverlayBranch` always returns a nested path.
            let branch_dir = merged.parent().unwrap();
            let upper = branch_dir.join("upper");
            let work = branch_dir.join("work");
            Some(CowConfig {
                merged,
                upper,
                work,
                lowers: vec![workdir.clone()],
            })
        } else {
            None
        }
    } else {
        None
    };

    // Capture pipes as (read end, write end) pairs.
    let (stdout_r, stderr_r) = if capture {
        let mut stdout_fds = [0i32; 2];
        let mut stderr_fds = [0i32; 2];
        if unsafe { libc::pipe2(stdout_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
            return Err(SandboxError::Io(std::io::Error::last_os_error()).into());
        }
        if unsafe { libc::pipe2(stderr_fds.as_mut_ptr(), libc::O_CLOEXEC) } < 0 {
            // NOTE(review): errno is read after these close() calls, which
            // may overwrite the pipe2 error.
            unsafe {
                libc::close(stdout_fds[0]);
                libc::close(stdout_fds[1]);
            }
            return Err(SandboxError::Io(std::io::Error::last_os_error()).into());
        }
        (
            Some((
                unsafe { OwnedFd::from_raw_fd(stdout_fds[0]) },
                unsafe { OwnedFd::from_raw_fd(stdout_fds[1]) },
            )),
            Some((
                unsafe { OwnedFd::from_raw_fd(stderr_fds[0]) },
                unsafe { OwnedFd::from_raw_fd(stderr_fds[1]) },
            )),
        )
    } else {
        (None, None)
    };

    let pid = unsafe { libc::fork() };
    if pid < 0 {
        return Err(SandboxError::Fork(std::io::Error::last_os_error()).into());
    }

    if pid == 0 {
        // ---- child ----
        // Caller-supplied stdio first ...
        if let Some((stdin_fd, stdout_fd, stderr_fd)) = self.io_overrides {
            if let Some(fd) = stdin_fd {
                unsafe { libc::dup2(fd, 0) };
            }
            if let Some(fd) = stdout_fd {
                unsafe { libc::dup2(fd, 1) };
            }
            if let Some(fd) = stderr_fd {
                unsafe { libc::dup2(fd, 2) };
            }
        }

        // ... then the capture pipes, which take precedence for fds 1 and 2.
        if let Some((_, ref stdout_w)) = stdout_r {
            unsafe { libc::dup2(stdout_w.as_raw_fd(), 1) };
        }
        if let Some((_, ref stderr_w)) = stderr_r {
            unsafe { libc::dup2(stderr_w.as_raw_fd(), 2) };
        }
        // Close the child's copies of all four original pipe fds.
        drop(stdout_r);
        drop(stderr_r);

        // NOTE(review): `confine_child` is presumably diverging (exec or
        // exit); if it ever returned, the child would continue into the
        // parent-side code below — confirm.
        context::confine_child(&self.policy, &c_cmd, &pipes, cow_config.as_ref(), nested);
    }

    // ---- parent ----
    self.cow_branch = cow_branch;

    // Close the pipe ends that belong to the child.
    drop(pipes.notif_w);
    drop(pipes.ready_r);

    // Keep only the read ends; dropping the tuples closes the write ends.
    self._stdout_read = stdout_r.map(|(r, _w)| r);
    self._stderr_read = stderr_r.map(|(r, _w)| r);

    // From here on `Drop` kills and reaps the child if we return early.
    self.child_pid = Some(pid);
    self.state = SandboxState::Running;

    // pidfd is optional; a failure selects the /proc fallback below.
    let pidfd = match syscall::pidfd_open(pid as u32, 0) {
        Ok(fd) => Some(fd),
        Err(_) => None,
    };

    // The child reports the number of its seccomp notification fd.
    let notif_fd_num = read_u32_fd(pipes.notif_r.as_raw_fd())
        .map_err(|e| SandboxError::Child(format!("read notif fd from child: {}", e)))?;

    // A reported value of 0 is treated as "no notification fd", i.e. nested.
    let is_nested = notif_fd_num == 0;

    // Obtain our own handle on the child's notification fd.
    let notif_fd = if is_nested {
        None
    } else if let Some(ref pfd) = pidfd {
        Some(syscall::pidfd_getfd(pfd, notif_fd_num as i32, 0)
            .map_err(|e| SandboxError::Child(format!("pidfd_getfd: {}", e)))?)
    } else {
        // Without a pidfd, open the fd through procfs instead.
        let path = format!("/proc/{}/fd/{}", pid, notif_fd_num);
        let cpath = CString::new(path).unwrap();
        let raw = unsafe { libc::open(cpath.as_ptr(), libc::O_RDWR) };
        if raw < 0 {
            return Err(
                SandboxError::Child("failed to open notif fd from /proc".into()).into(),
            );
        }
        Some(unsafe { OwnedFd::from_raw_fd(raw) })
    };

    if let Some(notif_fd) = notif_fd {
        // Best-effort vDSO patch before exec; a failure is only logged.
        if self.policy.time_start.is_some() || self.policy.random_seed.is_some() {
            let time_offset = self.policy.time_start.map(|t| crate::time::calculate_time_offset(t));
            if let Err(e) = crate::vdso::patch(pid, time_offset, self.policy.random_seed.is_some()) {
                eprintln!("sandlock: pre-exec vDSO patching failed (will retry after exec): {}", e);
            }
        }

        // NOTE(review): `calculate_time_offset` is evaluated three times in
        // this function; if it depends on the current clock the three values
        // can differ slightly — confirm whether one shared value is intended.
        let time_offset_val = self.policy.time_start
            .map(|t| crate::time::calculate_time_offset(t))
            .unwrap_or(0);

        // Static supervisor configuration derived from the policy.
        let notif_policy = NotifPolicy {
            max_memory_bytes: self.policy.max_memory.map(|m| m.0).unwrap_or(0),
            max_processes: self.policy.max_processes,
            has_memory_limit: self.policy.max_memory.is_some(),
            has_net_allowlist: !self.policy.net_allow_hosts.is_empty()
                || self.policy.policy_fn.is_some(),
            has_random_seed: self.policy.random_seed.is_some(),
            has_time_start: self.policy.time_start.is_some(),
            time_offset: time_offset_val,
            num_cpus: self.policy.num_cpus,
            has_proc_virt: self.policy.num_cpus.is_some() || self.policy.max_memory.is_some() || self.policy.isolate_pids || self.policy.port_remap,
            isolate_pids: self.policy.isolate_pids,
            port_remap: self.policy.port_remap,
            cow_enabled: self.policy.workdir.is_some() && self.policy.fs_isolation == FsIsolation::None,
            chroot_root: self.policy.chroot.clone(),
            chroot_readable: self.policy.fs_readable.clone(),
            chroot_writable: self.policy.fs_writable.clone(),
            deterministic_dirs: self.policy.deterministic_dirs,
            hostname: self.policy.hostname.clone(),
        };

        use rand::SeedableRng;
        use rand_chacha::ChaCha8Rng;

        // Seeded RNG, only when the policy supplies a seed.
        let random_state = self.policy.random_seed.map(|seed| ChaCha8Rng::seed_from_u64(seed));
        let time_offset = self.policy.time_start.map(|t| crate::time::calculate_time_offset(t));

        // Mutable supervisor state shared with the notification task.
        let mut sup_state = SupervisorState::new(
            notif_policy.max_memory_bytes,
            notif_policy.max_processes,
            time_offset,
            random_state,
        );
        sup_state.network_policy = if self.policy.net_allow_hosts.is_empty() {
            crate::seccomp::notif::NetworkPolicy::Unrestricted
        } else {
            crate::seccomp::notif::NetworkPolicy::AllowList(resolved_ips)
        };

        // Only the raw fd number is stored; the owning `OwnedFd` is moved
        // into `self.pidfd` at the end of this function.
        if let Some(ref pfd) = pidfd {
            use std::os::unix::io::AsRawFd;
            sup_state.child_pidfd = Some(pfd.as_raw_fd());
        }

        // With a workdir but no FsIsolation backend, a seccomp-based branch
        // is created instead; a failure here is only logged.
        if self.policy.workdir.is_some() && self.policy.fs_isolation == FsIsolation::None {
            let workdir = self.policy.workdir.as_ref().unwrap();
            let storage = self.policy.fs_storage.as_deref();
            match crate::cow::seccomp::SeccompCowBranch::create(workdir, storage) {
                Ok(branch) => { sup_state.cow_branch = Some(branch); }
                Err(e) => { eprintln!("sandlock: seccomp COW branch creation failed: {}", e); }
            }
        }

        // Optional user callback that can adjust the live policy. `ceiling`
        // is a snapshot of the initial values handed to the callback machinery.
        if let Some(ref callback) = self.policy.policy_fn {
            let live = crate::policy_fn::LivePolicy {
                allowed_ips: match &sup_state.network_policy {
                    crate::seccomp::notif::NetworkPolicy::AllowList(ips) => ips.clone(),
                    crate::seccomp::notif::NetworkPolicy::Unrestricted => std::collections::HashSet::new(),
                },
                max_memory_bytes: notif_policy.max_memory_bytes,
                max_processes: notif_policy.max_processes,
            };
            let ceiling = live.clone();
            let live = std::sync::Arc::new(std::sync::RwLock::new(live));
            let denied_paths = sup_state.denied_paths.clone();
            let pid_overrides = sup_state.pid_ip_overrides.clone();
            sup_state.live_policy = Some(live.clone());
            let tx = crate::policy_fn::spawn_policy_fn(
                callback.clone(), live, ceiling, pid_overrides, denied_paths,
            );
            sup_state.policy_event_tx = Some(tx);
        }

        // Keep one handle for freeze/thaw/Drop, give the other to the task.
        let sup_state = Arc::new(Mutex::new(sup_state));
        self.supervisor_state = Some(Arc::clone(&sup_state));

        self.notif_handle = Some(tokio::spawn(
            notif::supervisor(notif_fd, notif_policy, sup_state),
        ));
    }

    // Duty-cycle throttling is only started for limits below 100.
    if let Some(cpu_pct) = self.policy.max_cpu {
        if cpu_pct < 100 {
            let child_pid = pid;
            self.throttle_handle = Some(tokio::spawn(throttle_cpu(child_pid, cpu_pct)));
        }
    }

    // Supervisor-side set-up is complete: send the ready value to the child.
    write_u32_fd(pipes.ready_w.as_raw_fd(), 1)
        .map_err(|e| SandboxError::Child(format!("write ready signal: {}", e)))?;

    self.pidfd = pidfd;

    Ok(())
}
884}
885
886impl Drop for Sandbox {
891 fn drop(&mut self) {
892 if let Some(pid) = self.child_pid {
893 if matches!(self.state, SandboxState::Running | SandboxState::Paused) {
894 unsafe { libc::killpg(pid, libc::SIGKILL) };
896 let mut status: i32 = 0;
898 unsafe { libc::waitpid(pid, &mut status, 0) };
899 }
900 }
901
902 if let Some(h) = self.notif_handle.take() {
903 h.abort();
904 }
905 if let Some(h) = self.throttle_handle.take() {
906 h.abort();
907 }
908
909 if let Some(ref branch) = self.cow_branch {
911 let is_error = matches!(
912 self.state,
913 SandboxState::Stopped(ref s) if !matches!(s, ExitStatus::Code(0))
914 );
915 let action = if is_error {
916 &self.policy.on_error
917 } else {
918 &self.policy.on_exit
919 };
920 match action {
921 BranchAction::Commit => { let _ = branch.commit(); }
922 BranchAction::Abort => { let _ = branch.abort(); }
923 BranchAction::Keep => {} }
925 }
926
927 if let Some(ref state) = self.supervisor_state {
929 let Ok(mut st) = state.try_lock() else { return; };
930 if let Some(ref mut cow) = st.cow_branch {
931 let is_error = matches!(
932 self.state,
933 SandboxState::Stopped(ref s) if !matches!(s, ExitStatus::Code(0))
934 );
935 let action = if is_error {
936 &self.policy.on_error
937 } else {
938 &self.policy.on_exit
939 };
940 match action {
941 BranchAction::Commit => { let _ = cow.commit(); }
942 BranchAction::Abort => { let _ = cow.abort(); }
943 BranchAction::Keep => {}
944 }
945 }
946 }
947 }
948}
949
950async fn throttle_cpu(pid: i32, cpu_pct: u8) {
956 let period = Duration::from_millis(100);
957 let run_time = period * cpu_pct as u32 / 100;
958 let stop_time = period - run_time;
959
960 loop {
961 tokio::time::sleep(run_time).await;
962 if unsafe { libc::killpg(pid, libc::SIGSTOP) } < 0 {
963 break;
964 }
965 tokio::time::sleep(stop_time).await;
966 if unsafe { libc::killpg(pid, libc::SIGCONT) } < 0 {
967 break;
968 }
969 }
970}
971
/// Reads from `fd` until `buf` is full, end-of-file is reached, or a read
/// fails.
///
/// Whatever part of `buf` could not be filled is left untouched, and no
/// error is reported to the caller. Interrupted reads are retried, so a
/// signal arriving mid-read does not truncate the data. A negative `fd` is
/// ignored.
///
/// The descriptor is only borrowed: it is never closed here.
fn read_exact(fd: i32, buf: &mut [u8]) {
    use std::io::{ErrorKind, Read};

    if fd < 0 {
        return;
    }

    // SAFETY: the `File` is wrapped in `ManuallyDrop`, so it never closes
    // `fd`; ownership stays with the caller, who keeps it open for the
    // duration of this call.
    let mut source = std::mem::ManuallyDrop::new(unsafe { std::fs::File::from_raw_fd(fd) });

    let mut filled = 0;
    while filled < buf.len() {
        match source.read(&mut buf[filled..]) {
            Ok(0) => break, // EOF
            Ok(count) => filled += count,
            Err(err) if err.kind() == ErrorKind::Interrupted => continue,
            Err(_) => break,
        }
    }
}
987
/// Consumes `fd`, reads it until end-of-file, and returns the bytes read.
///
/// A read error is swallowed: whatever was collected before the error is
/// returned. The descriptor is closed when the function returns.
fn read_fd_to_end(fd: OwnedFd) -> Vec<u8> {
    use std::io::Read;

    let mut source = std::fs::File::from(fd);
    let mut collected = Vec::new();
    // Best effort: on error, keep what has been read so far.
    let _ = source.read_to_end(&mut collected);
    collected
}
995
996fn wait_status_to_exit(status: i32) -> ExitStatus {
997 if libc::WIFEXITED(status) {
998 ExitStatus::Code(libc::WEXITSTATUS(status))
999 } else if libc::WIFSIGNALED(status) {
1000 let sig = libc::WTERMSIG(status);
1001 if sig == libc::SIGKILL {
1002 ExitStatus::Killed
1003 } else {
1004 ExitStatus::Signal(sig)
1005 }
1006 } else {
1007 ExitStatus::Killed
1008 }
1009}