1use std::collections::HashMap;
39use std::ffi::CString;
40use std::io::{self, Write as _};
41use std::os::fd::{AsRawFd, OwnedFd, RawFd};
42use std::path::PathBuf;
43use std::time::{Duration, Instant};
44
45use mio::unix::SourceFd;
46use mio::{Events as MioEvents, Interest, Poll, Token};
47use rustix::io::Errno;
48use rustix::process::{Pid, PidfdFlags, Signal, pidfd_open, pidfd_send_signal};
49use thiserror::Error;
50
51use evalbox_sys::seccomp::{
52 DEFAULT_WHITELIST, NOTIFY_FS_SYSCALLS, SockFprog, build_notify_filter, build_whitelist_filter,
53};
54use evalbox_sys::seccomp_notify::seccomp_set_mode_filter_listener;
55use evalbox_sys::{check, last_errno, seccomp::seccomp_set_mode_filter};
56
57use crate::isolation::{LockdownError, close_extra_fds, lockdown};
58use crate::monitor::{Output, Status, monitor, set_nonblocking, wait_for_exit, write_stdin};
59use crate::notify::scm_rights;
60use crate::plan::{Mount, NotifyMode, Plan};
61use crate::resolve::{ResolvedBinary, resolve_binary};
62use crate::validate::validate_cmd;
63use crate::workspace::Workspace;
64
/// Errors produced while preparing, launching, or supervising a sandbox.
#[derive(Debug, Error)]
pub enum ExecutorError {
    /// The host system check (`check::check`) reported a problem.
    #[error("system check: {0}")]
    SystemCheck(String),

    /// The command line was rejected by `validate_cmd`.
    #[error("validation: {0}")]
    Validation(#[from] crate::validate::ValidationError),

    /// Creating or populating the workspace failed; also used when the notify
    /// socket pair cannot be created.
    #[error("workspace: {0}")]
    Workspace(io::Error),

    /// `fork` failed.
    #[error("fork: {0}")]
    Fork(Errno),

    /// Applying the isolation lockdown (or installing the seccomp whitelist)
    /// failed in the child.
    #[error("lockdown: {0}")]
    Lockdown(#[from] LockdownError),

    /// Preparing for or performing `execve` failed (stdio redirection, `chdir`,
    /// argument conversion, or the exec itself).
    #[error("exec: {0}")]
    Exec(Errno),

    /// Parent-side I/O with the child failed (stdin write, non-blocking setup,
    /// or the blocking monitor loop).
    #[error("monitor: {0}")]
    Monitor(io::Error),

    /// The parent/child start-up handshake failed or timed out.
    #[error("child setup: {0}")]
    ChildSetup(String),

    /// `pidfd_open` failed for the freshly forked child.
    #[error("pidfd: {0}")]
    Pidfd(Errno),

    /// The binary named by the command could not be resolved.
    #[error("command not found: {0}")]
    CommandNotFound(String),

    /// Installing the seccomp notify filter or passing its listener fd failed.
    #[error("seccomp notify: {0}")]
    SeccompNotify(String),

    /// Any other I/O error (e.g. registering fds with the event loop).
    #[error("io: {0}")]
    Io(#[from] io::Error),
}
104
/// Identifier handed out by the executor for each spawned sandbox.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SandboxId(pub usize);

impl std::fmt::Display for SandboxId {
    /// Renders the id as `Sandbox(<n>)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(index) = self;
        f.write_fmt(format_args!("Sandbox({index})"))
    }
}
113
/// Events reported to the caller by `Executor::poll`.
#[derive(Debug)]
pub enum Event {
    /// The sandbox finished before its deadline; `output` holds everything collected.
    Completed { id: SandboxId, output: Output },
    /// The sandbox was finalized after its deadline had passed.
    Timeout { id: SandboxId, output: Output },
    /// A chunk of bytes read from the sandbox's stdout pipe.
    Stdout { id: SandboxId, data: Vec<u8> },
    /// A chunk of bytes read from the sandbox's stderr pipe.
    Stderr { id: SandboxId, data: Vec<u8> },
}
126
127struct ExecutionInfo {
128 binary_path: PathBuf,
129 extra_mounts: Vec<Mount>,
130}
131
132impl ExecutionInfo {
133 fn from_resolved(resolved: ResolvedBinary) -> Self {
134 let extra_mounts = resolved
135 .required_mounts
136 .into_iter()
137 .map(|m| Mount::bind(&m.source, &m.target))
138 .collect();
139 Self {
140 binary_path: resolved.path,
141 extra_mounts,
142 }
143 }
144
145 fn from_plan(plan: &Plan) -> Option<Self> {
146 plan.binary_path.as_ref().map(|path| Self {
147 binary_path: path.clone(),
148 extra_mounts: Vec::new(),
149 })
150 }
151}
152
/// Parent-side handles for a sandbox started by `spawn_sandbox`.
// NOTE(review): `dead_code` is allowed presumably because some fields are held
// only to keep their resource alive and are never read — confirm.
#[allow(dead_code)]
struct SpawnedSandbox {
    /// Process file descriptor of the child; used to signal it and wait for its exit.
    pidfd: OwnedFd,
    /// Write end of the child's stdin pipe, or `-1` once it has been closed.
    stdin_fd: RawFd,
    /// Read end of the child's stdout pipe (set non-blocking by `spawn_sandbox`).
    stdout_fd: RawFd,
    /// Read end of the child's stderr pipe (set non-blocking by `spawn_sandbox`).
    stderr_fd: RawFd,
    /// Seccomp notify listener received from the child, when notify mode is enabled.
    notify_fd: Option<OwnedFd>,
    /// Workspace backing the sandbox. Wrapped in `ManuallyDrop`, so its destructor
    /// does not run automatically; the pipe fds above are closed by hand instead.
    workspace: std::mem::ManuallyDrop<Workspace>,
}
167
/// Bookkeeping the executor keeps for every running sandbox.
struct SandboxState {
    /// Handles for the child process and its pipes.
    spawned: SpawnedSandbox,
    /// Instant after which the sandbox is killed.
    deadline: Instant,
    /// Instant the sandbox was registered; used to compute the run duration.
    start: Instant,
    /// Bytes collected from stdout so far.
    stdout: Vec<u8>,
    /// Bytes collected from stderr so far.
    stderr: Vec<u8>,
    /// Limit on the combined size of `stdout` and `stderr`, in bytes.
    max_output: u64,
    /// Set once the pidfd became readable or the sandbox was killed on timeout.
    pidfd_ready: bool,
    /// Set once stdout hit end-of-file or a read error.
    stdout_closed: bool,
    /// Set once stderr hit end-of-file or a read error.
    stderr_closed: bool,
}
180
181impl SandboxState {
182 fn is_done(&self) -> bool {
183 self.pidfd_ready && self.stdout_closed && self.stderr_closed
184 }
185}
186
/// Number of low bits of a poll token that encode which fd of a sandbox fired.
const TOKEN_TYPE_BITS: usize = 2;
/// Mask selecting the fd-kind bits; derived from `TOKEN_TYPE_BITS` so the two
/// can never drift apart.
const TOKEN_TYPE_MASK: usize = (1 << TOKEN_TYPE_BITS) - 1;
/// Token kind: the sandbox's pidfd.
const TOKEN_TYPE_PIDFD: usize = 0;
/// Token kind: the sandbox's stdout pipe.
const TOKEN_TYPE_STDOUT: usize = 1;
/// Token kind: the sandbox's stderr pipe.
const TOKEN_TYPE_STDERR: usize = 2;

// Every kind must fit in the reserved bits, otherwise it would bleed into the id.
const _: () = assert!(
    TOKEN_TYPE_PIDFD <= TOKEN_TYPE_MASK
        && TOKEN_TYPE_STDOUT <= TOKEN_TYPE_MASK
        && TOKEN_TYPE_STDERR <= TOKEN_TYPE_MASK
);
193
194fn encode_token(sandbox_id: usize, token_type: usize) -> Token {
195 Token((sandbox_id << TOKEN_TYPE_BITS) | token_type)
196}
197
198fn decode_token(token: Token) -> (SandboxId, usize) {
199 let raw = token.0;
200 (SandboxId(raw >> TOKEN_TYPE_BITS), raw & TOKEN_TYPE_MASK)
201}
202
/// Runs sandboxes either one at a time (`run`) or concurrently through an
/// event loop (`spawn` + `poll`).
pub struct Executor {
    /// Readiness poller that the pidfd and output pipes of every sandbox are registered with.
    poll: Poll,
    /// State of every sandbox that has been spawned and not yet reported as finished.
    sandboxes: HashMap<SandboxId, SandboxState>,
    /// Index used for the next `SandboxId`.
    next_id: usize,
    /// Reusable buffer for readiness events returned by `poll`.
    mio_events: MioEvents,
}
209
210impl Executor {
211 pub fn new() -> io::Result<Self> {
212 Ok(Self {
213 poll: Poll::new()?,
214 sandboxes: HashMap::new(),
215 next_id: 0,
216 mio_events: MioEvents::with_capacity(64),
217 })
218 }
219
220 pub fn run(plan: Plan) -> Result<Output, ExecutorError> {
222 let cmd_refs: Vec<&str> = plan.cmd.iter().map(|s| s.as_str()).collect();
223 validate_cmd(&cmd_refs).map_err(ExecutorError::Validation)?;
224
225 if let Err(e) = check::check() {
226 return Err(ExecutorError::SystemCheck(e.to_string()));
227 }
228
229 let exec_info = if let Some(info) = ExecutionInfo::from_plan(&plan) {
230 info
231 } else {
232 let resolved = resolve_binary(&plan.cmd[0])
233 .map_err(|e| ExecutorError::CommandNotFound(e.to_string()))?;
234 ExecutionInfo::from_resolved(resolved)
235 };
236
237 let workspace = Workspace::with_prefix("evalbox-").map_err(ExecutorError::Workspace)?;
238
239 workspace
240 .setup_sandbox_dirs()
241 .map_err(ExecutorError::Workspace)?;
242 for file in &plan.user_files {
243 let work_path = format!("work/{}", file.path);
244 workspace
245 .write_file(&work_path, &file.content, file.executable)
246 .map_err(ExecutorError::Workspace)?;
247 }
248
249 let notify_sockets = if plan.notify_mode != NotifyMode::Disabled {
251 Some(scm_rights::create_socketpair().map_err(ExecutorError::Workspace)?)
252 } else {
253 None
254 };
255
256 let child_pid = unsafe { libc::fork() };
257 if child_pid < 0 {
258 return Err(ExecutorError::Fork(last_errno()));
259 }
260
261 if child_pid == 0 {
262 let child_socket = notify_sockets.map(|(_, child)| child);
264 match child_process(&workspace, &plan, &exec_info, child_socket.as_ref()) {
265 Ok(()) => unsafe { libc::_exit(127) },
266 Err(e) => {
267 writeln!(io::stderr(), "sandbox error: {e}").ok();
268 unsafe { libc::_exit(126) }
269 }
270 }
271 }
272
273 let pid = unsafe { Pid::from_raw_unchecked(child_pid) };
274 let pidfd = pidfd_open(pid, PidfdFlags::empty()).map_err(ExecutorError::Pidfd)?;
275
276 let notify_fd = if let Some((parent_socket, _)) = notify_sockets {
278 poll_or_kill(
279 parent_socket.as_raw_fd(),
280 child_pid,
281 "timeout waiting for notify fd",
282 )?;
283 Some(
284 scm_rights::recv_fd(parent_socket.as_raw_fd())
285 .map_err(|e| ExecutorError::SeccompNotify(e.to_string()))?,
286 )
287 } else {
288 None
289 };
290
291 blocking_parent(child_pid, pidfd, notify_fd, workspace, plan)
292 }
293
294 pub fn spawn(&mut self, plan: Plan) -> Result<SandboxId, ExecutorError> {
296 let id = SandboxId(self.next_id);
297 self.next_id += 1;
298
299 let timeout = plan.timeout;
300 let max_output = plan.max_output;
301
302 let spawned = spawn_sandbox(plan)?;
303
304 let pidfd_token = encode_token(id.0, TOKEN_TYPE_PIDFD);
306 let stdout_token = encode_token(id.0, TOKEN_TYPE_STDOUT);
307 let stderr_token = encode_token(id.0, TOKEN_TYPE_STDERR);
308
309 self.poll.registry().register(
310 &mut SourceFd(&spawned.pidfd.as_raw_fd()),
311 pidfd_token,
312 Interest::READABLE,
313 )?;
314 self.poll.registry().register(
315 &mut SourceFd(&spawned.stdout_fd),
316 stdout_token,
317 Interest::READABLE,
318 )?;
319 self.poll.registry().register(
320 &mut SourceFd(&spawned.stderr_fd),
321 stderr_token,
322 Interest::READABLE,
323 )?;
324
325 let state = SandboxState {
326 spawned,
327 deadline: Instant::now() + timeout,
328 start: Instant::now(),
329 stdout: Vec::new(),
330 stderr: Vec::new(),
331 max_output,
332 pidfd_ready: false,
333 stdout_closed: false,
334 stderr_closed: false,
335 };
336
337 self.sandboxes.insert(id, state);
338 Ok(id)
339 }
340
341 pub fn poll(&mut self, events: &mut Vec<Event>, timeout: Option<Duration>) -> io::Result<()> {
343 events.clear();
344
345 if self.sandboxes.is_empty() {
346 return Ok(());
347 }
348
349 let effective_timeout = self.calculate_timeout(timeout);
350 self.poll.poll(&mut self.mio_events, effective_timeout)?;
351
352 let mut pidfd_ready: Vec<SandboxId> = Vec::new();
353 let mut read_stdout: Vec<SandboxId> = Vec::new();
354 let mut read_stderr: Vec<SandboxId> = Vec::new();
355
356 for mio_event in &self.mio_events {
357 let (sandbox_id, token_type) = decode_token(mio_event.token());
358 if self.sandboxes.contains_key(&sandbox_id) {
359 match token_type {
360 TOKEN_TYPE_PIDFD => pidfd_ready.push(sandbox_id),
361 TOKEN_TYPE_STDOUT => read_stdout.push(sandbox_id),
362 TOKEN_TYPE_STDERR => read_stderr.push(sandbox_id),
363 _ => {}
364 }
365 }
366 }
367
368 for id in pidfd_ready {
369 if let Some(state) = self.sandboxes.get_mut(&id) {
370 state.pidfd_ready = true;
371 }
372 }
373
374 for id in read_stdout {
375 self.read_pipe(id, true, events);
376 }
377
378 for id in read_stderr {
379 self.read_pipe(id, false, events);
380 }
381
382 self.check_completions(events)?;
383 Ok(())
384 }
385
    /// Returns the number of sandboxes currently tracked by the executor, i.e.
    /// spawned and not yet reported as completed or timed out.
    pub fn active_count(&self) -> usize {
        self.sandboxes.len()
    }
389
390 pub fn kill(&mut self, id: SandboxId) -> io::Result<()> {
391 if let Some(state) = self.sandboxes.get(&id) {
392 pidfd_send_signal(&state.spawned.pidfd, Signal::KILL)?;
393 }
394 Ok(())
395 }
396
397 pub fn write_stdin(&mut self, id: SandboxId, data: &[u8]) -> io::Result<usize> {
399 if let Some(state) = self.sandboxes.get(&id) {
400 let fd = state.spawned.stdin_fd;
401 if fd < 0 {
402 return Err(io::Error::new(io::ErrorKind::BrokenPipe, "stdin closed"));
403 }
404 let ret = unsafe { libc::write(fd, data.as_ptr().cast(), data.len()) };
405 if ret < 0 {
406 Err(io::Error::last_os_error())
407 } else {
408 Ok(ret as usize)
409 }
410 } else {
411 Err(io::Error::new(io::ErrorKind::NotFound, "sandbox not found"))
412 }
413 }
414
415 pub fn close_stdin(&mut self, id: SandboxId) -> io::Result<()> {
417 if let Some(state) = self.sandboxes.get_mut(&id) {
418 if state.spawned.stdin_fd >= 0 {
419 unsafe { libc::close(state.spawned.stdin_fd) };
420 state.spawned.stdin_fd = -1;
421 }
422 }
423 Ok(())
424 }
425
426 fn calculate_timeout(&self, user_timeout: Option<Duration>) -> Option<Duration> {
427 let now = Instant::now();
428 let nearest_deadline = self.sandboxes.values().map(|s| s.deadline).min();
429
430 match (user_timeout, nearest_deadline) {
431 (Some(user), Some(deadline)) => Some(user.min(deadline.saturating_duration_since(now))),
432 (Some(user), None) => Some(user),
433 (None, Some(deadline)) => Some(deadline.saturating_duration_since(now)),
434 (None, None) => None,
435 }
436 }
437
438 fn read_pipe(&mut self, sandbox_id: SandboxId, is_stdout: bool, events: &mut Vec<Event>) {
439 let Some(state) = self.sandboxes.get_mut(&sandbox_id) else {
440 return;
441 };
442
443 let fd = if is_stdout {
444 state.spawned.stdout_fd
445 } else {
446 state.spawned.stderr_fd
447 };
448
449 let mut buf = [0u8; 4096];
450 loop {
451 let ret = unsafe { libc::read(fd, buf.as_mut_ptr().cast(), buf.len()) };
452
453 if ret < 0 {
454 let err = io::Error::last_os_error();
455 if err.kind() == io::ErrorKind::WouldBlock {
456 break;
457 }
458 if is_stdout {
459 state.stdout_closed = true;
460 } else {
461 state.stderr_closed = true;
462 }
463 break;
464 } else if ret == 0 {
465 if is_stdout {
466 state.stdout_closed = true;
467 } else {
468 state.stderr_closed = true;
469 }
470 break;
471 } else {
472 let n = ret as usize;
473 let data = buf[..n].to_vec();
474
475 if is_stdout {
476 state.stdout.extend_from_slice(&data);
477 events.push(Event::Stdout {
478 id: sandbox_id,
479 data,
480 });
481 } else {
482 state.stderr.extend_from_slice(&data);
483 events.push(Event::Stderr {
484 id: sandbox_id,
485 data,
486 });
487 }
488
489 let total = state.stdout.len() + state.stderr.len();
490 if total > state.max_output as usize {
491 pidfd_send_signal(&state.spawned.pidfd, Signal::KILL).ok();
492 break;
493 }
494 }
495 }
496 }
497
498 fn check_completions(&mut self, events: &mut Vec<Event>) -> io::Result<()> {
499 let now = Instant::now();
500 let mut to_remove = Vec::new();
501
502 for (&id, state) in &mut self.sandboxes {
503 if now >= state.deadline && !state.pidfd_ready {
504 pidfd_send_signal(&state.spawned.pidfd, Signal::KILL).ok();
505 state.pidfd_ready = true;
506 }
507 if state.is_done() {
508 to_remove.push(id);
509 }
510 }
511
512 for id in to_remove {
513 if let Some(state) = self.sandboxes.remove(&id) {
514 self.poll
515 .registry()
516 .deregister(&mut SourceFd(&state.spawned.pidfd.as_raw_fd()))
517 .ok();
518 self.poll
519 .registry()
520 .deregister(&mut SourceFd(&state.spawned.stdout_fd))
521 .ok();
522 self.poll
523 .registry()
524 .deregister(&mut SourceFd(&state.spawned.stderr_fd))
525 .ok();
526
527 let (exit_code, signal) = wait_for_exit(state.spawned.pidfd.as_raw_fd())?;
528 let duration = state.start.elapsed();
529 let timed_out = Instant::now() >= state.deadline;
530
531 let status = if timed_out {
532 Status::Timeout
533 } else if signal.is_some() {
534 Status::Signaled
535 } else if state.stdout.len() + state.stderr.len() > state.max_output as usize {
536 Status::OutputLimitExceeded
537 } else {
538 Status::Exited
539 };
540
541 let output = Output {
542 stdout: state.stdout,
543 stderr: state.stderr,
544 status,
545 duration,
546 exit_code,
547 signal,
548 };
549
550 if timed_out {
551 events.push(Event::Timeout { id, output });
552 } else {
553 events.push(Event::Completed { id, output });
554 }
555 }
556 }
557
558 Ok(())
559 }
560}
561
562fn close_parent_pipe_ends(workspace: &Workspace) {
564 unsafe {
565 libc::close(workspace.pipes.stdin.read.as_raw_fd());
566 libc::close(workspace.pipes.stdout.write.as_raw_fd());
567 libc::close(workspace.pipes.stderr.write.as_raw_fd());
568 }
569}
570
571fn poll_or_kill(fd: RawFd, child_pid: libc::pid_t, msg: &str) -> Result<(), ExecutorError> {
573 let mut pfd = libc::pollfd {
574 fd,
575 events: libc::POLLIN,
576 revents: 0,
577 };
578 if unsafe { libc::poll(&mut pfd, 1, 30000) } <= 0 {
579 unsafe { libc::kill(child_pid, libc::SIGKILL) };
580 return Err(ExecutorError::ChildSetup(msg.into()));
581 }
582 Ok(())
583}
584
585fn sync_with_child(workspace: &Workspace, child_pid: libc::pid_t) -> Result<(), ExecutorError> {
587 let child_ready_fd = workspace.pipes.sync.child_ready_fd();
588 poll_or_kill(child_ready_fd, child_pid, "timeout waiting for child")?;
589
590 let mut value: u64 = 0;
591 if unsafe { libc::read(child_ready_fd, (&mut value as *mut u64).cast(), 8) } != 8 {
592 unsafe { libc::kill(child_pid, libc::SIGKILL) };
593 return Err(ExecutorError::ChildSetup("eventfd read failed".into()));
594 }
595
596 let parent_done_fd = workspace.pipes.sync.parent_done_fd();
597 let signal_value: u64 = 1;
598 if unsafe { libc::write(parent_done_fd, (&signal_value as *const u64).cast(), 8) } != 8 {
599 unsafe { libc::kill(child_pid, libc::SIGKILL) };
600 return Err(ExecutorError::ChildSetup("eventfd write failed".into()));
601 }
602
603 Ok(())
604}
605
606fn spawn_sandbox(plan: Plan) -> Result<SpawnedSandbox, ExecutorError> {
607 let cmd_refs: Vec<&str> = plan.cmd.iter().map(|s| s.as_str()).collect();
608 validate_cmd(&cmd_refs).map_err(ExecutorError::Validation)?;
609
610 if let Err(e) = check::check() {
611 return Err(ExecutorError::SystemCheck(e.to_string()));
612 }
613
614 let exec_info = if let Some(info) = ExecutionInfo::from_plan(&plan) {
615 info
616 } else {
617 let resolved = resolve_binary(&plan.cmd[0])
618 .map_err(|e| ExecutorError::CommandNotFound(e.to_string()))?;
619 ExecutionInfo::from_resolved(resolved)
620 };
621
622 let workspace = Workspace::with_prefix("evalbox-").map_err(ExecutorError::Workspace)?;
623
624 workspace
625 .setup_sandbox_dirs()
626 .map_err(ExecutorError::Workspace)?;
627 for file in &plan.user_files {
628 let work_path = format!("work/{}", file.path);
629 workspace
630 .write_file(&work_path, &file.content, file.executable)
631 .map_err(ExecutorError::Workspace)?;
632 }
633
634 let notify_sockets = if plan.notify_mode != NotifyMode::Disabled {
636 Some(scm_rights::create_socketpair().map_err(ExecutorError::Workspace)?)
637 } else {
638 None
639 };
640
641 let child_pid = unsafe { libc::fork() };
642 if child_pid < 0 {
643 return Err(ExecutorError::Fork(last_errno()));
644 }
645
646 if child_pid == 0 {
647 let child_socket = notify_sockets.map(|(_, child)| child);
648 match child_process(&workspace, &plan, &exec_info, child_socket.as_ref()) {
649 Ok(()) => unsafe { libc::_exit(127) },
650 Err(e) => {
651 writeln!(io::stderr(), "sandbox error: {e}").ok();
652 unsafe { libc::_exit(126) }
653 }
654 }
655 }
656
657 let pid = unsafe { Pid::from_raw_unchecked(child_pid) };
658 let pidfd = pidfd_open(pid, PidfdFlags::empty()).map_err(ExecutorError::Pidfd)?;
659
660 let stdin_write_fd = workspace.pipes.stdin.write.as_raw_fd();
661 let stdout_read_fd = workspace.pipes.stdout.read.as_raw_fd();
662 let stderr_read_fd = workspace.pipes.stderr.read.as_raw_fd();
663
664 close_parent_pipe_ends(&workspace);
665
666 let notify_fd = if let Some((parent_socket, _)) = notify_sockets {
668 poll_or_kill(
669 parent_socket.as_raw_fd(),
670 child_pid,
671 "timeout waiting for notify fd",
672 )?;
673 Some(
674 scm_rights::recv_fd(parent_socket.as_raw_fd())
675 .map_err(|e| ExecutorError::SeccompNotify(e.to_string()))?,
676 )
677 } else {
678 None
679 };
680
681 sync_with_child(&workspace, child_pid)?;
682
683 if let Some(ref stdin_data) = plan.stdin {
685 write_stdin(&workspace, stdin_data).map_err(ExecutorError::Monitor)?;
686 unsafe { libc::close(stdin_write_fd) };
687 }
688
689 set_nonblocking(stdout_read_fd).map_err(ExecutorError::Monitor)?;
691 set_nonblocking(stderr_read_fd).map_err(ExecutorError::Monitor)?;
692
693 unsafe {
695 libc::close(workspace.pipes.sync.child_ready_fd());
696 libc::close(workspace.pipes.sync.parent_done_fd());
697 }
698
699 Ok(SpawnedSandbox {
700 pidfd,
701 stdin_fd: if plan.stdin.is_some() {
702 -1
703 } else {
704 stdin_write_fd
705 },
706 stdout_fd: stdout_read_fd,
707 stderr_fd: stderr_read_fd,
708 notify_fd,
709 workspace: std::mem::ManuallyDrop::new(workspace),
710 })
711}
712
713fn blocking_parent(
714 child_pid: libc::pid_t,
715 pidfd: OwnedFd,
716 _notify_fd: Option<OwnedFd>,
717 workspace: Workspace,
718 plan: Plan,
719) -> Result<Output, ExecutorError> {
720 let workspace = std::mem::ManuallyDrop::new(workspace);
721
722 close_parent_pipe_ends(&workspace);
723
724 sync_with_child(&workspace, child_pid)?;
725
726 if let Some(ref stdin_data) = plan.stdin {
727 write_stdin(&workspace, stdin_data).map_err(ExecutorError::Monitor)?;
728 }
729 unsafe { libc::close(workspace.pipes.stdin.write.as_raw_fd()) };
730
731 let result = monitor(pidfd, &workspace, &plan).map_err(ExecutorError::Monitor);
732
733 unsafe {
734 libc::close(workspace.pipes.stdout.read.as_raw_fd());
735 libc::close(workspace.pipes.stderr.read.as_raw_fd());
736 libc::close(workspace.pipes.sync.child_ready_fd());
737 libc::close(workspace.pipes.sync.parent_done_fd());
738 }
739
740 result
741}
742
743fn child_process(
756 workspace: &Workspace,
757 plan: &Plan,
758 exec_info: &ExecutionInfo,
759 notify_socket: Option<&OwnedFd>,
760) -> Result<(), ExecutorError> {
761 unsafe {
763 libc::close(workspace.pipes.stdin.write.as_raw_fd());
764 libc::close(workspace.pipes.stdout.read.as_raw_fd());
765 libc::close(workspace.pipes.stderr.read.as_raw_fd());
766 }
767
768 setup_stdio(workspace)?;
770
771 let work_dir = workspace.root().join("work");
773 let work_cstr = CString::new(work_dir.to_string_lossy().as_bytes())
774 .map_err(|_| ExecutorError::Exec(Errno::INVAL))?;
775 if unsafe { libc::chdir(work_cstr.as_ptr()) } != 0 {
776 return Err(ExecutorError::Exec(last_errno()));
777 }
778
779 let extra_paths: Vec<&str> = exec_info
781 .extra_mounts
782 .iter()
783 .filter_map(|m| m.source.to_str())
784 .collect();
785 lockdown(plan, workspace.root(), &extra_paths).map_err(ExecutorError::Lockdown)?;
786
787 if plan.notify_mode != NotifyMode::Disabled {
789 let notify_filter = build_notify_filter(NOTIFY_FS_SYSCALLS);
790 let fprog = SockFprog {
791 len: notify_filter.len() as u16,
792 filter: notify_filter.as_ptr(),
793 };
794 let listener_fd = unsafe { seccomp_set_mode_filter_listener(&fprog) }.map_err(|e| {
795 ExecutorError::SeccompNotify(format!("failed to install notify filter: {e}"))
796 })?;
797
798 if let Some(sock) = notify_socket {
800 scm_rights::send_fd(sock.as_raw_fd(), listener_fd.as_raw_fd()).map_err(|e| {
801 ExecutorError::SeccompNotify(format!("failed to send listener fd: {e}"))
802 })?;
803 }
804 }
805
806 apply_seccomp(plan)?;
808
809 let child_ready_fd = workspace.pipes.sync.child_ready_fd();
811 let signal_value: u64 = 1;
812 if unsafe { libc::write(child_ready_fd, (&signal_value as *const u64).cast(), 8) } != 8 {
813 return Err(ExecutorError::ChildSetup("eventfd write failed".into()));
814 }
815
816 let parent_done_fd = workspace.pipes.sync.parent_done_fd();
818 let mut value: u64 = 0;
819 if unsafe { libc::read(parent_done_fd, (&mut value as *mut u64).cast(), 8) } != 8 {
820 return Err(ExecutorError::ChildSetup("eventfd read failed".into()));
821 }
822
823 close_extra_fds();
825
826 exec_command(plan, exec_info)
828}
829
830fn setup_stdio(workspace: &Workspace) -> Result<(), ExecutorError> {
831 let stdin_fd = workspace.pipes.stdin.read.as_raw_fd();
832 let stdout_fd = workspace.pipes.stdout.write.as_raw_fd();
833 let stderr_fd = workspace.pipes.stderr.write.as_raw_fd();
834
835 unsafe {
836 libc::close(0);
837 libc::close(1);
838 libc::close(2);
839 if libc::dup2(stdin_fd, 0) < 0 {
840 return Err(ExecutorError::Exec(last_errno()));
841 }
842 if libc::dup2(stdout_fd, 1) < 0 {
843 return Err(ExecutorError::Exec(last_errno()));
844 }
845 if libc::dup2(stderr_fd, 2) < 0 {
846 return Err(ExecutorError::Exec(last_errno()));
847 }
848 }
849 Ok(())
850}
851
852fn apply_seccomp(plan: &Plan) -> Result<(), ExecutorError> {
853 let whitelist: Vec<i64> = if let Some(ref syscalls) = plan.syscalls {
854 let mut wl: Vec<i64> = DEFAULT_WHITELIST
855 .iter()
856 .copied()
857 .filter(|s| !syscalls.denied.contains(s))
858 .collect();
859 for s in &syscalls.allowed {
860 if !wl.contains(s) {
861 wl.push(*s);
862 }
863 }
864 wl
865 } else {
866 DEFAULT_WHITELIST.to_vec()
867 };
868
869 let filter = build_whitelist_filter(&whitelist);
870 let fprog = SockFprog {
871 len: filter.len() as u16,
872 filter: filter.as_ptr(),
873 };
874 unsafe { seccomp_set_mode_filter(&fprog) }
875 .map_err(|e| ExecutorError::Lockdown(LockdownError::Seccomp(e)))?;
876 Ok(())
877}
878
879fn exec_command(plan: &Plan, exec_info: &ExecutionInfo) -> Result<(), ExecutorError> {
880 let cmd_path = CString::new(exec_info.binary_path.to_string_lossy().as_bytes())
881 .map_err(|_| ExecutorError::Exec(Errno::INVAL))?;
882
883 let mut argv: Vec<CString> = Vec::with_capacity(plan.cmd.len());
884 argv.push(cmd_path.clone());
885 for arg in plan.cmd.iter().skip(1) {
886 argv.push(CString::new(arg.as_bytes()).map_err(|_| ExecutorError::Exec(Errno::INVAL))?);
887 }
888
889 let argv_ptrs: Vec<*const libc::c_char> = argv
890 .iter()
891 .map(|s| s.as_ptr())
892 .chain(std::iter::once(std::ptr::null()))
893 .collect();
894
895 let envp: Vec<CString> = plan
896 .env
897 .iter()
898 .map(|(k, v)| CString::new(format!("{k}={v}")))
899 .collect::<Result<Vec<_>, _>>()
900 .map_err(|_| ExecutorError::Exec(Errno::INVAL))?;
901
902 let envp_ptrs: Vec<*const libc::c_char> = envp
903 .iter()
904 .map(|s| s.as_ptr())
905 .chain(std::iter::once(std::ptr::null()))
906 .collect();
907
908 unsafe { libc::execve(cmd_path.as_ptr(), argv_ptrs.as_ptr(), envp_ptrs.as_ptr()) };
909
910 Err(ExecutorError::Exec(last_errno()))
911}
912
#[cfg(test)]
mod tests {
    use super::*;

    /// A token must round-trip through `encode_token` / `decode_token`.
    #[test]
    fn token_encoding() {
        let (SandboxId(index), kind) = decode_token(encode_token(42, TOKEN_TYPE_STDOUT));
        assert_eq!(index, 42);
        assert_eq!(kind, TOKEN_TYPE_STDOUT);
    }

    /// `SandboxId` renders as `Sandbox(<n>)`.
    #[test]
    fn sandbox_id_display() {
        let rendered = SandboxId(123).to_string();
        assert_eq!(rendered, "Sandbox(123)");
    }
}