Skip to main content

evalbox_sandbox/
executor.rs

1//! Sandbox executor for both blocking and concurrent execution.
2//!
3//! This module provides the unified API for sandbox execution:
4//!
5//! - `Executor::run()` - Blocking execution (single sandbox)
6//! - `Executor::spawn()` + `poll()` - Concurrent execution (multiple sandboxes)
7//!
8//! ## Blocking Example
9//!
10//! ```ignore
11//! use evalbox_sandbox::{Executor, Plan};
12//!
13//! let output = Executor::run(Plan::new(["echo", "hello"]))?;
14//! assert_eq!(output.stdout, b"hello\n");
15//! ```
16//!
17//! ## Concurrent Example
18//!
19//! ```ignore
20//! use evalbox_sandbox::{Executor, Plan, Event};
21//!
22//! let mut executor = Executor::new()?;
23//! let id = executor.spawn(Plan::new(["echo", "hello"]))?;
24//!
25//! let mut events = Vec::new();
26//! while executor.active_count() > 0 {
27//!     executor.poll(&mut events, None)?;
28//!     for event in events.drain(..) {
29//!         match event {
30//!             Event::Completed { id, output } => println!("Done: {:?}", output),
31//!             Event::Stdout { id, data } => print!("{}", String::from_utf8_lossy(&data)),
32//!             _ => {}
33//!         }
34//!     }
35//! }
36//! ```
37
38use std::collections::HashMap;
39use std::ffi::CString;
40use std::io::{self, Write as _};
41use std::os::fd::{AsRawFd, OwnedFd, RawFd};
42use std::path::PathBuf;
43use std::time::{Duration, Instant};
44
45use mio::unix::SourceFd;
46use mio::{Events as MioEvents, Interest, Poll, Token};
47use rustix::io::Errno;
48use rustix::process::{Pid, PidfdFlags, Signal, pidfd_open, pidfd_send_signal};
49use thiserror::Error;
50
51use evalbox_sys::seccomp::{
52    DEFAULT_WHITELIST, NOTIFY_FS_SYSCALLS, SockFprog, build_notify_filter, build_whitelist_filter,
53};
54use evalbox_sys::seccomp_notify::seccomp_set_mode_filter_listener;
55use evalbox_sys::{check, last_errno, seccomp::seccomp_set_mode_filter};
56
57use crate::isolation::{LockdownError, close_extra_fds, lockdown};
58use crate::monitor::{Output, Status, monitor, set_nonblocking, wait_for_exit, write_stdin};
59use crate::notify::scm_rights;
60use crate::plan::{Mount, NotifyMode, Plan};
61use crate::resolve::{ResolvedBinary, resolve_binary};
62use crate::validate::validate_cmd;
63use crate::workspace::Workspace;
64
65/// Error during sandbox execution.
66#[derive(Debug, Error)]
67pub enum ExecutorError {
68    #[error("system check: {0}")]
69    SystemCheck(String),
70
71    #[error("validation: {0}")]
72    Validation(#[from] crate::validate::ValidationError),
73
74    #[error("workspace: {0}")]
75    Workspace(io::Error),
76
77    #[error("fork: {0}")]
78    Fork(Errno),
79
80    #[error("lockdown: {0}")]
81    Lockdown(#[from] LockdownError),
82
83    #[error("exec: {0}")]
84    Exec(Errno),
85
86    #[error("monitor: {0}")]
87    Monitor(io::Error),
88
89    #[error("child setup: {0}")]
90    ChildSetup(String),
91
92    #[error("pidfd: {0}")]
93    Pidfd(Errno),
94
95    #[error("command not found: {0}")]
96    CommandNotFound(String),
97
98    #[error("seccomp notify: {0}")]
99    SeccompNotify(String),
100
101    #[error("io: {0}")]
102    Io(#[from] io::Error),
103}
104
/// Identifier for one sandbox managed by an `Executor`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SandboxId(pub usize);

impl std::fmt::Display for SandboxId {
    /// Renders the id as `Sandbox(<n>)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(raw) = self;
        f.write_fmt(format_args!("Sandbox({raw})"))
    }
}
113
114/// Events emitted by the Executor.
115#[derive(Debug)]
116pub enum Event {
117    /// Sandbox completed execution.
118    Completed { id: SandboxId, output: Output },
119    /// Sandbox timed out and was killed.
120    Timeout { id: SandboxId, output: Output },
121    /// Stdout data available (streaming mode).
122    Stdout { id: SandboxId, data: Vec<u8> },
123    /// Stderr data available (streaming mode).
124    Stderr { id: SandboxId, data: Vec<u8> },
125}
126
127struct ExecutionInfo {
128    binary_path: PathBuf,
129    extra_mounts: Vec<Mount>,
130}
131
132impl ExecutionInfo {
133    fn from_resolved(resolved: ResolvedBinary) -> Self {
134        let extra_mounts = resolved
135            .required_mounts
136            .into_iter()
137            .map(|m| Mount::bind(&m.source, &m.target))
138            .collect();
139        Self {
140            binary_path: resolved.path,
141            extra_mounts,
142        }
143    }
144
145    fn from_plan(plan: &Plan) -> Option<Self> {
146        plan.binary_path.as_ref().map(|path| Self {
147            binary_path: path.clone(),
148            extra_mounts: Vec::new(),
149        })
150    }
151}
152
153/// A spawned sandbox that hasn't been waited on yet.
154///
155/// Some fields are never read but kept alive for RAII (fd lifetime, temp dir cleanup).
156#[allow(dead_code)]
157struct SpawnedSandbox {
158    pidfd: OwnedFd,
159    stdin_fd: RawFd,
160    stdout_fd: RawFd,
161    stderr_fd: RawFd,
162    /// Seccomp listener fd kept alive for RAII; future supervisor integration.
163    notify_fd: Option<OwnedFd>,
164    /// Workspace kept alive so temp directory isn't deleted while sandbox runs.
165    workspace: std::mem::ManuallyDrop<Workspace>,
166}
167
168/// Internal state for a running sandbox.
169struct SandboxState {
170    spawned: SpawnedSandbox,
171    deadline: Instant,
172    start: Instant,
173    stdout: Vec<u8>,
174    stderr: Vec<u8>,
175    max_output: u64,
176    pidfd_ready: bool,
177    stdout_closed: bool,
178    stderr_closed: bool,
179}
180
181impl SandboxState {
182    fn is_done(&self) -> bool {
183        self.pidfd_ready && self.stdout_closed && self.stderr_closed
184    }
185}
186
187// Token encoding: [sandbox_id: 20 bits][type: 2 bits]
188const TOKEN_TYPE_BITS: usize = 2;
189const TOKEN_TYPE_MASK: usize = 0b11;
190const TOKEN_TYPE_PIDFD: usize = 0;
191const TOKEN_TYPE_STDOUT: usize = 1;
192const TOKEN_TYPE_STDERR: usize = 2;
193
194fn encode_token(sandbox_id: usize, token_type: usize) -> Token {
195    Token((sandbox_id << TOKEN_TYPE_BITS) | token_type)
196}
197
198fn decode_token(token: Token) -> (SandboxId, usize) {
199    let raw = token.0;
200    (SandboxId(raw >> TOKEN_TYPE_BITS), raw & TOKEN_TYPE_MASK)
201}
202
203pub struct Executor {
204    poll: Poll,
205    sandboxes: HashMap<SandboxId, SandboxState>,
206    next_id: usize,
207    mio_events: MioEvents,
208}
209
210impl Executor {
211    pub fn new() -> io::Result<Self> {
212        Ok(Self {
213            poll: Poll::new()?,
214            sandboxes: HashMap::new(),
215            next_id: 0,
216            mio_events: MioEvents::with_capacity(64),
217        })
218    }
219
220    /// Execute a sandbox and wait for completion (blocking).
221    pub fn run(plan: Plan) -> Result<Output, ExecutorError> {
222        let cmd_refs: Vec<&str> = plan.cmd.iter().map(|s| s.as_str()).collect();
223        validate_cmd(&cmd_refs).map_err(ExecutorError::Validation)?;
224
225        if let Err(e) = check::check() {
226            return Err(ExecutorError::SystemCheck(e.to_string()));
227        }
228
229        let exec_info = if let Some(info) = ExecutionInfo::from_plan(&plan) {
230            info
231        } else {
232            let resolved = resolve_binary(&plan.cmd[0])
233                .map_err(|e| ExecutorError::CommandNotFound(e.to_string()))?;
234            ExecutionInfo::from_resolved(resolved)
235        };
236
237        let workspace = Workspace::with_prefix("evalbox-").map_err(ExecutorError::Workspace)?;
238
239        workspace
240            .setup_sandbox_dirs()
241            .map_err(ExecutorError::Workspace)?;
242        for file in &plan.user_files {
243            let work_path = format!("work/{}", file.path);
244            workspace
245                .write_file(&work_path, &file.content, file.executable)
246                .map_err(ExecutorError::Workspace)?;
247        }
248
249        // Create socketpair for notify fd transfer (if needed)
250        let notify_sockets = if plan.notify_mode != NotifyMode::Disabled {
251            Some(scm_rights::create_socketpair().map_err(ExecutorError::Workspace)?)
252        } else {
253            None
254        };
255
256        let child_pid = unsafe { libc::fork() };
257        if child_pid < 0 {
258            return Err(ExecutorError::Fork(last_errno()));
259        }
260
261        if child_pid == 0 {
262            // In child: close parent's socket end
263            let child_socket = notify_sockets.map(|(_, child)| child);
264            match child_process(&workspace, &plan, &exec_info, child_socket.as_ref()) {
265                Ok(()) => unsafe { libc::_exit(127) },
266                Err(e) => {
267                    writeln!(io::stderr(), "sandbox error: {e}").ok();
268                    unsafe { libc::_exit(126) }
269                }
270            }
271        }
272
273        let pid = unsafe { Pid::from_raw_unchecked(child_pid) };
274        let pidfd = pidfd_open(pid, PidfdFlags::empty()).map_err(ExecutorError::Pidfd)?;
275
276        // Parent: receive notify fd if applicable
277        let notify_fd = if let Some((parent_socket, _)) = notify_sockets {
278            poll_or_kill(
279                parent_socket.as_raw_fd(),
280                child_pid,
281                "timeout waiting for notify fd",
282            )?;
283            Some(
284                scm_rights::recv_fd(parent_socket.as_raw_fd())
285                    .map_err(|e| ExecutorError::SeccompNotify(e.to_string()))?,
286            )
287        } else {
288            None
289        };
290
291        blocking_parent(child_pid, pidfd, notify_fd, workspace, plan)
292    }
293
294    /// Spawn a new sandbox. Returns immediately with a [`SandboxId`].
295    pub fn spawn(&mut self, plan: Plan) -> Result<SandboxId, ExecutorError> {
296        let id = SandboxId(self.next_id);
297        self.next_id += 1;
298
299        let timeout = plan.timeout;
300        let max_output = plan.max_output;
301
302        let spawned = spawn_sandbox(plan)?;
303
304        // Register with mio
305        let pidfd_token = encode_token(id.0, TOKEN_TYPE_PIDFD);
306        let stdout_token = encode_token(id.0, TOKEN_TYPE_STDOUT);
307        let stderr_token = encode_token(id.0, TOKEN_TYPE_STDERR);
308
309        self.poll.registry().register(
310            &mut SourceFd(&spawned.pidfd.as_raw_fd()),
311            pidfd_token,
312            Interest::READABLE,
313        )?;
314        self.poll.registry().register(
315            &mut SourceFd(&spawned.stdout_fd),
316            stdout_token,
317            Interest::READABLE,
318        )?;
319        self.poll.registry().register(
320            &mut SourceFd(&spawned.stderr_fd),
321            stderr_token,
322            Interest::READABLE,
323        )?;
324
325        let state = SandboxState {
326            spawned,
327            deadline: Instant::now() + timeout,
328            start: Instant::now(),
329            stdout: Vec::new(),
330            stderr: Vec::new(),
331            max_output,
332            pidfd_ready: false,
333            stdout_closed: false,
334            stderr_closed: false,
335        };
336
337        self.sandboxes.insert(id, state);
338        Ok(id)
339    }
340
341    /// Poll for events. Blocks until events are available or timeout expires.
342    pub fn poll(&mut self, events: &mut Vec<Event>, timeout: Option<Duration>) -> io::Result<()> {
343        events.clear();
344
345        if self.sandboxes.is_empty() {
346            return Ok(());
347        }
348
349        let effective_timeout = self.calculate_timeout(timeout);
350        self.poll.poll(&mut self.mio_events, effective_timeout)?;
351
352        let mut pidfd_ready: Vec<SandboxId> = Vec::new();
353        let mut read_stdout: Vec<SandboxId> = Vec::new();
354        let mut read_stderr: Vec<SandboxId> = Vec::new();
355
356        for mio_event in &self.mio_events {
357            let (sandbox_id, token_type) = decode_token(mio_event.token());
358            if self.sandboxes.contains_key(&sandbox_id) {
359                match token_type {
360                    TOKEN_TYPE_PIDFD => pidfd_ready.push(sandbox_id),
361                    TOKEN_TYPE_STDOUT => read_stdout.push(sandbox_id),
362                    TOKEN_TYPE_STDERR => read_stderr.push(sandbox_id),
363                    _ => {}
364                }
365            }
366        }
367
368        for id in pidfd_ready {
369            if let Some(state) = self.sandboxes.get_mut(&id) {
370                state.pidfd_ready = true;
371            }
372        }
373
374        for id in read_stdout {
375            self.read_pipe(id, true, events);
376        }
377
378        for id in read_stderr {
379            self.read_pipe(id, false, events);
380        }
381
382        self.check_completions(events)?;
383        Ok(())
384    }
385
386    pub fn active_count(&self) -> usize {
387        self.sandboxes.len()
388    }
389
390    pub fn kill(&mut self, id: SandboxId) -> io::Result<()> {
391        if let Some(state) = self.sandboxes.get(&id) {
392            pidfd_send_signal(&state.spawned.pidfd, Signal::KILL)?;
393        }
394        Ok(())
395    }
396
397    /// Write data to a sandbox's stdin.
398    pub fn write_stdin(&mut self, id: SandboxId, data: &[u8]) -> io::Result<usize> {
399        if let Some(state) = self.sandboxes.get(&id) {
400            let fd = state.spawned.stdin_fd;
401            if fd < 0 {
402                return Err(io::Error::new(io::ErrorKind::BrokenPipe, "stdin closed"));
403            }
404            let ret = unsafe { libc::write(fd, data.as_ptr().cast(), data.len()) };
405            if ret < 0 {
406                Err(io::Error::last_os_error())
407            } else {
408                Ok(ret as usize)
409            }
410        } else {
411            Err(io::Error::new(io::ErrorKind::NotFound, "sandbox not found"))
412        }
413    }
414
415    /// Close a sandbox's stdin (signal EOF).
416    pub fn close_stdin(&mut self, id: SandboxId) -> io::Result<()> {
417        if let Some(state) = self.sandboxes.get_mut(&id) {
418            if state.spawned.stdin_fd >= 0 {
419                unsafe { libc::close(state.spawned.stdin_fd) };
420                state.spawned.stdin_fd = -1;
421            }
422        }
423        Ok(())
424    }
425
426    fn calculate_timeout(&self, user_timeout: Option<Duration>) -> Option<Duration> {
427        let now = Instant::now();
428        let nearest_deadline = self.sandboxes.values().map(|s| s.deadline).min();
429
430        match (user_timeout, nearest_deadline) {
431            (Some(user), Some(deadline)) => Some(user.min(deadline.saturating_duration_since(now))),
432            (Some(user), None) => Some(user),
433            (None, Some(deadline)) => Some(deadline.saturating_duration_since(now)),
434            (None, None) => None,
435        }
436    }
437
438    fn read_pipe(&mut self, sandbox_id: SandboxId, is_stdout: bool, events: &mut Vec<Event>) {
439        let Some(state) = self.sandboxes.get_mut(&sandbox_id) else {
440            return;
441        };
442
443        let fd = if is_stdout {
444            state.spawned.stdout_fd
445        } else {
446            state.spawned.stderr_fd
447        };
448
449        let mut buf = [0u8; 4096];
450        loop {
451            let ret = unsafe { libc::read(fd, buf.as_mut_ptr().cast(), buf.len()) };
452
453            if ret < 0 {
454                let err = io::Error::last_os_error();
455                if err.kind() == io::ErrorKind::WouldBlock {
456                    break;
457                }
458                if is_stdout {
459                    state.stdout_closed = true;
460                } else {
461                    state.stderr_closed = true;
462                }
463                break;
464            } else if ret == 0 {
465                if is_stdout {
466                    state.stdout_closed = true;
467                } else {
468                    state.stderr_closed = true;
469                }
470                break;
471            } else {
472                let n = ret as usize;
473                let data = buf[..n].to_vec();
474
475                if is_stdout {
476                    state.stdout.extend_from_slice(&data);
477                    events.push(Event::Stdout {
478                        id: sandbox_id,
479                        data,
480                    });
481                } else {
482                    state.stderr.extend_from_slice(&data);
483                    events.push(Event::Stderr {
484                        id: sandbox_id,
485                        data,
486                    });
487                }
488
489                let total = state.stdout.len() + state.stderr.len();
490                if total > state.max_output as usize {
491                    pidfd_send_signal(&state.spawned.pidfd, Signal::KILL).ok();
492                    break;
493                }
494            }
495        }
496    }
497
498    fn check_completions(&mut self, events: &mut Vec<Event>) -> io::Result<()> {
499        let now = Instant::now();
500        let mut to_remove = Vec::new();
501
502        for (&id, state) in &mut self.sandboxes {
503            if now >= state.deadline && !state.pidfd_ready {
504                pidfd_send_signal(&state.spawned.pidfd, Signal::KILL).ok();
505                state.pidfd_ready = true;
506            }
507            if state.is_done() {
508                to_remove.push(id);
509            }
510        }
511
512        for id in to_remove {
513            if let Some(state) = self.sandboxes.remove(&id) {
514                self.poll
515                    .registry()
516                    .deregister(&mut SourceFd(&state.spawned.pidfd.as_raw_fd()))
517                    .ok();
518                self.poll
519                    .registry()
520                    .deregister(&mut SourceFd(&state.spawned.stdout_fd))
521                    .ok();
522                self.poll
523                    .registry()
524                    .deregister(&mut SourceFd(&state.spawned.stderr_fd))
525                    .ok();
526
527                let (exit_code, signal) = wait_for_exit(state.spawned.pidfd.as_raw_fd())?;
528                let duration = state.start.elapsed();
529                let timed_out = Instant::now() >= state.deadline;
530
531                let status = if timed_out {
532                    Status::Timeout
533                } else if signal.is_some() {
534                    Status::Signaled
535                } else if state.stdout.len() + state.stderr.len() > state.max_output as usize {
536                    Status::OutputLimitExceeded
537                } else {
538                    Status::Exited
539                };
540
541                let output = Output {
542                    stdout: state.stdout,
543                    stderr: state.stderr,
544                    status,
545                    duration,
546                    exit_code,
547                    signal,
548                };
549
550                if timed_out {
551                    events.push(Event::Timeout { id, output });
552                } else {
553                    events.push(Event::Completed { id, output });
554                }
555            }
556        }
557
558        Ok(())
559    }
560}
561
562/// Close the parent-side pipe ends that the child uses (stdin read, stdout write, stderr write).
563fn close_parent_pipe_ends(workspace: &Workspace) {
564    unsafe {
565        libc::close(workspace.pipes.stdin.read.as_raw_fd());
566        libc::close(workspace.pipes.stdout.write.as_raw_fd());
567        libc::close(workspace.pipes.stderr.write.as_raw_fd());
568    }
569}
570
571/// Poll an fd with a 30-second timeout; kill the child on timeout or error.
572fn poll_or_kill(fd: RawFd, child_pid: libc::pid_t, msg: &str) -> Result<(), ExecutorError> {
573    let mut pfd = libc::pollfd {
574        fd,
575        events: libc::POLLIN,
576        revents: 0,
577    };
578    if unsafe { libc::poll(&mut pfd, 1, 30000) } <= 0 {
579        unsafe { libc::kill(child_pid, libc::SIGKILL) };
580        return Err(ExecutorError::ChildSetup(msg.into()));
581    }
582    Ok(())
583}
584
585/// Wait for the child to signal readiness via eventfd, then signal back.
586fn sync_with_child(workspace: &Workspace, child_pid: libc::pid_t) -> Result<(), ExecutorError> {
587    let child_ready_fd = workspace.pipes.sync.child_ready_fd();
588    poll_or_kill(child_ready_fd, child_pid, "timeout waiting for child")?;
589
590    let mut value: u64 = 0;
591    if unsafe { libc::read(child_ready_fd, (&mut value as *mut u64).cast(), 8) } != 8 {
592        unsafe { libc::kill(child_pid, libc::SIGKILL) };
593        return Err(ExecutorError::ChildSetup("eventfd read failed".into()));
594    }
595
596    let parent_done_fd = workspace.pipes.sync.parent_done_fd();
597    let signal_value: u64 = 1;
598    if unsafe { libc::write(parent_done_fd, (&signal_value as *const u64).cast(), 8) } != 8 {
599        unsafe { libc::kill(child_pid, libc::SIGKILL) };
600        return Err(ExecutorError::ChildSetup("eventfd write failed".into()));
601    }
602
603    Ok(())
604}
605
606fn spawn_sandbox(plan: Plan) -> Result<SpawnedSandbox, ExecutorError> {
607    let cmd_refs: Vec<&str> = plan.cmd.iter().map(|s| s.as_str()).collect();
608    validate_cmd(&cmd_refs).map_err(ExecutorError::Validation)?;
609
610    if let Err(e) = check::check() {
611        return Err(ExecutorError::SystemCheck(e.to_string()));
612    }
613
614    let exec_info = if let Some(info) = ExecutionInfo::from_plan(&plan) {
615        info
616    } else {
617        let resolved = resolve_binary(&plan.cmd[0])
618            .map_err(|e| ExecutorError::CommandNotFound(e.to_string()))?;
619        ExecutionInfo::from_resolved(resolved)
620    };
621
622    let workspace = Workspace::with_prefix("evalbox-").map_err(ExecutorError::Workspace)?;
623
624    workspace
625        .setup_sandbox_dirs()
626        .map_err(ExecutorError::Workspace)?;
627    for file in &plan.user_files {
628        let work_path = format!("work/{}", file.path);
629        workspace
630            .write_file(&work_path, &file.content, file.executable)
631            .map_err(ExecutorError::Workspace)?;
632    }
633
634    // Create socketpair for notify fd transfer (if needed)
635    let notify_sockets = if plan.notify_mode != NotifyMode::Disabled {
636        Some(scm_rights::create_socketpair().map_err(ExecutorError::Workspace)?)
637    } else {
638        None
639    };
640
641    let child_pid = unsafe { libc::fork() };
642    if child_pid < 0 {
643        return Err(ExecutorError::Fork(last_errno()));
644    }
645
646    if child_pid == 0 {
647        let child_socket = notify_sockets.map(|(_, child)| child);
648        match child_process(&workspace, &plan, &exec_info, child_socket.as_ref()) {
649            Ok(()) => unsafe { libc::_exit(127) },
650            Err(e) => {
651                writeln!(io::stderr(), "sandbox error: {e}").ok();
652                unsafe { libc::_exit(126) }
653            }
654        }
655    }
656
657    let pid = unsafe { Pid::from_raw_unchecked(child_pid) };
658    let pidfd = pidfd_open(pid, PidfdFlags::empty()).map_err(ExecutorError::Pidfd)?;
659
660    let stdin_write_fd = workspace.pipes.stdin.write.as_raw_fd();
661    let stdout_read_fd = workspace.pipes.stdout.read.as_raw_fd();
662    let stderr_read_fd = workspace.pipes.stderr.read.as_raw_fd();
663
664    close_parent_pipe_ends(&workspace);
665
666    // Receive notify fd from child if applicable
667    let notify_fd = if let Some((parent_socket, _)) = notify_sockets {
668        poll_or_kill(
669            parent_socket.as_raw_fd(),
670            child_pid,
671            "timeout waiting for notify fd",
672        )?;
673        Some(
674            scm_rights::recv_fd(parent_socket.as_raw_fd())
675                .map_err(|e| ExecutorError::SeccompNotify(e.to_string()))?,
676        )
677    } else {
678        None
679    };
680
681    sync_with_child(&workspace, child_pid)?;
682
683    // Write stdin if provided
684    if let Some(ref stdin_data) = plan.stdin {
685        write_stdin(&workspace, stdin_data).map_err(ExecutorError::Monitor)?;
686        unsafe { libc::close(stdin_write_fd) };
687    }
688
689    // Set non-blocking for async reading
690    set_nonblocking(stdout_read_fd).map_err(ExecutorError::Monitor)?;
691    set_nonblocking(stderr_read_fd).map_err(ExecutorError::Monitor)?;
692
693    // Close sync fds
694    unsafe {
695        libc::close(workspace.pipes.sync.child_ready_fd());
696        libc::close(workspace.pipes.sync.parent_done_fd());
697    }
698
699    Ok(SpawnedSandbox {
700        pidfd,
701        stdin_fd: if plan.stdin.is_some() {
702            -1
703        } else {
704            stdin_write_fd
705        },
706        stdout_fd: stdout_read_fd,
707        stderr_fd: stderr_read_fd,
708        notify_fd,
709        workspace: std::mem::ManuallyDrop::new(workspace),
710    })
711}
712
713fn blocking_parent(
714    child_pid: libc::pid_t,
715    pidfd: OwnedFd,
716    _notify_fd: Option<OwnedFd>,
717    workspace: Workspace,
718    plan: Plan,
719) -> Result<Output, ExecutorError> {
720    let workspace = std::mem::ManuallyDrop::new(workspace);
721
722    close_parent_pipe_ends(&workspace);
723
724    sync_with_child(&workspace, child_pid)?;
725
726    if let Some(ref stdin_data) = plan.stdin {
727        write_stdin(&workspace, stdin_data).map_err(ExecutorError::Monitor)?;
728    }
729    unsafe { libc::close(workspace.pipes.stdin.write.as_raw_fd()) };
730
731    let result = monitor(pidfd, &workspace, &plan).map_err(ExecutorError::Monitor);
732
733    unsafe {
734        libc::close(workspace.pipes.stdout.read.as_raw_fd());
735        libc::close(workspace.pipes.stderr.read.as_raw_fd());
736        libc::close(workspace.pipes.sync.child_ready_fd());
737        libc::close(workspace.pipes.sync.parent_done_fd());
738    }
739
740    result
741}
742
743/// Child process flow (runs after fork in the child).
744///
745/// 1. Close parent pipe ends
746/// 2. Setup stdio (dup2 stdin/stdout/stderr)
747/// 3. chdir(workspace/work)
748/// 4. Landlock v5 + rlimits + securebits + drop caps (lockdown)
749/// 5. If `notify_mode` != Disabled: install notify filter, send listener fd
750/// 6. Install kill seccomp filter (whitelist)
751/// 7. Signal parent readiness
752/// 8. Wait for parent signal
753/// 9. `close_range(3, MAX, 0)`
754/// 10. execve
755fn child_process(
756    workspace: &Workspace,
757    plan: &Plan,
758    exec_info: &ExecutionInfo,
759    notify_socket: Option<&OwnedFd>,
760) -> Result<(), ExecutorError> {
761    // 1. Close parent pipe ends
762    unsafe {
763        libc::close(workspace.pipes.stdin.write.as_raw_fd());
764        libc::close(workspace.pipes.stdout.read.as_raw_fd());
765        libc::close(workspace.pipes.stderr.read.as_raw_fd());
766    }
767
768    // 2. Setup stdio
769    setup_stdio(workspace)?;
770
771    // 3. chdir to workspace/work
772    let work_dir = workspace.root().join("work");
773    let work_cstr = CString::new(work_dir.to_string_lossy().as_bytes())
774        .map_err(|_| ExecutorError::Exec(Errno::INVAL))?;
775    if unsafe { libc::chdir(work_cstr.as_ptr()) } != 0 {
776        return Err(ExecutorError::Exec(last_errno()));
777    }
778
779    // 4. Apply lockdown (Landlock v5 + rlimits + securebits + drop caps)
780    let extra_paths: Vec<&str> = exec_info
781        .extra_mounts
782        .iter()
783        .filter_map(|m| m.source.to_str())
784        .collect();
785    lockdown(plan, workspace.root(), &extra_paths).map_err(ExecutorError::Lockdown)?;
786
787    // 5. If notify mode != Disabled: install notify seccomp filter, send listener fd
788    if plan.notify_mode != NotifyMode::Disabled {
789        let notify_filter = build_notify_filter(NOTIFY_FS_SYSCALLS);
790        let fprog = SockFprog {
791            len: notify_filter.len() as u16,
792            filter: notify_filter.as_ptr(),
793        };
794        let listener_fd = unsafe { seccomp_set_mode_filter_listener(&fprog) }.map_err(|e| {
795            ExecutorError::SeccompNotify(format!("failed to install notify filter: {e}"))
796        })?;
797
798        // Send listener fd to parent via SCM_RIGHTS
799        if let Some(sock) = notify_socket {
800            scm_rights::send_fd(sock.as_raw_fd(), listener_fd.as_raw_fd()).map_err(|e| {
801                ExecutorError::SeccompNotify(format!("failed to send listener fd: {e}"))
802            })?;
803        }
804    }
805
806    // 6. Install kill seccomp filter (whitelist)
807    apply_seccomp(plan)?;
808
809    // 7. Signal parent readiness
810    let child_ready_fd = workspace.pipes.sync.child_ready_fd();
811    let signal_value: u64 = 1;
812    if unsafe { libc::write(child_ready_fd, (&signal_value as *const u64).cast(), 8) } != 8 {
813        return Err(ExecutorError::ChildSetup("eventfd write failed".into()));
814    }
815
816    // 8. Wait for parent signal
817    let parent_done_fd = workspace.pipes.sync.parent_done_fd();
818    let mut value: u64 = 0;
819    if unsafe { libc::read(parent_done_fd, (&mut value as *mut u64).cast(), 8) } != 8 {
820        return Err(ExecutorError::ChildSetup("eventfd read failed".into()));
821    }
822
823    // 9. Close all fds except 0,1,2
824    close_extra_fds();
825
826    // 10. execve
827    exec_command(plan, exec_info)
828}
829
830fn setup_stdio(workspace: &Workspace) -> Result<(), ExecutorError> {
831    let stdin_fd = workspace.pipes.stdin.read.as_raw_fd();
832    let stdout_fd = workspace.pipes.stdout.write.as_raw_fd();
833    let stderr_fd = workspace.pipes.stderr.write.as_raw_fd();
834
835    unsafe {
836        libc::close(0);
837        libc::close(1);
838        libc::close(2);
839        if libc::dup2(stdin_fd, 0) < 0 {
840            return Err(ExecutorError::Exec(last_errno()));
841        }
842        if libc::dup2(stdout_fd, 1) < 0 {
843            return Err(ExecutorError::Exec(last_errno()));
844        }
845        if libc::dup2(stderr_fd, 2) < 0 {
846            return Err(ExecutorError::Exec(last_errno()));
847        }
848    }
849    Ok(())
850}
851
852fn apply_seccomp(plan: &Plan) -> Result<(), ExecutorError> {
853    let whitelist: Vec<i64> = if let Some(ref syscalls) = plan.syscalls {
854        let mut wl: Vec<i64> = DEFAULT_WHITELIST
855            .iter()
856            .copied()
857            .filter(|s| !syscalls.denied.contains(s))
858            .collect();
859        for s in &syscalls.allowed {
860            if !wl.contains(s) {
861                wl.push(*s);
862            }
863        }
864        wl
865    } else {
866        DEFAULT_WHITELIST.to_vec()
867    };
868
869    let filter = build_whitelist_filter(&whitelist);
870    let fprog = SockFprog {
871        len: filter.len() as u16,
872        filter: filter.as_ptr(),
873    };
874    unsafe { seccomp_set_mode_filter(&fprog) }
875        .map_err(|e| ExecutorError::Lockdown(LockdownError::Seccomp(e)))?;
876    Ok(())
877}
878
879fn exec_command(plan: &Plan, exec_info: &ExecutionInfo) -> Result<(), ExecutorError> {
880    let cmd_path = CString::new(exec_info.binary_path.to_string_lossy().as_bytes())
881        .map_err(|_| ExecutorError::Exec(Errno::INVAL))?;
882
883    let mut argv: Vec<CString> = Vec::with_capacity(plan.cmd.len());
884    argv.push(cmd_path.clone());
885    for arg in plan.cmd.iter().skip(1) {
886        argv.push(CString::new(arg.as_bytes()).map_err(|_| ExecutorError::Exec(Errno::INVAL))?);
887    }
888
889    let argv_ptrs: Vec<*const libc::c_char> = argv
890        .iter()
891        .map(|s| s.as_ptr())
892        .chain(std::iter::once(std::ptr::null()))
893        .collect();
894
895    let envp: Vec<CString> = plan
896        .env
897        .iter()
898        .map(|(k, v)| CString::new(format!("{k}={v}")))
899        .collect::<Result<Vec<_>, _>>()
900        .map_err(|_| ExecutorError::Exec(Errno::INVAL))?;
901
902    let envp_ptrs: Vec<*const libc::c_char> = envp
903        .iter()
904        .map(|s| s.as_ptr())
905        .chain(std::iter::once(std::ptr::null()))
906        .collect();
907
908    unsafe { libc::execve(cmd_path.as_ptr(), argv_ptrs.as_ptr(), envp_ptrs.as_ptr()) };
909
910    Err(ExecutorError::Exec(last_errno()))
911}
912
#[cfg(test)]
mod tests {
    use super::*;

    /// Encoding an id with a stream type and decoding it again yields the same pair.
    #[test]
    fn token_encoding() {
        let (id, ty) = decode_token(encode_token(42, TOKEN_TYPE_STDOUT));
        assert_eq!(id.0, 42);
        assert_eq!(ty, TOKEN_TYPE_STDOUT);
    }

    /// `SandboxId` displays as `Sandbox(<n>)`.
    #[test]
    fn sandbox_id_display() {
        let rendered = SandboxId(123).to_string();
        assert_eq!(rendered, "Sandbox(123)");
    }
}