Skip to main content

microsandbox_agentd/
session.rs

1//! Exec session management: spawning processes with PTY or pipe I/O.
2
3use std::ffi::{CStr, CString};
4use std::mem::MaybeUninit;
5use std::os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd};
6use std::process::Stdio;
7use std::{iter, mem, ptr};
8
9use nix::pty;
10use nix::sys::signal::{self, Signal};
11use nix::unistd::Pid;
12use tokio::io::AsyncReadExt;
13use tokio::process::{Child, Command};
14use tokio::sync::mpsc;
15
16use microsandbox_protocol::exec::{ExecFailed, ExecFailureKind, ExecRequest};
17
18use crate::error::{AgentdError, AgentdResult};
19use crate::rlimit;
20
21//--------------------------------------------------------------------------------------------------
22// Constants
23//--------------------------------------------------------------------------------------------------
24
/// Capability ABI version written into the `capget`/`capset` header
/// (`CapUserHeader::version`); callers in this file pair it with two
/// `CapUserData` words.
const LINUX_CAPABILITY_VERSION_3: u32 = 0x20080522;
/// Capability number of `CAP_SYS_ADMIN`, the capability stripped from exec'd processes.
const CAP_SYS_ADMIN: u32 = 21;
/// Number of capability bits carried by each `CapUserData` word.
const CAP_WORD_BITS: u32 = 32;
/// `prctl` option used to drop a capability from the bounding set.
const PR_CAPBSET_DROP: libc::c_int = 24;
/// `prctl` option selecting ambient-capability operations.
const PR_CAP_AMBIENT: libc::c_int = 47;
/// `PR_CAP_AMBIENT` sub-operation that clears the entire ambient set.
const PR_CAP_AMBIENT_CLEAR_ALL: libc::c_int = 4;
/// User spec resolved to find a default `HOME` when no user was requested or configured.
const DEFAULT_USER_SPEC: &str = "0:0";
32
33//--------------------------------------------------------------------------------------------------
34// Functions: classify
35//--------------------------------------------------------------------------------------------------
36
37/// Map an `errno` integer to its standard symbolic name. Returns
38/// `None` for unrecognized values; we only enumerate the ones that
39/// can plausibly come out of fork/exec/setrlimit/setuid paths.
40fn errno_name(e: i32) -> Option<&'static str> {
41    match e {
42        libc::E2BIG => Some("E2BIG"),
43        libc::EACCES => Some("EACCES"),
44        libc::EAGAIN => Some("EAGAIN"),
45        libc::EBUSY => Some("EBUSY"),
46        libc::EFAULT => Some("EFAULT"),
47        libc::EINVAL => Some("EINVAL"),
48        libc::EIO => Some("EIO"),
49        libc::EISDIR => Some("EISDIR"),
50        libc::ELOOP => Some("ELOOP"),
51        libc::EMFILE => Some("EMFILE"),
52        libc::ENAMETOOLONG => Some("ENAMETOOLONG"),
53        libc::ENFILE => Some("ENFILE"),
54        libc::ENOENT => Some("ENOENT"),
55        libc::ENOEXEC => Some("ENOEXEC"),
56        libc::ENOMEM => Some("ENOMEM"),
57        libc::ENOSYS => Some("ENOSYS"),
58        libc::ENOTDIR => Some("ENOTDIR"),
59        libc::ENXIO => Some("ENXIO"),
60        libc::EPERM => Some("EPERM"),
61        libc::ETXTBSY => Some("ETXTBSY"),
62        _ => None,
63    }
64}
65
66/// Classify a fork/exec-time `errno` into one of the
67/// `ExecFailureKind` buckets.
68///
69/// ENOENT is ambiguous in principle (missing binary vs. missing
70/// cwd), but in practice it's overwhelmingly the binary — the cwd
71/// is set in `pre_exec` *before* execvp, and a bad cwd would more
72/// commonly produce ENOTDIR (path component isn't a directory) or
73/// EACCES (no permission to chdir). We classify ENOENT as
74/// `NotFound` and ENOTDIR as `BadCwd`. Edge cases of "bad cwd that
75/// happens to ENOENT" fall through with the message "spawn 'cmd':
76/// No such file or directory" which is still understandable.
77fn classify_spawn_errno(errno: i32) -> ExecFailureKind {
78    match errno {
79        libc::ENOENT => ExecFailureKind::NotFound,
80        libc::ENOTDIR => ExecFailureKind::BadCwd,
81        libc::EACCES | libc::EPERM => ExecFailureKind::PermissionDenied,
82        libc::ENOEXEC => ExecFailureKind::NotExecutable,
83        libc::EISDIR => ExecFailureKind::NotExecutable,
84        libc::ETXTBSY => ExecFailureKind::NotExecutable,
85        libc::E2BIG | libc::ELOOP | libc::ENAMETOOLONG | libc::EFAULT => ExecFailureKind::BadArgs,
86        libc::EMFILE | libc::ENFILE => ExecFailureKind::ResourceLimit,
87        libc::EAGAIN => ExecFailureKind::ResourceLimit,
88        libc::ENOMEM => ExecFailureKind::OutOfMemory,
89        libc::EINVAL => ExecFailureKind::Other,
90        _ => ExecFailureKind::Other,
91    }
92}
93
94/// Build a `ExecFailed` payload from a spawn-time `io::Error`.
95fn exec_failed_from_io_error(err: &std::io::Error, cmd: &str, stage: &str) -> ExecFailed {
96    let errno = err.raw_os_error();
97    let kind = errno
98        .map(classify_spawn_errno)
99        .unwrap_or(ExecFailureKind::Other);
100    let errno_name = errno.and_then(errno_name).map(str::to_string);
101    let message = format!("spawn {cmd:?}: {err}");
102    ExecFailed {
103        kind,
104        errno,
105        errno_name,
106        message,
107        stage: Some(stage.to_string()),
108    }
109}
110
111//--------------------------------------------------------------------------------------------------
112// Types
113//--------------------------------------------------------------------------------------------------
114
/// An active exec session handle for sending input to a running process.
///
/// Output reading is handled by a background task that sends events
/// via the `mpsc` channel provided at spawn time.
///
/// Exactly one of `pty_master` / `stdin` is populated at spawn time,
/// depending on whether the session was created in PTY or pipe mode.
#[derive(Debug)]
pub struct ExecSession {
    /// The PID of the spawned process.
    ///
    /// Pipe-mode sessions fall back to `0` if the runtime does not report a
    /// child id at spawn time.
    pid: i32,

    /// The PTY master fd (only for PTY mode, used for writing and resize).
    pty_master: Option<OwnedFd>,

    /// The child's stdin (only for pipe mode).
    ///
    /// Becomes `None` once `close_stdin` has been called.
    stdin: Option<tokio::process::ChildStdin>,
}
130
/// Output from a session that the agent loop should forward to the host.
///
/// Values travel over the `mpsc` channel handed to `ExecSession::spawn`,
/// paired with the numeric session id.
pub enum SessionOutput {
    /// Data from stdout (or PTY master).
    Stdout(Vec<u8>),

    /// Data from stderr (pipe mode only).
    Stderr(Vec<u8>),

    /// The process has exited with the given code.
    Exited(i32),

    /// Pre-encoded frame bytes to write directly to the serial output buffer.
    ///
    /// Used by filesystem streaming operations that encode their own
    /// `FsData`/`FsResponse` messages.
    Raw(Vec<u8>),
}
148
/// Identity the child process switches to before exec, fully resolved in
/// the parent so the child only has to issue the credential syscalls.
struct ResolvedUser {
    /// Target user id, applied with `setuid`.
    uid: libc::uid_t,
    /// Target primary group id, applied with `setgid`.
    gid: libc::gid_t,
    /// Account name passed to `initgroups`; when `None` the supplementary
    /// group list is cleared with `setgroups(0, ..)` instead.
    initgroups_user: Option<CString>,
    /// Home directory from the passwd entry, used as the default `HOME`.
    home_dir: Option<CString>,
}
155
/// Owned copy of the passwd fields this module needs, detached from the
/// scratch buffer used by the `getpw*_r` lookups.
struct PasswdEntry {
    /// Login name (`pw_name`), converted lossily to UTF-8.
    name: String,
    /// User id (`pw_uid`).
    uid: libc::uid_t,
    /// Primary group id (`pw_gid`).
    gid: libc::gid_t,
    /// Home directory (`pw_dir`); `None` when the entry's value is empty.
    home_dir: Option<String>,
}
162
/// Owned subset of a group database entry; only the id is needed here.
struct GroupEntry {
    /// Group id (`gr_gid`).
    gid: libc::gid_t,
}
166
/// Close-on-exec pipe used by the forked PTY child to report an `errno`
/// back to the parent when exec fails.
struct ExecErrorPipe {
    /// Parent side: yields the child's `errno`, or EOF when nothing was reported.
    read_end: OwnedFd,
    /// Child side: receives the raw `errno` bytes on failure.
    write_end: OwnedFd,
}
171
/// Header argument for the raw `capget`/`capset` syscalls.
///
/// `#[repr(C)]` because the struct is handed to the kernel by pointer.
#[repr(C)]
#[derive(Clone, Copy)]
struct CapUserHeader {
    /// Capability ABI version; this file always uses `LINUX_CAPABILITY_VERSION_3`.
    version: u32,
    /// Target pid; this file always passes `0`.
    pid: libc::c_int,
}
178
/// One 32-bit word of capability sets exchanged with `capget`/`capset`.
///
/// Callers in this file pass an array of two of these; a capability's
/// word index and bit are derived from `CAP_WORD_BITS`.
#[repr(C)]
#[derive(Clone, Copy)]
struct CapUserData {
    /// Bits of the effective set.
    effective: u32,
    /// Bits of the permitted set.
    permitted: u32,
    /// Bits of the inheritable set.
    inheritable: u32,
}
186
187//--------------------------------------------------------------------------------------------------
188// Methods
189//--------------------------------------------------------------------------------------------------
190
191impl ExecSession {
192    /// Spawns a new exec session.
193    ///
194    /// If `req.tty` is true, uses a PTY. Otherwise, uses piped stdin/stdout/stderr.
195    /// A background task is spawned to read output and send events via `tx`.
196    pub fn spawn(
197        id: u32,
198        req: &ExecRequest,
199        tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
200        default_user: Option<&str>,
201    ) -> AgentdResult<Self> {
202        if req.tty {
203            Self::spawn_pty(id, req, tx, default_user)
204        } else {
205            Self::spawn_pipe(id, req, tx, default_user)
206        }
207    }
208
209    /// Returns the PID of the spawned process (as u32 for the protocol).
210    pub fn pid(&self) -> u32 {
211        self.pid as u32
212    }
213
214    /// Writes data to the process's stdin (or PTY master).
215    pub async fn write_stdin(&self, data: &[u8]) -> AgentdResult<()> {
216        if let Some(ref master) = self.pty_master {
217            blocking_write_fd(master.as_raw_fd(), data).await
218        } else if let Some(ref stdin) = self.stdin {
219            blocking_write_fd(stdin.as_raw_fd(), data).await
220        } else {
221            Ok(())
222        }
223    }
224
225    /// Resizes the PTY (only applicable for TTY sessions).
226    pub fn resize(&self, rows: u16, cols: u16) -> AgentdResult<()> {
227        if let Some(ref master) = self.pty_master {
228            let ws = libc::winsize {
229                ws_row: rows,
230                ws_col: cols,
231                ws_xpixel: 0,
232                ws_ypixel: 0,
233            };
234            let ret = unsafe { libc::ioctl(master.as_raw_fd(), libc::TIOCSWINSZ, &ws) };
235            if ret < 0 {
236                return Err(std::io::Error::last_os_error().into());
237            }
238        }
239        Ok(())
240    }
241
242    /// Sends a signal to the spawned process.
243    pub fn send_signal(&self, signum: i32) -> AgentdResult<()> {
244        let sig = Signal::try_from(signum)
245            .map_err(|e| AgentdError::ExecSession(format!("invalid signal {signum}: {e}")))?;
246        signal::kill(Pid::from_raw(self.pid), sig)?;
247        Ok(())
248    }
249
250    /// Closes the process's stdin.
251    ///
252    /// For pipe mode, drops the `ChildStdin` handle which closes the fd.
253    /// For PTY mode, this is a no-op (the PTY master stays open for output).
254    pub fn close_stdin(&mut self) {
255        self.stdin.take();
256    }
257}
258
259impl ExecSession {
260    /// Spawns a process with a PTY.
261    fn spawn_pty(
262        id: u32,
263        req: &ExecRequest,
264        tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
265        default_user: Option<&str>,
266    ) -> AgentdResult<Self> {
267        let pty = pty::openpty(None, None)?;
268        let err_pipe = new_exec_error_pipe()?;
269
270        // Set initial window size.
271        let ws = libc::winsize {
272            ws_row: req.rows,
273            ws_col: req.cols,
274            ws_xpixel: 0,
275            ws_ypixel: 0,
276        };
277        let ret = unsafe { libc::ioctl(pty.master.as_raw_fd(), libc::TIOCSWINSZ, &ws) };
278        if ret < 0 {
279            return Err(std::io::Error::last_os_error().into());
280        }
281
282        let slave_fd = pty.slave.as_raw_fd();
283
284        // Pre-build all strings before fork to avoid allocating in the child.
285        let c_cmd = CString::new(req.cmd.as_str())
286            .map_err(|e| AgentdError::ExecSession(format!("invalid command: {e}")))?;
287        let mut c_args: Vec<CString> = vec![c_cmd.clone()];
288        for arg in &req.args {
289            c_args.push(
290                CString::new(arg.as_str())
291                    .map_err(|e| AgentdError::ExecSession(format!("invalid arg: {e}")))?,
292            );
293        }
294
295        // Build argv pointer array (null-terminated).
296        let argv_ptrs: Vec<*const libc::c_char> = c_args
297            .iter()
298            .map(|s| s.as_ptr())
299            .chain(iter::once(ptr::null()))
300            .collect();
301
302        // Pre-parse environment variables into CStrings.
303        let c_env: Vec<(CString, CString)> = req
304            .env
305            .iter()
306            .filter_map(|var| {
307                let (key, val) = var.split_once('=')?;
308                let k = CString::new(key).ok()?;
309                let v = CString::new(val).ok()?;
310                Some((k, v))
311            })
312            .collect();
313
314        // Pre-build cwd CString.
315        let c_cwd = req
316            .cwd
317            .as_ref()
318            .map(|dir| CString::new(dir.as_str()))
319            .transpose()
320            .map_err(|e| AgentdError::ExecSession(format!("invalid cwd: {e}")))?;
321
322        let resolved_user = resolve_requested_user(req, default_user)?;
323        let default_home = default_home_dir(req, resolved_user.as_ref())?;
324        let home_key = default_home
325            .as_ref()
326            .map(|_| {
327                CString::new("HOME")
328                    .map_err(|e| AgentdError::ExecSession(format!("invalid home env key: {e}")))
329            })
330            .transpose()?;
331
332        // Pre-parse rlimits before fork (no allocations in child).
333        let parsed_rlimits = rlimit::to_libc(&req.rlimits);
334
335        // Fork.
336        let pid = unsafe { libc::fork() };
337        if pid < 0 {
338            let io_err = std::io::Error::last_os_error();
339            return Err(AgentdError::ExecSpawnFailed(exec_failed_from_io_error(
340                &io_err, &req.cmd, "fork",
341            )));
342        }
343
344        #[allow(unreachable_code)]
345        if pid == 0 {
346            // Child process — only async-signal-safe operations from here.
347            drop(pty.master);
348            drop(err_pipe.read_end);
349
350            // Create new session.
351            if unsafe { libc::setsid() } < 0 {
352                unsafe { libc::_exit(1) };
353            }
354
355            // Set controlling terminal.
356            if unsafe { libc::ioctl(slave_fd, libc::TIOCSCTTY, 0) } < 0 {
357                unsafe { libc::_exit(1) };
358            }
359
360            // Dup slave to stdin/stdout/stderr.
361            unsafe {
362                if libc::dup2(slave_fd, 0) < 0 {
363                    libc::_exit(1);
364                }
365                if libc::dup2(slave_fd, 1) < 0 {
366                    libc::_exit(1);
367                }
368                if libc::dup2(slave_fd, 2) < 0 {
369                    libc::_exit(1);
370                }
371                if slave_fd > 2 {
372                    libc::close(slave_fd);
373                }
374            }
375
376            // Set environment variables using pre-built CStrings.
377            for (key, val) in &c_env {
378                unsafe {
379                    libc::setenv(key.as_ptr(), val.as_ptr(), 1);
380                }
381            }
382
383            // Set working directory.
384            if let Some(ref dir) = c_cwd {
385                unsafe {
386                    libc::chdir(dir.as_ptr());
387                }
388            }
389
390            if drop_mount_admin_privileges().is_err() {
391                unsafe { libc::_exit(1) };
392            }
393
394            if let Some(ref user) = resolved_user
395                && apply_resolved_user(user).is_err()
396            {
397                unsafe { libc::_exit(1) };
398            }
399
400            if let (Some(key), Some(home)) = (&home_key, &default_home) {
401                unsafe {
402                    libc::setenv(key.as_ptr(), home.as_ptr(), 1);
403                }
404            }
405
406            // Apply resource limits.
407            for (resource, limit) in &parsed_rlimits {
408                if unsafe { libc::setrlimit(*resource as _, limit) } != 0 {
409                    unsafe { libc::_exit(1) };
410                }
411            }
412
413            // execvp — on success this never returns.
414            unsafe {
415                libc::execvp(argv_ptrs[0], argv_ptrs.as_ptr());
416            }
417
418            // If execvp returns, it failed.
419            write_exec_error_and_exit(err_pipe.write_end.as_raw_fd());
420        }
421
422        // Parent process.
423        drop(pty.slave);
424        drop(err_pipe.write_end);
425
426        if let Some(exec_errno) = read_exec_error(err_pipe.read_end.as_raw_fd())? {
427            let _ = wait_for_exec_failure_child(pid);
428            let io_err = std::io::Error::from_raw_os_error(exec_errno);
429            return Err(AgentdError::ExecSpawnFailed(exec_failed_from_io_error(
430                &io_err, &req.cmd, "execvp",
431            )));
432        }
433
434        // Dup the master fd for the reader task.
435        let reader_fd = unsafe { libc::dup(pty.master.as_raw_fd()) };
436        if reader_fd < 0 {
437            return Err(std::io::Error::last_os_error().into());
438        }
439        let reader_fd = unsafe { OwnedFd::from_raw_fd(reader_fd) };
440
441        // Spawn background reader task.
442        tokio::spawn(pty_reader_task(id, pid, reader_fd, tx));
443
444        Ok(Self {
445            pid,
446            pty_master: Some(pty.master),
447            stdin: None,
448        })
449    }
450
451    /// Spawns a process with piped stdio.
452    fn spawn_pipe(
453        id: u32,
454        req: &ExecRequest,
455        tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
456        default_user: Option<&str>,
457    ) -> AgentdResult<Self> {
458        let mut cmd = Command::new(&req.cmd);
459        cmd.args(&req.args)
460            .stdin(Stdio::piped())
461            .stdout(Stdio::piped())
462            .stderr(Stdio::piped());
463
464        for var in &req.env {
465            if let Some((key, val)) = var.split_once('=') {
466                cmd.env(key, val);
467            }
468        }
469
470        if let Some(ref dir) = req.cwd {
471            cmd.current_dir(dir);
472        }
473
474        let resolved_user = resolve_requested_user(req, default_user)?;
475        if let Some(home) = default_home_dir(req, resolved_user.as_ref())? {
476            cmd.env("HOME", home.to_string_lossy().into_owned());
477        }
478
479        // Drop mount privileges and apply resource limits in the child before exec.
480        let parsed_rlimits = rlimit::to_libc(&req.rlimits);
481        unsafe {
482            cmd.pre_exec(move || {
483                drop_mount_admin_privileges().map_err(agentd_to_io_error)?;
484                if let Some(ref user) = resolved_user {
485                    apply_resolved_user(user).map_err(agentd_to_io_error)?;
486                }
487                for (resource, limit) in &parsed_rlimits {
488                    if libc::setrlimit(*resource as _, limit) != 0 {
489                        return Err(std::io::Error::last_os_error());
490                    }
491                }
492                Ok(())
493            });
494        }
495
496        let cmd_label = req.cmd.clone();
497        let mut child = cmd.spawn().map_err(|err| {
498            AgentdError::ExecSpawnFailed(exec_failed_from_io_error(
499                &err,
500                &cmd_label,
501                "Command::spawn",
502            ))
503        })?;
504        let pid = child.id().unwrap_or(0) as i32;
505        let stdin = child.stdin.take();
506        let stdout = child.stdout.take();
507        let stderr = child.stderr.take();
508
509        // Spawn background reader task.
510        tokio::spawn(pipe_reader_task(id, child, stdout, stderr, tx));
511
512        Ok(Self {
513            pid,
514            pty_master: None,
515            stdin,
516        })
517    }
518}
519
520//--------------------------------------------------------------------------------------------------
521// Functions
522//--------------------------------------------------------------------------------------------------
523
524fn new_exec_error_pipe() -> AgentdResult<ExecErrorPipe> {
525    let mut fds = [0; 2];
526    let ret = unsafe { libc::pipe2(fds.as_mut_ptr(), libc::O_CLOEXEC) };
527    if ret != 0 {
528        return Err(std::io::Error::last_os_error().into());
529    }
530
531    Ok(ExecErrorPipe {
532        read_end: unsafe { OwnedFd::from_raw_fd(fds[0]) },
533        write_end: unsafe { OwnedFd::from_raw_fd(fds[1]) },
534    })
535}
536
537fn write_exec_error_and_exit(err_fd: RawFd) -> ! {
538    let errno = unsafe { *libc::__errno_location() };
539    let bytes = errno.to_ne_bytes();
540    let _ = unsafe { libc::write(err_fd, bytes.as_ptr() as *const libc::c_void, bytes.len()) };
541    unsafe { libc::_exit(127) }
542}
543
544fn read_exec_error(err_fd: RawFd) -> AgentdResult<Option<i32>> {
545    let mut buf = [0u8; mem::size_of::<i32>()];
546    let n = unsafe { libc::read(err_fd, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) };
547    if n < 0 {
548        return Err(std::io::Error::last_os_error().into());
549    }
550    if n == 0 {
551        return Ok(None);
552    }
553    if n as usize != buf.len() {
554        return Err(AgentdError::ExecSession(format!(
555            "short exec error report: expected {} bytes, got {n}",
556            buf.len()
557        )));
558    }
559    Ok(Some(i32::from_ne_bytes(buf)))
560}
561
562fn wait_for_exec_failure_child(pid: i32) -> AgentdResult<()> {
563    let ret = unsafe { libc::waitpid(pid, ptr::null_mut(), 0) };
564    if ret < 0 {
565        return Err(std::io::Error::last_os_error().into());
566    }
567    Ok(())
568}
569
/// Strips `CAP_SYS_ADMIN` from the calling process before it execs guest code.
///
/// Steps, in order:
/// 1. set `PR_SET_NO_NEW_PRIVS`;
/// 2. clear the ambient capability set;
/// 3. remove `CAP_SYS_ADMIN` from the effective, permitted and inheritable
///    sets (only if it is present in any of them);
/// 4. drop `CAP_SYS_ADMIN` from the bounding set.
///
/// Called in the child between fork and exec (directly in PTY mode, via
/// `pre_exec` in pipe mode).
///
/// # Errors
///
/// Returns the OS error of the first step that fails, except for the
/// tolerated cases noted inline.
fn drop_mount_admin_privileges() -> AgentdResult<()> {
    if unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) } != 0 {
        return Err(std::io::Error::last_os_error().into());
    }

    let ret = unsafe { libc::prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0) };
    if ret != 0 {
        let err = std::io::Error::last_os_error();
        // EINVAL is tolerated — presumably for kernels that do not know
        // PR_CAP_AMBIENT; TODO confirm.
        if err.raw_os_error() != Some(libc::EINVAL) {
            return Err(err.into());
        }
    }

    let mut header = CapUserHeader {
        version: LINUX_CAPABILITY_VERSION_3,
        pid: 0,
    };
    // Two data words are exchanged with the kernel for this header version.
    let mut data = [CapUserData {
        effective: 0,
        permitted: 0,
        inheritable: 0,
    }; 2];

    // Read the current capability sets so only CAP_SYS_ADMIN is changed.
    if unsafe { libc::syscall(libc::SYS_capget, &mut header, data.as_mut_ptr()) } != 0 {
        return Err(std::io::Error::last_os_error().into());
    }

    // Locate the word and bit that hold CAP_SYS_ADMIN.
    let index = (CAP_SYS_ADMIN / CAP_WORD_BITS) as usize;
    let mask = 1u32 << (CAP_SYS_ADMIN % CAP_WORD_BITS);
    let had_sys_admin = data[index].effective & mask != 0
        || data[index].permitted & mask != 0
        || data[index].inheritable & mask != 0;

    // Skip capset entirely when there is nothing to remove.
    if had_sys_admin {
        data[index].effective &= !mask;
        data[index].permitted &= !mask;
        data[index].inheritable &= !mask;

        if unsafe { libc::syscall(libc::SYS_capset, &mut header, data.as_ptr()) } != 0 {
            return Err(std::io::Error::last_os_error().into());
        }
    }

    let ret = unsafe { libc::prctl(PR_CAPBSET_DROP, CAP_SYS_ADMIN, 0, 0, 0) };
    if ret != 0 {
        let err = std::io::Error::last_os_error();
        let errno = err.raw_os_error();
        // Already-unprivileged callers may also lack CAP_SETPCAP for the bounding-set drop.
        let already_unprivileged = !had_sys_admin && errno == Some(libc::EPERM);
        // EINVAL is tolerated here as well, in addition to the EPERM case above.
        if errno != Some(libc::EINVAL) && !already_unprivileged {
            return Err(err.into());
        }
    }

    Ok(())
}
626
627pub(crate) fn resolve_default_user(default_user: Option<&str>) -> AgentdResult<(u32, u32)> {
628    let Some(spec) = default_user
629        .map(str::trim)
630        .filter(|value| !value.is_empty())
631    else {
632        return Ok((0, 0));
633    };
634
635    let resolved = resolve_user_spec(spec)?;
636    Ok((resolved.uid, resolved.gid))
637}
638
639fn resolve_requested_user(
640    req: &ExecRequest,
641    default_user: Option<&str>,
642) -> AgentdResult<Option<ResolvedUser>> {
643    let default_user = default_user
644        .map(str::trim)
645        .filter(|value| !value.is_empty());
646    let requested = req
647        .user
648        .as_deref()
649        .map(str::trim)
650        .filter(|value| !value.is_empty())
651        .or(default_user);
652
653    requested.map(resolve_user_spec).transpose()
654}
655
656fn resolve_user_spec(spec: &str) -> AgentdResult<ResolvedUser> {
657    let (user_part, group_part) = match spec.split_once(':') {
658        Some((user, group)) => (user.trim(), Some(group.trim())),
659        None => (spec.trim(), None),
660    };
661
662    if user_part.is_empty() {
663        return Err(AgentdError::ExecSession("user spec has empty user".into()));
664    }
665
666    let passwd = if let Ok(uid) = parse_id(user_part) {
667        lookup_passwd_by_uid(uid)?
668    } else {
669        lookup_passwd_by_name(user_part)?
670            .ok_or_else(|| AgentdError::ExecSession(format!("guest user not found: {user_part}")))?
671            .into()
672    };
673
674    let (uid, passwd_entry) = match passwd {
675        ResolvedUserLookup::Known(entry) => (entry.uid, Some(entry)),
676        ResolvedUserLookup::Numeric(uid) => (uid, None),
677    };
678
679    let gid = match group_part {
680        Some("") => {
681            return Err(AgentdError::ExecSession("user spec has empty group".into()));
682        }
683        Some(group) => resolve_group_spec(group)?,
684        None => passwd_entry
685            .as_ref()
686            .map(|entry| entry.gid)
687            .unwrap_or_else(|| unsafe { libc::getgid() }),
688    };
689
690    let initgroups_user = passwd_entry
691        .as_ref()
692        .map(|entry| CString::new(entry.name.as_str()))
693        .transpose()
694        .map_err(|e| AgentdError::ExecSession(format!("invalid guest user name: {e}")))?;
695
696    Ok(ResolvedUser {
697        uid,
698        gid,
699        initgroups_user,
700        home_dir: passwd_entry
701            .as_ref()
702            .and_then(|entry| entry.home_dir.as_deref())
703            .map(CString::new)
704            .transpose()
705            .map_err(|e| AgentdError::ExecSession(format!("invalid guest home directory: {e}")))?,
706    })
707}
708
/// Outcome of resolving the user part of a spec.
enum ResolvedUserLookup {
    /// A passwd entry exists for the user.
    Known(PasswdEntry),
    /// A numeric uid with no passwd entry; used as-is.
    Numeric(libc::uid_t),
}
713
714impl From<PasswdEntry> for ResolvedUserLookup {
715    fn from(value: PasswdEntry) -> Self {
716        Self::Known(value)
717    }
718}
719
720fn resolve_group_spec(spec: &str) -> AgentdResult<libc::gid_t> {
721    if let Ok(gid) = parse_id(spec) {
722        return Ok(gid);
723    }
724
725    lookup_group_by_name(spec)?
726        .map(|entry| entry.gid)
727        .ok_or_else(|| AgentdError::ExecSession(format!("guest group not found: {spec}")))
728}
729
/// Parses a decimal uid/gid.
///
/// Fails for anything that is not a base-10 `u32` (names, negative
/// numbers, empty strings, out-of-range values).
fn parse_id(value: &str) -> Result<u32, std::num::ParseIntError> {
    <u32 as std::str::FromStr>::from_str(value)
}
733
734fn lookup_passwd_by_name(name: &str) -> AgentdResult<Option<PasswdEntry>> {
735    let name = CString::new(name)
736        .map_err(|e| AgentdError::ExecSession(format!("invalid guest user name: {e}")))?;
737    let mut pwd = MaybeUninit::<libc::passwd>::uninit();
738    let mut result = ptr::null_mut();
739    let mut buf = vec![0u8; lookup_buffer_len()];
740    let rc = unsafe {
741        libc::getpwnam_r(
742            name.as_ptr(),
743            pwd.as_mut_ptr(),
744            buf.as_mut_ptr().cast(),
745            buf.len(),
746            &mut result,
747        )
748    };
749    if rc != 0 {
750        return Err(AgentdError::ExecSession(format!(
751            "failed to resolve guest user {name:?}: {}",
752            std::io::Error::from_raw_os_error(rc)
753        )));
754    }
755    if result.is_null() {
756        return Ok(None);
757    }
758
759    let pwd = unsafe { pwd.assume_init() };
760    let name = unsafe { CStr::from_ptr(pwd.pw_name) }
761        .to_string_lossy()
762        .into_owned();
763    let home_dir = unsafe { CStr::from_ptr(pwd.pw_dir) }
764        .to_string_lossy()
765        .into_owned();
766    Ok(Some(PasswdEntry {
767        name,
768        uid: pwd.pw_uid,
769        gid: pwd.pw_gid,
770        home_dir: (!home_dir.is_empty()).then_some(home_dir),
771    }))
772}
773
774fn lookup_passwd_by_uid(uid: libc::uid_t) -> AgentdResult<ResolvedUserLookup> {
775    let mut pwd = MaybeUninit::<libc::passwd>::uninit();
776    let mut result = ptr::null_mut();
777    let mut buf = vec![0u8; lookup_buffer_len()];
778    let rc = unsafe {
779        libc::getpwuid_r(
780            uid,
781            pwd.as_mut_ptr(),
782            buf.as_mut_ptr().cast(),
783            buf.len(),
784            &mut result,
785        )
786    };
787    if rc != 0 {
788        return Err(AgentdError::ExecSession(format!(
789            "failed to resolve guest uid {uid}: {}",
790            std::io::Error::from_raw_os_error(rc)
791        )));
792    }
793    if result.is_null() {
794        return Ok(ResolvedUserLookup::Numeric(uid));
795    }
796
797    let pwd = unsafe { pwd.assume_init() };
798    let name = unsafe { CStr::from_ptr(pwd.pw_name) }
799        .to_string_lossy()
800        .into_owned();
801    let home_dir = unsafe { CStr::from_ptr(pwd.pw_dir) }
802        .to_string_lossy()
803        .into_owned();
804    Ok(ResolvedUserLookup::Known(PasswdEntry {
805        name,
806        uid: pwd.pw_uid,
807        gid: pwd.pw_gid,
808        home_dir: (!home_dir.is_empty()).then_some(home_dir),
809    }))
810}
811
812fn lookup_group_by_name(name: &str) -> AgentdResult<Option<GroupEntry>> {
813    let name = CString::new(name)
814        .map_err(|e| AgentdError::ExecSession(format!("invalid guest group name: {e}")))?;
815    let mut grp = MaybeUninit::<libc::group>::uninit();
816    let mut result = ptr::null_mut();
817    let mut buf = vec![0u8; lookup_buffer_len()];
818    let rc = unsafe {
819        libc::getgrnam_r(
820            name.as_ptr(),
821            grp.as_mut_ptr(),
822            buf.as_mut_ptr().cast(),
823            buf.len(),
824            &mut result,
825        )
826    };
827    if rc != 0 {
828        return Err(AgentdError::ExecSession(format!(
829            "failed to resolve guest group {name:?}: {}",
830            std::io::Error::from_raw_os_error(rc)
831        )));
832    }
833    if result.is_null() {
834        return Ok(None);
835    }
836
837    let grp = unsafe { grp.assume_init() };
838    Ok(Some(GroupEntry { gid: grp.gr_gid }))
839}
840
841fn lookup_buffer_len() -> usize {
842    let size = unsafe { libc::sysconf(libc::_SC_GETPW_R_SIZE_MAX) };
843    if size > 0 { size as usize } else { 16 * 1024 }
844}
845
846fn apply_resolved_user(user: &ResolvedUser) -> AgentdResult<()> {
847    if let Some(ref name) = user.initgroups_user {
848        if unsafe { libc::initgroups(name.as_ptr(), user.gid) } != 0 {
849            return Err(std::io::Error::last_os_error().into());
850        }
851    } else if unsafe { libc::setgroups(0, ptr::null()) } != 0 {
852        return Err(std::io::Error::last_os_error().into());
853    }
854
855    if unsafe { libc::setgid(user.gid) } != 0 {
856        return Err(std::io::Error::last_os_error().into());
857    }
858    if unsafe { libc::setuid(user.uid) } != 0 {
859        return Err(std::io::Error::last_os_error().into());
860    }
861
862    Ok(())
863}
864
865fn default_home_dir(
866    req: &ExecRequest,
867    user: Option<&ResolvedUser>,
868) -> AgentdResult<Option<CString>> {
869    if env_contains_key(&req.env, "HOME") {
870        return Ok(None);
871    }
872
873    if let Some(user) = user {
874        return Ok(user.home_dir.clone());
875    }
876
877    Ok(resolve_user_spec(DEFAULT_USER_SPEC)?.home_dir)
878}
879
/// Reports whether `env` (a list of `KEY=VALUE` strings) defines `key`.
///
/// Only the text before the first `=` is compared, and it must match
/// `key` exactly. Entries without an `=` never match.
fn env_contains_key(env: &[String], key: &str) -> bool {
    env.iter()
        .filter_map(|entry| entry.split_once('='))
        .any(|(name, _)| name == key)
}
888
889fn agentd_to_io_error(err: AgentdError) -> std::io::Error {
890    std::io::Error::other(err.to_string())
891}
892
893/// Writes data to a raw fd using a blocking task, handling short writes.
894async fn blocking_write_fd(fd: RawFd, data: &[u8]) -> AgentdResult<()> {
895    let data = data.to_vec();
896    tokio::task::spawn_blocking(move || {
897        let mut written = 0;
898        while written < data.len() {
899            let ptr = unsafe { data.as_ptr().add(written) as *const libc::c_void };
900            let ret = unsafe { libc::write(fd, ptr, data.len() - written) };
901            if ret < 0 {
902                let err = std::io::Error::last_os_error();
903                let code = err.raw_os_error();
904                if code == Some(libc::EAGAIN) || code == Some(libc::EWOULDBLOCK) {
905                    wait_fd_writable(fd)?;
906                    continue;
907                }
908                if code == Some(libc::EINTR) {
909                    continue;
910                }
911                return Err(AgentdError::Io(err));
912            }
913            if ret == 0 {
914                wait_fd_writable(fd)?;
915                continue;
916            }
917            written += ret as usize;
918        }
919        Ok(())
920    })
921    .await
922    .map_err(|e| AgentdError::ExecSession(format!("stdin write join error: {e}")))?
923}
924
925fn wait_fd_writable(fd: RawFd) -> AgentdResult<()> {
926    let mut pollfd = libc::pollfd {
927        fd,
928        events: libc::POLLOUT,
929        revents: 0,
930    };
931
932    loop {
933        let ret = unsafe { libc::poll(&mut pollfd, 1, -1) };
934        if ret < 0 {
935            let err = std::io::Error::last_os_error();
936            if err.raw_os_error() == Some(libc::EINTR) {
937                continue;
938            }
939            return Err(AgentdError::Io(err));
940        }
941        if ret == 0 {
942            continue;
943        }
944        // Any positive return means the fd is actionable: POLLOUT lets the
945        // next write make progress, and POLLHUP/POLLERR/POLLNVAL will cause
946        // the next write to fail with a real errno (typically EPIPE) which
947        // is more meaningful than poll's revents.
948        return Ok(());
949    }
950}
951
952/// Background task that reads from a PTY master fd and sends output events.
953async fn pty_reader_task(
954    id: u32,
955    pid: i32,
956    master_fd: OwnedFd,
957    tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
958) {
959    let tx_output = tx.clone();
960    let read_result = tokio::task::spawn_blocking(move || {
961        // PTY masters are safer with a dedicated blocking read loop than with
962        // edge-driven readiness. Fast writers followed by process exit can
963        // strand the tail behind a missed wakeup/HUP transition.
964        let raw = master_fd.as_raw_fd();
965        let flags = unsafe { libc::fcntl(raw, libc::F_GETFL) };
966        if flags >= 0 {
967            unsafe { libc::fcntl(raw, libc::F_SETFL, flags & !libc::O_NONBLOCK) };
968        }
969
970        loop {
971            let mut buf = [0u8; 4096];
972            let n = unsafe { libc::read(raw, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) };
973
974            if n > 0 {
975                if tx_output
976                    .send((id, SessionOutput::Stdout(buf[..n as usize].to_vec())))
977                    .is_err()
978                {
979                    break;
980                }
981                continue;
982            }
983
984            if n == 0 {
985                break;
986            }
987
988            let err = std::io::Error::last_os_error();
989            match err.raw_os_error() {
990                Some(libc::EINTR) => continue,
991                Some(libc::EIO) => break,
992                _ => break,
993            }
994        }
995    })
996    .await;
997
998    let _ = read_result;
999
1000    let code = wait_for_pid(pid).await;
1001    let _ = tx.send((id, SessionOutput::Exited(code)));
1002}
1003
1004/// Background task that reads from piped stdout/stderr and sends output events.
1005async fn pipe_reader_task(
1006    id: u32,
1007    mut child: Child,
1008    stdout: Option<tokio::process::ChildStdout>,
1009    stderr: Option<tokio::process::ChildStderr>,
1010    tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
1011) {
1012    let mut stdout = stdout;
1013    let mut stderr = stderr;
1014    let mut stdout_eof = stdout.is_none();
1015    let mut stderr_eof = stderr.is_none();
1016
1017    while !stdout_eof || !stderr_eof {
1018        let mut stdout_buf = [0u8; 4096];
1019        let mut stderr_buf = [0u8; 4096];
1020
1021        tokio::select! {
1022            result = async {
1023                match stdout.as_mut() {
1024                    Some(out) => out.read(&mut stdout_buf).await,
1025                    None => std::future::pending().await,
1026                }
1027            }, if !stdout_eof => {
1028                match result {
1029                    Ok(0) | Err(_) => {
1030                        stdout = None;
1031                        stdout_eof = true;
1032                    }
1033                    Ok(n) => {
1034                        let _ = tx.send((id, SessionOutput::Stdout(stdout_buf[..n].to_vec())));
1035                    }
1036                }
1037            }
1038            result = async {
1039                match stderr.as_mut() {
1040                    Some(err) => err.read(&mut stderr_buf).await,
1041                    None => std::future::pending().await,
1042                }
1043            }, if !stderr_eof => {
1044                match result {
1045                    Ok(0) | Err(_) => {
1046                        stderr = None;
1047                        stderr_eof = true;
1048                    }
1049                    Ok(n) => {
1050                        let _ = tx.send((id, SessionOutput::Stderr(stderr_buf[..n].to_vec())));
1051                    }
1052                }
1053            }
1054        }
1055    }
1056
1057    // Both streams are done — wait for process exit.
1058    let code = match child.wait().await {
1059        Ok(status) => status.code().unwrap_or(-1),
1060        Err(_) => -1,
1061    };
1062
1063    let _ = tx.send((id, SessionOutput::Exited(code)));
1064}
1065
1066/// Waits for a process to exit by PID and returns the exit code.
1067async fn wait_for_pid(pid: i32) -> i32 {
1068    tokio::task::spawn_blocking(move || {
1069        let mut status: i32 = 0;
1070        unsafe {
1071            libc::waitpid(pid, &mut status, 0);
1072        }
1073        if libc::WIFEXITED(status) {
1074            libc::WEXITSTATUS(status)
1075        } else {
1076            -1
1077        }
1078    })
1079    .await
1080    .unwrap_or(-1)
1081}
1082
1083//--------------------------------------------------------------------------------------------------
1084// Tests
1085//--------------------------------------------------------------------------------------------------
1086
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use tokio::time;

    use microsandbox_protocol::exec::ExecRequest;

    use super::*;

    /// Builds the baseline request shared by the tests: a non-TTY exec of
    /// `cmd` on a 24x80 terminal with no args, env, cwd, user, or rlimits.
    fn base_request(cmd: &str) -> ExecRequest {
        ExecRequest {
            cmd: cmd.to_string(),
            args: Vec::new(),
            env: Vec::new(),
            cwd: None,
            user: None,
            tty: false,
            rows: 24,
            cols: 80,
            rlimits: Vec::new(),
        }
    }

    /// Builds a fixed non-root user whose home directory is `/home/tester`.
    fn tester_user() -> ResolvedUser {
        ResolvedUser {
            uid: 1000,
            gid: 1000,
            initgroups_user: None,
            home_dir: Some(CString::new("/home/tester").unwrap()),
        }
    }

    /// Returns the offset of the first occurrence of `needle` in `haystack`.
    fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
        haystack
            .windows(needle.len())
            .position(|window| window == needle)
    }

    /// Renders an optional home directory as lossy UTF-8 text for comparison.
    fn home_text(home: &Option<CString>) -> Option<String> {
        home.as_deref()
            .map(|path| path.to_string_lossy().into_owned())
    }

    #[tokio::test]
    async fn test_pty_reader_drains_ready_fd() {
        let (tx, mut rx) = mpsc::unbounded_channel();
        let req = ExecRequest {
            args: vec![
                "-c".to_string(),
                "i=0; while [ $i -lt 256 ]; do printf AAAA; i=$((i+1)); done; printf SECOND; sleep 0.1; printf '<END>\\n'; sleep 0.1; exit 0"
                    .to_string(),
            ],
            env: vec!["PATH=/usr/local/bin:/usr/bin:/bin".to_string()],
            tty: true,
            ..base_request("/bin/sh")
        };

        let session = ExecSession::spawn(7, &req, tx, None).expect("spawn pty session");
        let mut collected: Vec<u8> = Vec::new();
        let mut exit_code = None;

        // Drain events until the session reports its exit, bounded by a timeout.
        let drained = time::timeout(Duration::from_secs(15), async {
            while let Some((id, output)) = rx.recv().await {
                assert_eq!(id, 7);
                match output {
                    SessionOutput::Stdout(data) => collected.extend_from_slice(&data),
                    SessionOutput::Exited(code) => {
                        exit_code = Some(code);
                        break;
                    }
                    SessionOutput::Stderr(_) | SessionOutput::Raw(_) => {}
                }
            }
        })
        .await;

        if drained.is_err() {
            let _ = session.send_signal(libc::SIGKILL);
            panic!("timed out waiting for PTY output");
        }

        assert_eq!(exit_code, Some(0));

        // Both markers must be present, with SECOND ahead of <END>.
        let second_at = find_bytes(&collected, b"SECOND");
        let end_at = find_bytes(&collected, b"<END>");
        let in_order = match (second_at, end_at) {
            (Some(second), Some(end)) => second < end,
            _ => false,
        };

        assert!(
            in_order,
            "expected immediate PTY write to arrive before later output; got {:?}",
            String::from_utf8_lossy(&collected),
        );
    }

    #[test]
    fn test_resolve_user_spec_for_current_uid_gid() {
        let uid = unsafe { libc::getuid() };
        let gid = unsafe { libc::getgid() };
        let spec = format!("{uid}:{gid}");

        let resolved = resolve_user_spec(&spec).expect("resolve numeric user");

        assert_eq!(resolved.uid, uid);
        assert_eq!(resolved.gid, gid);
    }

    #[test]
    fn test_request_user_overrides_config_default() {
        let req = ExecRequest {
            user: Some("1:1".to_string()),
            ..base_request("/bin/true")
        };

        let resolved = resolve_requested_user(&req, Some("0:0")).expect("resolve requested user");
        assert_eq!(resolved.unwrap().uid, 1);
    }

    #[test]
    fn test_config_default_user_used_when_request_has_none() {
        let req = base_request("/bin/true");

        let uid = unsafe { libc::getuid() };
        let gid = unsafe { libc::getgid() };
        let config_default = format!("{uid}:{gid}");

        let resolved = resolve_requested_user(&req, Some(&config_default))
            .expect("resolve with config default")
            .expect("should resolve to a user");

        assert_eq!(resolved.uid, uid);
        assert_eq!(resolved.gid, gid);
    }

    #[test]
    fn test_request_without_user_does_not_apply_user_switch() {
        let req = base_request("/bin/true");

        let resolved = resolve_requested_user(&req, None).expect("resolve absent user");
        assert!(resolved.is_none());
    }

    #[test]
    fn test_default_user_absent_resolves_to_root() {
        let resolved = resolve_default_user(None).expect("resolve absent default user");
        assert_eq!(resolved, (0, 0));
    }

    #[test]
    fn test_default_home_dir_uses_resolved_user_home() {
        let req = base_request("/bin/true");
        let user = tester_user();

        let home = default_home_dir(&req, Some(&user)).expect("resolve default home");

        assert_eq!(home_text(&home), Some("/home/tester".to_string()));
    }

    #[test]
    fn test_default_home_dir_uses_root_when_user_absent() {
        let req = base_request("/bin/true");
        let root = resolve_user_spec(DEFAULT_USER_SPEC).expect("resolve implicit root");

        let home = default_home_dir(&req, None).expect("resolve default home");

        assert_eq!(home_text(&home), home_text(&root.home_dir));
    }

    #[test]
    fn test_default_home_dir_respects_explicit_home_env() {
        let req = ExecRequest {
            env: vec!["HOME=/tmp/custom".to_string()],
            ..base_request("/bin/true")
        };
        let user = tester_user();

        let home = default_home_dir(&req, Some(&user)).expect("resolve default home");

        assert!(home.is_none());
    }

    #[tokio::test]
    async fn test_spawn_pipe_error_does_not_include_probe_details() {
        let (tx, _rx) = mpsc::unbounded_channel();
        let req = base_request("/definitely/not/a/real/binary");

        let err = ExecSession::spawn(9, &req, tx, None).expect_err("spawn should fail");

        // Spawn failures produce the typed `ExecSpawnFailed` so the host can
        // render a useful message + hint. The classifier maps ENOENT on the
        // binary path to `NotFound`.
        let AgentdError::ExecSpawnFailed(payload) = &err else {
            panic!("expected ExecSpawnFailed, got: {err:?}");
        };
        assert_eq!(payload.kind, ExecFailureKind::NotFound);
        assert_eq!(payload.errno, Some(libc::ENOENT));
        assert_eq!(payload.errno_name.as_deref(), Some("ENOENT"));

        // The original intent of the test: probe internals must not leak
        // into the error message. The format is now
        // `spawn "<cmd>": <io::Error>` from `exec_failed_from_io_error`.
        // Verify that none of the old probe-detail keys snuck back in.
        let message = &payload.message;
        assert!(message.contains("spawn"));

        let probe_keys = [
            "symlink_metadata=",
            "metadata=",
            "magic=",
            "path_probe=",
            "cwd_probe=",
            "target_probe=",
        ];
        for probe_key in probe_keys {
            assert!(!message.contains(probe_key));
        }
    }
}